Class: Export::Dwca::Data

Inherits:
Object
  • Object
show all
Defined in:
lib/export/dwca/data.rb

Overview

Wrapper to build DWCA zipfiles for a specific project. See tasks/accessions/report/dwc_controller.rb for use.

With help from thinkingeek.com/2013/11/15/create-temporary-zip-file-send-response-rails/

Usage:

begin
 data = Export::Dwca::Data.new(core_scope: DwcOccurrence.where(project_id: sessions_current_project_id))
ensure
 data.cleanup
end

Always use the ensure/data.cleanup pattern!

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(core_scope: nil, extension_scopes: {}, predicate_extension_params: {}) ⇒ Data

Returns a new instance of Data.

Parameters:

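  • core_scope (String, ActiveRecord::Relation) — fully formed SQL or a relation; anything else raises ArgumentError
  • extension_scopes (Hash) — only the :biological_extension_scope key is read
  • predicate_extension_params (Hash) — merged over the defaults { collection_object_predicate_id: [], collecting_event_predicate_id: [] }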


47
48
49
50
51
52
53
# File 'lib/export/dwca/data.rb', line 47

# Builds a new exporter around a core scope and optional extensions.
#
# @param core_scope [String, ActiveRecord::Relation] fully formed SQL or a
#   relation; normalized through get_scope
# @param extension_scopes [Hash, nil] only :biological_extension_scope is read;
#   nil is treated like an empty Hash
# @param predicate_extension_params [Hash, nil] merged over the default
#   predicate id lists; nil is treated like an empty Hash
# @raise [ArgumentError] (from get_scope) when core_scope is neither a String
#   nor an ActiveRecord::Relation
def initialize(core_scope: nil, extension_scopes: {}, predicate_extension_params: {})
  @core_scope = get_scope(core_scope)

  # NOTE: unlike core_scope, this value is stored as given and is not passed
  # through get_scope.
  # An explicit nil (e.g. forwarded from optional params) must not blow up here.
  @biological_extension_scope = (extension_scopes || {})[:biological_extension_scope]

  predicate_defaults = { collection_object_predicate_id: [], collecting_event_predicate_id: [] }
  # Hash#merge raises TypeError on nil, so fall back to an empty Hash.
  @data_predicate_ids = predicate_defaults.merge(predicate_extension_params || {})
end

Instance Attribute Details

#all_dataObject

Returns Tempfile.

Returns:

  • Tempfile



44
45
46
# File 'lib/export/dwca/data.rb', line 44

# Reader for @all_data.
#
# @return [Tempfile, nil] the stored value; nil when nothing has been assigned
def all_data = @all_data

#biological_extension_scopeObject

Returns the value of attribute biological_extension_scope.



31
32
33
# File 'lib/export/dwca/data.rb', line 31

# Reader for @biological_extension_scope.
#
# @return [Object, nil] the stored value; nil when nothing has been assigned
def biological_extension_scope = @biological_extension_scope

#core_scopeObject

Returns the value of attribute core_scope.



29
30
31
# File 'lib/export/dwca/data.rb', line 29

# Reader for @core_scope.
#
# @return [Object, nil] the stored value; nil when nothing has been assigned
def core_scope = @core_scope

#dataTempfile

Returns the csv data as a tempfile.

Returns:

  • (Tempfile)

    the csv data as a tempfile



85
86
87
# File 'lib/export/dwca/data.rb', line 85

# Reader for @data.
#
# @return [Tempfile, nil] the stored value; nil when nothing has been assigned
def data = @data

#data_predicate_idsObject

collection_object_predicate_id: [], collecting_event_predicate_id: []

Returns:

  • Hash



41
42
43
# File 'lib/export/dwca/data.rb', line 41

# Reader for @data_predicate_ids.
#
# @return [Hash, nil] the stored value; nil when nothing has been assigned
def data_predicate_ids = @data_predicate_ids

#emlTempfile

This is a stub, and only half-heartedly done. You should be using IPT for the time being. See also

https://github.com/gbif/ipt/wiki/
https://github.com/gbif/ipt/wiki/#exemplar-datasets

TODO: reference biological_resource_extension.csv

Returns:

  • (Tempfile)

    metadata about this dataset



216
217
218
# File 'lib/export/dwca/data.rb', line 216

# Reader for @eml.
#
# @return [Tempfile, nil] the stored value; nil when nothing has been assigned
def eml = @eml

#filenameString (readonly)

the name of zipfile

Returns:

  • (String)


389
390
391
# File 'lib/export/dwca/data.rb', line 389

# Reader for @filename.
#
# @return [String, nil] the stored value; nil when nothing has been assigned
def filename = @filename

#metaObject

Returns the value of attribute meta.



25
26
27
# File 'lib/export/dwca/data.rb', line 25

# Reader for @meta.
#
# @return [Object, nil] the stored value; nil when nothing has been assigned
def meta = @meta

#predicate_dataObject

Returns the value of attribute predicate_data.



37
38
39
# File 'lib/export/dwca/data.rb', line 37

# Reader for @predicate_data.
#
# @return [Object, nil] the stored value; nil when nothing has been assigned
def predicate_data = @predicate_data

#totalObject

TODO update



33
34
35
# File 'lib/export/dwca/data.rb', line 33

# Reader for @total.
#
# @return [Object, nil] the stored value; nil when nothing has been assigned
def total = @total

#zipfileTempfile

Returns the zipfile.

Returns:

  • (Tempfile)

    the zipfile



380
381
382
# File 'lib/export/dwca/data.rb', line 380

# Reader for @zipfile.
#
# @return [Tempfile, nil] the stored value; nil when nothing has been assigned
def zipfile = @zipfile

Instance Method Details

#biological_resource_relationshipObject



303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
# File 'lib/export/dwca/data.rb', line 303

# Writes the biological extension records to a fresh Tempfile.
#
# A new Tempfile is created on every call and stored in
# @biological_resource_relationship. When there are no records the file holds
# a single newline; otherwise it holds the output of
# ::Export::Download.generate_csv for the extension scope's computed_columns.
#
# @return [Tempfile, nil] the rewound tempfile, or nil when no biological
#   extension scope was provided
def biological_resource_relationship
  return nil if biological_extension_scope.nil?

  file = Tempfile.new('biological_resource_relationship.xml')
  @biological_resource_relationship = file

  content =
    if no_records?
      "\n"
    else
      ::Export::Download.generate_csv(
        biological_extension_scope.computed_columns,
        trim_columns: false,
        trim_rows: false,
        header_converters: []
      )
    end

  file.write(content)
  file.flush
  file.rewind # leave the file positioned at the start for the next reader
  file
end

#build_zipObject



365
366
367
368
369
370
371
372
373
374
375
376
# File 'lib/export/dwca/data.rb', line 365

# Assembles the archive into a new Tempfile named after `filename`.
#
# Adds three entries, sourced from the paths of all_data, meta and eml:
# 'data.csv', 'meta.xml' and 'eml.xml'.
#
# @return [Tempfile] the tempfile backing the archive (left open)
def build_zip
  Tempfile.new(filename).tap do |archive|
    # NOTE(review): the empty block presumably writes an empty zip structure
    # into the tempfile so Zip::File can reopen it by path; confirm against rubyzip.
    Zip::OutputStream.open(archive) { |_stream| }

    Zip::File.open(archive.path, Zip::File::CREATE) do |zip_file|
      zip_file.add('data.csv', all_data.path)
      zip_file.add('meta.xml', meta.path)
      zip_file.add('eml.xml', eml.path)
    end
  end
end

#cleanupTrue

Closes and deletes all temporary files, then returns true.

Returns:

  • (True)

    close and delete all temporary files



396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
# File 'lib/export/dwca/data.rb', line 396

# Closes and deletes every temporary file this instance holds.
#
# Intended to be called from an `ensure`, so it tolerates files that were
# never created (nil). It also releases the Tempfile stored in
# @biological_resource_relationship, which was previously never cleaned up.
#
# @return [true]
def cleanup
  temp_files = [zipfile, meta, eml, data]
  # predicate_data is only touched when predicate options were supplied.
  temp_files << predicate_data if predicate_options_present?
  temp_files << all_data
  # Read the ivar directly: calling biological_resource_relationship would
  # create a brand-new Tempfile instead of returning the existing one.
  temp_files << @biological_resource_relationship

  temp_files.compact.each do |file|
    file.close
    file.unlink
  end
  true
end

#csvCSV

Returns the data as a CSV object.

Returns:

  • (CSV)

    the data as a CSV object



65
66
67
68
69
70
71
72
73
74
75
# File 'lib/export/dwca/data.rb', line 65

# Renders the core scope through ::Export::Download.generate_csv.
#
# @return [Object] whatever generate_csv returns (documented on this page as
#   the data "as a CSV object" — TODO confirm the actual type)
def csv
  columns = core_scope.computed_columns

  options = {
    # TODO: check to see if we need this
    exclude_columns: ::DwcOccurrence.excluded_columns,
    column_order: ::CollectionObject::DWC_OCCURRENCE_MAP.keys, # TODO: add other maps here
    trim_columns: true, # going to have to be optional
    trim_rows: false,
    header_converters: [:dwc_headers]
  }

  ::Export::Download.generate_csv(columns, **options)
end

#get_scope(scope) ⇒ Object

!params core_scope [String, ActiveRecord::Relation]

String is fully formed SQL


416
417
418
419
420
421
422
423
424
# File 'lib/export/dwca/data.rb', line 416

# Normalizes the given scope.
#
# A String is treated as fully formed SQL and wrapped as a sub-select aliased
# to dwc_occurrences; the SQL is embedded verbatim, so it must come from a
# trusted source. An ActiveRecord::Relation is returned untouched.
#
# @param scope [String, ActiveRecord::Relation]
# @return [Object] the result of DwcOccurrence.from for a String, or the
#   relation itself
# @raise [ArgumentError] for any other kind of argument
def get_scope(scope)
  case scope
  when String
    DwcOccurrence.from("(#{scope}) as dwc_occurrences")
  when ActiveRecord::Relation
    scope
  else
    raise ArgumentError, 'Scope is not a SQL string or ActiveRecord::Relation'
  end
end

#meta_fieldsArray

id, and non-standard DwC columns are handled elsewhere

Returns:

  • (Array)

    use the temporarily written, and refined, CSV file to read off the existing headers so we can use them in writing meta.xml



330
331
332
333
334
335
# File 'lib/export/dwca/data.rb', line 330

# Reads the header names from the first line of the all_data file.
#
# The line is stripped, split on tabs, and its first column is dropped.
#
# @return [Array<String>] header names after the first column; empty when
#   there are no records or the file has no first line
def meta_fields
  return [] if no_records?

  header_line = File.open(all_data, &:gets)
  return [] if header_line.nil?

  header_line.strip.split("\t").drop(1)
end

#no_records?Boolean

Returns true if provided core_scope returns no records.

Returns:

  • (Boolean)

    true if provided core_scope returns no records



79
80
81
# File 'lib/export/dwca/data.rb', line 79

# Reports whether the export is empty.
#
# Uses `==`, so a nil total yields false rather than raising.
#
# @return [Boolean] true when total equals 0
def no_records?
  total == 0
end

#package_download(download) ⇒ Download

Returns a download instance.

Parameters:

Returns:



428
429
430
431
# File 'lib/export/dwca/data.rb', line 428

# Points the given download at the generated zipfile.
#
# @param download [#update!] record to update; update! is called with
#   source_file_path set to the zipfile's path
# @return [Object] the same download that was passed in
def package_download(download)
  download.tap { |record| record.update!(source_file_path: zipfile.path) }
end

#predicate_options_present?Boolean

Returns:

  • (Boolean)


55
56
57
# File 'lib/export/dwca/data.rb', line 55

# Tells whether any predicate ids were supplied for either supported key.
#
# @return [Boolean] true when the collection object or collecting event
#   predicate id entry in data_predicate_ids is present
def predicate_options_present?
  %i[collection_object_predicate_id collecting_event_predicate_id].any? do |key|
    data_predicate_ids[key].present?
  end
end