Class: Export::Dwca::Data

Inherits:
Object
  • Object
show all
Defined in:
lib/export/dwca/data.rb

Overview

Wrapper to build DWCA zipfiles for a specific project. See tasks/accessions/report/dwc_controller.rb for use.

With help from thinkingeek.com/2013/11/15/create-temporary-zip-file-send-response-rails/

Usage:

begin
 data = Dwca::Data.new(core_scope: DwcOccurrence.where(project_id: sessions_current_project_id))
ensure
 data.cleanup
end

Always use the ensure/data.cleanup pattern!

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(core_scope: nil, extension_scopes: {}, predicate_extensions: {}, taxonworks_extensions: []) ⇒ Data

Returns a new instance of Data.

Parameters:

  • taxonworks_extensions (Array<Symbol>) (defaults to: [])

    List of methods to perform on each CO

Raises:

  • (ArgumentError)


71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/export/dwca/data.rb', line 71

# Stores the scopes and options that the export is built from.
#
# @param core_scope [String, ActiveRecord::Relation] required; the records to export
# @param extension_scopes [Hash] optional scopes, read from the keys
#   :biological_associations and :media
# @param predicate_extensions [Hash] merged over the default (empty) lists of
#   :collection_object_predicate_id and :collecting_event_predicate_id
# @param taxonworks_extensions [Array<Symbol>] list of methods to perform on each CO
# @raise [ArgumentError] when core_scope is nil
def initialize(core_scope: nil, extension_scopes: {}, predicate_extensions: {}, taxonworks_extensions: [])
  raise ArgumentError, 'must pass a core_scope' if core_scope.nil?

  # Both predicate keys are always present, even when the caller passes none.
  default_predicate_ids = { collection_object_predicate_id: [], collecting_event_predicate_id: [] }

  @core_scope = core_scope
  @taxonworks_extension_methods = taxonworks_extensions
  @data_predicate_ids = default_predicate_ids.merge(predicate_extensions)

  @media_extension = extension_scopes[:media]
  @biological_associations_extension = extension_scopes[:biological_associations]
end

Instance Attribute Details

#all_dataObject

Returns Tempfile.

Returns:

  • Tempfile



68
69
70
# File 'lib/export/dwca/data.rb', line 68

# @return [Tempfile] the value currently held in @all_data
def all_data = @all_data

#biological_associations_extensionScope?

Returning BiologicalAssociation

Returns:

  • (Scope, nil)

    Returning BiologicalAssociation



35
36
37
# File 'lib/export/dwca/data.rb', line 35

# @return [Scope, nil] the scope returning BiologicalAssociation, when one was provided
def biological_associations_extension = @biological_associations_extension

#collecting_event_attributesObject

@return Array

1 row per CO per DA (type) on CE


60
61
62
# File 'lib/export/dwca/data.rb', line 60

# @return [Array] 1 row per CO per DA (type) on CE
def collecting_event_attributes = @collecting_event_attributes

#collection_object_attributesObject

Returns Array.

Returns:

  • Array



57
58
59
# File 'lib/export/dwca/data.rb', line 57

# @return [Array] the value currently held in @collection_object_attributes
def collection_object_attributes = @collection_object_attributes

#collection_object_idsObject

Returns Array ordered in the order they will be placed in the file !!! Breaks if inter-mingled with asserted distributions !!!.

Returns:

  • Array ordered in the order they will be placed in the file !!! Breaks if inter-mingled with asserted distributions !!!



54
55
56
# File 'lib/export/dwca/data.rb', line 54

# @return [Array] ids ordered in the order they will be placed in the file
#   !!! Breaks if inter-mingled with asserted distributions !!!
def collection_object_ids = @collection_object_ids

#core_scopeObject

!params core_scope [String, ActiveRecord::Relation]

String is fully formed SQL


31
32
33
# File 'lib/export/dwca/data.rb', line 31

# @return [String, ActiveRecord::Relation] the core scope; a String is fully formed SQL
def core_scope = @core_scope

#dataTempfile

Returns the csv data as a tempfile.

Returns:

  • (Tempfile)

    the csv data as a tempfile



145
146
147
# File 'lib/export/dwca/data.rb', line 145

# @return [Tempfile] the csv data as a tempfile
def data = @data

#data_predicate_idsObject

collection_object_predicate_id: [], collecting_event_predicate_id: []

Returns:

  • Hash



49
50
51
# File 'lib/export/dwca/data.rb', line 49

# @return [Hash] of the form
#   { collection_object_predicate_id: [], collecting_event_predicate_id: [] }
def data_predicate_ids = @data_predicate_ids

#emlTempfile

This is a stub, and only half-heartedly done. You should be using IPT for the time being. See also

https://github.com/gbif/ipt/wiki/
https://github.com/gbif/ipt/wiki/#exemplar-datasets

TODO: reference biological_resource_extension.csv

Returns:

  • (Tempfile)

    metadata about this dataset



433
434
435
# File 'lib/export/dwca/data.rb', line 433

# @return [Tempfile] metadata about this dataset (documented as a stub; IPT is recommended instead)
def eml = @eml

#filenameString (readonly)

the name of zipfile

Returns:

  • (String)


607
608
609
# File 'lib/export/dwca/data.rb', line 607

# @return [String] the name of zipfile
def filename = @filename

#media_extensionScope?

Returns Image(?).

Returns:

  • (Scope, nil)

    Image(?)



39
40
41
# File 'lib/export/dwca/data.rb', line 39

# @return [Scope, nil] the media scope, when one was provided (Image?)
def media_extension = @media_extension

#metaObject

Returns the value of attribute meta.



25
26
27
# File 'lib/export/dwca/data.rb', line 25

# @return the value of attribute meta
def meta = @meta

#predicate_dataObject

Returns the value of attribute predicate_data.



45
46
47
# File 'lib/export/dwca/data.rb', line 45

# @return the value of attribute predicate_data
def predicate_data = @predicate_data

#taxonworks_extension_dataObject

rubocop:disable Metrics/MethodLength



166
167
168
# File 'lib/export/dwca/data.rb', line 166

# @return the value of attribute taxonworks_extension_data
def taxonworks_extension_data = @taxonworks_extension_data

#taxonworks_extension_methodsObject

Returns the value of attribute taxonworks_extension_methods.



65
66
67
# File 'lib/export/dwca/data.rb', line 65

# @return [Array<Symbol>] list of methods to perform on each CO
def taxonworks_extension_methods = @taxonworks_extension_methods

#totalObject

TODO update



41
42
43
# File 'lib/export/dwca/data.rb', line 41

# TODO update
# @return the value of attribute total
def total = @total

#zipfileTempfile

Returns the zipfile.

Returns:

  • (Tempfile)

    the zipfile



598
599
600
# File 'lib/export/dwca/data.rb', line 598

# @return [Tempfile] the zipfile
def zipfile = @zipfile

Instance Method Details

#biological_associations_resource_relationshipObject

rubocop:enable Metrics/MethodLength



522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
# File 'lib/export/dwca/data.rb', line 522

# Builds the tempfile holding the biological associations extension rows.
#
# The tempfile is built once and memoized. Previously every call created a
# brand-new Tempfile, so callers that invoke this method more than once
# (e.g. to read `.path`, then `.close`, then `.unlink`) each operated on a
# different file and left the earlier ones behind.
#
# @return [Tempfile, nil] nil when no biological_associations_extension was provided
def biological_associations_resource_relationship
  return nil if biological_associations_extension.nil?
  return @biological_associations_resource_relationship if @biological_associations_resource_relationship

  # Produce the content before creating the tempfile so a failure while
  # generating it does not leave an empty tempfile behind.
  content =
    if no_records?
      "\n"
    else
      Export::CSV::Dwc::Extension::BiologicalAssociations.csv(biological_associations_extension)
    end

  file = Tempfile.new('biological_resource_relationship.xml')
  file.write(content)
  file.flush
  file.rewind

  @biological_associations_resource_relationship = file
end

#build_zipObject



579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
# File 'lib/export/dwca/data.rb', line 579

# Assembles the archive: the core data file, the optional extension files,
# and the meta/eml descriptors.
#
# @return [Tempfile] the tempfile that the zip archive was written to
def build_zip
  archive = Tempfile.new(filename)

  # Open and immediately close an output stream on the tempfile; presumably
  # this leaves a valid, empty archive for Zip::File to open below.
  Zip::OutputStream.open(archive) { |_stream| }

  Zip::File.open(archive.path, Zip::File::CREATE) do |entries|
    entries.add('data.tsv', all_data.path)

    # Extension files are only included when the matching scope was given.
    if media_extension
      entries.add('media.csv', media.path)
    end

    if biological_associations_extension
      entries.add('resource_relationships.tsv', biological_associations_resource_relationship.path)
    end

    entries.add('meta.xml', meta.path)
    entries.add('eml.xml', eml.path)
  end

  archive
end

#cleanupTrue

Returns close and delete all temporary files.

Returns:

  • (True)

    close and delete all temporary files



614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
# File 'lib/export/dwca/data.rb', line 614

# Closes and deletes all temporary files.
#
# Each tempfile is resolved exactly once and then both closed and unlinked.
# This matters for builder-style methods such as
# biological_associations_resource_relationship: resolving it separately for
# `close` and for `unlink` could act on two different Tempfile instances.
#
# @return [True]
def cleanup
  release = lambda do |tempfile|
    tempfile.close
    tempfile.unlink
  end

  release.call(zipfile)
  release.call(meta)
  release.call(eml)
  release.call(data)

  # Optional files only exist when the corresponding option was requested.
  release.call(biological_associations_resource_relationship) if biological_associations_extension
  release.call(predicate_data) if predicate_options_present?
  release.call(taxonworks_extension_data) if taxonworks_options_present?

  release.call(all_data)
  true
end

#collecting_event_attributes_queryObject

Returns Relation the unique attributes derived from CollectingEvents.

Returns:

  • Relation the unique attributes derived from CollectingEvents



288
289
290
291
292
293
294
295
296
297
298
299
300
# File 'lib/export/dwca/data.rb', line 288

# Builds a relation over the distinct data attributes attached to the
# CollectingEvents of the exported collection objects.
#
# The collection objects query is embedded as a CTE named
# touched_collection_objects, and the whole statement is then wrapped as a
# sub-select aliased data_attributes.
#
# @return [Relation] the unique attributes derived from CollectingEvents
def collecting_event_attributes_query
  touched_sql = collection_objects.to_sql

  attributes_sql = ::InternalAttribute
    .joins("JOIN collecting_events on data_attributes.attribute_subject_id = collecting_events.id AND data_attributes.attribute_subject_type = 'CollectingEvent'")
    .joins('JOIN touched_collection_objects as tco1 on tco1.collecting_event_id = collecting_events.id')
    .distinct
    .to_sql

  statement = "WITH touched_collection_objects AS (#{touched_sql}) #{attributes_sql}"

  ::InternalAttribute.from("(#{statement}) as data_attributes")
  # .joins(:predicate)
  # .joins("JOIN collecting_events on data_attributes.attribute_subject_id = collecting_events.id AND data_attributes.attribute_subject_type = 'CollectingEvent'")
  # .joins('JOIN collection_objects on collection_objects.collecting_event_id = collecting_events.id')
end

#collecting_eventsObject

def asserted_distributions

AssertedDistribution.joins(:dwc_occurrence).where(dwc_occurrence: core_scope)

end



267
268
269
# File 'lib/export/dwca/data.rb', line 267

# CollectingEvents joined to their collection objects and data attributes,
# restricted to the exported collection objects.
#
# @return the relation produced by the joins/where chain
def collecting_events
  joined = CollectingEvent.joins(:collection_objects, :data_attributes)
  joined.where(collection_objects: collection_objects)
end

#collection_object_attributes_queryObject



271
272
273
274
275
# File 'lib/export/dwca/data.rb', line 271

# Relation over the InternalAttributes (joined to their predicate) whose
# subject is one of the exported collection objects.
#
# Memoized in its own instance variable. It previously shared
# @collection_object_attributes with the collection_object_attributes reader,
# so whichever was populated first was returned by both.
#
# @return the memoized relation produced by the joins/where chain
def collection_object_attributes_query
  @collection_object_attributes_query ||= ::InternalAttribute
    .joins(:predicate)
    .where(attribute_subject: collection_objects)
end

#collection_objectsObject



320
321
322
# File 'lib/export/dwca/data.rb', line 320

# CollectionObjects whose dwc_occurrence is in core_scope, ordered by
# dwc_occurrences.id.
#
# @return the relation produced by the joins/where/order chain
def collection_objects
  with_occurrences = CollectionObject.joins(:dwc_occurrence)
  in_scope = with_occurrences.where(dwc_occurrence: core_scope)
  in_scope.order('dwc_occurrences.id')
end

#csvCSV

Returns the data as a CSV object.

Returns:

  • (CSV)

    the data as a CSV object



125
126
127
128
129
130
131
132
133
134
135
# File 'lib/export/dwca/data.rb', line 125

# Generates the delimited core table from core_scope via
# ::Export::CSV.generate_csv.
#
# @return [CSV] the data as a CSV object
def csv
  rows = core_scope.computed_columns
  excluded = ::DwcOccurrence.excluded_columns # TODO: check to see if we need this
  # TODO: add other maps here
  ordering = ::CollectionObject::DWC_OCCURRENCE_MAP.keys + ::CollectionObject::EXTENSION_FIELDS

  ::Export::CSV.generate_csv(
    rows,
    exclude_columns: excluded,
    column_order: ordering,
    trim_columns: true, # going to have to be optional
    trim_rows: false,
    header_converters: [:dwc_headers]
  )
end

#meta_fieldsArray

id, and non-standard DwC columns are handled elsewhere

Returns:

  • (Array)

    use the temporarily written, and refined, CSV file to read off the existing headers so we can use them in writing meta.xml



544
545
546
547
548
549
# File 'lib/export/dwca/data.rb', line 544

# Reads the header row of the written data file and returns its column
# names, minus the first column.
#
# @return [Array] the tab-separated headers after the first; empty when
#   there are no records or the file has no first line
def meta_fields
  return [] if no_records?

  header_line = File.open(all_data) { |io| io.gets }
  return [] if header_line.nil?

  # The leading column is dropped here (id is handled elsewhere).
  header_line.strip.split("\t").drop(1)
end

#no_records?Boolean

Returns true if provided core_scope returns no records.

Returns:

  • (Boolean)

    true if provided core_scope returns no records



139
140
141
# File 'lib/export/dwca/data.rb', line 139

# @return [Boolean] true if provided core_scope returns no records (total equals 0)
def no_records?
  record_count = total
  record_count == 0
end

#package_download(download) ⇒ Download

Returns a download instance.

Parameters:

Returns:



646
647
648
649
# File 'lib/export/dwca/data.rb', line 646

# Points the given download at the built zipfile.
#
# @param download the record to update; must respond to update!
# @return [Download] the same download instance that was passed in
def package_download(download)
  download.tap do |record|
    record.update!(source_file_path: zipfile.path)
  end
end

#predicate_options_present?Boolean

Returns:

  • (Boolean)


111
112
113
# File 'lib/export/dwca/data.rb', line 111

# @return [Boolean] true when either predicate id list in data_predicate_ids is present
def predicate_options_present?
  %i[collection_object_predicate_id collecting_event_predicate_id].any? do |key|
    data_predicate_ids[key].present?
  end
end

#taxonworks_options_present?Boolean

Returns:

  • (Boolean)


115
116
117
# File 'lib/export/dwca/data.rb', line 115

# @return [Boolean] true when any taxonworks extension methods were requested
def taxonworks_options_present? = taxonworks_extension_methods.present?

#used_collecting_event_predicatesObject



330
331
332
333
# File 'lib/export/dwca/data.rb', line 330

# Distinct predicate names used on CollectingEvent data attributes, each
# prefixed with `TW:DataAttribute:CollectingEvent:`.
#
# @return [Array] the predicate_name value of every distinct row
def used_collecting_event_predicates
  name_column = "CONCAT('TW:DataAttribute:CollectingEvent:', controlled_vocabulary_terms.name) predicate_name"

  collecting_event_attributes_query
    .joins(:predicate)
    .select(name_column)
    .distinct
    .map { |row| row['predicate_name'] }
end

#used_collection_object_predicatesObject



324
325
326
327
328
# File 'lib/export/dwca/data.rb', line 324

# Distinct predicate names used on CollectionObject data attributes, each
# prefixed with `TW:DataAttribute:CollectionObject:`.
#
# @return [Array] the predicate_name value of every distinct row
def used_collection_object_predicates
  name_column = "CONCAT('TW:DataAttribute:CollectionObject:', controlled_vocabulary_terms.name) predicate_name"

  collection_object_attributes_query
    .select(name_column)
    .distinct
    .map { |row| row['predicate_name'] }
end

#used_predicatesArray

Returns an Array of distinct Predicate names in the format

`TW:DataAttribute:<CollectingEvent|CollectionObject>:<name>`.

Returns:

  • (Array)

    of distinct Predicate names in the format

    `TW:DataAttribute:<CollectingEvent|CollectionObject>:<name>`
    


338
339
340
# File 'lib/export/dwca/data.rb', line 338

# @return [Array] of distinct Predicate names in the format
#   `TW:DataAttribute:<CollectingEvent|CollectionObject>:<name>`,
#   CollectionObject names first, then CollectingEvent names
def used_predicates
  object_names = used_collection_object_predicates
  event_names = used_collecting_event_predicates
  object_names + event_names
end