Class: Export::Dwca::Checklist::Data

Inherits:
Object
  • Object
show all
Defined in:
lib/export/dwca/checklist/data.rb

Constant Summary collapse

DESCRIPTION_EXTENSION =

Available extensions.

:description
# Symbol identifying the distribution extension.
DISTRIBUTION_EXTENSION =
:distribution
# Symbol identifying the references extension.
REFERENCES_EXTENSION =
:references
# Symbol identifying the types and specimen extension.
TYPES_AND_SPECIMEN_EXTENSION =
:types_and_specimen
# Symbol identifying the vernacular name extension.
VERNACULAR_NAME_EXTENSION =
:vernacular_name
# One entry per available extension, pairing the extension symbol (:value)
# with a :displayed_in_gbif flag.
# NOTE(review): presumably the flag records whether GBIF renders data from
# that extension - confirm with the consumers of this list.
# Only the outer Array is frozen; the inner Hashes are not.
CHECKLIST_EXTENSION_OPTIONS =
[
  { value: DESCRIPTION_EXTENSION, displayed_in_gbif: true },
  { value: DISTRIBUTION_EXTENSION, displayed_in_gbif: true },
  { value: REFERENCES_EXTENSION, displayed_in_gbif: false },
  { value: TYPES_AND_SPECIMEN_EXTENSION, displayed_in_gbif: false },
  { value: VERNACULAR_NAME_EXTENSION, displayed_in_gbif: true }
].freeze
REPLACE_WITH_ACCEPTED_NAME =

Accepted name mode values.

'replace_with_accepted_name'
# Accepted name mode value: include all names, using acceptedNameUsageID for
# synonyms.
ACCEPTED_NAME_USAGE_ID =
'accepted_name_usage_id'
GBIF_TAXONOMIC_STATUS_FROM_RELATIONSHIP =

Maps TaxonNameRelationship type strings to GBIF 2022 taxonomic status vocabulary terms. Checked in order; first match wins. https://rs.gbif.org/vocabulary/gbif/taxonomic_status_2022-02-02.xml

[
  [/Misapplication/,                             'misapplied'        ],
  [/Synonym::Objective|Synonym::Homotypic/,      'homotypicSynonym'  ],
  [/Synonym::Subjective|Synonym::Heterotypic/,   'heterotypicSynonym'],
  [/Synonym|Invalidating|Unaccepting/,           'synonym'           ],
].freeze
# The two accepted name mode values defined above, as a frozen list.
ACCEPTED_NAME_MODE_OPTIONS =
[
  REPLACE_WITH_ACCEPTED_NAME,
  ACCEPTED_NAME_USAGE_ID
].freeze
CHECKLIST_TAXON_EXTENSION_COLUMNS =

Keys are dwc_occurrence columns, values are DwC Taxon Extension columns: https://rs.gbif.org/core/dwc_taxon_2024-02-19.xml

{
  scientificName: :scientificName,
  taxonRank: :taxonRank,
  acceptedNameUsage: :acceptedNameUsage,
  parentNameUsage: :parentNameUsage,
  originalNameUsage: :originalNameUsage,
  nameAccordingTo: :nameAccordingTo,
  namePublishedIn: :namePublishedIn,
  namePublishedInYear: :namePublishedInYear,
  higherClassification: :higherClassification,
  kingdom: :kingdom,
  phylum: :phylum,
  dwcClass: :class,  # Note: column is dwcClass, DwC Taxon field is 'class'
  order: :order,
  superfamily: :superfamily,
  family: :family,
  subfamily: :subfamily,
  tribe: :tribe,
  subtribe: :subtribe,
  genus: :genus,
  subgenus: :subgenus,
  specificEpithet: :specificEpithet,
  infraspecificEpithet: :infraspecificEpithet,
  verbatimTaxonRank: :verbatimTaxonRank,
  scientificNameAuthorship: :scientificNameAuthorship,
  vernacularName: :vernacularName,
  nomenclaturalCode: :nomenclaturalCode,
  taxonomicStatus: :taxonomicStatus,
  nomenclaturalStatus: :nomenclaturalStatus,
  taxonRemarks: :taxonRemarks
}.freeze
CHECKLIST_TAXON_NAMESPACES =

Namespace URIs for checklist taxon core fields. Maps DwC Taxon field names to their full namespace URIs.

{
  taxonID: 'http://rs.tdwg.org/dwc/terms/taxonID',
  scientificName: 'http://rs.tdwg.org/dwc/terms/scientificName',
  taxonRank: 'http://rs.tdwg.org/dwc/terms/taxonRank',
  acceptedNameUsage: 'http://rs.tdwg.org/dwc/terms/acceptedNameUsage',
  acceptedNameUsageID: 'http://rs.tdwg.org/dwc/terms/acceptedNameUsageID',
  parentNameUsage: 'http://rs.tdwg.org/dwc/terms/parentNameUsage',
  parentNameUsageID: 'http://rs.tdwg.org/dwc/terms/parentNameUsageID',
  originalNameUsage: 'http://rs.tdwg.org/dwc/terms/originalNameUsage',
  nameAccordingTo: 'http://rs.tdwg.org/dwc/terms/nameAccordingTo',
  namePublishedIn: 'http://rs.tdwg.org/dwc/terms/namePublishedIn',
  namePublishedInYear: 'http://rs.tdwg.org/dwc/terms/namePublishedInYear',
  higherClassification: 'http://rs.tdwg.org/dwc/terms/higherClassification',
  kingdom: 'http://rs.tdwg.org/dwc/terms/kingdom',
  phylum: 'http://rs.tdwg.org/dwc/terms/phylum',
  class: 'http://rs.tdwg.org/dwc/terms/class',
  order: 'http://rs.tdwg.org/dwc/terms/order',
  superfamily: 'http://rs.tdwg.org/dwc/terms/superfamily',
  family: 'http://rs.tdwg.org/dwc/terms/family',
  subfamily: 'http://rs.tdwg.org/dwc/terms/subfamily',
  tribe: 'http://rs.tdwg.org/dwc/terms/tribe',
  subtribe: 'http://rs.tdwg.org/dwc/terms/subtribe',
  genus: 'http://rs.tdwg.org/dwc/terms/genus',
  subgenus: 'http://rs.tdwg.org/dwc/terms/subgenus',
  specificEpithet: 'http://rs.tdwg.org/dwc/terms/specificEpithet',
  infraspecificEpithet: 'http://rs.tdwg.org/dwc/terms/infraspecificEpithet',
  verbatimTaxonRank: 'http://rs.tdwg.org/dwc/terms/verbatimTaxonRank',
  scientificNameAuthorship: 'http://rs.tdwg.org/dwc/terms/scientificNameAuthorship',
  vernacularName: 'http://rs.tdwg.org/dwc/terms/vernacularName',
  nomenclaturalCode: 'http://rs.tdwg.org/dwc/terms/nomenclaturalCode',
  taxonomicStatus: 'http://rs.tdwg.org/dwc/terms/taxonomicStatus',
  nomenclaturalStatus: 'http://rs.tdwg.org/dwc/terms/nomenclaturalStatus',
  taxonRemarks: 'http://rs.tdwg.org/dwc/terms/taxonRemarks'
}.freeze
# Frozen result of .ordered_ranks, evaluated once when this constant is
# assigned: rank strings in hierarchical order (highest to lowest).
ORDERED_RANKS =
ordered_ranks.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(core_otu_scope_params: nil, extensions: [], accepted_name_mode: 'replace_with_accepted_name', description_topics: []) ⇒ Data

Returns a new instance of Data.



232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# File 'lib/export/dwca/checklist/data.rb', line 232

# Builds the export: records the options, derives the DwcOccurrence scope from
# the OTU query params and sets one boolean flag per requested extension.
#
# @param core_otu_scope_params [Hash, #to_h, nil] Otu query params; nil is
#   treated as an empty Hash, keys are deep-symbolized
# @param extensions [Array<Symbol>] list of extensions to include
# @param accepted_name_mode [String] how to handle unaccepted names:
#   'replace_with_accepted_name' (default) or 'accepted_name_usage_id'
# @param description_topics [Array<Integer>] ordered list of topic IDs for the
#   description extension
def initialize(
  core_otu_scope_params: nil,
  extensions: [],
  accepted_name_mode: 'replace_with_accepted_name',
  description_topics: []
)
  @accepted_name_mode = accepted_name_mode
  @extensions = extensions
  @description_topics = description_topics

  supplied_params = core_otu_scope_params&.to_h || {}
  @core_otu_scope_params = supplied_params.deep_symbolize_keys

  # Pagination/meta params are not OTU filter criteria; left in place they
  # would trigger the otu_query subquery chain unnecessarily.
  otu_filter_criteria = @core_otu_scope_params.except(:per, :page, :paginate, :extend)

  # With no real OTU criteria the otu_query subquery chain is skipped: it
  # would build complex nested CTEs for "all OTUs in project" that are slow
  # and unnecessary. An empty filter returns all project DwcOccurrences.
  occurrence_filter =
    if otu_filter_criteria.present?
      ::Queries::DwcOccurrence::Filter.new(otu_query: otu_filter_criteria)
    else
      ::Queries::DwcOccurrence::Filter.new({})
    end
  @core_occurrence_scope = occurrence_filter.all

  # One flag per extension, true when its symbol was requested.
  @description_extension = extensions.include?(DESCRIPTION_EXTENSION)
  @species_distribution_extension = extensions.include?(DISTRIBUTION_EXTENSION)
  @references_extension = extensions.include?(REFERENCES_EXTENSION)
  @types_and_specimen_extension = extensions.include?(TYPES_AND_SPECIMEN_EXTENSION)
  @vernacular_name_extension = extensions.include?(VERNACULAR_NAME_EXTENSION)
end

Instance Attribute Details

#accepted_name_modeObject

Parameters:

  • accepted_name_mode (String)

    How to handle unaccepted names 'replace_with_accepted_name' - replace invalid names with their valid names (default) 'accepted_name_usage_id' - include all names, using acceptedNameUsageID for synonyms



224
225
226
# File 'lib/export/dwca/checklist/data.rb', line 224

# @return [String] how unaccepted names are handled:
#   'replace_with_accepted_name' - replace invalid names with their valid names (default);
#   'accepted_name_usage_id' - include all names, using acceptedNameUsageID for synonyms
def accepted_name_mode
  @accepted_name_mode
end

#core_occurrence_scopeScope

Derived from core_otu_scope_params

Returns:

  • (Scope)

    of DwcOccurrence



175
176
177
# File 'lib/export/dwca/checklist/data.rb', line 175

# @return [Scope] of DwcOccurrence, derived from core_otu_scope_params in the
#   constructor
def core_occurrence_scope
  @core_occurrence_scope
end

#core_otu_scope_paramsHash

Returns a Hash of Otu query params. Required.

Returns:

  • (Hash)

    of Otu query params. Required.



171
172
173
# File 'lib/export/dwca/checklist/data.rb', line 171

# @return [Hash] Otu query params (keys deep-symbolized by the constructor)
def core_otu_scope_params
  @core_otu_scope_params
end

#data_fileTempfile

Returns the csv data as a tempfile.

Returns:

  • (Tempfile)

    the csv data as a tempfile



192
193
194
# File 'lib/export/dwca/checklist/data.rb', line 192

# @return [Tempfile] the csv data as a tempfile
def data_file
  @data_file
end

#description_extensionBoolean

Returns whether to include description extension.

Returns:

  • (Boolean)

    whether to include description extension



199
200
201
# File 'lib/export/dwca/checklist/data.rb', line 199

# @return [Boolean] whether to include the description extension
def description_extension
  @description_extension
end

#description_topicsArray<Integer>

Returns ordered list of topic IDs for description extension.

Returns:

  • (Array<Integer>)

    ordered list of topic IDs for description extension



214
215
216
# File 'lib/export/dwca/checklist/data.rb', line 214

# @return [Array<Integer>] ordered list of topic IDs for the description extension
def description_topics
  @description_topics
end

#emlTempfile

This is a stub, and only half-heartedly done. You should be using IPT for the time being. See also https://github.com/gbif/ipt/wiki/resourceMetadata https://github.com/gbif/ipt/wiki/resourceMetadata#exemplar-datasets

Returns:

  • (Tempfile)

    metadata about this dataset



189
190
191
# File 'lib/export/dwca/checklist/data.rb', line 189

# This is a stub, and only half-heartedly done; IPT is the recommended tool
# for the time being.
#
# @return [Tempfile] metadata about this dataset
def eml
  @eml
end

#extensionsArray<Symbol>

Returns list of extensions to include.

Returns:

  • (Array<Symbol>)

    list of extensions to include



217
218
219
# File 'lib/export/dwca/checklist/data.rb', line 217

# @return [Array<Symbol>] list of extensions to include
def extensions
  @extensions
end

#metaObject

meta.xml tempfile



186
187
188
# File 'lib/export/dwca/checklist/data.rb', line 186

# @return [Object] the meta.xml tempfile
def meta
  @meta
end

#occurrence_to_otuHash

Build mapping from occurrence to OTU ID

Returns:

  • (Hash)

    "dwc_occurrence_object_type:dwc_occurrence_object_id" => otu_id



227
228
229
# File 'lib/export/dwca/checklist/data.rb', line 227

# Mapping from occurrence to OTU ID.
#
# @return [Hash] "dwc_occurrence_object_type:dwc_occurrence_object_id" => otu_id
def occurrence_to_otu
  @occurrence_to_otu
end

#otu_to_taxon_name_dataHash

Fetch TaxonName data by unique OTU (not by occurrence).

Returns:

  • (Hash)

    otu_id => { cached:, cached_is_valid:, cached_valid_taxon_name_id: }



230
231
232
# File 'lib/export/dwca/checklist/data.rb', line 230

# TaxonName data keyed by unique OTU (not by occurrence).
#
# @return [Hash] otu_id => { cached:, cached_is_valid:, cached_valid_taxon_name_id: }
def otu_to_taxon_name_data
  @otu_to_taxon_name_data
end

#references_extensionBoolean

Returns whether to include references extension.

Returns:

  • (Boolean)

    whether to include references extension



205
206
207
# File 'lib/export/dwca/checklist/data.rb', line 205

# @return [Boolean] whether to include the references extension
def references_extension
  @references_extension
end

#species_distribution_extensionBoolean

Returns whether to include distribution extension.

Returns:

  • (Boolean)

    whether to include distribution extension



202
203
204
# File 'lib/export/dwca/checklist/data.rb', line 202

# @return [Boolean] whether to include the distribution extension
def species_distribution_extension
  @species_distribution_extension
end

#taxon_name_id_to_taxon_idObject (readonly)

Hash mapping taxon_name_id to taxonID for extension star joins. Example: { 123 => 5, 456 => 3 }



196
197
198
# File 'lib/export/dwca/checklist/data.rb', line 196

# Hash mapping taxon_name_id to taxonID for extension star joins.
# Assigned by #normalize_occurrence_csv; nil until that has run.
def taxon_name_id_to_taxon_id
  @taxon_name_id_to_taxon_id
end

#totalObject

Size of core_occurrence_scope



178
179
180
# File 'lib/export/dwca/checklist/data.rb', line 178

# @return [Object] size of core_occurrence_scope
def total
  @total
end

#types_and_specimen_extensionBoolean

Returns whether to include types and specimen extension.

Returns:

  • (Boolean)

    whether to include types and specimen extension



208
209
210
# File 'lib/export/dwca/checklist/data.rb', line 208

# @return [Boolean] whether to include the types and specimen extension
def types_and_specimen_extension
  @types_and_specimen_extension
end

#vernacular_name_extensionBoolean

Returns whether to include vernacular name extension.

Returns:

  • (Boolean)

    whether to include vernacular name extension



211
212
213
# File 'lib/export/dwca/checklist/data.rb', line 211

# @return [Boolean] whether to include the vernacular name extension
def vernacular_name_extension
  @vernacular_name_extension
end

#zipfileTempfile

Returns the zipfile.

Returns:

  • (Tempfile)

    the zipfile



643
644
645
# File 'lib/export/dwca/checklist/data.rb', line 643

# @return [Tempfile] the zipfile
def zipfile
  @zipfile
end

#zipfile_nameString (readonly)

the name of zipfile

Returns:

  • (String)


183
184
185
# File 'lib/export/dwca/checklist/data.rb', line 183

# @return [String] the name of zipfile
def zipfile_name
  @zipfile_name
end

Class Method Details

.gbif_taxonomic_status_for(relationship_type) ⇒ String?

Returns GBIF 2022 taxonomic status term.

Parameters:

  • relationship_type (String, nil)

    value of taxon_name_relationships.type

Returns:

  • (String, nil)

    GBIF 2022 taxonomic status term



37
38
39
40
41
42
43
# File 'lib/export/dwca/checklist/data.rb', line 37

# Translates one TaxonNameRelationship type string into a GBIF 2022 taxonomic
# status term.
#
# @param relationship_type [String, nil] value of taxon_name_relationships.type
# @return [String, nil] the status paired with the first pattern in
#   GBIF_TAXONOMIC_STATUS_FROM_RELATIONSHIP that matches; nil when the type is
#   nil or no pattern matches
def self.gbif_taxonomic_status_for(relationship_type)
  return if relationship_type.nil?

  # Rules are checked in declaration order, so the first match wins.
  matching_rule = GBIF_TAXONOMIC_STATUS_FROM_RELATIONSHIP.find do |pattern, _status|
    relationship_type.match?(pattern)
  end
  matching_rule&.last
end

.gbif_taxonomic_status_from_types(relationship_types) ⇒ String?

Returns highest-priority GBIF 2022 taxonomic status term.

Parameters:

  • relationship_types (Array<String>)

    values of taxon_name_relationships.type

Returns:

  • (String, nil)

    highest-priority GBIF 2022 taxonomic status term



47
48
49
50
51
52
# File 'lib/export/dwca/checklist/data.rb', line 47

# Picks the highest-priority GBIF 2022 taxonomic status term for a set of
# relationship types. Priority is the declaration order of
# GBIF_TAXONOMIC_STATUS_FROM_RELATIONSHIP: the first pattern matched by any of
# the types wins.
#
# Like .gbif_taxonomic_status_for, nil input is tolerated: a nil argument, nil
# elements and an empty list all yield nil instead of raising NoMethodError.
# A single String is also accepted and treated as a one-element list.
#
# @param relationship_types [Array<String>, String, nil] values of
#   taxon_name_relationships.type
# @return [String, nil] highest-priority GBIF 2022 taxonomic status term, or
#   nil when nothing matches
def self.gbif_taxonomic_status_from_types(relationship_types)
  types = Array(relationship_types).compact
  return nil if types.empty?

  _, status = GBIF_TAXONOMIC_STATUS_FROM_RELATIONSHIP.find { |pattern, _|
    types.any? { |t| t.match?(pattern) }
  }
  status
end

.ordered_ranksArray

Includes both column-based ranks (kingdom, phylum, etc.) and all possible taxonRank values that may appear for terminal taxa (species, subspecies, variety, form, etc.).

Returns:

  • (Array)

    of rank strings in hierarchical order (highest to lowest).



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/export/dwca/checklist/data.rb', line 141

# Rank strings in hierarchical order (highest to lowest). Covers the
# column-based ranks (kingdom, phylum, etc.) plus every species-level rank of
# the ICZN, ICN and ICNP rank groups, which may appear as taxonRank values for
# terminal taxa.
#
# @return [Array<String>] ranks known to ICZN in ICZN order, followed by the
#   remaining ranks (like ICN's variety, form)
def self.ordered_ranks
  higher_rank_columns = %i[
    kingdom phylum dwcClass order superfamily family subfamily tribe subtribe genus subgenus
  ]

  # Rank columns available in DwcOccurrence, renamed to their DwC field names.
  dwc_rank_names = (CHECKLIST_TAXON_EXTENSION_COLUMNS.keys & higher_rank_columns).map do |column|
    column == :dwcClass ? 'class' : column.to_s
  end

  # Species-level ranks from each code. ICVCN only has "species", so it is
  # not consulted.
  species_rank_names = [
    ::NomenclaturalRank::Iczn::SpeciesGroup,
    ::NomenclaturalRank::Icn::SpeciesAndInfraspeciesGroup,
    ::NomenclaturalRank::Icnp::SpeciesGroup
  ].flat_map { |rank_group| rank_group.ordered_ranks.map(&:rank_name) }

  wanted_ranks = dwc_rank_names | species_rank_names

  # ICZN ordering is the base (most comprehensive for higher ranks).
  iczn_sequence = ::NomenclaturalRank::Iczn.ordered_ranks.map(&:rank_name)
  in_iczn_order = iczn_sequence.select { |rank_name| wanted_ranks.include?(rank_name) }

  # Whatever ICZN does not know about is appended in discovery order.
  in_iczn_order + (wanted_ranks - in_iczn_order)
end

Instance Method Details

#add_extension_to_meta(xml, extension_module:, file_location:, row_type:, extension_name:) ⇒ Object

Helper to add extension XML to meta.xml

Parameters:

  • xml (Nokogiri::XML::Builder)

    XML builder

  • extension_module (Module)

    extension module with HEADERS_NAMESPACES constant

  • file_location (String)

    filename in the archive

  • row_type (String)

    DwC rowType URI

  • extension_name (String)

    name for error messages



447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
# File 'lib/export/dwca/checklist/data.rb', line 447

# Helper to add one <extension> element to meta.xml.
#
# @param xml [Nokogiri::XML::Builder] XML builder
# @param extension_module [Module] extension module with a HEADERS_NAMESPACES constant
# @param file_location [String] filename in the archive
# @param row_type [String] DwC rowType URI
# @param extension_name [String] name for error messages
# @raise [TaxonWorks::Error] when the first HEADERS_NAMESPACES entry is not ''
def add_extension_to_meta(xml, extension_module:, file_location:, row_type:, extension_name:)
  # Single-quoted on purpose: the attribute values are the two-character
  # escape sequences, not actual newline/tab characters.
  extension_attributes = {
    encoding: 'UTF-8',
    linesTerminatedBy: '\n',
    fieldsTerminatedBy: '\t',
    fieldsEnclosedBy: '"',
    ignoreHeaderLines: '1',
    rowType: row_type
  }

  xml.extension(**extension_attributes) do
    xml.files { xml.location file_location }

    extension_module::HEADERS_NAMESPACES.each_with_index do |namespace, position|
      if position.zero?
        # The first column is the id column and must carry an empty namespace.
        raise TaxonWorks::Error, "First #{extension_name} column (id) should have namespace '', got '#{namespace}'" unless namespace == ''

        xml.id(index: 0)
      else
        xml.field(index: position, term: namespace)
      end
    end
  end
end

#build_zipObject



620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
# File 'lib/export/dwca/checklist/data.rb', line 620

# Assembles the archive: data.tsv, one <name>.tsv per enabled extension,
# meta.xml and eml.xml.
#
# @return [Tempfile] the tempfile holding the zip archive
def build_zip
  archive = Tempfile.new(zipfile_name)

  # NOTE(review): presumably this writes an empty but valid zip into the
  # tempfile so it can be reopened below - confirm against rubyzip.
  Zip::OutputStream.open(archive) { |_stream| }

  Zip::File.open(archive.path, create: true) do |zip|
    zip.add('data.tsv', data_file.path)

    # Each extension follows the same naming scheme: a `<name>_extension`
    # flag, a `<name>_extension_tmp` generator and a `<name>.tsv` entry.
    %w[description species_distribution references types_and_specimen vernacular_name].each do |extension|
      next unless send("#{extension}_extension")

      zip.add("#{extension}.tsv", send("#{extension}_extension_tmp").path)
    end

    zip.add('meta.xml', meta.path)
    zip.add('eml.xml', eml.path)
  end

  archive
end

#cleanupObject

Cleanup temporary files



673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
# File 'lib/export/dwca/checklist/data.rb', line 673

# Cleanup temporary files: closes and unlinks the zipfile, the data file,
# every extension tempfile, eml and meta. Files that were never created
# (nil) are skipped.
#
# @return [true]
def cleanup
  discard = lambda do |file|
    file&.close
    file&.unlink
  end

  discard.call(zipfile)
  discard.call(data_file)

  # Extension tempfiles are read straight from their instance variables so
  # that cleanup never generates a file just to delete it.
  [
    @description_extension_tmp,
    @species_distribution_extension_tmp,
    @references_extension_tmp,
    @types_and_specimen_extension_tmp,
    @vernacular_name_extension_tmp
  ].each(&discard)

  discard.call(eml)
  discard.call(meta)

  true
end

#csvCSV

Returns The data as a CSV object. For checklists, this produces a normalized taxonomy (one row per unique taxon) with OTU UUID taxonIDs and parentNameUsageID relationships - see https://ipt.gbif.org/manual/en/ipt/latest/best-practices-checklists#normalized-classifications-parentchild.

Returns:



295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/export/dwca/checklist/data.rb', line 295

# The checklist data as CSV text: a normalized taxonomy (one row per unique
# taxon) with OTU UUID taxonIDs and parentNameUsageID relationships.
#
# @return [String] "\n" when there are no records, otherwise the output of
#   #normalize_occurrence_csv
def csv
  return "\n" if no_records?

  # dwc_occurrence_object_type/id are needed for OTU lookups, so they are
  # kept for now and removed after processing.
  lookup_columns = %i[dwc_occurrence_object_type dwc_occurrence_object_id]
  columns_to_drop = excluded_checklist_columns - lookup_columns

  # Raw occurrence data with all taxonomy columns.
  occurrence_rows = core_occurrence_scope.select(target_checklist_columns)
  raw_csv = ::Export::CSV.generate_csv(
    occurrence_rows,
    exclude_columns: columns_to_drop,
    column_order: CHECKLIST_TAXON_EXTENSION_COLUMNS.keys,
    header_converters: [:checklist_headers]
  )

  normalize_occurrence_csv(raw_csv)
end

#description_extension_tmpObject



594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
# File 'lib/export/dwca/checklist/data.rb', line 594

# Writes the description extension data to a fresh tempfile and remembers it
# in @description_extension_tmp (which #cleanup closes and unlinks).
#
# @return [Tempfile, nil] the rewound tempfile, or nil when the description
#   extension is not enabled
def description_extension_tmp
  return nil unless description_extension

  Tempfile.new('description.tsv').tap do |file|
    body =
      if no_records?
        "\n"
      else
        # The taxon_name_id => taxonID mapping is assigned while the core csv
        # is normalized, so build the core first when it is still missing.
        csv unless taxon_name_id_to_taxon_id

        Export::CSV::Dwc::Extension::Checklist::Description.csv(
          core_otu_scope_params,
          taxon_name_id_to_taxon_id,
          accepted_name_mode: accepted_name_mode,
          description_topics: description_topics
        )
      end

    file.write(body)
    file.flush
    file.rewind

    @description_extension_tmp = file
  end
end

#excluded_checklist_columnsArray (private)

Returns an Array of symbols: the columns to exclude from dwc_occurrences for checklist exports.

Returns:

  • (Array)

    of symbols: the columns to exclude from dwc_occurrences for checklist exports.



718
719
720
721
722
723
724
725
# File 'lib/export/dwca/checklist/data.rb', line 718

# Columns to exclude from dwc_occurrences for checklist exports: every
# DwcOccurrence column that is not a checklist taxon column.
#
# @return [Array<Symbol>]
def excluded_checklist_columns
  all_columns = ::DwcOccurrence.columns.map { |column| column.name.to_sym }

  # The object id/type columns are never counted as exported columns.
  join_columns = %i[dwc_occurrence_object_id dwc_occurrence_object_type]
  exported_columns = CHECKLIST_TAXON_EXTENSION_COLUMNS.keys - join_columns

  all_columns - exported_columns
end

#generate_extension_tmp(extension_name, scope: core_occurrence_scope) ⇒ Tempfile?

Helper to generate extension tempfiles

Parameters:

  • extension_name (String)

    name of extension (e.g., 'species_distribution')

  • scope (ActiveRecord::Relation, Hash) (defaults to: core_occurrence_scope)

    scope to pass to extension's csv method

Returns:

  • (Tempfile, nil)


544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
# File 'lib/export/dwca/checklist/data.rb', line 544

# Helper to generate extension tempfiles. The extension name drives
# everything by convention: the `<name>_extension` flag, the `<name>.tsv`
# tempfile name, the exporter module and the `@<name>_extension_tmp` variable.
#
# @param extension_name [String] name of extension (e.g., 'species_distribution')
# @param scope [ActiveRecord::Relation, Hash] scope to pass to the extension's csv method
# @return [Tempfile, nil] the rewound tempfile, or nil when the extension is
#   not enabled
def generate_extension_tmp(extension_name, scope: core_occurrence_scope)
  return nil unless send("#{extension_name}_extension")

  Tempfile.new("#{extension_name}.tsv").tap do |file|
    body =
      if no_records?
        "\n"
      else
        # The taxon_name_id => taxonID mapping is assigned while the core csv
        # is normalized, so build the core first when it is still missing.
        csv unless taxon_name_id_to_taxon_id

        # Exporter module is looked up from the classified extension name.
        exporter = "Export::CSV::Dwc::Extension::Checklist::#{extension_name.classify}".constantize
        exporter.csv(scope, taxon_name_id_to_taxon_id, accepted_name_mode: accepted_name_mode)
      end

    file.write(body)
    file.flush
    file.rewind

    instance_variable_set("@#{extension_name}_extension_tmp", file)
  end
end

#meta_fieldsArray

Returns the headers read from the temporarily written, and refined, CSV file, so they can be used in writing meta.xml. Non-standard DwC columns are handled elsewhere.

Returns:

  • (Array)

    the headers read from the temporarily written, and refined, CSV file, so they can be used in writing meta.xml. Non-standard DwC columns are handled elsewhere.



272
273
274
275
276
277
278
# File 'lib/export/dwca/checklist/data.rb', line 272

# Reads the header row of the written data file so the column names can be
# used when writing meta.xml.
#
# @return [Array<String>] tab-separated headers minus the first column; empty
#   when there are no records or the file has no first line
def meta_fields
  return [] if no_records?

  header_line = File.open(data_file, &:gets)
  return [] if header_line.nil?

  # The first column, id, will be specified by hand, so it is dropped here.
  header_line.strip.split("\t").drop(1)
end

#no_records?Boolean

Returns true if core_occurrence_scope returns no records.

Returns:

  • (Boolean)

    true if core_occurrence_scope returns no records



286
287
288
# File 'lib/export/dwca/checklist/data.rb', line 286

# @return [Boolean] true if core_occurrence_scope returns no records, i.e.
#   #total equals 0
def no_records?
  total == 0
end

#normalize_occurrence_csv(raw_csv) ⇒ String

Normalize taxonomy: deduplicate, assign OTU UUID taxonIDs, add parentNameUsageID.

Parameters:

  • raw_csv (String)

    CSV with one row per occurrence

Returns:

  • (String)

    CSV with one row per unique taxon



318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
# File 'lib/export/dwca/checklist/data.rb', line 318

# Normalize taxonomy: deduplicate, assign OTU UUID taxonIDs, add
# parentNameUsageID. The work is delegated to OccurrenceNormalizer.
#
# Side effect: stores the returned taxon_name_id => taxonID mapping in
# @taxon_name_id_to_taxon_id for the extensions to use.
#
# @param raw_csv [String] CSV with one row per occurrence
# @return [String] CSV with one row per unique taxon
def normalize_occurrence_csv(raw_csv)
  normalized_csv, @taxon_name_id_to_taxon_id = OccurrenceNormalizer.new(
    raw_csv: raw_csv,
    accepted_name_mode: accepted_name_mode,
    otu_to_taxon_name_data: otu_to_taxon_name_data,
    occurrence_to_otu: occurrence_to_otu
  ).normalize

  normalized_csv
end

#package_download(download) ⇒ Object

Parameters:



657
658
659
660
661
662
663
664
665
666
667
668
669
670
# File 'lib/export/dwca/checklist/data.rb', line 657

# Records the number of exported taxa on the download and hands it the path of
# the zipfile.
#
# @param download [Object] must respond to persisted?, update_columns,
#   total_records= and update!
# @return [Object] whatever download.update! returns
def package_download(download)
  archive_path = zipfile.path

  # One mapping entry per exported taxon; no mapping means nothing was exported.
  exported_count = taxon_name_id_to_taxon_id&.size || 0

  if download.persisted?
    download.update_columns(total_records: exported_count)
  else
    download.total_records = exported_count
  end

  # The original note here says this doesn't touch the db because
  # source_file_path is an instance var, and that it triggers the save_file
  # callback.
  # NOTE(review): confirm both claims against the Download model.
  download.update!(source_file_path: archive_path)
end

#references_extension_tmpTempfile?

Returns Literature references extension data.

Returns:

  • (Tempfile, nil)

    Literature references extension data.



582
583
584
# File 'lib/export/dwca/checklist/data.rb', line 582

# Literature references extension data, generated with the default scope of
# #generate_extension_tmp.
#
# @return [Tempfile, nil]
def references_extension_tmp
  generate_extension_tmp('references')
end

#species_distribution_extension_tmpTempfile?

Returns Species distribution extension data from AssertedDistribution records.

Returns:

  • (Tempfile, nil)

    Species distribution extension data from AssertedDistribution records.



575
576
577
578
# File 'lib/export/dwca/checklist/data.rb', line 575

# Species distribution extension data from AssertedDistribution records: the
# core occurrence scope is narrowed to rows whose dwc_occurrence_object_type
# is 'AssertedDistribution'.
#
# @return [Tempfile, nil]
def species_distribution_extension_tmp
  generate_extension_tmp(
    'species_distribution',
    scope: core_occurrence_scope.where(dwc_occurrence_object_type: 'AssertedDistribution')
  )
end

#target_checklist_columnsArray (private)

Returns an Array of symbols: the columns to select from dwc_occurrences for checklist exports.

Returns:

  • (Array)

    of symbols: the columns to select from dwc_occurrences for checklist exports.



708
709
710
711
712
713
714
# File 'lib/export/dwca/checklist/data.rb', line 708

# Columns to select from dwc_occurrences for checklist exports.
#
# @return [Array<Symbol>] id and the two object columns, followed by every
#   checklist taxon column
def target_checklist_columns
  # !! dwc_occurrence_object_id/type are not wanted in the output but are
  # needed in joins; they are removed in trim via excluded_checklist_columns.
  join_columns = %i[id dwc_occurrence_object_id dwc_occurrence_object_type]

  join_columns + CHECKLIST_TAXON_EXTENSION_COLUMNS.keys
end

#types_and_specimen_extension_tmpObject



586
587
588
# File 'lib/export/dwca/checklist/data.rb', line 586

# Types and specimen extension data, generated with the default scope of
# #generate_extension_tmp.
#
# @return [Tempfile, nil] nil when the extension is not enabled
def types_and_specimen_extension_tmp
  generate_extension_tmp('types_and_specimen')
end

#vernacular_name_extension_tmpObject



590
591
592
# File 'lib/export/dwca/checklist/data.rb', line 590

# Vernacular name extension data. Unlike the other extensions, the scope
# handed to #generate_extension_tmp is the OTU query params Hash
# (core_otu_scope_params), not the core occurrence scope.
#
# @return [Tempfile, nil] nil when the extension is not enabled
def vernacular_name_extension_tmp
  generate_extension_tmp('vernacular_name', scope: core_otu_scope_params)
end