Module: Export::CSV::Dwc::Extension::Checklist::SpeciesDistribution

Defined in:
lib/export/csv/dwc/extension/checklist/species_distribution.rb

Overview

CSV for the Species Distribution extension (for checklist archives). See http://rs.gbif.org/extension/gbif/1.0/distribution.xml

Constant Summary collapse

GBIF =
Export::Dwca::GbifProfile::SpeciesDistribution
CHECKLIST_FIELDS =

Fields used in checklist exports (subset of full GBIF profile).

[
  :id, # Required for DwC-A star joins (taxonID, an OTU UUID)
  :locality,
  :occurrenceStatus,
  :source
].freeze
HEADERS =
CHECKLIST_FIELDS
HEADERS_NAMESPACES =
CHECKLIST_FIELDS.map do |field|
  field == :id ? '' : GBIF::NAMESPACES[field]
end.freeze

Class Method Summary collapse

Class Method Details

.csv(scope, taxon_name_id_to_taxon_id, accepted_name_mode:) ⇒ String

Generate CSV for species distribution extension.

Parameters:

  • scope (ActiveRecord::Relation)

    DwcOccurrence records from AssertedDistribution

  • taxon_name_id_to_taxon_id (Hash)

    taxon_name_id => OTU UUID (used as dwc:taxonID in the checklist core)

  • accepted_name_mode (String)

    checklist synonym handling mode

Returns:

  • (String)

    CSV content



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/export/csv/dwc/extension/checklist/species_distribution.rb', line 28

# Generate CSV for the species distribution extension.
#
# Rows are grouped by [taxon id, locality, occurrenceStatus]; the distinct
# sources of every occurrence in a group are merged into one `source` cell.
#
# @param scope [ActiveRecord::Relation] DwcOccurrence records from AssertedDistribution
# @param taxon_name_id_to_taxon_id [Hash] taxon_name_id => OTU UUID
# @param accepted_name_mode [String] checklist synonym handling mode
# @return [String] CSV content
def self.csv(scope, taxon_name_id_to_taxon_id, accepted_name_mode:)
  # ACCEPTED_NAME_USAGE_ID mode maps an OTU to its own taxon name; any other
  # mode maps it to cached_valid_taxon_name_id, falling back to the name's id.
  taxon_name_column =
    if accepted_name_mode == ::Export::Dwca::Checklist::Data::ACCEPTED_NAME_USAGE_ID
      'taxon_names.id'
    else
      'COALESCE(taxon_names.cached_valid_taxon_name_id, taxon_names.id)'
    end

  taxon_name_id_by_otu_id = scope
    .joins('JOIN otus ON otus.id = dwc_occurrences.otu_id')
    .joins('JOIN taxon_names ON taxon_names.id = otus.taxon_name_id')
    .pluck(Arel.sql('dwc_occurrences.otu_id'), Arel.sql(taxon_name_column))
    .to_h

  # [taxon_id, locality, occurrenceStatus] => ordered list of distinct sources
  sources_by_row = {}

  scope.find_each do |occurrence|
    # Occurrences whose OTU or taxon name cannot be resolved are skipped.
    name_id = taxon_name_id_by_otu_id[occurrence.otu_id]
    next unless name_id

    taxon_id = taxon_name_id_to_taxon_id[name_id]
    next unless taxon_id

    # Use locality field if populated (for regional areas like "West Tropical Africa"),
    # otherwise build from country/state/county.
    locality = occurrence.locality.presence
    locality ||= [occurrence.country, occurrence.stateProvince, occurrence.county]
      .compact
      .reject(&:empty?)
      .join(', ')
      .presence

    row_key = [taxon_id, locality, occurrence.occurrenceStatus]
    new_sources = split_sources(occurrence.associatedReferences)

    # Array#| appends only unseen sources and keeps first-seen order.
    sources_by_row[row_key] = (sources_by_row[row_key] || []) | new_sources
  end

  data_rows = sources_by_row.map do |(taxon_id, locality, occurrence_status), sources|
    [taxon_id, locality, occurrence_status, join_sources(sources)]
  end

  ::Export::Dwca.output_csv([HEADERS, *data_rows])
end

.join_sources(sources) ⇒ Object



93
94
95
96
97
# File 'lib/export/csv/dwc/extension/checklist/species_distribution.rb', line 93

# Join a list of sources into a single ' | '-delimited string.
#
# @param sources [Array<String>, nil] individual source strings
# @return [String, nil] the joined string, or nil when +sources+ is blank
def self.join_sources(sources)
  sources.join(' | ') unless sources.blank?
end

.split_sources(source_string) ⇒ Object



87
88
89
90
91
# File 'lib/export/csv/dwc/extension/checklist/species_distribution.rb', line 87

# Split a ' | '-delimited source string into its individual sources.
#
# Each piece is stripped of surrounding whitespace; blank pieces are dropped.
#
# @param source_string [String, nil] delimited sources
# @return [Array<String>] the sources in their original order; empty when
#   +source_string+ is blank
def self.split_sources(source_string)
  if source_string.blank?
    []
  else
    source_string.split(' | ').each_with_object([]) do |piece, kept|
      trimmed = piece.strip
      kept << trimmed unless trimmed.blank?
    end
  end
end