Module: Export::Coldp::Files::Distribution

Defined in:: lib/export/coldp/files/distribution.rb

Overview

This export feels very non-standard at the moment. Ideally we would move to DwcOccurrence vectors as the way to provide distribution data.

Class Method Summary collapse

.add_asserted_distributions(otus, csv, project_members) ⇒ Object

TODO: change to reference DwcOccurrences.
.add_content_distributions(otus, csv, project_members, project_id:) ⇒ Object
.asserted_distributions(otus) ⇒ Object

TODO: Arbitrarily using MAX to grab one source is janky, but if CoL doesn’t have an extended model perhaps it doesn’t matter.
.content_distributions(otus, project_id: nil) ⇒ Object
.generate(otus, project_members, reference_csv = nil, project_id: nil) ⇒ Object

Class Method Details

.add_asserted_distributions(otus, csv, project_members) ⇒ `Object`

TODO: change to reference DwcOccurrences

# File 'lib/export/coldp/files/distribution.rb', line 76

# Appends one distribution row to `csv` for every record returned by
# `asserted_distributions(otus)`.
#
# The area columns are chosen by the first identifier that is present:
# ISO 3166 alpha-3, then ISO 3166 alpha-2, then the TDWG id, and finally the
# plain area name (gazetteer 'text', no area id).
#
# Row layout follows the header written by `generate`:
# taxonID, areaID, area, gazetteer, status, referenceID, modified,
# modifiedBy, remarks. Status and remarks are always left empty.
#
# @param otus [Object] passed straight through to `asserted_distributions`
# @param csv [#<<] receiver of the row arrays
# @param project_members [Object] passed to `Export::Coldp.modified_by`
# @return [Object] the asserted distribution scope, so the caller can handle
#   references afterwards
def self.add_asserted_distributions(otus, csv, project_members)
  distributions = asserted_distributions(otus)

  distributions.each do |row|
    gazetteer, area_id, area =
      if row.iso_3166_a3.present?
        ['iso', row.iso_3166_a3, row.iso_3166_a3]
      elsif row.iso_3166_a2.present?
        ['iso', row.iso_3166_a2, row.iso_3166_a2]
      elsif row.tdwgID.present?
        code = row.tdwgID
        # Fixes mismatch in TW vs CoL TDWG level 3 & 4 identifiers: drop the
        # leading one or two digits.
        code = code.gsub(/^[0-9]{1,2}(.+)$/, '\1') if %w[tdwg_l3 tdwg_l4].include?(row.data_origin)
        ['tdwg', code, code]
      else
        ['text', nil, row.name]
      end

    csv << [
      row.asserted_distribution_object_id,                            # taxonID
      area_id,                                                        # areaID
      area,                                                           # area
      gazetteer,                                                      # gazetteer
      nil,                                                            # status
      row.source_id,                                                  # referenceID - only 1 distribution reference allowed
      Export::Coldp.modified(row.updated_at),                         # modified
      Export::Coldp.modified_by(row.updated_by_id, project_members),  # modifiedBy
      nil                                                             # remarks
    ]
  end

  distributions # return scope for reference handling
end

.add_content_distributions(otus, csv, project_members, project_id:) ⇒ `Object`

# File 'lib/export/coldp/files/distribution.rb', line 118

# Appends one free-text distribution row to `csv` for every record returned
# by `content_distributions(otus, project_id:)`.
#
# Each row uses gazetteer 'text' with the content text as the area; area id,
# status, reference id and remarks are left empty. Row layout follows the
# header written by `generate`.
#
# The scope is not forced with a separate `length` call: iterating with
# `each` already evaluates it.
#
# @param otus [Object] passed straight through to `content_distributions`
# @param csv [#<<] receiver of the row arrays
# @param project_members [Object] passed to `Export::Coldp.modified_by`
# @param project_id [Object] passed straight through to `content_distributions`
# @return [Object] whatever `content_distributions` returned, so the caller
#   can handle references afterwards
def self.add_content_distributions(otus, csv, project_members, project_id:)
  contents = content_distributions(otus, project_id:)

  contents.each do |content|
    csv << [
      content.id,                                                        # taxonID
      nil,                                                               # areaID
      content.text,                                                      # area
      'text',                                                            # gazetteer
      nil,                                                               # status
      nil,                                                               # referenceID
      Export::Coldp.modified(content.updated_at),                        # modified
      Export::Coldp.modified_by(content.updated_by_id, project_members), # modifiedBy
      nil                                                                # remarks
    ]
  end

  contents # return scope for reference handling
end

.asserted_distributions(otus) ⇒ `Object`

TODO:

Arbitrarily using MAX to grab one source is janky, but if CoL doesn't have
an extended model perhaps it doesn't matter.

We only export Otu ADs for GeographicArea based records.

# File 'lib/export/coldp/files/distribution.rb', line 12

# Builds the query for the asserted distributions exported for `otus`.
#
# Only rows whose object is an Otu found in `otus` and whose shape is a
# GeographicArea are kept (both conditions are enforced by the raw JOINs),
# and rows with is_absent = true are excluded. The result is grouped on every
# selected non-aggregate column, with MAX(sources.cached) and MAX(sources.id)
# standing in for "the" source of each group.
#
# NOTE(review): the two MAX aggregates are evaluated independently, so when a
# group has several sources `cached` and `source_id` may describe different
# sources - confirm whether any consumer relies on them matching.
#
# NOTE(review): `joins(:sources)` presumably produces an INNER JOIN, which
# would drop asserted distributions that have no source - confirm this is
# intended.
#
# @param otus [Object] presumably an Otu relation; its ordering is removed
#   and only ids are selected before it is exposed to the JOIN as `otu_scope`
# @return [Object] presumably an unevaluated AssertedDistribution relation
#   whose rows respond to the selected column names plus `cached` and
#   `source_id`
def self.asserted_distributions(otus)
  # TODO: Include Gaz ADs.
  AssertedDistribution.with(otu_scope: otus.unscope(:order).select(:id))
    .joins("JOIN otu_scope on otu_scope.id = asserted_distributions.asserted_distribution_object_id AND asserted_distributions.asserted_distribution_object_type = 'Otu'")
    .joins("JOIN geographic_areas on asserted_distributions.asserted_distribution_shape_id = geographic_areas.id AND asserted_distributions.asserted_distribution_shape_type = 'GeographicArea'")
    .joins(:sources)
    .where(is_absent: [false, nil])
    .select('asserted_distribution_shape_id, asserted_distribution_object_id, name, iso_3166_a3, iso_3166_a2, "tdwgID", data_origin, asserted_distributions.updated_at, asserted_distributions.updated_by_id,
            MAX(sources.cached) AS cached, MAX(sources.id) AS source_id')
    .group('asserted_distribution_shape_id, asserted_distribution_object_id, name, iso_3166_a3, iso_3166_a2, "tdwgID", data_origin, asserted_distributions.updated_at, asserted_distributions.updated_by_id' )
end

.content_distributions(otus, project_id: nil) ⇒ `Object`

# File 'lib/export/coldp/files/distribution.rb', line 24

# Builds the query for the free-text distribution statements of `otus`.
#
# The topic is looked up by name ('Distribution text') within `project_id`.
# When no such topic exists an empty Array is returned instead of a relation,
# so callers must only rely on methods common to both (`each`, `empty?`).
#
# Two details matter for correctness:
# * the topic's id, not the topic record itself, is used as the value of the
#   integer column `contents.topic_id`;
# * the OTU id is selected from the `otu_scope` CTE, which is the only OTU
#   source joined into this query (the `otus` table itself is not in the
#   FROM clause). The selected column is still named `id`.
#
# @param otus [Object] presumably an Otu relation; its ordering is removed
#   and only ids are selected before it is exposed to the JOIN as `otu_scope`
# @param project_id [Object, nil] project used to look up the topic
# @return [Object, Array] distinct rows of (id, text, updated_at,
#   updated_by_id), or [] when the project has no 'Distribution text' topic
def self.content_distributions(otus, project_id: nil)
  # TODO: change to CVT URI
  cvt_name = 'Distribution text'

  topic = ControlledVocabularyTerm.find_by(project_id:, name: cvt_name)

  return [] if topic.nil?

  Content.with(otu_scope: otus.unscope(:order).select(:id))
    .joins('JOIN otu_scope on otu_scope.id = contents.otu_id')
    .where(contents: { topic_id: topic.id })
    .select('otu_scope.id, contents.text, contents.updated_at, contents.updated_by_id')
    .distinct
end

.generate(otus, project_members, reference_csv = nil, project_id: nil) ⇒ `Object`

# File 'lib/export/coldp/files/distribution.rb', line 41

# Generates the tab-separated ColDP distribution file for `otus`.
#
# Writes the header, then the asserted distribution rows followed by the
# free-text content distribution rows. For each of those two scopes that is
# non-empty, the distinct sources joined through `d_scope.source_id` are
# handed to `Export::Coldp::Files::Reference.add_reference_rows`.
#
# The reference query is only built for non-empty scopes. This matters
# because `content_distributions` can return a plain empty Array, which is
# not something a CTE should be built from.
#
# NOTE(review): the reference JOIN needs a `source_id` column on the scope.
# The asserted distribution scope selects one; the content distribution scope
# visible in this file does not - confirm how content references are meant
# to be resolved.
#
# @param otus [Object] passed to the two `add_*_distributions` helpers
# @param project_members [Object] passed to the helpers and to
#   `add_reference_rows`
# @param reference_csv [Object, nil] passed unchanged to `add_reference_rows`
# @param project_id [Object, nil] passed to `add_content_distributions`
# @return [String] the generated tab-separated text
def self.generate(otus, project_members, reference_csv = nil, project_id: nil)
  CSV.generate(col_sep: "\t") do |csv|
    csv << %w[
      taxonID
      areaID
      area
      gazetteer
      status
      referenceID
      modified
      modifiedBy
      remarks
    ]

    # We gather the scopes (not data) so we can add references en-masse after
    scopes = [
      add_asserted_distributions(otus, csv, project_members),
      add_content_distributions(otus, csv, project_members, project_id:)
    ]

    scopes.each do |scope|
      next if scope.empty?

      references = Source.with(d_scope: scope)
        .joins('JOIN d_scope on d_scope.source_id = sources.id')
        .select('sources.id, sources.cached, sources.updated_at, sources.updated_by_id')
        .distinct

      Export::Coldp::Files::Reference.add_reference_rows(references.to_a, reference_csv, project_members)
    end
  end
end

Module: Export::Coldp::Files::Distribution

Overview

Class Method Summary collapse

Class Method Details

.add_asserted_distributions(otus, csv, project_members) ⇒ Object

.add_content_distributions(otus, csv, project_members, project_id:) ⇒ Object

.asserted_distributions(otus) ⇒ Object

.content_distributions(otus, project_id: nil) ⇒ Object

.generate(otus, project_members, reference_csv = nil, project_id: nil) ⇒ Object

.add_asserted_distributions(otus, csv, project_members) ⇒ `Object`

.add_content_distributions(otus, csv, project_members, project_id:) ⇒ `Object`

.asserted_distributions(otus) ⇒ `Object`

.content_distributions(otus, project_id: nil) ⇒ `Object`

.generate(otus, project_members, reference_csv = nil, project_id: nil) ⇒ `Object`