Module: Export::Coldp

Defined in:
lib/export/coldp.rb

Overview

Exports to the Catalog of Life in the new “coldp” (CoLDP) format; see api.col.plus/datapackage.

  • write tests to check for coverage (missing methods)

  • Update all file formats to use tabs

  • Pending handling of both BibTeX and Verbatim

Constant Summary collapse

# File types that .export iterates over: each entry is resolved to
# "Export::Coldp::Files::<entry>" and written to the zip as "<entry>.tsv".
# 'Name' is part of the list but .export writes it in a separate step.
FILETYPES =
%w{Description Name Synonym NameRelation TaxonConceptRelation TypeMaterial VernacularName}.freeze

Class Method Summary collapse

Class Method Details

.basionym_id(taxon_name) ⇒ Object

Parameters:

  • taxon_name (a valid Protonym or a Combination)

    see also exclusion of OTUs/Names based on Ranks not handled



157
158
159
160
161
162
163
164
165
# File 'lib/export/coldp.rb', line 157

# Resolve the identifier of the basionym for a name.
#
# @param taxon_name [#type, #reified_id, #protonyms] a Protonym or a Combination
# @return [Object, nil] the name's own `reified_id` when its type is 'Protonym',
#   the `reified_id` of the last element of `protonyms` when its type is
#   'Combination', and `nil` for every other type
def self.basionym_id(taxon_name)
  case taxon_name.type
  when 'Protonym' then taxon_name.reified_id
  when 'Combination' then taxon_name.protonyms.last.reified_id
  end
end

.download(otu, request = nil, prefer_unlabelled_otus: true) ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/export/coldp.rb', line 114

# Synchronously build the CoLDP zip for an OTU and register it as a download.
#
# @param otu [Otu] the OTU whose id is handed to `.export`
# @param request [Object, nil] stored on the created download record as-is
# @param prefer_unlabelled_otus [Boolean] forwarded to `.export`
# @return [Download::Coldp] the record returned by `create!`
# @raise whatever `::Download::Coldp.create!` raises when the record cannot be saved
def self.download(otu, request = nil, prefer_unlabelled_otus: true)
  # The export runs first so that the record is created with a real source file.
  file_path = ::Export::Coldp.export(otu.id, prefer_unlabelled_otus:)

  ::Download::Coldp.create!(
    name: "ColDP Download for #{otu.otu_name} on #{Time.now}.",
    description: 'A zip file containing CoLDP formatted data.',
    filename: filename(otu),
    source_file_path: file_path,
    request:,
    expires: 2.days.from_now
  )
end

.download_async(otu, request = nil, prefer_unlabelled_otus: true) ⇒ Object



131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/export/coldp.rb', line 131

# Register a download record immediately and enqueue the job that fills it.
#
# @param otu [Otu] the OTU to export
# @param request [Object, nil] stored on the created download record as-is
# @param prefer_unlabelled_otus [Boolean] forwarded to the background job
# @return [Download::Coldp] the record returned by `create!`; no
#   `source_file_path` is set here
def self.download_async(otu, request = nil, prefer_unlabelled_otus: true)
  attributes = {
    name: "ColDP Download for #{otu.otu_name} on #{Time.now}.",
    description: 'A zip file containing CoLDP formatted data.',
    filename: filename(otu),
    request:,
    expires: 2.days.from_now
  }

  ::Download::Coldp.create!(**attributes).tap do |record|
    # The job receives the already-persisted record together with the OTU.
    ColdpCreateDownloadJob.perform_later(otu, record, prefer_unlabelled_otus:)
  end
end

.export(otu_id, prefer_unlabelled_otus: true) ⇒ Object

Return path to the data itself



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/export/coldp.rb', line 55

# Build the CoLDP zip archive rooted at an OTU and return where it was written.
#
# The archive contains one "<type>.tsv" per FILETYPES entry, plus Taxon.tsv,
# References.tsv and metadata.yaml.
#
# NOTE(review): the local names `metadata_path`, `metadata` and `metadata_file`
# (and the argument to `Tempfile.new`) were missing from the rendered listing
# and have been reconstructed — confirm against lib/export/coldp.rb.
#
# @param otu_id [Integer] id of the root Otu
# @param prefer_unlabelled_otus [Boolean] accepted by the signature but not
#   read anywhere in this method body
# @return [String] path to the generated zip file
def self.export(otu_id, prefer_unlabelled_otus: true)
  otus = otus(otu_id)

  # source_id: [csv_array]
  # Handed to every file generator and read back afterwards to write References.tsv.
  ref_tsv = {}

  otu = ::Otu.find(otu_id)
  project = ::Project.find(otu.project_id)
  project_members = project_members(otu.project_id)

  # TODO: This will likely have to change, it is renamed on serving the file.
  zip_file_path = "/tmp/_#{SecureRandom.hex(8)}_coldp.zip"

  metadata_path = Zaru::sanitize!("/tmp/#{project.name}_#{DateTime.now}_metadata.yaml").gsub(' ', '_').downcase
  version = TaxonWorks::VERSION
  if Settings.sandbox_mode?
    version = Settings.sandbox_commit_sha
  end
  metadata = {
    'title' => project.name,
    'version' => version,
    'issued' => DateTime.now.strftime('%Y-%m-%d'),
  }
  metadata_file = Tempfile.new(metadata_path)
  metadata_file.write(metadata.to_yaml)
  # Closed before zipping so the YAML is flushed to disk when the archive reads it.
  metadata_file.close

  Zip::File.open(zip_file_path, Zip::File::CREATE) do |zipfile|
    # 'Name' is skipped here because it is generated from the single root otu below.
    (FILETYPES - ['Name']).each do |ft|
      m = "Export::Coldp::Files::#{ft}".safe_constantize
      zipfile.get_output_stream("#{ft}.tsv") { |f| f.write m.generate(otus, project_members, ref_tsv) }
    end

    zipfile.get_output_stream('Name.tsv') { |f| f.write Export::Coldp::Files::Name.generate(otu, project_members, ref_tsv) }
    zipfile.get_output_stream('Taxon.tsv') do |f|
      f.write Export::Coldp::Files::Taxon.generate(otus, project_members, otu_id, ref_tsv)
    end

    # Sort the refs by full citation string
    sorted_refs = ref_tsv.values.sort{|a,b| a[1] <=> b[1]}

    d = ::CSV.generate(col_sep: "\t") do |tsv|
      tsv << %w{ID citation doi modified modifiedBy} # author year source details
      sorted_refs.each do |r|
        tsv << r
      end
    end

    zipfile.get_output_stream('References.tsv') { |f| f.write d }
    zipfile.add('metadata.yaml', metadata_file.path)
  end

  zip_file_path
end

.filename(otu) ⇒ Object



110
111
112
# File 'lib/export/coldp.rb', line 110

# Build the file name used for a CoLDP download of the given OTU.
#
# @param otu [Otu] supplies `project_id` (to look up the project name) and `id`
# @return [String] the sanitized name, with spaces turned into underscores
#   and everything lower-cased; it embeds the current `DateTime.now`
def self.filename(otu)
  project_name = ::Project.find(otu.project_id).name
  raw_name = "#{project_name}_coldp_otu_id_#{otu.id}_#{DateTime.now}.zip"

  Zaru::sanitize!(raw_name).tr(' ', '_').downcase
end

.modified(updated_at) ⇒ Object



42
43
44
# File 'lib/export/coldp.rb', line 42

# Format a timestamp for the `modified` column.
#
# @param updated_at [#iso8601] a time-like object
# @return [String] whatever `updated_at.iso8601` returns
def self.modified(updated_at) = updated_at.iso8601

.modified_by(updated_by_id, project_members) ⇒ Object



46
47
48
# File 'lib/export/coldp.rb', line 46

# Look up the `modifiedBy` value for a user id.
#
# @param updated_by_id [Object] key to look up
# @param project_members [Hash] lookup table, e.g. as built by `.project_members`
# @return [Object, nil] the value stored under `updated_by_id`, as returned by `[]`
def self.modified_by(updated_by_id, project_members) = project_members[updated_by_id]

.original_field(taxon_name) ⇒ Boolean

Returns `true` if there are no parens in `cached_author_year`, `false` if there are parens in `cached_author_year`.

Returns:

  • (Boolean)

    `true` if there are no parens in `cached_author_year`, `false` if there are parens in `cached_author_year`



151
152
153
# File 'lib/export/coldp.rb', line 151

# Decide the value of the "original" flag for a name.
#
# @param taxon_name [#type, #is_original_name?]
# @return [Boolean] `false` unless the type is 'Protonym'; otherwise the
#   result of `taxon_name.is_original_name?`
def self.original_field(taxon_name)
  return false unless taxon_name.type == 'Protonym'

  taxon_name.is_original_name?
end

.otus(otu_id) ⇒ Scope

Returns the full set of Otus (= Taxa in CoLDP) that are to be sent; only Otus with valid names are included. !! At present no OTU with a `name` is sent. In the future this may need to change.

Returns:

  • (Scope)

    The full set of Otus (= Taxa in CoLDP) that are to be sent; only Otus with valid names are included. !! At present no OTU with a `name` is sent. In the future this may need to change.



20
21
22
23
24
25
26
27
28
# File 'lib/export/coldp.rb', line 20

# Scope of Otus to be exported for the given root Otu.
#
# @param otu_id [Integer] id of the root Otu
# @return [Scope] `::Otu.none` when the root Otu has no `taxon_name_id`;
#   otherwise Otus whose taxon name has the root's taxon name as a
#   `taxon_name_hierarchies` ancestor, whose taxon name is in
#   `TaxonName.that_is_valid`, and whose own `name` is NULL or equal to
#   the taxon name's `cached` value
def self.otus(otu_id)
  root_taxon_name_id = ::Otu.find(otu_id).taxon_name_id
  return ::Otu.none if root_taxon_name_id.nil?

  under_root = Otu
    .joins(taxon_name: [:ancestor_hierarchies])
    .where('taxon_name_hierarchies.ancestor_id = ?', root_taxon_name_id)

  with_valid_names = under_root.where(taxon_name_id: TaxonName.that_is_valid)

  with_valid_names.where('(otus.name IS NULL) OR (otus.name = taxon_names.cached)')
end

.project_members(project_id) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
# File 'lib/export/coldp.rb', line 30

# Build a lookup of user id => attribution string for every member of a project.
#
# @param project_id [Integer] project whose ProjectMember rows are read
# @return [Hash] keyed by `user_id`; the value is the user's `orcid` when
#   one is set, otherwise the user's `name`
def self.project_members(project_id)
  # `includes(:user)` loads all users up front instead of one query per member.
  # NOTE(review): assumes `user` is an Active Record association on ProjectMember — confirm.
  ProjectMember.where(project_id:).includes(:user).each_with_object({}) do |pm, members|
    user = pm.user
    members[pm.user_id] = user.orcid || user.name
  end
end

.reified_id(taxon_name_id, cached, cached_original_combination) ⇒ Object

Replicate TaxonName.reified_id without having to use AR



168
169
170
171
172
173
174
175
# File 'lib/export/coldp.rb', line 168

# Compute a reified id from raw column values, without an ActiveRecord instance.
#
# @param taxon_name_id [Object] the base id
# @param cached [String, nil] the cached name
# @param cached_original_combination [String, nil] the cached original combination
# @return [Object, String] `taxon_name_id` unchanged when there is no
#   original combination or it equals `cached`; otherwise the String
#   "<taxon_name_id>-<MD5 hex digest of cached_original_combination>"
def self.reified_id(taxon_name_id, cached, cached_original_combination)
  # Protonym#has_alternate_original?
  return taxon_name_id unless cached_original_combination
  return taxon_name_id if cached == cached_original_combination

  suffix = Digest::MD5.hexdigest(cached_original_combination)
  taxon_name_id.to_s + '-' + suffix
end

.sanitize_remarks(remarks) ⇒ Object



50
51
52
# File 'lib/export/coldp.rb', line 50

# Flatten free-text remarks so they fit in a single TSV cell.
#
# Line breaks (CRLF, lone CR, LF) and tabs are each replaced by a space,
# then runs of spaces are collapsed to one.
#
# The patterns are regular expressions on purpose: in single-quoted Ruby
# strings '\n' and '\t' are a literal backslash followed by a letter, so
# they never match real control characters.
#
# @param remarks [String, nil]
# @return [String, nil] the flattened text, or `nil` when `remarks` is `nil`
def self.sanitize_remarks(remarks)
  remarks&.gsub(/\r\n?|\n|\t/, ' ')&.gsub(/[ ]+/, ' ')
end