Module: Export::Coldp::Files::Name

Defined in:
lib/export/coldp/files/name.rb

Overview

The names table includes

  • All name strings, even if hanging (= not attached to OTUs/Taxa)

  • Strings for both valid and invalid names

Class Method Summary collapse

Class Method Details

.add_higher_original_name(t, csv, origin_citation, name_remarks_vocab_id, project_members) ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/export/coldp/files/name.rb', line 62

# Appends one Name row for the original form of a higher (uninomial) name,
# keyed by the name's reified ID.
#
# The cached original combination, stripped of any "[sic]", is written to both
# the scientificName and the uninomial columns; genus/species columns stay empty.
#
# @param t [TaxonName] must respond to reified_id, cached_original_combination,
#   original_author_year, rank and year_of_publication
# @param csv [#<<] receives the row
# @param origin_citation [Object, nil] supplies referenceID and publishedInPage when present
# @param name_remarks_vocab_id [Object, nil] forwarded to .remarks
# @param project_members [Object] forwarded to Export::Coldp.modified_by
# @return [Object] whatever `csv << row` returns
def self.add_higher_original_name(t, csv, origin_citation, name_remarks_vocab_id, project_members)
  original_name = clean_sic({ scientific_name: t.cached_original_combination })[:scientific_name]

  # !! All origin citations get added to reference_csv via the main loop, not here
  row = [
    t.reified_id,                                                       # ID
    nil,                                                                # basionymID
    original_name,                                                      # scientificName
    t.original_author_year,                                             # authorship
    t.rank,                                                             # rank
    original_name,                                                      # uninomial
    nil,                                                                # genus
    nil,                                                                # subgenus (no parens)
    nil,                                                                # species
    nil,                                                                # infraspecificEpithet
    origin_citation&.source_id,                                         # referenceID
    origin_citation&.pages,                                             # publishedInPage
    t.year_of_publication,                                              # publishedInYear
    true,                                                               # original
    code_field(t),                                                      # code
    nil,                                                                # status https://api.checklistbank.org/vocab/nomStatus
    nil,                                                                # link (probably TW public or API)
    Export::Coldp.sanitize_remarks(remarks(t, name_remarks_vocab_id)),  # remarks
    Export::Coldp.modified(t[:updated_at]),                             # modified
    Export::Coldp.modified_by(t[:updated_by_id], project_members)       # modifiedBy
  ]

  csv << row
end

.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members) ⇒ Object

Invalid Protonyms are rendered only as their original Combination

Parameters:

  • t (Protonym)

    This method is the only place where var./form ranks can be handled.



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/export/coldp/files/name.rb', line 94

# Appends one Name row for the original combination of a Protonym, keyed by
# the name's reified ID.
#
# Invalid Protonyms are rendered only as their original Combination.
# A name whose cached original combination contains "NOT SPECIFIED" is not
# written; its reified ID is pushed onto @skipped_name_ids instead.
#
# @param t [Protonym] this is the only place that var./form can be handled
# @param csv [#<<] receives the row
# @param origin_citation [Object, nil] supplies referenceID and publishedInPage when present
# @param name_remarks_vocab_id [Object, nil] forwarded to .remarks
# @param project_members [Object] forwarded to Export::Coldp.modified_by
# @return [Object, nil] whatever `csv << row` returns, or nil when the name is skipped
def self.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members)
  # TODO: Should [sic] handling be added to the Protonym#original_combination_elements method? Need to discuss with DD and MJY
  #
  # TODO: Not sure why, but the data structure from t.original_combination_elements seems to be either of the following:
  #   {:genus=>[nil, "Sabacon"], :species=>[nil, "vizcayanus [sic]"]}
  #   {:genus=>[nil, "Sabacon"], :species=>[nil, "vizcayanus", "[sic]"]}
  # A standalone '[sic]' entry is removed (in place) so that `.last` is the epithet.
  elements = t.original_combination_elements.each_with_object({}) do |(element_rank, parts), memo|
    parts.delete('[sic]')
    memo[element_rank] = parts
  end

  epithets = clean_sic({
    scientific_name: t.cached_original_combination,
    genus: elements[:genus]&.last,
    subgenus: elements[:subgenus]&.last,
    species: elements[:species]&.last,
    subspecies: elements[:subspecies]&.last
  })

  infraspecific = t.original_combination_infraspecific_element(t.original_combination_elements, remove_sic: true)

  rank =
    if infraspecific
      label = infraspecific.first
      label == 'form' ? 'forma' : label # CoL preferred string
    else
      # Lowest rank that has an element wins
      [:subspecies, :species, :subgenus, :genus].find { |candidate| elements[candidate] }
    end

  id = t.reified_id

  # skip names with "NOT SPECIFIED" elements
  if /NOT SPECIFIED/.match?(t.cached_original_combination)
    @skipped_name_ids.push(id)
    return
  end

  # case 1 - original combination difference -> the row is its own basionym
  # case 2 - misspelling (same combination)  -> points at the valid name's reified ID
  # NOTE(review): `valid?` is called here, while other methods in this file use `is_valid?` — confirm which is intended.
  basionym_id =
    if t.valid? && t.has_misspelling_relationship? # uses cached values now.
      t.valid_taxon_name.reified_id
    else
      id
    end

  # A genus-rank original is reported as a uninomial only; everything else is
  # reported through the genus / subgenus / species columns.
  uninomial, genus, subgenus, species =
    if rank == :genus
      [epithets[:genus], nil, nil, nil]
    else
      [nil, epithets[:genus], epithets[:subgenus]&.gsub(/[\)\(]/, ''), epithets[:species]]
    end

  infraspecific_epithet = infraspecific.last if infraspecific

  # !! All origin citations get added to reference_csv via the main loop, not here
  row = [
    id,                                                                 # ID
    basionym_id,                                                        # basionymID
    epithets[:scientific_name],                                         # scientificName
    t.original_author_year,                                             # authorship
    rank,                                                               # rank
    uninomial,                                                          # uninomial
    genus,                                                              # genus
    subgenus,                                                           # subgenus (no parens)
    species,                                                            # species
    infraspecific_epithet,                                              # infraspecificEpithet
    origin_citation&.source_id,                                         # referenceID
    origin_citation&.pages,                                             # publishedInPage
    t.year_of_publication,                                              # publishedInYear
    true,                                                               # original
    code_field(t),                                                      # code
    nil,                                                                # status https://api.checklistbank.org/vocab/nomStatus
    nil,                                                                # link (probably TW public or API)
    Export::Coldp.sanitize_remarks(remarks(t, name_remarks_vocab_id)),  # remarks
    Export::Coldp.modified(t[:updated_at]),                             # modified
    Export::Coldp.modified_by(t[:updated_by_id], project_members)       # modifiedBy
  ]

  csv << row
end

.clean_sic(epithets) ⇒ Object



177
178
179
180
181
182
183
# File 'lib/export/coldp/files/name.rb', line 177

# Strips "[sic]" (and any whitespace directly before it) from every value of
# a hash of name parts.
#
# @param epithets [Hash{Object => String, nil}] name parts; nil values are left as nil
# @return [Hash] the very same hash when no value contains "[sic]", otherwise
#   a new hash with the cleaned values
def self.clean_sic(epithets)
  contains_sic = epithets.each_value.any? { |part| part&.include?('[sic]') }
  return epithets unless contains_sic

  epithets.transform_values { |part| part&.gsub(/\s*\[sic\]/, '') }
end

.code_field(taxon_name) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/export/coldp/files/name.rb', line 12

# Translates a name's nomenclatural code symbol into the upper-case label
# written to the `code` column.
#
# @param taxon_name [#nomenclatural_code]
# @return [String, nil] 'ICZN', 'ICN', 'ICNP' or 'ICVCN'; nil for any other value
def self.code_field(taxon_name)
  labels = {
    iczn: 'ICZN',
    icn: 'ICN',
    icnp: 'ICNP',
    icvcn: 'ICVCN'
  }

  labels[taxon_name.nomenclatural_code]
end

.generate(otu, project_members, reference_csv = nil) ⇒ Object



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/export/coldp/files/name.rb', line 187

# Builds the tab-separated Name table for an OTU's taxon name and all of its
# valid descendants.
#
# For every valid name, each TaxonName whose cached_valid_taxon_name_id points
# at it is visited (Combinations identical to the current placement excluded)
# and up to three rows are written:
#   * the reified original combination (see .add_original_combination),
#   * the reified original name of a family-rank name (see .add_higher_original_name),
#   * the name itself.
#
# @param otu [Otu] root of the export; its project_id scopes the remarks Predicate lookup
# @param project_members [Object] forwarded to Export::Coldp.modified_by and Reference.add_reference_rows
# @param reference_csv [Object, nil] when given, the source of each origin citation is added to it
# @return [String] the generated table, header row included
def self.generate(otu, project_members, reference_csv = nil)
  ::CSV.generate(col_sep: "\t") do |csv|

    csv << %w{
      ID
      basionymID
      scientificName
      authorship
      rank
      uninomial
      genus
      infragenericEpithet
      specificEpithet
      infraspecificEpithet
      referenceID
      publishedInPage
      publishedInYear
      original
      code
      status
      link
      remarks
      modified
      modifiedBy
    }

    # We should not be setting this here !!
    project_id = otu.project_id

    name_remarks_vocab_id = Predicate.find_by(
      uri: 'https://github.com/catalogueoflife/coldp#Name.remarks',
      project_id: project_id)&.id

    # TODO: create a base select that covers all fields, to which we add where/joins to isolate sets of names.
    # TODO: All top level queries should add names from SQL without NOT checks
    # TODO: consider a materialized view for COLDP names, refreshed nightly, outside the loop?
    #   we are basically going to need that logic for BORG anyways
    otu.taxon_name.self_and_descendants.that_is_valid
      .select(:id, :cached)
      .find_each do |name|

      # TODO: handle > quadranomial names (e.g. super species like `Bus (Dus aus aus) aus eus var. fus`
      # Proposal is to exclude names of a specific ranks see taxon.rb
      #
      # Need the next highest valid parent not in this list!!
      # %w{
      #   NomenclaturalRank::Iczn::SpeciesGroup::Supersuperspecies
      #   NomenclaturalRank::Iczn::SpeciesGroup::Superspecies
      # }
      #
      # infragenericEpithet needs to handle subsection (NomenclaturalRank::Icn::GenusGroup::Subsection)

      # TODO: remove this loop, using a with to top
      TaxonName
        .where(cached_valid_taxon_name_id: name.id) # == .historical_taxon_names
        .where.not("(taxon_names.type = 'Combination' AND taxon_names.cached = ?)", name.cached) # This eliminates Combinations that are identical to the current placement.
        .eager_load(origin_citation: [:source])
        .find_each do |t|

        # TODO: refactor to a single method, test, then we should only have to check if the name is valid, without relationships?
        # TODO: family-group cached original combinations do not get exported in either Name or Synonym tables
        # exclude duplicate protonyms created for family group relationships
        if !t.is_combination? && t.is_family_rank? # We are already excluding combinations from above
          if TaxonNameRelationship::Iczn::Invalidating::Usage::FamilyGroupNameForm.where(subject_taxon_name: t).any? #  t.taxon_name_relationships.any? {|tnr| tnr.type == 'TaxonNameRelationship::Iczn::Invalidating::Usage::FamilyGroupNameForm'}
            valid = TaxonName.find(t.cached_valid_taxon_name_id)
            # A duplicate is a *different* record with the same name and the same author.
            # The author check must be a comparison (==); an assignment here would
            # overwrite valid.cached_author and be truthy for any non-nil author.
            if valid.name == t.name && valid.cached_author == t.cached_author && t.id != valid.id # !! valid.name should never = t.name, by definition?
              next
            end
          end
        end

        origin_citation = t.origin_citation

        original = Export::Coldp.original_field(t) # Protonym, no parens

        # Stays nil for family-rank non-Combinations
        basionym_id = t.reified_id unless !t.is_combination? && t.is_family_rank?

        is_genus_species = t.is_genus_or_species_rank?

        # TODO: Subgenus as Genus combination may break this
        is_col_uninomial = !t.is_combination? && ((t.rank == 'genus') || !is_genus_species)

        higher = !t.is_combination? && !is_genus_species

        uninomial, generic_epithet, infrageneric_epithet, specific_epithet, infraspecific_epithet = nil, nil, nil, nil, nil

        if !is_col_uninomial
          elements = t.full_name_hash

          epithets = clean_sic({:scientific_name => t.cached, :genus => elements['genus']&.last, :subgenus => elements['subgenus']&.last, :species => elements['species']&.last, :subspecies => elements['subspecies']&.last})

          name_string = epithets[:scientific_name]
          generic_epithet = epithets[:genus]
          infrageneric_epithet = epithets[:subgenus]
          specific_epithet = epithets[:species]
          infraspecific_epithet = epithets[:subspecies]
        else
          uninomial = name_string = clean_sic({:scientific_name => t.cached})[:scientific_name]
        end

        if t.is_combination?
          rank = t.protonyms_by_rank.keys.last
        else
          rank = t.rank
        end

        # Here we truly want no higher
        if t.cached_original_combination.present? && (!t.is_combination? && is_genus_species && (!t.is_valid? || t.has_alternate_original?))
          add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members)
        end

        # Here we add reified ID's for higher taxa in which cached != cached_original_combination (e.g., TaxonName stores both Lamotialnina and Lamotialnini so needs a reified ID)
        if t.cached_original_combination.present? && t.is_family_rank? && t.has_alternate_original? # t.cached != t.cached_original_combination
          add_higher_original_name(t, csv, origin_citation, name_remarks_vocab_id, project_members)
        end

        # Do not point at a basionym row that add_original_combination declined to write
        basionym_id = nil if @skipped_name_ids.include?(basionym_id)

        # Set is: no original combination OR (valid or invalid higher, valid lower, past combinations)
        if t.cached_original_combination.blank? || higher || t.is_valid? || t.is_combination?
          csv << [
            t.id,                                                               # ID
            basionym_id,                                                        # basionymID
            name_string,                                                        # scientificName  # should just be t.cached
            t.cached_author_year,                                               # authorship
            rank,                                                               # rank
            uninomial,                                                          # uninomial   <- if genus here
            generic_epithet,                                                    # genus and below - IIF species or lower
            infrageneric_epithet,                                               # infragenericEpithet
            specific_epithet,                                                   # specificEpithet
            infraspecific_epithet,                                              # infraspecificEpithet
            origin_citation&.source_id,                                         # publishedInID
            origin_citation&.pages,                                             # publishedInPage
            t.year_of_publication,                                              # publishedInYear
            original,                                                           # original
            code_field(t),                                                      # code
            nom_status_field(t),                                                # nomStatus
            nil,                                                                # link (probably TW public or API)
            Export::Coldp.sanitize_remarks(remarks(t, name_remarks_vocab_id)),  # remarks
            Export::Coldp.modified(t[:updated_at]),                             # modified
            Export::Coldp.modified_by(t[:updated_by_id], project_members)       # modifiedBy
          ]
        end

        Export::Coldp::Files::Reference.add_reference_rows([origin_citation.source].compact, reference_csv, project_members) if reference_csv && origin_citation
      end
    end
  end
end

.nom_status_field(taxon_name) ⇒ String?

Returns:

  • (String, nil)


37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/export/coldp/files/name.rb', line 37

# Resolves the value written to the Name table's status column.
#
# @param taxon_name [TaxonName]
# @return [String, nil] nil for Combinations; the ::TaxonName::NOMEN_VALID entry
#   for the name's nomenclatural code when the name is valid; otherwise the
#   NOMEN_URI of the classification picked by TaxonNameClassification.youngest,
#   or nil when there is none
def self.nom_status_field(taxon_name)
  # This is *not* 'chresonym' sensu CoL (which is this: [correct: 'Aus bus Smith 1920', chresonym: 'Aus bus Jones 1922'])
  return nil if taxon_name.type == 'Combination'

  return ::TaxonName::NOMEN_VALID[taxon_name.nomenclatural_code] if taxon_name.is_valid?

  ## TODO: very expensive, consider caching in TN
  # c = taxon_name.taxon_name_classifications_for_statuses.order_by_youngest_source_first.first
  status = TaxonNameClassification.youngest(taxon_name.taxon_name_classifications_for_statuses)

  # We should also infer status from TaxonNameRelationship and be more specific, but if CoL doesn't
  # use NOMEN this won't mean much
  #
  # Note: We supply `nil` when relationship is used here because it is declared in synonym table.
  # Note: This means that the *type* of synonym is lost (e.g. Misspelling)
  return nil unless status

  status.class::NOMEN_URI
end

.remarks(name, name_remarks_vocab_id) ⇒ Object



25
26
27
28
29
30
31
# File 'lib/export/coldp/files/name.rb', line 25

# Collects the remarks recorded for a name under a given controlled-vocabulary term.
#
# The values are fetched with a single `pluck`; emptiness is checked on the
# result rather than by issuing a separate existence check on the same
# relation first.
#
# @param name [#data_attributes] the record whose data attributes are read
# @param name_remarks_vocab_id [Object, nil] controlled_vocabulary_term_id to filter on
# @return [String, nil] the matching values joined with '|', or nil when the
#   id is nil or nothing matches
def self.remarks(name, name_remarks_vocab_id)
  return nil if name_remarks_vocab_id.nil?

  values = name.data_attributes
    .where(controlled_vocabulary_term_id: name_remarks_vocab_id)
    .pluck(:value)

  values.empty? ? nil : values.join('|')
end

.skipped_name_ids ⇒ Object



8
9
10
# File 'lib/export/coldp/files/name.rb', line 8

# Reified IDs of names that were deliberately left out of the Name table
# (pushed by .add_original_combination for "NOT SPECIFIED" names).
#
# The list is created on first access so callers always get an Array,
# never nil, even before any name has been skipped.
# NOTE(review): if the module initialises @skipped_name_ids elsewhere, the
# `||=` is a no-op and the existing list is returned unchanged.
#
# @return [Array] the same Array instance on every call
def self.skipped_name_ids
  @skipped_name_ids ||= []
end