Module: Export::Coldp::Files::Name

Defined in:
lib/export/coldp/files/name.rb

Overview

The names table includes

  • All name strings, even if hanging (i.e. not attached to OTUs/Taxa)

Class Method Summary collapse

Class Method Details

.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members) ⇒ Object

Invalid Protonyms are rendered only as their original Combination

Parameters:

  • t (Protonym)

    This is the only place where var./form (infraspecific) ranks can be handled.



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/export/coldp/files/name.rb', line 61

# Appends one Name row to `csv` that renders `t` under its original combination.
#
# @param t [Protonym] the name to render; the infraspecific element returned by
#   `original_combination_infraspecific_element` is only handled here
# @param csv [#<<] row sink
# @param origin_citation [#source_id, #pages, nil] supplies referenceID / publishedInPage
# @param name_remarks_vocab_id [Integer, nil] forwarded to `remarks`
# @param project_members forwarded to `Export::Coldp.modified_by`
# @return [Object] whatever `csv << row` returns
def self.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members)
  elements = t.original_combination_elements
  infraspecific = t.original_combination_infraspecific_element(elements, remove_sic: true)

  rank =
    if infraspecific
      label = infraspecific.first
      label == 'form' ? 'forma' : label # CoL preferred string
    else
      # First of these ranks (checked lowest to highest) present in the elements
      %i[subspecies species subgenus genus].find { |r| elements[r] }
    end

  id = t.reified_id

  # A misspelling points at the reified id of its valid name; every other name
  # (e.g. one that only differs by original combination) is its own basionym.
  basionym_id = id
  basionym_id = t.valid_taxon_name.reified_id if t.has_misspelling_relationship?

  # Name string recorded for a rank; nil when the rank is absent or holds a
  # "NOT SPECIFIED" placeholder.
  named = lambda do |key|
    element = elements[key]
    next unless element

    value = element[1]
    value =~ /NOT SPECIFIED/ ? nil : value
  end

  uninomial = genus = subgenus = species = nil

  if rank == :genus
    uninomial = elements[:genus][1]
  else
    genus = named.call(:genus)
    subgenus = named.call(:subgenus)&.gsub(/[\)\(]/, '') # parentheses are stripped
    species = named.call(:species)
  end

  infraspecific_epithet = infraspecific.last if infraspecific

  row = [
    id,                                                                 # ID
    basionym_id,                                                        # basionymID
    clean_sic(t.cached_original_combination),                           # scientificName
    authorship_field(t, true),                                          # authorship
    rank,                                                               # rank
    uninomial,                                                          # uninomial
    genus,                                                              # genus
    subgenus,                                                           # infragenericEpithet (no parens)
    species,                                                            # specificEpithet
    infraspecific_epithet,                                              # infraspecificEpithet
    origin_citation&.source_id,                                         # referenceID    |
    origin_citation&.pages,                                             # publishedInPage  | !! All origin citations get added to reference_csv via the main loop, not here
    t.year_of_publication,                                              # publishedInYear  |
    true,                                                               # original
    code_field(t),                                                      # code
    nil,                                                                # status https://api.checklistbank.org/vocab/nomStatus
    nil,                                                                # link (probably TW public or API)
    Export::Coldp.sanitize_remarks(remarks(t, name_remarks_vocab_id)),  # remarks
    Export::Coldp.modified(t[:updated_at]),                             # modified
    Export::Coldp.modified_by(t[:updated_by_id], project_members)       # modifiedBy
  ]

  csv << row
end

.authorship_field(taxon_name, original) ⇒ Object

Returns String.

Returns:

  • String



29
30
31
# File 'lib/export/coldp/files/name.rb', line 29

# Picks which author/year string to export for a name.
#
# @param taxon_name [#original_author_year, #cached_author_year]
# @param original [Boolean] truthy selects `original_author_year`,
#   otherwise `cached_author_year` is used
# @return [String]
def self.authorship_field(taxon_name, original)
  return taxon_name.original_author_year if original

  taxon_name.cached_author_year
end

.clean_sic(name) ⇒ Object



142
143
144
# File 'lib/export/coldp/files/name.rb', line 142

# Removes every " [sic]" marker (the marker plus the whitespace before it).
#
# @param name [String, nil]
# @return [String, nil] a new string, or nil when `name` is nil
def self.clean_sic(name)
  # TODO: drop this nil guard once cached_original_combination is re-indexed
  return if name.nil?

  name.gsub(/\s+\[sic\]/, '')
end

.code_field(taxon_name) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/export/coldp/files/name.rb', line 7

# Maps a name's `nomenclatural_code` symbol to the string exported in the
# `code` column.
#
# @param taxon_name [#nomenclatural_code]
# @return [String, nil] nil when the code is not one of the four listed here
def self.code_field(taxon_name)
  {
    iczn: 'ICZN',
    icn: 'ICN',
    icnp: 'ICNP',
    icvcn: 'ICVCN'
  }[taxon_name.nomenclatural_code]
end

.generate(otu, project_members, reference_csv = nil) ⇒ Object



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# File 'lib/export/coldp/files/name.rb', line 148

# Builds the Name table as tab-separated text (via `::CSV.generate`).
#
# Walks the valid names at and below `otu.taxon_name`; for each one, visits
# every TaxonName whose `cached_valid_taxon_name_id` points at it and writes
# up to two rows: the name itself and, when applicable, its original
# combination (see `add_original_combination`).
#
# Side effect: assigns `Current.project_id` from `otu.project_id`.
#
# @param otu [#project_id, #taxon_name] root of the export
# @param project_members forwarded to `Export::Coldp.modified_by` and
#   `Export::Coldp::Files::Reference.add_reference_rows`
# @param reference_csv [Object, nil] when given, origin citation sources are
#   also added to it via `Reference.add_reference_rows`
# @return [String] the generated table
def self.generate(otu, project_members, reference_csv = nil)
   # NOTE(review): name_total is incremented below but never read or returned
   # within this method — confirm whether it is still needed.
   name_total = 0
  ::CSV.generate(col_sep: "\t") do |csv|
    # Header row; column order must match the rows written below and in
    # add_original_combination.
    csv << %w{
      ID
      basionymID
      scientificName
      authorship
      rank
      uninomial
      genus
      infragenericEpithet
      specificEpithet
      infraspecificEpithet
      referenceID
      publishedInPage
      publishedInYear
      original
      code
      status
      link
      remarks
      modified
      modifiedBy
    }

    Current.project_id = otu.project_id
    # Id of the project's Predicate used for Name remarks; nil when the
    # project has none, in which case `remarks` returns nil.
    name_remarks_vocab_id = Predicate.find_by(uri: 'https://github.com/catalogueoflife/coldp#Name.remarks',
                                              project_id: Current.project_id)&.id

    # Each `name` is an [id, cached] pair for one valid name.
    otu.taxon_name.self_and_descendants.that_is_valid
      .pluck(:id, :cached)
      .each do |name|

      # TODO: handle > quadranomial names (e.g. super species like `Bus (Dus aus aus) aus eus var. fus`)
      # Proposal is to exclude names of a specific ranks see taxon.rb
      #
      # Need the next highest valid parent not in this list!!
      # %w{
      #   NomenclaturalRank::Iczn::SpeciesGroup::Supersuperspecies
      #   NomenclaturalRank::Iczn::SpeciesGroup::Superspecies
      # }
      #
      # infragenericEpithet needs to handle subsection (NomenclaturalRank::Icn::GenusGroup::Subsection)

      name_total += 1

      TaxonName
        .where(cached_valid_taxon_name_id: name[0]) # == .historical_taxon_names
        .where.not("(taxon_names.type = 'Combination' AND taxon_names.cached = ?)", name[1]) # This eliminates Combinations that are identical to the current placement.
        .find_each do |t|

        origin_citation = t.origin_citation

        original = Export::Coldp.original_field(t) # Protonym, no parens

        basionym_id = t.reified_id

        is_genus_species = t.is_genus_or_species_rank?

        # TODO: Subgenus as Genus combination may break this
        # Uninomial: a non-Combination that is either a genus or outside the genus/species ranks.
        is_col_uninomial = !t.is_combination? && ((t.rank == 'genus') || !is_genus_species)

        # Higher: a non-Combination outside the genus/species ranks.
        higher = !t.is_combination? && !is_genus_species

        # TODO: consider faster ways to check for misspellings
        name_string = clean_sic(t.cached) # if higher and misspelling, then it's in name too

        uninomial = nil
        generic_epithet, infrageneric_epithet, specific_epithet, infraspecific_epithet = nil, nil, nil, nil

        if !is_col_uninomial
          elements = t.full_name_hash

          generic_epithet = clean_sic(elements['genus']&.last)
          infrageneric_epithet = clean_sic(elements['subgenus']&.last)
          specific_epithet = clean_sic(elements['species']&.last)
          infraspecific_epithet = clean_sic(elements['subspecies']&.last)
        else
          uninomial = name_string
        end

        # A Combination takes the last key of `protonyms_by_rank` as its rank.
        if t.is_combination?
          rank = t.protonyms_by_rank.keys.last
        else
          rank = t.rank
        end

        # Set is: no original combination OR (valid or invalid higher, valid lower, past combinations)
        if t.cached_original_combination.blank? || higher || t.is_valid? || t.is_combination?
          csv << [
            t.id,                                                               # ID
            basionym_id,                                                        # basionymID
            name_string,                                                        # scientificName  # should just be t.cached
            t.cached_author_year,                                               # authorship
            rank,                                                               # rank
            uninomial,                                                          # uninomial   <- if genus here
            generic_epithet,                                                    # genus and below - IIF species or lower
            infrageneric_epithet,                                               # infragenericEpithet
            specific_epithet,                                                   # specificEpithet
            infraspecific_epithet,                                              # infraspecificEpithet
            origin_citation&.source_id,                                         # referenceID
            origin_citation&.pages,                                             # publishedInPage
            t.year_of_publication,                                              # publishedInYear
            original,                                                           # original
            code_field(t),                                                      # code
            nom_status_field(t),                                                # status
            nil,                                                                # link (probably TW public or API)
            Export::Coldp.sanitize_remarks(remarks(t, name_remarks_vocab_id)),  # remarks
            Export::Coldp.modified(t[:updated_at]),                             # modified
            Export::Coldp.modified_by(t[:updated_by_id], project_members)       # modifiedBy
          ]
        end

        # Here we truly want no higher
        # Extra row for the original combination: genus/species-rank non-Combinations
        # that are invalid or have an alternate original.
        if !t.cached_original_combination.blank? && (is_genus_species && !t.is_combination? && (!t.is_valid? || t.has_alternate_original?))
          name_total += 1
          add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members)
        end

        # Origin citation sources are pushed to the reference table here, not in add_original_combination.
        Export::Coldp::Files::Reference.add_reference_rows([origin_citation.source].compact, reference_csv, project_members) if reference_csv && origin_citation
      end
    end
  end
end

.nom_status_field(taxon_name) ⇒ String?

Returns:

  • (String, nil)


37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/export/coldp/files/name.rb', line 37

# Resolves the value exported in the `status` column for a name.
#
# @param taxon_name [#type, #is_valid?, #nomenclatural_code,
#   #taxon_name_classifications_for_statuses]
# @return [String, nil]
def self.nom_status_field(taxon_name)
  # A Combination is *not* a 'chresonym' sensu CoL
  # (which is this: [correct: 'Aus bus Smith 1920', chresonym: 'Aus bus Jones 1922'])
  return nil if taxon_name.type == 'Combination'

  return ::TaxonName::NOMEN_VALID[taxon_name.nomenclatural_code] if taxon_name.is_valid?

  # Invalid name: use the status classification with the youngest source, if any.
  status = taxon_name
    .taxon_name_classifications_for_statuses
    .order_by_youngest_source_first
    .first

  # We should also infer status from TaxonNameRelationship and be more specific, but if CoL doesn't
  # use NOMEN this won't mean much
  #
  # Note: We supply `nil` when relationship is used here because it is declared in synonym table.
  # Note: This means that the *type* of synonym is lost (e.g. Misspelling)
  status.class::NOMEN_URI if status
end

.remarks(name, name_remarks_vocab_id) ⇒ Object



20
21
22
23
24
25
26
# File 'lib/export/coldp/files/name.rb', line 20

# Collects the values of a name's data attributes that use the given
# controlled vocabulary term, joined with '|'.
#
# The attributes are fetched with a single `pluck` rather than an `any?`
# check followed by a second, identical query.
#
# @param name [#data_attributes] the record whose data attributes are read
# @param name_remarks_vocab_id [Integer, nil] controlled vocabulary term id;
#   when nil nothing is queried
# @return [String, nil] nil when no id is given or no matching attribute exists
def self.remarks(name, name_remarks_vocab_id)
  return nil if name_remarks_vocab_id.nil?

  values = name.data_attributes
    .where(controlled_vocabulary_term_id: name_remarks_vocab_id)
    .pluck(:value)

  values.empty? ? nil : values.join('|')
end