Module: Export::Coldp::Files::Name
- Defined in:
- lib/export/coldp/files/name.rb
Overview
The names table includes:
-
All name strings, even if hanging (= not attached to OTUs/Taxa)
-
It contains strings that may be invalid OR valid
Class Method Summary
- .add_higher_original_name(t, csv, origin_citation, name_remarks_vocab_id, project_members) ⇒ Object
-
.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members) ⇒ Object
Invalid Protonyms are rendered only as their original Combination.
- .clean_sic(epithets) ⇒ Object
- .code_field(taxon_name) ⇒ Object
- .generate(otu, project_members, reference_csv = nil) ⇒ Object
- .nom_status_field(taxon_name) ⇒ String?
- .remarks(name, name_remarks_vocab_id) ⇒ Object
- .skipped_name_ids ⇒ Object
Class Method Details
.add_higher_original_name(t, csv, origin_citation, name_remarks_vocab_id, project_members) ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/export/coldp/files/name.rb', line 62 def self.add_higher_original_name(t, csv, origin_citation, name_remarks_vocab_id, project_members) id = t.reified_id uninomial = clean_sic({:scientific_name => t.cached_original_combination})[:scientific_name] csv << [ id, # ID nil, # basionymID uninomial, # scientificName t., # authorship t.rank, # rank uninomial, # uninomial nil, # genus nil, # subgenus (no parens) nil, # species nil, # infraspecificEpithet origin_citation&.source_id, # referenceID | origin_citation&.pages, # publishedInPage | !! All origin citations get added to reference_csv via the main loop, not here t.year_of_publication, # publishedInYear | true, # original code_field(t), # code nil, # status https://api.checklistbank.org/vocab/nomStatus nil, # link (probably TW public or API) Export::Coldp.sanitize_remarks(remarks(t, name_remarks_vocab_id)), # remarks Export::Coldp.modified(t[:updated_at]), # modified Export::Coldp.modified_by(t[:updated_by_id], project_members) # modifiedBy ] end |
.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members) ⇒ Object
Invalid Protonyms are rendered only as their original Combination
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
# File 'lib/export/coldp/files/name.rb', line 94 def self.add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members) # TODO: Should [sic] handling be added to the Protonym#original_combination_elements method? Need to discuss with DD and MJY e = {} # TODO: Not sure why, but the data stucture from t.original_combination_elements seems to be either of the following: # {:genus=>[nil, "Sabacon"], :species=>[nil, "vizcayanus [sic]"]} # {:genus=>[nil, "Sabacon"], :species=>[nil, "vizcayanus", "[sic]"]} t.original_combination_elements.each do |k, v| v.delete('[sic]') e[k] = v end epithets = clean_sic({:scientific_name => t.cached_original_combination, :genus => e[:genus]&.last, :subgenus => e[:subgenus]&.last, :species => e[:species]&.last, :subspecies => e[:subspecies]&.last}) infraspecific_element = t.original_combination_infraspecific_element(t.original_combination_elements, remove_sic: true) rank = nil if infraspecific_element rank = infraspecific_element.first rank = 'forma' if rank == 'form' # CoL preferred string else [:subspecies, :species, :subgenus, :genus].each do |r| if e[r] rank = r break end end end id = t.reified_id # skip names with "NOT SPECIFIED" elements if t.cached_original_combination =~ /NOT SPECIFIED/ @skipped_name_ids.push(id) return end basionym_id = if !t.valid? id elsif t.has_misspelling_relationship? # uses cached values now. 
t.valid_taxon_name.reified_id else id end # case 1 - original combination difference # case 2 - misspelling (same combination) uninomial, genus, subgenus, species = nil, nil, nil, nil scientific_name = epithets[:scientific_name] if rank == :genus uninomial = epithets[:genus] else genus = epithets[:genus] subgenus = epithets[:subgenus]&.gsub(/[\)\(]/, '') species = epithets[:species] end csv << [ id, # ID basionym_id, # basionymID scientific_name, # scientificName t., # authorship rank, # rank uninomial, # uninomial genus, # genus subgenus, # subgenus (no parens) species, # species infraspecific_element ? infraspecific_element.last : nil, # infraspecificEpithet origin_citation&.source_id, # referenceID | origin_citation&.pages, # publishedInPage | !! All origin citations get added to reference_csv via the main loop, not here t.year_of_publication, # publishedInYear | true, # original code_field(t), # code nil, # status https://api.checklistbank.org/vocab/nomStatus nil, # link (probably TW public or API) Export::Coldp.sanitize_remarks(remarks(t, name_remarks_vocab_id)), # remarks Export::Coldp.modified(t[:updated_at]), # modified Export::Coldp.modified_by(t[:updated_by_id], project_members) # modifiedBy ] end |
.clean_sic(epithets) ⇒ Object
177 178 179 180 181 182 183 |
# File 'lib/export/coldp/files/name.rb', line 177
# Strips "[sic]" markers (and any whitespace directly before them) from every
# string value of a hash of name parts.
#
# @param epithets [Hash] name parts keyed by e.g. :scientific_name, :genus; values are Strings or nil
# @return [Hash] the very same hash when no value contains "[sic]", otherwise
#   a new hash with the markers removed (nil values stay nil)
def self.clean_sic(epithets)
  marked = epithets.values.any? { |text| text&.include?('[sic]') }
  return epithets unless marked

  sic_pattern = /\s*\[sic\]/
  epithets.transform_values do |text|
    text&.gsub(sic_pattern, '')
  end
end
.code_field(taxon_name) ⇒ Object
12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/export/coldp/files/name.rb', line 12
# Maps a taxon name's nomenclatural code symbol to the upper-case label
# written to the `code` column.
#
# @param taxon_name [Object] record responding to `nomenclatural_code`
# @return [String, nil] 'ICZN', 'ICN', 'ICNP' or 'ICVCN'; nil for any other value
def self.code_field(taxon_name)
  labels = {
    iczn: 'ICZN',
    icn: 'ICN',
    icnp: 'ICNP',
    icvcn: 'ICVCN'
  }

  labels[taxon_name.nomenclatural_code]
end
.generate(otu, project_members, reference_csv = nil) ⇒ Object
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 |
# File 'lib/export/coldp/files/name.rb', line 187 def self.generate(otu, project_members, reference_csv = nil) name_total = 0 output = {} output[:csv] = ::CSV.generate(col_sep: "\t") do |csv| csv << %w{ ID basionymID scientificName authorship rank uninomial genus infragenericEpithet specificEpithet infraspecificEpithet referenceID publishedInPage publishedInYear original code status link remarks modified modifiedBy } # We should not be setting this here !! project_id = otu.project_id name_remarks_vocab_id = Predicate.find_by( uri: 'https://github.com/catalogueoflife/coldp#Name.remarks', project_id: project_id)&.id # TODO: create a base select that covers all fields, to which we add where'joins to isolate sets of names. # TODO: All top level queries should add names from SQL without NOT checks # TODO: consider a materialized view for COLDP names, refreshed nightly, outside the loop? # we are basically going to need that logic for BORG anyways otu.taxon_name.self_and_descendants.that_is_valid .select(:id, :cached) .find_each do |name| # TODO: handle > quadranomial names (e.g. super species like `Bus (Dus aus aus) aus eus var. fus` # Proposal is to exclude names of a specific ranks see taxon.rb # # Need the next highest valid parent not in this list!! # %w{ # NomenclaturalRank::Iczn::SpeciesGroup::Supersuperspecies # NomenclaturalRank::Iczn::SpeciesGroup::Superspecies # } # # infragenericEpithet needs to handle subsection (NomenclaturalRank::Icn::GenusGroup::Subsection) name_total += 1 # TODO: remove this loopp, using a with to top TaxonName .where(cached_valid_taxon_name_id: name.id) # == .historical_taxon_names .where.not("(taxon_names.type = 'Combination' AND taxon_names.cached = ?)", name.cached) # This eliminates Combinations that are identical to the current placement. .eager_load(origin_citation: [:source]) .find_each do |t| # TODO: refactor to a single method, test, then we should only have to check if the name is valid, without relationships? 
# TODO: family-group cached original combinations do not get exported in either Name or Synonym tables # exclude duplicate protonyms created for family group relationships if !t.is_combination? and t.is_family_rank? # We are already excluding combinationss from above if TaxonNameRelationship::Iczn::Invalidating::Usage::FamilyGroupNameForm.where(subject_taxon_name: t).any? # t.taxon_name_relationships.any? {|tnr| tnr.type == 'TaxonNameRelationship::Iczn::Invalidating::Usage::FamilyGroupNameForm'} valid = TaxonName.find(t.cached_valid_taxon_name_id) if valid.name == t.name and valid. = t. and t.id != valid.id # !! valid.name should never = t.name, by definition? next end end end origin_citation = t.origin_citation original = Export::Coldp.original_field(t) # Protonym, no parens basionym_id = t.reified_id unless !t.is_combination? and t.is_family_rank? is_genus_species = t.is_genus_or_species_rank? # TODO: Subgenus as Genus combination may break this is_col_uninomial = !t.is_combination? && ((t.rank == 'genus') || !is_genus_species) higher = !t.is_combination? && !is_genus_species uninomial, generic_epithet, infrageneric_epithet, specific_epithet, infraspecific_epithet = nil, nil, nil, nil, nil if !is_col_uninomial elements = t.full_name_hash epithets = clean_sic({:scientific_name => t.cached, :genus => elements['genus']&.last, :subgenus => elements['subgenus']&.last, :species => elements['species']&.last, :subspecies => elements['subspecies']&.last}) name_string = epithets[:scientific_name] generic_epithet = epithets[:genus] infrageneric_epithet = epithets[:subgenus] specific_epithet = epithets[:species] infraspecific_epithet = epithets[:subspecies] else uninomial = name_string = clean_sic({:scientific_name => t.cached})[:scientific_name] end if t.is_combination? rank = t.protonyms_by_rank.keys.last else rank = t.rank end # Here we truly want no higher if t.cached_original_combination.present? && (!t.is_combination? && is_genus_species && (!t.is_valid? 
|| t.has_alternate_original?)) name_total += 1 add_original_combination(t, csv, origin_citation, name_remarks_vocab_id, project_members) end # Here we add reified ID's for higher taxa in which cached != cached_original_combination (e.g., TaxonName stores both Lamotialnina and Lamotialnini so needs a reified ID) if t.cached_original_combination.present? && t.is_family_rank? && t.has_alternate_original? # t.cached != t.cached_original_combination add_higher_original_name(t, csv, origin_citation, name_remarks_vocab_id, project_members) end basionym_id = nil if @skipped_name_ids.include?(basionym_id) # Set is: no original combination OR (valid or invalid higher, valid lower, past combinations) if t.cached_original_combination.blank? || higher || t.is_valid? || t.is_combination? csv << [ t.id, # ID basionym_id, # basionymID name_string, # scientificName # should just be t.cached t., # authorship rank, # rank uninomial, # uninomial <- if genus here generic_epithet, # genus and below - IIF species or lower infrageneric_epithet, # infragenericEpithet specific_epithet, # specificEpithet infraspecific_epithet, # infraspecificEpithet origin_citation&.source_id, # publishedInID origin_citation&.pages, # publishedInPage t.year_of_publication, # publishedInYear original, # original code_field(t), # code nom_status_field(t), # nomStatus nil, # link (probably TW public or API) Export::Coldp.sanitize_remarks(remarks(t, name_remarks_vocab_id)), # remarks Export::Coldp.modified(t[:updated_at]), # modified Export::Coldp.modified_by(t[:updated_by_id], project_members) # modifiedBy ] end Export::Coldp::Files::Reference.add_reference_rows([origin_citation.source].compact, reference_csv, project_members) if reference_csv && origin_citation end end end end |
.nom_status_field(taxon_name) ⇒ String?
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/export/coldp/files/name.rb', line 37
# Resolves the nomenclatural status value for a name.
#
# @param taxon_name [Object] record responding to `type`, `is_valid?`,
#   `nomenclatural_code` and `taxon_name_classifications_for_statuses`
# @return [String, nil] nil for Combinations; the NOMEN_VALID entry for the
#   name's code when the name is valid; otherwise the NOMEN_URI of the youngest
#   status classification, or nil when there is none
def self.nom_status_field(taxon_name)
  # This is *not* 'chresonym' sensu CoL
  # (which is this: [correct: 'Aus bus Smith 1920', chresonym: 'Aus bus Jones 1922'])
  return nil if taxon_name.type == 'Combination'

  if taxon_name.is_valid?
    return ::TaxonName::NOMEN_VALID[taxon_name.nomenclatural_code]
  end

  # TODO: very expensive, consider caching in TN
  #
  # We should also infer status from TaxonNameRelationship and be more specific,
  # but if CoL doesn't use NOMEN this won't mean much.
  #
  # Note: We supply `nil` when a relationship is used here because it is declared in the synonym table.
  # Note: This means that the *type* of synonym is lost (e.g. Misspelling)
  statuses = taxon_name.taxon_name_classifications_for_statuses
  youngest = TaxonNameClassification.youngest(statuses)

  youngest ? youngest.class::NOMEN_URI : nil
end
.remarks(name, name_remarks_vocab_id) ⇒ Object
25 26 27 28 29 30 31 |
# File 'lib/export/coldp/files/name.rb', line 25
# Builds the `remarks` value for a name from its data attributes.
#
# @param name [Object] record responding to `data_attributes`, whose result
#   supports `where(...)` and `pluck(:value)`
# @param name_remarks_vocab_id [Object, nil] id of the controlled vocabulary
#   term used to select remark attributes; nil means "no remarks predicate"
# @return [String, nil] the matching values joined with '|', or nil when no
#   vocabulary id is given or no attribute matches
def self.remarks(name, name_remarks_vocab_id)
  return nil if name_remarks_vocab_id.nil?

  # Fetch the values once. Checking the plucked array for emptiness replaces
  # the separate existence query that previously preceded an identical lookup.
  values = name.data_attributes
    .where(controlled_vocabulary_term_id: name_remarks_vocab_id)
    .pluck(:value)

  values.empty? ? nil : values.join('|')
end
.skipped_name_ids ⇒ Object
8 9 10 |
# File 'lib/export/coldp/files/name.rb', line 8 def self.skipped_name_ids @skipped_name_ids end |