Class: DatasetRecord::DarwinCore::Taxon

Inherits:
DatasetRecord::DarwinCore show all
Defined in:
app/models/dataset_record/darwin_core/taxon.rb

Constant Summary collapse

# Combinations of parsed-name detail keys (with :authorship removed) that #import accepts;
# any other combination makes #import raise 'UNKNOWN NAME DETAILS COMBINATION'.
KNOWN_KEYS_COMBINATIONS = [
  %i[uninomial],
  %i[uninomial rank parent],
  %i[genus species],
  %i[genus species infraspecies],
  %i[genus subgenus species],
  %i[genus subgenus species infraspecies]
].freeze

# Top-level keys the parser's :details hash may contain for #import to use the parse result.
PARSE_DETAILS_KEYS = %i[uninomial genus species infraspecies].freeze

Instance Attribute Summary

Attributes inherited from DatasetRecord

#metadata, #status

Instance Method Summary collapse

Methods inherited from DatasetRecord::DarwinCore

#get_field_mapping, #get_field_value, #get_fields_mapping, #get_tw_biocuration_groups, #get_tw_data_attribute_fields_for, #get_tw_fields_for, #normalize_value!, #term_value_changed

Methods inherited from DatasetRecord

#create_fields, #data_fields, #dataset_record_fields, #destroy_fields, #field_db_attributes, #fields_db_attributes, #frozen_fields?, #get_data_field, #initialize_data_fields, #set_data_field, #update_fields

Methods included from Shared::IsData

#errors_excepting, #full_error_messages_excepting, #identical, #is_community?, #is_destroyable?, #is_editable?, #is_in_use?, #is_in_users_projects?, #metamorphosize, #similar

Methods included from Housekeeping

#has_polymorphic_relationship?

Methods inherited from ApplicationRecord

transaction_with_retry

Instance Method Details

#data_field_changed(index, value) ⇒ Object (private)

TODO: Add a restage button/trigger for when relevant fields change. Changing an ID here means that dependencies must be recalculated.



330
331
332
333
334
# File 'app/models/dataset_record/darwin_core/taxon.rb', line 330

# Currently a no-op: neither argument is used and nil is returned.
# Presumably a callback for data field edits (going by its name) — TODO confirm against callers.
#
# Disabled sketch of the intended behavior, kept for reference:
#   if index == get_field_mapping(:parentNameUsageID) && status == "NotReady"
#     self.status = "Ready" if %w[Ready Imported].include? get_parent&.status
#   end
#
# @param index [Object] ignored
# @param value [Object] ignored
# @return [nil]
def data_field_changed(index, value)
  nil
end

#dependencies_imported?(taxon_id) ⇒ Boolean (private)

Check if all dependencies of a taxonID are imported

Returns:

  • (Boolean)


313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
# File 'app/models/dataset_record/darwin_core/taxon.rb', line 313

# Checks whether every dependency listed for a taxonID has already been imported.
#
# Reads metadata['dependencies'] from the record whose taxonID field equals +taxon_id+, then
# compares the number of those dependency records with status 'Imported' against the list length.
#
# @param taxon_id [#to_s] taxonID of the record whose dependencies are checked
# @return [Boolean] true when the imported count equals the number of listed dependencies
def dependencies_imported?(taxon_id)
  own_record_ids = import_dataset.core_records_fields
                                 .at(get_field_mapping(:taxonID))
                                 .with_value(taxon_id.to_s)
                                 .select(:dataset_record_id)
  dependency_taxon_ids = DatasetRecord::DarwinCore::Taxon.where(id: own_record_ids)
                                                         .pick(:metadata)['dependencies']

  dependency_record_ids = import_dataset.core_records_fields
                                        .at(get_field_mapping(:taxonID))
                                        .with_values(dependency_taxon_ids.map(&:to_s))
                                        .select(:dataset_record_id)
  imported_dependencies = DatasetRecord::DarwinCore::Taxon.where(id: dependency_record_ids)
                                                          .where(status: 'Imported')

  imported_dependencies.count == dependency_taxon_ids.length
end

#find_by_taxonID(taxon_id) ⇒ DatasetRecord::DarwinCore::Taxon+ (private)



295
296
297
298
299
300
301
# File 'app/models/dataset_record/darwin_core/taxon.rb', line 295

# Finds the staged record of this import dataset whose taxonID field equals +taxon_id+.
#
# @param taxon_id [#to_s] taxonID value to look up (compared as a string)
# @return [DatasetRecord::DarwinCore::Taxon] first record returned by the lookup
def find_by_taxonID(taxon_id)
  taxon_id_fields = import_dataset.core_records_fields.at(get_field_mapping(:taxonID))
  matching_record_ids = taxon_id_fields.with_value(taxon_id.to_s)
                                       .select(:dataset_record_id)

  DatasetRecord::DarwinCore::Taxon.where(id: matching_record_ids).first
end

#get_original_combination ⇒ DatasetRecord::DarwinCore::Taxon+ (private)



286
287
288
289
290
291
292
# File 'app/models/dataset_record/darwin_core/taxon.rb', line 286

# Finds the staged record whose taxonID field equals this record's originalNameUsageID.
#
# @return [DatasetRecord::DarwinCore::Taxon] first record returned by the lookup
def get_original_combination
  taxon_id_fields = import_dataset.core_records_fields.at(get_field_mapping(:taxonID))
  original_record_ids = taxon_id_fields.with_value(get_field_value(:originalNameUsageID))
                                       .select(:dataset_record_id)

  DatasetRecord::DarwinCore::Taxon.where(id: original_record_ids).first
end

#get_parent ⇒ DatasetRecord::DarwinCore::Taxon+ (private)



277
278
279
280
281
282
283
# File 'app/models/dataset_record/darwin_core/taxon.rb', line 277

# Finds the staged record whose taxonID field equals this record's parentNameUsageID.
#
# @return [DatasetRecord::DarwinCore::Taxon] first record returned by the lookup
def get_parent
  taxon_id_fields = import_dataset.core_records_fields.at(get_field_mapping(:taxonID))
  parent_record_ids = taxon_id_fields.with_value(get_field_value(:parentNameUsageID))
                                     .select(:dataset_record_id)

  DatasetRecord::DarwinCore::Taxon.where(id: parent_record_ids).first
end

#get_taxon_name_from_taxon_id(taxon_id) ⇒ TaxonName (private)

Returns:

  • (TaxonName)

304
305
306
307
308
309
310
# File 'app/models/dataset_record/darwin_core/taxon.rb', line 304

# Loads the TaxonName recorded as imported for the staged record with the given taxonID.
#
# The id is read from metadata['imported_objects']['taxon_name']['id'] of the record whose
# taxonID field equals +taxon_id+.
#
# @param taxon_id [#to_s] taxonID value to look up (compared as a string)
# @return [TaxonName]
def get_taxon_name_from_taxon_id(taxon_id)
  matching_record_ids = import_dataset.core_records_fields
                                      .at(get_field_mapping(:taxonID))
                                      .with_value(taxon_id.to_s)
                                      .select(:dataset_record_id)
  record_metadata = DatasetRecord::DarwinCore::Taxon.where(id: matching_record_ids).pick(:metadata)

  TaxonName.find(record_metadata['imported_objects']['taxon_name']['id'])
end

#import(dwc_data_attributes = {}) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# File 'app/models/dataset_record/darwin_core/taxon.rb', line 14

# Imports this staged taxon row as a taxon name.
#
# Parses scientificName and, depending on metadata['type'], builds either a Protonym (with
# original-combination relationships plus synonym/status data) or a Combination. When the name
# saves, its id is stored in metadata['imported_objects'], the record is marked 'Imported', and
# every dependant whose dependencies are all imported is marked 'Ready'.
#
# Errors do not propagate (except StandardError in the development environment): invalid data and
# validation failures set status 'Errored', any other StandardError sets status 'Failed', and the
# details are written to metadata['error_data']. The record is always saved before returning.
#
# @param dwc_data_attributes [Hash] passed through to the superclass implementation by +super+
# @return [DatasetRecord::DarwinCore::Taxon] self
def import(dwc_data_attributes = {})
  super
  begin
    DatasetRecord.transaction do
      # Drop error details recorded by an earlier attempt.
      self.metadata.delete('error_data')

      nomenclature_code = get_field_value('nomenclaturalCode')&.downcase&.to_sym || import_dataset.default_nomenclatural_code

      parse_results = Biodiversity::Parser.parse(get_field_value(:scientificName) || '')
      parse_results_details = parse_results[:details]
      # Use the details only when every top-level key is a known one; otherwise treat the name as unparseable.
      parse_results_details = (parse_results_details.keys - PARSE_DETAILS_KEYS).empty? ? parse_results_details.values.first : nil if parse_results_details

      raise DarwinCore::InvalidData.new({
                                          "scientificName": parse_results[:qualityWarnings] ?
                                                              parse_results[:qualityWarnings].map { |q| q[:warning] } :
                                                              ['Unable to parse scientific name. Please make sure it is correctly spelled.']
                                        }) unless (1..3).include?(parse_results[:quality]) && parse_results_details

      raise 'UNKNOWN NAME DETAILS COMBINATION' unless KNOWN_KEYS_COMBINATIONS.include?(parse_results_details.keys - [:authorship])

      # The name to import is the uninomial when present, otherwise the last (lowest) name part.
      name_key = parse_results_details[:uninomial] ? :uninomial : (parse_results_details.keys - [:authorship]).last
      name_details = parse_results_details[name_key]

      name = name_details.kind_of?(Array) ? name_details.first[:value] : name_details

      authorship = parse_results_details.dig(:authorship, :normalized) || get_field_value('scientificNameAuthorship')

      author_name = nil
      year = nil

      # split authorship into name and year
      if nomenclature_code == :iczn
        # authorship can be nil when neither the parsed name nor scientificNameAuthorship provides one.
        # NOTE(review): the greedy author group requires a following space, so a multi-word
        # authorship without a year (e.g. "Smith & Jones") is captured as "Smith &" — confirm intent.
        if (authorship_matchdata = authorship&.match(/\(?(?<author>.+),? (?<year>\d{4})?\)?/))

          # regex will include comma, no easy way around it
          author_name = authorship_matchdata[:author].delete_suffix(',')
          year = authorship_matchdata[:year]

          # author name should be wrapped in parentheses if the verbatim authorship was
          if authorship.start_with?('(') && authorship.end_with?(')')
            author_name = "(#{author_name})"
          end
        end

      else
        # Fall back to simple name + date parsing
        author_name = Utilities::Strings.verbatim_author(authorship)
        year = Utilities::Strings.year_of_publication(authorship)
      end

      # TODO should a year provided in namePublishedInYear overwrite the parsed value?
      year ||= get_field_value('namePublishedInYear')

      # TODO validate that rank is a real rank, otherwise Combination will crash on find_or_initialize_by
      rank = get_field_value('taxonRank')
      is_hybrid = metadata['is_hybrid'] # TODO: NO...

      # Records without a staged parent hang off the project's root name.
      if metadata['parent'].nil?
        parent = project.root_taxon_name
      else
        parent = TaxonName.find(get_parent.metadata['imported_objects']['taxon_name']['id'])
      end

      if metadata['type'] == 'protonym'
        protonym_attributes = {
          name: name,
          parent: parent,
          rank_class: Ranks.lookup(nomenclature_code, rank),
          also_create_otu: false,
          verbatim_author: author_name,
          year_of_publication: year
        }

        taxon_name = Protonym.create_with(verbatim_author: author_name, project: project)
                             .find_or_initialize_by(protonym_attributes.slice(:name, :parent, :rank_class, :year_of_publication))

        unless taxon_name.persisted?
          taxon_name.taxon_name_classifications.build(type: TaxonNameClassification::Icn::Hybrid) if is_hybrid
          taxon_name.data_attributes.build(import_predicate: 'DwC-A import metadata', type: 'ImportAttribute', value: {
            scientificName: get_field_value('scientificName'),
            scientificNameAuthorship: get_field_value('scientificNameAuthorship'),
            taxonRank: get_field_value('taxonRank'),
            metadata: metadata
          })

        end

        # create original combination relationship, get parent of original combination to set as subject taxon name

        original_combination_types = {
          genus: 'TaxonNameRelationship::OriginalCombination::OriginalGenus',
          subgenus: 'TaxonNameRelationship::OriginalCombination::OriginalSubgenus',
          species: 'TaxonNameRelationship::OriginalCombination::OriginalSpecies',
          subspecies: 'TaxonNameRelationship::OriginalCombination::OriginalSubspecies',
          variety: 'TaxonNameRelationship::OriginalCombination::OriginalVariety',
          form: 'TaxonNameRelationship::OriginalCombination::OriginalForm'
        }

        if get_field_value(:taxonID) == get_field_value(:originalNameUsageID)
          # create relationships for genus rank and below pointing to self and parents

          taxon_name.safe_self_and_ancestors.each do |ancestor|
            if (rank_in_type = original_combination_types[ancestor.rank.downcase.to_sym])
              TaxonNameRelationship.find_or_create_by!(type: rank_in_type, subject_taxon_name: ancestor, object_taxon_name: taxon_name)
            end
          end
        else

          # create OC with self at lowest rank; the same key is used for the guard and the lookup
          # so the relationship type can never be nil
          own_rank_key = taxon_name.rank.downcase.to_sym
          if original_combination_types.key?(own_rank_key)
            TaxonNameRelationship.find_or_create_by!(type: original_combination_types[own_rank_key], subject_taxon_name: taxon_name, object_taxon_name: taxon_name)
          end

          unless parent == project.root_taxon_name
            original_combination_parent = TaxonName.find(
              find_by_taxonID(get_original_combination.metadata['parent']).metadata['imported_objects']['taxon_name']['id']
            )

            original_combination_parent.safe_self_and_ancestors.each do |ancestor|
              if (rank_in_type = original_combination_types[ancestor.rank.downcase.to_sym])
                TaxonNameRelationship.find_or_create_by!(type: rank_in_type, subject_taxon_name: ancestor, object_taxon_name: taxon_name)
              end
            end
          end
        end

        # if taxonomicStatus is a synonym or homonym, create the relationship to acceptedNameUsageID
        if metadata['has_external_accepted_name']
          valid_name = get_taxon_name_from_taxon_id(get_field_value(:acceptedNameUsageID))

          synonym_classes = {
            iczn: {
              synonym: 'TaxonNameRelationship::Iczn::Invalidating::Synonym',
              homonym: 'TaxonNameRelationship::Iczn::Invalidating::Synonym::Objective::ReplacedHomonym',
            },
            # TODO support other nomenclatural codes
            # icnp: {
            #   synonym: "TaxonNameRelationship::Icnp::Unaccepting::Synonym",
            #   homonym: "TaxonNameRelationship::Icnp::Unaccepting::Homonym"
            # },
            # icn: {
            #   synonym: "TaxonNameRelationship::Icn::Unaccepting::Synonym",
            #   homonym: "TaxonNameRelationship::Icn::Unaccepting::Homonym"
            # }
          }.freeze

          if (taxonomic_status = get_field_value(:taxonomicStatus)&.downcase)
            # dig keeps an unsupported nomenclatural code on the InvalidData path below
            # instead of failing with NoMethodError on nil
            type = synonym_classes.dig(nomenclature_code, taxonomic_status.to_sym)

            raise DarwinCore::InvalidData.new({ "taxonomicStatus": ["Status #{taxonomic_status} did not match synonym, homonym, invalid, unavailable, excluded"] }) if type.nil?

            taxon_name.taxon_name_relationships.find_or_initialize_by(object_taxon_name: valid_name, type: type)

            # Add homonym status (if applicable)
            if taxonomic_status == 'homonym'
              taxon_name.taxon_name_classifications.find_or_initialize_by(type: 'TaxonNameClassification::Iczn::Available::Invalid::Homonym')
            end

          else
            raise DarwinCore::InvalidData.new({ "taxonomicStatus": ['No taxonomic status, but acceptedNameUsageID has different protonym'] })
          end

          # if taxonomicStatus is a homonym, invalid, unavailable, excluded, create the status
        elsif get_field_value(:taxonomicStatus) != 'valid' # also true when the field is nil
          status_types = {
            invalid: 'TaxonNameClassification::Iczn::Available::Invalid',
            unavailable: 'TaxonNameClassification::Iczn::Unavailable',
            excluded: 'TaxonNameClassification::Iczn::Unavailable::Excluded'
          }.freeze

          if (taxonomic_status = get_field_value(:taxonomicStatus)&.downcase)

            type = status_types[taxonomic_status.to_sym]

            raise DarwinCore::InvalidData.new({ "taxonomicStatus": ["Couldn't find a status that matched #{taxonomic_status}"] }) if type.nil?

            taxon_name.taxon_name_classifications.find_or_initialize_by(type: type)
          end
        end

      elsif metadata['type'] == 'combination'

        # get protonym from staging metadata
        protonym_record = find_by_taxonID(metadata['protonym_taxon_id'])
        # current_name_record = find_by_taxonID(get_field_value(:originalNameUsageID))

        current_name = Protonym.find(protonym_record.metadata['imported_objects']['taxon_name']['id'])

        # because Combination uses named arguments, we need to get the ranks of the parent names to create the combination
        if parent.is_a?(Combination)
          parent_elements = parent.combination_taxon_names.index_by(&:rank)
        else
          parent_elements = { parent.rank => parent }
        end

        combination_attributes = {
          **parent_elements,
          rank.downcase => current_name
        }

        # Can't use find_or_initialize_by because of dynamic parameters, causes query to fail because ranks are not columns in db
        # => PG::UndefinedTable: ERROR:  missing FROM-clause entry for table "genus"
        # LINE 1: ..."taxon_names" WHERE "taxon_names"."type" = $1 AND "genus"."i...

        taxon_name = Combination.matching_protonyms(**combination_attributes.transform_values(&:id)).first
        taxon_name = Combination.new(combination_attributes) if taxon_name.nil?

      else
        raise DarwinCore::InvalidData.new({ "originalNameUsageID": ['Could not determine if name is protonym or combination'] })
      end

      if taxon_name.save
        # TODO add relationships and combinations to this hash
        self.metadata[:imported_objects] = { taxon_name: { id: taxon_name.id } }
        self.status = 'Imported'
      else
        self.status = 'Errored'
        self.metadata[:error_data] = {
          messages: taxon_name.errors.messages
        }
      end

      save!

      if self.status == 'Imported'
        # loop over dependants, see if all other dependencies are met, if so mark them as ready
        metadata['dependants'].each do |dependant_taxonID|
          if dependencies_imported?(dependant_taxonID)
            DatasetRecord::DarwinCore::Taxon.where(id: import_dataset.core_records_fields
                                                                     .at(get_field_mapping(:taxonID))
                                                                     .where(value: dependant_taxonID)
                                                                     .select(:dataset_record_id)
            ).first.update!(status: 'Ready')
          end
        end
      end
    end
  rescue DarwinCore::InvalidData => invalid
    self.status = 'Errored'
    self.metadata['error_data'] = { messages: invalid.error_data }
  rescue ActiveRecord::RecordInvalid => invalid
    self.status = 'Errored'
    self.metadata['error_data'] = {
      messages: invalid.record.errors.messages
    }
  rescue StandardError => e
    # Surface unexpected errors directly while developing; record them otherwise.
    raise if Rails.env.development?
    self.status = 'Failed'
    self.metadata[:error_data] = {
      exception: {
        message: e.message,
        backtrace: e.backtrace
      }
    }
  ensure
    # Persist whatever status/error data was set above, on success and on failure.
    save!
  end

  self
end