Class: ImportDataset::DarwinCore::Occurrences

Inherits:
ImportDataset::DarwinCore show all
Defined in:
app/models/import_dataset/darwin_core/occurrences.rb

Constant Summary collapse

MINIMUM_FIELD_SET =

TODO: Can occurrenceID requirement be dropped? Should other fields be added here?

['occurrenceID', 'scientificName', 'basisOfRecord'].freeze

Constants inherited from ImportDataset::DarwinCore

CHECKLIST_ROW_TYPE, OCCURRENCES_ROW_TYPE

Instance Attribute Summary

Attributes inherited from ImportDataset

#description, #metadata, #source_content_type, #source_file_name, #source_file_size, #source_updated_at, #status

Instance Method Summary collapse

Methods inherited from ImportDataset::DarwinCore

#add_filters, #check_field_set, #core_records_are_readable, #core_records_fields, create_with_subtype_detection, default_if_absent, #default_nomenclatural_code, #destroy_namespace, #get_col_sep, #get_core_record_identifier_namespace, #get_dwc_default_values, #get_dwc_headers, #get_dwc_records, #get_field_mapping, #get_fields_mapping, #get_normalized_dwc_term, #get_quote_char, #get_records, #import, #initialize, #progress, #set_import_settings, #stage, #start_import, #stop_import

Methods inherited from ImportDataset

#delete_origin_relationships, #stage

Methods included from Shared::OriginRelationship

#new_objects, #old_objects, #reject_origin_relationships, #set_origin

Methods included from Shared::IsData

#errors_excepting, #full_error_messages_excepting, #identical, #is_community?, #is_destroyable?, #is_editable?, #is_in_use?, #is_in_users_projects?, #metamorphosize, #similar

Methods included from Housekeeping

#has_polymorphic_relationship?

Methods inherited from ApplicationRecord

transaction_with_retry

Constructor Details

This class inherits a constructor from ImportDataset::DarwinCore

Instance Method Details

#add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil) ⇒ Object



190
191
192
193
194
195
196
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 190

# Registers a collectionCode-only catalog number namespace mapping in the
# dataset metadata, unless collection_code is nil or a mapping for that
# collection code is already present.
#
# Mappings are stored as [collection_code, namespace_id] pairs under
# metadata['catalog_numbers_collection_code_namespaces'] and kept sorted by
# collection code (compared as strings). The list is created when absent.
# The dataset is saved with save! whether or not a mapping was added.
#
# @param collection_code [Object, nil] collectionCode value to register
# @param namespace_id [Object, nil] namespace id to associate (nil by default)
# @return [Object] whatever save! returns
def add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil)
  unless collection_code.nil? || get_catalog_number_collection_code_namespace_mapping(collection_code)
    # The lookup helper treats a missing list as "no mapping", so create it here on first use.
    mappings = (metadata['catalog_numbers_collection_code_namespaces'] ||= [])
    mappings << [collection_code, namespace_id]
    mappings.sort! { |a, b| a[0].to_s <=> b[0].to_s }
  end
  save!
end

#add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil) ⇒ Object



182
183
184
185
186
187
188
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 182

# Registers an (institutionCode, collectionCode) catalog number namespace
# mapping in the dataset metadata, unless one is already present for that pair.
#
# Mappings are stored as [[institution_code, collection_code], namespace_id]
# under metadata['catalog_numbers_namespaces'] and kept sorted by the code
# pair (each code compared as a string). The list is created when absent.
# The dataset is saved with save! whether or not a mapping was added.
#
# @param institution_code [Object, nil] institutionCode value of the pair
# @param collection_code [Object, nil] collectionCode value of the pair
# @param namespace_id [Object, nil] namespace id to associate (nil by default)
# @return [Object] whatever save! returns
def add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil)
  unless get_catalog_number_namespace_mapping(institution_code, collection_code)
    # The lookup helper treats a missing list as "no mapping", so create it here on first use.
    mappings = (metadata['catalog_numbers_namespaces'] ||= [])
    mappings << [[institution_code, collection_code], namespace_id]
    mappings.sort! { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) }
  end
  save!
end

#containerize_dup_cat_no? ⇒ Boolean

Returns:

  • (Boolean)


198
199
200
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 198

# Reports whether the 'containerize_dup_cat_no' import setting is switched on.
#
# @return [Boolean] true when metadata['import_settings']['containerize_dup_cat_no']
#   is truthy; false when it is nil/false or 'import_settings' is absent
def containerize_dup_cat_no?
  !!metadata.dig('import_settings', 'containerize_dup_cat_no')
end

#core_records_class ⇒ Object



12
13
14
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 12

# Record class used for the core rows of this dataset.
#
# @return [Class] DatasetRecord::DarwinCore::Occurrence
def core_records_class = DatasetRecord::DarwinCore::Occurrence

#core_records_identifier_name ⇒ Object



16
17
18
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 16

# Name of the field that identifies a core record.
#
# @return [String] always 'occurrenceID'
def core_records_identifier_name = 'occurrenceID'

#enable_organization_determiners? ⇒ Boolean

Returns:

  • (Boolean)


218
219
220
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 218

# Reports whether the 'enable_organization_determiners' import setting is switched on.
#
# @return [Boolean] true when metadata['import_settings']['enable_organization_determiners']
#   is truthy; false when it is nil/false or 'import_settings' is absent
def enable_organization_determiners?
  !!metadata.dig('import_settings', 'enable_organization_determiners')
end

#enable_organization_determiners_alt_name? ⇒ Boolean

Returns:

  • (Boolean)


222
223
224
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 222

# Reports whether the 'enable_organization_determiners_alt_name' import setting is switched on.
#
# @return [Boolean] true when
#   metadata['import_settings']['enable_organization_determiners_alt_name'] is truthy;
#   false when it is nil/false or 'import_settings' is absent
def enable_organization_determiners_alt_name?
  !!metadata.dig('import_settings', 'enable_organization_determiners_alt_name')
end

#get_catalog_number_collection_code_namespace_mapping(collection_code) ⇒ Object (private)



232
233
234
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 232

# Looks up the collectionCode-only namespace mapping for a collection code.
#
# Searches metadata['catalog_numbers_collection_code_namespaces'] for the first
# entry whose first element equals collection_code.
#
# @param collection_code [Object, nil] collectionCode value to look for
# @return [Array, nil] the stored [collection_code, namespace_id] entry (the same
#   object held in the metadata, not a copy), or nil when the list is absent or
#   holds no match
def get_catalog_number_collection_code_namespace_mapping(collection_code)
  metadata['catalog_numbers_collection_code_namespaces']&.find { |m| m[0] == collection_code }
end

#get_catalog_number_namespace(institution_code, collection_code) ⇒ Object



111
112
113
114
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 111

# Resolves the namespace id to use for catalog numbers of the given
# institutionCode / collectionCode combination.
#
# The mapping for the exact (institution_code, collection_code) pair wins when it
# carries a namespace id; otherwise the collectionCode-only mapping is consulted.
#
# @param institution_code [Object, nil] institutionCode value
# @param collection_code [Object, nil] collectionCode value
# @return [Object, nil] the namespace id, or nil when neither mapping supplies one
def get_catalog_number_namespace(institution_code, collection_code)
  pair_entry = get_catalog_number_namespace_mapping(institution_code, collection_code)
  pair_namespace = pair_entry&.at(1)
  return pair_namespace if pair_namespace

  collection_entry = get_catalog_number_collection_code_namespace_mapping(collection_code)
  collection_entry&.at(1)
end

#get_catalog_number_namespace_mapping(institution_code, collection_code) ⇒ Object (private)



228
229
230
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 228

# Looks up the namespace mapping for an (institutionCode, collectionCode) pair.
#
# Searches metadata['catalog_numbers_namespaces'] for the first entry whose
# first element equals [institution_code, collection_code].
#
# @param institution_code [Object, nil] institutionCode value of the pair
# @param collection_code [Object, nil] collectionCode value of the pair
# @return [Array, nil] the stored [[institution_code, collection_code], namespace_id]
#   entry (the same object held in the metadata, not a copy), or nil when the list
#   is absent or holds no match
def get_catalog_number_namespace_mapping(institution_code, collection_code)
  metadata['catalog_numbers_namespaces']&.find { |m| m[0] == [institution_code, collection_code] }
end

#get_event_id_namespace ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 20

# Returns the Namespace used for eventID identifiers of this dataset, creating
# and recording one when none is usable.
#
# The namespace id is read from metadata['namespaces']['eventID']. When that id
# is missing, or no Namespace is found for it, a new Namespace is created whose
# name and short name carry a random hex suffix; its id is then written back to
# the metadata and the dataset is saved with save!. The namespace is memoized in
# @event_id_identifier_namespace.
#
# @return [Namespace] the found or newly created namespace
def get_event_id_namespace
  id = metadata.dig('namespaces', 'eventID')

  if id.nil? || (@event_id_identifier_namespace ||= Namespace.find_by(id:)).nil?
    random = SecureRandom.hex(4)
    project_name = Project.find(Current.project_id).name
    namespace_name = "eventID namespace for \"#{description}\" dataset in \"#{project_name}\" project [#{random}]"

    @event_id_identifier_namespace = Namespace.create!(
      name: namespace_name,
      short_name: "eventID-#{random}",
      verbatim_short_name: 'eventID',
      delimiter: ':'
    )

    # The read above tolerates a missing 'namespaces' hash, so the write does too.
    (metadata['namespaces'] ||= {})['eventID'] = @event_id_identifier_namespace.id
    save!
  end

  @event_id_identifier_namespace
end

#perform_staging ⇒ Object

Stages core (Occurrence) records and all extension records.



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 43

# Stages core (Occurrence) records and all extension records.
#
# Reads records and headers with get_records, stores the headers in the dataset
# metadata, then saves one DatasetRecord::DarwinCore::Occurrence per core row and
# one DatasetRecord::DarwinCore::Extension (status 'Unsupported') per extension
# row. A core row is saved as 'Ready' when its catalogNumber is blank or it has a
# 'TW:Namespace:catalogNumber' value; otherwise it is saved as 'NotReady' with an
# error message. Finally the distinct [institutionCode, collectionCode] pairs and
# collectionCode values seen are written, sorted, to the metadata, each with a
# nil namespace.
#
# @return [Object] whatever the final save! returns
def perform_staging
  records, headers = get_records(source.path)

  update!(
    metadata: metadata.merge(
      core_headers: headers[:core],
      extensions_headers: headers[:extensions],
      catalog_numbers_namespaces: []
    )
  )

  staged_rows = records[:core].map do |record|
    {
      src_data: record,
      basisOfRecord: record['basisOfRecord']
    }
  end

  # Sets collapse repeated code combinations down to one mapping entry each.
  catalog_numbers_namespaces = Set[]
  catalog_numbers_collection_code_namespaces = Set[]

  staged_rows.each do |row|
    dwc_occurrence = DatasetRecord::DarwinCore::Occurrence.new(import_dataset: self)
    dwc_occurrence.initialize_data_fields(row[:src_data].map { |_k, v| v })

    catalog_numbers_namespaces << [
      [
        dwc_occurrence.get_field_value(:institutionCode),
        dwc_occurrence.get_field_value(:collectionCode)
      ],
      nil # User will select namespace through UI. TODO: Should we attempt guessing here?
    ]
    catalog_numbers_collection_code_namespaces << [dwc_occurrence.get_field_value(:collectionCode), nil]

    if dwc_occurrence.get_field_value(:catalogNumber).blank? || dwc_occurrence.get_field_value('TW:Namespace:catalogNumber').present?
      dwc_occurrence.status = 'Ready'
    else
      dwc_occurrence.status = 'NotReady'
      row['error_data'] = { messages: { catalogNumber: ['Record cannot be imported until namespace is set.'] } }
    end

    # The raw source row is dropped; the rest of the hash becomes the record's metadata.
    row.delete(:src_data)
    dwc_occurrence.metadata = row

    dwc_occurrence.save!
  end

  records[:extensions].each do |extension_type, extension_records|
    extension_records.each do |record|
      dwc_extension = DatasetRecord::DarwinCore::Extension.new(import_dataset: self)
      dwc_extension.initialize_data_fields(record.map { |_k, v| v })
      dwc_extension.status = 'Unsupported'
      dwc_extension.metadata = { 'type' => extension_type }

      dwc_extension.save!
    end
  end

  metadata.merge!(
    catalog_numbers_namespaces: catalog_numbers_namespaces.sort { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) }
  )
  metadata.merge!(
    catalog_numbers_collection_code_namespaces: catalog_numbers_collection_code_namespaces.sort { |a, b| a[0].to_s <=> b[0].to_s }
  )

  save!
end

#require_catalog_number_match_verbatim? ⇒ Boolean

Returns:

  • (Boolean)


214
215
216
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 214

# Reports whether the 'require_catalog_number_match_verbatim' import setting is switched on.
#
# @return [Boolean] true when
#   metadata['import_settings']['require_catalog_number_match_verbatim'] is truthy;
#   false when it is nil/false or 'import_settings' is absent
def require_catalog_number_match_verbatim?
  !!metadata.dig('import_settings', 'require_catalog_number_match_verbatim')
end

#require_tripcode_match_verbatim? ⇒ Boolean

Returns:

  • (Boolean)


210
211
212
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 210

# Reports whether the 'require_tripcode_match_verbatim' import setting is switched on.
#
# @return [Boolean] true when metadata['import_settings']['require_tripcode_match_verbatim']
#   is truthy; false when it is nil/false or 'import_settings' is absent
def require_tripcode_match_verbatim?
  !!metadata.dig('import_settings', 'require_tripcode_match_verbatim')
end

#require_type_material_success? ⇒ Boolean

Returns:

  • (Boolean)


206
207
208
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 206

# Reports whether the 'require_type_material_success' import setting is switched on.
#
# @return [Boolean] true when metadata['import_settings']['require_type_material_success']
#   is truthy; false when it is nil/false or 'import_settings' is absent
def require_type_material_success?
  !!metadata.dig('import_settings', 'require_type_material_success')
end

#restrict_to_existing_nomenclature? ⇒ Boolean

Returns:

  • (Boolean)


202
203
204
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 202

# Reports whether the 'restrict_to_existing_nomenclature' import setting is switched on.
#
# @return [Boolean] true when metadata['import_settings']['restrict_to_existing_nomenclature']
#   is truthy; false when it is nil/false or 'import_settings' is absent
def restrict_to_existing_nomenclature?
  !!metadata.dig('import_settings', 'restrict_to_existing_nomenclature')
end

#update_catalog_number_collection_code_namespace(collection_code, namespace_id) ⇒ Object



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 152

# Sets the namespace of a collectionCode-only mapping and updates the status of
# the core records carrying that collection code. Runs inside a transaction.
#
# A namespace_id whose to_i is greater than zero counts as "namespace selected":
# matching records in status 'NotReady' become 'Ready' and lose their
# 'error_data'. Otherwise matching records that are not already 'NotReady',
# 'Imported' or 'Unsupported' become 'NotReady' with an error message, except
# those whose institutionCode belongs to an (institutionCode, collectionCode)
# mapping that has a namespace set.
#
# NOTE(review): a mapping for collection_code is expected to exist already; when
# the lookup returns nil the assignment below raises NoMethodError.
#
# @param collection_code [Object, nil] collectionCode whose mapping is updated;
#   nil is ignored
# @param namespace_id [Object, nil] namespace id to store in the mapping
# @return [Object, nil] nil when collection_code is nil, otherwise the value of
#   the transaction block (the update_all result)
def update_catalog_number_collection_code_namespace(collection_code, namespace_id)
  return if collection_code.nil? # No support for mapping blank data at this time

  transaction do
    mapping = get_catalog_number_collection_code_namespace_mapping(collection_code)
    mapping[1] = namespace_id
    ready = namespace_id.to_i > 0
    save!

    query = ready ? core_records.where(status: 'NotReady') : core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])

    # Both outcomes only touch records whose collectionCode equals collection_code.
    query = query.where(
      id: core_records_fields.at(get_field_mapping(:collectionCode)).having_value(collection_code).select(:dataset_record_id)
    )

    if ready
      query.update_all(
        "status = 'Ready', metadata = metadata - 'error_data'"
      )
    else
      # Institution codes that still have their own namespace for this collection
      # code; their records are left alone.
      institution_codes = metadata['catalog_numbers_namespaces']&.select { |m| m[0][1] == collection_code && m[1] }&.map { |m| m[0][0] } || []
      query.where.not(
        id: core_records_fields.at(get_field_mapping(:institutionCode)).having_values(institution_codes).select(:dataset_record_id)
      ).update_all(
        "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
      )
    end
  end
end

#update_catalog_number_namespace(institution_code, collection_code, namespace_id) ⇒ Object



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 116

# Sets the namespace of an (institutionCode, collectionCode) mapping and updates
# the status of the core records carrying that combination of codes. Runs inside
# a transaction.
#
# A namespace_id whose to_i is greater than zero counts as "namespace selected":
# matching records in status 'NotReady' become 'Ready' and lose their
# 'error_data'. Otherwise matching records that are not already 'NotReady',
# 'Imported' or 'Unsupported' become 'NotReady' with an error message.
#
# For each code, a non-nil value selects records having that field value, while
# a nil value selects records that have no row at all for that field.
#
# @param institution_code [Object, nil] institutionCode of the mapping
# @param collection_code [Object, nil] collectionCode of the mapping
# @param namespace_id [Object, nil] namespace id to store in the mapping
# @return [Object] the value of the transaction block (the update_all result)
def update_catalog_number_namespace(institution_code, collection_code, namespace_id)
  transaction do
    entry = get_catalog_number_namespace_mapping(institution_code, collection_code)
    entry[1] = namespace_id
    namespace_selected = namespace_id.to_i > 0
    save!

    scope =
      if namespace_selected
        core_records.where(status: 'NotReady')
      else
        core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])
      end

    # TODO: Add scopes/methods in DatasetRecord to handle nil fields values transparently
    { institutionCode: institution_code, collectionCode: collection_code }.each do |term, code|
      field_rows = core_records_fields.at(get_field_mapping(term))
      scope =
        if code.nil?
          scope.where.not(id: field_rows.select(:dataset_record_id))
        else
          scope.where(id: field_rows.having_value(code).select(:dataset_record_id))
        end
    end

    assignments =
      if namespace_selected
        "status = 'Ready', metadata = metadata - 'error_data'"
      else
        "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
      end

    scope.update_all(assignments)
  end
end