Class: ImportDataset::DarwinCore::Occurrences

Inherits:
ImportDataset::DarwinCore show all
Defined in:
app/models/import_dataset/darwin_core/occurrences.rb

Constant Summary collapse

MINIMUM_FIELD_SET =

TODO: Can occurrenceID requirement be dropped? Should other fields be added here?

['occurrenceID', 'scientificName', 'basisOfRecord']

Constants inherited from ImportDataset::DarwinCore

CHECKLIST_ROW_TYPE, OCCURRENCES_ROW_TYPE

Instance Attribute Summary

Attributes inherited from ImportDataset

#description, #metadata, #source_content_type, #source_file_name, #source_file_size, #source_updated_at, #status

Instance Method Summary collapse

Methods inherited from ImportDataset::DarwinCore

#add_filters, #check_field_set, #core_records_are_readable, #core_records_fields, create_with_subtype_detection, default_if_absent, #default_nomenclatural_code, #destroy_namespace, #get_col_sep, #get_core_record_identifier_namespace, #get_dwc_default_values, #get_dwc_headers, #get_dwc_records, #get_field_mapping, #get_fields_mapping, #get_normalized_dwc_term, #get_quote_char, #get_records, #import, #initialize, #progress, #set_import_settings, #stage, #start_import, #stop_import

Methods inherited from ImportDataset

#delete_origin_relationships, #stage

Methods included from Shared::OriginRelationship

#new_objects, #old_objects, #reject_origin_relationships, #set_origin

Methods included from Shared::IsData

#errors_excepting, #full_error_messages_excepting, #identical, #is_community?, #is_destroyable?, #is_editable?, #is_in_use?, #is_in_users_projects?, #metamorphosize, #similar

Methods included from Housekeeping

#has_polymorphic_relationship?

Methods inherited from ApplicationRecord

transaction_with_retry

Constructor Details

This class inherits a constructor from ImportDataset::DarwinCore

Instance Method Details

#add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil) ⇒ Object



191
192
193
194
195
196
197
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 191

# Registers a [collection_code, namespace_id] pair under the
# 'catalog_numbers_collection_code_namespaces' metadata key and persists the
# dataset. Nothing is added when +collection_code+ is nil or a mapping for it
# already exists; the record is saved in every case.
#
# @param collection_code [String, nil] collectionCode value to register
# @param namespace_id [Integer, nil] namespace to associate (nil = not chosen yet)
# @return [Object] whatever +save!+ returns
def add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil)
  already_handled = collection_code.nil? ||
                    get_catalog_number_collection_code_namespace_mapping(collection_code)

  unless already_handled
    # The lookup helper tolerates a missing list (&.detect), so the writer does too.
    mappings = (metadata['catalog_numbers_collection_code_namespaces'] ||= [])
    mappings << [collection_code, namespace_id]
    # Keep entries ordered by collection code; to_s lets nil codes compare.
    mappings.sort! { |a, b| a[0].to_s <=> b[0].to_s }
  end

  save!
end

#add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil) ⇒ Object



183
184
185
186
187
188
189
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 183

# Registers an [[institution_code, collection_code], namespace_id] entry under
# the 'catalog_numbers_namespaces' metadata key, unless a mapping for that code
# pair already exists, and persists the dataset (saved in every case).
#
# @param institution_code [String, nil] institutionCode value of the pair
# @param collection_code [String, nil] collectionCode value of the pair
# @param namespace_id [Integer, nil] namespace to associate (nil = not chosen yet)
# @return [Object] whatever +save!+ returns
def add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil)
  unless get_catalog_number_namespace_mapping(institution_code, collection_code)
    # The lookup helper tolerates a missing list (&.detect), so the writer does too.
    mappings = (metadata['catalog_numbers_namespaces'] ||= [])
    mappings << [[institution_code, collection_code], namespace_id]
    # Order by the stringified code pair so nil codes can be compared.
    mappings.sort! { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) }
  end

  save!
end

#containerize_dup_cat_no? ⇒ Boolean

Returns:

  • (Boolean)


199
200
201
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 199

# Reads the 'containerize_dup_cat_no' flag from the 'import_settings' section of
# the dataset metadata.
#
# @return [Boolean] true when the setting is truthy, false when falsy or absent
def containerize_dup_cat_no?
  !!metadata.dig('import_settings', 'containerize_dup_cat_no')
end

#core_records_class ⇒ Object



12
13
14
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 12

# @return [Class] the DatasetRecord::DarwinCore::Occurrence class
def core_records_class = DatasetRecord::DarwinCore::Occurrence

#core_records_identifier_name ⇒ Object



16
17
18
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 16

# @return [String] the literal term name 'occurrenceID'
def core_records_identifier_name = 'occurrenceID'

#enable_organization_determiners? ⇒ Boolean

Returns:

  • (Boolean)


219
220
221
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 219

# Reads the 'enable_organization_determiners' flag from the 'import_settings'
# section of the dataset metadata.
#
# @return [Boolean] true when the setting is truthy, false when falsy or absent
def enable_organization_determiners?
  !!metadata.dig('import_settings', 'enable_organization_determiners')
end

#enable_organization_determiners_alt_name? ⇒ Boolean

Returns:

  • (Boolean)


223
224
225
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 223

# Reads the 'enable_organization_determiners_alt_name' flag from the
# 'import_settings' section of the dataset metadata.
#
# @return [Boolean] true when the setting is truthy, false when falsy or absent
def enable_organization_determiners_alt_name?
  !!metadata.dig('import_settings', 'enable_organization_determiners_alt_name')
end

#get_catalog_number_collection_code_namespace_mapping(collection_code) ⇒ Object (private)



233
234
235
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 233

# Looks up the mapping entry registered for +collection_code+ under the
# 'catalog_numbers_collection_code_namespaces' metadata key.
#
# @param collection_code [String, nil] collectionCode to look for
# @return [Array, nil] the first [collection_code, namespace_id] entry whose
#   first element equals +collection_code+; nil when there is none or the list
#   itself is absent
def get_catalog_number_collection_code_namespace_mapping(collection_code)
  metadata['catalog_numbers_collection_code_namespaces']&.detect { |m| m[0] == collection_code }
end

#get_catalog_number_namespace(institution_code, collection_code) ⇒ Object



112
113
114
115
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 112

# Resolves the namespace id to use for a given code pair.
#
# The mapping for the exact (institution_code, collection_code) pair is
# consulted first; when it is missing or carries no namespace id, the mapping
# registered for the collection code alone is used instead.
#
# @param institution_code [String, nil]
# @param collection_code [String, nil]
# @return [Object, nil] second element of the matching mapping entry, or nil
def get_catalog_number_namespace(institution_code, collection_code)
  pair_namespace = get_catalog_number_namespace_mapping(institution_code, collection_code)&.at(1)
  return pair_namespace if pair_namespace

  get_catalog_number_collection_code_namespace_mapping(collection_code)&.at(1)
end

#get_catalog_number_namespace_mapping(institution_code, collection_code) ⇒ Object (private)



229
230
231
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 229

# Looks up the mapping entry registered for the exact
# (institution_code, collection_code) pair under the
# 'catalog_numbers_namespaces' metadata key.
#
# @param institution_code [String, nil]
# @param collection_code [String, nil]
# @return [Array, nil] the first [[institution_code, collection_code], namespace_id]
#   entry whose code pair matches; nil when there is none or the list is absent
def get_catalog_number_namespace_mapping(institution_code, collection_code)
  metadata['catalog_numbers_namespaces']&.detect { |m| m[0] == [institution_code, collection_code] }
end

#get_event_id_namespace ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 20

# Returns the Namespace used for eventID identifiers of this dataset, creating
# one on first use.
#
# The namespace id is read from metadata['namespaces']['eventID']. When no id
# is stored, or no Namespace with that id can be found, a new Namespace is
# created (its names carry a random hex suffix), its id is written back to the
# metadata and the dataset is saved. The result is memoized in
# @event_id_identifier_namespace.
#
# @return [Namespace]
def get_event_id_namespace
  id = metadata.dig('namespaces', 'eventID')

  if id.nil? || (@event_id_identifier_namespace ||= Namespace.find_by(id:)).nil?
    random = SecureRandom.hex(4)
    project_name = Project.find(Current.project_id).name
    namespace_name = "eventID namespace for \"#{description}\" dataset in \"#{project_name}\" project [#{random}]"

    @event_id_identifier_namespace = Namespace.create!(
      name: namespace_name,
      short_name: "eventID-#{random}",
      verbatim_short_name: 'eventID',
      delimiter: ':'
    )

    # The read above tolerates a missing 'namespaces' section (dig), so the
    # write creates it when needed instead of failing on nil.
    (metadata['namespaces'] ||= {})['eventID'] = @event_id_identifier_namespace.id
    save!
  end

  @event_id_identifier_namespace
end

#perform_staging ⇒ Object

Stages core (Occurrence) records and all extension records.



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 43

# Stages core (Occurrence) records and all extension records.
#
# Steps, in order:
# 1. Reads records and headers from the source file and stores the headers
#    (plus an empty 'catalog_numbers_namespaces' list) in the dataset metadata.
# 2. Creates one DatasetRecord::DarwinCore::Occurrence per core row. A row is
#    'Ready' when it has no catalogNumber or already carries a
#    'TW:Namespace:catalogNumber' value; otherwise it is 'NotReady' and gets an
#    error message in its metadata.
# 3. Creates one DatasetRecord::DarwinCore::Extension per extension row, with
#    status 'Unsupported' and the extension type in its metadata.
# 4. Stores the sorted, de-duplicated (institutionCode, collectionCode) pairs
#    and collectionCode values seen in the core rows, each with a nil namespace,
#    in the dataset metadata and saves.
#
# @return [Object] whatever the final +save!+ returns
def perform_staging
  records, headers = get_records(source.path)

  update!(
    metadata: metadata.merge(
      {
        core_headers: headers[:core],
        extensions_headers: headers[:extensions],
        catalog_numbers_namespaces: []
      }
    )
  )

  core_records = records[:core].map do |record|
    {
      src_data: record,
      basisOfRecord: record['basisOfRecord']
    }
  end

  # Sets de-duplicate the code combinations across all core rows.
  catalog_numbers_namespaces = Set[]
  catalog_numbers_collection_code_namespaces = Set[]

  core_records.each do |record|
    dwc_occurrence = DatasetRecord::DarwinCore::Occurrence.new(import_dataset: self)
    dwc_occurrence.initialize_data_fields(record[:src_data].map { |_k, v| v })

    catalog_numbers_namespaces << [
      [
        dwc_occurrence.get_field_value(:institutionCode),
        dwc_occurrence.get_field_value(:collectionCode)
      ],
      nil # User will select namespace through UI. TODO: Should we attempt guessing here?
    ]
    catalog_numbers_collection_code_namespaces << [dwc_occurrence.get_field_value(:collectionCode), nil]

    if dwc_occurrence.get_field_value(:catalogNumber).blank? || dwc_occurrence.get_field_value('TW:Namespace:catalogNumber').present?
      dwc_occurrence.status = 'Ready'
    else
      dwc_occurrence.status = 'NotReady'
      record['error_data'] = { messages: { catalogNumber: ['Record cannot be imported until namespace is set.'] } }
    end

    # The raw row now lives in the record's data fields; only the remaining
    # keys (basisOfRecord, optional error_data) become the record metadata.
    record.delete(:src_data)
    dwc_occurrence.metadata = record

    dwc_occurrence.save!
  end

  records[:extensions].each do |extension_type, extension_records|
    extension_records.each do |record|
      dwc_extension = DatasetRecord::DarwinCore::Extension.new(import_dataset: self)
      dwc_extension.initialize_data_fields(record.map { |_k, v| v })
      dwc_extension.status = 'Unsupported'
      dwc_extension.metadata = { 'type' => extension_type }

      dwc_extension.save!
    end
  end

  # to_s makes nil codes comparable while sorting.
  metadata.merge!(
    catalog_numbers_namespaces:
      catalog_numbers_namespaces.sort { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) },
    catalog_numbers_collection_code_namespaces:
      catalog_numbers_collection_code_namespaces.sort { |a, b| a[0].to_s <=> b[0].to_s }
  )

  save!
end

#require_catalog_number_match_verbatim? ⇒ Boolean

Returns:

  • (Boolean)


215
216
217
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 215

# Reads the 'require_catalog_number_match_verbatim' flag from the
# 'import_settings' section of the dataset metadata.
#
# @return [Boolean] true when the setting is truthy, false when falsy or absent
def require_catalog_number_match_verbatim?
  !!metadata.dig('import_settings', 'require_catalog_number_match_verbatim')
end

#require_tripcode_match_verbatim? ⇒ Boolean

Returns:

  • (Boolean)


211
212
213
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 211

# Reads the 'require_tripcode_match_verbatim' flag from the 'import_settings'
# section of the dataset metadata.
#
# @return [Boolean] true when the setting is truthy, false when falsy or absent
def require_tripcode_match_verbatim?
  !!metadata.dig('import_settings', 'require_tripcode_match_verbatim')
end

#require_type_material_success? ⇒ Boolean

Returns:

  • (Boolean)


207
208
209
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 207

# Reads the 'require_type_material_success' flag from the 'import_settings'
# section of the dataset metadata.
#
# @return [Boolean] true when the setting is truthy, false when falsy or absent
def require_type_material_success?
  !!metadata.dig('import_settings', 'require_type_material_success')
end

#restrict_to_existing_nomenclature? ⇒ Boolean

Returns:

  • (Boolean)


203
204
205
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 203

# Reads the 'restrict_to_existing_nomenclature' flag from the 'import_settings'
# section of the dataset metadata.
#
# @return [Boolean] true when the setting is truthy, false when falsy or absent
def restrict_to_existing_nomenclature?
  !!metadata.dig('import_settings', 'restrict_to_existing_nomenclature')
end

#update_catalog_number_collection_code_namespace(collection_code, namespace_id) ⇒ Object



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 153

# Sets the namespace mapped to +collection_code+ and re-evaluates the status of
# the affected core records, all inside one transaction.
#
# * With a positive +namespace_id+, 'NotReady' core records having that
#   collectionCode become 'Ready' and lose their 'error_data' metadata.
# * Otherwise, core records having that collectionCode (excluding those already
#   'NotReady', 'Imported' or 'Unsupported') become 'NotReady' and receive an
#   error message, except records whose institutionCode has a namespace set for
#   the (institutionCode, collection_code) pair in 'catalog_numbers_namespaces'.
#
# @param collection_code [String, nil] collectionCode whose mapping is updated;
#   nil is ignored
# @param namespace_id [Integer, String, nil] new namespace id; values whose
#   +to_i+ is not positive clear the mapping
# @return [Object, nil] nil when +collection_code+ is nil, otherwise the result
#   of the transaction block
def update_catalog_number_collection_code_namespace(collection_code, namespace_id)
  return if collection_code.nil? # No support for mapping blank data at this time

  transaction do
    mapping = get_catalog_number_collection_code_namespace_mapping(collection_code)
    mapping[1] = namespace_id
    ready = namespace_id.to_i > 0
    save!

    query = ready ? core_records.where(status: 'NotReady') : core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])

    # Both branches only touch records carrying this collectionCode.
    query = query.where(
      id: core_records_fields.at(get_field_mapping(:collectionCode)).having_value(collection_code).select(:dataset_record_id)
    )

    if ready
      query.update_all(
        "status = 'Ready', metadata = metadata - 'error_data'"
      )
    else
      # Institution codes that still have their own namespace for this collection
      # code; records with those codes are left untouched.
      institution_codes = metadata['catalog_numbers_namespaces']&.select { |m| m[0][1] == collection_code && m[1] }&.map { |m| m[0][0] } || []
      query.where.not(
        id: core_records_fields.at(get_field_mapping(:institutionCode)).having_values(institution_codes).select(:dataset_record_id)
      ).update_all(
        "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
      )
    end
  end
end

#update_catalog_number_namespace(institution_code, collection_code, namespace_id) ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 117

# Sets the namespace mapped to the (institution_code, collection_code) pair and
# re-evaluates the status of the matching core records in one transaction.
#
# A positive +namespace_id+ turns matching 'NotReady' records into 'Ready' and
# drops their 'error_data'; any other value turns matching records (except
# those already 'NotReady', 'Imported' or 'Unsupported') into 'NotReady' with
# an error message. A nil code matches records that have no field row for that
# term at all.
#
# @param institution_code [String, nil]
# @param collection_code [String, nil]
# @param namespace_id [Integer, String, nil]
# @return [Object] result of the transaction block
def update_catalog_number_namespace(institution_code, collection_code, namespace_id)
  transaction do
    entry = get_catalog_number_namespace_mapping(institution_code, collection_code)
    entry[1] = namespace_id
    ready = namespace_id.to_i.positive?
    save!

    scope =
      if ready
        core_records.where(status: 'NotReady')
      else
        core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])
      end

    # TODO: Add scopes/methods in DatasetRecord to handle nil fields values transparently
    { institutionCode: institution_code, collectionCode: collection_code }.each do |term, value|
      term_fields = core_records_fields.at(get_field_mapping(term))

      scope =
        if value.nil?
          scope.where.not(id: term_fields.select(:dataset_record_id))
        else
          scope.where(id: term_fields.having_value(value).select(:dataset_record_id))
        end
    end

    status_sql =
      if ready
        "status = 'Ready', metadata = metadata - 'error_data'"
      else
        "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
      end

    scope.update_all(status_sql)
  end
end