Class: ImportDataset::DarwinCore::Occurrences

Inherits:
ImportDataset::DarwinCore show all
Defined in:
app/models/import_dataset/darwin_core/occurrences.rb

Constant Summary collapse

# Alternative sets of field names; each inner array is one set.
# TODO: Can occurrenceID requirement be dropped? Should other fields be added here?
MINIMUM_FIELD_SETS = [
  ['occurrenceID', 'scientificName', 'basisOfRecord'],
  ['occurrenceID', 'TW:TaxonDetermination:otu_id', 'basisOfRecord']
].freeze

Constants inherited from ImportDataset::DarwinCore

CHECKLIST_ROW_TYPE, OCCURRENCES_ROW_TYPE

Instance Method Summary collapse

Methods inherited from ImportDataset::DarwinCore

#add_filters, #check_field_set, #core_records_fields, #core_records_mapped_fields, create_with_subtype_detection, default_if_absent, #default_nomenclatural_code, #destroy_namespace, #dwc_data_attributes, #get_col_sep, #get_core_record_identifier_namespace, #get_dwc_default_values, #get_dwc_headers, #get_dwc_records, #get_field_mapping, #get_fields_mapping, #get_normalized_dwc_term, #get_quote_char, #get_records, #import, #initialize, #progress, #set_import_settings, #stage, #start_import, #stop_import, #well_formed

Constructor Details

This class inherits a constructor from ImportDataset::DarwinCore

Instance Method Details

#add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil) ⇒ Object



193
194
195
196
197
198
199
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 193

# Adds a [collection_code, namespace_id] entry to
# metadata['catalog_numbers_collection_code_namespaces'] and keeps that list sorted by
# collection code (compared as strings). Nothing is added when collection_code is nil or an
# entry for it already exists. The dataset is saved in every case.
#
# @param collection_code [String, nil]
# @param namespace_id [Object, nil] stored as the second element of the entry
# @return [Object] whatever save! returns
def add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil)
  unless collection_code.nil? || get_catalog_number_collection_code_namespace_mapping(collection_code)
    # The lookup above tolerates a missing list (it uses `&.`), so create the list on demand
    # here instead of failing on nil.
    mappings = (metadata['catalog_numbers_collection_code_namespaces'] ||= [])
    mappings << [collection_code, namespace_id]
    mappings.sort! { |a, b| a[0].to_s <=> b[0].to_s }
  end
  save!
end

#add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil) ⇒ Object



185
186
187
188
189
190
191
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 185

# Adds a [[institution_code, collection_code], namespace_id] entry to
# metadata['catalog_numbers_namespaces'] and keeps that list sorted by the code pair (each code
# compared as a string). Nothing is added when an entry for the pair already exists.
# The dataset is saved in every case.
#
# @param institution_code [String, nil]
# @param collection_code [String, nil]
# @param namespace_id [Object, nil] stored as the second element of the entry
# @return [Object] whatever save! returns
def add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil)
  unless get_catalog_number_namespace_mapping(institution_code, collection_code)
    # The lookup above tolerates a missing list (it uses `&.`), so create the list on demand
    # here instead of failing on nil.
    mappings = (metadata['catalog_numbers_namespaces'] ||= [])
    mappings << [[institution_code, collection_code], namespace_id]
    mappings.sort! { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) }
  end
  save!
end

#containerize_dup_cat_no? ⇒ Boolean

Returns:

  • (Boolean)


201
202
203
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 201

# @return [Boolean] true when metadata['import_settings']['containerize_dup_cat_no'] is truthy;
#   false when the setting (or the whole 'import_settings' hash) is absent
def containerize_dup_cat_no?
  !!metadata.dig('import_settings', 'containerize_dup_cat_no')
end

#core_records_class ⇒ Object



15
16
17
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 15

# @return [Class] the class used for this dataset's core records
def core_records_class = DatasetRecord::DarwinCore::Occurrence

#core_records_identifier_name ⇒ Object



19
20
21
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 19

# @return [String] the name of the field that identifies a core record
def core_records_identifier_name = 'occurrenceID'

#enable_organization_determiners? ⇒ Boolean

Returns:

  • (Boolean)


221
222
223
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 221

# @return [Boolean] true when metadata['import_settings']['enable_organization_determiners'] is
#   truthy; false when the setting (or the whole 'import_settings' hash) is absent
def enable_organization_determiners?
  !!metadata.dig('import_settings', 'enable_organization_determiners')
end

#enable_organization_determiners_alt_name? ⇒ Boolean

Returns:

  • (Boolean)


225
226
227
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 225

# @return [Boolean] true when
#   metadata['import_settings']['enable_organization_determiners_alt_name'] is truthy; false when
#   the setting (or the whole 'import_settings' hash) is absent
def enable_organization_determiners_alt_name?
  !!metadata.dig('import_settings', 'enable_organization_determiners_alt_name')
end

#get_catalog_number_collection_code_namespace_mapping(collection_code) ⇒ Object (private)



235
236
237
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 235

# Looks up the entry for collection_code in
# metadata['catalog_numbers_collection_code_namespaces'].
#
# @param collection_code [String, nil]
# @return [Array, nil] the first entry whose first element equals collection_code; nil when
#   there is no such entry or the list itself is absent
def get_catalog_number_collection_code_namespace_mapping(collection_code)
  metadata['catalog_numbers_collection_code_namespaces']&.detect { |m| m[0] == collection_code }
end

#get_catalog_number_namespace(institution_code, collection_code) ⇒ Object



114
115
116
117
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 114

# Resolves the namespace stored for a catalog number.
#
# The entry for the (institution_code, collection_code) pair is consulted first; when it is
# missing or holds no namespace, the entry for collection_code alone is used instead.
#
# @param institution_code [String, nil]
# @param collection_code [String, nil]
# @return [Object, nil] the second element of the matching entry, or nil when neither lookup
#   yields one
def get_catalog_number_namespace(institution_code, collection_code)
  pair_entry = get_catalog_number_namespace_mapping(institution_code, collection_code)
  from_pair = pair_entry&.at(1)
  return from_pair if from_pair

  get_catalog_number_collection_code_namespace_mapping(collection_code)&.at(1)
end

#get_catalog_number_namespace_mapping(institution_code, collection_code) ⇒ Object (private)



231
232
233
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 231

# Looks up the entry for the (institution_code, collection_code) pair in
# metadata['catalog_numbers_namespaces'].
#
# @param institution_code [String, nil]
# @param collection_code [String, nil]
# @return [Array, nil] the first entry whose first element equals
#   [institution_code, collection_code]; nil when there is no such entry or the list is absent
def get_catalog_number_namespace_mapping(institution_code, collection_code)
  metadata['catalog_numbers_namespaces']&.detect { |m| m[0] == [institution_code, collection_code] }
end

#get_event_id_namespace ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 23

# Returns the Namespace used for eventID identifiers of this dataset, creating it when needed.
#
# The namespace id is read from metadata['namespaces']['eventID']. When no id is stored, or no
# Namespace is found for the stored id, a new Namespace is created (its name and short name carry
# a random hex suffix), its id is written back to metadata and the dataset is saved.
# The result is memoized in @event_id_identifier_namespace.
#
# @return [Namespace]
def get_event_id_namespace
  id = metadata.dig('namespaces', 'eventID')

  if id.nil? || (@event_id_identifier_namespace ||= Namespace.find_by(id:)).nil?
    random = SecureRandom.hex(4)
    project_name = Project.find(Current.project_id).name
    namespace_name = "eventID namespace for \"#{description}\" dataset in \"#{project_name}\" project [#{random}]"

    @event_id_identifier_namespace = Namespace.create!(
      name: namespace_name,
      short_name: "eventID-#{random}",
      verbatim_short_name: 'eventID',
      delimiter: ':'
    )

    # The read above uses dig and so tolerates a missing 'namespaces' hash; create it on demand
    # so the write-back cannot fail after the Namespace has already been created.
    (metadata['namespaces'] ||= {})['eventID'] = @event_id_identifier_namespace.id
    save!
  end

  @event_id_identifier_namespace
end

#perform_staging ⇒ Object

Stages core (Occurrence) records and all extension records.



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 46

# Stages core (Occurrence) records and all extension records.
#
# Steps:
# 1. Reads records and headers through get_records(source.path) and stores the headers in
#    metadata (also resetting catalog_numbers_namespaces to an empty list).
# 2. Saves one DatasetRecord::DarwinCore::Occurrence per core record. A record is 'Ready' when
#    its catalogNumber is blank or it carries a 'TW:Namespace:catalogNumber' value; otherwise it
#    is 'NotReady' and gets an error message pointing the user at "Settings".
# 3. Saves one DatasetRecord::DarwinCore::Extension per extension record with status
#    'Unsupported'.
# 4. Stores the distinct (institutionCode, collectionCode) pairs and distinct collectionCodes
#    seen in the core records, sorted and each with a nil namespace, in metadata and saves.
#
# @return [Object] whatever save! returns
def perform_staging
  records, headers = get_records(source.path)

  update!(metadata:
    metadata.merge({
      core_headers: headers[:core],
      extensions_headers: headers[:extensions],
      catalog_numbers_namespaces: []
    })
  )

  staged_rows = records[:core].map do |record|
    {
      src_data: record,
      basisOfRecord: record['basisOfRecord']
    }
  end

  # Sets collapse repeated code combinations into a single mapping entry.
  catalog_numbers_namespaces = Set[]
  catalog_numbers_collection_code_namespaces = Set[]

  staged_rows.each do |row|
    dwc_occurrence = DatasetRecord::DarwinCore::Occurrence.new(import_dataset: self)
    dwc_occurrence.initialize_data_fields(row[:src_data].map { |_k, v| v })

    catalog_numbers_namespaces << [
      [
        dwc_occurrence.get_field_value(:institutionCode),
        dwc_occurrence.get_field_value(:collectionCode)
      ],
      nil # User will select namespace through UI. TODO: Should we attempt guessing here?
    ]
    catalog_numbers_collection_code_namespaces << [dwc_occurrence.get_field_value(:collectionCode), nil]

    if dwc_occurrence.get_field_value(:catalogNumber).blank? || dwc_occurrence.get_field_value('TW:Namespace:catalogNumber').present?
      dwc_occurrence.status = 'Ready'
    else
      dwc_occurrence.status = 'NotReady'
      row['error_data'] = { messages: { catalogNumber: ['Record cannot be imported until namespace is set, see "Settings".'] } }
    end

    # The source values now live in the data fields; only the remaining keys become metadata.
    row.delete(:src_data)
    dwc_occurrence.metadata = row

    dwc_occurrence.save!
  end

  records[:extensions].each do |extension_type, extension_records|
    extension_records.each do |record|
      dwc_extension = DatasetRecord::DarwinCore::Extension.new(import_dataset: self)
      dwc_extension.initialize_data_fields(record.map { |_k, v| v })
      dwc_extension.status = 'Unsupported'
      dwc_extension.metadata = { 'type' => extension_type }

      dwc_extension.save!
    end
  end

  metadata.merge!(
    catalog_numbers_namespaces: catalog_numbers_namespaces.sort { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) },
    catalog_numbers_collection_code_namespaces: catalog_numbers_collection_code_namespaces.sort { |a, b| a[0].to_s <=> b[0].to_s }
  )

  save!
end

#require_catalog_number_match_verbatim? ⇒ Boolean

Returns:

  • (Boolean)


217
218
219
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 217

# @return [Boolean] true when
#   metadata['import_settings']['require_catalog_number_match_verbatim'] is truthy; false when
#   the setting (or the whole 'import_settings' hash) is absent
def require_catalog_number_match_verbatim?
  !!metadata.dig('import_settings', 'require_catalog_number_match_verbatim')
end

#require_tripcode_match_verbatim? ⇒ Boolean

Returns:

  • (Boolean)


213
214
215
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 213

# @return [Boolean] true when metadata['import_settings']['require_tripcode_match_verbatim'] is
#   truthy; false when the setting (or the whole 'import_settings' hash) is absent
def require_tripcode_match_verbatim?
  !!metadata.dig('import_settings', 'require_tripcode_match_verbatim')
end

#require_type_material_success? ⇒ Boolean

Returns:

  • (Boolean)


209
210
211
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 209

# @return [Boolean] true when metadata['import_settings']['require_type_material_success'] is
#   truthy; false when the setting (or the whole 'import_settings' hash) is absent
def require_type_material_success?
  !!metadata.dig('import_settings', 'require_type_material_success')
end

#restrict_to_existing_nomenclature? ⇒ Boolean

Returns:

  • (Boolean)


205
206
207
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 205

# @return [Boolean] true when metadata['import_settings']['restrict_to_existing_nomenclature'] is
#   truthy; false when the setting (or the whole 'import_settings' hash) is absent
def restrict_to_existing_nomenclature?
  !!metadata.dig('import_settings', 'restrict_to_existing_nomenclature')
end

#update_catalog_number_collection_code_namespace(collection_code, namespace_id) ⇒ Object



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 155

# Sets the namespace of the collectionCode-only mapping and updates the status of the core
# records having that collectionCode, all inside one transaction.
#
# * namespace_id.to_i > 0 ("ready"): records in status 'NotReady' become 'Ready' and lose their
#   'error_data' metadata.
# * otherwise: records not in status 'NotReady', 'Imported' or 'Unsupported' become 'NotReady'
#   and get the "namespace is set" error message, except records whose institutionCode still
#   has a namespace through an (institutionCode, collectionCode) mapping.
#
# NOTE(review): an entry for collection_code is expected to exist already; when the lookup
# returns nil, `mapping[1] = ...` raises NoMethodError.
#
# @param collection_code [String, nil] nothing is done when nil
# @param namespace_id [Object, nil]
# @return [Object, nil] nil when collection_code is nil, otherwise the transaction's result
def update_catalog_number_collection_code_namespace(collection_code, namespace_id)
  return if collection_code.nil? # No support for mapping blank data at this time

  transaction do
    mapping = get_catalog_number_collection_code_namespace_mapping(collection_code)
    mapping[1] = namespace_id
    ready = namespace_id.to_i > 0
    save!

    query = ready ? core_records.where(status: 'NotReady') : core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])

    # Both branches only touch records with the given collectionCode.
    query = query.where(
      id: core_records_fields.at(get_field_mapping(:collectionCode)).having_value(collection_code).select(:dataset_record_id)
    )

    if ready
      query.update_all(
        "status = 'Ready', metadata = metadata - 'error_data'"
      )
    else
      # Institution codes that still have a namespace for this collection code through the
      # pair mapping; their records must stay as they are.
      institution_codes = metadata['catalog_numbers_namespaces']&.select { |m| m[0][1] == collection_code && m[1] }&.map { |m| m[0][0] } || []
      query.where.not(
        id: core_records_fields.at(get_field_mapping(:institutionCode)).having_values(institution_codes).select(:dataset_record_id)
      ).update_all(
        "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
      )
    end
  end
end

#update_catalog_number_namespace(institution_code, collection_code, namespace_id) ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 119

# Sets the namespace of the (institution_code, collection_code) mapping and updates the status
# of the matching core records, all inside one transaction.
#
# A record matches when, for each of institutionCode and collectionCode, it has the given value,
# or has no entry at all in that field's core_records_fields scope when the given code is nil.
#
# * namespace_id.to_i > 0 ("ready"): matching 'NotReady' records become 'Ready' and lose their
#   'error_data' metadata.
# * otherwise: matching records not in status 'NotReady', 'Imported' or 'Unsupported' become
#   'NotReady' and get the "namespace is set" error message.
#
# @param institution_code [String, nil]
# @param collection_code [String, nil]
# @param namespace_id [Object, nil]
# @return [Object] the transaction's result
def update_catalog_number_namespace(institution_code, collection_code, namespace_id)
  transaction do
    entry = get_catalog_number_namespace_mapping(institution_code, collection_code)
    entry[1] = namespace_id
    ready = namespace_id.to_i > 0
    save!

    scope =
      if ready
        core_records.where(status: 'NotReady')
      else
        core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])
      end

    # TODO: Add scopes/methods in DatasetRecord to handle nil fields values transparently
    { institutionCode: institution_code, collectionCode: collection_code }.each do |term, code|
      field_rows = core_records_fields.at(get_field_mapping(term))
      scope =
        if code.nil?
          scope.where.not(id: field_rows.select(:dataset_record_id))
        else
          scope.where(id: field_rows.having_value(code).select(:dataset_record_id))
        end
    end

    sql =
      if ready
        "status = 'Ready', metadata = metadata - 'error_data'"
      else
        "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
      end

    scope.update_all(sql)
  end
end