Class: ImportDataset::DarwinCore::Occurrences
Constant Summary
collapse
- MINIMUM_FIELD_SET =
TODO: Can occurrenceID requirement be dropped? Should other fields be added here?
['occurrenceID', 'scientificName', 'basisOfRecord']
CHECKLIST_ROW_TYPE, OCCURRENCES_ROW_TYPE
Instance Attribute Summary
#description, #metadata, #source_content_type, #source_file_name, #source_file_size, #source_updated_at, #status
Instance Method Summary
collapse
#add_filters, #check_field_set, #core_records_are_readable, #core_records_fields, create_with_subtype_detection, default_if_absent, #default_nomenclatural_code, #destroy_namespace, #get_col_sep, #get_core_record_identifier_namespace, #get_dwc_default_values, #get_dwc_headers, #get_dwc_records, #get_field_mapping, #get_fields_mapping, #get_normalized_dwc_term, #get_quote_char, #get_records, #import, #initialize, #progress, #set_import_settings, #stage, #start_import, #stop_import
#delete_origin_relationships, #stage
#new_objects, #old_objects, #reject_origin_relationships, #set_origin
#errors_excepting, #full_error_messages_excepting, #identical, #is_community?, #is_destroyable?, #is_editable?, #is_in_use?, #is_in_users_projects?, #metamorphosize, #similar
#has_polymorphic_relationship?
transaction_with_retry
Instance Method Details
#add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil) ⇒ Object
191
192
193
194
195
196
197
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 191
# Registers a collectionCode in the list of collectionCode => namespace mappings kept
# under metadata['catalog_numbers_collection_code_namespaces'], keeping the list sorted
# by collection code (compared as strings).
#
# Nothing is added when +collection_code+ is nil or when a mapping for it already
# exists. save! is invoked in every case.
#
# @param collection_code [Object, nil] collection code to register
# @param namespace_id [Object, nil] value stored next to the code (nil when not yet chosen)
# @return [Object] whatever save! returns
def add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil)
  if !collection_code.nil? && !get_catalog_number_collection_code_namespace_mapping(collection_code)
    # The lookup helper treats the list as optional (it uses &.), so create it on first
    # use here instead of failing with NoMethodError on nil.
    mappings = (metadata['catalog_numbers_collection_code_namespaces'] ||= [])
    mappings << [collection_code, namespace_id]
    mappings.sort_by! { |code, _namespace_id| code.to_s }
  end

  save!
end
|
#add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil) ⇒ Object
183
184
185
186
187
188
189
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 183
# Registers an (institutionCode, collectionCode) pair in the list of namespace mappings
# kept under metadata['catalog_numbers_namespaces'], keeping the list sorted by the pair
# (each code compared as a string, so nil sorts like "").
#
# Nothing is added when a mapping for the pair already exists. save! is invoked in
# every case.
#
# @param institution_code [Object, nil] institution code half of the key
# @param collection_code [Object, nil] collection code half of the key
# @param namespace_id [Object, nil] value stored next to the pair (nil when not yet chosen)
# @return [Object] whatever save! returns
def add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil)
  unless get_catalog_number_namespace_mapping(institution_code, collection_code)
    # The lookup helper treats the list as optional (it uses &.), so create it on first
    # use here instead of failing with NoMethodError on nil.
    mappings = (metadata['catalog_numbers_namespaces'] ||= [])
    mappings << [[institution_code, collection_code], namespace_id]
    mappings.sort_by! { |codes, _namespace_id| codes.map(&:to_s) }
  end

  save!
end
|
#containerize_dup_cat_no? ⇒ Boolean
199
200
201
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 199
# Reads the 'containerize_dup_cat_no' flag from metadata['import_settings'].
#
# @return [Boolean] true when the stored value is truthy; false when it is nil/false
#   or when 'import_settings' is not present
def containerize_dup_cat_no?
  setting = metadata.dig('import_settings', 'containerize_dup_cat_no')
  setting ? true : false
end
|
#core_records_class ⇒ Object
#core_records_identifier_name ⇒ Object
16
17
18
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 16
# Name of the field that identifies a core record of this dataset type.
#
# @return [String] always 'occurrenceID'
def core_records_identifier_name = 'occurrenceID'
|
#enable_organization_determiners? ⇒ Boolean
219
220
221
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 219
# Reads the 'enable_organization_determiners' flag from metadata['import_settings'].
#
# @return [Boolean] true when the stored value is truthy; false when it is nil/false
#   or when 'import_settings' is not present
def enable_organization_determiners?
  setting = metadata.dig('import_settings', 'enable_organization_determiners')
  setting ? true : false
end
|
#enable_organization_determiners_alt_name? ⇒ Boolean
223
224
225
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 223
# Reads the 'enable_organization_determiners_alt_name' flag from metadata['import_settings'].
#
# @return [Boolean] true when the stored value is truthy; false when it is nil/false
#   or when 'import_settings' is not present
def enable_organization_determiners_alt_name?
  setting = metadata.dig('import_settings', 'enable_organization_determiners_alt_name')
  setting ? true : false
end
|
#get_catalog_number_collection_code_namespace_mapping(collection_code) ⇒ Object
233
234
235
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 233
# Looks up the stored mapping entry for a collection code.
#
# @param collection_code [Object, nil] code to look for
# @return [Array, nil] the first entry of metadata['catalog_numbers_collection_code_namespaces']
#   whose first element equals +collection_code+ (the stored object itself, not a copy);
#   nil when no entry matches or the list is absent
def get_catalog_number_collection_code_namespace_mapping(collection_code)
  mappings = metadata['catalog_numbers_collection_code_namespaces']
  return if mappings.nil?

  mappings.find { |code, _namespace_id| code == collection_code }
end
|
#get_catalog_number_namespace(institution_code, collection_code) ⇒ Object
112
113
114
115
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 112
# Resolves the namespace value for a record's institution/collection codes.
#
# The mapping for the exact (institution_code, collection_code) pair is consulted
# first; when it is missing or holds a nil/false value, the mapping for the collection
# code alone is used instead.
#
# @param institution_code [Object, nil]
# @param collection_code [Object, nil]
# @return [Object, nil] second element of the winning mapping entry, or nil when neither
#   lookup yields one
def get_catalog_number_namespace(institution_code, collection_code)
  pair_mapping = get_catalog_number_namespace_mapping(institution_code, collection_code)
  namespace_id = pair_mapping&.at(1)
  return namespace_id if namespace_id

  get_catalog_number_collection_code_namespace_mapping(collection_code)&.at(1)
end
|
#get_catalog_number_namespace_mapping(institution_code, collection_code) ⇒ Object
229
230
231
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 229
# Looks up the stored mapping entry for an (institution_code, collection_code) pair.
#
# @param institution_code [Object, nil]
# @param collection_code [Object, nil]
# @return [Array, nil] the first entry of metadata['catalog_numbers_namespaces'] whose
#   first element equals [institution_code, collection_code] (the stored object itself,
#   not a copy); nil when no entry matches or the list is absent
def get_catalog_number_namespace_mapping(institution_code, collection_code)
  mappings = metadata['catalog_numbers_namespaces']
  return if mappings.nil?

  wanted = [institution_code, collection_code]
  mappings.find { |codes, _namespace_id| codes == wanted }
end
|
#get_event_id_namespace ⇒ Object
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 20
# Returns the Namespace used for eventID identifiers of this dataset, creating one when
# none is usable.
#
# A new Namespace is created when metadata holds no 'eventID' namespace id, or when no
# memoized namespace exists and Namespace.find_by finds nothing for the stored id. The
# id of the new namespace is written to metadata['namespaces']['eventID'] and the
# record is saved.
#
# @return [Namespace] the memoized, found, or newly created namespace
def get_event_id_namespace
  namespace_id = metadata.dig('namespaces', 'eventID')

  if namespace_id.nil? || (@event_id_identifier_namespace ||= Namespace.find_by(id: namespace_id)).nil?
    # Random suffix keeps name and short_name distinct between generated namespaces.
    random = SecureRandom.hex(4)
    project_name = Project.find(Current.project_id).name

    @event_id_identifier_namespace = Namespace.create!(
      name: "eventID namespace for \"#{description}\" dataset in \"#{project_name}\" project [#{random}]",
      short_name: "eventID-#{random}",
      verbatim_short_name: 'eventID',
      delimiter: ':'
    )

    # The read above uses dig and therefore tolerates a missing 'namespaces' hash; the
    # write must tolerate it too, otherwise the namespace just created is left
    # unrecorded behind a NoMethodError.
    (metadata['namespaces'] ||= {})['eventID'] = @event_id_identifier_namespace.id
    save!
  end

  @event_id_identifier_namespace
end
|
#perform_staging ⇒ Object
Stages core (Occurrence) records and all extension records.
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 43
# Stages core (Occurrence) records and all extension records.
#
# Steps, in order:
# 1. Reads records and headers from the source file and stores the headers in metadata
#    (also resetting 'catalog_numbers_namespaces' to an empty list).
# 2. Creates one DatasetRecord::DarwinCore::Occurrence per core record. A record is
#    'Ready' when it has no catalogNumber or already carries a
#    'TW:Namespace:catalogNumber' value; otherwise it is 'NotReady' with an error
#    message attached to its metadata.
# 3. Creates one DatasetRecord::DarwinCore::Extension per extension record, always with
#    status 'Unsupported'.
# 4. Stores the distinct (institutionCode, collectionCode) pairs and the distinct
#    collectionCodes seen in core records as sorted mapping lists with nil namespaces.
#
# @return [Object] whatever the final save! returns
def perform_staging
  # NOTE(review): assumes get_records returns [records, headers], both keyed by :core and
  # :extensions -- confirm against get_records. Previously the second value was dropped
  # and the literal arrays [:core] / [:extensions] were stored as the headers.
  records, headers = get_records(source.path)

  update!(metadata:
    metadata.merge({
      core_headers: headers[:core],
      extensions_headers: headers[:extensions],
      catalog_numbers_namespaces: []
    })
  )

  core_records = records[:core].map do |record|
    { src_data: record, basisOfRecord: record['basisOfRecord'] }
  end

  # Sets de-duplicate the codes seen across all core records.
  catalog_numbers_namespaces = Set[]
  catalog_numbers_collection_code_namespaces = Set[]

  core_records.each do |record|
    dwc_occurrence = DatasetRecord::DarwinCore::Occurrence.new(import_dataset: self)
    dwc_occurrence.initialize_data_fields(record[:src_data].map { |_field, value| value })

    institution_code = dwc_occurrence.get_field_value(:institutionCode)
    collection_code = dwc_occurrence.get_field_value(:collectionCode)
    catalog_numbers_namespaces << [[institution_code, collection_code], nil]
    catalog_numbers_collection_code_namespaces << [collection_code, nil]

    if dwc_occurrence.get_field_value(:catalogNumber).blank? ||
       dwc_occurrence.get_field_value('TW:Namespace:catalogNumber').present?
      dwc_occurrence.status = 'Ready'
    else
      dwc_occurrence.status = 'NotReady'
      record['error_data'] = { messages: { catalogNumber: ['Record cannot be imported until namespace is set.'] } }
    end

    # Only the remaining keys (basisOfRecord, error_data) go into the record's metadata.
    record.delete(:src_data)
    dwc_occurrence.metadata = record
    dwc_occurrence.save!
  end

  records[:extensions].each do |extension_type, extension_records|
    extension_records.each do |record|
      dwc_extension = DatasetRecord::DarwinCore::Extension.new(import_dataset: self)
      dwc_extension.initialize_data_fields(record.map { |_field, value| value })
      dwc_extension.status = 'Unsupported'
      dwc_extension.metadata = { 'type' => extension_type }
      dwc_extension.save!
    end
  end

  metadata.merge!(
    catalog_numbers_namespaces: catalog_numbers_namespaces.sort_by { |codes, _namespace_id| codes.map(&:to_s) }
  )
  metadata.merge!(
    catalog_numbers_collection_code_namespaces:
      catalog_numbers_collection_code_namespaces.sort_by { |code, _namespace_id| code.to_s }
  )

  save!
end
|
#require_catalog_number_match_verbatim? ⇒ Boolean
215
216
217
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 215
# Reads the 'require_catalog_number_match_verbatim' flag from metadata['import_settings'].
#
# @return [Boolean] true when the stored value is truthy; false when it is nil/false
#   or when 'import_settings' is not present
def require_catalog_number_match_verbatim?
  setting = metadata.dig('import_settings', 'require_catalog_number_match_verbatim')
  setting ? true : false
end
|
#require_tripcode_match_verbatim? ⇒ Boolean
211
212
213
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 211
# Reads the 'require_tripcode_match_verbatim' flag from metadata['import_settings'].
#
# @return [Boolean] true when the stored value is truthy; false when it is nil/false
#   or when 'import_settings' is not present
def require_tripcode_match_verbatim?
  setting = metadata.dig('import_settings', 'require_tripcode_match_verbatim')
  setting ? true : false
end
|
#require_type_material_success? ⇒ Boolean
207
208
209
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 207
# Reads the 'require_type_material_success' flag from metadata['import_settings'].
#
# @return [Boolean] true when the stored value is truthy; false when it is nil/false
#   or when 'import_settings' is not present
def require_type_material_success?
  setting = metadata.dig('import_settings', 'require_type_material_success')
  setting ? true : false
end
|
#restrict_to_existing_nomenclature? ⇒ Boolean
203
204
205
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 203
# Reads the 'restrict_to_existing_nomenclature' flag from metadata['import_settings'].
#
# @return [Boolean] true when the stored value is truthy; false when it is nil/false
#   or when 'import_settings' is not present
def restrict_to_existing_nomenclature?
  setting = metadata.dig('import_settings', 'restrict_to_existing_nomenclature')
  setting ? true : false
end
|
#update_catalog_number_collection_code_namespace(collection_code, namespace_id) ⇒ Object
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 153
# Stores +namespace_id+ on the existing mapping for +collection_code+ and re-evaluates
# the status of the core records carrying that collection code. Does nothing when
# +collection_code+ is nil. Everything runs inside one transaction.
#
# * namespace_id.to_i > 0: 'NotReady' records with this collectionCode become 'Ready'
#   and lose their 'error_data'.
# * otherwise: records with this collectionCode whose status is none of 'NotReady',
#   'Imported', 'Unsupported' become 'NotReady' with an error message, except records
#   whose institutionCode is one of the institution codes that have a non-nil/non-false
#   namespace stored for this collection code in metadata['catalog_numbers_namespaces'].
#
# @param collection_code [Object, nil]
# @param namespace_id [Object, nil] compared through #to_i; values <= 0 count as "not set"
# @return [Object, nil] nil for a nil collection code, otherwise the transaction's result
def update_catalog_number_collection_code_namespace(collection_code, namespace_id)
  return if collection_code.nil?

  transaction do
    entry = get_catalog_number_collection_code_namespace_mapping(collection_code)
    entry[1] = namespace_id
    namespace_set = namespace_id.to_i > 0
    save!

    if namespace_set
      pending = core_records.where(status: 'NotReady')
      with_collection_code =
        core_records_fields.at(get_field_mapping(:collectionCode)).having_value(collection_code).select(:dataset_record_id)

      pending.where(id: with_collection_code).update_all(
        "status = 'Ready', metadata = metadata - 'error_data'"
      )
    else
      affected = core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])

      # Institution codes whose (institution, collection) pair has its own namespace:
      # their records stay importable even without a collection-code-wide namespace.
      pair_mappings = metadata['catalog_numbers_namespaces'] || []
      institution_codes = pair_mappings
        .select { |(_institution, code), pair_namespace_id| code == collection_code && pair_namespace_id }
        .map { |(institution, _code), _pair_namespace_id| institution }

      with_collection_code =
        core_records_fields.at(get_field_mapping(:collectionCode)).having_value(collection_code).select(:dataset_record_id)
      affected = affected.where(id: with_collection_code)

      covered_by_pair =
        core_records_fields.at(get_field_mapping(:institutionCode)).having_values(institution_codes).select(:dataset_record_id)
      affected.where.not(id: covered_by_pair).update_all(
        "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
      )
    end
  end
end
|
#update_catalog_number_namespace(institution_code, collection_code, namespace_id) ⇒ Object
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 117
# Stores +namespace_id+ on the existing mapping for the (institution_code,
# collection_code) pair and re-evaluates the status of the matching core records.
# Everything runs inside one transaction.
#
# Matching records are those whose institutionCode / collectionCode fields have the
# given values; a nil code matches records that have no such field row at all.
#
# * namespace_id.to_i > 0: matching 'NotReady' records become 'Ready' and lose their
#   'error_data'.
# * otherwise: matching records whose status is none of 'NotReady', 'Imported',
#   'Unsupported' become 'NotReady' with an error message.
#
# @param institution_code [Object, nil]
# @param collection_code [Object, nil]
# @param namespace_id [Object, nil] compared through #to_i; values <= 0 count as "not set"
# @return [Object] the transaction's result
def update_catalog_number_namespace(institution_code, collection_code, namespace_id)
  transaction do
    entry = get_catalog_number_namespace_mapping(institution_code, collection_code)
    entry[1] = namespace_id
    namespace_set = namespace_id.to_i > 0
    save!

    matching =
      if namespace_set
        core_records.where(status: 'NotReady')
      else
        core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])
      end

    # Same narrowing for both terms, institutionCode first.
    { institutionCode: institution_code, collectionCode: collection_code }.each do |term, code|
      term_fields = core_records_fields.at(get_field_mapping(term))
      matching =
        if code.nil?
          matching.where.not(id: term_fields.select(:dataset_record_id))
        else
          matching.where(id: term_fields.having_value(code).select(:dataset_record_id))
        end
    end

    assignments =
      if namespace_set
        "status = 'Ready', metadata = metadata - 'error_data'"
      else
        "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
      end

    matching.update_all(assignments)
  end
end
|