Class: ImportDataset::DarwinCore::Occurrences

Inherits:
ImportDataset::DarwinCore show all
Defined in:
app/models/import_dataset/darwin_core/occurrences.rb

Constant Summary collapse

MINIMUM_FIELD_SET =

TODO: Can occurrenceID requirement be dropped? Should other fields be added here?

["occurrenceID", "scientificName", "basisOfRecord"]

Constants inherited from ImportDataset::DarwinCore

CHECKLIST_ROW_TYPE, OCCURRENCES_ROW_TYPE

Instance Attribute Summary

Attributes inherited from ImportDataset

#description, #metadata, #source_content_type, #source_file_name, #source_file_size, #source_updated_at, #status

Instance Method Summary collapse

Methods inherited from ImportDataset::DarwinCore

#core_records_fields, create_with_subtype_detection, #default_nomenclatural_code, #destroy_namespace, #get_core_record_identifier_namespace, #get_dwc_default_values, #get_dwc_headers, #get_dwc_records, #get_field_mapping, #get_fields_mapping, #get_normalized_dwc_term, #get_records, #import, #initialize, #progress, #set_import_settings, #stage, #start_import, #stop_import

Methods inherited from ImportDataset

#delete_origin_relationships, #stage

Methods included from Shared::OriginRelationship

#new_objects, #old_objects, #reject_origin_relationships, #set_origin

Methods included from Shared::IsData

#errors_excepting, #full_error_messages_excepting, #identical, #is_community?, #is_destroyable?, #is_editable?, #is_in_use?, #is_in_users_projects?, #metamorphosize, #similar

Methods included from Housekeeping

#has_polymorphic_relationship?

Methods inherited from ApplicationRecord

transaction_with_retry

Constructor Details

This class inherits a constructor from ImportDataset::DarwinCore

Instance Method Details

#add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil) ⇒ Object



162
163
164
165
166
167
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 162

# Registers an [institution_code, collection_code] pair in
# metadata["catalog_numbers_namespaces"] unless a mapping for that pair
# already exists, then persists the record with save!.
#
# Each stored mapping has the shape [[institution_code, collection_code], namespace_id].
#
# @param institution_code first element of the pair used as the mapping key
# @param collection_code second element of the pair used as the mapping key
# @param namespace_id value stored alongside a newly added pair (nil by default)
# @return whatever save! returns
def add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil)
  unless get_catalog_number_namespace_mapping(institution_code, collection_code)
    self.metadata["catalog_numbers_namespaces"] << [[institution_code, collection_code], namespace_id]
  end
  save!
end

#check_field_set ⇒ Object



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 101

# Verifies that the staged source file declares every header listed in
# MINIMUM_FIELD_SET, adding one error on :source per missing header.
#
# Headers are read according to the staged file's extension:
# * .zip           - via get_dwc_headers on the archive's core
# * .xls/.xlsx/.ods - spreadsheet converted to CSV through Roo
# * anything else  - parsed as tab-separated text without quoting
#
# Does nothing (returns nil) when the source is not staged.
#
# @return [Array, nil] the missing headers, or nil when the source is not staged
def check_field_set
  return unless source.staged?

  headers =
    if source.staged_path =~ /\.zip\z/i
      get_dwc_headers(::DarwinCore.new(source.staged_path).core)
    elsif source.staged_path =~ /\.(xlsx?|ods)\z/i
      CSV.parse(Roo::Spreadsheet.open(source.staged_path).to_csv, headers: true).headers
    else
      CSV.read(source.staged_path, headers: true, col_sep: "\t", quote_char: nil, encoding: 'bom|utf-8').headers
    end

  (MINIMUM_FIELD_SET - headers).each do |missing|
    errors.add(:source, "required field #{missing} missing.")
  end
end

#containerize_dup_cat_no? ⇒ Boolean

Returns:

  • (Boolean)


169
170
171
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 169

# Reports whether the "containerize_dup_cat_no" import setting is enabled.
#
# Reads metadata["import_settings"]["containerize_dup_cat_no"] and coerces
# it to a strict boolean, so a missing key or missing "import_settings"
# hash yields false.
#
# @return [Boolean]
def containerize_dup_cat_no?
  !!self.metadata.dig("import_settings", "containerize_dup_cat_no")
end

#core_records_class ⇒ Object



12
13
14
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 12

# Class used to represent the core records of this dataset type.
#
# @return [Class] DatasetRecord::DarwinCore::Occurrence
def core_records_class
  DatasetRecord::DarwinCore::Occurrence
end

#core_records_identifier_name ⇒ Object



16
17
18
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 16

# Name of the field that identifies a core record in this dataset type.
#
# @return [String] always 'occurrenceID'
def core_records_identifier_name
  'occurrenceID'
end

#get_catalog_number_namespace(institution_code, collection_code) ⇒ Object



122
123
124
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 122

# Looks up the value stored for an [institution_code, collection_code] pair.
#
# Delegates to get_catalog_number_namespace_mapping and returns the element
# at index 1 of the mapping it finds, or nil when no mapping is found.
#
# @param institution_code first element of the pair to look up
# @param collection_code second element of the pair to look up
# @return the element at index 1 of the matching mapping, or nil
def get_catalog_number_namespace(institution_code, collection_code)
  mapping = get_catalog_number_namespace_mapping(institution_code, collection_code)
  mapping&.at(1)
end

#get_catalog_number_namespace_mapping(institution_code, collection_code) ⇒ Object (private)



179
180
181
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 179

# Finds the stored mapping entry for an [institution_code, collection_code] pair.
#
# Searches metadata["catalog_numbers_namespaces"] for the first entry whose
# element at index 0 equals [institution_code, collection_code]. The safe
# navigation makes this return nil when that metadata key is absent.
#
# @param institution_code first element of the pair to look up
# @param collection_code second element of the pair to look up
# @return [Array, nil] the matching entry (the stored object itself, not a copy), or nil
def get_catalog_number_namespace_mapping(institution_code, collection_code)
  self.metadata["catalog_numbers_namespaces"]&.detect { |m| m[0] == [institution_code, collection_code] }
end

#get_event_id_namespace ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 20

# Returns the Namespace used for eventID identifiers of this dataset,
# creating and recording one when none is available.
#
# The namespace id is read from metadata["namespaces"]["eventID"]. When that
# id is missing, or no Namespace with that id can be found, a new Namespace
# is created with a random hex suffix in its name and short name, its id is
# written back to metadata["namespaces"]["eventID"], and the record is
# persisted with save!. The result is memoized in
# @event_id_identifier_namespace.
#
# @return [Namespace]
def get_event_id_namespace
  id = metadata.dig("namespaces", "eventID")

  if id.nil? || (@event_id_identifier_namespace ||= Namespace.find_by(id: id)).nil?
    random = SecureRandom.hex(4)
    project_name = Project.find(Current.project_id).name
    namespace_name = "eventID namespace for \"#{description}\" dataset in \"#{project_name}\" project [#{random}]"

    @event_id_identifier_namespace = Namespace.create!(
      name: namespace_name,
      short_name: "eventID-#{random}",
      verbatim_short_name: "eventID",
      delimiter: ':'
    )

    metadata["namespaces"]["eventID"] = @event_id_identifier_namespace.id
    save!
  end

  @event_id_identifier_namespace
end

#perform_staging ⇒ Object

Stages core (Occurrence) records and all extension records.



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 43

# Stages core (Occurrence) records and all extension records.
#
# Steps, in order:
# 1. Reads records and headers from the source via get_records.
# 2. Stores the core/extension headers and an empty catalog number namespace
#    list in metadata.
# 3. Creates one DatasetRecord::DarwinCore::Occurrence per core record. A
#    record with a blank catalogNumber is marked "Ready"; otherwise it is
#    marked "NotReady" with an error message, because its namespace has not
#    been chosen yet.
# 4. Creates one DatasetRecord::DarwinCore::Extension per extension record,
#    marked "Unsupported" and tagged with its extension type.
# 5. Stores the distinct [institutionCode, collectionCode] pairs (each with a
#    nil namespace) in metadata.
#
# @return whatever the final update! returns
def perform_staging
  records, headers = get_records(source)

  update!(
    metadata: metadata.merge({
      core_headers: headers[:core],
      extensions_headers: headers[:extensions],
      catalog_numbers_namespaces: []
    })
  )

  core_records = records[:core].map do |record|
    {
      src_data: record,
      basisOfRecord: record["basisOfRecord"]
    }
  end

  # A Set so that each [institutionCode, collectionCode] pair is kept once.
  catalog_numbers_namespaces = Set[]

  core_records.each do |record|
    dwc_occurrence = DatasetRecord::DarwinCore::Occurrence.new(import_dataset: self)
    dwc_occurrence.initialize_data_fields(record[:src_data].map { |_key, value| value })

    catalog_numbers_namespaces << [
      [
        dwc_occurrence.get_field_value(:institutionCode),
        dwc_occurrence.get_field_value(:collectionCode)
      ],
      nil # User will select namespace through UI. TODO: Should we attempt guessing here?
    ]

    if dwc_occurrence.get_field_value(:catalogNumber).blank?
      dwc_occurrence.status = "Ready"
    else
      dwc_occurrence.status = "NotReady"
      record["error_data"] = { messages: { catalogNumber: ["Record cannot be imported until namespace is set."] } }
    end

    # The raw row lives in the data fields; only the remaining keys go into metadata.
    record.delete(:src_data)
    dwc_occurrence.metadata = record

    dwc_occurrence.save!
  end

  records[:extensions].each do |extension_type, extension_records|
    extension_records.each do |record|
      dwc_extension = DatasetRecord::DarwinCore::Extension.new(import_dataset: self)
      dwc_extension.initialize_data_fields(record.map { |_key, value| value })
      dwc_extension.status = "Unsupported"
      dwc_extension.metadata = { "type" => extension_type }

      dwc_extension.save!
    end
  end

  update!(metadata: self.metadata.merge!(catalog_numbers_namespaces: catalog_numbers_namespaces))
end

#restrict_to_existing_nomenclature? ⇒ Boolean

Returns:

  • (Boolean)


173
174
175
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 173

# Reports whether the "restrict_to_existing_nomenclature" import setting is enabled.
#
# Reads metadata["import_settings"]["restrict_to_existing_nomenclature"] and
# coerces it to a strict boolean, so a missing key or missing
# "import_settings" hash yields false.
#
# @return [Boolean]
def restrict_to_existing_nomenclature?
  !!self.metadata.dig("import_settings", "restrict_to_existing_nomenclature")
end

#update_catalog_number_namespace(institution_code, collection_code, namespace_id) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 126

# Sets the namespace id for an [institution_code, collection_code] pair and
# updates the status of the core records that carry that pair.
#
# Runs inside a transaction. The namespace counts as set when
# namespace_id.to_i is greater than zero:
# * set     - matching 'NotReady' records become 'Ready' and lose their
#             'error_data' metadata key.
# * not set - matching records whose status is none of 'NotReady',
#             'Imported' or 'Unsupported' become 'NotReady' and get an
#             'error_data' message.
#
# A nil code matches records that have no field at the mapped position for
# that term; a non-nil code matches records whose field holds that value.
#
# @param institution_code institutionCode of the pair (may be nil)
# @param collection_code collectionCode of the pair (may be nil)
# @param namespace_id value stored in the pair's mapping
# @return whatever the transaction block yields (the update_all result)
def update_catalog_number_namespace(institution_code, collection_code, namespace_id)
  transaction do
    namespace_mapping = get_catalog_number_namespace_mapping(institution_code, collection_code)
    namespace_mapping[1] = namespace_id
    ready = namespace_id.to_i > 0
    save!

    scope =
      if ready
        core_records.where(status: 'NotReady')
      else
        core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])
      end

    # TODO: Add scopes/methods in DatasetRecord to handle nil fields values transparently
    { institutionCode: institution_code, collectionCode: collection_code }.each do |term, code|
      scope =
        if code.nil?
          scope.where.not(
            id: core_records_fields.at(get_field_mapping(term)).select(:dataset_record_id)
          )
        else
          scope.where(
            id: core_records_fields.at(get_field_mapping(term)).with_value(code).select(:dataset_record_id)
          )
        end
    end

    status_sql =
      if ready
        "status = 'Ready', metadata = metadata - 'error_data'"
      else
        "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
      end

    scope.update_all(status_sql)
  end
end