Class: ImportDataset::DarwinCore::Occurrences

Inherits:

ImportDataset::DarwinCore

Object
ActiveRecord::Base
ApplicationRecord
ImportDataset
ImportDataset::DarwinCore
ImportDataset::DarwinCore::Occurrences

show all

Defined in:: app/models/import_dataset/darwin_core/occurrences.rb

Constant Summary collapse

# TODO: Can the occurrenceID requirement be dropped? Should other fields be added here?
MINIMUM_FIELD_SET = ['occurrenceID', 'scientificName', 'basisOfRecord'].freeze

Constants inherited from ImportDataset::DarwinCore

CHECKLIST_ROW_TYPE, OCCURRENCES_ROW_TYPE

Instance Attribute Summary

Attributes inherited from ImportDataset

#description, #metadata, #source_content_type, #source_file_name, #source_file_size, #source_updated_at, #status

Instance Method Summary collapse

#add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil) ⇒ Object
#add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil) ⇒ Object
#containerize_dup_cat_no? ⇒ Boolean
#core_records_class ⇒ Object
#core_records_identifier_name ⇒ Object
#enable_organization_determiners? ⇒ Boolean
#enable_organization_determiners_alt_name? ⇒ Boolean
#get_catalog_number_collection_code_namespace_mapping(collection_code) ⇒ Object private
#get_catalog_number_namespace(institution_code, collection_code) ⇒ Object
#get_catalog_number_namespace_mapping(institution_code, collection_code) ⇒ Object private
#get_event_id_namespace ⇒ Object
#perform_staging ⇒ Object

Stages core (Occurrence) records and all extension records.
#require_catalog_number_match_verbatim? ⇒ Boolean
#require_tripcode_match_verbatim? ⇒ Boolean
#require_type_material_success? ⇒ Boolean
#restrict_to_existing_nomenclature? ⇒ Boolean
#update_catalog_number_collection_code_namespace(collection_code, namespace_id) ⇒ Object
#update_catalog_number_namespace(institution_code, collection_code, namespace_id) ⇒ Object

Constructor Details

This class inherits a constructor from ImportDataset::DarwinCore

Instance Method Details

#add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil) ⇒ `Object`

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 190

# Registers a [collection_code, namespace_id] catalog number mapping unless one already exists.
#
# Nothing is added when +collection_code+ is nil or when a mapping for that code is already
# stored. After an insertion the list is re-sorted by collection code (compared as strings).
# The dataset is saved in every case, whether or not a mapping was added.
#
# @param collection_code [Object, nil] collectionCode value to register (presumably a String — confirm against callers)
# @param namespace_id [Object, nil] value stored as the second element of the pair; nil by default
# @return [Object] whatever save! returns
def add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil)
  missing = !collection_code.nil? &&
    !get_catalog_number_collection_code_namespace_mapping(collection_code)

  if missing
    # The mapping lookup tolerates an absent list, so create it on demand here
    # instead of failing with NoMethodError on nil.
    mappings = (metadata['catalog_numbers_collection_code_namespaces'] ||= [])
    mappings << [collection_code, namespace_id]
    mappings.sort! { |a, b| a[0].to_s <=> b[0].to_s }
  end

  save!
end

#add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil) ⇒ `Object`

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 182

# Registers a [[institution_code, collection_code], namespace_id] catalog number mapping
# unless one already exists for that code pair.
#
# After an insertion the list is re-sorted by the code pair, each code compared as a string
# (so nil sorts like an empty string). The dataset is saved in every case, whether or not a
# mapping was added.
#
# @param institution_code [Object, nil] institutionCode half of the key
# @param collection_code [Object, nil] collectionCode half of the key
# @param namespace_id [Object, nil] value stored as the second element of the entry; nil by default
# @return [Object] whatever save! returns
def add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil)
  if get_catalog_number_namespace_mapping(institution_code, collection_code).nil?
    # The mapping lookup tolerates an absent list, so create it on demand here
    # instead of failing with NoMethodError on nil.
    mappings = (metadata['catalog_numbers_namespaces'] ||= [])
    mappings << [[institution_code, collection_code], namespace_id]
    mappings.sort! { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) }
  end

  save!
end

#containerize_dup_cat_no? ⇒ `Boolean`

Returns:

(Boolean)



198
199
200

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 198

# Reports whether the 'containerize_dup_cat_no' import setting is switched on.
#
# @return [Boolean] true when metadata['import_settings']['containerize_dup_cat_no'] is truthy,
#   false when it is falsy or absent
def containerize_dup_cat_no?
  setting = metadata.dig('import_settings', 'containerize_dup_cat_no')
  setting ? true : false
end

#core_records_class ⇒ `Object`



12
13
14

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 12

# Class used for the core records of this dataset type.
#
# @return [Class] DatasetRecord::DarwinCore::Occurrence
def core_records_class = DatasetRecord::DarwinCore::Occurrence

#core_records_identifier_name ⇒ `Object`



16
17
18

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 16

# Name of the field that identifies a core record.
#
# @return [String] always 'occurrenceID'
def core_records_identifier_name = 'occurrenceID'

#enable_organization_determiners? ⇒ `Boolean`

Returns:

(Boolean)



218
219
220

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 218

# Reports whether the 'enable_organization_determiners' import setting is switched on.
#
# @return [Boolean] true when metadata['import_settings']['enable_organization_determiners']
#   is truthy, false when it is falsy or absent
def enable_organization_determiners?
  setting = metadata.dig('import_settings', 'enable_organization_determiners')
  setting ? true : false
end

#enable_organization_determiners_alt_name? ⇒ `Boolean`

Returns:

(Boolean)



222
223
224

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 222

# Reports whether the 'enable_organization_determiners_alt_name' import setting is switched on.
#
# @return [Boolean] true when
#   metadata['import_settings']['enable_organization_determiners_alt_name'] is truthy,
#   false when it is falsy or absent
def enable_organization_determiners_alt_name?
  setting = metadata.dig('import_settings', 'enable_organization_determiners_alt_name')
  setting ? true : false
end

#get_catalog_number_collection_code_namespace_mapping(collection_code) ⇒ `Object` (private)



232
233
234

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 232

# Looks up the stored mapping entry whose first element equals the given collection code.
#
# @param collection_code [Object] value compared with == against the first element of each entry
# @return [Array, nil] the first matching entry of
#   metadata['catalog_numbers_collection_code_namespaces'], or nil when nothing matches or
#   the list is absent
def get_catalog_number_collection_code_namespace_mapping(collection_code)
  mappings = metadata['catalog_numbers_collection_code_namespaces']
  return nil if mappings.nil?

  mappings.find { |entry| entry[0] == collection_code }
end

#get_catalog_number_namespace(institution_code, collection_code) ⇒ `Object`

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 111

# Resolves the namespace value recorded for a pair of institution and collection codes.
#
# The mapping for the exact (institution_code, collection_code) pair is consulted first; when
# it is absent or holds a falsy value, the mapping for the collection code alone is used.
#
# @param institution_code [Object, nil]
# @param collection_code [Object, nil]
# @return [Object, nil] second element of the winning mapping entry, or nil when neither
#   mapping yields a value
def get_catalog_number_namespace(institution_code, collection_code)
  pair_mapping = get_catalog_number_namespace_mapping(institution_code, collection_code)
  pair_namespace = pair_mapping&.at(1)
  return pair_namespace if pair_namespace

  code_mapping = get_catalog_number_collection_code_namespace_mapping(collection_code)
  code_mapping&.at(1)
end

#get_catalog_number_namespace_mapping(institution_code, collection_code) ⇒ `Object` (private)



228
229
230

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 228

# Looks up the stored mapping entry keyed by the given institution/collection code pair.
#
# @param institution_code [Object, nil]
# @param collection_code [Object, nil]
# @return [Array, nil] the first entry of metadata['catalog_numbers_namespaces'] whose first
#   element equals [institution_code, collection_code], or nil when nothing matches or the
#   list is absent
def get_catalog_number_namespace_mapping(institution_code, collection_code)
  wanted_key = [institution_code, collection_code]
  mappings = metadata['catalog_numbers_namespaces']
  return nil if mappings.nil?

  mappings.find { |entry| entry[0] == wanted_key }
end

#get_event_id_namespace ⇒ `Object`

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 20

# Returns the Namespace recorded for eventID values of this dataset, creating one when needed.
#
# The namespace id is read from metadata['namespaces']['eventID']. When no id is stored, or no
# Namespace can be found for it, a new Namespace is created with a name built from the dataset
# description, the current project's name and a random hex suffix; its id is then written back
# to the metadata and the dataset is saved. The result is memoized in
# @event_id_identifier_namespace.
#
# @return [Namespace] the existing or newly created namespace
def get_event_id_namespace
  id = metadata.dig('namespaces', 'eventID')

  if id.nil? || (@event_id_identifier_namespace ||= Namespace.find_by(id:)).nil?
    random = SecureRandom.hex(4)
    project_name = Project.find(Current.project_id).name
    namespace_name = "eventID namespace for \"#{description}\" dataset in \"#{project_name}\" project [#{random}]"

    @event_id_identifier_namespace = Namespace.create!(
      name: namespace_name,
      short_name: "eventID-#{random}",
      verbatim_short_name: 'eventID',
      delimiter: ':'
    )

    # The read above tolerates a missing 'namespaces' hash (dig), so the write must as well;
    # otherwise a dataset without that key fails here after the Namespace was already created.
    (metadata['namespaces'] ||= {})['eventID'] = @event_id_identifier_namespace.id
    save!
  end

  @event_id_identifier_namespace
end

#perform_staging ⇒ `Object`

Stages core (Occurrence) records and all extension records.

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 43

# Stages core (Occurrence) records and all extension records.
#
# Steps, in order:
# 1. Reads records and headers from the source file via get_records.
# 2. Persists the core/extension headers and an empty catalog number namespace list in the
#    dataset metadata.
# 3. Creates one DatasetRecord::DarwinCore::Occurrence per core row. A row is 'Ready' when its
#    catalogNumber is blank or it carries a 'TW:Namespace:catalogNumber' value; otherwise it is
#    'NotReady' and gets an error message asking for a namespace.
# 4. Creates one DatasetRecord::DarwinCore::Extension per extension row, always 'Unsupported'.
# 5. Stores the distinct (institutionCode, collectionCode) pairs and distinct collectionCodes
#    seen in the core rows as namespace mappings without a namespace, sorted by code, and saves.
#
# @return [Object] whatever the final save! returns
def perform_staging
  records, headers = get_records(source.path)

  update!(metadata:
    metadata.merge({
      core_headers: headers[:core],
      extensions_headers: headers[:extensions],
      catalog_numbers_namespaces: []
    })
  )

  catalog_numbers_namespaces = Set[]
  catalog_numbers_collection_code_namespaces = Set[]

  records[:core].each do |core_record|
    record_metadata = { basisOfRecord: core_record['basisOfRecord'] }

    dwc_occurrence = DatasetRecord::DarwinCore::Occurrence.new(import_dataset: self)
    # Only the values are stored, in row order; the row may not be a plain Hash, so rely on
    # pair iteration rather than Hash#values.
    dwc_occurrence.initialize_data_fields(core_record.map { |_name, value| value })

    institution_code = dwc_occurrence.get_field_value(:institutionCode)
    collection_code = dwc_occurrence.get_field_value(:collectionCode)

    # User will select namespace through UI. TODO: Should we attempt guessing here?
    catalog_numbers_namespaces << [[institution_code, collection_code], nil]
    catalog_numbers_collection_code_namespaces << [collection_code, nil]

    if dwc_occurrence.get_field_value(:catalogNumber).blank? || dwc_occurrence.get_field_value('TW:Namespace:catalogNumber').present?
      dwc_occurrence.status = 'Ready'
    else
      dwc_occurrence.status = 'NotReady'
      record_metadata['error_data'] = { messages: { catalogNumber: ['Record cannot be imported until namespace is set.'] } }
    end

    dwc_occurrence.metadata = record_metadata
    dwc_occurrence.save!
  end

  records[:extensions].each do |extension_type, extension_records|
    extension_records.each do |extension_record|
      dwc_extension = DatasetRecord::DarwinCore::Extension.new(import_dataset: self)
      dwc_extension.initialize_data_fields(extension_record.map { |_name, value| value })
      dwc_extension.status = 'Unsupported'
      dwc_extension.metadata = { 'type' => extension_type }

      dwc_extension.save!
    end
  end

  # Codes are compared as strings so that nil codes sort alongside String codes.
  metadata.merge!(
    catalog_numbers_namespaces: catalog_numbers_namespaces.sort { |a, b| a[0].map(&:to_s) <=> b[0].map(&:to_s) }
  )
  metadata.merge!(
    catalog_numbers_collection_code_namespaces: catalog_numbers_collection_code_namespaces.sort { |a, b| a[0].to_s <=> b[0].to_s }
  )

  save!
end

#require_catalog_number_match_verbatim? ⇒ `Boolean`

Returns:

(Boolean)



214
215
216

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 214

# Reports whether the 'require_catalog_number_match_verbatim' import setting is switched on.
#
# @return [Boolean] true when
#   metadata['import_settings']['require_catalog_number_match_verbatim'] is truthy,
#   false when it is falsy or absent
def require_catalog_number_match_verbatim?
  setting = metadata.dig('import_settings', 'require_catalog_number_match_verbatim')
  setting ? true : false
end

#require_tripcode_match_verbatim? ⇒ `Boolean`

Returns:

(Boolean)



210
211
212

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 210

# Reports whether the 'require_tripcode_match_verbatim' import setting is switched on.
#
# @return [Boolean] true when metadata['import_settings']['require_tripcode_match_verbatim']
#   is truthy, false when it is falsy or absent
def require_tripcode_match_verbatim?
  setting = metadata.dig('import_settings', 'require_tripcode_match_verbatim')
  setting ? true : false
end

#require_type_material_success? ⇒ `Boolean`

Returns:

(Boolean)



206
207
208

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 206

# Reports whether the 'require_type_material_success' import setting is switched on.
#
# @return [Boolean] true when metadata['import_settings']['require_type_material_success']
#   is truthy, false when it is falsy or absent
def require_type_material_success?
  setting = metadata.dig('import_settings', 'require_type_material_success')
  setting ? true : false
end

#restrict_to_existing_nomenclature? ⇒ `Boolean`

Returns:

(Boolean)



202
203
204

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 202

# Reports whether the 'restrict_to_existing_nomenclature' import setting is switched on.
#
# @return [Boolean] true when metadata['import_settings']['restrict_to_existing_nomenclature']
#   is truthy, false when it is falsy or absent
def restrict_to_existing_nomenclature?
  setting = metadata.dig('import_settings', 'restrict_to_existing_nomenclature')
  setting ? true : false
end

#update_catalog_number_collection_code_namespace(collection_code, namespace_id) ⇒ `Object`

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 152

# Sets the namespace recorded for a collection code and updates the status of affected records.
#
# Inside a transaction the mapping entry for +collection_code+ receives +namespace_id+ and the
# dataset is saved. Then, for core records whose collectionCode field has that value:
# * when +namespace_id+ converts to a positive integer, records currently 'NotReady' become
#   'Ready' and their 'error_data' metadata is removed;
# * otherwise, records that are not 'NotReady', 'Imported' or 'Unsupported' become 'NotReady'
#   with a "namespace is set" error message, except records whose institutionCode belongs to
#   an (institutionCode, collectionCode) mapping that has a namespace.
#
# @param collection_code [Object, nil] collection code whose mapping is updated; nil is a no-op
# @param namespace_id [Object, nil] new namespace value; readiness is decided by to_i > 0
# @return [Object, nil] nil when +collection_code+ is nil, otherwise the result of update_all
# @raise [ArgumentError] when no mapping entry exists for +collection_code+
def update_catalog_number_collection_code_namespace(collection_code, namespace_id)
  return if collection_code.nil? # No support for mapping blank data at this time

  transaction do
    mapping = get_catalog_number_collection_code_namespace_mapping(collection_code)
    if mapping.nil?
      # Fail with a descriptive error instead of a NoMethodError on nil below.
      raise ArgumentError,
        "No catalog number namespace mapping for collectionCode #{collection_code.inspect}"
    end

    mapping[1] = namespace_id
    save!

    # Ids of the records carrying this collection code; used by both branches.
    with_collection_code = core_records_fields
      .at(get_field_mapping(:collectionCode))
      .having_value(collection_code)
      .select(:dataset_record_id)

    if namespace_id.to_i > 0
      core_records
        .where(status: 'NotReady')
        .where(id: with_collection_code)
        .update_all("status = 'Ready', metadata = metadata - 'error_data'")
    else
      # Institution codes that still have their own namespace for this collection code:
      # their records stay importable and must not be demoted.
      institution_codes = (metadata['catalog_numbers_namespaces'] || [])
        .select { |m| m[0][1] == collection_code && m[1] }
        .map { |m| m[0][0] }

      with_mapped_institution = core_records_fields
        .at(get_field_mapping(:institutionCode))
        .having_values(institution_codes)
        .select(:dataset_record_id)

      core_records
        .where.not(status: ['NotReady', 'Imported', 'Unsupported'])
        .where(id: with_collection_code)
        .where.not(id: with_mapped_institution)
        .update_all(
          "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
        )
    end
  end
end

#update_catalog_number_namespace(institution_code, collection_code, namespace_id) ⇒ `Object`

# File 'app/models/import_dataset/darwin_core/occurrences.rb', line 116

# Sets the namespace recorded for an (institutionCode, collectionCode) pair and updates the
# status of the records matching that pair.
#
# Inside a transaction the mapping entry for the pair receives +namespace_id+ and the dataset
# is saved. Records are then narrowed per code: a non-nil code must equal the record's field
# value; a nil code excludes every record returned by the field lookup for that field
# (presumably selecting records with no stored value — confirm against core_records_fields).
# * When +namespace_id+ converts to a positive integer, matching 'NotReady' records become
#   'Ready' and their 'error_data' metadata is removed.
# * Otherwise, matching records that are not 'NotReady', 'Imported' or 'Unsupported' become
#   'NotReady' with a "namespace is set" error message.
#
# @param institution_code [Object, nil]
# @param collection_code [Object, nil]
# @param namespace_id [Object, nil] new namespace value; readiness is decided by to_i > 0
# @return [Object] the result of update_all
# @raise [ArgumentError] when no mapping entry exists for the code pair
def update_catalog_number_namespace(institution_code, collection_code, namespace_id)
  transaction do
    mapping = get_catalog_number_namespace_mapping(institution_code, collection_code)
    if mapping.nil?
      # Fail with a descriptive error instead of a NoMethodError on nil below.
      raise ArgumentError,
        "No catalog number namespace mapping for institutionCode #{institution_code.inspect} " \
        "and collectionCode #{collection_code.inspect}"
    end

    mapping[1] = namespace_id
    ready = namespace_id.to_i > 0
    save!

    query = if ready
      core_records.where(status: 'NotReady')
    else
      core_records.where.not(status: ['NotReady', 'Imported', 'Unsupported'])
    end

    # TODO: Add scopes/methods in DatasetRecord to handle nil fields values transparently
    { institutionCode: institution_code, collectionCode: collection_code }.each do |field, code|
      field_rows = core_records_fields.at(get_field_mapping(field))

      query = if code.nil?
        query.where.not(id: field_rows.select(:dataset_record_id))
      else
        query.where(id: field_rows.having_value(code).select(:dataset_record_id))
      end
    end

    query.update_all(ready ?
      "status = 'Ready', metadata = metadata - 'error_data'" :
      "status = 'NotReady', metadata = jsonb_set(metadata, '{error_data}', '{ \"messages\": { \"catalogNumber\": [\"Record cannot be imported until namespace is set, see \\\"Settings\\\".\"] } }')"
    )
  end
end

Class: ImportDataset::DarwinCore::Occurrences

Constant Summary collapse

Constants inherited from ImportDataset::DarwinCore

Instance Attribute Summary

Attributes inherited from ImportDataset

Instance Method Summary collapse

Methods inherited from ImportDataset::DarwinCore

Methods inherited from ImportDataset

Methods included from Shared::OriginRelationship

Methods included from Shared::IsData

Methods included from Housekeeping

Methods inherited from ApplicationRecord

Constructor Details

Instance Method Details

#add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil) ⇒ Object

#add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil) ⇒ Object

#containerize_dup_cat_no? ⇒ Boolean

#core_records_class ⇒ Object

#core_records_identifier_name ⇒ Object

#enable_organization_determiners? ⇒ Boolean

#enable_organization_determiners_alt_name? ⇒ Boolean

#get_catalog_number_collection_code_namespace_mapping(collection_code) ⇒ Object (private)

#get_catalog_number_namespace(institution_code, collection_code) ⇒ Object

#get_catalog_number_namespace_mapping(institution_code, collection_code) ⇒ Object (private)

#get_event_id_namespace ⇒ Object

#perform_staging ⇒ Object

#require_catalog_number_match_verbatim? ⇒ Boolean

#require_tripcode_match_verbatim? ⇒ Boolean

#require_type_material_success? ⇒ Boolean

#restrict_to_existing_nomenclature? ⇒ Boolean

#update_catalog_number_collection_code_namespace(collection_code, namespace_id) ⇒ Object

#update_catalog_number_namespace(institution_code, collection_code, namespace_id) ⇒ Object

#add_catalog_number_collection_code_namespace(collection_code, namespace_id = nil) ⇒ `Object`

#add_catalog_number_namespace(institution_code, collection_code, namespace_id = nil) ⇒ `Object`

#containerize_dup_cat_no? ⇒ `Boolean`

#core_records_class ⇒ `Object`

#core_records_identifier_name ⇒ `Object`

#enable_organization_determiners? ⇒ `Boolean`

#enable_organization_determiners_alt_name? ⇒ `Boolean`

#get_catalog_number_collection_code_namespace_mapping(collection_code) ⇒ `Object` (private)

#get_catalog_number_namespace(institution_code, collection_code) ⇒ `Object`

#get_catalog_number_namespace_mapping(institution_code, collection_code) ⇒ `Object` (private)

#get_event_id_namespace ⇒ `Object`

#perform_staging ⇒ `Object`

#require_catalog_number_match_verbatim? ⇒ `Boolean`

#require_tripcode_match_verbatim? ⇒ `Boolean`

#require_type_material_success? ⇒ `Boolean`

#restrict_to_existing_nomenclature? ⇒ `Boolean`

#update_catalog_number_collection_code_namespace(collection_code, namespace_id) ⇒ `Object`

#update_catalog_number_namespace(institution_code, collection_code, namespace_id) ⇒ `Object`