Class: DatasetRecord

Inherits:
ApplicationRecord show all
Includes:
Housekeeping, Shared::IsData
Defined in:
app/models/dataset_record.rb

Overview

A DatasetRecord is the unit of data (typically a table row) from an ImportDataset.

Direct Known Subclasses

DarwinCore

Defined Under Namespace

Classes: DarwinCore

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Shared::IsData

#errors_excepting, #full_error_messages_excepting, #identical, #is_community?, #is_destroyable?, #is_editable?, #is_in_use?, #is_in_users_projects?, #metamorphosize, #similar

Methods included from Housekeeping

#has_polymorphic_relationship?

Methods inherited from ApplicationRecord

transaction_with_retry

Instance Attribute Details

#metadata ⇒ Hash

Returns data about the record. No particular structure is enforced, any subclass may store metadata (typically to aid the import process).

Returns:

  • (Hash)

    data about the record. No particular structure is enforced, any subclass may store metadata (typically to aid the import process).



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'app/models/dataset_record.rb', line 11

# A DatasetRecord is the unit of data (typically a table row) from an ImportDataset.
#
# Field values are kept in memory as an array indexed by position and are written to
# DatasetRecordField rows by the create/update/destroy callbacks declared below.
class DatasetRecord < ApplicationRecord
  include Housekeeping
  include Shared::IsData

  belongs_to :import_dataset

  validates :type, presence: true
  validates :status, presence: true

  after_create :create_fields
  after_update :update_fields
  before_destroy :destroy_fields

  # has_many has serious performance consequences when deleting an import dataset, so a plain
  # instance method is used instead of an association.
  # @return the DatasetRecordField query restricted to this record
  def dataset_record_fields
    DatasetRecordField.where(dataset_record: self)
  end

  # Sets up internal representation of data fields with field data, skipping those fields that are blank for performance and space-efficiency reasons.
  # Change tracking is reset, so no field is flagged as changed afterwards.
  # @param field_data [Array] ordered list of values, each value representing a cell/field of the record.
  def initialize_data_fields(field_data)
    @data_fields = field_data.map { |value| value.blank? ? nil : value }
    @data_field_changed = Array.new(@data_fields.size)
  end

  # Returns the in-memory field values indexed by position, loading them from the
  # DatasetRecordField rows on first access. Positions without a row are nil.
  # @return [Array]
  def data_fields
    return @data_fields if @data_fields

    loaded = []
    dataset_record_fields.pluck(:position, :value).each { |position, value| loaded[position] = value }

    @data_fields = loaded
    @data_field_changed = Array.new(loaded.size)
    @data_fields
  end

  # @param index [Integer] position of the field
  # @return the value held at that position (nil when there is none)
  def get_data_field(index)
    data_fields[index]
  end

  # Replaces the value at the given position and flags the position as changed.
  # Does nothing when the fields are frozen (see #frozen_fields?).
  # @param index [Integer] position of the field
  # @param value new value for the field
  # @raise whatever #data_field_changed raises; the previous value is restored first
  def set_data_field(index, value)
    return if frozen_fields?

    previous = data_fields[index]
    data_fields[index] = value

    begin
      data_field_changed(index, value)
      @data_field_changed[index] = true
    rescue
      # Roll back the in-memory value so a rejected change leaves no trace.
      data_fields[index] = previous
      raise
    end
  end

  # @return [Boolean] true when status is 'Imported', in which case #set_data_field ignores writes
  def frozen_fields?
    status == 'Imported'
  end

  private

  # Hook called by #set_data_field after a field value has been replaced; the base implementation does nothing.
  # An error raised here makes #set_data_field restore the previous value and re-raise.
  def data_field_changed(index, value)
    # Subclasses may re-implement to perform actions when a field changes
  end

  # Builds the attributes of the DatasetRecordField row for one field.
  # @param position [Integer] position of the field within the record
  # @param value value of the field
  # @return [Hash, nil] row attributes, or nil when value is nil or false
  def field_db_attributes(position, value)
    return unless value

    {
      position: position,
      value: value,
      dataset_record_id: id,
      project_id: project_id,
      import_dataset_id: import_dataset_id,
      encoded_dataset_record_type: DatasetRecordField.encode_record_type(self.class)
    }
  end

  # @return [Array<Hash>] row attributes for every field that has a value
  def fields_db_attributes
    data_fields.each_with_index.filter_map { |value, position| field_db_attributes(position, value) }
  end

  # after_create callback: bulk-inserts the field rows, skipping the query when there are none.
  def create_fields
    rows = fields_db_attributes
    DatasetRecordField.insert_all(rows) if rows.any?
  end

  # after_update callback: persists the fields flagged as changed.
  # Changed fields that are now blank have their rows deleted; the remaining changed fields are upserted.
  # Blank values are never upserted, since their rows would be deleted again immediately afterwards.
  def update_fields
    # Change tracking only exists once the fields were loaded or initialized.
    return if @data_field_changed.nil?

    changed_positions = @data_field_changed.each_index.select { |i| @data_field_changed[i] }
    delete_fields, upsert_positions = changed_positions.partition { |i| data_fields[i].blank? }
    upsert_fields = upsert_positions.map { |i| field_db_attributes(i, data_fields[i]) }

    DatasetRecordField.upsert_all(upsert_fields, unique_by: [:dataset_record_id, :position]) if upsert_fields.any?
    dataset_record_fields.where(position: delete_fields).delete_all if delete_fields.any?
  end

  # before_destroy callback: deletes every field row belonging to this record.
  def destroy_fields
    dataset_record_fields.delete_all
  end
end

#status ⇒ String

Returns current import status (e.g. Pending, Imported, Deleted, etc.).

Returns:

  • (String)

    current import status (e.g. Pending, Imported, Deleted, etc.)



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'app/models/dataset_record.rb', line 11

# A DatasetRecord is the unit of data (typically a table row) from an ImportDataset.
#
# Field values are kept in memory as an array indexed by position and are written to
# DatasetRecordField rows by the create/update/destroy callbacks declared below.
class DatasetRecord < ApplicationRecord
  include Housekeeping
  include Shared::IsData

  belongs_to :import_dataset

  validates :type, presence: true
  validates :status, presence: true

  after_create :create_fields
  after_update :update_fields
  before_destroy :destroy_fields

  # has_many has serious performance consequences when deleting an import dataset, so a plain
  # instance method is used instead of an association.
  # @return the DatasetRecordField query restricted to this record
  def dataset_record_fields
    DatasetRecordField.where(dataset_record: self)
  end

  # Sets up internal representation of data fields with field data, skipping those fields that are blank for performance and space-efficiency reasons.
  # Change tracking is reset, so no field is flagged as changed afterwards.
  # @param field_data [Array] ordered list of values, each value representing a cell/field of the record.
  def initialize_data_fields(field_data)
    @data_fields = field_data.map { |value| value.blank? ? nil : value }
    @data_field_changed = Array.new(@data_fields.size)
  end

  # Returns the in-memory field values indexed by position, loading them from the
  # DatasetRecordField rows on first access. Positions without a row are nil.
  # @return [Array]
  def data_fields
    return @data_fields if @data_fields

    loaded = []
    dataset_record_fields.pluck(:position, :value).each { |position, value| loaded[position] = value }

    @data_fields = loaded
    @data_field_changed = Array.new(loaded.size)
    @data_fields
  end

  # @param index [Integer] position of the field
  # @return the value held at that position (nil when there is none)
  def get_data_field(index)
    data_fields[index]
  end

  # Replaces the value at the given position and flags the position as changed.
  # Does nothing when the fields are frozen (see #frozen_fields?).
  # @param index [Integer] position of the field
  # @param value new value for the field
  # @raise whatever #data_field_changed raises; the previous value is restored first
  def set_data_field(index, value)
    return if frozen_fields?

    previous = data_fields[index]
    data_fields[index] = value

    begin
      data_field_changed(index, value)
      @data_field_changed[index] = true
    rescue
      # Roll back the in-memory value so a rejected change leaves no trace.
      data_fields[index] = previous
      raise
    end
  end

  # @return [Boolean] true when status is 'Imported', in which case #set_data_field ignores writes
  def frozen_fields?
    status == 'Imported'
  end

  private

  # Hook called by #set_data_field after a field value has been replaced; the base implementation does nothing.
  # An error raised here makes #set_data_field restore the previous value and re-raise.
  def data_field_changed(index, value)
    # Subclasses may re-implement to perform actions when a field changes
  end

  # Builds the attributes of the DatasetRecordField row for one field.
  # @param position [Integer] position of the field within the record
  # @param value value of the field
  # @return [Hash, nil] row attributes, or nil when value is nil or false
  def field_db_attributes(position, value)
    return unless value

    {
      position: position,
      value: value,
      dataset_record_id: id,
      project_id: project_id,
      import_dataset_id: import_dataset_id,
      encoded_dataset_record_type: DatasetRecordField.encode_record_type(self.class)
    }
  end

  # @return [Array<Hash>] row attributes for every field that has a value
  def fields_db_attributes
    data_fields.each_with_index.filter_map { |value, position| field_db_attributes(position, value) }
  end

  # after_create callback: bulk-inserts the field rows, skipping the query when there are none.
  def create_fields
    rows = fields_db_attributes
    DatasetRecordField.insert_all(rows) if rows.any?
  end

  # after_update callback: persists the fields flagged as changed.
  # Changed fields that are now blank have their rows deleted; the remaining changed fields are upserted.
  # Blank values are never upserted, since their rows would be deleted again immediately afterwards.
  def update_fields
    # Change tracking only exists once the fields were loaded or initialized.
    return if @data_field_changed.nil?

    changed_positions = @data_field_changed.each_index.select { |i| @data_field_changed[i] }
    delete_fields, upsert_positions = changed_positions.partition { |i| data_fields[i].blank? }
    upsert_fields = upsert_positions.map { |i| field_db_attributes(i, data_fields[i]) }

    DatasetRecordField.upsert_all(upsert_fields, unique_by: [:dataset_record_id, :position]) if upsert_fields.any?
    dataset_record_fields.where(position: delete_fields).delete_all if delete_fields.any?
  end

  # before_destroy callback: deletes every field row belonging to this record.
  def destroy_fields
    dataset_record_fields.delete_all
  end
end

Instance Method Details

#create_fields ⇒ Object (private)



94
95
96
97
# File 'app/models/dataset_record.rb', line 94

# Bulk-inserts the rows returned by fields_db_attributes through DatasetRecordField.insert_all.
# No query is issued when there is nothing to insert.
# @return the result of insert_all, or nil when nothing was inserted
def create_fields
  rows = fields_db_attributes
  return unless rows.any?

  DatasetRecordField.insert_all(rows)
end

#data_field_changed(index, value) ⇒ Object (private)



75
76
77
# File 'app/models/dataset_record.rb', line 75

# Hook called by set_data_field after the value at +index+ has been replaced with +value+.
# The base implementation does nothing. If an override raises, set_data_field restores the
# previous value and re-raises the error.
# @param index position of the field that changed
# @param value the new value of the field
def data_field_changed(index, value)
  # Subclasses may re-implement to perform actions when a field changes
end

#data_fields ⇒ Object



39
40
41
42
43
44
45
46
47
48
# File 'app/models/dataset_record.rb', line 39

# Returns the in-memory field values indexed by position.
# On first access the values are read from dataset_record_fields (position/value pairs) and the
# change-tracking array is created with one slot per position; later calls return the cached array.
# Positions for which no pair was read are nil.
# @return [Array]
def data_fields
  return @data_fields if @data_fields

  loaded = []
  dataset_record_fields.pluck(:position, :value).each do |position, value|
    loaded[position] = value
  end

  @data_fields = loaded
  @data_field_changed = Array.new(loaded.size)
  @data_fields
end

#dataset_record_fields ⇒ Object

A has_many association has serious performance consequences when deleting an import dataset, so a plain instance method is used instead.



25
26
27
# File 'app/models/dataset_record.rb', line 25

# Returns the DatasetRecordField query restricted to this record (the result of
# DatasetRecordField.where with this record as dataset_record).
# Written as a plain instance method rather than a has_many association because has_many has
# serious performance consequences when deleting an import dataset.
def dataset_record_fields
  DatasetRecordField.where(dataset_record: self)
end

#destroy_fields ⇒ Object (private)



109
110
111
# File 'app/models/dataset_record.rb', line 109

# Registered as the before_destroy callback: removes this record's field rows by calling
# delete_all on the dataset_record_fields query.
def destroy_fields
  dataset_record_fields.delete_all
end

#field_db_attributes(position, value) ⇒ Object (private)



79
80
81
82
83
84
85
86
87
88
# File 'app/models/dataset_record.rb', line 79

# Builds the attributes of the DatasetRecordField row for a single field.
# @param position position of the field within the record
# @param value value of the field
# @return [Hash, nil] the row attributes, or nil when value is nil or false
def field_db_attributes(position, value)
  return unless value

  {
    position: position,
    value: value,
    dataset_record_id: id,
    project_id: project_id,
    import_dataset_id: import_dataset_id,
    encoded_dataset_record_type: DatasetRecordField.encode_record_type(self.class)
  }
end

#fields_db_attributes ⇒ Object (private)



90
91
92
# File 'app/models/dataset_record.rb', line 90

# Collects the row attributes of every field, pairing each value with its position and
# dropping the fields for which field_db_attributes returns nothing.
# @return [Array] one entry per field that produced attributes, in position order
def fields_db_attributes
  data_fields.each_with_index.filter_map do |value, position|
    field_db_attributes(position, value)
  end
end

#frozen_fields? ⇒ Boolean

Returns:

  • (Boolean)


69
70
71
# File 'app/models/dataset_record.rb', line 69

# Tells whether the record's fields may no longer be modified.
# @return [Boolean] true when status is exactly 'Imported'
def frozen_fields?
  status == 'Imported'
end

#get_data_field(index) ⇒ Object



50
51
52
# File 'app/models/dataset_record.rb', line 50

# Reads a single field value from the array returned by data_fields.
# @param index position of the field
# @return the value at that position, or nil when none is held there
def get_data_field(index)
  data_fields[index]
end

#initialize_data_fields(field_data) ⇒ Object

Sets up internal representation of data fields with field data, skipping those fields that are blank for performance and space-efficiency reasons.

Parameters:

  • field_data (Array)

    ordered list of values, each value representing a cell/field of the record.



31
32
33
34
35
36
37
# File 'app/models/dataset_record.rb', line 31

# Replaces the in-memory field values with +field_data+, storing nil in place of every blank
# value, and resets change tracking to one unset slot per field.
# @param field_data [Array] ordered list of values, each value representing a cell/field of the record.
# @return [Array] the fresh change-tracking array
def initialize_data_fields(field_data)
  @data_fields = field_data.map { |value| value.blank? ? nil : value }
  @data_field_changed = Array.new(@data_fields.size)
end

#set_data_field(index, value) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'app/models/dataset_record.rb', line 54

# Replaces the value at +index+, notifies the data_field_changed hook and flags the position
# as changed. Nothing happens when frozen_fields? is true.
# @param index position of the field
# @param value new value of the field
# @return [true, nil] true once the change is recorded, nil when the fields are frozen
# @raise any error raised while recording the change; the previous value is put back first
def set_data_field(index, value)
  return if frozen_fields?

  previous = data_fields[index]
  data_fields[index] = value

  begin
    data_field_changed(index, value)
    @data_field_changed[index] = true
  rescue
    # Undo the in-memory write so a rejected change leaves the old value in place.
    data_fields[index] = previous
    raise
  end
end

#update_fields ⇒ Object (private)



99
100
101
102
103
104
105
106
107
# File 'app/models/dataset_record.rb', line 99

# Persists the fields flagged as changed since they were loaded or initialized.
# Changed fields whose value is now blank have their rows deleted; every other changed field is
# upserted (keyed by dataset_record_id and position).
# Blank values are never upserted, because their rows would be deleted again right afterwards.
# Nothing is done when no change tracking exists yet, i.e. the fields were never loaded or set.
def update_fields
  return if @data_field_changed.nil?

  changed_positions = @data_field_changed.each_index.select { |i| @data_field_changed[i] }
  delete_fields, upsert_positions = changed_positions.partition { |i| data_fields[i].blank? }
  upsert_fields = upsert_positions.map { |i| field_db_attributes(i, data_fields[i]) }

  DatasetRecordField.upsert_all(upsert_fields, unique_by: [:dataset_record_id, :position]) if upsert_fields.any?
  dataset_record_fields.where(position: delete_fields).delete_all if delete_fields.any?
end