Class: Catalog::DescriptionFromObservationMatrix

Inherits:
Object
  • Object
show all
Defined in:
lib/catalog/description_from_observation_matrix.rb

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(observation_matrix_id: nil, project_id: nil, include_descendants: nil, language_id: nil, keyword_ids: nil, observation_matrix_row_id: nil, otu_id: nil, similar_objects: []) ⇒ DescriptionFromObservationMatrix

Returns a new instance of DescriptionFromObservationMatrix.



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/catalog/description_from_observation_matrix.rb', line 107

# Builds the generated description and diagnosis for an OTU or a matrix row.
#
# All plain arguments are stored before any derived value is computed, so the
# helper methods called below see a fully populated object.
#
# @param observation_matrix_id [Integer, String, nil] matrix to use; may be
#   replaced by the row's matrix in #find_matrix when a row id is given
# @param project_id [Integer, String, nil] project used to scope the matrix lookup
# @param include_descendants [String, nil] compared against the string 'true'
# @param language_id [Integer, String, nil] language for descriptor/state names
# @param keyword_ids [String, nil] pipe-separated keyword ids ('1|2|3') used by
#   #descriptors_with_keywords to filter descriptors
# @param observation_matrix_row_id [Integer, String, nil] row to describe
# @param otu_id [Integer, String, nil] OTU to describe
# @param similar_objects [Array] initial value; overwritten by
#   #get_descriptor_hash whenever a matrix id is present
def initialize(
  observation_matrix_id: nil,
  project_id: nil,
  include_descendants: nil,
  language_id: nil,
  keyword_ids: nil,
  observation_matrix_row_id: nil,
  otu_id: nil,
  similar_objects: [])

  # raise if observation_matrix_id.blank? || project_id.blank?
  @observation_matrix_id = observation_matrix_id
  @project_id = project_id
  @observation_matrix_row_id = observation_matrix_row_id
  @include_descendants = include_descendants
  @language_id = language_id
  # Previously dropped: without this the keyword filter could never apply.
  @keyword_ids = keyword_ids
  @otu_id = otu_id
  # Previously dropped: keeps the documented default ([]) when no matrix is set.
  @similar_objects = similar_objects

  # find_matrix may overwrite @observation_matrix_id and @project_id from the row.
  @observation_matrix = find_matrix
  @language_to_use = language_to_use
  @descriptor_available_keywords = descriptor_available_keywords
  @descriptors_with_filter = descriptors_with_keywords
  @descriptor_available_languages = descriptor_available_languages
  @otu_id_filter_array = otu_id_array
  @collection_object_id_filter_array = collection_object_id_array
  ###main_logic
  @descriptor_hash = get_descriptor_hash
  @generated_description = get_description
  @generated_diagnosis = get_diagnosis
  ###delete temporary data
  @descriptors_with_filter = nil
  @descriptor_hash = nil
end

Instance Attribute Details

#collection_object_id_filter_arrayObject

Array of collection_object_ids



79
80
81
# File 'lib/catalog/description_from_observation_matrix.rb', line 79

# Reader for @collection_object_id_filter_array (array of collection_object ids).
def collection_object_id_filter_array
  @collection_object_id_filter_array
end

#collection_object_ids_count_hashObject

Hash of similar collection_objects



89
90
91
# File 'lib/catalog/description_from_observation_matrix.rb', line 89

# Reader for @collection_object_ids_count_hash.
# NOTE(review): this instance variable is not assigned in the code shown on this page.
def collection_object_ids_count_hash
  @collection_object_ids_count_hash
end

#descriptor_available_keywordsObject

Returns the list of all Tags used with the descriptors. Descriptors could be filtered by tag_id



59
60
61
# File 'lib/catalog/description_from_observation_matrix.rb', line 59

# Reader for @descriptor_available_keywords.
def descriptor_available_keywords
  @descriptor_available_keywords
end

#descriptor_available_languagesObject

Returns the list of available Languages used as translations for descriptors and character_states (if translations are available)



49
50
51
# File 'lib/catalog/description_from_observation_matrix.rb', line 49

# Reader for @descriptor_available_languages.
def descriptor_available_languages
  @descriptor_available_languages
end

#descriptor_hashObject

Temporary attribute. Used to generate the description and diagnosis



95
96
97
# File 'lib/catalog/description_from_observation_matrix.rb', line 95

# Reader for @descriptor_hash; the constructor resets it to nil once the
# description and diagnosis have been generated.
def descriptor_hash
  @descriptor_hash
end

#descriptorsObject

Temporary attribute. Used for validation.



54
55
56
# File 'lib/catalog/description_from_observation_matrix.rb', line 54

# Reader for @descriptors.
def descriptors
  @descriptors
end

#descriptors_with_filterObject

Temporary attribute. Used for validation. List of descriptors reduced by keyword_ids



69
70
71
# File 'lib/catalog/description_from_observation_matrix.rb', line 69

# Reader for @descriptors_with_filter; the constructor resets it to nil once
# the description and diagnosis have been generated.
def descriptors_with_filter
  @descriptors_with_filter
end

#generated_descriptionObject

Returns generated description for OTU



100
101
102
# File 'lib/catalog/description_from_observation_matrix.rb', line 100

# Reader for @generated_description (the result of #get_description).
def generated_description
  @generated_description
end

#generated_diagnosisObject

Returns generated diagnosis for OTU



105
106
107
# File 'lib/catalog/description_from_observation_matrix.rb', line 105

# Reader for @generated_diagnosis (the result of #get_diagnosis).
def generated_diagnosis
  @generated_diagnosis
end

#include_descendantsObject

Optional attribute to include descendant otus and collection_objects



17
18
19
# File 'lib/catalog/description_from_observation_matrix.rb', line 17

# Reader for @include_descendants; other methods compare it against the string 'true'.
def include_descendants
  @include_descendants
end

#language_idObject

Optional attribute to display the descriptors and character_states in a particular language (when translations are available)



27
28
29
# File 'lib/catalog/description_from_observation_matrix.rb', line 27

# Reader for @language_id.
def language_id
  @language_id
end

#language_to_useObject

Returns Language as an object if the language_id was provided (used to display descriptors in a particular language)



64
65
66
# File 'lib/catalog/description_from_observation_matrix.rb', line 64

# Reader for @language_to_use.
def language_to_use
  @language_to_use
end

#observation_matrixObject

Returns observation_matrix as an object



44
45
46
# File 'lib/catalog/description_from_observation_matrix.rb', line 44

# Reader for @observation_matrix (the result of #find_matrix).
def observation_matrix
  @observation_matrix
end

#observation_matrix_idObject

Optional attribute to build the description



12
13
14
# File 'lib/catalog/description_from_observation_matrix.rb', line 12

# Reader for @observation_matrix_id; #find_matrix may replace the value with
# the id (as a String) of the matrix that owns the given row.
def observation_matrix_id
  @observation_matrix_id
end

#observation_matrix_row_idObject

Optional attribute to provide a rowID



32
33
34
# File 'lib/catalog/description_from_observation_matrix.rb', line 32

# Reader for @observation_matrix_row_id.
def observation_matrix_row_id
  @observation_matrix_row_id
end

#otu_idObject

Optional attribute to provide an otuID



37
38
39
# File 'lib/catalog/description_from_observation_matrix.rb', line 37

# Reader for @otu_id; #otu_id_array replaces the value with the row's otu_id
# when a matrix row id is present.
def otu_id
  @otu_id
end

#otu_id_filter_arrayObject

Array of otu_ids in the @otu_filter



74
75
76
# File 'lib/catalog/description_from_observation_matrix.rb', line 74

# Reader for @otu_id_filter_array (the result of #otu_id_array).
def otu_id_filter_array
  @otu_id_filter_array
end

#project_idObject

Required attribute to build the key



22
23
24
# File 'lib/catalog/description_from_observation_matrix.rb', line 22

# Reader for @project_id; #find_matrix may replace the value with the
# project id (as a String) of the matrix that owns the given row.
def project_id
  @project_id
end

#similar_objectsObject

Hash of similar otus_ids and collection_objects_ids



84
85
86
# File 'lib/catalog/description_from_observation_matrix.rb', line 84

# Reader for @similar_objects; #get_descriptor_hash fills it with hashes of the
# form {otu_id:/collection_object_id:, similarities:} sorted by descending similarities.
def similar_objects
  @similar_objects
end

Instance Method Details

#collection_object_id_arrayObject



212
213
214
215
216
217
218
219
220
# File 'lib/catalog/description_from_observation_matrix.rb', line 212

# Collects the collection_object ids whose observations feed the description.
#
# * With include_descendants == 'true' and a non-empty OTU filter: ids of all
#   collection objects whose first-position taxon determination points to one
#   of the filtered OTUs.
# * Otherwise, with a matrix row id: the row's collection_object_id
#   (0 when the row has none, because nil.to_i == 0).
# * Otherwise: [0].
#
# @return [Array<Integer>]
def collection_object_id_array
  # Must be a comparison (==). The previous single '=' assigned a boolean to
  # @include_descendants, clobbering the flag and ignoring its actual value.
  if @include_descendants == 'true' && !@otu_id_filter_array.blank?
    CollectionObject.joins(:taxon_determinations).where(taxon_determinations: {position: 1, otu_id: @otu_id_filter_array}).pluck(:id)
  elsif !@observation_matrix_row_id.blank?
    [ObservationMatrixRow.find(@observation_matrix_row_id.to_i)&.collection_object_id.to_i]
  else
    [0]
  end
end

#descriptors_with_keywordsObject



191
192
193
194
195
196
197
# File 'lib/catalog/description_from_observation_matrix.rb', line 191

# Returns the descriptors, narrowed to those tagged with one of the keyword
# ids in @keyword_ids (a pipe-separated list such as '1|2|3') when it is set.
# Without @keyword_ids the unfiltered descriptors are returned.
def descriptors_with_keywords
  return descriptors unless @keyword_ids

  wanted_keyword_ids = @keyword_ids.to_s.split('|').map { |raw_id| raw_id.to_i }
  descriptors.joins(:tags).where('tags.keyword_id IN (?)', wanted_keyword_ids)
end

#find_matrixObject



140
141
142
143
144
145
146
147
148
149
150
# File 'lib/catalog/description_from_observation_matrix.rb', line 140

# Resolves the observation matrix to work with.
#
# Returns nil when neither a usable matrix id (blank or '0' are treated as
# missing) nor a row id is available. With only a matrix id, the matrix is
# looked up inside @project_id. With a row id, the row's matrix is used and
# @observation_matrix_id / @project_id are overwritten (as Strings) from it.
def find_matrix
  matrix_id_missing = @observation_matrix_id.blank? || @observation_matrix_id.to_s == '0'
  row_id_missing = @observation_matrix_row_id.blank?
  return nil if matrix_id_missing && row_id_missing

  if row_id_missing
    return ObservationMatrix.where(project_id: @project_id).find(@observation_matrix_id)
  end

  matrix = ObservationMatrixRow.find(@observation_matrix_row_id)&.observation_matrix
  @observation_matrix_id = matrix&.id.to_s
  @project_id = matrix&.project_id.to_s
  matrix
end

#get_descriptionObject



347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
# File 'lib/catalog/description_from_observation_matrix.rb', line 347

# Renders @descriptor_hash as a plain-text description.
#
# Descriptors without recorded data are skipped: qualitative ones with no
# character states, continuous/sample ones whose :min is still the 999999
# sentinel, and presence/absence ones whose :presence is nil.
# Consecutive entries that share the same descriptor name are joined with
# ', '; a new name starts a new sentence ('. ').
#
# @return [String, nil] nil when @descriptor_hash is empty; '' when every
#   descriptor was skipped; otherwise the description ending with '.'
def get_description
  return nil if @descriptor_hash.empty?

  or_separator = ' or '
  # Separator between min and max. It was an empty string before, which fused
  # the two numbers together (1.5 and 3 rendered as "1.53").
  range_separator = '–'
  language = @language_id.blank? ? nil : @language_id.to_i
  str = +''
  descriptor_name = ''

  @descriptor_hash.each_value do |d_value|
    descriptor = d_value[:descriptor]
    measured = ['Descriptor::Continuous', 'Descriptor::Sample'].include?(descriptor.type)

    next if (descriptor.type == 'Descriptor::Qualitative' && d_value[:char_states].empty?) ||
      (measured && d_value[:min] == 999999) ||
      (descriptor.type == 'Descriptor::PresenceAbsence' && d_value[:presence].nil?)

    descriptor_name_new = descriptor.target_name(:description, language)
    if descriptor_name != descriptor_name_new
      descriptor_name = descriptor_name_new
      str << '. ' unless str.blank?
      str << "#{descriptor_name} "
    else
      str << ', '
    end

    case descriptor.type
    when 'Descriptor::Qualitative'
      state_names = d_value[:char_states].map { |cs| cs.target_name(:description, language) }
      str << state_names.uniq.join(or_separator)
    when 'Descriptor::Continuous', 'Descriptor::Sample'
      value =
        if d_value[:min] == d_value[:max]
          format('%g', d_value[:min])
        else
          "#{format('%g', d_value[:min])}#{range_separator}#{format('%g', d_value[:max])}"
        end
      # default_unit may be nil; compact drops it so no trailing space is added.
      str << [value, descriptor.default_unit].compact.join(' ')
    when 'Descriptor::PresenceAbsence'
      str << d_value[:presence].to_s
    end
  end

  str << '.' unless str.blank?
  str
end

#get_descriptor_hashObject



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
# File 'lib/catalog/description_from_observation_matrix.rb', line 222

# Builds a hash keyed by descriptor id that summarises what was observed for
# the target OTUs / collection objects (@otu_id_filter_array and
# @collection_object_id_filter_array) across @descriptors_with_filter.
#
# Each entry holds :descriptor, :similar_otu_ids, :similar_collection_object_ids
# and, depending on the descriptor type, :char_states/:char_states_ids,
# :min/:max, or :presence.
#
# Side effect: when @observation_matrix_id is not nil, @similar_objects is set
# to a list of {otu_id:/collection_object_id:, similarities:} hashes for the
# other rows of the matrix, sorted by descending number of matching descriptors.
#
# @return [Hash] the descriptor hash described above
def get_descriptor_hash
  descriptor_ids = @descriptors_with_filter.collect{|i| i.id}
  t = ['Observation::Continuous', 'Observation::PresenceAbsence', 'Observation::Sample']
  # 0 is appended to both id lists; presumably so the IN (?) clauses never
  # receive an empty list — TODO confirm.
  otu_ids = @otu_id_filter_array.to_a + [0]
  collection_object_ids = @collection_object_id_filter_array.to_a + [0]
  descriptor_hash = {}
  # Seed one entry per descriptor with "nothing observed yet" values.
  @descriptors_with_filter.each do |d|
    descriptor_hash[d.id] = {}
    descriptor_hash[d.id][:descriptor] = d
    descriptor_hash[d.id][:similar_otu_ids] = []
    descriptor_hash[d.id][:similar_collection_object_ids] = []
    descriptor_hash[d.id][:char_states] = [] if d.type == 'Descriptor::Qualitative'
    descriptor_hash[d.id][:char_states_ids] = [] if d.type == 'Descriptor::Qualitative'
    # 999999 / -999999 are sentinels: any real value replaces them below, and
    # #get_description / #get_diagnosis skip descriptors whose :min is still 999999.
    descriptor_hash[d.id][:min] = 999999 if d.type == 'Descriptor::Continuous' || d.type == 'Descriptor::Sample' # min value used as continuous or sample
    descriptor_hash[d.id][:max] = -999999 if d.type == 'Descriptor::Continuous' || d.type == 'Descriptor::Sample' # max value used as continuous or sample
    descriptor_hash[d.id][:presence] = nil if d.type == 'Descriptor::PresenceAbsence'
  end

  # Character states observed on the target OTUs / collection objects.
  char_states = CharacterState.joins(:observations).
    where('character_states.descriptor_id IN (?) AND (otu_id IN (?) OR collection_object_id IN (?) )', descriptor_ids, otu_ids, collection_object_ids).
    uniq
  char_states.each do |cs|
    descriptor_hash[cs.descriptor_id][:char_states].append(cs)
    descriptor_hash[cs.descriptor_id][:char_states_ids].append(cs.id)
  end

  # Continuous, sample and presence/absence observations on the targets:
  # widen :min/:max to cover every value and fold presence into
  # 'present', 'absent' or 'present or absent'.
  observations = Observation.where('observations.type IN (?) AND descriptor_id IN (?) AND (otu_id IN (?) OR collection_object_id IN (?) )', t, descriptor_ids, otu_ids, collection_object_ids).uniq
  observations.each do |o|
    if !o.continuous_value.nil?
      descriptor_hash[o.descriptor_id][:min] = o.continuous_value if descriptor_hash[o.descriptor_id][:min] > o.continuous_value
      descriptor_hash[o.descriptor_id][:max] = o.continuous_value if descriptor_hash[o.descriptor_id][:max] < o.continuous_value
    elsif !o.sample_min.nil?
      descriptor_hash[o.descriptor_id][:min] = o.sample_min if descriptor_hash[o.descriptor_id][:min] > o.sample_min
      if o.sample_max
        descriptor_hash[o.descriptor_id][:max] = o.sample_max if descriptor_hash[o.descriptor_id][:max] < o.sample_max
      else
        # No sample_max recorded: sample_min also serves as the upper bound.
        descriptor_hash[o.descriptor_id][:max] = o.sample_min if descriptor_hash[o.descriptor_id][:max] < o.sample_min
      end
    elsif !o.presence.nil?
      if o.presence == true && descriptor_hash[o.descriptor_id][:presence].nil?
        descriptor_hash[o.descriptor_id][:presence] = 'present'
      elsif o.presence == false && descriptor_hash[o.descriptor_id][:presence].nil?
        descriptor_hash[o.descriptor_id][:presence] = 'absent'
      elsif o.presence == true && descriptor_hash[o.descriptor_id][:presence] == 'absent'
        descriptor_hash[o.descriptor_id][:presence] = 'present or absent'
      elsif o.presence == false && descriptor_hash[o.descriptor_id][:presence] == 'present'
        descriptor_hash[o.descriptor_id][:presence] = 'present or absent'
      end
    end
  end

  # Same values as computed at the top of the method (recomputed here).
  otu_ids = @otu_id_filter_array.to_a + [0]
  collection_object_ids = @collection_object_id_filter_array.to_a + [0]

  # Second phase: find the other rows of the matrix that cannot be told apart
  # from the targets, descriptor by descriptor.
  unless @observation_matrix_id.nil?
    # row id => number of descriptors on which that row matches the targets
    otu_ids_count = {}
    collection_object_ids_count = {}
    # t is reused: it now holds descriptor types rather than observation types.
    t = ['Descriptor::Continuous', 'Descriptor::PresenceAbsence', 'Descriptor::Sample']

    # Qualitative descriptors x matrix rows other than the targets (NOT IN ...),
    # left-joined to the row's observation and character state, if any.
    char_states = ObservationMatrix.
      select('descriptors.id AS d_id, character_states.id AS cs_id, observations.id AS o_id, observations.otu_id AS o_otu_id, observations.collection_object_id AS o_collection_object_id, observation_matrix_rows.otu_id AS r_otu_id, observation_matrix_rows.collection_object_id AS r_collection_object_id, observations.character_state_id AS o_cs_id').
      left_outer_joins(:descriptors).
      left_outer_joins(:observation_matrix_rows).
      joins('LEFT OUTER JOIN observations ON observations.descriptor_id = descriptors.id AND (observations.otu_id = observation_matrix_rows.otu_id OR observations.collection_object_id = observation_matrix_rows.collection_object_id )').
      joins('LEFT OUTER JOIN character_states ON character_states.id = observations.character_state_id').
      where("descriptors.type = 'Descriptor::Qualitative'").
      where('descriptors.id IN (?)', descriptor_ids).
      where('(observation_matrix_rows.otu_id NOT IN (?) OR observation_matrix_rows.otu_id IS NULL)', otu_ids).
      where('(observation_matrix_rows.collection_object_id NOT IN (?) OR observation_matrix_rows.collection_object_id IS NULL)', collection_object_ids).
      where('observation_matrices.id = ?', @observation_matrix_id)
    char_states.each do |cs|
      # Count each row at most once per descriptor.
      if !descriptor_hash[cs.d_id][:similar_otu_ids].include?(cs.r_otu_id) &&
         !descriptor_hash[cs.d_id][:similar_collection_object_ids].include?(cs.r_collection_object_id)
        # A row is similar when it shares a character state with the targets
        # or has no observation at all for this descriptor (o_id is nil).
        if !cs.r_otu_id.nil? && (descriptor_hash[cs.d_id][:char_states_ids].include?(cs.cs_id) || cs.o_id.nil?)
          descriptor_hash[cs.d_id][:similar_otu_ids].append(cs.r_otu_id)
          otu_ids_count[cs.r_otu_id] = otu_ids_count[cs.r_otu_id].to_i + 1
        elsif !cs.r_collection_object_id.nil? && (descriptor_hash[cs.d_id][:char_states_ids].include?(cs.cs_id) || cs.o_id.nil?)
          descriptor_hash[cs.d_id][:similar_collection_object_ids].append(cs.r_collection_object_id)
          collection_object_ids_count[cs.r_collection_object_id] = collection_object_ids_count[cs.r_collection_object_id].to_i + 1
        end
      end
    end

    # Same idea for continuous, sample and presence/absence descriptors.
    observations = ObservationMatrix.
      select('descriptors.id AS d_id, observations.id AS o_id, observations.otu_id AS o_otu_id, observations.collection_object_id AS o_collection_object_id, observations.type, observations.continuous_value, observations.sample_min, observations.sample_max, observations.presence AS o_presence, observation_matrix_rows.otu_id AS r_otu_id, observation_matrix_rows.collection_object_id AS r_collection_object_id, observations.character_state_id AS o_cs_id').
      left_outer_joins(:descriptors).
      left_outer_joins(:observation_matrix_rows).
      joins('LEFT OUTER JOIN observations ON observations.descriptor_id = descriptors.id AND (observations.otu_id = observation_matrix_rows.otu_id OR observations.collection_object_id = observation_matrix_rows.collection_object_id )').
      where('descriptors.type IN (?)', t).
      where('descriptors.id IN (?)', descriptor_ids).
      where('(observation_matrix_rows.otu_id NOT IN (?) OR observation_matrix_rows.otu_id IS NULL)', otu_ids).
      where('(observation_matrix_rows.collection_object_id NOT IN (?) OR observation_matrix_rows.collection_object_id IS NULL)', collection_object_ids).
      where('observation_matrices.id = ?', @observation_matrix_id)
    observations.each do |o|
      if !descriptor_hash[o.d_id][:similar_otu_ids].include?(o.r_otu_id) &&
         !descriptor_hash[o.d_id][:similar_collection_object_ids].include?(o.r_collection_object_id)
        # yes == the row matches the targets for this descriptor: it has no
        # recorded value, its value falls inside the targets' [:min, :max]
        # range, or its presence flag is covered by the targets' :presence text.
        yes = false
        if o.continuous_value.nil? && o.sample_min.nil? && o.o_presence.nil?
          yes = true
        elsif !o.continuous_value.nil? && o.continuous_value >= descriptor_hash[o.d_id][:min] && o.continuous_value <= descriptor_hash[o.d_id][:max]
          yes = true
        elsif !o.sample_max.nil? && o.sample_max >= descriptor_hash[o.d_id][:min] && o.sample_max <= descriptor_hash[o.d_id][:max]
          yes = true
        elsif !o.sample_min.nil? && o.sample_min >= descriptor_hash[o.d_id][:min] && o.sample_min <= descriptor_hash[o.d_id][:max]
          yes = true
        elsif !o.o_presence.nil? && o.o_presence == true && descriptor_hash[o.d_id][:presence].to_s.include?('present')
          yes = true
        elsif !o.o_presence.nil? && o.o_presence == false && descriptor_hash[o.d_id][:presence].to_s.include?('absent')
          yes = true
        end
        if !o.r_otu_id.nil? && yes
          descriptor_hash[o.d_id][:similar_otu_ids].append(o.r_otu_id)
          otu_ids_count[o.r_otu_id] = otu_ids_count[o.r_otu_id].to_i + 1
        elsif !o.r_collection_object_id.nil? && yes
          descriptor_hash[o.d_id][:similar_collection_object_ids].append(o.r_collection_object_id)
          collection_object_ids_count[o.r_collection_object_id] = collection_object_ids_count[o.r_collection_object_id].to_i + 1
        end
      end
    end
    # Merge both counters into one list, most similar rows first.
    @similar_objects = (otu_ids_count.to_a.map{|i| {otu_id: i[0], similarities: i[1]}} + collection_object_ids_count.to_a.map{|i| {collection_object_id: i[0], similarities: i[1]}}).sort_by{|j| -j[:similarities]}
  end

  descriptor_hash
end

#get_diagnosisObject



393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
# File 'lib/catalog/description_from_observation_matrix.rb', line 393

# Renders a diagnosis: the shortest leading run of descriptors (ordered by how
# few other matrix rows share their values) that separates the target from
# every other row.
#
# While walking the ordered descriptors, the set of rows still
# indistinguishable from the target (remaining_otus / remaining_co) is
# intersected with each descriptor's similar rows; output stops as soon as
# both sets are empty.
#
# @return [String, nil] nil when @descriptor_hash is empty; the diagnosis text
#   ending with '.' ('' if nothing was rendered); or the fixed message
#   'Cannot be separated from other rows in the matrix!' when some rows remain
def get_diagnosis
  return nil if @descriptor_hash.empty?

  descriptor_array = @descriptor_hash.values.sort_by { |i| i[:similar_otu_ids].count + i[:similar_collection_object_ids].count }

  # Number of similar rows two descriptor entries have in common.
  shared_rows = lambda do |a, b|
    (a[:similar_otu_ids] & b[:similar_otu_ids]).count +
      (a[:similar_collection_object_ids] & b[:similar_collection_object_ids]).count
  end

  # Reordering pass: when entry i overlaps more with i-2 than with i-1, the two
  # preceding entries are swapped.
  # NOTE(review): this used to be wrapped in a `while j < 3` loop that never
  # reset the index, so only a single pass ever ran. That single pass is kept
  # here to preserve the resulting order — confirm whether three passes were intended.
  (2...descriptor_array.count).each do |i|
    count1 = shared_rows.call(descriptor_array[i], descriptor_array[i - 1])
    count2 = shared_rows.call(descriptor_array[i], descriptor_array[i - 2])
    next unless count2 > count1

    descriptor_array[i - 2], descriptor_array[i - 1] = descriptor_array[i - 1], descriptor_array[i - 2]
  end

  or_separator = ' or '
  # Separator between min and max. It was an empty string before, which fused
  # the two numbers together (1.5 and 3 rendered as "1.53").
  range_separator = '–'
  language = @language_id.blank? ? nil : @language_id.to_i
  str = +''
  descriptor_name = ''
  remaining_otus = descriptor_array.first[:similar_otu_ids].to_a
  remaining_co = descriptor_array.first[:similar_collection_object_ids].to_a

  descriptor_array.each do |d_value|
    # The remaining sets are narrowed even when the descriptor is skipped below.
    remaining_otus &= d_value[:similar_otu_ids].to_a
    remaining_co &= d_value[:similar_collection_object_ids].to_a

    descriptor = d_value[:descriptor]
    measured = ['Descriptor::Continuous', 'Descriptor::Sample'].include?(descriptor.type)

    # Skip descriptors with no recorded data (999999 is the "no value" sentinel).
    next if (descriptor.type == 'Descriptor::Qualitative' && d_value[:char_states].empty?) ||
      (measured && d_value[:min] == 999999) ||
      (descriptor.type == 'Descriptor::PresenceAbsence' && d_value[:presence].nil?)

    descriptor_name_new = descriptor.target_name(:description, language)
    if descriptor_name != descriptor_name_new
      descriptor_name = descriptor_name_new
      str << '. ' unless str.blank?
      str << "#{descriptor_name} "
    else
      str << ', '
    end

    case descriptor.type
    when 'Descriptor::Qualitative'
      state_names = d_value[:char_states].map { |cs| cs.target_name(:description, language) }
      str << state_names.uniq.join(or_separator)
    when 'Descriptor::Continuous', 'Descriptor::Sample'
      value =
        if d_value[:min] == d_value[:max]
          format('%g', d_value[:min])
        else
          "#{format('%g', d_value[:min])}#{range_separator}#{format('%g', d_value[:max])}"
        end
      # default_unit may be nil; compact drops it so no trailing space is added.
      str << [value, descriptor.default_unit].compact.join(' ')
    when 'Descriptor::PresenceAbsence'
      str << d_value[:presence].to_s
    end

    break if remaining_otus.empty? && remaining_co.empty?
  end

  if remaining_otus.empty? && remaining_co.empty?
    str << '.' unless str.blank?
  else
    str = 'Cannot be separated from other rows in the matrix!'
  end
  str
end

#otu_id_arrayObject



199
200
201
202
203
204
205
206
207
208
209
210
# File 'lib/catalog/description_from_observation_matrix.rb', line 199

# Resolves the OTU ids whose observations feed the description.
#
# When a matrix row id is present, @otu_id is first replaced by that row's
# otu_id (which may be nil). Returns nil when no OTU id is available, the
# OTU together with its descendants when @include_descendants == 'true',
# and otherwise a one-element array holding @otu_id as stored.
def otu_id_array
  unless @observation_matrix_row_id.blank?
    row = ObservationMatrixRow.find(@observation_matrix_row_id.to_i)
    @otu_id = row&.otu_id
  end

  return nil if @otu_id.blank?
  return [@otu_id] unless @include_descendants == 'true'

  Otu.self_and_descendants_of(@otu_id.to_i).pluck(:id)
end