Class: Otu

Overview

An Otu (loosely, operational taxonomic unit) can be thought of as a unit of study. In most cases an otu is a taxon.

An Otu is defined by its underlying data and may be labeled with a name (TaxonName). Otus are used to represent rows in matrices, taxon pages, individuals or populations, or arbitrary clusters of organisms (e.g. ‘unsorted specimens in this container’). Otus are a primary unit of work in TaxonWorks.

An OTU is labeled with a name, either one arbitrarily given or one specifically linked through a taxon_name_id.

TODO Semantics vs. taxon_name_id

Defined Under Namespace

Modules: DwcExtensions, Maps, MatrixHooks

Constant Summary collapse

GRAPH_ENTRY_POINTS =
[:asserted_distributions, :biological_associations, :common_names, :contents, :data_attributes, :observation_matrices].freeze

Constants included from SoftValidation

SoftValidation::ANCESTORS_WITH_SOFT_VALIDATIONS

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Shared::QueryBatchUpdate

#query_update

Methods included from Shared::IsData

#errors_excepting, #full_error_messages_excepting, #identical, #is_community?, #is_destroyable?, #is_editable?, #is_in_use?, #is_in_users_projects?, #metamorphosize, #similar

Methods included from Maps

#cached_map, #cached_map_geo_json, #cached_map_id, #cached_map_string, #create_cached_map, #quicker_cached_map

Methods included from MatrixHooks

#member_of_new_matrix_row_items, #member_of_old_matrix_row_items

Methods included from Shared::MatrixHooks::Member

#member_add_matrix_columns, #member_add_matrix_rows, #member_add_to_matrix_items, #member_of_new_matrix_column_items, #member_of_new_matrix_row_items, #member_of_old_matrix_column_items, #member_of_old_matrix_row_items, #member_remove_from_matrix_items, #member_remove_matrix_columns, #member_remove_matrix_rows, #member_synchronize_matrices, #member_update_matrix_items?

Methods included from Shared::AutoUuid

#create_object_uuid, #generate_uuid_if_required

Methods included from Shared::OriginRelationship

#new_objects, #old_objects, #reject_origin_relationships, #set_origin

Methods included from Shared::HasPapertrail

#attribute_updated, #attribute_updater

Methods included from Shared::Confidences

#reject_confidences

Methods included from Shared::Loanable

#all_loan_items, #all_loans, #container_loan_items, #container_loaned?, #container_loans, #container_times_loaned, #current_loan, #current_loan_item, #has_been_loaned?, #is_loanable?, #loan_return_date, #loaned_in_container, #on_loan?, #times_loaned

Methods included from Shared::Depictions

#has_depictions?, #image_array=, #reject_depictions, #reject_images

Methods included from Shared::Tags

#reject_tags, #tag_with, #tagged?, #tagged_with?

Methods included from Shared::Notes

#concatenated_notes_string, #reject_notes

Methods included from Shared::Identifiers

#dwc_occurrence_id, #identified?, #next_by_identifier, #previous_by_identifier, #reject_identifiers, #uri, #uuid

Methods included from Shared::DataAttributes

#import_attributes, #internal_attributes, #keyword_value_hash, #reject_data_attributes

Methods included from Shared::Citations

#cited?, #mark_citations_for_destruction, #nomenclature_date, #origin_citation_source_id, #reject_citations, #requires_citation?, #sources_by_topic_id

Methods included from SoftValidation

#clear_soft_validations, #fix_for, #fix_soft_validations, #soft_fixed?, #soft_valid?, #soft_validate, #soft_validated?, #soft_validations, #soft_validators

Methods included from Housekeeping

#has_polymorphic_relationship?

Methods inherited from ApplicationRecord

transaction_with_retry

Instance Attribute Details

#name ⇒ String

A label for the OTU.

Returns:

  • (String)


24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
# File 'app/models/otu.rb', line 24

class Otu < ApplicationRecord
  include Housekeeping
  include SoftValidation
  # include Shared::AlternateValues # No alternate values on Name!! Consequences - search cumbersome, names not unified and controllable ... others?
  include Shared::Citations
  include Shared::DataAttributes
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::Depictions
  include Shared::Loanable
  include Shared::Confidences
  include Shared::Observations
  include Shared::BiologicalAssociations
  include Shared::HasPapertrail
  include Shared::OriginRelationship

  include Shared::AutoUuid
  include Shared::Taxonomy
  include Otu::DwcExtensions

  include Shared::MatrixHooks::Member
  include Otu::MatrixHooks
  include Otu::Maps

  include Shared::DwcOccurrenceHooks
  include Shared::IsData

  include Shared::QueryBatchUpdate

  is_origin_for 'Sequence', 'Extract'

  # Frozen list of association names.
  # NOTE(review): presumably the associations used as starting points when walking
  # the data graph from an Otu -- confirm against the consumers of this constant.
  GRAPH_ENTRY_POINTS = [:asserted_distributions, :biological_associations, :common_names, :contents, :data_attributes, :observation_matrices].freeze

  belongs_to :taxon_name, inverse_of: :otus

  # Why?  Could be combination too.
  # Shares the taxon_name_id foreign key with :taxon_name, but is scoped to
  # rows whose type is 'Protonym'.
  belongs_to :protonym, -> { where(type: 'Protonym') }, foreign_key: :taxon_name_id

  has_many :in_scope_observation_matrices, inverse_of: :otu, class_name: 'ObservationMatrix'

  has_many :asserted_distributions, inverse_of: :otu, dependent: :restrict_with_error

  has_many :taxon_determinations, inverse_of: :otu, dependent: :destroy # TODO: change

  # TODO, move to infer BiologicalCollectionObject
  # Both associations go through taxon_determinations and differ only in source_type.
  has_many :collection_objects, through: :taxon_determinations, source: :taxon_determination_object, inverse_of: :otus, source_type: 'CollectionObject'
  has_many :field_occurrences, through: :taxon_determinations, source: :taxon_determination_object, inverse_of: :otus, source_type: 'FieldOccurrence'

  # Reached through :protonym, i.e. via the Protonym-scoped association above.
  has_many :type_materials, through: :protonym

  # TODO: no longer true since they can come through Otu as well
  has_many :extracts, through: :collection_objects, source: :extracts
  has_many :sequences, through: :extracts, source: :derived_sequences

  has_many :collecting_events, -> { distinct }, through: :collection_objects
  has_many :common_names, dependent: :destroy, inverse_of: :otu
  has_many :collection_profiles, inverse_of: :otu, dependent: :restrict_with_error # Do not destroy old profiles

  has_many :contents, inverse_of: :otu, dependent: :destroy
  has_many :public_contents, inverse_of: :otu, dependent: :destroy

  has_many :geographic_areas_from_asserted_distributions, through: :asserted_distributions, source: :geographic_area
  has_many :geographic_areas_from_collecting_events, through: :collecting_events, source: :geographic_area
  has_many :georeferences, through: :collecting_events

  has_many :content_topics, through: :contents, source: :topic

  # Relationships in which this Otu is the subject ...
  has_many :otu_relationships, foreign_key: :subject_otu_id, inverse_of: :subject_otu
  # ... and those in which it is the object.
  has_many :related_otu_relationships, class_name: 'OtuRelationship', foreign_key: :object_otu_id, inverse_of: :object_otu

  has_many :leads, inverse_of: :otu, dependent: :restrict_with_error

  # Exact-match scopes on the two labelling attributes.
  scope :with_taxon_name_id, -> (taxon_name_id) { where(taxon_name_id:) }
  scope :with_name, -> (name) { where(name:) }

  # Hard validation: see #check_required_fields.
  validate :check_required_fields

  # Soft validations: see #sv_taxon_name and #sv_duplicate_otu.
  soft_validate(:sv_taxon_name, set: :taxon_name)
  soft_validate(:sv_duplicate_otu, set: :duplicate_otu)

  accepts_nested_attributes_for :common_names, allow_destroy: true

  # @return Scope
  def self.alphabetically
    # Otus together with the cached name of their taxon name, ordered by that
    # cached name ascending.
    includes(:taxon_name)
      .select('otus.*, taxon_names.cached')
      .references(:taxon_names)
      .order('taxon_names.cached ASC')
  end

  # @param [Integer] otu_id
  # @param [String] rank_class
  # @return [Scope]
  #    Otu.joins(:taxon_name).where(taxon_name: q).to_sql
  def self.self_and_descendants_of(otu_id, rank_class = nil)
    # `find` on the inner-joined scope raises (it never returns nil) when the
    # Otu does not exist or has no taxon name, so the fallback has to be a
    # rescue rather than an `else` branch. Mirrors Otu.coordinate_otus.
    begin
      otu = Otu.joins(:taxon_name).find(otu_id)
    rescue ActiveRecord::RecordNotFound
      # no taxon name just return self in scope
      return Otu.where(id: otu_id)
    end

    # this also covers synonyms of self
    scope = joins(:taxon_name).where(
      'cached_valid_taxon_name_id IN (?)',
      otu.taxon_name.self_and_descendants.pluck(:id)
    )

    # Optionally restrict to taxon names of one rank.
    rank_class.nil? ? scope : scope.where('taxon_names.rank_class = ?', rank_class)
  end

  # @return [Otu::ActiveRecordRelation]
  #
  # All OTUs that are synonymous/same/matching target, for either
  #    historical and pragmatic (i.e. share the same `taxon_name_id`), or
  #    nomenclatural reasons (are synonyms of the taxon name). Includes self.
  #
  # TODO: Replace with Queries::Otu::Filter
  #
  def self.coordinate_otus(otu_id)
    target = Otu.joins(:taxon_name).find(otu_id)
    valid_name_id = target.taxon_name.cached_valid_taxon_name_id

    otus = Otu.arel_table
    names = TaxonName.arel_table

    # Inner join otus to the taxon names that resolve to the same valid name as the target.
    same_valid_name = otus[:taxon_name_id].eq(names[:id])
      .and(names[:cached_valid_taxon_name_id].eq(valid_name_id))

    Otu.joins(otus.join(names, Arel::Nodes::InnerJoin).on(same_valid_name).join_sources)
  rescue ActiveRecord::RecordNotFound
    # The joined lookup found nothing: fall back to a scope on the given id only.
    Otu.where(id: otu_id)
  end

  # TODO: Replace with Queries::Otu::Filter
  # TODO: This is coordinate_otus with children,
  #       it should probably be renamed coordinate.
  # @return [Otu::ActiveRecordRelation]
  #   all OTUs linked to the taxon_name_id, its descendants, and
  #   any synonym of any of the previous
  #   linked directly to the taxon name
  #   !! Invalid taxon_name_ids return nothing
  #   !! Taxon names with synonyms return the OTUs of their synonyms
  # @param taxon_name_id [The id of a valid TaxonName]
  def self.descendant_of_taxon_name(taxon_name_id = [])
    # Accepts a single id or a (possibly nested) list of ids.
    ancestor_ids = [taxon_name_id].flatten.compact.uniq

    otus = Otu.arel_table
    names = TaxonName.arel_table
    hierarchies = TaxonNameHierarchy.arel_table

    # otus -> taxon_names -> taxon_name_hierarchies, matching the hierarchy's
    # descendant to the cached valid name of the Otu's taxon name.
    joined = otus
      .join(names, Arel::Nodes::InnerJoin)
      .on(otus[:taxon_name_id].eq(names[:id]))
      .join(hierarchies, Arel::Nodes::InnerJoin)
      .on(names[:cached_valid_taxon_name_id].eq(hierarchies[:descendant_id]))

    in_ancestors = hierarchies[:ancestor_id].in(ancestor_ids).to_sql

    Otu.joins(joined.join_sources).where(in_ancestors)
  end

  # @param otu_ids [Array<Integer>]
  # @return [Array<Integer>]
  #   the unique ids of every Otu returned by coordinate_otus for any of the given ids
  def self.coordinate_otu_ids(otu_ids = [])
    otu_ids.flat_map { |otu_id| ::Otu.coordinate_otus(otu_id).pluck(:id) }.uniq
  end

  # TODO: replace with filter
  # return [Scope] the Otus bound to that taxon name and its descendants
  def self.for_taxon_name(taxon_name)
    # A String or Integer is treated as an id to look up; anything else is
    # used as the taxon name itself.
    tn = case taxon_name
         when String, Integer then TaxonName.find(taxon_name)
         else taxon_name
         end

    Otu.joins(taxon_name: [:ancestor_hierarchies]).where(taxon_name_hierarchies: { ancestor_id: tn.id })
  end

  # TODO: This need to be renamed to reflect "simple" association
  # @param file [String, #to_path] a file with one Otu name per line
  # @return [Array<Otu>]
  #   unsaved Otus, one per non-blank line, named with the stripped line
  def self.batch_preview(file: nil, ** args)
    # f     = CSV.read(file, headers: true, col_sep: "\t", skip_blanks: true, header_converters: :symbol)
    # A local accumulator is used (not a class-level ivar) so that results are
    # never shared between calls.
    otus = []
    # Block form so that the file handle is closed even if building an Otu raises.
    File.open(file) do |io|
      io.each_line do |row|
        name = row.strip
        next if name.blank?
        otus.push(Otu.new(name: name))
      end
    end
    otus
  end

  # @param otus [Hash] each value is the attributes for one new Otu
  # @return [Array<Otu>, false]
  #   the saved Otus, or false when anything raised (all saves share one transaction)
  def self.batch_create(otus: {}, ** args)
    created = []
    begin
      Otu.transaction do
        otus.each_key do |key|
          created << Otu.new(otus[key]).tap(&:save!)
        end
      end
    rescue
      return false
    end
    created
  end

  # Batch update

  # @params params [Hash]
  #   { otu_query: {},
  #     otu_filter_query: {},
  #     async_cutoff: 1
  #   }
  def self.batch_update(params)
    request = QueryBatchRequest.new(
      async_cutoff: params[:async_cutoff] || 26,
      klass: 'Otu',
      object_filter_params: params[:otu_query],
      object_params: params[:otu],
      preview: params[:preview]
    )

    # Two distinct taxon_name_id values (nil counts as one) are enough to
    # tell the cases below apart.
    name_ids = request.filter.all.select(:taxon_name_id).distinct.limit(2).pluck(:taxon_name_id)
    assigned = name_ids.compact

    cap, reason =
      if name_ids.empty? || assigned.size > 1
        [25, '> 1 taxon name id']
      elsif assigned.empty?
        # only records without a taxon name
        [10000, 'Maximum allowed for empty records.']
      else
        # exactly one taxon name id, possibly mixed with records that have none
        [2000, 'Maximum allowed for 1 unique taxon name id.']
      end

    request.cap_reason = reason
    request.cap = cap

    query_batch_update(request)
  end

  # @param used_on [String] required, one of `AssertedDistribution`, `Content`, `BiologicalAssociation`, `TaxonDetermination`
  # @return [Array]
  #   ids of the max 10 most recently used otus, as `used_on`
  def self.used_recently(user_id, project_id, used_on = '')
    source = case used_on
             when 'AssertedDistribution' then AssertedDistribution.arel_table
             when 'Content' then ::Content.arel_table
             when 'BiologicalAssociation' then BiologicalAssociation.arel_table
             when 'TaxonDetermination' then TaxonDetermination.arel_table
             else return Otu.none
             end

    # Biological associations reference the Otu polymorphically as their
    # object; every other source table has a plain otu_id column.
    association = used_on == 'BiologicalAssociation'
    otu_column = association ? 'biological_association_object_id' : 'otu_id'

    touched_recently = source['updated_at'].gt(1.week.ago)
    if association
      touched_recently = touched_recently.and(source['biological_association_object_type'].eq('Otu'))
    end

    # Select manager over the rows this user updated in this project, aliased
    # so it can be joined as a derived table.
    recent = source.project(source[otu_column], source['updated_at']).from(source)
      .where(touched_recently)
      .where(source['updated_by_id'].eq(user_id))
      .where(source['project_id'].eq(project_id))
      .order(source['updated_at'].desc)
      .as('recent_t')

    on_otu_id = Arel::Nodes::On.new(recent[otu_column].eq(Otu.arel_table['id']))

    Otu.joins(Arel::Nodes::InnerJoin.new(recent, on_otu_id)).pluck(:id).uniq
  end

  # @params target [String] required, one of nil, `AssertedDistribution`, `Content`, `BiologicalAssociation`, 'TaxonDetermination'
  # @return [Hash] otus optimized for user selection
  def self.select_optimized(user_id, project_id, target = nil)
    recent_ids = used_recently(user_id, project_id, target)

    scope = Otu.where(project_id: project_id).includes(:taxon_name) # faster than eager_load(), even with n+1

    by_name = ->(otus) { otus.sort { |a, b| a.otu_name <=> b.otu_name } }

    # Otus this user created in the last 3 hours, most recently updated first.
    # Built on demand so each use evaluates its own time window.
    just_created = lambda do
      scope.where(created_by_id: user_id, created_at: 3.hours.ago..Time.now).order('updated_at DESC')
    end

    result = {
      quick: [],
      pinboard: scope.pinned_by(user_id).to_a,
      recent: []
    }

    if target && !recent_ids.empty?
      result[:recent] = by_name.call(
        (scope.where(id: recent_ids.first(10)).to_a + just_created.call.limit(3).to_a).uniq
      )
      result[:quick] = by_name.call(
        (
          scope.pinned_by(user_id).to_a +
          just_created.call.limit(1).to_a +
          scope.where(id: recent_ids.first(4)).to_a
        ).uniq
      )
    else
      # No target, or nothing recently used on it: fall back to project-wide recency.
      result[:recent] = by_name.call(scope.order(updated_at: :desc).limit(10).to_a)
      result[:quick] = by_name.call(scope.pinned_by(user_id).to_a)
    end

    result
  end

  # @return [Scope]
  #   the collection objects of this Otu restricted to taxon determinations at position 1
  def current_collection_objects
    first_position = { taxon_determinations: { position: 1 } }
    collection_objects.where(first_position)
  end

  # @return [Boolean]
  #   whether or not this otu is coordinate (see coordinate_otus) with the otu identified by otu_id
  def coordinate_with?(otu_id)
    # Is self among the Otus coordinate with otu_id?
    coordinate = Otu.coordinate_otus(otu_id)
    coordinate.where(otus: { id: id }).any?
  end

  # TODO: Deprecate for helper method, HTML does not belong here
  #   this is also a costly sort because it n+1 to taxon_name
  def otu_name
    # The explicit label wins; otherwise fall back to the taxon name's cached
    # HTML name with author and year; nil when neither is available.
    return name if name.present?

    taxon_name.cached_html_name_and_author_year unless taxon_name_id.nil?
  end

  # TODO: move to helper method likely
  def distribution_geoJSON
    # One feature collection per data source, aggregated under :distribution.
    collections = [
      Gis::GeoJSON.feature_collection(geographic_areas_from_asserted_distributions, :asserted_distributions),
      Gis::GeoJSON.feature_collection(collecting_events, :collecting_events_georeferences),
      Gis::GeoJSON.feature_collection(geographic_areas_from_collecting_events, :collecting_events_geographic_area)
    ]

    Gis::GeoJSON.aggregation(collections, :distribution)
  end

  # TODO: needs spec
  # A convenience method to wrap coordinate_otus and descendant_of_taxon_name
  # @return Scope
  def coordinate_otus_with_children
    # Without a taxon name there are no descendants to walk.
    return Otu.coordinate_otus(id) if taxon_name_id.nil?

    Otu.descendant_of_taxon_name(taxon_name.valid_taxon_name.id) # TODO: why not taxon_name.cached_valid_taxon_name_id
  end

  # @return [Array]
  #   of ancestral otu_ids
  # !! This method does not fork, as soon as 2 ancestors are
  # !! hit the list terminates.
  # @param prefer_unlabelled_otus [Boolean]
  #   forwarded to every #parent_otu_id lookup along the chain (it was
  #   previously accepted but ignored, with `true` always being used)
  def ancestor_otu_ids(prefer_unlabelled_otus: true)
    ids = []
    parent_id = parent_otu_id(prefer_unlabelled_otus: prefer_unlabelled_otus)
    # Walk upwards one parent at a time until an Otu has no parent.
    while parent_id
      ids.push(parent_id)
      parent_id = Otu.find(parent_id).parent_otu_id(prefer_unlabelled_otus: prefer_unlabelled_otus)
    end
    ids
  end

  # @return [Array]
  #   all biological associations this Otu is part of
  def all_biological_associations
    # !! If self relationships are ever made possible this needs a DISTINCT clause
    #
    # The id is passed as a named bind instead of being interpolated, so an
    # unsaved Otu (nil id) produces valid SQL rather than a malformed statement.
    sql = <<~SQL
      SELECT biological_associations.*
        FROM biological_associations
        WHERE biological_associations.biological_association_subject_id = :otu_id
          AND biological_associations.biological_association_subject_type = 'Otu'
      UNION
      SELECT biological_associations.*
        FROM biological_associations
        WHERE biological_associations.biological_association_object_id = :otu_id
          AND biological_associations.biological_association_object_type = 'Otu'
    SQL

    BiologicalAssociation.find_by_sql([sql, { otu_id: id }])
  end

  # @return [Otu#id, nil, false]
  #  nil - there is no OTU parent with a valid taxon name possible
  #  id - the (unambiguous) id of the nearest parent OTU attached to a valid taxon name
  #
  #  Note this is used CoLDP export. Do not change without considerations there.
  def parent_otu_id(skip_ranks: [], prefer_unlabelled_otus: false)
    return nil if taxon_name_id.nil?

    # TODO: Unify to a single query

    # The closest valid ancestor taxon name (fewest generations away) that has
    # at least one Otu and whose rank is not skipped.
    nearest_name_ids = TaxonName.joins(:otus, :descendant_hierarchies)
      .that_is_valid
      .where.not(id: taxon_name_id)
      .where(taxon_name_hierarchies: {descendant_id: taxon_name_id})
      .where.not(rank_class: skip_ranks)
      .order('taxon_name_hierarchies.generations')
      .limit(1)
      .pluck(:id)

    return nil unless nearest_name_ids.size == 1

    otus = Otu.where(taxon_name_id: nearest_name_ids.first).to_a

    # Only narrow to unlabelled Otus when there is more than one to choose from.
    otus.select! { |o| o.name.nil? } if prefer_unlabelled_otus && otus.size > 1

    otus.empty? ? nil : otus.first.id
  end

  # TODO: Re/move
  # temporary method to generate a list of taxa from a geographic area and save it to a csv file
  def taxa_by_geographic_area
    area = 'China'
    file_name1 = '/tmp/' + area + '_geographic_area_' + Time.now.to_i.to_s + '.csv'
    file_name2 = '/tmp/' + area + '_collection_object_' + Time.now.to_i.to_s + '.csv'

    # Ids of the areas named `area`, their children and their grandchildren.
    roots = GeographicArea.where(name: area).pluck(:id)
    children = GeographicArea.where('parent_id in (?)', roots).pluck(:id)
    grandchildren = GeographicArea.where('parent_id in (?)', children).pluck(:id)
    area_ids = roots + children + grandchildren

    # "<area>", "<area>, <child>" or "<area>, <child>, <grandchild>"; nil when
    # the geographic area is not within two levels of `area`.
    label_for = lambda do |geographic_area|
      if geographic_area.name == area
        area
      elsif geographic_area.parent.name == area
        area + ', ' + geographic_area.name
      elsif geographic_area.parent.parent.name == area
        area + ', ' + geographic_area.parent.name + ', ' + geographic_area.name
      end
    end

    # "<cached name> <cached author year>"
    name_with_author = ->(taxon_name) { taxon_name.cached.to_s + ' ' + taxon_name.cached_author_year.to_s }

    distributions = AssertedDistribution.where('geographic_area_id in (?)', area_ids)

    # File 1: one row per asserted distribution whose valid name has a genus.
    CSV.open(file_name1, 'w') do |csv|
      csv << ['genus', 'species', 'geographic_area']
      distributions.find_each do |distribution|
        valid_name = distribution.otu&.taxon_name&.valid_taxon_name
        next if valid_name.nil?

        location = label_for.call(distribution.geographic_area)
        species = name_with_author.call(valid_name)
        genus = valid_name.ancestor_at_rank('genus')
        next if genus.nil?

        csv << [name_with_author.call(genus), species, location]
      end
    end

    specimens = CollectionObject.joins(:collecting_event).where('collecting_events.geographic_area_id in (?)', area_ids)

    # File 2: one row per collection object, with coordinates when the last
    # georeference's geographic item yields exactly two values.
    CSV.open(file_name2, 'w') do |csv|
      csv << ['genus', 'species', 'geographic_area', 'lat', 'long']
      specimens.find_each do |specimen|
        valid_name = specimen.taxon_determinations.last&.otu&.taxon_name&.valid_taxon_name
        next if valid_name.nil?

        location = label_for.call(specimen.collecting_event.geographic_area)

        lat = nil
        long = nil
        lat_long = specimen.collecting_event&.georeferences&.last&.geographic_item&.to_a
        if !lat_long.nil? && lat_long.length == 2
          # the pair is read as [long, lat]
          lat = lat_long[1]
          long = lat_long[0]
        end

        species = name_with_author.call(valid_name)
        genus = valid_name.ancestor_at_rank('genus')
        next if genus.nil?

        csv << [name_with_author.call(genus), species, location, lat, long]
      end
    end
  end

  # @return [Scope]
  #   the union of the DwcOccurrence filter results for this Otu's asserted
  #   distributions and collection objects, within this Otu's project
  def dwc_occurrences
    otu_scope = { otu_id: id, project_id: project_id }

    from_asserted_distributions = ::Queries::DwcOccurrence::Filter.new(asserted_distribution_query: otu_scope).all
    from_collection_objects = ::Queries::DwcOccurrence::Filter.new(collection_object_query: otu_scope.dup).all
    # TODO FieldOccurrence in same pattern

    ::Queries.union(::DwcOccurrence, [from_asserted_distributions, from_collection_objects])
  end

  protected

  # An Otu needs a name, a taxon_name_id, or an already persisted taxon name;
  # when it has none of them an error is added to both :taxon_name_id and :name.
  def check_required_fields
    return unless taxon_name_id.blank? && name.blank?
    return if taxon_name&.persisted? # true, true, nil is not true

    errors.add(:taxon_name_id, 'and/or name should be selected')
    errors.add(:name, 'and/or taxon name should be selected')
  end

  # Soft validation: flags an Otu that has no taxon_name_id.
  def sv_taxon_name
    return unless taxon_name_id.nil?

    soft_validations.add(:taxon_name_id, 'Nomenclature (taxon name) is not assigned')
  end

  # Soft validation: flags an Otu when another Otu in the same project has the
  # same taxon_name_id and the same name.
  def sv_duplicate_otu
    duplicates = Otu.with_taxon_name_id(taxon_name_id).with_name(name).not_self(self).with_project_id(project_id)
    return if duplicates.empty?

    soft_validations.add(:base, 'Another OTU with an identical nomenclature (taxon name) and name exists in this project')
  end
end

#project_id ⇒ Integer

the project ID

Returns:

  • (Integer)


24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
# File 'app/models/otu.rb', line 24

class Otu < ApplicationRecord
  include Housekeeping
  include SoftValidation
  # include Shared::AlternateValues # No alternate values on Name!! Consequences - search cumbersome, names not unified and controllable ... others?
  include Shared::Citations
  include Shared::DataAttributes
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::Depictions
  include Shared::Loanable
  include Shared::Confidences
  include Shared::Observations
  include Shared::BiologicalAssociations
  include Shared::HasPapertrail
  include Shared::OriginRelationship

  include Shared::AutoUuid
  include Shared::Taxonomy
  include Otu::DwcExtensions

  include Shared::MatrixHooks::Member
  include Otu::MatrixHooks
  include Otu::Maps

  include Shared::DwcOccurrenceHooks
  include Shared::IsData

  include Shared::QueryBatchUpdate

  is_origin_for 'Sequence', 'Extract'

  GRAPH_ENTRY_POINTS = [:asserted_distributions, :biological_associations, :common_names, :contents, :data_attributes, :observation_matrices].freeze

  belongs_to :taxon_name, inverse_of: :otus

  # Why?  Could be combination too.
  belongs_to :protonym, -> { where(type: 'Protonym') }, foreign_key: :taxon_name_id

  has_many :in_scope_observation_matrices, inverse_of: :otu, class_name: 'ObservationMatrix'

  has_many :asserted_distributions, inverse_of: :otu, dependent: :restrict_with_error

  has_many :taxon_determinations, inverse_of: :otu, dependent: :destroy # TODO: change

  # TODO, move to infer BiologicalCollectionObject
  has_many :collection_objects, through: :taxon_determinations, source: :taxon_determination_object, inverse_of: :otus, source_type: 'CollectionObject'
  has_many :field_occurrences, through: :taxon_determinations, source: :taxon_determination_object, inverse_of: :otus, source_type: 'FieldOccurrence'

  has_many :type_materials, through: :protonym

  # TODO: no longer true since they can come through Otu as well
  has_many :extracts, through: :collection_objects, source: :extracts
  has_many :sequences, through: :extracts, source: :derived_sequences

  has_many :collecting_events, -> { distinct }, through: :collection_objects
  has_many :common_names, dependent: :destroy, inverse_of: :otu
  has_many :collection_profiles, inverse_of: :otu, dependent: :restrict_with_error # Do not destroy old profiles

  has_many :contents, inverse_of: :otu, dependent: :destroy
  has_many :public_contents, inverse_of: :otu, dependent: :destroy

  has_many :geographic_areas_from_asserted_distributions, through: :asserted_distributions, source: :geographic_area
  has_many :geographic_areas_from_collecting_events, through: :collecting_events, source: :geographic_area
  has_many :georeferences, through: :collecting_events

  has_many :content_topics, through: :contents, source: :topic

  has_many :otu_relationships, foreign_key: :subject_otu_id, inverse_of: :subject_otu
  has_many :related_otu_relationships, class_name: 'OtuRelationship', foreign_key: :object_otu_id, inverse_of: :object_otu

  has_many :leads, inverse_of: :otu, dependent: :restrict_with_error

  scope :with_taxon_name_id, -> (taxon_name_id) { where(taxon_name_id:) }
  scope :with_name, -> (name) { where(name:) }

  validate :check_required_fields

  soft_validate(:sv_taxon_name, set: :taxon_name)
  soft_validate(:sv_duplicate_otu, set: :duplicate_otu)

  accepts_nested_attributes_for :common_names, allow_destroy: true

  # Orders the current scope by the cached name of the associated taxon name,
  # ascending. The taxon name is loaded through `includes`/`references`, and
  # `taxon_names.cached` is added to the selected columns.
  #
  # @return Scope
  def self.alphabetically
    includes(:taxon_name)
      .select('otus.*, taxon_names.cached')
      .references(:taxon_names)
      .order('taxon_names.cached ASC')
  end

  # OTUs attached to the taxon name of the given OTU, to any descendant of
  # that name, or to a synonym of any of those (matched on
  # `cached_valid_taxon_name_id`).
  #
  # When the OTU cannot be found together with a taxon name (it has no taxon
  # name, or the id does not exist) the result is simply scoped to the
  # requested id.
  #
  # @param otu_id [Integer]
  # @param rank_class [String, nil] when provided, only OTUs whose taxon name
  #   has this rank class are returned
  # @return [Scope]
  def self.self_and_descendants_of(otu_id, rank_class = nil)
    # `find_by` (rather than `find`) so that a missing record/taxon name yields
    # nil instead of raising, which makes the fallback below reachable.
    o = Otu.joins(:taxon_name).find_by(id: otu_id)
    return Otu.where(id: otu_id) if o.nil? # no taxon name just return self in scope

    scope = joins(:taxon_name)
      .where('cached_valid_taxon_name_id IN (?)', o.taxon_name.self_and_descendants.pluck(:id)) # this also covers synonyms of self

    rank_class.nil? ? scope : scope.where('taxon_names.rank_class = ?', rank_class)
  end

  # All OTUs that are synonymous/same/matching the target OTU, for either
  #    historical and pragmatic reasons (i.e. they share the same `taxon_name_id`), or
  #    nomenclatural reasons (their taxon names share the same valid taxon name).
  #    Includes self.
  #
  # When the OTU cannot be found joined to a taxon name the scope contains
  # only the requested id.
  #
  # TODO: Replace with Queries::Otu::Filter
  #
  # @param otu_id [Integer]
  # @return [Otu::ActiveRecordRelation]
  def self.coordinate_otus(otu_id)
    target = Otu.joins(:taxon_name).find(otu_id)
    valid_name_id = target.taxon_name.cached_valid_taxon_name_id

    otus = Otu.arel_table
    names = TaxonName.arel_table

    linked_to_name = otus[:taxon_name_id].eq(names[:id])
    shares_valid_name = names[:cached_valid_taxon_name_id].eq(valid_name_id)

    name_join = otus.join(names, Arel::Nodes::InnerJoin).on(linked_to_name.and(shares_valid_name))

    Otu.joins(name_join.join_sources)
  rescue ActiveRecord::RecordNotFound
    Otu.where(id: otu_id)
  end

  # TODO: Replace with Queries::Otu::Filter
  # TODO: This is coordinate_otus with children,
  #       it should probably be renamed coordinate.
  #
  # Joins otus -> taxon_names -> taxon_name_hierarchies (on the valid name of
  # each taxon name) and keeps rows whose hierarchy ancestor is one of the
  # provided ids.
  #
  # @return [Otu::ActiveRecordRelation]
  #   all OTUs linked to the taxon_name_id, its descendants, and
  #   any synonym of any of the previous
  #   !! Invalid taxon_name_ids return nothing
  #   !! Taxon names with synonyms return the OTUs of their synonyms
  # @param taxon_name_id [Integer, Array<Integer>] id(s) of valid TaxonName(s);
  #   nested arrays, nils and duplicates are removed
  def self.descendant_of_taxon_name(taxon_name_id = [])
    ancestor_ids = [taxon_name_id].flatten.compact.uniq

    otus = Otu.arel_table
    names = TaxonName.arel_table
    hierarchies = TaxonNameHierarchy.arel_table

    joined = otus.join(names, Arel::Nodes::InnerJoin)
    joined = joined.on(otus[:taxon_name_id].eq(names[:id]))
    joined = joined.join(hierarchies, Arel::Nodes::InnerJoin)
    joined = joined.on(names[:cached_valid_taxon_name_id].eq(hierarchies[:descendant_id]))

    ancestor_condition = hierarchies[:ancestor_id].in(ancestor_ids).to_sql

    Otu.joins(joined.join_sources).where(ancestor_condition)
  end

  # Collects the ids of every OTU coordinate (see `coordinate_otus`) with any
  # of the given OTUs.
  #
  # @param otu_ids [Array<Integer>]
  # @return [Array<Integer>] unique ids, in first-seen order
  def self.coordinate_otu_ids(otu_ids = [])
    otu_ids.flat_map { |otu_id| ::Otu.coordinate_otus(otu_id).pluck(:id) }.uniq
  end

  # TODO: replace with filter
  #
  # @param taxon_name [TaxonName, String, Integer] a taxon name, or the id of
  #   one (String and Integer values are looked up with `TaxonName.find`)
  # @return [Scope] the Otus bound to that taxon name and its descendants
  def self.for_taxon_name(taxon_name)
    tn = case taxon_name
         when String, Integer
           TaxonName.find(taxon_name)
         else
           taxon_name
         end

    hierarchy_condition = { taxon_name_hierarchies: { ancestor_id: tn.id } }
    Otu.joins(taxon_name: [:ancestor_hierarchies]).where(hierarchy_condition)
  end

  # TODO: This needs to be renamed to reflect "simple" association
  #
  # Builds (but does not save) one Otu per non-blank line of the file, using
  # the whitespace-stripped line as the Otu name.
  #
  # @param file [String, #to_path] path of a text file with one name per line
  # @param args [Hash] ignored, accepted for interface compatibility
  # @return [Array<Otu>] unsaved Otus, in file order
  def self.batch_preview(file: nil, **args)
    # Block form closes the file handle; results are kept in a local rather
    # than in class-level state shared between calls.
    File.open(file) do |f|
      f.each_line.filter_map do |row|
        name = row.strip
        Otu.new(name: name) unless name.blank?
      end
    end
  end

  # Creates one Otu per value of `otus` inside a single transaction.
  #
  # @param otus [Hash] attribute hashes keyed by an arbitrary key
  # @param args [Hash] ignored, accepted for interface compatibility
  # @return [Array<Otu>, false] the saved Otus, or false when anything raised
  #   (any StandardError is rescued)
  def self.batch_create(otus: {}, **args)
    created = []

    Otu.transaction do
      otus.each_key do |key|
        created << Otu.new(otus[key]).tap(&:save!)
      end
    end

    created
  rescue
    false
  end

  # Batch update
  #
  # Builds a QueryBatchRequest, caps it according to how many distinct
  # taxon_name_id values (at most two are inspected) the filtered records
  # carry, then hands it to `query_batch_update`.
  #
  # @params params [Hash]
  #   { otu_query: {},
  #     otu_filter_query: {},
  #     async_cutoff: 1
  #   }
  def self.batch_update(params)
    request = QueryBatchRequest.new(
      async_cutoff: params[:async_cutoff] || 26,
      klass: 'Otu',
      object_filter_params: params[:otu_query],
      object_params: params[:otu],
      preview: params[:preview],
    )

    # Up to two distinct values are enough to tell "none", "one" and "many" apart.
    taxon_name_ids = request.filter.all.select(:taxon_name_id).distinct.limit(2).pluck(:taxon_name_id)

    cap, reason =
      if taxon_name_ids == [nil]
        [10000, 'Maximum allowed for empty records.']
      elsif taxon_name_ids.size == 1 || (taxon_name_ids.size == 2 && taxon_name_ids.include?(nil))
        # exactly one real taxon name id, optionally mixed with records that have none
        [2000, 'Maximum allowed for 1 unique taxon name id.']
      else
        [25, '> 1 taxon name id']
      end

    request.cap_reason = reason
    request.cap = cap

    query_batch_update(request)
  end

  # Ids of OTUs referenced by records of type `used_on` that were updated by
  # the user, in the project, within the last week.
  #
  # @param user_id [Integer]
  # @param project_id [Integer]
  # @param used_on [String] required, one of `AssertedDistribution`, `Content`, `BiologicalAssociation`, `TaxonDetermination`
  # @return [Array]
  #   unique otu ids; empty for any other `used_on` value (including nil)
  #
  # NOTE(review): no LIMIT is applied here, callers truncate the list
  # themselves; whether the subquery's ORDER BY survives the join is up to the
  # database - confirm before relying on the order.
  # NOTE(review): for `BiologicalAssociation` only the object side of the
  # association is inspected.
  def self.used_recently(user_id, project_id, used_on = '')
    # The source table and the column in it that references the OTU.
    t, otu_column =
      case used_on
      when 'AssertedDistribution'
        [AssertedDistribution.arel_table, 'otu_id']
      when 'Content'
        [::Content.arel_table, 'otu_id']
      when 'BiologicalAssociation'
        [BiologicalAssociation.arel_table, 'biological_association_object_id']
      when 'TaxonDetermination'
        [TaxonDetermination.arel_table, 'otu_id']
      else
        return [] # Array, consistent with the documented return type
      end

    p = Otu.arel_table

    # i is a select manager
    i = t.project(t[otu_column], t['updated_at']).from(t)
      .where(t['updated_at'].gt(1.week.ago))

    # The association object is polymorphic, keep only those pointing at OTUs.
    i = i.where(t['biological_association_object_type'].eq('Otu')) if used_on == 'BiologicalAssociation'

    i = i.where(t['updated_by_id'].eq(user_id))
      .where(t['project_id'].eq(project_id))
      .order(t['updated_at'].desc)

    z = i.as('recent_t')

    Otu.joins(
      Arel::Nodes::InnerJoin.new(z, Arel::Nodes::On.new(z[otu_column].eq(p['id'])))
    ).pluck(:id).uniq
  end

  # Builds the lists of OTUs offered to a user in "smart" selectors.
  #
  # @param user_id [Integer]
  # @param project_id [Integer]
  # @params target [String] required, one of nil, `AssertedDistribution`, `Content`, `BiologicalAssociation`, 'TaxonDetermination'
  # @return [Hash] otus optimized for user selection
  #   quick: pinned OTUs, plus (when there is recent use on `target`) the most
  #     recently created OTU of the last 3 hours and up to 4 recently used ones
  #   pinboard: OTUs pinned by the user (unsorted)
  #   recent: recently used/created OTUs, or the 10 most recently updated OTUs
  #     of the project when there is no recent use on `target`
  #   `quick` and `recent` are sorted by `otu_name`
  def self.select_optimized(user_id, project_id, target = nil)
    r = used_recently(user_id, project_id, target)

    q = Otu.where(project_id:).includes(:taxon_name) # faster than eager_load(), even with n+1

    # Load the pinboard once, it feeds both :pinboard and :quick.
    pinned = q.pinned_by(user_id).to_a

    # sort_by computes otu_name once per record; to_s keeps the sort from
    # raising when an otu_name is nil.
    by_name = ->(otus) { otus.sort_by { |o| o.otu_name.to_s } }

    h = {
      quick: [],
      pinboard: pinned,
      recent: []
    }

    if target && !r.empty?
      recently_created = q.where(created_by_id: user_id, created_at: 3.hours.ago..Time.now).order('updated_at DESC')

      h[:recent] = by_name.call(
        (q.where(id: r.first(10)).to_a + recently_created.limit(3).to_a).uniq
      )
      h[:quick] = by_name.call(
        (pinned + recently_created.limit(1).to_a + q.where(id: r.first(4)).to_a).uniq
      )
    else
      h[:recent] = by_name.call(q.order(updated_at: :desc).limit(10).to_a)
      h[:quick] = by_name.call(pinned)
    end

    h
  end

  # Collection objects reached through taxon determinations whose `position`
  # is 1.
  # NOTE(review): presumably position 1 marks the current determination of a
  # collection object - confirm against TaxonDetermination.
  # @return [Scope]
  def current_collection_objects
    collection_objects.where(taxon_determinations: {position: 1})
  end

  # @param otu_id [Integer] the id of the other otu
  # @return [Boolean]
  #   whether or not this otu is among the coordinate otus (see coordinate_otus)
  #   of the otu identified by `otu_id`
  def coordinate_with?(otu_id)
    # `id:` is this record's own id
    Otu.coordinate_otus(otu_id).where(otus: {id:}).any?
  end

  # TODO: Deprecate for helper method, HTML does not belong here
  #   this is also a costly sort because it n+1 to taxon_name
  #
  # @return [String, nil]
  #   the OTU's own name when present, otherwise the cached HTML name and
  #   author/year of its taxon name, otherwise nil
  def otu_name
    return name if name.present?
    return nil if taxon_name_id.nil?

    taxon_name.cached_html_name_and_author_year
  end

  # TODO: move to helper method likely
  #
  # Aggregates three GeoJSON feature collections into one `:distribution`
  # aggregation: geographic areas from asserted distributions, collecting
  # events, and geographic areas from collecting events.
  def distribution_geoJSON
    collections = [
      Gis::GeoJSON.feature_collection(geographic_areas_from_asserted_distributions, :asserted_distributions),
      Gis::GeoJSON.feature_collection(collecting_events, :collecting_events_georeferences),
      Gis::GeoJSON.feature_collection(geographic_areas_from_collecting_events, :collecting_events_geographic_area)
    ]

    Gis::GeoJSON.aggregation(collections, :distribution)
  end

  # TODO: needs spec
  # A convenience method to wrap coordinate_otus and descendant_of_taxon_name:
  # without a taxon name the coordinate OTUs of self are returned, otherwise
  # the OTUs descending from the valid name of this OTU's taxon name.
  # @return Scope
  def coordinate_otus_with_children
    return Otu.coordinate_otus(id) if taxon_name_id.nil?

    Otu.descendant_of_taxon_name(taxon_name.valid_taxon_name.id) # TODO: why not taxon_name.cached_valid_taxon_name_id
  end

  # Walks up the chain of parent OTUs (see `parent_otu_id`) starting from this
  # OTU, collecting ids until no parent is found.
  #
  # @param prefer_unlabelled_otus [Boolean] passed through to every
  #   `parent_otu_id` call
  # @return [Array]
  #   of ancestral otu_ids, nearest ancestor first
  # !! This method does not fork, as soon as 2 ancestors are
  # !! hit the list terminates.
  def ancestor_otu_ids(prefer_unlabelled_otus: true)
    ids = []
    # Honour the caller's option rather than a hard-coded `true`.
    a = parent_otu_id(prefer_unlabelled_otus: prefer_unlabelled_otus)
    while a
      ids.push a
      b = Otu.find(a)
      a = b.parent_otu_id(prefer_unlabelled_otus: prefer_unlabelled_otus)
    end
    ids
  end

  # @return [Array]
  #   all biological associations this Otu is part of, as subject or as object
  def all_biological_associations
    # The id is passed as a bind value instead of being interpolated into the
    # SQL, so an unsaved record (nil id) produces a valid query.
    # !! If self relationships are ever made possible this needs a DISTINCT clause
    sql = <<~SQL
      SELECT biological_associations.*
        FROM biological_associations
        WHERE biological_associations.biological_association_subject_id = :otu_id
          AND biological_associations.biological_association_subject_type = 'Otu'
      UNION
      SELECT biological_associations.*
        FROM biological_associations
        WHERE biological_associations.biological_association_object_id = :otu_id
          AND biological_associations.biological_association_object_type = 'Otu'
    SQL

    BiologicalAssociation.find_by_sql([sql, { otu_id: id }])
  end

  # Finds the nearest ancestor taxon name (by hierarchy generations) that is
  # valid, has at least one OTU and is not of a skipped rank, and returns the
  # id of the first OTU attached to it.
  #
  # @param skip_ranks [Array] rank classes to exclude from the candidates
  # @param prefer_unlabelled_otus [Boolean] when true and the name has more
  #   than one OTU, only OTUs with a nil `name` are considered
  # @return [Otu#id, nil]
  #  nil - there is no OTU parent with a valid taxon name possible
  #  id - the (unambiguous) id of the nearest parent OTU attached to a valid taxon name
  #
  #  Note this is used CoLDP export. Do not change without considerations there.
  def parent_otu_id(skip_ranks: [], prefer_unlabelled_otus: false)
    return nil if taxon_name_id.nil?

    # TODO: Unify to a single query

    nearest_name_id = TaxonName.joins(:otus, :descendant_hierarchies)
      .that_is_valid
      .where.not(id: taxon_name_id)
      .where(taxon_name_hierarchies: {descendant_id: taxon_name_id})
      .where.not(rank_class: skip_ranks)
      .order('taxon_name_hierarchies.generations')
      .limit(1)
      .pluck(:id)
      .first

    return nil if nearest_name_id.nil?

    otus = Otu.where(taxon_name_id: nearest_name_id).to_a
    otus = otus.select { |o| o.name.nil? } if prefer_unlabelled_otus && otus.size > 1

    otus.first&.id
  end

  # TODO: Re/move
  # Temporary method to get lists of taxa from a geographic area and save them
  # to CSV files.
  #
  # The area name is hard-coded ('China'). Two files are written under /tmp,
  # their names suffixed with the current epoch time:
  #   * `<area>_geographic_area_<time>.csv` - one row per asserted distribution
  #   * `<area>_collection_object_<time>.csv` - one row per collection object
  # Rows are only written when the valid taxon name has an ancestor at rank
  # 'genus'.
  def taxa_by_geographic_area
    area = 'China'
    file_name1 = '/tmp/' + area + '_geographic_area_' + Time.now.to_i.to_s + '.csv'
    file_name2 = '/tmp/' + area + '_collection_object_' + Time.now.to_i.to_s + '.csv'
    # Ids of the named area(s), their children and their grandchildren.
    c1 = GeographicArea.where(name: area).pluck(:id)
    c2 = GeographicArea.where('parent_id in (?)', c1).pluck(:id)
    c3 = GeographicArea.where('parent_id in (?)', c2).pluck(:id)
    c = c1 + c2 + c3
    ad = AssertedDistribution.where('geographic_area_id in (?)', c)

    # First file: taxa from asserted distributions.
    CSV.open(file_name1, 'w') do |csv|
      csv << ['genus', 'species', 'geographic_area']
      ad.find_each do |z|
        tn = z.otu&.taxon_name&.valid_taxon_name
        unless tn.nil?
          ga, gn, sp = nil, nil, nil
          # Label the area relative to `area`: itself, its child, or its grandchild.
          if z.geographic_area.name == area
            ga = area
          elsif z.geographic_area.parent.name == area
            ga = area + ', ' + z.geographic_area.name
          elsif z.geographic_area.parent.parent.name == area
            ga = area + ', ' + z.geographic_area.parent.name + ', ' + z.geographic_area.name
          end
          sp = tn.cached.to_s + ' ' + tn.cached_author_year.to_s
          tn1 = tn.ancestor_at_rank('genus')
          unless tn1.nil?
            gn = tn1&.cached.to_s + ' ' + tn1&.cached_author_year.to_s
            csv << [gn, sp, ga]
          end
        end
      end
    end

    co = CollectionObject.joins(:collecting_event).where('collecting_events.geographic_area_id in (?)', c)

    # Second file: taxa from collection objects, using the last taxon
    # determination of each object.
    CSV.open(file_name2, 'w') do |csv|
      csv << ['genus', 'species', 'geographic_area', 'lat', 'long']
      co.find_each do |z|
        tn = z.taxon_determinations.last&.otu&.taxon_name&.valid_taxon_name
        unless tn.nil?
          ga, gn, sp, lat, long = nil, nil, nil, nil, nil
          # NOTE: `ce` is the collecting event's geographic area, not the event itself.
          ce = z.collecting_event.geographic_area
          if ce.name == area
            ga = area
          elsif ce.parent.name == area
            ga = area + ', ' + ce.name
          elsif ce.parent.parent.name == area
            ga = area + ', ' + ce.parent.name + ', ' + ce.name
          end
          # Coordinates come from the last georeference, only when its
          # geographic item converts to a two element array.
          # NOTE(review): presumably that array is [longitude, latitude] for a point - confirm.
          lat_long = z.collecting_event&.georeferences&.last&.geographic_item&.to_a
          if !lat_long.nil? && lat_long.length == 2
            lat = lat_long[1]
            long = lat_long[0]
          end

          sp = tn.cached.to_s + ' ' + tn.cached_author_year.to_s
          tn1 = tn.ancestor_at_rank('genus')
          unless tn1.nil?
            gn = tn1&.cached.to_s + ' ' + tn1&.cached_author_year.to_s
            csv << [gn, sp, ga, lat, long]
          end
        end
      end
    end
  end

  # DwcOccurrence records for this OTU: the union of those found through an
  # asserted distribution query and those found through a collection object
  # query, both restricted to this OTU's id and project.
  def dwc_occurrences
    a = ::Queries::DwcOccurrence::Filter.new( asserted_distribution_query: {otu_id: id, project_id:},).all
    b = ::Queries::DwcOccurrence::Filter.new( collection_object_query: {otu_id: id, project_id:},).all
    # TODO FieldOccurrence in same pattern

    ::Queries.union(
      ::DwcOccurrence, [ a, b ]
    )
  end

  protected

  # Validation: an OTU needs a name, a taxon_name_id, or an already persisted
  # taxon name. Adds an error on both :taxon_name_id and :name otherwise.
  def check_required_fields
    return if taxon_name_id.present? || name.present?
    return if taxon_name&.persisted? # an unsaved (or missing) taxon name does not count

    errors.add(:taxon_name_id, 'and/or name should be selected')
    errors.add(:name, 'and/or taxon name should be selected')
  end

  # Soft validation: flags OTUs that have no taxon_name_id.
  def sv_taxon_name
    return unless taxon_name_id.nil?

    soft_validations.add(:taxon_name_id, 'Nomenclature (taxon name) is not assigned')
  end

  # Soft validation: flags the OTU when another OTU in the same project has
  # the same taxon_name_id and the same name.
  def sv_duplicate_otu
    duplicates = Otu
      .with_taxon_name_id(taxon_name_id)
      .with_name(name)
      .not_self(self)
      .with_project_id(project_id)

    return if duplicates.empty?

    soft_validations.add(:base, 'Another OTU with an identical nomenclature (taxon name) and name exists in this project')
  end
end

#taxon_name_id ⇒ Integer

The id of the nomenclatural name for this OTU. The presence of a nomenclatural name carries no biological meaning; it is simply a means to organize concepts within a nomenclatural system.

Returns:

  • (Integer)


24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
# File 'app/models/otu.rb', line 24

class Otu < ApplicationRecord
  include Housekeeping
  include SoftValidation
  # include Shared::AlternateValues # No alternate values on Name!! Consequences - search cumbersome, names not unified and controllable ... others?
  include Shared::Citations
  include Shared::DataAttributes
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::Depictions
  include Shared::Loanable
  include Shared::Confidences
  include Shared::Observations
  include Shared::BiologicalAssociations
  include Shared::HasPapertrail
  include Shared::OriginRelationship

  include Shared::AutoUuid
  include Shared::Taxonomy
  include Otu::DwcExtensions

  include Shared::MatrixHooks::Member
  include Otu::MatrixHooks
  include Otu::Maps

  include Shared::DwcOccurrenceHooks
  include Shared::IsData

  include Shared::QueryBatchUpdate

  is_origin_for 'Sequence', 'Extract'

  GRAPH_ENTRY_POINTS = [:asserted_distributions, :biological_associations, :common_names, :contents, :data_attributes, :observation_matrices].freeze

  belongs_to :taxon_name, inverse_of: :otus

  # Why?  Could be combination too.
  belongs_to :protonym, -> { where(type: 'Protonym') }, foreign_key: :taxon_name_id

  has_many :in_scope_observation_matrices, inverse_of: :otu, class_name: 'ObservationMatrix'

  has_many :asserted_distributions, inverse_of: :otu, dependent: :restrict_with_error

  has_many :taxon_determinations, inverse_of: :otu, dependent: :destroy # TODO: change

  # TODO, move to infer BiologicalCollectionObject
  has_many :collection_objects, through: :taxon_determinations, source: :taxon_determination_object, inverse_of: :otus, source_type: 'CollectionObject'
  has_many :field_occurrences, through: :taxon_determinations, source: :taxon_determination_object, inverse_of: :otus, source_type: 'FieldOccurrence'

  has_many :type_materials, through: :protonym

  # TODO: no longer true since they can come through Otu as well
  has_many :extracts, through: :collection_objects, source: :extracts
  has_many :sequences, through: :extracts, source: :derived_sequences

  has_many :collecting_events, -> { distinct }, through: :collection_objects
  has_many :common_names, dependent: :destroy, inverse_of: :otu
  has_many :collection_profiles, inverse_of: :otu, dependent: :restrict_with_error # Do not destroy old profiles

  has_many :contents, inverse_of: :otu, dependent: :destroy
  has_many :public_contents, inverse_of: :otu, dependent: :destroy

  has_many :geographic_areas_from_asserted_distributions, through: :asserted_distributions, source: :geographic_area
  has_many :geographic_areas_from_collecting_events, through: :collecting_events, source: :geographic_area
  has_many :georeferences, through: :collecting_events

  has_many :content_topics, through: :contents, source: :topic

  has_many :otu_relationships, foreign_key: :subject_otu_id, inverse_of: :subject_otu
  has_many :related_otu_relationships, class_name: 'OtuRelationship', foreign_key: :object_otu_id, inverse_of: :object_otu

  has_many :leads, inverse_of: :otu, dependent: :restrict_with_error

  scope :with_taxon_name_id, -> (taxon_name_id) { where(taxon_name_id:) }
  scope :with_name, -> (name) { where(name:) }

  validate :check_required_fields

  soft_validate(:sv_taxon_name, set: :taxon_name)
  soft_validate(:sv_duplicate_otu, set: :duplicate_otu)

  accepts_nested_attributes_for :common_names, allow_destroy: true

  # @return Scope
  def self.alphabetically
    includes(:taxon_name).select('otus.*, taxon_names.cached').references(:taxon_names).order('taxon_names.cached ASC')
  end

  # @param [Integer] otu_id
  # @param [String] rank_class
  # @return [Scope]
  #    Otu.joins(:taxon_name).where(taxon_name: q).to_sql
  def self.self_and_descendants_of(otu_id, rank_class = nil)
    if o = Otu.joins(:taxon_name).find(otu_id)
      if rank_class.nil?
        joins(:taxon_name).
          where('cached_valid_taxon_name_id IN (?)', o.taxon_name.self_and_descendants.pluck(:id)) #this also covers synonyms of self
      else
        joins(:taxon_name).
          where('cached_valid_taxon_name_id IN (?)', o.taxon_name.self_and_descendants.pluck(:id)).
          where( 'taxon_names.rank_class = ?', rank_class)
      end
    else # no taxon name just return self in scope
      Otu.where(id: otu_id)
    end
  end

  # @return [Otu::ActiveRecordRelation]
  #
  # All OTUs that are synonymous/same/matching target, for either
  #    historical and pragmatic (i.e. share the same `taxon_name_id`), or
  #    nomenclatural reasons (are synonyms of the taxon name). Includes self.
  #
  # TODO: Replace with Queries::Otu::Filter
  #
  def self.coordinate_otus(otu_id)
    begin
      i = Otu.joins(:taxon_name).find(otu_id)
      j = i.taxon_name.cached_valid_taxon_name_id
      o = Otu.arel_table
      t = TaxonName.arel_table

      q = o.join(t, Arel::Nodes::InnerJoin).on(
        o[:taxon_name_id].eq( t[:id] ).and(t[:cached_valid_taxon_name_id].eq(j))
      )

      Otu.joins(q.join_sources)
    rescue ActiveRecord::RecordNotFound
      Otu.where(id: otu_id)
    end
  end

  # TODO: REplace with Queries::Otu::Filter
  # TODO: This is coordinate_otus with children,
  #       it should probably be renamed coordinate.
  # @return [Otu::ActiveRecordRelation]
  #   all OTUs linked to the taxon_name_id, it descendants, and
  #   any synonym of any of the previous
  #   linked directly to the taxon name
  #   !! Invalid taxon_name_ids return nothing
  #   !! Taxon names with synonyms return the OTUs of their synonyms
  # @param taxon_name_id [The id of a valid TaxonName]
  def self.descendant_of_taxon_name(taxon_name_id = [])
    ids = [taxon_name_id].flatten.compact.uniq

    o = Otu.arel_table
    t = TaxonName.arel_table
    h = TaxonNameHierarchy.arel_table

    q = o.join(t, Arel::Nodes::InnerJoin).on(
      o[:taxon_name_id].eq( t[:id]))
      .join(h, Arel::Nodes::InnerJoin).on(
        t[:cached_valid_taxon_name_id].eq(h[:descendant_id]))

    Otu.joins(q.join_sources).where(h[:ancestor_id].in(ids).to_sql)
  end

  def self.coordinate_otu_ids(otu_ids = [])
    ids = []
    otu_ids.each do |id|
      ids += ::Otu.coordinate_otus(id).pluck(:id)
    end
    ids.uniq
  end

  # TODO: replace with filter
  # return [Scope] the Otus bound to that taxon name and its descendants
  def self.for_taxon_name(taxon_name)
    if taxon_name.kind_of?(String) || taxon_name.kind_of?(Integer)
      tn = TaxonName.find(taxon_name)
    else
      tn = taxon_name
    end
    Otu.joins(taxon_name: [:ancestor_hierarchies]).where(taxon_name_hierarchies: {ancestor_id: tn.id})
  end

  # TODO: This need to be renamed to reflect "simple" association
  def self.batch_preview(file: nil, ** args)
    # f     = CSV.read(file, headers: true, col_sep: "\t", skip_blanks: true, header_converters: :symbol)
    @otus = []
    File.open(file).each do |row|
      name = row.strip
      next if name.blank?
      @otus.push(Otu.new(name: row.strip))
    end
    @otus
  end

  def self.batch_create(otus: {}, ** args)
    new_otus = []
    begin
      Otu.transaction do
        otus.each_key do |k|
          o = Otu.new(otus[k])
          o.save!
          new_otus.push(o)
        end
      end
    rescue
      return false
    end
    new_otus
  end

  # Batch update

  # @params params [Hash]
  #   { otu_query: {},
  #     otu_filter_query: {},
  #     async_cutoff: 1
  #   }
  def self.batch_update(params)
    request = QueryBatchRequest.new(
      async_cutoff: params[:async_cutoff] || 26,
      klass: 'Otu',
      object_filter_params: params[:otu_query],
      object_params: params[:otu],
      preview: params[:preview],
    )

    a = request.filter

    v = a.all.select(:taxon_name_id).distinct.limit(2).pluck(:taxon_name_id)

    cap = 0

    case v.size
    when 1
      if v.first.nil?
        cap = 10000
        request.cap_reason = 'Maximum allowed for empty records.'
      else
        cap = 2000
        request.cap_reason = 'Maximum allowed for 1 unique taxon name id.'
      end
    when 2
      if v.include?(nil)
        cap = 2000
        request.cap_reason = 'Maximum allowed for 1 unique taxon name id.'
      else
        cap = 25
        request.cap_reason = '> 1 taxon name id'
      end
    else
      cap = 25
      request.cap_reason = '> 1 taxon name id'
    end

    request.cap = cap

    query_batch_update(request)
  end

  # @param used_on [String] required, one of `AssertedDistribution`, `Content`, `BiologicalAssociation`, `TaxonDetermination`
  # @return [Array]
  #   ids of the max 10 most recently used otus, as `used_on`
  def self.used_recently(user_id, project_id, used_on = '')
    t = case used_on
        when 'AssertedDistribution'
          AssertedDistribution.arel_table
        when 'Content'
          ::Content.arel_table
        when 'BiologicalAssociation'
          BiologicalAssociation.arel_table
        when 'TaxonDetermination'
          TaxonDetermination.arel_table
        else
          return Otu.none
        end

    p = Otu.arel_table

    # i is a select manager
    i = case used_on
        when 'BiologicalAssociation'
          t.project(t['biological_association_object_id'], t['updated_at']).from(t)
            .where(
              t['updated_at'].gt(1.week.ago).and(
                t['biological_association_object_type'].eq('Otu')
              )
            )
              .where(t['updated_by_id'].eq(user_id))
              .where(t['project_id'].eq(project_id))
              .order(t['updated_at'].desc)
        else
          t.project(t['otu_id'], t['updated_at']).from(t)
            .where(t['updated_at'].gt( 1.week.ago ))
            .where(t['updated_by_id'].eq(user_id))
            .where(t['project_id'].eq(project_id))
            .order(t['updated_at'].desc)
        end

    z = i.as('recent_t')

    case used_on
    when 'BiologicalAssociation'
      Otu.joins(
        Arel::Nodes::InnerJoin.new(z, Arel::Nodes::On.new(z['biological_association_object_id'].eq(p['id'])))
      ).pluck(:id).uniq
    else
      Otu.joins(
        Arel::Nodes::InnerJoin.new(z, Arel::Nodes::On.new(z['otu_id'].eq(p['id'])))
      ).pluck(:id).uniq
    end
  end

  # @params target [String] required, one of nil, `AssertedDistribution`, `Content`, `BiologicalAssociation`, 'TaxonDetermination'
  # @return [Hash] otus optimized for user selection
  def self.select_optimized(user_id, project_id, target = nil)
    # Ids of OTUs this user recently attached to `target` records (empty when target is unknown).
    r = used_recently(user_id, project_id, target)

    q = Otu.where(project_id:).includes(:taxon_name) # faster than eager_load(), even with n+1

    # Sort on a String key computed once per record. `otu_name` may be nil, which
    # makes `<=>` return nil and `sort` raise; it is also costly to call repeatedly.
    by_label = ->(otus) { otus.sort_by { |o| o.otu_name.to_s } }

    # Loaded once and shared by :pinboard and :quick.
    pinned = q.pinned_by(user_id).to_a

    h = {
      quick: [],
      pinboard: pinned,
      recent: []
    }

    if target && !r.empty?
      # OTUs this user created in the last 3 hours, most recently updated first.
      fresh = q.where(created_by_id: user_id, created_at: 3.hours.ago..Time.now).order('updated_at DESC').limit(3).to_a

      h[:recent] = by_label.call((q.where(id: r.first(10)).to_a + fresh).uniq)
      h[:quick] = by_label.call((pinned + fresh.first(1) + q.where(id: r.first(4)).to_a).uniq)
    else
      h[:recent] = by_label.call(q.order(updated_at: :desc).limit(10).to_a)
      h[:quick] = by_label.call(pinned)
    end

    h
  end

  # @return the `collection_objects` association filtered on
  #   taxon_determinations.position = 1
  def current_collection_objects
    first_position = { taxon_determinations: { position: 1 } }
    collection_objects.where(first_position)
  end

  # @return [Boolean]
  #   whether or not this otu is coordinate (see coordinate_otus) with the otu identified by otu_id
  def coordinate_with?(otu_id)
    # Restrict the coordinate set of `otu_id` to this record's own id.
    only_self = { otus: { id: id } }
    Otu.coordinate_otus(otu_id).where(only_self).any?
  end

  # TODO: Deprecate for helper method, HTML does not belong here
  #   this is also a costly sort because it n+1 to taxon_name
  def otu_name
    # An explicit label always wins over nomenclature.
    return name if name.present?
    return nil if taxon_name_id.nil?

    taxon_name.cached_html_name_and_author_year
  end

  # TODO: move to helper method likely
  def distribution_geoJSON
    # One feature collection per data source, aggregated in this fixed order.
    layers = [
      Gis::GeoJSON.feature_collection(geographic_areas_from_asserted_distributions, :asserted_distributions),
      Gis::GeoJSON.feature_collection(collecting_events, :collecting_events_georeferences),
      Gis::GeoJSON.feature_collection(geographic_areas_from_collecting_events, :collecting_events_geographic_area)
    ]
    Gis::GeoJSON.aggregation(layers, :distribution)
  end

  # TODO: needs spec
  # A convenience method to wrap coordinate_otus and descendant_of_taxon_name
  # @return Scope
  def coordinate_otus_with_children
    # Without a taxon name there is no hierarchy to descend.
    return Otu.coordinate_otus(id) if taxon_name_id.nil?

    valid_name = taxon_name.valid_taxon_name # TODO: why not taxon_name.cached_valid_taxon_name_id
    Otu.descendant_of_taxon_name(valid_name.id)
  end

  # @return [Array]
  #   of ancestral otu_ids
  # !! This method does not fork, as soon as 2 ancestors are
  # !! hit the list terminates.
  def ancestor_otu_ids(prefer_unlabelled_otus: true)
    ids = []
    # Forward the caller's preference; it was previously hard-coded to true,
    # so passing `false` had no effect.
    parent_id = parent_otu_id(prefer_unlabelled_otus: prefer_unlabelled_otus)
    while parent_id
      ids.push(parent_id)
      # Walk one step up: the parent's own parent, resolved with the same preference.
      parent_id = Otu.find(parent_id).parent_otu_id(prefer_unlabelled_otus: prefer_unlabelled_otus)
    end
    ids
  end

  # @return [Array]
  #   all biological associations this Otu is part of
  def all_biological_associations
    # An unsaved record has no id; interpolating nil would produce invalid SQL.
    return [] if id.nil?

    # UNION (unlike UNION ALL) already removes duplicate rows, so an association
    # matching both halves is returned once.
    sql = <<~SQL
      SELECT biological_associations.*
        FROM biological_associations
        WHERE biological_associations.biological_association_subject_id = :otu_id
          AND biological_associations.biological_association_subject_type = 'Otu'
      UNION
      SELECT biological_associations.*
        FROM biological_associations
        WHERE biological_associations.biological_association_object_id = :otu_id
          AND biological_associations.biological_association_object_type = 'Otu'
    SQL

    # The id is passed as a named bind value rather than interpolated into the statement.
    BiologicalAssociation.find_by_sql([sql, { otu_id: id }])
  end

  # @return [Otu#id, nil]
  #  nil - there is no OTU parent with a valid taxon name possible
  #  id - the (unambiguous) id of the nearest parent OTU attached to a valid taxon name
  #
  #  Note this is used in the CoLDP export. Do not change without considering the impact there.
  def parent_otu_id(skip_ranks: [], prefer_unlabelled_otus: false)
    return nil if taxon_name_id.nil?

    # TODO: Unify to a single query

    # Id of the closest (fewest generations) valid ancestor name that has an OTU,
    # excluding this OTU's own name and any name whose rank is in `skip_ranks`.
    nearest_ancestor = TaxonName.joins(:otus, :descendant_hierarchies)
      .that_is_valid
      .where.not(id: taxon_name_id)
      .where(taxon_name_hierarchies: {descendant_id: taxon_name_id})
      .where.not(rank_class: skip_ranks)
      .order('taxon_name_hierarchies.generations')
      .limit(1)
      .pluck(:id)
    return nil unless nearest_ancestor.size == 1

    matches = Otu.where(taxon_name_id: nearest_ancestor.first).to_a
    # Only when several OTUs share the name is the list narrowed to unlabelled
    # ones; this may leave nothing, in which case nil is returned.
    if prefer_unlabelled_otus && matches.size > 1
      matches = matches.select { |otu| otu.name.nil? }
    end

    matches.first&.id
  end

  # TODO: Re/move
  # temporary method to generate a list of taxa from a geographic area and save it to a CSV file
  def taxa_by_geographic_area
    # The area is hard-coded. Two CSV files are written under /tmp, each name
    # stamped with the epoch second at which it was built.
    area = 'China'
    file_name1 = '/tmp/' + area + '_geographic_area_' + Time.now.to_i.to_s + '.csv'
    file_name2 = '/tmp/' + area + '_collection_object_' + Time.now.to_i.to_s + '.csv'
    # Ids of the named area (c1), its children (c2) and its grandchildren (c3).
    c1 = GeographicArea.where(name: area).pluck(:id)
    c2 = GeographicArea.where('parent_id in (?)', c1).pluck(:id)
    c3 = GeographicArea.where('parent_id in (?)', c2).pluck(:id)
    c = c1 + c2 + c3
    ad = AssertedDistribution.where('geographic_area_id in (?)', c)

    # File 1: one row per asserted distribution in those areas.
    CSV.open(file_name1, 'w') do |csv|
      csv << ['genus', 'species', 'geographic_area']
      ad.find_each do |z|
        # Records without an OTU, taxon name or valid taxon name are skipped.
        tn = z.otu&.taxon_name&.valid_taxon_name
        unless tn.nil?
          ga, gn, sp = nil, nil, nil
          # Build an "area, parent, name" label depending on how deep the
          # record's area sits below `area` (at most two levels).
          if z.geographic_area.name == area
            ga = area
          elsif z.geographic_area.parent.name == area
            ga = area + ', ' + z.geographic_area.name
          elsif z.geographic_area.parent.parent.name == area
            ga = area + ', ' + z.geographic_area.parent.name + ', ' + z.geographic_area.name
          end
          sp = tn.cached.to_s + ' ' + tn.cached_author_year.to_s
          # A row is only written when `ancestor_at_rank('genus')` returns something.
          tn1 = tn.ancestor_at_rank('genus')
          unless tn1.nil?
            gn = tn1&.cached.to_s + ' ' + tn1&.cached_author_year.to_s
            csv << [gn, sp, ga]
          end
        end
      end
    end

    co = CollectionObject.joins(:collecting_event).where('collecting_events.geographic_area_id in (?)', c)

    # File 2: one row per collection object collected in those areas.
    CSV.open(file_name2, 'w') do |csv|
      csv << ['genus', 'species', 'geographic_area', 'lat', 'long']
      co.find_each do |z|
        # Uses the last taxon determination of the collection object.
        tn = z.taxon_determinations.last&.otu&.taxon_name&.valid_taxon_name
        unless tn.nil?
          ga, gn, sp, lat, long = nil, nil, nil, nil, nil
          # NOTE: despite its name, `ce` is the collecting event's geographic area.
          ce = z.collecting_event.geographic_area
          if ce.name == area
            ga = area
          elsif ce.parent.name == area
            ga = area + ', ' + ce.name
          elsif ce.parent.parent.name == area
            ga = area + ', ' + ce.parent.name + ', ' + ce.name
          end
          # Coordinates come from the last georeference's geographic item; they are
          # only used when `to_a` yields exactly two values, read as [long, lat].
          lat_long = z.collecting_event&.georeferences&.last&.geographic_item&.to_a
          if !lat_long.nil? && lat_long.length == 2
            lat = lat_long[1]
            long = lat_long[0]
          end

          sp = tn.cached.to_s + ' ' + tn.cached_author_year.to_s
          # A row is only written when `ancestor_at_rank('genus')` returns something.
          tn1 = tn.ancestor_at_rank('genus')
          unless tn1.nil?
            gn = tn1&.cached.to_s + ' ' + tn1&.cached_author_year.to_s
            csv << [gn, sp, ga, lat, long]
          end
        end
      end
    end
  end

  # @return the union of the DwcOccurrence filter results reached through this
  #   OTU's asserted distributions and through its collection objects
  def dwc_occurrences
    # TODO FieldOccurrence in same pattern
    scopes = %i[asserted_distribution_query collection_object_query].map do |query_key|
      ::Queries::DwcOccurrence::Filter.new(query_key => { otu_id: id, project_id: project_id }).all
    end

    ::Queries.union(::DwcOccurrence, scopes)
  end

  protected

  # Adds an error on both :taxon_name_id and :name unless the record has a
  # taxon_name_id, a name, or an already persisted taxon_name.
  def check_required_fields
    # true, true, nil is not true: a missing taxon_name counts as "not persisted"
    return if taxon_name_id.present? || name.present? || taxon_name&.persisted?

    errors.add(:taxon_name_id, 'and/or name should be selected')
    errors.add(:name, 'and/or taxon name should be selected')
  end

  # Soft validation: flags an OTU that has no taxon_name_id.
  def sv_taxon_name
    return unless taxon_name_id.nil?

    soft_validations.add(:taxon_name_id, 'Nomenclature (taxon name) is not assigned')
  end

  # Soft validation: flags the record when another OTU in the same project has
  # the same taxon_name_id and name.
  def sv_duplicate_otu
    twins = Otu.with_taxon_name_id(taxon_name_id)
      .with_name(name)
      .not_self(self)
      .with_project_id(project_id)
    return if twins.empty?

    soft_validations.add(:base, 'Another OTU with an identical nomenclature (taxon name) and name exists in this project')
  end
end

Class Method Details

.alphabetically ⇒ Object

Returns Scope.

Returns:

  • Scope



108
109
110
# File 'app/models/otu.rb', line 108

# @return [Scope] the receiver's records with taxon_names included, ordered by
#   taxon_names.cached ascending
def self.alphabetically
  with_names = includes(:taxon_name)
  with_names
    .select('otus.*, taxon_names.cached')
    .references(:taxon_names)
    .order('taxon_names.cached ASC')
end

.batch_create(otus: {}, **args) ⇒ Object



212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# File 'app/models/otu.rb', line 212

# Creates one Otu per value of `otus` inside a single transaction.
#
# @param otus [Hash] arbitrary key => attributes for one Otu
# @return [Array<Otu>, false] the saved records, or false when anything raised
def self.batch_create(otus: {}, **args)
  created = []
  Otu.transaction do
    otus.each_key { |key| created << Otu.new(otus[key]).tap(&:save!) }
  end
  created
rescue
  # Any StandardError (typically a failed save!) turns the whole batch into `false`.
  false
end

.batch_preview(file: nil, **args) ⇒ Object

TODO: This needs to be renamed to reflect “simple” association



201
202
203
204
205
206
207
208
209
210
# File 'app/models/otu.rb', line 201

# Builds (without saving) one Otu per non-blank line of a plain text file.
#
# @param file [String] path of the file to read, one OTU name per line
# @return [Array<Otu>] new, unsaved records named after each stripped line
def self.batch_preview(file: nil, **args)
  # Kept as an instance variable for backward compatibility with the original.
  @otus = []
  # File.foreach closes the file when done; File.open(file).each left the handle open.
  File.foreach(file) do |row|
    name = row.strip
    next if name.blank?
    @otus.push(Otu.new(name: name))
  end
  @otus
end

.batch_update(params) ⇒ Object



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
# File 'app/models/otu.rb', line 235

# Prepares a QueryBatchRequest for OTUs and hands it to query_batch_update.
# The cap (and its reason) depends on how many distinct taxon_name_id values
# the filtered OTUs carry.
#
# @param params [Hash] reads :async_cutoff, :otu_query, :otu and :preview
def self.batch_update(params)
  request = QueryBatchRequest.new(
    async_cutoff: params[:async_cutoff] || 26,
    klass: 'Otu',
    object_filter_params: params[:otu_query],
    object_params: params[:otu],
    preview: params[:preview],
  )

  # Two distinct values (nil included) are enough to classify the request.
  taxon_name_ids = request.filter.all.select(:taxon_name_id).distinct.limit(2).pluck(:taxon_name_id)

  one_name = taxon_name_ids.size == 1 ||
    (taxon_name_ids.size == 2 && taxon_name_ids.include?(nil))

  cap, reason =
    if taxon_name_ids == [nil]
      [10000, 'Maximum allowed for empty records.']
    elsif one_name
      [2000, 'Maximum allowed for 1 unique taxon name id.']
    else
      [25, '> 1 taxon name id']
    end

  request.cap_reason = reason
  request.cap = cap

  query_batch_update(request)
end

.coordinate_otu_ids(otu_ids = []) ⇒ Object



181
182
183
184
185
186
187
# File 'app/models/otu.rb', line 181

# @param otu_ids [Array] ids of Otus
# @return [Array] the unique ids of every Otu coordinate (see coordinate_otus)
#   with any of the given ids; one lookup is issued per id
def self.coordinate_otu_ids(otu_ids = [])
  otu_ids.flat_map { |otu_id| ::Otu.coordinate_otus(otu_id).pluck(:id) }.uniq
end

.coordinate_otus(otu_id) ⇒ Otu::ActiveRecordRelation

All OTUs that are synonymous/same/matching target, for either

historical and pragmatic (i.e. share the same `taxon_name_id`), or
nomenclatural reasons (are synonyms of the taxon name). Includes self.

TODO: Replace with Queries::Otu::Filter

Returns:

  • (Otu::ActiveRecordRelation)


139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'app/models/otu.rb', line 139

# @param otu_id [Integer]
# @return [Otu::ActiveRecordRelation] Otus whose taxon name shares the
#   cached_valid_taxon_name_id of the target's taxon name; when the lookup
#   raises RecordNotFound, just the Otu with that id
def self.coordinate_otus(otu_id)
  target = Otu.joins(:taxon_name).find(otu_id)
  valid_id = target.taxon_name.cached_valid_taxon_name_id

  otus = Otu.arel_table
  names = TaxonName.arel_table

  same_valid_name = otus[:taxon_name_id].eq(names[:id])
    .and(names[:cached_valid_taxon_name_id].eq(valid_id))
  joined = otus.join(names, Arel::Nodes::InnerJoin).on(same_valid_name)

  Otu.joins(joined.join_sources)
rescue ActiveRecord::RecordNotFound
  # The inner join above finds nothing for an Otu that has no taxon name.
  Otu.where(id: otu_id)
end

.descendant_of_taxon_name(taxon_name_id = []) ⇒ Otu::ActiveRecordRelation

TODO: Replace with Queries::Otu::Filter. TODO: This is coordinate_otus with children,

it should probably be renamed coordinate.

Parameters:

  • taxon_name_id (The id of a valid TaxonName) (defaults to: [])

Returns:

  • (Otu::ActiveRecordRelation)

    all OTUs linked to the taxon_name_id, its descendants, and any synonym of any of the previous linked directly to the taxon name. !! Invalid taxon_name_ids return nothing. !! Taxon names with synonyms return the OTUs of their synonyms.



166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'app/models/otu.rb', line 166

# @param taxon_name_id [Integer, Array] one or more TaxonName ids (nils are dropped)
# @return [Otu::ActiveRecordRelation] Otus whose taxon name's
#   cached_valid_taxon_name_id is a hierarchy descendant of any given id
def self.descendant_of_taxon_name(taxon_name_id = [])
  ancestor_ids = [taxon_name_id].flatten.compact.uniq

  otus = Otu.arel_table
  names = TaxonName.arel_table
  hierarchies = TaxonNameHierarchy.arel_table

  # otus -> taxon_names -> taxon_name_hierarchies (matched on the valid name id)
  joined = otus
    .join(names, Arel::Nodes::InnerJoin).on(otus[:taxon_name_id].eq(names[:id]))
    .join(hierarchies, Arel::Nodes::InnerJoin).on(names[:cached_valid_taxon_name_id].eq(hierarchies[:descendant_id]))

  Otu.joins(joined.join_sources).where(hierarchies[:ancestor_id].in(ancestor_ids).to_sql)
end

.for_taxon_name(taxon_name) ⇒ Object

TODO: replace with filter. Returns [Scope] the Otus bound to that taxon name and its descendants.



191
192
193
194
195
196
197
198
# File 'app/models/otu.rb', line 191

# @param taxon_name [TaxonName, String, Integer] a record, or an id to look up
# @return [Scope] Otus whose taxon name has the given name among its hierarchy ancestors
def self.for_taxon_name(taxon_name)
  tn = case taxon_name
       when String, Integer then TaxonName.find(taxon_name)
       else taxon_name
       end

  Otu.joins(taxon_name: [:ancestor_hierarchies]).where(taxon_name_hierarchies: { ancestor_id: tn.id })
end

.select_optimized(user_id, project_id, target = nil) ⇒ Hash

Returns otus optimized for user selection.

Returns:

  • (Hash)

    otus optimized for user selection



332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
# File 'app/models/otu.rb', line 332

# @param user_id [Integer]
# @param project_id [Integer]
# @param target [String, nil] optional; passed to `used_recently` as `used_on`
# @return [Hash] `:quick`, `:pinboard` and `:recent` lists of Otus for selection
#   widgets; `:quick` and `:recent` are sorted by `otu_name`
def self.select_optimized(user_id, project_id, target = nil)
  # Ids of OTUs this user recently attached to `target` records (empty when target is unknown).
  r = used_recently(user_id, project_id, target)

  q = Otu.where(project_id:).includes(:taxon_name) # faster than eager_load(), even with n+1

  # Sort on a String key computed once per record. `otu_name` may be nil, which
  # makes `<=>` return nil and `sort` raise; it is also costly to call repeatedly.
  by_label = ->(otus) { otus.sort_by { |o| o.otu_name.to_s } }

  # Loaded once and shared by :pinboard and :quick.
  pinned = q.pinned_by(user_id).to_a

  h = {
    quick: [],
    pinboard: pinned,
    recent: []
  }

  if target && !r.empty?
    # OTUs this user created in the last 3 hours, most recently updated first.
    fresh = q.where(created_by_id: user_id, created_at: 3.hours.ago..Time.now).order('updated_at DESC').limit(3).to_a

    h[:recent] = by_label.call((q.where(id: r.first(10)).to_a + fresh).uniq)
    h[:quick] = by_label.call((pinned + fresh.first(1) + q.where(id: r.first(4)).to_a).uniq)
  else
    h[:recent] = by_label.call(q.order(updated_at: :desc).limit(10).to_a)
    h[:quick] = by_label.call(pinned)
  end

  h
end

.self_and_descendants_of(otu_id, rank_class = nil) ⇒ Scope

Returns Otu.joins(:taxon_name).where(taxon_name: q).to_sql.

Parameters:

  • otu_id (Integer)
  • rank_class (String) (defaults to: nil)

Returns:

  • (Scope)

    Otu.joins(:taxon_name).where(taxon_name: q).to_sql



116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'app/models/otu.rb', line 116

# @param otu_id [Integer]
# @param rank_class [String, nil] when given, only Otus whose taxon name has this rank_class
# @return [Scope] Otus whose taxon name's cached_valid_taxon_name_id belongs to
#   the target's taxon name or one of its descendants; when the target has no
#   taxon name (or does not exist), a scope on the given id only
def self.self_and_descendants_of(otu_id, rank_class = nil)
  # find_by returns nil instead of raising, so the fallback below is reachable
  # (the inner join excludes Otus without a taxon name).
  o = Otu.joins(:taxon_name).find_by(id: otu_id)

  # no taxon name just return self in scope
  return Otu.where(id: otu_id) if o.nil?

  # this also covers synonyms of self
  scope = joins(:taxon_name)
    .where('cached_valid_taxon_name_id IN (?)', o.taxon_name.self_and_descendants.pluck(:id))
  scope = scope.where('taxon_names.rank_class = ?', rank_class) unless rank_class.nil?
  scope
end

.used_recently(user_id, project_id, used_on = '') ⇒ Array

Returns ids of the max 10 most recently used otus, as `used_on`.

Parameters:

  • used_on (String) (defaults to: '')

    required, one of `AssertedDistribution`, `Content`, `BiologicalAssociation`, `TaxonDetermination`

Returns:

  • (Array)

    ids of the max 10 most recently used otus, as `used_on`



280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
# File 'app/models/otu.rb', line 280

# Ids of Otus referenced by `used_on` records that the user updated in the
# project during the last week.
#
# @param user_id [Integer] matched against `updated_by_id`
# @param project_id [Integer] matched against `project_id`
# @param used_on [String] one of `AssertedDistribution`, `Content`,
#   `BiologicalAssociation`, `TaxonDetermination`
# @return [Array<Integer>] unique Otu ids, most recently updated reference
#   first; an empty Array for any other `used_on`
def self.used_recently(user_id, project_id, used_on = '')
  t = case used_on
      when 'AssertedDistribution'
        AssertedDistribution.arel_table
      when 'Content'
        ::Content.arel_table
      when 'BiologicalAssociation'
        BiologicalAssociation.arel_table
      when 'TaxonDetermination'
        TaxonDetermination.arel_table
      else
        # Same return type (Array) as the supported targets.
        return []
      end

  # Biological associations reference the Otu polymorphically, as their object.
  polymorphic = used_on == 'BiologicalAssociation'
  otu_column = polymorphic ? 'biological_association_object_id' : 'otu_id'

  p = Otu.arel_table

  # i is a select manager
  i = t.project(t[otu_column], t['updated_at']).from(t)
    .where(t['updated_at'].gt(1.week.ago))
    .where(t['updated_by_id'].eq(user_id))
    .where(t['project_id'].eq(project_id))
  i = i.where(t['biological_association_object_type'].eq('Otu')) if polymorphic

  z = i.as('recent_t')

  # Order the outer query: ordering inside the derived table does not
  # guarantee the order of the joined result.
  Otu.joins(
    Arel::Nodes::InnerJoin.new(z, Arel::Nodes::On.new(z[otu_column].eq(p['id'])))
  ).order(z['updated_at'].desc).pluck(:id).uniq
end

Instance Method Details

#all_biological_associations ⇒ Array

Returns all biological associations this Otu is part of.

Returns:

  • (Array)

    all biological associations this Otu is part of



420
421
422
423
424
425
426
427
428
429
430
431
432
# File 'app/models/otu.rb', line 420

# @return [Array<BiologicalAssociation>] every biological association in which
#   this Otu is the subject or the object; empty for an unsaved Otu
def all_biological_associations
  # An unsaved record has no id; interpolating nil would produce invalid SQL.
  return [] if id.nil?

  # UNION (unlike UNION ALL) already removes duplicate rows, so an association
  # matching both halves is returned once.
  sql = <<~SQL
    SELECT biological_associations.*
      FROM biological_associations
      WHERE biological_associations.biological_association_subject_id = :otu_id
        AND biological_associations.biological_association_subject_type = 'Otu'
    UNION
    SELECT biological_associations.*
      FROM biological_associations
      WHERE biological_associations.biological_association_object_id = :otu_id
        AND biological_associations.biological_association_object_type = 'Otu'
  SQL

  # The id is passed as a named bind value rather than interpolated into the statement.
  BiologicalAssociation.find_by_sql([sql, { otu_id: id }])
end

#ancestor_otu_ids(prefer_unlabelled_otus: true) ⇒ Array

!! This method does not fork, as soon as 2 ancestors are !! hit the list terminates.

Returns:

  • (Array)

    of ancestral otu_ids



407
408
409
410
411
412
413
414
415
416
# File 'app/models/otu.rb', line 407

# Follows `parent_otu_id` upwards until it returns nil.
#
# @param prefer_unlabelled_otus [Boolean] forwarded to every `parent_otu_id` call
# @return [Array] ancestral otu ids, nearest ancestor first
def ancestor_otu_ids(prefer_unlabelled_otus: true)
  ids = []
  # Forward the caller's preference; it was previously hard-coded to true,
  # so passing `false` had no effect.
  parent_id = parent_otu_id(prefer_unlabelled_otus: prefer_unlabelled_otus)
  while parent_id
    ids.push(parent_id)
    # Walk one step up: the parent's own parent, resolved with the same preference.
    parent_id = Otu.find(parent_id).parent_otu_id(prefer_unlabelled_otus: prefer_unlabelled_otus)
  end
  ids
end

#check_required_fields ⇒ Object (protected)



547
548
549
550
551
552
# File 'app/models/otu.rb', line 547

# Adds an error on both :taxon_name_id and :name unless the record has a
# taxon_name_id, a name, or an already persisted taxon_name.
def check_required_fields
  # true, true, nil is not true: a missing taxon_name counts as "not persisted"
  return if taxon_name_id.present? || name.present? || taxon_name&.persisted?

  errors.add(:taxon_name_id, 'and/or name should be selected')
  errors.add(:name, 'and/or taxon name should be selected')
end

#coordinate_otus_with_children ⇒ Object

TODO: needs spec A convenience method to wrap coordinate_otus and descendant_of_taxon_name

Returns:

  • Scope



395
396
397
398
399
400
401
# File 'app/models/otu.rb', line 395

# @return [Scope] `Otu.coordinate_otus` for this Otu when it has no taxon name,
#   otherwise `Otu.descendant_of_taxon_name` for its valid taxon name
def coordinate_otus_with_children
  # Without a taxon name there is no hierarchy to descend.
  return Otu.coordinate_otus(id) if taxon_name_id.nil?

  valid_name = taxon_name.valid_taxon_name # TODO: why not taxon_name.cached_valid_taxon_name_id
  Otu.descendant_of_taxon_name(valid_name.id)
end

#coordinate_with?(otu_id) ⇒ Boolean

Returns whether or not this otu is coordinate (see coordinate_otus) with the otu identified by otu_id.

Returns:

  • (Boolean)

    whether or not this otu is coordinate (see coordinate_otus) with the otu identified by otu_id



368
369
370
# File 'app/models/otu.rb', line 368

# @param otu_id [Integer] id of the other Otu
# @return [Boolean] true when this record is in `Otu.coordinate_otus(otu_id)`
def coordinate_with?(otu_id)
  # Restrict the coordinate set of `otu_id` to this record's own id.
  only_self = { otus: { id: id } }
  Otu.coordinate_otus(otu_id).where(only_self).any?
end

#current_collection_objects ⇒ Object



362
363
364
# File 'app/models/otu.rb', line 362

# @return the `collection_objects` association filtered on
#   taxon_determinations.position = 1
def current_collection_objects
  first_position = { taxon_determinations: { position: 1 } }
  collection_objects.where(first_position)
end

#distribution_geoJSON ⇒ Object

TODO: move to helper method likely



385
386
387
388
389
390
# File 'app/models/otu.rb', line 385

# @return the `:distribution` aggregation of three GeoJSON feature collections:
#   asserted-distribution areas, collecting events, and collecting-event areas
def distribution_geoJSON
  # One feature collection per data source, aggregated in this fixed order.
  layers = [
    Gis::GeoJSON.feature_collection(geographic_areas_from_asserted_distributions, :asserted_distributions),
    Gis::GeoJSON.feature_collection(collecting_events, :collecting_events_georeferences),
    Gis::GeoJSON.feature_collection(geographic_areas_from_collecting_events, :collecting_events_geographic_area)
  ]
  Gis::GeoJSON.aggregation(layers, :distribution)
end

#dwc_occurrences ⇒ Object



535
536
537
538
539
540
541
542
543
# File 'app/models/otu.rb', line 535

# @return the union of the DwcOccurrence filter results reached through this
#   OTU's asserted distributions and through its collection objects
def dwc_occurrences
  # TODO FieldOccurrence in same pattern
  scopes = %i[asserted_distribution_query collection_object_query].map do |query_key|
    ::Queries::DwcOccurrence::Filter.new(query_key => { otu_id: id, project_id: project_id }).all
  end

  ::Queries.union(::DwcOccurrence, scopes)
end

#otu_name ⇒ Object

TODO: Deprecate for helper method, HTML does not belong here

this is also a costly sort because it n+1 to taxon_name


374
375
376
377
378
379
380
381
382
# File 'app/models/otu.rb', line 374

# @return [String, nil] the Otu's own name when present, otherwise its taxon
#   name's cached HTML name with author/year, or nil when it has neither
def otu_name
  # An explicit label always wins over nomenclature.
  return name if name.present?
  return nil if taxon_name_id.nil?

  taxon_name.cached_html_name_and_author_year
end

#parent_otu_id(skip_ranks: [], prefer_unlabelled_otus: false) ⇒ Otu#id, ...

Returns nil - there is no OTU parent with a valid taxon name possible id - the (unambiguous) id of the nearest parent OTU attached to a valid taxon name

Note this is used in the CoLDP export. Do not change without considering the impact there.

Returns:

  • (Otu#id, nil, false)

    nil - there is no OTU parent with a valid taxon name possible id - the (unambiguous) id of the nearest parent OTU attached to a valid taxon name

    Note this is used in the CoLDP export. Do not change without considering the impact there.



439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
# File 'app/models/otu.rb', line 439

# @param skip_ranks [Array] rank_class values excluded from the ancestor search
# @param prefer_unlabelled_otus [Boolean] when several Otus share the ancestor
#   name, keep only those without a `name`
# @return [Integer, nil] id of an Otu on the nearest valid ancestor taxon name, or nil
def parent_otu_id(skip_ranks: [], prefer_unlabelled_otus: false)
  return nil if taxon_name_id.nil?

  # TODO: Unify to a single query

  # Id of the closest (fewest generations) valid ancestor name that has an OTU,
  # excluding this OTU's own name and any name whose rank is in `skip_ranks`.
  nearest_ancestor = TaxonName.joins(:otus, :descendant_hierarchies)
    .that_is_valid
    .where.not(id: taxon_name_id)
    .where(taxon_name_hierarchies: {descendant_id: taxon_name_id})
    .where.not(rank_class: skip_ranks)
    .order('taxon_name_hierarchies.generations')
    .limit(1)
    .pluck(:id)
  return nil unless nearest_ancestor.size == 1

  matches = Otu.where(taxon_name_id: nearest_ancestor.first).to_a
  # Only when several OTUs share the name is the list narrowed to unlabelled
  # ones; this may leave nothing, in which case nil is returned.
  if prefer_unlabelled_otus && matches.size > 1
    matches = matches.select { |otu| otu.name.nil? }
  end

  matches.first&.id
end

#sv_duplicate_otu ⇒ Object (protected)



558
559
560
561
562
563
# File 'app/models/otu.rb', line 558

# Soft validation: flags the record when another OTU in the same project has
# the same taxon_name_id and name.
def sv_duplicate_otu
  twins = Otu.with_taxon_name_id(taxon_name_id)
    .with_name(name)
    .not_self(self)
    .with_project_id(project_id)
  return if twins.empty?

  soft_validations.add(:base, 'Another OTU with an identical nomenclature (taxon name) and name exists in this project')
end

#sv_taxon_name ⇒ Object (protected)



554
555
556
# File 'app/models/otu.rb', line 554

# Soft validation: flags an OTU that has no taxon_name_id.
def sv_taxon_name
  return unless taxon_name_id.nil?

  soft_validations.add(:taxon_name_id, 'Nomenclature (taxon name) is not assigned')
end

#taxa_by_geographic_area ⇒ Object

TODO: Re/move. Temporary method to generate a list of taxa from a geographic area and save it to a CSV file.



469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
# File 'app/models/otu.rb', line 469

# Temporary export: writes two CSV files under /tmp listing the taxa recorded
# in a hard-coded geographic area (and the two levels of areas below it), one
# from asserted distributions and one from collection objects.
def taxa_by_geographic_area
  # Each file name is stamped with the epoch second at which it was built.
  area = 'China'
  file_name1 = '/tmp/' + area + '_geographic_area_' + Time.now.to_i.to_s + '.csv'
  file_name2 = '/tmp/' + area + '_collection_object_' + Time.now.to_i.to_s + '.csv'
  # Ids of the named area (c1), its children (c2) and its grandchildren (c3).
  c1 = GeographicArea.where(name: area).pluck(:id)
  c2 = GeographicArea.where('parent_id in (?)', c1).pluck(:id)
  c3 = GeographicArea.where('parent_id in (?)', c2).pluck(:id)
  c = c1 + c2 + c3
  ad = AssertedDistribution.where('geographic_area_id in (?)', c)

  # File 1: one row per asserted distribution in those areas.
  CSV.open(file_name1, 'w') do |csv|
    csv << ['genus', 'species', 'geographic_area']
    ad.find_each do |z|
      # Records without an OTU, taxon name or valid taxon name are skipped.
      tn = z.otu&.taxon_name&.valid_taxon_name
      unless tn.nil?
        ga, gn, sp = nil, nil, nil
        # Build an "area, parent, name" label depending on how deep the
        # record's area sits below `area` (at most two levels).
        if z.geographic_area.name == area
          ga = area
        elsif z.geographic_area.parent.name == area
          ga = area + ', ' + z.geographic_area.name
        elsif z.geographic_area.parent.parent.name == area
          ga = area + ', ' + z.geographic_area.parent.name + ', ' + z.geographic_area.name
        end
        sp = tn.cached.to_s + ' ' + tn.cached_author_year.to_s
        # A row is only written when `ancestor_at_rank('genus')` returns something.
        tn1 = tn.ancestor_at_rank('genus')
        unless tn1.nil?
          gn = tn1&.cached.to_s + ' ' + tn1&.cached_author_year.to_s
          csv << [gn, sp, ga]
        end
      end
    end
  end

  co = CollectionObject.joins(:collecting_event).where('collecting_events.geographic_area_id in (?)', c)

  # File 2: one row per collection object collected in those areas.
  CSV.open(file_name2, 'w') do |csv|
    csv << ['genus', 'species', 'geographic_area', 'lat', 'long']
    co.find_each do |z|
      # Uses the last taxon determination of the collection object.
      tn = z.taxon_determinations.last&.otu&.taxon_name&.valid_taxon_name
      unless tn.nil?
        ga, gn, sp, lat, long = nil, nil, nil, nil, nil
        # NOTE: despite its name, `ce` is the collecting event's geographic area.
        ce = z.collecting_event.geographic_area
        if ce.name == area
          ga = area
        elsif ce.parent.name == area
          ga = area + ', ' + ce.name
        elsif ce.parent.parent.name == area
          ga = area + ', ' + ce.parent.name + ', ' + ce.name
        end
        # Coordinates come from the last georeference's geographic item; they are
        # only used when `to_a` yields exactly two values, read as [long, lat].
        lat_long = z.collecting_event&.georeferences&.last&.geographic_item&.to_a
        if !lat_long.nil? && lat_long.length == 2
          lat = lat_long[1]
          long = lat_long[0]
        end

        sp = tn.cached.to_s + ' ' + tn.cached_author_year.to_s
        # A row is only written when `ancestor_at_rank('genus')` returns something.
        tn1 = tn.ancestor_at_rank('genus')
        unless tn1.nil?
          gn = tn1&.cached.to_s + ' ' + tn1&.cached_author_year.to_s
          csv << [gn, sp, ga, lat, long]
        end
      end
    end
  end
end