Class: Queries::TaxonName::Tabular

Inherits:
Query::Autocomplete show all
Includes:
Helpers
Defined in:
lib/queries/taxon_name/tabular.rb

Overview

Summarize data across a nomenclatural hierarchy.

Results are Arrays, not AR objects.

The approach is to use OVER/PARTITION to build a ranked list, then to summarize only those ranked data requested. When we select without preserving all ranks, then the data are not sortable according to their overall nesting.

The approach only succeeds on the hierarchical data because we cache the current valid id for every name, this allows us to build an ordered, nested by hierarchy list across not only valid, but also combination and invalid names.

These were useful references:

* https://sonnym.github.io/2017/06/05/common-table-expressions-in-activerecord-a-case-study-of-quantiles/
* https://blog.codeship.com/folding-postgres-window-functions-into-rails/

Instance Attribute Summary collapse

Attributes inherited from Query::Autocomplete

#dynamic_limit, #project_id, #query_string

Attributes inherited from Query

#query_string, #terms

Instance Method Summary collapse

Methods included from Helpers

#boolean_param, #integer_param, #split_pairs, #split_repeated_pairs

Methods inherited from Query::Autocomplete

#autocomplete, #autocomplete_cached, #autocomplete_cached_wildcard_anywhere, #autocomplete_common_name_exact, #autocomplete_common_name_like, #autocomplete_exact_id, #autocomplete_exactly_named, #autocomplete_named, #autocomplete_ordered_wildcard_pieces_in_cached, #cached_facet, #combine_or_clauses, #common_name_name, #common_name_table, #common_name_wild_pieces, #exactly_named, #fragments, #integers, #least_levenshtein, #match_wildcard_end_in_cached, #match_wildcard_in_cached, #named, #only_ids, #only_integers?, #parent, #parent_child_join, #parent_child_where, #pieces, #safe_integers, #scope, #string_fragments, #wildcard_wrapped_integers, #wildcard_wrapped_years, #with_cached, #with_cached_like, #with_id, #with_project_id, #year_letter, #years

Methods inherited from Query

#alphabetic_strings, #alphanumeric_strings, base_name, #base_name, #build_terms, #cached_facet, #end_wildcard, #levenshtein_distance, #match_ordered_wildcard_pieces_in_cached, #no_terms?, referenced_klass, #referenced_klass, #referenced_klass_except, #referenced_klass_intersection, #referenced_klass_union, #start_and_end_wildcard, #start_wildcard, #table, #wildcard_pieces

Constructor Details

#initialize(params = {}) ⇒ Tabular

Returns a new instance of Tabular.

Parameters:

  • params (Hash) (defaults to: {})

    keys are symbols



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/queries/taxon_name/tabular.rb', line 88

def initialize(params = {})
  super(nil, project_id: params[:project_id]) # We don't actually use project_id here

  @taxon_name_id = params[:taxon_name_id]
  @combinations = boolean_param(params, :combinations)
  @limit = params[:limit]
  @ranks = params[:ranks]
  @rank_data = params[:rank_data]
  @validity = boolean_param(params, :validity)
  @fieldsets = params[:fieldsets]
  @rank_id_fields = []
  @otu_observation_count = boolean_param(params, :otu_observation_count)
  @otu_observation_depictions = boolean_param(params, :otu_observation_depictions)
  @descriptors_scored_for_otu = boolean_param(params, :descriptors_scored_for_otu)
  @otus = boolean_param(params, :otus)

  initialize_columns
  build_query
end

Instance Attribute Details

#ancestorProtonym?

Returns:



76
77
78
# File 'lib/queries/taxon_name/tabular.rb', line 76

def ancestor
  @ancestor
end

#columnsArray

Returns of hashes, with name of column, and SQL field projection definition

{ header: 'foo'
 projected: 'taxon_names.id'  }.

Returns:

  • (Array)

    of hashes, with name of column, and SQL field projection definition

    { header: 'foo'
     projected: 'taxon_names.id'  }
    


84
85
86
# File 'lib/queries/taxon_name/tabular.rb', line 84

def columns
  @columns
end

#combinationsBoolean

Returns if true than also include Combinations in counts.

Returns:

  • (Boolean)

    if true than also include Combinations in counts



41
42
43
# File 'lib/queries/taxon_name/tabular.rb', line 41

def combinations
  @combinations
end

#descriptors_scored_for_otuBoolean

Returns if true than also include Combinations in counts.

Returns:

  • (Boolean)

    if true than also include Combinations in counts



53
54
55
# File 'lib/queries/taxon_name/tabular.rb', line 53

def descriptors_scored_for_otu
  @descriptors_scored_for_otu
end

#fieldsetsArray of Strings

Returns named sets of columns to include valid values are

- nomenclatural_stats
- observations.

Returns:

  • (Array of Strings)

    named sets of columns to include valid values are

    - nomenclatural_stats
    - observations
    


60
61
62
# File 'lib/queries/taxon_name/tabular.rb', line 60

def fieldsets
  @fieldsets
end

#limitObject

!! CAREFUL, too small limits will not properly nest all names !!

defaults to 1000


37
38
39
# File 'lib/queries/taxon_name/tabular.rb', line 37

def limit
  @limit
end

#otu_observation_countBoolean

Returns if true than also include Combinations in counts.

Returns:

  • (Boolean)

    if true than also include Combinations in counts



45
46
47
# File 'lib/queries/taxon_name/tabular.rb', line 45

def otu_observation_count
  @otu_observation_count
end

#otu_observation_depictionsBoolean

Returns if true than also include Combinations in counts.

Returns:

  • (Boolean)

    if true than also include Combinations in counts



49
50
51
# File 'lib/queries/taxon_name/tabular.rb', line 49

def otu_observation_depictions
  @otu_observation_depictions
end

#otusBoolean

Returns if true then only names with otus.

Returns:

  • (Boolean)

    if true then only names with otus



68
69
70
# File 'lib/queries/taxon_name/tabular.rb', line 68

def otus
  @otus
end

#querythe build query

Returns:



73
74
75
# File 'lib/queries/taxon_name/tabular.rb', line 73

def query
  @query
end

#rank_dataArray

ORDER MATTERS

Returns:

  • (Array)

    like ‘family’, ‘genus’, ‘species’



33
34
35
# File 'lib/queries/taxon_name/tabular.rb', line 33

def rank_data
  @rank_data
end

#rank_id_fieldsObject

Returns the value of attribute rank_id_fields.



78
79
80
# File 'lib/queries/taxon_name/tabular.rb', line 78

def rank_id_fields
  @rank_id_fields
end

#ranksArray

ORDER matters

Returns:

  • (Array)

    like ‘family’, ‘genus’, ‘species’



28
29
30
# File 'lib/queries/taxon_name/tabular.rb', line 28

def ranks
  @ranks
end

#taxon_name_idInteger

Returns required, the id scoping the result set.

Returns:

  • (Integer)

    required, the id scoping the result set



23
24
25
# File 'lib/queries/taxon_name/tabular.rb', line 23

def taxon_name_id
  @taxon_name_id
end

#validityBoolean

Returns if true then only include valid names.

Returns:

  • (Boolean)

    if true then only include valid names



64
65
66
# File 'lib/queries/taxon_name/tabular.rb', line 64

def validity
  @validity
end

Instance Method Details

#add_descriptors_scored_for_otu(query, o) ⇒ Object



265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
# File 'lib/queries/taxon_name/tabular.rb', line 265

def add_descriptors_scored_for_otu(query, o)
  f  = 'descriptors_scored_for_otus'
  fa = 'fs_d1'

  @columns << {
    header: f,
    projected: %("#{fa}"."#{f}" as #{f})
  }

  z = o.project(
    o[:observation_object_id],
    o[:observation_object_type],
    o[:descriptor_id].count(true).as(f)
  ).group(
    o[:observation_object_id],
    o[:observation_object_type]
  ).as(fa)

  query.join(z, Arel::Nodes::OuterJoin).on(
    z[:observation_object_id].eq(otu_table[:id])
      .and(z[:observation_object_type].eq('Otu'))
  )
end

#add_otu_observation_count(query, o) ⇒ Object



289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
# File 'lib/queries/taxon_name/tabular.rb', line 289

def add_otu_observation_count(query, o)
  f  = 'otu_observation_count'
  fa = 'fs_o1'

  @columns << {
    header: f,
    projected: %("#{fa}"."#{f}" as #{f})
  }

  x = o.project(
    o[:observation_object_id],
    o[:observation_object_type],
    o[:id].count.as(f)
  ).group(
    o[:observation_object_id],
    o[:observation_object_type]
  ).as(fa)

  query.join(x, Arel::Nodes::OuterJoin).on(
    x[:observation_object_id].eq(otu_table[:id])
      .and(x[:observation_object_type].eq('Otu'))
  )
end

#add_otu_observation_depictions(query, o) ⇒ Object



313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
# File 'lib/queries/taxon_name/tabular.rb', line 313

def add_otu_observation_depictions(query, o)
  f  = 'otu_observation_depictions'
  fa = 'fs_o2'

  @columns << {
    header: f,
    projected: %("#{fa}"."#{f}" as #{f})
  }

  p = ::Depiction.arel_table

  y = o.join(p, Arel::Nodes::InnerJoin).on(
    o[:id].eq(p[:depiction_object_id])
      .and(p[:depiction_object_type].eq('Observation'))
  ).project(
    o[:observation_object_id],
    p[:depiction_object_type],
    p[:id].count.as(f)
  ).group(
    o[:observation_object_id],
    p[:depiction_object_type]
  ).as(fa)

  query.join(y, Arel::Nodes::OuterJoin).on(
    y[:observation_object_id].eq(otu_table[:id])
  )
end

#allObject



254
255
256
# File 'lib/queries/taxon_name/tabular.rb', line 254

def all
  ApplicationRecord.connection.execute(@query.to_sql)
end

#base_queryObject



170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/queries/taxon_name/tabular.rb', line 170

def base_query
  q = table
  h = hierarchy_table

  # Scope all names in the result
  a = table[:id].eq(h[:descendant_id])
    .and(h[:ancestor_id].eq(taxon_name_id) )
  a = a.and(table[:cached_is_valid].eq(true)) if validity
  #a = a.and(table[:cached_valid_taxon_name_id].eq(table[:id])) if validity

  # Start a query
  q = q.join(h, Arel::Nodes::InnerJoin).on(a)

  # TODO: not always true now
  # All results can reference an otu
  q.join(otu_table, Arel::Nodes::OuterJoin).on(
    otu_table[:taxon_name_id].eq( table[:id] ).and( otu_table[:id].not_eq(nil)))
end

#build_queryObject



189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# File 'lib/queries/taxon_name/tabular.rb', line 189

def build_query
  q = base_query
  q = filter_by_otus(q)

  # Add a join to the valid parent_id
  valid_table = table.alias('valid_taxon_names')
  q = q.join(valid_table, Arel::Nodes::InnerJoin).on(
    table[:cached_valid_taxon_name_id].eq(valid_table[:id])
  )

  # Setup the basic joins for each requested rank,
  # these are re-used in data gathering queries.
  ranks.each_with_index do |r, i|
    l = table.alias("j_#{i}")

    a = l[:id].eq(hierarchy_table[:descendant_id])
    a = a.and( l[:rank_class].eq( Ranks.lookup(ancestor.nomenclatural_code, ranks[i])) )

    q = q.join(l, Arel::Nodes::OuterJoin).on(a)

    @columns.push( {header: r, projected:  l[:name].as( ranks[i] )  } )

    @rank_id_fields.push l[:id]
  end

  # add individual sets of data columns
  fieldsets.each do |f|
    q = send(f + '_set', q)
  end

  # TODO: rank_classes getter should merge nil when combinations,
  # i.e. we likely don't need nil
  w = table[:rank_class].in(rank_classes)
  w = w.or(table[:rank_class].eq(nil)) if combinations

  q = q.project(*projected_columns)
  q = q.from(table).where(w).distinct
  q = q.take(limit.to_i) if limit

  @query = table.project( Arel::Nodes::SqlLiteral.new('*') ).from(q.as('p')).order( 'cached_valid_taxon_name_id', 'ro' ) # if every field had a value this would work
end

#coalesce_ranks(fields) ⇒ Object



240
241
242
# File 'lib/queries/taxon_name/tabular.rb', line 240

def coalesce_ranks(fields)
  Arel::Nodes::NamedFunction.new('coalesce', [fields])
end

#column_headersObject



137
138
139
# File 'lib/queries/taxon_name/tabular.rb', line 137

def column_headers
  columns.collect{|c| c[:header]}
end

#filter_by_otus(query) ⇒ Object



259
260
261
262
263
# File 'lib/queries/taxon_name/tabular.rb', line 259

def filter_by_otus(query)
  return query unless @otus

  query.where(otu_table[:id].not_eq(nil))
end

#finest_rank_idObject



236
237
238
# File 'lib/queries/taxon_name/tabular.rb', line 236

def finest_rank_id
  coalesce_ranks(rank_id_fields.reverse)
end

#hierarchy_tableObject



162
163
164
# File 'lib/queries/taxon_name/tabular.rb', line 162

def hierarchy_table
  Arel::Table.new(:taxon_name_hierarchies)
end

#initialize_columnsObject

These are generic headers used for existing queries, ultimately subclass this with a reference to core, if it can be abstracted as such



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/queries/taxon_name/tabular.rb', line 110

def initialize_columns
  @columns = []
  valid_table = table.alias('valid_taxon_names')

  @columns.push({ header: 'ro', projected: rank_over(table, valid_table) })

  @columns.push({header: 'taxon_name_id', projected: table[:id].as('taxon_name_id') } )
  @columns.push({header: 'cached_valid_taxon_name_id', projected: table[:cached_valid_taxon_name_id].as('cached_valid_taxon_name_id') } )
  @columns.push({header: 'cached', projected: table[:cached].as('cached') } )
  @columns.push({header: 'cached_author_year', projected: table[:cached_author_year].as('cached_author_year') } )
  @columns.push({header: 'cached_is_valid', projected: table[:cached_is_valid].as('cached_is_valid') } )

  if fieldsets.include?('observations')
    @columns.push({header: 'otu_id', projected: otu_table[:id].as('otu_id')  } )
    @columns.push({header: 'otu_name', projected:  otu_table[:name].as('otu_name')} )
  end
end

#nomenclatural_stats_set(query) ⇒ Object



352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
# File 'lib/queries/taxon_name/tabular.rb', line 352

def nomenclatural_stats_set(query)
  c = rank_data

  i = 0
  c.each do |r|
    %w{valid invalid}.each do |v|
      nomenclature_stats_column(query, r, v, i)
      i += 1
    end
  end

  if combinations
    nomenclature_stats_column(query, 'combination', nil, i)
  end

  query
end

#nomenclature_stats_column(query, rank, valid = nil, index = 0) ⇒ Object



370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
# File 'lib/queries/taxon_name/tabular.rb', line 370

def nomenclature_stats_column(query, rank, valid = nil, index = 0)
  i = index
  v = valid
  r = rank

  h = hierarchy_table

  # May be alias?
  t = table

  s = [v, r].compact.join('_')
  a = ['ns_o', r, i, v].join('_')
  @columns.push({header: s, projected: "\"#{a}\".\"#{s}\" as #{s}" })

  x = t.where(
    # h[:generations].gt(0)
    t[:cached_is_valid].eq( v == 'valid' ? true : false )
    .and( t[:rank_class].eq( Ranks.lookup(ancestor.nomenclatural_code, r) ) )
  ).join(h, Arel::Nodes::InnerJoin).on(

    h[:descendant_id].eq( t[:cached_valid_taxon_name_id] )  ) # h[:descendant_id].eq( t[:id] ) )
    .project(
      h[:ancestor_id],
      t[:id].count.as( s )
    ).group(h[:ancestor_id]) # TODO: Might be wrong
      .as(a)

    query.join(x, Arel::Nodes::OuterJoin).on( x[:ancestor_id].eq( finest_rank_id  ))
    query
end

#observations_set(query) ⇒ Object

Only calculates for OTUs, not through to get CollectionObjects, nor Extracts



343
344
345
346
347
348
349
350
# File 'lib/queries/taxon_name/tabular.rb', line 343

def observations_set(query)
  o = ::Observation.arel_table
  
  add_descriptors_scored_for_otu(query, o) if @descriptors_scored_for_otu != false
  add_otu_observation_count(query, o) if @otu_observation_count != false
  add_otu_observation_depictions(query, o) if @otu_observation_depictions != false
  query
end

#otu_tableObject



166
167
168
# File 'lib/queries/taxon_name/tabular.rb', line 166

def otu_table
  ::Otu.arel_table
end

#projected_columnsObject



141
142
143
# File 'lib/queries/taxon_name/tabular.rb', line 141

def projected_columns
  columns.collect{|c| c[:projected]}
end

#rank_classesObject



231
232
233
# File 'lib/queries/taxon_name/tabular.rb', line 231

def rank_classes
  ranks.collect{|r| Ranks.lookup(ancestor.nomenclatural_code, r)}
end

#rank_over(source_table, valid_source_table) ⇒ Object

Lets us generate a sort order across valid, invalid, and combinations



245
246
247
248
249
250
251
252
# File 'lib/queries/taxon_name/tabular.rb', line 245

def rank_over(source_table, valid_source_table)
  Arel::Nodes::Over.new(
    Arel::Nodes::SqlLiteral.new('rank()'),
    Arel::Nodes::Window.new.partition(
      [ valid_source_table[:parent_id] ]
    ).order( source_table[:cached_valid_taxon_name_id], source_table[:cached], source_table[:name] )
  ).as('ro')
end