Class: Queries::TaxonName::Tabular

Inherits:
Query
  • Object
show all
Includes:
Helpers
Defined in:
lib/queries/taxon_name/tabular.rb

Overview

Summarize data across a nomenclatural hierarchy.

Results are Arrays, not AR objects.

The approach is to use OVER/PARTITION to build a ranked list, then to summarize only those ranked data requested. When we select without preserving all ranks, then the data are not sortable according to their overall nesting.

The approach only succeeds on the hierarchical data because we cache the current valid id for every name, this allows us to build an ordered, nested by hierarchy list across not only valid, but also combination and invalid names.

These were useful references:

* https://sonnym.github.io/2017/06/05/common-table-expressions-in-activerecord-a-case-study-of-quantiles/
* https://blog.codeship.com/folding-postgres-window-functions-into-rails/

Instance Attribute Summary collapse

Attributes inherited from Query

#dynamic_limit, #options, #project_id, #query_string, #terms

Instance Method Summary collapse

Methods included from Helpers

#boolean_param

Methods inherited from Query

#alphabetic_strings, #alphanumeric_strings, #attribute_exact_facet, #autocomplete, #autocomplete_cached, #autocomplete_cached_wildcard_anywhere, #autocomplete_common_name_exact, #autocomplete_common_name_like, #autocomplete_exact_id, #autocomplete_exactly_named, #autocomplete_named, #autocomplete_ordered_wildcard_pieces_in_cached, #build_terms, #cached, #combine_or_clauses, #common_name_name, #common_name_table, #common_name_wild_pieces, #end_wildcard, #exactly_named, #fragments, #integers, #levenshtein_distance, #match_ordered_wildcard_pieces_in_cached, #match_wildcard_end_in_cached, #match_wildcard_in_cached, #named, #no_terms?, #only_ids, #only_integers?, #parent, #parent_child_join, #parent_child_where, #pieces, #result, #scope, #start_and_end_wildcard, #start_wildcard, #string_fragments, #wildcard_pieces, #wildcard_wrapped_integers, #wildcard_wrapped_years, #with_cached, #with_cached_like, #with_id, #with_project_id, #year_letter, #years

Constructor Details

#initialize(params = {}) ⇒ Tabular

Returns a new instance of Tabular.

Parameters:

  • params (Hash) (defaults to: {})

    keys are symbols



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/queries/taxon_name/tabular.rb', line 72

def initialize(params = {})
  super(nil, project_id: params[:project_id]) # We don't actually use project_id here

  @ancestor_id = params[:ancestor_id]
  @combinations = boolean_param(params, :combinations)
  @limit = params[:limit]
  @ranks = params[:ranks]
  @rank_data = params[:rank_data]
  @validity = boolean_param(params, :validity)
  @fieldsets = params[:fieldsets]
  @rank_id_fields = []

  initialize_columns
  build_query
end

Instance Attribute Details

#ancestorProtonym?

Returns:



60
61
62
# File 'lib/queries/taxon_name/tabular.rb', line 60

def ancestor
  @ancestor
end

#ancestor_idInteger

Returns required, the id scoping the result set.

Returns:

  • (Integer)

    required, the id scoping the result set



23
24
25
# File 'lib/queries/taxon_name/tabular.rb', line 23

def ancestor_id
  @ancestor_id
end

#columnsArray

Returns of hashes, with name of column, and SQL field projection definition

{ header: 'foo'
 projected: 'taxon_names.id'  }.

Returns:

  • (Array)

    of hashes, with name of column, and SQL field projection definition

    { header: 'foo'
     projected: 'taxon_names.id'  }
    


68
69
70
# File 'lib/queries/taxon_name/tabular.rb', line 68

def columns
  @columns
end

#combinationsBoolean

Returns if true than also include Combinations in counts.

Returns:

  • (Boolean)

    if true than also include Combinations in counts



41
42
43
# File 'lib/queries/taxon_name/tabular.rb', line 41

def combinations
  @combinations
end

#fieldsetsArray of Strings

Returns named sets of columns to include valid values are

- nomenclatural_stats
- observations.

Returns:

  • (Array of Strings)

    named sets of columns to include valid values are

    - nomenclatural_stats
    - observations
    


48
49
50
# File 'lib/queries/taxon_name/tabular.rb', line 48

def fieldsets
  @fieldsets
end

#limitObject

!! CAREFUL, too small limits will not properly nest all names !!

defaults to 1000


37
38
39
# File 'lib/queries/taxon_name/tabular.rb', line 37

def limit
  @limit
end

#querythe build query

Returns:



57
58
59
# File 'lib/queries/taxon_name/tabular.rb', line 57

def query
  @query
end

#rank_dataArray

ORDER MATTERS

Returns:

  • (Array)

    like 'family', 'genus', 'species'



33
34
35
# File 'lib/queries/taxon_name/tabular.rb', line 33

def rank_data
  @rank_data
end

#rank_id_fieldsObject

Returns the value of attribute rank_id_fields.



62
63
64
# File 'lib/queries/taxon_name/tabular.rb', line 62

def rank_id_fields
  @rank_id_fields
end

#ranksArray

ORDER matters

Returns:

  • (Array)

    like 'family', 'genus', 'species'



28
29
30
# File 'lib/queries/taxon_name/tabular.rb', line 28

def ranks
  @ranks
end

#validityBoolean

Returns if true then only include valid names.

Returns:

  • (Boolean)

    if true then only include valid names



52
53
54
# File 'lib/queries/taxon_name/tabular.rb', line 52

def validity
  @validity
end

Instance Method Details

#allObject



237
238
239
# File 'lib/queries/taxon_name/tabular.rb', line 237

def all
  ApplicationRecord.connection.execute(@query.to_sql)
end

#base_queryObject



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/queries/taxon_name/tabular.rb', line 154

def base_query
  q = table
  h = hierarchy_table

  # Scope all names in the result
  a = table[:id].eq(h[:descendant_id])
    .and(h[:ancestor_id].eq(ancestor_id) )
  a = a.and(table[:cached_is_valid].eq(true)) if validity
  #a = a.and(table[:cached_valid_taxon_name_id].eq(table[:id])) if validity

  # Start a query
  q = q.join(h, Arel::Nodes::InnerJoin).on(a)

  # TODO: not always true now
  # All results can reference an otu
  q.join(otu_table, Arel::Nodes::OuterJoin).on(
    otu_table[:taxon_name_id].eq( table[:id] ).and( otu_table[:id].not_eq(nil)))
end

#build_queryObject



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/queries/taxon_name/tabular.rb', line 173

def build_query
  q = base_query

  # Add a join to the valid parent_id
  valid_table = table.alias('valid_taxon_names')
  q = q.join(valid_table, Arel::Nodes::InnerJoin).on(
    table[:cached_valid_taxon_name_id].eq(valid_table[:id])
  )

  # Setup the basic joins for each requested rank,
  # these are re-used in data gathering queries.
  ranks.each_with_index do |r, i|
    l = table.alias("j_#{i}")

    a = l[:id].eq(hierarchy_table[:descendant_id])
    a = a.and( l[:rank_class].eq( Ranks.lookup(ancestor.nomenclatural_code, ranks[i])) )

    q = q.join(l, Arel::Nodes::OuterJoin).on(a)

    @columns.push( {header: r, projected:  l[:name].as( ranks[i] )  } )

    @rank_id_fields.push l[:id]
  end

  # add individual sets of data columns
  fieldsets.each do |f|
    q = send(f + '_set', q)
  end

  # TODO: rank_classes getter should merge nil when combinations,
  # i.e. we likely don't need nil
  w = table[:rank_class].eq_any(rank_classes)
  w = w.or(table[:rank_class].eq(nil)) if combinations

  q = q.project(*projected_columns)
  q = q.from(table).where(w).distinct
  q = q.take(limit.to_i) if limit

  @query = table.project( Arel::Nodes::SqlLiteral.new('*') ).from(q.as('p')).order( 'cached_valid_taxon_name_id', 'ro' ) # if every field had a value this would work
end

#coalesce_ranks(fields) ⇒ Object



223
224
225
# File 'lib/queries/taxon_name/tabular.rb', line 223

def coalesce_ranks(fields)
  Arel::Nodes::NamedFunction.new('coalesce', [fields])
end

#column_headersObject



117
118
119
# File 'lib/queries/taxon_name/tabular.rb', line 117

def column_headers
  columns.collect{|c| c[:header]}
end

#finest_rank_idObject



219
220
221
# File 'lib/queries/taxon_name/tabular.rb', line 219

def finest_rank_id
  coalesce_ranks(rank_id_fields.reverse)
end

#hierarchy_tableObject



146
147
148
# File 'lib/queries/taxon_name/tabular.rb', line 146

def hierarchy_table
  Arel::Table.new(:taxon_name_hierarchies)
end

#initialize_columnsObject

These are generic headers used for existing queries, ultimately subclass this with a reference to core, if it can be abstracted as such



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/queries/taxon_name/tabular.rb', line 90

def initialize_columns
  @columns = []
  valid_table = table.alias('valid_taxon_names')

  @columns.push({ header: 'ro', projected: rank_over(table, valid_table) })

  @columns.push({header: 'taxon_name_id', projected: table[:id].as('taxon_name_id') } )
  @columns.push({header: 'cached_valid_taxon_name_id', projected: table[:cached_valid_taxon_name_id].as('cached_valid_taxon_name_id') } )
  @columns.push({header: 'cached', projected: table[:cached].as('cached') } )
  @columns.push({header: 'cached_author_year', projected: table[:cached_author_year].as('cached_author_year') } )
  @columns.push({header: 'cached_is_valid', projected: table[:cached_is_valid].as('cached_is_valid') } )

  if fieldsets.include?('observations')
    @columns.push({header: 'otu_id', projected: otu_table[:id].as('otu_id')  } )
    @columns.push({header: 'otu_name', projected:  otu_table[:name].as('otu_name')} )
  end
end

#nomenclatural_stats_set(query) ⇒ Object



303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
# File 'lib/queries/taxon_name/tabular.rb', line 303

def nomenclatural_stats_set(query)
  c = rank_data

  i = 0
  c.each do |r|
    %w{valid invalid}.each do |v|
      nomenclature_stats_column(query, r, v, i)
      i += 1
    end
  end

  nomenclature_stats_column(query, 'combination', nil, i)

  query
end

#nomenclature_stats_column(query, rank, valid = nil, index = 0) ⇒ Object



319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# File 'lib/queries/taxon_name/tabular.rb', line 319

def nomenclature_stats_column(query, rank, valid = nil, index = 0)
  i = index
  v = valid
  r = rank

  h = hierarchy_table

  # May be alias?
  t = table

  s = [v, r].compact.join('_')
  a = ['ns_o', r, i, v].join('_')
  @columns.push({header: s, projected: "\"#{a}\".\"#{s}\" as #{s}" })

  x = t.where(
    # h[:generations].gt(0)
    t[:cached_is_valid].eq( v == 'valid' ? true : false )
    .and( t[:rank_class].eq( Ranks.lookup(ancestor.nomenclatural_code, r) ) )
  ).join(h, Arel::Nodes::InnerJoin).on(

    h[:descendant_id].eq( t[:cached_valid_taxon_name_id] )  ) # h[:descendant_id].eq( t[:id] ) )
    .project(
      h[:ancestor_id],
      t[:id].count.as( s )
    ).group(h[:ancestor_id]) # TODO: Might be wrong
      .as(a)

    query.join(x, Arel::Nodes::OuterJoin).on( x[:ancestor_id].eq( finest_rank_id  ))
    query
end

#observations_set(query) ⇒ Object

Only calculates for OTUs, not through to get CollectionObjects, nor Extracts



243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'lib/queries/taxon_name/tabular.rb', line 243

def observations_set(query)
  o = ::Observation.arel_table

  # Descriptors on OTUs scored
  f = 'descriptors_scored_for_otus'
  fa = 'fs_d1'
  @columns.push({header: f, projected: '"' + fa + '"."' + f + '" as ' + f } )

  z = o.project(
    o[:observation_object_id],
    o[:observation_object_type],
    o[:descriptor_id].count(true).as(f) # count(true) == distinct
  ).group(
    o[:observation_object_id],
    o[:observation_object_type],
  ).as(fa)

  query.join(z, Arel::Nodes::OuterJoin).on(
    z[:observation_object_id].eq(otu_table[:id])
    .and(z[:observation_object_type].eq('Otu'))
  )

  # Observations on OTUs
  f = 'otu_observation_count'
  fa = 'fs_o1'
  @columns.push({header: f, projected: '"' + fa +  '"."' + f + '" as ' + f })

  x = o.project(
    o[:observation_object_id],
    o[:observation_object_type],
    o[:id].count.as(f)
  ).group(o[:observation_object_id], o[:observation_object_type]).as(fa)

  query.join(x, Arel::Nodes::OuterJoin).on(x[:observation_object_id].eq(otu_table[:id]).and( x[:observation_object_type].eq('Otu')  ))

  # Depictions on observations on OTUs
  f = 'otu_observation_depictions'
  fa = 'fs_o2'
  @columns.push( {header: f, projected: '"' + fa + '"."' + f + '" as ' + f } )

  p = ::Depiction.arel_table
  y = o.join(p, Arel::Nodes::InnerJoin).on(
    o[:id].eq(p[:depiction_object_id]).and(
      p[:depiction_object_type].eq('Observation')
    )
  ).project(
    o[:observation_object_id], # an OTU id
    p[:depiction_object_type],
    p[:id].count.as(f)
  ).group(
    o[:observation_object_id],
    p[:depiction_object_type]
  ).as(fa)

  query.join(y, Arel::Nodes::OuterJoin).on(
    y[:observation_object_id].eq(otu_table[:id])
  )
  query
end

#otu_tableObject



150
151
152
# File 'lib/queries/taxon_name/tabular.rb', line 150

def otu_table
  ::Otu.arel_table
end

#projected_columnsObject



121
122
123
# File 'lib/queries/taxon_name/tabular.rb', line 121

def projected_columns
  columns.collect{|c| c[:projected]}
end

#rank_classesObject



214
215
216
# File 'lib/queries/taxon_name/tabular.rb', line 214

def rank_classes
  ranks.collect{|r| Ranks.lookup(ancestor.nomenclatural_code, r)}
end

#rank_over(source_table, valid_source_table) ⇒ Object

Lets us generate a sort order across valid, invalid, and combinations



228
229
230
231
232
233
234
235
# File 'lib/queries/taxon_name/tabular.rb', line 228

def rank_over(source_table, valid_source_table)
  Arel::Nodes::Over.new(
    Arel::Nodes::SqlLiteral.new('rank()'),
    Arel::Nodes::Window.new.partition(
      [ valid_source_table[:parent_id] ]
    ).order( source_table[:cached_valid_taxon_name_id], source_table[:cached], source_table[:name] )
  ).as('ro')
end

#tableObject



142
143
144
# File 'lib/queries/taxon_name/tabular.rb', line 142

def table
  ::TaxonName.arel_table
end