Class: Queries::TaxonName::Tabular

Inherits:
Query
  • Object
show all
Defined in:
lib/queries/taxon_name/tabular.rb

Overview

Summarize data across a nomenclatural hierarchy.

Results are Arrays, not AR objects.

The approach is to use OVER/PARTITION build ranked list, then to summarize only those ranked data requested. When we select without preserving all ranks, then the data are not sortable according to their overall neesting.

The approach only succeeds on the hierarchical data because we cache the current valid id for every name, this allows us to build an ordered, nested by hierarchy list across not only valid, but also combination and invalid names.

These were useful references:

* https://sonnym.github.io/2017/06/05/common-table-expressions-in-activerecord-a-case-study-of-quantiles/
* https://blog.codeship.com/folding-postgres-window-functions-into-rails/

Instance Attribute Summary collapse

Attributes inherited from Query

#dynamic_limit, #options, #project_id, #query_string, #terms

Instance Method Summary collapse

Methods inherited from Query

#alphabetic_strings, #attribute_exact_facet, #autocomplete, #autocomplete_cached, #autocomplete_cached_wildcard_anywhere, #autocomplete_common_name_exact, #autocomplete_common_name_like, #autocomplete_exact_id, #autocomplete_exactly_named, #autocomplete_named, #autocomplete_ordered_wildcard_pieces_in_cached, #build_terms, #cached, #combine_or_clauses, #common_name_name, #common_name_table, #common_name_wild_pieces, #end_wildcard, #exactly_named, #fragments, #integers, #levenshtein_distance, #match_ordered_wildcard_pieces_in_cached, #match_wildcard_end_in_cached, #match_wildcard_in_cached, #named, #no_terms?, #only_ids, #only_integers?, #parent, #parent_child_join, #parent_child_where, #pieces, #result, #scope, #start_and_end_wildcard, #start_wildcard, #wildcard_pieces, #wildcard_wrapped_integers, #wildcard_wrapped_years, #with_cached, #with_cached_like, #with_id, #with_project_id, #year_letter, #years

Constructor Details

#initialize(params = {}) ⇒ Tabular

Returns a new instance of Tabular.

Parameters:

  • params (Hash) (defaults to: {})

    keys are symbols



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/queries/taxon_name/tabular.rb', line 69

def initialize(params = {}) 
  super(nil, project_id: params[:project_id]) # We don't actually use project_id here

  @ancestor_id = params[:ancestor_id]
  @combinations = (params[:combinations]&.downcase == 'true' ? true : false)
  @limit = params[:limit]
  @ranks = params[:ranks] || [] 
  @rank_data = params[:rank_data] || [] 
  @validity = (params[:validity]&.downcase == 'true' ? true : false)

  @column_headers = ['rank_over', 'otu_id', 'taxon_name_id', 'cached_valid_taxon_name_id', *ranks, 'cached', 'otu']
  @fieldsets = params[:fieldsets] || []
  @rank_id_fields = []
  @rank_joins = []
  @data_fields = []

  build_query
end

Instance Attribute Details

#ancestorProtonym?

Internal

Returns:



65
66
67
# File 'lib/queries/taxon_name/tabular.rb', line 65

def ancestor
  @ancestor
end

#ancestor_idInteger

Returns required, the id scoping the result set.

Returns:

  • (Integer)

    required, the id scoping the result set



21
22
23
# File 'lib/queries/taxon_name/tabular.rb', line 21

def ancestor_id
  @ancestor_id
end

#column_headersObject

Returns the value of attribute column_headers.



58
59
60
# File 'lib/queries/taxon_name/tabular.rb', line 58

def column_headers
  @column_headers
end

#combinationsBoolean

Returns if true than also include Combinations in counts.

Returns:

  • (Boolean)

    if true than also include Combinations in counts



39
40
41
# File 'lib/queries/taxon_name/tabular.rb', line 39

def combinations
  @combinations
end

#data_fieldsObject

Returns the value of attribute data_fields.



57
58
59
# File 'lib/queries/taxon_name/tabular.rb', line 57

def data_fields
  @data_fields
end

#fieldsetsArray of Strings

Returns named sets of columns to include valid values are

- nomenclatural_stats
- observations.

Returns:

  • (Array of Strings)

    named sets of columns to include valid values are

    - nomenclatural_stats
    - observations
    


46
47
48
# File 'lib/queries/taxon_name/tabular.rb', line 46

def fieldsets
  @fieldsets
end

#limitObject

!! CAREFUL, too small limits will not properly nest all names !!

defaults to 1000


35
36
37
# File 'lib/queries/taxon_name/tabular.rb', line 35

def limit
  @limit
end

#querythe build query

Returns:



55
56
57
# File 'lib/queries/taxon_name/tabular.rb', line 55

def query
  @query
end

#rank_dataArray

ORDER MATTERS

Returns:

  • (Array)

    like 'family', 'genus', 'species'



31
32
33
# File 'lib/queries/taxon_name/tabular.rb', line 31

def rank_data
  @rank_data
end

#rank_id_fieldsObject

Returns the value of attribute rank_id_fields.



60
61
62
# File 'lib/queries/taxon_name/tabular.rb', line 60

def rank_id_fields
  @rank_id_fields
end

#rank_joinsObject

Returns the value of attribute rank_joins.



59
60
61
# File 'lib/queries/taxon_name/tabular.rb', line 59

def rank_joins
  @rank_joins
end

#ranksArray

ORDER matters

Returns:

  • (Array)

    like 'family', 'genus', 'species'



26
27
28
# File 'lib/queries/taxon_name/tabular.rb', line 26

def ranks
  @ranks
end

#validityBoolean

Returns if true then only include valid names.

Returns:

  • (Boolean)

    if true then only include valid names



50
51
52
# File 'lib/queries/taxon_name/tabular.rb', line 50

def validity
  @validity
end

Instance Method Details

#allObject



201
202
203
# File 'lib/queries/taxon_name/tabular.rb', line 201

def all
  ApplicationRecord.connection.execute(@query.to_sql)
end

#base_queryObject



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/queries/taxon_name/tabular.rb', line 109

def base_query
  q = table
  h = hierarchy_table

  # Scope all names in the result
  a = table[:id].eq(h[:descendant_id])
    .and(h[:ancestor_id].eq(ancestor_id) )
  a = a.and(table[:cached_valid_taxon_name_id].eq(table[:id])) if validity

  # Start a query 
  q = q.join(h, Arel::Nodes::InnerJoin).on(a)

  # All results can reference an otu
  q.join(otu_table, Arel::Nodes::OuterJoin).on(
    otu_table[:taxon_name_id].eq( table[:id] ).and( otu_table[:id].not_eq(nil)))
end

#build_queryObject



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/queries/taxon_name/tabular.rb', line 126

def build_query
  name_fields = {}

  q = base_query

  # Add a join to the valid parent_id 
  valid_table = table.alias('valid_taxon_names')
  q = q.join(valid_table, Arel::Nodes::InnerJoin).on(
    table[:cached_valid_taxon_name_id].eq(valid_table[:id])
  )

  # Setup the basic joins for each requested rank,
  # these are re-used in data gathering queries.
  ranks.each_with_index do |r, i|
    l = table.alias("j_#{i}")
    @rank_joins.push l 

    a = l[:id].eq(hierarchy_table[:descendant_id]) 
    a = a.and( l[:rank_class].eq( Ranks.lookup(ancestor.nomenclatural_code, ranks[i])) )

    q = q.join(l, Arel::Nodes::OuterJoin).on(a)

    name_fields[ ranks[i] ] = l[:name].as( ranks[i] )
    @rank_id_fields.push l[:id]
  end

  # add individual sets of data columns 
  fieldsets.each do |f|
    q = send(f + '_set', q) 
  end

  # scope the query to only specified ranks
  w = table[:rank_class].eq_any(rank_classes)
  w = w.or(table[:rank_class].eq(nil)) if combinations
 
  q = q.project(
    rank_over(table, valid_table), # the sort column
    otu_table[:id].as('otu_id'),        
    table[:id].as('taxon_name_id'),
    table[:cached_valid_taxon_name_id].as('cached_valid_taxon_name_id'),
    *name_fields.values,
    table[:cached].as('cached'),
    otu_table[:name].as('otu_name'),
    *data_fields,
  ).from(table).where(w).distinct

  # !! CAREFUL, too small limits will not properly nest all names !!
  q = q.take(limit.to_i) if limit

  @query = table.project( Arel::Nodes::SqlLiteral.new('*') ).from(q.as('p')).order( 'cached_valid_taxon_name_id', 'ro'  ) # if every field had a value this would work
end

#coalesce_ranks(fields) ⇒ Object



187
188
189
# File 'lib/queries/taxon_name/tabular.rb', line 187

def coalesce_ranks(fields)
  Arel::Nodes::NamedFunction.new('coalesce', [fields])
end

#finest_rank_idObject



183
184
185
# File 'lib/queries/taxon_name/tabular.rb', line 183

def finest_rank_id
  coalesce_ranks(rank_id_fields.reverse)
end

#hierarchy_tableObject



101
102
103
# File 'lib/queries/taxon_name/tabular.rb', line 101

def hierarchy_table
  Arel::Table.new(:taxon_name_hierarchies)
end

#nomenclatural_stats_set(query) ⇒ Object

TODO: break out fieldset to its own concern



240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
# File 'lib/queries/taxon_name/tabular.rb', line 240

def nomenclatural_stats_set(query)
  h = hierarchy_table
  t = table

  c = rank_data
  c << nil if combinations

  c.each_with_index do |r,i|
    %w{valid invalid}.each do |v|
      s = "#{v}_#{r ? r : 'combination' }"
      a = "ns_o#{i}_#{v}"
      @data_fields.push "\"#{a}\".\"#{s}\" as #{s}"
      @column_headers.push s

      x = t.where(
        h[:generations].gt(0) 
        .and( v == 'valid' ? t[:cached_valid_taxon_name_id].eq(t[:id]) : t[:cached_valid_taxon_name_id].not_eq(t[:id]) ) 
        .and( t[:rank_class].eq( Ranks.lookup(ancestor.nomenclatural_code, r) ) )
      )
        .join(h, Arel::Nodes::InnerJoin).on(
          h[:descendant_id].eq( t[:id] )  )
        .project(
          h[:ancestor_id],
          t[:id].count.as( s )
      ).group(h[:ancestor_id])
        .as(a)

      query.join(x, Arel::Nodes::OuterJoin).on( x[:ancestor_id].eq( finest_rank_id  ) )  # # NOT query['taxon_name_id']
    end
  end
  query
end

#observations_set(query) ⇒ Object

TODO: break out fieldset to its own concern



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/queries/taxon_name/tabular.rb', line 206

def observations_set(query)
  @data_fields.push '"fs_o1"."observation_count" as observation_count'
  @column_headers.push 'observation_count'
  o = ::Observation.arel_table
  x = o.project(
    o[:otu_id], 
    o[:otu_id].count.as('observation_count')
  ).group(o[:otu_id]).as('fs_o1')

  query.join(x, Arel::Nodes::OuterJoin).on(x[:otu_id].eq(otu_table[:id]))

  @data_fields.push '"fs_o2"."observation_depictions" as observation_depictions'
  @column_headers.push 'observation_depictions'
  p = ::Depiction.arel_table
  y = p.join(o, Arel::Nodes::OuterJoin).on(
    p[:depiction_object_id].eq(o[:otu_id])).where(p[:depiction_object_type].eq('Observation'))
    .project(
      p[:depiction_object_id], 
      p[:depiction_object_id].count.as('observation_depictions')
  ).group(p[:depiction_object_id]).as('fs_o2')

  query.join(y, Arel::Nodes::OuterJoin).on(y[:depiction_object_id].eq(otu_table[:id]))

  @data_fields.push '"fs_d1"."descriptors_scored" as descriptors_scored'
  @column_headers.push 'descriptors_scored'
  z = o.project(
    o[:otu_id], 
    o[:descriptor_id].count.as('descriptors_scored')
  ).group(o[:otu_id]).as('fs_d1')

  query.join(z, Arel::Nodes::OuterJoin).on(z[:otu_id].eq(otu_table[:id]))
end

#otu_tableObject



105
106
107
# File 'lib/queries/taxon_name/tabular.rb', line 105

def otu_table
  ::Otu.arel_table
end

#rank_classesObject



178
179
180
# File 'lib/queries/taxon_name/tabular.rb', line 178

def rank_classes
  ranks.collect{|r| Ranks.lookup(ancestor.nomenclatural_code, r)}
end

#rank_over(source_table, valid_source_table) ⇒ Object

Lets us generate a sort order across valid, invalid, and combinations



192
193
194
195
196
197
198
199
# File 'lib/queries/taxon_name/tabular.rb', line 192

def rank_over(source_table, valid_source_table)
  Arel::Nodes::Over.new(
    Arel::Nodes::SqlLiteral.new('rank()'),
    Arel::Nodes::Window.new.partition(
      [ valid_source_table[:parent_id] ]
    ).order( source_table[:cached_valid_taxon_name_id], source_table[:cached], source_table[:name] )
  ).as('ro') 
end

#tableObject



97
98
99
# File 'lib/queries/taxon_name/tabular.rb', line 97

def table
  ::TaxonName.arel_table
end