Class: Queries::TaxonName::Autocomplete

Inherits:
Query::Autocomplete show all
Defined in:
lib/queries/taxon_name/autocomplete.rb

Constant Summary collapse

CACHED_NAME_WEIGHT =

Weights. Theory (using this loosely) is that this will proportionally increase the importance in the list of the corresponding element. The tradeoff is subtle, but seems to work at first try.

8.0
CACHED_AUTHOR_YEAR_WEIGHT =
6.0
CACHED_WEIGHT =
4.0
CACHED_ORIGINAL_COMBINATION_WEIGHT =
2.0

Instance Attribute Summary collapse

Attributes inherited from Query::Autocomplete

#dynamic_limit, #project_id, #query_string

Attributes inherited from Query

#query_string, #terms

Instance Method Summary collapse

Methods inherited from Query::Autocomplete

#autocomplete_cached_wildcard_anywhere, #autocomplete_common_name_exact, #autocomplete_common_name_like, #autocomplete_exact_id, #autocomplete_exactly_named, #autocomplete_named, #autocomplete_ordered_wildcard_pieces_in_cached, #combine_or_clauses, #common_name_name, #common_name_table, #common_name_wild_pieces, #exactly_named, #fragments, #integers, #least_levenshtein, #match_wildcard_end_in_cached, #match_wildcard_in_cached, #named, #only_ids, #only_integers?, #parent, #parent_child_join, #parent_child_where, #pieces, #scope, #string_fragments, #wildcard_wrapped_integers, #wildcard_wrapped_years, #with_cached, #with_cached_like, #with_id, #with_project_id, #year_letter, #years

Methods inherited from Query

#alphabetic_strings, #alphanumeric_strings, base_name, #base_name, #build_terms, #cached_facet, #end_wildcard, #levenshtein_distance, #match_ordered_wildcard_pieces_in_cached, #no_terms?, referenced_klass, #referenced_klass, #referenced_klass_except, #referenced_klass_intersection, #referenced_klass_union, #start_and_end_wildcard, #start_wildcard, #table, #wildcard_pieces

Constructor Details

#initialize(string, **params) ⇒ Autocomplete

Returns a new instance of Autocomplete.

Parameters:

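  • string (String) — the search text, forwarded unchanged to the parent initializer
  • params (Hash) — optional filters read by key: :nomenclature_group, :valid, :type, :parent_id, :no_leaves, :exact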


42
43
44
45
46
47
48
49
50
51
52
# File 'lib/queries/taxon_name/autocomplete.rb', line 42

# Captures the TaxonName-specific filters, then hands off to the parent initializer.
#
# @param string [String] the search text; forwarded unchanged via bare `super`
# @param params [Hash] optional filters, read by key:
#   :nomenclature_group, :type, :parent_id are stored exactly as supplied;
#   :valid, :no_leaves, :exact are run through `boolean_param`
def initialize(string, **params)
  # Stored exactly as supplied (nil when the key is absent).
  @nomenclature_group = params[:nomenclature_group]
  @type = params[:type]
  @parent_id = params[:parent_id]

  # Coerced through boolean_param.
  @valid = boolean_param(params, :valid)
  @no_leaves = boolean_param(params, :no_leaves)

  # TODO: move to mode
  @exact = boolean_param(params, :exact)

  # Bare `super` re-sends `string` and `params` as received.
  super
end

Instance Attribute Details

#authorshipString

Returns (including empty).

Returns:

  • (String)

    (including empty)



39
40
41
# File 'lib/queries/taxon_name/autocomplete.rb', line 39

# Reader for @authorship.
#
# @return [String] (including empty)
def authorship = @authorship

#exactBoolean

Returns &exact=<“true”|“false”> if ‘true’ then only #name = query_string results are returned (no fuzzy matching).

Returns:

  • (Boolean)

    &exact=<“true”|“false”> if ‘true’ then only #name = query_string results are returned (no fuzzy matching)



31
32
33
# File 'lib/queries/taxon_name/autocomplete.rb', line 31

# Reader for @exact.
#
# @return [Boolean] &exact=<"true"|"false">; when true #autocomplete uses the
#   #exact_autocomplete strategy list instead of #comprehensive_autocomplete
def exact = @exact

#no_leavesBoolean

Returns &no_leaves=<“true”|“false”>

if 'true' then only names with descendants will be returned.

Returns:

  • (Boolean)

    &no_leaves=<“true”|“false”>

    if 'true' then only names with descendants will be returned
    


36
37
38
# File 'lib/queries/taxon_name/autocomplete.rb', line 36

# Reader for @no_leaves.
#
# @return [Boolean] &no_leaves=<"true"|"false">; when truthy #autocomplete
#   applies the `not_leaves` scope to every strategy
def no_leaves = @no_leaves

#nomenclature_groupArray

Returns &nomenclature_group[]=<<Iczn|Icnp|Icn>::<Higher|Family|Genus|Species>>.

Returns:

  • (Array)

    &nomenclature_group[]=<<Iczn|Icnp|Icn>::<Higher|Family|Genus|Species>>



8
9
10
# File 'lib/queries/taxon_name/autocomplete.rb', line 8

# Reader for @nomenclature_group.
#
# @return [Array] &nomenclature_group[]=<<Iczn|Icnp|Icn>::<Higher|Family|Genus|Species>>
def nomenclature_group = @nomenclature_group

#parent_idArray

Returns &parent_id[]=<int>&parent_id[]=<other_int> etc.

Returns:



24
25
26
# File 'lib/queries/taxon_name/autocomplete.rb', line 24

# Reader for @parent_id.
#
# @return [Array] &parent_id[]=<int>&parent_id[]=<other_int> etc.
def parent_id = @parent_id

#typeArray

Returns &type[]=<Protonym, Combination, Hybrid, etc.>&type[]=<other type> etc.

Returns:

  • (Array)

    &type[]=<Protonym, Combination, Hybrid, etc.>&type[]=<other type> etc.



20
21
22
# File 'lib/queries/taxon_name/autocomplete.rb', line 20

# Reader for @type.
#
# @return [Array] &type[]=<Protonym, Combination, Hybrid, etc.>&type[]=<other type> etc.
def type = @type

#validBoolean?

Returns &valid=<“true”|“false”>

if 'true'  then id == cached_valid_taxon_name_id
if 'false' then id != cached_valid_taxon_name_id
if nil   then no check made, i.e. all names

string is converted to Boolean here.

Returns:

  • (Boolean, nil)

    &valid=<“true”|“false”>

    if 'true'  then id == cached_valid_taxon_name_id
    if 'false' then id != cached_valid_taxon_name_id
    if nil   then no check made, i.e. all names
    

    string is converted to Boolean here



16
17
18
# File 'lib/queries/taxon_name/autocomplete.rb', line 16

# Reader for @valid.
#
# @return [Boolean, nil] &valid=<"true"|"false">
#   true  => id == cached_valid_taxon_name_id
#   false => id != cached_valid_taxon_name_id
#   nil   => no check made, i.e. all names
def valid = @valid

Instance Method Details

#and_clausesArel:Nodes?

Returns:

  • (Arel:Nodes, nil)


67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/queries/taxon_name/autocomplete.rb', line 67

# Combines every active filter clause into one AND-ed Arel expression.
#
# The individual builders (#valid_state, #is_type, #with_parent_id,
# #with_nomenclature_group) return nil when their filter is not in use;
# those are dropped before combining.
#
# @return [Arel:Nodes, nil] the combined clause, or nil when no filter is active
def and_clauses
  clauses = [
    valid_state,
    is_type,
    with_parent_id,
    with_nomenclature_group
  ].compact

  # A seedless reduce yields nil for an empty list and the lone element for a
  # one-item list. `compact` never returns nil, so a `nil?` guard here is dead code.
  clauses.reduce { |combined, clause| combined.and(clause) }
end

#autocompleteArray

Returns:

  • (Array)


355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
# File 'lib/queries/taxon_name/autocomplete.rb', line 355

# Runs the strategy list (exact or comprehensive) in order, applies the shared
# filters to each strategy, and accumulates unique records until at least 20
# have been collected.
#
# Each strategy contributes at most 20 rows. Results are de-duplicated as they
# are merged, so duplicates across strategies do not count towards the cut-off.
# The list is not truncated, so it can hold more than 20 records when the last
# strategy run pushes it past the threshold.
#
# @return [Array] unique records in strategy order
def autocomplete
  # exact, unified, comprehensive
  queries = (exact ? exact_autocomplete : comprehensive_autocomplete).compact

  # Loop-invariant pieces are built once rather than per strategy.
  filter_sql = and_clauses&.to_sql
  # parent_id is nil when the param was not supplied; treat that like "no parents".
  parent_scope = ::TaxonName.where(id: parent_id) unless parent_id.nil? || parent_id.empty?

  result = []

  queries.each do |q|
    a = q
    a = a.where(project_id:) if project_id.present?
    a = a.where(filter_sql) if filter_sql
    a = a.descendants_of(parent_scope) if parent_scope
    a = a.not_leaves if no_leaves

    # Union keeps first-seen order and drops records already collected.
    result |= a.limit(20).to_a
    break if result.size > 19
  end

  # result[0..19]
  result
end

#autocomplete_cachedObject

---- gin methods. Consider word_similarity().



235
236
237
238
239
240
# File 'lib/queries/taxon_name/autocomplete.rb', line 235

# Similarity search against `cached`, scoped to the current project.
#
# Selects every taxon_names column plus `sml`, the similarity of the query
# string to `cached`; keeps rows passing both the `%` operator and an explicit
# similarity > 0.6 test; orders by `sml` descending, then `cached`.
#
# @return [Scope]
def autocomplete_cached
  projection = ApplicationRecord.sanitize_sql(['taxon_names.*, similarity(?, cached) AS sml', query_string])
  above_cutoff = ApplicationRecord.sanitize_sql_array(["similarity('%s', cached) > 0.6", query_string])

  ::TaxonName
    .where(project_id:)
    .select(projection)
    .where('cached % ?', query_string) # `%` in where means nothing < 0.3 (internal PG similarity value)
    .where(above_cutoff)
    .order('sml DESC, cached')
end

#autocomplete_cached_author_yearObject



249
250
251
252
253
254
# File 'lib/queries/taxon_name/autocomplete.rb', line 249

# Similarity search against `taxon_names.cached_author_year`.
#
# Selects every taxon_names column plus `sml`, keeps rows passing both the `%`
# operator and an explicit similarity > 0.6 test, and orders by `sml`
# descending, then `cached_author_year`.
#
# @return [Scope]
def autocomplete_cached_author_year
  projection = ApplicationRecord.sanitize_sql(
    ['taxon_names.*, similarity(?, taxon_names.cached_author_year) AS sml', query_string]
  )
  above_cutoff = ApplicationRecord.sanitize_sql(
    ["similarity('%s', taxon_names.cached_author_year) > 0.6", query_string]
  )

  ::TaxonName
    .select(projection)
    .where('taxon_names.cached_author_year % ?', query_string)
    .where(above_cutoff)
    .order('sml DESC, taxon_names.cached_author_year')
end

#autocomplete_cached_end_wildcardScope

Returns:

  • (Scope)


156
157
158
159
160
# File 'lib/queries/taxon_name/autocomplete.rb', line 156

# Prefix match on `cached`: the query string, with backslashes removed,
# followed by a trailing wildcard.
#
# @return [Scope] at most 20 rows
def autocomplete_cached_end_wildcard
  prefix = query_string.delete('\\')
  starts_with = table[:cached].matches("#{prefix}%").to_sql
  base_query.where(starts_with).limit(20)
end

#autocomplete_cached_name_end_wildcardScope

Returns:

  • (Scope)


185
186
187
188
# File 'lib/queries/taxon_name/autocomplete.rb', line 185

# Prefix match on `name`: the query string followed by a trailing wildcard.
#
# @return [Scope] at most 20 rows
def autocomplete_cached_name_end_wildcard
  starts_with = table[:name].matches("#{query_string}%").to_sql
  base_query
    .where(starts_with)
    .limit(20)
end

#autocomplete_cached_wildcard_whitespaceScope

Returns:

  • (Scope)


191
192
193
194
# File 'lib/queries/taxon_name/autocomplete.rb', line 191

# Matches `cached` against the query string with every space turned into a
# wildcard; a ". " sequence is first collapsed to a single space.
#
# @return [Scope] at most 20 rows
def autocomplete_cached_wildcard_whitespace
  pattern = query_string.gsub('. ', ' ').gsub(' ', '%')
  spaced_match = table[:cached].matches(pattern).to_sql
  base_query.where(spaced_match).limit(20)
end

#autocomplete_combined_ginObject

Used in /otus/api/v1/autocomplete



265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# File 'lib/queries/taxon_name/autocomplete.rb', line 265

# Single weighted similarity query across name, cached_author_year, cached and
# cached_original_combination. Used in /otus/api/v1/autocomplete.
#
# Built in three layers:
#   1. `a`  - taxon_names rows with four per-column similarity scores
#             (sml_n, sml_cay, sml_c, sml_coc), restricted to rows where
#             cached_author_year, cached_original_combination or cached
#             passes the `%` operator against query_string.
#   2. `s`  - `a` wrapped as CTE `tns` and joined back to taxon_names, adding
#             `sml_tn`: the sum of the scores (NULL treated as 0), each
#             multiplied by its CACHED_*_WEIGHT constant.
#   3. the returned scope selects from `s`, ordered by `sml_tn` descending, distinct.
#
# @return [Scope]
def autocomplete_combined_gin
  # Placeholders bind in order: query_string, authorship, query_string, query_string.
  # So sml_cay scores cached_author_year against `authorship`, while the WHERE
  # clause below tests cached_author_year against `query_string`.
  a = ::TaxonName.select(ApplicationRecord.sanitize_sql(
    ['taxon_names.*, similarity(?, name) AS sml_n, similarity(?, taxon_names.cached_author_year) AS sml_cay, similarity(?, cached) AS sml_c, similarity(?, taxon_names.cached_original_combination) AS sml_coc',
     query_string, authorship, query_string, query_string])
                        ).where('taxon_names.cached_author_year % ? OR taxon_names.cached_original_combination % ? OR cached % ?', query_string, query_string, query_string)

  # Raw SQL string: CTE definition followed by the weighted-score select.
  s = 'WITH tns AS (' + a.to_sql + ') ' +
    ::TaxonName
    .select(Arel.sql("taxon_names.*, (( COALESCE(tns1.sml_n,0) * #{CACHED_NAME_WEIGHT} + \
                                            COALESCE(tns1.sml_cay,0) * #{CACHED_AUTHOR_YEAR_WEIGHT} + \
                                            COALESCE(tns1.sml_c,0) * #{CACHED_WEIGHT} + \
                                            COALESCE(tns1.sml_coc,0) * #{CACHED_ORIGINAL_COMBINATION_WEIGHT} \
                                          )) sml_tn"))
    .joins('JOIN tns as tns1  on tns1.id = taxon_names.id')
    .to_sql

  # The subquery is aliased as taxon_names so the result still reads as TaxonName rows.
  ::TaxonName.select('taxon_names.*, sml_tn as sml_t').from('(' + s + ') as taxon_names').order('sml_tn DESC').distinct
end

#autocomplete_exact_cachedScope

Returns:

  • (Scope)


113
114
115
116
# File 'lib/queries/taxon_name/autocomplete.rb', line 113

# Rows whose `cached` equals the query string, ordered by cached_author_year.
#
# @return [Scope] at most 20 rows
def autocomplete_exact_cached
  same_cached = table[:cached].eq(query_string).to_sql
  base_query
    .where(same_cached)
    .order('cached_author_year ASC')
    .limit(20)
end

#autocomplete_exact_cached_original_combinationScope

Returns:

  • (Scope)


119
120
121
122
# File 'lib/queries/taxon_name/autocomplete.rb', line 119

# Rows whose `cached_original_combination` equals the query string, ordered by
# cached_author_year.
#
# @return [Scope] at most 20 rows
def autocomplete_exact_cached_original_combination
  same_combination = table[:cached_original_combination].eq(query_string).to_sql
  base_query
    .where(same_combination)
    .order('cached_author_year ASC')
    .limit(20)
end

#autocomplete_exact_nameScope

Returns:

  • (Scope)


143
144
145
146
# File 'lib/queries/taxon_name/autocomplete.rb', line 143

# Rows whose `name` equals the query string, ordered by cached_author_year.
#
# @return [Scope] at most 20 rows
def autocomplete_exact_name
  same_name = table[:name].eq(query_string).to_sql
  base_query
    .where(same_name)
    .order('cached_author_year ASC')
    .limit(20)
end

#autocomplete_exact_name_and_yearScope

Returns:

  • (Scope)


131
132
133
134
135
136
137
138
139
140
# File 'lib/queries/taxon_name/autocomplete.rb', line 131

# Exact `name` match combined with a year match in `cached_author_year`.
#
# Only applies when the query holds exactly one digit-free alphabetic string
# and at least one year; otherwise the strategy is skipped.
#
# @return [Scope, nil] at most 10 rows, or nil when the query does not fit
def autocomplete_exact_name_and_year
  names = alphabetic_strings.reject { |candidate| candidate =~ /\d/ }
  found_years = years
  return nil unless names.size == 1 && !found_years.empty?

  name_matches = table[:name].eq(names.first)
  year_matches = table[:cached_author_year].matches_any(wildcard_wrapped_years)
  base_query.where(name_matches.and(year_matches).to_sql).limit(10)
end

#autocomplete_genus_species1(result) ⇒ Scope

Parameters:

  • result (String)

Returns:

  • (Scope)


170
171
172
173
174
# File 'lib/queries/taxon_name/autocomplete.rb', line 170

# Matches `cached` against a pre-built genus/species pattern, used as is.
#
# @param result [String, nil] LIKE pattern, e.g. the value from #genus_species
# @return [Scope, nil] at most 8 rows ordered by type DESC, cached ASC; nil when
#   no pattern was given
def autocomplete_genus_species1(result)
  return nil if result.nil?

  pattern_match = table[:cached].matches(result).to_sql
  base_query
    .where(pattern_match)
    .order('type DESC, cached ASC')
    .limit(8)
end

#autocomplete_genus_species2(result) ⇒ Scope

Parameters:

  • result (String)

Returns:

  • (Scope)


178
179
180
181
182
# File 'lib/queries/taxon_name/autocomplete.rb', line 178

# Matches `cached` against a pre-built genus/species pattern with a trailing
# wildcard appended.
#
# @param result [String, nil] LIKE pattern, e.g. the value from #genus_species
# @return [Scope, nil] at most 8 rows ordered by type DESC, cached ASC; nil when
#   no pattern was given
def autocomplete_genus_species2(result)
  return nil if result.nil?

  prefix_match = table[:cached].matches("#{result}%").to_sql
  base_query
    .where(prefix_match)
    .order('type DESC, cached ASC')
    .limit(8)
end

#autocomplete_name_author_year_fragmentScope?

Returns:

  • (Scope, nil)


197
198
199
200
201
202
203
204
205
# File 'lib/queries/taxon_name/autocomplete.rb', line 197

# When the query splits into exactly two fragments, matches the first against
# `name` and the second against `cached_author_year`.
#
# @return [Scope, nil] at most 20 rows, or nil unless there are exactly two fragments
def autocomplete_name_author_year_fragment
  name_part, author_year_part, *extra = fragments
  return nil if author_year_part.nil? && fragments.size != 2
  return nil unless extra.empty? && fragments.size == 2

  both = table[:name].matches(name_part).and(table[:cached_author_year].matches(author_year_part))
  base_query.where(both.to_sql).limit(20)
end

#autocomplete_original_combinationObject



242
243
244
245
246
247
# File 'lib/queries/taxon_name/autocomplete.rb', line 242

# Similarity search against `taxon_names.cached_original_combination`.
#
# Selects every taxon_names column plus `sml`, keeps rows passing both the `%`
# operator and an explicit similarity > 0.6 test, and orders by `sml`
# descending, then `cached_original_combination`.
#
# @return [Scope]
def autocomplete_original_combination
  projection = ApplicationRecord.sanitize_sql(
    ['taxon_names.*, similarity(?, taxon_names.cached_original_combination) AS sml', query_string]
  )
  above_cutoff = ApplicationRecord.sanitize_sql_array(
    ["similarity('%s', taxon_names.cached_original_combination) > 0.6", query_string]
  )

  ::TaxonName
    .select(projection)
    .where('taxon_names.cached_original_combination % ?', query_string)
    .where(above_cutoff)
    .order('sml DESC, taxon_names.cached_original_combination')
end

#autocomplete_taxon_name_author_year_matchesArel::Nodes::Matches

Returns:

  • (Arel::Nodes::Matches)


222
223
224
225
226
# File 'lib/queries/taxon_name/autocomplete.rb', line 222

# Matches `cached_author_year` against the #authorship value.
#
# @return [Scope, nil] at most 10 rows, or nil when #authorship is nil
def autocomplete_taxon_name_author_year_matches
  author = authorship
  return nil if author.nil?

  author_match = table[:cached_author_year].matches(author).to_sql
  base_query.where(author_match).limit(10)
end

#autocomplete_top_cachedScope

Returns:

  • (Scope)


149
150
151
152
153
# File 'lib/queries/taxon_name/autocomplete.rb', line 149

# First row (in #base_query order) whose `cached` starts with the query string.
#
# @return [Scope] at most 1 row
def autocomplete_top_cached
  starts_with = table[:cached].matches("#{query_string}%").to_sql
  base_query
    .where(starts_with)
    .limit(1)
end

#autocomplete_top_cached_subgenusScope

Returns:

  • (Scope)


163
164
165
166
# File 'lib/queries/taxon_name/autocomplete.rb', line 163

# First row (in #base_query order) whose `cached` ends with the query string
# wrapped in parentheses, i.e. "...(query)".
#
# @return [Scope] at most 1 row
def autocomplete_top_cached_subgenus
  parenthesized = table[:cached].matches("%(#{query_string})").to_sql
  base_query
    .where(parenthesized)
    .limit(1)
end

#autocomplete_wildcard_author_year_joined_piecesScope?

Returns:

  • (Scope, nil)


208
209
210
211
212
# File 'lib/queries/taxon_name/autocomplete.rb', line 208

# Matches `cached_author_year` against the query pieces joined and wrapped
# with wildcards ("%a%b%").
#
# @return [Scope, nil] at most 20 rows ordered by cached ASC; nil when there
#   are no pieces
def autocomplete_wildcard_author_year_joined_pieces
  parts = pieces
  return nil if parts.empty?

  pattern = "%#{parts.join('%')}%"
  base_query
    .where(table[:cached_author_year].matches(pattern).to_sql)
    .order('cached ASC')
    .limit(20)
end

#autocomplete_wildcard_cached_original_combinationScope

Returns:

  • (Scope)


125
126
127
128
# File 'lib/queries/taxon_name/autocomplete.rb', line 125

# Matches `cached_original_combination` against the value of #wildcard_pieces,
# ordered by cached_author_year.
#
# @return [Scope] at most 20 rows
def autocomplete_wildcard_cached_original_combination
  pattern_match = table[:cached_original_combination].matches(wildcard_pieces).to_sql
  base_query
    .where(pattern_match)
    .order('cached_author_year ASC')
    .limit(20)
end

#autocomplete_wildcard_joined_stringsScope?

Returns:

  • (Scope, nil)


215
216
217
218
219
# File 'lib/queries/taxon_name/autocomplete.rb', line 215

# Matches `cached` against the alphabetic strings of the query joined and
# wrapped with wildcards ("%a%b%").
#
# @return [Scope, nil] at most 10 rows; nil when there are no alphabetic strings
def autocomplete_wildcard_joined_strings
  words = alphabetic_strings
  return nil if words.empty?

  pattern = "%#{words.join('%')}%"
  base_query
    .where(table[:cached].matches(pattern).to_sql)
    .limit(10)
end

#base_queryScope

TODO: this should deprecate for gin based approaches.

Returns:

  • (Scope)


404
405
406
407
408
# File 'lib/queries/taxon_name/autocomplete.rb', line 404

# Shared starting scope for the non-similarity strategies: all taxon_names
# columns plus the length of `cached`, with ancestor hierarchies included,
# ordered shortest `cached` first and then alphabetically.
#
# TODO: this should deprecate for gin based approaches.
#
# @return [Scope]
def base_query
  shortest_first = Arel.sql('char_length(taxon_names.cached), taxon_names.cached ASC')

  ::TaxonName
    .select('taxon_names.*, char_length(taxon_names.cached)')
    .includes(:ancestor_hierarchies)
    .order(shortest_first)
end

#comprehensive_autocompleteObject

TODO: Refactor to OTU approach?



308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
# File 'lib/queries/taxon_name/autocomplete.rb', line 308

# Ordered strategy list used by #autocomplete when #exact is not set.
# Entries may be nil (a strategy that does not apply); the caller compacts.
#
# TODO: Refactor to OTU approach?
#
# @return [Array<Scope, nil>]
def comprehensive_autocomplete
  genus_species_pattern = genus_species

  exact_matches = [
    autocomplete_exact_cached,
    autocomplete_exact_cached_original_combination,
    autocomplete_exact_name_and_year,
    autocomplete_exact_name,
    autocomplete_exact_id,
    autocomplete_identifier_cached_exact,
    autocomplete_identifier_identifier_exact
  ]

  # All exact should be before these?
  #
  # These are left in, but the cutoff
  # is now 2x as high, i.e. more like wildcard matches we
  # were originally used to.
  similarity_matches = [
    autocomplete_cached,
    autocomplete_original_combination,
    autocomplete_cached_author_year
  ]

  # Specialized results, none of them tested.
  specialized_matches = [
    autocomplete_genus_species1(genus_species_pattern),
    autocomplete_genus_species2(genus_species_pattern),
    autocomplete_top_cached_subgenus
  ]

  # Currently disabled strategies:
  #   autocomplete_top_cached (wildcard end)
  #   autocomplete_cached_end_wildcard
  #   autocomplete_cached_name_end_wildcard
  #   autocomplete_cached_wildcard_whitespace
  #   autocomplete_name_author_year_fragment
  #   autocomplete_taxon_name_author_year_matches
  wildcard_matches = [
    autocomplete_wildcard_joined_strings,
    autocomplete_wildcard_author_year_joined_pieces,
    autocomplete_wildcard_cached_original_combination
  ]

  exact_matches + similarity_matches + specialized_matches + wildcard_matches
end

#exact_autocompleteObject

Used in New taxon name task, for example

TODO: what is intent?


286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
# File 'lib/queries/taxon_name/autocomplete.rb', line 286

# Ordered strategy list used by #autocomplete when #exact is set.
# Used in New taxon name task, for example.
# Entries may be nil (a strategy that does not apply); the caller compacts.
#
# TODO: what is intent?
#
# @return [Array<Scope, nil>]
def exact_autocomplete
  exact_matches = [
    autocomplete_exact_id,
    autocomplete_exact_cached,
    autocomplete_exact_cached_original_combination,
    autocomplete_identifier_cached_exact,
    autocomplete_identifier_identifier_exact,
    autocomplete_exact_name_and_year
  ]

  pattern_matches = [
    autocomplete_cached_end_wildcard,
    autocomplete_cached_wildcard_whitespace,
    autocomplete_name_author_year_fragment,
    autocomplete_taxon_name_author_year_matches,
    autocomplete_wildcard_joined_strings,
    autocomplete_wildcard_author_year_joined_pieces,
    autocomplete_wildcard_cached_original_combination
  ]

  # not exact enough, want the whole thing?
  name_only = [autocomplete_exact_name]

  # autocomplete_top_cached is deliberately excluded: not exact at all
  exact_matches + pattern_matches + name_only
end

#genus_speciesString?

Returns parse and only return what is assumed to be genus/species, with a wildcard in front.

Returns:

  • (String, nil)

    parse and only return what is assumed to be genus/species, with a wildcard in front



387
388
389
390
391
392
393
394
395
396
397
398
399
400
# File 'lib/queries/taxon_name/autocomplete.rb', line 387

# Parses the query string and, when both a genus and a species are found,
# returns them joined by a wildcard ("Genus%species").
#
# @return [String, nil] nil when either part is missing
def genus_species
  parser = Vendor::Biodiversity::Result.new
  parser.name = query_string
  parser.parse

  genus = parser.genus
  species = parser.species
  return nil unless genus && species

  "#{genus}%#{species}"
end

#is_typeArel::Nodes::<>?

and clause

Returns:

  • (Arel::Nodes::<>, nil)


93
94
95
96
# File 'lib/queries/taxon_name/autocomplete.rb', line 93

# AND clause restricting results to the requested taxon name types.
#
# @return [Arel::Nodes::<>, nil] `type IN (...)`, or nil when no type filter was
#   supplied (#type is nil or empty)
def is_type
  # #type is nil when the :type param was never supplied; `nil.empty?` would raise.
  return nil if type.nil? || type.empty?

  table[:type].in(type)
end

#taxon_name_hierarchies_tableArel::Table

Returns:

  • (Arel::Table)


411
412
413
# File 'lib/queries/taxon_name/autocomplete.rb', line 411

# A fresh Arel handle on the `taxon_name_hierarchies` table (built on every call).
#
# @return [Arel::Table]
def taxon_name_hierarchies_table = Arel::Table.new('taxon_name_hierarchies')

#unified_autocompleteObject



346
347
348
349
350
351
352
# File 'lib/queries/taxon_name/autocomplete.rb', line 346

# Short strategy list built around the combined similarity query:
# exact id, #autocomplete_combined_gin, then exact identifier (cached).
#
# @return [Array<Scope, nil>]
def unified_autocomplete
  by_id = autocomplete_exact_id
  by_similarity = autocomplete_combined_gin
  by_identifier = autocomplete_identifier_cached_exact

  [by_id, by_similarity, by_identifier]
end

#valid_stateArel::Nodes::<>?

and clause

Returns:

  • (Arel::Nodes::<>, nil)


86
87
88
89
# File 'lib/queries/taxon_name/autocomplete.rb', line 86

# AND clause for the validity filter.
#
# @return [Arel::Nodes::<>, nil]
#   nil when @valid is nil (no check made);
#   `id = cached_valid_taxon_name_id` when valid;
#   `id != cached_valid_taxon_name_id` otherwise
def valid_state
  return nil if @valid.nil?

  if valid
    table[:id].eq(table[:cached_valid_taxon_name_id])
  else
    table[:id].not_eq(table[:cached_valid_taxon_name_id])
  end
end

#with_cached_author_yearArel::Nodes::Matches

Returns:

  • (Arel::Nodes::Matches)


416
417
418
# File 'lib/queries/taxon_name/autocomplete.rb', line 416

# Clause matching `cached_author_year` against any of the query #terms.
#
# @return [Arel::Nodes] the result of `matches_any(terms)`
def with_cached_author_year
  author_year = table[:cached_author_year]
  author_year.matches_any(terms)
end

#with_nomenclature_groupArel::Nodes::Grouping?

Returns and clause.

Returns:

  • (Arel::Nodes::Grouping, nil)

    and clause



107
108
109
110
# File 'lib/queries/taxon_name/autocomplete.rb', line 107

# AND clause restricting results by rank class.
#
# @return [Arel::Nodes::Grouping, nil] `rank_class` matching any of the requested
#   groups, or nil when no group filter was supplied (#nomenclature_group is nil
#   or empty)
def with_nomenclature_group
  # #nomenclature_group is nil when the param was never supplied; `nil.empty?` would raise.
  return nil if nomenclature_group.nil? || nomenclature_group.empty?

  table[:rank_class].matches_any(nomenclature_group)
end

#with_parent_idArel::Nodes::<>?

and clause, limit to ancestors or [ids]

Returns:

  • (Arel::Nodes::<>, nil)


100
101
102
103
# File 'lib/queries/taxon_name/autocomplete.rb', line 100

# AND clause on the hierarchy table: `ancestor_id IN (parent ids)`.
#
# @return [Arel::Nodes::<>, nil] nil when no parent filter was supplied
#   (#parent_id is nil or empty)
def with_parent_id
  # #parent_id is nil when the param was never supplied; `nil.empty?` would raise.
  return nil if parent_id.nil? || parent_id.empty?

  taxon_name_hierarchies_table[:ancestor_id].in(parent_id)
end