Class: Queries::TaxonName::Autocomplete

Inherits:
Query::Autocomplete show all
Defined in:
lib/queries/taxon_name/autocomplete.rb

Constant Summary collapse

CACHED_NAME_WEIGHT =

Weights. Theory (using this loosely) is that this will proportionally increase the importance in the list of the corresponding element. The tradeoff is subtle, but seems to work at first try.

8.0
CACHED_AUTHOR_YEAR_WEIGHT =
6.0
CACHED_WEIGHT =
4.0
CACHED_ORIGINAL_COMBINATION_WEIGHT =
2.0

Instance Attribute Summary collapse

Attributes inherited from Query::Autocomplete

#dynamic_limit, #project_id, #query_string

Attributes inherited from Query

#query_string, #terms

Instance Method Summary collapse

Methods inherited from Query::Autocomplete

#autocomplete_cached_wildcard_anywhere, #autocomplete_common_name_exact, #autocomplete_common_name_like, #autocomplete_exact_id, #autocomplete_exactly_named, #autocomplete_named, #autocomplete_ordered_wildcard_pieces_in_cached, #cached_facet, #combine_or_clauses, #common_name_name, #common_name_table, #common_name_wild_pieces, #exactly_named, #fragments, #integers, #least_levenshtein, #match_wildcard_end_in_cached, #match_wildcard_in_cached, #named, #only_ids, #only_integers?, #parent, #parent_child_join, #parent_child_where, #pieces, #safe_integers, #scope, #string_fragments, #wildcard_wrapped_integers, #wildcard_wrapped_years, #with_cached, #with_cached_like, #with_id, #with_project_id, #year_letter, #years

Methods inherited from Query

#alphabetic_strings, #alphanumeric_strings, base_name, #base_name, #build_terms, #cached_facet, #end_wildcard, #levenshtein_distance, #match_ordered_wildcard_pieces_in_cached, #no_terms?, referenced_klass, #referenced_klass, #referenced_klass_except, #referenced_klass_intersection, #referenced_klass_union, #start_and_end_wildcard, #start_wildcard, #table, #wildcard_pieces

Constructor Details

#initialize(string, **params) ⇒ Autocomplete

Returns a new instance of Autocomplete.

Parameters:

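  • params (Hash) — keyword options; the constructor reads :nomenclature_group, :valid, :type, :parent_id, :no_leaves and :exact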


42
43
44
45
46
47
48
49
50
51
52
# File 'lib/queries/taxon_name/autocomplete.rb', line 42

# Stores the taxon-name specific filter options on the instance and then
# delegates to the parent constructor with the same arguments.
#
# @param string [String] the search text (passed through to super unchanged)
# @param params [Hash] keyword options. The keys read here are
#   :nomenclature_group, :type, :parent_id (stored as given) and
#   :valid, :no_leaves, :exact (run through boolean_param).
def initialize(string, **params)
  # Stored exactly as supplied; nil when the key is absent.
  @nomenclature_group, @type, @parent_id =
    params.values_at(:nomenclature_group, :type, :parent_id)

  # TODO: move to mode (applies to @exact)
  @valid, @no_leaves, @exact =
    %i[valid no_leaves exact].map { |key| boolean_param(params, key) }

  super
end

Instance Attribute Details

#authorshipString

Returns the authorship String (may be empty).

Returns:

  • (String)

    (including empty)



39
40
41
# File 'lib/queries/taxon_name/autocomplete.rb', line 39

# Reader for @authorship.
#
# @return [String] (including empty)
def authorship = @authorship

#exactBoolean

Returns &exact=<“true”|“false”> if ‘true’ then only #name = query_string results are returned (no fuzzy matching).

Returns:

  • (Boolean)

    &exact=<“true”|“false”> if ‘true’ then only #name = query_string results are returned (no fuzzy matching)



31
32
33
# File 'lib/queries/taxon_name/autocomplete.rb', line 31

# Reader for @exact.
#
# @return [Boolean]
#   &exact=<"true"|"false">
#   if 'true' then only #name = query_string results are returned (no fuzzy matching)
def exact = @exact

#no_leavesBoolean

Returns &no_leaves=<“true”|“false”>

if 'true' then only names with descendants will be returned.

Returns:

  • (Boolean)

    &no_leaves=<“true”|“false”>

    if 'true' then only names with descendants will be returned
    


36
37
38
# File 'lib/queries/taxon_name/autocomplete.rb', line 36

# Reader for @no_leaves.
#
# @return [Boolean]
#   &no_leaves=<"true"|"false">
#   if 'true' then only names with descendants will be returned
def no_leaves = @no_leaves

#nomenclature_groupArray

Returns &nomenclature_group[]=<<Iczn|Icnp|Icn>::<Higher|Family|Genus|Species>>.

Returns:

  • (Array)

    &nomenclature_group[]=<<Iczn|Icnp|Icn>::<Higher|Family|Genus|Species>>



8
9
10
# File 'lib/queries/taxon_name/autocomplete.rb', line 8

# Reader for @nomenclature_group.
#
# @return [Array]
#   &nomenclature_group[]=<<Iczn|Icnp|Icn>::<Higher|Family|Genus|Species>>
def nomenclature_group = @nomenclature_group

#parent_idArray

Returns &parent_id[]=<int>&parent_id=<other_int> etc.

Returns:



24
25
26
# File 'lib/queries/taxon_name/autocomplete.rb', line 24

# Reader for @parent_id.
#
# @return [Array]
#   &parent_id[]=<int>&parent_id=<other_int> etc.
def parent_id = @parent_id

#typeArray

Returns &type[]=<Protonym, Combination, Hybrid, etc.>&type[]=<other type> etc.

Returns:

  • (Array)

    &type[]=<Protonym, Combination, Hybrid, etc.>&type[]=<other type> etc.



20
21
22
# File 'lib/queries/taxon_name/autocomplete.rb', line 20

# Reader for @type.
#
# @return [Array]
#   &type[]=<Protonym, Combination, Hybrid, etc.>&type[]=<other type> etc.
def type = @type

#validBoolean?

Returns &valid=<“true”|“false”>

if 'true'  then id == cached_valid_taxon_name_id
if 'false' then id != cached_valid_taxon_name_id
if nil   then no check made, i.e. all names

string is converted to Boolean here.

Returns:

  • (Boolean, nil)

    &valid=<“true”|“false”>

    if 'true'  then id == cached_valid_taxon_name_id
    if 'false' then id != cached_valid_taxon_name_id
    if nil   then no check made, i.e. all names
    

    string is converted to Boolean here



16
17
18
# File 'lib/queries/taxon_name/autocomplete.rb', line 16

# Reader for @valid.
#
# @return [Boolean, nil]
#   &valid=<"true"|"false">
#   if 'true'  then id == cached_valid_taxon_name_id
#   if 'false' then id != cached_valid_taxon_name_id
#   if nil     then no check made, i.e. all names
def valid = @valid

Instance Method Details

#and_clausesArel:Nodes?

Returns:

  • (Arel:Nodes, nil)


67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/queries/taxon_name/autocomplete.rb', line 67

# Combines every applicable filter clause into a single AND-ed condition.
#
# The candidate clauses come from valid_state, is_type, with_parent_id and
# with_nomenclature_group; each returns nil when its filter is not in use and
# those nils are dropped.
#
# @return [Arel:Nodes, nil]
#   the clauses chained left to right with `and`, the lone clause when only
#   one applies, or nil when no filter applies
def and_clauses
  clauses = [
    valid_state,
    is_type,
    with_parent_id,
    with_nomenclature_group,
  ].compact

  # `compact` never returns nil, so no nil guard is needed; `reduce` without
  # a seed already yields nil for an empty list and the element itself for a
  # single-element list.
  clauses.reduce { |combined, clause| combined.and(clause) }
end

#autocompleteArray

Returns:

  • (Array)


367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
# File 'lib/queries/taxon_name/autocomplete.rb', line 367

# Runs the configured list of autocomplete queries in priority order and
# collects their records.
#
# The query list is exact_autocomplete when `exact` is truthy, otherwise
# comprehensive_autocomplete; nil entries are skipped. Every query is narrowed
# by project_id (when present), the combined and_clauses, the parent_id
# descendants filter and the no_leaves filter, then limited to 20 rows.
# Iteration stops once 20 or more records have been gathered; because each
# query contributes up to 20 rows the result is not truncated to 20.
#
# @return [Array] unique records in the order they were found
def autocomplete
  # exact, unified, comprehensive
  queries = (exact ? exact_autocomplete : comprehensive_autocomplete).compact
  return [] if queries.empty?

  # Loop-invariant: build the AND clause once rather than twice per query.
  filter_sql = and_clauses&.to_sql
  # Array() makes a nil parent_id behave like "no parent filter" instead of
  # raising NoMethodError on nil.empty?.
  restrict_to_parents = !Array(parent_id).empty?

  result = []

  queries.each do |query|
    scope = query
    scope = scope.where(project_id:) if project_id.present?
    scope = scope.where(filter_sql) if filter_sql
    scope = scope.descendants_of(::TaxonName.where(id: parent_id)) if restrict_to_parents
    scope = scope.not_leaves if no_leaves

    result += scope.limit(20).to_a
    break if result.size > 19
  end

  # result[0..19]
  result.uniq
end

#autocomplete_cachedObject

GIN-based methods. Consider using word_similarity().



245
246
247
248
249
250
# File 'lib/queries/taxon_name/autocomplete.rb', line 245

# Similarity search of query_string against taxon_names.cached within the
# current project.
#
# Each row carries an extra `sml` column with the similarity score; rows are
# kept only when they pass the `%` operator and score above 0.6, and are
# ordered by score (best first), then by cached.
#
# @return [Scope]
def autocomplete_cached
  scored_columns = ApplicationRecord.sanitize_sql(
    ['taxon_names.*, similarity(?, taxon_names.cached) AS sml', query_string]
  )
  above_cutoff = ApplicationRecord.sanitize_sql_array(
    ["similarity('%s', taxon_names.cached) > 0.6", query_string]
  )

  ::TaxonName
    .where(project_id:)
    .select(scored_columns)
    .where('taxon_names.cached % ?', query_string) # `%` in where means nothing < 0.3 (internal PG similarity value)
    .where(above_cutoff)
    .order('sml DESC, taxon_names.cached')
end

#autocomplete_cached_author_yearObject



259
260
261
262
263
264
# File 'lib/queries/taxon_name/autocomplete.rb', line 259

# Similarity search of query_string against taxon_names.cached_author_year.
#
# Each row carries an extra `sml` column with the similarity score; rows are
# kept only when they pass the `%` operator and score above 0.6, and are
# ordered by score (best first), then by cached_author_year.
# No project filter is applied here.
#
# @return [Scope]
def autocomplete_cached_author_year
  scored_columns = ApplicationRecord.sanitize_sql(
    ['taxon_names.*, similarity(?, taxon_names.cached_author_year) AS sml', query_string]
  )
  above_cutoff = ApplicationRecord.sanitize_sql(
    ["similarity('%s', taxon_names.cached_author_year) > 0.6", query_string]
  )

  ::TaxonName
    .select(scored_columns)
    .where('taxon_names.cached_author_year % ?', query_string)
    .where(above_cutoff)
    .order('sml DESC, taxon_names.cached_author_year')
end

#autocomplete_cached_end_wildcardScope

Returns:

  • (Scope)


156
157
158
159
160
# File 'lib/queries/taxon_name/autocomplete.rb', line 156

# Names whose `cached` value starts with query_string (backslashes are
# stripped from the query first), limited to 20 rows.
#
# @return [Scope]
def autocomplete_cached_end_wildcard
  prefix_pattern = "#{query_string.delete('\\')}%"
  condition = table[:cached].matches(prefix_pattern)
  base_query.where(condition.to_sql).limit(20)
end

#autocomplete_cached_name_end_wildcardScope

Returns:

  • (Scope)


185
186
187
188
# File 'lib/queries/taxon_name/autocomplete.rb', line 185

# Names whose `name` column starts with query_string, limited to 20 rows.
#
# @return [Scope]
def autocomplete_cached_name_end_wildcard
  prefix_pattern = "#{query_string}%"
  base_query
    .where(table[:name].matches(prefix_pattern).to_sql)
    .limit(20)
end

#autocomplete_cached_original_combination_wildcard_whitespace_with_spaceObject



195
196
197
198
# File 'lib/queries/taxon_name/autocomplete.rb', line 195

# Wildcard match on `cached_original_combination`.
#
# The query has every '. ' collapsed to a single space, then each whitespace
# or backslash character replaced with '% ' (wildcard followed by a space),
# and a trailing '%' appended. Limited to 20 rows.
#
# @return [Scope]
def autocomplete_cached_original_combination_wildcard_whitespace_with_space
  normalized = query_string.gsub('. ', ' ')
  pattern = normalized.gsub(/[\s\\]/, '% ') + '%'
  condition = table[:cached_original_combination].matches(pattern)
  base_query.where(condition.to_sql).limit(20)
end

#autocomplete_cached_wildcard_whitespaceScope

Returns:

  • (Scope)


201
202
203
204
# File 'lib/queries/taxon_name/autocomplete.rb', line 201

# Wildcard match on `cached`.
#
# The query has every '. ' collapsed to a single space, then each whitespace
# or backslash character replaced with '%'. No leading or trailing wildcard
# is added. Limited to 20 rows.
#
# @return [Scope]
def autocomplete_cached_wildcard_whitespace
  pattern = query_string.gsub('. ', ' ').gsub(/[\s\\]/, '%')
  condition = table[:cached].matches(pattern)
  base_query.where(condition.to_sql).limit(20)
end

#autocomplete_cached_wildcard_whitespace_with_spaceObject



190
191
192
193
# File 'lib/queries/taxon_name/autocomplete.rb', line 190

# Wildcard match on `cached`.
#
# The query has every '. ' collapsed to a single space, then each whitespace
# or backslash character replaced with '% ' (wildcard followed by a space),
# and a trailing '%' appended. Limited to 20 rows.
#
# @return [Scope]
def autocomplete_cached_wildcard_whitespace_with_space
  normalized = query_string.gsub('. ', ' ')
  pattern = normalized.gsub(/[\s\\]/, '% ') + '%'
  condition = table[:cached].matches(pattern)
  base_query.where(condition.to_sql).limit(20)
end

#autocomplete_combined_ginObject

Used in /otus/api/v1/autocomplete



275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# File 'lib/queries/taxon_name/autocomplete.rb', line 275

# Builds one weighted similarity query across name, cached_author_year,
# cached and cached_original_combination.
#
# Used in /otus/api/v1/autocomplete
#
# @return [Scope] taxon names with an extra `sml_t` score column, ordered by
#   the weighted score (highest first), distinct
def autocomplete_combined_gin
  # Step 1: candidate rows plus one similarity score per column.
  # Bind order matters: sml_n, sml_c and sml_coc compare against query_string,
  # while sml_cay compares against `authorship`. The `%` pre-filter on the
  # where clause uses query_string for all three columns it checks.
  a = ::TaxonName.select(ApplicationRecord.sanitize_sql(
    ['taxon_names.*, similarity(?, name) AS sml_n, similarity(?, taxon_names.cached_author_year) AS sml_cay, similarity(?, cached) AS sml_c, similarity(?, taxon_names.cached_original_combination) AS sml_coc',
     query_string, authorship, query_string, query_string])
                        ).where('taxon_names.cached_author_year % ? OR taxon_names.cached_original_combination % ? OR cached % ?', query_string, query_string, query_string)

  # Step 2: wrap the candidates in a CTE (`tns`) and join it back to
  # taxon_names to compute the weighted total `sml_tn`. Missing scores count
  # as 0 (COALESCE); the weights are the CACHED_*_WEIGHT constants.
  s = 'WITH tns AS (' + a.to_sql + ') ' +
    ::TaxonName
    .select(Arel.sql("taxon_names.*, (( COALESCE(tns1.sml_n,0) * #{CACHED_NAME_WEIGHT} + \
                                            COALESCE(tns1.sml_cay,0) * #{CACHED_AUTHOR_YEAR_WEIGHT} + \
                                            COALESCE(tns1.sml_c,0) * #{CACHED_WEIGHT} + \
                                            COALESCE(tns1.sml_coc,0) * #{CACHED_ORIGINAL_COMBINATION_WEIGHT} \
                                          )) sml_tn"))
    .joins('JOIN tns as tns1  on tns1.id = taxon_names.id')
    .to_sql

  # Step 3: select from the generated SQL as if it were the taxon_names table,
  # exposing the total as `sml_t` and ordering by it.
  ::TaxonName.select('taxon_names.*, sml_tn as sml_t').from('(' + s + ') as taxon_names').order('sml_tn DESC').distinct
end

#autocomplete_exact_cachedScope

Returns:

  • (Scope)


113
114
115
116
# File 'lib/queries/taxon_name/autocomplete.rb', line 113

# Names whose `cached` value equals query_string exactly, with an additional
# ordering by cached_author_year ascending, limited to 20 rows.
#
# @return [Scope]
def autocomplete_exact_cached
  exact_match = table[:cached].eq(query_string)
  base_query
    .where(exact_match.to_sql)
    .order('cached_author_year ASC')
    .limit(20)
end

#autocomplete_exact_cached_original_combinationScope

Returns:

  • (Scope)


119
120
121
122
# File 'lib/queries/taxon_name/autocomplete.rb', line 119

# Names whose `cached_original_combination` equals query_string exactly, with
# an additional ordering by cached_author_year ascending, limited to 20 rows.
#
# @return [Scope]
def autocomplete_exact_cached_original_combination
  exact_match = table[:cached_original_combination].eq(query_string)
  base_query
    .where(exact_match.to_sql)
    .order('cached_author_year ASC')
    .limit(20)
end

#autocomplete_exact_nameScope

Returns:

  • (Scope)


143
144
145
146
# File 'lib/queries/taxon_name/autocomplete.rb', line 143

# Names whose `name` column equals query_string exactly, with an additional
# ordering by cached_author_year ascending, limited to 20 rows.
#
# @return [Scope]
def autocomplete_exact_name
  exact_match = table[:name].eq(query_string)
  base_query
    .where(exact_match.to_sql)
    .order('cached_author_year ASC')
    .limit(20)
end

#autocomplete_exact_name_and_yearScope

Returns:

  • (Scope)


131
132
133
134
135
136
137
138
139
140
# File 'lib/queries/taxon_name/autocomplete.rb', line 131

# Exact name plus year match.
#
# Applies only when the query contains exactly one digit-free alphabetic
# string and at least one year: `name` must equal that string and
# `cached_author_year` must match any of wildcard_wrapped_years.
# Limited to 10 rows.
#
# @return [Scope, nil] nil when the query does not have that shape
def autocomplete_exact_name_and_year
  names = alphabetic_strings.reject { |candidate| candidate =~ /\d/ }
  found_years = years
  return nil if names.size != 1 || found_years.empty?

  name_matches = table[:name].eq(names.first)
  year_matches = table[:cached_author_year].matches_any(wildcard_wrapped_years)
  base_query.where(name_matches.and(year_matches).to_sql).limit(10)
end

#autocomplete_genus_species1(result) ⇒ Scope

Parameters:

  • result (String)

Returns:

  • (Scope)


170
171
172
173
174
# File 'lib/queries/taxon_name/autocomplete.rb', line 170

# Matches `cached` against the given pattern as-is, ordered by
# 'type DESC, cached ASC' in addition to the base ordering, limited to 8 rows.
#
# @param result [String, nil] the pattern to match (see #genus_species)
# @return [Scope, nil] nil when no pattern is given
def autocomplete_genus_species1(result)
  return nil if result.nil?

  condition = table[:cached].matches(result)
  base_query
    .where(condition.to_sql)
    .order('type DESC, cached ASC')
    .limit(8)
end

#autocomplete_genus_species2(result) ⇒ Scope

Parameters:

  • result (String)

Returns:

  • (Scope)


178
179
180
181
182
# File 'lib/queries/taxon_name/autocomplete.rb', line 178

# Matches `cached` against the given pattern with a trailing '%' wildcard
# appended, ordered by 'type DESC, cached ASC' in addition to the base
# ordering, limited to 8 rows.
#
# @param result [String, nil] the pattern to match (see #genus_species)
# @return [Scope, nil] nil when no pattern is given
def autocomplete_genus_species2(result)
  return nil if result.nil?

  open_ended = result + '%'
  condition = table[:cached].matches(open_ended)
  base_query
    .where(condition.to_sql)
    .order('type DESC, cached ASC')
    .limit(8)
end

#autocomplete_name_author_year_fragmentScope?

Returns:

  • (Scope, nil)


207
208
209
210
211
212
213
214
215
# File 'lib/queries/taxon_name/autocomplete.rb', line 207

# Two-fragment match: when `fragments` yields exactly two entries, the first
# is matched against `name` and the second against `cached_author_year`.
# Limited to 20 rows.
#
# @return [Scope, nil] nil unless there are exactly two fragments
def autocomplete_name_author_year_fragment
  parts = fragments
  return nil unless parts.size == 2

  name_part, author_year_part = parts
  condition = table[:name].matches(name_part)
                          .and(table[:cached_author_year].matches(author_year_part))
  base_query.where(condition.to_sql).limit(20)
end

#autocomplete_original_combinationObject



252
253
254
255
256
257
# File 'lib/queries/taxon_name/autocomplete.rb', line 252

# Similarity search of query_string against
# taxon_names.cached_original_combination.
#
# Each row carries an extra `sml` column with the similarity score; rows are
# kept only when they pass the `%` operator and score above 0.6, and are
# ordered by score (best first), then by cached_original_combination.
# No project filter is applied here.
#
# @return [Scope]
def autocomplete_original_combination
  scored_columns = ApplicationRecord.sanitize_sql(
    ['taxon_names.*, similarity(?, taxon_names.cached_original_combination) AS sml', query_string]
  )
  above_cutoff = ApplicationRecord.sanitize_sql_array(
    ["similarity('%s', taxon_names.cached_original_combination) > 0.6", query_string]
  )

  ::TaxonName
    .select(scored_columns)
    .where('taxon_names.cached_original_combination % ?', query_string)
    .where(above_cutoff)
    .order('sml DESC, taxon_names.cached_original_combination')
end

#autocomplete_taxon_name_author_year_matchesArel::Nodes::Matches

Returns:

  • (Arel::Nodes::Matches)


232
233
234
235
236
# File 'lib/queries/taxon_name/autocomplete.rb', line 232

# Matches `cached_author_year` against the `authorship` value, limited to
# 10 rows.
#
# @return [Scope, nil] nil when authorship is nil
def autocomplete_taxon_name_author_year_matches
  author_pattern = authorship
  return nil if author_pattern.nil?

  condition = table[:cached_author_year].matches(author_pattern)
  base_query.where(condition.to_sql).limit(10)
end

#autocomplete_top_cachedScope

Returns:

  • (Scope)


149
150
151
152
153
# File 'lib/queries/taxon_name/autocomplete.rb', line 149

# The single first row (per base_query ordering) whose `cached` value starts
# with query_string.
#
# @return [Scope]
def autocomplete_top_cached
  prefix_pattern = "#{query_string}%"
  base_query
    .where(table[:cached].matches(prefix_pattern).to_sql)
    .limit(1)
end

#autocomplete_top_cached_subgenusScope

Returns:

  • (Scope)


163
164
165
166
# File 'lib/queries/taxon_name/autocomplete.rb', line 163

# The single first row (per base_query ordering) whose `cached` value ends
# with query_string wrapped in parentheses, i.e. matches "%(<query>)".
#
# @return [Scope]
def autocomplete_top_cached_subgenus
  parenthesized_suffix = "%(#{query_string})"
  base_query
    .where(table[:cached].matches(parenthesized_suffix).to_sql)
    .limit(1)
end

#autocomplete_wildcard_author_year_joined_piecesScope?

Returns:

  • (Scope, nil)


218
219
220
221
222
# File 'lib/queries/taxon_name/autocomplete.rb', line 218

# Matches `cached_author_year` against all `pieces` joined and wrapped with
# '%' wildcards (in order), additionally ordered by 'cached ASC', limited to
# 20 rows.
#
# @return [Scope, nil] nil when there are no pieces
def autocomplete_wildcard_author_year_joined_pieces
  parts = pieces
  return nil if parts.empty?

  pattern = "%#{parts.join('%')}%"
  condition = table[:cached_author_year].matches(pattern)
  base_query.where(condition.to_sql).order('cached ASC').limit(20)
end

#autocomplete_wildcard_cached_original_combinationScope

Returns:

  • (Scope)


125
126
127
128
# File 'lib/queries/taxon_name/autocomplete.rb', line 125

# Matches `cached_original_combination` against `wildcard_pieces`,
# additionally ordered by cached_author_year ascending, limited to 20 rows.
#
# @return [Scope]
def autocomplete_wildcard_cached_original_combination
  condition = table[:cached_original_combination].matches(wildcard_pieces)
  base_query
    .where(condition.to_sql)
    .order('cached_author_year ASC')
    .limit(20)
end

#autocomplete_wildcard_joined_stringsScope?

Returns:

  • (Scope, nil)


225
226
227
228
229
# File 'lib/queries/taxon_name/autocomplete.rb', line 225

# Matches `cached` against all `alphabetic_strings` joined and wrapped with
# '%' wildcards (in order), limited to 10 rows.
#
# @return [Scope, nil] nil when there are no alphabetic strings
def autocomplete_wildcard_joined_strings
  words = alphabetic_strings
  return nil if words.empty?

  pattern = "%#{words.join('%')}%"
  condition = table[:cached].matches(pattern)
  base_query.where(condition.to_sql).limit(10)
end

#base_queryScope

TODO: this should deprecate for gin based approaches.

Returns:

  • (Scope)


415
416
417
418
419
# File 'lib/queries/taxon_name/autocomplete.rb', line 415

# Common starting scope for the non-gin autocomplete queries: all taxon name
# columns plus char_length(cached), with :ancestor_hierarchies included, and
# ordered by the length of `cached` first, then `cached` alphabetically.
#
# TODO: this should deprecate for gin based approaches.
#
# @return [Scope]
def base_query
  shortest_first = Arel.sql('char_length(taxon_names.cached), taxon_names.cached ASC')

  ::TaxonName
    .select('taxon_names.*, char_length(taxon_names.cached)')
    .includes(:ancestor_hierarchies)
    .order(shortest_first)
end

#comprehensive_autocompleteObject

TODO: Refactor to OTU approach?



320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
# File 'lib/queries/taxon_name/autocomplete.rb', line 320

# The default (non-exact) list of queries, in priority order: exact matches,
# similarity matches, genus/species specialised matches, then wildcard
# matches. Entries may be nil; the caller is expected to drop those.
#
# TODO: Refactor to OTU approach?
#
# @return [Array<Scope, nil>]
def comprehensive_autocomplete
  genus_species_pattern = genus_species

  exact_matches = [
    autocomplete_exact_cached,
    autocomplete_exact_cached_original_combination,
    autocomplete_exact_name_and_year,
    autocomplete_exact_name,

    autocomplete_exact_id,
    autocomplete_identifier_cached_exact,
    autocomplete_identifier_identifier_exact
  ]

  # All exact should be before these?
  #
  # These are left in, but the cutoff
  # is now 2x as high, i.e. more like wildcard matches we
  # were originally used to.
  similarity_matches = [
    autocomplete_cached,               # sim
    autocomplete_original_combination, # sim
    autocomplete_cached_author_year    # sim
  ]

  # Specialized results
  specialized_matches = [
    autocomplete_genus_species1(genus_species_pattern), # not tested
    autocomplete_genus_species2(genus_species_pattern), # not tested
    autocomplete_top_cached_subgenus                    # not tested
  ]

  # Currently disabled:
  # autocomplete_top_cached, # Wildcard end
  # autocomplete_cached_end_wildcard,
  # autocomplete_cached_name_end_wildcard,
  # autocomplete_cached_wildcard_whitespace,
  # autocomplete_name_author_year_fragment,
  # autocomplete_taxon_name_author_year_matches,
  wildcard_matches = [
    autocomplete_wildcard_joined_strings,
    autocomplete_wildcard_author_year_joined_pieces,
    autocomplete_wildcard_cached_original_combination
  ]

  exact_matches + similarity_matches + specialized_matches + wildcard_matches
end

#exact_autocompleteObject

Used in New taxon name task, for example

TODO: what is intent?


296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
# File 'lib/queries/taxon_name/autocomplete.rb', line 296

# The list of queries used when `exact` is set, in priority order. Entries may
# be nil; the caller is expected to drop those.
#
# Used in New taxon name task, for example
#
# TODO: what is intent?
#
# @return [Array<Scope, nil>]
def exact_autocomplete
  exact_matches = [
    autocomplete_exact_id,
    autocomplete_exact_cached,
    autocomplete_exact_cached_original_combination,
    autocomplete_identifier_cached_exact,
    autocomplete_identifier_identifier_exact,
    autocomplete_exact_name_and_year
  ]

  looser_matches = [
    autocomplete_cached_end_wildcard,
    autocomplete_cached_wildcard_whitespace_with_space,
    autocomplete_cached_original_combination_wildcard_whitespace_with_space,
    autocomplete_cached_wildcard_whitespace,
    autocomplete_name_author_year_fragment,
    autocomplete_taxon_name_author_year_matches,
    autocomplete_wildcard_joined_strings,
    autocomplete_wildcard_author_year_joined_pieces,
    autocomplete_wildcard_cached_original_combination,
    autocomplete_exact_name # not exact enough, want the whole thing?
    # autocomplete_top_cached, # not exact at all
  ]

  exact_matches + looser_matches
end

#genus_speciesString?

Returns the parsed genus and species from the query string, joined by a '%' wildcard, or nil when either part is missing.

Returns:

  • (String, nil)

    the parsed genus and species from the query string, joined by a '%' wildcard; nil when either part is missing



398
399
400
401
402
403
404
405
406
407
408
409
410
411
# File 'lib/queries/taxon_name/autocomplete.rb', line 398

# Parses query_string with Vendor::Biodiversity::Result and builds a match
# pattern from what the parser reports as genus and species.
#
# @return [String, nil]
#   "<genus>%<species>" (the wildcard sits between the two parts), or nil
#   when the parser yields no genus or no species
def genus_species
  parser = Vendor::Biodiversity::Result.new
  parser.name = query_string
  # Must run before genus/species are read; its return value is not used.
  parser.parse

  genus = parser.genus
  species = parser.species
  return nil unless genus && species

  "#{genus}%#{species}"
end

#is_typeArel::Nodes::<>?

and clause

Returns:

  • (Arel::Nodes::<>, nil)


93
94
95
96
# File 'lib/queries/taxon_name/autocomplete.rb', line 93

# And clause restricting the `type` column to the requested types.
#
# @return [Arel::Nodes::<>, nil]
#   an IN condition on `type`, or nil when no type filter was supplied
#   (nil or empty)
def is_type
  # `type` is stored straight from the params and is nil when the key was
  # not passed; treat that the same as an empty list rather than raising.
  return nil if type.nil? || type.empty?

  table[:type].in(type)
end

#taxon_name_hierarchies_tableArel::Table

Returns:

  • (Arel::Table)


422
423
424
# File 'lib/queries/taxon_name/autocomplete.rb', line 422

# A fresh Arel table handle for 'taxon_name_hierarchies' (a new object on
# every call).
#
# @return [Arel::Table]
def taxon_name_hierarchies_table
  hierarchies_table_name = 'taxon_name_hierarchies'
  Arel::Table.new(hierarchies_table_name)
end

#unified_autocompleteObject



358
359
360
361
362
363
364
# File 'lib/queries/taxon_name/autocomplete.rb', line 358

# Short query list built around the combined gin query: exact id match,
# the weighted similarity query, then exact identifier match.
#
# @return [Array] the three queries in that order (entries may be nil)
def unified_autocomplete
  by_id = autocomplete_exact_id
  by_similarity = autocomplete_combined_gin
  by_identifier = autocomplete_identifier_cached_exact

  [by_id, by_similarity, by_identifier]
end

#valid_stateArel::Nodes::<>?

and clause

Returns:

  • (Arel::Nodes::<>, nil)


86
87
88
89
# File 'lib/queries/taxon_name/autocomplete.rb', line 86

# And clause for the validity filter.
#
# @return [Arel::Nodes::<>, nil]
#   nil when @valid is nil (no check made);
#   id == cached_valid_taxon_name_id when valid is truthy;
#   id != cached_valid_taxon_name_id otherwise
def valid_state
  return nil if @valid.nil?

  id_column = table[:id]
  valid_id_column = table[:cached_valid_taxon_name_id]

  if valid
    id_column.eq(valid_id_column)
  else
    id_column.not_eq(valid_id_column)
  end
end

#with_cached_author_yearArel::Nodes::Matches

Returns:

  • (Arel::Nodes::Matches)


427
428
429
# File 'lib/queries/taxon_name/autocomplete.rb', line 427

# Condition that `cached_author_year` matches any of `terms`.
#
# @return [Arel::Nodes::Matches]
def with_cached_author_year
  author_year_column = table[:cached_author_year]
  author_year_column.matches_any(terms)
end

#with_nomenclature_groupArel::Nodes::Grouping?

Returns and clause.

Returns:

  • (Arel::Nodes::Grouping, nil)

    and clause



107
108
109
110
# File 'lib/queries/taxon_name/autocomplete.rb', line 107

# And clause restricting `rank_class` to the requested nomenclature groups.
#
# @return [Arel::Nodes::Grouping, nil]
#   a condition that `rank_class` matches any of the given groups, or nil
#   when no nomenclature group filter was supplied (nil or empty)
def with_nomenclature_group
  # `nomenclature_group` is stored straight from the params and is nil when
  # the key was not passed; treat that like an empty list rather than raising.
  return nil if nomenclature_group.nil? || nomenclature_group.empty?

  table[:rank_class].matches_any(nomenclature_group)
end

#with_parent_idArel::Nodes::<>?

and clause, limit to ancestors or [ids]

Returns:

  • (Arel::Nodes::<>, nil)


100
101
102
103
# File 'lib/queries/taxon_name/autocomplete.rb', line 100

# And clause on the taxon_name_hierarchies table: ancestor_id must be one of
# the requested parent ids.
#
# @return [Arel::Nodes::<>, nil]
#   an IN condition on taxon_name_hierarchies.ancestor_id, or nil when no
#   parent filter was supplied (nil or empty)
def with_parent_id
  # `parent_id` is stored straight from the params and is nil when the key
  # was not passed; treat that like an empty list rather than raising.
  return nil if parent_id.nil? || parent_id.empty?

  taxon_name_hierarchies_table[:ancestor_id].in(parent_id)
end