Class: Match::Otu::TaxonName
- Inherits:
-
Object
- Object
- Match::Otu::TaxonName
- Defined in:
- lib/match/otu/taxon_name.rb
Constant Summary collapse
- MAX_NAMES =
1000
- MATCHABLE_COLUMNS =
[:cached, :cached_secondary_homonym, :cached_primary_homonym].freeze
Instance Attribute Summary collapse
-
#levenshtein_distance ⇒ Object
readonly
Returns the value of attribute levenshtein_distance.
-
#names ⇒ Object
readonly
Returns the value of attribute names.
-
#project_id ⇒ Object
readonly
Returns the value of attribute project_id.
-
#resolve_synonyms ⇒ Object
readonly
Returns the value of attribute resolve_synonyms.
-
#taxon_name_id ⇒ Object
readonly
Returns the value of attribute taxon_name_id.
-
#try_without_subgenus ⇒ Object
readonly
Returns the value of attribute try_without_subgenus.
Instance Method Summary collapse
-
#base_scope ⇒ ActiveRecord::Relation
private
Build the base TaxonName scope, optionally constrained to descendants of taxon_name_id.
- #call ⇒ Array<Hash>
- #find_taxon_names(name, column: :cached) ⇒ Array<TaxonName> private
- #find_taxon_names_exact(name, column: :cached) ⇒ Array<TaxonName> private
- #find_taxon_names_fuzzy(name, column: :cached) ⇒ Array<TaxonName> private
-
#initialize(names:, project_id:, levenshtein_distance: 0, taxon_name_id: nil, resolve_synonyms: false, try_without_subgenus: false) ⇒ TaxonName
constructor
A new instance of TaxonName.
- #match_name(name) ⇒ Hash private
-
#rank_taxon_names(taxon_names) ⇒ Array<TaxonName>
private
Rank candidate TaxonNames: 1. Prefer those with OTUs; 2. Prefer valid names.
Constructor Details
#initialize(names:, project_id:, levenshtein_distance: 0, taxon_name_id: nil, resolve_synonyms: false, try_without_subgenus: false) ⇒ TaxonName
Returns a new instance of TaxonName.
39 40 41 42 43 44 45 46 |
# File 'lib/match/otu/taxon_name.rb', line 39
# Capture the inputs and options later used by #call.
#
# @param names [Array] names to match; entries past the first MAX_NAMES are dropped
# @param project_id [Object] project used to scope the TaxonName and Otu queries
# @param levenshtein_distance [#to_i] coerced with to_i; values above 0 select fuzzy matching
# @param taxon_name_id [Object, nil] when present, candidates are limited to descendants of this TaxonName
# @param resolve_synonyms [Boolean] when true, OTUs are looked up on the best match's valid name
# @param try_without_subgenus [Boolean] when true, the homonym columns are tried after :cached finds nothing
def initialize(names:, project_id:, levenshtein_distance: 0, taxon_name_id: nil, resolve_synonyms: false,
               try_without_subgenus: false)
  # Keep at most MAX_NAMES entries.
  @names = names.first(MAX_NAMES)
  @levenshtein_distance = levenshtein_distance.to_i

  # Query scoping.
  @project_id = project_id
  @taxon_name_id = taxon_name_id

  # Matching behaviour flags.
  @resolve_synonyms = resolve_synonyms
  @try_without_subgenus = try_without_subgenus
end
Instance Attribute Details
#levenshtein_distance ⇒ Object (readonly)
Returns the value of attribute levenshtein_distance.
31 32 33 |
# File 'lib/match/otu/taxon_name.rb', line 31
# Returns the value of attribute levenshtein_distance.
def levenshtein_distance = @levenshtein_distance
#names ⇒ Object (readonly)
Returns the value of attribute names.
31 32 33 |
# File 'lib/match/otu/taxon_name.rb', line 31
# Returns the value of attribute names.
def names = @names
#project_id ⇒ Object (readonly)
Returns the value of attribute project_id.
31 32 33 |
# File 'lib/match/otu/taxon_name.rb', line 31
# Returns the value of attribute project_id.
def project_id = @project_id
#resolve_synonyms ⇒ Object (readonly)
Returns the value of attribute resolve_synonyms.
31 32 33 |
# File 'lib/match/otu/taxon_name.rb', line 31
# Returns the value of attribute resolve_synonyms.
def resolve_synonyms = @resolve_synonyms
#taxon_name_id ⇒ Object (readonly)
Returns the value of attribute taxon_name_id.
31 32 33 |
# File 'lib/match/otu/taxon_name.rb', line 31
# Returns the value of attribute taxon_name_id.
def taxon_name_id = @taxon_name_id
#try_without_subgenus ⇒ Object (readonly)
Returns the value of attribute try_without_subgenus.
31 32 33 |
# File 'lib/match/otu/taxon_name.rb', line 31
# Returns the value of attribute try_without_subgenus.
def try_without_subgenus = @try_without_subgenus
Instance Method Details
#base_scope ⇒ ActiveRecord::Relation (private)
Build the base TaxonName scope, optionally constrained to descendants of taxon_name_id.
148 149 150 151 152 153 154 155 156 157 158 |
# File 'lib/match/otu/taxon_name.rb', line 148
# Build the base TaxonName scope, optionally constrained to descendants of taxon_name_id.
#
# @return [ActiveRecord::Relation] TaxonNames of project_id, narrowed through
#   taxon_name_hierarchies when taxon_name_id is present
def base_scope
  in_project = ::TaxonName.where(project_id: project_id)
  return in_project unless taxon_name_id.present?

  # Keep only rows that appear as a descendant of taxon_name_id in the hierarchy table.
  in_project
    .joins('JOIN taxon_name_hierarchies ON taxon_names.id = taxon_name_hierarchies.descendant_id')
    .where(taxon_name_hierarchies: { ancestor_id: taxon_name_id })
end
#call ⇒ Array<Hash>
49 50 51 52 53 54 55 56 57 58 |
# File 'lib/match/otu/taxon_name.rb', line 49
# Match every stored name and return one result per entry of names, in the same order.
# Each distinct name is passed to #match_name once; repeated names reuse that result.
#
# @return [Array<Hash>] the #match_name hash for each name, with :scientific_name added
def call
  results_by_name = names.uniq.to_h { |name| [name, match_name(name)] }

  names.map do |name|
    results_by_name[name].merge(scientific_name: name)
  end
end
#find_taxon_names(name, column: :cached) ⇒ Array<TaxonName> (private)
100 101 102 103 104 105 106 |
# File 'lib/match/otu/taxon_name.rb', line 100
# Look up candidate TaxonNames for a name, choosing the strategy from levenshtein_distance:
# exact matching when it is not positive, fuzzy matching otherwise.
#
# @param name [String] name to look up
# @param column [Symbol] taxon_names column to compare against
# @return [Array<TaxonName>]
def find_taxon_names(name, column: :cached)
  return find_taxon_names_exact(name, column:) unless levenshtein_distance.positive?

  find_taxon_names_fuzzy(name, column:)
end
#find_taxon_names_exact(name, column: :cached) ⇒ Array<TaxonName> (private)
111 112 113 114 |
# File 'lib/match/otu/taxon_name.rb', line 111
# Load the TaxonNames in base_scope whose column equals name.
#
# @param name [String] value to match
# @param column [Symbol] taxon_names column to compare against
# @return [Array<TaxonName>]
def find_taxon_names_exact(name, column: :cached)
  base_scope.where(column => name).to_a
end
#find_taxon_names_fuzzy(name, column: :cached) ⇒ Array<TaxonName> (private)
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# File 'lib/match/otu/taxon_name.rb', line 121
# Load up to 10 TaxonNames in base_scope whose column is within the configured
# Levenshtein distance of name, ordered by that distance.
#
# @param name [String] name to match; only its first 255 characters are compared
# @param column [Symbol] one of MATCHABLE_COLUMNS
# @return [Array<TaxonName>]
# @raise [ArgumentError] if column is not in MATCHABLE_COLUMNS
def find_taxon_names_fuzzy(name, column: :cached)
  raise ArgumentError, "Invalid column: #{column}" unless MATCHABLE_COLUMNS.include?(column)

  candidates = base_scope
  needle = name[0, 255]
  # The requested distance is capped at 8.
  max_distance = [levenshtein_distance, 8].min

  # column is interpolated into SQL; it was checked against MATCHABLE_COLUMNS above.
  distance_sql = "levenshtein(left(taxon_names.#{column}, 255), ?)"
  by_distance = Arel.sql(::TaxonName.sanitize_sql_array([distance_sql, needle]))

  candidates
    .where("#{distance_sql} <= ?", needle, max_distance)
    .order(by_distance)
    .limit(10)
    .to_a
end
#match_name(name) ⇒ Hash (private)
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/match/otu/taxon_name.rb', line 64
# Match a single name to a TaxonName and collect the OTUs attached to it.
#
# The :cached column is searched first. When that finds nothing and
# try_without_subgenus is set, :cached_secondary_homonym and then
# :cached_primary_homonym are tried, stopping at the first column that yields candidates.
#
# @param name [String] name to match
# @return [Hash] with keys :taxon_name_id, :taxon_name, :otus, :ambiguous and :matched;
#   :ambiguous is true when more than one candidate was found
def match_name(name)
  candidates = find_taxon_names(name)

  if candidates.empty? && try_without_subgenus
    %i[cached_secondary_homonym cached_primary_homonym].each do |fallback_column|
      candidates = find_taxon_names(name, column: fallback_column)
      break unless candidates.empty?
    end
  end

  if candidates.empty?
    return { taxon_name_id: nil, taxon_name: nil, otus: [], ambiguous: false, matched: false }
  end

  ranked = rank_taxon_names(candidates)
  best = ranked.first

  # With resolve_synonyms, OTUs come from the valid name when the best match is not
  # itself valid and the valid name exists in this project; otherwise from the best match.
  otu_owner = best
  if resolve_synonyms && best.cached_valid_taxon_name_id != best.id
    valid_name = ::TaxonName.where(project_id: project_id).find_by(id: best.cached_valid_taxon_name_id)
    otu_owner = valid_name if valid_name
  end

  {
    taxon_name_id: best.id,
    taxon_name: best,
    otus: ::Otu.where(project_id: project_id, taxon_name_id: otu_owner.id).to_a,
    ambiguous: ranked.length > 1,
    matched: true
  }
end
#rank_taxon_names(taxon_names) ⇒ Array<TaxonName> (private)
Rank candidate TaxonNames:
1. Prefer those with OTUs
2. Prefer valid names
165 166 167 168 169 170 171 172 173 174 175 |
# File 'lib/match/otu/taxon_name.rb', line 165
# Rank candidate TaxonNames:
#   1. Prefer those with OTUs
#   2. Prefer valid names (cached_valid_taxon_name_id equals the record's own id)
#   3. Otherwise keep the order in which the candidates were given
#
# The third key matters because Enumerable#sort_by is not guaranteed to be stable:
# without it, candidates tied on the first two criteria could be reordered, discarding
# the distance ordering produced by the fuzzy lookup.
#
# @param taxon_names [Array<TaxonName>] candidates to rank
# @return [Array<TaxonName>] the same candidates, best first
def rank_taxon_names(taxon_names)
  taxon_name_ids = taxon_names.map(&:id)
  ids_with_otus = ::Otu.where(project_id: project_id, taxon_name_id: taxon_name_ids)
                       .distinct
                       .pluck(:taxon_name_id)
                       .to_set

  ranked_pairs = taxon_names.each_with_index.sort_by do |tn, position|
    [
      ids_with_otus.include?(tn.id) ? 0 : 1,
      tn.cached_valid_taxon_name_id == tn.id ? 0 : 1,
      position # tie-breaker: preserve incoming order
    ]
  end

  ranked_pairs.map(&:first)
end