Class: Tools::InteractiveKey
- Inherits:
-
Object
- Object
- Tools::InteractiveKey
- Defined in:
- lib/tools/interactive_key.rb
Overview
Contains methods used to build an interactive key
Instance Attribute Summary collapse
-
#descriptor_available_keywords ⇒ Object
Returns the list of all Tags used with the descriptors.
-
#descriptor_available_languages ⇒ Object
Returns the list of available Languages used as translations for descriptors and character_states (if translations are available).
-
#descriptors ⇒ Object
Temporary attribute.
-
#descriptors_hash ⇒ Object
temporary hash of descriptors; used for calculation of useful and not useful descriptors and their states.
-
#descriptors_with_filter ⇒ Object
Temporary attribute.
-
#eliminate_unknown ⇒ Object
Optional attribute to eliminate taxa with no scores on a used descriptor: ‘false’ - default or ‘true’ If true, the rows without scores will be eliminated.
-
#eliminated ⇒ Object
Returns the list of objects eliminated by previously used descriptors.
-
#error_tolerance ⇒ Object
Optional attribute.
-
#identified_to_rank ⇒ Object
Optional attribute to limit identification to OTU or a particular nomenclatural rank.
-
#keyword_ids ⇒ Object
Optional attribute to provide a list of tagIDs to limit the set of descriptors to those tagged: “keyword_ids=1|5|15”.
-
#language_id ⇒ Object
Optional attribute to display the descriptors and character_states in a particular language (when translations are available).
-
#language_to_use ⇒ Object
Returns Language as an object if the language_id was provided (used to display descriptors in a particular language).
-
#list_of_descriptors ⇒ Object
Return the list of descriptors and their states.
-
#observation_matrix ⇒ Object
Returns observation_matrix as an object.
-
#observation_matrix_citation ⇒ Object
Returns observation_matrix_citation as an object.
-
#observation_matrix_id ⇒ Object
Required attribute to build the key.
-
#otu_filter ⇒ Object
Optional attribute to provide a list of otuIDs to limit the set “otu_filter=1|5|10”.
-
#otu_id_filter_array ⇒ Object
Array of otu_ids in the @otu_filter.
-
#project_id ⇒ Object
Required attribute to build the key.
-
#remaining ⇒ Object
Returns the list of objects not eliminated by previously used descriptors.
-
#row_filter ⇒ Object
Optional attribute to provide a list of rowIDs to limit the set “row_filter=1|5|10”.
-
#row_hash ⇒ Object
Temporary hash of rows; used for calculation of remaining and eliminated rows.
-
#row_id_filter_array ⇒ Object
Array of row_ids in the @row_filter.
-
#rows_with_filter ⇒ Object
Temporary attribute.
-
#selected_descriptors ⇒ Object
Optional attribute: descriptors and states selected during identification “123:1|3||125:3|5||135:2||140:3-5” Each used descriptor is separated by ‘||’.
-
#selected_descriptors_hash ⇒ Object
selected_descriptors String is converted into Hash.
-
#sorting ⇒ Object
Optional attribute to sort the list of descriptors.
Instance Method Summary collapse
-
#descriptors_hash_initiate ⇒ Object
descriptors_hash: {:descriptor, ### (descriptor) :observations, ### (array of observations for ) :state_ids, ### {hash of state_ids used in the particular matrix }}.
- #descriptors_with_keywords ⇒ Object
- #eliminated_taxa ⇒ Object
- #get_rows_with_filter ⇒ Object
-
#initialize(observation_matrix_id: nil, project_id: nil, language_id: nil, keyword_ids: nil, row_filter: nil, otu_filter: nil, sorting: 'weighted', error_tolerance: 0, identified_to_rank: nil, eliminate_unknown: nil, selected_descriptors: nil) ⇒ InteractiveKey
constructor
A new instance of InteractiveKey.
- #otu_filter_array ⇒ Object
-
#remaining_taxa ⇒ Object
@error_tolerance - integer @eliminate_unknown ‘true’ or ‘false’ @descriptors_hash.
- #row_filter_array ⇒ Object
-
#row_hash_initiate ⇒ Object
row_hash: {:object, ### (collection_object or OTU) :object_at_rank, ### (converted to OTU or TN) :row_id, :otu_id, :errors, ### (calculated number of errors) :status } ### (‘remaining’, ‘eliminated’).
-
#selected_descriptors_hash_initiate ⇒ Object
Converts selected_descriptors, e.g. “123:1|3||125:3|5||135:2||136:true||140:5-10”, into a Hash: {123 => [‘1’, ‘3’], 125 => [‘3’, ‘5’], 135 => [‘2’], 136 => [‘true’], 140 => [‘5-10’]}.
- #useful_descriptors ⇒ Object
Constructor Details
#initialize(observation_matrix_id: nil, project_id: nil, language_id: nil, keyword_ids: nil, row_filter: nil, otu_filter: nil, sorting: 'weighted', error_tolerance: 0, identified_to_rank: nil, eliminate_unknown: nil, selected_descriptors: nil) ⇒ InteractiveKey
Returns a new instance of InteractiveKey.
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
# File 'lib/tools/interactive_key.rb', line 151

# Builds the key in one pass: loads the matrix, stores the request options,
# prepares the temporary row/descriptor hashes, runs the main calculation and
# finally drops the temporary data.
#
# Statement order matters: most right-hand sides are instance methods that
# read instance variables assigned on earlier lines.
#
# @param observation_matrix_id required; looked up with `find` inside project_id
# @param project_id required; scopes the ObservationMatrix lookup
# @param language_id optional language used for descriptor / state names
# @param keyword_ids optional '|'-separated tag ids, e.g. "1|5|15"
# @param row_filter optional '|'-separated row ids, e.g. "1|5|10"
# @param otu_filter optional '|'-separated otu ids, e.g. "1|5|10"
# @param sorting 'ordered', 'weighted' (default) or 'optimized'
# @param error_tolerance number of allowed mismatches; converted with to_i
# @param identified_to_rank 'otu', a rank name, or nil
# @param eliminate_unknown 'true' (or Boolean true) to eliminate unscored rows
# @param selected_descriptors e.g. "123:1|3||125:3|5||135:2||140:3-5"
def initialize(
  observation_matrix_id: nil,
  project_id: nil,
  language_id: nil,
  keyword_ids: nil,
  row_filter: nil,
  otu_filter: nil,
  sorting: 'weighted',
  error_tolerance: 0,
  identified_to_rank: nil,
  eliminate_unknown: nil,
  selected_descriptors: nil)

  # raise if observation_matrix_id.blank? || project_id.blank?
  @observation_matrix_id = observation_matrix_id
  @project_id = project_id
  @observation_matrix = ObservationMatrix.where(project_id: project_id).find(observation_matrix_id)
  @observation_matrix_citation = @observation_matrix.source

  # NOTE(review): in this listing descriptor_available_languages,
  # language_to_use and descriptor_available_keywords are plain readers of the
  # very variables assigned here, so these assignments would store nil unless
  # the class defines those methods elsewhere - confirm.
  @descriptor_available_languages = descriptor_available_languages
  @language_id = language_id
  @language_to_use = language_to_use
  @keyword_ids = keyword_ids
  @descriptor_available_keywords = descriptor_available_keywords
  @descriptors_with_filter = descriptors_with_keywords

  @row_filter = row_filter
  @otu_filter = otu_filter
  @row_id_filter_array = row_filter_array
  @otu_id_filter_array = otu_filter_array
  @rows_with_filter = get_rows_with_filter

  @sorting = sorting
  @error_tolerance = error_tolerance.to_i
  # Accept both the request-parameter string 'true' and a real Boolean true;
  # anything else (nil, 'false', false, ...) disables the option.
  @eliminate_unknown = eliminate_unknown.to_s == 'true'
  @identified_to_rank = identified_to_rank
  @selected_descriptors = selected_descriptors
  @selected_descriptors_hash = selected_descriptors_hash_initiate

  @row_hash = row_hash_initiate
  @descriptors_hash = descriptors_hash_initiate

  ### main_logic
  @remaining = remaining_taxa          # also marks rows in @row_hash as 'eliminated'
  @eliminated = eliminated_taxa        # reads the statuses set by remaining_taxa
  @list_of_descriptors = useful_descriptors

  ### delete temporary data
  @row_hash = nil
  # @descriptors_hash = nil
  @rows_with_filter = nil
  @descriptors_with_filter = nil
end
Instance Attribute Details
#descriptor_available_keywords ⇒ Object
Returns the list of all Tags used with the descriptors. Descriptors could be filtered by tag_id
91 92 93 |
# File 'lib/tools/interactive_key.rb', line 91 def descriptor_available_keywords @descriptor_available_keywords end |
#descriptor_available_languages ⇒ Object
Returns the list of available Languages used as translations for descriptors and character_states (if translations are available)
81 82 83 |
# File 'lib/tools/interactive_key.rb', line 81 def descriptor_available_languages @descriptor_available_languages end |
#descriptors ⇒ Object
Temporary attribute. Used for validation.
86 87 88 |
# File 'lib/tools/interactive_key.rb', line 86 def descriptors @descriptors end |
#descriptors_hash ⇒ Object
temporary hash of descriptors; used for calculation of useful and not useful descriptors and their states
149 150 151 |
# File 'lib/tools/interactive_key.rb', line 149 def descriptors_hash @descriptors_hash end |
#descriptors_with_filter ⇒ Object
Temporary attribute. Used for validation. List of descriptors reduced by keyword_ids
101 102 103 |
# File 'lib/tools/interactive_key.rb', line 101 def descriptors_with_filter @descriptors_with_filter end |
#eliminate_unknown ⇒ Object
Optional attribute to eliminate taxa with no scores on a used descriptor: ‘false’ - default or ‘true’ If true, the rows without scores will be eliminated
45 46 47 |
# File 'lib/tools/interactive_key.rb', line 45 def eliminate_unknown @eliminate_unknown end |
#eliminated ⇒ Object
Returns the list of objects eliminated by previously used descriptors. The list may include collection_objects OR otus OR valid taxon_names
134 135 136 |
# File 'lib/tools/interactive_key.rb', line 134 def eliminated @eliminated end |
#error_tolerance ⇒ Object
Optional attribute. Number of allowed errors during identification
50 51 52 |
# File 'lib/tools/interactive_key.rb', line 50 def error_tolerance @error_tolerance end |
#identified_to_rank ⇒ Object
Optional attribute to limit identification to OTU or a particular nomenclatural rank. Valid values are ‘otu’, ‘species’, ‘genus’, etc.
55 56 57 |
# File 'lib/tools/interactive_key.rb', line 55 def identified_to_rank @identified_to_rank end |
#keyword_ids ⇒ Object
Optional attribute to provide a list of tagIDs to limit the set of descriptors to those tagged: “keyword_ids=1|5|15”
24 25 26 |
# File 'lib/tools/interactive_key.rb', line 24 def keyword_ids @keyword_ids end |
#language_id ⇒ Object
Optional attribute to display the descriptors and character_states in a particular language (when translations are available)
19 20 21 |
# File 'lib/tools/interactive_key.rb', line 19 def language_id @language_id end |
#language_to_use ⇒ Object
Returns Language as an object if the language_id was provided (used to display descriptors in a particular language)
96 97 98 |
# File 'lib/tools/interactive_key.rb', line 96 def language_to_use @language_to_use end |
#list_of_descriptors ⇒ Object
Return the list of descriptors and their states. Translated (if needed) and Sorted Each descriptor has an attribute :status, which could be ‘used’, ‘useful’, ‘useless’ for further identification
122 123 124 |
# File 'lib/tools/interactive_key.rb', line 122 def list_of_descriptors @list_of_descriptors end |
#observation_matrix ⇒ Object
Returns observation_matrix as an object
71 72 73 |
# File 'lib/tools/interactive_key.rb', line 71 def observation_matrix @observation_matrix end |
#observation_matrix_citation ⇒ Object
Returns observation_matrix_citation as an object
76 77 78 |
# File 'lib/tools/interactive_key.rb', line 76 def observation_matrix_citation @observation_matrix_citation end |
#observation_matrix_id ⇒ Object
Required attribute to build the key
9 10 11 |
# File 'lib/tools/interactive_key.rb', line 9 def observation_matrix_id @observation_matrix_id end |
#otu_filter ⇒ Object
Optional attribute to provide a list of otuIDs to limit the set “otu_filter=1|5|10”
34 35 36 |
# File 'lib/tools/interactive_key.rb', line 34 def otu_filter @otu_filter end |
#otu_id_filter_array ⇒ Object
Array of otu_ids in the @otu_filter
116 117 118 |
# File 'lib/tools/interactive_key.rb', line 116 def otu_id_filter_array @otu_id_filter_array end |
#project_id ⇒ Object
Required attribute to build the key
14 15 16 |
# File 'lib/tools/interactive_key.rb', line 14 def project_id @project_id end |
#remaining ⇒ Object
Returns the list of objects not eliminated by previously used descriptors. The list may include collection_objects OR otus OR valid taxon_names
128 129 130 |
# File 'lib/tools/interactive_key.rb', line 128 def remaining @remaining end |
#row_filter ⇒ Object
Optional attribute to provide a list of rowIDs to limit the set “row_filter=1|5|10”
29 30 31 |
# File 'lib/tools/interactive_key.rb', line 29 def row_filter @row_filter end |
#row_hash ⇒ Object
Temporary hash of rows; used for calculation of remaining and eliminated rows
144 145 146 |
# File 'lib/tools/interactive_key.rb', line 144 def row_hash @row_hash end |
#row_id_filter_array ⇒ Object
Array of row_ids in the @row_filter
111 112 113 |
# File 'lib/tools/interactive_key.rb', line 111 def row_id_filter_array @row_id_filter_array end |
#rows_with_filter ⇒ Object
Temporary attribute. Used for validation. list of rows to be included into the matrix
106 107 108 |
# File 'lib/tools/interactive_key.rb', line 106 def rows_with_filter @rows_with_filter end |
#selected_descriptors ⇒ Object
Optional attribute: descriptors and states selected during identification “123:1|3||125:3|5||135:2||140:3-5” Each used descriptor is separated by ‘||’. States or values are separated from descriptors with ‘:’. Multiple selected character_states for one descriptor are separated by ‘|’. Sample states can use numerical ranges
64 65 66 |
# File 'lib/tools/interactive_key.rb', line 64 def selected_descriptors @selected_descriptors end |
#selected_descriptors_hash ⇒ Object
selected_descriptors String is converted into Hash
139 140 141 |
# File 'lib/tools/interactive_key.rb', line 139 def selected_descriptors_hash @selected_descriptors_hash end |
#sorting ⇒ Object
Optional attribute to sort the list of descriptors. Options: ‘ordered’, ‘weighted’, ‘optimized’. ‘weighted’ is the default if nothing is provided
39 40 41 |
# File 'lib/tools/interactive_key.rb', line 39 def sorting @sorting end |
Instance Method Details
#descriptors_hash_initiate ⇒ Object
descriptors_hash: {:descriptor, ### (descriptor)
:observations, ### (array of observations for )
:state_ids, ### {hash of state_ids used in the particular matrix
}}
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 |
# File 'lib/tools/interactive_key.rb', line 287

# Builds the temporary descriptor hash, keyed by descriptor id.
#
# Each entry holds:
#   :descriptor        the descriptor record
#   :weight_index      0
#   :state_ids         {} (pre-seeded with 'true'/'false' for PresenceAbsence)
#   :count             0
#   :min / :max        sentinels 999999 / -999999, Continuous and Sample only
#   :observations      { "<object type><object id>" => [observation, ...] }
#   :observation_hash  { "<object type><object id>" => [value as String, ...] }
#   :status            'used' when the descriptor is in selected_descriptors_hash,
#                      otherwise 'useless'
#
# @return [Hash]
def descriptors_hash_initiate
  h = {}

  descriptors_with_keywords.each do |d|
    entry = {
      descriptor: d,
      weight_index: 0,
      state_ids: {},
      count: 0
    }

    # min / max values used as continuous or sample
    if d.type == 'Descriptor::Continuous' || d.type == 'Descriptor::Sample'
      entry[:min] = 999999
      entry[:max] = -999999
    end

    if d.type == 'Descriptor::PresenceAbsence'
      # Both presence states start out 'useless'. The original assigned the
      # status of 'true' twice and left 'false' without any status.
      %w[true false].each do |presence|
        entry[:state_ids][presence] = { rows: {}, status: 'useless' }
      end
    end

    entry[:observations] = {}     # all observations for a particular object
    entry[:observation_hash] = {} # state_ids / true|false / values per object
    entry[:status] = selected_descriptors_hash[d.id] ? 'used' : 'useless' # 'used', 'useful', 'useless'

    h[d.id] = entry
  end

  t = ['Observation::Continuous', 'Observation::PresenceAbsence', 'Observation::Qualitative', 'Observation::Sample']

  observation_matrix.observations.where('"observations"."type" IN (?)', t).each do |o|
    entry = h[o.descriptor_id]
    next unless entry # observation for a descriptor that was filtered out

    otu_collection_object = o.observation_object_type + o.observation_object_id.to_s

    (entry[:observations][otu_collection_object] ||= []) << o

    values = (entry[:observation_hash][otu_collection_object] ||= [])
    values << o.character_state_id.to_s if o.character_state_id
    values << ('%g' % o.continuous_value) if o.continuous_value
    values << o.presence.to_s unless o.presence.nil?
  end

  h
end
#descriptors_with_keywords ⇒ Object
236 237 238 239 240 241 242 |
# File 'lib/tools/interactive_key.rb', line 236

# Returns the descriptors, limited to those tagged with one of keyword_ids
# when a '|'-separated list such as "1|5|15" was supplied.
#
# A blank keyword_ids (nil or "") means "no keyword filter". Previously an
# empty string was truthy and produced an empty id list, which matched no
# descriptor at all.
def descriptors_with_keywords
  return descriptors if keyword_ids.blank?

  ids = keyword_ids.to_s.split('|').map(&:to_i)
  descriptors.joins(:tags).where('tags.keyword_id IN (?)', ids)
end
#eliminated_taxa ⇒ Object
420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 |
# File 'lib/tools/interactive_key.rb', line 420

# Lists the objects whose rows were marked 'eliminated' in row_hash, one entry
# per object ("<class>|<id>" of :object_at_rank); the first eliminated row of
# an object supplies row_id / errors / error_descriptors.
#
# An object that is still present in `remaining` (through another row that
# resolves to the same object) is not reported as eliminated. The original
# test `remaining.include?("<class>|<id>")` compared a String against the
# result hashes stored in `remaining`, so it never matched.
#
# @return [Array<Hash>] hashes with :object, :row_id, :errors, :error_descriptors
def eliminated_taxa
  remaining_keys = (remaining || []).each_with_object({}) do |entry, keys|
    keys["#{entry[:object].class}|#{entry[:object].id}"] = true
  end

  h = {}
  row_hash.each_value do |r_value|
    next unless r_value[:status] == 'eliminated'

    object_at_rank = r_value[:object_at_rank]
    obj = "#{object_at_rank.class}|#{object_at_rank.id}"
    next if remaining_keys.key?(obj) || h.key?(obj)

    h[obj] = {
      object: object_at_rank,
      row_id: r_value[:object].id,
      errors: r_value[:errors],
      error_descriptors: r_value[:error_descriptors]
    }
  end

  h.values
end
#get_rows_with_filter ⇒ Object
252 253 254 |
# File 'lib/tools/interactive_key.rb', line 252

# Returns the rows of the observation matrix ordered by :position.
# No row or otu filter is applied at this point.
def get_rows_with_filter
  matrix_rows = observation_matrix.observation_matrix_rows
  matrix_rows.order(:position)
end
#otu_filter_array ⇒ Object
248 249 250 |
# File 'lib/tools/interactive_key.rb', line 248

# Splits the '|'-separated otu_filter (e.g. "1|5|10") into an array of
# integers; returns nil when otu_filter is blank.
def otu_filter_array
  return nil if otu_filter.blank?

  otu_filter.to_s.split('|').map { |otu_id| otu_id.to_i }
end
#remaining_taxa ⇒ Object
@error_tolerance - integer
@eliminate_unknown 'true' or 'false'
@descriptors_hash
344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 |
# File 'lib/tools/interactive_key.rb', line 344

# Scores every row of row_hash against the selected descriptors and returns
# the objects that are still in play.
#
# Side effects on each row_hash value: :errors and :error_descriptors are
# accumulated, and :status is set to 'eliminated' when the row is filtered
# out or has more errors than error_tolerance.
#
# Returns an array of hashes {object:, row_id:, errors:, error_descriptors:},
# one per distinct :object_at_rank (first non-eliminated row wins).
def remaining_taxa
  h = {}
  language = language_id.blank? ? nil : language_id.to_i
  row_hash.each do |r_key, r_value|
    selected_descriptors_hash.each do |d_key, d_value|
      # Same "<object type><object id>" key that descriptors_hash uses for its observations.
      otu_collection_object = r_value[:object].observation_object_type + r_value[:object].observation_object_id.to_s # otu_id.to_s + '|' + r_value[:object].collection_object_id.to_s
      # Selected descriptor is not part of descriptors_hash: ignore it.
      next if descriptors_hash[d_key].blank?
      d_name = descriptors_hash[d_key][:descriptor].target_name(:key, language) + ': '
      if eliminate_unknown && descriptors_hash[d_key][:observation_hash][otu_collection_object].nil?
        # Unscored row counts as an error only when eliminate_unknown is on.
        r_value[:errors] += 1
        r_value[:error_descriptors] += [d_name + 'unknown']
      elsif descriptors_hash[d_key][:observation_hash][otu_collection_object].nil?
        #character not scored but no error
      else
        case descriptors_hash[d_key][:descriptor].type
        when 'Descriptor::Continuous'
          # Error when none of the row's values (formatted with "%g") equals a selected value.
          if (descriptors_hash[d_key][:observation_hash][otu_collection_object] & d_value).empty?
            r_value[:errors] += 1
            str = d_name + descriptors_hash[d_key][:observations][otu_collection_object].collect{|o| "%g" % o.continuous_value}.join(' OR ')
            r_value[:error_descriptors] += [str]
          end
        when 'Descriptor::PresenceAbsence'
          # Error when the row's 'true'/'false' values do not intersect the selection.
          if (descriptors_hash[d_key][:observation_hash][otu_collection_object] & d_value).empty?
            r_value[:errors] += 1
            str = d_name + descriptors_hash[d_key][:observations][otu_collection_object].collect{|o| o.presence}.join(' OR ')
            r_value[:error_descriptors] += [str]
          end
        when 'Descriptor::Qualitative'
          # Error when none of the row's character state ids was selected.
          if (descriptors_hash[d_key][:observation_hash][otu_collection_object] & d_value).empty?
            r_value[:errors] += 1
            str = d_name + descriptors_hash[d_key][:observations][otu_collection_object].collect{|o| o.character_state.target_name(:key, language)}.join(' OR ')
            r_value[:error_descriptors] += [str]
          end
        when 'Descriptor::Sample'
          # The selection is a single value "5" or a range "5-10"; p becomes
          # true as soon as one observed [sample_min, sample_max] overlaps it.
          p = false
          a = d_value.first.split('-')
          d_min = a[0].to_f
          d_max = a[1].nil? ? d_min : a[1].to_f
          descriptors_hash[d_key][:observations][otu_collection_object].each do |o|
            s_min = o.sample_min.to_f
            s_max = o.sample_max.nil? ? s_min : o.sample_max.to_f
            p = true if (d_min >= s_min && d_min <= s_max) || (d_max >= s_min && d_max <= s_max) || (d_min <= s_min && d_max >= s_max)
          end
          if p == false
            r_value[:errors] += 1
            str = d_name + descriptors_hash[d_key][:observations][otu_collection_object].collect{|o| o.sample_min.to_s + '–' + o.sample_max.to_s}.join(' OR ')
            r_value[:error_descriptors] += [str]
          end
        end
      end
    end
    # Key used to report each object at rank only once.
    obj = r_value[:object_at_rank].class.to_s + '|' + r_value[:object_at_rank].id.to_s
    # Rows outside the row / otu filters are eliminated; 'F' replaces the error count.
    if (row_id_filter_array && !row_id_filter_array.include?(r_value[:object].id)) || (otu_id_filter_array && !otu_id_filter_array.include?(r_value[:otu_id]))
      r_value[:status] = 'eliminated'
      r_value[:errors] = 'F'
      r_value[:error_descriptors] = ['Filtered out']
    end
    # The 'F' test comes first so the String is never compared with an Integer.
    if r_value[:errors] == 'F' || r_value[:errors] > error_tolerance
      r_value[:status] = 'eliminated'
    elsif h[obj].nil?
      h[obj] = {object: r_value[:object_at_rank], row_id: r_value[:object].id, errors: r_value[:errors], error_descriptors: r_value[:error_descriptors] }
    end
  end
  return h.values
end
#row_filter_array ⇒ Object
244 245 246 |
# File 'lib/tools/interactive_key.rb', line 244

# Splits the '|'-separated row_filter (e.g. "1|5|10") into an array of
# integers; returns nil when row_filter is blank.
def row_filter_array
  return nil if row_filter.blank?

  row_filter.to_s.split('|').map { |row_id| row_id.to_i }
end
#row_hash_initiate ⇒ Object
row_hash: {:object, ### (collection_object or OTU)
:object_at_rank, ### (converted to OTU or TN)
:row_id,
:otu_id,
:errors, ### (calculated number of errors)
:status } ### ('remaining', 'eliminated')
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 |
# File 'lib/tools/interactive_key.rb', line 262

# Builds the temporary row hash, keyed by "<object type><object id>".
#
# Each entry holds:
#   :object            the matrix row
#   :object_at_rank    r.current_otu when identified_to_rank == 'otu', the
#                      ancestor of r.current_taxon_name at identified_to_rank
#                      for any other rank, otherwise (or when those are nil)
#                      the row itself
#   :otu_id            the row's object id for Otu rows, else current_otu's id
#                      (nil when the row has no current otu)
#   :errors            0 (calculated number of errors)
#   :error_descriptors []
#   :status            'remaining'; replaced by 'eliminated' when the number
#                      of errors exceeds the error tolerance
#
# @return [Hash]
def row_hash_initiate
  rows_with_filter.each_with_object({}) do |r, h|
    otu_collection_object = r.observation_object_type + r.observation_object_id.to_s

    object_at_rank =
      if identified_to_rank == 'otu'
        r.current_otu || r
      elsif identified_to_rank
        # Second argument is the positional include-self flag.
        r.current_taxon_name&.ancestor_at_rank(identified_to_rank, true) || r
      else
        r
      end

    # current_otu may be nil (see the `|| r` fallback above), so do not call
    # .id on it unconditionally.
    otu_id = r.observation_object_type == 'Otu' ? r.observation_object_id : r.current_otu&.id

    h[otu_collection_object] = {
      object: r,
      object_at_rank: object_at_rank,
      otu_id: otu_id,
      errors: 0,
      error_descriptors: [],
      status: 'remaining'
    }
  end
end
#selected_descriptors_hash_initiate ⇒ Object
Converts selected_descriptors, e.g. “123:1|3||125:3|5||135:2||136:true||140:5-10”, into a Hash: {123 => [‘1’, ‘3’], 125 => [‘3’, ‘5’], 135 => [‘2’], 136 => [‘true’], 140 => [‘5-10’]}
330 331 332 333 334 335 336 337 338 339 |
# File 'lib/tools/interactive_key.rb', line 330

# Converts the selected_descriptors string into a Hash of
# descriptor id (Integer) => selected states / values (Array of String).
#
#   "123:1|3||125:3|5||135:2||136:true||140:5-10"
#   # => {123 => ['1', '3'], 125 => ['3', '5'], 135 => ['2'],
#   #     136 => ['true'], 140 => ['5-10']}
#
# Descriptors are separated by '||', the id from its values by ':', and
# multiple values by '|'. Segments without an id or without any value
# ("", "123", "123:") are skipped; they used to produce `[nil]` (or a bogus
# key 0), which remaining_taxa cannot handle for Sample descriptors because it
# calls `d_value.first.split('-')`.
#
# @return [Hash{Integer => Array<String>}] empty when nothing is selected
def selected_descriptors_hash_initiate
  return {} if selected_descriptors.blank?

  selected_descriptors.to_s.split('||').each_with_object({}) do |segment, h|
    descriptor_id, values = segment.split(':')
    next if descriptor_id.nil? || descriptor_id.empty?
    next if values.nil? || values.empty?

    h[descriptor_id.to_i] = values.split('|')
  end
end
#useful_descriptors ⇒ Object
436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 |
# File 'lib/tools/interactive_key.rb', line 436

# Builds the list of descriptors (with their states) shown to the user.
#
# For every entry of descriptors_hash it
#   1. walks the observations and records, per state, which remaining objects
#      carry it (mutating the entry's :state_ids, :count, :min, :max),
#   2. builds a descriptor hash (:id, :type, :name, :weight, :position,
#      :usefulness, :status, :description, :depiction_ids and type-specific
#      keys such as :states, :min, :max, :default_unit),
#   3. marks descriptor and states 'used', 'useful' or 'useless'.
#
# The list is then sorted according to `sorting` ('ordered', 'weighted',
# 'optimized'). Any other value leaves the list unsorted; the original
# returned nil in that case because the trailing `case` had no match.
#
# @return [Array<Hash>]
def useful_descriptors
  list_of_remaining_taxa = {}
  language = language_id.blank? ? nil : language_id.to_i
  row_hash.each do |r_key, r_value|
    if r_value[:status] != 'eliminated'
      list_of_remaining_taxa[r_value[:object_at_rank] ] = true
    end
  end
  number_of_taxa = list_of_remaining_taxa.count.to_i
  array_of_descriptors = []

  descriptors_hash.each do |d_key, d_value|
    # Start with every remaining object flagged as "unknown" for this
    # descriptor; objects with an observation are flipped to false below.
    taxa_with_unknown_character_states = {}
    list_of_remaining_taxa.each do |key, value|
      taxa_with_unknown_character_states[key] = value
    end
    # taxa_with_unknown_character_states = list_of_remaining_taxa if @eliminate_unknown == false

    d_value[:observations].each do |otu_key, otu_value|
      otu_collection_object = otu_key
      if true #@row_hash[otu_collection_object]
        otu_value.each do |o|
          if o.character_state_id
            d_value[:state_ids][o.character_state_id.to_s] = {} if d_value[:state_ids][o.character_state_id.to_s].nil?
            d_value[:state_ids][o.character_state_id.to_s][:rows] = {} if d_value[:state_ids][o.character_state_id.to_s][:rows].nil? ## rows which this state identifies
            d_value[:state_ids][o.character_state_id.to_s][:rows][ @row_hash[otu_collection_object][:object_at_rank] ] = true if @row_hash[otu_collection_object][:status] != 'eliminated'
            if selected_descriptors_hash[d_key] && selected_descriptors_hash[d_key].include?(o.character_state_id.to_s)
              d_value[:state_ids][o.character_state_id.to_s][:status] = 'used' ## 'used', 'useful', 'useless'
            else
              d_value[:state_ids][o.character_state_id.to_s][:status] = 'useful' ## 'used', 'useful', 'useless'
            end
          end

          unless o.presence.nil?
            #d_value[:state_ids][o.presence.to_s] = {} if d_value[:state_ids][o.presence.to_s].nil?
            #d_value[:state_ids][o.presence.to_s][:rows] = {} if d_value[:state_ids][o.presence.to_s][:rows].nil? ## rows which this state identifies
            d_value[:state_ids][o.presence.to_s][:rows][ @row_hash[otu_collection_object][:object_at_rank] ] = true if @row_hash[otu_collection_object][:status] != 'eliminated'
            if selected_descriptors_hash[d_key] && selected_descriptors_hash[d_key].include?(o.presence.to_s)
              d_value[:state_ids][o.presence.to_s][:status] = 'used' ## 'used', 'useful', 'useless'
            else
              d_value[:state_ids][o.presence.to_s][:status] = 'useful' ## 'used', 'useful', 'useless'
            end
          end

          unless o.continuous_value.nil?
            d_value[:state_ids][o.id] = true
            # Only rows still in play contribute to count / min / max.
            if @row_hash[otu_collection_object][:status] != 'eliminated'
              d_value[:count] +=1
              d_value[:min] = o.continuous_value if d_value[:min] > o.continuous_value
              d_value[:max] = o.continuous_value if d_value[:max] < o.continuous_value
            end
          end

          unless o.sample_min.nil?
            # NOTE(review): nil.to_f is 0.0, so :o_max is 0.0 (never blank)
            # when sample_max is missing - confirm this is intended.
            d_value[:state_ids][o.id] = {o_min: o.sample_min.to_f, o_max: o.sample_max.to_f}
            if @row_hash[otu_collection_object][:status] != 'eliminated'
              d_value[:count] +=1
              d_value[:min] = o.sample_min if d_value[:min] > o.sample_min
              if o.sample_max
                d_value[:max] = o.sample_max if d_value[:max] < o.sample_max
              else
                d_value[:max] = o.sample_min if d_value[:max] < o.sample_min
              end
            end
          end

          taxa_with_unknown_character_states[ @row_hash[otu_collection_object][:object_at_rank] ] = false if @eliminate_unknown == false
        end
      end
    end

    # Unscored objects are counted for every state, but only when unknowns
    # are not eliminated.
    number_of_taxa_with_unknown_character_states = 0
    number_of_taxa_with_unknown_character_states = taxa_with_unknown_character_states.select{|key, value| value == true}.count if @eliminate_unknown == false

    descriptor = {}
    descriptor[:id] = d_key
    descriptor[:type] = d_value[:descriptor].type
    descriptor[:name] = d_value[:descriptor].target_name(:key, language)
    descriptor[:weight] = d_value[:descriptor].weight
    descriptor[:position] = d_value[:descriptor].position
    descriptor[:usefulness] = 0
    descriptor[:status] = d_value[:status] == 'used' ? 'used' : 'useless'
    descriptor[:description] = d_value[:descriptor].description
    descriptor[:depiction_ids] = d_value[:descriptor].depictions.order(:position).pluck(:id)
    s = 0

    case d_value[:descriptor].type
    when 'Descriptor::Qualitative'
      number_of_states = d_value[:state_ids].count.to_i
      descriptor[:states] = []
      d_value[:state_ids].each do |s_key, s_value|
        c = CharacterState.find(s_key.to_i)
        state = {}
        state[:id] = c.id
        state[:name] = c.target_name(:key, language)
        state[:position] = c.position
        state[:label] = c.label
        state[:number_of_objects] = s_value[:rows].count + number_of_taxa_with_unknown_character_states
        state[:status] = s_value[:status] == 'used' ? 'used' : 'useful'
        n = s_value[:rows].count
        if descriptor[:status] == 'used'
          #do nothing
        elsif n == number_of_taxa || n == 0
          # A state carried by all or by none of the remaining objects does
          # not split them.
          s_value[:status] = 'useless'
          state[:status] = 'useless'
        else
          d_value[:status] = 'useful'
          descriptor[:status] = 'useful'
        end
        state[:depiction_ids] = c.depictions.order(:position).pluck(:id)
        # weight = rem_taxa/number_of_states + squer (sum (rem_taxa/number_of_states - taxa_in_each_state)^2)
        # NOTE(review): both operands are Integers, so this is integer division.
        s += (number_of_taxa / number_of_states - s_value[:rows].count) ** 2
        descriptor[:states] += [state]
      end
      descriptor[:usefulness] = number_of_taxa / number_of_states + Math.sqrt(s) if number_of_states > 0
      descriptor[:states].sort_by!{|i| i[:position]}
    when 'Descriptor::Continuous'
      descriptor[:default_unit] = d_value[:descriptor].default_unit
      descriptor[:min] = d_value[:min]
      descriptor[:max] = d_value[:max]
      number_of_measurements = d_value[:count]
      s = (d_value[:min] - (d_value[:min] / 10)) / (d_value[:max] + 0.0000001 - d_value[:min])
      descriptor[:usefulness] = number_of_taxa * s * (2 - (number_of_measurements / number_of_taxa)) if number_of_taxa > 0
      # NOTE(review): with no measurements min/max are still the sentinels
      # (999999 / -999999), which differ, so the descriptor becomes 'useful'.
      if descriptor[:status] != 'used' && descriptor[:min] != descriptor[:max]
        d_value[:status] = 'useful'
        descriptor[:status] = 'useful'
      end
    when 'Descriptor::Sample'
      descriptor[:default_unit] = d_value[:descriptor].default_unit
      descriptor[:min] = d_value[:min]
      descriptor[:max] = d_value[:max]
      number_of_measurements = d_value[:count]
      # i = max - min ; if 0 then (numMax - numMin / 10)
      # sum of all i
      # if numMax = numMin then numMax = numMax + 0.00001
      # weight = rem_taxa * (sum of i / number of measuments for taxon / (numMax - numMin) ) * (2 - number of measuments for taxon / rem_taxa)
      if d_value[:max] != d_value[:min] && number_of_measurements > 0
        d_value[:state_ids].each do |s_key, s_value|
          if s_value[:o_min] == s_value[:o_max] || s_value[:o_max].blank?
            s += (s_value[:o_min] - (s_value[:o_min] / 10)) / number_of_measurements / (d_value[:max] + 0.0000001 - d_value[:min])
          else
            s += (s_value[:o_max] - s_value[:o_min]) / number_of_measurements / (d_value[:max] + 0.0000001 - d_value[:min])
          end
          if descriptor[:status] != 'used' && (s_value[:o_min] != d_value[:min] || (!s_value[:o_max].blank? && s_value[:o_max] != d_value[:max]))
            d_value[:status] = 'useful'
            descriptor[:status] = 'useful'
          end
        end
      end
      descriptor[:usefulness] = number_of_taxa * s * (2 - (number_of_measurements / number_of_taxa)) if number_of_taxa > 0
    when 'Descriptor::PresenceAbsence'
      number_of_states = 2
      descriptor[:states] = []
      d_value[:state_ids].each do |s_key, s_value|
        state = {}
        state[:name] = s_key
        state[:number_of_objects] = s_value[:rows].count + number_of_taxa_with_unknown_character_states
        state[:status] = s_value[:status] == 'used' ? 'used' : 'useful'
        n = s_value[:rows].count
        if descriptor[:status] == 'used'
          #do nothing
        elsif n == number_of_taxa || n == 0
          s_value[:status] = 'useless'
          state[:status] = 'useless'
        else
          d_value[:status] = 'useful'
          descriptor[:status] = 'useful'
        end
        s += (number_of_taxa / number_of_states - s_value[:rows].count) ** 2
        descriptor[:states] += [state]
      end
      descriptor[:usefulness] = number_of_taxa / number_of_states + Math.sqrt(s) if number_of_states > 0
      # NOTE(review): :name is a String here, so unary minus is String#-@
      # (a frozen copy), not a numeric negation; the states are therefore
      # sorted ascending by name - confirm this is the intended order.
      descriptor[:states].sort_by!{|i| -i[:name]}
    end

    # Replace untouched sentinels by nil and format real bounds for display.
    descriptor[:min] = nil if descriptor[:min] == 999999
    descriptor[:max] = nil if descriptor[:max] == -999999
    descriptor[:min] = "%g" % descriptor[:min] if descriptor[:min]
    descriptor[:max] = "%g" % descriptor[:max] if descriptor[:max]
    array_of_descriptors += [descriptor]
  end

  case sorting
  when 'ordered'
    array_of_descriptors.sort_by!{|i| i[:position]}
  when 'weighted'
    array_of_descriptors.sort_by!{|i| [-i[:weight].to_i, i[:usefulness]] }
  when 'optimized'
    array_of_descriptors.sort_by!{|i| i[:usefulness]}
  end

  # Always hand back the list, also for an unrecognised `sorting` value.
  array_of_descriptors
end