Class: Utilities::MaterialExamined
- Inherits:
-
Object
- Object
- Utilities::MaterialExamined
- Defined in:
- lib/utilities/material_examined.rb
Constant Summary collapse
- MONTH_ROMAN =
%w[i ii iii iv v vi vii viii ix x xi xii].freeze
- SEX_SYMBOLS =
{ 'male' => '♂', 'female' => '♀', 'gynandromorph' => '♂♀' }.freeze
- LOOP_VARIABLES =
Lambdas that extract a grouping value from a DwC occurrence hash.
# Maps each grouping key to a lambda that receives one occurrence record
# (read with String keys such as 'typeStatus') and returns the String value
# the record is grouped under. An empty String means "no value".
{
  type_status: ->(r) { r['typeStatus'].to_s.strip },
  country: ->(r) { r['country'].to_s.strip },
  state: ->(r) { r['stateProvince'].to_s.strip },
  county: ->(r) { r['county'].to_s.strip },
  identifier_namespace: ->(r) { catalog_namespace(r['catalogNumber']) },
  identifier: ->(r) { catalog_identifier(r['catalogNumber']) },
  # Known sexes become symbols; anything else falls back to the stripped raw value.
  sex: ->(r) { SEX_SYMBOLS.fetch(normalize_sex(r['sex']), r['sex'].to_s.strip) },
  stage: ->(r) { r['lifeStage'].to_s.strip },
  repository: ->(r) { r['institutionCode'].to_s.strip },
  # extract_month returns 0 when no month is available; that maps to ''.
  month_range: ->(r) { m = extract_month(r); m.positive? ? m.to_s : '' },
  # :total is a passthrough — handled before grouping in render_group;
  # the lambda is never called but must exist for controller key validation.
  total: ->(_r) { '' }
}.freeze
- DEFAULT_ORDER =
[ :type_status, :country, :state, :county, :month_range, :total, :identifier_namespace, :identifier, :stage, :sex, :repository ].freeze
- TYPE_STATUS_SORT =
Sort position for primary type designations
{ 'holotype' => 0, 'lectotype' => 1, 'neotype' => 2, }.freeze
Instance Attribute Summary collapse
-
#augmentations ⇒ Object
readonly
Returns the value of attribute augmentations.
-
#occurrences ⇒ Object
readonly
Returns the value of attribute occurrences.
-
#order ⇒ Object
readonly
Returns the value of attribute order.
-
#todo_occurrence_ids ⇒ Object
readonly
Returns the value of attribute todo_occurrence_ids.
Class Method Summary collapse
-
.catalog_identifier(catalog_number) ⇒ Object
Returns the identifier (numeric) portion of a catalogNumber string.
-
.catalog_namespace(catalog_number) ⇒ Object
Returns the namespace portion of a catalogNumber string.
-
.extract_month(record) ⇒ Object
Returns the month (1–12) from a DwC record, or 0 if absent/invalid.
-
.normalize_sex(value) ⇒ Object
Normalises a raw DwC sex string to a SEX_SYMBOLS key.
Instance Method Summary collapse
- #catalog_identifier(val) ⇒ Object private
-
#catalog_namespace(val) ⇒ Object
private
Delegates to class methods so lambdas in LOOP_VARIABLES can call them.
-
#consecutive_runs(sorted_ints) ⇒ Object
private
Splits a sorted array of integers into runs of consecutive numbers.
- #extract_month(r) ⇒ Object private
-
#group_ordered_by_type_status(sorted_records) ⇒ Object
private
Returns an ordered array of [type_status_value, records] pairs preserving the sort order established by sort_by_type_status.
-
#initialize(occurrences, order: DEFAULT_ORDER, augmentations: {}, todo: false) ⇒ MaterialExamined
constructor
A new instance of MaterialExamined.
- #normalize_sex(val) ⇒ Object private
-
#range_label(start_num, end_num) ⇒ Object
private
Formats a numeric range as an abbreviated string.
-
#render ⇒ String
Markdown-formatted material examined text.
-
#render_group(records, keys) ⇒ Object
private
Recursively renders a set of records using the remaining loop keys.
-
#render_identifier_group(grouped, rest, namespace: '') ⇒ Object
private
Renders the :identifier level with consecutive-range detection.
-
#render_leaf(records) ⇒ Object
private
Renders the leaf node.
-
#render_month_group(grouped, rest) ⇒ Object
private
Renders the :month_range level with consecutive-month detection.
-
#roman_month_range(start_month, end_month) ⇒ Object
private
Formats a month range as Roman numerals with an en-dash separator.
- #sort_by_type_status(records) ⇒ Object private
- #total_specimens(records) ⇒ Object private
- #type_status_sort_key(ts) ⇒ Object private
Constructor Details
#initialize(occurrences, order: DEFAULT_ORDER, augmentations: {}, todo: false) ⇒ MaterialExamined
Returns a new instance of MaterialExamined.
66 67 68 69 70 71 72 |
# File 'lib/utilities/material_examined.rb', line 66

# Stores the inputs for a later call to #render.
#
# @param occurrences [Array<Hash>] records to render; other methods read them
#   with String keys such as 'typeStatus' and 'occurrenceID'
# @param order [Array<Symbol>] grouping keys, outermost first
# @param augmentations [Hash] per-record extras; render_leaf looks them up by
#   a record's 'occurrenceID' and reads their :label
# @param todo [Boolean] when truthy, render_group replaces empty grouping
#   values with '[TODO]' and records the affected occurrence ids
def initialize(occurrences, order: DEFAULT_ORDER, augmentations: {}, todo: false)
  @todo_occurrence_ids = []
  @todo = todo
  @augmentations = augmentations
  @order = order
  @occurrences = occurrences
end
Instance Attribute Details
#augmentations ⇒ Object (readonly)
Returns the value of attribute augmentations.
59 60 61 |
# File 'lib/utilities/material_examined.rb', line 59

# @return [Object] the augmentations value stored by the constructor
def augmentations = @augmentations
#occurrences ⇒ Object (readonly)
Returns the value of attribute occurrences.
59 60 61 |
# File 'lib/utilities/material_examined.rb', line 59

# @return [Object] the occurrence records stored by the constructor
def occurrences = @occurrences
#order ⇒ Object (readonly)
Returns the value of attribute order.
59 60 61 |
# File 'lib/utilities/material_examined.rb', line 59

# @return [Object] the grouping-key order stored by the constructor
def order = @order
#todo_occurrence_ids ⇒ Object (readonly)
Returns the value of attribute todo_occurrence_ids.
74 75 76 |
# File 'lib/utilities/material_examined.rb', line 74

# @return [Array] occurrence ids collected by render_group for records whose
#   grouping value was empty while the todo option was on
def todo_occurrence_ids = @todo_occurrence_ids
Class Method Details
.catalog_identifier(catalog_number) ⇒ Object
Returns the identifier (numeric) portion of a catalogNumber string. e.g. "USNM 1234" => "1234", "MCZ:Ent:12345" => "12345", "ABC" => "ABC"
117 118 119 120 |
# File 'lib/utilities/material_examined.rb', line 117

# Extracts the trailing run of digits from a catalog number.
# Falls back to the whole stripped string when it does not end in digits.
#
#   "USNM 1234" => "1234", "MCZ:Ent:12345" => "12345", "ABC" => "ABC"
#
# @param catalog_number [#to_s] raw catalogNumber value (nil allowed)
# @return [String]
def self.catalog_identifier(catalog_number)
  text = catalog_number.to_s.strip
  text[/(\d+)\z/, 1] || text
end
.catalog_namespace(catalog_number) ⇒ Object
Returns the namespace portion of a catalogNumber string. e.g. "USNM 1234" => "USNM", "MCZ:Ent:12345" => "MCZ:Ent", "1234" => ""
100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/utilities/material_examined.rb', line 100

# Extracts the namespace that precedes the trailing digits of a catalog number.
#
#   "USNM 1234" => "USNM", "MCZ:Ent:12345" => "MCZ:Ent", "1234" => ""
#
# @param catalog_number [#to_s] raw catalogNumber value (nil allowed)
# @return [String] the namespace, or '' when none can be recognised
def self.catalog_namespace(catalog_number)
  text = catalog_number.to_s.strip
  return '' if text.empty?

  # Preferred shape: text, then a separator (space : _ -), then digits.
  separated = /\A(.+?)[\s:_-]+\d+\z/.match(text)
  return separated[1].strip if separated

  # Fallback shape: a non-digit prefix glued directly onto the digits.
  glued = /\A(\D+)\d+\z/.match(text)
  glued ? glued[1].strip : ''
end
.extract_month(record) ⇒ Object
Returns the month (1–12) from a DwC record, or 0 if absent/invalid.
Reads the integer month field first; falls back to parsing eventDate.
136 137 138 139 140 141 142 143 144 145 146 |
# File 'lib/utilities/material_examined.rb', line 136

# Determines the month of a record.
#
# The 'month' field wins when it converts to 1..12; otherwise the month is
# taken from an 'eventDate' that starts with "YYYY-MM".
#
# @param record [Hash] record read with the String keys 'month' and 'eventDate'
# @return [Integer] 1..12, or 0 when no valid month is found
def self.extract_month(record)
  valid_months = 1..12

  explicit = record['month'].to_i
  return explicit if valid_months.cover?(explicit)

  # A non-matching date yields nil, and nil.to_i is 0 (outside the range).
  from_date = record['eventDate'].to_s[/\A\d{4}-(\d{2})/, 1].to_i
  valid_months.cover?(from_date) ? from_date : 0
end
.normalize_sex(value) ⇒ Object
Normalises a raw DwC sex string to a SEX_SYMBOLS key. Handles case, plurals ('females' → 'female'), and gynandromorph variants ('Gynandromorphic', 'gynandomorph', 'gynandromorphs', …).
125 126 127 128 129 130 131 132 |
# File 'lib/utilities/material_examined.rb', line 125

# Reduces a raw sex string to the form used as a SEX_SYMBOLS key.
#
# The value is stripped and lower-cased. Anything starting with "gynandro" or
# the misspelling "gynando" becomes 'gynandromorph'. Otherwise one trailing
# 's' is removed from values longer than three characters
# ('females' => 'female').
#
# @param value [#to_s] raw sex value (nil allowed)
# @return [String]
def self.normalize_sex(value)
  cleaned = value.to_s.strip.downcase

  if cleaned.match?(/\Agyn(andro|ando)/)
    'gynandromorph'
  elsif cleaned.length > 3
    cleaned.delete_suffix('s')
  else
    cleaned
  end
end
Instance Method Details
#catalog_identifier(val) ⇒ Object (private)
152 |
# File 'lib/utilities/material_examined.rb', line 152

# Instance-level shortcut that forwards to the class method of the same name.
def catalog_identifier(val)
  self.class.catalog_identifier(val)
end
#catalog_namespace(val) ⇒ Object (private)
Delegates to class methods so lambdas in LOOP_VARIABLES can call them.
151 |
# File 'lib/utilities/material_examined.rb', line 151

# Instance-level shortcut that forwards to the class method of the same name.
def catalog_namespace(val)
  self.class.catalog_namespace(val)
end
#consecutive_runs(sorted_ints) ⇒ Object (private)
Splits a sorted array of integers into runs of consecutive numbers. [1, 2, 3, 5, 6] => [[1, 2, 3], [5, 6]]
386 387 388 389 390 391 392 393 394 395 396 397 398 |
# File 'lib/utilities/material_examined.rb', line 386

# Breaks an ascending list of integers into runs where each element is
# exactly one more than the element before it.
#
#   [1, 2, 3, 5, 6] => [[1, 2, 3], [5, 6]]
#
# @param sorted_ints [Array<Integer>] integers, already sorted
# @return [Array<Array<Integer>>] the runs, in input order; [] for empty input
def consecutive_runs(sorted_ints)
  sorted_ints.slice_when { |previous, current| current != previous + 1 }.to_a
end
#extract_month(r) ⇒ Object (private)
154 |
# File 'lib/utilities/material_examined.rb', line 154

# Instance-level shortcut that forwards to the class method of the same name.
def extract_month(r)
  self.class.extract_month(r)
end
#group_ordered_by_type_status(sorted_records) ⇒ Object (private)
Returns an ordered array of [type_status_value, records] pairs preserving the sort order established by sort_by_type_status.
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 |
# File 'lib/utilities/material_examined.rb', line 169

# Splits already-sorted records into consecutive groups that share the same
# stripped 'typeStatus' string, keeping the incoming order.
#
# Only adjacent records are merged: a status that reappears later in the
# input starts a new group.
#
# @param sorted_records [Array<Hash>] records read with the 'typeStatus' key
# @return [Array<Array(String, Array<Hash>)>] [status, records] pairs
def group_ordered_by_type_status(sorted_records)
  sorted_records.chunk { |record| record['typeStatus'].to_s.strip }.to_a
end
#normalize_sex(val) ⇒ Object (private)
153 |
# File 'lib/utilities/material_examined.rb', line 153

# Instance-level shortcut that forwards to the class method of the same name.
def normalize_sex(val)
  self.class.normalize_sex(val)
end
#range_label(start_num, end_num) ⇒ Object (private)
Formats a numeric range as an abbreviated string. Only abbreviates when start and end share at least 2 leading digits, e.g. range_label(1234, 1236) => "1234-6" range_label(1000, 1234) => "1000-1234" (shared prefix only "1", no abbreviation)
404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 |
# File 'lib/utilities/material_examined.rb', line 404

# Formats a numeric range, abbreviating the end when that is unambiguous.
#
# The end is shortened to its differing suffix only when both numbers have
# the same number of digits and share at least two leading digits:
#
#   range_label(1234, 1236) => "1234-6"
#   range_label(1000, 1234) => "1000-1234"  (only "1" is shared)
#   range_label(10, 100)    => "10-100"     (different digit counts)
#
# Without the equal-length requirement, 10..100 would be rendered as "10-0".
#
# @param start_num [Integer] first number of the range
# @param end_num [Integer] last number of the range
# @return [String]
def range_label(start_num, end_num)
  return start_num.to_s if start_num == end_num

  first = start_num.to_s
  last = end_num.to_s
  full = "#{first}-#{last}"
  # A suffix only makes sense when it replaces the tail of an equally long number.
  return full unless first.length == last.length

  shared = 0
  shared += 1 while shared < first.length && first[shared] == last[shared]

  shared < 2 ? full : "#{first}-#{last[shared..]}"
end
#render ⇒ String
Returns Markdown-formatted material examined text.
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/utilities/material_examined.rb', line 77

# Produces the material-examined text for all occurrences.
#
# When :type_status is the first key in the order, each type-status group
# becomes its own paragraph (separated by a blank line). Groups with a
# non-empty status are prefixed with the upper-cased status in bold and the
# group's specimen total. Any other order is rendered as one nested group.
#
# @return [String] Markdown-formatted text; '' when there are no occurrences
def render
  return '' if occurrences.empty?

  sorted = sort_by_type_status(occurrences)
  return render_group(sorted, order) unless order.first == :type_status

  paragraphs = group_ordered_by_type_status(sorted).map do |status, members|
    count = total_specimens(members)
    text = render_group(members, order[1..])
    if status.empty?
      text
    else
      "**#{status.upcase}** (#{count}) #{text}"
    end
  end
  paragraphs.join("\n\n")
end
#render_group(records, keys) ⇒ Object (private)
Recursively renders a set of records using the remaining loop keys.
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 |
# File 'lib/utilities/material_examined.rb', line 187

# Recursively renders a set of records using the remaining loop keys.
#
# Records are grouped by the value that LOOP_VARIABLES[key] extracts, each
# group is rendered with the remaining keys, and the pieces are joined with
# '; '. With the todo option on, an empty value is replaced by '[TODO]' and
# the record's 'occurrenceID' is remembered in @todo_occurrence_ids. Each id
# is stored once, even though the same records may be rendered several times
# (the identifier renderer re-renders sub-groups, and #render can be called
# repeatedly).
#
# @param records [Array<Hash>] records to render at this level
# @param keys [Array<Symbol>] remaining grouping keys, outermost first
# @return [String] rendered text, possibly ''
def render_group(records, keys)
  return render_leaf(records) if keys.empty? || records.empty?

  key, *rest = keys

  # :total injects the summed individualCount at this nesting position and
  # continues rendering the remaining keys. render_leaf suppresses its own
  # count whenever :total appears anywhere in the order, avoiding duplication.
  if key == :total
    inner = render_group(records, rest)
    total = total_specimens(records)
    return inner.empty? ? total.to_s : "#{total} #{inner}"
  end

  extractor = LOOP_VARIABLES[key]
  grouped = records.group_by do |record|
    value = extractor.call(record)
    if value.empty? && @todo
      occurrence_id = record['occurrenceID']
      @todo_occurrence_ids << occurrence_id unless @todo_occurrence_ids.include?(occurrence_id)
      '[TODO]'
    else
      value
    end
  end

  # Geographic levels are listed alphabetically; others keep first-seen order.
  grouped = grouped.sort.to_h if %i[country state county].include?(key)

  if key == :identifier
    render_identifier_group(grouped, rest)
  elsif key == :month_range
    render_month_group(grouped, rest)
  elsif key == :identifier_namespace && rest.first == :identifier
    # Paired namespace+identifier: "NAMESPACE NUMBER-RANGE [content]"
    id_extractor = LOOP_VARIABLES[:identifier]
    paired = grouped.filter_map do |namespace, group_records|
      by_identifier = group_records.group_by { |record| id_extractor.call(record) }
      rendered = render_identifier_group(by_identifier, rest[1..], namespace: namespace)
      rendered unless rendered.empty?
    end
    paired.join('; ')
  elsif key == :repository
    # Show "(CODEN)" even when inner is empty — value still meaningful without sub-detail.
    render_labeled_groups(grouped, rest) { |val, inner| inner.empty? ? "(#{val})" : "(#{val}): #{inner}" }
  elsif key == :stage
    # Show stage label even when inner is empty.
    render_labeled_groups(grouped, rest) { |val, inner| inner.empty? ? val : "#{val} #{inner}" }
  elsif key == :sex
    # Show sex symbol even when inner is empty.
    render_labeled_groups(grouped, rest) { |val, inner| inner.empty? ? "**#{val}**" : "**#{val}**: #{inner}" }
  else
    # Geographic and other grouping levels: skip entirely when inner is empty.
    render_labeled_groups(grouped, rest) { |val, inner| "**#{val}**: #{inner}" unless inner.empty? }
  end
end

# Renders every group with the remaining keys and joins the results with '; '.
#
# A group with an empty value contributes only its inner text (or nothing
# when that is empty too). For a non-empty value the block receives
# (value, inner) and returns the formatted entry, or nil to drop the group.
def render_labeled_groups(grouped, rest)
  entries = grouped.filter_map do |val, group_records|
    inner = render_group(group_records, rest)
    if val.empty?
      inner unless inner.empty?
    else
      yield(val, inner)
    end
  end
  entries.join('; ')
end
#render_identifier_group(grouped, rest, namespace: '') ⇒ Object (private)
Renders the :identifier level with consecutive-range detection. Numeric identifiers that are consecutive AND share the same inner content are collapsed into a range label, e.g. "1234-6".
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 |
# File 'lib/utilities/material_examined.rb', line 272

# Renders the :identifier level with consecutive-range detection.
#
# Non-numeric identifiers are listed first, one entry each. Numeric
# identifiers that are consecutive AND share the same inner content are
# collapsed into a range label, e.g. "1234-6", and the inner content is
# rendered once for the combined records of the run.
#
# Identifiers that differ only by leading zeros ("007" and "7") map to the
# same integer. Their records are merged, so none are silently dropped.
#
# NOTE(review): numeric entries are emitted grouped by inner content, so the
# output is not strictly in numeric order when contents differ — confirm
# whether that is intended.
#
# @param grouped [Hash{String => Array<Hash>}] records keyed by identifier
# @param rest [Array<Symbol>] remaining grouping keys
# @param namespace [String] prefix placed before every label when non-empty
# @return [String] entries joined with '; '
def render_identifier_group(grouped, rest, namespace: '')
  numeric = {}
  other = {}
  grouped.each do |val, recs|
    if val.match?(/\A\d+\z/)
      number = val.to_i
      # Build a new array so the caller's group arrays are never mutated.
      numeric[number] = numeric.fetch(number, []) + recs
    else
      other[val] = recs
    end
  end

  ns_prefix = namespace.empty? ? '' : "#{namespace} "

  # Non-numeric identifiers — no bold, namespace prefixed
  parts = other.filter_map do |val, recs|
    inner = render_group(recs, rest)
    label = val.empty? ? ns_prefix.strip : "#{ns_prefix}#{val}"
    entry = if label.empty?
              inner
            elsif inner.empty?
              label
            else
              "#{label} #{inner}"
            end
    entry unless entry.empty?
  end

  # Numeric identifiers — group by inner content, then detect consecutive runs
  rendered = numeric.transform_values { |recs| render_group(recs, rest) }
  by_content = numeric.keys.sort.group_by { |number| rendered[number] }

  by_content.each_value do |ids|
    consecutive_runs(ids).each do |run|
      if run.length == 1
        # Same records as the pre-rendered entry, so reuse it.
        num_label = run.first.to_s
        combined_inner = rendered[run.first]
      else
        num_label = range_label(run.first, run.last)
        combined_inner = render_group(run.flat_map { |number| numeric[number] }, rest)
      end
      label = "#{ns_prefix}#{num_label}"
      parts << (combined_inner.empty? ? label : "#{label} #{combined_inner}")
    end
  end

  parts.join('; ')
end
#render_leaf(records) ⇒ Object (private)
Renders the leaf node. When :total is in the order the count was already shown upstream, so only augmentation labels are emitted here. Without :total the count is shown.
430 431 432 433 434 435 436 437 438 439 440 441 442 443 |
# File 'lib/utilities/material_examined.rb', line 430

# Renders the innermost text for a set of records.
#
# Augmentation labels are collected only when the order contains :identifier
# or :identifier_namespace. They are looked up per record by 'occurrenceID'.
# When :total is in the order only the labels are shown, since the count is
# emitted at the :total level. Otherwise the specimen count is shown, followed
# by the labels if there are any.
#
# @param records [Array<Hash>] records at this leaf
# @return [String] e.g. "(3)", "3 (label)", "(label)" or ''
def render_leaf(records)
  labels = []
  if %i[identifier identifier_namespace].any? { |level| order.include?(level) }
    labels = records.filter_map { |record| augmentations[record['occurrenceID']]&.dig(:label) }
  end

  note = labels.empty? ? '' : "(#{labels.join('; ')})"
  return note if order.include?(:total)

  count = total_specimens(records)
  labels.empty? ? "(#{count})" : "#{count} #{note}"
end
#render_month_group(grouped, rest) ⇒ Object (private)
Renders the :month_range level with consecutive-month detection. Month group keys are integer strings ("1"–"12") and are rendered as Roman numerals. Consecutive months sharing the same inner content collapse to a range, e.g. months 8 and 9 → "viii–ix". Output preserves chronological order.
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 |
# File 'lib/utilities/material_examined.rb', line 321

# Renders the :month_range level with consecutive-month detection.
#
# Group keys are month numbers as strings ("1".."12"), a literal label such
# as '[TODO]', or '' for records without a month. Output order is: literal
# labels, then months chronologically, then the month-less records (which get
# no month label).
#
# Consecutive months whose inner content is identical collapse into one
# Roman-numeral range, e.g. months 8 and 9 => "viii–ix". The content of a
# collapsed range is rendered from the combined records of all its months, so
# any counts cover the whole range. Months whose content is empty are skipped.
#
# @param grouped [Hash{String => Array<Hash>}] records keyed by month value
# @param rest [Array<Symbol>] remaining grouping keys
# @return [String] entries joined with '; '
def render_month_group(grouped, rest)
  numeric = {}
  labeled = {}
  grouped.each do |val, recs|
    next if val.empty?

    month = val.to_i
    if month.positive?
      numeric[month] = recs
    else
      labeled[val] = recs # e.g. '[TODO]'
    end
  end

  # Non-numeric literal labels (e.g. '[TODO]') — rendered before numeric months
  parts = labeled.filter_map do |label, recs|
    content = render_group(recs, rest)
    "#{label}, #{content}" unless content.empty?
  end

  rendered = numeric.transform_values { |recs| render_group(recs, rest) }

  # Cut the sorted months wherever they stop being consecutive or the content
  # changes. Iterating over finished runs cannot stall on an empty month.
  runs = numeric.keys.sort.slice_when do |previous, current|
    current != previous + 1 || rendered[current] != rendered[previous]
  end

  runs.each do |run|
    next if rendered[run.first].empty?

    content = if run.length == 1
                rendered[run.first]
              else
                render_group(run.flat_map { |month| numeric[month] }, rest)
              end
    next if content.empty?

    parts << "#{roman_month_range(run.first, run.last)}, #{content}"
  end

  # Records with no extractable month — rendered last, no month label
  unknown_recs = grouped[''].to_a
  unless unknown_recs.empty?
    inner = render_group(unknown_recs, rest)
    parts << inner unless inner.empty?
  end

  parts.join('; ')
end
#roman_month_range(start_month, end_month) ⇒ Object (private)
Formats a month range as Roman numerals with an en-dash separator. roman_month_range(8, 9) => "viii–ix" roman_month_range(7, 7) => "vii"
378 379 380 381 382 |
# File 'lib/utilities/material_examined.rb', line 378

# Builds the Roman-numeral label for a span of months, using an en dash
# between the two ends.
#
#   roman_month_range(8, 9) => "viii–ix"
#   roman_month_range(7, 7) => "vii"
#
# @param start_month [Integer] first month, 1-based index into MONTH_ROMAN
# @param end_month [Integer] last month, 1-based index into MONTH_ROMAN
# @return [String]
def roman_month_range(start_month, end_month)
  first = MONTH_ROMAN[start_month - 1]
  if start_month == end_month
    first
  else
    [first, MONTH_ROMAN[end_month - 1]].join('–')
  end
end
#sort_by_type_status(records) ⇒ Object (private)
156 157 158 |
# File 'lib/utilities/material_examined.rb', line 156

# Orders records by the rank of their type status and keeps all records of
# one status next to each other.
#
# The rank comes from type_status_sort_key (on the lower-cased, stripped
# status). Several statuses can share a rank, and sort_by alone is not
# stable, so ties are broken by where the status string first appears in the
# input and then by the record's own position. Each status therefore forms
# one contiguous run, and records keep their input order within it.
#
# @param records [Array<Hash>] records read with the 'typeStatus' key
# @return [Array<Hash>] a new, sorted array
def sort_by_type_status(records)
  first_seen = {}

  decorated = records.each_with_index.map do |record, index|
    status = record['typeStatus'].to_s.strip
    first_seen[status] ||= index
    [[type_status_sort_key(status.downcase), first_seen[status], index], record]
  end

  decorated.sort_by(&:first).map(&:last)
end
#total_specimens(records) ⇒ Object (private)
423 424 425 |
# File 'lib/utilities/material_examined.rb', line 423

# Adds up the 'individualCount' of the records. A record whose count is
# missing, non-numeric or below one is counted as a single specimen.
#
# @param records [Array<Hash>] records read with the 'individualCount' key
# @return [Integer]
def total_specimens(records)
  records.sum do |record|
    count = record['individualCount'].to_i
    count.positive? ? count : 1
  end
end
#type_status_sort_key(ts) ⇒ Object (private)
160 161 162 163 164 165 |
# File 'lib/utilities/material_examined.rb', line 160

# Gives the sort rank of a type status string.
#
# Statuses listed in TYPE_STATUS_SORT use their listed rank. Of the rest,
# those ending in 's' (paratypes, syntypes, paralectotypes, …) rank 3, the
# empty status ranks 5, and everything else ranks 4.
#
# @param ts [String] type status; callers pass it lower-cased and stripped
# @return [Integer]
def type_status_sort_key(ts)
  TYPE_STATUS_SORT.fetch(ts) do
    if ts.end_with?('s')
      3
    elsif ts.empty?
      5
    else
      4
    end
  end
end