Class: Utilities::MaterialExamined

Inherits:
Object
  • Object
show all
Defined in:
lib/utilities/material_examined.rb

Constant Summary collapse

# Lowercase Roman numerals for the twelve months, looked up as MONTH_ROMAN[month - 1].
MONTH_ROMAN = [
  'i', 'ii', 'iii', 'iv', 'v', 'vi',
  'vii', 'viii', 'ix', 'x', 'xi', 'xii'
].freeze
# Display symbols keyed by the normalised sex string.
# NOTE(review): the male and female entries were empty strings while the
# gynandromorph entry was the male+female pair, so the single glyphs appear
# to have been lost; restored here as U+2642 and U+2640 — confirm.
SEX_SYMBOLS = {
  'male'          => '♂',
  'female'        => '♀',
  'gynandromorph' => '♂♀'
}.freeze
LOOP_VARIABLES =

Lambdas that extract a grouping value from a DwC occurrence hash.

{
  # Each value is a one-argument lambda that receives a DwC occurrence hash
  # (string keys) and returns the String used as the grouping key for that level.
  # Plain fields are coerced with to_s (so nil becomes '') and stripped.
  # NOTE(review): the bare calls to catalog_namespace, catalog_identifier,
  # normalize_sex and extract_month resolve against whatever `self` is where
  # this literal is evaluated — presumably the class body, i.e. the class-level
  # methods of the same names; confirm.
  type_status:          ->(r) { r['typeStatus'].to_s.strip },
  country:              ->(r) { r['country'].to_s.strip },
  state:                ->(r) { r['stateProvince'].to_s.strip },
  county:               ->(r) { r['county'].to_s.strip },
  # catalogNumber is split into a namespace part and an identifier part.
  identifier_namespace: ->(r) { catalog_namespace(r['catalogNumber']) },
  identifier:           ->(r) { catalog_identifier(r['catalogNumber']) },
  # Known sexes map to their SEX_SYMBOLS entry; anything else falls back to the raw, stripped value.
  sex:                  ->(r) { SEX_SYMBOLS.fetch(normalize_sex(r['sex']), r['sex'].to_s.strip) },
  stage:                ->(r) { r['lifeStage'].to_s.strip },
  repository:           ->(r) { r['institutionCode'].to_s.strip },
  # The month number as a string, or '' when extract_month returns a non-positive value.
  month_range:          ->(r) { m = extract_month(r); m.positive? ? m.to_s : '' },
  # :total is a passthrough — handled before grouping in render_group;
  # the lambda is never called but must exist for controller key validation.
  total:                ->(_r) { '' }
}.freeze
# Nesting order used when the caller does not pass `order:`.
# Every entry is a key of LOOP_VARIABLES.
DEFAULT_ORDER = %i[
  type_status
  country state county
  month_range
  total
  identifier_namespace
  identifier
  stage
  sex
  repository
].freeze
TYPE_STATUS_SORT =

Sort position for primary type designations

{
  # Keys are looked up with the lower-cased, stripped typeStatus value.
  # Statuses not listed here are ranked by type_status_sort_key
  # (3 for values ending in 's', 4 for other non-blank values, 5 for blank).
  'holotype'  => 0,
  'lectotype' => 1,
  'neotype'   => 2,
}.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(occurrences, order: DEFAULT_ORDER, augmentations: {}, todo: false) ⇒ MaterialExamined

Returns a new instance of MaterialExamined.

Parameters:

  • occurrences (Array<Hash>)

    DwC occurrence records (string keys)

  • order (Array<Symbol>) (defaults to: DEFAULT_ORDER)

    nesting order using keys from LOOP_VARIABLES

  • augmentations (Hash) (defaults to: {})

    occurrence-level additions keyed by occurrenceID

  • todo (Boolean) (defaults to: false)

    when true, blank active-field values render as "[TODO]" and todo_occurrence_ids is populated for downstream link generation



66
67
68
69
70
71
72
# File 'lib/utilities/material_examined.rb', line 66

# Stores the inputs for a later call to #render.
#
# nil is tolerated for occurrences, order and augmentations (for example when
# a caller forwards an absent request parameter): each falls back to its
# empty/default value instead of failing later with NoMethodError on nil.
#
# @param occurrences [Array<Hash>, nil] DwC occurrence records (string keys)
# @param order [Array<Symbol>, nil] nesting order using keys from LOOP_VARIABLES
# @param augmentations [Hash, nil] occurrence-level additions keyed by occurrenceID
# @param todo [Boolean] when true, blank grouping values render as "[TODO]"
#   and the affected occurrenceIDs are collected in todo_occurrence_ids
def initialize(occurrences, order: DEFAULT_ORDER, augmentations: {}, todo: false)
  @occurrences         = occurrences || []
  @order               = order || DEFAULT_ORDER
  @augmentations       = augmentations || {}
  @todo                = todo
  @todo_occurrence_ids = []
end

Instance Attribute Details

#augmentations ⇒ Object (readonly)

Returns the value of attribute augmentations.



59
60
61
# File 'lib/utilities/material_examined.rb', line 59

# @return [Object] whatever is currently stored in @augmentations
def augmentations = @augmentations

#occurrences ⇒ Object (readonly)

Returns the value of attribute occurrences.



59
60
61
# File 'lib/utilities/material_examined.rb', line 59

# @return [Object] whatever is currently stored in @occurrences
def occurrences = @occurrences

#order ⇒ Object (readonly)

Returns the value of attribute order.



59
60
61
# File 'lib/utilities/material_examined.rb', line 59

# @return [Object] whatever is currently stored in @order
def order = @order

#todo_occurrence_ids ⇒ Object (readonly)

Returns the value of attribute todo_occurrence_ids.



74
75
76
# File 'lib/utilities/material_examined.rb', line 74

# @return [Object] whatever is currently stored in @todo_occurrence_ids
def todo_occurrence_ids = @todo_occurrence_ids

Class Method Details

.catalog_identifier(catalog_number) ⇒ Object

Returns the identifier (numeric) portion of a catalogNumber string. e.g. "USNM 1234" => "1234", "MCZ:Ent:12345" => "12345", "ABC" => "ABC"



117
118
119
120
# File 'lib/utilities/material_examined.rb', line 117

# Extracts the trailing run of digits from a catalogNumber.
# When the stripped string does not end in a digit, the whole stripped
# string is returned instead.
#
# @param catalog_number [#to_s] raw catalogNumber (nil is treated as '')
# @return [String]
def self.catalog_identifier(catalog_number)
  trimmed = catalog_number.to_s.strip
  trailing_digits = trimmed[/(\d+)\z/, 1]
  trailing_digits || trimmed
end

.catalog_namespace(catalog_number) ⇒ Object

Returns the namespace portion of a catalogNumber string. e.g. "USNM 1234" => "USNM", "MCZ:Ent:12345" => "MCZ:Ent", "1234" => ""



100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/utilities/material_examined.rb', line 100

# Extracts the part of a catalogNumber that precedes its trailing digits.
#
# Two patterns are tried in order and the first match wins:
#   1. text, one or more separators (whitespace, ':', '_' or '-'), digits
#   2. a non-digit prefix immediately followed by digits
# When neither matches (blank input, digits only, no trailing digits)
# the result is ''.
#
# @param catalog_number [#to_s] raw catalogNumber (nil is treated as '')
# @return [String]
def self.catalog_namespace(catalog_number)
  trimmed = catalog_number.to_s.strip
  return '' if trimmed.empty?

  patterns = [/\A(.+?)[\s:_-]+\d+\z/, /\A(\D+)\d+\z/]
  patterns.each do |pattern|
    found = trimmed.match(pattern)
    return found[1].strip if found
  end

  ''
end

.extract_month(record) ⇒ Object

Returns the month (1–12) from a DwC record, or 0 if absent/invalid. Reads the integer month field first; falls back to parsing eventDate.



136
137
138
139
140
141
142
143
144
145
146
# File 'lib/utilities/material_examined.rb', line 136

# Determines the month number of a DwC record.
#
# The 'month' field is used when it converts (via to_i) to a value in 1..12.
# Otherwise the two digits after a leading "YYYY-" in 'eventDate' are used,
# again only when they fall in 1..12.
#
# @param record [Hash] DwC occurrence with string keys
# @return [Integer] 1..12, or 0 when no valid month is found
def self.extract_month(record)
  valid = 1..12

  from_field = record['month'].to_i
  return from_field if valid.cover?(from_field)

  iso = /\A\d{4}-(\d{2})/.match(record['eventDate'].to_s)
  from_date = iso ? iso[1].to_i : 0
  valid.cover?(from_date) ? from_date : 0
end

.normalize_sex(value) ⇒ Object

Normalises a raw DwC sex string to a SEX_SYMBOLS key. Handles case, plurals ('females' → 'female'), and gynandromorph variants ('Gynandromorphic', 'gynandomorph', 'gynandromorphs', …).



125
126
127
128
129
130
131
132
# File 'lib/utilities/material_examined.rb', line 125

# Reduces a raw sex string to the form used for SEX_SYMBOLS lookups.
#
# The value is stripped and lower-cased. Anything starting with "gynandro"
# or the "gynando" misspelling becomes 'gynandromorph'. Otherwise one
# trailing 's' is removed from strings longer than three characters
# ('males' -> 'male', 'females' -> 'female').
#
# @param value [#to_s] raw sex value (nil is treated as '')
# @return [String]
def self.normalize_sex(value)
  cleaned = value.to_s.strip.downcase

  if cleaned.match?(/\Agyn(andro|ando)/)
    'gynandromorph'
  elsif cleaned.length > 3
    cleaned.delete_suffix('s')
  else
    cleaned
  end
end

Instance Method Details

#catalog_identifier(val) ⇒ Object (private)



152
# File 'lib/utilities/material_examined.rb', line 152

# Instance-level shortcut: forwards to the class method of the same name.
def catalog_identifier(val)
  self.class.catalog_identifier(val)
end

#catalog_namespace(val) ⇒ Object (private)

Delegates to class methods so lambdas in LOOP_VARIABLES can call them.



151
# File 'lib/utilities/material_examined.rb', line 151

# Instance-level shortcut: forwards to the class method of the same name.
def catalog_namespace(val)
  self.class.catalog_namespace(val)
end

#consecutive_runs(sorted_ints) ⇒ Object (private)

Splits a sorted array of integers into runs of consecutive numbers. [1, 2, 3, 5, 6] => [[1, 2, 3], [5, 6]]



386
387
388
389
390
391
392
393
394
395
396
397
398
# File 'lib/utilities/material_examined.rb', line 386

# Breaks an ascending list of integers into maximal runs in which every
# element is exactly one greater than the element before it.
#   [1, 2, 3, 5, 6] => [[1, 2, 3], [5, 6]]
#
# @param sorted_ints [Array<Integer>]
# @return [Array<Array<Integer>>] [] for empty input
def consecutive_runs(sorted_ints)
  sorted_ints.slice_when { |prev, nxt| nxt != prev + 1 }.to_a
end

#extract_month(r) ⇒ Object (private)



154
# File 'lib/utilities/material_examined.rb', line 154

# Instance-level shortcut: forwards to the class method of the same name.
def extract_month(r)
  self.class.extract_month(r)
end

#group_ordered_by_type_status(sorted_records) ⇒ Object (private)

Returns an ordered array of [type_status_value, records] pairs preserving the sort order established by sort_by_type_status.



169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/utilities/material_examined.rb', line 169

# Splits records into runs of adjacent records that share the same stripped
# typeStatus, keeping the incoming order. Records with the same status that
# are not adjacent end up in separate pairs.
#
# @param sorted_records [Array<Hash>] DwC occurrences, already ordered
# @return [Array<Array(String, Array<Hash>)>] [type_status_value, records] pairs
def group_ordered_by_type_status(sorted_records)
  status_of = ->(rec) { rec['typeStatus'].to_s.strip }

  sorted_records
    .chunk_while { |left, right| status_of.call(left) == status_of.call(right) }
    .map { |run| [status_of.call(run.first), run] }
end

#normalize_sex(val) ⇒ Object (private)



153
# File 'lib/utilities/material_examined.rb', line 153

# Instance-level shortcut: forwards to the class method of the same name.
def normalize_sex(val)
  self.class.normalize_sex(val)
end

#range_label(start_num, end_num) ⇒ Object (private)

Formats a numeric range as an abbreviated string. Only abbreviates when start and end share at least 2 leading digits, e.g. range_label(1234, 1236) => "1234-6"; range_label(1000, 1234) => "1000-1234" (the shared prefix is only "1", so no abbreviation).



404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
# File 'lib/utilities/material_examined.rb', line 404

# Formats a numeric range, abbreviating the end when that is unambiguous.
#
# The end is shortened to the digits that differ from the start only when
# both numbers have the same number of digits and share at least two leading
# digits:
#   range_label(1234, 1236) => "1234-6"
#   range_label(1000, 1234) => "1000-1234"  (only "1" is shared)
#   range_label(10, 105)    => "10-105"     (different digit counts)
#
# The equal-length check matters: without it 10..105 would share the prefix
# "10" and be rendered as "10-5".
#
# @param start_num [Integer] first number of the range
# @param end_num [Integer] last number of the range
# @return [String]
def range_label(start_num, end_num)
  return start_num.to_s if start_num == end_num

  first = start_num.to_s
  last  = end_num.to_s
  full  = "#{first}-#{last}"

  # Dropping a shared prefix is only meaningful when the digit counts match.
  return full unless first.length == last.length

  shared = 0
  shared += 1 while shared < first.length && first[shared] == last[shared]

  shared < 2 ? full : "#{first}-#{last[shared..]}"
end

#render ⇒ String

Returns Markdown-formatted material examined text.

Returns:

  • (String)

    Markdown-formatted material examined text



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/utilities/material_examined.rb', line 77

# Builds the Markdown material-examined text for the stored occurrences.
#
# Records are first ordered with sort_by_type_status. When :type_status is the
# first entry of `order`, each type-status block becomes its own paragraph
# (joined by a blank line) headed by the upper-cased status and its specimen
# total; otherwise the whole set is rendered in one pass.
#
# The list behind todo_occurrence_ids is rebuilt on every call and
# de-duplicated afterwards: rendering can visit the same record several times
# (once per blank grouping level, and again when ranges are re-rendered), and
# repeated calls would otherwise keep appending to the previous result.
#
# @return [String] Markdown text, '' when there are no occurrences
def render
  @todo_occurrence_ids = []
  return '' if occurrences.empty?

  sorted = sort_by_type_status(occurrences)

  text =
    if order.first == :type_status
      group_ordered_by_type_status(sorted).map { |ts_val, records|
        total = total_specimens(records)
        body  = render_group(records, order[1..])
        # Records without a type status get no heading.
        ts_val.empty? ? body : "**#{ts_val.upcase}** (#{total}) #{body}"
      }.join("\n\n")
    else
      render_group(sorted, order)
    end

  @todo_occurrence_ids.uniq!
  text
end

#render_group(records, keys) ⇒ Object (private)

Recursively renders a set of records using the remaining loop keys.



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
# File 'lib/utilities/material_examined.rb', line 187

# Recursively renders `records`, consuming one entry of `keys` per level.
#
# At each level the records are bucketed by the value the LOOP_VARIABLES
# lambda for the current key returns, each bucket is rendered with the
# remaining keys, and the pieces are joined with '; '. How a bucket's value is
# combined with its inner text depends on the key (see the branches below).
#
# Side effect: when @todo is set, every record with a blank value at the
# current level has its occurrenceID appended to @todo_occurrence_ids.
#
# @param records [Array<Hash>] DwC occurrences to render at this level
# @param keys [Array<Symbol>] remaining nesting keys (LOOP_VARIABLES keys)
# @return [String]
def render_group(records, keys)
  # Recursion bottoms out when the keys are used up or nothing is left to render.
  return render_leaf(records) if keys.empty? || records.empty?

  key  = keys.first
  rest = keys[1..]

  # :total injects the summed individualCount at this nesting position and
  # continues rendering the remaining keys. render_leaf suppresses its own
  # count whenever :total appears anywhere in the order, avoiding duplication.
  if key == :total
    inner = render_group(records, rest)
    total = total_specimens(records)
    return inner.empty? ? total.to_s : "#{total} #{inner}"
  end

  # NOTE(review): a key missing from LOOP_VARIABLES yields nil here and fails
  # at extractor.call below — presumably keys are validated by the caller.
  extractor = LOOP_VARIABLES[key]

  # Bucket records by extracted value; buckets keep first-seen order.
  grouped = {}
  records.each do |r|
    val = extractor.call(r)
    if val.empty? && @todo
      val = '[TODO]'
      @todo_occurrence_ids << r['occurrenceID']
    end
    (grouped[val] ||= []) << r
  end

  # Only the geographic levels are re-ordered by their value.
  grouped = grouped.sort.to_h if %i[country state county].include?(key)

  if key == :identifier
    render_identifier_group(grouped, rest)
  elsif key == :month_range
    render_month_group(grouped, rest)
  elsif key == :identifier_namespace && rest.first == :identifier
    # Paired namespace+identifier: "NAMESPACE NUMBER-RANGE [content]"
    # NOTE(review): identifier values extracted in this branch do not go
    # through the '[TODO]' substitution above — confirm that is intended.
    parts = grouped.filter_map { |ns_val, group_records|
      id_extractor = LOOP_VARIABLES[:identifier]
      id_grouped = {}
      group_records.each do |r|
        id_val = id_extractor.call(r)
        (id_grouped[id_val] ||= []) << r
      end
      # rest[1..] skips the :identifier key that is consumed here.
      result = render_identifier_group(id_grouped, rest[1..], namespace: ns_val)
      result.empty? ? nil : result
    }
    parts.join('; ')
  elsif key == :repository
    # Show "(CODEN)" even when inner is empty — value still meaningful without sub-detail.
    parts = grouped.filter_map { |val, group_records|
      inner = render_group(group_records, rest)
      next nil if val.empty? && inner.empty?
      val.empty? ? inner : (inner.empty? ? "(#{val})" : "(#{val}): #{inner}")
    }
    # Defensive: the filter_map above does not yield empty strings.
    parts.reject(&:empty?).join('; ')
  elsif key == :stage
    # Show stage label even when inner is empty.
    parts = grouped.filter_map { |val, group_records|
      inner = render_group(group_records, rest)
      next nil if val.empty? && inner.empty?
      val.empty? ? inner : (inner.empty? ? val : "#{val} #{inner}")
    }
    parts.reject(&:empty?).join('; ')
  elsif key == :sex
    # Show sex symbol even when inner is empty.
    parts = grouped.filter_map { |val, group_records|
      inner = render_group(group_records, rest)
      next nil if val.empty? && inner.empty?
      val.empty? ? inner : (inner.empty? ? "**#{val}**" : "**#{val}**: #{inner}")
    }
    parts.reject(&:empty?).join('; ')
  else
    # Geographic and other grouping levels: skip entirely when inner is empty.
    parts = grouped.filter_map { |val, group_records|
      inner = render_group(group_records, rest)
      next nil if inner.empty?
      val.empty? ? inner : "**#{val}**: #{inner}"
    }
    parts.join('; ')
  end
end

#render_identifier_group(grouped, rest, namespace: '') ⇒ Object (private)

Renders the :identifier level with consecutive-range detection. Numeric identifiers that are consecutive AND share the same inner content are collapsed into a range label, e.g. "1234-6".

Parameters:

  • namespace (String) (defaults to: '')

    optional prefix prepended to each range label



272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'lib/utilities/material_examined.rb', line 272

# Renders the :identifier level with consecutive-range detection.
#
# All-digit identifiers are keyed by their integer value. Identifiers whose
# inner content renders identically are collected together, split into
# consecutive runs, and each run is emitted as one label (a range such as
# "1234-6" when it holds more than one number). Other identifiers are emitted
# first, as-is.
#
# Different spellings of the same number ("7" and "007") map to the same
# integer key; their records are merged under that key rather than the later
# spelling silently replacing the earlier one.
# NOTE(review): leading zeros are not preserved in the rendered label because
# numeric identifiers are handled as integers.
#
# @param grouped [Hash{String => Array<Hash>}] records keyed by identifier value
# @param rest [Array<Symbol>] remaining nesting keys for the inner content
# @param namespace [String] optional prefix prepended to each label
# @return [String] entries joined with '; '
def render_identifier_group(grouped, rest, namespace: '')
  numeric = {}
  other   = {}

  grouped.each do |val, recs|
    if val.match?(/\A\d+\z/)
      # A fresh array is created per key, so the caller's arrays are not mutated.
      (numeric[val.to_i] ||= []).concat(recs)
    else
      other[val] = recs
    end
  end

  parts     = []
  ns_prefix = namespace.empty? ? '' : "#{namespace} "

  # Non-numeric identifiers — no bold, namespace prefixed
  other.each do |val, recs|
    inner = render_group(recs, rest)
    label = val.empty? ? ns_prefix.strip : "#{ns_prefix}#{val}"
    entry = label.empty? ? inner : (inner.empty? ? label : "#{label} #{inner}")
    parts << entry unless entry.empty?
  end

  # Numeric identifiers — group by inner content, then detect consecutive runs
  unless numeric.empty?
    # Pre-render inner content keyed by numeric id
    rendered = numeric.transform_values { |recs| render_group(recs, rest) }

    # Ids sharing the same inner content, each list in ascending order
    by_content = numeric.keys.sort.group_by { |n| rendered[n] }

    by_content.each_value do |ids|
      consecutive_runs(ids).each do |run|
        # The run is rendered again as a whole so the inner content covers
        # every record of the collapsed range.
        run_records    = run.flat_map { |n| numeric[n] }
        combined_inner = render_group(run_records, rest)
        num_label      = run.length == 1 ? run.first.to_s : range_label(run.first, run.last)
        label          = "#{ns_prefix}#{num_label}"
        parts << (combined_inner.empty? ? label : "#{label} #{combined_inner}")
      end
    end
  end

  parts.join('; ')
end

#render_leaf(records) ⇒ Object (private)

Renders the leaf node. When :total is in the order the count was already shown upstream, so only augmentation labels are emitted here. Without :total the count is shown.



430
431
432
433
434
435
436
437
438
439
440
441
442
443
# File 'lib/utilities/material_examined.rb', line 430

# Renders the innermost piece of text for a set of records.
#
# Augmentation labels (augmentations[occurrenceID][:label]) are collected only
# when the order contains :identifier or :identifier_namespace. If :total is
# in the order, the count has been emitted elsewhere, so only the
# parenthesised labels (or '') are returned; otherwise the specimen count is
# included.
#
# @param records [Array<Hash>] DwC occurrences
# @return [String]
def render_leaf(records)
  labels = []
  if order.include?(:identifier) || order.include?(:identifier_namespace)
    records.each do |rec|
      label = augmentations[rec['occurrenceID']]&.dig(:label)
      labels << label if label
    end
  end

  bracketed = "(#{labels.join('; ')})"

  if order.include?(:total)
    return '' if labels.empty?

    return bracketed
  end

  count = total_specimens(records)
  return "(#{count})" if labels.empty?

  "#{count} #{bracketed}"
end

#render_month_group(grouped, rest) ⇒ Object (private)

Renders the :month_range level with consecutive-month detection. Months are represented as integers ("1"–"12") and rendered as Roman numerals. Consecutive months sharing the same inner content collapse to a range, e.g. months 8 and 9 → "viii–ix". Output preserves chronological order.



321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
# File 'lib/utilities/material_examined.rb', line 321

# Renders the :month_range level with consecutive-month detection.
#
# `grouped` is keyed by month strings ("1".."12"), '' for records with no
# extractable month, or a literal label such as '[TODO]'. Output order is:
# literal labels, numeric months in ascending order, then the unlabelled
# records. Adjacent months whose inner content renders identically collapse
# into one range label produced by roman_month_range.
#
# Months whose inner content is empty are skipped. The runs are iterated with
# an enumerator, so skipping one cannot stall the walk (a `next` inside a
# hand-indexed `while` loop would never advance the index).
#
# @param grouped [Hash{String => Array<Hash>}] records keyed by month value
# @param rest [Array<Symbol>] remaining nesting keys for the inner content
# @return [String] entries joined with '; '
def render_month_group(grouped, rest)
  parts = []

  # Records with no extractable month — rendered last, no month label
  unknown_recs = grouped[''].to_a

  numeric = {}
  labeled = {}
  grouped.each do |val, recs|
    next if val.empty?

    month = val.to_i
    if month.positive?
      numeric[month] = recs
    else
      labeled[val] = recs # e.g. '[TODO]'
    end
  end

  # Non-numeric literal labels (e.g. '[TODO]') — rendered before numeric months
  labeled.each do |label, recs|
    content = render_group(recs, rest)
    parts << "#{label}, #{content}" unless content.empty?
  end

  unless numeric.empty?
    rendered = numeric.transform_values { |recs| render_group(recs, rest) }

    # A run ends where the month sequence has a gap or the inner content changes.
    runs = numeric.keys.sort.slice_when do |prev, nxt|
      nxt != prev + 1 || rendered[nxt] != rendered[prev]
    end

    runs.each do |run|
      content = rendered[run.first]
      next if content.empty?

      parts << "#{roman_month_range(run.first, run.last)}, #{content}"
    end
  end

  unless unknown_recs.empty?
    inner = render_group(unknown_recs, rest)
    parts << inner unless inner.empty?
  end

  parts.join('; ')
end

#roman_month_range(start_month, end_month) ⇒ Object (private)

Formats a month range as Roman numerals with an en-dash separator. roman_month_range(8, 9) => "viii–ix" roman_month_range(7, 7) => "vii"



378
379
380
381
382
# File 'lib/utilities/material_examined.rb', line 378

# Formats a month or month range as lowercase Roman numerals.
# The two ends of a range are separated by an en-dash (U+2013):
#   roman_month_range(8, 9) => "viii–ix"
#   roman_month_range(7, 7) => "vii"
#
# @param start_month [Integer] first month, 1..12
# @param end_month [Integer] last month, 1..12
# @return [String]
def roman_month_range(start_month, end_month)
  start_label = MONTH_ROMAN[start_month - 1]
  return start_label if start_month == end_month

  # Written as an escape so the separator survives re-encoding of the file.
  "#{start_label}\u2013#{MONTH_ROMAN[end_month - 1]}"
end

#sort_by_type_status(records) ⇒ Object (private)



156
157
158
# File 'lib/utilities/material_examined.rb', line 156

# Orders records by the rank type_status_sort_key assigns to their
# lower-cased, stripped typeStatus.
#
# Several distinct statuses can share a rank and sort_by is not guaranteed to
# be stable, so the rank alone could leave records of one status interleaved
# with another. Ties are therefore broken by the folded status, then the
# stripped status as written, then the original position, which keeps each
# status contiguous and the result deterministic.
#
# @param records [Array<Hash>] DwC occurrences
# @return [Array<Hash>] a new, sorted array
def sort_by_type_status(records)
  records.each_with_index.sort_by { |rec, idx|
    raw    = rec['typeStatus'].to_s.strip
    folded = raw.downcase
    [type_status_sort_key(folded), folded, raw, idx]
  }.map(&:first)
end

#total_specimens(records) ⇒ Object (private)



423
424
425
# File 'lib/utilities/material_examined.rb', line 423

# Adds up individualCount over the records. A record whose count converts
# (via to_i) to less than 1 — missing, blank, zero or negative — counts as 1.
#
# @param records [Array<Hash>] DwC occurrences
# @return [Integer] 0 for an empty list
def total_specimens(records)
  records.reduce(0) do |running, rec|
    stated = rec['individualCount'].to_i
    running + (stated > 1 ? stated : 1)
  end
end

#type_status_sort_key(ts) ⇒ Object (private)



160
161
162
163
164
165
# File 'lib/utilities/material_examined.rb', line 160

# Rank used to order type-status blocks.
#
# Statuses listed in TYPE_STATUS_SORT use the rank given there. Otherwise:
# 3 for values ending in 's' (paratypes, syntypes, paralectotypes, …),
# 5 for a blank value, and 4 for anything else.
#
# @param ts [String] lower-cased, stripped typeStatus
# @return [Integer]
def type_status_sort_key(ts)
  TYPE_STATUS_SORT.fetch(ts) do
    if ts.end_with?('s')
      3
    elsif ts.empty?
      5
    else
      4
    end
  end
end