Module: Vendor::Nasturtium

Defined in:
lib/vendor/nasturtium.rb

Overview

Possible Extensions

  • CE with an iNat global UUID
  • Bonus set GA for CE based on string matching
  • predict_otu

A middle-layer wrapper between Nasturtium and TaxonWorks

Constant Summary collapse

INAT_LICENSE_CODE_TO_TW_LICENSE =

Maps iNaturalist license_code values (photos and sounds) to CREATIVE_COMMONS_LICENSES keys. Derived from the inat_code entry of each license in CREATIVE_COMMONS_LICENSES. iNat codes not in this map (e.g. nil, 'arr') are not importable.

CREATIVE_COMMONS_LICENSES
.each_with_object({}) { |(k, v), h| h[v[:inat_code]] = k if v[:inat_code] }
.freeze
INAT_API_TIMEOUT =

Seconds before a synchronous iNat API call is abandoned.

15

Class Method Summary collapse

Class Method Details

.build_image!(obs_photo, result:, observed_year: nil) ⇒ Image

Build and save an Image (with Attribution, copyright holder Person, and iNat identifier) from an iNat photo hash. Raises on failure so the caller's savepoint can roll back.

Parameters:

  • obs_photo (Hash)

    the outer iNat observation_photo object (carries uuid); its nested 'photo' hash supplies the URL, license_code and attribution

  • result (Hash)

    the full Nasturtium observation result (for ORCID matching)

  • observed_year (Integer, nil) (defaults to: nil)

    year of observation, used as copyright year

Returns:



350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
# File 'lib/vendor/nasturtium.rb', line 350

# Build and persist an Image for one iNat observation photo, together with its
# Attribution, copyright-holder role and (when a uuid is given) an
# InaturalistObservationPhoto identifier.
#
# If saving raises ActiveRecord::RecordInvalid, an existing Image with the same
# image_file_fingerprint is returned instead; when none exists the error is
# re-raised.
#
# @param obs_photo [Hash] the outer observation_photo ('uuid' plus nested 'photo')
# @param result [Hash] the full Nasturtium observation result
# @param observed_year [Integer, nil] used as the attribution's copyright_year
# @return [Image] the saved image, or the pre-existing one with the same fingerprint
# @raise [ActiveRecord::RecordInvalid] when the save fails and no existing image matches
def self.build_image!(obs_photo, result:, observed_year: nil)
  inat_photo = obs_photo['photo']
  tw_license = INAT_LICENSE_CODE_TO_TW_LICENSE[inat_photo['license_code']]

  holder = stub_copyright_person(result, media: inat_photo)
  holder.save! if holder.new_record?

  holder_role = AttributionCopyrightHolder.new(person: holder)
  image_attribution = Attribution.new(
    license: tw_license,
    copyright_year: observed_year,
    copyright_holder_roles: [holder_role]
  )

  download = download_to_tempfile(large_photo_url(inat_photo['url']))
  begin
    record = Image.new(image_file: download)
    record.attribution = image_attribution

    photo_uuid = obs_photo['uuid']
    unless photo_uuid.blank?
      record.identifiers << Identifier::Global::Uuid::InaturalistObservationPhoto.new(
        identifier: photo_uuid
      )
    end

    begin
      record.save!
      record
    rescue ActiveRecord::RecordInvalid
      # Fall back to an already-stored image with the same file fingerprint;
      # a bare raise re-raises the validation error when there is none.
      Image.find_by(image_file_fingerprint: record.image_file_fingerprint) || raise
    end
  ensure
    # Always close and unlink the downloaded tempfile.
    download.close!
  end
end

.build_sound!(obs_sound, result:, observed_year: nil) ⇒ Sound

Build and save a Sound (with Attribution, copyright holder Person, and iNat identifier) from an iNat observation_sound hash. Raises on failure so the caller's savepoint can roll back.

Parameters:

  • obs_sound (Hash)

    the outer observation_sound object (carries uuid)

  • result (Hash)

    the full Nasturtium observation result (for ORCID matching)

  • observed_year (Integer, nil) (defaults to: nil)

    year of observation, used as copyright year

Returns:



274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
# File 'lib/vendor/nasturtium.rb', line 274

# Build and persist a Sound for one iNat observation sound, together with its
# Attribution, copyright-holder role and (when a uuid is given) an
# InaturalistObservationSound identifier.
#
# If saving raises ActiveRecord::RecordInvalid and an identifier with the same
# uuid already exists, that identifier's identifier_object is returned instead;
# otherwise the error is re-raised.
#
# @param obs_sound [Hash] the outer observation_sound ('uuid' plus nested 'sound')
# @param result [Hash] the full Nasturtium observation result
# @param observed_year [Integer, nil] used as the attribution's copyright_year
# @return [Sound] the saved sound, or the object of the existing identifier
# @raise [ActiveRecord::RecordInvalid] when the save fails and no existing identifier matches
def self.build_sound!(obs_sound, result:, observed_year: nil)
  sound_data  = obs_sound['sound']
  license_key = INAT_LICENSE_CODE_TO_TW_LICENSE[sound_data['license_code']]

  # The copyright holder is saved up front if stub_copyright_person returned a new record.
  copyright_person = stub_copyright_person(result, media: sound_data)
  copyright_person.save! if copyright_person.new_record?

  attribution = Attribution.new(
    license: license_key,
    copyright_year: observed_year,
    copyright_holder_roles: [
      AttributionCopyrightHolder.new(person: copyright_person)
    ]
  )

  # Name falls back to the observation_sound uuid when there is no original filename.
  sound = Sound.new(name: sound_data['original_filename'].presence || obs_sound['uuid'])
  tempfile = download_to_tempfile(sound_data['file_url'])
  begin
    # NOTE(review): the File handle opened here is never closed in this method,
    # and the tempfile is closed/unlinked below before sound.save! runs.
    # Confirm when attach actually reads the io before changing this order.
    sound.sound_file.attach(
      io: File.open(tempfile.path),
      filename: tempfile.original_filename,
      content_type: Marcel::MimeType.for(Pathname.new(tempfile.path), name: tempfile.original_filename)
    )
  ensure
    tempfile.close!
  end
  sound.attribution = attribution
  if obs_sound['uuid'].present?
    sound.identifiers << Identifier::Global::Uuid::InaturalistObservationSound.new(
      identifier: obs_sound['uuid']
    )
  end
  begin
    sound.save!
  rescue ActiveRecord::RecordInvalid
    # Recover only when an identifier with this uuid already exists; the bare
    # raise re-raises the validation error otherwise (including a blank uuid).
    existing = obs_sound['uuid'].present? &&
      Identifier::Global::Uuid::InaturalistObservationSound.find_by(identifier: obs_sound['uuid'])
    raise unless existing
    sound = existing.identifier_object
  end

  sound
end

.by_observation_ids(ids) ⇒ Array<Hash>

Fetch multiple observations in a single iNat API request. IDs are joined as a comma-separated string because Faraday serializes Ruby arrays as id=...&id=... which the iNat API does not accept.

Parameters:

  • ids (Array<String>)

    iNat observation integer IDs

Returns:

  • (Array<Hash>)

    Nasturtium result hashes (only found observations are returned)

Raises:

  • (Timeout::Error)

    if the iNat API does not respond within INAT_API_TIMEOUT seconds



32
33
34
35
36
37
38
# File 'lib/vendor/nasturtium.rb', line 32

# Fetch several iNat observations with a single API request.
#
# The ids are sent as one comma-joined string and per_page is set to the
# number of ids requested.
#
# @param ids [Array<String>] iNat observation ids
# @return [Array<Hash>] the 'results' entry of the Nasturtium response;
#   an empty Array when ids is blank
# @raise [Timeout::Error] when the call takes longer than INAT_API_TIMEOUT seconds
def self.by_observation_ids(ids)
  if ids.blank?
    []
  else
    Timeout.timeout(INAT_API_TIMEOUT) do
      response = ::Nasturtium.observations(id: ids.join(','), per_page: ids.size)
      response['results']
    end
  end
end

.download_to_tempfile(url) ⇒ Tempfile

Download a remote media file (a photo or a sound) to a Tempfile that also responds to original_filename, the method Paperclip expects.

Parameters:

  • url (String)

Returns:

  • (Tempfile)


415
416
417
418
419
420
421
422
423
424
425
426
# File 'lib/vendor/nasturtium.rb', line 415

# Download a remote media file into a Tempfile that also answers
# +original_filename+ (the basename of the URL path).
#
# Only http(s) URLs are fetched. The parsed URI is opened directly instead of
# handing the raw string to URI.open, which can fall through to Kernel#open
# (e.g. local paths) when the string is not a URL. The body is streamed into
# the tempfile, and the tempfile is closed and unlinked if the download fails.
#
# @param url [String] absolute http(s) URL of the file
# @return [Tempfile] rewound, binary-mode tempfile; the caller must close it
# @raise [ArgumentError] if url is not an http(s) URL
# @raise [URI::InvalidURIError] if url cannot be parsed
def self.download_to_tempfile(url)
  uri = URI.parse(url)
  # URI::HTTPS is a subclass of URI::HTTP, so this admits both schemes.
  raise ArgumentError, "expected an http(s) URL, got #{url.inspect}" unless uri.is_a?(URI::HTTP)

  uri_path = uri.path
  basename = File.basename(uri_path)
  tempfile = Tempfile.new(['inat_media', File.extname(uri_path)], binmode: true)

  begin
    uri.open('rb') { |io| IO.copy_stream(io, tempfile) }
    tempfile.rewind
  rescue StandardError
    # Do not leave the temporary file behind when the download fails.
    tempfile.close!
    raise
  end

  tempfile.define_singleton_method(:original_filename) { basename }
  tempfile
end

.large_photo_url(photo_url) ⇒ String

Returns URL for the large version.

Parameters:

  • photo_url (String)

    iNat square thumbnail URL

Returns:

  • (String)

    URL for the large version



404
405
406
407
408
# File 'lib/vendor/nasturtium.rb', line 404

# Derive the "large" rendition URL from an iNat photo URL by replacing the
# first '/square.' occurrence with '/large.'.
#
# @param photo_url [String, nil] iNat photo URL
# @return [String, nil] the rewritten URL, or nil when photo_url is blank
def self.large_photo_url(photo_url)
  photo_url.blank? ? nil : photo_url.sub('/square.', '/large.')
end

.observer_identification_uuid(result) ⇒ String?

Find the UUID of the observer's current identification on the observation. Only the observer's own identifications are considered; community taxon has no UUID.

Parameters:

  • result (Hash)

    a Nasturtium result

Returns:

  • (String, nil)

    UUID string, or nil if not found



150
151
152
153
154
155
156
157
158
159
# File 'lib/vendor/nasturtium.rb', line 150

# Find the uuid of the first identification that was made by the observation's
# own user and is flagged 'current'.
#
# @param result [Hash] a Nasturtium result
# @return [String, nil] the identification uuid, or nil when the result has no
#   user id or no matching identification
def self.observer_identification_uuid(result)
  observer_id = result.dig('user', 'id')
  return nil if observer_id.blank?

  identifications = result['identifications'] || []
  own_current = identifications.detect do |identification|
    identification.dig('user', 'id') == observer_id && identification['current']
  end

  own_current && own_current['uuid']
end

.permitted_photos(result) ⇒ Array<Hash>

Returns the iNat observation_photos that carry a CC or PD license importable into TW.

Parameters:

  • result (Hash)

    a Nasturtium result

Returns:

  • (Array<Hash>)

    observation_photo hashes (the outer object, which carries uuid)



239
240
241
242
243
244
245
246
247
248
249
# File 'lib/vendor/nasturtium.rb', line 239

# Select the observation_photos whose nested 'photo' has a license_code that is
# a key of INAT_LICENSE_CODE_TO_TW_LICENSE.
#
# @param result [Hash, nil] a Nasturtium result
# @return [Array<Hash>] the outer observation_photo hashes; empty when result
#   is blank or has no 'observation_photos'
def self.permitted_photos(result)
  return [] if result.blank?

  observation_photos = result['observation_photos'] || []
  observation_photos.select do |observation_photo|
    inner = observation_photo['photo']
    !inner.blank? && INAT_LICENSE_CODE_TO_TW_LICENSE.key?(inner['license_code'])
  end
end

.permitted_sounds(result) ⇒ Array<Hash>

Returns the iNat observation_sounds that carry a CC or PD license importable into TW.

Parameters:

  • result (Hash)

    a Nasturtium result

Returns:

  • (Array<Hash>)

    observation_sound hashes (the outer object, which carries uuid)



255
256
257
258
259
260
261
262
263
264
265
# File 'lib/vendor/nasturtium.rb', line 255

# Select the observation_sounds whose nested 'sound' has a license_code that is
# a key of INAT_LICENSE_CODE_TO_TW_LICENSE.
#
# @param result [Hash, nil] a Nasturtium result
# @return [Array<Hash>] the outer observation_sound hashes; empty when result
#   is blank or has no 'observation_sounds'
def self.permitted_sounds(result)
  return [] if result.blank?

  observation_sounds = result['observation_sounds'] || []
  observation_sounds.select do |observation_sound|
    inner = observation_sound['sound']
    !inner.blank? && INAT_LICENSE_CODE_TO_TW_LICENSE.key?(inner['license_code'])
  end
end

.person_by_orcid(result) ⇒ Person?

Attempt to find a Person in TW by the observer's ORCID. Returns nil if iNat provides no ORCID or no matching Person exists.

Parameters:

  • result (Hash)

    a Nasturtium result

Returns:



109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/vendor/nasturtium.rb', line 109

# Look up a Person by the ORCID found at result['user']['orcid'].
#
# @param result [Hash] a Nasturtium result
# @return [Person, nil] the first Person having an Identifier::Global::Orcid
#   whose cached value equals the ORCID URL; nil when the result has no ORCID
#   or nothing matches
def self.person_by_orcid(result)
  raw_orcid = result.dig('user', 'orcid')
  return nil if raw_orcid.blank?

  # A value that already starts with 'http' is used as is; anything else is
  # treated as a bare id and prefixed to form the URL.
  orcid_url = raw_orcid
  orcid_url = "https://orcid.org/#{raw_orcid}" unless raw_orcid.start_with?('http')

  orcid_condition = { type: 'Identifier::Global::Orcid', cached: orcid_url }
  Person.joins(:identifiers).where(identifiers: orcid_condition).first
end

.person_from_display_name(name) ⇒ Person::Unvetted

Build a Person::Unvetted from a display name string. Multi-word names (e.g. "Greg Lasley") are parsed via BibTeX so that first/last are split correctly. Single-word strings (login slugs or single-name users) go directly into last_name unchanged.

Parameters:

  • name (String)

Returns:



396
397
398
399
400
# File 'lib/vendor/nasturtium.rb', line 396

# Build a person from a display name.
#
# A name containing a space is handed to Person.parse_to_people and the first
# parsed person is used; a name without a space, or one the parser returns
# nothing for, becomes a Person::Unvetted with the whole string as last_name.
#
# @param name [String]
# @return [Person] first parsed person, or a new Person::Unvetted
def self.person_from_display_name(name)
  if name.include?(' ')
    Person.parse_to_people(name).first || Person::Unvetted.new(last_name: name)
  else
    Person::Unvetted.new(last_name: name)
  end
end

.stub_biocuration_classes(result, project_id:) ⇒ Array<BiocurationClass>

Find BiocurationClass records in the project that match iNat annotations on the observation, via INAT_ANNOTATION_LABEL_TO_DWC_URI → BiocurationGroup URI. Only annotations with a DwC mapping are considered; unmatched annotations are skipped.

Parameters:

  • result (Hash)

    a Nasturtium result

  • project_id (Integer)

Returns:



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/vendor/nasturtium.rb', line 206

# Find the project's BiocurationClass records that correspond to the iNat
# annotations on an observation.
#
# An annotation is considered only when it has both a controlled_attribute
# label and a controlled_value label, and the attribute label is a key of
# INAT_ANNOTATION_LABEL_TO_DWC_URI. The mapped URI selects BiocurationGroups in
# the project; a class is matched when it is tagged with one of those groups
# and its name equals the value label case-insensitively.
#
# @param result [Hash] a Nasturtium result
# @param project_id [Integer]
# @return [Array<BiocurationClass>] one entry per annotation that matched
def self.stub_biocuration_classes(result, project_id:)
  mapped = (result['annotations'] || []).select do |annotation|
    attribute_label = annotation.dig('controlled_attribute', 'label')
    attribute_label.present? &&
      annotation.dig('controlled_value', 'label').present? &&
      INAT_ANNOTATION_LABEL_TO_DWC_URI.key?(attribute_label)
  end
  return [] if mapped.empty?

  uris = mapped
    .map { |annotation| INAT_ANNOTATION_LABEL_TO_DWC_URI[annotation.dig('controlled_attribute', 'label')] }
    .uniq

  # uri => ids of the project's BiocurationGroups carrying that uri
  group_ids_by_uri = Hash.new { |hash, uri| hash[uri] = [] }
  BiocurationGroup.where(project_id: project_id, uri: uris).pluck(:uri, :id).each do |uri, id|
    group_ids_by_uri[uri] << id
  end

  # filter_map drops annotations without a group or without a matching class.
  mapped.filter_map do |annotation|
    attribute_label = annotation.dig('controlled_attribute', 'label')
    value_label = annotation.dig('controlled_value', 'label')

    group_ids = group_ids_by_uri[INAT_ANNOTATION_LABEL_TO_DWC_URI[attribute_label]]
    next if group_ids.blank?

    BiocurationClass
      .where(project_id: project_id)
      .joins(:tags)
      .where(tags: { keyword_id: group_ids })
      .find_by('lower(controlled_vocabulary_terms.name) = lower(?)', value_label)
  end
end

.stub_collecting_event(result, guess_as_locality: true) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/vendor/nasturtium.rb', line 48

# Build (without saving) a CollectingEvent from an iNat observation.
#
# Date parts come from 'observed_on_details'. The start time comes from
# 'time_observed_at' when present (seconds are only set when non-zero),
# otherwise from the details' 'hour' when that is not nil. A georeference from
# stub_georeference and a collector role from stub_collector are attached when
# those helpers return something.
#
# @param result [Hash, nil] a Nasturtium result
# @param guess_as_locality [Boolean] when true, 'place_guess' is stored as
#   verbatim_locality
# @return [CollectingEvent, nil] unsaved record; nil when result is blank
def self.stub_collecting_event(result, guess_as_locality: true)
  return nil if result.blank?

  # Tolerate a missing/null 'observed_on_details' instead of raising
  # NoMethodError on nil (presumably an observation without a date —
  # TODO confirm against the iNat API).
  details = result['observed_on_details'] || {}

  attributes = {
    verbatim_collectors: result.dig('user', 'name').presence,
    verbatim_date: result['observed_on_string'].presence,
    start_date_day: details['day'],
    start_date_month: details['month'],
    start_date_year: details['year']
  }

  if (observed_at = result['time_observed_at']).present?
    parsed = Time.parse(observed_at)
    attributes[:time_start_hour]   = parsed.hour
    attributes[:time_start_minute] = parsed.min
    attributes[:time_start_second] = parsed.sec if parsed.sec > 0
  elsif !details['hour'].nil?
    attributes[:time_start_hour] = details['hour']
  end

  attributes[:verbatim_locality] = result['place_guess'] if guess_as_locality
  attributes[:georeferences] = [stub_georeference(result)].compact

  ce = CollectingEvent.new(attributes)

  collector = stub_collector(result)
  ce.collector_roles.build(person: collector) if collector

  ce
end

.stub_collector(result) ⇒ Person?

Attempt to find a Person in TW by ORCID. Returns nil if iNat provides no ORCID, or if no matching Person exists.

Parameters:

  • result (Hash)

    a Nasturtium result

Returns:



89
90
91
# File 'lib/vendor/nasturtium.rb', line 89

# Resolve the collector for an observation. Delegates entirely to
# person_by_orcid; nothing is built here when that lookup returns nil.
#
# @param result [Hash] a Nasturtium result
# @return [Person, nil] whatever person_by_orcid returns for result
def self.stub_collector(result)
  person_by_orcid(result)
end

Find or build the copyright holder Person for a photo or sound.

Strategy (in order):

1. ORCID match — if the observer has an ORCID and a matching Person exists in TW, use them.
2. Name fallback — parse the attribution string for a name and create a new Person::Unvetted.

Parameters:

  • result (Hash)

    the full Nasturtium observation result (used for ORCID lookup)

  • media (Hash)

    the photo or sound hash (used for attribution string fallback)

Returns:



327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/vendor/nasturtium.rb', line 327

# Find or build the copyright holder for a photo or sound.
#
# The person returned by person_by_orcid wins when there is one. Otherwise a
# name is taken from media['attribution'] and passed to
# person_from_display_name: the text between "(c) " and the first comma when
# the string has that shape, else the whole attribution, else 'Unknown'.
#
# @param result [Hash] the full Nasturtium observation result
# @param media [Hash] the photo or sound hash carrying 'attribution'
# @return [Person] matched person, or whatever person_from_display_name builds
def self.stub_copyright_person(result, media:)
  orcid_person = person_by_orcid(result)
  return orcid_person if orcid_person

  # e.g. "(c) username, some rights reserved (CC BY-NC)" → "username"
  attribution = media['attribution']
  holder_name =
    if attribution =~ /\(c\)\s+(.+?),/
      Regexp.last_match(1).strip
    else
      attribution.presence || 'Unknown'
    end

  person_from_display_name(holder_name)
end

.stub_georeference(result) ⇒ Object



122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/vendor/nasturtium.rb', line 122

# Build (without saving) a Georeference::Inaturalist for an observation.
#
# @param result [Hash, nil] a Nasturtium result
# @return [Georeference::Inaturalist, nil] nil when result is blank, the
#   observation is obscured, positional_accuracy is above 10_000, or there are
#   no geojson coordinates
def self.stub_georeference(result)
  # Obscured observations are skipped — iNat jitters coordinates within a
  # ~22km bounding box, exceeding TW's 10km error_radius maximum.
  return nil if result.blank? || result['obscured']

  # Likewise skip when the reported accuracy itself exceeds that 10km limit.
  radius = result['positional_accuracy']
  return nil if radius.present? && radius > 10_000

  coordinates = result.dig('geojson', 'coordinates')
  return nil if coordinates.blank?

  wkt = "POINT(#{coordinates.first} #{coordinates.second})"
  point = GeographicItem.new(geography: Gis::FACTORY.parse_wkt(wkt))

  Georeference::Inaturalist.new(error_radius: radius, geographic_item: point)
end

.stub_identifier(result) ⇒ Identifier::Global::Uuid::InaturalistObservation?

Parameters:

  • result (Hash)

    a Nasturtium result

Returns:



163
164
165
166
167
168
169
170
# File 'lib/vendor/nasturtium.rb', line 163

# Build (without saving) the iNat observation UUID identifier for a result.
#
# @param result [Hash, nil] a Nasturtium result
# @return [Identifier::Global::Uuid::InaturalistObservation, nil] nil when the
#   result is blank or carries no 'uuid'
def self.stub_identifier(result)
  observation_uuid = result.blank? ? nil : result['uuid']
  return nil if observation_uuid.blank?

  Identifier::Global::Uuid::InaturalistObservation.new(identifier: observation_uuid)
end

.stub_observer_person(result) ⇒ Person

Find or build the observer as a Person. Strategy: ORCID match first, then Person::Unvetted from user.name or user.login. Used as determiner on TaxonDetermination and georeferencer on Georeference.

Parameters:

  • result (Hash)

    a Nasturtium result

Returns:



99
100
101
102
# File 'lib/vendor/nasturtium.rb', line 99

# Find or build the observer as a person.
#
# The person returned by person_by_orcid wins when there is one; otherwise
# person_from_display_name is called with the user's name, or with the login
# when the name is blank.
#
# @param result [Hash] a Nasturtium result
# @return [Person] matched person, or whatever person_from_display_name builds
def self.stub_observer_person(result)
  orcid_person = person_by_orcid(result)
  return orcid_person if orcid_person

  display_name = result.dig('user', 'name').presence || result.dig('user', 'login')
  person_from_display_name(display_name)
end

.stub_otu(result, project_id:, match_by_name: false, use_community_taxon: true) ⇒ Otu?

Find or build an OTU for the iNat taxon.

Parameters:

  • result (Hash)

    a Nasturtium result

  • project_id (Integer)
  • match_by_name (Boolean) (defaults to: false)

    if true, look for an existing OTU with matching name in the project first

  • use_community_taxon (Boolean) (defaults to: true)

    if true, use the community consensus taxon (community_taxon, falling back to taxon); if false, use the observation taxon (taxon), which is the observer's own most recent ID when no community consensus exists

Returns:



183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/vendor/nasturtium.rb', line 183

# Find or build (without saving) an Otu for the observation's taxon name.
#
# @param result [Hash] a Nasturtium result
# @param project_id [Integer] project searched when match_by_name is true
# @param match_by_name [Boolean] when true, first look for a project Otu whose
#   own name or whose taxon name's cached value equals the iNat name
# @param use_community_taxon [Boolean] forwarded to taxon_name
# @return [Otu, nil] existing or new Otu; nil when no taxon name is available
def self.stub_otu(result, project_id:, match_by_name: false, use_community_taxon: true)
  name = taxon_name(result, use_community_taxon: use_community_taxon)
  return nil if name.blank?
  return Otu.new(name: name) unless match_by_name

  # The ORDER BY presumably puts Otus that have a joined taxon name ahead of
  # name-only Otus — confirm against the database's boolean sort order.
  matched = Otu.where(project_id: project_id)
    .left_joins(:taxon_name)
    .where('otus.name = ? OR taxon_names.cached = ?', name, name)
    .order(Arel.sql('taxon_names.id IS NULL ASC'))
    .first

  matched || Otu.new(name: name)
end

.taxon_name(result, use_community_taxon: true) ⇒ Object



40
41
42
43
44
45
46
# File 'lib/vendor/nasturtium.rb', line 40

# Pick the taxon name to use for an observation.
#
# @param result [Hash] a Nasturtium result
# @param use_community_taxon [Boolean] when true, prefer the community_taxon
#   name and fall back to the taxon name if it is blank; when false, use the
#   taxon name only
# @return [String, nil]
def self.taxon_name(result, use_community_taxon: true)
  community_name = use_community_taxon ? result.dig('community_taxon', 'name').presence : nil
  community_name || result.dig('taxon', 'name')
end