Class: Gazetteer

Inherits:
ApplicationRecord show all
Includes:
Housekeeping, Shared::AlternateValues, Shared::Citations, Shared::DataAttributes, Shared::IsData, Shared::Notes
Defined in:
app/models/gazetteer.rb

Overview

Gazetteer allows a project to add its own named shapes to participate in filtering, etc.

Constant Summary collapse

ALTERNATE_VALUES_FOR =
[:name].freeze
GZ_DATA_ORIGIN =
'TaxonWorks Gazetteer'.freeze
COMBINE_BUFFER_DEGREES =

Buffer applied after union (positive buffer) and intersection (negative buffer) to absorb ~1e-14° floating-point slivers at shared borders. Applied in geometry (degree) space; 1e-7° ≈ 11 mm at the equator. See combine_rgeo_shapes for the full explanation.

1e-7

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Shared::IsData

#errors_excepting, #full_error_messages_excepting, #identical, #is_community?, #is_in_use?, #similar

Methods included from Shared::AlternateValues

#all_values_for, #alternate_valued?

Methods included from Shared::DataAttributes

#import_attributes, #internal_attributes, #keyword_value_hash, #reject_data_attributes

Methods included from Shared::Notes

#concatenated_notes_string, #reject_notes

Methods included from Shared::Citations

#cited?, #mark_citations_for_destruction, #nomenclature_date, #origin_citation_source_id, #reject_citations, #requires_citation?, #sources_by_topic_id

Methods included from Housekeeping

#has_polymorphic_relationship?

Methods inherited from ApplicationRecord

transaction_with_retry

Instance Attribute Details

#geographic_item_id ⇒ Integer

The shape of the gazetteer

Returns:

  • (Integer)


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
# File 'app/models/gazetteer.rb', line 26

class Gazetteer < ApplicationRecord
  include Housekeeping
  include Shared::Citations
  include Shared::Notes
  include Shared::DataAttributes
  include Shared::AlternateValues
  include Shared::IsData

  # Transient (non-persisted) holder for the geographic_item_id. It is written
  # by the before_destroy callback and read by the after_destroy cleanup.
  attr_accessor :geographic_item_id_for_cleanup

  # NOTE(review): presumably consumed by Shared::AlternateValues to decide
  # which attributes may carry alternate values — confirm in that concern.
  ALTERNATE_VALUES_FOR = [:name].freeze

  # Value returned by #data_origin.
  GZ_DATA_ORIGIN = 'TaxonWorks Gazetteer'.freeze

  # Buffer applied after union (positive) and intersection (negative) to
  # absorb ~1e-14° floating-point slivers at shared borders. Applied in
  # geometry (degree) space; 1e-7° ≈ 11 mm at the equator.
  # See combine_rgeo_shapes for full explanation.
  COMBINE_BUFFER_DEGREES = 1e-7

  # gazetteer.geo_object is answered by the associated GeographicItem.
  delegate :geo_object, to: :geographic_item

  belongs_to :geographic_item, inverse_of: :gazetteers

  # A Gazetteer that is still the shape of an AssertedDistribution cannot be
  # destroyed; the attempt adds an error instead.
  has_many :asserted_distributions,
    as: :asserted_distribution_shape,
    inverse_of: :asserted_distribution_shape,
    dependent: :restrict_with_error

  # Normalize ISO codes (trim + upcase) before they are validated.
  before_validation do
    self.iso_3166_a2 = iso_3166_a2.strip.upcase if iso_3166_a2.present?
  end
  before_validation do
    self.iso_3166_a3 = iso_3166_a3.strip.upcase if iso_3166_a3.present?
  end

  validates :name, presence: true, length: {minimum: 1}
  validate :iso_3166_a2_is_two_characters
  validate :iso_3166_a3_is_three_characters

  # The GeographicItem id is captured before the destroy and, once the
  # Gazetteer is gone, the item is destroyed too if nothing else references it.
  before_destroy :capture_geographic_item_id_for_cleanup
  after_destroy :destroy_geographic_item_if_orphaned

  accepts_nested_attributes_for :geographic_item

  # Builds the GeoJSON 'Feature' for this gazetteer and tags it with a
  # 'shape' descriptor (type, id and name), cf. GeographicArea.
  #
  # @return [Hash] the hash from #to_simple_json_feature with its
  #   'properties' entry replaced by { 'shape' => { 'type', 'id', 'tag' } }
  def to_geo_json_feature
    feature = to_simple_json_feature

    shape_descriptor = {
      'type' => 'Gazetteer',
      'id' => id,
      'tag' => name
    }

    feature.merge('properties' => { 'shape' => shape_descriptor })
  end

  # Builds a minimal GeoJSON 'Feature' hash for this gazetteer.
  #
  # @return [Hash] with 'type' => 'Feature', empty 'properties' and the
  #   geographic item's GeoJSON as 'geometry'
  def to_simple_json_feature
    feature = { 'type' => 'Feature', 'properties' => {} }
    feature['geometry'] = geographic_item.to_geo_json
    feature
  end

  # Builds (does not save) a GeographicItem for this gazetteer from the
  # combination of the input shapes.
  #
  # @param shapes [Hash]
  #   geojson: array of geojson feature hashes,
  #   wkt: array of wkt strings,
  #   points: array of geojson feature points
  #   ga_combine: array of GA ids
  #   gz_combine: array of GZ ids
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # @return the result of build_geographic_item on success; nil when the
  #   shapes could not be combined, in which case the failure message is
  #   added to errors[:base]
  def build_gi_from_shapes(shapes, operation_is_union=true)
    begin
      rgeo_shape = self.class.combine_shapes_to_rgeo(shapes, operation_is_union)
    rescue TaxonWorks::Error => e
      # Record the message text, not the exception object, so that
      # errors[:base] holds Strings like every other validation error.
      errors.add(:base, e.message)
      return
    end

    build_geographic_item(
      geography: rgeo_shape
    )
  end

  # Converts every kind of input shape to RGeo and combines them all into one
  # shape.
  #
  # @param shapes [Hash] as in build_gi_from_shapes
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # @return A single rgeo shape that is the combination of all of the input
  #   shapes
  # @raise [TaxonWorks::Error] when no shapes are provided, or when an RGeo
  #   error (RGeo::Error::InvalidGeometry / RGeo::Error::RGeoError) occurs
  #   while converting or combining
  def self.combine_shapes_to_rgeo(shapes, operation_is_union)
    input_keys = %i[geojson wkt points ga_combine gz_combine]
    if input_keys.all? { |key| shapes[key].blank? }
      raise TaxonWorks::Error, 'No shapes provided'
    end

    # User-drawn/entered shapes first, then shapes looked up by id.
    drawn_shapes = convert_geojson_to_rgeo(shapes[:geojson])
    wkt_shapes = convert_wkt_to_rgeo(shapes[:wkt])
    point_shapes = convert_geojson_to_rgeo(shapes[:points])
    ga_shapes = convert_ga_to_rgeo(shapes[:ga_combine])
    gz_shapes = convert_gz_to_rgeo(shapes[:gz_combine])

    all_shapes = drawn_shapes + wkt_shapes + point_shapes + ga_shapes + gz_shapes

    combine_rgeo_shapes(all_shapes, operation_is_union)
  # InvalidGeometry is the more specific of the two; both are re-raised as
  # TaxonWorks::Error.
  rescue RGeo::Error::InvalidGeometry, RGeo::Error::RGeoError => e
    raise TaxonWorks::Error, e
  end

  # Decodes GeoJSON features into RGeo shapes. A Point feature that carries a
  # 'radius' property is replaced by the circle GeographicItem.circle builds
  # around it. Every resulting shape is passed (as WKT) through
  # GeographicItem.make_valid_non_anti_meridian_crossing_shape.
  #
  # @param shapes [Array, nil] geojson feature hashes
  # @return [Array] of RGeo::Geographic::Projected*Impl; [] for blank input
  # Raises RGeo::Error::InvalidGeometry on error
  def self.convert_geojson_to_rgeo(shapes)
    return [] if shapes.blank?

    shapes.map do |feature_hash|
      # Raises RGeo::Error::InvalidGeometry on error
      feature = RGeo::GeoJSON.decode(feature_hash, geo_factory: Gis::FACTORY)
      geometry = feature.geometry

      outline =
        if geometry.geometry_type.to_s == 'Point' &&
            feature.properties['radius'].present?
          # Fall back to the bare point should no circle be returned.
          GeographicItem.circle(geometry, feature.properties['radius']) || geometry
        else
          geometry
        end

      GeographicItem.make_valid_non_anti_meridian_crossing_shape(outline.as_text)
    end
  end

  # Looks up the GeographicAreas with the given ids and collects each one's
  # geo_object.
  #
  # @param ga_ids [Array, nil] GeographicArea ids
  # @return [Array] one geo_object per GeographicArea found; [] for blank
  #   input
  def self.convert_ga_to_rgeo(ga_ids)
    return [] if ga_ids.blank?

    GeographicArea.where(id: ga_ids).map(&:geo_object)
  end

  # Looks up the Gazetteers with the given ids and collects each one's
  # geo_object.
  #
  # @param gz_ids [Array, nil] Gazetteer ids
  # @return [Array] one geo_object per Gazetteer found; [] for blank input
  def self.convert_gz_to_rgeo(gz_ids)
    return [] if gz_ids.blank?

    Gazetteer.where(id: gz_ids).map(&:geo_object)
  end

  # Parses WKT strings with Gis::FACTORY and passes each parsed shape (as
  # WKT) through GeographicItem.make_valid_non_anti_meridian_crossing_shape.
  #
  # @param wkt_shapes [Array, nil] WKT strings
  # @return [Array] of RGeo::Geographic::Projected*Impl; [] for blank input
  # Raises RGeo::Error::RGeoError on error, with the message prefixed by
  # 'Invalid WKT: '
  def self.convert_wkt_to_rgeo(wkt_shapes)
    return [] if wkt_shapes.blank?

    wkt_shapes.map do |wkt|
      parsed =
        begin
          ::Gis::FACTORY.parse_wkt(wkt)
        rescue RGeo::Error::RGeoError => parse_error
          # Same exception class, friendlier message.
          raise parse_error.exception("Invalid WKT: #{parse_error.message}")
        end

      GeographicItem.make_valid_non_anti_meridian_crossing_shape(parsed.as_text)
    end
  end

  # Combines the input shapes into a single shape using PostGIS.
  #
  # @param [Array] rgeo_shapes of RGeo::Geographic::Projected*Impl
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # @return [RGeo::Geographic::Projected*Impl] A single shape combining all of the
  #   input shapes; a single input shape is returned unchanged
  # Raises TaxonWorks::Error on error: when there are no shapes to combine
  # (e.g. ids that matched no records) or when the combination is empty
  def self.combine_rgeo_shapes(rgeo_shapes, operation_is_union)
    # Without this guard an empty list would generate invalid SQL below and
    # surface as ActiveRecord::StatementInvalid instead of TaxonWorks::Error.
    raise TaxonWorks::Error, 'No shapes to combine' if rgeo_shapes.empty?

    return rgeo_shapes[0] if rgeo_shapes.count == 1

    connection = ActiveRecord::Base.connection
    geom_exprs = rgeo_shapes.map { |s|
      "ST_GeomFromText(#{connection.quote(s.as_text)}, 4326)"
    }

    # Both operations drop Z values (ST_Buffer is 2D-only); ST_Force3D in the
    # final query puts a Z dimension back.
    buffered_expr =
      if operation_is_union
        # Use PostGIS ST_UnaryUnion rather than RGeo's iterative .union().
        # Adjacent GA shapes store their shared border vertices with slightly
        # different coordinate values — even shapes from the same source dataset.
        # GEOS must find where these nearly-coincident edges intersect to compute
        # the union boundary. That intersection is numerically unstable, so the
        # union boundary ends up at coordinates that match neither input, leaving
        # a sliver where ST_CoveredBy(input_GA, Gaz) returns false. This causes
        # the OTU spatial filter to silently drop asserted-distribution OTUs for
        # any GA whose shape fails the coverage check.
        #
        # A post-union ST_Buffer (COMBINE_BUFFER_DEGREES) absorbs the sliver.
        # Empirical testing across 13 country-pair/group/chain combinations found
        # the minimum buffer needed was ~1e-11°; 1e-7° (~11 mm at the equator)
        # gives a comfortable margin and is imperceptible for any biodiversity
        # application.
        "ST_Buffer(ST_UnaryUnion(ST_Collect(ARRAY[#{geom_exprs.join(', ')}])), #{COMBINE_BUFFER_DEGREES})"
      else # Intersection
        # Same motivation as the union case, but shrinking (negative buffer)
        # instead of growing. The shapes are intersected pairwise,
        # left to right.
        intersection_expr = geom_exprs.reduce { |acc, g| "ST_Intersection(#{acc}, #{g})" }
        "ST_Buffer(#{intersection_expr}, -#{COMBINE_BUFFER_DEGREES})"
      end

    # ST_MakeValid is applied after ST_Buffer as a safety net — neither
    # GEOS nor PostGIS guarantees topologically valid output from geometric
    # operations.
    result_wkb = connection.select_value(
      "SELECT ST_Force3D(ST_MakeValid(#{buffered_expr}))"
    )
    u = Gis::FACTORY.parse_wkb(result_wkb)

    if u.empty?
      message = operation_is_union ?
        "Empty union can't be saved!" : "Empty intersection can't be saved!"
      raise TaxonWorks::Error, message
    end

    u
  end

  # Saves gz to the current project and saves a copy of it to each of the
  # other given projects.
  #
  # @param gz [Gazetteer] Unsaved Gazetteer to save and clone from
  # @param project_ids [Array, nil] project ids to clone gz into - gz is
  #   always saved to the current project. Duplicates and the current project
  #   id are ignored. The array passed in is not modified.
  #   If saves occur in more than one project then all saves occur in a
  #   transaction.
  # @param citation [Hash] Citation object to save to each Gazetteer created
  # Raises ActiveRecord::RecordInvalid on error
  def self.save_and_clone_to_projects(gz, project_ids, citation = nil)
    # Work on a copy: callers (e.g. a per-record import loop) reuse the same
    # array and should not see it change underneath them.
    clone_project_ids = Array(project_ids).uniq.reject { |project_id|
      project_id == Current.project_id
    }

    if clone_project_ids.empty?
      perform_save_and_clone_to_projects(gz, [], citation)
    else
      Gazetteer.transaction do
        perform_save_and_clone_to_projects(gz, clone_project_ids, citation)
      end
    end
  end

  # Checks whether a2 looks like an ISO 3166 alpha-2 code: a String that,
  # once stripped and upcased, is exactly two letters A-Z. This checks the
  # format only, not membership in the ISO list.
  #
  # @param a2 [Object] candidate value
  # @return [Boolean] false for anything that is not a plain String
  def self.validate_iso_3166_a2(a2)
    return false unless a2.instance_of?(String)

    # A blank string strips down to '' and so fails the match as well.
    a2.strip.upcase.match?(/\A[A-Z]{2}\z/)
  end

  # Checks whether a3 looks like an ISO 3166 alpha-3 code: a String that,
  # once stripped and upcased, is exactly three letters A-Z. This checks the
  # format only, not membership in the ISO list.
  #
  # @param a3 [Object] candidate value
  # @return [Boolean] false for anything that is not a plain String
  def self.validate_iso_3166_a3(a3)
    return false unless a3.instance_of?(String)

    # A blank string strips down to '' and so fails the match as well.
    a3.strip.upcase.match?(/\A[A-Z]{3}\z/)
  end

  # Imports one Gazetteer per record of an uploaded shapefile.
  #
  # @param shapefile [Hash] with the Document ids of the shapefile parts
  #   (:shp_doc_id, :shx_doc_id, :dbf_doc_id, :prj_doc_id, optional
  #   :cpg_doc_id) and the names of the dbf fields to read (:name_field,
  #   :iso_a2_field, :iso_a3_field)
  # @param citation_options [Hash] :citation is attached to every imported
  #   Gazetteer when :cite_gzs is truthy
  # @param progress_tracker receives update! calls reporting progress and
  #   errors
  # @param projects [Array] project ids, passed through to process_shape_file
  #
  # If any of the Documents cannot be found the error is recorded on
  # progress_tracker and nothing is imported.
  def self.import_gzs_from_shapefile(
    shapefile, citation_options, progress_tracker, projects
  )
    begin
      shp_doc = Document.find(shapefile[:shp_doc_id])
      shx_doc = Document.find(shapefile[:shx_doc_id])
      dbf_doc = Document.find(shapefile[:dbf_doc_id])
      prj_doc = Document.find(shapefile[:prj_doc_id])
      cpg_doc = shapefile[:cpg_doc_id] ?
        Document.find(shapefile[:cpg_doc_id]) : nil
    rescue ActiveRecord::RecordNotFound => e
      progress_tracker.update!(
        num_records_imported: 0,
        error_messages: e.message,
        started_at: DateTime.now,
        ended_at: DateTime.now
      )
      return
    end
    name_field = shapefile[:name_field]

    # The above shapefile files are unlikely to all be in the same directory as
    # required by rgeo-shapefile, so create symbolic links to each in a new
    # temporary folder.
    tmp_dir = Rails.root.join('tmp', 'shapefiles', SecureRandom.hex)
    FileUtils.mkdir_p(tmp_dir)

    begin
      shp_link = File.join(tmp_dir, 'shapefile.shp')

      FileUtils.ln_s(shp_doc.document_file.path, shp_link)
      FileUtils.ln_s(shx_doc.document_file.path, File.join(tmp_dir, 'shapefile.shx'))
      FileUtils.ln_s(dbf_doc.document_file.path, File.join(tmp_dir, 'shapefile.dbf'))
      FileUtils.ln_s(prj_doc.document_file.path, File.join(tmp_dir, 'shapefile.prj'))

      if cpg_doc.present?
        FileUtils.ln_s(cpg_doc.document_file.path, File.join(tmp_dir, 'shapefile.cpg'))
      end

      prj = File.read(prj_doc.document_file.path)
      crs = RGeo::CoordSys::CS.create_from_wkt(prj)

      citation = citation_options[:cite_gzs] ? citation_options[:citation] : nil

      process_shape_file(
        shp_link, crs, name_field,
        shapefile[:iso_a2_field], shapefile[:iso_a3_field],
        citation, progress_tracker, projects
      )
    ensure
      # Always remove the temporary folder, including when linking, parsing
      # the .prj or the import itself raises. rm_rf removes the symbolic
      # links themselves; the linked document files are left untouched.
      FileUtils.rm_rf(tmp_dir)
    end
  end

  # Finds the Gazetteers used as the shape of AssertedDistributions that the
  # user cited in the project during the last week (could parameterize).
  #
  # @param user_id [Integer] creator of the citations considered
  # @param project_id [Integer] project of the citations considered
  # @param used_on [String] currently `AssertedDistribution`
  # @return [Array<Integer>, nil]
  #    unique Gazetteer ids (no limit is applied here; callers trim the
  #    list), or nil when `used_on` is anything other than
  #    'AssertedDistribution'
  def self.used_recently(user_id, project_id, used_on = 'AssertedDistribution')
    return unless used_on == 'AssertedDistribution'

    citations = Citation.arel_table

    # Select manager for this user's citations of the last week, newest
    # first ...
    recent_citations = citations
      .project(
        citations['citation_object_id'],
        citations['citation_object_type'],
        citations['created_at']
      )
      .from(citations)
      .where(citations['created_at'].gt(1.week.ago))
      .where(citations['created_by_id'].eq(user_id))
      .where(citations['project_id'].eq(project_id))
      .order(citations['created_at'].desc)

    # ... aliased so it can be joined to as a derived table.
    recent = recent_citations.as('recent_t')
    distributions = AssertedDistribution.arel_table

    join_condition = recent['citation_object_id'].eq(distributions['id'])
      .and(recent['citation_object_type'].eq('AssertedDistribution'))

    AssertedDistribution
      .joins(Arel::Nodes::InnerJoin.new(recent, Arel::Nodes::On.new(join_condition)))
      .where(asserted_distribution_shape_type: 'Gazetteer')
      .pluck(:asserted_distribution_shape_id)
      .uniq
  end

  # Assembles the Gazetteers offered to a user for quick selection.
  #
  # @param user_id [Integer]
  # @param project_id [Integer]
  # @param target [String] currently only `AssertedDistribution` (also used
  #   when target is blank)
  # @return [Hash] gazetteers optimized for user selection:
  #   quick: pinboard-inserted pins, plus up to 5 recently used when any,
  #   pinboard: everything the user pinned in the project,
  #   recent: up to 15 recently used, ordered by name
  def self.select_optimized(user_id, project_id, target = 'AssertedDistribution')
    target = 'AssertedDistribution' if target.blank?
    recent_ids = used_recently(user_id, project_id, target) || []

    inserted_pins = lambda {
      Gazetteer.pinned_by(user_id).pinboard_inserted.where(pinboard_items: {project_id:}).to_a
    }
    named = lambda { |ids|
      Gazetteer.where('"gazetteers"."id" IN (?)', ids).order(:name).to_a
    }

    selection = {
      quick: [],
      pinboard: Gazetteer.pinned_by(user_id).where(pinboard_items: {project_id:}).to_a,
      recent: []
    }

    if recent_ids.empty?
      selection[:quick] = inserted_pins.call
      return selection
    end

    if target == 'AssertedDistribution'
      selection[:recent] = named.call(recent_ids.first(15))
    end
    selection[:quick] = (inserted_pins.call + named.call(recent_ids.first(5))).uniq

    selection
  end

  # Currently always empty. Open question from the original author: return
  # the a2/a3 country here?
  #
  # @return [Hash] an empty Hash
  def geographic_name_classification = {}

  # @return [Array] a one-element Array holding this gazetteer's
  #   geographic_item
  def geographic_items = [geographic_item]

  # @return this gazetteer's geographic_item
  def default_geographic_item = geographic_item

  # @return the id of this gazetteer's geographic_item (read from the
  #   associated record)
  def default_geographic_item_id = geographic_item.id

  # @return [String] GZ_DATA_ORIGIN
  def data_origin = GZ_DATA_ORIGIN

  private

  # Saves gz to the current project, then saves one copy (gz.dup) of it to
  # each of the given projects. When a citation is given, a citation is
  # built on every Gazetteer saved, using that Gazetteer's project id.
  #
  # NOTE: the `private` keyword above does not apply to `def self.` methods,
  # so this method is callable from outside the class.
  #
  # @param gz [Gazetteer] Gazetteer to save and copy
  # @param project_ids [Array] the projects to clone to - does not include the
  #   current project which gz is saved to.
  # @param citation [Hash, nil] citation attributes
  # Raises whatever save! raises
  def self.perform_save_and_clone_to_projects(gz, project_ids, citation)
    cite = citation.present?

    gz.citations.build(citation.merge({ project_id: Current.project_id })) if cite
    gz.save!

    project_ids.each do |target_project_id|
      copy = gz.dup
      copy.project_id = target_project_id
      copy.citations.build(citation.merge({ project_id: target_project_id })) if cite
      copy.save!
    end
  end

  # Reads every record of a shapefile and saves it as a Gazetteer (cloned to
  # the given projects), reporting progress and per-record errors on
  # progress_tracker. A record that fails is recorded and skipped; the import
  # continues with the next record.
  #
  # @param shpfile [String] path to the .shp file
  # @param crs coordinate system parsed from the shapefile's .prj
  # @param name_field [String] dbf field holding the Gazetteer name
  # @param iso_a2_field [String, nil] dbf field holding the ISO 3166 a2 code
  # @param iso_a3_field [String, nil] dbf field holding the ISO 3166 a3 code
  # @param citation [Hash, nil] citation saved with each Gazetteer
  # @param progress_tracker receives update! calls
  # @param projects [Array] project ids, see save_and_clone_to_projects
  def self.process_shape_file(
    shpfile, crs, name_field, iso_a2_field, iso_a3_field, citation,
    progress_tracker, projects
  )
    r = {
      num_records: 0,
      num_records_imported: 0,
      error_messages: nil,
    }

    # We'll need to transform from whatever CRS the shapefile is in to our WGS84
    # coordinates.
    if (crs_is_wgs84 = Vendor::Rgeo.coord_sys_is_wgs84?(crs))
      from_factory = Gis::FACTORY
    else
      from_proj4 = RGeo::CoordSys::Proj4.create(crs.to_s)
      from_factory =
        if from_proj4.projected?
          # Shapefiles using a projected CRS always store their geometries
          # using projected coordinates.
          RGeo::Geographic.projected_factory(
            coord_sys: from_proj4, has_z_coordinate: true
          ).projection_factory
        else # geographic? true
          RGeo::Geographic.spherical_factory(
            coord_sys: from_proj4, has_z_coordinate: true
          )
        end

      to_proj4 = Gis::FACTORY.coord_sys
      to_factory = Gis::FACTORY
    end

    begin
      # TODO: https://github.com/rgeo/rgeo-shapefile could use a fork and updates to dbf/rgeo
      file = RGeo::Shapefile::Reader.open(
        shpfile, factory: from_factory, allow_unsafe: true
      )
    rescue Errno::ENOENT => e
      progress_tracker.update!(
        num_records_imported: 0,
        error_messages: e.message,
        started_at: DateTime.now,
        ended_at: DateTime.now
      )
      return
    end

    begin
      r[:num_records] = file.num_records

      progress_tracker.update!(
        num_records: file.num_records,
        project_names:
          Project.where(id: projects).order(:name).pluck(:name).join(', '),
        started_at: DateTime.now
      )

      # Iterate over an index so we can record index on error and then resume
      file.num_records.times do |i|
        # This can throw GeosError even when allow_unsafe: true
        record = file[i]

        # iso a2/a3 are optional fields, we ignore them if the shapefile
        # doesn't provide valid data.
        a2 = record[iso_a2_field]
        a3 = record[iso_a3_field]
        iso_3166_a2 = validate_iso_3166_a2(a2) ? a2 : nil
        iso_3166_a3 = validate_iso_3166_a3(a3) ? a3 : nil

        g = new(
          name: record[name_field],
          iso_3166_a2:,
          iso_3166_a3:
        )

        if crs_is_wgs84
          record_geometry = record.geometry
        else
          # TODO: what might this raise? Might want to cap our total number of
          # errors recorded here
          record_geometry = RGeo::CoordSys::Proj4.transform(
            from_proj4,
            record.geometry,
            to_proj4,
            to_factory
          )
        end

        shape = GeographicItem.make_valid_non_anti_meridian_crossing_shape(
          record_geometry.as_text
        )

        g.build_geographic_item(
          geography: shape
        )

        save_and_clone_to_projects(g, projects, citation)
        r[:num_records_imported] += 1

        # Only report progress every fifth record.
        if i % 5 == 0
          progress_tracker.update!(
            num_records_imported: r[:num_records_imported]
          )
        end
      rescue RGeo::Error::InvalidGeometry,
             ActiveRecord::RecordInvalid,
             RGeo::Error::GeosError,
             ActiveRecord::StatementInvalid => e
        # In known instances ActiveRecord::StatementInvalid is a result of
        # something like:
        # PG::InternalError:
        #   ERROR:  lwgeom_intersection_prec: GEOS Error: TopologyException:
        #   Input geom 0 is invalid: Self-intersection at 185 5 0
        # !! Any containing transaction (from running in a spec e.g.) is now
        # aborted and open, any attempts to interact with the db will now raise
        # PG::InFailedSqlTransaction: ERROR:  current transaction is aborted,
        #   commands ignored until end of transaction block
        #
        # Errors are reported with 1-based record numbers.
        process_import_error(progress_tracker, r, i + 1, e.to_s)
      end

      progress_tracker.update!(
        num_records_imported: r[:num_records_imported],
        ended_at: DateTime.now
      )
    ensure
      # Reader.open without a block leaves closing to the caller; release the
      # underlying file handles however the import ends.
      file.close
    end
  end

  # Appends an import error to the running '; '-separated list kept in
  # recorder[:error_messages] and pushes the whole list to progress_tracker.
  #
  # @param progress_tracker receives update!(error_messages: ...)
  # @param recorder [Hash] import bookkeeping; :error_messages is updated
  #   in place
  # @param error_index [Integer] record number reported with the error
  # @param error_message [String]
  def self.process_import_error(
    progress_tracker, recorder, error_index, error_message
  )
    entry = "#{error_index}: '#{error_message}'"
    earlier = recorder[:error_messages]

    recorder[:error_messages] =
      if earlier.present?
        "#{earlier}; #{entry}"
      else
        entry
      end

    progress_tracker.update!(error_messages: recorder[:error_messages])
  end

  # Validation: iso_3166_a2 may be nil; any other value has to pass
  # validate_iso_3166_a2.
  def iso_3166_a2_is_two_characters
    return if iso_3166_a2.nil? || self.class.validate_iso_3166_a2(iso_3166_a2)

    errors.add(:iso_3166_a2, 'must be exactly two characters')
  end

  # Validation: iso_3166_a3 may be nil; any other value has to pass
  # validate_iso_3166_a3.
  def iso_3166_a3_is_three_characters
    return if iso_3166_a3.nil? || self.class.validate_iso_3166_a3(iso_3166_a3)

    errors.add(:iso_3166_a3, 'must be exactly three characters')
  end

  # after_destroy callback: destroys the GeographicItem whose id was captured
  # before the destroy, provided it still exists and reports
  # unreferenced_for_cleanup?.
  def destroy_geographic_item_if_orphaned
    item = GeographicItem.find_by(id: geographic_item_id_for_cleanup)
    return unless item&.unreferenced_for_cleanup?

    item.destroy!
  end

  # before_destroy callback: copies geographic_item_id into the transient
  # geographic_item_id_for_cleanup accessor, which
  # destroy_geographic_item_if_orphaned reads after the destroy.
  def capture_geographic_item_id_for_cleanup
    # Capture id before destroy so cleanup does not depend on association state
    # on the destroyed record.
    self.geographic_item_id_for_cleanup = geographic_item_id
  end
end

#geographic_item_id_for_cleanup ⇒ Object

Returns the value of attribute geographic_item_id_for_cleanup.



34
35
36
# File 'app/models/gazetteer.rb', line 34

# Reader for the transient @geographic_item_id_for_cleanup value.
def geographic_item_id_for_cleanup = @geographic_item_id_for_cleanup

#iso_3166_a2 ⇒ String

Two alpha-character identification of country.

Returns:

  • (String)


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
# File 'app/models/gazetteer.rb', line 26

class Gazetteer < ApplicationRecord
  include Housekeeping
  include Shared::Citations
  include Shared::Notes
  include Shared::DataAttributes
  include Shared::AlternateValues
  include Shared::IsData

  # Transient (non-persisted) holder for the geographic item id. It is set by
  # the before_destroy callback and read by the after_destroy callback below.
  attr_accessor :geographic_item_id_for_cleanup

  ALTERNATE_VALUES_FOR = [:name].freeze

  # Value returned by #data_origin.
  GZ_DATA_ORIGIN = 'TaxonWorks Gazetteer'.freeze

  # Buffer applied after union (+) and intersection (-) to absorb ~1e-14°
  # floating-point slivers at shared borders. Applied in geometry (degree)
  # space; 1e-7° ≈ ~11 mm at the equator.
  # See combine_rgeo_shapes for full explanation.
  COMBINE_BUFFER_DEGREES = 1e-7

  # gazetteer.geo_object is forwarded to the associated GeographicItem.
  delegate :geo_object, to: :geographic_item

  belongs_to :geographic_item, inverse_of: :gazetteers

  # restrict_with_error: destroying a Gazetteer that still has asserted
  # distributions adds an error instead of deleting them.
  has_many :asserted_distributions,
    as: :asserted_distribution_shape,
    inverse_of: :asserted_distribution_shape,
    dependent: :restrict_with_error

  # Normalize the ISO codes (strip surrounding whitespace, upcase) before the
  # validations below run. Blank values are left untouched.
  before_validation do
    self.iso_3166_a2 = iso_3166_a2.strip.upcase if iso_3166_a2.present?
  end
  before_validation do
    self.iso_3166_a3 = iso_3166_a3.strip.upcase if iso_3166_a3.present?
  end

  validates :name, presence: true, length: {minimum: 1}
  validate :iso_3166_a2_is_two_characters
  validate :iso_3166_a3_is_three_characters

  # Remember the geographic item id before destroy, then remove the item
  # afterwards if nothing references it any more.
  before_destroy :capture_geographic_item_id_for_cleanup
  after_destroy :destroy_geographic_item_if_orphaned

  accepts_nested_attributes_for :geographic_item

  # Builds the GeoJSON 'Feature' pieces for this gazetteer, with a 'shape'
  # property identifying it (cf. GeographicArea).
  #
  # @return [Hash] of the pieces of a GeoJSON 'Feature'
  def to_geo_json_feature
    shape_properties = { 'type' => 'Gazetteer', 'id' => id, 'tag' => name }

    # merge replaces the empty 'properties' of the simple feature.
    to_simple_json_feature.merge('properties' => { 'shape' => shape_properties })
  end

  # @return [Hash] a GeoJSON 'Feature' with empty properties whose geometry is
  #   the GeoJSON of this gazetteer's geographic item
  def to_simple_json_feature
    geometry = geographic_item.to_geo_json

    { 'type' => 'Feature', 'properties' => {}, 'geometry' => geometry }
  end

  # Builds a GeographicItem for this gazetteer from the combined input shapes.
  # If the shapes cannot be combined, the error is added to errors[:base] and
  # nothing is built.
  #
  # @param shapes [Hash]
  #   geojson: array of geojson feature hashes,
  #   wkt: array of wkt strings,
  #   points: array of geojson feature points
  #   ga_combine: array of GA ids
  #   gz_combine: array of GZ ids
  # @param operation_is_union [Boolean] Union if true, intersection if false
  def build_gi_from_shapes(shapes, operation_is_union=true)
    combined_shape =
      begin
        self.class.combine_shapes_to_rgeo(shapes, operation_is_union)
      rescue TaxonWorks::Error => e
        errors.add(:base, e)
        return
      end

    build_geographic_item(geography: combined_shape)
  end

  # Converts each kind of input shape to RGeo and combines them all into a
  # single shape.
  #
  # @param shapes [Hash] as in build_gi_from_shapes
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # @return A single rgeo shape that is the combination of all of the input shapes
  # Raises TaxonWorks::Error on error
  def self.combine_shapes_to_rgeo(shapes, operation_is_union)
    shape_keys = %i[geojson wkt points ga_combine gz_combine]
    if shape_keys.all? { |key| shapes[key].blank? }
      raise TaxonWorks::Error, 'No shapes provided'
    end

    from_geojson = convert_geojson_to_rgeo(shapes[:geojson])
    from_wkt = convert_wkt_to_rgeo(shapes[:wkt])
    from_points = convert_geojson_to_rgeo(shapes[:points])
    from_gas = convert_ga_to_rgeo(shapes[:ga_combine])
    from_gzs = convert_gz_to_rgeo(shapes[:gz_combine])

    # User-supplied shapes first, then shapes from existing GAs and GZs.
    all_shapes = from_geojson + from_wkt + from_points + from_gas + from_gzs

    combine_rgeo_shapes(all_shapes, operation_is_union)
  rescue RGeo::Error::InvalidGeometry, RGeo::Error::RGeoError => e
    # Re-raise RGeo failures as the error type callers of this method rescue.
    raise TaxonWorks::Error, e
  end

  # Decodes GeoJSON features to RGeo shapes. A Point feature that carries a
  # 'radius' property is turned into a circle via GeographicItem.circle.
  #
  # @param shapes [Array] geojson feature hashes
  # @return [Array] of RGeo::Geographic::Projected*Impl
  # Raises RGeo::Error::InvalidGeometry on error
  def self.convert_geojson_to_rgeo(shapes)
    return [] if shapes.blank?

    shapes.map do |geojson|
      # Raises RGeo::Error::InvalidGeometry on error
      feature = RGeo::GeoJSON.decode(geojson, geo_factory: Gis::FACTORY)

      point_with_radius =
        feature.geometry.geometry_type.to_s == 'Point' &&
        feature.properties['radius'].present?

      circle = nil
      if point_with_radius
        circle = GeographicItem.circle(
          feature.geometry, feature.properties['radius']
        )
      end

      geometry = circle || feature.geometry

      GeographicItem.make_valid_non_anti_meridian_crossing_shape(geometry.as_text)
    end
  end

  # @param ga_ids [Array] GeographicArea ids
  # @return [Array] the geo_object of each GeographicArea found for ga_ids;
  #   empty if ga_ids is blank
  def self.convert_ga_to_rgeo(ga_ids)
    ga_ids.blank? ? [] : GeographicArea.where(id: ga_ids).map(&:geo_object)
  end

  # @param gz_ids [Array] Gazetteer ids
  # @return [Array] the geo_object of each Gazetteer found for gz_ids; empty
  #   if gz_ids is blank
  def self.convert_gz_to_rgeo(gz_ids)
    gz_ids.blank? ? [] : Gazetteer.where(id: gz_ids).map(&:geo_object)
  end

  # Parses WKT strings to RGeo shapes.
  #
  # @param wkt_shapes [Array] wkt strings
  # @return [Array] of RGeo::Geographic::Projected*Impl
  # Raises RGeo::Error::RGeoError on error
  def self.convert_wkt_to_rgeo(wkt_shapes)
    return [] if wkt_shapes.blank?

    wkt_shapes.map do |wkt|
      parsed =
        begin
          ::Gis::FACTORY.parse_wkt(wkt)
        rescue RGeo::Error::RGeoError => parse_error
          # Same exception class, with a message that names WKT as the culprit.
          raise parse_error.exception("Invalid WKT: #{parse_error.message}")
        end

      GeographicItem.make_valid_non_anti_meridian_crossing_shape(parsed.as_text)
    end
  end

  # Combines the given shapes into one, by union or intersection, using
  # PostGIS. A single input shape is returned unchanged.
  #
  # @param [Array] rgeo_shapes of RGeo::Geographic::Projected*Impl
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # @return [RGeo::Geographic::Projected*Impl] A single shape combining all of the
  #   input shapes
  # Raises TaxonWorks::Error on error, including when there are no input
  # shapes or the combined shape is empty
  def self.combine_rgeo_shapes(rgeo_shapes, operation_is_union)
    if rgeo_shapes.empty?
      # Reachable when e.g. only GA/GZ ids were supplied and none matched a
      # record. Without this guard the SQL below would be malformed (an empty
      # ARRAY[] / a missing ST_Buffer argument) and surface as
      # ActiveRecord::StatementInvalid instead of the documented error.
      raise TaxonWorks::Error, 'No shapes found to combine'
    end

    return rgeo_shapes.first if rgeo_shapes.count == 1

    connection = ActiveRecord::Base.connection
    geom_exprs = rgeo_shapes.map { |s|
      "ST_GeomFromText(#{connection.quote(s.as_text)}, 4326)"
    }

    # Both operations drop Z values (ST_Buffer is 2D-only); ST_Force3D below
    # restores a Z dimension on the result.
    buffered_expr =
      if operation_is_union
        # Use PostGIS ST_UnaryUnion rather than RGeo's iterative .union().
        # Adjacent GA shapes store their shared border vertices with slightly
        # different coordinate values — even shapes from the same source
        # dataset. GEOS must find where these nearly-coincident edges
        # intersect to compute the union boundary. That intersection is
        # numerically unstable, so the union boundary ends up at coordinates
        # that match neither input, leaving a sliver where
        # ST_CoveredBy(input_GA, Gaz) returns false. This causes the OTU
        # spatial filter to silently drop asserted-distribution OTUs for any
        # GA whose shape fails the coverage check.
        #
        # A post-union ST_Buffer (COMBINE_BUFFER_DEGREES) absorbs the sliver.
        # Empirical testing across 13 country-pair/group/chain combinations
        # found the minimum buffer needed was ~1e-11°; 1e-7° (~11 mm at the
        # equator) gives a comfortable margin and is imperceptible for any
        # biodiversity application.
        collected = "ST_Collect(ARRAY[#{geom_exprs.join(', ')}])"
        "ST_Buffer(ST_UnaryUnion(#{collected}), #{COMBINE_BUFFER_DEGREES})"
      else # Intersection
        # See discussion in the union case for motivation; here the buffer is
        # negative.
        intersection_expr =
          geom_exprs.reduce { |acc, g| "ST_Intersection(#{acc}, #{g})" }
        "ST_Buffer(#{intersection_expr}, -#{COMBINE_BUFFER_DEGREES})"
      end

    # ST_MakeValid is applied after ST_Buffer as a safety net — neither GEOS
    # nor PostGIS guarantees topologically valid output from geometric
    # operations.
    result_wkb = connection.select_value(
      "SELECT ST_Force3D(ST_MakeValid(#{buffered_expr}))"
    )
    u = Gis::FACTORY.parse_wkb(result_wkb)

    if u.empty?
      message = operation_is_union ?
        "Empty union can't be saved!" : "Empty intersection can't be saved!"
      raise TaxonWorks::Error, message
    end

    u
  end

  # Saves gz to the current project and saves a copy of it to each of the
  # other projects in project_ids.
  #
  # @param gz [Gazetteer] Unsaved Gazetteer to save and clone from
  # @param project_ids [Array] project ids to clone gz into - gz is always
  #   saved to the current project. Duplicates and the current project's id
  #   are ignored. The array itself is not modified; nil is treated as empty.
  #   If saves occur in more than one project then all saves occur in a
  #   transaction.
  # @param citation [Hash] Citation object to save to each Gazetteer created
  # Raises ActiveRecord::RecordInvalid on error
  def self.save_and_clone_to_projects(gz, project_ids, citation = nil)
    # Work on a copy: callers may reuse their array across calls (the
    # shapefile import passes the same one for every record).
    clone_project_ids = Array(project_ids).uniq - [Current.project_id]

    if clone_project_ids.empty?
      perform_save_and_clone_to_projects(gz, [], citation)
    else
      Gazetteer.transaction do
        perform_save_and_clone_to_projects(gz, clone_project_ids, citation)
      end
    end
  end

  # @param a2 [Object] candidate two-letter country code
  # @return [Boolean] true only if a2 is a String that, once stripped and
  #   upcased, consists of exactly two letters A-Z
  def self.validate_iso_3166_a2(a2)
    # is_a? (rather than comparing the class name) also accepts String
    # subclasses. nil and other non-strings are rejected here; empty and
    # whitespace-only strings fail the pattern below.
    return false unless a2.is_a?(String)

    /\A[A-Z]{2}\z/.match?(a2.strip.upcase)
  end

  # @param a3 [Object] candidate three-letter country code
  # @return [Boolean] true only if a3 is a String that, once stripped and
  #   upcased, consists of exactly three letters A-Z
  def self.validate_iso_3166_a3(a3)
    # is_a? (rather than comparing the class name) also accepts String
    # subclasses. nil and other non-strings are rejected here; empty and
    # whitespace-only strings fail the pattern below.
    return false unless a3.is_a?(String)

    /\A[A-Z]{3}\z/.match?(a3.strip.upcase)
  end

  # Imports one Gazetteer per record of a shapefile whose component files are
  # stored as Documents.
  #
  # @param shapefile [Hash] reads :shp_doc_id, :shx_doc_id, :dbf_doc_id,
  #   :prj_doc_id, optional :cpg_doc_id (Document ids), and :name_field,
  #   :iso_a2_field, :iso_a3_field (shapefile attribute names)
  # @param citation_options [Hash] :citation is applied to each Gazetteer
  #   only if :cite_gzs is truthy
  # @param progress_tracker receives update! calls reporting progress/errors
  # @param projects [Array] project ids passed on to process_shape_file
  #
  # If any Document is missing, the error is recorded on progress_tracker and
  # nothing is imported.
  def self.import_gzs_from_shapefile(
    shapefile, citation_options, progress_tracker, projects
  )
    begin
      shp_doc = Document.find(shapefile[:shp_doc_id])
      shx_doc = Document.find(shapefile[:shx_doc_id])
      dbf_doc = Document.find(shapefile[:dbf_doc_id])
      prj_doc = Document.find(shapefile[:prj_doc_id])
      cpg_doc = shapefile[:cpg_doc_id] ?
        Document.find(shapefile[:cpg_doc_id]) : nil
    rescue ActiveRecord::RecordNotFound => e
      progress_tracker.update!(
        num_records_imported: 0,
        error_messages: e.message,
        started_at: DateTime.now,
        ended_at: DateTime.now
      )
      return
    end
    name_field = shapefile[:name_field]

    # The above shapefile files are unlikely to all be in the same directory as
    # required by rgeo-shapefile, so create symbolic links to each in a new
    # temporary folder.
    tmp_dir = Rails.root.join('tmp', 'shapefiles', SecureRandom.hex)
    FileUtils.mkdir_p(tmp_dir)

    shp_link = File.join(tmp_dir, 'shapefile.shp')
    shx_link = File.join(tmp_dir, 'shapefile.shx')
    dbf_link = File.join(tmp_dir, 'shapefile.dbf')
    prj_link = File.join(tmp_dir, 'shapefile.prj')
    cpg_link = cpg_doc.present? ? File.join(tmp_dir, 'shapefile.cpg') : nil
    links = [shp_link, dbf_link, shx_link, prj_link, cpg_link].compact

    begin
      FileUtils.ln_s(shp_doc.document_file.path, shp_link)
      FileUtils.ln_s(shx_doc.document_file.path, shx_link)
      FileUtils.ln_s(dbf_doc.document_file.path, dbf_link)
      FileUtils.ln_s(prj_doc.document_file.path, prj_link)
      FileUtils.ln_s(cpg_doc.document_file.path, cpg_link) if cpg_link

      prj = File.read(prj_doc.document_file.path)
      crs = RGeo::CoordSys::CS.create_from_wkt(prj)

      citation = citation_options[:cite_gzs] ? citation_options[:citation] : nil

      process_shape_file(
        shp_link, crs, name_field,
        shapefile[:iso_a2_field], shapefile[:iso_a3_field],
        citation, progress_tracker, projects
      )
    ensure
      # Always remove the links and the temporary folder, even when linking,
      # reading the .prj or the import itself raised.
      FileUtils.rm_f(links)
      FileUtils.rmdir(tmp_dir)
    end
  end

  # Finds the gazetteers used as the shape of asserted distributions that the
  # user cited in the project during the last week.
  #
  # @param user_id [Integer] matched against citations.created_by_id
  # @param project_id [Integer] matched against citations.project_id
  # @param used_on [String] currently `AssertedDistribution`
  # @return [Array, nil]
  #    unique gazetteer ids (asserted_distribution_shape_id values), or nil
  #    when `used_on` is anything other than 'AssertedDistribution'.
  #    NOTE(review): no limit is applied here and the outer query has no ORDER
  #    BY, so neither a maximum count nor a most-recent-first order is
  #    guaranteed; select_optimized truncates the list itself.
  def self.used_recently(user_id, project_id, used_on = 'AssertedDistribution')

    case used_on
    when 'AssertedDistribution'
      t = Citation.arel_table
      # i is a select manager: citations created within the last week by
      # user_id in project_id
      i = t.project(t['citation_object_id'], t['citation_object_type'], t['created_at']).from(t)
        .where(t['created_at'].gt(1.week.ago))
        .where(t['created_by_id'].eq(user_id))
        .where(t['project_id'].eq(project_id))
        .order(t['created_at'].desc)

      # z is a table alias
      z = i.as('recent_t')
      p = AssertedDistribution.arel_table

      # Join asserted distributions to their recent citations, then keep only
      # those whose shape is a Gazetteer.
      AssertedDistribution
        .joins(
          Arel::Nodes::InnerJoin.new(z, Arel::Nodes::On.new(z['citation_object_id'].eq(p['id']).and(z['citation_object_type'].eq('AssertedDistribution')))  )
        )
        .where(asserted_distribution_shape_type: 'Gazetteer')
        .pluck(:asserted_distribution_shape_id).uniq
    end
  end

  # Builds the gazetteer lists offered to a user for quick selection.
  #
  # @param user_id [Integer]
  # @param project_id [Integer]
  # @param target [String] currently only `AssertedDistribution`
  # @return [Hash] gazetteers optimized for user selection, with keys
  #   :quick, :pinboard and :recent (each an Array of Gazetteer)
  def self.select_optimized(user_id, project_id, target = 'AssertedDistribution')
    target = 'AssertedDistribution' if target.blank?
    recent_ids = used_recently(user_id, project_id, target) || []

    gazetteers_for = lambda { |ids|
      Gazetteer.where('"gazetteers"."id" IN (?)', ids).order(:name).to_a
    }
    inserted_pins = lambda {
      Gazetteer.pinned_by(user_id).pinboard_inserted.where(pinboard_items: {project_id:}).to_a
    }

    pinboard = Gazetteer.pinned_by(user_id).where(pinboard_items: {project_id:}).to_a
    recent = []

    quick =
      if recent_ids.empty?
        inserted_pins.call
      else
        if target == 'AssertedDistribution'
          recent = gazetteers_for.call(recent_ids.first(15))
        end
        (inserted_pins.call + gazetteers_for.call(recent_ids.first(5))).uniq
      end

    { quick:, pinboard:, recent: }
  end

  # Return a2/a3 country?
  # @return [Hash] currently always empty
  def geographic_name_classification = {}

  # @return [Array] a one-element array holding this gazetteer's
  #   geographic item
  def geographic_items = [geographic_item]

  # @return this gazetteer's geographic item
  def default_geographic_item = geographic_item

  # @return the id of this gazetteer's geographic item
  def default_geographic_item_id = geographic_item.id

  # @return [String] the GZ_DATA_ORIGIN constant
  def data_origin = GZ_DATA_ORIGIN

  private

  # Saves gz to the current project, then saves a dup of it into each of
  # project_ids. When a citation is given, one is built on every record with
  # that record's project id.
  #
  # @param gz [Gazetteer] unsaved Gazetteer
  # @param project_ids [Array] the projects to clone to - does not include the
  #   current project which gz is saved to.
  # @param citation [Hash, nil] citation attributes
  # Raises ActiveRecord::RecordInvalid on error (save! is used throughout)
  def self.perform_save_and_clone_to_projects(gz, project_ids, citation)
    cited = citation.present?

    gz.citations.build(citation.merge({ project_id: Current.project_id })) if cited
    gz.save!

    project_ids.each do |target_project_id|
      copy = gz.dup
      copy.project_id = target_project_id
      copy.citations.build(citation.merge({ project_id: target_project_id })) if cited
      copy.save!
    end
  end

  # Creates one Gazetteer per shapefile record, saving it to the current
  # project and cloning it to `projects`, and reports progress and per-record
  # errors to progress_tracker. A failing record is recorded and skipped; the
  # import continues with the next record.
  #
  # @param shpfile [String] path to the .shp file
  # @param crs coordinate system of the shapefile; geometries are transformed
  #   to Gis::FACTORY coordinates unless crs is already WGS84
  # @param name_field [String] record attribute used for the Gazetteer name
  # @param iso_a2_field [String] record attribute holding the optional a2 code
  # @param iso_a3_field [String] record attribute holding the optional a3 code
  # @param citation [Hash, nil] citation applied to each Gazetteer created
  # @param progress_tracker receives update! calls
  # @param projects [Array] project ids to clone each Gazetteer into
  def self.process_shape_file(
    shpfile, crs, name_field, iso_a2_field, iso_a3_field, citation,
    progress_tracker, projects
  )
    r = {
      num_records: 0,
      num_records_imported: 0,
      error_messages: nil,
    }
    # Assigned once the reader is open so the ensure below can close it.
    file = nil

    # We'll need to transform from whatever CRS the shapefile is in to our WGS84
    # coordinates.
    if (crs_is_wgs84 = Vendor::Rgeo.coord_sys_is_wgs84?(crs))
      from_factory = Gis::FACTORY
    else
      from_proj4 = RGeo::CoordSys::Proj4.create(crs.to_s)
      from_factory = from_proj4.projected? ?
        # Shapefiles using a projected CRS always store their geometries using
        # projected coordinates.
        RGeo::Geographic.projected_factory(
          coord_sys: from_proj4, has_z_coordinate: true
        ).projection_factory :
        RGeo::Geographic.spherical_factory( # geographic? true
          coord_sys: from_proj4, has_z_coordinate: true
        )

      to_proj4 = Gis::FACTORY.coord_sys
      to_factory = Gis::FACTORY
    end

    begin
      # TODO: https://github.com/rgeo/rgeo-shapefile could use a fork and updates to dbf/rgeo
      file = RGeo::Shapefile::Reader.open(
        shpfile, factory: from_factory, allow_unsafe: true
      )
    rescue Errno::ENOENT => e
      progress_tracker.update!(
        num_records_imported: 0,
        error_messages: e.message,
        started_at: DateTime.now,
        ended_at: DateTime.now
      )
      return
    end

    r[:num_records] = file.num_records

    progress_tracker.update!(
      num_records: file.num_records,
      project_names:
        Project.where(id: projects).order(:name).pluck(:name).join(', '),
      started_at: DateTime.now
    )

    # Iterate over an index so we can record index on error and then resume
    (0...file.num_records).each do |i|
      begin
        # This can throw GeosError even when allow_unsafe: true
        record = file[i]

        # iso a2/a3 are optional fields, we ignore them if the shapefile
        # doesn't provide valid data.
        a2 = record[iso_a2_field]
        a3 = record[iso_a3_field]
        iso_3166_a2 = validate_iso_3166_a2(a2) ? a2 : nil
        iso_3166_a3 = validate_iso_3166_a3(a3) ? a3 : nil

        g = new(
          name: record[name_field],
          iso_3166_a2:,
          iso_3166_a3:
        )

        if crs_is_wgs84
          record_geometry = record.geometry
        else
          # TODO: what might this raise? Might want to cap our total number of
          # errors recorded here
          record_geometry = RGeo::CoordSys::Proj4.transform(
            from_proj4,
            record.geometry,
            to_proj4,
            to_factory
          )
        end

        shape = GeographicItem.make_valid_non_anti_meridian_crossing_shape(
          record_geometry.as_text
        )

        g.build_geographic_item(
          geography: shape
        )

        save_and_clone_to_projects(g, projects, citation)
        r[:num_records_imported] = r[:num_records_imported] + 1

        # Only report progress every fifth record.
        if i % 5 == 0
          progress_tracker.update!(
            num_records_imported: r[:num_records_imported]
          )
        end

      rescue RGeo::Error::InvalidGeometry,
             ActiveRecord::RecordInvalid,
             RGeo::Error::GeosError,
             ActiveRecord::StatementInvalid => e
        # For ActiveRecord::StatementInvalid, in known instances this is a
        # result of something like:
        # PG::InternalError:
        #   ERROR:  lwgeom_intersection_prec: GEOS Error: TopologyException:
        #   Input geom 0 is invalid: Self-intersection at 185 5 0
        # !! Any containing transaction (from running in a spec e.g.) is now
        # aborted and open, any attempts to interact with the db will now raise
        # PG::InFailedSqlTransaction: ERROR:  current transaction is aborted,
        #   commands ignored until end of transaction block
        #
        # Errors are reported with a 1-based record number.
        process_import_error(progress_tracker, r, i + 1, e.to_s)
      end
    end

    progress_tracker.update!(
      num_records_imported: r[:num_records_imported],
      ended_at: DateTime.now
    )
  ensure
    # Reader.open without a block leaves the shapefile's file handles open
    # until the reader is closed explicitly.
    file&.close
  end

  # Appends an error to the running, '; '-separated list of import errors and
  # writes the whole list to progress_tracker.
  #
  # @param progress_tracker receives update!(error_messages: ...)
  # @param recorder [Hash] its :error_messages entry is updated in place
  # @param error_index [Integer] number identifying the failing record
  # @param error_message [String]
  def self.process_import_error(
    progress_tracker, recorder, error_index, error_message
  )
    entry = "#{error_index}: '#{error_message}'"
    earlier = recorder[:error_messages]

    recorder[:error_messages] =
      if earlier.present?
        "#{earlier}; #{entry}"
      else
        entry
      end

    progress_tracker.update!(error_messages: recorder[:error_messages])
  end

  # Validation: iso_3166_a2 may be nil; any other value must pass
  # validate_iso_3166_a2.
  def iso_3166_a2_is_two_characters
    return if iso_3166_a2.nil? || self.class.validate_iso_3166_a2(iso_3166_a2)

    errors.add(:iso_3166_a2, 'must be exactly two characters')
  end

  # Validation: iso_3166_a3 may be nil; any other value must pass
  # validate_iso_3166_a3.
  def iso_3166_a3_is_three_characters
    return if iso_3166_a3.nil? || self.class.validate_iso_3166_a3(iso_3166_a3)

    errors.add(:iso_3166_a3, 'must be exactly three characters')
  end

  # after_destroy: destroys the GeographicItem whose id was captured before
  # destroy, but only if it still exists and reports itself as
  # unreferenced_for_cleanup?.
  def destroy_geographic_item_if_orphaned
    candidate = GeographicItem.find_by(id: geographic_item_id_for_cleanup)
    return unless candidate&.unreferenced_for_cleanup?

    candidate.destroy!
  end

  # before_destroy: remembers the geographic item id so that the cleanup
  # after destroy does not depend on association state on the destroyed
  # record.
  def capture_geographic_item_id_for_cleanup
    item_id = geographic_item_id
    self.geographic_item_id_for_cleanup = item_id
  end
end

#iso_3166_a3 ⇒ String

Three-letter alphabetic code identifying the country.

Returns:

  • (String)


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
# File 'app/models/gazetteer.rb', line 26

class Gazetteer < ApplicationRecord
  include Housekeeping
  include Shared::Citations
  include Shared::Notes
  include Shared::DataAttributes
  include Shared::AlternateValues
  include Shared::IsData

  attr_accessor :geographic_item_id_for_cleanup

  ALTERNATE_VALUES_FOR = [:name].freeze

  GZ_DATA_ORIGIN = 'TaxonWorks Gazetteer'.freeze

  # Buffer applied after union (+) and intersection (-) to absorb ~1e-14°
  # floating-point slivers at shared borders. Applied in geometry (degree)
  # space; 1e-7° ≈ ~11 mm at the equator.
  # See combine_rgeo_shapes for full explanation.
  COMBINE_BUFFER_DEGREES = 1e-7

  delegate :geo_object, to: :geographic_item

  belongs_to :geographic_item, inverse_of: :gazetteers

  has_many :asserted_distributions,
    as: :asserted_distribution_shape,
    inverse_of: :asserted_distribution_shape,
    dependent: :restrict_with_error

  before_validation do
    self.iso_3166_a2 = iso_3166_a2.strip.upcase if iso_3166_a2.present?
  end
  before_validation do
    self.iso_3166_a3 = iso_3166_a3.strip.upcase if iso_3166_a3.present?
  end

  validates :name, presence: true, length: {minimum: 1}
  validate :iso_3166_a2_is_two_characters
  validate :iso_3166_a3_is_three_characters

  before_destroy :capture_geographic_item_id_for_cleanup
  after_destroy :destroy_geographic_item_if_orphaned

  accepts_nested_attributes_for :geographic_item

  # @return [Hash] of the pieces of a GeoJSON 'Feature'
  def to_geo_json_feature
    to_simple_json_feature.merge(
      'properties' => {
        # cf. GeographicArea
        'shape' => {
          'type' => 'Gazetteer',
          'id' => id,
          'tag' => name
        }
      }
    )
  end

  def to_simple_json_feature
    {
      'type' => 'Feature',
      'properties' => {},
      'geometry' => geographic_item.to_geo_json
    }
  end

  # @param shapes [Hash]
  #   geojson: array of geojson feature hashes,
  #   wkt: array of wkt strings,
  #   points: array of geojson feature points
  #   ga_combine: array of GA ids
  #   gz_combine: array of GZ ids
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # Builds a GeographicItem for this gazetteer from the combined input shapes
  def build_gi_from_shapes(shapes, operation_is_union=true)
    begin
      rgeo_shape = self.class.combine_shapes_to_rgeo(shapes, operation_is_union)
    rescue TaxonWorks::Error => e
      errors.add(:base, e)
      return
    end

    build_geographic_item(
      geography: rgeo_shape
    )
  end

  # @param [Hash] hash as in build_gi_from_shapes
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # @return A single rgeo shape that is the combination of all of the input shapes
  # Raises TaxonWorks::Error on error
  def self.combine_shapes_to_rgeo(shapes, operation_is_union)
    begin
      if shapes[:geojson].blank? && shapes[:wkt].blank? &&
          shapes[:points].blank? && shapes[:ga_combine].blank? &&
          shapes[:gz_combine].blank?
        raise TaxonWorks::Error, 'No shapes provided'
      end

      leaflet_rgeo = convert_geojson_to_rgeo(shapes[:geojson])
      wkt_rgeo = convert_wkt_to_rgeo(shapes[:wkt])
      points_rgeo = convert_geojson_to_rgeo(shapes[:points])
      ga_rgeo = convert_ga_to_rgeo(shapes[:ga_combine])
      gz_rgeo = convert_gz_to_rgeo(shapes[:gz_combine])

      user_input_shapes = leaflet_rgeo + wkt_rgeo + points_rgeo

      return combine_rgeo_shapes(
        user_input_shapes + ga_rgeo + gz_rgeo, operation_is_union
      )

    # This is more specific than RGeo::Error::RgeoError
    rescue RGeo::Error::InvalidGeometry => e
      raise TaxonWorks::Error, e
    rescue RGeo::Error::RGeoError => e
      raise TaxonWorks::Error, e
    end
  end

  # @return [Array] of RGeo::Geographic::Projected*Impl
  # Raises RGeo::Error::InvalidGeometry on error
  def self.convert_geojson_to_rgeo(shapes)
    return [] if shapes.blank?

    rgeo_shapes = shapes.map do |shape|
      # Raises RGeo::Error::InvalidGeometry on error
      rgeo_shape = RGeo::GeoJSON.decode(shape, geo_factory: Gis::FACTORY)

      circle = nil
      if rgeo_shape.geometry.geometry_type.to_s == 'Point' &&
           rgeo_shape.properties['radius'].present?
        r = rgeo_shape.properties['radius']

        circle = GeographicItem.circle(rgeo_shape.geometry, r)
      end

      s = circle || rgeo_shape.geometry

      GeographicItem.make_valid_non_anti_meridian_crossing_shape(s.as_text)
    end

    rgeo_shapes
  end

  def self.convert_ga_to_rgeo(ga_ids)
    return [] if ga_ids.blank?

    GeographicArea.where(id: ga_ids).map { |ga| ga.geo_object }
  end

  def self.convert_gz_to_rgeo(gz_ids)
    return [] if gz_ids.blank?

    Gazetteer.where(id: gz_ids).map { |gz| gz.geo_object }
  end

  # @return [Array] of RGeo::Geographic::Projected*Impl
  # Raises RGeo::Error::RGeoError on error
  def self.convert_wkt_to_rgeo(wkt_shapes)
    return [] if wkt_shapes.blank?

    wkt_shapes.map do |shape|
      begin
        s = ::Gis::FACTORY.parse_wkt(shape)
      rescue RGeo::Error::RGeoError => e
        raise e.exception("Invalid WKT: #{e.message}")
      end

      GeographicItem.make_valid_non_anti_meridian_crossing_shape(s.as_text)
    end
  end

  # @param [Array] rgeo_shapes of RGeo::Geographic::Projected*Impl
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # @return [RGeo::Geographic::Projected*Impl] A single shape combining all of the
  #   input shapes
  # Raises TaxonWorks::Error on error
  def self.combine_rgeo_shapes(rgeo_shapes, operation_is_union)
    if rgeo_shapes.count == 1
      return rgeo_shapes[0]
    end

    if operation_is_union
      # Drops Z values (ST_Buffer is 2D-only).
      # Use PostGIS ST_UnaryUnion rather than RGeo's iterative .union().
      # Adjacent GA shapes store their shared border vertices with slightly
      # different coordinate values — even shapes from the same source dataset.
      # GEOS must find where these nearly-coincident edges intersect to compute
      # the union boundary. That intersection is numerically unstable, so the
      # union boundary ends up at coordinates that match neither input, leaving
      # a sliver where ST_CoveredBy(input_GA, Gaz) returns false. This causes
      # the OTU spatial filter to silently drop asserted-distribution OTUs for
      # any GA whose shape fails the coverage check.
      #
      # A post-union ST_Buffer (COMBINE_BUFFER_DEGREES) absorbs the sliver.
      # Empirical testing across 13 country-pair/group/chain combinations found
      # the minimum buffer needed was ~1e-11°; 1e-7° (~11 mm at the equator)
      # gives a comfortable margin and is imperceptible for any biodiversity
      # application.
      # ST_MakeValid is applied after ST_Buffer as a safety net — neither
      # GEOS nor PostGIS guarantees topologically valid output from geometric
      # operations.
      geom_exprs = rgeo_shapes.map { |s|
        "ST_GeomFromText(#{ActiveRecord::Base.connection.quote(s.as_text)}, 4326)"
      }
      result_wkb = ActiveRecord::Base.connection.select_value(<<~SQL)
        SELECT ST_Force3D(ST_MakeValid(ST_Buffer(ST_UnaryUnion(ST_Collect(ARRAY[#{geom_exprs.join(', ')}])), #{COMBINE_BUFFER_DEGREES})))
      SQL
      u = Gis::FACTORY.parse_wkb(result_wkb)
    else # Intersection
      # Drops Z values (ST_Buffer is 2D-only).
      # See discussion in the union case for motivation.
      geom_exprs = rgeo_shapes.map { |s|
        "ST_GeomFromText(#{ActiveRecord::Base.connection.quote(s.as_text)}, 4326)"
      }
      intersection_expr = geom_exprs.reduce { |acc, g| "ST_Intersection(#{acc}, #{g})" }
      result_wkb = ActiveRecord::Base.connection.select_value(<<~SQL)
        SELECT ST_Force3D(ST_MakeValid(ST_Buffer(#{intersection_expr}, -#{COMBINE_BUFFER_DEGREES})))
      SQL
      u = Gis::FACTORY.parse_wkb(result_wkb)
    end

    if u.empty?
      message = operation_is_union ?
        "Empty union can't be saved!" : "Empty intersection can't be saved!"
      raise TaxonWorks::Error, message
    end

    u
  end

  # @param gz [Gazetteer] Unsaved Gazetteer to save and clone from
  # @param project_ids [Array] project ids to clone gz into - gz is always
  #   saved to the current project.
  #   If saves occur in more than one project then all saves occur in a
  #   transaction.
  # @param citation [Hash] Citation object to save to each Gazetteer created
  # Raises ActiveRecord::RecordInvalid on error
  def self.save_and_clone_to_projects(gz, project_ids, citation = nil)
    project_ids.delete(Current.project_id)
    project_ids.uniq!

    if project_ids.count > 0
      Gazetteer.transaction do
        perform_save_and_clone_to_projects(gz, project_ids, citation)
      end
    else
      perform_save_and_clone_to_projects(gz, [], citation)
    end
  end

  def self.validate_iso_3166_a2(a2)
    return false if a2.blank? || a2.class.to_s != 'String'
    /\A[A-Z][A-Z]\z/.match?(a2.strip.upcase)
  end

  def self.validate_iso_3166_a3(a3)
    return false if a3.blank? || a3.class.to_s != 'String'
    /\A[A-Z][A-Z][A-Z]\z/.match?(a3.strip.upcase)
  end

  # Imports the records of an uploaded shapefile as Gazetteers by linking the
  # uploaded Documents into a temporary directory and handing the result to
  # process_shape_file.
  #
  # @param shapefile [Hash] ids of the uploaded Documents (:shp_doc_id,
  #   :shx_doc_id, :dbf_doc_id, :prj_doc_id, optional :cpg_doc_id) plus the
  #   attribute names to read (:name_field, :iso_a2_field, :iso_a3_field)
  # @param citation_options [Hash] :citation is passed on only when :cite_gzs
  #   is truthy
  # @param progress_tracker [#update!] receives progress and error messages
  # @param projects [Array] project ids passed on to process_shape_file
  # A missing Document is recorded on progress_tracker instead of being raised.
  def self.import_gzs_from_shapefile(
    shapefile, citation_options, progress_tracker, projects
  )
    begin
      shp_doc = Document.find(shapefile[:shp_doc_id])
      shx_doc = Document.find(shapefile[:shx_doc_id])
      dbf_doc = Document.find(shapefile[:dbf_doc_id])
      prj_doc = Document.find(shapefile[:prj_doc_id])
      cpg_doc = shapefile[:cpg_doc_id] ?
        Document.find(shapefile[:cpg_doc_id]) : nil
    rescue ActiveRecord::RecordNotFound => e
      progress_tracker.update!(
        num_records_imported: 0,
        error_messages: e.message,
        started_at: DateTime.now,
        ended_at: DateTime.now
      )
      return
    end
    name_field = shapefile[:name_field]

    # The above shapefile files are unlikely to all be in the same directory as
    # required by rgeo-shapefile, so create symbolic links to each in a new
    # temporary folder.
    tmp_dir = Rails.root.join('tmp', 'shapefiles', SecureRandom.hex)
    FileUtils.mkdir_p(tmp_dir)

    begin
      shp_link = File.join(tmp_dir, 'shapefile.shp')

      FileUtils.ln_s(shp_doc.document_file.path, shp_link)
      FileUtils.ln_s(shx_doc.document_file.path, File.join(tmp_dir, 'shapefile.shx'))
      FileUtils.ln_s(dbf_doc.document_file.path, File.join(tmp_dir, 'shapefile.dbf'))
      FileUtils.ln_s(prj_doc.document_file.path, File.join(tmp_dir, 'shapefile.prj'))

      if cpg_doc.present?
        FileUtils.ln_s(cpg_doc.document_file.path, File.join(tmp_dir, 'shapefile.cpg'))
      end

      prj = File.read(prj_doc.document_file.path)
      crs = RGeo::CoordSys::CS.create_from_wkt(prj)

      citation = citation_options[:cite_gzs] ? citation_options[:citation] : nil

      process_shape_file(
        shp_link, crs, name_field,
        shapefile[:iso_a2_field], shapefile[:iso_a3_field],
        citation, progress_tracker, projects
      )
    ensure
      # Always remove the temporary directory and its links, including when
      # linking, reading the .prj or the import itself raises. Only the links
      # created above live in tmp_dir; the uploaded documents are elsewhere.
      FileUtils.rm_rf(tmp_dir)
    end
  end

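  # @param used_on [String] currently only `AssertedDistribution`
  # @return [Array, nil]
  #    the unique ids of gazetteers used as the shape of asserted distributions that
  #    `user_id` cited in `project_id` during the last week (1 week, could
  #    parameterize); nil when `used_on` is not supported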
  def self.used_recently(user_id, project_id, used_on = 'AssertedDistribution')
    # Only AssertedDistribution is handled; anything else yields nil.
    return unless used_on == 'AssertedDistribution'

    citations = Citation.arel_table
    asserted_distributions = AssertedDistribution.arel_table

    # Select manager: this user's citations in this project from the last week.
    recent_citations = citations
      .project(
        citations['citation_object_id'],
        citations['citation_object_type'],
        citations['created_at']
      )
      .from(citations)
      .where(citations['created_at'].gt(1.week.ago))
      .where(citations['created_by_id'].eq(user_id))
      .where(citations['project_id'].eq(project_id))
      .order(citations['created_at'].desc)

    # Table alias so the subquery can be joined against.
    recent = recent_citations.as('recent_t')

    join_condition = recent['citation_object_id']
      .eq(asserted_distributions['id'])
      .and(recent['citation_object_type'].eq('AssertedDistribution'))

    AssertedDistribution
      .joins(Arel::Nodes::InnerJoin.new(recent, Arel::Nodes::On.new(join_condition)))
      .where(asserted_distribution_shape_type: 'Gazetteer')
      .pluck(:asserted_distribution_shape_id)
      .uniq
  end

  # @param target [String] currently only `AssertedDistribution`
  # @return [Hash] gazetteers optimized for user selection
  def self.select_optimized(user_id, project_id, target = 'AssertedDistribution')
    target = 'AssertedDistribution' if target.blank?
    recent_ids = used_recently(user_id, project_id, target) || []

    # Pinboard items of this user/project that are flagged as inserted.
    inserted_pins = lambda do
      Gazetteer.pinned_by(user_id).pinboard_inserted.where(pinboard_items: {project_id:}).to_a
    end
    # Gazetteers for the given ids, ordered by name.
    by_ids = lambda do |ids|
      Gazetteer.where('"gazetteers"."id" IN (?)', ids).order(:name).to_a
    end

    selection = {
      quick: [],
      pinboard: Gazetteer.pinned_by(user_id).where(pinboard_items: {project_id:}).to_a,
      recent: []
    }

    if recent_ids.empty?
      selection[:quick] = inserted_pins.call
    else
      if target == 'AssertedDistribution'
        selection[:recent] = by_ids.call(recent_ids.first(15))
      end
      selection[:quick] = (inserted_pins.call + by_ids.call(recent_ids.first(5))).uniq
    end

    selection
  end

  # Currently always an empty Hash. Open question from the original author:
  # return a2/a3 country?
  # @return [Hash]
  def geographic_name_classification = {}

  # @return [Array] one-element list wrapping this gazetteer's geographic_item
  def geographic_items = [geographic_item]

  # @return the associated geographic_item (a gazetteer has exactly one shape)
  def default_geographic_item = geographic_item

  # @return the id of the associated geographic_item
  # Raises NoMethodError when no geographic_item is set.
  def default_geographic_item_id = geographic_item.id

  # @return [String] the GZ_DATA_ORIGIN constant
  def data_origin = GZ_DATA_ORIGIN

  private

  # @param project_ids [Array] the projects to clone to - does not include the
  # current project which gz is saved to.
  def self.perform_save_and_clone_to_projects(gz, project_ids, citation)
    # NOTE(review): a bare `private` does not apply to `def self.` methods, so
    # this method remains publicly callable.
    with_citation = citation.present?

    # Original record: cited and saved in the current project.
    gz.citations.build(citation.merge({ project_id: Current.project_id })) if with_citation
    gz.save!

    # One copy per additional project, each with its own project-scoped
    # citation.
    project_ids.each do |project_id|
      gz.dup.tap do |clone|
        clone.project_id = project_id
        clone.citations.build(citation.merge({ project_id: })) if with_citation
        clone.save!
      end
    end
  end

  # Reads every record of the shapefile at shpfile, converts its geometry to
  # the Gis::FACTORY coordinate system when needed, and saves one Gazetteer per
  # record (cloned to projects). Per-record failures are recorded on
  # progress_tracker and the import continues with the next record.
  #
  # @param shpfile [String] path to the .shp file
  # @param crs coordinate system parsed from the shapefile's .prj
  # @param name_field [String] record attribute holding the Gazetteer name
  # @param iso_a2_field [String, nil] optional attribute holding an ISO a2 code
  # @param iso_a3_field [String, nil] optional attribute holding an ISO a3 code
  # @param citation [Hash, nil] citation attributes for each Gazetteer
  # @param progress_tracker [#update!] receives counts, timestamps and errors
  # @param projects [Array] project ids to save/clone each Gazetteer into
  def self.process_shape_file(
    shpfile, crs, name_field, iso_a2_field, iso_a3_field, citation,
    progress_tracker, projects
  )
    r = {
      num_records: 0,
      num_records_imported: 0,
      error_messages: nil,
    }

    # We'll need to transform from whatever CRS the shapefile is in to our WGS84
    # coordinates.
    if (crs_is_wgs84 = Vendor::Rgeo.coord_sys_is_wgs84?(crs))
      from_factory = Gis::FACTORY
    else
      from_proj4 = RGeo::CoordSys::Proj4.create(crs.to_s)
      from_factory = from_proj4.projected? ?
        # Shapefiles using a projected CRS always store their geometries using
        # projected coordinates.
        RGeo::Geographic.projected_factory(
          coord_sys: from_proj4, has_z_coordinate: true
        ).projection_factory :
        RGeo::Geographic.spherical_factory( # geographic? true
          coord_sys: from_proj4, has_z_coordinate: true
        )

      to_proj4 = Gis::FACTORY.coord_sys
      to_factory = Gis::FACTORY
    end

    begin
      # TODO: https://github.com/rgeo/rgeo-shapefile could use a fork and updates to dbf/rgeo
      file = RGeo::Shapefile::Reader.open(
        shpfile, factory: from_factory, allow_unsafe: true
      )
    rescue Errno::ENOENT => e
      progress_tracker.update!(
        num_records_imported: 0,
        error_messages: e.message,
        started_at: DateTime.now,
        ended_at: DateTime.now
      )
      return
    end

    begin
      total = file.num_records
      r[:num_records] = total

      progress_tracker.update!(
        num_records: total,
        project_names:
          Project.where(id: projects).order(:name).pluck(:name).join(', '),
        started_at: DateTime.now
      )

      # Iterate over an index so we can record index on error and then resume
      total.times do |i|
        # This can throw GeosError even when allow_unsafe: true
        record = file[i]

        # iso a2/a3 are optional fields, we ignore them if the shapefile
        # doesn't provide valid data.
        a2 = record[iso_a2_field]
        a3 = record[iso_a3_field]
        iso_3166_a2 = validate_iso_3166_a2(a2) ? a2 : nil
        iso_3166_a3 = validate_iso_3166_a3(a3) ? a3 : nil

        g = new(
          name: record[name_field],
          iso_3166_a2:,
          iso_3166_a3:
        )

        if crs_is_wgs84
          record_geometry = record.geometry
        else
          # TODO: what might this raise? Might want to cap our total number of
          # errors recorded here
          record_geometry = RGeo::CoordSys::Proj4.transform(
            from_proj4,
            record.geometry,
            to_proj4,
            to_factory
          )
        end

        shape = GeographicItem.make_valid_non_anti_meridian_crossing_shape(
          record_geometry.as_text
        )

        g.build_geographic_item(
          geography: shape
        )

        save_and_clone_to_projects(g, projects, citation)
        r[:num_records_imported] += 1

        # Persist progress only every fifth record to limit tracker writes.
        if i % 5 == 0
          progress_tracker.update!(
            num_records_imported: r[:num_records_imported]
          )
        end
      rescue RGeo::Error::InvalidGeometry, ActiveRecord::RecordInvalid,
             RGeo::Error::GeosError, ActiveRecord::StatementInvalid => e
        # Errors are reported with a 1-based record number.
        #
        # In known instances StatementInvalid is a result of something like:
        # PG::InternalError:
        #   ERROR:  lwgeom_intersection_prec: GEOS Error: TopologyException:
        #   Input geom 0 is invalid: Self-intersection at 185 5 0
        # !! Any containing transaction (from running in a spec e.g.) is now
        # aborted and open, any attempts to interact with the db will now raise
        # PG::InFailedSqlTransaction: ERROR:  current transaction is aborted,
        #   commands ignored until end of transaction block
        process_import_error(progress_tracker, r, i + 1, e.to_s)
      end

      progress_tracker.update!(
        num_records_imported: r[:num_records_imported],
        ended_at: DateTime.now
      )
    ensure
      # The reader was opened without a block, so it has to be closed
      # explicitly, also when an unexpected error escapes the loop.
      file.close
    end
  end

  # Appends one error to the running '; '-separated error string kept in
  # recorder[:error_messages] and writes the whole string to progress_tracker.
  #
  # @param progress_tracker [#update!]
  # @param recorder [Hash] import bookkeeping; :error_messages is updated
  # @param error_index index shown in front of the message
  # @param error_message [String]
  def self.process_import_error(
    progress_tracker, recorder, error_index, error_message
  )
    entry = "#{error_index}: '#{error_message}'"

    # Earlier messages, when there are any, precede the new entry.
    recorder[:error_messages] =
      [recorder[:error_messages], entry].select(&:present?).join('; ')

    progress_tracker.update!(
      error_messages: recorder[:error_messages]
    )
  end

  # Validation: a nil iso_3166_a2 is allowed; any other value must pass
  # validate_iso_3166_a2.
  def iso_3166_a2_is_two_characters
    return if iso_3166_a2.nil? || self.class.validate_iso_3166_a2(iso_3166_a2)

    errors.add(:iso_3166_a2, 'must be exactly two characters')
  end

  # Validation: a nil iso_3166_a3 is allowed; any other value must pass
  # validate_iso_3166_a3.
  def iso_3166_a3_is_three_characters
    return if iso_3166_a3.nil? || self.class.validate_iso_3166_a3(iso_3166_a3)

    errors.add(:iso_3166_a3, 'must be exactly three characters')
  end

  # Destroys the GeographicItem whose id was captured for cleanup, but only if
  # it still exists and reports itself as unreferenced.
  def destroy_geographic_item_if_orphaned
    item = GeographicItem.find_by(id: geographic_item_id_for_cleanup)
    return unless item&.unreferenced_for_cleanup?

    item.destroy!
  end

  # Remembers the current geographic_item_id in geographic_item_id_for_cleanup.
  def capture_geographic_item_id_for_cleanup
    # Snapshot the id up front: the later cleanup must not rely on association
    # state of a record that has already been destroyed.
    id_snapshot = geographic_item_id
    self.geographic_item_id_for_cleanup = id_snapshot
  end
end

#nameString

The name of the gazetteer

Returns:

  • (String)


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
# File 'app/models/gazetteer.rb', line 26

class Gazetteer < ApplicationRecord
  include Housekeeping
  include Shared::Citations
  include Shared::Notes
  include Shared::DataAttributes
  include Shared::AlternateValues
  include Shared::IsData

  attr_accessor :geographic_item_id_for_cleanup

  ALTERNATE_VALUES_FOR = [:name].freeze

  GZ_DATA_ORIGIN = 'TaxonWorks Gazetteer'.freeze

  # Buffer applied after union (+) and intersection (-) to absorb ~1e-14°
  # floating-point slivers at shared borders. Applied in geometry (degree)
  # space; 1e-7° ≈ ~11 mm at the equator.
  # See combine_rgeo_shapes for full explanation.
  COMBINE_BUFFER_DEGREES = 1e-7

  delegate :geo_object, to: :geographic_item

  belongs_to :geographic_item, inverse_of: :gazetteers

  has_many :asserted_distributions,
    as: :asserted_distribution_shape,
    inverse_of: :asserted_distribution_shape,
    dependent: :restrict_with_error

  before_validation do
    self.iso_3166_a2 = iso_3166_a2.strip.upcase if iso_3166_a2.present?
  end
  before_validation do
    self.iso_3166_a3 = iso_3166_a3.strip.upcase if iso_3166_a3.present?
  end

  validates :name, presence: true, length: {minimum: 1}
  validate :iso_3166_a2_is_two_characters
  validate :iso_3166_a3_is_three_characters

  before_destroy :capture_geographic_item_id_for_cleanup
  after_destroy :destroy_geographic_item_if_orphaned

  accepts_nested_attributes_for :geographic_item

  # @return [Hash] of the pieces of a GeoJSON 'Feature'
  def to_geo_json_feature
    feature = to_simple_json_feature

    # cf. GeographicArea
    shape = { 'type' => 'Gazetteer', 'id' => id, 'tag' => name }

    # Replaces the empty 'properties' of the simple feature.
    feature.merge('properties' => { 'shape' => shape })
  end

  # @return [Hash] a GeoJSON 'Feature' with empty properties whose geometry is
  #   geographic_item.to_geo_json
  def to_simple_json_feature
    feature = { 'type' => 'Feature', 'properties' => {} }
    feature['geometry'] = geographic_item.to_geo_json
    feature
  end

  # @param shapes [Hash]
  #   geojson: array of geojson feature hashes,
  #   wkt: array of wkt strings,
  #   points: array of geojson feature points
  #   ga_combine: array of GA ids
  #   gz_combine: array of GZ ids
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # Builds a GeographicItem for this gazetteer from the combined input shapes
  def build_gi_from_shapes(shapes, operation_is_union=true)
    begin
      rgeo_shape = self.class.combine_shapes_to_rgeo(shapes, operation_is_union)
    rescue TaxonWorks::Error => e
      # Record the message text, not the exception object, so that error
      # listings contain plain strings. Nothing is built in this case and nil
      # is returned.
      errors.add(:base, e.message)
      return
    end

    build_geographic_item(
      geography: rgeo_shape
    )
  end

  # @param [Hash] hash as in build_gi_from_shapes
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # @return A single rgeo shape that is the combination of all of the input shapes
  # Raises TaxonWorks::Error on error
  def self.combine_shapes_to_rgeo(shapes, operation_is_union)
    nothing_given = %i[geojson wkt points ga_combine gz_combine].all? do |key|
      shapes[key].blank?
    end
    raise TaxonWorks::Error, 'No shapes provided' if nothing_given

    drawn = convert_geojson_to_rgeo(shapes[:geojson])
    from_wkt = convert_wkt_to_rgeo(shapes[:wkt])
    from_points = convert_geojson_to_rgeo(shapes[:points])
    from_areas = convert_ga_to_rgeo(shapes[:ga_combine])
    from_gazetteers = convert_gz_to_rgeo(shapes[:gz_combine])

    combine_rgeo_shapes(
      drawn + from_wkt + from_points + from_areas + from_gazetteers,
      operation_is_union
    )
  # InvalidGeometry is more specific than RGeoError; both are re-raised as
  # TaxonWorks::Error.
  rescue RGeo::Error::InvalidGeometry, RGeo::Error::RGeoError => e
    raise TaxonWorks::Error, e
  end

  # @return [Array] of RGeo::Geographic::Projected*Impl
  # Raises RGeo::Error::InvalidGeometry on error
  def self.convert_geojson_to_rgeo(shapes)
    return [] if shapes.blank?

    shapes.map do |shape|
      # Raises RGeo::Error::InvalidGeometry on error
      feature = RGeo::GeoJSON.decode(shape, geo_factory: Gis::FACTORY)
      geometry = feature.geometry

      # A Point feature carrying a 'radius' property is turned into a circle.
      is_circle = geometry.geometry_type.to_s == 'Point' &&
        feature.properties['radius'].present?
      circle = (GeographicItem.circle(geometry, feature.properties['radius']) if is_circle)

      GeographicItem.make_valid_non_anti_meridian_crossing_shape(
        (circle || geometry).as_text
      )
    end
  end

  # @param ga_ids [Array] GeographicArea ids
  # @return [Array] the geo_object of each GeographicArea found; [] when no ids
  #   are given
  def self.convert_ga_to_rgeo(ga_ids)
    return [] if ga_ids.blank?

    GeographicArea.where(id: ga_ids).map(&:geo_object)
  end

  # @param gz_ids [Array] Gazetteer ids
  # @return [Array] the geo_object of each Gazetteer found; [] when no ids are
  #   given
  def self.convert_gz_to_rgeo(gz_ids)
    return [] if gz_ids.blank?

    Gazetteer.where(id: gz_ids).map(&:geo_object)
  end

  # @return [Array] of RGeo::Geographic::Projected*Impl
  # Raises RGeo::Error::RGeoError on error
  def self.convert_wkt_to_rgeo(wkt_shapes)
    return [] if wkt_shapes.blank?

    wkt_shapes.map do |wkt|
      parsed =
        begin
          ::Gis::FACTORY.parse_wkt(wkt)
        rescue RGeo::Error::RGeoError => e
          # Same exception class, with a message that names WKT as the cause.
          raise e.exception("Invalid WKT: #{e.message}")
        end

      GeographicItem.make_valid_non_anti_meridian_crossing_shape(parsed.as_text)
    end
  end

  # @param [Array] rgeo_shapes of RGeo::Geographic::Projected*Impl
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # @return [RGeo::Geographic::Projected*Impl] A single shape combining all of the
  #   input shapes
  # Raises TaxonWorks::Error on error
  def self.combine_rgeo_shapes(rgeo_shapes, operation_is_union)
    # An empty list (e.g. ids that matched no records) would otherwise produce
    # malformed SQL and surface as a database error instead of the documented
    # TaxonWorks::Error.
    raise TaxonWorks::Error, 'No shapes to combine' if rgeo_shapes.empty?

    # A single shape needs no combining and is returned unchanged (unbuffered).
    return rgeo_shapes[0] if rgeo_shapes.count == 1

    connection = ActiveRecord::Base.connection
    # One quoted ST_GeomFromText(...) expression per input shape, SRID 4326.
    geom_exprs = rgeo_shapes.map { |s|
      "ST_GeomFromText(#{connection.quote(s.as_text)}, 4326)"
    }

    combined_expr =
      if operation_is_union
        # Drops Z values (ST_Buffer is 2D-only).
        # Use PostGIS ST_UnaryUnion rather than RGeo's iterative .union().
        # Adjacent GA shapes store their shared border vertices with slightly
        # different coordinate values — even shapes from the same source dataset.
        # GEOS must find where these nearly-coincident edges intersect to compute
        # the union boundary. That intersection is numerically unstable, so the
        # union boundary ends up at coordinates that match neither input, leaving
        # a sliver where ST_CoveredBy(input_GA, Gaz) returns false. This causes
        # the OTU spatial filter to silently drop asserted-distribution OTUs for
        # any GA whose shape fails the coverage check.
        #
        # A post-union ST_Buffer (COMBINE_BUFFER_DEGREES) absorbs the sliver.
        # Empirical testing across 13 country-pair/group/chain combinations found
        # the minimum buffer needed was ~1e-11°; 1e-7° (~11 mm at the equator)
        # gives a comfortable margin and is imperceptible for any biodiversity
        # application.
        # ST_MakeValid is applied after ST_Buffer as a safety net — neither
        # GEOS nor PostGIS guarantees topologically valid output from geometric
        # operations.
        "ST_Buffer(ST_UnaryUnion(ST_Collect(ARRAY[#{geom_exprs.join(', ')}])), #{COMBINE_BUFFER_DEGREES})"
      else # Intersection
        # Drops Z values (ST_Buffer is 2D-only).
        # See discussion in the union case for motivation; here the buffer is
        # negative.
        intersection_expr = geom_exprs.reduce { |acc, g| "ST_Intersection(#{acc}, #{g})" }
        "ST_Buffer(#{intersection_expr}, -#{COMBINE_BUFFER_DEGREES})"
      end

    result_wkb = connection.select_value(<<~SQL)
      SELECT ST_Force3D(ST_MakeValid(#{combined_expr}))
    SQL
    u = Gis::FACTORY.parse_wkb(result_wkb)

    if u.empty?
      message = operation_is_union ?
        "Empty union can't be saved!" : "Empty intersection can't be saved!"
      raise TaxonWorks::Error, message
    end

    u
  end

  # @param gz [Gazetteer] Unsaved Gazetteer to save and clone from
  # @param project_ids [Array] project ids to clone gz into - gz is always
  #   saved to the current project.
  #   If saves occur in more than one project then all saves occur in a
  #   transaction.
  # @param citation [Hash] Citation object to save to each Gazetteer created
  # Raises ActiveRecord::RecordInvalid on error
  def self.save_and_clone_to_projects(gz, project_ids, citation = nil)
    # Work on a copy: callers (e.g. the per-record shapefile import loop) pass
    # the same array repeatedly, so it must not be mutated here. Array() also
    # tolerates a nil project_ids.
    clone_ids = Array(project_ids).uniq - [Current.project_id]

    if clone_ids.empty?
      # Only the current project is involved: a single save, no explicit
      # transaction.
      perform_save_and_clone_to_projects(gz, [], citation)
    else
      # Saves in several projects succeed or fail together.
      Gazetteer.transaction do
        perform_save_and_clone_to_projects(gz, clone_ids, citation)
      end
    end
  end

  # Checks only the shape of the value, not whether the code is assigned.
  # @param a2 [Object] candidate two-letter ISO 3166 code
  # @return [Boolean] true if a2 is a String that, once stripped of surrounding
  #   whitespace and upcased, is exactly two characters A-Z
  def self.validate_iso_3166_a2(a2)
    # is_a? (rather than comparing the class name) also accepts String
    # subclasses; nil and other non-Strings are rejected here, empty and
    # whitespace-only strings fail the pattern below.
    return false unless a2.is_a?(String)

    /\A[A-Z]{2}\z/.match?(a2.strip.upcase)
  end

  # Checks only the shape of the value, not whether the code is assigned.
  # @param a3 [Object] candidate three-letter ISO 3166 code
  # @return [Boolean] true if a3 is a String that, once stripped of surrounding
  #   whitespace and upcased, is exactly three characters A-Z
  def self.validate_iso_3166_a3(a3)
    # is_a? (rather than comparing the class name) also accepts String
    # subclasses; nil and other non-Strings are rejected here, empty and
    # whitespace-only strings fail the pattern below.
    return false unless a3.is_a?(String)

    /\A[A-Z]{3}\z/.match?(a3.strip.upcase)
  end

  # Imports the records of an uploaded shapefile as Gazetteers by linking the
  # uploaded Documents into a temporary directory and handing the result to
  # process_shape_file.
  #
  # @param shapefile [Hash] ids of the uploaded Documents (:shp_doc_id,
  #   :shx_doc_id, :dbf_doc_id, :prj_doc_id, optional :cpg_doc_id) plus the
  #   attribute names to read (:name_field, :iso_a2_field, :iso_a3_field)
  # @param citation_options [Hash] :citation is passed on only when :cite_gzs
  #   is truthy
  # @param progress_tracker [#update!] receives progress and error messages
  # @param projects [Array] project ids passed on to process_shape_file
  # A missing Document is recorded on progress_tracker instead of being raised.
  def self.import_gzs_from_shapefile(
    shapefile, citation_options, progress_tracker, projects
  )
    begin
      shp_doc = Document.find(shapefile[:shp_doc_id])
      shx_doc = Document.find(shapefile[:shx_doc_id])
      dbf_doc = Document.find(shapefile[:dbf_doc_id])
      prj_doc = Document.find(shapefile[:prj_doc_id])
      cpg_doc = shapefile[:cpg_doc_id] ?
        Document.find(shapefile[:cpg_doc_id]) : nil
    rescue ActiveRecord::RecordNotFound => e
      progress_tracker.update!(
        num_records_imported: 0,
        error_messages: e.message,
        started_at: DateTime.now,
        ended_at: DateTime.now
      )
      return
    end
    name_field = shapefile[:name_field]

    # The above shapefile files are unlikely to all be in the same directory as
    # required by rgeo-shapefile, so create symbolic links to each in a new
    # temporary folder.
    tmp_dir = Rails.root.join('tmp', 'shapefiles', SecureRandom.hex)
    FileUtils.mkdir_p(tmp_dir)

    begin
      shp_link = File.join(tmp_dir, 'shapefile.shp')

      FileUtils.ln_s(shp_doc.document_file.path, shp_link)
      FileUtils.ln_s(shx_doc.document_file.path, File.join(tmp_dir, 'shapefile.shx'))
      FileUtils.ln_s(dbf_doc.document_file.path, File.join(tmp_dir, 'shapefile.dbf'))
      FileUtils.ln_s(prj_doc.document_file.path, File.join(tmp_dir, 'shapefile.prj'))

      if cpg_doc.present?
        FileUtils.ln_s(cpg_doc.document_file.path, File.join(tmp_dir, 'shapefile.cpg'))
      end

      prj = File.read(prj_doc.document_file.path)
      crs = RGeo::CoordSys::CS.create_from_wkt(prj)

      citation = citation_options[:cite_gzs] ? citation_options[:citation] : nil

      process_shape_file(
        shp_link, crs, name_field,
        shapefile[:iso_a2_field], shapefile[:iso_a3_field],
        citation, progress_tracker, projects
      )
    ensure
      # Always remove the temporary directory and its links, including when
      # linking, reading the .prj or the import itself raises. Only the links
      # created above live in tmp_dir; the uploaded documents are elsewhere.
      FileUtils.rm_rf(tmp_dir)
    end
  end

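  # @param used_on [String] currently only `AssertedDistribution`
  # @return [Array, nil]
  #    the unique ids of gazetteers used as the shape of asserted distributions that
  #    `user_id` cited in `project_id` during the last week (1 week, could
  #    parameterize); nil when `used_on` is not supported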
  def self.used_recently(user_id, project_id, used_on = 'AssertedDistribution')
    # Only AssertedDistribution is handled; anything else yields nil.
    return unless used_on == 'AssertedDistribution'

    citations = Citation.arel_table
    asserted_distributions = AssertedDistribution.arel_table

    # Select manager: this user's citations in this project from the last week.
    recent_citations = citations
      .project(
        citations['citation_object_id'],
        citations['citation_object_type'],
        citations['created_at']
      )
      .from(citations)
      .where(citations['created_at'].gt(1.week.ago))
      .where(citations['created_by_id'].eq(user_id))
      .where(citations['project_id'].eq(project_id))
      .order(citations['created_at'].desc)

    # Table alias so the subquery can be joined against.
    recent = recent_citations.as('recent_t')

    join_condition = recent['citation_object_id']
      .eq(asserted_distributions['id'])
      .and(recent['citation_object_type'].eq('AssertedDistribution'))

    AssertedDistribution
      .joins(Arel::Nodes::InnerJoin.new(recent, Arel::Nodes::On.new(join_condition)))
      .where(asserted_distribution_shape_type: 'Gazetteer')
      .pluck(:asserted_distribution_shape_id)
      .uniq
  end

  # @param target [String] currently only `AssertedDistribution`
  # @return [Hash] gazetteers optimized for user selection
  def self.select_optimized(user_id, project_id, target = 'AssertedDistribution')
    target = 'AssertedDistribution' if target.blank?
    recent_ids = used_recently(user_id, project_id, target) || []

    # Pinboard items of this user/project that are flagged as inserted.
    inserted_pins = lambda do
      Gazetteer.pinned_by(user_id).pinboard_inserted.where(pinboard_items: {project_id:}).to_a
    end
    # Gazetteers for the given ids, ordered by name.
    by_ids = lambda do |ids|
      Gazetteer.where('"gazetteers"."id" IN (?)', ids).order(:name).to_a
    end

    selection = {
      quick: [],
      pinboard: Gazetteer.pinned_by(user_id).where(pinboard_items: {project_id:}).to_a,
      recent: []
    }

    if recent_ids.empty?
      selection[:quick] = inserted_pins.call
    else
      if target == 'AssertedDistribution'
        selection[:recent] = by_ids.call(recent_ids.first(15))
      end
      selection[:quick] = (inserted_pins.call + by_ids.call(recent_ids.first(5))).uniq
    end

    selection
  end

  # Currently always an empty Hash. Open question from the original author:
  # return a2/a3 country?
  # @return [Hash]
  def geographic_name_classification = {}

  # @return [Array] one-element list wrapping this gazetteer's geographic_item
  def geographic_items = [geographic_item]

  # @return the associated geographic_item (a gazetteer has exactly one shape)
  def default_geographic_item = geographic_item

  # @return the id of the associated geographic_item
  # Raises NoMethodError when no geographic_item is set.
  def default_geographic_item_id = geographic_item.id

  # @return [String] the GZ_DATA_ORIGIN constant
  def data_origin = GZ_DATA_ORIGIN

  private

  # @param project_ids [Array] the projects to clone to - does not include the
  # current project which gz is saved to.
  def self.perform_save_and_clone_to_projects(gz, project_ids, citation)
    # NOTE(review): a bare `private` does not apply to `def self.` methods, so
    # this method remains publicly callable.
    with_citation = citation.present?

    # Original record: cited and saved in the current project.
    gz.citations.build(citation.merge({ project_id: Current.project_id })) if with_citation
    gz.save!

    # One copy per additional project, each with its own project-scoped
    # citation.
    project_ids.each do |project_id|
      gz.dup.tap do |clone|
        clone.project_id = project_id
        clone.citations.build(citation.merge({ project_id: })) if with_citation
        clone.save!
      end
    end
  end

  # Reads every record of the shapefile at shpfile, converts its geometry to
  # the Gis::FACTORY coordinate system when needed, and saves one Gazetteer per
  # record (cloned to projects). Per-record failures are recorded on
  # progress_tracker and the import continues with the next record.
  #
  # @param shpfile [String] path to the .shp file
  # @param crs coordinate system parsed from the shapefile's .prj
  # @param name_field [String] record attribute holding the Gazetteer name
  # @param iso_a2_field [String, nil] optional attribute holding an ISO a2 code
  # @param iso_a3_field [String, nil] optional attribute holding an ISO a3 code
  # @param citation [Hash, nil] citation attributes for each Gazetteer
  # @param progress_tracker [#update!] receives counts, timestamps and errors
  # @param projects [Array] project ids to save/clone each Gazetteer into
  def self.process_shape_file(
    shpfile, crs, name_field, iso_a2_field, iso_a3_field, citation,
    progress_tracker, projects
  )
    r = {
      num_records: 0,
      num_records_imported: 0,
      error_messages: nil,
    }

    # We'll need to transform from whatever CRS the shapefile is in to our WGS84
    # coordinates.
    if (crs_is_wgs84 = Vendor::Rgeo.coord_sys_is_wgs84?(crs))
      from_factory = Gis::FACTORY
    else
      from_proj4 = RGeo::CoordSys::Proj4.create(crs.to_s)
      from_factory = from_proj4.projected? ?
        # Shapefiles using a projected CRS always store their geometries using
        # projected coordinates.
        RGeo::Geographic.projected_factory(
          coord_sys: from_proj4, has_z_coordinate: true
        ).projection_factory :
        RGeo::Geographic.spherical_factory( # geographic? true
          coord_sys: from_proj4, has_z_coordinate: true
        )

      to_proj4 = Gis::FACTORY.coord_sys
      to_factory = Gis::FACTORY
    end

    begin
      # TODO: https://github.com/rgeo/rgeo-shapefile could use a fork and updates to dbf/rgeo
      file = RGeo::Shapefile::Reader.open(
        shpfile, factory: from_factory, allow_unsafe: true
      )
    rescue Errno::ENOENT => e
      progress_tracker.update!(
        num_records_imported: 0,
        error_messages: e.message,
        started_at: DateTime.now,
        ended_at: DateTime.now
      )
      return
    end

    begin
      total = file.num_records
      r[:num_records] = total

      progress_tracker.update!(
        num_records: total,
        project_names:
          Project.where(id: projects).order(:name).pluck(:name).join(', '),
        started_at: DateTime.now
      )

      # Iterate over an index so we can record index on error and then resume
      total.times do |i|
        # This can throw GeosError even when allow_unsafe: true
        record = file[i]

        # iso a2/a3 are optional fields, we ignore them if the shapefile
        # doesn't provide valid data.
        a2 = record[iso_a2_field]
        a3 = record[iso_a3_field]
        iso_3166_a2 = validate_iso_3166_a2(a2) ? a2 : nil
        iso_3166_a3 = validate_iso_3166_a3(a3) ? a3 : nil

        g = new(
          name: record[name_field],
          iso_3166_a2:,
          iso_3166_a3:
        )

        if crs_is_wgs84
          record_geometry = record.geometry
        else
          # TODO: what might this raise? Might want to cap our total number of
          # errors recorded here
          record_geometry = RGeo::CoordSys::Proj4.transform(
            from_proj4,
            record.geometry,
            to_proj4,
            to_factory
          )
        end

        shape = GeographicItem.make_valid_non_anti_meridian_crossing_shape(
          record_geometry.as_text
        )

        g.build_geographic_item(
          geography: shape
        )

        save_and_clone_to_projects(g, projects, citation)
        r[:num_records_imported] += 1

        # Persist progress only every fifth record to limit tracker writes.
        if i % 5 == 0
          progress_tracker.update!(
            num_records_imported: r[:num_records_imported]
          )
        end
      rescue RGeo::Error::InvalidGeometry, ActiveRecord::RecordInvalid,
             RGeo::Error::GeosError, ActiveRecord::StatementInvalid => e
        # Errors are reported with a 1-based record number.
        #
        # In known instances StatementInvalid is a result of something like:
        # PG::InternalError:
        #   ERROR:  lwgeom_intersection_prec: GEOS Error: TopologyException:
        #   Input geom 0 is invalid: Self-intersection at 185 5 0
        # !! Any containing transaction (from running in a spec e.g.) is now
        # aborted and open, any attempts to interact with the db will now raise
        # PG::InFailedSqlTransaction: ERROR:  current transaction is aborted,
        #   commands ignored until end of transaction block
        process_import_error(progress_tracker, r, i + 1, e.to_s)
      end

      progress_tracker.update!(
        num_records_imported: r[:num_records_imported],
        ended_at: DateTime.now
      )
    ensure
      # The reader was opened without a block, so it has to be closed
      # explicitly, also when an unexpected error escapes the loop.
      file.close
    end
  end

  # Appends one error to the running '; '-separated error string kept in
  # recorder[:error_messages] and writes the whole string to progress_tracker.
  #
  # @param progress_tracker [#update!]
  # @param recorder [Hash] import bookkeeping; :error_messages is updated
  # @param error_index index shown in front of the message
  # @param error_message [String]
  def self.process_import_error(
    progress_tracker, recorder, error_index, error_message
  )
    entry = "#{error_index}: '#{error_message}'"

    # Earlier messages, when there are any, precede the new entry.
    recorder[:error_messages] =
      [recorder[:error_messages], entry].select(&:present?).join('; ')

    progress_tracker.update!(
      error_messages: recorder[:error_messages]
    )
  end

  # Validation: a nil iso_3166_a2 is allowed; any other value must pass
  # validate_iso_3166_a2.
  def iso_3166_a2_is_two_characters
    return if iso_3166_a2.nil? || self.class.validate_iso_3166_a2(iso_3166_a2)

    errors.add(:iso_3166_a2, 'must be exactly two characters')
  end

  # Validation: a nil iso_3166_a3 is allowed; any other value must pass
  # validate_iso_3166_a3.
  def iso_3166_a3_is_three_characters
    return if iso_3166_a3.nil? || self.class.validate_iso_3166_a3(iso_3166_a3)

    errors.add(:iso_3166_a3, 'must be exactly three characters')
  end

  # Destroys the GeographicItem whose id was captured for cleanup, but only if
  # it still exists and reports itself as unreferenced.
  def destroy_geographic_item_if_orphaned
    item = GeographicItem.find_by(id: geographic_item_id_for_cleanup)
    return unless item&.unreferenced_for_cleanup?

    item.destroy!
  end

  # Remembers the current geographic_item_id in geographic_item_id_for_cleanup.
  def capture_geographic_item_id_for_cleanup
    # Snapshot the id up front: the later cleanup must not rely on association
    # state of a record that has already been destroyed.
    id_snapshot = geographic_item_id
    self.geographic_item_id_for_cleanup = id_snapshot
  end
end

#project_idInteger

The project ID

Returns:

  • (Integer)


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
# File 'app/models/gazetteer.rb', line 26

class Gazetteer < ApplicationRecord
  include Housekeeping
  include Shared::Citations
  include Shared::Notes
  include Shared::DataAttributes
  include Shared::AlternateValues
  include Shared::IsData

  # Transient (non-persisted) holder for the geographic_item_id. It is set by
  # the before_destroy callback and read by the after_destroy callback below.
  attr_accessor :geographic_item_id_for_cleanup

  # NOTE(review): presumably read by Shared::AlternateValues to decide which
  # attributes may carry alternate values - confirm against that concern.
  ALTERNATE_VALUES_FOR = [:name].freeze

  # The value returned by #data_origin.
  GZ_DATA_ORIGIN = 'TaxonWorks Gazetteer'.freeze

  # Buffer applied after union (+) and intersection (-) to absorb ~1e-14°
  # floating-point slivers at shared borders. Applied in geometry (degree)
  # space; 1e-7° ≈ ~11 mm at the equator.
  # See combine_rgeo_shapes for full explanation.
  COMBINE_BUFFER_DEGREES = 1e-7

  # gazetteer.geo_object is forwarded to the associated GeographicItem.
  delegate :geo_object, to: :geographic_item

  belongs_to :geographic_item, inverse_of: :gazetteers

  # A Gazetteer that is the shape of any AssertedDistribution cannot be
  # destroyed; the attempt adds an error instead (restrict_with_error).
  has_many :asserted_distributions,
    as: :asserted_distribution_shape,
    inverse_of: :asserted_distribution_shape,
    dependent: :restrict_with_error

  # Normalize the ISO codes (trim whitespace, upcase) before they are
  # validated; blank values are left untouched.
  before_validation do
    self.iso_3166_a2 = iso_3166_a2.strip.upcase if iso_3166_a2.present?
  end
  before_validation do
    self.iso_3166_a3 = iso_3166_a3.strip.upcase if iso_3166_a3.present?
  end

  validates :name, presence: true, length: {minimum: 1}
  validate :iso_3166_a2_is_two_characters
  validate :iso_3166_a3_is_three_characters

  # The id is captured before destroy and the GeographicItem is removed after
  # destroy, but only when nothing else references it (see the two private
  # callbacks at the bottom of the class).
  before_destroy :capture_geographic_item_id_for_cleanup
  after_destroy :destroy_geographic_item_if_orphaned

  accepts_nested_attributes_for :geographic_item

  # @return [Hash] the pieces of a GeoJSON 'Feature': the simple feature for
  #   this Gazetteer with its 'properties' replaced by a 'shape' entry holding
  #   the type ('Gazetteer'), id and name (as 'tag').
  def to_geo_json_feature
    feature = to_simple_json_feature

    # cf. GeographicArea
    feature['properties'] = {
      'shape' => { 'type' => 'Gazetteer', 'id' => id, 'tag' => name }
    }

    feature
  end

  # @return [Hash] a GeoJSON 'Feature' with empty 'properties' whose
  #   'geometry' is the GeoJSON of this Gazetteer's GeographicItem.
  def to_simple_json_feature
    geometry = geographic_item.to_geo_json

    { 'type' => 'Feature', 'properties' => {}, 'geometry' => geometry }
  end

  # Builds (does not save) a GeographicItem for this gazetteer from the
  # combination of the input shapes.
  #
  # @param shapes [Hash]
  #   geojson: array of geojson feature hashes,
  #   wkt: array of wkt strings,
  #   points: array of geojson feature points
  #   ga_combine: array of GA ids
  #   gz_combine: array of GZ ids
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # @return the built GeographicItem, or nil when the shapes could not be
  #   combined - in which case the reason is added to errors[:base].
  def build_gi_from_shapes(shapes, operation_is_union=true)
    begin
      rgeo_shape = self.class.combine_shapes_to_rgeo(shapes, operation_is_union)
    rescue TaxonWorks::Error => e
      # Record the message String, not the exception object itself, so that
      # errors always holds plain text.
      errors.add(:base, e.message)
      return
    end

    build_geographic_item(
      geography: rgeo_shape
    )
  end

  # Converts every kind of input shape to rgeo and combines them all.
  #
  # @param shapes [Hash] as in build_gi_from_shapes
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # @return A single rgeo shape that is the combination of all of the input shapes
  # Raises TaxonWorks::Error on error
  def self.combine_shapes_to_rgeo(shapes, operation_is_union)
    input_keys = %i[geojson wkt points ga_combine gz_combine]
    if input_keys.all? { |key| shapes[key].blank? }
      raise TaxonWorks::Error, 'No shapes provided'
    end

    # Shapes supplied directly by the user: drawn shapes, WKT and points.
    user_input_shapes =
      convert_geojson_to_rgeo(shapes[:geojson]) +
      convert_wkt_to_rgeo(shapes[:wkt]) +
      convert_geojson_to_rgeo(shapes[:points])

    # Shapes looked up from existing records.
    ga_rgeo = convert_ga_to_rgeo(shapes[:ga_combine])
    gz_rgeo = convert_gz_to_rgeo(shapes[:gz_combine])

    combine_rgeo_shapes(
      user_input_shapes + ga_rgeo + gz_rgeo, operation_is_union
    )
  # InvalidGeometry is more specific than RGeoError; both are re-raised as
  # TaxonWorks::Error.
  rescue RGeo::Error::InvalidGeometry, RGeo::Error::RGeoError => e
    raise TaxonWorks::Error, e
  end

  # Decodes GeoJSON features into rgeo shapes. A Point feature carrying a
  # 'radius' property is turned into a circle via GeographicItem.circle.
  #
  # @param shapes [Array] geojson feature hashes; blank input yields []
  # @return [Array] of RGeo::Geographic::Projected*Impl
  # Raises RGeo::Error::InvalidGeometry on error
  def self.convert_geojson_to_rgeo(shapes)
    return [] if shapes.blank?

    shapes.map do |geojson|
      # Raises RGeo::Error::InvalidGeometry on error
      feature = RGeo::GeoJSON.decode(geojson, geo_factory: Gis::FACTORY)
      geometry = feature.geometry

      has_radius = geometry.geometry_type.to_s == 'Point' &&
        feature.properties['radius'].present?
      circle = has_radius ?
        GeographicItem.circle(geometry, feature.properties['radius']) : nil

      GeographicItem.make_valid_non_anti_meridian_crossing_shape(
        (circle || geometry).as_text
      )
    end
  end

  # @param ga_ids [Array] GeographicArea ids; blank input yields []
  # @return [Array] the geo_object of each GeographicArea found for ga_ids
  def self.convert_ga_to_rgeo(ga_ids)
    return [] if ga_ids.blank?

    GeographicArea.where(id: ga_ids).map(&:geo_object)
  end

  # @param gz_ids [Array] Gazetteer ids; blank input yields []
  # @return [Array] the geo_object of each Gazetteer found for gz_ids
  def self.convert_gz_to_rgeo(gz_ids)
    return [] if gz_ids.blank?

    Gazetteer.where(id: gz_ids).map(&:geo_object)
  end

  # Parses WKT strings into rgeo shapes.
  #
  # @param wkt_shapes [Array] wkt strings; blank input yields []
  # @return [Array] of RGeo::Geographic::Projected*Impl
  # Raises RGeo::Error::RGeoError on error
  def self.convert_wkt_to_rgeo(wkt_shapes)
    return [] if wkt_shapes.blank?

    wkt_shapes.map do |wkt|
      parsed =
        begin
          ::Gis::FACTORY.parse_wkt(wkt)
        rescue RGeo::Error::RGeoError => e
          # Same exception class, with a message prefix identifying the source.
          raise e.exception("Invalid WKT: #{e.message}")
        end

      GeographicItem.make_valid_non_anti_meridian_crossing_shape(parsed.as_text)
    end
  end

  # Combines rgeo shapes into one shape using PostGIS.
  #
  # @param [Array] rgeo_shapes of RGeo::Geographic::Projected*Impl
  # @param operation_is_union [Boolean] Union if true, intersection if false
  # @return [RGeo::Geographic::Projected*Impl] A single shape combining all of the
  #   input shapes. A single input shape is returned as is (no buffer applied).
  # Raises TaxonWorks::Error on error, including when there are no shapes to
  #   combine or when the combined shape is empty.
  def self.combine_rgeo_shapes(rgeo_shapes, operation_is_union)
    # Without this guard an empty list (e.g. ids that matched no records)
    # would generate invalid SQL and surface as a database error.
    if rgeo_shapes.nil? || rgeo_shapes.empty?
      raise TaxonWorks::Error, 'No shapes to combine'
    end

    return rgeo_shapes[0] if rgeo_shapes.count == 1

    connection = ActiveRecord::Base.connection
    geom_exprs = rgeo_shapes.map { |s|
      "ST_GeomFromText(#{connection.quote(s.as_text)}, 4326)"
    }

    # Both branches drop Z values (ST_Buffer is 2D-only); ST_Force3D below
    # restores a Z dimension on the result.
    buffered_expr =
      if operation_is_union
        # Use PostGIS ST_UnaryUnion rather than RGeo's iterative .union().
        # Adjacent GA shapes store their shared border vertices with slightly
        # different coordinate values — even shapes from the same source dataset.
        # GEOS must find where these nearly-coincident edges intersect to compute
        # the union boundary. That intersection is numerically unstable, so the
        # union boundary ends up at coordinates that match neither input, leaving
        # a sliver where ST_CoveredBy(input_GA, Gaz) returns false. This causes
        # the OTU spatial filter to silently drop asserted-distribution OTUs for
        # any GA whose shape fails the coverage check.
        #
        # A post-union ST_Buffer (COMBINE_BUFFER_DEGREES) absorbs the sliver.
        # Empirical testing across 13 country-pair/group/chain combinations found
        # the minimum buffer needed was ~1e-11°; 1e-7° (~11 mm at the equator)
        # gives a comfortable margin and is imperceptible for any biodiversity
        # application.
        "ST_Buffer(ST_UnaryUnion(ST_Collect(ARRAY[#{geom_exprs.join(', ')}])), #{COMBINE_BUFFER_DEGREES})"
      else # Intersection
        # See discussion in the union case for motivation; here the buffer is
        # negative, shrinking the result.
        intersection_expr = geom_exprs.reduce { |acc, g| "ST_Intersection(#{acc}, #{g})" }
        "ST_Buffer(#{intersection_expr}, -#{COMBINE_BUFFER_DEGREES})"
      end

    # ST_MakeValid is applied after ST_Buffer as a safety net — neither
    # GEOS nor PostGIS guarantees topologically valid output from geometric
    # operations.
    result_wkb = connection.select_value(
      "SELECT ST_Force3D(ST_MakeValid(#{buffered_expr}))"
    )
    u = Gis::FACTORY.parse_wkb(result_wkb)

    if u.empty?
      message = operation_is_union ?
        "Empty union can't be saved!" : "Empty intersection can't be saved!"
      raise TaxonWorks::Error, message
    end

    u
  end

  # Saves gz in the current project and a copy of it in each other project.
  #
  # @param gz [Gazetteer] Unsaved Gazetteer to save and clone from
  # @param project_ids [Array] project ids to clone gz into - gz is always
  #   saved to the current project. Duplicates and the current project's id
  #   are ignored; the passed array itself is not modified.
  #   If saves occur in more than one project then all saves occur in a
  #   transaction.
  # @param citation [Hash] Citation object to save to each Gazetteer created
  # Raises ActiveRecord::RecordInvalid on error
  def self.save_and_clone_to_projects(gz, project_ids, citation = nil)
    # Build a new list rather than using delete/uniq!, which would alter the
    # caller's array (and fail on a frozen or nil argument).
    clone_project_ids = Array(project_ids).uniq - [Current.project_id]

    if clone_project_ids.empty?
      perform_save_and_clone_to_projects(gz, [], citation)
    else
      Gazetteer.transaction do
        perform_save_and_clone_to_projects(gz, clone_project_ids, citation)
      end
    end
  end

  # @param a2 [Object] candidate ISO 3166 alpha-2 code
  # @return [Boolean] true only when a2 is a String (or String subclass) that,
  #   once stripped and upcased, is exactly two letters A-Z. nil, blank and
  #   non-String values yield false.
  def self.validate_iso_3166_a2(a2)
    # is_a? rather than a class-name comparison so String subclasses are
    # accepted; a blank string fails the pattern below.
    return false unless a2.is_a?(String)

    /\A[A-Z]{2}\z/.match?(a2.strip.upcase)
  end

  # @param a3 [Object] candidate ISO 3166 alpha-3 code
  # @return [Boolean] true only when a3 is a String (or String subclass) that,
  #   once stripped and upcased, is exactly three letters A-Z. nil, blank and
  #   non-String values yield false.
  def self.validate_iso_3166_a3(a3)
    # is_a? rather than a class-name comparison so String subclasses are
    # accepted; a blank string fails the pattern below.
    return false unless a3.is_a?(String)

    /\A[A-Z]{3}\z/.match?(a3.strip.upcase)
  end

  # Imports the records of an uploaded shapefile as Gazetteers.
  #
  # @param shapefile [Hash] ids of the Documents making up the shapefile
  #   (:shp_doc_id, :shx_doc_id, :dbf_doc_id, :prj_doc_id, optional
  #   :cpg_doc_id) and the names of the fields to read (:name_field,
  #   :iso_a2_field, :iso_a3_field)
  # @param citation_options [Hash] :citation is attached to each Gazetteer
  #   when :cite_gzs is truthy
  # @param progress_tracker record updated (via update!) with progress and
  #   error messages
  # @param projects [Array] ids of the projects to import into
  #
  # A missing Document is reported on progress_tracker and ends the import.
  def self.import_gzs_from_shapefile(
    shapefile, citation_options, progress_tracker, projects
  )
    begin
      shp_doc = Document.find(shapefile[:shp_doc_id])
      shx_doc = Document.find(shapefile[:shx_doc_id])
      dbf_doc = Document.find(shapefile[:dbf_doc_id])
      prj_doc = Document.find(shapefile[:prj_doc_id])
      # The cpg file is optional; a blank id means none was provided.
      cpg_doc = shapefile[:cpg_doc_id].present? ?
        Document.find(shapefile[:cpg_doc_id]) : nil
    rescue ActiveRecord::RecordNotFound => e
      progress_tracker.update!(
        num_records_imported: 0,
        error_messages: e.message,
        started_at: DateTime.now,
        ended_at: DateTime.now
      )
      return
    end
    name_field = shapefile[:name_field]

    # The above shapefile files are unlikely to all be in the same directory as
    # required by rgeo-shapefile, so create symbolic links to each in a new
    # temporary folder.
    tmp_dir = Rails.root.join('tmp', 'shapefiles', SecureRandom.hex)
    FileUtils.mkdir_p(tmp_dir)

    shp_link = File.join(tmp_dir, 'shapefile.shp')
    links = []

    begin
      {
        shp_link => shp_doc,
        File.join(tmp_dir, 'shapefile.shx') => shx_doc,
        File.join(tmp_dir, 'shapefile.dbf') => dbf_doc,
        File.join(tmp_dir, 'shapefile.prj') => prj_doc,
        File.join(tmp_dir, 'shapefile.cpg') => cpg_doc
      }.each do |link, doc|
        next if doc.nil?

        FileUtils.ln_s(doc.document_file.path, link)
        links << link
      end

      prj = File.read(prj_doc.document_file.path)
      crs = RGeo::CoordSys::CS.create_from_wkt(prj)

      citation = citation_options[:cite_gzs] ? citation_options[:citation] : nil

      process_shape_file(
        shp_link, crs, name_field,
        shapefile[:iso_a2_field], shapefile[:iso_a3_field],
        citation, progress_tracker, projects
      )
    ensure
      # Always remove the links and the temporary folder, even when linking,
      # CRS parsing or the import itself raised.
      FileUtils.rm_f(links)
      FileUtils.rmdir(tmp_dir)
    end
  end

  # Finds the Gazetteers used as the shape of AssertedDistributions that were
  # cited by the user, in the project, within the last week (could
  # parameterize). No limit is applied here; callers take what they need.
  #
  # @param used_on [String] currently `AssertedDistribution`
  # @return [Array, nil]
  #    the unique ids of the gazetteers, or nil for any other `used_on`
  def self.used_recently(user_id, project_id, used_on = 'AssertedDistribution')
    return unless 'AssertedDistribution' == used_on

    citations = Citation.arel_table

    # Select manager for the user's citations of the last week, aliased so it
    # can be joined against as a table.
    recent = citations
      .project(
        citations['citation_object_id'],
        citations['citation_object_type'],
        citations['created_at']
      )
      .from(citations)
      .where(citations['created_at'].gt(1.week.ago))
      .where(citations['created_by_id'].eq(user_id))
      .where(citations['project_id'].eq(project_id))
      .order(citations['created_at'].desc)
      .as('recent_t')

    asserted_distributions = AssertedDistribution.arel_table
    cites_the_asserted_distribution =
      recent['citation_object_id'].eq(asserted_distributions['id'])
        .and(recent['citation_object_type'].eq('AssertedDistribution'))

    AssertedDistribution
      .joins(
        Arel::Nodes::InnerJoin.new(
          recent, Arel::Nodes::On.new(cites_the_asserted_distribution)
        )
      )
      .where(asserted_distribution_shape_type: 'Gazetteer')
      .pluck(:asserted_distribution_shape_id)
      .uniq
  end

  # @param target [String] currently only `AssertedDistribution` (also used
  #   when target is blank)
  # @return [Hash] gazetteers optimized for user selection, with keys
  #   :quick, :pinboard and :recent, each an Array of Gazetteers
  def self.select_optimized(user_id, project_id, target = 'AssertedDistribution')
    target = 'AssertedDistribution' if target.blank?
    recent_ids = used_recently(user_id, project_id, target) || []

    selection = {
      quick: [],
      pinboard: Gazetteer.pinned_by(user_id).where(pinboard_items: {project_id:}).to_a,
      recent: []
    }

    # Lazily built; each is only queried where .to_a / .call appears below.
    inserted = Gazetteer.pinned_by(user_id).pinboard_inserted.where(pinboard_items: {project_id:})
    named = ->(ids) { Gazetteer.where('"gazetteers"."id" IN (?)', ids).order(:name).to_a }

    if recent_ids.empty?
      selection[:quick] = inserted.to_a
      return selection
    end

    selection[:recent] = named.call(recent_ids.first(15)) if 'AssertedDistribution' == target
    selection[:quick] = (inserted.to_a + named.call(recent_ids.first(5))).uniq

    selection
  end

  # @return [Hash] currently always empty.
  # Return a2/a3 country?
  def geographic_name_classification = {}

  # @return [Array] the single GeographicItem of this Gazetteer
  def geographic_items = [geographic_item]

  # @return the GeographicItem of this Gazetteer
  def default_geographic_item = geographic_item

  # @return the id of the GeographicItem of this Gazetteer
  def default_geographic_item_id = geographic_item.id

  # @return [String] GZ_DATA_ORIGIN
  def data_origin = GZ_DATA_ORIGIN

  private

  # Saves gz, then saves a dup of it in each of project_ids. When a citation is
  # given, one is built on every Gazetteer saved, with the project_id of that
  # Gazetteer's project.
  #
  # NOTE: `private` does not apply to methods defined with `def self.`, so
  # this remains callable from outside the class.
  #
  # @param gz [Gazetteer] the Gazetteer to save to the current project
  # @param project_ids [Array] the projects to clone to - does not include the
  #   current project which gz is saved to.
  # @param citation [Hash, nil] citation attributes
  # Raises (from save!) when any record cannot be saved
  def self.perform_save_and_clone_to_projects(gz, project_ids, citation)
    cite = citation.present?

    gz.citations.build(citation.merge({ project_id: Current.project_id })) if cite
    gz.save!

    project_ids.each do |pr_id|
      copy = gz.dup
      copy.project_id = pr_id
      copy.citations.build(citation.merge({ project_id: pr_id })) if cite
      copy.save!
    end
  end

  # Reads each record of the shapefile, builds a Gazetteer with a
  # GeographicItem from it (transforming to our WGS84 coordinates when
  # needed) and saves it to the given projects. Progress and per-record
  # errors are written to progress_tracker; a failing record is recorded and
  # the import continues with the next one.
  #
  # NOTE: `private` does not apply to methods defined with `def self.`, so
  # this remains callable from outside the class.
  #
  # @param shpfile [String] path of the .shp file
  # @param crs the coordinate system of the shapefile
  # @param name_field [String] record field holding the Gazetteer name
  # @param iso_a2_field [String] record field holding the ISO 3166 a2 code
  # @param iso_a3_field [String] record field holding the ISO 3166 a3 code
  # @param citation [Hash, nil] citation to attach to each Gazetteer
  # @param progress_tracker record updated (via update!) as the import runs
  # @param projects [Array] ids of the projects to save each Gazetteer to
  def self.process_shape_file(
    shpfile, crs, name_field, iso_a2_field, iso_a3_field, citation,
    progress_tracker, projects
  )
    r = {
      num_records: 0,
      num_records_imported: 0,
      error_messages: nil,
    }

    # We'll need to transform from whatever CRS the shapefile is in to our WGS84
    # coordinates.
    if (crs_is_wgs84 = Vendor::Rgeo.coord_sys_is_wgs84?(crs))
      from_factory = Gis::FACTORY
    else
      from_proj4 = RGeo::CoordSys::Proj4.create(crs.to_s)
      from_factory = from_proj4.projected? ?
        # Shapefiles using a projected CRS always store their geometries using
        # projected coordinates.
        RGeo::Geographic.projected_factory(
          coord_sys: from_proj4, has_z_coordinate: true
        ).projection_factory :
        RGeo::Geographic.spherical_factory( # geographic? true
          coord_sys: from_proj4, has_z_coordinate: true
        )

      to_proj4 = Gis::FACTORY.coord_sys
      to_factory = Gis::FACTORY
    end

    begin
      # TODO: https://github.com/rgeo/rgeo-shapefile could use a fork and updates to dbf/rgeo
      file = RGeo::Shapefile::Reader.open(
        shpfile, factory: from_factory, allow_unsafe: true
      )
    rescue Errno::ENOENT => e
      progress_tracker.update!(
        num_records_imported: 0,
        error_messages: e.message,
        started_at: DateTime.now,
        ended_at: DateTime.now
      )
      return
    end

    begin
      r[:num_records] = file.num_records

      progress_tracker.update!(
        num_records: file.num_records,
        project_names:
          Project.where(id: projects).order(:name).pluck(:name).join(', '),
        started_at: DateTime.now
      )

      # Iterate over an index so we can record index on error and then resume
      file.num_records.times do |i|
        begin
          # This can throw GeosError even when allow_unsafe: true
          record = file[i]

          # iso a2/a3 are optional fields, we ignore them if the shapefile
          # doesn't provide valid data.
          a2 = record[iso_a2_field]
          a3 = record[iso_a3_field]
          iso_3166_a2 = validate_iso_3166_a2(a2) ? a2 : nil
          iso_3166_a3 = validate_iso_3166_a3(a3) ? a3 : nil

          g = new(
            name: record[name_field],
            iso_3166_a2:,
            iso_3166_a3:
          )

          if crs_is_wgs84
            record_geometry = record.geometry
          else
            # TODO: what might this raise? Might want to cap our total number of
            # errors recorded here
            record_geometry = RGeo::CoordSys::Proj4.transform(
              from_proj4,
              record.geometry,
              to_proj4,
              to_factory
            )
          end

          shape = GeographicItem.make_valid_non_anti_meridian_crossing_shape(
            record_geometry.as_text
          )

          g.build_geographic_item(
            geography: shape
          )

          save_and_clone_to_projects(g, projects, citation)
          r[:num_records_imported] = r[:num_records_imported] + 1

          # Only write progress every fifth record.
          if i % 5 == 0
            progress_tracker.update!(
              num_records_imported: r[:num_records_imported]
            )
          end

        rescue RGeo::Error::InvalidGeometry, ActiveRecord::RecordInvalid,
               RGeo::Error::GeosError, ActiveRecord::StatementInvalid => e
          # In known instances ActiveRecord::StatementInvalid is a result of
          # something like:
          # PG::InternalError:
          #   ERROR:  lwgeom_intersection_prec: GEOS Error: TopologyException:
          #   Input geom 0 is invalid: Self-intersection at 185 5 0
          # !! Any containing transaction (from running in a spec e.g.) is now
          # aborted and open, any attempts to interact with the db will now raise
          # PG::InFailedSqlTransaction: ERROR:  current transaction is aborted,
          #   commands ignored until end of transaction block
          #
          # Errors are reported with a 1-based record number.
          process_import_error(progress_tracker, r, i + 1, e.to_s)
        end
      end

      progress_tracker.update!(
        num_records_imported: r[:num_records_imported],
        ended_at: DateTime.now
      )
    ensure
      # The reader was opened without a block, so it must be closed here.
      file.close
    end
  end

  # Appends an error to the running, '; '-separated list of import errors held
  # in recorder[:error_messages] and writes the whole list to progress_tracker.
  #
  # @param progress_tracker record updated (via update!) with the messages
  # @param recorder [Hash] import state; its :error_messages entry is replaced
  # @param error_index [Integer] number of the record that failed
  # @param error_message [String] description of the failure
  def self.process_import_error(
    progress_tracker, recorder, error_index, error_message
  )
    entry = "#{error_index}: '#{error_message}'"
    previous = recorder[:error_messages]

    recorder[:error_messages] =
      if previous.present?
        "#{previous}; #{entry}"
      else
        entry
      end

    progress_tracker.update!(error_messages: recorder[:error_messages])
  end

  # Validation: a nil iso_3166_a2 is allowed; any other value must pass
  # validate_iso_3166_a2, otherwise an error is added to :iso_3166_a2.
  def iso_3166_a2_is_two_characters
    return if iso_3166_a2.nil? || self.class.validate_iso_3166_a2(iso_3166_a2)

    errors.add(:iso_3166_a2, 'must be exactly two characters')
  end

  # Validation: a nil iso_3166_a3 is allowed; any other value must pass
  # validate_iso_3166_a3, otherwise an error is added to :iso_3166_a3.
  def iso_3166_a3_is_three_characters
    return if iso_3166_a3.nil? || self.class.validate_iso_3166_a3(iso_3166_a3)

    errors.add(:iso_3166_a3, 'must be exactly three characters')
  end

  # after_destroy: destroys the GeographicItem whose id was captured before
  # this Gazetteer was destroyed, but only if it still exists and reports
  # itself as unreferenced_for_cleanup?.
  def destroy_geographic_item_if_orphaned
    item = GeographicItem.find_by(id: geographic_item_id_for_cleanup)
    return unless item&.unreferenced_for_cleanup?

    item.destroy!
  end

  # before_destroy: remembers the geographic_item_id so that the cleanup done
  # after destroy does not depend on association state on the destroyed
  # record.
  def capture_geographic_item_id_for_cleanup
    id_before_destroy = geographic_item_id
    self.geographic_item_id_for_cleanup = id_before_destroy
  end
end

Class Method Details

.combine_rgeo_shapes(rgeo_shapes, operation_is_union) ⇒ RGeo::Geographic::Projected*Impl

Raises TaxonWorks::Error on error

Parameters:

  • rgeo_shapes (Array)

    of RGeo::Geographic::Projected*Impl

  • operation_is_union (Boolean)

    Union if true, intersection if false

Returns:

  • (RGeo::Geographic::Projected*Impl)

    A single shape combining all of the input shapes



204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# File 'app/models/gazetteer.rb', line 204

# Combines rgeo shapes into one shape using PostGIS.
#
# @param [Array] rgeo_shapes of RGeo::Geographic::Projected*Impl
# @param operation_is_union [Boolean] Union if true, intersection if false
# @return [RGeo::Geographic::Projected*Impl] A single shape combining all of the
#   input shapes. A single input shape is returned as is (no buffer applied).
# Raises TaxonWorks::Error on error, including when there are no shapes to
#   combine or when the combined shape is empty.
def self.combine_rgeo_shapes(rgeo_shapes, operation_is_union)
  # Without this guard an empty list (e.g. ids that matched no records)
  # would generate invalid SQL and surface as a database error.
  if rgeo_shapes.nil? || rgeo_shapes.empty?
    raise TaxonWorks::Error, 'No shapes to combine'
  end

  return rgeo_shapes[0] if rgeo_shapes.count == 1

  connection = ActiveRecord::Base.connection
  geom_exprs = rgeo_shapes.map { |s|
    "ST_GeomFromText(#{connection.quote(s.as_text)}, 4326)"
  }

  # Both branches drop Z values (ST_Buffer is 2D-only); ST_Force3D below
  # restores a Z dimension on the result.
  buffered_expr =
    if operation_is_union
      # Use PostGIS ST_UnaryUnion rather than RGeo's iterative .union().
      # Adjacent GA shapes store their shared border vertices with slightly
      # different coordinate values — even shapes from the same source dataset.
      # GEOS must find where these nearly-coincident edges intersect to compute
      # the union boundary. That intersection is numerically unstable, so the
      # union boundary ends up at coordinates that match neither input, leaving
      # a sliver where ST_CoveredBy(input_GA, Gaz) returns false. This causes
      # the OTU spatial filter to silently drop asserted-distribution OTUs for
      # any GA whose shape fails the coverage check.
      #
      # A post-union ST_Buffer (COMBINE_BUFFER_DEGREES) absorbs the sliver.
      # Empirical testing across 13 country-pair/group/chain combinations found
      # the minimum buffer needed was ~1e-11°; 1e-7° (~11 mm at the equator)
      # gives a comfortable margin and is imperceptible for any biodiversity
      # application.
      "ST_Buffer(ST_UnaryUnion(ST_Collect(ARRAY[#{geom_exprs.join(', ')}])), #{COMBINE_BUFFER_DEGREES})"
    else # Intersection
      # See discussion in the union case for motivation; here the buffer is
      # negative, shrinking the result.
      intersection_expr = geom_exprs.reduce { |acc, g| "ST_Intersection(#{acc}, #{g})" }
      "ST_Buffer(#{intersection_expr}, -#{COMBINE_BUFFER_DEGREES})"
    end

  # ST_MakeValid is applied after ST_Buffer as a safety net — neither
  # GEOS nor PostGIS guarantees topologically valid output from geometric
  # operations.
  result_wkb = connection.select_value(
    "SELECT ST_Force3D(ST_MakeValid(#{buffered_expr}))"
  )
  u = Gis::FACTORY.parse_wkb(result_wkb)

  if u.empty?
    message = operation_is_union ?
      "Empty union can't be saved!" : "Empty intersection can't be saved!"
    raise TaxonWorks::Error, message
  end

  u
end

.combine_shapes_to_rgeo(shapes, operation_is_union) ⇒ Object

Raises TaxonWorks::Error on error

Parameters:

  • shapes (Hash)

    as in build_gi_from_shapes

  • operation_is_union (Boolean)

    Union if true, intersection if false

Returns:

  • A single rgeo shape that is the combination of all of the input shapes



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'app/models/gazetteer.rb', line 118

# Converts every kind of input shape to rgeo and combines them all.
#
# @param shapes [Hash] with optional keys :geojson, :wkt, :points,
#   :ga_combine and :gz_combine; at least one must be non-blank
# @param operation_is_union [Boolean] Union if true, intersection if false
# @return A single rgeo shape that is the combination of all of the input shapes
# Raises TaxonWorks::Error on error
def self.combine_shapes_to_rgeo(shapes, operation_is_union)
  input_keys = %i[geojson wkt points ga_combine gz_combine]
  if input_keys.all? { |key| shapes[key].blank? }
    raise TaxonWorks::Error, 'No shapes provided'
  end

  # Shapes supplied directly by the user: drawn shapes, WKT and points.
  user_input_shapes =
    convert_geojson_to_rgeo(shapes[:geojson]) +
    convert_wkt_to_rgeo(shapes[:wkt]) +
    convert_geojson_to_rgeo(shapes[:points])

  # Shapes looked up from existing records.
  ga_rgeo = convert_ga_to_rgeo(shapes[:ga_combine])
  gz_rgeo = convert_gz_to_rgeo(shapes[:gz_combine])

  combine_rgeo_shapes(
    user_input_shapes + ga_rgeo + gz_rgeo, operation_is_union
  )
# InvalidGeometry is more specific than RGeoError; both are re-raised as
# TaxonWorks::Error.
rescue RGeo::Error::InvalidGeometry, RGeo::Error::RGeoError => e
  raise TaxonWorks::Error, e
end

.convert_ga_to_rgeo(ga_ids) ⇒ Object



171
172
173
174
175
# File 'app/models/gazetteer.rb', line 171

# @param ga_ids [Array] GeographicArea ids; blank input yields []
# @return [Array] the geo_object of each GeographicArea found for ga_ids
def self.convert_ga_to_rgeo(ga_ids)
  return [] if ga_ids.blank?

  GeographicArea.where(id: ga_ids).map(&:geo_object)
end

.convert_geojson_to_rgeo(shapes) ⇒ Array

Raises RGeo::Error::InvalidGeometry on error

Returns:

  • (Array)

    of RGeo::Geographic::Projected*Impl



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'app/models/gazetteer.rb', line 148

# Decodes GeoJSON features into rgeo shapes. A Point feature carrying a
# 'radius' property is turned into a circle via GeographicItem.circle.
#
# @param shapes [Array] geojson feature hashes; blank input yields []
# @return [Array] one shape per input, each passed through
#   GeographicItem.make_valid_non_anti_meridian_crossing_shape
def self.convert_geojson_to_rgeo(shapes)
  return [] if shapes.blank?

  shapes.map do |geojson|
    # Raises RGeo::Error::InvalidGeometry on error
    feature = RGeo::GeoJSON.decode(geojson, geo_factory: Gis::FACTORY)
    geometry = feature.geometry

    has_radius = geometry.geometry_type.to_s == 'Point' &&
      feature.properties['radius'].present?
    circle = has_radius ?
      GeographicItem.circle(geometry, feature.properties['radius']) : nil

    GeographicItem.make_valid_non_anti_meridian_crossing_shape(
      (circle || geometry).as_text
    )
  end
end

.convert_gz_to_rgeo(gz_ids) ⇒ Object



177
178
179
180
181
# File 'app/models/gazetteer.rb', line 177

# @param gz_ids [Array] Gazetteer ids; blank input yields []
# @return [Array] the geo_object of each Gazetteer found for gz_ids
def self.convert_gz_to_rgeo(gz_ids)
  return [] if gz_ids.blank?

  Gazetteer.where(id: gz_ids).map(&:geo_object)
end

.convert_wkt_to_rgeo(wkt_shapes) ⇒ Array

Raises RGeo::Error::RGeoError on error

Returns:

  • (Array)

    of RGeo::Geographic::Projected*Impl



185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'app/models/gazetteer.rb', line 185

# Parses WKT strings into rgeo shapes.
#
# @param wkt_shapes [Array] wkt strings; blank input yields []
# @return [Array] one shape per input, each passed through
#   GeographicItem.make_valid_non_anti_meridian_crossing_shape
# Raises RGeo::Error::RGeoError (message prefixed 'Invalid WKT: ') when a
#   string cannot be parsed
def self.convert_wkt_to_rgeo(wkt_shapes)
  return [] if wkt_shapes.blank?

  wkt_shapes.map do |wkt|
    parsed =
      begin
        ::Gis::FACTORY.parse_wkt(wkt)
      rescue RGeo::Error::RGeoError => e
        # Same exception class, with a message prefix identifying the source.
        raise e.exception("Invalid WKT: #{e.message}")
      end

    GeographicItem.make_valid_non_anti_meridian_crossing_shape(parsed.as_text)
  end
end

.import_gzs_from_shapefile(shapefile, citation_options, progress_tracker, projects) ⇒ Object



288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
# File 'app/models/gazetteer.rb', line 288

# Imports the records of an uploaded shapefile as Gazetteers.
#
# @param shapefile [Hash] ids of the Documents making up the shapefile
#   (:shp_doc_id, :shx_doc_id, :dbf_doc_id, :prj_doc_id, optional
#   :cpg_doc_id) and the names of the fields to read (:name_field,
#   :iso_a2_field, :iso_a3_field)
# @param citation_options [Hash] :citation is attached to each Gazetteer
#   when :cite_gzs is truthy
# @param progress_tracker record updated (via update!) with progress and
#   error messages
# @param projects [Array] ids of the projects to import into
#
# A missing Document is reported on progress_tracker and ends the import.
def self.import_gzs_from_shapefile(
  shapefile, citation_options, progress_tracker, projects
)
  begin
    shp_doc = Document.find(shapefile[:shp_doc_id])
    shx_doc = Document.find(shapefile[:shx_doc_id])
    dbf_doc = Document.find(shapefile[:dbf_doc_id])
    prj_doc = Document.find(shapefile[:prj_doc_id])
    # The cpg file is optional; a blank id means none was provided.
    cpg_doc = shapefile[:cpg_doc_id].present? ?
      Document.find(shapefile[:cpg_doc_id]) : nil
  rescue ActiveRecord::RecordNotFound => e
    progress_tracker.update!(
      num_records_imported: 0,
      error_messages: e.message,
      started_at: DateTime.now,
      ended_at: DateTime.now
    )
    return
  end
  name_field = shapefile[:name_field]

  # The above shapefile files are unlikely to all be in the same directory as
  # required by rgeo-shapefile, so create symbolic links to each in a new
  # temporary folder.
  tmp_dir = Rails.root.join('tmp', 'shapefiles', SecureRandom.hex)
  FileUtils.mkdir_p(tmp_dir)

  shp_link = File.join(tmp_dir, 'shapefile.shp')
  links = []

  begin
    {
      shp_link => shp_doc,
      File.join(tmp_dir, 'shapefile.shx') => shx_doc,
      File.join(tmp_dir, 'shapefile.dbf') => dbf_doc,
      File.join(tmp_dir, 'shapefile.prj') => prj_doc,
      File.join(tmp_dir, 'shapefile.cpg') => cpg_doc
    }.each do |link, doc|
      next if doc.nil?

      FileUtils.ln_s(doc.document_file.path, link)
      links << link
    end

    prj = File.read(prj_doc.document_file.path)
    crs = RGeo::CoordSys::CS.create_from_wkt(prj)

    citation = citation_options[:cite_gzs] ? citation_options[:citation] : nil

    process_shape_file(
      shp_link, crs, name_field,
      shapefile[:iso_a2_field], shapefile[:iso_a3_field],
      citation, progress_tracker, projects
    )
  ensure
    # Always remove the links and the temporary folder, even when linking,
    # CRS parsing or the import itself raised.
    FileUtils.rm_f(links)
    FileUtils.rmdir(tmp_dir)
  end
end

.perform_save_and_clone_to_projects(gz, project_ids, citation) ⇒ Object

Saves gz to the current project, then saves a copy of it to each of the given projects.

Parameters:

  • project_ids (Array)

    the projects to clone to - does not include the current project, which gz is saved to.



424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
# File 'app/models/gazetteer.rb', line 424

# Saves gz, then saves a dup of it in each of project_ids. When a citation is
# given, one is built on every Gazetteer saved, with the project_id of that
# Gazetteer's project.
#
# @param gz [Gazetteer] the Gazetteer to save to the current project
# @param project_ids [Array] the projects to clone to - does not include the
#   current project which gz is saved to.
# @param citation [Hash, nil] citation attributes
# Raises (from save!) when any record cannot be saved
def self.perform_save_and_clone_to_projects(gz, project_ids, citation)
  cite = citation.present?

  gz.citations.build(citation.merge({ project_id: Current.project_id })) if cite
  gz.save!

  project_ids.each do |pr_id|
    copy = gz.dup
    copy.project_id = pr_id
    copy.citations.build(citation.merge({ project_id: pr_id })) if cite
    copy.save!
  end
end

.process_import_error(progress_tracker, recorder, error_index, error_message) ⇒ Object



568
569
570
571
572
573
574
575
576
577
578
# File 'app/models/gazetteer.rb', line 568

# Appends an error to the running, '; '-separated list of import errors held
# in recorder[:error_messages] and writes the whole list to progress_tracker.
#
# @param progress_tracker record updated (via update!) with the messages
# @param recorder [Hash] import state; its :error_messages entry is replaced
# @param error_index [Integer] number of the record that failed
# @param error_message [String] description of the failure
def self.process_import_error(
  progress_tracker, recorder, error_index, error_message
)
  entry = "#{error_index}: '#{error_message}'"
  previous = recorder[:error_messages]

  recorder[:error_messages] =
    if previous.present?
      "#{previous}; #{entry}"
    else
      entry
    end

  progress_tracker.update!(error_messages: recorder[:error_messages])
end

.process_shape_file(shpfile, crs, name_field, iso_a2_field, iso_a3_field, citation, progress_tracker, projects) ⇒ Object



440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
# File 'app/models/gazetteer.rb', line 440

# Imports every record of a shapefile as a Gazetteer, saving each one through
# save_and_clone_to_projects and reporting progress and per-record errors on
# progress_tracker. A record that raises one of the rescued errors is logged
# via process_import_error and the import continues with the next record.
#
# The shapefile reader is closed when the method exits, whether it returns
# normally or an unrescued error propagates.
#
# @param shpfile path handed to RGeo::Shapefile::Reader.open
# @param crs coordinate reference system of the shapefile; when it is not
#   WGS84 each geometry is transformed to Gis::FACTORY's coordinate system
# @param name_field record attribute supplying the Gazetteer name
# @param iso_a2_field record attribute supplying an optional ISO 3166 alpha-2
#   code; ignored when it does not validate
# @param iso_a3_field record attribute supplying an optional ISO 3166 alpha-3
#   code; ignored when it does not validate
# @param citation citation passed through to save_and_clone_to_projects
# @param progress_tracker object responding to update!
# @param projects project ids; used to look up project names and passed to
#   save_and_clone_to_projects
# @return nil when the shapefile cannot be opened, otherwise the result of
#   the final progress_tracker.update!
def self.process_shape_file(
  shpfile, crs, name_field, iso_a2_field, iso_a3_field, citation,
  progress_tracker, projects
)
  # Declared up front so the ensure clause can always reference it.
  file = nil

  r = {
    num_records: 0,
    num_records_imported: 0,
    error_messages: nil,
  }

  # We'll need to transform from whatever CRS the shapefile is in to our WGS84
  # coordinates.
  if (crs_is_wgs84 = Vendor::Rgeo.coord_sys_is_wgs84?(crs))
    from_factory = Gis::FACTORY
  else
    from_proj4 = RGeo::CoordSys::Proj4.create(crs.to_s)
    from_factory = from_proj4.projected? ?
      # Shapefiles using a projected CRS always store their geometries using
      # projected coordinates.
      RGeo::Geographic.projected_factory(
        coord_sys: from_proj4, has_z_coordinate: true
      ).projection_factory :
      RGeo::Geographic.spherical_factory( # geographic? true
        coord_sys: from_proj4, has_z_coordinate: true
      )

    to_proj4 = Gis::FACTORY.coord_sys
    to_factory = Gis::FACTORY
  end

  begin
    # TODO: https://github.com/rgeo/rgeo-shapefile could use a fork and updates to dbf/rgeo
    file = RGeo::Shapefile::Reader.open(
      shpfile, factory: from_factory, allow_unsafe: true
    )
  rescue Errno::ENOENT => e
    progress_tracker.update!(
      num_records_imported: 0,
      error_messages: e.message,
      started_at: DateTime.now,
      ended_at: DateTime.now
    )
    return
  end

  r[:num_records] = file.num_records

  progress_tracker.update!(
    num_records: file.num_records,
    project_names:
      Project.where(id: projects).order(:name).pluck(:name).join(', '),
    started_at: DateTime.now
  )

  # Iterate over an index so we can record index on error and then resume
  (0...file.num_records).each do |i|
    begin
      # This can throw GeosError even when allow_unsafe: true
      record = file[i]

      # iso a2/a3 are optional fields, we ignore them if the shapefile
      # doesn't provide valid data.
      a2 = record[iso_a2_field]
      a3 = record[iso_a3_field]
      iso_3166_a2 = validate_iso_3166_a2(a2) ? a2 : nil
      iso_3166_a3 = validate_iso_3166_a3(a3) ? a3 : nil

      g = new(
        name: record[name_field],
        iso_3166_a2:,
        iso_3166_a3:
      )

      if crs_is_wgs84
        record_geometry = record.geometry
      else
        # TODO: what might this raise? Might want to cap our total number of
        # errors recorded here
        record_geometry = RGeo::CoordSys::Proj4.transform(
          from_proj4,
          record.geometry,
          to_proj4,
          to_factory
        )
      end

      shape = GeographicItem.make_valid_non_anti_meridian_crossing_shape(
        record_geometry.as_text
      )

      g.build_geographic_item(
        geography: shape
      )

      save_and_clone_to_projects(g, projects, citation)
      r[:num_records_imported] += 1

      # Only write progress every fifth record index.
      if i % 5 == 0
        progress_tracker.update!(
          num_records_imported: r[:num_records_imported]
        )
      end

    rescue RGeo::Error::InvalidGeometry,
           ActiveRecord::RecordInvalid,
           RGeo::Error::GeosError,
           ActiveRecord::StatementInvalid => e
      # All four error types are recorded the same way (1-based record index).
      #
      # In known instances ActiveRecord::StatementInvalid is a result of
      # something like:
      # PG::InternalError:
      #   ERROR:  lwgeom_intersection_prec: GEOS Error: TopologyException:
      #   Input geom 0 is invalid: Self-intersection at 185 5 0
      # !! Any containing transaction (from running in a spec e.g.) is now
      # aborted and open, any attempts to interact with the db will now raise
      # PG::InFailedSqlTransaction: ERROR:  current transaction is aborted,
      #   commands ignored until end of transaction block
      process_import_error(progress_tracker, r, i + 1, e.to_s)
    end
  end

  progress_tracker.update!(
    num_records_imported: r[:num_records_imported],
    ended_at: DateTime.now
  )
ensure
  # Reader.open without a block leaves closing to the caller.
  file&.close
end

.save_and_clone_to_projects(gz, project_ids, citation = nil) ⇒ Object

Raises ActiveRecord::RecordInvalid on error

Parameters:

  • gz (Gazetteer)

    Unsaved Gazetteer to save and clone from

  • project_ids (Array)

    project ids to clone gz into - gz is always saved to the current project. If saves occur in more than one project then all saves occur in a transaction.

  • citation (Hash) (defaults to: nil)

    Citation object to save to each Gazetteer created



265
266
267
268
269
270
271
272
273
274
275
276
# File 'app/models/gazetteer.rb', line 265

# Saves gz and clones it into each additional project.
#
# The ids to clone into are project_ids with duplicates and Current.project_id
# removed. They are computed on a copy, so the caller's array is never
# modified (and may be frozen or nil). When at least one clone target remains,
# all saves run inside a single transaction.
#
# @param gz [Gazetteer] unsaved Gazetteer to save and clone from
# @param project_ids [Array, nil] project ids to clone gz into; nil is treated
#   as no additional projects
# @param citation [Hash, nil] citation to save to each Gazetteer created
# @raise [ActiveRecord::RecordInvalid] on error
def self.save_and_clone_to_projects(gz, project_ids, citation = nil)
  current_project_id = Current.project_id
  clone_ids = Array(project_ids).uniq.reject { |id| id == current_project_id }

  if clone_ids.empty?
    perform_save_and_clone_to_projects(gz, [], citation)
  else
    Gazetteer.transaction do
      perform_save_and_clone_to_projects(gz, clone_ids, citation)
    end
  end
end

.select_optimized(user_id, project_id, target = 'AssertedDistribution') ⇒ Hash

Returns gazetteers optimized for user selection.

Returns:

  • (Hash)

    gazetteers optimized for user selection



376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
# File 'app/models/gazetteer.rb', line 376

# Assembles the gazetteer lists offered to a user in a selector.
#
# :pinboard always holds the user's pinned gazetteers for the project.
# :quick holds the pinboard-inserted pins, plus (when there is recent use) the
# first 5 recently used gazetteers. :recent holds the first 15 recently used
# gazetteers, ordered by name, when target is 'AssertedDistribution'.
#
# @param user_id [Integer]
# @param project_id [Integer]
# @param target [String] a blank value is treated as 'AssertedDistribution'
# @return [Hash] with keys :quick, :pinboard and :recent, each an Array
def self.select_optimized(user_id, project_id, target = 'AssertedDistribution')
  target = 'AssertedDistribution' if target.blank?
  recent_ids = used_recently(user_id, project_id, target) || []

  selection = {
    quick: [],
    pinboard: Gazetteer.pinned_by(user_id).where(pinboard_items: {project_id:}).to_a,
    recent: []
  }

  # Both queries are needed in more than one place below, so name them once.
  quick_pins = lambda do
    Gazetteer.pinned_by(user_id).pinboard_inserted.where(pinboard_items: {project_id:}).to_a
  end
  ordered_by_ids = lambda do |ids|
    Gazetteer.where('"gazetteers"."id" IN (?)', ids).order(:name).to_a
  end

  if recent_ids.empty?
    selection[:quick] = quick_pins.call
    return selection
  end

  if target == 'AssertedDistribution'
    selection[:recent] = ordered_by_ids.call(recent_ids.first(15))
  end
  selection[:quick] = (quick_pins.call + ordered_by_ids.call(recent_ids.first(5))).uniq

  selection
end

.used_recently(user_id, project_id, used_on = 'AssertedDistribution') ⇒ Scope

Returns the max 10 most recently used (1 week, could parameterize) gazetteers, as used on used_on.

Parameters:

  • used_on (String) (defaults to: 'AssertedDistribution')

    currently AssertedDistribution

Returns:

  • (Scope)

    the max 10 most recently used (1 week, could parameterize) gazetteers, as used on used_on



349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
# File 'app/models/gazetteer.rb', line 349

# Finds the ids of gazetteers used as the shape of asserted distributions
# that the user cited in the project during the last week.
#
# @param user_id [Integer] creator of the citations considered
# @param project_id [Integer] project of the citations considered
# @param used_on [String] currently only 'AssertedDistribution' is handled
# @return [Array, nil] unique asserted_distribution_shape_id values; nil for
#   any other used_on
def self.used_recently(user_id, project_id, used_on = 'AssertedDistribution')
  return unless used_on == 'AssertedDistribution'

  citations = Citation.arel_table

  # Select manager over the user's citations from the past week.
  recent_citations = citations
    .project(citations['citation_object_id'], citations['citation_object_type'], citations['created_at'])
    .from(citations)
    .where(citations['created_at'].gt(1.week.ago))
    .where(citations['created_by_id'].eq(user_id))
    .where(citations['project_id'].eq(project_id))
    .order(citations['created_at'].desc)

  # Alias the subquery so it can be joined against asserted distributions.
  recent = recent_citations.as('recent_t')
  distributions = AssertedDistribution.arel_table

  join_condition = recent['citation_object_id'].eq(distributions['id'])
    .and(recent['citation_object_type'].eq('AssertedDistribution'))

  AssertedDistribution
    .joins(Arel::Nodes::InnerJoin.new(recent, Arel::Nodes::On.new(join_condition)))
    .where(asserted_distribution_shape_type: 'Gazetteer')
    .pluck(:asserted_distribution_shape_id)
    .uniq
end

.validate_iso_3166_a2(a2) ⇒ Object



278
279
280
281
# File 'app/models/gazetteer.rb', line 278

# Checks whether a value looks like an ISO 3166 alpha-2 code: a String that,
# once surrounding whitespace is stripped, is exactly two ASCII letters
# (either case).
#
# Only ASCII letters are case-folded. A full Unicode upcase would turn e.g.
# "ß" into "SS" and let a single non-ASCII character pass as two letters.
#
# @param a2 [Object] candidate value; anything that is not a String is invalid
# @return [Boolean]
def self.validate_iso_3166_a2(a2)
  return false unless a2.is_a?(String)

  /\A[A-Z][A-Z]\z/.match?(a2.strip.upcase(:ascii))
end

.validate_iso_3166_a3(a3) ⇒ Object



283
284
285
286
# File 'app/models/gazetteer.rb', line 283

# Checks whether a value looks like an ISO 3166 alpha-3 code: a String that,
# once surrounding whitespace is stripped, is exactly three ASCII letters
# (either case).
#
# Only ASCII letters are case-folded. A full Unicode upcase would turn e.g.
# "ßa" into "SSA" and let two characters pass as three letters.
#
# @param a3 [Object] candidate value; anything that is not a String is invalid
# @return [Boolean]
def self.validate_iso_3166_a3(a3)
  return false unless a3.is_a?(String)

  /\A[A-Z][A-Z][A-Z]\z/.match?(a3.strip.upcase(:ascii))
end

Instance Method Details

#build_gi_from_shapes(shapes, operation_is_union = true) ⇒ Object

Builds a GeographicItem for this gazetteer from the combined input shapes

Parameters:

  • shapes (Hash)

    geojson: array of geojson feature hashes; wkt: array of wkt strings; points: array of geojson feature points; ga_combine: array of GA ids; gz_combine: array of GZ ids

  • operation_is_union (Boolean) (defaults to: true)

    Union if true, intersection if false



101
102
103
104
105
106
107
108
109
110
111
112
# File 'app/models/gazetteer.rb', line 101

# Builds a GeographicItem for this gazetteer from the combined input shapes.
#
# If combining the shapes raises TaxonWorks::Error, the error's message is
# added to errors on :base and no GeographicItem is built.
#
# @param shapes [Hash] the shapes to combine, passed to
#   combine_shapes_to_rgeo
# @param operation_is_union [Boolean] union if true, intersection if false
# @return the built geographic item, or nil when combining failed
def build_gi_from_shapes(shapes, operation_is_union=true)
  begin
    rgeo_shape = self.class.combine_shapes_to_rgeo(shapes, operation_is_union)
  rescue TaxonWorks::Error => e
    # Record the message text rather than the exception object itself, so
    # errors holds a String like every other validation message.
    errors.add(:base, e.message)
    return
  end

  build_geographic_item(
    geography: rgeo_shape
  )
end

#capture_geographic_item_id_for_cleanupObject (private)



595
596
597
598
599
# File 'app/models/gazetteer.rb', line 595

# Copies the current geographic_item_id into geographic_item_id_for_cleanup,
# where destroy_geographic_item_if_orphaned later reads it.
# NOTE(review): presumably registered as a before-destroy callback - confirm
# against the callback declarations in the class body.
def capture_geographic_item_id_for_cleanup
  # Capture id before destroy so cleanup does not depend on association state
  # on the destroyed record.
  self.geographic_item_id_for_cleanup = geographic_item_id
end

#data_originObject



416
417
418
# File 'app/models/gazetteer.rb', line 416

# @return [String] the GZ_DATA_ORIGIN constant ('TaxonWorks Gazetteer'); it is
#   the same for every Gazetteer
def data_origin
  GZ_DATA_ORIGIN
end

#default_geographic_itemObject



408
409
410
# File 'app/models/gazetteer.rb', line 408

# A gazetteer has a single shape, so its default geographic item is simply
# its geographic_item.
#
# @return the value of geographic_item
def default_geographic_item
  geographic_item
end

#default_geographic_item_idObject



412
413
414
# File 'app/models/gazetteer.rb', line 412

# @return the id of this gazetteer's geographic_item
# @raise [NoMethodError] if geographic_item is nil
def default_geographic_item_id
  geographic_item.id
end

#destroy_geographic_item_if_orphanedObject (private)



590
591
592
593
# File 'app/models/gazetteer.rb', line 590

# Destroys the GeographicItem whose id was stored in
# geographic_item_id_for_cleanup, but only if it still exists and reports
# unreferenced_for_cleanup?.
#
# @return nil when nothing was destroyed, otherwise the result of destroy!
def destroy_geographic_item_if_orphaned
  orphan_candidate = GeographicItem.find_by(id: geographic_item_id_for_cleanup)
  return if orphan_candidate.nil?

  orphan_candidate.destroy! if orphan_candidate.unreferenced_for_cleanup?
end

#geographic_itemsObject



404
405
406
# File 'app/models/gazetteer.rb', line 404

# Collection-shaped view of this gazetteer's single shape.
#
# @return [Array] a one-element Array holding geographic_item
def geographic_items
  [geographic_item]
end

#geographic_name_classificationObject



399
400
401
402
# File 'app/models/gazetteer.rb', line 399

# Currently always empty: no classification is derived for a gazetteer.
#
# @return [Hash] an empty Hash
def geographic_name_classification
  # TODO: Return a2/a3 country?
  {}
end

#iso_3166_a2_is_two_charactersObject (private)



580
581
582
583
# File 'app/models/gazetteer.rb', line 580

# Validation: adds an error on :iso_3166_a2 when a non-nil value is rejected
# by the class-level validate_iso_3166_a2 check. A nil value is accepted.
def iso_3166_a2_is_two_characters
  return if iso_3166_a2.nil?
  return if self.class.validate_iso_3166_a2(iso_3166_a2)

  errors.add(:iso_3166_a2, 'must be exactly two characters')
end

#iso_3166_a3_is_three_charactersObject (private)



585
586
587
588
# File 'app/models/gazetteer.rb', line 585

# Validation: adds an error on :iso_3166_a3 when a non-nil value is rejected
# by the class-level validate_iso_3166_a3 check. A nil value is accepted.
def iso_3166_a3_is_three_characters
  return if iso_3166_a3.nil?
  return if self.class.validate_iso_3166_a3(iso_3166_a3)

  errors.add(:iso_3166_a3, 'must be exactly three characters')
end

#to_geo_json_featureHash

Returns a Hash of the pieces of a GeoJSON 'Feature'.

Returns:

  • (Hash)

    of the pieces of a GeoJSON 'Feature'



72
73
74
75
76
77
78
79
80
81
82
83
# File 'app/models/gazetteer.rb', line 72

# Extends the simple feature with a 'shape' property identifying this record.
#
# @return [Hash] the result of to_simple_json_feature with its 'properties'
#   replaced by a 'shape' entry carrying the type 'Gazetteer', the id and the
#   name (as 'tag')
def to_geo_json_feature
  feature = to_simple_json_feature

  # cf. GeographicArea
  shape_properties = {
    'type' => 'Gazetteer',
    'id' => id,
    'tag' => name
  }

  feature.merge('properties' => { 'shape' => shape_properties })
end

#to_simple_json_featureObject



85
86
87
88
89
90
91
# File 'app/models/gazetteer.rb', line 85

# Minimal GeoJSON 'Feature' for this gazetteer's shape.
#
# @return [Hash] 'type' => 'Feature', empty 'properties', and 'geometry' set
#   to geographic_item.to_geo_json
def to_simple_json_feature
  geometry = geographic_item.to_geo_json

  { 'type' => 'Feature', 'properties' => {}, 'geometry' => geometry }
end