Module: Lib::Vendor::RgeoShapefileHelper
- Included in:
- GazetteersController
- Defined in:
- app/helpers/lib/vendor/rgeo_shapefile_helper.rb
Instance Method Summary collapse
-
#addShapefileImportJobToQueue(shapefile, citation, projects, project_id, user_id) ⇒ Object
Raises TaxonWorks::Error on error.
-
#basename(filename) ⇒ Object
Assumes an extension of the form .xyz.
-
#dbf_column_type_is_string(dbf, column_name) ⇒ Object
Assumes the column_name is a valid dbf column name.
-
#fetch_shapefile_documents(shapefile, project_id) ⇒ Hash of Documents
Raises TaxonWorks::Error on error.
-
#fields_from_shapefile(shp_doc_id, dbf_doc_id, project_id) ⇒ Object
Raises TaxonWorks::Error on error.
-
#find_doc_for_extension(base, ext, project_id) ⇒ Object
Raises TaxonWorks::Error on error.
-
#get_dbf_doc(shp_doc_id, dbf_doc_id, project_id) ⇒ Object
Raises TaxonWorks::Error on error.
-
#text_field_values_from_shapefile(dbf_doc, text_fields, max_records) ⇒ Object
Raises TaxonWorks::Error on error. !! Assumes the second text_field is a name field, and that the third field should be filled in with the number of records with that name. !!
-
#validate_and_fetch_shapefile_text_field_values(shapefile, project_id) ⇒ Object
Raises TaxonWorks::Error on error.
-
#validate_shape_file(shapefile, project_id) ⇒ Hash
Raises TaxonWorks::Error on error.
Instance Method Details
#addShapefileImportJobToQueue(shapefile, citation, projects, project_id, user_id) ⇒ Object
Raises TaxonWorks::Error on error.
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'app/helpers/lib/vendor/rgeo_shapefile_helper.rb', line 5

# Validates a shapefile and queues a background job to import it.
#
# NOTE: `shapefile` is updated in place with the ids of the companion
# documents resolved during validation.
#
# @param shapefile [#[], #[]=] shapefile description; validated by
#   validate_shape_file and then given :shx_doc_id, :dbf_doc_id,
#   :prj_doc_id and :cpg_doc_id entries
# @param citation [#[]] read for :cite_gzs and :citation (with :source_id)
# @param projects passed through to the import job unchanged
# @param project_id passed to validation and to the import job
# @param user_id passed to the import job
# @return [Object] whatever ImportGazetteersJob.perform_later returns
# @raise [TaxonWorks::Error] if validation fails, or if citing was
#   requested but no citation source was given
def addShapefileImportJobToQueue(
  shapefile, citation, projects, project_id, user_id
)
  shapefile_docs = validate_shape_file(shapefile, project_id)

  citing_without_source =
    citation[:cite_gzs] && !citation[:citation]&.dig(:source_id)
  raise TaxonWorks::Error, 'No citation source selected' if citing_without_source

  # Only shp_doc_id was required from the caller; the remaining ids may have
  # been determined during validation, so record them on the shapefile.
  %i[shx dbf prj].each do |ext|
    shapefile[:"#{ext}_doc_id"] = shapefile_docs[ext].id
  end
  # A .cpg document is optional, so it may be missing here.
  shapefile[:cpg_doc_id] = shapefile_docs[:cpg]&.id

  progress_tracker = GazetteerImport.create!(
    shapefile: shapefile_docs[:shp].document_file_file_name
  )

  ImportGazetteersJob.perform_later(
    shapefile, citation, user_id, project_id, progress_tracker, projects
  )
end
#basename(filename) ⇒ Object
Assumes an extension of the form .xyz
65 66 67 |
# File 'app/helpers/lib/vendor/rgeo_shapefile_helper.rb', line 65

# Returns +filename+ with its final extension (the last dot and whatever
# follows it) removed.
#
# Names of the form 'name.xyz' give the same result as blindly dropping the
# last four characters, but extensions of other lengths are handled too, and
# a name without an extension is returned unchanged rather than truncated
# (or turned into nil when shorter than four characters).
#
# @param filename [String]
# @return [String] the file name without its final extension
def basename(filename)
  # Excluding '/' keeps a dot inside a directory component from being
  # mistaken for the start of an extension.
  filename.sub(%r{\.[^./]*\z}, '')
end
#dbf_column_type_is_string(dbf, column_name) ⇒ Object
Assumes the column_name is a valid dbf column name
177 178 179 180 181 |
# File 'app/helpers/lib/vendor/rgeo_shapefile_helper.rb', line 177

# Reports whether a dbf column holds character (string) data.
#
# Assumes column_name is a valid dbf column name; an unknown name is not
# handled here.
#
# @param dbf [#columns] table whose columns respond to #name and #type
# @param column_name [String]
# @return [true, String] true for a character column, otherwise the string
#   form of the DBF::Column::TYPE_CAST_CLASS entry for the column's type
def dbf_column_type_is_string(dbf, column_name)
  column = dbf.columns.find { |candidate| candidate.name == column_name }

  # 'C' is for 'C'haracter
  return true if column.type == 'C'

  DBF::Column::TYPE_CAST_CLASS[column.type.to_sym].to_s
end
#fetch_shapefile_documents(shapefile, project_id) ⇒ Hash of Documents
Raises TaxonWorks::Error on error.
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'app/helpers/lib/vendor/rgeo_shapefile_helper.rb', line 36

# Collects the Documents that make up a shapefile.
#
# Documents named explicitly (via :shp_doc_id, :shx_doc_id, :dbf_doc_id,
# :prj_doc_id, :cpg_doc_id) are loaded by id; any that were not named are
# looked up by file name, using the base name of the .shp document.
#
# @param shapefile [#[]] holds the optional *_doc_id entries; :shp_doc_id is
#   required
# @param project_id used when looking up documents by file name
# @return [Hash of Documents] keyed by :shp, :shx, :dbf, :prj, :cpg
# @raise [TaxonWorks::Error] if a named document does not exist, no .shp was
#   given, a named document's base name differs from the .shp's, or a lookup
#   by file name fails
def fetch_shapefile_documents(shapefile, project_id)
  docs =
    begin
      %i[shp shx dbf prj cpg].to_h do |ext|
        doc_id = shapefile[:"#{ext}_doc_id"]
        [ext, doc_id ? Document.find(doc_id) : nil]
      end
    rescue ActiveRecord::RecordNotFound => e
      raise TaxonWorks::Error, e
    end

  shp_doc = docs[:shp]
  raise TaxonWorks::Error, 'A .shp file is required' if shp_doc.nil?

  base = basename(shp_doc.document_file_file_name)

  docs.each do |ext, doc|
    if doc
      # Explicitly chosen documents must belong to the same shapefile.
      next if basename(doc.document_file_file_name) == base

      raise TaxonWorks::Error, ".#{ext} file must have the same name as the .shp file: '#{base}'"
    else
      # Only existing keys are reassigned, which is safe during iteration.
      docs[ext] = find_doc_for_extension(base, ext, project_id)
    end
  end

  docs
end
#fields_from_shapefile(shp_doc_id, dbf_doc_id, project_id) ⇒ Object
Raises TaxonWorks::Error on error.
184 185 186 187 188 189 190 |
# File 'app/helpers/lib/vendor/rgeo_shapefile_helper.rb', line 184

# Lists the column names of a shapefile's dbf table.
#
# @param shp_doc_id id of the .shp Document, used to locate the dbf when
#   dbf_doc_id is not given
# @param dbf_doc_id id of the .dbf Document, if known
# @param project_id passed to get_dbf_doc
# @return the dbf table's column names
# @raise [TaxonWorks::Error] if the dbf document cannot be determined
def fields_from_shapefile(shp_doc_id, dbf_doc_id, project_id)
  dbf_path = get_dbf_doc(shp_doc_id, dbf_doc_id, project_id).document_file.path

  ::DBF::Table.new(dbf_path).column_names
end
#find_doc_for_extension(base, ext, project_id) ⇒ Object
Raises TaxonWorks::Error on error
281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 |
# File 'app/helpers/lib/vendor/rgeo_shapefile_helper.rb', line 281

# Finds the single Document in a project named "<base>.<ext>".
#
# The matching documents are loaded once and inspected in memory, instead of
# counting twice and then loading them again.
#
# @param base [String] file name without extension
# @param ext [Symbol, String] extension without the leading dot
# @param project_id project whose documents are searched
# @return the matching Document, or nil when ext is 'cpg' and nothing matches
# @raise [TaxonWorks::Error] if a required document is missing, or if more
#   than one document has the requested name
def find_doc_for_extension(base, ext, project_id)
  ext = ext.to_s
  ext_filename = "#{base}.#{ext}"

  matches = Document.where(
    document_file_file_name: ext_filename, project_id:
  ).to_a

  if matches.empty?
    return nil if ext == 'cpg' # cpg isn't required

    raise TaxonWorks::Error, "Failed to find a '#{ext_filename}' document, has one been uploaded?"
  end

  if matches.size > 1
    # (This makes cpg required when there are multiple matching)
    raise TaxonWorks::Error, "More than one '#{ext_filename}' document exists (ids #{matches.map(&:id).join(',')}), please add the correct one in the document selector"
  end

  # exactly one matching document
  matches.first
end
#get_dbf_doc(shp_doc_id, dbf_doc_id, project_id) ⇒ Object
Raises TaxonWorks::Error on error.
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 |
# File 'app/helpers/lib/vendor/rgeo_shapefile_helper.rb', line 259

# Resolves the dbf Document of a shapefile.
#
# When dbf_doc_id is given that document is loaded directly; otherwise the
# dbf is looked up by file name from the base name of the .shp document.
#
# NOTE(review): Document.find failures are not converted to
# TaxonWorks::Error in this method - confirm callers handle that.
#
# @param shp_doc_id id of the .shp Document (used only without dbf_doc_id)
# @param dbf_doc_id id of the .dbf Document, if known
# @param project_id project searched when looking up the dbf by name
# @return the dbf Document
# @raise [TaxonWorks::Error] if neither id is given or no dbf document is
#   found
def get_dbf_doc(shp_doc_id, dbf_doc_id, project_id)
  unless shp_doc_id || dbf_doc_id
    raise TaxonWorks::Error, '.shp or .dbf required to read shapefile fields'
  end

  dbf_doc =
    if dbf_doc_id
      Document.find(dbf_doc_id)
    else
      shp_name = Document.find(shp_doc_id).document_file_file_name
      find_doc_for_extension(basename(shp_name), :dbf, project_id)
    end

  raise TaxonWorks::Error, 'failed to find dbf shapefile document!' if dbf_doc.nil?

  dbf_doc
end
#text_field_values_from_shapefile(dbf_doc, text_fields, max_records) ⇒ Object
Raises TaxonWorks::Error on error. !! Assumes the second text_field is a name field, and that the third field should be filled in with the number of records with that name. !! !! The counts are restricted to the first max_records records, so they may not be accurate if there are more records than that. !!
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 |
# File 'app/helpers/lib/vendor/rgeo_shapefile_helper.rb', line 223

# Reads the requested text fields from (at most) the first max_records
# records of a dbf document.
#
# !! Assumes the second text_field is a name field, and that the third field
# should be filled in with the number of records with that name. !!
# !! The counts are restricted to the first max_records records, so they may
# not be accurate if there are more records than that. !!
#
# The total is taken from the table's record_count (as validate_shape_file
# does) rather than by enumerating every record just to count them.
#
# @param dbf_doc [#document_file] Document whose file is the dbf table
# @param text_fields [Array] positions 0 and 2 are computed (1-based record
#   number and name count); every other entry is a dbf column name
# @param max_records [Integer] maximum number of records to read
# @return [Array] [total number of records in the dbf, rows], one row per
#   record read, each with one value per entry of text_fields
def text_field_values_from_shapefile(dbf_doc, text_fields, max_records)
  dbf = ::DBF::Table.new(dbf_doc.document_file.path)
  name_counts = {}

  text_values = dbf.take(max_records).each_with_index.map do |record, index|
    text_fields.each_with_index.map do |field, position|
      case position
      when 0 # Record number, 1-based
        index + 1
      when 1 # Name field
        name = record[field]
        name_counts[name] = name_counts.fetch(name, 0) + 1
        name
      when 2 # Name count
        0 # temporary, actual count updated below
      else # a2 and/or a3
        record[field]
      end
    end
  end

  # A name's count is only known once every row has been read.
  text_values.each { |row| row[2] = name_counts[row[1]] }

  [dbf.record_count, text_values]
end
#validate_and_fetch_shapefile_text_field_values(shapefile, project_id) ⇒ Object
Raises TaxonWorks::Error on error.
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 |
# File 'app/helpers/lib/vendor/rgeo_shapefile_helper.rb', line 193

# Validates a shapefile and returns a preview of its text field values.
#
# @param shapefile [#[]] read for :name_field, :iso_a2_field and
#   :iso_a3_field (and whatever validate_shape_file requires)
# @param project_id passed to validate_shape_file
# @return [Hash] with keys :text_fields_hash (field label => dbf column
#   name), :text_values, :records_count and :max_values_count
# @raise [TaxonWorks::Error] if validation fails
def validate_and_fetch_shapefile_text_field_values(shapefile, project_id)
  shapefile_docs = validate_shape_file(shapefile, project_id)

  # Computed fields have no dbf column, so they are kept even though their
  # column name is blank; see text_field_values_from_shapefile.
  computed_fields = %i[record_number count]

  # order here is significant
  text_fields_hash = {
    record_number: '',
    name: shapefile[:name_field],
    count: '',
    a2: shapefile[:iso_a2_field],
    a3: shapefile[:iso_a3_field]
  }.reject { |key, column| !computed_fields.include?(key) && column.blank? }

  max_values_count = 1000
  records_count, text_values = text_field_values_from_shapefile(
    shapefile_docs[:dbf], text_fields_hash.values, max_values_count
  )

  { text_fields_hash:, text_values:, records_count:, max_values_count: }
end
#validate_shape_file(shapefile, project_id) ⇒ Hash
Raises TaxonWorks::Error on error.
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
# File 'app/helpers/lib/vendor/rgeo_shapefile_helper.rb', line 71

# Checks that a shapefile can be imported.
#
# In order, this verifies that:
# * a name field was given and the shapefile's documents can be fetched,
# * the .prj parses as a geographic or projected coordinate system and, when
#   it is not WGS84, a proj4 can be created for it,
# * the dbf has records, the name column exists, is a string column, and has
#   a value in every record,
# * the iso a2/a3 columns, when given, exist, are string columns, and differ,
# * the .cpg, when present, can be read and names a known encoding.
#
# @param shapefile [#[]] read for :name_field, :iso_a2_field, :iso_a3_field
#   (and the document ids used by fetch_shapefile_documents)
# @param project_id passed to fetch_shapefile_documents
# @return [Hash] the documents returned by fetch_shapefile_documents
# @raise [TaxonWorks::Error] on the first failed check
def validate_shape_file(shapefile, project_id)
  name_field = shapefile[:name_field]
  raise TaxonWorks::Error, 'Name field is required' if name_field.nil?

  docs = fetch_shapefile_documents(shapefile, project_id)

  # Check that we can transform from the input CRS to our WGS84 CRS
  prj_wkt = File.read(docs[:prj].document_file.path)
  cs =
    begin
      RGeo::CoordSys::CS.create_from_wkt(prj_wkt)
    rescue RGeo::Error::ParseError => e
      raise TaxonWorks::Error, "Failed to parse the prj file: #{e}"
    end

  unless cs.geographic? || cs.projected?
    raise TaxonWorks::Error, '.prj must be either geographic or projected'
  end

  unless Vendor::Rgeo.coord_sys_is_wgs84?(cs)
    # Make sure we can create a proj4 for the source CRS, it's needed for
    # transforming coordinates.
    begin
      RGeo::CoordSys::Proj4.create(cs.to_s)
    rescue RGeo::Error::InvalidProjection => e
      raise TaxonWorks::Error, "Invalid prj file? #{e}"
    end
  end

  # Check that each record has a name.
  dbf = ::DBF::Table.new(docs[:dbf].document_file.path)
  if dbf.record_count == 0
    raise TaxonWorks::Error, 'Empty dbf file: shapefile must contain records'
  end

  unless dbf.column_names.include?(name_field)
    raise TaxonWorks::Error, "No column named '#{name_field}'"
  end

  # dbf_column_type_is_string returns true or the name of the non-string
  # type, so the result has to be compared with true explicitly.
  name_type = dbf_column_type_is_string(dbf, name_field)
  if name_type != true
    raise TaxonWorks::Error, "Name error: column '#{name_field}' for Gazetteer names should be a string field, not '#{name_type}'"
  end

  (0...dbf.record_count).each do |i|
    record = dbf.find(i)
    next unless Utilities::Rails::Strings.nil_squish_strip(record[name_field]).nil?

    raise TaxonWorks::Error, "Record #{i} has no name - names are required for all records"
  end

  # Check that iso a2/a3 fields, if provided, exist and are of type String
  iso_a2_field = shapefile[:iso_a2_field]
  iso_a3_field = shapefile[:iso_a3_field]

  [['a2', iso_a2_field], ['a3', iso_a3_field]].each do |code, iso_field|
    next unless iso_field.present?

    unless dbf.column_names.include?(iso_field)
      raise TaxonWorks::Error, "No column named '#{iso_field}'"
    end

    iso_type = dbf_column_type_is_string(dbf, iso_field)
    if iso_type != true
      raise TaxonWorks::Error, "Iso_3166_#{code} error: column '#{iso_field}' for #{code} values should be a string field, not '#{iso_type}'"
    end
  end

  if iso_a2_field.present? && iso_a3_field.present? &&
     iso_a2_field == iso_a3_field
    raise TaxonWorks::Error, "Iso_3166_a2 column can't be the same as Iso_3166_a3 column"
  end

  # Check that the cpg encoding is recognized - strings can get returned
  # encoded as binary if failure here is allowed.
  # cf. https://github.com/rgeo/rgeo-shapefile/blob/d278da0b613425d64e3792497ac9cf474eec6e53/lib/rgeo/shapefile/reader.rb#L194-L198
  if docs[:cpg].present?
    begin
      encoding = File.read(docs[:cpg].document_file.path)
      Encoding.find(encoding.strip)
    rescue Errno::ENOENT
      raise TaxonWorks::Error, "Failed to open .cpg document '#{docs[:cpg].id}'"
    rescue ArgumentError => e # Unrecognized encoding
      raise TaxonWorks::Error, "'#{e}' from .cpg document '#{docs[:cpg].id}'"
    end
  end

  docs
end