Class: Document
- Inherits:
-
ApplicationRecord
- Object
- ActiveRecord::Base
- ApplicationRecord
- Document
- Includes:
- Housekeeping, Shared::Identifiers, Shared::IsData, Shared::Notes, Shared::Tags, SoftValidation
- Defined in:
- app/models/document.rb
Overview
A Document is a digital file that has text inhering within it. The formats handled at present are pdf and txt.
Documents are to Documentation as Images are to Depictions.
Constant Summary
Constants included from SoftValidation
SoftValidation::ANCESTORS_WITH_SOFT_VALIDATIONS
Instance Attribute Summary collapse
-
#document_file_content_type ⇒ String
The content type (mime).
-
#document_file_file_name ⇒ String
The name of the file as uploaded by the user.
-
#document_file_file_size ⇒ Integer
Size of the document in K.
-
#document_file_updated_at ⇒ Timestamp
Last time this document was updated.
-
#initialize_start_page ⇒ Object
Returns the value of attribute initialize_start_page.
-
#page_map ⇒ Hash
A map of PDF page to printed page number(s); the page index starts at 1. Behaviour: if no value exists for a PDF page, the page is assumed to be the page number of the PDF (almost never the real case); if a value is provided, it points to the page(s) represented in print, e.g.: { “1”: “300”, “2”: [“301”, “302”, “xi”] }. The mapping can be many to many: { “1”: [“300”, “301”], “2”: [“301”] } …
Instance Method Summary collapse
- #check_for_documentation ⇒ Object protected
- #get_page_map(sp = 1) ⇒ Object
- #pdf_page_for(printed_page) ⇒ Array
- #pdftotext ⇒ Object
- #reject_documentation(attributed) ⇒ Object protected
- #set_page_map_page(index, page) ⇒ Object
- #set_pages_by_start(sp = 1) ⇒ Object
- #set_pdf_metadata ⇒ Object protected
Methods included from SoftValidation
#clear_soft_validations, #fix_for, #fix_soft_validations, #soft_fixed?, #soft_valid?, #soft_validate, #soft_validated?, #soft_validations, #soft_validators
Methods included from Shared::IsData
#errors_excepting, #full_error_messages_excepting, #identical, #is_community?, #is_destroyable?, #is_editable?, #is_in_use?, #is_in_users_projects?, #metamorphosize, #similar
Methods included from Shared::Tags
#reject_tags, #tag_with, #tagged?, #tagged_with?
Methods included from Shared::Notes
#concatenated_notes_string, #reject_notes
Methods included from Shared::Identifiers
#dwc_occurrence_id, #identified?, #next_by_identifier, #previous_by_identifier, #reject_identifiers
Methods included from Housekeeping
#has_polymorphic_relationship?
Methods inherited from ApplicationRecord
Instance Attribute Details
#document_file_content_type ⇒ String
Returns the content type (mime).
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# File 'app/models/document.rb', line 36

# A Document is a digital file that has text inhering within it.
#
# NOTE(review): this listing was flattened and had lost the three
# `validates_attachment_*` macro names and the `set_pdf_metadata` method name.
# They are restored here from the arguments and the `before_save` callback that
# were still visible; confirm against app/models/document.rb.
class Document < ApplicationRecord
  include Housekeeping
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::IsData
  include SoftValidation

  # Only the reader is generated; the writer is defined explicitly below
  # because it also seeds page_map.
  attr_reader :initialize_start_page

  before_destroy :check_for_documentation, prepend: true

  has_many :documentation, dependent: :destroy, inverse_of: :document
  has_many :sources, through: :documentation, source_type: 'Source', source: 'documentation_object'

  has_attached_file :document_file, filename_cleaner: Utilities::CleanseFilename

  validates_attachment_content_type :document_file, content_type: ['application/octet-stream', 'application/pdf', 'text/plain', 'text/xml']
  validates_attachment_presence :document_file
  validates_attachment_size :document_file, greater_than: 1.bytes

  accepts_nested_attributes_for :documentation, allow_destroy: true, reject_if: :reject_documentation

  # PDF metadata is recomputed only when document_file_file_size changed and
  # the content type matches /pdf/.
  before_save :set_pdf_metadata, if: -> {
    ActiveSupport::Deprecation.silence do
      changed_attributes.include?('document_file_file_size') &&
        document_file_content_type =~ /pdf/
    end
  }

  # Assigns (without saving) a sequential page_map whose first PDF page is
  # printed page `sp`.
  #
  # @param sp [#to_i] printed page number of PDF page 1
  def set_pages_by_start(sp = 1)
    write_attribute(:page_map, get_page_map(sp))
  end

  # Builds a sequential map: PDF page i (Integer key, starting at 1) maps to
  # the String of (sp + i - 1).
  #
  # @param sp [#to_i, nil] printed page number of PDF page 1
  # @return [Hash] empty when page_total or sp is nil/false
  def get_page_map(sp = 1)
    return {} unless page_total && sp

    (1..page_total).each_with_object({}) do |pdf_page, map|
      map[pdf_page] = (pdf_page - 1 + sp.to_i).to_s
    end
  end

  # Finds the PDF pages on which a printed page appears. Values are compared
  # whole, so printed page 30 does not match a page mapped to "300".
  #
  # @param printed_page [#to_s] printed page label, e.g. 301 or "xi"
  # @return [Array] keys of page_map whose value is, or contains, the label
  def pdf_page_for(printed_page)
    label = printed_page.to_s
    page_map.each_with_object([]) do |(pdf_page, printed), pages|
      # Array() lets a single value and a list of values share one code path.
      pages.push(pdf_page) if Array(printed).map(&:to_s).include?(label)
    end
  end

  # Maps one or more PDF pages to printed page label(s) and persists page_map.
  #
  # @param index [#to_i, Array] PDF page number, or several of them
  # @param page [#to_s, Array] printed label, or several labels for one PDF page
  # @return [Boolean] false when both arguments are Arrays or when any index
  #   exceeds page_total; otherwise the result of update_attribute
  def set_page_map_page(index, page)
    return false if index.kind_of?(Array) && page.kind_of?(Array)

    indices = [index].flatten
    total = page_total
    # The bound can only be enforced when the page count is known.
    return false if total && indices.any? { |i| i.to_i > total }

    value = page.kind_of?(Array) ? page.map(&:to_s) : page.to_s
    map = page_map
    indices.each { |i| map[i.to_s] = value }
    update_attribute(:page_map, map)
  end

  # Stores the start page and seeds page_map from it (not saved here).
  #
  # @param value [#to_i, nil] printed page number of PDF page 1
  def initialize_start_page=(value)
    write_attribute(:page_map, get_page_map(value))
    @initialize_start_page = value
  end

  # Runs the external `pdftotext -layout` program on the attached file.
  # Arguments are passed as an array so the path is never parsed by a shell.
  #
  # @return [String] standard output of pdftotext; '' when there is no path
  def pdftotext
    path = document_file.path
    return '' if path.nil? || path.empty?

    IO.popen(['pdftotext', '-layout', path, '-'], &:read)
  end

  protected

  # before_destroy callback: aborts the destroy when more than one
  # documentation record references this document.
  def check_for_documentation
    return if documentation.count <= 1

    errors.add(:base, 'document is used in more than one place, remove documentation first')
    throw :abort
  end

  # before_save callback: reads the staged file with PDF::Reader to set
  # page_total, then rebuilds page_map if a start page was supplied.
  # NOTE(review): the rescue branches only add to errors and nothing here
  # throws :abort; confirm whether the save is meant to continue.
  def set_pdf_metadata
    begin
      File.open(document_file.staged_path, 'rb') do |io|
        reader = PDF::Reader.new(io)
        write_attribute(:page_total, reader.page_count)
      end
    rescue PDF::Reader::MalformedPDFError
      errors.add(:base, 'pdf is malformed')
    rescue PDF::Reader::EncryptedPDFError
      errors.add(:base, 'pdf is encrypted')
    rescue PDF::Reader::UnsupportedFeatureError
      errors.add(:base, 'pdf contains features not supported by the software')
    end

    set_pages_by_start(initialize_start_page) if initialize_start_page
  end

  # reject_if predicate for nested documentation attributes.
  #
  # @param attributed [Hash] nested attributes for one documentation record
  # @return [Boolean] true when 'type' is blank, or when the object, its id and
  #   its type are all blank
  def reject_documentation(attributed)
    attributed['type'].blank? || (
      attributed['documentation_object'].blank? &&
      attributed['documentation_object_id'].blank? &&
      attributed['documentation_object_type'].blank?
    )
  end
end
#document_file_file_name ⇒ String
The name of the file as uploaded by the user.
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# File 'app/models/document.rb', line 36

# A Document is a digital file that has text inhering within it.
#
# NOTE(review): this listing was flattened and had lost the three
# `validates_attachment_*` macro names and the `set_pdf_metadata` method name.
# They are restored here from the arguments and the `before_save` callback that
# were still visible; confirm against app/models/document.rb.
class Document < ApplicationRecord
  include Housekeeping
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::IsData
  include SoftValidation

  # Only the reader is generated; the writer is defined explicitly below
  # because it also seeds page_map.
  attr_reader :initialize_start_page

  before_destroy :check_for_documentation, prepend: true

  has_many :documentation, dependent: :destroy, inverse_of: :document
  has_many :sources, through: :documentation, source_type: 'Source', source: 'documentation_object'

  has_attached_file :document_file, filename_cleaner: Utilities::CleanseFilename

  validates_attachment_content_type :document_file, content_type: ['application/octet-stream', 'application/pdf', 'text/plain', 'text/xml']
  validates_attachment_presence :document_file
  validates_attachment_size :document_file, greater_than: 1.bytes

  accepts_nested_attributes_for :documentation, allow_destroy: true, reject_if: :reject_documentation

  # PDF metadata is recomputed only when document_file_file_size changed and
  # the content type matches /pdf/.
  before_save :set_pdf_metadata, if: -> {
    ActiveSupport::Deprecation.silence do
      changed_attributes.include?('document_file_file_size') &&
        document_file_content_type =~ /pdf/
    end
  }

  # Assigns (without saving) a sequential page_map whose first PDF page is
  # printed page `sp`.
  #
  # @param sp [#to_i] printed page number of PDF page 1
  def set_pages_by_start(sp = 1)
    write_attribute(:page_map, get_page_map(sp))
  end

  # Builds a sequential map: PDF page i (Integer key, starting at 1) maps to
  # the String of (sp + i - 1).
  #
  # @param sp [#to_i, nil] printed page number of PDF page 1
  # @return [Hash] empty when page_total or sp is nil/false
  def get_page_map(sp = 1)
    return {} unless page_total && sp

    (1..page_total).each_with_object({}) do |pdf_page, map|
      map[pdf_page] = (pdf_page - 1 + sp.to_i).to_s
    end
  end

  # Finds the PDF pages on which a printed page appears. Values are compared
  # whole, so printed page 30 does not match a page mapped to "300".
  #
  # @param printed_page [#to_s] printed page label, e.g. 301 or "xi"
  # @return [Array] keys of page_map whose value is, or contains, the label
  def pdf_page_for(printed_page)
    label = printed_page.to_s
    page_map.each_with_object([]) do |(pdf_page, printed), pages|
      # Array() lets a single value and a list of values share one code path.
      pages.push(pdf_page) if Array(printed).map(&:to_s).include?(label)
    end
  end

  # Maps one or more PDF pages to printed page label(s) and persists page_map.
  #
  # @param index [#to_i, Array] PDF page number, or several of them
  # @param page [#to_s, Array] printed label, or several labels for one PDF page
  # @return [Boolean] false when both arguments are Arrays or when any index
  #   exceeds page_total; otherwise the result of update_attribute
  def set_page_map_page(index, page)
    return false if index.kind_of?(Array) && page.kind_of?(Array)

    indices = [index].flatten
    total = page_total
    # The bound can only be enforced when the page count is known.
    return false if total && indices.any? { |i| i.to_i > total }

    value = page.kind_of?(Array) ? page.map(&:to_s) : page.to_s
    map = page_map
    indices.each { |i| map[i.to_s] = value }
    update_attribute(:page_map, map)
  end

  # Stores the start page and seeds page_map from it (not saved here).
  #
  # @param value [#to_i, nil] printed page number of PDF page 1
  def initialize_start_page=(value)
    write_attribute(:page_map, get_page_map(value))
    @initialize_start_page = value
  end

  # Runs the external `pdftotext -layout` program on the attached file.
  # Arguments are passed as an array so the path is never parsed by a shell.
  #
  # @return [String] standard output of pdftotext; '' when there is no path
  def pdftotext
    path = document_file.path
    return '' if path.nil? || path.empty?

    IO.popen(['pdftotext', '-layout', path, '-'], &:read)
  end

  protected

  # before_destroy callback: aborts the destroy when more than one
  # documentation record references this document.
  def check_for_documentation
    return if documentation.count <= 1

    errors.add(:base, 'document is used in more than one place, remove documentation first')
    throw :abort
  end

  # before_save callback: reads the staged file with PDF::Reader to set
  # page_total, then rebuilds page_map if a start page was supplied.
  # NOTE(review): the rescue branches only add to errors and nothing here
  # throws :abort; confirm whether the save is meant to continue.
  def set_pdf_metadata
    begin
      File.open(document_file.staged_path, 'rb') do |io|
        reader = PDF::Reader.new(io)
        write_attribute(:page_total, reader.page_count)
      end
    rescue PDF::Reader::MalformedPDFError
      errors.add(:base, 'pdf is malformed')
    rescue PDF::Reader::EncryptedPDFError
      errors.add(:base, 'pdf is encrypted')
    rescue PDF::Reader::UnsupportedFeatureError
      errors.add(:base, 'pdf contains features not supported by the software')
    end

    set_pages_by_start(initialize_start_page) if initialize_start_page
  end

  # reject_if predicate for nested documentation attributes.
  #
  # @param attributed [Hash] nested attributes for one documentation record
  # @return [Boolean] true when 'type' is blank, or when the object, its id and
  #   its type are all blank
  def reject_documentation(attributed)
    attributed['type'].blank? || (
      attributed['documentation_object'].blank? &&
      attributed['documentation_object_id'].blank? &&
      attributed['documentation_object_type'].blank?
    )
  end
end
#document_file_file_size ⇒ Integer
Returns size of the document in K.
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# File 'app/models/document.rb', line 36

# A Document is a digital file that has text inhering within it.
#
# NOTE(review): this listing was flattened and had lost the three
# `validates_attachment_*` macro names and the `set_pdf_metadata` method name.
# They are restored here from the arguments and the `before_save` callback that
# were still visible; confirm against app/models/document.rb.
class Document < ApplicationRecord
  include Housekeeping
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::IsData
  include SoftValidation

  # Only the reader is generated; the writer is defined explicitly below
  # because it also seeds page_map.
  attr_reader :initialize_start_page

  before_destroy :check_for_documentation, prepend: true

  has_many :documentation, dependent: :destroy, inverse_of: :document
  has_many :sources, through: :documentation, source_type: 'Source', source: 'documentation_object'

  has_attached_file :document_file, filename_cleaner: Utilities::CleanseFilename

  validates_attachment_content_type :document_file, content_type: ['application/octet-stream', 'application/pdf', 'text/plain', 'text/xml']
  validates_attachment_presence :document_file
  validates_attachment_size :document_file, greater_than: 1.bytes

  accepts_nested_attributes_for :documentation, allow_destroy: true, reject_if: :reject_documentation

  # PDF metadata is recomputed only when document_file_file_size changed and
  # the content type matches /pdf/.
  before_save :set_pdf_metadata, if: -> {
    ActiveSupport::Deprecation.silence do
      changed_attributes.include?('document_file_file_size') &&
        document_file_content_type =~ /pdf/
    end
  }

  # Assigns (without saving) a sequential page_map whose first PDF page is
  # printed page `sp`.
  #
  # @param sp [#to_i] printed page number of PDF page 1
  def set_pages_by_start(sp = 1)
    write_attribute(:page_map, get_page_map(sp))
  end

  # Builds a sequential map: PDF page i (Integer key, starting at 1) maps to
  # the String of (sp + i - 1).
  #
  # @param sp [#to_i, nil] printed page number of PDF page 1
  # @return [Hash] empty when page_total or sp is nil/false
  def get_page_map(sp = 1)
    return {} unless page_total && sp

    (1..page_total).each_with_object({}) do |pdf_page, map|
      map[pdf_page] = (pdf_page - 1 + sp.to_i).to_s
    end
  end

  # Finds the PDF pages on which a printed page appears. Values are compared
  # whole, so printed page 30 does not match a page mapped to "300".
  #
  # @param printed_page [#to_s] printed page label, e.g. 301 or "xi"
  # @return [Array] keys of page_map whose value is, or contains, the label
  def pdf_page_for(printed_page)
    label = printed_page.to_s
    page_map.each_with_object([]) do |(pdf_page, printed), pages|
      # Array() lets a single value and a list of values share one code path.
      pages.push(pdf_page) if Array(printed).map(&:to_s).include?(label)
    end
  end

  # Maps one or more PDF pages to printed page label(s) and persists page_map.
  #
  # @param index [#to_i, Array] PDF page number, or several of them
  # @param page [#to_s, Array] printed label, or several labels for one PDF page
  # @return [Boolean] false when both arguments are Arrays or when any index
  #   exceeds page_total; otherwise the result of update_attribute
  def set_page_map_page(index, page)
    return false if index.kind_of?(Array) && page.kind_of?(Array)

    indices = [index].flatten
    total = page_total
    # The bound can only be enforced when the page count is known.
    return false if total && indices.any? { |i| i.to_i > total }

    value = page.kind_of?(Array) ? page.map(&:to_s) : page.to_s
    map = page_map
    indices.each { |i| map[i.to_s] = value }
    update_attribute(:page_map, map)
  end

  # Stores the start page and seeds page_map from it (not saved here).
  #
  # @param value [#to_i, nil] printed page number of PDF page 1
  def initialize_start_page=(value)
    write_attribute(:page_map, get_page_map(value))
    @initialize_start_page = value
  end

  # Runs the external `pdftotext -layout` program on the attached file.
  # Arguments are passed as an array so the path is never parsed by a shell.
  #
  # @return [String] standard output of pdftotext; '' when there is no path
  def pdftotext
    path = document_file.path
    return '' if path.nil? || path.empty?

    IO.popen(['pdftotext', '-layout', path, '-'], &:read)
  end

  protected

  # before_destroy callback: aborts the destroy when more than one
  # documentation record references this document.
  def check_for_documentation
    return if documentation.count <= 1

    errors.add(:base, 'document is used in more than one place, remove documentation first')
    throw :abort
  end

  # before_save callback: reads the staged file with PDF::Reader to set
  # page_total, then rebuilds page_map if a start page was supplied.
  # NOTE(review): the rescue branches only add to errors and nothing here
  # throws :abort; confirm whether the save is meant to continue.
  def set_pdf_metadata
    begin
      File.open(document_file.staged_path, 'rb') do |io|
        reader = PDF::Reader.new(io)
        write_attribute(:page_total, reader.page_count)
      end
    rescue PDF::Reader::MalformedPDFError
      errors.add(:base, 'pdf is malformed')
    rescue PDF::Reader::EncryptedPDFError
      errors.add(:base, 'pdf is encrypted')
    rescue PDF::Reader::UnsupportedFeatureError
      errors.add(:base, 'pdf contains features not supported by the software')
    end

    set_pages_by_start(initialize_start_page) if initialize_start_page
  end

  # reject_if predicate for nested documentation attributes.
  #
  # @param attributed [Hash] nested attributes for one documentation record
  # @return [Boolean] true when 'type' is blank, or when the object, its id and
  #   its type are all blank
  def reject_documentation(attributed)
    attributed['type'].blank? || (
      attributed['documentation_object'].blank? &&
      attributed['documentation_object_id'].blank? &&
      attributed['documentation_object_type'].blank?
    )
  end
end
#document_file_updated_at ⇒ Timestamp
Returns last time this document was updated.
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# File 'app/models/document.rb', line 36

# A Document is a digital file that has text inhering within it.
#
# NOTE(review): this listing was flattened and had lost the three
# `validates_attachment_*` macro names and the `set_pdf_metadata` method name.
# They are restored here from the arguments and the `before_save` callback that
# were still visible; confirm against app/models/document.rb.
class Document < ApplicationRecord
  include Housekeeping
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::IsData
  include SoftValidation

  # Only the reader is generated; the writer is defined explicitly below
  # because it also seeds page_map.
  attr_reader :initialize_start_page

  before_destroy :check_for_documentation, prepend: true

  has_many :documentation, dependent: :destroy, inverse_of: :document
  has_many :sources, through: :documentation, source_type: 'Source', source: 'documentation_object'

  has_attached_file :document_file, filename_cleaner: Utilities::CleanseFilename

  validates_attachment_content_type :document_file, content_type: ['application/octet-stream', 'application/pdf', 'text/plain', 'text/xml']
  validates_attachment_presence :document_file
  validates_attachment_size :document_file, greater_than: 1.bytes

  accepts_nested_attributes_for :documentation, allow_destroy: true, reject_if: :reject_documentation

  # PDF metadata is recomputed only when document_file_file_size changed and
  # the content type matches /pdf/.
  before_save :set_pdf_metadata, if: -> {
    ActiveSupport::Deprecation.silence do
      changed_attributes.include?('document_file_file_size') &&
        document_file_content_type =~ /pdf/
    end
  }

  # Assigns (without saving) a sequential page_map whose first PDF page is
  # printed page `sp`.
  #
  # @param sp [#to_i] printed page number of PDF page 1
  def set_pages_by_start(sp = 1)
    write_attribute(:page_map, get_page_map(sp))
  end

  # Builds a sequential map: PDF page i (Integer key, starting at 1) maps to
  # the String of (sp + i - 1).
  #
  # @param sp [#to_i, nil] printed page number of PDF page 1
  # @return [Hash] empty when page_total or sp is nil/false
  def get_page_map(sp = 1)
    return {} unless page_total && sp

    (1..page_total).each_with_object({}) do |pdf_page, map|
      map[pdf_page] = (pdf_page - 1 + sp.to_i).to_s
    end
  end

  # Finds the PDF pages on which a printed page appears. Values are compared
  # whole, so printed page 30 does not match a page mapped to "300".
  #
  # @param printed_page [#to_s] printed page label, e.g. 301 or "xi"
  # @return [Array] keys of page_map whose value is, or contains, the label
  def pdf_page_for(printed_page)
    label = printed_page.to_s
    page_map.each_with_object([]) do |(pdf_page, printed), pages|
      # Array() lets a single value and a list of values share one code path.
      pages.push(pdf_page) if Array(printed).map(&:to_s).include?(label)
    end
  end

  # Maps one or more PDF pages to printed page label(s) and persists page_map.
  #
  # @param index [#to_i, Array] PDF page number, or several of them
  # @param page [#to_s, Array] printed label, or several labels for one PDF page
  # @return [Boolean] false when both arguments are Arrays or when any index
  #   exceeds page_total; otherwise the result of update_attribute
  def set_page_map_page(index, page)
    return false if index.kind_of?(Array) && page.kind_of?(Array)

    indices = [index].flatten
    total = page_total
    # The bound can only be enforced when the page count is known.
    return false if total && indices.any? { |i| i.to_i > total }

    value = page.kind_of?(Array) ? page.map(&:to_s) : page.to_s
    map = page_map
    indices.each { |i| map[i.to_s] = value }
    update_attribute(:page_map, map)
  end

  # Stores the start page and seeds page_map from it (not saved here).
  #
  # @param value [#to_i, nil] printed page number of PDF page 1
  def initialize_start_page=(value)
    write_attribute(:page_map, get_page_map(value))
    @initialize_start_page = value
  end

  # Runs the external `pdftotext -layout` program on the attached file.
  # Arguments are passed as an array so the path is never parsed by a shell.
  #
  # @return [String] standard output of pdftotext; '' when there is no path
  def pdftotext
    path = document_file.path
    return '' if path.nil? || path.empty?

    IO.popen(['pdftotext', '-layout', path, '-'], &:read)
  end

  protected

  # before_destroy callback: aborts the destroy when more than one
  # documentation record references this document.
  def check_for_documentation
    return if documentation.count <= 1

    errors.add(:base, 'document is used in more than one place, remove documentation first')
    throw :abort
  end

  # before_save callback: reads the staged file with PDF::Reader to set
  # page_total, then rebuilds page_map if a start page was supplied.
  # NOTE(review): the rescue branches only add to errors and nothing here
  # throws :abort; confirm whether the save is meant to continue.
  def set_pdf_metadata
    begin
      File.open(document_file.staged_path, 'rb') do |io|
        reader = PDF::Reader.new(io)
        write_attribute(:page_total, reader.page_count)
      end
    rescue PDF::Reader::MalformedPDFError
      errors.add(:base, 'pdf is malformed')
    rescue PDF::Reader::EncryptedPDFError
      errors.add(:base, 'pdf is encrypted')
    rescue PDF::Reader::UnsupportedFeatureError
      errors.add(:base, 'pdf contains features not supported by the software')
    end

    set_pages_by_start(initialize_start_page) if initialize_start_page
  end

  # reject_if predicate for nested documentation attributes.
  #
  # @param attributed [Hash] nested attributes for one documentation record
  # @return [Boolean] true when 'type' is blank, or when the object, its id and
  #   its type are all blank
  def reject_documentation(attributed)
    attributed['type'].blank? || (
      attributed['documentation_object'].blank? &&
      attributed['documentation_object_id'].blank? &&
      attributed['documentation_object_type'].blank?
    )
  end
end
#initialize_start_page ⇒ Object
Returns the value of attribute initialize_start_page.
44 45 46 |
# File 'app/models/document.rb', line 44

# Reader for the start page last assigned through `initialize_start_page=`.
#
# @return [Object] the stored value; nil when nothing has been assigned
def initialize_start_page
  @initialize_start_page
end
#page_map ⇒ Hash
Returns a map of PDF page to printed page number(s). The page index starts at 1. Behaviour:
if no value exists for a PDF page, then the page is assumed to be the page number of the PDF (almost never the real case);
if a value is provided, it points to the page(s) represented in print,
e.g.:
{ "1": "300",
"2": ["301", "302", "xi"]
}
The mapping can be many to many:
{ "1": ["300", "301"],
"2": ["301"]
}
i.e. printed page 301 is on PDF pages 1 and 2; PDF page 1 contains printed page 300 and part of 301.
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# File 'app/models/document.rb', line 36

# A Document is a digital file that has text inhering within it.
#
# NOTE(review): this listing was flattened and had lost the three
# `validates_attachment_*` macro names and the `set_pdf_metadata` method name.
# They are restored here from the arguments and the `before_save` callback that
# were still visible; confirm against app/models/document.rb.
class Document < ApplicationRecord
  include Housekeeping
  include Shared::Identifiers
  include Shared::Notes
  include Shared::Tags
  include Shared::IsData
  include SoftValidation

  # Only the reader is generated; the writer is defined explicitly below
  # because it also seeds page_map.
  attr_reader :initialize_start_page

  before_destroy :check_for_documentation, prepend: true

  has_many :documentation, dependent: :destroy, inverse_of: :document
  has_many :sources, through: :documentation, source_type: 'Source', source: 'documentation_object'

  has_attached_file :document_file, filename_cleaner: Utilities::CleanseFilename

  validates_attachment_content_type :document_file, content_type: ['application/octet-stream', 'application/pdf', 'text/plain', 'text/xml']
  validates_attachment_presence :document_file
  validates_attachment_size :document_file, greater_than: 1.bytes

  accepts_nested_attributes_for :documentation, allow_destroy: true, reject_if: :reject_documentation

  # PDF metadata is recomputed only when document_file_file_size changed and
  # the content type matches /pdf/.
  before_save :set_pdf_metadata, if: -> {
    ActiveSupport::Deprecation.silence do
      changed_attributes.include?('document_file_file_size') &&
        document_file_content_type =~ /pdf/
    end
  }

  # Assigns (without saving) a sequential page_map whose first PDF page is
  # printed page `sp`.
  #
  # @param sp [#to_i] printed page number of PDF page 1
  def set_pages_by_start(sp = 1)
    write_attribute(:page_map, get_page_map(sp))
  end

  # Builds a sequential map: PDF page i (Integer key, starting at 1) maps to
  # the String of (sp + i - 1).
  #
  # @param sp [#to_i, nil] printed page number of PDF page 1
  # @return [Hash] empty when page_total or sp is nil/false
  def get_page_map(sp = 1)
    return {} unless page_total && sp

    (1..page_total).each_with_object({}) do |pdf_page, map|
      map[pdf_page] = (pdf_page - 1 + sp.to_i).to_s
    end
  end

  # Finds the PDF pages on which a printed page appears. Values are compared
  # whole, so printed page 30 does not match a page mapped to "300".
  #
  # @param printed_page [#to_s] printed page label, e.g. 301 or "xi"
  # @return [Array] keys of page_map whose value is, or contains, the label
  def pdf_page_for(printed_page)
    label = printed_page.to_s
    page_map.each_with_object([]) do |(pdf_page, printed), pages|
      # Array() lets a single value and a list of values share one code path.
      pages.push(pdf_page) if Array(printed).map(&:to_s).include?(label)
    end
  end

  # Maps one or more PDF pages to printed page label(s) and persists page_map.
  #
  # @param index [#to_i, Array] PDF page number, or several of them
  # @param page [#to_s, Array] printed label, or several labels for one PDF page
  # @return [Boolean] false when both arguments are Arrays or when any index
  #   exceeds page_total; otherwise the result of update_attribute
  def set_page_map_page(index, page)
    return false if index.kind_of?(Array) && page.kind_of?(Array)

    indices = [index].flatten
    total = page_total
    # The bound can only be enforced when the page count is known.
    return false if total && indices.any? { |i| i.to_i > total }

    value = page.kind_of?(Array) ? page.map(&:to_s) : page.to_s
    map = page_map
    indices.each { |i| map[i.to_s] = value }
    update_attribute(:page_map, map)
  end

  # Stores the start page and seeds page_map from it (not saved here).
  #
  # @param value [#to_i, nil] printed page number of PDF page 1
  def initialize_start_page=(value)
    write_attribute(:page_map, get_page_map(value))
    @initialize_start_page = value
  end

  # Runs the external `pdftotext -layout` program on the attached file.
  # Arguments are passed as an array so the path is never parsed by a shell.
  #
  # @return [String] standard output of pdftotext; '' when there is no path
  def pdftotext
    path = document_file.path
    return '' if path.nil? || path.empty?

    IO.popen(['pdftotext', '-layout', path, '-'], &:read)
  end

  protected

  # before_destroy callback: aborts the destroy when more than one
  # documentation record references this document.
  def check_for_documentation
    return if documentation.count <= 1

    errors.add(:base, 'document is used in more than one place, remove documentation first')
    throw :abort
  end

  # before_save callback: reads the staged file with PDF::Reader to set
  # page_total, then rebuilds page_map if a start page was supplied.
  # NOTE(review): the rescue branches only add to errors and nothing here
  # throws :abort; confirm whether the save is meant to continue.
  def set_pdf_metadata
    begin
      File.open(document_file.staged_path, 'rb') do |io|
        reader = PDF::Reader.new(io)
        write_attribute(:page_total, reader.page_count)
      end
    rescue PDF::Reader::MalformedPDFError
      errors.add(:base, 'pdf is malformed')
    rescue PDF::Reader::EncryptedPDFError
      errors.add(:base, 'pdf is encrypted')
    rescue PDF::Reader::UnsupportedFeatureError
      errors.add(:base, 'pdf contains features not supported by the software')
    end

    set_pages_by_start(initialize_start_page) if initialize_start_page
  end

  # reject_if predicate for nested documentation attributes.
  #
  # @param attributed [Hash] nested attributes for one documentation record
  # @return [Boolean] true when 'type' is blank, or when the object, its id and
  #   its type are all blank
  def reject_documentation(attributed)
    attributed['type'].blank? || (
      attributed['documentation_object'].blank? &&
      attributed['documentation_object_id'].blank? &&
      attributed['documentation_object_type'].blank?
    )
  end
end
Instance Method Details
#check_for_documentation ⇒ Object (protected)
118 119 120 121 122 123 |
# File 'app/models/document.rb', line 118

# Destroy guard: when more than one documentation record references this
# document, records an error on :base and aborts the callback chain.
#
# @return [nil] when one or zero documentation records exist
def check_for_documentation
  return if documentation.count <= 1

  errors.add(:base, 'document is used in more than one place, remove documentation first')
  throw :abort
end
#get_page_map(sp = 1) ⇒ Object
71 72 73 74 75 76 77 78 79 |
# File 'app/models/document.rb', line 71

# Builds a sequential page map: PDF page i (Integer key, starting at 1) maps to
# the String of (sp + i - 1).
#
# @param sp [#to_i, nil] printed page number of PDF page 1
# @return [Hash] empty when page_total or sp is nil/false
def get_page_map(sp = 1)
  return {} unless page_total && sp

  (1..page_total).each_with_object({}) do |pdf_page, map|
    map[pdf_page] = (pdf_page - 1 + sp.to_i).to_s
  end
end
#pdf_page_for(printed_page) ⇒ Array
82 83 84 85 86 87 88 |
# File 'app/models/document.rb', line 82

# Finds the PDF pages on which a printed page appears.
#
# Values are compared whole: a String value must equal the label and an Array
# value must contain it. A substring test would wrongly report printed page 30
# for a PDF page mapped to "300". Missing (nil) values never match.
#
# @param printed_page [#to_s] printed page label, e.g. 301 or "xi"
# @return [Array] keys of page_map whose value is, or contains, the label
def pdf_page_for(printed_page)
  label = printed_page.to_s
  page_map.each_with_object([]) do |(pdf_page, printed), pages|
    # Array() lets a single value and a list of values share one code path.
    pages.push(pdf_page) if Array(printed).map(&:to_s).include?(label)
  end
end
#pdftotext ⇒ Object
112 113 114 |
# File 'app/models/document.rb', line 112

# Runs the external `pdftotext -layout` program on the attached file and
# returns what it writes to standard output.
#
# The command is passed as an argument array, so the path is handed to the
# program verbatim and is never parsed by a shell (spaces or shell
# metacharacters in the path cannot split or alter the command).
#
# @return [String] standard output of pdftotext; '' when there is no path
def pdftotext
  path = document_file.path
  return '' if path.nil? || path.empty?

  IO.popen(['pdftotext', '-layout', path, '-'], &:read)
end
#reject_documentation(attributed) ⇒ Object (protected)
141 142 143 |
# File 'app/models/document.rb', line 141

# Predicate deciding whether a set of nested documentation attributes is rejected.
#
# @param attributed [Hash] nested attributes for one documentation record
# @return [Boolean] true when 'type' is blank, or when 'documentation_object',
#   'documentation_object_id' and 'documentation_object_type' are all blank
def reject_documentation(attributed)
  attributed['type'].blank? || (
    attributed['documentation_object'].blank? &&
    attributed['documentation_object_id'].blank? &&
    attributed['documentation_object_type'].blank?
  )
end
#set_page_map_page(index, page) ⇒ Object
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'app/models/document.rb', line 90

# Maps one or more PDF pages to printed page label(s) and persists page_map.
#
# Either `index` or `page` may be an Array, but not both. Every index is
# checked against page_total, including those given as an Array. When
# page_total is nil the check is skipped rather than raising on a comparison
# with nil.
#
# @param index [#to_i, Array] PDF page number, or several of them
# @param page [#to_s, Array] printed label, or several labels for one PDF page
# @return [Boolean] false when both arguments are Arrays or when any index
#   exceeds page_total; otherwise the result of update_attribute
def set_page_map_page(index, page)
  return false if index.kind_of?(Array) && page.kind_of?(Array)

  indices = [index].flatten
  total = page_total
  # The bound can only be enforced when the page count is known.
  return false if total && indices.any? { |i| i.to_i > total }

  value = page.kind_of?(Array) ? page.map(&:to_s) : page.to_s
  map = page_map
  indices.each { |i| map[i.to_s] = value }
  update_attribute(:page_map, map)
end
#set_pages_by_start(sp = 1) ⇒ Object
67 68 69 |
# File 'app/models/document.rb', line 67

# Assigns (without saving) a sequential page_map built by get_page_map, whose
# first PDF page is printed page `sp`.
#
# @param sp [#to_i] printed page number of PDF page 1
def set_pages_by_start(sp = 1)
  sequential_map = get_page_map(sp)
  write_attribute(:page_map, sequential_map)
end
#set_pdf_metadata ⇒ Object (protected)
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'app/models/document.rb', line 125

# Reads the staged upload with PDF::Reader to set page_total, then rebuilds
# page_map when a start page was supplied.
#
# The method name was missing from the flattened listing (it read `def begin`);
# it is restored from the heading of this entry.
#
# PDF::Reader failures are recorded on errors[:base] instead of being raised.
# NOTE(review): nothing here throws :abort, so the rescue branches do not by
# themselves halt a save; confirm that is intended.
def set_pdf_metadata
  begin
    File.open(document_file.staged_path, 'rb') do |io|
      reader = PDF::Reader.new(io)
      write_attribute(:page_total, reader.page_count)
    end
  rescue PDF::Reader::MalformedPDFError
    errors.add(:base, 'pdf is malformed')
  rescue PDF::Reader::EncryptedPDFError
    errors.add(:base, 'pdf is encrypted')
  rescue PDF::Reader::UnsupportedFeatureError
    errors.add(:base, 'pdf contains features not supported by the software')
  end

  # Runs after the rescue handling too, so a start page is applied either way.
  set_pages_by_start(initialize_start_page) if initialize_start_page
end