Module: Vendor::Serrano
- Defined in:
- lib/vendor/serrano.rb
Overview
A middle-layer wrapper between Serrano and TaxonWorks
Defined Under Namespace
Classes: CrossRefLaTeX, CrossrefBibtexParseError
Constant Summary collapse
- CUTOFF =
50.0
Class Method Summary collapse
- .bibtex_from_citeproc(c, b) ⇒ Object
-
.citation_is_valid_doi?(citation) ⇒ Boolean
Boolean: uses our global identifier class to determine whether the value is a DOI. This isn’t super robust, but maybe OK.
- .cutoff ⇒ Float
-
.get_bibtex_string(doi, format = 'bibtex') ⇒ String?
The format must be ‘bibtex’ or ‘citeproc’.
-
.new_from_citation(citation: nil) ⇒ Source::BibTex.new, ...
TODO: attempt to extract DOI from full string.
- .resolve_doi(citation) ⇒ Object
- .unurize_doi(doi) ⇒ String
Class Method Details
.bibtex_from_citeproc(c, b) ⇒ Object
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
# File 'lib/vendor/serrano.rb', line 107

# Copies selected fields of a citeproc-JSON record onto a BibTeX-style source.
#
# Only fields that are non-blank in the record are written; everything else
# on `b` is left as it was.
#
# @param c [String] citeproc JSON document; it is parsed with JSON.parse
# @param b [#[], #[]=] target the fields are written to (modified in place)
# @return [Object, nil] `b` after the fields were copied, or nil when either
#   argument is blank
# @raise [JSON::ParserError] when `c` is not valid JSON
def self.bibtex_from_citeproc(c, b)
  return nil unless c.present? && b.present?

  c = JSON.parse(c)

  # Text fields that go through the UTF-8 helper: target attribute => citeproc key.
  {
    address: 'address',
    booktitle: 'booktitle',
    howpublished: 'how-published',
    institution: 'institution',
    journal: 'container-title',
    note: 'note',
    organization: 'organization',
    publisher: 'publisher',
    school: 'school'
  }.each do |attribute, key|
    b[attribute] = ::Utilities::Strings.encode_with_utf8(c[key]) unless c[key].blank?
  end

  # Fields copied verbatim: target attribute => citeproc key.
  {
    chapter: 'chapter',
    edition: 'edition',
    pages: 'page',
    series: 'series',
    copyright: 'copyright'
  }.each do |attribute, key|
    b[attribute] = c[key] unless c[key].blank?
  end

  # Not copied from citeproc (left as already present on `b`):
  # author, editor, month, number, title, volume, doi, bibtex_type, translator, url.

  unless c['abstract'].blank?
    # JATS markup is stripped or mapped to plain HTML, one tag at a time and in this order.
    jats_to_html = [
      ['</jats:p>', ''],
      ['<jats:p>', ''],
      ['</jats:sec>', ''],
      ['<jats:sec>', ''],
      ['</jats:title>', '</b>'],
      ['<jats:title>', '<b>'],
      ['</jats:italic>', '</i>'],
      ['<jats:italic>', '<i>'],
      ['</jats:bold>', '</b>'],
      ['<jats:bold>', '<b>']
    ]
    abstract = ::Utilities::Strings.encode_with_utf8(c['abstract'])
    b[:abstract] = jats_to_html.reduce(abstract) { |text, (tag, html)| text.gsub(tag, html) }
  end

  # 'issued' => { 'date-parts' => [[year, month, day]] }; only year and day are copied.
  issued = c['issued']
  parts_list = issued.is_a?(Hash) ? issued['date-parts'] : nil
  date_parts = parts_list.is_a?(Array) ? parts_list.first : nil
  if date_parts.is_a?(Array)
    b[:day] = date_parts[2] if date_parts[2]
    b[:year] = date_parts[0] if date_parts[0]
  end

  # Array() keeps a scalar ISBN/ISSN whole instead of calling #first on a String.
  b[:isbn] = Array(c['ISBN']).first unless c['ISBN'].blank?
  b[:issn] = Array(c['ISSN']).first unless c['ISSN'].blank?

  # Link to an existing Serial whose name equals the journal, when there is one.
  b[:serial_id] = Serial.where(name: b[:journal]).first.try(:id) unless b[:journal].blank?
  b
end
.citation_is_valid_doi?(citation) ⇒ Boolean
Returns a Boolean: uses our global identifier class to determine whether the value is a DOI. This isn’t super robust, but maybe OK.
171 172 173 174 175 |
# File 'lib/vendor/serrano.rb', line 171

# Tells whether a citation string is acceptable as a DOI identifier.
#
# A throwaway Identifier::Global::Doi is built around the string and validated.
# Only errors recorded on :identifier count; any other validation error is ignored.
#
# @param citation [String] the candidate DOI
# @return [Boolean] true when validation leaves no error on :identifier
def self.citation_is_valid_doi?(citation)
  candidate = Identifier::Global::Doi.new(identifier: citation)
  # Run the validations only to populate #errors; the overall result is not used.
  candidate.valid?
  identifier_rejected = candidate.errors.has_key?(:identifier)
  !identifier_rejected
end
.get_bibtex_string(doi, format = 'bibtex') ⇒ String?
Returns the response for the requested format, which must be ‘bibtex’ or ‘citeproc’.
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/vendor/serrano.rb', line 91

# Fetches the record for a DOI through Serrano content negotiation.
#
# @param doi [String, nil] a DOI, bare or as a URL (it is passed through unurize_doi)
# @param format [String] 'bibtex' or 'citeproc'
# @return [String, nil] the response for the requested format. nil is returned when
#   `doi` is nil, when `format` is neither 'bibtex' nor 'citeproc', when a 'bibtex'
#   response does not start with '@' (after optional whitespace), or when any
#   StandardError is raised along the way.
def self.get_bibtex_string(doi, format = 'bibtex')
  case format
  when 'bibtex'
    response = doi.nil? ? nil : ::Serrano.content_negotiation(ids: unurize_doi(doi), format: "bibtex")
    # Anything that does not look like a BibTeX entry is discarded.
    response =~ /\A\s*@/ ? response : nil
  when 'citeproc'
    doi.nil? ? nil : ::Serrano.content_negotiation(ids: unurize_doi(doi), format: "citeproc-json")
  end
rescue
  # Best effort: every failure is reported as "nothing found".
  nil
end
.new_from_citation(citation: nil) ⇒ Source::BibTex.new, ...
TODO: attempt to extract DOI from full string
Four possible paths: 1) citation. 2) citation which includes a doi. 3) naked doi, e.g., ‘10.3897/zookeys.20.205’. 4) doi with preamble, e.g., ‘dx.doi.org/10.3897/zookeys.20.205’ or
'https://doi.org/10.3897/zookeys.20.205'.
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
# File 'lib/vendor/serrano.rb', line 59

# Builds a source from a free-text citation or a DOI.
#
# The citation is used directly when it validates as a DOI; otherwise a DOI is
# looked up with resolve_doi. With a DOI in hand the BibTeX record is fetched and
# parsed, then enriched from the citeproc record.
#
# @param citation [String, nil] a citation, a bare DOI, or a DOI URL; surrounding
#   whitespace is ignored and the caller's string is not modified
# @return [Object, false, nil]
#   - false when the citation is nil or shorter than 6 characters once stripped
#   - a Source::Verbatim when no DOI is found or no BibTeX text is available
#   - otherwise whatever bibtex_from_citeproc returns
# @raise [CrossrefBibtexParseError] when the BibTeX cannot be parsed for a reason
#   other than the "year" value
def self.new_from_citation(citation: nil)
  # Work on a stripped copy so a frozen or shared string from the caller is safe.
  citation = citation&.strip
  return false if citation.nil? || citation.length < 6

  doi = citation_is_valid_doi?(citation) ? citation : resolve_doi(citation)
  return Source::Verbatim.new(verbatim: citation) if doi.nil?

  # check string encoding, if not UTF-8, check if compatible with UTF-8,
  # if so convert to UTF-8 and parse with latex, else use type verbatim
  raw_bibtex = get_bibtex_string(doi, 'bibtex')
  encoded_bibtex = ::Utilities::Strings.encode_with_utf8(raw_bibtex) if raw_bibtex
  return Source::Verbatim.new(verbatim: raw_bibtex ? raw_bibtex : citation) unless encoded_bibtex

  begin
    bibtex = Source::Bibtex.new_from_bibtex(
      BibTeX::Bibliography.parse(encoded_bibtex, filter: CrossRefLaTeX.instance).first
    )
  rescue BibTeX::ParseError => e
    # Handle year not being parsable but otherwise OK
    unless e.message.include?('Failed to parse BibTeX on value "year"')
      raise CrossrefBibtexParseError.new(doi: doi, bibtex: encoded_bibtex, message: e.message)
    end
    # NOTE(review): on this path `bibtex` stays nil, so bibtex_from_citeproc below
    # returns nil (it requires a present target) - confirm that is the intended result.
  end

  citeproc = get_bibtex_string(doi, 'citeproc')
  bibtex_from_citeproc(citeproc, bibtex)
end
.resolve_doi(citation) ⇒ Object
177 178 179 180 181 182 |
# File 'lib/vendor/serrano.rb', line 177

# Looks up a DOI for a free-text citation with a Serrano works query.
#
# Only the first hit is requested; it should be the one with the highest
# score/relevance: https://github.com/CrossRef/rest-api-doc#sort-order
#
# @param citation [String] the text to search for
# @return [Object, nil] the 'DOI' value of the first hit when its 'score' is at
#   least CUTOFF; nil when there is no hit or the score is lower
def self.resolve_doi(citation)
  response = ::Serrano.works(query: citation, limit: 1)
  best_match = response&.dig('message', 'items')&.first

  # A missing hit or missing score counts as -1.0, which is below the cutoff.
  relevance = best_match&.dig('score') || -1.0
  return nil unless relevance >= CUTOFF

  best_match&.dig('DOI')
end
.unurize_doi(doi) ⇒ String
158 159 160 161 162 163 164 165 166 |
# File 'lib/vendor/serrano.rb', line 158

# Reduces a DOI given with a resolver preamble to the bare DOI.
#
# Handles both a full URL ('https://doi.org/10.3897/zookeys.20.205') and a
# scheme-less resolver prefix ('dx.doi.org/10.3897/zookeys.20.205'). Anything
# else is returned as given, minus surrounding whitespace.
#
# @param doi [String] a DOI, bare or with a preamble
# @return [String] the part after the resolver host, or the stripped input
def self.unurize_doi(doi)
  bare = doi.strip

  if (url = bare.match(/https?:\/\/[^\/]+\/(.*)/))
    # Any http(s) URL: keep everything after the host.
    url[1]
  elsif (hosted = bare.match(/\A(?:dx\.)?doi\.org\/(.+)\z/i))
    # Resolver host written without a scheme.
    hosted[1]
  else
    bare
  end
end