Module: Vendor::Serrano
- Defined in:
- lib/vendor/serrano.rb
Overview
A middle-layer wrapper between Serrano and TaxonWorks
Defined Under Namespace
Classes: CrossRefLaTeX
Constant Summary collapse
- CUTOFF =
50.0
Class Method Summary collapse
- .bibtex_from_citproc(c, b) ⇒ Object
-
.citation_is_valid_doi?(citation) ⇒ Boolean
Boolean: uses our global identifier class to determine whether the value is a DOI. This isn’t super robust, but maybe OK.
- .cutoff ⇒ Float
-
.get_bibtex_string(citation, format = 'bibtex') ⇒ String?
The format is either ‘bibtex’ or ‘citeproc’.
-
.new_from_citation(citation: nil) ⇒ Source::BibTex.new, ...
TODO: attempt to extract DOI from full string.
- .unurize_doi(doi) ⇒ String
Class Method Details
.bibtex_from_citproc(c, b) ⇒ Object
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'lib/vendor/serrano.rb', line 112
#
# Copy fields from a citeproc-JSON document onto an existing source record.
#
# @param c [String] citeproc JSON text; it is parsed with JSON.parse
# @param b [#[], #[]=] the record to fill in (presumably a Source::Bibtex —
#   confirm against callers); it is mutated in place
# @return [Object, nil] b after the fields were copied, or nil when either
#   argument is blank
def self.bibtex_from_citproc(c, b)
  return nil unless c.present? && b.present?

  c = JSON.parse(c)
  to_utf8 = ->(value) { ::Utilities::Strings.encode_with_utf8(value) }

  # [target attribute, citeproc key, re-encode as UTF-8?], in assignment order.
  # Deliberately not copied here: author, editor, month, number, title,
  # volume, doi, bibtex_type, translator, url.
  [
    [:address, 'address', true],
    [:booktitle, 'booktitle', true],
    [:chapter, 'chapter', false],
    [:edition, 'edition', false],
    [:howpublished, 'how-published', true],
    [:institution, 'institution', true],
    [:journal, 'container-title', true],
    [:note, 'note', true],
    [:organization, 'organization', true],
    [:pages, 'page', false],
    [:publisher, 'publisher', true],
    [:school, 'school', true],
    [:series, 'series', false]
  ].each do |attribute, key, encode|
    value = c[key]
    next if value.blank?

    b[attribute] = encode ? to_utf8.call(value) : value
  end

  unless c['abstract'].blank?
    abstract = to_utf8.call(c['abstract'])

    # JATS markup is either dropped (paragraph/section wrappers) or mapped to
    # plain HTML emphasis tags. Replacements are applied in this order.
    jats_replacements = [
      ['</jats:p>', ''],
      ['<jats:p>', ''],
      ['</jats:sec>', ''],
      ['<jats:sec>', ''],
      ['</jats:title>', '</b>'],
      ['<jats:title>', '<b>'],
      ['</jats:italic>', '</i>'],
      ['<jats:italic>', '<i>'],
      ['</jats:bold>', '</b>'],
      ['<jats:bold>', '<b>']
    ]

    # NOTE(review): new_from_citation treats the result of encode_with_utf8 as
    # possibly falsy, so skip the abstract instead of calling gsub on nil.
    unless abstract.nil?
      b[:abstract] = jats_replacements.reduce(abstract) do |text, (tag, replacement)|
        text.gsub(tag, replacement)
      end
    end
  end

  b[:copyright] = c['copyright'] unless c['copyright'].blank?

  # 'issued' => { 'date-parts' => [[year, month, day]] }; only day and year are copied.
  issued = c['issued']
  date_parts = issued && issued['date-parts'] && issued['date-parts'][0]
  if date_parts
    b[:day] = date_parts[2] if date_parts[2]
    b[:year] = date_parts[0] if date_parts[0]
  end

  b[:isbn] = c['ISBN'].first unless c['ISBN'].blank?
  b[:issn] = c['ISSN'].first unless c['ISSN'].blank?

  # Link to the first Serial whose name equals the journal title, if there is one.
  b[:serial_id] = Serial.where(name: b[:journal]).first&.id unless b[:journal].blank?
  b
end
.citation_is_valid_doi?(citation) ⇒ Boolean
Returns a Boolean: uses our global identifier class to determine whether the value is a DOI. This isn’t super robust, but maybe OK.
176 177 178 179 180 |
# File 'lib/vendor/serrano.rb', line 176
#
# Decide whether a string is acceptable as a DOI by building a throwaway
# Identifier::Global::Doi around it and running its validations.
#
# @param citation [String] the candidate value
# @return [Boolean] true when validation reported no error on :identifier
#   (errors on other attributes are ignored)
def self.citation_is_valid_doi?(citation)
  candidate = Identifier::Global::Doi.new(identifier: citation)

  # The return value of valid? is irrelevant; it is called to populate errors.
  candidate.valid?

  identifier_rejected = candidate.errors.key?(:identifier)
  !identifier_rejected
end
.get_bibtex_string(citation, format = 'bibtex') ⇒ String?
Returns a String or nil; the format is either ‘bibtex’ or ‘citeproc’.
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/vendor/serrano.rb', line 86
#
# Ask CrossRef (through Serrano) for a rendering of a citation.
#
# When `citation` is not itself a valid DOI, it is first sent to the works
# search; the top hit is used only if its score reaches CUTOFF.
#
# @param citation [String] a DOI, or free citation text
# @param format [String] 'bibtex' or 'citeproc'
# @return [String, nil] for 'bibtex', the response if it looks like BibTeX
#   (a line starting with optional whitespace and '@'); for 'citeproc', the
#   citeproc-json response as returned; nil for any other format, when no DOI
#   could be established, or when anything raised along the way
def self.get_bibtex_string(citation, format = 'bibtex')
  # An unsupported format can only ever produce nil, so say so before any
  # remote call is made.
  return nil unless %w[bibtex citeproc].include?(format)

  begin
    # Convert citation to DOI if it isn't already
    unless citation_is_valid_doi?(citation)
      # First item should be the one with highest score/relevance: https://github.com/CrossRef/rest-api-doc#sort-order
      res = ::Serrano.works(query: citation, limit: 1)&.dig('message', 'items')&.first
      score = res&.dig('score') || -1.0
      citation = score >= CUTOFF ? res&.dig('DOI') : nil
    end
    return nil if citation.nil?

    doi = unurize_doi(citation)
    if format == 'bibtex'
      bibtex = ::Serrano.content_negotiation(ids: doi, format: 'bibtex')
      bibtex =~ /^\s*@/ ? bibtex : nil
    else
      ::Serrano.content_negotiation(ids: doi, format: 'citeproc-json')
    end
  rescue StandardError
    # Deliberate best effort: any lookup failure is reported as "nothing found".
    nil
  end
end
.new_from_citation(citation: nil) ⇒ Source::BibTex.new, ...
TODO: attempt to extract DOI from full string
Four possible paths: 1) citation. 2) citation which includes a doi. 3) naked doi, e.g., ‘10.3897/zookeys.20.205’. 4) doi with preamble, e.g., ‘dx.doi.org/10.3897/zookeys.20.205’ or
'https://doi.org/10.3897/zookeys.20.205'.
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/vendor/serrano.rb', line 49
#
# Build a source from a citation string or a DOI.
#
# TODO: attempt to extract DOI from full string
#
# @param citation [String, nil] citation text, a naked DOI, or a DOI with a
#   URL preamble
# @return [Object, false, nil] false when the citation is nil or shorter than
#   six characters after stripping; a Source::Verbatim when no BibTeX could be
#   obtained; otherwise whatever bibtex_from_citproc returns for the parsed
#   BibTeX source
# @raise [BibTeX::ParseError] for parse failures other than an unparsable year
def self.new_from_citation(citation: nil)
  return false if citation.nil?

  # Non-mutating strip: leaves the caller's string alone and works on frozen strings.
  citation = citation.strip
  return false if citation.length < 6

  # check string encoding, if not UTF-8, check if compatible with UTF-8,
  # if so convert to UTF-8 and parse with latex, else use type verbatim
  a = get_bibtex_string(citation, 'bibtex')
  b = ::Utilities::Strings.encode_with_utf8(a) if a

  return Source::Verbatim.new(verbatim: a || citation) unless b

  begin
    bibtex = Source::Bibtex.new_from_bibtex(BibTeX::Bibliography.parse(b, filter: CrossRefLaTeX.instance).first)
  rescue BibTeX::ParseError => e
    # Handle year not being parsable but otherwise OK
    raise unless e.message.include?('Failed to parse BibTeX on value "year"')
    # NOTE(review): bibtex stays nil on this path, so the call below is handed
    # a nil source — confirm that this is the intended outcome.
  end

  citeproc = get_bibtex_string(citation, 'citeproc')
  bibtex_from_citproc(citeproc, bibtex)
end
.unurize_doi(doi) ⇒ String
163 164 165 166 167 168 169 170 171 |
# File 'lib/vendor/serrano.rb', line 163
#
# Reduce a DOI given with a resolver preamble to the naked DOI.
#
#   unurize_doi('https://doi.org/10.3897/zookeys.20.205') # => '10.3897/zookeys.20.205'
#   unurize_doi('dx.doi.org/10.3897/zookeys.20.205')      # => '10.3897/zookeys.20.205'
#   unurize_doi(' 10.3897/zookeys.20.205 ')               # => '10.3897/zookeys.20.205'
#
# @param doi [String] a DOI, optionally written as a URL
# @return [String] everything after the host of an http(s) URL, or after a
#   scheme-less doi.org / dx.doi.org prefix; otherwise the stripped input
def self.unurize_doi(doi)
  doi = doi.strip

  if (matches = doi.match(%r{https?://[^/]+/(.*)}))
    matches[1]
  elsif (matches = doi.match(%r{\A(?:dx\.)?doi\.org/(.+)\z}i))
    # Resolver host written without a scheme, e.g. 'dx.doi.org/10.…'.
    matches[1]
  else
    doi
  end
end