Module: TaxonWorks::Vendor::Serrano
- Defined in:
- lib/vendor/serrano.rb
Overview
A middle-layer wrapper between Serrano and TaxonWorks
Defined Under Namespace
Classes: CrossRefLaTeX
Constant Summary collapse
- CUTOFF =
50.0
Class Method Summary collapse
- .bibtex_from_citproc(c, b) ⇒ Object
-
.citation_is_valid_doi?(citation) ⇒ Boolean
Boolean. Uses our global identifier class to determine whether the value is a DOI; this isn't super robust, but maybe OK.
- .cutoff ⇒ Float
-
.get_bibtex_string(citation, format = 'bibtex') ⇒ String?
The format argument is either 'bibtex' or 'citeproc'.
-
.new_from_citation(citation: nil) ⇒ Source::BibTex.new, ...
TODO: attempt to extract DOI from full string.
- .unurize_doi(doi) ⇒ String
Class Method Details
.bibtex_from_citproc(c, b) ⇒ Object
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
# File 'lib/vendor/serrano.rb', line 93

# Copies selected fields of a citeproc-json document onto a source record.
#
# Fields deliberately left untouched: author, editor, month, number, title,
# volume, doi, bibtex_type, translator and url.
#
# @param c [String, nil] the citeproc-json document as a JSON string
# @param b [Object] the record to update in place via `b[:attribute] = value`
#   (new_from_citation passes the result of Source::Bibtex.new_from_bibtex)
# @return [Object, nil] `b` after the update; nil when either argument is
#   blank or when `c` is not a parsable JSON object
def self.bibtex_from_citproc(c, b)
  return nil unless c.present? && b.present?

  begin
    c = JSON.parse(c)
  rescue JSON::ParserError
    # An unparsable payload is treated like a missing one.
    return nil
  end
  return nil unless c.is_a?(Hash)

  # record attribute => citeproc key, value passed through the UTF-8 encoder
  encoded_fields = {
    address: 'address',
    booktitle: 'booktitle',
    howpublished: 'how-published',
    institution: 'institution',
    journal: 'container-title',
    note: 'note',
    organization: 'organization',
    publisher: 'publisher',
    school: 'school'
  }
  encoded_fields.each do |attribute, key|
    b[attribute] = ::Utilities::Strings.encode_with_utf8(c[key]) unless c[key].blank?
  end

  # record attribute => citeproc key, value copied as is
  verbatim_fields = {
    chapter: 'chapter',
    edition: 'edition',
    pages: 'page',
    series: 'series',
    copyright: 'copyright'
  }
  verbatim_fields.each do |attribute, key|
    b[attribute] = c[key] unless c[key].blank?
  end

  unless c['abstract'].blank?
    # JATS paragraph/section wrappers are dropped, the remaining inline tags
    # are mapped to their HTML counterparts.
    jats_to_html = {
      '</jats:p>' => '',
      '<jats:p>' => '',
      '</jats:sec>' => '',
      '<jats:sec>' => '',
      '</jats:title>' => '</b>',
      '<jats:title>' => '<b>',
      '</jats:italic>' => '</i>',
      '<jats:italic>' => '<i>',
      '</jats:bold>' => '</b>',
      '<jats:bold>' => '<b>'
    }
    # NOTE(review): new_from_citation treats the encoder result as possibly nil,
    # hence the safe navigation here - confirm against Utilities::Strings.
    b[:abstract] = ::Utilities::Strings.encode_with_utf8(c['abstract'])&.
      gsub(Regexp.union(jats_to_html.keys), jats_to_html)
  end

  # The first entry of issued/date-parts is read as [year, month, day];
  # month is intentionally not copied.
  date_parts = c.dig('issued', 'date-parts', 0) || []
  b[:day] = date_parts[2] if date_parts[2]
  b[:year] = date_parts[0] if date_parts[0]

  # Array() accepts both a list of identifiers and a single scalar value.
  b[:isbn] = Array(c['ISBN']).first unless c['ISBN'].blank?
  b[:issn] = Array(c['ISSN']).first unless c['ISSN'].blank?

  # Link to an existing Serial whose name equals the journal, when there is one.
  b[:serial_id] = Serial.where(name: b[:journal]).first.try(:id) unless b[:journal].blank?
  b
end
.citation_is_valid_doi?(citation) ⇒ Boolean
Returns Boolean. Uses our global identifier class to determine whether the value is a DOI; this isn't super robust, but maybe OK.
157 158 159 160 161 |
# File 'lib/vendor/serrano.rb', line 157

# Tells whether a value is accepted as the identifier of a global DOI identifier.
#
# @param citation [Object] the value to test
# @return [Boolean] true when, after validation, the DOI object carries no
#   error on :identifier (errors on other attributes are ignored)
def self.citation_is_valid_doi?(citation)
  candidate = Identifier::Global::Doi.new(identifier: citation)
  # The result of valid? is discarded; presumably the call is what fills
  # `errors` (ActiveModel-style validation) - only :identifier is inspected.
  candidate.valid?
  identifier_rejected = candidate.errors.has_key?(:identifier)
  !identifier_rejected
end
.get_bibtex_string(citation, format = 'bibtex') ⇒ String?
Returns the requested representation as a String, or nil. The format argument is either 'bibtex' or 'citeproc'.
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/vendor/serrano.rb', line 67

# Fetches the BibTeX or citeproc-json representation of a citation.
#
# A value that is not already a valid DOI is first resolved through a works
# query; the top hit is used only when its score reaches CUTOFF.
#
# @param citation [String] a DOI or a free-text citation
# @param format [String] 'bibtex' or 'citeproc'
# @return [String, nil] the fetched representation; nil when no DOI could be
#   resolved, the BibTeX answer does not look like an entry, the format is
#   unknown, or any StandardError was raised along the way
def self.get_bibtex_string(citation, format = 'bibtex')
  # Convert citation to DOI if it isn't already
  unless citation_is_valid_doi?(citation)
    # First item should be the one with highest score/relevance: https://github.com/CrossRef/rest-api-doc#sort-order
    best_hit = ::Serrano.works(query: citation, limit: 1)&.dig("message", "items")&.first
    relevance = best_hit&.dig("score") || -1.0
    citation = (relevance >= CUTOFF) ? best_hit&.dig("DOI") : nil
  end

  case format
  when 'bibtex'
    entry = citation.nil? ? nil : ::Serrano.content_negotiation(ids: unurize_doi(citation), format: "bibtex")
    # Only answers containing a line that starts with '@' count as BibTeX.
    entry =~ /^\s*@/ ? entry : nil
  when 'citeproc'
    citation.nil? ? nil : ::Serrano.content_negotiation(ids: unurize_doi(citation), format: "citeproc-json")
  end
rescue
  # Best effort: every failure (lookup, network, parsing) yields nil.
  nil
end
.new_from_citation(citation: nil) ⇒ Source::BibTex.new, ...
TODO: attempt to extract DOI from full string
Four possible paths: 1) citation. 2) citation which includes a doi. 3) naked doi, e.g., '10.3897/zookeys.20.205'. 4) doi with preamble, e.g., 'dx.doi.org/10.3897/zookeys.20.205' or
'https://doi.org/10.3897/zookeys.20.205'.
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/vendor/serrano.rb', line 50

# Builds a source from a citation string or a DOI.
#
# @param citation [String, nil] full citation text or DOI; surrounding
#   whitespace is ignored and the caller's string is left untouched
# @return [Object, false] false when the stripped citation is shorter than
#   6 characters (nil included); otherwise the source built from the fetched
#   BibTeX (enriched with citeproc data when available), or a
#   Source::Verbatim when no usable BibTeX was obtained
def self.new_from_citation(citation: nil)
  # to_s covers the nil default; strip (not strip!) neither mutates the
  # argument nor fails on frozen strings.
  citation = citation.to_s.strip
  return false if citation.length < 6

  # check string encoding, if not UTF-8, check if compatible with UTF-8,
  # if so convert to UTF-8 and parse with latex, else use type verbatim
  a = get_bibtex_string(citation, 'bibtex')
  b = ::Utilities::Strings.encode_with_utf8(a) if a

  if b
    bibtex = Source::Bibtex.new_from_bibtex(BibTeX::Bibliography.parse(b, filter: CrossRefLaTeX.instance).first)
    citeproc = get_bibtex_string(citation, 'citeproc')
    # bibtex_from_citproc answers nil when there is no citeproc data; keep the
    # source parsed from BibTeX in that case instead of dropping it.
    bibtex_from_citproc(citeproc, bibtex) || bibtex
  else
    Source::Verbatim.new(verbatim: a || citation)
  end
end
.unurize_doi(doi) ⇒ String
144 145 146 147 148 149 150 151 152 |
# File 'lib/vendor/serrano.rb', line 144

# Reduces a DOI given with a resolver preamble to the bare DOI.
#
# @param doi [String] a naked DOI ('10.3897/zookeys.20.205'), a URL
#   ('https://doi.org/10.3897/zookeys.20.205') or a scheme-less resolver
#   form ('dx.doi.org/10.3897/zookeys.20.205')
# @return [String] the stripped input without the preamble; the stripped
#   input itself when no preamble is recognised
def self.unurize_doi(doi)
  doi = doi.strip

  if (url = doi.match(%r{https?://[^/]+/(.*)}))
    # Any http(s) URL: keep what follows the host.
    url[1]
  elsif (resolver = doi.match(%r{\A(?:dx\.)?doi\.org/(.*)}i))
    # Resolver host written without a scheme.
    resolver[1]
  else
    doi
  end
end