Module: Vendor::Serrano

Defined in:
lib/vendor/serrano.rb

Overview

A middle-layer wrapper between Serrano and TaxonWorks

Defined Under Namespace

Classes: CrossRefLaTeX, CrossrefBibtexParseError

Constant Summary collapse

CUTOFF =
50.0

Class Method Summary collapse

Class Method Details

.bibtex_from_citeproc(c, b) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/vendor/serrano.rb', line 107

# Copies selected fields of a citeproc (CSL-JSON) record onto a
# BibTeX-keyed target.
#
# Fields not copied by this method: author, editor, month, number, title,
# volume, doi, bibtex_type, translator, url.
#
# @param c [String] citeproc JSON document (parsed with JSON.parse)
# @param b [#[], #[]=] target written with BibTeX attribute names as symbol keys
# @return [Object, nil] the same `b` after assignment, or nil when either
#   argument is blank
def self.bibtex_from_citeproc(c, b)
  return nil if c.blank? || b.blank?

  c = JSON.parse(c)

  # [target attribute, citeproc key, run through encode_with_utf8?],
  # listed in the order the attributes are assigned.
  field_map = [
    [:address,      'address',         true],
    [:booktitle,    'booktitle',       true],
    [:chapter,      'chapter',         false],
    [:edition,      'edition',         false],
    [:howpublished, 'how-published',   true],
    [:institution,  'institution',     true],
    [:journal,      'container-title', true],
    [:note,         'note',            true],
    [:organization, 'organization',    true],
    [:pages,        'page',            false],
    [:publisher,    'publisher',       true],
    [:school,       'school',          true],
    [:series,       'series',          false]
  ]

  field_map.each do |attribute, key, encode|
    value = c[key]
    next if value.blank?

    b[attribute] = encode ? ::Utilities::Strings.encode_with_utf8(value) : value
  end

  unless c['abstract'].blank?
    # JATS markup is either dropped or swapped for plain HTML tags; the
    # substitutions are applied one after another in this order.
    jats_substitutions = [
      ['</jats:p>', ''],
      ['<jats:p>', ''],
      ['</jats:sec>', ''],
      ['<jats:sec>', ''],
      ['</jats:title>', '</b>'],
      ['<jats:title>', '<b>'],
      ['</jats:italic>', '</i>'],
      ['<jats:italic>', '<i>'],
      ['</jats:bold>', '</b>'],
      ['<jats:bold>', '<b>']
    ]
    abstract = ::Utilities::Strings.encode_with_utf8(c['abstract'])
    b[:abstract] = jats_substitutions.inject(abstract) do |text, (jats_tag, replacement)|
      text.gsub(jats_tag, replacement)
    end
  end

  b[:copyright] = c['copyright'] unless c['copyright'].blank?

  # The first entry of issued/date-parts is read positionally:
  # index 0 is used as the year and index 2 as the day.
  date_parts = c['issued'] && c['issued']['date-parts'] && c['issued']['date-parts'][0]
  if date_parts
    b[:day]  = date_parts[2] if date_parts[2]
    b[:year] = date_parts[0] if date_parts[0]
  end

  # Only the first ISBN / ISSN of each list is kept.
  { isbn: 'ISBN', issn: 'ISSN' }.each do |attribute, key|
    b[attribute] = c[key].first unless c[key].blank?
  end

  # Link to the Serial whose name equals the journal; nil when none is found.
  b[:serial_id] = Serial.where(name: b[:journal]).first.try(:id) unless b[:journal].blank?
  b
end

.citation_is_valid_doi?(citation) ⇒ Boolean

Uses our global identifier class to determine whether the value is a DOI. This isn’t super robust, but may be OK.

Returns:

  • (Boolean)

    Whether our global identifier class accepts the value as a DOI. This isn’t super robust, but may be OK.



171
172
173
174
175
# File 'lib/vendor/serrano.rb', line 171

# Checks +citation+ against the DOI validation of the global identifier class.
#
# @param citation [String] candidate DOI
# @return [Boolean] true when validation records no error on :identifier
def self.citation_is_valid_doi?(citation)
  # valid? is called only to populate #errors; errors recorded on other
  # attributes do not affect the result.
  candidate = Identifier::Global::Doi.new(identifier: citation).tap(&:valid?)
  !candidate.errors.has_key?(:identifier)
end

.cutoff ⇒ Float

Returns:

  • (Float)


18
19
20
# File 'lib/vendor/serrano.rb', line 18

# Exposes the CUTOFF constant, the minimum relevance score that
# resolve_doi requires before it accepts a search hit.
#
# @return [Float]
def self.cutoff
  CUTOFF
end

.get_bibtex_string(doi, format = 'bibtex') ⇒ String?

Returns the record fetched for the DOI as a string, or nil; format must be ‘bibtex’ or ‘citeproc’.

Returns:

  • (String, nil)

    the record in the requested format, or nil when nothing usable was fetched; format == ‘bibtex’ or ‘citeproc’



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/vendor/serrano.rb', line 91

# Fetches the record for a DOI through Serrano content negotiation.
#
# @param doi [String, nil] DOI, optionally written as a URL
# @param format [String] 'bibtex' or 'citeproc'
# @return [String, nil] the fetched record; nil when +doi+ is nil, the
#   format is not recognised, a 'bibtex' response does not begin with an
#   entry marker ("@"), or any StandardError is raised along the way
def self.get_bibtex_string(doi, format = 'bibtex')
  return nil if doi.nil?

  case format
  when 'bibtex'
    entry = ::Serrano.content_negotiation(ids: unurize_doi(doi), format: "bibtex")
    # Anything not starting with "@" is treated as "no record".
    entry if entry =~ /\A\s*@/
  when 'citeproc'
    ::Serrano.content_negotiation(ids: unurize_doi(doi), format: "citeproc-json")
  end
rescue
  # Best effort: every failure is reported as nil.
  nil
end

.new_from_citation(citation: nil) ⇒ Source::BibTex.new, ...

TODO: attempt to extract DOI from full string

Four possible paths: 1) a citation; 2) a citation which includes a DOI; 3) a naked DOI, e.g., ‘10.3897/zookeys.20.205’; 4) a DOI with a preamble, e.g., ‘dx.doi.org/10.3897/zookeys.20.205’ or ‘https://doi.org/10.3897/zookeys.20.205’.

Returns:

  • (Source::Bibtex)

    a new instance

  • (Source::Verbatim.new)

    a new instance

  • (false)


59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/vendor/serrano.rb', line 59

# Builds a Source from a free-text citation or a DOI.
#
# The citation is used as the DOI when it validates as one; otherwise a DOI
# is looked up with resolve_doi. The BibTeX record for that DOI is parsed
# into a Source::Bibtex, which is then enriched from the citeproc record.
#
# @param citation [String, nil] citation text, a naked DOI, or a DOI URL
# @return [Source::Bibtex] when a BibTeX record was fetched and parsed
# @return [Source::Verbatim] when no DOI was found, or no usable BibTeX
#   string was obtained for it
# @return [false] when +citation+ is nil or shorter than 6 characters
#   after stripping
# @return [nil] when the only BibTeX parse failure concerned the "year" value
# @raise [CrossrefBibtexParseError] on any other BibTeX parse failure
def self.new_from_citation(citation: nil)
  # Work on a stripped copy so the caller's string is not mutated (and a
  # frozen string is accepted); nil is rejected instead of raising.
  citation = citation&.strip
  return false if citation.nil? || citation.length < 6

  doi = citation_is_valid_doi?(citation) ? citation : resolve_doi(citation)
  return Source::Verbatim.new(verbatim: citation) if doi.nil?

  # check string encoding, if not UTF-8, check if compatible with UTF-8,
  # if so convert to UTF-8 and parse with latex, else use type verbatim
  a = get_bibtex_string(doi, 'bibtex')
  b = ::Utilities::Strings.encode_with_utf8(a) if a
  return Source::Verbatim.new(verbatim: a || citation) unless b

  begin
    bibtex = Source::Bibtex.new_from_bibtex(BibTeX::Bibliography.parse(b, filter: CrossRefLaTeX.instance).first)
  rescue BibTeX::ParseError => e
    # A year that cannot be parsed is tolerated; anything else is re-raised
    # with the DOI and the offending BibTeX attached.
    unless e.message.include?('Failed to parse BibTeX on value "year"')
      raise CrossrefBibtexParseError.new(doi: doi, bibtex: b, message: e.message)
    end
    bibtex = nil
  end

  # Without a parsed record there is nothing to enrich, so the citeproc
  # request is skipped; the result is nil, as before.
  return nil unless bibtex

  citeproc = get_bibtex_string(doi, 'citeproc')
  # bibtex_from_citeproc returns nil when the citeproc lookup produced
  # nothing; keep the parsed record rather than discarding it.
  bibtex_from_citeproc(citeproc, bibtex) || bibtex
end

.resolve_doi(citation) ⇒ Object



177
178
179
180
181
182
# File 'lib/vendor/serrano.rb', line 177

# Searches CrossRef for +citation+ and returns the DOI of the top hit when
# its relevance score reaches CUTOFF.
#
# @param citation [String] free-text citation used as the search query
# @return [String, nil] DOI of the best match; nil when there is no hit or
#   its score is below CUTOFF
def self.resolve_doi(citation)
  # First item should be the one with highest score/relevance: https://github.com/CrossRef/rest-api-doc#sort-order
  best_match = ::Serrano.works(query: citation, limit: 1)&.dig('message', 'items')&.first
  return nil if best_match.nil?

  # A hit without a score is given -1.0.
  relevance = best_match.dig('score') || -1.0
  best_match.dig('DOI') if relevance >= CUTOFF
end

.unurize_doi(doi) ⇒ String

Returns:

  • (String)


158
159
160
161
162
163
164
165
166
# File 'lib/vendor/serrano.rb', line 158

# Reduces a DOI written with a resolver preamble to the naked DOI.
#
# Handles both full URLs ('https://doi.org/10.3897/zookeys.20.205') and the
# scheme-less resolver form ('dx.doi.org/10.3897/zookeys.20.205'); any other
# value is returned stripped of surrounding whitespace.
#
# @param doi [String] a DOI, optionally prefixed with a resolver URL or host
# @return [String] the DOI without the preamble
def self.unurize_doi(doi)
  doi = doi.strip

  if (url = doi.match(%r{https?://[^/]+/(.*)}))
    # Full URL: keep everything after the host.
    url[1]
  elsif (hosted = doi.match(%r{\A(?:dx\.)?doi\.org/(.*)}i))
    # Resolver host without a scheme.
    hosted[1]
  else
    doi
  end
end