Module: Vendor::Serrano

Defined in:
lib/vendor/serrano.rb

Overview

A middle-layer wrapper between Serrano and TaxonWorks

Defined Under Namespace

Classes: CrossRefLaTeX

Constant Summary collapse

CUTOFF =
50.0

Class Method Summary collapse

Class Method Details

.bibtex_from_citproc(c, b) ⇒ Object



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/vendor/serrano.rb', line 112

# Copy selected fields from a citeproc JSON document onto a BibTeX-style record.
#
# Only non-blank citeproc values are copied; everything else on `b` is left as is.
# Not mapped here: author, editor, month, number, title, volume, doi,
# bibtex_type, translator, url.
#
# @param c [String] citeproc JSON text (parsed with JSON.parse)
# @param b [#[], #[]=] the record to fill in, addressed with symbol keys
# @return [Object, nil] `b`, modified in place; nil when either argument is blank
def self.bibtex_from_citproc(c, b)
  return nil if c.blank? || b.blank?

  citeproc = JSON.parse(c)

  # [BibTeX attribute, citeproc key, re-encode value as UTF-8?], in assignment order.
  [
    [:address,      'address',         true],
    [:booktitle,    'booktitle',       true],
    [:chapter,      'chapter',         false],
    [:edition,      'edition',         false],
    [:howpublished, 'how-published',   true],
    [:institution,  'institution',     true],
    [:journal,      'container-title', true],
    [:note,         'note',            true],
    [:organization, 'organization',    true],
    [:pages,        'page',            false],
    [:publisher,    'publisher',       true],
    [:school,       'school',          true],
    [:series,       'series',          false]
  ].each do |attribute, key, encode|
    value = citeproc[key]
    next if value.blank?

    b[attribute] = encode ? ::Utilities::Strings.encode_with_utf8(value) : value
  end

  abstract = citeproc['abstract']
  unless abstract.blank?
    # JATS markup: paragraph/section wrappers are dropped, the rest becomes <b>/<i>.
    jats_to_html = [
      ['</jats:p>', ''],
      ['<jats:p>', ''],
      ['</jats:sec>', ''],
      ['<jats:sec>', ''],
      ['</jats:title>', '</b>'],
      ['<jats:title>', '<b>'],
      ['</jats:italic>', '</i>'],
      ['<jats:italic>', '<i>'],
      ['</jats:bold>', '</b>'],
      ['<jats:bold>', '<b>']
    ]
    encoded_abstract = ::Utilities::Strings.encode_with_utf8(abstract)
    b[:abstract] = jats_to_html.reduce(encoded_abstract) do |text, (tag, replacement)|
      text.gsub(tag, replacement)
    end
  end

  copyright = citeproc['copyright']
  b[:copyright] = copyright unless copyright.blank?

  # 'issued' => { 'date-parts' => [[year, month, day]] }; only day and year are copied.
  issued = citeproc['issued']
  all_parts = issued && issued['date-parts']
  date_parts = all_parts && all_parts[0]
  if date_parts
    b[:day]  = date_parts[2] if date_parts[2]
    b[:year] = date_parts[0] if date_parts[0]
  end

  # ISBN / ISSN are read as lists; the first entry is kept.
  isbn = citeproc['ISBN']
  b[:isbn] = isbn.first unless isbn.blank?
  issn = citeproc['ISSN']
  b[:issn] = issn.first unless issn.blank?

  # Link to the first Serial whose name equals the journal (set above or already on b).
  journal = b[:journal]
  b[:serial_id] = Serial.where(name: journal).first.try(:id) unless journal.blank?

  b
end

.citation_is_valid_doi?(citation) ⇒ Boolean

Returns true if the value is a DOI, as determined by our global identifier class. This isn’t super robust, but is probably OK.

Returns:

  • (Boolean)

    true if the value is a DOI, as determined by our global identifier class; this isn’t super robust, but is probably OK



176
177
178
179
180
# File 'lib/vendor/serrano.rb', line 176

# Check whether a string passes DOI validation on Identifier::Global::Doi.
#
# Only errors on :identifier count; errors recorded on any other attribute
# are ignored.
#
# @param citation [String] the candidate DOI
# @return [Boolean] true when validation leaves no error on :identifier
def self.citation_is_valid_doi?(citation)
  # valid? is called for its side effect of populating `errors`; its result is unused.
  candidate = Identifier::Global::Doi.new(identifier: citation).tap(&:valid?)
  !candidate.errors.has_key?(:identifier)
end

.cutoff ⇒ Float

Returns:

  • (Float)


8
9
10
# File 'lib/vendor/serrano.rb', line 8

# Reader for the CUTOFF constant, the threshold that get_bibtex_string
# compares a search result's score against (score >= CUTOFF).
#
# @return [Float] the value of CUTOFF
def self.cutoff
  CUTOFF
end

.get_bibtex_string(citation, format = 'bibtex') ⇒ String?

Returns the response for the citation as a string, or nil; format must be ‘bibtex’ or ‘citeproc’.

Returns:

  • (String, nil)

    the response for the citation, or nil; format must be ‘bibtex’ or ‘citeproc’



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/vendor/serrano.rb', line 86

# Resolve a citation (or DOI) and fetch its record through content negotiation.
#
# A value that is not already a valid DOI is first searched for; the top hit's
# DOI is used only when its score reaches CUTOFF. Any StandardError raised
# along the way is swallowed and reported as nil (best-effort lookup).
#
# @param citation [String] a DOI, a DOI URL, or a free-text citation
# @param format [String] 'bibtex' or 'citeproc'
# @return [String, nil] the negotiated response; nil when the format is not
#   supported, no sufficiently scored DOI was found, a 'bibtex' response does
#   not look like a BibTeX entry, or an error occurred
def self.get_bibtex_string(citation, format = 'bibtex')
  # Public format name => format name handed to content negotiation.
  negotiated_format = { 'bibtex' => 'bibtex', 'citeproc' => 'citeproc-json' }[format]
  # Unsupported format: answer before spending a remote search on a result we would discard.
  return nil if negotiated_format.nil?

  # Convert citation to DOI if it isn't already
  unless citation_is_valid_doi?(citation)
    # First item should be the one with highest score/relevance: https://github.com/CrossRef/rest-api-doc#sort-order
    best_match = ::Serrano.works(query: citation, limit: 1)&.dig('message', 'items')&.first
    score = best_match&.dig('score') || -1.0
    citation = score >= CUTOFF ? best_match&.dig('DOI') : nil
  end
  return nil if citation.nil?

  response = ::Serrano.content_negotiation(ids: unurize_doi(citation), format: negotiated_format)
  return response unless format == 'bibtex'

  # Keep only responses that contain a line starting with '@', i.e. a BibTeX entry.
  response =~ /^\s*@/ ? response : nil
rescue StandardError
  nil
end

.new_from_citation(citation: nil) ⇒ Source::BibTex.new, ...

TODO: attempt to extract DOI from full string

Four possible paths: 1) citation. 2) citation which includes a doi. 3) naked doi, e.g., ‘10.3897/zookeys.20.205’. 4) doi with preamble, e.g., ‘dx.doi.org/10.3897/zookeys.20.205’ or ‘https://doi.org/10.3897/zookeys.20.205’.

Returns:

  • (Source::BibTex.new)

    a new instance

  • (Source::Verbatim.new)

    a new instance

  • (false)


49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/vendor/serrano.rb', line 49

# Build a new Source from a free-text citation or a DOI.
#
# TODO: attempt to extract DOI from full string
#
# @param citation [String, nil] citation text, bare DOI, or DOI URL; surrounding
#   whitespace is ignored and the caller's string is not modified
# @return [Source::Bibtex] when BibTeX was retrieved, parsed, and enriched by
#   bibtex_from_citproc
# @return [Source::Verbatim] when no BibTeX string could be retrieved
# @return [false] when citation is nil or shorter than 6 characters once stripped
# @return [nil] when bibtex_from_citproc returns nil (it does so when the citeproc
#   response or the parsed BibTeX is blank)
# @raise [BibTeX::ParseError] for any parse failure other than an unparsable year
def self.new_from_citation(citation: nil)
  # Work on a stripped copy: the default is nil, and callers may pass frozen strings.
  citation = citation.to_s.strip
  return false if citation.length < 6

  # check string encoding, if not UTF-8, check if compatible with UTF-8,
  # if so convert to UTF-8 and parse with latex, else use type verbatim
  a = get_bibtex_string(citation, 'bibtex')
  b = ::Utilities::Strings.encode_with_utf8(a) if a

  if b
    begin
      bibtex = Source::Bibtex.new_from_bibtex(BibTeX::Bibliography.parse(b, filter: CrossRefLaTeX.instance).first)
    rescue BibTeX::ParseError => e
      # Handle year not being parsable but otherwise OK; `bibtex` stays nil in that case.
      raise unless e.message.include?('Failed to parse BibTeX on value "year"')
    end

    citeproc = get_bibtex_string(citation, 'citeproc')
    bibtex_from_citproc(citeproc, bibtex)
  else
    Source::Verbatim.new(verbatim: a || citation)
  end
end

.unurize_doi(doi) ⇒ String

Returns:

  • (String)


163
164
165
166
167
168
169
170
171
# File 'lib/vendor/serrano.rb', line 163

# Reduce a DOI written with a resolver preamble to the bare DOI.
#
# Handles full URLs ('https://doi.org/10.3897/zookeys.20.205'), a scheme-less
# resolver host ('dx.doi.org/10.3897/zookeys.20.205', 'doi.org/...') and a
# 'doi:' label. Anything else is returned stripped but otherwise unchanged.
#
# @param doi [String] a DOI, optionally with surrounding whitespace or a preamble
# @return [String] the DOI without its preamble
def self.unurize_doi(doi)
  doi = doi.strip

  if (url = doi.match(%r{https?://[^/]+/(.*)}))
    # Full URL: keep everything after the host.
    url[1]
  else
    # No scheme: drop a leading doi.org / dx.doi.org host or "doi:" label, if any.
    doi.sub(%r{\A(?:(?:dx\.)?doi\.org/|doi:\s*)}i, '')
  end
end