Module: Vendor::NexusParser

Defined in:
lib/vendor/nexus_parser.rb

Class Method Summary collapse

Class Method Details

.assign_gap_names(nf) ⇒ Object

Assigns the name ‘gap’ to every gap state: nexus_parser outputs gap states without a name, but TW requires one. Raises on error.



51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/vendor/nexus_parser.rb', line 51

# Gives every gap state the name returned by gap_name_for_states.
#
# The gap label is read from `nf.vars[:gap]`; each character whose
# `state_labels` include that label has the matching entry in `states`
# renamed.
#
# @param nf the parsed nexus object (responds to `vars` and `characters`);
#   may be nil
# @return [nil] when `nf` is nil or defines no gap label; otherwise the
#   value returned by iterating `nf.characters`
# @raise [TaxonWorks::Error] propagated from gap_name_for_states when a
#   character already has a state named 'gap'
def self.assign_gap_names(nf)
  # Safe navigation alone is not enough here: `nf&.vars[:gap]` still sends
  # `[]` to nil when nf is nil, so guard explicitly.
  return if nf.nil?

  gap_label = nf.vars[:gap]
  return if gap_label.nil?

  nf.characters.each_with_index do |c, i|
    next unless c.state_labels.include?(gap_label)

    c.states[gap_label].name = gap_name_for_states(c.states, i)
  end
end

.document_id_to_nexus(doc_id) ⇒ Object

Raises on error



3
4
5
6
# File 'lib/vendor/nexus_parser.rb', line 3

# Looks up the Document with the given id and converts it with
# document_to_nexus.
#
# @param doc_id the id passed to Document.find
# @return whatever document_to_nexus returns for the found document
# Raises on error
def self.document_id_to_nexus(doc_id)
  document_to_nexus(Document.find(doc_id))
end

.document_to_nexus(doc) ⇒ Object

Raises on error



9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/vendor/nexus_parser.rb', line 9

# Reads the file at `doc.document_file.path`, parses it with
# parse_nexus_file and post-processes the result in this order:
# gap names, character/state fixup and validation, name squishing.
#
# @param doc an object exposing `document_file.path` and
#   `document_file_file_name`
# @return the parsed (and post-processed) nexus object
# Raises on error
def self.document_to_nexus(doc)
  contents = File.read(doc.document_file.path)

  parse_nexus_file(contents).tap do |nexus|
    assign_gap_names(nexus)
    fixup_and_validate_characters_and_states(
      nexus.characters,
      doc.document_file_file_name
    )
    nil_squish_strip_names(nexus)
  end
end

.find_duplicates(arr) ⇒ Object



90
91
92
93
94
95
96
97
# File 'lib/vendor/nexus_parser.rb', line 90

# Returns the elements of `arr` that occur more than once, each listed a
# single time, in the order in which their first repeat is encountered.
#
# @param arr a collection responding to `each`
# @return [Array] the repeated elements (empty when there are none)
def self.find_duplicates(arr)
  # Approach from https://stackoverflow.com/a/786976
  seen = Set.new
  repeated = Set.new

  arr.each do |item|
    # Set#add? returns nil when the item was already present.
    next if seen.add?(item)

    repeated.add(item)
  end

  repeated.to_a
end

.fixup_and_validate_characters_and_states(characters, file_name) ⇒ Object

Raises on error



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/vendor/nexus_parser.rb', line 24

# Fills in placeholder names for unnamed characters and empty-named states,
# then rejects any character whose state names are not unique.
#
# @param characters the characters to fix up (each responds to `name`,
#   `name=` and `states`)
# @param file_name the file name interpolated into placeholder names
# @return the value returned by iterating `characters`
# @raise [TaxonWorks::Error] when a character has duplicate state names
def self.fixup_and_validate_characters_and_states(characters, file_name)
  characters.each_with_index do |character, index|
    position = index + 1

    # 'Undefined' is a nexus_parser special string
    if character.name.nil? || character.name == 'Undefined'
      character.name = "Undefined (#{position}) from (#{file_name})"
    end

    # Duplicate state labels shouldn't be possible (right?) because they are
    # assigned sequentially, but nexus_parser does allow duplicate and empty
    # state names.
    state_names = character.states.map do |label, state|
      state.name = "Undefined (#{label}) from (#{file_name})" if state.name == ''
      state.name
    end

    dup_names = find_duplicates(state_names)
    next unless dup_names.present?

    dups = dup_names.join(', ')
    raise TaxonWorks::Error, "Error in character #{position}: duplicate state name(s): '#{dups}'. In TaxonWorks character state names must be unique for a given descriptor."
  end
end

.gap_name_for_states(states, i) ⇒ Object



64
65
66
67
68
69
70
71
# File 'lib/vendor/nexus_parser.rb', line 64

# Returns the name to use for a character's gap state.
#
# @param states the character's states as label/state pairs; each state
#   responds to `name`
# @param i zero-based index of the character, reported one-based in the
#   error message
# @return [String] 'gap'
# @raise [TaxonWorks::Error] when one of the states is already named 'gap'
def self.gap_name_for_states(states, i)
  names_in_use = states.map { |_label, state| state.name }

  if names_in_use.include?('gap')
    raise TaxonWorks::Error, "Nexus character #{i + 1} contains a gap state and a character state named 'gap', please rename the character state"
  end

  'gap'
end

.nil_squish_strip_names(nf) ⇒ Object

TW squishes names before saving them to the db, whereas incoming nexus names are unsquished. Squish the incoming names so they match what might already be in the db.



75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/vendor/nexus_parser.rb', line 75

# Passes every character name, character state name and taxon name in `nf`
# through Utilities::Rails::Strings.nil_squish_strip, writing the result
# back in place.
#
# @param nf the parsed nexus object (responds to `characters` and `taxa`)
# @return the value returned by iterating `nf.taxa`
def self.nil_squish_strip_names(nf)
  nf.characters.each do |character|
    character.name = Utilities::Rails::Strings.nil_squish_strip(character.name)

    character.states.each_value do |state|
      state.name = Utilities::Rails::Strings.nil_squish_strip(state.name)
    end
  end

  nf.taxa.each do |taxon|
    taxon.name = Utilities::Rails::Strings.nil_squish_strip(taxon.name)
  end
end