Module: Helpers::Sequences

Defined in:
lib/batch_load/helpers/sequences.rb

Instance Method Summary

Instance Method Details

#create_origin_relationship(filename, sequence) ⇒ OriginRelationship

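Parameters:

  • filename (String)
  • sequence (Sequence)

Returns:

  • (OriginRelationship, nil) — nil when no matching Extract is found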



42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/batch_load/helpers/sequences.rb', line 42

# Builds (without saving) the OriginRelationship that links the Extract a
# sequence came from to the sequence itself.
#
# @param filename [String] filename containing the voucher-number segment
# @param sequence [Sequence] object stored as the relationship's new object
# @return [OriginRelationship, nil] a new relationship, or nil when no
#   Extract is found for the voucher number
def create_origin_relationship(filename, sequence)
  # The source Extract is looked up with the voucher number parsed from the filename
  voucher_number = get_voucher_number(filename)
  source_extract = Extract.with_namespaced_identifier('GenBank', voucher_number).take

  # Nothing to relate the sequence to when the Extract is missing
  return nil unless source_extract.present?

  # Extract is the old (source) object, Sequence the new (target) object
  relationship_attributes = { old_object: source_extract, new_object: sequence }
  OriginRelationship.new(relationship_attributes)
end

#create_sequence(filename, file_content) ⇒ Sequence

Parameters:

  • filename (String)
  • file_content (String)

Returns:

  • (Sequence)



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/batch_load/helpers/sequences.rb', line 6

# Builds (without saving) a Sequence from a filename and the file's content.
#
# The name is "<taxon name>_<voucher number>_<gene fragment>", all parsed
# from or looked up via the filename. Identifier attributes are attached only
# for the identifier texts that are present in the filename.
#
# @param filename [String] filename carrying the encoded metadata segments
# @param file_content [String] raw file text the sequence is read from
# @return [Sequence] a new Sequence built from the collected attributes
def create_sequence(filename, file_content)
  # Namespace used for the local import identifier (DRMSequenceId)
  import_namespace = Namespace.find_by(name: 'DRMSequenceId')

  taxon_part = get_taxon_name(filename)
  voucher_part = get_voucher_number(filename)
  fragment_part = get_gene_fragement(filename)

  attributes = {
    name: taxon_part + '_' + voucher_part + '_' + fragment_part,
    sequence_type: get_sequence_type(filename),
    sequence: get_sequence(file_content),
    identifiers_attributes: []
  }

  # GenBank accession code, when the filename carries one
  genbank_text = get_genbank_text(filename)
  if genbank_text.present?
    attributes[:identifiers_attributes] << {
      type: 'Identifier::Global::GenBankAccessionCode',
      identifier: genbank_text
    }
  end

  # Local import identifier, when the filename carries a sequence id
  sequence_id_text = get_sequence_id_text(filename)
  if sequence_id_text.present?
    attributes[:identifiers_attributes] << {
      namespace: import_namespace,
      type: 'Identifier::Local::Import',
      identifier: sequence_id_text
    }
  end

  Sequence.new(attributes)
end

#get_between_strings(str, beg_marker, end_marker) ⇒ String

Parameters:

  • str (String)
  • beg_marker (String)
  • end_marker (String)

Returns:

  • (String)


127
128
129
130
131
132
133
134
135
136
137
# File 'lib/batch_load/helpers/sequences.rb', line 127

# Returns the text found between the first occurrence of beg_marker and the
# next occurrence of end_marker after it.
#
# @param str [String] text to search
# @param beg_marker [String] marker that precedes the wanted text
# @param end_marker [String] marker that follows the wanted text
# @return [String] the enclosed text, or '' when either marker is not found
def get_between_strings(str, beg_marker, end_marker)
  marker_position = str.index(beg_marker)
  return '' if marker_position.nil?

  # The wanted text starts right after the opening marker
  content_start = marker_position + beg_marker.length

  # Only look for the closing marker past the opening one
  content_stop = str.index(end_marker, content_start)
  return '' if content_stop.nil?

  str[content_start...content_stop]
end

#get_genbank_text(filename) ⇒ String

Parameters:

  • filename (String)

Returns:

  • (String)


59
60
61
62
# File 'lib/batch_load/helpers/sequences.rb', line 59

# Reads the GenBank text segment out of a filename.
#
# @param filename [String] filename containing a segment such as _&aKJ624355_&
# @return [String] the text between '_&a' and the following '_&', or '' when absent
def get_genbank_text(filename)
  # Example segment: _&aKJ624355_&
  get_between_strings(filename, '_&a', '_&')
end

#get_gene_fragement(filename) ⇒ String

Parameters:

  • filename (String)

Returns:

  • (String)


94
95
96
97
98
# File 'lib/batch_load/helpers/sequences.rb', line 94

# Reads the gene fragment segment out of a filename, preferring the '_&f'
# segment and falling back to the '_&g' segment.
#
# @param filename [String] filename containing a segment such as _&fCOIBC_& or _&gCOIBC_&
# @return [String] the fragment text, or '' when neither segment is found
def get_gene_fragement(filename)
  # Example segments: _&fCOIBC_& or _&gCOIBC_&
  f_segment = get_between_strings(filename, '_&f', '_&')
  return f_segment if f_segment.present?

  # No usable '_&f' segment, so use whatever the '_&g' segment gives
  get_between_strings(filename, '_&g', '_&')
end

#get_sequence(file_content) ⇒ String

Parameters:

  • file_content (String)

Returns:

  • (String)


73
74
75
76
77
# File 'lib/batch_load/helpers/sequences.rb', line 73

# Extracts the sequence text from file content by skipping the first line
# and removing one trailing line terminator, if there is one.
#
# @param file_content [String] raw file text; its first line is skipped
# @return [String] everything after the first newline without a trailing
#   line terminator; the whole content when it contains no newline at all;
#   '' for empty content
def get_sequence(file_content)
  first_line_end = file_content.index("\n") # double quotes so "\n" is a real newline

  # Without any newline there is no first line to skip, so start at 0
  # (starting at 1 would silently drop the first character).
  body_start = first_line_end ? first_line_end + 1 : 0

  # chomp removes a trailing "\n" or "\r\n" only when present, so content
  # that does not end in a newline keeps its final character.
  file_content[body_start..-1].chomp
end

#get_sequence_id_text(filename) ⇒ String

Parameters:

  • filename (String)

Returns:

  • (String)


66
67
68
69
# File 'lib/batch_load/helpers/sequences.rb', line 66

# Reads the sequence id segment out of a filename.
#
# @param filename [String] filename containing a segment such as &iSEQID00000349_&
# @return [String] the text between '&i' and the following '_&', or '' when absent
def get_sequence_id_text(filename)
  # Example segment: &iSEQID00000349_&
  get_between_strings(filename, '&i', '_&')
end

#get_sequence_type(filename) ⇒ String

Parameters:

  • filename (String)

Returns:

  • (String)


81
82
83
# File 'lib/batch_load/helpers/sequences.rb', line 81

# Gives the sequence type used for imported sequences.
#
# @param filename [String] accepted for a uniform signature; not read
# @return [String] always 'DNA'
def get_sequence_type(filename)
  'DNA'
end

#get_taxon_name(filename) ⇒ String

Parameters:

  • filename (String)

Returns:

  • (String)


102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/batch_load/helpers/sequences.rb', line 102

# Looks up the taxon name for the collection object referenced by the
# voucher number in a filename.
#
# The lookup walks collection object -> taxon determination -> OTU ->
# taxon name. A missing link anywhere along that chain yields '' so that
# callers always receive a String.
#
# @param filename [String] filename containing the voucher-number segment
# @return [String] the taxon name's name, or '' when it cannot be resolved
def get_taxon_name(filename)
  # The voucher number is the identifier used to find the collection object
  voucher_number = get_voucher_number(filename)
  collection_object = CollectionObject.with_namespaced_identifier('DRMDNA', voucher_number).take
  return '' unless collection_object

  # Taxon determination associated with the collection object
  taxon_determination = TaxonDetermination.find_by(taxon_determination_object: collection_object)

  # NOTE(review): assumes otu / taxon_name / name may each be nil - confirm
  # against the models. Safe navigation keeps a missing value from raising
  # NoMethodError, matching the '' returned by the other "not found" paths.
  taxon_name = taxon_determination&.otu&.taxon_name
  taxon_name&.name || ''
end

#get_voucher_number(filename) ⇒ String

Parameters:

  • filename (String)

Returns:

  • (String)


87
88
89
90
# File 'lib/batch_load/helpers/sequences.rb', line 87

# Reads the voucher number segment out of a filename.
#
# @param filename [String] filename containing a segment such as &vDRMDNA2303_&
# @return [String] the text between '&vDRM' and the following '_&', or '' when absent
def get_voucher_number(filename)
  # Example segment: &vDRMDNA2303_&
  get_between_strings(filename, '&vDRM', '_&')
end