Module: Helpers::Sequences
- Defined in:
- lib/batch_load/helpers/sequences.rb
Instance Method Summary
- #create_origin_relationship(filename, sequence) ⇒ OriginRelationship
- #create_sequence(filename, file_content) ⇒ Sequence
- #get_between_strings(str, beg_marker, end_marker) ⇒ String
- #get_genbank_text(filename) ⇒ String
- #get_gene_fragement(filename) ⇒ String
- #get_sequence(file_content) ⇒ String
- #get_sequence_id_text(filename) ⇒ String
- #get_sequence_type(filename) ⇒ String
- #get_taxon_name(filename) ⇒ String
- #get_voucher_number(filename) ⇒ String
Instance Method Details
#create_origin_relationship(filename, sequence) ⇒ OriginRelationship
42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/batch_load/helpers/sequences.rb', line 42

# Instantiates the OriginRelationship that ties a sequence to the Extract it
# came from.
#
# The Extract is looked up in the 'GenBank' namespace using the voucher number
# parsed from +filename+ (see #get_voucher_number).
#
# @param filename [String] file name the voucher number is parsed from
# @param sequence [Sequence] object used as the relationship's new_object
# @return [OriginRelationship, nil] a new relationship built with
#   OriginRelationship.new, or nil when no matching Extract is present
def create_origin_relationship(filename, sequence)
  voucher_number = get_voucher_number(filename)
  source_extract = Extract.with_namespaced_identifier('GenBank', voucher_number).take
  return nil unless source_extract.present?

  # Extract is the source (old_object); Sequence is the target (new_object).
  OriginRelationship.new(old_object: source_extract, new_object: sequence)
end
#create_sequence(filename, file_content) ⇒ Sequence
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/batch_load/helpers/sequences.rb', line 6

# Instantiates a Sequence from a sequence file's name and content.
#
# The name is "<taxon name>_<voucher number>_<gene fragment>", all parsed from
# (or looked up via) +filename+; the sequence text comes from +file_content+.
# Up to two identifiers are attached through identifiers_attributes, each only
# when its text is present in the file name:
# * a GenBank accession code ('Identifier::Global::GenBankAccessionCode')
# * a local import identifier ('Identifier::Local::Import') in the
#   'DRMSequenceId' namespace
#
# @param filename [String] file name carrying the encoded metadata
# @param file_content [String] raw content of the sequence file
# @return [Sequence] a new Sequence built with Sequence.new
def create_sequence(filename, file_content)
  # Namespace for the local import identifier (DRMSequenceId / DRMSEQID).
  import_namespace = Namespace.find_by(name: 'DRMSequenceId')

  # Shared with the attributes hash below; filled in once the identifier
  # texts have been parsed.
  identifiers = []
  attributes = {
    name: get_taxon_name(filename) + '_' + get_voucher_number(filename) + '_' + get_gene_fragement(filename),
    sequence_type: get_sequence_type(filename),
    sequence: get_sequence(file_content),
    identifiers_attributes: identifiers
  }

  genbank_text = get_genbank_text(filename)
  sequence_id_text = get_sequence_id_text(filename)

  if genbank_text.present?
    identifiers << { type: 'Identifier::Global::GenBankAccessionCode', identifier: genbank_text }
  end

  if sequence_id_text.present?
    identifiers << { namespace: import_namespace, type: 'Identifier::Local::Import', identifier: sequence_id_text }
  end

  Sequence.new(attributes)
end
#get_between_strings(str, beg_marker, end_marker) ⇒ String
127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/batch_load/helpers/sequences.rb', line 127

# Returns the text found between the first occurrence of +beg_marker+ and the
# next occurrence of +end_marker+ after it.
#
# @param str [String, nil] text to search; nil is treated as "nothing found"
# @param beg_marker [String] marker that precedes the wanted text
# @param end_marker [String] marker that follows the wanted text
# @return [String] the enclosed text, or '' when +str+ is nil or either
#   marker cannot be found
def get_between_strings(str, beg_marker, end_marker)
  return '' if str.nil?

  start_index = str.index(beg_marker)
  return '' unless start_index

  # Skip past the opening marker so the search for the closing marker (and the
  # returned slice) begins right after it.
  start_index += beg_marker.length
  stop_index = str.index(end_marker, start_index)

  stop_index ? str[start_index...stop_index] : ''
end
#get_genbank_text(filename) ⇒ String
59 60 61 62 |
# File 'lib/batch_load/helpers/sequences.rb', line 59

# Extracts the GenBank text encoded in a file name: the text between '_&a'
# and the following '_&', e.g. "KJ624355" from "_&aKJ624355_&".
#
# @param filename [String] file name to parse
# @return [String] the GenBank text, or '' when the markers are not found
def get_genbank_text(filename)
  get_between_strings(filename, '_&a', '_&')
end
#get_gene_fragement(filename) ⇒ String
94 95 96 97 98 |
# File 'lib/batch_load/helpers/sequences.rb', line 94

# Extracts the gene fragment encoded in a file name. The fragment is tagged
# with either '_&f' or '_&g' and ends at the following '_&', e.g. "COIBC"
# from "_&fCOIBC_&" or "_&gCOIBC_&". The '_&f' form takes precedence.
#
# @param filename [String] file name to parse
# @return [String] the gene fragment, or '' when neither form is found
def get_gene_fragement(filename)
  from_f_marker = get_between_strings(filename, '_&f', '_&')
  return from_f_marker if from_f_marker.present?

  # Nothing usable under '_&f'; fall back to the '_&g' form.
  get_between_strings(filename, '_&g', '_&')
end
#get_sequence(file_content) ⇒ String
73 74 75 76 77 |
# File 'lib/batch_load/helpers/sequences.rb', line 73

# Extracts the sequence text from a sequence file's content by discarding the
# first line and the trailing line terminator.
#
# Content without any newline has no first line to discard and is returned as
# is. A trailing line terminator is removed only when one is actually present,
# so the last character of the sequence is never cut off.
#
# @param file_content [String, nil] raw file content; nil is treated as empty
# @return [String] everything after the first line, without the trailing
#   line terminator
def get_sequence(file_content)
  content = file_content.to_s

  # Double quotes are needed so "\n" is interpreted as a newline character.
  _first_line, newline, remainder = content.partition("\n")

  # No newline at all: there is no first line to skip, keep the whole content.
  remainder = content if newline.empty?

  remainder.chomp
end
#get_sequence_id_text(filename) ⇒ String
66 67 68 69 |
# File 'lib/batch_load/helpers/sequences.rb', line 66

# Extracts the sequence id text encoded in a file name: the text between '&i'
# and the following '_&', e.g. "SEQID00000349" from "&iSEQID00000349_&".
#
# @param filename [String] file name to parse
# @return [String] the sequence id text, or '' when the markers are not found
def get_sequence_id_text(filename)
  get_between_strings(filename, '&i', '_&')
end
#get_sequence_type(filename) ⇒ String
81 82 83 |
# File 'lib/batch_load/helpers/sequences.rb', line 81

# Returns the sequence type for a file. The file name is accepted but not
# inspected: the result is always 'DNA'.
#
# @param filename [String] file name (currently unused)
# @return [String] always 'DNA'
def get_sequence_type(filename)
  'DNA'
end
#get_taxon_name(filename) ⇒ String
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/batch_load/helpers/sequences.rb', line 102

# Looks up the taxon name for the collection object referenced by a file name.
#
# The voucher number parsed from +filename+ identifies a CollectionObject in
# the 'DRMDNA' namespace; the name is then read through that object's
# TaxonDetermination -> otu -> taxon_name chain. Any missing link in the chain
# yields '' rather than raising, so callers can always treat the result as a
# String.
#
# @param filename [String] file name the voucher number is parsed from
# @return [String] the taxon name, or '' when it cannot be resolved
def get_taxon_name(filename)
  # Identifier used to find the collection object.
  voucher_number = get_voucher_number(filename)
  collection_object = CollectionObject.with_namespaced_identifier('DRMDNA', voucher_number).take
  return '' unless collection_object

  # Taxon determination associated with the collection object.
  taxon_determination = TaxonDetermination.find_by(taxon_determination_object: collection_object)

  # The determination, its otu, or the otu's taxon name may each be absent.
  taxon_name = taxon_determination&.otu&.taxon_name
  taxon_name ? taxon_name.name.to_s : ''
end
#get_voucher_number(filename) ⇒ String
87 88 89 90 |
# File 'lib/batch_load/helpers/sequences.rb', line 87

# Extracts the voucher number encoded in a file name: the text between '&vDRM'
# and the following '_&'. The 'DRM' prefix is part of the opening marker and is
# therefore not included, e.g. "DNA2303" from "&vDRMDNA2303_&".
#
# @param filename [String] file name to parse
# @return [String] the voucher number, or '' when the markers are not found
def get_voucher_number(filename)
  get_between_strings(filename, '&vDRM', '_&')
end