Module | Bio::NCBIDB::Common |
In: |
lib/bio/db/genbank/common.rb
|
This module defines a common framework shared by GenBank, GenPept, RefSeq, and DDBJ. For more details, see the documentation in each of the genbank/*.rb files.
DELIMITER | = | RS = "\n//\n" |
TAGSIZE | = | 12 |
# File lib/bio/db/genbank/common.rb, line 30
# Sets up the entry by delegating to the parent initializer, passing the
# entry along with the TAGSIZE constant.
#
# entry:: the entry data, forwarded unchanged to +super+
#         (presumably the flat-file text of one record -- confirm with callers)
def initialize(entry)
  tag_width = TAGSIZE
  super(entry, tag_width)
end
Returns the ACCESSION part of the acc_version.
# File lib/bio/db/genbank/common.rb, line 62
# Returns the accession part of +acc_version+: the text in front of the
# first "." separator. The result is always a String (empty when the split
# yields nothing).
def accession
  head, = acc_version.split(/\./)
  head.to_s
end
ACCESSION — Returns contents of the ACCESSION record as an Array.
# File lib/bio/db/genbank/common.rb, line 46
# Returns the ACCESSION field as an Array of Strings, obtained by stripping
# the field text and splitting it on runs of whitespace.
def accessions
  record = field_fetch('ACCESSION')
  record.strip.split(/\s+/)
end
# File lib/bio/db/genbank/common.rb, line 120
# Returns the value stored under the 'common_name' key of the object
# returned by #source.
def common_name
  source_info = source
  source_info['common_name']
end
FEATURES — Returns contents of the FEATURES record as an array of Bio::Feature objects.
# File lib/bio/db/genbank/common.rb, line 209
# Parses the FEATURES record on first use and caches the result in
# @data['FEATURES']. With a block, each parsed feature is yielded in turn;
# without one, the cached collection is returned.
#
# Every line is cut at fixed columns: the first 20 characters carry the
# feature type (source, CDS, ...) and the next 60 carry either a position or
# qualifier text. Lines are grouped into sub-arrays of the form
# [ feature type, position, /q="data", ... ], each of which is then handed to
# parse_qualifiers. The resulting Array is extended with
# Bio::Features::BackwardCompatibility.
def features
  @data['FEATURES'] ||= begin
    table = []
    quote_open = false

    get('FEATURES').each_line do |row|
      # the record's own header line carries no feature data
      next if row =~ /^FEATURES/

      key  = row[0, 20].to_s.strip   # feature type
      text = row[20, 60].to_s.chomp  # position or /qualifier=

      if row =~ /^ {5}\S/
        # a new feature begins: open a fresh sub-array
        table << [key, text]
      elsif text =~ /^ \// && !quote_open # gb:IRO125195
        # a qualifier begins (/q="data..., /q="data...", /q=data, /q);
        # a slash inside a still-open quoted value does not count
        table.last << text

        # remember a quote that is opened but not closed on this line
        quote_open = true if text =~ /="/ && text !~ /"$/
      else
        # continuation line: appended in place to the previous item
        table.last.last << text

        # a trailing quote closes a multi-line quoted value
        quote_open = false if text =~ /"$/
      end
    end

    table.map! { |entry| parse_qualifiers(entry) }
    table.extend(Bio::Features::BackwardCompatibility)
  end

  return @data['FEATURES'] unless block_given?

  @data['FEATURES'].each { |feature| yield feature }
end
KEYWORDS — Returns contents of the KEYWORDS record as an Array of Strings.
# File lib/bio/db/genbank/common.rb, line 84
# Returns the KEYWORDS field as an Array of Strings. The fetched text has one
# trailing "." removed and is then split on "; ". The result is cached in
# @data['KEYWORDS'].
def keywords
  return @data['KEYWORDS'] if @data['KEYWORDS']

  raw = fetch('KEYWORDS')
  @data['KEYWORDS'] = raw.chomp('.').split(/; /)
end
LOCUS — Locus class must be defined in child classes.
# File lib/bio/db/genbank/common.rb, line 35
# Placeholder for the LOCUS accessor. This version does nothing and returns
# nil; it must be overridden in each subclass.
def locus
  # intentionally empty here -- subclasses supply the real implementation
  nil
end
ORIGIN — Returns contents of the ORIGIN record as a String.
# File lib/bio/db/genbank/common.rb, line 263
# Returns the contents of the ORIGIN record's first line, cached in
# @data['ORIGIN'].
#
# On first use the ORIGIN record is split at its first newline. The first
# line (after tag_cut and truncate) becomes @data['ORIGIN']; the remainder,
# with digits, spaces, tabs, newlines, carriage returns and slashes removed,
# is stored as @data['SEQUENCE'].
def origin
  return @data['ORIGIN'] if @data['ORIGIN']

  header, sequence = get('ORIGIN').split("\n", 2)
  sequence ||= ""  # a record without a second line has no sequence text

  @data['ORIGIN'] = truncate(tag_cut(header))
  @data['SEQUENCE'] = sequence.tr("0-9 \t\n\r\/", '')
  @data['ORIGIN']
end
REFERENCE — Returns contents of the REFERENCE records as an Array of Bio::Reference objects.
# File lib/bio/db/genbank/common.rb, line 136
# Parses the REFERENCE records on first use and caches the result in
# @data['REFERENCE']. With a block, each reference is yielded in turn;
# without one, the cached collection is returned.
#
# Each top-level REFERENCE entry is broken into sub-fields; the sub-field tag
# selects which keys of an attribute Hash are filled in, and the Hash is then
# passed to Reference.new. The resulting Array is extended with
# Bio::References::BackwardCompatibility.
def references
  unless @data['REFERENCE']
    parsed = toptag2array(get('REFERENCE')).map do |ref|
      attrs = {}

      subtag2array(ref).each do |field|
        # the tag is read before tag_cut is applied to the same field
        case tag_get(field)
        when /REFERENCE/
          # "<number>" optionally followed by "(<positions>)"
          m = /(\d+)(\s*\((.+)\))?/m.match(tag_cut(field))
          if m
            attrs['embl_gb_record_number'] = m[1].to_i
            span = m[3]
            if span && span != 'sites'
              # rewrite "bases 1 to 10; 20 to 30" as "1-10, 20-30"
              span.sub!(/\A\s*bases\s+/, '')
              span.gsub!(/(\d+)\s+to\s+(\d+)/, "\\1-\\2")
              span.gsub!(/\s*\;\s*/, ', ')
              attrs['sequence_position'] = span
            end
          end
        when /AUTHORS/
          names = truncate(tag_cut(field)).split(/, /)
          # the last chunk may still hold two names joined by "and"
          names[-1] = names[-1].split(/\s+and\s+/) if names[-1]
          # put a space after the first comma of each name
          attrs['authors'] = names.flatten.map { |name| name.sub(/,/, ', ') }
        when /TITLE/
          # CHECK Actually GenBank is not demanding for dot at the end of TITLE
          attrs['title'] = truncate(tag_cut(field))
        when /JOURNAL/
          citation = truncate(tag_cut(field))
          # "<journal> <volume> (<issue>), <first>-<last> (<year>)"
          m = /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/.match(citation)
          if m
            attrs['journal'] = m[1]
            attrs['volume']  = m[2]
            attrs['issue']   = m[3]
            attrs['pages']   = m[4]
            attrs['year']    = m[5]
          else
            # unrecognised layout: keep the whole text as the journal
            attrs['journal'] = citation
          end
        when /MEDLINE/
          attrs['medline'] = truncate(tag_cut(field))
        when /PUBMED/
          attrs['pubmed'] = truncate(tag_cut(field))
        when /REMARK/
          (attrs['comments'] ||= []) << truncate(tag_cut(field))
        end
      end

      Reference.new(attrs)
    end

    @data['REFERENCE'] = parsed.extend(Bio::References::BackwardCompatibility)
  end

  return @data['REFERENCE'] unless block_given?

  @data['REFERENCE'].each { |reference| yield reference }
end
SOURCE — Returns contents of the SOURCE record as a Hash.
# File lib/bio/db/genbank/common.rb, line 96
# Returns the SOURCE record as a Hash with the keys 'common_name',
# 'organism' and 'taxonomy'; lookups of any other key return ''. The Hash is
# built on first use and cached in @data['SOURCE'].
#
# The record text is split on the word ORGANISM. The part in front provides
# the common name. The part behind is divided at the first whitespace-free
# token ending in ";" (or, failing that, ending in "."): the text before
# that token is the organism and everything from the token onwards is the
# taxonomy.
def source
  return @data['SOURCE'] if @data['SOURCE']

  name, org = get('SOURCE').split('ORGANISM')
  org ||= ""

  # the "." form is the fallback, e.g. rs:NC_001741
  hit = org.match(/\S+;/) || org.match(/\S+\./)
  if hit
    organism = hit.pre_match
    taxonomy = hit[0] + hit.post_match
  else
    organism = org
    taxonomy = ''
  end

  info = {
    'common_name' => truncate(tag_cut(name)),
    'organism'    => truncate(organism),
    'taxonomy'    => truncate(taxonomy),
  }
  info.default = ''
  @data['SOURCE'] = info
end
Returns the VERSION part of the acc_version as an Integer.
# File lib/bio/db/genbank/common.rb, line 67
# Returns the version number of +acc_version+ as an Integer.
#
# The version is the text after the last "." of +acc_version+. When
# +acc_version+ has no "." separator there is no version part, so 0 is
# returned rather than converting the accession itself to a number (an
# identifier that starts with digits, such as "1ABC_A", used to yield 1).
def version
  parts = acc_version.split(/\./)
  # a single piece (or none) means no ".<version>" suffix is present
  return 0 if parts.size < 2

  parts.last.to_i
end