Module | Bio::EMBLDB::Common |
In: |
lib/bio/db/embl/common.rb
|
DELIMITER | = | "\n//\n" |
RS | = | DELIMITER |
TAGSIZE | = | 5 |
# File lib/bio/db/embl/common.rb, line 86
#
# Sets up the entry by handing +entry+ to the superclass initializer
# together with this module's TAGSIZE constant.
#
# entry:: the raw entry passed through unchanged to +super+
def initialize(entry)
  super(entry, TAGSIZE)
end
Returns an Array of the accession numbers in the AC lines.
AC Line
"AC A12345; B23456;" AC [AC1;]+
Accession numbers format:
Position:   1        2      3           4           5           6
Character:  [O,P,Q]  [0-9]  [A-Z, 0-9]  [A-Z, 0-9]  [A-Z, 0-9]  [0-9]
# File lib/bio/db/embl/common.rb, line 99
#
# Returns the accession numbers of the AC lines as an Array of Strings,
# each with its ';' separator removed.
#
# The parsed Array is stored in @data['AC'], so the AC field is parsed
# only on the first call; later calls return the stored Array.
def ac
  unless @data['AC']
    # Split on runs of whitespace (spaces or line breaks) rather than on a
    # single space, so doubled separators never yield empty-string entries.
    @data['AC'] = field_fetch('AC').split(' ').map { |acc| acc.sub(/;/, '') }
  end
  @data['AC']
end
returns contents in the DR line.
where <Database cross-reference Hash> is:
DR Line; database cross-reference (>=0), one cross-reference per line
"DR database_identifier; primary_identifier; secondary_identifier."
# File lib/bio/db/embl/common.rb, line 329
#
# Returns the database cross-references of the DR lines as a Hash that maps
# each database identifier to an Array of cross-references; every
# cross-reference is itself an Array of the remaining ';'-separated fields
# (primary identifier, secondary identifier, ...).
#
# When a block is given, each (database identifier, cross-references) pair
# is yielded instead. The parsed Hash is stored in @data['DR'] and reused
# on later calls.
def dr
  unless @data['DR']
    xrefs = Hash.new
    get('DR').split(/\n/).each do |line|
      # Strip surrounding whitespace first, then drop only a literal
      # terminating period, so a line without one keeps the last character
      # of its final field.
      fields = line.sub(/^DR /, '').strip.sub(/\.\z/, '').split(/;[ ]/)
      dbname = fields.shift
      (xrefs[dbname] ||= []).push(fields)
    end
    @data['DR'] = xrefs
  end
  if block_given?
    @data['DR'].each { |dbname, refs| yield(dbname, refs) }
  else
    @data['DR']
  end
end
returns keywords in the KW line.
KW Line; keyword (>=1)
KW [Keyword;]+
# File lib/bio/db/embl/common.rb, line 220
#
# Returns the keywords of the KW line as an Array of Strings, split on ';'
# with surrounding whitespace removed. Returns an empty Array when
# get('KW') is empty.
#
# The result is stored in @data['KW'] and reused on later calls.
def kw
  unless @data['KW']
    if get('KW').size > 0
      # Remove only a literal terminating period; a field that lacks one
      # keeps the last character of its final keyword.
      keywords = fetch('KW').strip.sub(/\.\z/, '')
      @data['KW'] = keywords.split(/;/).map { |e| e.strip }
    else
      @data['KW'] = []
    end
  end
  @data['KW']
end
returns contents in the OC line.
OC Line; organism classification (>=1)
OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae; OC Theileria.
# File lib/bio/db/embl/common.rb, line 203
#
# Returns the organism classification of the OC lines as an Array of
# Strings, split on ';' with surrounding whitespace removed.
#
# If a NameError (which includes NoMethodError) is raised while fetching or
# parsing the field, nothing is stored and nil is returned.
# NOTE(review): presumably this covers entries without an OC field --
# confirm against the implementation of fetch.
#
# A successfully parsed result is stored in @data['OC'] and reused.
def oc
  unless @data['OC']
    begin
      # Remove only a literal terminating period, so the last taxon is not
      # truncated when the field does not end with one.
      taxa = fetch('OC').strip.sub(/\.\z/, '')
      @data['OC'] = taxa.split(/;/).map { |e| e.strip }
    rescue NameError
      nil
    end
  end
  @data['OC']
end
returns contents in the OG line.
OG Line; organelle (0 or 1/entry)
OG Plastid; Chloroplast. OG Mitochondrion. OG Plasmid sym pNGR234a. OG Plastid; Cyanelle. OG Plasmid pSymA (megaplasmid 1). OG Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1.
# File lib/bio/db/embl/common.rb, line 180
#
# Returns the organelle names of the OG line as an Array of Strings.
# The field's terminating period and the connective " and" are removed,
# and the remainder is split on ',' and ';' with surrounding whitespace
# stripped. Returns an empty Array when get('OG') is empty.
#
# The result is stored in @data['OG'] and reused on later calls.
def og
  unless @data['OG']
    organelles = []
    if get('OG').size > 0
      # Build new Strings instead of editing the fetched one in place: the
      # fetched String may be frozen or shared with other readers.
      # " and" is removed only as a whole word, and every ';' (not just the
      # first) is treated like ','.
      text = fetch('OG').sub(/\.$/, '').sub(/ and\b/, '').tr(';', ',')
      organelles = text.split(',').map { |e| e.strip }
    end
    @data['OG'] = organelles
  end
  @data['OG']
end
returns contents in the OS line.
where <OS Hash> is:
[{'name'=>'(Human)', 'os'=>'Homo sapiens'}, {'name'=>'(Rat)', 'os'=>'Rattus norvegicus'}]
OS Line; organism species (>=1)
"OS Trifolium repens (white clover)" OS Genus species (name). OS Genus species (name0) (name1). OS Genus species (name0) (name1). OS Genus species (name0), G s0 (name0), and G s (name1).
# File lib/bio/db/embl/common.rb, line 148
#
# Returns the organism species information of the OS line.
#
# Without an argument, returns an Array with one Hash per species:
# 'os' holds the "Genus species" text and 'name' holds the parenthesised
# part including its parentheses (nil when there is none).
#
# With +num+, returns a String for the species at that index, e.g.
# "Trifolium repens (white clover)".
#
# Raises a RuntimeError when a species in the OS line cannot be parsed.
# The parsed Array is stored in @data['OS'] and reused on later calls.
def os(num = nil)
  unless @data['OS']
    species = []
    # Several species are separated by ", " or ", and".
    fetch('OS').split(/, and|, /).each do |part|
      organism = part[/([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d])/, 1]
      unless organism
        raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
      end
      species.push({'name' => part[/(\(.+\))/, 1], 'os' => organism})
    end
    @data['OS'] = species
  end
  if num
    # EX. "Trifolium repens (white clover)"
    # compact drops a missing common name so no trailing space is left.
    record = @data['OS'][num]
    [record['os'], record['name']].compact.join(' ')
  else
    @data['OS']
  end
end
returns contents in the R lines.
where <reference information Hash> is:
{'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '', 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
R Lines
# File lib/bio/db/embl/common.rb, line 242
#
# Returns the reference lines (RN, RC, RP, RX, RA, RT, RL, RG) as an Array
# with one Hash per reference. Every Hash has all eight tags as keys; the
# text of repeated lines for a tag is joined with spaces, and a tag that
# does not occur maps to ''. Surrounding whitespace, a leading '"', a
# trailing ';' and a trailing '"' are removed from each value.
#
# Raises a RuntimeError for a line that is not "<R tag> <text>".
# The result is stored in @data['R'] and reused on later calls.
def ref
  return @data['R'] if @data['R']

  # A new reference starts at every RN line.
  @data['R'] = get('R').split(/\nRN /).map do |chunk|
    # split consumed the "RN " prefix of every chunk but the first.
    chunk = 'RN ' + chunk unless chunk =~ /^RN /
    fields = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
              'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
    chunk.split("\n").each do |line|
      matched = /^(R[NPXARLCTG]) (.+)/.match(line)
      raise "Invalid format in R lines, \n[#{line}]\n" unless matched
      fields[matched[1]] += matched[2] + ' '
    end
    fields.each_key do |tag|
      fields[tag] = fields[tag].strip.sub(/^"/, '').sub(/;$/, '').sub(/"$/, '')
    end
    fields
  end
end
Returns an Array of Bio::Reference objects, one for each entry returned by Bio::EMBLDB::Common#ref.
# File lib/bio/db/embl/common.rb, line 271
#
# Converts every Hash returned by #ref into a Reference object and returns
# them as an Array extended with Bio::References::BackwardCompatibility.
#
# Each Reference is built from a Hash filled as follows:
# RN:: 'embl_gb_record_number' (Integer taken from "[n]")
# RC:: 'comments' (Array; only when the comment is not blank)
# RP:: 'sequence_position'
# RA:: 'authors' (Array; "Name I." is turned into "Name, I.")
# RT:: 'title'
# RL:: 'journal', 'volume', 'issue', 'pages', 'year' when the line matches
#      the journal citation pattern; otherwise the whole text is 'journal'
# RX:: one key per cross-reference, the lower-cased database name
#      (e.g. 'pubmed', 'doi')
#
# The result is stored in @data['references'] and reused on later calls.
def references
  return @data['references'] if @data['references']

  list = self.ref.map do |entry|
    attrs = {}
    entry.each do |tag, text|
      case tag
      when 'RN'
        number = /\[(\d+)\]/.match(text.to_s)
        attrs['embl_gb_record_number'] = number[1].to_i if number
      when 'RC'
        (attrs['comments'] ||= []).push(text) unless text.to_s.strip.empty?
      when 'RP'
        attrs['sequence_position'] = text
      when 'RA'
        # Insert a comma before the last space-separated token (the initials).
        attrs['authors'] = text.split(/\, /).map do |author|
          author.sub(/( [^ ]+)\z/, ",\\1")
        end
      when 'RT'
        attrs['title'] = text
      when 'RL'
        cite = /(.*) (\d+) *(\(([^\)]+)\))?(\, |\:)([a-zA-Z\d]+\-[a-zA-Z\d]+) *\((\d+)\)\.?\z/.match(text.to_s)
        if cite
          attrs['journal'] = cite[1].rstrip
          attrs['volume']  = cite[2]
          attrs['issue']   = cite[4]
          attrs['pages']   = cite[6]
          attrs['year']    = cite[7]
        else
          attrs['journal'] = text
        end
      when 'RX' # PUBMED, DOI, (AGRICOLA)
        text.split(/\. /).each do |item|
          db, xref = item.split(/\; /).map { |part| part.strip.sub(/\.\z/, '') }
          attrs[db.downcase] = xref
        end
      end
    end
    Reference.new(attrs)
  end
  @data['references'] = list.extend(Bio::References::BackwardCompatibility)
end