Module Bio::EMBLDB::Common
In: lib/bio/db/embl/common.rb

Methods

ac   accession   accessions   de   definition   description   dr   keywords   kw   new   oc   og   os   ref   references  

Constants

DELIMITER = "\n//\n"
RS = DELIMITER
TAGSIZE = 5

Public Class methods

[Source]

    # File lib/bio/db/embl/common.rb, line 86
86:   def initialize(entry)
87:     super(entry, TAGSIZE)
88:   end

Public Instance methods

Returns an Array of the accession numbers in the AC lines.

AC Line

  "AC   A12345; B23456;"
  AC [AC1;]+

Accession numbers format:

  1       2     3          4          5          6
  [O,P,Q] [0-9] [A-Z, 0-9] [A-Z, 0-9] [A-Z, 0-9] [0-9]

[Source]

     # File lib/bio/db/embl/common.rb, line 99
 99:   def ac
100:     unless @data['AC']
101:       tmp = Array.new
102:       field_fetch('AC').split(/ /).each do |e|
103:         tmp.push(e.sub(/;/,''))
104:       end
105:       @data['AC'] = tmp
106:     end
107:     @data['AC']
108:   end

returns the first accession number in the AC lines

[Source]

     # File lib/bio/db/embl/common.rb, line 113
113:   def accession
114:     ac[0]
115:   end
accessions()

Alias for ac

Returns the contents of the DE line as a String.

DE Line

[Source]

     # File lib/bio/db/embl/common.rb, line 121
121:   def de
122:     unless @data['DE']
123:       @data['DE'] = fetch('DE')
124:     end
125:     @data['DE']
126:   end
definition()

Alias for de

description()

Alias for de

returns contents in the DR line.

where <Database cross-reference Hash> is:

DR Line; database cross-reference (>=0), one cross-reference per line

 "DR   database_identifier; primary_identifier; secondary_identifier."

[Source]

     # File lib/bio/db/embl/common.rb, line 329
329:   def dr
330:     unless @data['DR']
331:       tmp = Hash.new
332:       self.get('DR').split(/\n/).each do |db|
333:         a = db.sub(/^DR   /,'').sub(/.$/,'').strip.split(/;[ ]/)
334:         dbname = a.shift
335:         tmp[dbname] = Array.new unless tmp[dbname]
336:         tmp[dbname].push(a)
337:       end
338:       @data['DR'] = tmp
339:     end
340:     if block_given?
341:       @data['DR'].each do |k,v|
342:         yield(k, v)
343:       end
344:     else
345:       @data['DR']
346:     end
347:   end
keywords()

Alias for kw

returns keywords in the KW line.

KW Line; keyword (>=1)

 KW   [Keyword;]+

[Source]

     # File lib/bio/db/embl/common.rb, line 220
220:   def kw
221:     unless @data['KW']
222:       if get('KW').size > 0
223:         tmp = fetch('KW').sub(/.$/,'')
224:         @data['KW'] = tmp.split(/;/).map {|e| e.strip }
225:       else
226:         @data['KW'] = []
227:       end
228:     end
229:     @data['KW']
230:   end

returns contents in the OC line.

OC Line; organism classification (>=1)

 OC   Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae;
 OC   Theileria.

[Source]

     # File lib/bio/db/embl/common.rb, line 203
203:   def oc
204:     unless @data['OC']
205:       begin
206:         @data['OC'] = fetch('OC').sub(/.$/,'').split(/;/).map {|e|
207:           e.strip 
208:         }
209:       rescue NameError
210:         nil
211:       end
212:     end
213:     @data['OC']
214:   end

returns contents in the OG line.

OG Line; organelle (0 or 1/entry)

 OG   Plastid; Chloroplast.
 OG   Mitochondrion.
 OG   Plasmid sym pNGR234a.
 OG   Plastid; Cyanelle.
 OG   Plasmid pSymA (megaplasmid 1).
 OG   Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1.

[Source]

     # File lib/bio/db/embl/common.rb, line 180
180:   def og
181:     unless @data['OG']
182:       og = Array.new
183:       if get('OG').size > 0
184:         ogstr = fetch('OG')
185:         ogstr.sub!(/\.$/,'')
186:         ogstr.sub!(/ and/,'')
187:         ogstr.sub!(/;/, ',')
188:         ogstr.split(',').each do |tmp|
189:           og.push(tmp.strip)
190:         end
191:       end
192:       @data['OG'] = og
193:     end
194:     @data['OG']
195:   end

returns contents in the OS line.

  • Bio::EMBLDB#os -> Array of <OS Hash>

where <OS Hash> is:

 [{'name'=>'Human', 'os'=>'Homo sapiens'},
  {'name'=>'Rat', 'os'=>'Rattus norvegicus'}]
  • Bio::SPTR#os[0]['name'] => "Human"
  • Bio::SPTR#os[0] => {'name'=>"Human", 'os'=>'Homo sapiens'}
  • Bio::SPTR#os(0) => "Homo sapiens (Human)"

OS Line; organism species (>=1)

  "OS   Trifolium repens (white clover)"

  OS   Genus species (name).
  OS   Genus species (name0) (name1).
  OS   Genus species (name0) (name1).
  OS   Genus species (name0), G s0 (name0), and G s (name1).

[Source]

     # File lib/bio/db/embl/common.rb, line 148
148:   def os(num = nil)
149:     unless @data['OS']
150:       os = Array.new
151:       fetch('OS').split(/, and|, /).each do |tmp|
152:         if tmp =~ /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d])/
153:           org = $1
154:           tmp =~ /(\(.+\))/ 
155:           os.push({'name' => $1, 'os' => org})
156:         else
157:           raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
158:         end
159:       end
160:       @data['OS'] = os
161:     end
162:     if num
163:       # EX. "Trifolium repens (white clover)"
164:       "#{@data['OS'][num]['os']} {#data['OS'][num]['name']"
165:     end
166:     @data['OS']
167:   end

returns contents in the R lines.

where <reference information Hash> is:

 {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
  'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}

R Lines

  • RN RC RP RX RA RT RL RG

[Source]

     # File lib/bio/db/embl/common.rb, line 242
242:   def ref
243:     unless @data['R']
244:       ary = Array.new
245:       get('R').split(/\nRN   /).each do |str|
246:         raw = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '', 
247:                'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
248:         str = 'RN   ' + str unless /^RN   / =~ str
249:         str.split("\n").each do |line|
250:           if /^(R[NPXARLCTG])   (.+)/ =~ line
251:             raw[$1] += $2 + ' '
252:           else
253:             raise "Invalid format in R lines, \n[#{line}]\n"
254:           end
255:         end
256:         raw.each_value {|v| 
257:           v.strip! 
258:           v.sub!(/^"/,'')
259:           v.sub!(/;$/,'')
260:           v.sub!(/"$/,'')
261:         }
262:         ary.push(raw)
263:       end
264:       @data['R'] = ary
265:     end
266:     @data['R']
267:   end

returns Bio::Reference object from Bio::EMBLDB::Common#ref.

[Source]

     # File lib/bio/db/embl/common.rb, line 271
271:   def references
272:     unless @data['references']
273:       ary = self.ref.map {|ent|
274:         hash = Hash.new
275:         ent.each {|key, value|
276:           case key
277:           when 'RN'
278:             if /\[(\d+)\]/ =~ value.to_s
279:               hash['embl_gb_record_number'] = $1.to_i
280:             end
281:           when 'RC'
282:             unless value.to_s.strip.empty?
283:               hash['comments'] ||= []
284:               hash['comments'].push value
285:             end
286:           when 'RP'
287:             hash['sequence_position'] = value
288:           when 'RA'
289:             a = value.split(/\, /)
290:             a.each do |x|
291:               x.sub!(/( [^ ]+)\z/, ",\\1")
292:             end
293:             hash['authors'] = a
294:           when 'RT'
295:             hash['title'] = value
296:           when 'RL'
297:             if /(.*) (\d+) *(\(([^\)]+)\))?(\, |\:)([a-zA-Z\d]+\-[a-zA-Z\d]+) *\((\d+)\)\.?\z/ =~ value.to_s
298:               hash['journal'] = $1.rstrip
299:               hash['volume']  = $2
300:               hash['issue']   = $4
301:               hash['pages']   = $6
302:               hash['year']    = $7
303:             else
304:               hash['journal'] = value
305:             end
306:           when 'RX'  # PUBMED, DOI, (AGRICOLA)
307:             value.split(/\. /).each {|item|
308:               tag, xref = item.split(/\; /).map {|i| i.strip.sub(/\.\z/, '') }
309:               hash[ tag.downcase ]  = xref
310:             }
311:           end
312:         }
313:         Reference.new(hash)
314:       }
315:       @data['references'] = ary.extend(Bio::References::BackwardCompatibility)
316:     end
317:     @data['references']
318:   end

[Validate]