Class Bio::PSORT::PSORT2::Report
In: lib/bio/appl/psort/report.rb
Parent: Object

Methods

Constants

BOUNDARY = '-' * 75   Report boundary string.
RS = DELIMITER = "\)\n\n#{BOUNDARY}"   Report delimiter.

Attributes

definition  [RW]  Definition of query sequence.
entry_id  [RW]  entry_id of query sequence.
features  [RW]  Feature vector used for the kNN prediction.
k  [RW]  k parameter of k-nearest neighbors classifier.
pred  [RW]  Predicted subcellular localization (three-letter code).
prob  [RW]  Probability vector of kNN prediction.
raw  [RW]  Raw text of output report.
scl  [RW]  Given subcellular localization (three-letter code).
seq  [RW]  Sequence of query sequence.

Public Class methods

Parser for the default report format. ``psort report’’ output.

[Source]

     # File lib/bio/appl/psort/report.rb, line 273
# Parser for the default report format (``psort report'' output).
#
# ent::      raw report text; its blank-line separated sections are, in
#            order: header line, feature matrix, kNN probabilities and
#            the prediction line.
# entry_id:: optional entry id, passed to the constructor unchanged.
#
# Returns the populated report object.
def self.default_parser(ent, entry_id = nil)
  report = self.new(ent, entry_id)
  header, matrix, knn, prediction =
    ent.split(/\n\n/).map { |section| section.chomp }

  report.set_header_line(header)

  # The feature matrix is a run of "name: value" fields separated by two
  # spaces (line breaks are turned into spaces first).
  matrix.gsub(/\n/,' ').strip.split(/  /).each do |field|
    name, value = field.split(/: /)
    report.features[name.strip] = value.strip.to_f
  end

  report.prob = self.set_kNN_prob(knn)
  report.set_prediction(prediction)

  report
end

Divides entry body

[Source]

     # File lib/bio/appl/psort/report.rb, line 392
# Divides an entry body at the report boundary.
#
# entry:: the entry body; BOUNDARY is located with entry.index, so for an
#         Array of sections the boundary must be an element equal to
#         BOUNDARY.
#
# Returns two values: everything before the boundary, and everything from
# two positions past the boundary onwards (the boundary and the item that
# directly follows it are dropped).
#
# Raises ArgumentError when the entry contains no boundary.
def self.divent(entry)
  boundary = entry.index(BOUNDARY)
  raise ArgumentError, "report boundary not found in entry" if boundary.nil?

  # entry[0, boundary] yields an empty head when the boundary comes first;
  # the range form 0..(boundary - 1) would turn into 0..-1 and return the
  # whole entry in that case.
  return entry[0, boundary], entry[(boundary + 2)..(entry.length)]
end

Constructs a Bio::PSORT::PSORT2::Report object.

[Source]

     # File lib/bio/appl/psort/report.rb, line 227
# Constructs a report object; every argument is stored in the instance
# variable of the same name.
#
# raw::        raw text of the output report
# entry_id::   entry id of the query sequence
# scl::        given subcellular localization
# definition:: definition of the query sequence
# seq::        query sequence
# k::          k parameter of the kNN classifier
# features::   feature hash
# prob::       probability hash
# pred::       predicted subcellular localization
def initialize(raw = '', entry_id = nil, scl = nil, definition = nil,
               seq = nil, k = nil, features = {}, prob = {}, pred = nil)
  # Assignment order is kept so that instance_variables / inspect output
  # stays the same.
  @entry_id, @scl, @definition, @seq = entry_id, scl, definition, seq
  @features, @prob, @pred = features, prob, pred
  @k, @raw = k, raw
end

Parses an output report, detecting the output format automatically.

[Source]

     # File lib/bio/appl/psort/report.rb, line 242
# Parses an output report, choosing the parser from the report text.
#
# str::      raw report text
# entry_id:: entry id handed on to the selected parser / constructor
#
# Dispatch (first match wins):
# * a line starting with " psg:"    -> default_parser
# * a line starting with "PSG:"     -> v_parser (-v report)
# * ": too short length "           -> too_short_parser
# * "PSORT II server"               -> a bare report holding only the raw
#                                      text and the entry id
#
# Returns the report object.
# Raises ArgumentError when no format matches.
def self.parser(str, entry_id)
  case str
  when /^ psg:/   # default report
    self.default_parser(str, entry_id)
  when /^PSG:/    # -v report
    self.v_parser(str, entry_id)
  when /: too short length /
    self.too_short_parser(str, entry_id)
  when /PSORT II server/
    # The report text is the +str+ argument; the previous code referenced
    # an undefined local here and raised NameError.
    self.new(str, entry_id)
  else
    raise ArgumentError, "invalid format\n[#{str}]"
  end
end

Builds and returns the kNN probability hash (the value stored in @prob).

[Source]

     # File lib/bio/appl/psort/report.rb, line 309
# Builds the kNN probability hash from the probability section of a report.
#
# str:: text with one "<value> %: <localization name>" entry per line;
#       tab characters are ignored.
#
# Returns a Hash keyed by the keys of SclNames. Every key starts at 0.0
# and is overwritten with the value of the line whose name maps to it.
# A line whose name is not a value of SclNames is stored under the nil
# key, as before.
def self.set_kNN_prob(str)
  scl_names = Bio::PSORT::PSORT2::SclNames

  prob = {}
  scl_names.keys.each { |code| prob[code] = 0.0 }

  str.gsub(/\t/,'').split(/\n/).each do |line|
    val, scl = line.strip.split(/ %: /)
    # Hash#key is the reverse lookup (value -> key); Hash#index, used
    # previously, no longer exists in Ruby 3.
    prob[scl_names.key(scl)] = val.to_f
  end

  prob
end

Parser for ``too short length’’ report.

 $id: too short length ($leng), skipped\n";

[Source]

     # File lib/bio/appl/psort/report.rb, line 260
# Parser for the ``too short length'' report.
#
# ent::      raw report text
# entry_id:: optional entry id; it takes precedence over the id found in
#            the report text
#
# Returns a report holding the raw text. When the text contains
# ": too short length", the entry id falls back to the text before that
# marker and scl is set to '---'.
def self.too_short_parser(ent, entry_id = nil)
  report = self.new(ent)
  report.entry_id = entry_id
  return report unless ent =~ /^(.+)?: too short length/

  report.entry_id ||= $1
  report.scl = '---'
  report
end

Parser for the verbose output report format. ``psort -v report’’ and WWW server output.

[Source]

     # File lib/bio/appl/psort/report.rb, line 338
338:         def self.v_parser(ent, entry_id = nil)
               # Builds a report from the raw text +ent+ of a verbose report
               # and returns it. +entry_id+ is passed to the constructor as is.
339:           report = Bio::PSORT::PSORT2::Report.new(ent, entry_id)
340: 
               # Work on blank-line separated chunks from here on.
341:           ent = ent.split(/\n\n/).map {|e| e.chomp }
               # Merge chunks into an earlier chunk and blank out their slot.
               # Two kinds of chunk are merged: one in which no line starts
               # with a word character, '-', '>' or a tab, and one with a line
               # starting with "none". search_j is defined elsewhere;
               # presumably it returns how far back the merge target lies --
               # TODO confirm.
342:           ent.each_with_index {|e, i|
343:             unless /^(\w|-|\>|\t)/ =~ e
344:               j = self.__send__(:search_j, i, ent)
345:               ent[i - j] += e
346:               ent[i] = nil
347:             end
348:             if /^none/ =~ e    # psort output bug
349:               j = self.__send__(:search_j, i, ent)
350:               ent[i - j] += e
351:               ent[i] = nil
352:             end
353:           }
               # Drop the slots emptied by the merge above.
354:           ent.compact!
355: 
               # WWW output: discard the leading server chunk, then remove the
               # last chunk that has a line starting with "Results of
               # Subprograms" together with the chunk at the index before it.
               # NOTE(review): when no chunk matches, delline stays '' and
               # ent.index('') is probably nil, so i - 1 would raise -- confirm.
356:           if /^ PSORT II server/ =~ ent[0] # for WWW version
357:             ent.shift 
358:             delline = ''
359:             ent.each {|e| delline = e if /^Results of Subprograms/ =~ e }
360:             i = ent.index(delline)
361:             ent.delete(delline)
362:             ent.delete_at(i - 1)
363:           end
364: 
               # The next two chunks are the header line and the sequence.
365:           report.set_header_line(ent.shift)  
366:           report.seq = Bio::Sequence::AA.new(ent.shift)
367: 
               # divent splits the remaining chunks at the BOUNDARY chunk:
               # fent holds the chunks before it, pent those starting two
               # positions after it.
368:           fent, pent = self.divent(ent)
369:           report.set_features(fent)          
370:           report.prob = self.set_kNN_prob(pent[0].strip)  
371:           report.set_prediction(pent[1].strip)
372: 
373:           return report
374:         end

Public Instance methods

Sets @features values.

[Source]

     # File lib/bio/appl/psort/report.rb, line 398
# Sets @features values.
#
# features_ary:: list of feature text chunks. Each chunk is stored whole
#                under its label, i.e. the stripped text before the first
#                ':' that is followed by a space or a newline.
#
# Finally the 'AA' feature is set to the length of the sequence, which is
# also the return value.
def set_features(features_ary)
  features_ary.each do |chunk|
    label = chunk.split(/\:( |\n)/).first.strip
    self.features[label] = chunk # unless /^\>/ =~ label
  end
  self.features['AA'] = self.seq.length
end

Sets header information (@entry_id, @definition and @scl) from the header line.

[Source]

     # File lib/bio/appl/psort/report.rb, line 292
# Parses the header line of a report entry and stores the result in
# @entry_id, @definition and @scl.
#
# str:: header text; a leading line made of dashes is ignored. The string
#       passed in is left untouched, so frozen strings are accepted.
#
# @entry_id is taken from the first token only when it is not set yet.
# @definition is the text after "(N aa) " when present, otherwise the
# remaining tokens joined by a space. @scl is set to the first word of
# the definition when that word is a key of SclNames.
#
# Returns the assigned @scl value, or nil when @scl was not assigned.
def set_header_line(str)
  # Non-destructive sub: the caller's string must not be modified.
  header = str.sub(/^-+\n/,'')
  tmp = header.split(/\t| /)
  # The first token is consumed only when it is needed as the entry id.
  @entry_id = tmp.shift.sub(/^-+/,'').strip unless @entry_id

  rest = tmp.join(' ').chomp
  @definition = (rest =~ /\(\d+ aa\) (.+)$/) ? $1 : rest
  scl = @definition.split(' ')[0]

  @scl = scl if SclNames.key?(scl)
end

Sets @pred and @k values (and @entry_id when it is not set yet) from the prediction line.

[Source]

     # File lib/bio/appl/psort/report.rb, line 323
# Reads the prediction line of a report and stores its parts.
#
# str:: text containing "prediction for <id> is <xxx> (k=<digits>)"
#
# Sets @pred to the three-character code and @k to the digits (kept as a
# String); @entry_id is filled from the line only when it is not set yet.
#
# Returns the value assigned to @k.
# Raises ArgumentError when the text does not have the expected form.
def set_prediction(str)
  pattern = /prediction for (\S+?) is (\w{3}) \(k=(\d+)\)/

  # Regexp#=== is what case/when uses: it simply reports no match for a
  # non-String argument.
  unless pattern === str
    raise ArgumentError,
      "Invalid format at(#{self.entry_id}):\n[#{str}]\n"
  end

  id, pred, k = $~.captures
  @entry_id ||= id
  @pred = pred
  @k    = k
end

[Validate]