Class Bio::Iprscan::Report
In: lib/bio/appl/iprscan/report.rb
Parent: Object

DESCRIPTION

Class for InterProScan report. It is used to parse results and reformat results from (raw|xml|txt) into (html, xml, ebihtml, txt, gff3) format.

See ftp.ebi.ac.uk/pub/software/unix/iprscan/README.html

USAGE

 # Read a merged.txt and split it into one report per entry.
 Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
   report.query_id
   report.matches.size
   report.matches.each do |match|
     match.ipr_id #=> 'IPR...'
     match.ipr_description
     match.method
     match.accession
     match.description
     match.match_start
     match.match_end
     match.evalue
   end
   # report.to_gff3
   # report.to_html
 end

 Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report|
   report.class #=> Bio::Iprscan::Report
 end

Methods

Classes and Modules

Class Bio::Iprscan::Report::Match

Constants

RS = DELIMITER = "\n\/\/\n"   Entry delimiter pattern.

External Aliases

query_id -> entry_id

Attributes

crc64  [RW]  CRC64 checksum of query sequence.
matches  [RW]  Matched InterPro motifs in Hash. Each InterPro motif has :name, :definition, :accession and :motifs keys. The :motifs key contains the motifs in an Array. Each motif has :method, :accession, :definition, :score, :location_from and :location_to keys.
query_id  [RW]  Query sequence name (entry_id).
query_length  [RW]  Query sequence length.

Public Class methods

[Source]

     # File lib/bio/appl/iprscan/report.rb, line 236
# Builds an empty report: the query id, query length and CRC64 checksum
# start out as nil and the list of matches starts out as an empty Array.
def initialize
  @query_id, @query_length, @crc64 = nil, nil, nil
  @matches = []
end

Splits entry stream.

Usage

 Bio::Iprscan::Report.parse_ptxt(File.open("merged.txt")) do |report|
   report
 end

[Source]

     # File lib/bio/appl/iprscan/report.rb, line 194
# Splits a pseudo-txt stream into entries and yields one parsed report
# per entry.
#
# io:: an object responding to +each(separator)+ (for example a File or
#      StringIO). Entries are separated by a line holding only "//".
#
# Each chunk is handed to +parse_ptxt_entry+ and the result is yielded.
# When no block is given an Enumerator over the same reports is returned
# instead of failing with LocalJumpError at the first +yield+.
def self.parse_ptxt(io)
  return enum_for(:parse_ptxt, io) unless block_given?

  io.each("\n\/\/\n") do |entry|
    yield parse_ptxt_entry(entry)
  end
end

Parser method for a pseudo-txt formatted entry. Returns a Bio::Iprscan::Report object.

Usage

 File.open("merged.txt").each(Bio::Iprscan::Report::RS) do |e|
   report = Bio::Iprscan::Report.parse_ptxt_entry(e)
 end

[Source]

     # File lib/bio/appl/iprscan/report.rb, line 209
# Parses one pseudo-txt entry into a report.
#
# str:: text of a single entry, one tab-separated record per line.
#
# Line handling, in order:
# * blank lines are ignored (they used to fall through to the match
#   branch and raise NoMethodError on the missing location column);
# * a line with exactly two fields sets the query id and query length;
# * a line whose first field is "//" is the entry terminator;
# * a line whose first field is "InterPro" is remembered and supplies
#   the InterPro id/description of the match lines that follow it;
# * any other line is a match: method, accession, description, evalue
#   and a "start-end" location in the fifth field.
#
# Returns the populated report.
def self.parse_ptxt_entry(str)
  report = new
  ipr_line = ''
  str.split(/\n/).each do |line|
    next if line.strip.empty?

    fields = line.split("\t")
    if fields.size == 2
      report.query_id = fields[0]
      report.query_length = fields[1].to_i
    elsif fields.first == '//'
      # Entry terminator: nothing to record.
    elsif fields.first == 'InterPro'
      ipr_line = fields
    else
      startp, endp = fields[4].split('-')
      report.matches << Match.new(:ipr_id => ipr_line[1],
                                  :ipr_description => ipr_line[2],
                                  :method => fields[0],
                                  :accession => fields[1],
                                  :description => fields[2],
                                  :evalue => fields[3],
                                  :match_start => startp.to_i,
                                  :match_end => endp.to_i)
    end
  end
  report
end

USAGE

 Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report|
   report
 end

[Source]

    # File lib/bio/appl/iprscan/report.rb, line 72
# Splits a raw-format stream into entries and yields one parsed report
# per entry.
#
# io:: an object responding to +gets+ (for example a File or StringIO).
#
# Consecutive lines sharing the same first tab-separated field are
# collected into one entry; each completed entry is passed to
# Bio::Iprscan::Report.parse_raw_entry and the result is yielded.
# When no block is given an Enumerator over the same reports is returned.
#
# The id of the running entry is taken once from its first line rather
# than by re-splitting the whole accumulated entry for every input line.
def self.parse_raw(io)
  return enum_for(:parse_raw, io) unless block_given?

  entry = nil
  current_id = nil
  while (line = io.gets)
    line_id = line.split("\t").first
    if entry && line_id != current_id
      # First field changed: the running entry is complete.
      yield Bio::Iprscan::Report.parse_raw_entry(entry)
      entry = nil
    end
    if entry
      entry << line
    else
      entry = line
      current_id = line_id
    end
  end
  yield Bio::Iprscan::Report.parse_raw_entry(entry) if entry
end

Parser method for a raw formatted entry. Returns a Bio::Iprscan::Report object.

[Source]

     # File lib/bio/appl/iprscan/report.rb, line 89
 # Parses the raw-format lines of a single query into a report.
 #
 # str:: one or more tab-separated lines. Columns, in order: query id,
 #       CRC64, query length, method, accession, description, match
 #       start, match end, evalue, status, date, and optionally InterPro
 #       id, InterPro description and a GO term column.
 #
 # Every line becomes one Match. The report's query id and query length
 # are read back from the first match; the report's CRC64 is taken from
 # the second column of the first line (it used to be left nil).
 # An empty +str+ yields an empty report instead of raising
 # NoMethodError on the missing first match.
 #
 # Returns the populated report.
 def self.parse_raw_entry(str)
   report = new
   first_cols = nil
   str.split(/\n/).each do |line|
     cols = line.split("\t")
     first_cols ||= cols
     match = Match.new(:query_id => cols[0],
                       :crc64    => cols[1],
                       :query_length => cols[2].to_i,
                       :method       => cols[3],
                       :accession    => cols[4],
                       :description => cols[5],
                       :match_start => cols[6].to_i,
                       :match_end   => cols[7].to_i,
                       :evalue => cols[8],
                       :status => cols[9],
                       :date   => cols[10])
     report.matches << match
     if cols[11]
       match.ipr_id = cols[11]
       match.ipr_description = cols[12]
     end
     # GO terms are kept as the matched strings, one per "(GO:nnn)" item.
     match.go_terms = cols[13].scan(/(\w+ \w+\:.+? \(GO:\d+\))/).flatten if cols[13]
   end
   first = report.matches.first
   return report unless first

   report.query_id = first.query_id
   report.query_length = first.query_length
   report.crc64 = first_cols[1]
   report
 end

Splits the entry stream.

Usage

 Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
   report.class #=> Bio::Iprscan::Report
 end

[Source]

     # File lib/bio/appl/iprscan/report.rb, line 130
# Splits a txt-format stream into entries and yields one parsed report
# per entry.
#
# io:: an object responding to +each(separator)+ (for example a File or
#      StringIO).
#
# The stream is cut at every blank line followed by "Sequence". That
# leaves a dangling "Sequence" at the end of each chunk but the last and
# removes it from the start of each chunk but the first, so both are
# repaired before the chunk is passed to +parse_txt_entry+.
#
# The repairs are anchored to the very end (\z) and very start (\A) of
# the chunk. The previous line anchors ($ and ^) could instead hit a
# "Sequence" at the end or start of an inner line.
#
# When no block is given an Enumerator over the same reports is returned.
def self.parse_txt(io)
  return enum_for(:parse_txt, io) unless block_given?

  io.each("\n\nSequence") do |entry|
    entry = entry.sub(/Sequence\z/, '')
    # Leading whitespace is tolerated so a chunk that already starts
    # with its own "Sequence" header is not prefixed twice.
    entry = 'Sequence' + entry unless entry =~ /\A\s*Sequence/
    yield parse_txt_entry(entry)
  end
end

Parser method for a txt formatted entry. Returns a Bio::Iprscan::Report object.

[Source]

     # File lib/bio/appl/iprscan/report.rb, line 147
# Parses one txt-format entry into a report.
#
# str:: text of a single entry; it must contain a line starting with
#       "Sequence ".
#
# The first paragraph (up to the first blank line) is the header, from
# which the query id, query length and CRC64 checksum are extracted.
# The remaining paragraphs are read row by row, with columns separated
# by two or more spaces:
# * "method" rows are column headings and are ignored;
# * GO rows ("Molecular Function:", "Cellular Component:" or
#   "Biological Process:") are scanned into [category, term, GO id]
#   triples;
# * "InterPro" rows are remembered and supply the InterPro id and
#   description of the match rows that follow;
# * any other row is a match row whose fourth column holds one or more
#   "S[from-to] evalue" items, each of which becomes one Match.
# Rows without a fourth column (for instance blank rows produced by
# three consecutive newlines) are skipped; they used to raise
# NoMethodError.
#
# Raises ArgumentError when +str+ has no "Sequence " line.
# Returns the populated report.
def self.parse_txt_entry(str)
  unless str =~ /^Sequence /
    raise ArgumentError, "Invalid format:  \n\n#{str}"
  end

  header, *paragraphs = str.split(/\n\n/)
  report = new
  report.query_id = (header =~ /Sequence \"(.+)\" / ? $1 : '')
  report.query_length = (header =~ /length: (\d+) aa./ ? $1.to_i : nil)
  report.crc64 = (header =~ /crc64 checksum: (\S+) / ? $1 : nil)

  # NOTE(review): neither value is reset between InterPro groups, so a
  # group without its own GO rows inherits the previous group's terms.
  # Left unchanged here; confirm against the txt format before altering.
  ipr_line = ''
  go_annotation = ''
  paragraphs.each do |paragraph|
    paragraph.split(/\n/).each do |row|
      cols = row.split(/  +/)
      case cols[0]
      when 'method'
        # Column heading row: nothing to record.
      when /(Molecular Function|Cellular Component|Biological Process):/
        go_annotation = cols[0].scan(/([MCB]\w+ \w+): (\S.+?\S) \((GO:\d+)\),*/)
      when 'InterPro'
        ipr_line = cols
      else
        next unless cols[3]

        cols[3].scan(/(\S)\[(\d+)-(\d+)\] (\S+) */).each do |pos_score|
          report.matches << Match.new(:ipr_id          => ipr_line[1],
                                      :ipr_description => ipr_line[2],
                                      :method      => cols[0],
                                      :accession   => cols[1],
                                      :description => cols[2],
                                      :evalue      => pos_score[3],
                                      :status      => pos_score[0],
                                      :match_start => pos_score[1].to_i,
                                      :match_end   => pos_score[2].to_i,
                                      :go_terms => go_annotation)
        end
      end
    end
  end
  report
end

Public Instance methods

def format_txt end

[Source]

     # File lib/bio/appl/iprscan/report.rb, line 266
# Renders the report in raw format: one line per match, columns joined
# by tabs, lines joined by newlines.
#
# Columns, in order: query id, CRC64, query length, then the match's
# method name, accession, description, start, end, evalue, status, date,
# InterPro id, InterPro description and its GO terms joined by ", ".
#
# A match's GO terms may be nil, an empty String, an Array of
# [category, term, GO id] triples, or an Array of already formatted
# Strings. Triples are rendered as "category: term (GO id)", Strings are
# emitted unchanged, and nil / empty values give an empty column.
# Previously nil and '' raised NoMethodError and String terms were
# rendered character by character.
#
# Returns the formatted String.
def format_raw
  @matches.map { |match|
    go_terms = match.go_terms
    go_terms = [] if go_terms.nil? || go_terms == ''
    go_column = Array(go_terms).map { |term|
      if term.is_a?(Array)
        term[0] + ': ' + term[1] + ' (' + term[2] + ')'
      else
        term.to_s
      end
    }.join(', ')

    [self.query_id,
     self.crc64,
     self.query_length,
     match.method_name,
     match.accession,
     match.description,
     match.match_start,
     match.match_end,
     match.evalue,
     match.status,
     match.date,
     match.ipr_id,
     match.ipr_description,
     go_column
    ].join("\t")
  }.join("\n")
end

Output interpro matches in the format_type.

[Source]

     # File lib/bio/appl/iprscan/report.rb, line 245
# Renders the matches in the requested format.
#
# format_type:: 'raw' or :raw, which delegates to +format_raw+.
#
# Raises NameError ("Invalid format_type.") for any other value.
def output(format_type)
  return format_raw if ['raw', :raw].include?(format_type)

  raise NameError, "Invalid format_type."
end

Returns a Hash (key as an InterPro ID and value as an Array of the Match objects with that ID).

  report.to_hash.each do |ipr_id, matches|
    matches.each do |match|
      match.ipr_id == ipr_id #=> true
    end
  end

[Source]

     # File lib/bio/appl/iprscan/report.rb, line 298
# Groups the matches by InterPro id.
#
# Returns a Hash mapping each +ipr_id+ to the Array of matches carrying
# it, in their original order. The Hash is built on the first call and
# the same object is returned on later calls.
def to_hash
  @ipr_ids ||= @matches.group_by { |match| match.ipr_id }
end

[Validate]