Class | Bio::Iprscan::Report |
In: |
lib/bio/appl/iprscan/report.rb
|
Parent: | Object |
Class for InterProScan report. It is used to parse results and reformat results from (raw|xml|txt) into (html, xml, ebihtml, txt, gff3) format.
See ftp.ebi.ac.uk/pub/software/unix/iprscan/README.html
# Read a merged.txt and split each entry. Bio::Iprscan::Report.parse_txt(File.read("merged.txt")) do |report| report.query_id report.matches.size report.matches.each do |match| match.ipr_id #=> 'IPR...' match.ipr_description match.method match.accession match.description match.match_start match.match_end match.evalue end # report.to_gff3 # report.to_html end Bio::Iprscan::Report.parse_raw(File.read("merged.raw")) do |report| report.class #=> Bio::Iprscan::Report end
RS | = | DELIMITER = "\n\/\/\n" | Entry delimiter pattern. |
query_id | -> | entry_id |
crc64 | [RW] | CRC64 checksum of query sequence. |
matches | [RW] | Matched InterPro motifs in a Hash. Each InterPro motif has :name, :definition, :accession and :motifs keys. The :motifs key contains the motifs in an Array. Each motif has :method, :accession, :definition, :score, :location_from and :location_to keys. |
query_id | [RW] | Query sequence name (entry_id). |
query_length | [RW] | Query sequence length. |
# File lib/bio/appl/iprscan/report.rb, line 236 236: def initialize 237: @query_id = nil 238: @query_length = nil 239: @crc64 = nil 240: @matches = [] 241: end
Splits entry stream.
Bio::Iprscan::Report.parse_ptxt(File.open("merged.txt")) do |report| report end
# File lib/bio/appl/iprscan/report.rb, line 194 194: def self.parse_ptxt(io) 195: io.each("\n\/\/\n") do |entry| 196: yield self.parse_ptxt_entry(entry) 197: end 198: end
Parser method for a pseudo-txt formatted entry. Returns a Bio::Iprscan::Report object.
File.read("merged.txt").each(Bio::Iprscan::Report::RS) do |e| report = Bio::Iprscan::Report.parse_ptxt_entry(e) end
# File lib/bio/appl/iprscan/report.rb, line 209 209: def self.parse_ptxt_entry(str) 210: report = self.new 211: ipr_line = '' 212: str.split(/\n/).each do |line| 213: line = line.split("\t") 214: if line.size == 2 215: report.query_id = line[0] 216: report.query_length = line[1].to_i 217: elsif line.first == '//' 218: elsif line.first == 'InterPro' 219: ipr_line = line 220: else 221: startp, endp = line[4].split("-") 222: report.matches << Match.new(:ipr_id => ipr_line[1], 223: :ipr_description => ipr_line[2], 224: :method => line[0], 225: :accession => line[1], 226: :description => line[2], 227: :evalue => line[3], 228: :match_start => startp.to_i, 229: :match_end => endp.to_i) 230: end 231: end 232: report 233: end
Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report| report end
# File lib/bio/appl/iprscan/report.rb, line 72 72: def self.parse_raw(io) 73: entry = '' 74: while line = io.gets 75: if entry != '' and entry.split("\t").first == line.split("\t").first 76: entry << line 77: elsif entry != '' 78: yield Bio::Iprscan::Report.parse_raw_entry(entry) 79: entry = line 80: else 81: entry << line 82: end 83: end 84: yield Bio::Iprscan::Report.parse_raw_entry(entry) if entry != '' 85: end
Parser method for a raw formatted entry. Returns a Bio::Iprscan::Report object.
# File lib/bio/appl/iprscan/report.rb, line 89 89: def self.parse_raw_entry(str) 90: report = self.new 91: str.split(/\n/).each do |line| 92: line = line.split("\t") 93: report.matches << Match.new(:query_id => line[0], 94: :crc64 => line[1], 95: :query_length => line[2].to_i, 96: :method => line[3], 97: :accession => line[4], 98: :description => line[5], 99: :match_start => line[6].to_i, 100: :match_end => line[7].to_i, 101: :evalue => line[8], 102: :status => line[9], 103: :date => line[10]) 104: if line[11] 105: report.matches.last.ipr_id = line[11] 106: report.matches.last.ipr_description = line[12] 107: end 108: report.matches.last.go_terms = line[13].scan(/(\w+ \w+\:.+? \(GO:\d+\))/).flatten if line[13] 109: end 110: report.query_id = report.matches.first.query_id 111: report.query_length = report.matches.first.query_length 112: report 113: end
Splits the entry stream.
Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report| report.class #=> Bio::Iprscan::Report end
# File lib/bio/appl/iprscan/report.rb, line 130 130: def self.parse_txt(io) 131: io.each("\n\nSequence") do |entry| 132: if entry =~ /Sequence$/ 133: entry = entry.sub(/Sequence$/, '') 134: end 135: unless entry =~ /^Sequence/ 136: entry = 'Sequence' + entry 137: end 138: yield self.parse_txt_entry(entry) 139: end 140: end
Parser method for a txt formatted entry. Returns a Bio::Iprscan::Report object.
# File lib/bio/appl/iprscan/report.rb, line 147 147: def self.parse_txt_entry(str) 148: unless str =~ /^Sequence / 149: raise ArgumentError, "Invalid format: \n\n#{str}" 150: end 151: header, *matches = str.split(/\n\n/) 152: report = self.new 153: report.query_id = if header =~ /Sequence \"(.+)\" / then $1 else '' end 154: report.query_length = if header =~ /length: (\d+) aa./ then $1.to_i else nil end 155: report.crc64 = if header =~ /crc64 checksum: (\S+) / then $1 else nil end 156: ipr_line = '' 157: go_annotation = '' 158: matches.each do |m| 159: m = m.split(/\n/).map {|x| x.split(/ +/) } 160: m.each do |match| 161: case match[0] 162: when 'method' 163: when /(Molecular Function|Cellular Component|Biological Process):/ 164: go_annotation = match[0].scan(/([MCB]\w+ \w+): (\S.+?\S) \((GO:\d+)\),*/) 165: when 'InterPro' 166: ipr_line = match 167: else 168: pos_scores = match[3].scan(/(\S)\[(\d+)-(\d+)\] (\S+) */) 169: pos_scores.each do |pos_score| 170: report.matches << Match.new(:ipr_id => ipr_line[1], 171: :ipr_description => ipr_line[2], 172: :method => match[0], 173: :accession => match[1], 174: :description => match[2], 175: :evalue => pos_score[3], 176: :status => pos_score[0], 177: :match_start => pos_score[1].to_i, 178: :match_end => pos_score[2].to_i, 179: :go_terms => go_annotation) 180: end 181: end 182: end 183: end 184: return report 185: end
def format_txt end
# File lib/bio/appl/iprscan/report.rb, line 266 266: def format_raw 267: @matches.map { |match| 268: [self.query_id, 269: self.crc64, 270: self.query_length, 271: match.method_name, 272: match.accession, 273: match.description, 274: match.match_start, 275: match.match_end, 276: match.evalue, 277: match.status, 278: match.date, 279: match.ipr_id, 280: match.ipr_description, 281: match.go_terms.map {|x| x[0] + ': ' + x[1] + ' (' + x[2] + ')' }.join(', ') 282: ].join("\t") 283: }.join("\n") 284: end
Output interpro matches in the format_type.
# File lib/bio/appl/iprscan/report.rb, line 245 245: def output(format_type) 246: case format_type 247: when 'raw', :raw 248: format_raw 249: else 250: raise NameError, "Invalid format_type." 251: end 252: end
Returns a Hash (key as an InterPro ID and value as an Array of the matches that share that ID).
report.to_hash.each do |ipr_id, matches| matches.each do |match| match.ipr_id == ipr_id #=> true end end
# File lib/bio/appl/iprscan/report.rb, line 298 298: def to_hash 299: unless @ipr_ids 300: @ipr_ids = {} 301: @matches.each_with_index do |match, i| 302: @ipr_ids[match.ipr_id] ||= [] 303: @ipr_ids[match.ipr_id] << match 304: end 305: return @ipr_ids 306: else 307: return @ipr_ids 308: end 309: end