Class Bio::GCG::Msf
In: lib/bio/appl/gcg/msf.rb
Parent: Object

MSF is a multiple sequence alignment format from the GCG (Wisconsin) package. Bio::GCG::Msf is a parser for the MSF format.

Methods

Constants

DELIMITER = RS = nil   delimiter used by Bio::FlatFile

Attributes

checksum  [R]  checksum
date  [R]  date
description  [R]  description
entry_id  [R]  ID of the alignment
heading  [R]  heading line (e.g. '!!NA_MULTIPLE_ALIGNMENT 1.0' or similar)
length  [R]  alignment length
seq_type  [R]  sequence type ("N" for DNA/RNA or "P" for protein)

Public Class methods

Creates a new Msf object.

[Source]

    # File lib/bio/appl/gcg/msf.rb, line 31
# Creates a new Msf object from the text of one MSF entry.
#
# Only the part before the "//" separator line is parsed here: the
# "!!.._MULTIPLE_ALIGNMENT" heading, the free-form description, the
# "MSF: .. Type: .. Check: .." summary line and the per-sequence "Name:"
# lines. The part after the separator is stored untouched in @data.
#
# str:: String containing the MSF entry. Leading blank lines are ignored.
#       Empty (or newline-only) input yields an object with a nil heading,
#       an empty description and no sequence information.
def initialize(str)
  str = str.sub(/\A[\r\n]+/, '')
  preamble, @data = str.split(/^\/\/$/, 2)
  # String#split returns [] for an empty string, which would leave
  # preamble nil; use an empty, unfrozen string instead of raising.
  preamble ||= String.new

  # '!!NA_MULTIPLE_ALIGNMENT 1.0' or like this (nil when absent).
  @heading = preamble.slice!(/\A\!\![A-Z]+\_MULTIPLE\_ALIGNMENT.*/)

  # Cut everything up to and including the last line that ends with "..":
  # the description text followed by the "MSF:" summary line.
  header = preamble.slice!(/.*\.\.\s*$/m).to_s
  # Split that header into the description (kept) and the first line
  # ending with ".." (the summary line, parsed below).
  @description = header.dup
  summary = @description.slice!(/^.*\.\.\s*$/).to_s

  # These stay nil when the summary line is missing or malformed.
  @entry_id = @length = @seq_type = @date = @checksum = nil
  m = /^(?:(.+)\s+)?MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(summary)
  if m
    @entry_id = m[1].to_s.strip
    @length   = m[2].to_i
    @seq_type = m[3]
    @date     = m[4].to_s.strip
    @checksum = m[6].to_i
  end

  # What is left of the preamble holds the "Name: .." lines; each one
  # becomes a Hash of its "Key: value" pairs (keys and values are Strings).
  @seq_info = []
  preamble.each_line do |line|
    next unless /Name\: / =~ line
    info = {}
    line.scan(/(\S+)\: +(\S*)/) { |key, value| info[key] = value }
    @seq_info << info
  end

  # Drop a single leading line break from the description.
  @description.sub!(/\A(\r\n|\r|\n)/, '')
  @align = nil
end

Public Instance methods

Returns a Bio::Alignment object.

[Source]

     # File lib/bio/appl/gcg/msf.rb, line 176
# Returns the alignment held in @align after making sure the entry has
# been parsed.
# NOTE(review): do_parse is defined outside this listing; presumably it
# fills @align on first use -- confirm against its definition.
def alignment
  do_parse
  return @align
end

CompCheck field

[Source]

     # File lib/bio/appl/gcg/msf.rb, line 118
# Returns the "CompCheck:" value found in the description as an Integer,
# or nil when the description has no such field. The result (including
# nil) is cached after the first call.
def compcheck
  return @compcheck if defined?(@compcheck)
  found = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = found ? found[1].to_i : nil
end

gap length weight

[Source]

     # File lib/bio/appl/gcg/msf.rb, line 109
# Returns the "GapLengthWeight:" value from the description as a String,
# or nil when the field is absent. The result is cached after the first
# call.
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)
  found = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = found && found[1]
end

gap weight

[Source]

     # File lib/bio/appl/gcg/msf.rb, line 100
# Returns the "GapWeight:" value from the description as a String, or nil
# when the field is absent. The result is cached after the first call.
def gap_weight
  return @gap_weight if defined?(@gap_weight)
  found = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = found && found[1]
end

Gets the sequence data (used internally; will be obsoleted).

[Source]

     # File lib/bio/appl/gcg/msf.rb, line 182
# Returns the sequence data held in @seq_data after making sure the entry
# has been parsed. Intended for internal use.
# NOTE(review): do_parse is defined outside this listing; presumably it
# fills @seq_data on first use -- confirm against its definition.
def seq_data
  do_parse
  return @seq_data
end

symbol comparison table

[Source]

    # File lib/bio/appl/gcg/msf.rb, line 91
# Returns the "Symbol comparison table:" value from the description as a
# String, or nil when the field is absent. The result is cached after the
# first call.
def symbol_comparison_table
  return @symbol_comparison_table if defined?(@symbol_comparison_table)
  found = /Symbol comparison table\: +(\S+)/.match(@description)
  @symbol_comparison_table = found && found[1]
end

validates checksum

[Source]

     # File lib/bio/appl/gcg/msf.rb, line 188
# Validates the checksums of the entry.
#
# Each sequence's checksum (computed by Bio::GCG::Seq.calc_checksum) must
# equal the "Check" value recorded for it in @seq_info. If they all agree
# and the entry's own checksum is non-zero, the sum of the per-sequence
# checksums modulo 10000 must also equal @checksum.
#
# Returns true when everything agrees, false otherwise.
def validate_checksum
  do_parse
  running_total = 0
  @seq_data.each_with_index do |seq, idx|
    computed = Bio::GCG::Seq.calc_checksum(seq)
    # Stop at the first sequence whose checksum disagrees.
    return false if computed != @seq_info[idx]['Check'].to_i
    running_total += computed
  end
  # "Check:" field of BioPerl is always 0, so a zero total is not compared.
  return true if @checksum == 0
  (running_total % 10000) == @checksum
end

[Validate]