Class | Bio::GCG::Msf |
In: |
lib/bio/appl/gcg/msf.rb
|
Parent: | Object |
MSF is a multiple sequence alignment format used by the GCG (Wisconsin) package. Bio::GCG::Msf is a parser for the MSF format.
DELIMITER | = | RS = nil | delimiter used by Bio::FlatFile |
# File lib/bio/appl/gcg/msf.rb, line 31
#
# Creates a new Msf object from the text of a single MSF entry.
#
# The text is split at the first line that consists solely of "//".
# Everything before it (the preamble) is parsed here; everything after
# it is kept unparsed in @data.
#
# From the preamble this sets:
# * @heading     - the leading "!!xx_MULTIPLE_ALIGNMENT ..." line, or nil
# * @description - the text preceding the line that ends with ".."
# * @entry_id, @length, @seq_type, @date, @checksum - fields of the
#   "MSF: ... Type: ... Check: ... .." line (only set when it matches)
# * @seq_info    - one Hash of "Key: value" pairs per "Name:" line
#
# An empty entry is accepted and yields an empty description and no
# sequence information instead of raising NoMethodError.
# ---
# *Arguments*:
# * (required) _str_: String containing the MSF entry
def initialize(str)
  str = str.sub(/\A[\r\n]+/, '')
  preamble, @data = str.split(/^\/\/$/, 2)
  # "".split(...) returns [], which would leave preamble nil.
  preamble = preamble.to_s

  # '!!NA_MULTIPLE_ALIGNMENT 1.0' or like this
  m = /\A\!\![A-Z]+\_MULTIPLE\_ALIGNMENT.*/.match(preamble)
  @heading = m ? m[0] : nil
  preamble = m.pre_match + m.post_match if m

  # Everything up to and including the last line that ends with "..".
  m = /.*\.\.\s*$/m.match(preamble)
  header = m ? m[0] : ''
  preamble = m.pre_match + m.post_match if m

  # Inside the header, the first line ending with ".." carries the
  # MSF/Type/Check fields; the rest of the header is the description.
  m = /^.*\.\.\s*$/.match(header)
  msf_line = m ? m[0] : ''
  description = m ? (m.pre_match + m.post_match) : header
  @description = description.sub(/\A(\r\n|\r|\n)/, '')

  m = /^(?:(.+)\s+)?MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(msf_line)
  if m
    @entry_id = m[1].to_s.strip
    @length   = (m[2] ? m[2].to_i : nil)
    @seq_type = m[3]
    @date     = m[4].to_s.strip
    @checksum = (m[6] ? m[6].to_i : nil)
  end

  # What remains of the preamble holds the per-sequence "Name:" lines.
  @seq_info = []
  preamble.each_line do |line|
    next unless /Name\: / =~ line
    info = {}
    line.scan(/(\S+)\: +(\S*)/) { |key, value| info[key] = value }
    @seq_info << info
  end

  @align = nil
end
returns Bio::Alignment object.
# File lib/bio/appl/gcg/msf.rb, line 176
#
# Runs do_parse and then hands back the value stored in @align
# (documented as a Bio::Alignment object).
def alignment
  do_parse
  return @align
end
CompCheck field
# File lib/bio/appl/gcg/msf.rb, line 118
#
# Value of the "CompCheck:" field found in the description, as an
# Integer, or nil when the field is absent. The result (including nil)
# is cached in @compcheck after the first call.
def compcheck
  return @compcheck if defined?(@compcheck)
  found = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = found ? found[1].to_i : nil
end
gap length weight
# File lib/bio/appl/gcg/msf.rb, line 109
#
# Value of the "GapLengthWeight:" field found in the description, as a
# String, or nil when the field is absent. The result (including nil)
# is cached in @gap_length_weight after the first call.
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)
  found = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = found && found[1]
end
gap weight
# File lib/bio/appl/gcg/msf.rb, line 100
#
# Value of the "GapWeight:" field found in the description, as a
# String, or nil when the field is absent. The result (including nil)
# is cached in @gap_weight after the first call.
def gap_weight
  return @gap_weight if defined?(@gap_weight)
  found = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = found && found[1]
end
Gets the sequence data (used internally; will become obsolete).
# File lib/bio/appl/gcg/msf.rb, line 182
#
# Runs do_parse and then hands back the value stored in @seq_data.
# Documented as internal and slated to become obsolete.
def seq_data
  do_parse
  return @seq_data
end
symbol comparison table
# File lib/bio/appl/gcg/msf.rb, line 91
#
# Value of the "Symbol comparison table:" field found in the
# description, as a String, or nil when the field is absent. The result
# (including nil) is cached in @symbol_comparison_table after the first
# call.
def symbol_comparison_table
  return @symbol_comparison_table if defined?(@symbol_comparison_table)
  found = /Symbol comparison table\: +(\S+)/.match(@description)
  @symbol_comparison_table = found && found[1]
end
validates checksum
# File lib/bio/appl/gcg/msf.rb, line 188
#
# Validates the checksums of the entry.
#
# After do_parse, every element of @seq_data is run through
# Bio::GCG::Seq.calc_checksum and compared with the "Check" value of
# the @seq_info entry at the same index. When all of them agree, the
# sum of the per-sequence checksums modulo 10000 is compared with
# @checksum, unless @checksum is 0 (the "Check:" field of BioPerl is
# always 0), in which case the total is not checked.
#
# A sequence without a corresponding @seq_info entry makes the
# validation fail (returns false) instead of raising NoMethodError.
# ---
# *Returns*:: true or false
def validate_checksum
  do_parse
  total = 0
  @seq_data.each_with_index do |seq, i|
    info = @seq_info[i]
    # More sequences than "Name:" lines: nothing to validate against.
    return false unless info
    sum = Bio::GCG::Seq.calc_checksum(seq)
    return false if sum != info['Check'].to_i
    total += sum
  end
  # "Check:" field of BioPerl is always 0
  return true if @checksum == 0
  (total % 10000) == @checksum
end