Module Bio::Alignment::Output
In: lib/bio/alignment.rb

Methods

Public Instance methods

Common routine shared by the interleaved and non-interleaved PHYLIP format writers.

[Source]

      # File lib/bio/alignment.rb, line 1099
# Common routine shared by the interleaved and non-interleaved PHYLIP writers.
#
# options:: Hash of formatting options
#   :replace_space::   if true, whitespace in names is replaced with '_'
#   :escape::          replaces : ; , ( ) in names with '_'
#                      (on when the key is absent)
#   :split::           keeps only the first whitespace-delimited word of
#                      each name (on when the key is absent)
#   :avoid_same_name:: passes the names through
#                      __clustal_avoid_same_name(names, 10)
#                      (on when the key is absent)
#   :width::           residues per line, default 60; rounded down to a
#                      multiple of 10 and never less than 10
#   :gap_char::        string substituted for every match of gap_regexp,
#                      default '-'
#   :case::            a value whose to_s matches /lower/i or /upper/i
#                      forces that letter case
#
# Returns a three-element Array [aln, aseqs, lines]:
# aln::   one-element Array holding the " <sequences> <length>\n" header
# aseqs:: one Array of text rows per sequence; the first row starts with the
#         name in a 10-character field, later rows with 10 spaces
# lines:: number of rows needed for one sequence
def __output_phylip_common(options = {})
  len = self.alignment_length
  aln = [ " #{self.number_of_sequences} #{len}\n" ]
  sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  sn.collect! { |x| x.gsub(/\s/, '_') } if options[:replace_space]
  sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } if options.fetch(:escape, true)
  sn.collect! { |x| x.split(/\s/)[0].to_s } if options.fetch(:split, true)
  sn = __clustal_avoid_same_name(sn, 10) if options.fetch(:avoid_same_name, true)

  namewidth = 10
  # Round down to whole groups of 10 residues.  A width below 10 would round
  # to 0 and make the row count below divide by zero, so clamp it to 10.
  seqwidth = [ (options[:width] || 60).div(10) * 10, 10 ].max
  # Each group of 10 residues gets one leading space, hence 11 characters.
  seqregexp = Regexp.new("(.{1,#{seqwidth.div(10) * 11}})")
  gchar = options[:gap_char] || '-'

  aseqs = []
  self.each_seq { |s| aseqs << s.to_s.gsub(self.gap_regexp, gchar) }
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  head2 = ' ' * namewidth
  aseqs.collect! do |s|
    snx = sn.shift
    # Negative width left-justifies; the slice truncates over-long names.
    head = sprintf("%*s", -namewidth, snx.to_s)[0, namewidth]
    # Pad short sequences with gap characters up to the alignment length.
    s << (gchar * (len - s.length))
    s.gsub!(/(.{1,10})/n, " \\1")
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    # A zero-length sequence yields no rows at all; to_s keeps the
    # name-only row instead of raising on nil.
    head += a.shift.to_s
    ret = a.collect { |x| head2 + x }
    ret.unshift(head)
    ret
  end
  lines = (len + seqwidth - 1).div(seqwidth)
  [ aln, aseqs, lines ]
end

[Source]

     # File lib/bio/alignment.rb, line 873
# Renders the alignment in the requested format by delegating to the
# matching output_<format> method.
#
# format:: one of :clustal, :fasta, :phylip, :phylipnon, :msf, :molphy
# arg::    any further arguments, forwarded unchanged to the chosen writer
#
# Returns whatever the chosen writer returns.
# Raises RuntimeError ("Unknown format: ...") for any other format value.
def output(format, *arg)
  writers = {
    :clustal   => :output_clustal,
    :fasta     => :output_fasta,
    :phylip    => :output_phylip,
    :phylipnon => :output_phylipnon,
    :msf       => :output_msf,
    :molphy    => :output_molphy
  }
  writer = writers[format]
  raise "Unknown format: #{format.inspect}" unless writer
  __send__(writer, *arg)
end

Generates ClustalW-formatted text

seqs: sequences (must be an alignment object); this method passes the alignment itself (self)
names: names of the sequences; this method passes self.sequence_names
options: options

[Source]

      # File lib/bio/alignment.rb, line 1045
# Renders this alignment as ClustalW-formatted text.
#
# options:: options Hash, handed to __clustal_formatter unchanged
#
# Returns the result of __clustal_formatter called with the alignment
# itself, its sequence names and the options.
def output_clustal(options = {})
  alignment = self
  names = alignment.sequence_names
  __clustal_formatter(alignment, names, options)
end

Generates fasta format text and returns a string.

[Source]

      # File lib/bio/alignment.rb, line 1059
# Renders the alignment as FASTA text and returns it as one String.
#
# options:: Hash of formatting options
#   :width::           maximum residues per line, default 70; a value of 0
#                      or less writes each sequence on a single line
#   :avoid_same_name:: if true, names come from
#                      __clustal_avoid_same_name(sequence_names, 30);
#                      otherwise CR, LF and NUL in each name become spaces
def output_fasta(options={})
  width = options[:width] || 70
  names =
    if options[:avoid_same_name]
      __clustal_avoid_same_name(self.sequence_names, 30)
    else
      self.sequence_names.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
    end

  # nil means "do not wrap".
  wrapper = (width > 0) ? Regexp.new(".{1,#{width}}") : nil

  records = self.collect do |s|
    title = ">#{names.shift}\n"
    residues = s.to_s
    if wrapper
      title + residues.gsub(wrapper, "\\0\n")
    else
      title + residues + "\n"
    end
  end
  records.join('')
end

Generates Molphy alignment format text as a string

[Source]

      # File lib/bio/alignment.rb, line 1151
# Renders the alignment in Molphy format and returns it as one String.
#
# The text starts with a "<sequences> <length>\n" header; each sequence
# follows as its name on one line and its residues wrapped underneath.
#
# options:: Hash of formatting options
#   :replace_space::   if true, whitespace in names is replaced with '_'
#   :escape::          replaces : ; , ( ) in names with '_'
#                      (on when the key is absent)
#   :split::           keeps only the first whitespace-delimited word of
#                      each name (on when the key is absent)
#   :avoid_same_name:: passes the names through
#                      __clustal_avoid_same_name(names, 30)
#                      (on when the key is absent)
#   :width::           residues per line, default 60
#   :gap_char::        string substituted for every match of gap_regexp,
#                      default '-'
#   :case::            a value whose to_s matches /lower/i or /upper/i
#                      forces that letter case
def output_molphy(options = {})
  len = self.alignment_length
  header = "#{self.number_of_sequences} #{len}\n"
  sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  sn.collect! { |x| x.gsub(/\s/, '_') } if options[:replace_space]
  sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } if options.fetch(:escape, true)
  sn.collect! { |x| x.split(/\s/)[0].to_s } if options.fetch(:split, true)
  sn = __clustal_avoid_same_name(sn, 30) if options.fetch(:avoid_same_name, true)

  seqwidth  = options[:width] || 60
  seqregexp = Regexp.new("(.{1,#{seqwidth}})")
  gchar = options[:gap_char] || '-'

  # Start from a plain empty array; there is no need to allocate (and then
  # clear) one sized by the alignment length.
  aseqs = []
  self.each_seq { |s| aseqs << s.to_s.gsub(self.gap_regexp, gchar) }
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  aseqs.collect! do |s|
    # Pad short sequences with gap characters up to the alignment length.
    s << (gchar * (len - s.length))
    s.gsub!(seqregexp, "\\1\n")
    sn.shift + "\n" + s
  end
  aseqs.unshift(header)
  aseqs.join('')
end

Generates msf formatted text as a string

[Source]

      # File lib/bio/alignment.rb, line 1193
# Renders the alignment as MSF-formatted text and returns it as one String.
#
# options:: Hash of formatting options
#   :avoid_same_name:: names come from
#                      __clustal_avoid_same_name(sequence_names); when
#                      switched off, CR, LF and NUL in each name become
#                      spaces instead (on when the key is absent)
#   :replace_space::   replaces whitespace in names with '_'
#                      (on when the key is absent)
#   :escape::          replaces : ; , ( ) in names with '_'
#                      (on when the key is absent)
#   :split::           keeps only the first whitespace-delimited word of
#                      each name (on when the key is absent)
#   :gap_char::        string substituted for every match of gap_regexp,
#                      default '.'
#   :padding_char::    string used for leading gaps and for right-padding,
#                      default '~'
#   :case::            a value whose to_s matches /lower/i gives lower
#                      case; anything else gives upper case
#   :type::            to_s matching /protein/i or /aa/i means protein,
#                      /na/i means nucleotide; otherwise decided from
#                      seqclass, then from the sequence content
#   :entry_id::        name written in the header line, default
#                      "<object id>.msf"
#   :time::            object responding to strftime, default Time.now
def output_msf(options = {})
  len = self.seq_length

  if options.fetch(:avoid_same_name, true)
    sn = __clustal_avoid_same_name(self.sequence_names)
  else
    sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  end
  sn.collect! { |x| x.gsub(/\s/, '_') } if options.fetch(:replace_space, true)
  sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } if options.fetch(:escape, true)
  sn.collect! { |x| x.split(/\s/)[0].to_s } if options.fetch(:split, true)

  seqwidth = 50
  # With no names at all max is nil; fall back to 0 instead of raising.
  namewidth = [ 31, (sn.collect { |x| x.length }.max || 0) ].min
  sep = ' ' * 2

  seqregexp = Regexp.new("(.{1,#{seqwidth}})")
  gchar = options[:gap_char] || '.'
  pchar = options[:padding_char] || '~'

  aseqs = []
  self.each_seq { |s| aseqs << s.to_s.gsub(self.gap_regexp, gchar) }
  aseqs.each do |s|
    # Leading gaps become padding characters, trailing gaps are dropped and
    # the sequence is then right-padded to the full length.
    s.sub!(/\A#{Regexp.escape(gchar)}+/) { |x| pchar * x.length }
    s.sub!(/#{Regexp.escape(gchar)}+\z/, '')
    s << (pchar * (len - s.length))
  end

  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  else
    # Upper case is both the explicit /upper/i choice and the default.
    aseqs.each { |s| s.upcase! }
  end

  amino =
    case options[:type].to_s
    when /protein/i, /aa/i
      true
    when /na/i
      false
    else
      if seqclass == Bio::Sequence::AA
        true
      elsif seqclass == Bio::Sequence::NA
        false
      else
        # If we can't determine the type, we assume protein: the alignment
        # is nucleotide only when every sequence consists solely of
        # a/c/g/t plus the gap and padding characters inserted above.
        # (The pattern needs the quantifier and the filler characters;
        # without them only one-letter sequences could ever match.)
        filler = Regexp.escape(gchar) + Regexp.escape(pchar)
        na_only = /\A[acgt#{filler}]+\z/i
        aseqs.any? { |x| na_only !~ x }
      end
    end

  seq_type = (amino ? 'P' : 'N')

  fn = (options[:entry_id] or self.__id__.abs.to_s + '.msf')
  dt = (options[:time] or Time.now).strftime('%B %d, %Y %H:%M')

  sums = aseqs.collect { |s| GCG::Seq.calc_checksum(s) }
  sum = sums.inject(0) { |acc, x| acc + x } % 10000
  msf =
    [
     "#{seq_type == 'N' ? 'N' : 'A' }A_MULTIPLE_ALIGNMENT 1.0\n",
     "\n",
     "\n",
     " #{fn}  MSF: #{len}  Type: #{seq_type}  #{dt}  Check: #{sum} ..\n",
     "\n"
    ]

  # One "Name:" line per sequence; names are left-justified here.
  sn.each do |snx|
    msf << (' Name: ' +
            sprintf('%*s', -namewidth, snx.to_s)[0, namewidth] +
            "  Len: #{len}  Check: #{sums.shift}  Weight: 1.00\n")
  end
  msf << "\n//\n"

  # Split every sequence into rows, each prefixed with its right-justified
  # name.
  aseqs.collect! do |s|
    snx = sn.shift
    head = sprintf("%*s", namewidth, snx.to_s)[0, namewidth] + sep
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    a.collect { |x| head + x }
  end

  lines = (len + seqwidth - 1).div(seqwidth)
  i = 1
  lines.times do
    msf << "\n"
    # Ruler line showing the first and last column number of this block.
    n_l = i
    n_r = [ i + seqwidth - 1, len ].min
    if n_l != n_r then
      w = [ n_r - n_l + 1 - n_l.to_s.length - n_r.to_s.length, 1 ].max
      msf << (' ' * namewidth + sep + n_l.to_s +
              ' ' * w + n_r.to_s + "\n")
    else
      msf << (' ' * namewidth + sep + n_l.to_s + "\n")
    end
    aseqs.each { |a| msf << a.shift }
    i += seqwidth
  end
  msf << "\n"
  msf.join('')
end

Generates PHYLIP interleaved alignment format text as a string.

[Source]

      # File lib/bio/alignment.rb, line 1082
# Renders the alignment in interleaved PHYLIP format and returns it as one
# String.
#
# options:: options Hash, handed to __output_phylip_common unchanged
#
# The rows from __output_phylip_common are emitted block by block: the
# first row of every sequence, a blank line, the second row of every
# sequence, and so on.  The blank line after the final block is dropped.
def output_phylip(options = {})
  pieces, rows_per_seq, block_count = __output_phylip_common(options)
  block_count.times do
    rows_per_seq.each { |rows| pieces.push(rows.shift) }
    pieces.push("\n")
  end
  pieces.pop if pieces.last == "\n"
  pieces.join('')
end

Generates PHYLIP 3.2 (old) non-interleaved format text as a string.

[Source]

      # File lib/bio/alignment.rb, line 1093
# Renders the alignment in the old non-interleaved PHYLIP format and
# returns it as one String.
#
# options:: options Hash, handed to __output_phylip_common unchanged
#
# The header line is followed by all rows of the first sequence, then all
# rows of the second, and so on.
def output_phylipnon(options = {})
  header_lines, rows_per_seq, _block_count = __output_phylip_common(options)
  body = rows_per_seq.join('')
  header_lines.first + body
end

to_clustal is deprecated. Instead, please use output_clustal.

[Source]

      # File lib/bio/alignment.rb, line 1053
# Deprecated alias for output_clustal.
#
# Prints a deprecation warning through Kernel#warn, then forwards every
# argument to output_clustal and returns its result.
def to_clustal(*arg)
  notice = "to_clustal is deprecated. Please use output_clustal."
  warn(notice)
  output_clustal(*arg)
end

[Validate]