Module | Bio::Alignment::Output |
In: |
lib/bio/alignment.rb
|
common routine for interleaved/non-interleaved phylip format
# File lib/bio/alignment.rb, line 1099
# Common routine for interleaved/non-interleaved phylip format.
#
# Recognized keys in +options+:
# :replace_space::   when true, whitespace in names is replaced with '_'
# :escape::          unless set to false, the characters : ; , ( ) in names
#                    are replaced with '_'
# :split::           unless set to false, names are cut at the first whitespace
# :avoid_same_name:: unless set to false, names are passed through
#                    __clustal_avoid_same_name(names, 10)
# :width::           residues per line (default 60), rounded down to a
#                    multiple of 10; anything that rounds below 10 is
#                    treated as 10
# :gap_char::        string substituted for gaps (default '-')
# :case::            a value matching /lower/i or /upper/i changes the case
#                    of the sequences
#
# Returns a three element array:
# * an array holding the header line,
# * an array with, for every sequence, the array of its formatted lines
#   (the first line of each starts with the name, the others with blanks),
# * the number of lines each sequence was broken into.
def __output_phylip_common(options = {})
  len = self.alignment_length
  aln = [ " #{self.number_of_sequences} #{len}\n" ]

  sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  sn.collect! { |x| x.gsub(/\s/, '_') } if options[:replace_space]
  sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } if options.fetch(:escape, true)
  sn.collect! { |x| x.split(/\s/)[0].to_s } if options.fetch(:split, true)
  sn = __clustal_avoid_same_name(sn, 10) if options.fetch(:avoid_same_name, true)

  namewidth = 10
  seqwidth = (options[:width] || 60).div(10) * 10
  # A width below 10 rounds down to 0, which made the line count below
  # divide by zero; fall back to a single group of 10 residues.
  seqwidth = 10 if seqwidth < 10
  # Every group of 10 residues gets one leading blank, hence 11 per group.
  seqregexp = Regexp.new("(.{1,#{seqwidth.div(10) * 11}})")
  gchar = options[:gap_char] || '-'

  aseqs = []
  self.each_seq { |s| aseqs << s.to_s.gsub(self.gap_regexp, gchar) }
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  blank_head = ' ' * namewidth
  aseqs.collect! do |s|
    # Name column: padded on the right, then cut to exactly namewidth.
    head = sn.shift.to_s.ljust(namewidth)[0, namewidth]
    # Pad short sequences with gaps up to the alignment length.
    s << (gchar * (len - s.length))
    s.gsub!(/(.{1,10})/n, " \\1")
    s.gsub!(seqregexp, "\\1\n")
    a = s.split(/^/)
    head += a.shift
    a.collect { |x| blank_head + x }.unshift(head)
  end

  lines = (len + seqwidth - 1).div(seqwidth)
  [ aln, aseqs, lines ]
end
# File lib/bio/alignment.rb, line 873
# Dispatches to the output_* method that corresponds to +format+.
#
# +format+ is one of :clustal, :fasta, :phylip, :phylipnon, :msf or
# :molphy; the remaining arguments are handed to that method unchanged.
# Any other value raises a RuntimeError ("Unknown format: ...").
def output(format, *arg)
  handler =
    case format
    when :clustal   then :output_clustal
    when :fasta     then :output_fasta
    when :phylip    then :output_phylip
    when :phylipnon then :output_phylipnon
    when :msf       then :output_msf
    when :molphy    then :output_molphy
    end
  raise "Unknown format: #{format.inspect}" unless handler
  __send__(handler, *arg)
end
Generates fasta format text and returns a string.
# File lib/bio/alignment.rb, line 1059
# Generates fasta format text and returns a string.
#
# options[:width] is the number of residues per line (70 when it is not
# given); a width of zero or less writes every sequence on a single line.
# When options[:avoid_same_name] is true the names are taken from
# __clustal_avoid_same_name(names, 30); otherwise CR, LF and NUL characters
# in each name are replaced with a space.
def output_fasta(options={})
  width = options[:width] || 70

  names =
    if options[:avoid_same_name]
      __clustal_avoid_same_name(self.sequence_names, 30)
    else
      self.sequence_names.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
    end

  # nil means "do not wrap"
  wrapper = Regexp.new(".{1,#{width}}") if width > 0

  entries = self.collect do |s|
    title = ">#{names.shift}\n"
    residues = wrapper ? s.to_s.gsub(wrapper, "\\0\n") : s.to_s + "\n"
    title + residues
  end
  entries.join('')
end
Generates Molphy alignment format text as a string
# File lib/bio/alignment.rb, line 1151
# Generates Molphy alignment format text as a string.
#
# Recognized keys in +options+:
# :replace_space::   when true, whitespace in names is replaced with '_'
# :escape::          unless set to false, the characters : ; , ( ) in names
#                    are replaced with '_'
# :split::           unless set to false, names are cut at the first whitespace
# :avoid_same_name:: unless set to false, names are passed through
#                    __clustal_avoid_same_name(names, 30)
# :width::           residues per line (default 60)
# :gap_char::        string substituted for gaps (default '-')
# :case::            a value matching /lower/i or /upper/i changes the case
#                    of the sequences
#
# The result is a header line ("<number of sequences> <length>") followed,
# for every sequence, by its name on one line and the wrapped residues.
def output_molphy(options = {})
  len = self.alignment_length
  header = "#{self.number_of_sequences} #{len}\n"

  sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  sn.collect! { |x| x.gsub(/\s/, '_') } if options[:replace_space]
  sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } if options.fetch(:escape, true)
  sn.collect! { |x| x.split(/\s/)[0].to_s } if options.fetch(:split, true)
  sn = __clustal_avoid_same_name(sn, 30) if options.fetch(:avoid_same_name, true)

  seqwidth = options[:width] || 60
  seqregexp = Regexp.new("(.{1,#{seqwidth}})")
  gchar = options[:gap_char] || '-'

  # Start from an empty array; the previous Array.new(len).clear allocated
  # one slot per alignment column only to throw the array contents away.
  aseqs = []
  self.each_seq { |s| aseqs << s.to_s.gsub(self.gap_regexp, gchar) }
  case options[:case].to_s
  when /lower/i
    aseqs.each { |s| s.downcase! }
  when /upper/i
    aseqs.each { |s| s.upcase! }
  end

  entries = aseqs.collect do |s|
    # Pad short sequences with gaps up to the alignment length.
    s << (gchar * (len - s.length))
    s.gsub!(seqregexp, "\\1\n")
    sn.shift + "\n" + s
  end
  header + entries.join('')
end
Generates msf formatted text as a string
# File lib/bio/alignment.rb, line 1193
# Generates msf formatted text as a string.
#
# Recognized keys in +options+:
# :avoid_same_name:: unless set to false, names are taken from
#                    __clustal_avoid_same_name; otherwise CR, LF and NUL
#                    characters in names are replaced with a space
# :replace_space::   unless set to false, whitespace in names becomes '_'
# :escape::          unless set to false, the characters : ; , ( ) in names
#                    are replaced with '_'
# :split::           unless set to false, names are cut at the first whitespace
# :gap_char::        string substituted for gaps (default '.')
# :padding_char::    string used for leading gaps and for padding at the end
#                    (default '~')
# :case::            a value matching /lower/i gives lower case sequences;
#                    anything else gives upper case
# :type::            a value matching /protein/i or /aa/i forces type P, one
#                    matching /na/i forces type N; otherwise the type is
#                    derived from seqclass or, failing that, from the residues
# :entry_id::        name written in the header (default "<object id>.msf")
# :time::            object responding to strftime, used for the header
#                    timestamp (default Time.now)
def output_msf(options = {})
  len = self.seq_length

  if options.fetch(:avoid_same_name, true)
    sn = __clustal_avoid_same_name(self.sequence_names)
  else
    sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
  end
  sn.collect! { |x| x.gsub(/\s/, '_') } if options.fetch(:replace_space, true)
  sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } if options.fetch(:escape, true)
  sn.collect! { |x| x.split(/\s/)[0].to_s } if options.fetch(:split, true)

  seqwidth = 50
  # .to_i keeps this from raising when there are no names at all.
  namewidth = [ 31, sn.collect { |x| x.length }.max.to_i ].min
  sep = ' ' * 2

  seqregexp = Regexp.new("(.{1,#{seqwidth}})")
  gchar = options[:gap_char] || '.'
  pchar = options[:padding_char] || '~'

  aseqs = []
  self.each_seq { |s| aseqs << s.to_s.gsub(self.gap_regexp, gchar) }
  gap_run = Regexp.escape(gchar)
  aseqs.each do |s|
    # Leading gaps become padding, trailing gaps are dropped and the
    # sequence is then padded up to the common length.
    s.sub!(/\A#{gap_run}+/) { |x| pchar * x.length }
    s.sub!(/#{gap_run}+\z/, '')
    s << (pchar * (len - s.length))
  end

  # Upper case is the default; only an explicit "lower" switches it.
  if /lower/i =~ options[:case].to_s
    aseqs.each { |s| s.downcase! }
  else
    aseqs.each { |s| s.upcase! }
  end

  case options[:type].to_s
  when /protein/i, /aa/i
    amino = true
  when /na/i
    amino = false
  else
    if seqclass == Bio::Sequence::AA then
      amino = true
    elsif seqclass == Bio::Sequence::NA then
      amino = false
    else
      # If we can't determine the type, we assume protein unless every
      # sequence consists only of a/c/g/t and gap/padding characters.
      # (The earlier pattern lacked a quantifier and therefore only ever
      # matched one-character sequences.)
      filler = Regexp.escape(gchar.to_s + pchar.to_s)
      na_only = /\A[acgt#{filler}]*\z/i
      amino = aseqs.any? { |x| na_only !~ x }
    end
  end

  seq_type = (amino ? 'P' : 'N')

  fn = (options[:entry_id] || self.__id__.abs.to_s + '.msf')
  dt = (options[:time] || Time.now).strftime('%B %d, %Y %H:%M')

  sums = aseqs.collect { |s| GCG::Seq.calc_checksum(s) }
  sum = sums.inject(0) { |acc, x| acc + x } % 10000

  # NOTE(review): GCG-produced MSF files usually start the first line with
  # "!!" -- confirm whether the prefix is missing here on purpose.
  msf =
    [
     "#{seq_type == 'N' ? 'N' : 'A' }A_MULTIPLE_ALIGNMENT 1.0\n",
     "\n",
     "\n",
     " #{fn} MSF: #{len} Type: #{seq_type} #{dt} Check: #{sum} ..\n",
     "\n"
    ]

  # One "Name:" line per sequence; names are left-justified here.
  sn.each_with_index do |snx, idx|
    msf << ' Name: ' +
      sprintf('%*s', -namewidth, snx.to_s)[0, namewidth] +
      " Len: #{len} Check: #{sums[idx]} Weight: 1.00\n"
  end
  msf << "\n//\n"

  # Break every sequence into lines of seqwidth residues, each prefixed
  # with the right-justified name.
  rows = aseqs.each_with_index.collect do |s, idx|
    head = sprintf("%*s", namewidth, sn[idx].to_s)[0, namewidth] + sep
    s.gsub!(seqregexp, "\\1\n")
    s.split(/^/).collect { |x| head + x }
  end

  blocks = (len + seqwidth - 1).div(seqwidth)
  blocks.times do |b|
    msf << "\n"
    # Ruler line: first and last column number of this block.
    n_l = b * seqwidth + 1
    n_r = [ n_l + seqwidth - 1, len ].min
    if n_l != n_r then
      w = [ n_r - n_l + 1 - n_l.to_s.length - n_r.to_s.length, 1 ].max
      msf << (' ' * namewidth + sep + n_l.to_s +
              ' ' * w + n_r.to_s + "\n")
    else
      msf << (' ' * namewidth + sep + n_l.to_s + "\n")
    end
    rows.each { |a| msf << a.shift }
  end
  msf << "\n"
  msf.join('')
end
generates phylip interleaved alignment format as a string
# File lib/bio/alignment.rb, line 1082
# Generates phylip interleaved alignment format as a string.
#
# +options+ is handed to __output_phylip_common unchanged. The result is
# the header followed by blocks; each block holds the next line of every
# sequence and blocks are separated by an empty line.
def output_phylip(options = {})
  text, rows_by_seq, blocks = __output_phylip_common(options)
  blocks.times do
    rows_by_seq.each { |rows| text.push(rows.shift) }
    text.push("\n")
  end
  # no empty line after the last block
  text.pop if text.last == "\n"
  text.join('')
end
generates Phylip3.2 (old) non-interleaved format as a string
# File lib/bio/alignment.rb, line 1093
# Generates Phylip3.2 (old) non-interleaved format as a string.
#
# +options+ is handed to __output_phylip_common unchanged. The result is
# the header line followed by all lines of the first sequence, then all
# lines of the second one, and so on.
def output_phylipnon(options = {})
  header, rows_by_seq, = __output_phylip_common(options)
  header.first + rows_by_seq.join('')
end
to_clustal is deprecated. Instead, please use output_clustal.
# File lib/bio/alignment.rb, line 1053
# Deprecated alias for output_clustal: prints a deprecation warning and
# then returns whatever output_clustal returns for the same arguments.
def to_clustal(*arg)
  notice = "to_clustal is deprecated. Please use output_clustal."
  warn(notice)
  output_clustal(*arg)
end