Class | Bio::FlatFileIndex::Flat_1::FlatMappingFile |
In: |
lib/bio/io/flatfile/index.rb
|
Parent: | Object |
FlatMappingFile class.
Internal use only.
filename | [R] | |
mode | [RW] |
# File lib/bio/io/flatfile/index.rb, line 923
# Builds a Proc that merges already-sorted files with an external sort
# command.
#
# sort_program:: the command and its leading arguments, as an Array.
#                '-m', '-o', the output path and the input paths are
#                appended to it.
#
# The returned Proc takes (out, in1, *files), runs the command through
# Kernel#system and evaluates to whatever system returns.
def self.external_merge_proc(sort_program = [ '/usr/bin/env',
                                              'LC_ALL=C',
                                              '/usr/bin/sort' ])
  merge_flags = [ '-m', '-o' ]
  Proc.new do |out, in1, *files|
    # every input (in1 included) has to be sorted beforehand
    argv = sort_program + merge_flags + [ out, in1 ] + files
    system(*argv)
  end
end
# File lib/bio/io/flatfile/index.rb, line 900
# Builds a Proc that sorts each extra file with an external sort command
# into a temporary file, then merges those temporary files together with
# in1 into out.
#
# sort_program:: the command and its leading arguments, as an Array.
#
# The returned Proc takes (out, in1, *files). in1 is not sorted here; it is
# handed to the merge step unchanged. The Proc evaluates to a true value
# only when the merge command and every per-file sort command succeeded
# (as reported by Kernel#system). The temporary files are removed even when
# an exception is raised part-way through.
def self.external_merge_sort_proc(sort_program = [ '/usr/bin/env',
                                                   'LC_ALL=C',
                                                   '/usr/bin/sort' ])
  Proc.new do |out, in1, *files|
    # Tempfile is needed below; load it here so the proc does not rely on
    # the caller having required it already.
    require 'tempfile'
    tf_all = []
    begin
      sorted = true
      files.each do |fn|
        tf = Tempfile.open('sort')
        # register before doing anything else so that ensure can clean it up
        tf_all << tf
        tf.close(false)
        cmd = sort_program + [ '-o', tf.path, fn ]
        sorted = false unless system(*cmd)
      end
      tfn_all = tf_all.collect { |tf| tf.path }
      cmd_fin = sort_program + [ '-m', '-o', out, in1, *tfn_all ]
      merged = system(*cmd_fin)
      merged && sorted
    ensure
      tf_all.each { |tf| tf.close(true) }
    end
  end
end
# File lib/bio/io/flatfile/index.rb, line 891
# Builds a Proc that sorts files with an external sort command.
#
# sort_program:: the command and its leading arguments, as an Array.
#                '-o', the output path and the input paths are appended
#                to it.
#
# The returned Proc takes (out, in1, *files), runs the command through
# Kernel#system and evaluates to whatever system returns.
def self.external_sort_proc(sort_program = [ '/usr/bin/env',
                                             'LC_ALL=C',
                                             '/usr/bin/sort' ])
  Proc.new do |out, in1, *files|
    argv = sort_program + [ '-o', out, in1 ] + files
    system(*argv)
  end
end
# File lib/bio/io/flatfile/index.rb, line 933
# Builds a Proc that sorts files in memory, without any external program.
#
# The returned Proc takes (out, in1, *files): it reads every line of in1
# and of each extra file, sorts all lines together with Array#sort!, and
# writes them to out. It evaluates to nil.
#
# All lines are held in memory at once.
def self.internal_sort_proc
  Proc.new do |out, in1, *files|
    lines = []
    [ in1, *files ].each do |fn|
      # File.foreach always treats fn as a path (IO.foreach would run a
      # command for a name starting with "|").
      File.foreach(fn) do |line|
        # A last line without a newline would otherwise be glued to the
        # record that follows it after sorting.
        line << "\n" unless line[-1, 1] == "\n"
        lines << line
      end
    end
    lines.sort!
    # block form closes the output file even if a write raises
    File.open(out, 'w') do |of|
      lines.each { |line| of << line }
    end
    nil
  end
end
# File lib/bio/io/flatfile/index.rb, line 734
# Remembers the file name and open mode. Nothing is opened here: the file
# handle, the record size and the record count all start out as nil.
#
# filename:: path of the mapping file
# mode::     mode string later given to File.open (default 'rb')
def initialize(filename, mode = 'rb')
  @filename, @mode = filename, mode
  #@file = File.open(filename, mode)
  @file = @record_size = @records = nil
end
# File lib/bio/io/flatfile/index.rb, line 806
# Appends one record at the end of the file and increments the cached
# record count.
#
# str:: record contents, passed on to write_record
def add_record(str)
  # Called for their side effects only: they make sure @records and the
  # record size are loaded before the file is touched.
  records
  record_size
  @file.seek(0, IO::SEEK_END)
  write_record(str)
  @records += 1
end
# File lib/bio/io/flatfile/index.rb, line 755
# Closes the underlying file if it is open and forgets the handle.
# Does nothing when no file is open. Always returns nil.
def close
  return nil unless @file
  DEBUG.print "FlatMappingFile: close #{@filename}\n"
  @file.close
  @file = nil
  nil
end
export/import/edit data
# File lib/bio/io/flatfile/index.rb, line 841
# Yields every record of the file, in file order, wrapped in a Record.
# Returns self.
def each
  total = records
  seek(0)
  total.times do |index|
    yield Record.new(get_record(index))
  end
  self
end
# File lib/bio/io/flatfile/index.rb, line 850
# Writes every record to stream, one per line: the record's to_s followed
# by a newline, appended with <<.
#
# stream:: any object that accepts << with a String
#
# Returns stream.
def export_tsv(stream)
  self.each do |record|
    stream << (record.to_s + "\n")
  end
  stream
end
# File lib/bio/io/flatfile/index.rb, line 776
# Reads the i-th record: seeks to it and reads record_size bytes.
#
# i:: zero-based record index
#
# Returns whatever the file's read call returns for that span.
def get_record(i)
  width = record_size
  seek(i)
  #DEBUG.print "get_record(#{i})=#{str.inspect}\n"
  @file.read(width)
end
# File lib/bio/io/flatfile/index.rb, line 948
# Rebuilds this mapping file from its current contents plus the given TSV
# files.
#
# flag_primary:: passed through to init_with_sorted_tsv_file
# mode::         when :new, the current contents are not exported first
# sort_proc::    called as sort_proc.call(sorted_path, exported_path, *files);
#                it must leave the sorted result in sorted_path
# files::        extra TSV files handed to sort_proc
#
# Steps: export the current records to a temporary file (unless mode is
# :new), let sort_proc produce a sorted temporary file, build a new mapping
# file next to the original from that sorted file, then swap it in by
# renaming (the original is kept as a ".bak~" file until the swap is done).
#
# Both temporary files are removed in an ensure clause, so they do not
# linger when exporting, sorting, rebuilding or renaming raises.
#
# NOTE(review): this method closes the file but does not itself reset
# @record_size / @records after the file is replaced -- confirm callers do
# not reuse this object afterwards.
#
# Returns self.
def import_tsv_files(flag_primary, mode, sort_proc, *files)
  require 'tempfile'

  tmpfile0 = nil
  tmpfile1 = Tempfile.open('flat')
  begin
    self.export_tsv(tmpfile1) unless mode == :new
    tmpfile1.close(false)

    tmpfile0 = Tempfile.open('sorted')
    tmpfile0.close(false)

    sort_proc.call(tmpfile0.path, tmpfile1.path, *files)

    tmpmap = self.class.new(self.filename + ".#{$$}.tmp~", 'wb+')
    tmpmap.init_with_sorted_tsv_file(tmpfile0.path, flag_primary)
    tmpmap.close
    self.close

    backup = self.filename + ".#{$$}.bak~"
    begin
      File.rename(self.filename, backup)
    rescue Errno::ENOENT
      # there was no previous file to back up
    end
    File.rename(tmpmap.filename, self.filename)
    begin
      File.delete(backup)
    rescue Errno::ENOENT
      # no backup had been made
    end
  ensure
    [ tmpfile0, tmpfile1 ].each { |tf| tf.close(true) if tf }
  end
  self
end
# File lib/bio/io/flatfile/index.rb, line 827
# Initializes the file as an empty mapping file with the given record size:
# the file is opened, truncated, and the record size is written at offset 0
# as a zero-padded decimal of @@recsize_width digits.
#
# rs:: record size; must satisfy 0 < rs < 10 ** @@recsize_width
#
# Raises RuntimeError ('record size out of range') otherwise.
# Sets the cached record count to 0.
def init(rs)
  raise 'record size out of range' unless 0 < rs && rs < 10 ** @@recsize_width
  open
  @record_size = rs
  header = sprintf("%0*d", @@recsize_width, rs)
  @file.truncate(0)
  @file.seek(0, IO::SEEK_SET)
  @file.write(header)
  @records = 0
end
# File lib/bio/io/flatfile/index.rb, line 857
# (Re)initializes this mapping file from a sorted TSV file.
#
# filename::     path of the sorted TSV file
# flag_primary:: when true, consecutive lines with the same key are
#                collapsed and only the last of them is stored (a debug
#                warning is printed for each one that is dropped);
#                when false, only consecutive records that compare equal
#                are collapsed
#
# The input is read twice: once to find the longest line (without its line
# terminator), which becomes the record size passed to init (minimum 1),
# and once to add the records. The input file is opened in block form so
# that it is closed even when init or add_record raises.
#
# Returns self.
def init_with_sorted_tsv_file(filename, flag_primary = false)
  File.open(filename) do |f|
    # pass 1: determine the record size
    rec_size = 1
    f.each do |line|
      len = line.chomp.length
      rec_size = len if rec_size < len
    end
    init(rec_size)

    # pass 2: add the records
    prev = nil
    f.rewind
    if flag_primary then
      f.each do |line|
        x = Record.new(line.chomp, rec_size)
        if prev then
          if x.key == prev.key
            DEBUG.print "Warning: overwrote unique id #{x.key.inspect}\n"
          else
            add_record(prev.to_s)
          end
        end
        prev = x
      end
      # the last record is still pending
      add_record(prev.to_s) if prev
    else
      f.each do |line|
        x = Record.new(line.chomp, rec_size)
        add_record(x.to_s) if x != prev
        prev = x
      end
    end
  end
  self
end
# File lib/bio/io/flatfile/index.rb, line 745
# Opens the file with the stored name and mode unless it is already open.
#
# Returns true when the file was opened by this call, nil when it was
# already open.
def open
  return nil if @file
  DEBUG.print "FlatMappingFile: open #{@filename}\n"
  @file = File.open(@filename, @mode)
  true
end
# File lib/bio/io/flatfile/index.rb, line 814
# Writes str as the i-th record.
#
# i::   zero-based record index
# str:: record contents, passed on to write_record
#
# When i is inside the current record range the file is positioned on that
# record. Otherwise the file is extended first: the gap between the current
# end and position i is filled with blank (space-only) records and the
# cached record count becomes i + 1.
def put_record(i, str)
  count = records
  width = record_size
  if i < count then
    seek(i)
  else
    @file.seek(0, IO::SEEK_END)
    @file.write(sprintf("%-*s", width, '') * (i - count))
    @records = i + 1
  end
  write_record(str)
end
# File lib/bio/io/flatfile/index.rb, line 764
# Returns the record size, reading it from the start of the file on first
# use (which opens the file if necessary) and caching it afterwards.
#
# Raises RuntimeError ('strange record size') when the first
# @@recsize_width bytes do not match @@recsize_regex.
def record_size
  return @record_size if @record_size
  open
  @file.seek(0, IO::SEEK_SET)
  header = @file.read(@@recsize_width)
  raise 'strange record size' unless header =~ @@recsize_regex
  @record_size = header.to_i
  DEBUG.print "FlatMappingFile: record_size: #{@record_size}\n"
  @record_size
end
# File lib/bio/io/flatfile/index.rb, line 789
# Returns the number of records, computed on first use from the file size
# (file size minus the @@recsize_width header, divided by the record size)
# and cached afterwards.
def records
  return @records if @records
  width = record_size
  payload_bytes = @file.stat.size - @@recsize_width
  @records = payload_bytes / width
  DEBUG.print "FlatMappingFile: records: #{@records}\n"
  @records
end
methods for searching
# File lib/bio/io/flatfile/index.rb, line 982
# Binary search for key among the records, which must be sorted by key.
#
# key:: the key to look for; compared with each record's key using
#       <, > and ==
#
# Returns an Array with the val of every record whose key equals key, in
# file order, or an empty Array when there is no such record.
#
# The search keeps both a lower and an upper bound, so each probe halves
# the remaining range and the number of record reads stays logarithmic in
# the number of records.
def search(key)
  hi = records
  return [] if hi <= 0
  lo = 0
  DEBUG.print "binary search starts...\n"
  # invariant: any record matching key has an index in lo...hi
  while lo < hi
    mid = (lo + hi) / 2
    rec = Record.new(get_record(mid))
    if key < rec.key then
      hi = mid
    elsif key > rec.key then
      lo = mid + 1
    else # key == rec.key
      result = [ rec.val ]
      # equal keys are adjacent: collect the ones before mid ...
      j = mid - 1
      while j >= lo &&
          (rec = Record.new(get_record(j))).key == key
        result << rec.val
        j = j - 1
      end
      # ... which were gathered backwards, so restore file order
      result.reverse!
      # ... then the ones after mid
      j = mid + 1
      while j < hi &&
          (rec = Record.new(get_record(j))).key == key
        result << rec.val
        j = j + 1
      end
      DEBUG.print "#{result.size} hits found!!\n"
      return result
    end
  end
  DEBUG.print "no hits found\n"
  #nil
  []
end
# File lib/bio/io/flatfile/index.rb, line 784
# Positions the file on the start of the i-th record, i.e. at byte offset
# @@recsize_width + record_size * i.
#
# i:: zero-based record index
def seek(i)
  width = record_size
  byte_offset = @@recsize_width + width * i
  @file.seek(byte_offset)
end