Class | Bio::SPTR |
In: |
lib/bio/db/embl/sptr.rb
|
Parent: | EMBLDB |
dr | -> | embl_dr |
Backup Bio::EMBLDB#dr as embl_dr |
returns contents in the CC lines.
returns an object of contents in the TOPIC.
returns contents of the "ALTERNATIVE PRODUCTS".
{'Event' => str, 'Named isoforms' => int, 'Comment' => str, 'Variants'=>[{'Name' => str, 'Synonyms' => str, 'IsoId' => str, 'Sequence' => []}]} CC -!- ALTERNATIVE PRODUCTS: CC Event=Alternative splicing; Named isoforms=15; ... CC placentae isoforms. All tissues differentially splice exon 13; CC Name=A; Synonyms=no del; CC IsoId=P15529-1; Sequence=Displayed;
returns contents of the "DATABASE".
[{'NAME'=>str,'NOTE'=>str, 'WWW'=>URI,'FTP'=>URI}, ...] CC -!- DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
returns contents of the "MASS SPECTROMETRY".
[{'MW'=>float,'MW_ERR'=>float, 'METHOD'=>str,'RANGE'=>str}, ...] CC -!- MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT CC IN LIVER, KIDNEY, LUNG AND BRAIN. CC -!- TOPIC: FIRST LINE OF A COMMENT BLOCK; CC SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK.
See also www.expasy.org/sprot/userman.html#CC_line
# File lib/bio/db/embl/sptr.rb, line 612
#
# Returns the contents of the CC (comment) lines.
#
# On first use the CC text is split on the "-!- " delimiter into
# "TOPIC: body" blocks and cached in @data['CC'] as a Hash of
# topic => Array of body Strings.
#
# topic:: nil (default) returns the whole Hash; a topic name returns that
#         topic, post-processed as follows:
#         * DEVELOPMENTAL STAGE, DISEASE, ENZYME REGULATION, FUNCTION and
#           INDUCTION are joined into one String ('' when absent);
#         * ALTERNATIVE PRODUCTS, BIOPHYSICOCHEMICAL PROPERTIES,
#           CATALYTIC ACTIVITY, CAUTION, INTERACTION, MASS SPECTROMETRY,
#           PATHWAY, RNA EDITING, SUBCELLULAR LOCATION and WEB RESOURCE are
#           passed to their cc_* helper;
#         * DATABASE is parsed into an Array of
#           {'NAME', 'NOTE', 'WWW', 'FTP'} Hashes (nil when absent);
#         * any other topic returns the raw Array (nil when absent).
#
# Returns an empty Hash, whatever the topic, when the entry has no CC text.
# Raises RuntimeError when a CC block is not of the form "TOPIC: body".
def cc(topic = nil)
  unless @data['CC']
    cc = {}
    comment_border = '-' * (77 - 4 + 1)
    dlm = /-!- /

    # 12KD_MYCSM has no CC lines.
    return cc if get('CC').size == 0

    cc_raw = fetch('CC')

    # Removing the copyright statement.
    cc_raw.sub!(/ *---.+---/m, '')

    # Not any CC Lines without the copyright statement.
    return cc if cc_raw == ''

    begin
      # Keep only the text in front of a full-width dashed border, if any.
      cc_raw = cc_raw.split(/#{comment_border}/)[0]
      # Drop the leading delimiter so split does not yield an empty block.
      cc_raw = cc_raw.sub(dlm, '')
      cc_raw.split(dlm).each do |tmp|
        tmp = tmp.strip

        unless /(^[A-Z ]+[A-Z]): (.+)/ =~ tmp
          raise ["Error: [#{entry_id}]: CC Lines", '"', tmp, '"',
                 '', get('CC'), ''].join("\n")
        end

        key = $1
        body = $2
        # Re-join words hyphenated across a line break, except before "AND".
        body.gsub!(/- (?!AND)/, '-')
        body.strip!
        (cc[key] ||= []) << body
      end
    rescue NameError
      # NoMethodError is a subclass of NameError, so it is handled here too;
      # the separate `rescue NoMethodError` that used to follow was unreachable.
      if fetch('CC') == ''
        return {}
      else
        raise ["Error: Invalid CC Lines: [#{entry_id}]: ",
               "\n'#{get('CC')}'\n", "(#{$!})"].join
      end
    end

    @data['CC'] = cc
  end

  comments = @data['CC']

  case topic
  when nil
    comments
  when 'ALTERNATIVE PRODUCTS'
    cc_alternative_products(comments[topic])
  when 'BIOPHYSICOCHEMICAL PROPERTIES'
    cc_biophysiochemical_properties(comments[topic])
  when 'CATALYTIC ACTIVITY', 'CATALITIC ACTIVITY'
    # The misspelled name is still accepted for backward compatibility.
    cc_catalytic_activity(comments[topic])
  when 'CAUTION'
    cc_caution(comments[topic])
  when 'DEVELOPMENTAL STAGE', 'DISEASE', 'ENZYME REGULATION',
       'FUNCTION', 'INDUCTION'
    # Array() turns a missing topic (nil) into [], giving '' instead of
    # a NoMethodError.
    Array(comments[topic]).join('')
  when 'INTERACTION'
    cc_interaction(comments[topic])
  when 'MASS SPECTROMETRY'
    cc_mass_spectrometry(comments[topic])
  when 'PATHWAY'
    cc_pathway(comments[topic])
  when 'RNA EDITING'
    cc_rna_editing(comments[topic])
  when 'SUBCELLULAR LOCATION'
    cc_subcellular_location(comments[topic])
  when 'WEB RESOURCE'
    cc_web_resource(comments[topic])
  when 'DATABASE'
    # DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
    entries = comments['DATABASE']
    return entries unless entries

    entries.map do |entry|
      record = {'NAME' => nil, 'NOTE' => nil, 'WWW' => nil, 'FTP' => nil}
      # Drop the final character (the terminating period) before splitting.
      entry.sub(/.$/, '').split(/;/).each do |field|
        case field
        when /NAME=(.+)/
          record['NAME'] = $1
        when /NOTE=(.+)/
          record['NOTE'] = $1
        when /WWW="(.+)"/
          record['WWW'] = $1
        when /FTP="(.+)"/
          record['FTP'] = $1
        end
      end
      record
    end
  else
    # ALLERGEN, BIOTECHNOLOGY, COFACTOR, DOMAIN, MISCELLANEOUS, ... and
    # unknown topics: the raw Array of bodies (nil when absent).
    comments[topic]
  end
end
CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText][; URL=WWWAddress].
# File lib/bio/db/embl/sptr.rb, line 924
#
# Parses the bodies of "WEB RESOURCE" comment blocks.
#
# data:: Array of body Strings of the form
#        NAME=ResourceName[; NOTE=FreeText][; URL="WWWAddress"].
#        or nil when the entry has no such topic.
#
# Returns an Array with one {'NAME', 'NOTE', 'URL'} Hash per body; fields
# that are not present stay nil. Returns [] when data is nil.
def cc_web_resource(data)
  # Array() maps nil to [], so a missing topic no longer raises.
  Array(data).map do |body|
    entry = {'NAME' => nil, 'NOTE' => nil, 'URL' => nil}
    body.split(';').each do |field|
      case field
      when /NAME=(.+)/
        entry['NAME'] = $1.strip
      when /NOTE=(.+)/
        entry['NOTE'] = $1.strip
      when /URL="(.+)"/
        entry['URL'] = $1.strip
      end
    end
    entry
  end
end
# File lib/bio/db/embl/sptr.rb, line 959
#
# Returns the database cross-references taken from embl_dr.
#
# key:: nil (default) returns whatever embl_dr returns; otherwise the rows
#       stored under that key are converted to Hashes.
#
# With a key, returns an Array of
# {'Accession', 'Version', ' ', 'Molecular Type'} Hashes built from the
# first four elements of each row, or [] when the key is unknown.
def dr(key = nil)
  return embl_dr unless key

  rows = embl_dr[key] || []
  rows.map do |row|
    {
      'Accession' => row[0],
      'Version' => row[1],
      # NOTE(review): the third column really is stored under a
      # single-space key; kept as is because callers may depend on it.
      ' ' => row[2],
      'Molecular Type' => row[3]
    }
  end
end
returns a Hash of information in the DT lines.
hash keys: ['created', 'sequence', 'annotation'] also Symbols acceptable (ASAP): [:created, :sequence, :annotation]
returns a String of information in the DT lines by a given key..
DT DD-MMM-YYYY (rel. NN, Created) DT DD-MMM-YYYY (rel. NN, Last sequence update) DT DD-MMM-YYYY (rel. NN, Last annotation update)
# File lib/bio/db/embl/sptr.rb, line 123
#
# Returns the information in the DT (date) lines.
#
# key:: nil (default) returns the whole Hash; 'created', 'sequence' or
#       'annotation' (String or Symbol) returns that line's text.
#
# The Hash is built from the first three DT lines, in that order, with the
# leading two-character line code removed, and is cached in @data['DT'].
# A line that is missing from the entry yields ''.
def dt(key = nil)
  # to_s lets callers pass :created as well as 'created'.
  return dt[key.to_s] if key
  return @data['DT'] if @data['DT']

  created, sequence, annotation = get('DT').split(/\n/).map do |line|
    line.sub(/\w{2} /, '').strip
  end

  @data['DT'] = {
    'created' => created.to_s,
    'sequence' => sequence.to_s,
    'annotation' => annotation.to_s
  }
end
returns a ENTRY_NAME in the ID line.
# File lib/bio/db/embl/sptr.rb, line 79
#
# Returns the 'ENTRY_NAME' field of the parsed ID line.
def entry_id
  id_line['ENTRY_NAME']
end
returns contents in the feature table.
sp = Bio::SPTR.new(entry) ft = sp.ft ft.class #=> Hash ft.keys.each do |feature_key| ft[feature_key].each do |feature| feature['From'] #=> 1 feature['To'] #=> 21 feature['Description'] #=> '' feature['FTId'] #=> '' feature['diff'] #=> [] feature['original'] #=> [feature_key, '1', '21', '', ''] end end
{FEATURE_KEY => [{'From' => int, 'To' => int, 'Description' => aStr, 'FTId' => aStr, 'diff' => [original_residues, changed_residues], 'original' => aAry }],...}
returns an Array of the information about the feature_name in the feature table.
[{'From' => str, 'To' => str, 'Description' => str, 'FTId' => str},...]
Col Data item ----- ----------------- 1- 2 FT 6-13 Feature name 15-20 `FROM' endpoint 22-27 `TO' endpoint 35-75 Description (>=0 per key) ----- -----------------
Note: ‘FROM’ and ‘TO’ endpoints are allowed to use non-numerical characters including ’<’, ’>’ or ’?’. (c.f. ’<1’, ’?42’)
See also www.expasy.org/sprot/userman.html#FT_line
# File lib/bio/db/embl/sptr.rb, line 1024
#
# Returns the contents of the feature table (FT lines).
#
# feature_key:: nil (default) returns the whole table; a feature key
#               returns only the Array stored for that key (nil if absent).
#
# The table is a Hash of
#   FEATURE_KEY => [{'From' => Integer, 'To' => Integer,
#                    'Description' => String, 'FTId' => String,
#                    'diff' => [original_residues, changed_residues],
#                    'original' => Array}, ...]
# and is cached in @data['FT'].
#
# 'diff' is filled in only for VARSPLIC, VARIANT, VAR_SEQ and CONFLICT
# features: from an "X -> Y" description, or, for a "Missing" description,
# from seq.subseq(From, To) and ''.
#
# Raises RuntimeError ("Invalid FT Lines ...") if anything goes wrong while
# parsing.
def ft(feature_key = nil)
  return ft[feature_key] if feature_key
  return @data['FT'] if @data['FT']

  hash = {}
  begin
    table = []
    get('FT').split("\n").each do |line|
      # A new feature has its key at column 6, i.e. exactly three blanks
      # after "FT"; this agrees with the column slices taken below.
      if line =~ /^FT {3}\w/
        feature = line.chomp.ljust(74)
        table << [feature[5..12].strip,   # Feature Name
                  feature[14..19].strip,  # From
                  feature[21..26].strip,  # To
                  feature[34..74].strip]  # Description
      else
        # Continuation line. Non-destructive sub, because sub! returns nil
        # when nothing is replaced and would append nil to the feature.
        table.last << line.chomp.sub(/^FT +/, '')
      end
    end

    # Joining Description lines
    table = table.map do |feature|
      ftid = feature.pop if feature.last =~ /FTId=/
      feature = feature[0, 3] << feature[3..-1].join(' ') if feature.size > 4
      feature << (ftid || '')
    end

    table.each do |feature|
      key = feature[0]
      entry = {
        # Removing '<', '>' or '?' in FROM/TO endpoint.
        'From' => feature[1].sub(/\D/, '').to_i,
        'To' => feature[2].sub(/\D/, '').to_i,
        'Description' => feature[3],
        'FTId' => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
        'diff' => [],
        'original' => feature
      }
      (hash[key] ||= []) << entry

      case key
      when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
        original_res = changed_res = nil
        case entry['Description']
        when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
          # Read both captures before gsub overwrites the match data.
          original_res, changed_res = $1, $2
          original_res = original_res.gsub(/ /, '').strip
          changed_res = changed_res.gsub(/ /, '').strip
        when /Missing/i
          original_res = seq.subseq(entry['From'], entry['To'])
          changed_res = ''
        end
        entry['diff'] = [original_res, changed_res]
      end
    end
  rescue => e
    raise "Invalid FT Lines(#{e}) in #{entry_id}:, \n'#{get('FT')}'\n"
  end

  @data['FT'] = hash
end
returns a Array of gene names in the GN line.
# File lib/bio/db/embl/sptr.rb, line 264
#
# Returns gene names taken from the parsed GN data.
#
# When the first parsed GN element is a Hash, returns the :name value of
# every element; otherwise returns the first element itself.
def gene_names
  gn # set @data['GN'] if it hasn't been already done

  records = @data['GN']
  head = records.first
  return head unless head.instance_of?(Hash)

  records.map { |record| record[:name] }
end
returns gene names in the GN line.
where <gene record> is:
{ :name => '...', :synonyms => [ 's1', 's2', ... ], :loci => [ 'l1', 'l2', ... ], :orfs => [ 'o1', 'o2', ... ] }
Old format:
# File lib/bio/db/embl/sptr.rb, line 188
#
# Returns the parsed GN (gene name) data, cached in @data['GN'].
#
# GN text containing "Name=" or "ORFNames=" is handed to
# gn_uniprot_parser; anything else goes to gn_old_parser.
def gn
  @data['GN'] ||=
    case fetch('GN')
    when /Name=/, /ORFNames=/
      gn_uniprot_parser
    else
      gn_old_parser
    end
end
Bio::SPTR#hi #=> hash
# File lib/bio/db/embl/sptr.rb, line 528
#
# Returns the parsed HI lines as an Array of Hashes, cached in @data['HI'].
#
# The HI text is split on ". " into records of the form
# "Category: kw1; kw2; ...; kwN". Each record becomes
#   {'Category' => String, 'Keywords' => [kw1, ...], 'Keyword' => kwN}
# where 'Keyword' is the last keyword (a trailing period removed) and
# 'Keywords' holds the ones before it.
def hi
  return @data['HI'] if @data['HI']

  @data['HI'] = []
  fetch('HI').split(/\. /).each do |record|
    category, keyword_list = record.split(': ')
    keywords = keyword_list.split('; ')
    keyword = keywords.pop
    keyword.sub!(/\.$/, '')
    @data['HI'] << {'Category' => category, 'Keywords' => keywords, 'Keyword' => keyword}
  end
  @data['HI']
end
returns a Hash of the ID line.
returns the content (Int or String) of the ID line for a given key. Hash keys: [‘ENTRY_NAME’, ‘DATA_CLASS’, ‘MOLECULE_TYPE’, ‘SEQUENCE_LENGTH’]
ID P53_HUMAN STANDARD; PRT; 393 AA. #"ID #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."
obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD", "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"} obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
# File lib/bio/db/embl/sptr.rb, line 63
#
# Returns the parsed ID line.
#
# key:: nil (default) returns the whole Hash; 'ENTRY_NAME', 'DATA_CLASS',
#       'MOLECULE_TYPE' or 'SEQUENCE_LENGTH' returns that field.
#
# The raw line in @orig['ID'] is split on runs of blanks; fields 1-4 become
# the entry name, data class, molecule type (first ';' removed from each of
# the latter two) and the sequence length as an Integer. The result is
# cached in @data['ID'].
def id_line(key = nil)
  return id_line[key] if key

  cached = @data['ID']
  return cached if cached

  _code, entry_name, data_class, molecule_type, length = @orig['ID'].split(/ +/)
  @data['ID'] = {
    'ENTRY_NAME' => entry_name,
    'DATA_CLASS' => data_class.sub(/;/, ''),
    'MOLECULE_TYPE' => molecule_type.sub(/;/, ''),
    'SEQUENCE_LENGTH' => length.to_i
  }
end
returns a MOLECULE_TYPE in the ID line.
A short-cut for Bio::SPTR#id_line(‘MOLECULE_TYPE’).
# File lib/bio/db/embl/sptr.rb, line 89
#
# Returns the 'MOLECULE_TYPE' field of the parsed ID line.
def molecule
  id_line['MOLECULE_TYPE']
end
OH NCBI_TaxID=TaxID; HostName. br.expasy.org/sprot/userman.html#OH_line
# File lib/bio/db/embl/sptr.rb, line 358
#
# Returns the parsed OH lines, cached in @data['OH'].
#
# The OH text is split on ". " and every piece becomes
#   {'NCBI_TaxID' => String, 'HostName' => String or nil}
# where HostName is the text after "NCBI_TaxID=n; " without a trailing
# period.
#
# Raises ArgumentError when a piece has no "NCBI_TaxID=<digits>;".
def oh
  unless @data['OH']
    hosts = fetch('OH').split('. ').map do |piece|
      unless piece =~ /NCBI_TaxID=(\d+);/
        raise ArgumentError, ["Error: Invalid OH line format (#{entry_id}):",
                              $!, "\n", get('OH'), "\n"].join
      end
      taxid = $1

      host_name = piece[/NCBI_TaxID=\d+; (.+)/, 1]
      host_name = host_name.sub(/\.$/, '') if host_name

      {'NCBI_TaxID' => taxid, 'HostName' => host_name}
    end
    @data['OH'] = hosts
  end
  @data['OH']
end
returns an Array of Hashes, or a String of the OS line when an index is given.
[{'name' => '(Human)', 'os' => 'Homo sapiens'}, {'name' => '(Rat)', 'os' => 'Rattus norvegicus'}]
{'name' => "(Human)", 'os' => 'Homo sapiens'}
OS Genus species (name). OS Genus species (name0) (name1). OS Genus species (name0) (name1). OS Genus species (name0), G s0 (name0), and G s (name0) (name1). OS Homo sapiens (Human), and Rattus norvegicus (Rat) OS Hippotis sp. Clark and Watts 825. OS unknown cyperaceous sp.
# File lib/bio/db/embl/sptr.rb, line 297
#
# Returns the organisms named in the OS line.
#
# num:: nil (default) returns the whole Array; an index returns
#       "<os> <name>" for that organism as one String.
#
# The OS text is split on ", and" / ", ". From each piece the species text
# becomes 'os' and the parenthesised part (nil if there is none) becomes
# 'name'. The Array of {'name', 'os'} Hashes is cached in @data['OS'].
#
# Raises RuntimeError when a piece contains no species text.
def os(num = nil)
  unless @data['OS']
    organisms = fetch('OS').split(/, and|, /).map do |piece|
      unless piece =~ /(\w+ *[\w\d \:\'\+\-\.]+[\w\d\.])/
        raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
      end
      species = $1
      {'name' => piece[/(\(.+\))/, 1], 'os' => species}
    end
    @data['OS'] = organisms
  end

  return @data['OS'] unless num

  # EX. "Trifolium repens (white clover)"
  "#{@data['OS'][num]['os']} #{@data['OS'][num]['name']}"
end
returns a Hash of organism taxonomy cross-references.
{'NCBI_TaxID' => ['1234','2345','3456','4567'], ...}
OX NCBI_TaxID=1234; OX NCBI_TaxID=1234, 2345, 3456, 4567;
# File lib/bio/db/embl/sptr.rb, line 341
#
# Returns the organism taxonomy cross-references from the OX line as a
# Hash of database name => Array of identifiers, cached in @data['OX'].
#
# A trailing period is removed, the text is split on ';' into
# "db=id1, id2, ..." items, and each item's identifiers are split on ','.
def ox
  return @data['OX'] if @data['OX']

  xrefs = {}
  fetch('OX').sub(/\.$/, '').split(/;/).each do |item|
    db, refs = item.strip.split(/=/)
    xrefs[db] = refs.split(/, */)
  end
  @data['OX'] = xrefs
end
returns the proposed official name of the protein.
"DE #{OFFICIAL_NAME} (#{SYNONYM})" "DE #{OFFICIAL_NAME} (#{SYNONYM}) [CONTAINS: #1; #2]." OFFICIAL_NAME 1/entry SYNONYM >=0 CONTAINS >=0
# File lib/bio/db/embl/sptr.rb, line 144
#
# Returns the protein name from the DE line: the text in front of the
# first '(' (and in front of any '[' part), stripped.
# ' (Fragment)' is appended when the part before '[' mentions "fragment"
# (case-insensitive). Returns "" when fetch('DE') yields nothing.
def protein_name
  de_line = fetch('DE')
  return "" unless de_line

  head = de_line[/^[^\[]*/] # everything preceding the first [ (the "contains" part)
  official = head[/^[^(]*/].strip
  official << ' (Fragment)' if head =~ /fragment/i
  official
end
returns contents in the R lines.
where <reference information Hash> is:
{'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '', 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
R Lines
# File lib/bio/db/embl/sptr.rb, line 394
#
# Returns the contents of the R (reference) lines as an Array with one
# Hash per reference, cached in @data['R'].
#
# The raw R text is cut into references at every "\nRN ". Within one
# reference the text of all lines sharing a line code (RN, RC, RP, RX, RA,
# RT, RL, RG) is concatenated, each line followed by a blank, and the
# result is passed through the matching set_RN ... set_RG method.
#
# Raises RuntimeError for a line that does not start with one of those
# line codes followed by a blank.
def ref
  return @data['R'] if @data['R']

  tags = %w[RN RC RP RX RA RT RL RG]
  parsed = get('R').split(/\nRN /).map do |chunk|
    # Every chunk but the first lost its "RN " prefix to the split.
    chunk = 'RN ' + chunk unless /^RN / =~ chunk

    record = {}
    tags.each { |tag| record[tag] = '' }

    chunk.split("\n").each do |line|
      unless /^(R[NPXARLCTG]) (.+)/ =~ line
        raise "Invalid format in R lines, \n[#{line}]\n"
      end
      record[$1] += $2 + ' '
    end

    # set_RN, set_RC, ... in the same order as the keys above.
    tags.each { |tag| record[tag] = send("set_#{tag}", record[tag]) }
    record
  end

  @data['R'] = parsed
end
returns Bio::Reference object from Bio::EMBLDB::Common#ref.
# File lib/bio/db/embl/sptr.rb, line 488
#
# Returns a References object holding one Reference per entry of #ref,
# cached in @data['references'].
#
# For every reference a Hash (default value '') is filled from:
# RA:: 'authors', the value split on ", "
# RT:: 'title'
# RL:: 'journal', 'volume', 'issue', 'pages' and 'year' when the value
#      looks like "Journal VOL (ISSUE), PAGES-PAGES (YEAR)"; otherwise the
#      whole value becomes 'journal'
# RX:: one entry per tag/xref pair, keyed by the downcased tag
#      (PUBMED, MEDLINE, DOI)
# and handed to Reference.new.
def references
  return @data['references'] if @data['references']

  list = ref.map do |ent|
    fields = Hash.new('')
    ent.each do |key, value|
      case key
      when 'RA'
        fields['authors'] = value.split(/, /)
      when 'RT'
        fields['title'] = value
      when 'RL'
        if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
          fields['journal'], fields['volume'], fields['issue'],
            fields['pages'], fields['year'] = $1, $2, $3, $4, $5
        else
          fields['journal'] = value
        end
      when 'RX' # PUBMED, MEDLINE, DOI
        value.each { |tag, xref| fields[tag.downcase] = xref }
      end
    end
    Reference.new(fields)
  end

  @data['references'] = References.new(list)
end
returns a Bio::Sequence::AA of the amino acid sequence.
blank Line; sequence data (>=1)
# File lib/bio/db/embl/sptr.rb, line 1134
#
# Returns the sequence as a Sequence::AA, cached in @data[''].
# It is built from fetch('') with every blank and every run of digits
# removed.
def seq
  @data[''] ||= Sequence::AA.new(fetch('').gsub(/ |\d+/, ''))
end
returns a SEQUENCE_LENGTH in the ID line.
A short-cut for Bio::SPTR#id_line(‘SEQUENCE_LENGTH’).
# File lib/bio/db/embl/sptr.rb, line 98
#
# Returns the 'SEQUENCE_LENGTH' field of the parsed ID line.
def sequence_length
  id_line['SEQUENCE_LENGTH']
end
returns a Hash of the contents of the SQ lines.
returns a value of a key given in the SQ lines.
'CRC64']
SQ SEQUENCE 233 AA; 25630 MW; 146A1B48A1475C86 CRC64; SQ SEQUENCE \d+ AA; \d+ MW; [0-9A-Z]+ CRC64;
MW, Dalton unit. CRC64 (64-bit Cyclic Redundancy Check, ISO 3309).
# File lib/bio/db/embl/sptr.rb, line 1106
#
# Returns the contents of the SQ line.
#
# key:: nil (default) returns the whole Hash
#       {'aalen' => Integer, 'MW' => Integer, 'CRC64' => String}.
#       A key matching /mw/, /molecular/ or /weight/ returns the molecular
#       weight; one matching /len/, /length/ or /AA/ returns the sequence
#       length; any other key is looked up in the Hash as it is.
#
# The Hash is cached in @data['SQ'].
# Raises RuntimeError when the SQ line does not have the expected form.
def sq(key = nil)
  unless @data['SQ']
    found = /(\d+) AA\; (\d+) MW; (.+) CRC64;/.match(fetch('SQ'))
    raise "Invalid SQ Line: \n'#{fetch('SQ')}'" unless found

    @data['SQ'] = {'aalen' => found[1].to_i, 'MW' => found[2].to_i, 'CRC64' => found[3]}
  end

  info = @data['SQ']
  return info unless key

  case key
  when /mw/, /molecular/, /weight/
    info['MW']
  when /len/, /length/, /AA/
    info['aalen']
  else
    info[key]
  end
end
returns an array of synonyms (unofficial names).
synonyms are each placed in () following the official name on the DE line.
# File lib/bio/db/embl/sptr.rb, line 158
#
# Returns an Array of the synonyms (unofficial names) on the DE line.
#
# Every parenthesised group outside the "[...]" part counts as a synonym,
# except groups mentioning "fragment" (case-insensitive). Returns [] when
# fetch('DE') yields nothing.
def synonyms
  de_line = fetch('DE')
  return [] unless de_line

  # ignore stuff between [ and ]. That's the "contains" part
  names_only = de_line.sub(/\[.*\]/, '')

  names_only.scan(/\([^)]+/)
            .reject { |group| group =~ /fragment/i }
            .map { |group| group[1..-1].strip } # index to remove the leading (
end