Package Bio :: Package SCOP :: Module Cla
[hide private]
[frames | no frames]

Source Code for Module Bio.SCOP.Cla

  1  # Copyright 2001 by Gavin E. Crooks.  All rights reserved. 
  2  # Modifications Copyright 2010 Jeffrey Finkelstein. All rights reserved. 
  3  # 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7   
  8  """ Handle the SCOP CLAssification file, which describes SCOP domains. 
  9   
 10  The file format is described in the scop 
 11  "release notes.":http://scop.mrc-lmb.cam.ac.uk/scop/release-notes.html 
 12  The latest CLA file can be found 
 13  "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/ 
 14     
 15  "Release 1.73": http://scop.mrc-lmb.cam.ac.uk/scop/parse/dir.cla.scop.txt_1.73 
 16  (July 2008) 
 17   
 18  """ 
 19   
 20   
 21   
 22  from Residues import *  
 23   
 24   
class Record(object):
    """Holds information for one SCOP domain.

    sid -- SCOP identifier. e.g. d1danl2

    residues -- The domain definition as a Residues object

    sccs -- SCOP concise classification strings.  e.g. b.1.2.1

    sunid -- SCOP unique identifier for this domain

    hierarchy -- A dictionary, keys are nodetype, values are sunid,
                 describing the location of this domain in the SCOP
                 hierarchy. See the Scop module for a description of
                 nodetypes. This used to be a list of (key,value) tuples
                 in older versions of Biopython (see Bug 3109).
    """

    def __init__(self, line=None):
        """Create a record, optionally filled in from one CLA line.

        Arguments:

        line -- a single line of a CLA file.  When it is omitted (or
                empty) the record keeps its empty default values.
        """
        self.sid = ''
        self.residues = None
        self.sccs = ''
        self.sunid = ''
        self.hierarchy = {}
        if line:
            self._process(line)

    def _process(self, line):
        """Fill in this record from one tab-delimited CLA line.

        Raises ValueError if the line does not consist of exactly six
        tab-separated columns, or if the sunid or a hierarchy value is
        not an integer.
        """
        line = line.rstrip()  # no trailing whitespace
        columns = line.split('\t')  # separate the tab-delimited cols
        if len(columns) != 6:
            raise ValueError("I don't understand the format of %s" % line)

        self.sid, pdbid, residues, self.sccs, self.sunid, hierarchy = columns
        self.residues = Residues(residues)
        self.residues.pdbid = pdbid
        self.sunid = int(self.sunid)

        # The last column is a comma-separated list of nodetype=sunid pairs.
        for ht in hierarchy.split(","):
            key, value = ht.split('=')
            self.hierarchy[key] = int(value)

    def __str__(self):
        """Return the record as one tab-separated line ending in a newline."""
        s = [self.sid]
        # The text form of the residues is split on spaces so that each
        # piece becomes its own tab-separated column.
        s += str(self.residues).split(" ")
        s.append(self.sccs)
        s.append(self.sunid)

        # items() rather than iteritems(): the latter only exists on
        # Python 2 dictionaries, items() works on both Python 2 and 3.
        s.append(','.join('='.join((key, str(value)))
                          for key, value in self.hierarchy.items()))

        return "\t".join(map(str, s)) + "\n"
77 78
def parse(handle):
    """Generate one Record per data line of a CLA file.

    Lines that start with '#' are treated as comments and skipped;
    every other line is handed to Record.

    Arguments:

    handle -- file-like object.
    """
    for entry in handle:
        if not entry.startswith('#'):
            yield Record(entry)
91 92
class Index(dict):
    """A CLA file indexed by SCOP identifiers, allowing rapid
    random access into a file.

    The underlying dictionary maps each SCOP identifier (sid) to the
    file offset of its line; looking a key up re-reads that line from
    the file and returns it as a Record.
    """

    def __init__(self, filename):
        """Scan the file once, remembering where each record starts.

        Arguments:

        filename -- The file to index
        """
        dict.__init__(self)
        self.filename = filename
        # Default text mode: the old "U" flag is deprecated on Python 3
        # and rejected from Python 3.11.
        with open(self.filename) as f:
            while True:
                # Take the offset *before* reading the line.  Updating it
                # only after a record was parsed left a stale offset when
                # comment lines were skipped, so the first record after a
                # comment pointed at the comment instead of itself.
                position = f.tell()
                line = f.readline()
                if not line:
                    break
                if line.startswith('#'):
                    continue
                key = Record(line).sid
                if key is not None:
                    self[key] = position

    def __getitem__(self, key):
        """Return the Record stored in the file for the given key.

        Raises KeyError if the key was not found when the file was
        indexed.
        """
        position = dict.__getitem__(self, key)

        with open(self.filename) as f:
            f.seek(position)
            return Record(f.readline())
132