Package Bio :: Package PDB :: Module PDBParser'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.PDBParser'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package.   
  5   
  6  """Parser for PDB files.""" 
  7   
  8  import warnings 
  9   
 10  import numpy 
 11   
 12  from Bio.PDB.PDBExceptions import \ 
 13          PDBConstructionException, PDBConstructionWarning 
 14  from Bio.PDB.StructureBuilder import StructureBuilder 
 15  from Bio.PDB.parse_pdb_header import _parse_pdb_header_list 
 16   
 17   
 18  # If PDB spec says "COLUMNS 18-20" this means line[17:20] 
 19   
 20   
21 -class PDBParser(object):
22 """ 23 Parse a PDB file and return a Structure object. 24 """ 25
26 - def __init__(self, PERMISSIVE=True, get_header=False, 27 structure_builder=None, QUIET=False):
28 """ 29 The PDB parser call a number of standard methods in an aggregated 30 StructureBuilder object. Normally this object is instanciated by the 31 PDBParser object itself, but if the user provides his own StructureBuilder 32 object, the latter is used instead. 33 34 Arguments: 35 36 o PERMISSIVE - Evaluated as a Boolean. If false, exceptions in 37 constructing the SMCRA data structure are fatal. If true (DEFAULT), 38 the exceptions are caught, but some residues or atoms will be missing. 39 THESE EXCEPTIONS ARE DUE TO PROBLEMS IN THE PDB FILE!. 40 41 o structure_builder - an optional user implemented StructureBuilder class. 42 43 o QUIET - Evaluated as a Boolean. If true, warnings issued in constructing 44 the SMCRA data will be supressed. If false (DEFAULT), they will be shown. 45 These warnings might be indicative of problems in the PDB file! 46 """ 47 if structure_builder!=None: 48 self.structure_builder=structure_builder 49 else: 50 self.structure_builder=StructureBuilder() 51 self.header=None 52 self.trailer=None 53 self.line_counter=0 54 self.PERMISSIVE=bool(PERMISSIVE) 55 self.QUIET=bool(QUIET)
56 57 # Public methods 58
59 - def get_structure(self, id, file):
60 """Return the structure. 61 62 Arguments: 63 o id - string, the id that will be used for the structure 64 o file - name of the PDB file OR an open filehandle 65 """ 66 67 if self.QUIET: 68 warning_list = warnings.filters[:] 69 warnings.filterwarnings('ignore', category=PDBConstructionWarning) 70 71 self.header=None 72 self.trailer=None 73 # Make a StructureBuilder instance (pass id of structure as parameter) 74 self.structure_builder.init_structure(id) 75 handle_close = False 76 if isinstance(file, basestring): 77 file=open(file) 78 handle_close = True 79 self._parse(file.readlines()) 80 self.structure_builder.set_header(self.header) 81 # Return the Structure instance 82 structure = self.structure_builder.get_structure() 83 if handle_close: 84 file.close() 85 86 if self.QUIET: 87 warnings.filters = warning_list 88 89 return structure
90
91 - def get_header(self):
92 "Return the header." 93 return self.header
94
95 - def get_trailer(self):
96 "Return the trailer." 97 return self.trailer
98 99 # Private methods 100
101 - def _parse(self, header_coords_trailer):
102 "Parse the PDB file." 103 # Extract the header; return the rest of the file 104 self.header, coords_trailer=self._get_header(header_coords_trailer) 105 # Parse the atomic data; return the PDB file trailer 106 self.trailer=self._parse_coordinates(coords_trailer)
107
108 - def _get_header(self, header_coords_trailer):
109 "Get the header of the PDB file, return the rest." 110 structure_builder=self.structure_builder 111 i = 0 112 for i in range(0, len(header_coords_trailer)): 113 structure_builder.set_line_counter(i+1) 114 line=header_coords_trailer[i] 115 record_type=line[0:6] 116 if(record_type=='ATOM ' or record_type=='HETATM' or record_type=='MODEL '): 117 break 118 header=header_coords_trailer[0:i] 119 # Return the rest of the coords+trailer for further processing 120 self.line_counter=i 121 coords_trailer=header_coords_trailer[i:] 122 header_dict=_parse_pdb_header_list(header) 123 return header_dict, coords_trailer
124
125 - def _parse_coordinates(self, coords_trailer):
126 "Parse the atomic data in the PDB file." 127 local_line_counter=0 128 structure_builder=self.structure_builder 129 current_model_id=0 130 # Flag we have an open model 131 model_open=0 132 current_chain_id=None 133 current_segid=None 134 current_residue_id=None 135 current_resname=None 136 for i in range(0, len(coords_trailer)): 137 line=coords_trailer[i] 138 record_type=line[0:6] 139 global_line_counter=self.line_counter+local_line_counter+1 140 structure_builder.set_line_counter(global_line_counter) 141 if(record_type=='ATOM ' or record_type=='HETATM'): 142 # Initialize the Model - there was no explicit MODEL record 143 if not model_open: 144 structure_builder.init_model(current_model_id) 145 current_model_id+=1 146 model_open=1 147 fullname=line[12:16] 148 # get rid of whitespace in atom names 149 split_list=fullname.split() 150 if len(split_list)!=1: 151 # atom name has internal spaces, e.g. " N B ", so 152 # we do not strip spaces 153 name=fullname 154 else: 155 # atom name is like " CA ", so we can strip spaces 156 name=split_list[0] 157 altloc=line[16:17] 158 resname=line[17:20] 159 chainid=line[21:22] 160 try: 161 serial_number=int(line[6:11]) 162 except: 163 serial_number=0 164 resseq=int(line[22:26].split()[0]) # sequence identifier 165 icode=line[26:27] # insertion code 166 if record_type=='HETATM': # hetero atom flag 167 if resname=="HOH" or resname=="WAT": 168 hetero_flag="W" 169 else: 170 hetero_flag="H" 171 else: 172 hetero_flag=" " 173 residue_id=(hetero_flag, resseq, icode) 174 # atomic coordinates 175 try: 176 x=float(line[30:38]) 177 y=float(line[38:46]) 178 z=float(line[46:54]) 179 except: 180 #Should we allow parsing to continue in permissive mode? 181 #If so what coordindates should we default to? Easier to abort! 182 raise PDBConstructionException(\ 183 "Invalid or missing coordinate(s) at line %i." \ 184 % global_line_counter) 185 coord=numpy.array((x, y, z), 'f') 186 # occupancy & B factor 187 try: 188 occupancy=float(line[54:60]) 189 except: 190 self._handle_PDB_exception("Invalid or missing occupancy", 191 global_line_counter) 192 occupancy = 0.0 #Is one or zero a good default? 193 try: 194 bfactor=float(line[60:66]) 195 except: 196 self._handle_PDB_exception("Invalid or missing B factor", 197 global_line_counter) 198 bfactor = 0.0 #The PDB use a default of zero if the data is missing 199 segid=line[72:76] 200 element=line[76:78].strip() 201 if current_segid!=segid: 202 current_segid=segid 203 structure_builder.init_seg(current_segid) 204 if current_chain_id!=chainid: 205 current_chain_id=chainid 206 structure_builder.init_chain(current_chain_id) 207 current_residue_id=residue_id 208 current_resname=resname 209 try: 210 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 211 except PDBConstructionException, message: 212 self._handle_PDB_exception(message, global_line_counter) 213 elif current_residue_id!=residue_id or current_resname!=resname: 214 current_residue_id=residue_id 215 current_resname=resname 216 try: 217 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 218 except PDBConstructionException, message: 219 self._handle_PDB_exception(message, global_line_counter) 220 # init atom 221 try: 222 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, 223 fullname, serial_number, element) 224 except PDBConstructionException, message: 225 self._handle_PDB_exception(message, global_line_counter) 226 elif(record_type=='ANISOU'): 227 anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70])) 228 # U's are scaled by 10^4 229 anisou_array=(numpy.array(anisou, 'f')/10000.0).astype('f') 230 structure_builder.set_anisou(anisou_array) 231 elif(record_type=='MODEL '): 232 try: 233 serial_num=int(line[10:14]) 234 except: 235 self._handle_PDB_exception("Invalid or missing model serial number", 236 global_line_counter) 237 serial_num=0 238 structure_builder.init_model(current_model_id,serial_num) 239 current_model_id+=1 240 model_open=1 241 current_chain_id=None 242 current_residue_id=None 243 elif(record_type=='END ' or record_type=='CONECT'): 244 # End of atomic data, return the trailer 245 self.line_counter=self.line_counter+local_line_counter 246 return coords_trailer[local_line_counter:] 247 elif(record_type=='ENDMDL'): 248 model_open=0 249 current_chain_id=None 250 current_residue_id=None 251 elif(record_type=='SIGUIJ'): 252 # standard deviation of anisotropic B factor 253 siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70])) 254 # U sigma's are scaled by 10^4 255 siguij_array=(numpy.array(siguij, 'f')/10000.0).astype('f') 256 structure_builder.set_siguij(siguij_array) 257 elif(record_type=='SIGATM'): 258 # standard deviation of atomic positions 259 sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66])) 260 sigatm_array=numpy.array(sigatm, 'f') 261 structure_builder.set_sigatm(sigatm_array) 262 local_line_counter=local_line_counter+1 263 # EOF (does not end in END or CONECT) 264 self.line_counter=self.line_counter+local_line_counter 265 return []
266
267 - def _handle_PDB_exception(self, message, line_counter):
268 """ 269 This method catches an exception that occurs in the StructureBuilder 270 object (if PERMISSIVE), or raises it again, this time adding the 271 PDB line number to the error message. 272 """ 273 message="%s at line %i." % (message, line_counter) 274 if self.PERMISSIVE: 275 # just print a warning - some residues/atoms may be missing 276 warnings.warn("PDBConstructionException: %s\n" 277 "Exception ignored.\n" 278 "Some atoms or residues may be missing in the data structure." 279 % message, PDBConstructionWarning) 280 else: 281 # exceptions are fatal - raise again with new message (including line nr) 282 raise PDBConstructionException(message)
283 284 285 if __name__=="__main__": 286 287 import sys 288 289 p=PDBParser(PERMISSIVE=True) 290 291 filename = sys.argv[1] 292 s=p.get_structure("scr", filename) 293 294 for m in s: 295 p=m.get_parent() 296 assert(p is s) 297 for c in m: 298 p=c.get_parent() 299 assert(p is m) 300 for r in c: 301 print r 302 p=r.get_parent() 303 assert(p is c) 304 for a in r: 305 p=a.get_parent() 306 if not p is r: 307 print p, r 308