Package Bio :: Package PDB :: Module DSSP'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.DSSP'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Use the DSSP program to calculate secondary structure and accessibility. 
  7   
  8  You need to have a working version of DSSP (and a license, free for academic 
  9  use) in order to use this. For DSSP, see U{http://www.cmbi.kun.nl/gv/dssp/}. 
 10   
 11  The DSSP codes for secondary structure used here are: 
 12   
 13      - H        Alpha helix (4-12) 
 14      - B        Isolated beta-bridge residue 
 15      - E        Strand 
 16      - G        3-10 helix 
 17      - I        pi helix 
 18      - T        Turn 
 19      - S        Bend 
 20      - -        None 
 21  """ 
 22   
 23  import os 
 24  import re 
 25  import tempfile 
 26   
 27  from Bio.SCOP.Raf import to_one_letter_code 
 28   
 29  from Bio.PDB.AbstractPropertyMap import AbstractResiduePropertyMap 
 30  from Bio.PDB.PDBExceptions import PDBException 
 31  from Bio.PDB.PDBParser import PDBParser 
 32   
 33   
 34  # Match C in DSSP 
 35  _dssp_cys=re.compile('[a-z]') 
 36   
 37  # Maximal ASA of amino acids 
 38  # Values from Sander & Rost, (1994), Proteins, 20:216-226 
 39  # Used for relative accessibility 
 40  MAX_ACC={} 
 41  MAX_ACC["ALA"]=106.0 
 42  MAX_ACC["CYS"]=135.0 
 43  MAX_ACC["ASP"]=163.0 
 44  MAX_ACC["GLU"]=194.0 
 45  MAX_ACC["PHE"]=197.0 
 46  MAX_ACC["GLY"]=84.0 
 47  MAX_ACC["HIS"]=184.0 
 48  MAX_ACC["ILE"]=169.0 
 49  MAX_ACC["LYS"]=205.0 
 50  MAX_ACC["LEU"]=164.0 
 51  MAX_ACC["MET"]=188.0 
 52  MAX_ACC["ASN"]=157.0 
 53  MAX_ACC["PRO"]=136.0 
 54  MAX_ACC["GLN"]=198.0 
 55  MAX_ACC["ARG"]=248.0 
 56  MAX_ACC["SER"]=130.0 
 57  MAX_ACC["THR"]=142.0 
 58  MAX_ACC["VAL"]=142.0 
 59  MAX_ACC["TRP"]=227.0 
 60  MAX_ACC["TYR"]=222.0 
 61   
 62   
63 -def ss_to_index(ss):
64 """ 65 Secondary structure symbol to index. 66 H=0 67 E=1 68 C=2 69 """ 70 if ss=='H': 71 return 0 72 if ss=='E': 73 return 1 74 if ss=='C': 75 return 2 76 assert 0
77 78
79 -def dssp_dict_from_pdb_file(in_file, DSSP="dssp"):
80 """ 81 Create a DSSP dictionary from a PDB file. 82 83 Example: 84 >>> dssp_dict=dssp_dict_from_pdb_file("1fat.pdb") 85 >>> aa, ss, acc=dssp_dict[('A', 1)] 86 87 @param in_file: pdb file 88 @type in_file: string 89 90 @param DSSP: DSSP executable (argument to os.system) 91 @type DSSP: string 92 93 @return: a dictionary that maps (chainid, resid) to 94 amino acid type, secondary structure code and 95 accessibility. 96 @rtype: {} 97 """ 98 out_file = tempfile.NamedTemporaryFile(suffix='.dssp') 99 out_file.flush() # needed? 100 os.system("%s %s > %s" % (DSSP, in_file, out_file.name)) 101 out_dict, keys = make_dssp_dict(out_file.name) 102 out_file.close() 103 return out_dict, keys
104 105
106 -def make_dssp_dict(filename):
107 """ 108 Return a DSSP dictionary that maps (chainid, resid) to 109 aa, ss and accessibility, from a DSSP file. 110 111 @param filename: the DSSP output file 112 @type filename: string 113 """ 114 dssp = {} 115 handle = open(filename, "r") 116 try: 117 start = 0 118 keys = [] 119 for l in handle.readlines(): 120 sl = l.split() 121 if sl[1] == "RESIDUE": 122 # Start parsing from here 123 start = 1 124 continue 125 if not start: 126 continue 127 if l[9] == " ": 128 # Skip -- missing residue 129 continue 130 resseq = int(l[5:10]) 131 icode = l[10] 132 chainid = l[11] 133 aa = l[13] 134 ss = l[16] 135 if ss == " ": 136 ss = "-" 137 try: 138 acc = int(l[34:38]) 139 phi = float(l[103:109]) 140 psi = float(l[109:115]) 141 except ValueError, exc: 142 # DSSP output breaks its own format when there are >9999 143 # residues, since only 4 digits are allocated to the seq num 144 # field. See 3kic chain T res 321, 1vsy chain T res 6077. 145 # Here, look for whitespace to figure out the number of extra 146 # digits, and shift parsing the rest of the line by that amount. 147 if l[34] != ' ': 148 shift = l[34:].find(' ') 149 acc = int((l[34+shift:38+shift])) 150 phi = float(l[103+shift:109+shift]) 151 psi = float(l[109+shift:115+shift]) 152 else: 153 raise ValueError, exc 154 res_id = (" ", resseq, icode) 155 dssp[(chainid, res_id)] = (aa, ss, acc, phi, psi) 156 keys.append((chainid, res_id)) 157 finally: 158 handle.close() 159 return dssp, keys
160 161
162 -class DSSP(AbstractResiduePropertyMap):
163 """ 164 Run DSSP on a pdb file, and provide a handle to the 165 DSSP secondary structure and accessibility. 166 167 Note that DSSP can only handle one model. 168 169 Example: 170 171 >>> p = PDBParser() 172 >>> structure = p.get_structure("1MOT", "1MOT.pdb") 173 >>> model = structure[0] 174 >>> dssp = DSSP(model, "1MOT.pdb") 175 >>> # DSSP data is accessed by a tuple (chain_id, res_id) 176 >>> a_key = dssp.keys()[2] 177 >>> # residue object, secondary structure, solvent accessibility, 178 >>> # relative accessiblity, phi, psi 179 >>> dssp[a_key] 180 (<Residue ALA het= resseq=251 icode= >, 181 'H', 182 72, 183 0.67924528301886788, 184 -61.200000000000003, 185 -42.399999999999999) 186 """ 187
188 - def __init__(self, model, pdb_file, dssp="dssp"):
189 """ 190 @param model: the first model of the structure 191 @type model: L{Model} 192 193 @param pdb_file: a PDB file 194 @type pdb_file: string 195 196 @param dssp: the dssp executable (ie. the argument to os.system) 197 @type dssp: string 198 """ 199 # create DSSP dictionary 200 dssp_dict, dssp_keys = dssp_dict_from_pdb_file(pdb_file, dssp) 201 dssp_map = {} 202 dssp_list = [] 203 204 def resid2code(res_id): 205 """Serialize a residue's resseq and icode for easy comparison.""" 206 return '%s%s' % (res_id[1], res_id[2])
207 208 # Now create a dictionary that maps Residue objects to 209 # secondary structure and accessibility, and a list of 210 # (residue, (secondary structure, accessibility)) tuples 211 for key in dssp_keys: 212 chain_id, res_id = key 213 chain = model[chain_id] 214 try: 215 res = chain[res_id] 216 except KeyError: 217 # In DSSP, HET field is not considered in residue identifier. 218 # Thus HETATM records may cause unnecessary exceptions. 219 # (See 3jui chain A res 593.) 220 # Try the lookup again with all HETATM other than water 221 res_seq_icode = resid2code(res_id) 222 for r in chain: 223 if r.id[0] not in (' ', 'W'): 224 # Compare resseq + icode 225 if resid2code(r.id) == res_seq_icode: 226 # Found a matching residue 227 res = r 228 break 229 else: 230 raise KeyError(res_id) 231 232 # For disordered residues of point mutations, BioPython uses the 233 # last one as default, But DSSP takes the first one (alternative 234 # location is blank, A or 1). See 1h9h chain E resi 22. 235 # Here we select the res in which all atoms have altloc blank, A or 236 # 1. If no such residues are found, simply use the first one appears 237 # (as DSSP does). 238 if res.is_disordered() == 2: 239 for rk in res.disordered_get_id_list(): 240 # All atoms in the disordered residue should have the same 241 # altloc, so it suffices to check the altloc of the first 242 # atom. 243 altloc = res.child_dict[rk].get_list()[0].get_altloc() 244 if altloc in tuple('A1 '): 245 res.disordered_select(rk) 246 break 247 else: 248 # Simply select the first one 249 res.disordered_select(res.disordered_get_id_list()[0]) 250 251 # Sometimes point mutations are put into HETATM and ATOM with altloc 252 # 'A' and 'B'. 253 # See 3piu chain A residue 273: 254 # <Residue LLP het=H_LLP resseq=273 icode= > 255 # <Residue LYS het= resseq=273 icode= > 256 # DSSP uses the HETATM LLP as it has altloc 'A' 257 # We check the altloc code here. 258 elif res.is_disordered() == 1: 259 # Check altloc of all atoms in the DisorderedResidue. If it 260 # contains blank, A or 1, then use it. Otherwise, look for HET 261 # residues of the same seq+icode. If not such HET residues are 262 # found, just accept the current one. 263 altlocs = set(a.get_altloc() for a in res.get_unpacked_list()) 264 if altlocs.isdisjoint('A1 '): 265 # Try again with all HETATM other than water 266 res_seq_icode = resid2code(res_id) 267 for r in chain: 268 if r.id[0] not in (' ', 'W'): 269 if (resid2code(r.id) == res_seq_icode and 270 r.get_list()[0].get_altloc() in tuple('A1 ')): 271 res = r 272 break 273 274 aa, ss, acc, phi, psi = dssp_dict[key] 275 res.xtra["SS_DSSP"] = ss 276 res.xtra["EXP_DSSP_ASA"] = acc 277 res.xtra["PHI_DSSP"] = phi 278 res.xtra["PSI_DSSP"] = psi 279 # Relative accessibility 280 resname = res.get_resname() 281 try: 282 rel_acc = acc/MAX_ACC[resname] 283 except KeyError: 284 # Invalid value for resname 285 rel_acc = 'NA' 286 else: 287 if rel_acc > 1.0: 288 rel_acc = 1.0 289 res.xtra["EXP_DSSP_RASA"] = rel_acc 290 # Verify if AA in DSSP == AA in Structure 291 # Something went wrong if this is not true! 292 # NB: DSSP uses X often 293 resname = to_one_letter_code.get(resname, 'X') 294 if resname == "C": 295 # DSSP renames C in C-bridges to a,b,c,d,... 296 # - we rename it back to 'C' 297 if _dssp_cys.match(aa): 298 aa = 'C' 299 # Take care of HETATM again 300 if (resname != aa) and (res.id[0] == ' ' or aa != 'X'): 301 raise PDBException("Structure/DSSP mismatch at %s" % res) 302 dssp_map[key] = ((res, ss, acc, rel_acc, phi, psi)) 303 dssp_list.append((res, ss, acc, rel_acc, phi, psi)) 304 305 AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, 306 dssp_list)
307 308 309 if __name__ == "__main__": 310 import sys 311 312 p = PDBParser() 313 s = p.get_structure('X', sys.argv[1]) 314 model = s[0] 315 d = DSSP(model, sys.argv[1]) 316 317 for r in d: 318 print r 319 print "Handled", len(d), "residues" 320 print d.keys() 321 if ('A', 1) in d: 322 print d[('A', 1)] 323 print s[0]['A'][1].xtra 324 # Secondary structure 325 print ''.join(d[key][1] for key in d.keys()) 326