1
2
3
4
5
6 """Use the DSSP program to calculate secondary structure and accessibility.
7
8 You need to have a working version of DSSP (and a license, free for academic
9 use) in order to use this. For DSSP, see U{http://www.cmbi.kun.nl/gv/dssp/}.
10
11 The DSSP codes for secondary structure used here are:
12
13 - H Alpha helix (4-12)
14 - B Isolated beta-bridge residue
15 - E Strand
16 - G 3-10 helix
17 - I pi helix
18 - T Turn
19 - S Bend
20 - - None
21 """
22
23 import os
24 import re
25 import tempfile
26
27 from Bio.SCOP.Raf import to_one_letter_code
28
29 from Bio.PDB.AbstractPropertyMap import AbstractResiduePropertyMap
30 from Bio.PDB.PDBExceptions import PDBException
31 from Bio.PDB.PDBParser import PDBParser
32
33
34
35 _dssp_cys=re.compile('[a-z]')
36
37
38
39
40 MAX_ACC={}
41 MAX_ACC["ALA"]=106.0
42 MAX_ACC["CYS"]=135.0
43 MAX_ACC["ASP"]=163.0
44 MAX_ACC["GLU"]=194.0
45 MAX_ACC["PHE"]=197.0
46 MAX_ACC["GLY"]=84.0
47 MAX_ACC["HIS"]=184.0
48 MAX_ACC["ILE"]=169.0
49 MAX_ACC["LYS"]=205.0
50 MAX_ACC["LEU"]=164.0
51 MAX_ACC["MET"]=188.0
52 MAX_ACC["ASN"]=157.0
53 MAX_ACC["PRO"]=136.0
54 MAX_ACC["GLN"]=198.0
55 MAX_ACC["ARG"]=248.0
56 MAX_ACC["SER"]=130.0
57 MAX_ACC["THR"]=142.0
58 MAX_ACC["VAL"]=142.0
59 MAX_ACC["TRP"]=227.0
60 MAX_ACC["TYR"]=222.0
61
62
64 """
65 Secondary structure symbol to index.
66 H=0
67 E=1
68 C=2
69 """
70 if ss=='H':
71 return 0
72 if ss=='E':
73 return 1
74 if ss=='C':
75 return 2
76 assert 0
77
78
80 """
81 Create a DSSP dictionary from a PDB file.
82
83 Example:
84 >>> dssp_dict=dssp_dict_from_pdb_file("1fat.pdb")
85 >>> aa, ss, acc=dssp_dict[('A', 1)]
86
87 @param in_file: pdb file
88 @type in_file: string
89
90 @param DSSP: DSSP executable (argument to os.system)
91 @type DSSP: string
92
93 @return: a dictionary that maps (chainid, resid) to
94 amino acid type, secondary structure code and
95 accessibility.
96 @rtype: {}
97 """
98 out_file = tempfile.NamedTemporaryFile(suffix='.dssp')
99 out_file.flush()
100 os.system("%s %s > %s" % (DSSP, in_file, out_file.name))
101 out_dict, keys = make_dssp_dict(out_file.name)
102 out_file.close()
103 return out_dict, keys
104
105
107 """
108 Return a DSSP dictionary that maps (chainid, resid) to
109 aa, ss and accessibility, from a DSSP file.
110
111 @param filename: the DSSP output file
112 @type filename: string
113 """
114 dssp = {}
115 handle = open(filename, "r")
116 try:
117 start = 0
118 keys = []
119 for l in handle.readlines():
120 sl = l.split()
121 if sl[1] == "RESIDUE":
122
123 start = 1
124 continue
125 if not start:
126 continue
127 if l[9] == " ":
128
129 continue
130 resseq = int(l[5:10])
131 icode = l[10]
132 chainid = l[11]
133 aa = l[13]
134 ss = l[16]
135 if ss == " ":
136 ss = "-"
137 try:
138 acc = int(l[34:38])
139 phi = float(l[103:109])
140 psi = float(l[109:115])
141 except ValueError, exc:
142
143
144
145
146
147 if l[34] != ' ':
148 shift = l[34:].find(' ')
149 acc = int((l[34+shift:38+shift]))
150 phi = float(l[103+shift:109+shift])
151 psi = float(l[109+shift:115+shift])
152 else:
153 raise ValueError, exc
154 res_id = (" ", resseq, icode)
155 dssp[(chainid, res_id)] = (aa, ss, acc, phi, psi)
156 keys.append((chainid, res_id))
157 finally:
158 handle.close()
159 return dssp, keys
160
161
162 -class DSSP(AbstractResiduePropertyMap):
163 """
164 Run DSSP on a pdb file, and provide a handle to the
165 DSSP secondary structure and accessibility.
166
167 Note that DSSP can only handle one model.
168
169 Example:
170
171 >>> p = PDBParser()
172 >>> structure = p.get_structure("1MOT", "1MOT.pdb")
173 >>> model = structure[0]
174 >>> dssp = DSSP(model, "1MOT.pdb")
175 >>> # DSSP data is accessed by a tuple (chain_id, res_id)
176 >>> a_key = dssp.keys()[2]
177 >>> # residue object, secondary structure, solvent accessibility,
178 >>> # relative accessiblity, phi, psi
179 >>> dssp[a_key]
180 (<Residue ALA het= resseq=251 icode= >,
181 'H',
182 72,
183 0.67924528301886788,
184 -61.200000000000003,
185 -42.399999999999999)
186 """
187
188 - def __init__(self, model, pdb_file, dssp="dssp"):
189 """
190 @param model: the first model of the structure
191 @type model: L{Model}
192
193 @param pdb_file: a PDB file
194 @type pdb_file: string
195
196 @param dssp: the dssp executable (ie. the argument to os.system)
197 @type dssp: string
198 """
199
200 dssp_dict, dssp_keys = dssp_dict_from_pdb_file(pdb_file, dssp)
201 dssp_map = {}
202 dssp_list = []
203
204 def resid2code(res_id):
205 """Serialize a residue's resseq and icode for easy comparison."""
206 return '%s%s' % (res_id[1], res_id[2])
207
208
209
210
211 for key in dssp_keys:
212 chain_id, res_id = key
213 chain = model[chain_id]
214 try:
215 res = chain[res_id]
216 except KeyError:
217
218
219
220
221 res_seq_icode = resid2code(res_id)
222 for r in chain:
223 if r.id[0] not in (' ', 'W'):
224
225 if resid2code(r.id) == res_seq_icode:
226
227 res = r
228 break
229 else:
230 raise KeyError(res_id)
231
232
233
234
235
236
237
238 if res.is_disordered() == 2:
239 for rk in res.disordered_get_id_list():
240
241
242
243 altloc = res.child_dict[rk].get_list()[0].get_altloc()
244 if altloc in tuple('A1 '):
245 res.disordered_select(rk)
246 break
247 else:
248
249 res.disordered_select(res.disordered_get_id_list()[0])
250
251
252
253
254
255
256
257
258 elif res.is_disordered() == 1:
259
260
261
262
263 altlocs = set(a.get_altloc() for a in res.get_unpacked_list())
264 if altlocs.isdisjoint('A1 '):
265
266 res_seq_icode = resid2code(res_id)
267 for r in chain:
268 if r.id[0] not in (' ', 'W'):
269 if (resid2code(r.id) == res_seq_icode and
270 r.get_list()[0].get_altloc() in tuple('A1 ')):
271 res = r
272 break
273
274 aa, ss, acc, phi, psi = dssp_dict[key]
275 res.xtra["SS_DSSP"] = ss
276 res.xtra["EXP_DSSP_ASA"] = acc
277 res.xtra["PHI_DSSP"] = phi
278 res.xtra["PSI_DSSP"] = psi
279
280 resname = res.get_resname()
281 try:
282 rel_acc = acc/MAX_ACC[resname]
283 except KeyError:
284
285 rel_acc = 'NA'
286 else:
287 if rel_acc > 1.0:
288 rel_acc = 1.0
289 res.xtra["EXP_DSSP_RASA"] = rel_acc
290
291
292
293 resname = to_one_letter_code.get(resname, 'X')
294 if resname == "C":
295
296
297 if _dssp_cys.match(aa):
298 aa = 'C'
299
300 if (resname != aa) and (res.id[0] == ' ' or aa != 'X'):
301 raise PDBException("Structure/DSSP mismatch at %s" % res)
302 dssp_map[key] = ((res, ss, acc, rel_acc, phi, psi))
303 dssp_list.append((res, ss, acc, rel_acc, phi, psi))
304
305 AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys,
306 dssp_list)
307
308
309 if __name__ == "__main__":
310 import sys
311
312 p = PDBParser()
313 s = p.get_structure('X', sys.argv[1])
314 model = s[0]
315 d = DSSP(model, sys.argv[1])
316
317 for r in d:
318 print r
319 print "Handled", len(d), "residues"
320 print d.keys()
321 if ('A', 1) in d:
322 print d[('A', 1)]
323 print s[0]['A'][1].xtra
324
325 print ''.join(d[key][1] for key in d.keys())
326