1
2
3
4
5
6 """Polypeptide-related classes (construction and representation).
7
8 Simple example with multiple chains,
9
10 >>> from Bio.PDB.PDBParser import PDBParser
11 >>> from Bio.PDB.Polypeptide import PPBuilder
12 >>> structure = PDBParser().get_structure('2BEG', 'PDB/2BEG.pdb')
13 >>> ppb=PPBuilder()
14 >>> for pp in ppb.build_peptides(structure):
15 ... print pp.get_sequence()
16 LVFFAEDVGSNKGAIIGLMVGGVVIA
17 LVFFAEDVGSNKGAIIGLMVGGVVIA
18 LVFFAEDVGSNKGAIIGLMVGGVVIA
19 LVFFAEDVGSNKGAIIGLMVGGVVIA
20 LVFFAEDVGSNKGAIIGLMVGGVVIA
21
22 Example with non-standard amino acids using HETATM lines in the PDB file,
23 in this case selenomethionine (MSE):
24
25 >>> from Bio.PDB.PDBParser import PDBParser
26 >>> from Bio.PDB.Polypeptide import PPBuilder
27 >>> structure = PDBParser().get_structure('1A8O', 'PDB/1A8O.pdb')
28 >>> ppb=PPBuilder()
29 >>> for pp in ppb.build_peptides(structure):
30 ... print pp.get_sequence()
31 DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW
32 TETLLVQNANPDCKTILKALGPGATLEE
33 TACQG
34
35 If you want to, you can include non-standard amino acids in the peptides:
36
37 >>> for pp in ppb.build_peptides(structure, aa_only=False):
38 ... print pp.get_sequence()
39 ... print pp.get_sequence()[0], pp[0].get_resname()
40 ... print pp.get_sequence()[-7], pp[-7].get_resname()
41 ... print pp.get_sequence()[-6], pp[-6].get_resname()
42 MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG
43 M MSE
44 M MSE
45 M MSE
46
47 In this case the selenomethionines (the first and also seventh and sixth from
48 last residues) have been shown as M (methionine) by the get_sequence method.
49 """
50
51 import warnings
52
53 from Bio.Alphabet import generic_protein
54 from Bio.Seq import Seq
55 from Bio.SCOP.Raf import to_one_letter_code
56 from Bio.PDB.PDBExceptions import PDBException
57 from Bio.PDB.Residue import Residue, DisorderedResidue
58 from Bio.PDB.Vector import calc_dihedral, calc_angle
59
60
61 standard_aa_names=["ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS",
62 "LEU", "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL",
63 "TRP", "TYR"]
64
65
66 aa1="ACDEFGHIKLMNPQRSTVWY"
67 aa3=standard_aa_names
68
69 d1_to_index={}
70 dindex_to_1={}
71 d3_to_index={}
72 dindex_to_3={}
73
74
75 for i in range(0, 20):
76 n1=aa1[i]
77 n3=aa3[i]
78 d1_to_index[n1]=i
79 dindex_to_1[i]=n1
80 d3_to_index[n3]=i
81 dindex_to_3[i]=n3
82
84 """Index to corresponding one letter amino acid name.
85
86 >>> index_to_one(0)
87 'A'
88 >>> index_to_one(19)
89 'Y'
90 """
91 return dindex_to_1[index]
92
94 """One letter code to index.
95
96 >>> one_to_index('A')
97 0
98 >>> one_to_index('Y')
99 19
100 """
101 return d1_to_index[s]
102
104 """Index to corresponding three letter amino acid name.
105
106 >>> index_to_three(0)
107 'ALA'
108 >>> index_to_three(19)
109 'TYR'
110 """
111 return dindex_to_3[i]
112
114 """Three letter code to index.
115
116 >>> three_to_index('ALA')
117 0
118 >>> three_to_index('TYR')
119 19
120 """
121 return d3_to_index[s]
122
124 """Three letter code to one letter code.
125
126 >>> three_to_one('ALA')
127 'A'
128 >>> three_to_one('TYR')
129 'Y'
130
131 For non-standard amino acids, you get a KeyError:
132
133 >>> three_to_one('MSE')
134 Traceback (most recent call last):
135 ...
136 KeyError: 'MSE'
137 """
138 i=d3_to_index[s]
139 return dindex_to_1[i]
140
142 """One letter code to three letter code.
143
144 >>> one_to_three('A')
145 'ALA'
146 >>> one_to_three('Y')
147 'TYR'
148 """
149 i=d1_to_index[s]
150 return dindex_to_3[i]
151
152 -def is_aa(residue, standard=False):
153 """Return True if residue object/string is an amino acid.
154
155 @param residue: a L{Residue} object OR a three letter amino acid code
156 @type residue: L{Residue} or string
157
158 @param standard: flag to check for the 20 AA (default false)
159 @type standard: boolean
160
161 >>> is_aa('ALA')
162 True
163
164 Known three letter codes for modified amino acids are supported,
165
166 >>> is_aa('FME')
167 True
168 >>> is_aa('FME', standard=True)
169 False
170 """
171
172 if not isinstance(residue, basestring):
173 residue=residue.get_resname()
174 residue=residue.upper()
175 if standard:
176 return residue in d3_to_index
177 else:
178 return residue in to_one_letter_code
179
180
182 """A polypeptide is simply a list of L{Residue} objects."""
184 """Get list of C-alpha atoms in the polypeptide.
185
186 @return: the list of C-alpha atoms
187 @rtype: [L{Atom}, L{Atom}, ...]
188 """
189 ca_list=[]
190 for res in self:
191 ca=res["CA"]
192 ca_list.append(ca)
193 return ca_list
194
196 """Return the list of phi/psi dihedral angles."""
197 ppl=[]
198 lng=len(self)
199 for i in range(0, lng):
200 res=self[i]
201 try:
202 n=res['N'].get_vector()
203 ca=res['CA'].get_vector()
204 c=res['C'].get_vector()
205 except:
206
207
208 ppl.append((None, None))
209 res.xtra["PHI"]=None
210 res.xtra["PSI"]=None
211 continue
212
213 if i>0:
214 rp=self[i-1]
215 try:
216 cp=rp['C'].get_vector()
217 phi=calc_dihedral(cp, n, ca, c)
218 except:
219 phi=None
220 else:
221
222 phi=None
223
224 if i<(lng-1):
225 rn=self[i+1]
226 try:
227 nn=rn['N'].get_vector()
228 psi=calc_dihedral(n, ca, c, nn)
229 except:
230 psi=None
231 else:
232
233 psi=None
234 ppl.append((phi, psi))
235
236 res.xtra["PHI"]=phi
237 res.xtra["PSI"]=psi
238 return ppl
239
241 """List of tau torsions angles for all 4 consecutive Calpha atoms."""
242 ca_list=self.get_ca_list()
243 tau_list=[]
244 for i in range(0, len(ca_list)-3):
245 atom_list = (ca_list[i], ca_list[i+1], ca_list[i+2], ca_list[i+3])
246 v1, v2, v3, v4 = [a.get_vector() for a in atom_list]
247 tau=calc_dihedral(v1, v2, v3, v4)
248 tau_list.append(tau)
249
250 res=ca_list[i+2].get_parent()
251 res.xtra["TAU"]=tau
252 return tau_list
253
255 """List of theta angles for all 3 consecutive Calpha atoms."""
256 theta_list=[]
257 ca_list=self.get_ca_list()
258 for i in range(0, len(ca_list)-2):
259 atom_list = (ca_list[i], ca_list[i+1], ca_list[i+2])
260 v1, v2, v3 = [a.get_vector() for a in atom_list]
261 theta=calc_angle(v1, v2, v3)
262 theta_list.append(theta)
263
264 res=ca_list[i+1].get_parent()
265 res.xtra["THETA"]=theta
266 return theta_list
267
279
281 """Return string representation of the polypeptide.
282
283 Return <Polypeptide start=START end=END>, where START
284 and END are sequence identifiers of the outer residues.
285 """
286 start=self[0].get_id()[1]
287 end=self[-1].get_id()[1]
288 s="<Polypeptide start=%s end=%s>" % (start, end)
289 return s
290
292 """Base class to extract polypeptides.
293
294 It checks if two consecutive residues in a chain are connected.
295 The connectivity test is implemented by a subclass.
296
297 This assumes you want both standard and non-standard amino acids.
298 """
300 """
301 @param radius: distance
302 @type radius: float
303 """
304 self.radius=radius
305
306 - def _accept(self, residue, standard_aa_only):
307 """Check if the residue is an amino acid (PRIVATE)."""
308 if is_aa(residue, standard=standard_aa_only):
309 return True
310 elif not standard_aa_only and "CA" in residue.child_dict:
311
312
313
314 warnings.warn("Assuming residue %s is an unknown modified "
315 "amino acid" % residue.get_resname())
316 return True
317 else:
318
319 return False
320
322 """Build and return a list of Polypeptide objects.
323
324 @param entity: polypeptides are searched for in this object
325 @type entity: L{Structure}, L{Model} or L{Chain}
326
327 @param aa_only: if 1, the residue needs to be a standard AA
328 @type aa_only: int
329 """
330 is_connected=self._is_connected
331 accept=self._accept
332 level=entity.get_level()
333
334 if level=="S":
335 model=entity[0]
336 chain_list=model.get_list()
337 elif level=="M":
338 chain_list=entity.get_list()
339 elif level=="C":
340 chain_list=[entity]
341 else:
342 raise PDBException("Entity should be Structure, Model or Chain.")
343 pp_list=[]
344 for chain in chain_list:
345 chain_it=iter(chain)
346 try:
347 prev_res = chain_it.next()
348 while not accept(prev_res, aa_only):
349 prev_res = chain_it.next()
350 except StopIteration:
351
352 continue
353 pp=None
354 for next_res in chain_it:
355 if accept(prev_res, aa_only) \
356 and accept(next_res, aa_only) \
357 and is_connected(prev_res, next_res):
358 if pp is None:
359 pp=Polypeptide()
360 pp.append(prev_res)
361 pp_list.append(pp)
362 pp.append(next_res)
363 else:
364
365
366 pp=None
367 prev_res=next_res
368 return pp_list
369
370
372 """Use CA--CA distance to find polypeptides."""
375
377 for r in [prev_res, next_res]:
378 if not r.has_id("CA"):
379 return False
380 n=next_res["CA"]
381 p=prev_res["CA"]
382
383 if n.is_disordered():
384 nlist=n.disordered_get_list()
385 else:
386 nlist=[n]
387 if p.is_disordered():
388 plist=p.disordered_get_list()
389 else:
390 plist=[p]
391 for nn in nlist:
392 for pp in plist:
393 if (nn-pp)<self.radius:
394 return True
395 return False
396
397
399 """Use C--N distance to find polypeptides."""
402
437
439 """Return 1 if distance between atoms<radius (PRIVATE)."""
440 if (c-n)<self.radius:
441 return 1
442 else:
443 return 0
444
445
446 if __name__=="__main__":
447 import sys
448 from Bio.PDB.PDBParser import PDBParser
449
450 p=PDBParser(PERMISSIVE=True)
451
452 s=p.get_structure("scr", sys.argv[1])
453
454 ppb=PPBuilder()
455
456 print "C-N"
457 for pp in ppb.build_peptides(s):
458 print pp.get_sequence()
459 for pp in ppb.build_peptides(s[0]):
460 print pp.get_sequence()
461 for pp in ppb.build_peptides(s[0]["A"]):
462 print pp.get_sequence()
463
464 for pp in ppb.build_peptides(s):
465 for phi, psi in pp.get_phi_psi_list():
466 print phi, psi
467
468 ppb=CaPPBuilder()
469
470 print "CA-CA"
471 for pp in ppb.build_peptides(s):
472 print pp.get_sequence()
473 for pp in ppb.build_peptides(s[0]):
474 print pp.get_sequence()
475 for pp in ppb.build_peptides(s[0]["A"]):
476 print pp.get_sequence()
477