1
2
3
4
5
6 """Classify protein backbone structure according to Kolodny et al's fragment
7 libraries.
8
9 It can be regarded as a form of objective secondary structure classification.
10 Only fragments of length 5 or 7 are supported (ie. there is a 'central'
11 residue).
12
13 Full reference:
14
15 Kolodny R, Koehl P, Guibas L, Levitt M.
16 Small libraries of protein fragments model native protein structures accurately.
17 J Mol Biol. 2002 323(2):297-307.
18
19 The definition files of the fragments can be obtained from:
20
21 U{http://csb.stanford.edu/~rachel/fragments/}
22
23 You need these files to use this module.
24
25 The following example uses the library with 10 fragments of length 5.
26 The library files can be found in directory 'fragment_data'.
27
28 >>> model = structure[0]
29 >>> fm = FragmentMapper(model, lsize=10, flength=5, dir="fragment_data")
30 >>> fragment = fm[residue]
31 """
32
33 import numpy
34
35 from Bio.SVDSuperimposer import SVDSuperimposer
36
37 from Bio.PDB import Selection
38 from Bio.PDB.PDBExceptions import PDBException
39 from Bio.PDB.PDBParser import PDBParser
40 from Bio.PDB.Polypeptide import PPBuilder
41
42
43
44
45
46 _FRAGMENT_FILE="lib_%s_z_%s.txt"
47
48
50 """
51 Read a fragment spec file (available from
52 U{http://csb.stanford.edu/rachel/fragments/}
53 and return a list of Fragment objects.
54
55 @param size: number of fragments in the library
56 @type size: int
57
58 @param length: length of the fragments
59 @type length: int
60
61 @param dir: directory where the fragment spec files can be found
62 @type dir: string
63 """
64 filename=(dir+"/"+_FRAGMENT_FILE) % (size, length)
65 fp=open(filename, "r")
66 flist=[]
67
68 fid=0
69 for l in fp.readlines():
70
71 if l[0]=="*" or l[0]=="\n":
72 continue
73 sl=l.split()
74 if sl[1]=="------":
75
76 f=Fragment(length, fid)
77 flist.append(f)
78
79 fid+=1
80 continue
81
82 coord=numpy.array(map(float, sl[0:3]))
83
84 f.add_residue("XXX", coord)
85 fp.close()
86 return flist
87
88
90 """
91 Represent a polypeptide C-alpha fragment.
92 """
94 """
95 @param length: length of the fragment
96 @type length: int
97
98 @param fid: id for the fragment
99 @type fid: int
100 """
101
102 self.length=length
103
104 self.counter=0
105 self.resname_list=[]
106
107 self.coords_ca=numpy.zeros((length, 3), "d")
108 self.fid=fid
109
111 """
112 @return: the residue names
113 @rtype: [string, string,...]
114 """
115 return self.resname_list
116
118 """
119 @return: id for the fragment
120 @rtype: int
121 """
122 return self.fid
123
125 """
126 @return: the CA coords in the fragment
127 @rtype: Numeric (Nx3) array
128 """
129 return self.coords_ca
130
132 """
133 @param resname: residue name (eg. GLY).
134 @type resname: string
135
136 @param ca_coord: the c-alpha coorinates of the residues
137 @type ca_coord: Numeric array with length 3
138 """
139 if self.counter>=self.length:
140 raise PDBException("Fragment boundary exceeded.")
141 self.resname_list.append(resname)
142 self.coords_ca[self.counter]=ca_coord
143 self.counter=self.counter+1
144
146 """
147 @return: length of fragment
148 @rtype: int
149 """
150 return self.length
151
153 """
154 Return rmsd between two fragments.
155
156 Example:
157 >>> rmsd=fragment1-fragment2
158
159 @return: rmsd between fragments
160 @rtype: float
161 """
162 sup=SVDSuperimposer()
163 sup.set(self.coords_ca, other.coords_ca)
164 sup.run()
165 return sup.get_rms()
166
168 """
169 Returns <Fragment length=L id=ID> where L=length of fragment
170 and ID the identifier (rank in the library).
171 """
172 return "<Fragment length=%i id=%i>" % (self.length, self.fid)
173
174
176 """
177 Dice up a peptide in fragments of length "length".
178
179 @param pp: a list of residues (part of one peptide)
180 @type pp: [L{Residue}, L{Residue}, ...]
181
182 @param length: fragment length
183 @type length: int
184 """
185 frag_list=[]
186 for i in range(0, len(pp)-length+1):
187 f=Fragment(length, -1)
188 for j in range(0, length):
189 residue=pp[i+j]
190 resname=residue.get_resname()
191 if residue.has_id("CA"):
192 ca=residue["CA"]
193 else:
194 raise PDBException("CHAINBREAK")
195 if ca.is_disordered():
196 raise PDBException("CHAINBREAK")
197 ca_coord=ca.get_coord()
198 f.add_residue(resname, ca_coord)
199 frag_list.append(f)
200 return frag_list
201
202
204 """
205 Map all frgaments in flist to the closest
206 (in RMSD) fragment in reflist.
207
208 Returns a list of reflist indices.
209
210 @param flist: list of protein fragments
211 @type flist: [L{Fragment}, L{Fragment}, ...]
212
213 @param reflist: list of reference (ie. library) fragments
214 @type reflist: [L{Fragment}, L{Fragment}, ...]
215 """
216 mapped=[]
217 for f in flist:
218 rank=[]
219 for i in range(0, len(reflist)):
220 rf=reflist[i]
221 rms=f-rf
222 rank.append((rms, rf))
223 rank.sort()
224 fragment=rank[0][1]
225 mapped.append(fragment)
226 return mapped
227
228
230 """
231 Map polypeptides in a model to lists of representative fragments.
232 """
233 - def __init__(self, model, lsize=20, flength=5, fdir="."):
234 """
235 @param model: the model that will be mapped
236 @type model: L{Model}
237
238 @param lsize: number of fragments in the library
239 @type lsize: int
240
241 @param flength: length of fragments in the library
242 @type flength: int
243
244 @param fdir: directory where the definition files are
245 found (default=".")
246 @type fdir: string
247 """
248 if flength==5:
249 self.edge=2
250 elif flength==7:
251 self.edge=3
252 else:
253 raise PDBException("Fragment length should be 5 or 7.")
254 self.flength=flength
255 self.lsize=lsize
256 self.reflist=_read_fragments(lsize, flength, fdir)
257 self.model=model
258 self.fd=self._map(self.model)
259
260 - def _map(self, model):
261 """
262 @param model: the model that will be mapped
263 @type model: L{Model}
264 """
265 ppb=PPBuilder()
266 ppl=ppb.build_peptides(model)
267 fd={}
268 for pp in ppl:
269 try:
270
271 flist=_make_fragment_list(pp, self.flength)
272
273 mflist=_map_fragment_list(flist, self.reflist)
274 for i in range(0, len(pp)):
275 res=pp[i]
276 if i<self.edge:
277
278 continue
279 elif i>=(len(pp)-self.edge):
280
281 continue
282 else:
283
284 index=i-self.edge
285 assert(index>=0)
286 fd[res]=mflist[index]
287 except PDBException, why:
288 if why == 'CHAINBREAK':
289
290 pass
291 else:
292 raise PDBException(why)
293 return fd
294
296 """(Obsolete)
297
298 @type res: L{Residue}
299 """
300 import warnings
301 warnings.warn("has_key is obsolete; use 'res in object' instead", PendingDeprecationWarning)
302 return (res in self)
303
305 """True if the given residue is in any of the mapped fragments.
306
307 @type res: L{Residue}
308 """
309 return (res in self.fd)
310
312 """
313 @type res: L{Residue}
314
315 @return: fragment classification
316 @rtype: L{Fragment}
317 """
318 return self.fd[res]
319
320
321 if __name__=="__main__":
322
323 import sys
324
325 p=PDBParser()
326 s=p.get_structure("X", sys.argv[1])
327
328 m=s[0]
329 fm=FragmentMapper(m, 10, 5, "levitt_data")
330
331
332 for r in Selection.unfold_entities(m, "R"):
333
334 print r,
335 if r in fm:
336 print fm[r]
337 else:
338 print
339