1
2
3
4
5
6 """
7 Code to deal with COMPASS output, a program for profile/profile comparison.
8
9 Compass is described in:
10
11 Sadreyev R, Grishin N. COMPASS: a tool for comparison of multiple protein
12 alignments with assessment of statistical significance. J Mol Biol. 2003 Feb
13 7;326(1):317-36.
14
15 Tested with COMPASS 1.24.
16
17 Functions:
18 read Reads a COMPASS file containing one COMPASS record
19 parse Iterates over records in a COMPASS file.
20
21 Classes:
22 Record One result of a COMPASS file
23
24 DEPRECATED CLASSES:
25
26 _Scanner Scan compass results
27 _Consumer Consume scanner events
28 RecordParser Parse one compass record
29 Iterator Iterate through a number of compass records
30 """
31 import re
32
33
34
66
104
106 """
107 Hold information from one compass hit.
108 Ali1 one is the query, Ali2 the hit.
109 """
110
112 self.query=''
113 self.hit=''
114 self.gap_threshold=0
115 self.query_length=0
116 self.query_filtered_length=0
117 self.query_nseqs=0
118 self.query_neffseqs=0
119 self.hit_length=0
120 self.hit_filtered_length=0
121 self.hit_nseqs=0
122 self.hit_neffseqs=0
123 self.sw_score=0
124 self.evalue=-1
125 self.query_start=-1
126 self.hit_start=-1
127 self.query_aln=''
128 self.hit_aln=''
129 self.positives=''
130
131
133 """Return the length of the query covered in alignment"""
134 s = self.query_aln.replace("=", "")
135 return len(s)
136
138 """Return the length of the hit covered in the alignment"""
139 s = self.hit_aln.replace("=", "")
140 return len(s)
141
142
143
144 __regex = {"names": re.compile("Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+"),
145 "threshold": re.compile("Threshold of effective gap content in columns: (\S+)"),
146 "lengths": re.compile("length1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)\s+filtered_length2=(\S+)"),
147 "profilewidth": re.compile("Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)"),
148 "scores": re.compile("Smith-Waterman score = (\S+)\s+Evalue = (\S+)"),
149 "start": re.compile("(\d+)"),
150 "align": re.compile("^.{15}(\S+)"),
151 "positive_alignment": re.compile("^.{15}(.+)"),
152 }
153
155 """
156 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln
157 ------query----- -------hit-------------
158 """
159 if not "Ali1:" in line:
160 raise ValueError("Line does not contain 'Ali1:':\n%s" % line)
161 m = __regex["names"].search(line)
162 record.query = m.group(1)
163 record.hit = m.group(2)
164
166 if not line.startswith("Threshold"):
167 raise ValueError("Line does not start with 'Threshold':\n%s" % line)
168 m = __regex["threshold"].search(line)
169 record.gap_threshold = float(m.group(1))
170
172 if not line.startswith("length1="):
173 raise ValueError("Line does not start with 'length1=':\n%s" % line)
174 m = __regex["lengths"].search(line)
175 record.query_length = int(m.group(1))
176 record.query_filtered_length = float(m.group(2))
177 record.hit_length = int(m.group(3))
178 record.hit_filtered_length = float(m.group(4))
179
181 if not "Nseqs1" in line:
182 raise ValueError("Line does not contain 'Nseqs1':\n%s" % line)
183 m = __regex["profilewidth"].search(line)
184 record.query_nseqs = int(m.group(1))
185 record.query_neffseqs = float(m.group(2))
186 record.hit_nseqs = int(m.group(3))
187 record.hit_neffseqs = float(m.group(4))
188
190 if not line.startswith("Smith-Waterman"):
191 raise ValueError("Line does not start with 'Smith-Waterman':\n%s" % line)
192 m = __regex["scores"].search(line)
193 if m:
194 record.sw_score = int(m.group(1))
195 record.evalue = float(m.group(2))
196 else:
197 record.sw_score = 0
198 record.evalue = -1.0
199
201 m = __regex["start"].search(line)
202 if m:
203 record.query_start = int(m.group(1))
204 m = __regex["align"].match(line)
205 assert m!=None, "invalid match"
206 record.query_aln += m.group(1)
207
209 m = __regex["positive_alignment"].match(line)
210 assert m!=None, "invalid match"
211 record.positives += m.group(1)
212
214 m = __regex["start"].search(line)
215 if m:
216 record.hit_start = int(m.group(1))
217 m = __regex["align"].match(line)
218 assert m!=None, "invalid match"
219 record.hit_aln += m.group(1)
220
221
222
223 from Bio import File
224 from Bio.ParserSupport import *
225 import Bio
226
227
229 """Reads compass output and generate events (DEPRECATED)"""
231 import warnings
232 warnings.warn("Bio.Compass._Scanner is deprecated; please use the read() and parse() functions in this module instead", Bio.BiopythonDeprecationWarning)
233
234 - def feed(self, handle, consumer):
247
248
256
262
268
274
280
286
288 """
289 QUERY 2 LSDRLELVSASEIRKLFDIAAGMKDVISLGIGEPDFDTPQHIKEYAKEALDKGLTHYGPN
290 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
291 QUERY 2 LSDRLELVSASEIRKLFDIAAGMKDVISLGIGEPDFDTPQHIKEYAKEALDKGLTHYGPN
292
293
294 QUERY IGLLELREAIAEKLKKQNGIEADPKTEIMVLLGANQAFLMGLSAFLKDGEEVLIPTPAFV
295 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
296 QUERY IGLLELREAIAEKLKKQNGIEADPKTEIMVLLGANQAFLMGLSAFLKDGEEVLIPTPAFV
297
298 """
299 while 1:
300 line = handle.readline()
301 if not line:
302 break
303 if is_blank_line(line):
304 continue
305 else:
306 consumer.query_alignment(line)
307 read_and_call(handle, consumer.positive_alignment)
308 read_and_call(handle, consumer.hit_alignment)
309
311
312 _re_names = re.compile("Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+")
313 _re_threshold = \
314 re.compile("Threshold of effective gap content in columns: (\S+)")
315 _re_lengths = \
316 re.compile("length1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)"
317 + "\s+filtered_length2=(\S+)")
318 _re_profilewidth = \
319 re.compile("Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)")
320 _re_scores = re.compile("Smith-Waterman score = (\S+)\s+Evalue = (\S+)")
321 _re_start = re.compile("(\d+)")
322 _re_align = re.compile("^.{15}(\S+)")
323 _re_positive_alignment = re.compile("^.{15}(.+)")
324
326 import warnings
327 warnings.warn("Bio.Compass._Consumer is deprecated; please use the read() and parse() functions in this module instead", Bio.BiopythonDeprecationWarning)
328 self.data = None
329
331 """
332 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln
333 ------query----- -------hit-------------
334 """
335 self.data = Record()
336 m = self.__class__._re_names.search(line)
337 self.data.query = m.group(1)
338 self.data.hit = m.group(2)
339
343
345 m = self.__class__._re_lengths.search(line)
346 self.data.query_length = int(m.group(1))
347 self.data.query_filtered_length = float(m.group(2))
348 self.data.hit_length = int(m.group(3))
349 self.data.hit_filtered_length = float(m.group(4))
350
352 m = self.__class__._re_profilewidth.search(line)
353 self.data.query_nseqs = int(m.group(1))
354 self.data.query_neffseqs = float(m.group(2))
355 self.data.hit_nseqs = int(m.group(3))
356 self.data.hit_neffseqs = float(m.group(4))
357
359 m = self.__class__._re_scores.search(line)
360 if m:
361 self.data.sw_score = int(m.group(1))
362 self.data.evalue = float(m.group(2))
363 else:
364 self.data.sw_score = 0
365 self.data.evalue = -1.0
366
368 m = self.__class__._re_start.search(line)
369 if m:
370 self.data.query_start = int(m.group(1))
371 m = self.__class__._re_align.match(line)
372 assert m!=None, "invalid match"
373 self.data.query_aln = self.data.query_aln + m.group(1)
374
379
381 m = self.__class__._re_start.search(line)
382 if m:
383 self.data.hit_start = int(m.group(1))
384 m = self.__class__._re_align.match(line)
385 assert m!=None, "invalid match"
386 self.data.hit_aln = self.data.hit_aln + m.group(1)
387
389 """Parses compass results into a Record object (DEPRECATED).
390 """
392 import warnings
393 warnings.warn("Bio.Compass._RecordParser is deprecated; please use the read() and parse() functions in this module instead", Bio.BiopythonDeprecationWarning)
394 self._scanner = _Scanner()
395 self._consumer = _Consumer()
396
397
398 - def parse(self, handle):
405
407 """Iterate through a file of compass results (DEPRECATED)."""
414
416 lines = []
417 while 1:
418 line = self._uhandle.readline()
419 if not line:
420 break
421 if line[0:4] == "Ali1" and lines:
422 self._uhandle.saveline(line)
423 break
424
425 lines.append(line)
426
427
428 if not lines:
429 return None
430
431 data = ''.join(lines)
432 return self._parser.parse(File.StringHandle(data))
433