1
2
3
4
5
6 """
7 Bio.AlignIO support for "fasta-m10" output from Bill Pearson's FASTA tools.
8
9 You are expected to use this module via the Bio.AlignIO functions (or the
10 Bio.SeqIO functions if you want to work directly with the gapped sequences).
11
12 This module contains a parser for the pairwise alignments produced by Bill
13 Pearson's FASTA tools, for use from the Bio.AlignIO interface where it is
14 refered to as the "fasta-m10" file format (as we only support the machine
15 readable output format selected with the -m 10 command line option).
16
17 This module does NOT cover the generic "fasta" file format originally
18 developed as an input format to the FASTA tools. The Bio.AlignIO and
19 Bio.SeqIO both use the Bio.SeqIO.FastaIO module to deal with these files,
20 which can also be used to store a multiple sequence alignments.
21 """
22
23 from Bio.Seq import Seq
24 from Bio.SeqRecord import SeqRecord
25 from Bio.Align import MultipleSeqAlignment
26 from Interfaces import AlignmentIterator
27 from Bio.Alphabet import single_letter_alphabet, generic_dna, generic_protein
28 from Bio.Alphabet import Gapped
29
30
32 """Helper function for the main parsing code (PRIVATE).
33
34 To get the actual pairwise alignment sequences, we must first
35 translate the un-gapped sequence based coordinates into positions
36 in the gapped sequence (which may have a flanking region shown
37 using leading - characters). To date, I have never seen any
38 trailing flanking region shown in the m10 file, but the
39 following code should also cope with that.
40
41 Note that this code seems to work fine even when the "sq_offset"
42 entries are prsent as a result of using the -X command line option.
43 """
44 align_stripped = alignment_seq_with_flanking.strip("-")
45 display_start = int(annotation['al_display_start'])
46 if int(annotation['al_start']) <= int(annotation['al_stop']):
47 start = int(annotation['al_start']) \
48 - display_start
49 end = int(annotation['al_stop']) \
50 - display_start + 1
51 else:
52
53 start = display_start \
54 - int(annotation['al_start'])
55 end = display_start \
56 - int(annotation['al_stop']) + 1
57 end += align_stripped.count("-")
58 assert 0 <= start and start < end and end <= len(align_stripped), \
59 "Problem with sequence start/stop,\n%s[%i:%i]\n%s" \
60 % (alignment_seq_with_flanking, start, end, annotation)
61 return align_stripped[start:end]
62
64 """Alignment iterator for the FASTA tool's pairwise alignment output.
65
66 This is for reading the pairwise alignments output by Bill Pearson's
67 FASTA program when called with the -m 10 command line option for machine
68 readable output. For more details about the FASTA tools, see the website
69 http://fasta.bioch.virginia.edu/ and the paper:
70
71 W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448
72
73 This class is intended to be used via the Bio.AlignIO.parse() function
74 by specifying the format as "fasta-m10" as shown in the following code:
75
76 from Bio import AlignIO
77 handle = ...
78 for a in AlignIO.parse(handle, "fasta-m10"):
79 assert len(a) == 2, "Should be pairwise!"
80 print "Alignment length %i" % a.get_alignment_length()
81 for record in a:
82 print record.seq, record.name, record.id
83
84 Note that this is not a full blown parser for all the information
85 in the FASTA output - for example, most of the header and all of the
86 footer is ignored. Also, the alignments are not batched according to
87 the input queries.
88
89 Also note that there can be up to about 30 letters of flanking region
90 included in the raw FASTA output as contextual information. This is NOT
91 part of the alignment itself, and is not included in the resulting
92 MultipleSeqAlignment objects returned.
93 """
94 if alphabet is None:
95 alphabet = single_letter_alphabet
96
97 state_PREAMBLE = -1
98 state_NONE = 0
99 state_QUERY_HEADER = 1
100 state_ALIGN_HEADER = 2
101 state_ALIGN_QUERY = 3
102 state_ALIGN_MATCH = 4
103 state_ALIGN_CONS = 5
104
105 def build_hsp():
106 assert query_tags, query_tags
107 assert match_tags, match_tags
108 evalue = align_tags.get("fa_expect", None)
109 q = "?"
110 m = "?"
111 tool = global_tags.get("tool", "").upper()
112 try:
113 q = _extract_alignment_region(query_seq, query_tags)
114 if tool in ["TFASTX"] and len(match_seq) == len(q):
115 m = match_seq
116
117
118 else:
119 m = _extract_alignment_region(match_seq, match_tags)
120 assert len(q) == len(m)
121 except AssertionError, err:
122 print "Darn... amino acids vs nucleotide coordinates?"
123 print tool
124 print query_seq
125 print query_tags
126 print q, len(q)
127 print match_seq
128 print match_tags
129 print m, len(m)
130 print handle.name
131 raise err
132
133 assert alphabet is not None
134 alignment = MultipleSeqAlignment([], alphabet)
135
136
137
138 alignment._annotations = {}
139
140
141 for key, value in header_tags.iteritems():
142 alignment._annotations[key] = value
143 for key, value in align_tags.iteritems():
144 alignment._annotations[key] = value
145
146
147
148 record = SeqRecord(Seq(q, alphabet),
149 id = query_id,
150 name = "query",
151 description = query_descr,
152 annotations = {"original_length" : int(query_tags["sq_len"])})
153
154 record._al_start = int(query_tags["al_start"])
155 record._al_stop = int(query_tags["al_stop"])
156 alignment.append(record)
157
158
159
160
161 if alphabet == single_letter_alphabet and "sq_type" in query_tags:
162 if query_tags["sq_type"] == "D":
163 record.seq.alphabet = generic_dna
164 elif query_tags["sq_type"] == "p":
165 record.seq.alphabet = generic_protein
166 if "-" in q:
167 if not hasattr(record.seq.alphabet,"gap_char"):
168 record.seq.alphabet = Gapped(record.seq.alphabet, "-")
169
170
171
172 record = SeqRecord(Seq(m, alphabet),
173 id = match_id,
174 name = "match",
175 description = match_descr,
176 annotations = {"original_length" : int(match_tags["sq_len"])})
177
178 record._al_start = int(match_tags["al_start"])
179 record._al_stop = int(match_tags["al_stop"])
180 alignment.append(record)
181
182
183 if alphabet == single_letter_alphabet and "sq_type" in match_tags:
184 if match_tags["sq_type"] == "D":
185 record.seq.alphabet = generic_dna
186 elif match_tags["sq_type"] == "p":
187 record.seq.alphabet = generic_protein
188 if "-" in m:
189 if not hasattr(record.seq.alphabet,"gap_char"):
190 record.seq.alphabet = Gapped(record.seq.alphabet, "-")
191
192 return alignment
193
194 state = state_PREAMBLE
195 query_id = None
196 match_id = None
197 query_descr = ""
198 match_descr = ""
199 global_tags = {}
200 header_tags = {}
201 align_tags = {}
202 query_tags = {}
203 match_tags = {}
204 query_seq = ""
205 match_seq = ""
206 cons_seq = ""
207 for line in handle:
208 if ">>>" in line and not line.startswith(">>>"):
209 if query_id and match_id:
210
211
212 yield build_hsp()
213 state = state_NONE
214 query_descr = line[line.find(">>>")+3:].strip()
215 query_id = query_descr.split(None,1)[0]
216 match_id = None
217 header_tags = {}
218 align_tags = {}
219 query_tags = {}
220 match_tags = {}
221 query_seq = ""
222 match_seq = ""
223 cons_seq = ""
224 elif line.startswith("!! No "):
225
226
227
228
229 assert state == state_NONE
230 assert not header_tags
231 assert not align_tags
232 assert not match_tags
233 assert not query_tags
234 assert match_id is None
235 assert not query_seq
236 assert not match_seq
237 assert not cons_seq
238 query_id = None
239 elif line.strip() in [">>><<<", ">>>///"]:
240
241 if query_id and match_id:
242 yield build_hsp()
243 state = state_NONE
244 query_id = None
245 match_id = None
246 header_tags = {}
247 align_tags = {}
248 query_tags = {}
249 match_tags = {}
250 query_seq = ""
251 match_seq = ""
252 cons_seq = ""
253 elif line.startswith(">>>"):
254
255 assert query_id is not None
256 assert line[3:].split(", ",1)[0] == query_id, line
257 assert match_id is None
258 assert not header_tags
259 assert not align_tags
260 assert not query_tags
261 assert not match_tags
262 assert not match_seq
263 assert not query_seq
264 assert not cons_seq
265 state = state_QUERY_HEADER
266 elif line.startswith(">>"):
267
268 if query_id and match_id:
269 yield build_hsp()
270 align_tags = {}
271 query_tags = {}
272 match_tags = {}
273 query_seq = ""
274 match_seq = ""
275 cons_seq = ""
276 match_descr = line[2:].strip()
277 match_id = match_descr.split(None,1)[0]
278 state = state_ALIGN_HEADER
279 elif line.startswith(">--"):
280
281 assert query_id and match_id, line
282 yield build_hsp()
283
284
285 align_tags = {}
286 query_tags = {}
287 match_tags = {}
288 query_seq = ""
289 match_seq = ""
290 cons_seq = ""
291 state = state_ALIGN_HEADER
292 elif line.startswith(">"):
293 if state == state_ALIGN_HEADER:
294
295 assert query_id is not None, line
296 assert match_id is not None, line
297 assert query_id.startswith(line[1:].split(None,1)[0]), line
298 state = state_ALIGN_QUERY
299 elif state == state_ALIGN_QUERY:
300
301 assert query_id is not None, line
302 assert match_id is not None, line
303 assert match_id.startswith(line[1:].split(None,1)[0]), line
304 state = state_ALIGN_MATCH
305 elif state == state_NONE:
306
307 pass
308 else:
309 assert False, "state %i got %r" % (state, line)
310 elif line.startswith("; al_cons"):
311 assert state == state_ALIGN_MATCH, line
312 state = state_ALIGN_CONS
313
314 elif line.startswith("; "):
315 key, value = [s.strip() for s in line[2:].split(": ",1)]
316 if state == state_QUERY_HEADER:
317 header_tags[key] = value
318 elif state == state_ALIGN_HEADER:
319 align_tags[key] = value
320 elif state == state_ALIGN_QUERY:
321 query_tags[key] = value
322 elif state == state_ALIGN_MATCH:
323 match_tags[key] = value
324 else:
325 assert False, "Unexpected state %r, %r" % (state, line)
326 elif state == state_ALIGN_QUERY:
327 query_seq += line.strip()
328 elif state == state_ALIGN_MATCH:
329 match_seq += line.strip()
330 elif state == state_ALIGN_CONS:
331 cons_seq += line.strip("\n")
332 elif state == state_PREAMBLE:
333 if line.startswith("#"):
334 global_tags["command"] = line[1:].strip()
335 elif line.startswith(" version "):
336 global_tags["version"] = line[9:].strip()
337 elif " compares a " in line:
338 global_tags["tool"] = line[:line.find(" compares a ")].strip()
339 elif " searches a " in line:
340 global_tags["tool"] = line[:line.find(" searches a ")].strip()
341 else:
342 pass
343
344
345 if __name__ == "__main__":
346 print "Running a quick self-test"
347
348
349 simple_example = \
350 """# /opt/fasta/fasta34 -Q -H -E 1 -m 10 NC_002127.faa NC_009649.faa
351 FASTA searches a protein or DNA sequence data bank
352 version 34.26 January 12, 2007
353 Please cite:
354 W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448
355
356 Query library NC_002127.faa vs NC_009649.faa library
357 searching NC_009649.faa library
358
359 1>>>gi|10955263|ref|NP_052604.1| plasmid mobilization [Escherichia coli O157:H7 s 107 aa - 107 aa
360 vs NC_009649.faa library
361
362 45119 residues in 180 sequences
363 Expectation_n fit: rho(ln(x))= 6.9146+/-0.0249; mu= -5.7948+/- 1.273
364 mean_var=53.6859+/-13.609, 0's: 0 Z-trim: 1 B-trim: 9 in 1/25
365 Lambda= 0.175043
366
367 FASTA (3.5 Sept 2006) function [optimized, BL50 matrix (15:-5)] ktup: 2
368 join: 36, opt: 24, open/ext: -10/-2, width: 16
369 Scan time: 0.000
370 The best scores are: opt bits E(180)
371 gi|152973457|ref|YP_001338508.1| ATPase with chape ( 931) 71 24.9 0.58
372 gi|152973588|ref|YP_001338639.1| F pilus assembly ( 459) 63 23.1 0.99
373
374 >>>gi|10955263|ref|NP_052604.1|, 107 aa vs NC_009649.faa library
375 ; pg_name: /opt/fasta/fasta34
376 ; pg_ver: 34.26
377 ; pg_argv: /opt/fasta/fasta34 -Q -H -E 1 -m 10 NC_002127.faa NC_009649.faa
378 ; pg_name: FASTA
379 ; pg_ver: 3.5 Sept 2006
380 ; pg_matrix: BL50 (15:-5)
381 ; pg_open-ext: -10 -2
382 ; pg_ktup: 2
383 ; pg_optcut: 24
384 ; pg_cgap: 36
385 ; mp_extrap: 60000 180
386 ; mp_stats: Expectation_n fit: rho(ln(x))= 6.9146+/-0.0249; mu= -5.7948+/- 1.273 mean_var=53.6859+/-13.609, 0's: 0 Z-trim: 1 B-trim: 9 in 1/25 Lambda= 0.175043
387 ; mp_KS: -0.0000 (N=0) at 8159228
388 >>gi|152973457|ref|YP_001338508.1| ATPase with chaperone activity, ATP-binding subunit [Klebsiella pneumoniae subsp. pneumoniae MGH 78578]
389 ; fa_frame: f
390 ; fa_initn: 65
391 ; fa_init1: 43
392 ; fa_opt: 71
393 ; fa_z-score: 90.3
394 ; fa_bits: 24.9
395 ; fa_expect: 0.58
396 ; sw_score: 71
397 ; sw_ident: 0.250
398 ; sw_sim: 0.574
399 ; sw_overlap: 108
400 >gi|10955263| ..
401 ; sq_len: 107
402 ; sq_offset: 1
403 ; sq_type: p
404 ; al_start: 5
405 ; al_stop: 103
406 ; al_display_start: 1
407 --------------------------MTKRSGSNT-RRRAISRPVRLTAE
408 ED---QEIRKRAAECGKTVSGFLRAAALGKKVNSLTDDRVLKEVM-----
409 RLGALQKKLFIDGKRVGDREYAEVLIAITEYHRALLSRLMAD
410 >gi|152973457|ref|YP_001338508.1| ..
411 ; sq_len: 931
412 ; sq_type: p
413 ; al_start: 96
414 ; al_stop: 195
415 ; al_display_start: 66
416 SDFFRIGDDATPVAADTDDVVDASFGEPAAAGSGAPRRRGSGLASRISEQ
417 SEALLQEAAKHAAEFGRS------EVDTEHLLLALADSDVVKTILGQFKI
418 KVDDLKRQIESEAKR-GDKPF-EGEIGVSPRVKDALSRAFVASNELGHSY
419 VGPEHFLIGLAEEGEGLAANLLRRYGLTPQ
420 >>gi|152973588|ref|YP_001338639.1| F pilus assembly protein [Klebsiella pneumoniae subsp. pneumoniae MGH 78578]
421 ; fa_frame: f
422 ; fa_initn: 33
423 ; fa_init1: 33
424 ; fa_opt: 63
425 ; fa_z-score: 86.1
426 ; fa_bits: 23.1
427 ; fa_expect: 0.99
428 ; sw_score: 63
429 ; sw_ident: 0.266
430 ; sw_sim: 0.656
431 ; sw_overlap: 64
432 >gi|10955263| ..
433 ; sq_len: 107
434 ; sq_offset: 1
435 ; sq_type: p
436 ; al_start: 32
437 ; al_stop: 94
438 ; al_display_start: 2
439 TKRSGSNTRRRAISRPVRLTAEEDQEIRKRAAECGKTVSGFLRAAALGKK
440 VNSLTDDRVLKEV-MRLGALQKKLFIDGKRVGDREYAEVLIAITEYHRAL
441 LSRLMAD
442 >gi|152973588|ref|YP_001338639.1| ..
443 ; sq_len: 459
444 ; sq_type: p
445 ; al_start: 191
446 ; al_stop: 248
447 ; al_display_start: 161
448 VGGLFPRTQVAQQKVCQDIAGESNIFSDWAASRQGCTVGG--KMDSVQDK
449 ASDKDKERVMKNINIMWNALSKNRLFDG----NKELKEFIMTLTGTLIFG
450 ENSEITPLPARTTDQDLIRAMMEGGTAKIYHCNDSDKCLKVVADATVTIT
451 SNKALKSQISALLSSIQNKAVADEKLTDQE
452 2>>>gi|10955264|ref|NP_052605.1| hypothetical protein pOSAK1_02 [Escherichia coli O157:H7 s 126 aa - 126 aa
453 vs NC_009649.faa library
454
455 45119 residues in 180 sequences
456 Expectation_n fit: rho(ln(x))= 7.1374+/-0.0246; mu= -7.6540+/- 1.313
457 mean_var=51.1189+/-13.171, 0's: 0 Z-trim: 1 B-trim: 8 in 1/25
458 Lambda= 0.179384
459
460 FASTA (3.5 Sept 2006) function [optimized, BL50 matrix (15:-5)] ktup: 2
461 join: 36, opt: 24, open/ext: -10/-2, width: 16
462 Scan time: 0.000
463 The best scores are: opt bits E(180)
464 gi|152973462|ref|YP_001338513.1| hypothetical prot ( 101) 58 22.9 0.29
465
466 >>>gi|10955264|ref|NP_052605.1|, 126 aa vs NC_009649.faa library
467 ; pg_name: /opt/fasta/fasta34
468 ; pg_ver: 34.26
469 ; pg_argv: /opt/fasta/fasta34 -Q -H -E 1 -m 10 NC_002127.faa NC_009649.faa
470 ; pg_name: FASTA
471 ; pg_ver: 3.5 Sept 2006
472 ; pg_matrix: BL50 (15:-5)
473 ; pg_open-ext: -10 -2
474 ; pg_ktup: 2
475 ; pg_optcut: 24
476 ; pg_cgap: 36
477 ; mp_extrap: 60000 180
478 ; mp_stats: Expectation_n fit: rho(ln(x))= 7.1374+/-0.0246; mu= -7.6540+/- 1.313 mean_var=51.1189+/-13.171, 0's: 0 Z-trim: 1 B-trim: 8 in 1/25 Lambda= 0.179384
479 ; mp_KS: -0.0000 (N=0) at 8159228
480 >>gi|152973462|ref|YP_001338513.1| hypothetical protein KPN_pKPN3p05904 [Klebsiella pneumoniae subsp. pneumoniae MGH 78578]
481 ; fa_frame: f
482 ; fa_initn: 50
483 ; fa_init1: 50
484 ; fa_opt: 58
485 ; fa_z-score: 95.8
486 ; fa_bits: 22.9
487 ; fa_expect: 0.29
488 ; sw_score: 58
489 ; sw_ident: 0.289
490 ; sw_sim: 0.632
491 ; sw_overlap: 38
492 >gi|10955264| ..
493 ; sq_len: 126
494 ; sq_offset: 1
495 ; sq_type: p
496 ; al_start: 1
497 ; al_stop: 38
498 ; al_display_start: 1
499 ------------------------------MKKDKKYQIEAIKNKDKTLF
500 IVYATDIYSPSEFFSKIESDLKKKKSKGDVFFDLIIPNGGKKDRYVYTSF
501 NGEKFSSYTLNKVTKTDEYN
502 >gi|152973462|ref|YP_001338513.1| ..
503 ; sq_len: 101
504 ; sq_type: p
505 ; al_start: 44
506 ; al_stop: 81
507 ; al_display_start: 14
508 DALLGEIQRLRKQVHQLQLERDILTKANELIKKDLGVSFLKLKNREKTLI
509 VDALKKKYPVAELLSVLQLARSCYFYQNVCTISMRKYA
510 3>>>gi|10955265|ref|NP_052606.1| hypothetical protein pOSAK1_03 [Escherichia coli O157:H7 s 346 aa - 346 aa
511 vs NC_009649.faa library
512
513 45119 residues in 180 sequences
514 Expectation_n fit: rho(ln(x))= 6.0276+/-0.0276; mu= 3.0670+/- 1.461
515 mean_var=37.1634+/- 8.980, 0's: 0 Z-trim: 1 B-trim: 14 in 1/25
516 Lambda= 0.210386
517
518 FASTA (3.5 Sept 2006) function [optimized, BL50 matrix (15:-5)] ktup: 2
519 join: 37, opt: 25, open/ext: -10/-2, width: 16
520 Scan time: 0.020
521 The best scores are: opt bits E(180)
522 gi|152973545|ref|YP_001338596.1| putative plasmid ( 242) 70 27.5 0.082
523
524 >>>gi|10955265|ref|NP_052606.1|, 346 aa vs NC_009649.faa library
525 ; pg_name: /opt/fasta/fasta34
526 ; pg_ver: 34.26
527 ; pg_argv: /opt/fasta/fasta34 -Q -H -E 1 -m 10 NC_002127.faa NC_009649.faa
528 ; pg_name: FASTA
529 ; pg_ver: 3.5 Sept 2006
530 ; pg_matrix: BL50 (15:-5)
531 ; pg_open-ext: -10 -2
532 ; pg_ktup: 2
533 ; pg_optcut: 25
534 ; pg_cgap: 37
535 ; mp_extrap: 60000 180
536 ; mp_stats: Expectation_n fit: rho(ln(x))= 6.0276+/-0.0276; mu= 3.0670+/- 1.461 mean_var=37.1634+/- 8.980, 0's: 0 Z-trim: 1 B-trim: 14 in 1/25 Lambda= 0.210386
537 ; mp_KS: -0.0000 (N=0) at 8159228
538 >>gi|152973545|ref|YP_001338596.1| putative plasmid SOS inhibition protein A [Klebsiella pneumoniae subsp. pneumoniae MGH 78578]
539 ; fa_frame: f
540 ; fa_initn: 52
541 ; fa_init1: 52
542 ; fa_opt: 70
543 ; fa_z-score: 105.5
544 ; fa_bits: 27.5
545 ; fa_expect: 0.082
546 ; sw_score: 70
547 ; sw_ident: 0.279
548 ; sw_sim: 0.651
549 ; sw_overlap: 43
550 >gi|10955265| ..
551 ; sq_len: 346
552 ; sq_offset: 1
553 ; sq_type: p
554 ; al_start: 197
555 ; al_stop: 238
556 ; al_display_start: 167
557 DFMCSILNMKEIVEQKNKEFNVDIKKETIESELHSKLPKSIDKIHEDIKK
558 QLSC-SLIMKKIDVEMEDYSTYCFSALRAIEGFIYQILNDVCNPSSSKNL
559 GEYFTENKPKYIIREIHQET
560 >gi|152973545|ref|YP_001338596.1| ..
561 ; sq_len: 242
562 ; sq_type: p
563 ; al_start: 52
564 ; al_stop: 94
565 ; al_display_start: 22
566 IMTVEEARQRGARLPSMPHVRTFLRLLTGCSRINSDVARRIPGIHRDPKD
567 RLSSLKQVEEALDMLISSHGEYCPLPLTMDVQAENFPEVLHTRTVRRLKR
568 QDFAFTRKMRREARQVEQSW
569 >>><<<
570
571
572 579 residues in 3 query sequences
573 45119 residues in 180 library sequences
574 Scomplib [34.26]
575 start: Tue May 20 16:38:45 2008 done: Tue May 20 16:38:45 2008
576 Total Scan time: 0.020 Total Display time: 0.010
577
578 Function used was FASTA [version 34.26 January 12, 2007]
579
580 """
581
582
583 from StringIO import StringIO
584
585 alignments = list(FastaM10Iterator(StringIO(simple_example)))
586 assert len(alignments) == 4, len(alignments)
587 assert len(alignments[0]) == 2
588 for a in alignments:
589 print "Alignment %i sequences of length %i" \
590 % (len(a), a.get_alignment_length())
591 for r in a:
592 print "%s %s %i" % (r.seq, r.id, r.annotations["original_length"])
593
594 print "Done"
595
596 import os
597 path = "../../Tests/Fasta/"
598 files = [f for f in os.listdir(path) if os.path.splitext(f)[-1] == ".m10"]
599 files.sort()
600 for filename in files:
601 if os.path.splitext(filename)[-1] == ".m10":
602 print
603 print filename
604 print "="*len(filename)
605 for i,a in enumerate(FastaM10Iterator(open(os.path.join(path,filename)))):
606 print "#%i, %s" % (i+1,a)
607 for r in a:
608 if "-" in r.seq:
609 assert r.seq.alphabet.gap_char == "-"
610 else:
611 assert not hasattr(r.seq.alphabet, "gap_char")
612