1
2
3
4
5
6
7 """Bio.SeqIO support for the "genbank" and "embl" file formats.
8
9 You are expected to use this module via the Bio.SeqIO functions.
10 Note that internally this module calls Bio.GenBank to do the actual
11 parsing of GenBank, EMBL and IMGT files.
12
13 See also:
14
15 International Nucleotide Sequence Database Collaboration
16 http://www.insdc.org/
17
18 GenBank
19 http://www.ncbi.nlm.nih.gov/Genbank/
20
21 EMBL Nucleotide Sequence Database
22 http://www.ebi.ac.uk/embl/
23
24 DDBJ (DNA Data Bank of Japan)
25 http://www.ddbj.nig.ac.jp/
26
IMGT (uses a variant of the EMBL format with longer feature indents)
28 http://imgt.cines.fr/download/LIGM-DB/userman_doc.html
29 http://imgt.cines.fr/download/LIGM-DB/ftable_doc.html
30 http://www.ebi.ac.uk/imgt/hla/docs/manual.html
31
32 """
33
34 from Bio.Seq import UnknownSeq
35 from Bio.GenBank.Scanner import GenBankScanner, EmblScanner, _ImgtScanner
36 from Bio import Alphabet
37 from Interfaces import SequentialSequenceWriter
38 from Bio import SeqFeature
39
40 from Bio._py3k import _is_int_or_long
41
42
43
44
45
46
47
48
49
def GenBankIterator(handle):
    """Break up a GenBank file into SeqRecord objects.

    Every section from the LOCUS line to the terminating // becomes
    a single SeqRecord with associated annotation and features.

    Note that for genomes or chromosomes, there is typically only
    one record.

    Arguments:
     - handle - the input handle, passed unchanged to the scanner.

    Returns whatever GenBankScanner.parse_records() returns for the handle
    (presumably an iterator of SeqRecord objects - confirm against
    Bio.GenBank.Scanner).
    """
    # A new scanner is created per call, with debug level 0.
    return GenBankScanner(debug=0).parse_records(handle)
60
def EmblIterator(handle):
    """Break up an EMBL file into SeqRecord objects.

    Every section from the ID line to the terminating // becomes
    a single SeqRecord with associated annotation and features.

    Note that for genomes or chromosomes, there is typically only
    one record.

    Arguments:
     - handle - the input handle, passed unchanged to the scanner.

    Returns whatever EmblScanner.parse_records() returns for the handle
    (presumably an iterator of SeqRecord objects - confirm against
    Bio.GenBank.Scanner).
    """
    # A new scanner is created per call, with debug level 0.
    return EmblScanner(debug=0).parse_records(handle)
71
def ImgtIterator(handle):
    """Break up an IMGT file into SeqRecord objects.

    Every section from the ID line to the terminating // becomes
    a single SeqRecord with associated annotation and features.

    Note that for genomes or chromosomes, there is typically only
    one record.

    Arguments:
     - handle - the input handle, passed unchanged to the scanner.

    Returns whatever _ImgtScanner.parse_records() returns for the handle
    (presumably an iterator of SeqRecord objects - confirm against
    Bio.GenBank.Scanner).
    """
    # IMGT is handled by its own scanner class; debug level 0.
    return _ImgtScanner(debug=0).parse_records(handle)
82
def GenBankCdsFeatureIterator(handle, alphabet=Alphabet.generic_protein):
    """Break up a GenBank file into SeqRecord objects for each CDS feature.

    Every section from the LOCUS line to the terminating // can contain
    many CDS features.  Each of these is returned as a record with the
    stated amino acid translation sequence (if given).

    Arguments:
     - handle - the input handle, passed unchanged to the scanner.
     - alphabet - alphabet handed on to the scanner for the records it
       builds.  NOTE(review): the default shown here is assumed to be the
       generic protein alphabet - confirm against the original signature.
    """
    return GenBankScanner(debug=0).parse_cds_features(handle, alphabet)
92
def EmblCdsFeatureIterator(handle, alphabet=Alphabet.generic_protein):
    """Break up an EMBL file into SeqRecord objects for each CDS feature.

    Every section from the ID line to the terminating // can contain
    many CDS features.  Each of these is returned as a record with the
    stated amino acid translation sequence (if given).

    Arguments:
     - handle - the input handle, passed unchanged to the scanner.
     - alphabet - alphabet handed on to the scanner for the records it
       builds.  NOTE(review): the default shown here is assumed to be the
       generic protein alphabet - confirm against the original signature.
    """
    return EmblScanner(debug=0).parse_cds_features(handle, alphabet)
102
def _insdc_feature_position_string(pos, offset=0):
    """Build a GenBank/EMBL position string (PRIVATE).

    Use offset=1 to add one to convert a start position from python counting.

    Arguments:
     - pos - a SeqFeature position object.
     - offset - integer added to every coordinate that is written.

    Raises NotImplementedError for an AbstractPosition subclass that has no
    branch here, and ValueError if pos is not a position object at all.
    """
    # The order of the isinstance checks is kept exactly as it was, since the
    # inheritance relationships between the position classes are not visible
    # from this module.
    if isinstance(pos, SeqFeature.ExactPosition):
        return "%i" % (pos.position + offset)
    if isinstance(pos, SeqFeature.WithinPosition):
        first = pos.position + offset
        return "(%i.%i)" % (first, first + pos.extension)
    if isinstance(pos, SeqFeature.BetweenPosition):
        first = pos.position + offset
        return "(%i^%i)" % (first, first + pos.extension)
    if isinstance(pos, SeqFeature.BeforePosition):
        return "<%i" % (pos.position + offset)
    if isinstance(pos, SeqFeature.AfterPosition):
        return ">%i" % (pos.position + offset)
    if isinstance(pos, SeqFeature.OneOfPosition):
        # Each alternative is formatted recursively with the same offset.
        choices = [_insdc_feature_position_string(p, offset)
                   for p in pos.position_choices]
        return "one-of(%s)" % ",".join(choices)
    if isinstance(pos, SeqFeature.AbstractPosition):
        raise NotImplementedError("Please report this as a bug in Biopython.")
    raise ValueError("Expected a SeqFeature position object.")
128
129
def _insdc_location_string_ignoring_strand_and_subfeatures(feature, rec_length):
    """Build a location string ignoring strand and sub-features (PRIVATE).

    Arguments:
     - feature - object providing ``ref``, ``ref_db`` and ``location``
       (with ``start``, ``end``, ``nofuzzy_start`` and ``nofuzzy_end``).
     - rec_length - length of the parent record; only used for a
       zero-length location sitting at the very end of the record.

    Returns the location as a string, prefixed with "<ref>:" when the
    feature has a reference.  Raises ValueError if both the start and the
    end are UnknownPosition objects.
    """
    if feature.ref:
        ref = "%s:" % feature.ref
    else:
        ref = ""
    assert not feature.ref_db

    location = feature.location
    start = location.start
    end = location.end
    both_exact = isinstance(start, SeqFeature.ExactPosition) \
                 and isinstance(end, SeqFeature.ExactPosition)

    if both_exact and start.position == end.position:
        # Zero length location, written in the "n^n+1" between-bases form.
        if end.position == rec_length:
            # At the very end of the record the second coordinate is 1.
            return "%s%i^1" % (ref, rec_length)
        return "%s%i^%i" % (ref, end.position, end.position + 1)

    if both_exact and start.position + 1 == end.position:
        # Length one location, written as a single coordinate.
        return "%s%i" % (ref, end.position)

    start_unknown = isinstance(start, SeqFeature.UnknownPosition)
    end_unknown = isinstance(end, SeqFeature.UnknownPosition)
    if start_unknown and end_unknown:
        raise ValueError("Feature with unknown location")
    if start_unknown:
        # Unknown start: written as "<end..end".
        return "%s<%i..%s" % (ref,
                              location.nofuzzy_end,
                              _insdc_feature_position_string(end))
    if end_unknown:
        # Unknown end: written as ">start" after the start itself.  Start
        # coordinates use python counting, so both need the same +1
        # conversion applied in the general case below (previously missing
        # here, which shifted the output down by one).
        return "%s%s..>%i" % (ref,
                              _insdc_feature_position_string(start, +1),
                              location.nofuzzy_start + 1)

    # General case: start (converted from python counting) .. end.
    return ref \
           + _insdc_feature_position_string(start, +1) \
           + ".." \
           + _insdc_feature_position_string(end)
182
def _insdc_feature_location_string(feature, rec_length):
    """Build a GenBank/EMBL location string from a SeqFeature (PRIVATE).

    There is a choice of how to show joins on the reverse complement strand,
    GenBank used "complement(join(1,10),(20,100))" while EMBL used to use
    "join(complement(20,100),complement(1,10))" instead (but appears to have
    now adopted the GenBank convention). Notice that the order of the entries
    is reversed! This function therefore uses the first form. In this situation
    we expect the parent feature and the two children to all be marked as
    strand == -1, and in the order 0:10 then 19:100.

    Also need to consider dual-strand examples like these from the Arabidopsis
    thaliana chloroplast NC_000932: join(complement(69611..69724),139856..140650)
    gene ArthCp047, GeneID:844801 or its CDS (protein NP_051038.1 GI:7525057)
    which is further complicated by a splice:
    join(complement(69611..69724),139856..140087,140625..140650)

    For this mixed strand feature, the parent SeqFeature should have
    no strand (either 0 or None) while the child features should have either
    strand +1 or -1 as appropriate, and be listed in the order given here.

    Arguments:
     - feature - the feature to describe; its ``sub_features``, ``strand``
       and ``location_operator`` attributes are used here.
     - rec_length - length of the parent record, passed to the helper
       which formats each simple location.
    """
    if not feature.sub_features:
        # Simple feature: one location, wrapped in complement(...) if needed.
        location = _insdc_location_string_ignoring_strand_and_subfeatures(
            feature, rec_length)
        if feature.strand == -1:
            location = "complement(%s)" % location
        return location

    if feature.strand == -1:
        # Whole compound feature on the reverse strand: the children must all
        # agree, and a single complement(...) wraps the joined locations.
        for f in feature.sub_features:
            assert f.strand == -1, \
                "Sub-feature strand %r on a reverse strand parent" % f.strand
        parts = [_insdc_location_string_ignoring_strand_and_subfeatures(
                     f, rec_length)
                 for f in feature.sub_features]
        return "complement(%s(%s))" % (feature.location_operator,
                                       ",".join(parts))

    # Forward or mixed strand: each child is formatted recursively, so a
    # reverse strand child gets its own complement(...).
    assert feature.location_operator != "", \
        "Compound feature without a location operator"
    parts = [_insdc_feature_location_string(f, rec_length)
             for f in feature.sub_features]
    return "%s(%s)" % (feature.location_operator, ",".join(parts))
230
231
"""Base class for GenBank and EMBL writers (PRIVATE)."""
# Maximum width of an output line, in characters.
MAX_WIDTH = 80
# Column at which the location / qualifier text of a feature starts.
QUALIFIER_INDENT = 21
# Blank prefix used for continuation lines in the feature table.
QUALIFIER_INDENT_STR = " " * QUALIFIER_INDENT
# Template for the line carrying the feature key.  It is built from explicit
# pad widths so that it is exactly QUALIFIER_INDENT characters wide when %s is
# empty (five leading blanks, then the key column).
QUALIFIER_INDENT_TMP = " " * 5 + "%s" + " " * (QUALIFIER_INDENT - 5)
238
272
287
307
309 """Get an annotation dictionary entry (as a string).
310
311 Some entries are lists, in which case if just_first=True the first entry
312 is returned. If just_first=False (default) this verifies there is only
313 one entry before returning it."""
314 try:
315 answer = record.annotations[key]
316 except KeyError:
317 return default
318 if isinstance(answer, list):
319 if not just_first : assert len(answer) == 1
320 return str(answer[0])
321 else:
322 return str(answer)
323
325 """Returns a list of strings.
326
327 Any single words which are too long get returned as a whole line
328 (e.g. URLs) without an exception or warning.
329 """
330
331 text = text.strip()
332 if len(text) <= max_len:
333 return [text]
334
335 words = text.split()
336 text = ""
337 while words and len(text) + 1 + len(words[0]) <= max_len:
338 text += " " + words.pop(0)
339 text = text.strip()
340
341 answer = [text]
342 while words:
343 text = words.pop(0)
344 while words and len(text) + 1 + len(words[0]) <= max_len:
345 text += " " + words.pop(0)
346 text = text.strip()
347
348 answer.append(text)
349 assert not words
350 return answer
351
353 "Returns a list of strings, splits on commas."""
354
355
356
357 contig = record.annotations.get("contig", "")
358 if isinstance(contig, list) or isinstance(contig, tuple):
359 contig = "".join(contig)
360 contig = self.clean(contig)
361 i = 0
362 answer = []
363 while contig:
364 if len(contig) > max_len:
365
366 pos = contig[:max_len-1].rfind(",")
367 if pos == -1:
368 raise ValueError("Could not break up CONTIG")
369 text, contig = contig[:pos+1], contig[pos+1:]
370 else:
371 text, contig = contig, ""
372 answer.append(text)
373 return answer
374
# Width of the tag column on header lines; _write_single_line pads each tag
# to this width and requires tags to be shorter than it.
HEADER_WIDTH = 12
# NOTE(review): presumably the column where feature locations and qualifiers
# start - confirm against the feature writing code, which is not shown here.
QUALIFIER_INDENT = 21
378
380 "Used in the the 'header' of each GenBank record."""
381 assert len(tag) < self.HEADER_WIDTH
382 if len(text) > self.MAX_WIDTH - self.HEADER_WIDTH:
383 import warnings
384 warnings.warn("Annotation %r too long for %s line" % (text, tag))
385 self.handle.write("%s%s\n" % (tag.ljust(self.HEADER_WIDTH),
386 text.replace("\n", " ")))
387
396
405
407 default = "01-JAN-1980"
408 try :
409 date = record.annotations["date"]
410 except KeyError :
411 return default
412
413 if isinstance(date, list) and len(date)==1 :
414 date = date[0]
415
416 if not isinstance(date, basestring) or len(date) != 11 \
417 or date[2] != "-" or date[6] != "-" \
418 or not date[:2].isdigit() or not date[7:].isdigit() \
419 or int(date[:2]) > 31 \
420 or date[3:6] not in ["JAN", "FEB", "MAR", "APR", "MAY", "JUN",
421 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"] :
422
423 return default
424 return date
425
427 try:
428 division = record.annotations["data_file_division"]
429 except KeyError:
430 division = "UNK"
431 if division in ["PRI", "ROD", "MAM", "VRT", "INV", "PLN", "BCT",
432 "VRL", "PHG", "SYN", "UNA", "EST", "PAT", "STS",
433 "GSS", "HTG", "HTC", "ENV", "CON"]:
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456 pass
457 else:
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478 embl_to_gbk = {"FUN":"PLN",
479 "HUM":"PRI",
480 "MUS":"ROD",
481 "PRO":"BCT",
482 "UNC":"UNK",
483 "XXX":"UNK",
484 }
485 try:
486 division = embl_to_gbk[division]
487 except KeyError:
488 division = "UNK"
489 assert len(division)==3
490 return division
491
def _write_the_first_line(self, record):
    """Write the LOCUS line.

    The locus name is taken from record.name, falling back to record.id and
    then to the first "accession" annotation.  The line is laid out in fixed
    columns and is checked against that layout before being written.

    Raises ValueError if the locus name is longer than 16 characters, if
    the sequence is too long for the length field, or if the alphabet is
    not DNA, RNA or protein.  Raises TypeError for an invalid alphabet.
    """
    locus = record.name
    if not locus or locus == "<unknown name>":
        locus = record.id
    if not locus or locus == "<unknown id>":
        locus = self._get_annotation_str(record, "accession", just_first=True)
    if len(locus) > 16:
        raise ValueError("Locus identifier %r is too long" % str(locus))

    if len(record) > 99999999999:
        # The length field is 11 characters wide (see rjust(11) below).
        raise ValueError("Sequence too long!")

    # Get the base alphabet (underneath any Gapped or StopCodon encoding)
    a = Alphabet._get_base_alphabet(record.seq.alphabet)
    if not isinstance(a, Alphabet.Alphabet):
        raise TypeError("Invalid alphabet")
    elif isinstance(a, Alphabet.ProteinAlphabet):
        units = "aa"
    elif isinstance(a, Alphabet.NucleotideAlphabet):
        units = "bp"
    else:
        raise ValueError("Need a Nucleotide or Protein alphabet")

    # The two checks are kept separate: a generic nucleotide alphabet gets
    # past the one above but is rejected here.
    if isinstance(a, Alphabet.ProteinAlphabet):
        mol_type = ""
    elif isinstance(a, Alphabet.DNAAlphabet):
        mol_type = "DNA"
    elif isinstance(a, Alphabet.RNAAlphabet):
        mol_type = "RNA"
    else:
        raise ValueError("Need a DNA, RNA or Protein alphabet")

    division = self._get_data_division(record)

    assert len(units) == 2
    assert len(division) == 3

    # The fixed-width layout is spelled out with explicit pad widths (rather
    # than runs of blanks inside one template) so the columns checked below
    # cannot silently drift:
    #   0-11 "LOCUS", 12-27 name, 29-39 length, 41-42 units,
    #   44-46 strand type (blank), 47-52 molecule type,
    #   55-62 topology (blank), 64-66 division, 68-78 date.
    line = "LOCUS".ljust(12) \
           + locus.ljust(16) \
           + " " + str(len(record)).rjust(11) \
           + " " + units \
           + " " * 4 \
           + mol_type.ljust(6) \
           + " " * 11 \
           + division \
           + " " + self._get_date(record) \
           + "\n"
    assert len(line) == 79 + 1, repr(line)  # plus one for new line

    assert line[12:28].rstrip() == locus, \
        'LOCUS line does not contain the locus at the expected position:\n' + line
    assert line[28:29] == " "
    assert line[29:40].lstrip() == str(len(record)), \
        'LOCUS line does not contain the length at the expected position:\n' + line

    assert line[40:44] in [' bp ', ' aa '], \
        'LOCUS line does not contain size units at expected position:\n' + line
    assert line[44:47] in [" " * 3, 'ss-', 'ds-', 'ms-'], \
        'LOCUS line does not have valid strand type (Single stranded, ...):\n' + line
    assert line[47:54].strip() == "" \
        or line[47:54].strip().find('DNA') != -1 \
        or line[47:54].strip().find('RNA') != -1, \
        'LOCUS line does not contain valid sequence type (DNA, RNA, ...):\n' + line
    assert line[54:55] == ' ', \
        'LOCUS line does not contain space at position 55:\n' + line
    assert line[55:63].strip() in ['', 'linear', 'circular'], \
        'LOCUS line does not contain valid entry (linear, circular, ...):\n' + line
    assert line[63:64] == ' ', \
        'LOCUS line does not contain space at position 64:\n' + line
    assert line[67:68] == ' ', \
        'LOCUS line does not contain space at position 68:\n' + line
    assert line[70:71] == '-', \
        'LOCUS line does not contain - at position 71 in date:\n' + line
    assert line[74:75] == '-', \
        'LOCUS line does not contain - at position 75 in date:\n' + line

    self.handle.write(line)
578
def _write_references(self, record):
    """Write the REFERENCE blocks for the record's "references" annotation.

    Entries which are not SeqFeature.Reference objects are skipped, and the
    rest are numbered from one.  When a reference has exactly one location
    its range is written after the number, described as "residues" for a
    protein alphabet and "bases" otherwise.  Each non-empty field of the
    reference is then written under its own sub-keyword.
    """
    # Sub-keywords are indented under REFERENCE: two columns, except PUBMED
    # which takes three.
    sub_keywords = (("  AUTHORS", "authors"),
                    ("  CONSRTM", "consrtm"),
                    ("  TITLE", "title"),
                    ("  JOURNAL", "journal"),
                    ("  MEDLINE", "medline_id"),
                    ("   PUBMED", "pubmed_id"),
                    ("  REMARK", "comment"))
    number = 0
    for ref in record.annotations["references"]:
        if not isinstance(ref, SeqFeature.Reference):
            continue
        number += 1
        data = str(number)
        # TODO - support more complex record reference locations?
        if ref.location and len(ref.location) == 1:
            a = Alphabet._get_base_alphabet(record.seq.alphabet)
            if isinstance(a, Alphabet.ProteinAlphabet):
                units = "residues"
            else:
                units = "bases"
            # nofuzzy_start uses python counting, hence the +1.
            data += "  (%s %i to %i)" % (units,
                                         ref.location[0].nofuzzy_start + 1,
                                         ref.location[0].nofuzzy_end)
        self._write_single_line("REFERENCE", data)
        for tag, attribute in sub_keywords:
            value = getattr(ref, attribute)
            if value:
                self._write_multi_line(tag, value)
620
621
639
646
672
def write_record(self, record):
    """Write a single record to the output file.

    The sections are written in this order: LOCUS, DEFINITION, ACCESSION,
    VERSION, DBLINK, KEYWORDS, SEGMENT (if annotated), SOURCE, ORGANISM with
    its taxonomy, references and comment (if annotated), the feature table,
    the sequence and the terminating "//" line.
    """
    handle = self.handle

    def dotted_list(key):
        """Join a list annotation with '; ', ending in a full stop."""
        try:
            text = "; ".join(record.annotations[key])
        except KeyError:
            return "."
        if not text.endswith("."):
            text += "."
        return text

    self._write_the_first_line(record)

    accession = self._get_annotation_str(record, "accession",
                                         record.id.split(".", 1)[0],
                                         just_first=True)
    acc_with_version = accession
    if record.id.startswith(accession + "."):
        try:
            acc_with_version = "%s.%i" \
                               % (accession,
                                  int(record.id.split(".", 1)[1]))
        except ValueError:
            # The suffix is not an integer version; keep the bare accession.
            pass
    gi = self._get_annotation_str(record, "gi", just_first=True)

    descr = record.description
    if descr == "<unknown description>":
        descr = "."
    self._write_multi_line("DEFINITION", descr)

    self._write_single_line("ACCESSION", accession)
    if gi != ".":
        # "." is what _get_annotation_str gives for a missing annotation.
        self._write_single_line("VERSION", "%s  GI:%s"
                                % (acc_with_version, gi))
    else:
        self._write_single_line("VERSION", "%s" % (acc_with_version))

    self._write_multi_entries("DBLINK", record.dbxrefs)

    self._write_multi_line("KEYWORDS", dotted_list("keywords"))

    if "segment" in record.annotations:
        segment = record.annotations["segment"]
        if isinstance(segment, list):
            assert len(segment) == 1, segment
            segment = segment[0]
        self._write_single_line("SEGMENT", segment)

    self._write_multi_line("SOURCE",
                           self._get_annotation_str(record, "source"))
    # The ORGANISM line must fit on one line, so it is truncated if needed.
    org = self._get_annotation_str(record, "organism")
    if len(org) > self.MAX_WIDTH - self.HEADER_WIDTH:
        org = org[:self.MAX_WIDTH - self.HEADER_WIDTH - 4] + "..."
    self._write_single_line("  ORGANISM", org)
    # The taxonomy lineage follows ORGANISM under an empty tag.
    self._write_multi_line("", dotted_list("taxonomy"))

    if "references" in record.annotations:
        self._write_references(record)

    if "comment" in record.annotations:
        self._write_comment(record)

    # The "Location/Qualifiers" heading starts at column 21, matching
    # QUALIFIER_INDENT.
    handle.write("FEATURES".ljust(21) + "Location/Qualifiers\n")
    rec_length = len(record)
    for feature in record.features:
        self._write_feature(feature, rec_length)
    self._write_sequence(record)
    handle.write("//\n")
758
# Width of the two letter line code plus the blanks which follow it.
HEADER_WIDTH = 5
# Column at which the location / qualifier text of a feature starts.
QUALIFIER_INDENT = 21
# Prefix for continuation lines in the feature table: "FT" then blanks.
QUALIFIER_INDENT_STR = "FT" + " " * (QUALIFIER_INDENT - 2)
# Template for the line carrying the feature key.  It is built from explicit
# pad widths so that it is exactly QUALIFIER_INDENT characters wide when %s
# is empty.
QUALIFIER_INDENT_TMP = "FT".ljust(HEADER_WIDTH) + "%s" \
                       + " " * (QUALIFIER_INDENT - HEADER_WIDTH)
# Heading of the feature table, using the same two columns as the FT lines.
FEATURE_HEADER = "FH".ljust(HEADER_WIDTH) \
                 + "Key".ljust(QUALIFIER_INDENT - HEADER_WIDTH) \
                 + "Location/Qualifiers\n"
765
771
824
826 assert len(tag)==2
827 line = tag+" "+text
828 if len(text) > self.MAX_WIDTH:
829 import warnings
830 warnings.warn("Line %r too long" % line)
831 self.handle.write(line+"\n")
832
838
def _write_the_first_line(self, record):
    """Write the ID and AC lines.

    When record.id ends in a dot followed by digits, those digits are
    written as the sequence version ("SV n") and the rest of the id is the
    fallback accession.  Otherwise the whole id is the fallback accession
    and the version field is left empty.  The "accession" annotation, when
    present, takes precedence over either fallback.

    Raises ValueError if the accession contains a semi-colon or a space, or
    if the alphabet is not DNA, RNA or protein.  Raises TypeError for an
    invalid alphabet.
    """
    version = ""
    fallback_accession = record.id
    if "." in record.id:
        stem, suffix = record.id.rsplit(".", 1)
        if suffix.isdigit():
            version = "SV " + suffix
            fallback_accession = stem
    accession = self._get_annotation_str(record, "accession",
                                         fallback_accession,
                                         just_first=True)

    # Both characters would break the semi-colon separated ID line.
    if ";" in accession:
        raise ValueError("Cannot have semi-colon in EMBL accession, %s"
                         % repr(str(accession)))
    if " " in accession:
        raise ValueError("Cannot have spaces in EMBL accession, %s"
                         % repr(str(accession)))

    # Get the base alphabet (underneath any Gapped or StopCodon encoding)
    a = Alphabet._get_base_alphabet(record.seq.alphabet)
    if not isinstance(a, Alphabet.Alphabet):
        raise TypeError("Invalid alphabet")
    elif isinstance(a, Alphabet.DNAAlphabet):
        mol_type = "DNA"
        units = "BP"
    elif isinstance(a, Alphabet.RNAAlphabet):
        mol_type = "RNA"
        units = "BP"
    elif isinstance(a, Alphabet.ProteinAlphabet):
        mol_type = "PROTEIN"
        units = "AA"
    else:
        raise ValueError("Need a DNA, RNA or Protein alphabet")

    division = self._get_data_division(record)

    # Two of the semi-colon separated ID fields are always left empty here.
    self._write_single_line("ID", "%s; %s; ; %s; ; %s; %i %s."
                            % (accession, version, mol_type,
                               division, len(record), units))
    self.handle.write("XX\n")
    self._write_single_line("AC", accession + ";")
    self.handle.write("XX\n")
898
900 try:
901 division = record.annotations["data_file_division"]
902 except KeyError:
903 division = "UNC"
904 if division in ["PHG", "ENV", "FUN", "HUM", "INV", "MAM", "VRT",
905 "MUS", "PLN", "PRO", "ROD", "SYN", "TGN", "UNC",
906 "VRL", "XXX"]:
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927 pass
928 else:
929
930
931
932
933
934 gbk_to_embl = {"BCT":"PRO",
935 "UNK":"UNC",
936 }
937 try:
938 division = gbk_to_embl[division]
939 except KeyError:
940 division = "UNC"
941 assert len(division)==3
942 return division
943
973
993
1037
1044
if __name__ == "__main__":
    # Quick self test, only run when the module is executed as a script.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("Quick self test")
    import os
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3
1049
1073
def compare_records(old_list, new_list):
    """Check two lists of SeqRecords agree, raises a ValueError if mismatch.

    A ValueError is raised here when the lists differ in length.  Records
    are then compared pair-wise with compare_record(), and False is
    returned as soon as one of those comparisons gives a false result.
    Returns True when every pair agrees (including two empty lists).
    """
    if len(old_list) != len(new_list):
        raise ValueError("%i vs %i records" % (len(old_list), len(new_list)))
    for old, new in zip(old_list, new_list):
        if not compare_record(old, new):
            return False
    return True
1082
def compare_feature(old, new, ignore_sub_features=False):
    """Check two SeqFeatures agree.

    Compares, in this order: the type, the non-fuzzy start and end, the
    strand, the start and end position objects, the sub-features (unless
    ignore_sub_features is true) and finally the qualifiers.  Raises
    ValueError on the first difference found, otherwise returns True.
    """
    if old.type != new.type:
        raise ValueError("Type %s versus %s" % (old.type, new.type))
    if old.location.nofuzzy_start != new.location.nofuzzy_start \
    or old.location.nofuzzy_end != new.location.nofuzzy_end:
        raise ValueError("%s versus %s:\n%s\nvs:\n%s"
                         % (old.location, new.location, str(old), str(new)))
    if old.strand != new.strand:
        raise ValueError("Different strand:\n%s\nvs:\n%s" % (str(old), str(new)))
    if old.location.start != new.location.start:
        raise ValueError("Start %s versus %s:\n%s\nvs:\n%s"
                         % (old.location.start, new.location.start, str(old), str(new)))
    if old.location.end != new.location.end:
        raise ValueError("End %s versus %s:\n%s\nvs:\n%s"
                         % (old.location.end, new.location.end, str(old), str(new)))
    if not ignore_sub_features:
        if len(old.sub_features) != len(new.sub_features):
            raise ValueError("Different sub features")
        for a, b in zip(old.sub_features, new.sub_features):
            if not compare_feature(a, b):
                return False
    # Only qualifiers present on both features are compared, and these four
    # keys are not compared at all.
    skipped = ("db_xref", "protein_id", "product", "note")
    for key in set(old.qualifiers).intersection(new.qualifiers):
        if key in skipped:
            continue
        if old.qualifiers[key] != new.qualifiers[key]:
            raise ValueError("Qualifier mis-match for %s:\n%s\n%s"
                             % (key, old.qualifiers[key], new.qualifiers[key]))
    return True
1116
def compare_features(old_list, new_list, ignore_sub_features=False):
    """Check two lists of SeqFeatures agree, raises a ValueError if mismatch.

    A ValueError is raised here when the lists differ in length.  Features
    are then compared pair-wise with compare_feature(), passing on
    ignore_sub_features, and False is returned as soon as one of those
    comparisons gives a false result.  Returns True when every pair agrees
    (including two empty lists).
    """
    if len(old_list) != len(new_list):
        raise ValueError("%i vs %i features" % (len(old_list), len(new_list)))
    for old, new in zip(old_list, new_list):
        if not compare_feature(old, new, ignore_sub_features):
            return False
    return True
1126
1134
1146
if __name__ == "__main__":
    # Round trip every example file through the writers.  Each file is
    # closed even if parsing fails, and the directory listings are sorted
    # so the output order is reproducible.
    for filename in sorted(os.listdir("../../Tests/GenBank")):
        if not filename.endswith(".gbk") and not filename.endswith(".gb"):
            continue
        print(filename)

        handle = open("../../Tests/GenBank/%s" % filename)
        try:
            records = list(GenBankIterator(handle))
        finally:
            handle.close()

        check_genbank_writer(records)
        check_embl_writer(records)

    for filename in sorted(os.listdir("../../Tests/EMBL")):
        if not filename.endswith(".embl"):
            continue
        print(filename)

        handle = open("../../Tests/EMBL/%s" % filename)
        try:
            records = list(EmblIterator(handle))
        finally:
            handle.close()

        check_genbank_writer(records)
        check_embl_writer(records)

    # The SwissProt examples are only checked against the GenBank writer.
    from Bio import SeqIO
    for filename in sorted(os.listdir("../../Tests/SwissProt")):
        if not filename.startswith("sp"):
            continue
        print(filename)

        handle = open("../../Tests/SwissProt/%s" % filename)
        try:
            records = list(SeqIO.parse(handle, "swiss"))
        finally:
            handle.close()

        check_genbank_writer(records)
1182