1
2
3
4
5
6
7
8
9
10
""" Notes about the various classes of the restriction enzyme implementation.
12
13 RestrictionType is the type of all restriction enzymes.
14 ----------------------------------------------------------------------------
15 AbstractCut implements some methods that are common to all enzymes.
16 ----------------------------------------------------------------------------
17 NoCut, OneCut,TwoCuts represent the number of double strand cuts
18 produced by the enzyme.
19 they correspond to the 4th field of the rebase
20 record emboss_e.NNN.
21 0->NoCut : the enzyme is not characterised.
2->OneCut : the enzyme produces one double strand cut.
23 4->TwoCuts : two double strand cuts.
24 ----------------------------------------------------------------------------
25 Meth_Dep, Meth_Undep represent the methylation susceptibility to
26 the enzyme.
27 Not implemented yet.
28 ----------------------------------------------------------------------------
29 Palindromic, if the site is palindromic or not.
30 NotPalindromic allow some optimisations of the code.
31 No need to check the reverse strand
32 with palindromic sites.
33 ----------------------------------------------------------------------------
34 Unknown, Blunt, represent the overhang.
Ov5, Ov3 Unknown is here for symmetry reasons and
corresponds to enzymes that are not characterised
37 in rebase.
38 ----------------------------------------------------------------------------
39 Defined, Ambiguous, represent the sequence of the overhang.
40 NotDefined
41 NotDefined is for enzymes not characterised in
42 rebase.
43
44 Defined correspond to enzymes that display a
45 constant overhang whatever the sequence.
46 ex : EcoRI. G^AATTC -> overhang :AATT
47 CTTAA^G
48
49 Ambiguous : the overhang varies with the
50 sequence restricted.
51 Typically enzymes which cut outside their
52 restriction site or (but not always)
53 inside an ambiguous site.
54 ex:
55 AcuI CTGAAG(22/20) -> overhang : NN
56 AasI GACNNN^NNNGTC -> overhang : NN
57 CTGN^NNNNNCAG
58
note : these 3 classes refer to the overhang, not the site.
60 So the enzyme ApoI (RAATTY) is defined even if its restriction
61 site is ambiguous.
62
63 ApoI R^AATTY -> overhang : AATT -> Defined
64 YTTAA^R
65 Accordingly, blunt enzymes are always Defined even
66 when they cut outside their restriction site.
67 ----------------------------------------------------------------------------
68 Not_available, as found in rebase file emboss_r.NNN files.
69 Commercially_available
70 allow the selection of the enzymes according to
71 their suppliers to reduce the quantity
72 of results.
73 Also will allow the implementation of buffer
74 compatibility tables. Not implemented yet.
75
76 the list of suppliers is extracted from
77 emboss_s.NNN
78 ----------------------------------------------------------------------------
79 """
80
81 import re
82 import itertools
83
84 from Bio.Seq import Seq, MutableSeq
85 from Bio.Alphabet import IUPAC
86
87 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict
88 from Bio.Restriction.Restriction_Dictionary import typedict
89 from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict
90 from Bio.Restriction.RanaConfig import *
91 from Bio.Restriction.PrintFormat import PrintFormat
92
93
94
96 """Check characters in a string (PRIVATE).
97
98 Remove digits and white space present in string. Allows any valid ambiguous
99 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted).
100
101 Other characters (e.g. symbols) trigger a TypeError.
102
103 Returns the string WITH A LEADING SPACE (!). This is for backwards
104 compatibility, and may in part be explained by the fact that
105 Bio.Restriction doesn't use zero based counting.
106 """
107
108 seq_string = "".join(seq_string.split()).upper()
109
110 for c in "0123456789" : seq_string = seq_string.replace(c,"")
111
112 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")) :
113 raise TypeError("Invalid character found in %s" % repr(seq_string))
114 return " " + seq_string
115
116
117 matching = {'A' : 'ARWMHVDN', 'C' : 'CYSMHBVN', 'G' : 'GRSKBVDN',
118 'T' : 'TYWKHBDN', 'R' : 'ABDGHKMNSRWV', 'Y' : 'CBDHKMNSTWVY',
119 'W' : 'ABDHKMNRTWVY', 'S' : 'CBDGHKMNSRVY', 'M' : 'ACBDHMNSRWVY',
120 'K' : 'BDGHKNSRTWVY', 'H' : 'ACBDHKMNSRTWVY',
121 'B' : 'CBDGHKMNSRTWVY', 'V' : 'ACBDGHKMNSRWVY',
122 'D' : 'ABDGHKMNSRTWVY', 'N' : 'ACBDGHKMNSRTWVY'}
123
124 DNA = Seq
125
224
225
227 """RestrictionType. Type from which derives all enzyme classes.
228
229 Implement the operator methods."""
230
def __init__(cls, name='', bases=(), dct={}):
    """RE(name, bases, dct) -> RestrictionType instance.

    Initialise a newly created enzyme class.

    Not intended to be used in normal operation. The enzymes are
    instantiated when importing the module.

    name  -- name of the enzyme class; it must not contain a hyphen.
    bases -- tuple of the base classes of the new class.
    dct   -- namespace dictionary of the new class. It is only forwarded
             to type.__init__ and never modified here, so the shared
             default value is harmless.

    On return cls.compsite holds the compiled form of the regular
    expression pattern that was stored in cls.compsite.

    Raises ValueError if name contains a hyphen or if compiling
    cls.compsite fails."""
    if "-" in name:
        raise ValueError("Problem with hyphen in %s as enzyme name"
                         % repr(name))
    # super() already binds cls. Passing cls a second time hands
    # type.__init__ four arguments, which it rejects with
    # "type.__init__() takes 1 or 3 arguments".
    super(RestrictionType, cls).__init__(name, bases, dct)
    try:
        cls.compsite = re.compile(cls.compsite)
    except Exception:
        # The original exception object was never used, so it is not bound.
        raise ValueError("Problem with regular expression, re.compiled(%s)"
                         % repr(cls.compsite))
247
259
261 """RE.__div__(other) -> list.
262
263 RE/other
264 returns RE.search(other)."""
265 return cls.search(other)
266
268 """RE.__rdiv__(other) -> list.
269
270 other/RE
271 returns RE.search(other)."""
272 return cls.search(other)
273
275 """RE.__truediv__(other) -> list.
276
277 RE/other
278 returns RE.search(other)."""
279 return cls.search(other)
280
282 """RE.__rtruediv__(other) -> list.
283
284 other/RE
285 returns RE.search(other)."""
286 return cls.search(other)
287
289 """RE.__floordiv__(other) -> list.
290
291 RE//other
292 returns RE.catalyse(other)."""
293 return cls.catalyse(other)
294
296 """RE.__rfloordiv__(other) -> list.
297
298 other//RE
299 returns RE.catalyse(other)."""
300 return cls.catalyse(other)
301
303 """RE.__str__() -> str.
304
305 return the name of the enzyme."""
306 return cls.__name__
307
309 """RE.__repr__() -> str.
310
311 used with eval or exec will instantiate the enzyme."""
312 return "%s" % cls.__name__
313
315 """RE.__len__() -> int.
316
317 length of the recognition site."""
318 return cls.size
319
321
322
323 return id(cls)
324
326 """RE == other -> bool
327
328 True if RE and other are the same enzyme.
329
330 Specifically this checks they are the same Python object.
331 """
332
333 return id(cls)==id(other)
334
336 """RE != other -> bool.
337 isoschizomer strict, same recognition site, same restriction -> False
338 all the other-> True
339
340 WARNING - This is not the inverse of the __eq__ method.
341 """
342 if not isinstance(other, RestrictionType):
343 return True
344 elif cls.charac == other.charac:
345 return False
346 else:
347 return True
348
350 """RE >> other -> bool.
351
352 neoschizomer : same recognition site, different restriction. -> True
353 all the others : -> False"""
354 if not isinstance(other, RestrictionType):
355 return False
356 elif cls.site == other.site and cls.charac != other.charac:
357 return True
358 else:
359 return False
360
362 """a % b -> bool.
363
364 Test compatibility of the overhang of a and b.
365 True if a and b have compatible overhang."""
366 if not isinstance(other, RestrictionType):
367 raise TypeError( \
368 'expected RestrictionType, got %s instead' % type(other))
369 return cls._mod1(other)
370
372 """a >= b -> bool.
373
374 a is greater or equal than b if the a site is longer than b site.
375 if their site have the same length sort by alphabetical order of their
376 names."""
377 if not isinstance(other, RestrictionType):
378 raise NotImplementedError
379 if len(cls) > len(other):
380 return True
381 elif cls.size == len(other) and cls.__name__ >= other.__name__:
382 return True
383 else:
384 return False
385
387 """a > b -> bool.
388
389 sorting order:
390 1. size of the recognition site.
391 2. if equal size, alphabetical order of the names."""
392 if not isinstance(other, RestrictionType):
393 raise NotImplementedError
394 if len(cls) > len(other):
395 return True
396 elif cls.size == len(other) and cls.__name__ > other.__name__:
397 return True
398 else:
399 return False
400
402 """a <= b -> bool.
403
404 sorting order:
405 1. size of the recognition site.
406 2. if equal size, alphabetical order of the names."""
407 if not isinstance(other, RestrictionType):
408 raise NotImplementedError
409 elif len(cls) < len(other):
410 return True
411 elif len(cls) == len(other) and cls.__name__ <= other.__name__:
412 return True
413 else:
414 return False
415
417 """a < b -> bool.
418
419 sorting order:
420 1. size of the recognition site.
421 2. if equal size, alphabetical order of the names."""
422 if not isinstance(other, RestrictionType):
423 raise NotImplementedError
424 elif len(cls) < len(other):
425 return True
426 elif len(cls) == len(other) and cls.__name__ < other.__name__:
427 return True
428 else:
429 return False
430
431
433 """Implement the methods that are common to all restriction enzymes.
434
435 All the methods are classmethod.
436
437 For internal use only. Not meant to be instantiate."""
438
def search(cls, dna, linear=True):
    """RE.search(dna, linear=True) -> list.

    Return the list of all the sites of RE found in dna.

    dna is either a FormattedSeq, which is used as it is (linear is then
    not consulted), or an object accepted by FormattedSeq(dna, linear),
    normally a Bio.Seq.Seq or a Bio.Seq.MutableSeq instance.

    If linear is False, the restriction sites that span over the
    boundaries of the sequence are included as well.

    Each position is the first base of the 3' fragment, i.e. the first
    base after the position where the enzyme cuts.

    The sequence is stored in cls.dna before the actual work is handed
    over to cls._search()."""
    # Wrap raw sequences only; an already formatted sequence is reused.
    if not isinstance(dna, FormattedSeq):
        dna = FormattedSeq(dna, linear)
    cls.dna = dna
    return cls._search()
search = classmethod(search)
466
468 """RE.all_suppliers -> print all the suppliers of R"""
469 supply = [x[0] for x in suppliers_dict.itervalues()]
470 supply.sort()
471 print ",\n".join(supply)
472 return
473 all_suppliers = classmethod(all_suppliers)
474
476 """RE.is_equischizomers(other) -> bool.
477
478 True if other is an isoschizomer of RE.
479 False else.
480
481 equischizomer <=> same site, same position of restriction."""
482 return not self != other
483 is_equischizomer = classmethod(is_equischizomer)
484
486 """RE.is_neoschizomers(other) -> bool.
487
488 True if other is an isoschizomer of RE.
489 False else.
490
491 neoschizomer <=> same site, different position of restriction."""
492 return self >> other
493 is_neoschizomer = classmethod(is_neoschizomer)
494
496 """RE.is_isoschizomers(other) -> bool.
497
498 True if other is an isoschizomer of RE.
499 False else.
500
501 isoschizomer <=> same site."""
502 return (not self != other) or self >> other
503 is_isoschizomer = classmethod(is_isoschizomer)
504
506 """RE.equischizomers([batch]) -> list.
507
508 return a tuple of all the isoschizomers of RE.
509 if batch is supplied it is used instead of the default AllEnzymes.
510
511 equischizomer <=> same site, same position of restriction."""
512 if not batch : batch = AllEnzymes
513 r = [x for x in batch if not self != x]
514 i = r.index(self)
515 del r[i]
516 r.sort()
517 return r
518 equischizomers = classmethod(equischizomers)
519
521 """RE.neoschizomers([batch]) -> list.
522
523 return a tuple of all the neoschizomers of RE.
524 if batch is supplied it is used instead of the default AllEnzymes.
525
526 neoschizomer <=> same site, different position of restriction."""
527 if not batch : batch = AllEnzymes
528 r = [x for x in batch if self >> x]
529 r.sort()
530 return r
531 neoschizomers = classmethod(neoschizomers)
532
534 """RE.isoschizomers([batch]) -> list.
535
536 return a tuple of all the equischizomers and neoschizomers of RE.
537 if batch is supplied it is used instead of the default AllEnzymes."""
538 if not batch : batch = AllEnzymes
539 r = [x for x in batch if (self >> x) or (not self != x)]
540 i = r.index(self)
541 del r[i]
542 r.sort()
543 return r
544 isoschizomers = classmethod(isoschizomers)
545
547 """RE.frequency() -> int.
548
549 frequency of the site."""
550 return self.freq
551 frequency = classmethod(frequency)
552
553
554 -class NoCut(AbstractCut):
555 """Implement the methods specific to the enzymes that do not cut.
556
557 These enzymes are generally enzymes that have been only partially
558 characterised and the way they cut the DNA is unknow or enzymes for
559 which the pattern of cut is to complex to be recorded in Rebase
560 (ncuts values of 0 in emboss_e.###).
561
562 When using search() with these enzymes the values returned are at the start of
563 the restriction site.
564
565 Their catalyse() method returns a TypeError.
566
567 Unknown and NotDefined are also part of the base classes of these enzymes.
568
569 Internal use only. Not meant to be instantiated."""
570
572 """RE.cut_once() -> bool.
573
574 True if the enzyme cut the sequence one time on each strand."""
575 return False
576 cut_once = classmethod(cut_once)
577
579 """RE.cut_twice() -> bool.
580
581 True if the enzyme cut the sequence twice on each strand."""
582 return False
583 cut_twice = classmethod(cut_twice)
584
586 """RE._modify(location) -> int.
587
588 for internal use only.
589
590 location is an integer corresponding to the location of the match for
591 the enzyme pattern in the sequence.
592 _modify returns the real place where the enzyme will cut.
593
594 example:
595 EcoRI pattern : GAATTC
596 EcoRI will cut after the G.
597 so in the sequence:
598 ______
599 GAATACACGGAATTCGA
600 |
601 10
602 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
603 EcoRI cut after the G so:
604 EcoRI._modify(10) -> 11.
605
606 if the enzyme cut twice _modify will returns two integer corresponding
607 to each cutting site.
608 """
609 yield location
610 _modify = classmethod(_modify)
611
613 """RE._rev_modify(location) -> generator of int.
614
615 for internal use only.
616
617 as _modify for site situated on the antiparallel strand when the
618 enzyme is not palindromic
619 """
620 yield location
621 _rev_modify = classmethod(_rev_modify)
622
624 """RE.characteristic() -> tuple.
625
626 the tuple contains the attributes:
627 fst5 -> first 5' cut ((current strand) or None
628 fst3 -> first 3' cut (complementary strand) or None
629 scd5 -> second 5' cut (current strand) or None
630 scd5 -> second 3' cut (complementary strand) or None
631 site -> recognition site."""
632 return None, None, None, None, self.site
633 characteristic = classmethod(characteristic)
634
636 """Implement the methods specific to the enzymes that cut the DNA only once
637
638 Correspond to ncuts values of 2 in emboss_e.###
639
640 Internal use only. Not meant to be instantiated."""
641
643 """RE.cut_once() -> bool.
644
645 True if the enzyme cut the sequence one time on each strand."""
646 return True
647 cut_once = classmethod(cut_once)
648
650 """RE.cut_twice() -> bool.
651
652 True if the enzyme cut the sequence twice on each strand."""
653 return False
654 cut_twice = classmethod(cut_twice)
655
657 """RE._modify(location) -> int.
658
659 for internal use only.
660
661 location is an integer corresponding to the location of the match for
662 the enzyme pattern in the sequence.
663 _modify returns the real place where the enzyme will cut.
664
665 example:
666 EcoRI pattern : GAATTC
667 EcoRI will cut after the G.
668 so in the sequence:
669 ______
670 GAATACACGGAATTCGA
671 |
672 10
673 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
674 EcoRI cut after the G so:
675 EcoRI._modify(10) -> 11.
676
677 if the enzyme cut twice _modify will returns two integer corresponding
678 to each cutting site.
679 """
680 yield location + self.fst5
681 _modify = classmethod(_modify)
682
684 """RE._rev_modify(location) -> generator of int.
685
686 for internal use only.
687
688 as _modify for site situated on the antiparallel strand when the
689 enzyme is not palindromic
690 """
691 yield location - self.fst3
692 _rev_modify = classmethod(_rev_modify)
693
695 """RE.characteristic() -> tuple.
696
697 the tuple contains the attributes:
698 fst5 -> first 5' cut ((current strand) or None
699 fst3 -> first 3' cut (complementary strand) or None
700 scd5 -> second 5' cut (current strand) or None
701 scd5 -> second 3' cut (complementary strand) or None
702 site -> recognition site."""
703 return self.fst5, self.fst3, None, None, self.site
704 characteristic = classmethod(characteristic)
705
706
708 """Implement the methods specific to the enzymes that cut the DNA twice
709
710 Correspond to ncuts values of 4 in emboss_e.###
711
712 Internal use only. Not meant to be instantiated."""
713
715 """RE.cut_once() -> bool.
716
717 True if the enzyme cut the sequence one time on each strand."""
718 return False
719 cut_once = classmethod(cut_once)
720
722 """RE.cut_twice() -> bool.
723
724 True if the enzyme cut the sequence twice on each strand."""
725 return True
726 cut_twice = classmethod(cut_twice)
727
729 """RE._modify(location) -> int.
730
731 for internal use only.
732
733 location is an integer corresponding to the location of the match for
734 the enzyme pattern in the sequence.
735 _modify returns the real place where the enzyme will cut.
736
737 example:
738 EcoRI pattern : GAATTC
739 EcoRI will cut after the G.
740 so in the sequence:
741 ______
742 GAATACACGGAATTCGA
743 |
744 10
745 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
746 EcoRI cut after the G so:
747 EcoRI._modify(10) -> 11.
748
749 if the enzyme cut twice _modify will returns two integer corresponding
750 to each cutting site.
751 """
752 yield location + self.fst5
753 yield location + self.scd5
754 _modify = classmethod(_modify)
755
757 """RE._rev_modify(location) -> generator of int.
758
759 for internal use only.
760
761 as _modify for site situated on the antiparallel strand when the
762 enzyme is not palindromic
763 """
764 yield location - self.fst3
765 yield location - self.scd3
766 _rev_modify = classmethod(_rev_modify)
767
769 """RE.characteristic() -> tuple.
770
771 the tuple contains the attributes:
772 fst5 -> first 5' cut ((current strand) or None
773 fst3 -> first 3' cut (complementary strand) or None
774 scd5 -> second 5' cut (current strand) or None
775 scd5 -> second 3' cut (complementary strand) or None
776 site -> recognition site."""
777 return self.fst5, self.fst3, self.scd5, self.scd3, self.site
778 characteristic = classmethod(characteristic)
779
780
782 """Implement the information about methylation.
783
784 Enzymes of this class possess a site which is methylable."""
785
787 """RE.is_methylable() -> bool.
788
789 True if the recognition site is a methylable."""
790 return True
791 is_methylable = classmethod(is_methylable)
792
794 """Implement informations about methylation sensitibility.
795
796 Enzymes of this class are not sensible to methylation."""
797
799 """RE.is_methylable() -> bool.
800
801 True if the recognition site is a methylable."""
802 return False
803 is_methylable = classmethod(is_methylable)
804
806 """Implement the methods specific to the enzymes which are palindromic
807
808 palindromic means : the recognition site and its reverse complement are
809 identical.
810 Remarks : an enzyme with a site CGNNCG is palindromic even if some
811 of the sites that it will recognise are not.
812 for example here : CGAACG
813
814 Internal use only. Not meant to be instantiated."""
815
817 """RE._search() -> list.
818
819 for internal use only.
820
821 implement the search method for palindromic and non palindromic enzyme.
822 """
823 siteloc = self.dna.finditer(self.compsite,self.size)
824 self.results = [r for s,g in siteloc for r in self._modify(s)]
825 if self.results : self._drop()
826 return self.results
827 _search = classmethod(_search)
828
830 """RE.is_palindromic() -> bool.
831
832 True if the recognition site is a palindrom."""
833 return True
834 is_palindromic = classmethod(is_palindromic)
835
836
838 """Implement the methods specific to the enzymes which are not palindromic
839
840 palindromic means : the recognition site and its reverse complement are
841 identical.
842
843 Internal use only. Not meant to be instantiated."""
844
846 """RE._search() -> list.
847
848 for internal use only.
849
850 implement the search method for palindromic and non palindromic enzyme.
851 """
852 iterator = self.dna.finditer(self.compsite, self.size)
853 self.results = []
854 modif = self._modify
855 revmodif = self._rev_modify
856 s = str(self)
857 self.on_minus = []
858 for start, group in iterator:
859 if group(s):
860 self.results += [r for r in modif(start)]
861 else:
862 self.on_minus += [r for r in revmodif(start)]
863 self.results += self.on_minus
864 if self.results:
865 self.results.sort()
866 self._drop()
867 return self.results
868 _search = classmethod(_search)
869
871 """RE.is_palindromic() -> bool.
872
873 True if the recognition site is a palindrom."""
874 return False
875 is_palindromic = classmethod(is_palindromic)
876
878 """Implement the methods specific to the enzymes for which the overhang
879 is unknown.
880
881 These enzymes are also NotDefined and NoCut.
882
883 Internal use only. Not meant to be instantiated."""
884
886 """RE.catalyse(dna, linear=True) -> tuple of DNA.
887 RE.catalyze(dna, linear=True) -> tuple of DNA.
888
889 return a tuple of dna as will be produced by using RE to restrict the
890 dna.
891
892 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
893
894 if linear is False, the sequence is considered to be circular and the
895 output will be modified accordingly."""
896 raise NotImplementedError('%s restriction is unknown.' \
897 % self.__name__)
898 catalyze = catalyse = classmethod(catalyse)
899
901 """RE.is_blunt() -> bool.
902
903 True if the enzyme produces blunt end.
904
905 see also:
906 RE.is_3overhang()
907 RE.is_5overhang()
908 RE.is_unknown()"""
909 return False
910 is_blunt = classmethod(is_blunt)
911
913 """RE.is_5overhang() -> bool.
914
915 True if the enzyme produces 5' overhang sticky end.
916
917 see also:
918 RE.is_3overhang()
919 RE.is_blunt()
920 RE.is_unknown()"""
921 return False
922 is_5overhang = classmethod(is_5overhang)
923
925 """RE.is_3overhang() -> bool.
926
927 True if the enzyme produces 3' overhang sticky end.
928
929 see also:
930 RE.is_5overhang()
931 RE.is_blunt()
932 RE.is_unknown()"""
933 return False
934 is_3overhang = classmethod(is_3overhang)
935
937 """RE.overhang() -> str. type of overhang of the enzyme.,
938
939 can be "3' overhang", "5' overhang", "blunt", "unknown" """
940 return 'unknown'
941 overhang = classmethod(overhang)
942
944 """RE.compatible_end() -> list.
945
946 list of all the enzymes that share compatible end with RE."""
947 return []
948 compatible_end = classmethod(compatible_end)
949
951 """RE._mod1(other) -> bool.
952
953 for internal use only
954
955 test for the compatibility of restriction ending of RE and other."""
956 return False
957 _mod1 = classmethod(_mod1)
958
959 -class Blunt(AbstractCut):
960 """Implement the methods specific to the enzymes for which the overhang
961 is blunt.
962
963 The enzyme cuts the + strand and the - strand of the DNA at the same
964 place.
965
966 Internal use only. Not meant to be instantiated."""
967
969 """RE.catalyse(dna, linear=True) -> tuple of DNA.
970 RE.catalyze(dna, linear=True) -> tuple of DNA.
971
972 return a tuple of dna as will be produced by using RE to restrict the
973 dna.
974
975 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
976
977 if linear is False, the sequence is considered to be circular and the
978 output will be modified accordingly."""
979 r = self.search(dna, linear)
980 d = self.dna
981 if not r : return d[1:],
982 fragments = []
983 length = len(r)-1
984 if d.is_linear():
985
986
987
988 fragments.append(d[1:r[0]])
989 if length:
990
991
992
993 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
994
995
996
997 fragments.append(d[r[-1]:])
998 else:
999
1000
1001
1002 fragments.append(d[r[-1]:]+d[1:r[0]])
1003 if not length:
1004
1005
1006
1007 return tuple(fragments)
1008
1009
1010
1011 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1012 return tuple(fragments)
1013 catalyze = catalyse = classmethod(catalyse)
1014
1016 """RE.is_blunt() -> bool.
1017
1018 True if the enzyme produces blunt end.
1019
1020 see also:
1021 RE.is_3overhang()
1022 RE.is_5overhang()
1023 RE.is_unknown()"""
1024 return True
1025 is_blunt = classmethod(is_blunt)
1026
1028 """RE.is_5overhang() -> bool.
1029
1030 True if the enzyme produces 5' overhang sticky end.
1031
1032 see also:
1033 RE.is_3overhang()
1034 RE.is_blunt()
1035 RE.is_unknown()"""
1036 return False
1037 is_5overhang = classmethod(is_5overhang)
1038
1040 """RE.is_3overhang() -> bool.
1041
1042 True if the enzyme produces 3' overhang sticky end.
1043
1044 see also:
1045 RE.is_5overhang()
1046 RE.is_blunt()
1047 RE.is_unknown()"""
1048 return False
1049 is_3overhang = classmethod(is_3overhang)
1050
1052 """RE.overhang() -> str. type of overhang of the enzyme.,
1053
1054 can be "3' overhang", "5' overhang", "blunt", "unknown" """
1055 return 'blunt'
1056 overhang = classmethod(overhang)
1057
1059 """RE.compatible_end() -> list.
1060
1061 list of all the enzymes that share compatible end with RE."""
1062 if not batch : batch = AllEnzymes
1063 r = [x for x in iter(AllEnzymes) if x.is_blunt()]
1064 r.sort()
1065 return r
1066 compatible_end = classmethod(compatible_end)
1067
1069 """RE._mod1(other) -> bool.
1070
1071 for internal use only
1072
1073 test for the compatibility of restriction ending of RE and other."""
1074 if issubclass(other, Blunt) : return True
1075 else : return False
1076 _mod1 = staticmethod(_mod1)
1077
1078 -class Ov5(AbstractCut):
1079 """Implement the methods specific to the enzymes for which the overhang
1080 is recessed in 3'.
1081
1082 The enzyme cuts the + strand after the - strand of the DNA.
1083
1084 Internal use only. Not meant to be instantiated."""
1085
1087 """RE.catalyse(dna, linear=True) -> tuple of DNA.
1088 RE.catalyze(dna, linear=True) -> tuple of DNA.
1089
1090 return a tuple of dna as will be produced by using RE to restrict the
1091 dna.
1092
1093 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1094
1095 if linear is False, the sequence is considered to be circular and the
1096 output will be modified accordingly."""
1097 r = self.search(dna, linear)
1098 d = self.dna
1099 if not r : return d[1:],
1100 length = len(r)-1
1101 fragments = []
1102 if d.is_linear():
1103
1104
1105
1106 fragments.append(d[1:r[0]])
1107 if length:
1108
1109
1110
1111 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1112
1113
1114
1115 fragments.append(d[r[-1]:])
1116 else:
1117
1118
1119
1120 fragments.append(d[r[-1]:]+d[1:r[0]])
1121 if not length:
1122
1123
1124
1125 return tuple(fragments)
1126
1127
1128
1129 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1130 return tuple(fragments)
1131 catalyze = catalyse = classmethod(catalyse)
1132
1134 """RE.is_blunt() -> bool.
1135
1136 True if the enzyme produces blunt end.
1137
1138 see also:
1139 RE.is_3overhang()
1140 RE.is_5overhang()
1141 RE.is_unknown()"""
1142 return False
1143 is_blunt = classmethod(is_blunt)
1144
1146 """RE.is_5overhang() -> bool.
1147
1148 True if the enzyme produces 5' overhang sticky end.
1149
1150 see also:
1151 RE.is_3overhang()
1152 RE.is_blunt()
1153 RE.is_unknown()"""
1154 return True
1155 is_5overhang = classmethod(is_5overhang)
1156
1158 """RE.is_3overhang() -> bool.
1159
1160 True if the enzyme produces 3' overhang sticky end.
1161
1162 see also:
1163 RE.is_5overhang()
1164 RE.is_blunt()
1165 RE.is_unknown()"""
1166 return False
1167 is_3overhang = classmethod(is_3overhang)
1168
1170 """RE.overhang() -> str. type of overhang of the enzyme.,
1171
1172 can be "3' overhang", "5' overhang", "blunt", "unknown" """
1173 return "5' overhang"
1174 overhang = classmethod(overhang)
1175
1177 """RE.compatible_end() -> list.
1178
1179 list of all the enzymes that share compatible end with RE."""
1180 if not batch : batch = AllEnzymes
1181 r = [x for x in iter(AllEnzymes) if x.is_5overhang() and x % self]
1182 r.sort()
1183 return r
1184 compatible_end = classmethod(compatible_end)
1185
def _mod1(self, other):
    """RE._mod1(other) -> bool.

    For internal use only.

    Compatibility test of the ends produced by RE and other. The answer
    is False unless other is a subclass of Ov5, in which case the
    decision is delegated to self._mod2(other)."""
    if not issubclass(other, Ov5):
        return False
    return self._mod2(other)
_mod1 = classmethod(_mod1)
1195
1196
1197 -class Ov3(AbstractCut):
1198 """Implement the methods specific to the enzymes for which the overhang
1199 is recessed in 5'.
1200
1201 The enzyme cuts the - strand after the + strand of the DNA.
1202
1203 Internal use only. Not meant to be instantiated."""
1204
1206 """RE.catalyse(dna, linear=True) -> tuple of DNA.
1207 RE.catalyze(dna, linear=True) -> tuple of DNA.
1208
1209 return a tuple of dna as will be produced by using RE to restrict the
1210 dna.
1211
1212 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
1213
1214 if linear is False, the sequence is considered to be circular and the
1215 output will be modified accordingly."""
1216 r = self.search(dna, linear)
1217 d = self.dna
1218 if not r : return d[1:],
1219 fragments = []
1220 length = len(r)-1
1221 if d.is_linear():
1222
1223
1224
1225 fragments.append(d[1:r[0]])
1226 if length:
1227
1228
1229
1230 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1231
1232
1233
1234 fragments.append(d[r[-1]:])
1235 else:
1236
1237
1238
1239 fragments.append(d[r[-1]:]+d[1:r[0]])
1240 if not length:
1241
1242
1243
1244 return tuple(fragments)
1245
1246
1247
1248 fragments += [d[r[x]:r[x+1]] for x in xrange(length)]
1249 return tuple(fragments)
1250 catalyze = catalyse = classmethod(catalyse)
1251
1253 """RE.is_blunt() -> bool.
1254
1255 True if the enzyme produces blunt end.
1256
1257 see also:
1258 RE.is_3overhang()
1259 RE.is_5overhang()
1260 RE.is_unknown()"""
1261 return False
1262 is_blunt = classmethod(is_blunt)
1263
1265 """RE.is_5overhang() -> bool.
1266
1267 True if the enzyme produces 5' overhang sticky end.
1268
1269 see also:
1270 RE.is_3overhang()
1271 RE.is_blunt()
1272 RE.is_unknown()"""
1273 return False
1274 is_5overhang = classmethod(is_5overhang)
1275
1277 """RE.is_3overhang() -> bool.
1278
1279 True if the enzyme produces 3' overhang sticky end.
1280
1281 see also:
1282 RE.is_5overhang()
1283 RE.is_blunt()
1284 RE.is_unknown()"""
1285 return True
1286 is_3overhang = classmethod(is_3overhang)
1287
1289 """RE.overhang() -> str. type of overhang of the enzyme.,
1290
1291 can be "3' overhang", "5' overhang", "blunt", "unknown" """
1292 return "3' overhang"
1293 overhang = classmethod(overhang)
1294
1296 """RE.compatible_end() -> list.
1297
1298 list of all the enzymes that share compatible end with RE."""
1299 if not batch : batch = AllEnzymes
1300 r = [x for x in iter(AllEnzymes) if x.is_3overhang() and x % self]
1301 r.sort()
1302 return r
1303 compatible_end = classmethod(compatible_end)
1304
def _mod1(self, other):
    """RE._mod1(other) -> bool.

    For internal use only.

    Compatibility test of the ends produced by RE and other. The answer
    is False unless other is a subclass of Ov3, in which case the
    decision is delegated to self._mod2(other)."""
    if not issubclass(other, Ov3):
        return False
    return self._mod2(other)
_mod1 = classmethod(_mod1)
1317
1318
1320 """Implement the methods specific to the enzymes for which the overhang
1321 and the cut are not variable.
1322
1323 Typical example : EcoRI -> G^AATT_C
1324 The overhang will always be AATT
1325 Notes:
1326 Blunt enzymes are always defined. even if there site is GGATCCNNN^_N
1327 There overhang is always the same : blunt!
1328
1329 Internal use only. Not meant to be instantiated."""
1330
1364 _drop = classmethod(_drop)
1365
1367 """RE.is_defined() -> bool.
1368
1369 True if the sequence recognised and cut is constant,
1370 i.e. the recognition site is not degenerated AND the enzyme cut inside
1371 the site.
1372
1373 see also:
1374 RE.is_ambiguous()
1375 RE.is_unknown()"""
1376 return True
1377 is_defined = classmethod(is_defined)
1378
1380 """RE.is_ambiguous() -> bool.
1381
1382 True if the sequence recognised and cut is ambiguous,
1383 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1384 the site.
1385
1386 see also:
1387 RE.is_defined()
1388 RE.is_unknown()"""
1389 return False
1390 is_ambiguous = classmethod(is_ambiguous)
1391
1393 """RE.is_unknown() -> bool.
1394
1395 True if the sequence is unknown,
1396 i.e. the recognition site has not been characterised yet.
1397
1398 see also:
1399 RE.is_defined()
1400 RE.is_ambiguous()"""
1401 return False
1402 is_unknown = classmethod(is_unknown)
1403
1405 """RE.elucidate() -> str
1406
1407 return a representation of the site with the cut on the (+) strand
1408 represented as '^' and the cut on the (-) strand as '_'.
1409 ie:
1410 >>> EcoRI.elucidate() # 5' overhang
1411 'G^AATT_C'
1412 >>> KpnI.elucidate() # 3' overhang
1413 'G_GTAC^C'
1414 >>> EcoRV.elucidate() # blunt
1415 'GAT^_ATC'
1416 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1417 '? GTATAC ?'
1418 >>>
1419 """
1420 f5 = self.fst5
1421 f3 = self.fst3
1422 site = self.site
1423 if self.cut_twice() : re = 'cut twice, not yet implemented sorry.'
1424 elif self.is_5overhang():
1425 if f5 == f3 == 0 : re = 'N^'+ self.site + '_N'
1426 elif f3 == 0 : re = site[:f5] + '^' + site[f5:] + '_N'
1427 else : re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1428 elif self.is_blunt():
1429 re = site[:f5] + '^_' + site[f5:]
1430 else:
1431 if f5 == f3 == 0 : re = 'N_'+ site + '^N'
1432 else : re = site[:f3] + '_' + site[f3:f5] +'^'+ site[f5:]
1433 return re
1434 elucidate = classmethod(elucidate)
1435
def _mod2(self, other):
    """RE._mod2(other) -> bool.

    For internal use only.

    Second stage of the end-compatibility test between RE and other.
    True when both overhang sequences are equal.  When they differ and
    other is a subclass of Ambiguous, the question is handed over to
    other._mod2(RE).  False in every remaining case."""
    same_overhang = other.ovhgseq == self.ovhgseq
    if same_overhang:
        return True
    if issubclass(other, Ambiguous):
        # Let the ambiguous partner do the pattern based comparison.
        return other._mod2(self)
    return False
_mod2 = classmethod(_mod2)
1452
1453
1455 """Implement the methods specific to the enzymes for which the overhang
1456 is variable.
1457
1458 Typical example : BstXI -> CCAN_NNNN^NTGG
1459 The overhang can be any sequence of 4 bases.
1460 Notes:
1461 Blunt enzymes are always defined, even if their site is GGATCCNNN^_N.
1462 Their overhang is always the same : blunt!
1463
1464 Internal use only. Not meant to be instantiated."""
1465
1491 _drop = classmethod(_drop)
1492
1494 """RE.is_defined() -> bool.
1495
1496 True if the sequence recognised and cut is constant,
1497 i.e. the recognition site is not degenerated AND the enzyme cut inside
1498 the site.
1499
1500 see also:
1501 RE.is_ambiguous()
1502 RE.is_unknown()"""
1503 return False
1504 is_defined = classmethod(is_defined)
1505
1507 """RE.is_ambiguous() -> bool.
1508
1509 True if the sequence recognised and cut is ambiguous,
1510 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1511 the site.
1512
1513
1514 see also:
1515 RE.is_defined()
1516 RE.is_unknown()"""
1517 return True
1518 is_ambiguous = classmethod(is_ambiguous)
1519
1521 """RE.is_unknown() -> bool.
1522
1523 True if the sequence is unknown,
1524 i.e. the recognition site has not been characterised yet.
1525
1526 see also:
1527 RE.is_defined()
1528 RE.is_ambiguous()"""
1529 return False
1530 is_unknown = classmethod(is_unknown)
1531
def _mod2(self, other):
    """RE._mod2(other) -> bool.

    For internal use only.

    Second stage of the end-compatibility test between RE and other.
    Overhangs of different length are never compatible.  Otherwise the
    overhang of RE is rewritten into a regular expression ('N' becomes
    '.', every other ambiguity code becomes a character class built from
    the matching table) and tried against the overhang of other."""
    if len(self.ovhgseq) != len(other.ovhgseq):
        return False
    pattern = self.ovhgseq
    # Walk over the untouched overhang while rewriting the pattern in
    # place; every substitution replaces all occurrences of the symbol.
    for symbol in self.ovhgseq:
        if symbol in 'ATCG':
            continue
        elif symbol in 'N':
            pattern = '.'.join(pattern.split('N'))
        elif symbol in 'RYWMSKHDBV':
            char_class = '[' + matching[symbol] + ']'
            pattern = char_class.join(pattern.split(symbol))
    return bool(re.match(pattern, other.ovhgseq))
_mod2 = classmethod(_mod2)
1558
1560 """RE.elucidate() -> str
1561
1562 return a representation of the site with the cut on the (+) strand
1563 represented as '^' and the cut on the (-) strand as '_'.
1564 ie:
1565 >>> EcoRI.elucidate() # 5' overhang
1566 'G^AATT_C'
1567 >>> KpnI.elucidate() # 3' overhang
1568 'G_GTAC^C'
1569 >>> EcoRV.elucidate() # blunt
1570 'GAT^_ATC'
1571 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1572 '? GTATAC ?'
1573 >>>
1574 """
1575 f5 = self.fst5
1576 f3 = self.fst3
1577 length = len(self)
1578 site = self.site
1579 if self.cut_twice() : re = 'cut twice, not yet implemented sorry.'
1580 elif self.is_5overhang():
1581 if f3 == f5 == 0:
1582 re = 'N^' + site +'_N'
1583 elif 0 <= f5 <= length and 0 <= f3+length <= length:
1584 re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1585 elif 0 <= f5 <= length:
1586 re = site[:f5] + '^' + site[f5:] + f3*'N' + '_N'
1587 elif 0 <= f3+length <= length:
1588 re = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
1589 elif f3+length < 0:
1590 re = 'N^'*abs(f5)*'N' + '_' + abs(length+f3)*'N' + site
1591 elif f5 > length:
1592 re = site + (f5-length)*'N'+'^'+(length+f3-f5)*'N'+'_N'
1593 else:
1594 re = 'N^' + abs(f5) * 'N' + site + f3*'N' + '_N'
1595 elif self.is_blunt():
1596 if f5 < 0:
1597 re = 'N^_' + abs(f5)*'N' + site
1598 elif f5 > length:
1599 re = site + (f5-length)*'N' + '^_N'
1600 else:
1601 raise ValueError('%s.easyrepr() : error f5=%i' \
1602 % (self.name,f5))
1603 else:
1604 if f3 == 0:
1605 if f5 == 0 : re = 'N_' + site + '^N'
1606 else : re = site + '_' + (f5-length)*'N' + '^N'
1607 elif 0 < f3+length <= length and 0 <= f5 <= length:
1608 re = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
1609 elif 0 < f3+length <= length:
1610 re = site[:f3] + '_' + site[f3:] + (f5-length)*'N' + '^N'
1611 elif 0 <= f5 <= length:
1612 re = 'N_' +'N'*(f3+length) + site[:f5] + '^' + site[f5:]
1613 elif f3 > 0:
1614 re = site + f3*'N' + '_' + (f5-f3-length)*'N' + '^N'
1615 elif f5 < 0:
1616 re = 'N_' + abs(f3-f5+length)*'N' + '^' + abs(f5)*'N' + site
1617 else:
1618 re = 'N_' + abs(f3+length)*'N' + site + (f5-length)*'N' + '^N'
1619 return re
1620 elucidate = classmethod(elucidate)
1621
1622
1624 """Implement the methods specific to the enzymes for which the overhang
1625 is not characterised.
1626
1627 Correspond to NoCut and Unknown.
1628
1629 Internal use only. Not meant to be instantiated."""
1630
1653 _drop = classmethod(_drop)
1654
1656 """RE.is_defined() -> bool.
1657
1658 True if the sequence recognised and cut is constant,
1659 i.e. the recognition site is not degenerated AND the enzyme cut inside
1660 the site.
1661
1662 see also:
1663 RE.is_ambiguous()
1664 RE.is_unknown()"""
1665 return False
1666 is_defined = classmethod(is_defined)
1667
1669 """RE.is_ambiguous() -> bool.
1670
1671 True if the sequence recognised and cut is ambiguous,
1672 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1673 the site.
1674
1675
1676 see also:
1677 RE.is_defined()
1678 RE.is_unknown()"""
1679 return False
1680 is_ambiguous = classmethod(is_ambiguous)
1681
1683 """RE.is_unknown() -> bool.
1684
1685 True if the sequence is unknown,
1686 i.e. the recognition site has not been characterised yet.
1687
1688 see also:
1689 RE.is_defined()
1690 RE.is_ambiguous()"""
1691 return True
1692 is_unknown = classmethod(is_unknown)
1693
def _mod2(self, other):
    """RE._mod2(other) -> bool.

    For internal use only.

    Compatibility test between the ends produced by RE and other.
    This variant never returns: it always raises a ValueError naming
    RE and other."""
    names = (str(self), str(other), str(self))
    raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
                     % names)
_mod2 = classmethod(_mod2)
1709
1711 """RE.elucidate() -> str
1712
1713 return a representation of the site with the cut on the (+) strand
1714 represented as '^' and the cut on the (-) strand as '_'.
1715 ie:
1716 >>> EcoRI.elucidate() # 5' overhang
1717 'G^AATT_C'
1718 >>> KpnI.elucidate() # 3' overhang
1719 'G_GTAC^C'
1720 >>> EcoRV.elucidate() # blunt
1721 'GAT^_ATC'
1722 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1723 '? GTATAC ?'
1724 >>>
1725 """
1726 return '? %s ?' % self.site
1727 elucidate = classmethod(elucidate)
1728
1729
1731
1732
1733
1734
1735 """Implement the methods specific to the enzymes which are commercially
1736 available.
1737
1738 Internal use only. Not meant to be instantiated."""
1739
1741 """RE.suppliers() -> print the suppliers of RE."""
1742 supply = suppliers_dict.items()
1743 for k,v in supply:
1744 if k in self.suppl:
1745 print v[0]+','
1746 return
1747 suppliers = classmethod(suppliers)
1748
1750 """RE.supplier_list() -> list.
1751
1752 list of the supplier names for RE."""
1753 return [v[0] for k,v in suppliers_dict.items() if k in self.suppl]
1754 supplier_list = classmethod(supplier_list)
1755
1757 """RE.buffers(supplier) -> string.
1758
1759 not implemented yet."""
1760 return
1761 buffers = classmethod(buffers)
1762
1764 """RE.is_comm() -> bool.
1765
1766 True if RE has suppliers."""
1767 return True
1768 is_comm = classmethod(is_comm)
1769
1770
1772 """Implement the methods specific to the enzymes which are not commercially
1773 available.
1774
1775 Internal use only. Not meant to be instantiated."""
1776
1778 """RE.suppliers() -> print the suppliers of RE."""
1779 return None
1780 suppliers = staticmethod(suppliers)
1781
1783 """RE.supplier_list() -> list.
1784
1785 list of the supplier names for RE."""
1786 return []
1787 supplier_list = classmethod(supplier_list)
1788
1790 """RE.buffers(supplier) -> string.
1791
1792 not implemented yet."""
1793 raise TypeError("Enzyme not commercially available.")
1794 buffers = classmethod(buffers)
1795
1797 """RE.is_comm() -> bool.
1798
1799 True if RE has suppliers."""
1800 return False
1801 is_comm = classmethod(is_comm)
1802
1803
1804
1805
1806
1807
1808
1809
1810
1812
def __init__(self, first=[], suppliers=[]):
    """RestrictionBatch([sequence]) -> new RestrictionBatch.

    first     -- iterable of enzymes; each element goes through
                 self.format() before being stored.
    suppliers -- iterable of supplier codes (keys of suppliers_dict);
                 every enzyme listed for these suppliers is added too.

    raise a KeyError if a supplier code is not in suppliers_dict."""
    # The list defaults are kept for interface compatibility; they are
    # only read here, never mutated.
    enzymes = [self.format(x) for x in first]
    # NOTE(review): eval() turns the enzyme names stored in suppliers_dict
    # into the objects of the same name; suppliers_dict is assumed to be
    # trusted, module-provided data.
    enzymes += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
    set.__init__(self, enzymes)
    self.mapping = dict.fromkeys(self)
    self.already_mapped = None
    # add_supplier() and current_suppliers() both read self.suppliers, so
    # the attribute has to exist as soon as the batch is built.
    self.suppliers = list(suppliers)
1820
1822 if len(self) < 5:
1823 return '+'.join(self.elements())
1824 else:
1825 return '...'.join(('+'.join(self.elements()[:2]),\
1826 '+'.join(self.elements()[-2:])))
1827
1829 return 'RestrictionBatch(%s)' % self.elements()
1830
1837
1840
1843
def get(self, enzyme, add=False):
    """B.get(enzyme[, add]) -> enzyme class.

    enzyme is first normalised with B.format().
    If the result belongs to B it is returned.
    If it does not and add is True, it is added to B and returned.
    If it does not and add is False (the default), a ValueError is raised."""
    candidate = self.format(enzyme)
    if candidate not in self:
        if not add:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % candidate.__name__)
        self.add(candidate)
    return candidate
1860
1862 """B.lambdasplit(func) -> RestrictionBatch .
1863
1864 the new batch will contains only the enzymes for which
1865 func return True."""
1866 d = [x for x in itertools.ifilter(func, self)]
1867 new = RestrictionBatch()
1868 new._data = dict(zip(d, [True]*len(d)))
1869 return new
1870
1872 """B.add_supplier(letter) -> add a new set of enzyme to B.
1873
1874 letter represents the suppliers as defined in the dictionary
1875 RestrictionDictionary.suppliers
1876 return None.
1877 raise a KeyError if letter is not a supplier code."""
1878 supplier = suppliers_dict[letter]
1879 self.suppliers.append(letter)
1880 for x in supplier[1]:
1881 self.add_nocheck(eval(x))
1882 return
1883
1885 """B.current_suppliers() -> list.
1886
1887 return a sorted list of the suppliers which have been used to
1888 create the batch."""
1889 suppl_list = [suppliers_dict[x][0] for x in self.suppliers]
1890 suppl_list.sort()
1891 return suppl_list
1892
1894 """ b += other -> add other to b, check the type of other."""
1895 self.add(other)
1896 return self
1897
1899 """ b + other -> new RestrictionBatch."""
1900 new = self.__class__(self)
1901 new.add(other)
1902 return new
1903
1905 """B.remove(other) -> remove other from B if other is a RestrictionType.
1906
1907 Safe set.remove method. Verify that other is a RestrictionType or can be
1908 evaluated to a RestrictionType.
1909 raise a ValueError if other can not be evaluated to a RestrictionType.
1910 raise a KeyError if other is not in B."""
1911 return set.remove(self, self.format(other))
1912
def add(self, other):
    """B.add(other) -> add other to B if other is a RestrictionType.

    Safe counterpart of set.add: other is passed through B.format()
    first, and it is the formatted value that ends up in the batch.
    Whatever B.format() raises for an unusable value is propagated."""
    enzyme = self.format(other)
    return set.add(self, enzyme)
1921
1923 """B.add_nocheck(other) -> add other to B. don't check type of other.
1924 """
1925 return set.add(self, other)
1926
1944
1945
1947 """B.is_restriction(y) -> bool.
1948
1949 True is y or eval(y) is a RestrictionType."""
1950 return isinstance(y, RestrictionType) or \
1951 isinstance(eval(str(y)), RestrictionType)
1952
def split(self, *classes, **bool):
    """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

    Keep the enzymes of B that satisfy every class given in classes.
    By default an enzyme has to be a subclass of the class; passing
    <class name>=False as keyword asks for the enzymes that are NOT a
    subclass of it.

    it works but it is slow, so it has really an interest when splitting
    over multiple conditions."""
    # NOTE: the keyword dictionary is called 'bool' for interface
    # compatibility; it shadows the builtin inside this method.
    def keep(element):
        for klass in classes:
            wanted = bool.get(klass.__name__, True)
            is_sub = issubclass(element, klass)
            if (is_sub and not wanted) or (wanted and not is_sub):
                return False
        return True
    # The batch is a built-in set: the selection must go through the
    # constructor.  Assigning to a '_data' attribute (as the old
    # sets.Set based code did) leaves the new batch empty.
    return RestrictionBatch([enzyme for enzyme in self if keep(enzyme)])
1975
1977 """B.elements() -> list.
1978
1979 give all the names of the enzymes in B sorted alphabetically."""
1980 l = [str(e) for e in self]
1981 l.sort()
1982 return l
1983
1985 """B.as_string() -> list.
1986
1987 return a list of the name of the elements of B."""
1988 return [str(e) for e in self]
1989
1991 """B.suppl_codes() -> dict
1992
1993 letter code for the suppliers"""
1994 supply = dict([(k,v[0]) for k,v in suppliers_dict.iteritems()])
1995 return supply
1996 suppl_codes = classmethod(suppl_codes)
1997
1999 "B.show_codes() -> letter codes for the suppliers"""
2000 supply = [' = '.join(i) for i in self.suppl_codes().iteritems()]
2001 print '\n'.join(supply)
2002 return
2003 show_codes = classmethod(show_codes)
2004
def search(self, dna, linear=True):
    """B.search(dna) -> dict.

    Run the search of every enzyme of B against dna and return a
    dictionary {enzyme: result of enzyme.search()}.

    dna is either a DNA instance, which is wrapped in a FormattedSeq
    built with the linear flag, or a FormattedSeq, in which case its own
    linear attribute is used.  The last (sequence, linear) pair analysed
    is remembered: asking again for the same pair returns the stored
    mapping without a new search.

    raise a TypeError for any other kind of argument."""
    # Instances which never went through __init__ have no cache marker.
    if not hasattr(self, "already_mapped"):
        self.already_mapped = None
    if isinstance(dna, DNA):
        signature = (str(dna), linear)
        if signature == self.already_mapped:
            return self.mapping
        self.already_mapped = signature
        fseq = FormattedSeq(dna, linear)
        self.mapping = dict((x, x.search(fseq)) for x in self)
        return self.mapping
    if isinstance(dna, FormattedSeq):
        signature = (str(dna), dna.linear)
        if signature == self.already_mapped:
            return self.mapping
        self.already_mapped = signature
        self.mapping = dict((x, x.search(dna)) for x in self)
        return self.mapping
    raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                    % type(dna))
2036
2037
2038
2039
2040
2041
2042
2043 -class Analysis(RestrictionBatch, PrintFormat):
2044
2047 """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.
2048
2049 For most of the method of this class if a dictionary is given it will
2050 be used as the base to calculate the results.
2051 If no dictionary is given a new analysis using the Restriction Batch
2052 which has been given when the Analysis class has been instantiated."""
2053 RestrictionBatch.__init__(self, restrictionbatch)
2054 self.rb = restrictionbatch
2055 self.sequence = sequence
2056 self.linear = linear
2057 if self.sequence:
2058 self.search(self.sequence, self.linear)
2059
2061 return 'Analysis(%s,%s,%s)'%\
2062 (repr(self.rb),repr(self.sequence),self.linear)
2063
2065 """A._sub_set(other_set) -> dict.
2066
2067 Internal use only.
2068
2069 screen the results through wanted set.
2070 Keep only the results for which the enzymes is in wanted set.
2071 """
2072 return dict([(k,v) for k,v in self.mapping.iteritems() if k in wanted])
2073
2075 """A._boundaries(start, end) -> tuple.
2076
2077 Format the boundaries for use with the methods that limit the
2078 search to only part of the sequence given to analyse.
2079 """
2080 if not isinstance(start, int):
2081 raise TypeError('expected int, got %s instead' % type(start))
2082 if not isinstance(end, int):
2083 raise TypeError('expected int, got %s instead' % type(end))
2084 if start < 1:
2085 start += len(self.sequence)
2086 if end < 1:
2087 end += len(self.sequence)
2088 if start < end:
2089 pass
2090 else:
2091 start, end == end, start
2092 if start < 1:
2093 start == 1
2094 if start < end:
2095 return start, end, self._test_normal
2096 else:
2097 return start, end, self._test_reverse
2098
2100 """A._test_normal(start, end, site) -> bool.
2101
2102 Internal use only
2103 Test if site is in between start and end.
2104 """
2105 return start <= site < end
2106
2108 """A._test_reverse(start, end, site) -> bool.
2109
2110 Internal use only
2111 Test if site is in between end and start (for circular sequences).
2112 """
2113 return start <= site <= len(self.sequence) or 1 <= site < end
2114
def print_that(self, dct=None, title='', s1=''):
    """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    When dct is missing or empty the full mapping of A is used.
    The formatting itself is delegated to PrintFormat.print_that,
    whose return value is handed back."""
    shown = dct or self.mapping
    print
    return PrintFormat.print_that(self, shown, title, s1)
2124
2126 """A.change(**attribute_name) -> Change attribute of Analysis.
2127
2128 It is possible to change the width of the shell by setting
2129 self.ConsoleWidth to what you want.
2130 self.NameWidth refer to the maximal length of the enzyme name.
2131
2132 Changing one of these parameters here might not give the results
2133 you expect. In which case, you can settle back to a 80 columns shell
2134 or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
2135 you get it right."""
2136 for k,v in what.iteritems():
2137 if k in ('NameWidth', 'ConsoleWidth'):
2138 setattr(self, k, v)
2139 self.Cmodulo = self.ConsoleWidth % self.NameWidth
2140 self.PrefWidth = self.ConsoleWidth - self.Cmodulo
2141 elif k is 'sequence':
2142 setattr(self, 'sequence', v)
2143 self.search(self.sequence, self.linear)
2144 elif k is 'rb':
2145 self = Analysis.__init__(self, v, self.sequence, self.linear)
2146 elif k is 'linear':
2147 setattr(self, 'linear', v)
2148 self.search(self.sequence, v)
2149 elif k in ('Indent', 'Maxsize'):
2150 setattr(self, k, v)
2151 elif k in ('Cmodulo', 'PrefWidth'):
2152 raise AttributeError( \
2153 'To change %s, change NameWidth and/or ConsoleWidth' \
2154 % name)
2155 else:
2156 raise AttributeError( \
2157 'Analysis has no attribute %s' % name)
2158 return
2159
def full(self, linear=True):
    """A.full() -> dict.

    Give back the whole restriction map currently stored in A.mapping.
    The linear argument is accepted but not used."""
    whole_map = self.mapping
    return whole_map
2165
def blunt(self, dct = None):
    """A.blunt([dct]) -> dict.

    Only the enzymes which produce blunt ends (enzyme.is_blunt() true).

    dct -- dictionary {enzyme: sites} to filter.  When it is not given
           (None) the full mapping of A is used.  An empty dictionary is
           a valid input and gives an empty result, so the method can be
           chained after a filter that selected nothing."""
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if k.is_blunt())
2173
2175 """A.overhang5([dct]) -> dict.
2176
2177 Only the enzymes which have a 5' overhang restriction site."""
2178 if not dct:
2179 dct = self.mapping
2180 return dict([(k,v) for k,v in dct.iteritems() if k.is_5overhang()])
2181
2182
2184 """A.Overhang3([dct]) -> dict.
2185
2186 Only the enzymes which have a 3'overhang restriction site."""
2187 if not dct:
2188 dct = self.mapping
2189 return dict([(k,v) for k,v in dct.iteritems() if k.is_3overhang()])
2190
2191
2193 """A.defined([dct]) -> dict.
2194
2195 Only the enzymes that have a defined restriction site in Rebase."""
2196 if not dct:
2197 dct = self.mapping
2198 return dict([(k,v) for k,v in dct.iteritems() if k.is_defined()])
2199
2201 """A.with_sites([dct]) -> dict.
2202
2203 Enzymes which have at least one site in the sequence."""
2204 if not dct:
2205 dct = self.mapping
2206 return dict([(k,v) for k,v in dct.iteritems() if v])
2207
2209 """A.without_site([dct]) -> dict.
2210
2211 Enzymes which have no site in the sequence."""
2212 if not dct:
2213 dct = self.mapping
2214 return dict([(k,v) for k,v in dct.iteritems() if not v])
2215
2217 """A.With_N_Sites(N [, dct]) -> dict.
2218
2219 Enzymes which cut N times the sequence."""
2220 if not dct:
2221 dct = self.mapping
2222 return dict([(k,v) for k,v in dct.iteritems()if len(v) == N])
2223
2225 if not dct:
2226 dct = self.mapping
2227 return dict([(k,v) for k,v in dct.iteritems() if len(v) in list])
2228
2230 """A.with_name(list_of_names [, dct]) ->
2231
2232 Limit the search to the enzymes named in list_of_names."""
2233 for i, enzyme in enumerate(names):
2234 if not enzyme in AllEnzymes:
2235 print "no datas for the enzyme:", str(name)
2236 del names[i]
2237 if not dct:
2238 return RestrictionBatch(names).search(self.sequence)
2239 return dict([(n, dct[n]) for n in names if n in dct])
2240
2242 """A.with_site_size(site_size [, dct]) ->
2243
2244 Limit the search to the enzymes whose site is of size <site_size>."""
2245 sites = [name for name in self if name.size == site_size]
2246 if not dct:
2247 return RestrictionBatch(sites).search(self.sequence)
2248 return dict([(k,v) for k,v in dct.iteritems() if k in site_size])
2249
2251 """A.only_between(start, end[, dct]) -> dict.
2252
2253 Enzymes that cut the sequence only in between start and end."""
2254 start, end, test = self._boundaries(start, end)
2255 if not dct:
2256 dct = self.mapping
2257 d = dict(dct)
2258 for key, sites in dct.iteritems():
2259 if not sites:
2260 del d[key]
2261 continue
2262 for site in sites:
2263 if test(start, end, site):
2264 continue
2265 else:
2266 del d[key]
2267 break
2268 return d
2269
def between(self, start, end, dct=None):
    """A.between(start, end [, dct]) -> dict.

    Enzymes that cut the sequence at least in between start and end.
    They may cut outside as well.

    start, end -- limits of the region; they are normalised by
                  A._boundaries(), which also supplies the test used to
                  decide whether a site lies inside the region.
    dct        -- dictionary {enzyme: sites} to filter.  When it is not
                  given (None) the full mapping of A is used.  An empty
                  dictionary gives an empty result.

    The values of the result are the complete site lists, not only the
    sites located inside the region."""
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    return dict((key, sites) for key, sites in dct.items()
                if any(test(start, end, site) for site in sites))
2286
2288 """A.show_only_between(start, end [, dct]) -> dict.
2289
2290 Enzymes that cut the sequence in between start and end;
2291 only the sites located inside that region are reported."""
2292 d = []
2293 if start <= end:
2294 d = [(k, [vv for vv in v if start<=vv<=end])
2295 for v in self.between(start, end, dct)]
2296 else:
2297 d = [(k, [vv for vv in v if start<=vv or vv <= end])
2298 for v in self.between(start, end, dct)]
2299 return dict(d)
2300
2302 """A.only_outside(start, end [, dct]) -> dict.
2303
2304 Enzymes that cut the sequence outside of the region
2305 in between start and end but do not cut inside."""
2306 start, end, test = self._boundaries(start, end)
2307 if not dct : dct = self.mapping
2308 d = dict(dct)
2309 for key, sites in dct.iteritems():
2310 if not sites:
2311 del d[key]
2312 continue
2313 for site in sites:
2314 if test(start, end, site):
2315 del d[key]
2316 break
2317 else:
2318 continue
2319 return d
2320
def outside(self, start, end, dct=None):
    """A.outside((start, end [, dct]) -> dict.

    Enzymes that cut outside the region in between start and end.
    No test is made to know if they cut or not inside this region.

    start, end -- limits of the region; they are normalised by
                  A._boundaries(), which also supplies the test used to
                  decide whether a site lies inside the region.
    dct        -- dictionary {enzyme: sites} to filter.  When it is not
                  given (None) the full mapping of A is used.  An empty
                  dictionary gives an empty result.

    Enzymes without any site are never part of the result."""
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    # An enzyme qualifies as soon as one of its sites fails the
    # "inside the region" test.
    return dict((key, sites) for key, sites in dct.items()
                if not all(test(start, end, site) for site in sites))
2338
2339
2341 """A.do_not_cut(start, end [, dct]) -> dict.
2342
2343 Enzymes that do not cut the region in between start and end."""
2344 if not dct:
2345 dct = self.mapping
2346 d = self.without_site()
2347 d.update(self.only_outside(start, end, dct))
2348 return d
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
# Build the enzyme classes and the three collections exported by the module:
# CommOnly gathers the enzymes whose is_comm() is true, NonComm the others,
# AllEnzymes is the union of both.
CommOnly = RestrictionBatch()
NonComm = RestrictionBatch()
# Every value of typedict unpacks into (bases, enzymes): the names of the
# base classes and the names of the enzymes built on those bases.
for TYPE, (bases, enzymes) in typedict.iteritems():
    # The bases are stored as strings; eval() replaces each name by the
    # object of that name.
    # NOTE(review): typedict is assumed to be trusted, module-provided data.
    bases = tuple([eval(x) for x in bases])
    # One type, itself named 'RestrictionType', is created per group of
    # bases with RestrictionType as its metaclass.
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        # T is called the way a metaclass is called: name, bases and the
        # attribute dictionary taken from enzymedict.
        newenz = T(k, bases, enzymedict[k])
        # add_nocheck() stores the enzyme without the validation add() does.
        if newenz.is_comm() : CommOnly.add_nocheck(newenz)
        else : NonComm.add_nocheck(newenz)
AllEnzymes = CommOnly | NonComm
names = [str(x) for x in AllEnzymes]
# presumably removes the loop variable that a Python 2 list comprehension
# leaves behind; the NameError is ignored where no such variable exists.
try:
    del x
except NameError:
    pass
# NOTE(review): this relies on locals() being the module namespace at this
# point, so that every enzyme becomes reachable under its own name.
locals().update(dict(zip(names, AllEnzymes)))
__all__=['FormattedSeq', 'Analysis', 'RestrictionBatch','AllEnzymes','CommOnly','NonComm']+names
# Remove the temporaries of the construction loop from the namespace.
del k, enzymes, TYPE, bases, names
2428