Package Bio :: Package Restriction :: Module Restriction
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.Restriction

   1  #!/usr/bin/env python 
   2  # 
   3  #      Restriction Analysis Libraries. 
   4  #      Copyright (C) 2004. Frederic Sohm. 
   5  # 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  # 
  10   
  11  """ Notes about the diverses class of the restriction enzyme implementation. 
  12   
  13          RestrictionType is the type of all restriction enzymes. 
  14      ---------------------------------------------------------------------------- 
  15          AbstractCut implements some methods that are common to all enzymes. 
  16      ---------------------------------------------------------------------------- 
  17          NoCut, OneCut,TwoCuts   represent the number of double strand cuts 
  18                                  produced by the enzyme. 
  19                                  they correspond to the 4th field of the rebase 
  20                                  record emboss_e.NNN. 
  21                  0->NoCut    : the enzyme is not characterised. 
  22                  2->OneCut   : the enzyme produce one double strand cut. 
  23                  4->TwoCuts  : two double strand cuts. 
  24      ---------------------------------------------------------------------------- 
  25          Meth_Dep, Meth_Undep    represent the methylation susceptibility to 
  26                                  the enzyme. 
  27                                  Not implemented yet. 
  28      ---------------------------------------------------------------------------- 
  29          Palindromic,            if the site is palindromic or not. 
  30          NonPalindromic          allows some optimisations of the code. 
  31                                  No need to check the reverse strand 
  32                                  with palindromic sites. 
  33      ----------------------------------------------------------------------------                                     
  34          Unknown, Blunt,         represent the overhang. 
  35          Ov5, Ov3                Unknown is here for symmetry reasons and 
  36                                  corresponds to enzymes that are not characterised 
  37                                  in rebase. 
  38      ---------------------------------------------------------------------------- 
  39          Defined, Ambiguous,     represent the sequence of the overhang. 
  40          NotDefined              
  41                                  NotDefined is for enzymes not characterised in 
  42                                  rebase. 
  43                                   
  44                                  Defined correspond to enzymes that display a 
  45                                  constant overhang whatever the sequence. 
  46                                  ex : EcoRI. G^AATTC -> overhang :AATT 
  47                                              CTTAA^G 
  48   
  49                                  Ambiguous : the overhang varies with the 
  50                                  sequence restricted. 
  51                                  Typically enzymes which cut outside their 
  52                                  restriction site or (but not always) 
  53                                  inside an ambiguous site. 
  54                                  ex: 
  55                                  AcuI CTGAAG(22/20)  -> overhang : NN 
  56                                  AasI GACNNN^NNNGTC  -> overhang : NN 
  57                                       CTGN^NNNNNCAG 
  58   
  59              note : these 3 classes refer to the overhang, not the site. 
  60                 So the enzyme ApoI (RAATTY) is defined even if its restriction 
  61                 site is ambiguous. 
  62                                   
  63                      ApoI R^AATTY -> overhang : AATT -> Defined 
  64                           YTTAA^R 
  65                 Accordingly, blunt enzymes are always Defined even 
  66                 when they cut outside their restriction site. 
  67      ---------------------------------------------------------------------------- 
  68          Not_available,          as found in rebase file emboss_r.NNN files. 
  69          Commercially_available 
  70                                  allow the selection of the enzymes according to 
  71                                  their suppliers to reduce the quantity 
  72                                  of results. 
  73                                  Also will allow the implementation of buffer 
  74                                  compatibility tables. Not implemented yet. 
  75   
  76                                  the list of suppliers is extracted from 
  77                                  emboss_s.NNN 
  78      ---------------------------------------------------------------------------- 
  79          """ 
  80   
  81  import re 
  82  import itertools 
  83   
  84  from Bio.Seq import Seq, MutableSeq 
  85  from Bio.Alphabet import IUPAC 
  86   
  87  from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict 
  88  from Bio.Restriction.Restriction_Dictionary import typedict 
  89  from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict 
  90  from Bio.Restriction.RanaConfig import * 
  91  from Bio.Restriction.PrintFormat import PrintFormat 
  92   
  93  #Used to use Bio.Restriction.DNAUtils.check_bases (and expose it under this 
  94  #namespace), but have deprecated that module. 
95 -def _check_bases(seq_string):
96 """Check characters in a string (PRIVATE). 97 98 Remove digits and white space present in string. Allows any valid ambiguous 99 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted). 100 101 Other characters (e.g. symbols) trigger a TypeError. 102 103 Returns the string WITH A LEADING SPACE (!). This is for backwards 104 compatibility, and may in part be explained by the fact that 105 Bio.Restriction doesn't use zero based counting. 106 """ 107 #Remove white space and make upper case: 108 seq_string = "".join(seq_string.split()).upper() 109 #Remove digits 110 for c in "0123456789" : seq_string = seq_string.replace(c,"") 111 #Check only allowed IUPAC letters 112 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")) : 113 raise TypeError("Invalid character found in %s" % repr(seq_string)) 114 return " " + seq_string


# Compatibility table between IUPAC nucleotide codes.  For every code the
# value lists the codes it can pair up with when comparing two ambiguous
# sequences (the listed codes appear to be exactly those sharing at least
# one possible base with the key -- e.g. 'R' (A or G) is absent from 'Y').
matching = {
    'A': 'ARWMHVDN',
    'C': 'CYSMHBVN',
    'G': 'GRSKBVDN',
    'T': 'TYWKHBDN',
    'R': 'ABDGHKMNSRWV',
    'Y': 'CBDHKMNSTWVY',
    'W': 'ABDHKMNRTWVY',
    'S': 'CBDGHKMNSRVY',
    'M': 'ACBDHMNSRWVY',
    'K': 'BDGHKNSRTWVY',
    'H': 'ACBDHKMNSRTWVY',
    'B': 'CBDGHKMNSRTWVY',
    'V': 'ACBDGHKMNSRWVY',
    'D': 'ABDGHKMNSRTWVY',
    'N': 'ACBDGHKMNSRTWVY',
}

# Alias: DNA(...) builds a Bio.Seq.Seq.
DNA = Seq

class FormattedSeq(object):
    """FormattedSeq(seq, [linear=True]) -> new FormattedSeq.

    Translate a Bio.Seq into a formatted sequence to be used with Restriction.

    Roughly:
        the sequence string is cleaned by _check_bases (white space and
        digits removed, upper-cased, only IUPAC letters accepted) and a space
        is added in front of it to get a biological index instead of a
        python index (i.e. index of the first base is 1 not 0).

    Retains information about the shape of the molecule, linear (default)
    or circular.  Restriction sites are searched over the edges of circular
    sequences.
    """

    def __init__(self, seq, linear=True):
        """FormattedSeq(seq, [linear=True]) -> new FormattedSeq.

        seq is either a Bio.Seq, Bio.MutableSeq or a FormattedSeq.
        If seq is a FormattedSeq it is copied attribute by attribute and
        linear has no effect on the shape of the new sequence.

        Raises TypeError for any other type of seq.
        """
        if isinstance(seq, FormattedSeq):
            # Plain copy: the shape of the original wins over `linear`.
            self.lower = seq.lower
            self.data = seq.data
            self.linear = seq.linear
            self.alphabet = seq.alphabet
            self.klass = seq.klass
        elif isinstance(seq, (Seq, MutableSeq)):
            stringy = seq.tostring()
            # Remember the case so that slices can be returned in lower case.
            self.lower = stringy.islower()
            # Note this adds a leading space to the sequence (!)
            self.data = _check_bases(stringy)
            self.linear = linear
            self.klass = seq.__class__
            self.alphabet = seq.alphabet
        else:
            raise TypeError('expected Seq or MutableSeq, got %s' % type(seq))

    def __len__(self):
        """Number of bases (the leading space is not counted)."""
        return len(self.data) - 1

    def __repr__(self):
        return 'FormattedSeq(%s, linear=%s)' % (repr(self[1:]),
                                                repr(self.linear))

    def __eq__(self, other):
        """True if other is a FormattedSeq with the same repr().

        i.e. same sequence (as rebuilt by slicing) and same shape.
        """
        if isinstance(other, FormattedSeq):
            return repr(self) == repr(other)
        return False

    def __ne__(self, other):
        """Inverse of __eq__.

        Python 2 does not derive != from ==; without this method two equal
        FormattedSeq would compare both equal and unequal.
        """
        return not self.__eq__(other)

    def circularise(self):
        """FS.circularise() -> circularise FS (in place)."""
        self.linear = False
        return

    def linearise(self):
        """FS.linearise() -> linearise FS (in place)."""
        self.linear = True
        return

    def to_linear(self):
        """FS.to_linear() -> new linear FS instance."""
        new = self.__class__(self)
        new.linear = True
        return new

    def to_circular(self):
        """FS.to_circular() -> new circular FS instance."""
        new = self.__class__(self)
        new.linear = False
        return new

    def is_linear(self):
        """FS.is_linear() -> bool.

        True if the sequence will be analysed as a linear sequence.
        """
        return self.linear

    def finditer(self, pattern, size):
        """FS.finditer(pattern, size) -> list.

        Return the list of the matches of pattern in the sequence.
        The list is made of tuples (location, match.group); the group method
        of the match is used with non palindromic sites.

        pattern is the regular expression pattern corresponding to the
        enzyme restriction site.
        size is the size of the restriction enzyme recognition site.
        """
        if self.is_linear():
            data = self.data
        else:
            # Circular: append the first size-1 bases so that sites spanning
            # the origin can be matched.
            data = self.data + self.data[1:size]
        return [(hit.start(), hit.group) for hit in re.finditer(pattern, data)]

    def __getitem__(self, i):
        """Index or slice the stored string, returned as the original class.

        The result is lower-cased when the source sequence was lower case.
        """
        chunk = self.data[i]
        if self.lower:
            chunk = chunk.lower()
        return self.klass(chunk, self.alphabet)


class RestrictionType(type):
    """RestrictionType. Type from which derives all enzyme classes.

    Implement the operator methods.
    """

    def __init__(cls, name='', bases=(), dct=None):
        """RE(name, bases, dct) -> RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.

        The class attribute compsite (a regular expression source) is
        replaced by its compiled form.

        Raises ValueError if name contains a hyphen or if compsite is
        missing or cannot be compiled.
        """
        if "-" in name:
            raise ValueError("Problem with hyphen in %s as enzyme name"
                             % repr(name))
        if dct is None:
            dct = {}
        # type.__init__ takes (name, bases, dct); cls is already bound by
        # super(), passing it again is a TypeError on Python >= 2.6.
        super(RestrictionType, cls).__init__(name, bases, dct)
        pattern = getattr(cls, "compsite", None)
        try:
            cls.compsite = re.compile(pattern)
        except Exception:
            raise ValueError("Problem with regular expression, re.compiled(%s)"
                             % repr(pattern))

    def __add__(cls, other):
        """RE.__add__(other) -> RestrictionBatch().

        if other is an enzyme returns a batch of the two enzymes.
        if other is already a RestrictionBatch add enzyme to it.
        Raises TypeError for anything else.
        """
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        elif isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        else:
            raise TypeError

    def __div__(cls, other):
        """RE.__div__(other) -> list.

        RE/other
        returns RE.search(other).
        """
        return cls.search(other)

    def __rdiv__(cls, other):
        """RE.__rdiv__(other) -> list.

        other/RE
        returns RE.search(other).
        """
        return cls.search(other)

    def __truediv__(cls, other):
        """RE.__truediv__(other) -> list.

        RE/other
        returns RE.search(other).
        """
        return cls.search(other)

    def __rtruediv__(cls, other):
        """RE.__rtruediv__(other) -> list.

        other/RE
        returns RE.search(other).
        """
        return cls.search(other)

    def __floordiv__(cls, other):
        """RE.__floordiv__(other) -> list.

        RE//other
        returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __rfloordiv__(cls, other):
        """RE.__rfloordiv__(other) -> list.

        other//RE
        returns RE.catalyse(other).
        """
        return cls.catalyse(other)

    def __str__(cls):
        """RE.__str__() -> str.

        return the name of the enzyme.
        """
        return cls.__name__

    def __repr__(cls):
        """RE.__repr__() -> str.

        used with eval or exec will instantiate the enzyme.
        """
        return "%s" % cls.__name__

    def __len__(cls):
        """RE.__len__() -> int.

        length of the recognition site.
        """
        return cls.size

    def __hash__(cls):
        # Python default is to use id(...)
        # This is consistent with the __eq__ implementation
        return id(cls)

    def __eq__(cls, other):
        """RE == other -> bool

        True if RE and other are the same enzyme.

        Specifically this checks they are the same Python object.
        """
        return cls is other

    def __ne__(cls, other):
        """RE != other -> bool.

        isoschizomer strict, same recognition site, same restriction -> False
        all the other-> True

        WARNING - This is not the inverse of the __eq__ method.
        """
        if not isinstance(other, RestrictionType):
            return True
        return cls.charac != other.charac

    def __rshift__(cls, other):
        """RE >> other -> bool.

        neoschizomer : same recognition site, different restriction. -> True
        all the others :                                             -> False
        """
        if not isinstance(other, RestrictionType):
            return False
        return cls.site == other.site and cls.charac != other.charac

    def __mod__(cls, other):
        """a % b -> bool.

        Test compatibility of the overhang of a and b.
        True if a and b have compatible overhang.
        Raises TypeError if b is not an enzyme.
        """
        if not isinstance(other, RestrictionType):
            raise TypeError(
                'expected RestrictionType, got %s instead' % type(other))
        return cls._mod1(other)

    # Ordering: enzymes sort by (size of the recognition site, name), which
    # is exactly a tuple comparison.  Comparing with anything that is not an
    # enzyme raises NotImplementedError.

    def __ge__(cls, other):
        """a >= b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) >= (len(other), other.__name__)

    def __gt__(cls, other):
        """a > b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) > (len(other), other.__name__)

    def __le__(cls, other):
        """a <= b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) <= (len(other), other.__name__)

    def __lt__(cls, other):
        """a < b -> bool.

        sorting order:
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names.
        """
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) < (len(other), other.__name__)


class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethod.

    For internal use only. Not meant to be instantiate.
    """

    @classmethod
    def search(cls, dna, linear=True):
        """RE.search(dna, linear=True) -> list.

        return a list of all the site of RE in dna. Compensate for circular
        sequences and so on.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance
        (or an already built FormattedSeq, in which case linear is ignored).

        if linear is False, the restriction sites than span over the
        boundaries will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut.

        The (formatted) sequence is stored as RE.dna.
        """
        #
        #   Separating search from _search allow a (very limited) optimisation
        #   of the search when using a batch of restriction enzymes.
        #   in this case the DNA is tested once by the class which implements
        #   the batch instead of being tested by each enzyme single.
        #   see RestrictionBatch.search() for example.
        #
        if not isinstance(dna, FormattedSeq):
            dna = FormattedSeq(dna, linear)
        cls.dna = dna
        return cls._search()

    @classmethod
    def all_suppliers(cls):
        """RE.all_suppliers -> print all the suppliers of R"""
        supply = sorted([x[0] for x in suppliers_dict.values()])
        # Single argument, so this prints the same text on Python 2 and 3.
        print(",\n".join(supply))
        return

    @classmethod
    def is_equischizomer(cls, other):
        """RE.is_equischizomers(other) -> bool.

        True if other is an equischizomer of RE.
        False else.

        equischizomer <=> same site, same position of restriction.
        """
        return not (cls != other)

    @classmethod
    def is_neoschizomer(cls, other):
        """RE.is_neoschizomers(other) -> bool.

        True if other is a neoschizomer of RE.
        False else.

        neoschizomer <=> same site, different position of restriction.
        """
        return cls >> other

    @classmethod
    def is_isoschizomer(cls, other):
        """RE.is_isoschizomers(other) -> bool.

        True if other is an isoschizomer of RE.
        False else.

        isoschizomer <=> same site.
        """
        return (not (cls != other)) or cls >> other

    @classmethod
    def equischizomers(cls, batch=None):
        """RE.equischizomers([batch]) -> list.

        return a sorted list of all the equischizomers of RE (RE excluded).
        if batch is supplied it is used instead of the default AllEnzymes.

        equischizomer <=> same site, same position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if not (cls != x)]
        # RE itself is not one of its own equischizomers.  It may be absent
        # when a custom batch is supplied, which must not be an error.
        if cls in r:
            r.remove(cls)
        r.sort()
        return r

    @classmethod
    def neoschizomers(cls, batch=None):
        """RE.neoschizomers([batch]) -> list.

        return a sorted list of all the neoschizomers of RE.
        if batch is supplied it is used instead of the default AllEnzymes.

        neoschizomer <=> same site, different position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if cls >> x]
        r.sort()
        return r

    @classmethod
    def isoschizomers(cls, batch=None):
        """RE.isoschizomers([batch]) -> list.

        return a sorted list of all the equischizomers and neoschizomers of
        RE (RE excluded).
        if batch is supplied it is used instead of the default AllEnzymes.
        """
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if (cls >> x) or (not (cls != x))]
        # See equischizomers: RE may be absent from a custom batch.
        if cls in r:
            r.remove(cls)
        r.sort()
        return r

    @classmethod
    def frequency(cls):
        """RE.frequency() -> int.

        frequency of the site.
        """
        return cls.freq


class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These enzymes are generally enzymes that have been only partially
    characterised and the way they cut the DNA is unknown, or enzymes for
    which the pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the
    start of the restriction site.

    Unknown and NotDefined are also part of the base classes of these
    enzymes.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cut the sequence one time on each strand.
        Always False here.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cut the sequence twice on each strand.
        Always False here.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is the position of a match of the enzyme pattern in the
        sequence.  The cut position being unknown, the location itself is
        yielded unchanged.
        """
        yield location

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        as _modify for a site situated on the antiparallel strand when the
        enzyme is not palindromic.
        """
        yield location

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        Here the four cut positions are all None.
        """
        return None, None, None, None, cls.site

class OneCut(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA only once

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cut the sequence one time on each strand.
        Always True here.
        """
        return True

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cut the sequence twice on each strand.
        Always False here.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is the position of a match of the enzyme pattern in the
        sequence; the value yielded is the place where the enzyme cuts,
        i.e. location shifted by RE.fst5.

        example:
            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.
        """
        yield location + cls.fst5

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        as _modify for a site situated on the antiparallel strand when the
        enzyme is not palindromic: location shifted back by RE.fst3.
        """
        yield location - cls.fst3

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        Here the two second-cut positions are None.
        """
        return cls.fst5, cls.fst3, None, None, cls.site


class TwoCuts(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA twice

    Correspond to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cut the sequence one time on each strand.
        Always False here.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cut the sequence twice on each strand.
        Always True here.
        """
        return True

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is the position of a match of the enzyme pattern in the
        sequence.  Two cut positions are yielded, in this order:
        location shifted by RE.fst5, then location shifted by RE.scd5.
        """
        yield location + cls.fst5
        yield location + cls.scd5

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        as _modify for a site situated on the antiparallel strand when the
        enzyme is not palindromic: location shifted back by RE.fst3, then
        by RE.scd3.
        """
        yield location - cls.fst3
        yield location - cls.scd3

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.
        """
        return cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site


class Meth_Dep(AbstractCut):
    """Implement the information about methylation.

    Enzymes of this class possess a site which is methylable.
    """

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable.  Always True here.
        """
        return True

class Meth_Undep(AbstractCut):
    """Implement the information about methylation sensitivity.

    Enzymes of this class are not sensitive to methylation.
    """

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable.  Always False here.
        """
        return False

class Palindromic(AbstractCut):
    """Implement the methods specific to the enzymes which are palindromic

    palindromic means : the recognition site and its reverse complement are
                        identical.
    Remarks     : an enzyme with a site CGNNCG is palindromic even if some
                  of the sites that it will recognise are not.
                  for example here : CGAACG

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        for internal use only.

        Search RE.dna for the site of the enzyme.  As the site is
        palindromic only one strand needs to be looked at.  The cut
        positions are stored in RE.results, filtered by RE._drop() when
        there is at least one, and returned.
        """
        cuts = []
        for start, _group in cls.dna.finditer(cls.compsite, cls.size):
            # One or two cut positions per matched site.
            cuts.extend(cls._modify(start))
        cls.results = cuts
        if cls.results:
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome.  Always True here.
        """
        return True


class NonPalindromic(AbstractCut):
    """Implement the methods specific to the enzymes which are not palindromic

    palindromic means : the recognition site and its reverse complement are
                        identical.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        for internal use only.

        Search RE.dna for the site of the enzyme on both strands.

        A match whose group named after the enzyme is non-empty is handled
        with _modify; any other match is handled with _rev_modify and its
        cut positions are also kept apart in RE.on_minus.
        All the positions are stored sorted in RE.results, filtered by
        RE._drop() when there is at least one, and returned.
        """
        hits = cls.dna.finditer(cls.compsite, cls.size)
        cls.results = found = []
        enzyme_name = str(cls)
        cls.on_minus = minus = []
        for start, group in hits:
            if group(enzyme_name):
                found.extend(cls._modify(start))
            else:
                minus.extend(cls._rev_modify(start))
        found.extend(minus)
        if cls.results:
            cls.results.sort()
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome.  Always False here.
        """
        return False

class Unknown(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is unknown.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Would return the fragments produced by restricting dna with RE, but
        the way these enzymes cut is unknown: NotImplementedError is always
        raised.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular.
        """
        message = '%s restriction is unknown.' % cls.__name__
        raise NotImplementedError(message)

    # American spelling, same classmethod object.
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.  Always False here.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown"
        Here always "unknown".
        """
        return 'unknown'

    @classmethod
    def compatible_end(cls):
        """RE.compatible_end() -> list.

        list of all the enzymes that share compatible end with RE.
        Always empty here.
        """
        return []

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Always False here.
        """
        return False

class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is blunt.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        cuts = cls.search(dna, linear)
        # The sequence is read back from cls.dna after the search.
        # NOTE(review): slices start at 1, so cls.dna presumably uses
        # 1-based positions -- confirm against FormattedSeq.
        seq = cls.dna
        if not cuts:
            return seq[1:],
        # Fragments lying between two consecutive cut positions
        # (empty when there is a single cut).
        middle = [seq[a:b] for a, b in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START -> first cut, inner fragments, last cut -> END.
            return tuple([seq[1:cuts[0]]] + middle + [seq[cuts[-1]:]])
        # circular : bridge LAST site to FIRST site, then the inner fragments.
        return tuple([seq[cuts[-1]:] + seq[1:cuts[0]]] + middle)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end (always True here).

        see also:
        RE.is_3overhang()
        RE.is_5overhang()
        RE.is_unknown()"""
        return True

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end (always False here).

        see also:
        RE.is_3overhang()
        RE.is_blunt()
        RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end (always False here).

        see also:
        RE.is_5overhang()
        RE.is_blunt()
        RE.is_unknown()"""
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always 'blunt' for this class."""
        return 'blunt'

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of the enzymes of batch that produce a blunt end.
        When batch is omitted (or empty) AllEnzymes is searched."""
        if not batch:
            batch = AllEnzymes
        # Search the requested batch, not unconditionally AllEnzymes.
        return sorted(x for x in batch if x.is_blunt())

    @staticmethod
    def _mod1(other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other:
        True exactly when other is a subclass of Blunt."""
        return issubclass(other, Blunt)
1077
class Ov5(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3'.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        cuts = cls.search(dna, linear)
        # The sequence is read back from cls.dna after the search.
        # NOTE(review): slices start at 1, so cls.dna presumably uses
        # 1-based positions -- confirm against FormattedSeq.
        seq = cls.dna
        if not cuts:
            return seq[1:],
        # Fragments lying between two consecutive cut positions
        # (empty when there is a single cut).
        middle = [seq[a:b] for a, b in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START -> first cut, inner fragments, last cut -> END.
            return tuple([seq[1:cuts[0]]] + middle + [seq[cuts[-1]:]])
        # circular : bridge LAST site to FIRST site, then the inner fragments.
        return tuple([seq[cuts[-1]:] + seq[1:cuts[0]]] + middle)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end (always False here).

        see also:
        RE.is_3overhang()
        RE.is_5overhang()
        RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end (always True here).

        see also:
        RE.is_3overhang()
        RE.is_blunt()
        RE.is_unknown()"""
        return True

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end (always False here).

        see also:
        RE.is_5overhang()
        RE.is_blunt()
        RE.is_unknown()"""
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always "5' overhang" for this class."""
        return "5' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of the enzymes of batch that produce a 5' overhang and
        for which `enzyme % RE` is true.
        When batch is omitted (or empty) AllEnzymes is searched."""
        if not batch:
            batch = AllEnzymes
        # Search the requested batch, not unconditionally AllEnzymes.
        return sorted(x for x in batch if x.is_5overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another Ov5 enzyme can be compatible; the decision is then
        delegated to RE._mod2(other)."""
        if issubclass(other, Ov5):
            return cls._mod2(other)
        return False
1195 1196
class Ov3(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5'.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        cuts = cls.search(dna, linear)
        # The sequence is read back from cls.dna after the search.
        # NOTE(review): slices start at 1, so cls.dna presumably uses
        # 1-based positions -- confirm against FormattedSeq.
        seq = cls.dna
        if not cuts:
            return seq[1:],
        # Fragments lying between two consecutive cut positions
        # (empty when there is a single cut).
        middle = [seq[a:b] for a, b in zip(cuts, cuts[1:])]
        if seq.is_linear():
            # START -> first cut, inner fragments, last cut -> END.
            return tuple([seq[1:cuts[0]]] + middle + [seq[cuts[-1]:]])
        # circular : bridge LAST site to FIRST site, then the inner fragments.
        return tuple([seq[cuts[-1]:] + seq[1:cuts[0]]] + middle)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end (always False here).

        see also:
        RE.is_3overhang()
        RE.is_5overhang()
        RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end (always False here).

        see also:
        RE.is_3overhang()
        RE.is_blunt()
        RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end (always True here).

        see also:
        RE.is_5overhang()
        RE.is_blunt()
        RE.is_unknown()"""
        return True

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always "3' overhang" for this class."""
        return "3' overhang"

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of the enzymes of batch that produce a 3' overhang and
        for which `enzyme % RE` is true.
        When batch is omitted (or empty) AllEnzymes is searched."""
        if not batch:
            batch = AllEnzymes
        # Search the requested batch, not unconditionally AllEnzymes.
        return sorted(x for x in batch if x.is_3overhang() and x % cls)

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another Ov3 enzyme can be compatible; the decision is then
        delegated to RE._mod2(other)."""
        if issubclass(other, Ov3):
            return cls._mod2(other)
        return False
1317 1318
class Defined(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    and the cut are not variable.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes:
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N.
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _drop(cls):
        """RE._drop() -> None.

        for internal use only.

        drop the sites that are situated outside the sequence in linear
        sequence. modify the index for sites in circular sequences.
        Works on cls.results in terms of cls.dna; returns nothing."""
        #
        #   The cut positions are obtained by adding the site-to-cut distance
        #   to the site position, so some of them may fall outside the
        #   sequence.  Linear sequence: such leading/trailing positions are
        #   discarded.  Circular sequence: the site is in the sequence, so the
        #   position is wrapped around instead of being dropped.
        #
        length = len(cls.dna)
        if cls.dna.is_linear():
            leading_trimmed = itertools.dropwhile(lambda pos: pos < 1,
                                                  cls.results)
            cls.results = list(itertools.takewhile(lambda pos: pos < length,
                                                   list(leading_trimmed)))
            return
        positions = cls.results
        for index, location in enumerate(positions):
            if not location < 1:
                break
            positions[index] += length
        # walk a reversed copy so the tail of the list is examined first.
        for index, location in enumerate(positions[::-1]):
            if not location > length:
                break
            positions[-(index+1)] -= length
        return

    @classmethod
    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.  Always True for this class.

        see also:
        RE.is_ambiguous()
        RE.is_unknown()"""
        return True

    @classmethod
    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.  Always False for this class.

        see also:
        RE.is_defined()
        RE.is_unknown()"""
        return False

    @classmethod
    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        see also:
        RE.is_defined()
        RE.is_ambiguous()"""
        return False

    @classmethod
    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        f5, f3, site = cls.fst5, cls.fst3, cls.site
        if cls.cut_twice():
            return 'cut twice, not yet implemented sorry.'
        if cls.is_5overhang():
            if f5 == f3 == 0:
                return 'N^' + site + '_N'
            if f3 == 0:
                return site[:f5] + '^' + site[f5:] + '_N'
            return site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
        if cls.is_blunt():
            return site[:f5] + '^_' + site[f5:]
        # remaining case: neither 5' overhang nor blunt.
        if f5 == f3 == 0:
            return 'N_' + site + '^N'
        return site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]

    @classmethod
    def _mod2(cls, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Identical overhang sequences are compatible; when other is an
        Ambiguous enzyme the decision is handed to other._mod2(RE)."""
        if other.ovhgseq == cls.ovhgseq:
            return True
        if issubclass(other, Ambiguous):
            return other._mod2(cls)
        return False
1452 1453
class Ambiguous(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is variable.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.
    Notes:
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N.
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _drop(cls):
        """RE._drop() -> None.

        for internal use only.

        drop the sites that are situated outside the sequence in linear
        sequence. modify the index for sites in circular sequences.
        Works on cls.results in terms of cls.dna; returns nothing."""
        length = len(cls.dna)
        drop = itertools.dropwhile
        take = itertools.takewhile
        if cls.dna.is_linear():
            cls.results = [x for x in drop(lambda x: x < 1, cls.results)]
            cls.results = [x for x in take(lambda x: x < length, cls.results)]
        else:
            for index, location in enumerate(cls.results):
                if location < 1:
                    cls.results[index] += length
                else:
                    break
            # reversed copy: examine the tail of the list first.
            for index, location in enumerate(cls.results[::-1]):
                if location > length:
                    cls.results[-(index+1)] -= length
                else:
                    break
        return

    @classmethod
    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.  Always False for this class.

        see also:
        RE.is_ambiguous()
        RE.is_unknown()"""
        return False

    @classmethod
    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.  Always True for this class.

        see also:
        RE.is_defined()
        RE.is_unknown()"""
        return True

    @classmethod
    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always False for this class.

        see also:
        RE.is_defined()
        RE.is_ambiguous()"""
        return False

    @classmethod
    def _mod2(cls, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        The overhangs must have the same length; RE's overhang is then turned
        into a regular expression (N -> '.', any of RYWMSKHDBV -> the
        character class '[' + matching[base] + ']') and matched against the
        overhang of other."""
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if len(cls.ovhgseq) != len(other.ovhgseq):
            return False
        # Translate base by base.  Text produced by an expansion is never
        # scanned again, so a character class stays intact even if it
        # contains letters that are themselves ambiguity codes.
        pieces = []
        for base in cls.ovhgseq:
            if base == 'N':
                pieces.append('.')
            elif base in 'RYWMSKHDBV':
                pieces.append('[' + matching[base] + ']')
            else:
                pieces.append(base)
        if re.match(''.join(pieces), other.ovhgseq):
            return True
        return False

    @classmethod
    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        raise a ValueError for a blunt enzyme whose fst5 lies within
        0..len(RE)."""
        f5 = cls.fst5
        f3 = cls.fst3
        length = len(cls)
        site = cls.site
        # `rep` (not `re`) so the regular expression module is not shadowed.
        if cls.cut_twice():
            rep = 'cut twice, not yet implemented sorry.'
        elif cls.is_5overhang():
            if f3 == f5 == 0:
                rep = 'N^' + site + '_N'
            elif 0 <= f5 <= length and 0 <= f3+length <= length:
                rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
            elif 0 <= f5 <= length:
                rep = site[:f5] + '^' + site[f5:] + f3*'N' + '_N'
            elif 0 <= f3+length <= length:
                rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
            elif f3+length < 0:
                # 'N^' is concatenated with the run of N; multiplying a
                # string by a string would raise a TypeError.
                rep = 'N^' + abs(f5)*'N' + '_' + abs(length+f3)*'N' + site
            elif f5 > length:
                rep = site + (f5-length)*'N' + '^' + (length+f3-f5)*'N' + '_N'
            else:
                rep = 'N^' + abs(f5) * 'N' + site + f3*'N' + '_N'
        elif cls.is_blunt():
            if f5 < 0:
                rep = 'N^_' + abs(f5)*'N' + site
            elif f5 > length:
                rep = site + (f5-length)*'N' + '^_N'
            else:
                raise ValueError('%s.easyrepr() : error f5=%i' \
                                 % (cls.name, f5))
        else:
            if f3 == 0:
                if f5 == 0:
                    rep = 'N_' + site + '^N'
                else:
                    rep = site + '_' + (f5-length)*'N' + '^N'
            elif 0 < f3+length <= length and 0 <= f5 <= length:
                rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
            elif 0 < f3+length <= length:
                rep = site[:f3] + '_' + site[f3:] + (f5-length)*'N' + '^N'
            elif 0 <= f5 <= length:
                rep = 'N_' + 'N'*(f3+length) + site[:f5] + '^' + site[f5:]
            elif f3 > 0:
                rep = site + f3*'N' + '_' + (f5-f3-length)*'N' + '^N'
            elif f5 < 0:
                rep = 'N_' + abs(f3-f5+length)*'N' + '^' + abs(f5)*'N' + site
            else:
                rep = 'N_' + abs(f3+length)*'N' + site + (f5-length)*'N' + '^N'
        return rep
1621 1622
class NotDefined(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is not characterised.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _drop(cls):
        """RE._drop() -> None.

        for internal use only.

        Linear sequence: cls.results is left untouched.
        Circular sequence: positions below 1 at the start of cls.results get
        the sequence length added, positions above the sequence length at the
        end of cls.results get it subtracted."""
        if cls.dna.is_linear():
            return
        length = len(cls.dna)
        for index, location in enumerate(cls.results):
            if location < 1:
                cls.results[index] += length
            else:
                break
        # Walk a reversed copy ([::-1]) so that `location` is the very
        # element addressed by -(index+1); a forward [:-1] slice would test
        # one element and modify another.
        for index, location in enumerate(cls.results[::-1]):
            if location > length:
                cls.results[-(index+1)] -= length
            else:
                break
        return

    @classmethod
    def is_defined(cls):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.  Always False for this class.

        see also:
        RE.is_ambiguous()
        RE.is_unknown()"""
        return False

    @classmethod
    def is_ambiguous(cls):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.  Always False for this class.

        see also:
        RE.is_defined()
        RE.is_unknown()"""
        return False

    @classmethod
    def is_unknown(cls):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.
        Always True for this class.

        see also:
        RE.is_defined()
        RE.is_ambiguous()"""
        return True

    @classmethod
    def _mod2(cls, other):
        """RE._mod2(other) -> never returns, always raise a ValueError.

        for internal use only

        test for the compatibility of restriction ending of RE and other."""
        #
        #   Normally we should not arrive here. But well better safe than sorry.
        #   the overhang is not defined we are compatible with nobody.
        #   An error is raised rather than quietly returning False.
        #
        raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!" \
                         % (str(cls), str(other), str(cls)))

    @classmethod
    def elucidate(cls):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        For this class the cut is not known and the site is simply framed
        by question marks.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        return '? %s ?' % cls.site
1728 1729
class Commercially_available(AbstractCut):
    #
    #   Recent addition to Rebase make this naming convention uncertain.
    #   May be better to says enzymes which have a supplier.
    #
    """Implement the methods specific to the enzymes which are commercially
    available.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def suppliers(cls):
        """RE.suppliers() -> print the suppliers of RE.

        One line is printed, with a trailing comma, for every entry of
        suppliers_dict whose key is in RE.suppl."""
        for code, details in suppliers_dict.items():
            if code not in cls.suppl:
                continue
            print(details[0] + ',')
        return

    @classmethod
    def supplier_list(cls):
        """RE.supplier_list() -> list.

        list of the supplier names for RE."""
        names = []
        for code, details in suppliers_dict.items():
            if code in cls.suppl:
                names.append(details[0])
        return names

    @classmethod
    def buffers(cls, supplier):
        """RE.buffers(supplier) -> string.

        not implemented yet (currently returns None)."""
        return

    @classmethod
    def is_comm(cls):
        """RE.iscomm() -> bool.

        True if RE has suppliers (always True for this class)."""
        return True
1769 1770
class Not_available(AbstractCut):
    """Implement the methods specific to the enzymes which are not commercially
    available.

    Internal use only. Not meant to be instantiated."""

    @staticmethod
    def suppliers():
        """RE.suppliers() -> print the suppliers of RE.

        There is nothing to print for this class; None is returned."""
        return None

    @classmethod
    def supplier_list(cls):
        """RE.supplier_list() -> list.

        list of the supplier names for RE (always empty for this class)."""
        return list()

    @classmethod
    def buffers(cls, supplier):
        """RE.buffers(supplier) -> string.

        not implemented yet. Always raise a TypeError for this class."""
        raise TypeError("Enzyme not commercially available.")

    @classmethod
    def is_comm(cls):
        """RE.iscomm() -> bool.

        True if RE has suppliers (always False for this class)."""
        return False
1802 1803 1804 ############################################################################### 1805 # # 1806 # Restriction Batch # 1807 # # 1808 ############################################################################### 1809 1810
class RestrictionBatch(set):
    """A set of restriction enzymes that can be searched as one unit.

    Attributes set by __init__:
        mapping         -- dict enzyme -> result of the last search
                           (None until a search has been run).
        already_mapped  -- (sequence string, linear) of the cached search,
                           or None.
        suppliers       -- list of the supplier codes used to fill the batch.
    """

    def __init__(self, first=(), suppliers=()):
        """RestrictionBatch([sequence]) -> new RestrictionBatch.

        first     -- iterable of enzymes (RestrictionType, or anything
                     format() accepts).
        suppliers -- iterable of supplier codes (keys of suppliers_dict);
                     every enzyme listed for these suppliers is added.
        raise a ValueError if an element of first is not an enzyme and a
        KeyError if a supplier code is unknown."""
        first = [self.format(x) for x in first]
        # NOTE(review): eval() resolves the enzyme names stored in
        # suppliers_dict; that dictionary must come from a trusted source.
        first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
        set.__init__(self, first)
        self.mapping = dict.fromkeys(self)
        self.already_mapped = None
        # add_supplier() and current_suppliers() rely on this attribute.
        self.suppliers = list(suppliers)

    def __str__(self):
        names = self.elements()
        if len(self) < 5:
            return '+'.join(names)
        # long batches: two first and two last names only.
        return '...'.join(('+'.join(names[:2]), '+'.join(names[-2:])))

    def __repr__(self):
        return 'RestrictionBatch(%s)' % self.elements()

    def __contains__(self, other):
        try:
            other = self.format(other)
        except ValueError:  # other is not a restriction enzyme
            return False
        return set.__contains__(self, other)

    def __div__(self, other):
        """b / dna -> b.search(dna)."""
        return self.search(other)

    def __rdiv__(self, other):
        """dna / b -> b.search(dna)."""
        return self.search(other)

    # Same operators under true division (Python 3, or
    # `from __future__ import division`).
    __truediv__ = __div__
    __rtruediv__ = __rdiv__

    def get(self, enzyme, add=False):
        """B.get(enzyme[, add]) -> enzyme class.

        if add is True and enzyme is not in B add enzyme to B.
        if add is False (which is the default) only return enzyme.
        if enzyme is not a RestrictionType or can not be evaluated to
        a RestrictionType, raise a ValueError.
        a ValueError is also raised if enzyme is not in B and add is False."""
        e = self.format(enzyme)
        if e in self:
            return e
        elif add:
            self.add(e)
            return e
        else:
            raise ValueError('enzyme %s is not in RestrictionBatch' \
                             % e.__name__)

    def lambdasplit(self, func):
        """B.lambdasplit(func) -> RestrictionBatch .

        the new batch will contains only the enzymes for which
        func return True."""
        new = RestrictionBatch()
        # Fill the set itself; elements come from self, so no format() check.
        set.update(new, filter(func, self))
        return new

    def add_supplier(self, letter):
        """B.add_supplier(letter) -> add a new set of enzyme to B.

        letter represents the suppliers as defined in the dictionary
        RestrictionDictionary.suppliers
        return None.
        raise a KeyError if letter is not a supplier code."""
        supplier = suppliers_dict[letter]
        self.suppliers.append(letter)
        for x in supplier[1]:
            self.add_nocheck(eval(x))
        return

    def current_suppliers(self):
        """B.current_suppliers() -> list.

        return a sorted list of the names of the suppliers which have been
        used to create the batch."""
        suppl_list = [suppliers_dict[x][0] for x in self.suppliers]
        suppl_list.sort()
        return suppl_list

    def __iadd__(self, other):
        """ b += other -> add other to b, check the type of other."""
        self.add(other)
        return self

    def __add__(self, other):
        """ b + other -> new RestrictionBatch."""
        new = self.__class__(self)
        new.add(other)
        return new

    def remove(self, other):
        """B.remove(other) -> remove other from B if other is a RestrictionType.

        Safe set.remove method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        raise a KeyError if other is not in B."""
        return set.remove(self, self.format(other))

    def add(self, other):
        """B.add(other) -> add other to B if other is a RestrictionType.

        Safe set.add method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        """
        return set.add(self, self.format(other))

    def add_nocheck(self, other):
        """B.add_nocheck(other) -> add other to B. don't check type of other.
        """
        return set.add(self, other)

    def format(self, y):
        """B.format(y) -> RestrictionType or raise ValueError.

        if y is a RestrictionType return y
        if str(y) can be evaluated to a RestrictionType return that enzyme
        raise a ValueError in all other case."""
        if isinstance(y, RestrictionType):
            return y
        # NOTE(review): eval() executes arbitrary expressions; only names
        # from a trusted source should reach this point.
        try:
            candidate = eval(str(y))
        except (NameError, SyntaxError):
            candidate = None
        # The value evaluated once above is returned directly, so a
        # non-string y whose str() names an enzyme is accepted too.
        if isinstance(candidate, RestrictionType):
            return candidate
        raise ValueError('%s is not a RestrictionType' % y.__class__)

    def is_restriction(self, y):
        """B.is_restriction(y) -> bool.

        True is y or eval(y) is a RestrictionType."""
        return isinstance(y, RestrictionType) or \
               isinstance(eval(str(y)), RestrictionType)

    def split(self, *classes, **conditions):
        """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

        keep the enzymes that are subclasses of every class given, or, for a
        class passed with `class.__name__ = False`, the enzymes that are NOT
        subclasses of it.

        it works but it is slow, so it has really an interest when splitting
        over multiple conditions."""
        def splittest(element):
            for klass in classes:
                wanted = conditions.get(klass.__name__, True)
                # membership must agree with the requested flag.
                if bool(issubclass(element, klass)) != bool(wanted):
                    return False
            return True
        new = RestrictionBatch()
        # Fill the set itself; elements come from self, so no format() check.
        set.update(new, filter(splittest, self))
        return new

    def elements(self):
        """B.elements() -> list.

        give all the names of the enzymes in B sorted alphabetically."""
        l = [str(e) for e in self]
        l.sort()
        return l

    def as_string(self):
        """B.as_string() -> list.

        return a list of the name of the elements of B."""
        return [str(e) for e in self]

    @classmethod
    def suppl_codes(cls):
        """B.suppl_codes() -> dict

        letter code for the suppliers"""
        supply = dict([(k, v[0]) for k, v in suppliers_dict.items()])
        return supply

    @classmethod
    def show_codes(cls):
        """B.show_codes() -> letter codes for the suppliers"""
        supply = [' = '.join(i) for i in cls.suppl_codes().items()]
        print('\n'.join(supply))
        return

    def search(self, dna, linear=True):
        """B.search(dna) -> dict.

        dict enzyme -> result of enzyme.search for every enzyme of B.
        dna is a DNA instance (linear then tells if it is linear) or a
        FormattedSeq (its own linear attribute is used).
        The result is cached: searching the same sequence string with the
        same topology again returns the stored mapping.
        raise a TypeError for any other type of dna."""
        #
        #   here we replace the search method of the individual enzymes
        #   with one unique testing method.
        #
        if not hasattr(self, "already_mapped"):
            #TODO - Why does this happen!
            #Try the "doctest" at the start of PrintFormat.py
            self.already_mapped = None
        if isinstance(dna, DNA):
            # For the searching, we just care about the sequence as a string,
            # if that is the same we can use the cached search results.
            # At the time of writing, Seq == method isn't implemented,
            # and therefore does object identity which is stricter.
            if (str(dna), linear) == self.already_mapped:
                return self.mapping
            else:
                self.already_mapped = str(dna), linear
                fseq = FormattedSeq(dna, linear)
                self.mapping = dict([(x, x.search(fseq)) for x in self])
                return self.mapping
        elif isinstance(dna, FormattedSeq):
            if (str(dna), dna.linear) == self.already_mapped:
                return self.mapping
            else:
                self.already_mapped = str(dna), dna.linear
                self.mapping = dict([(x, x.search(dna)) for x in self])
                return self.mapping
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"\
                        %type(dna))
2036 2037 ############################################################################### 2038 # # 2039 # Restriction Analysis # 2040 # # 2041 ############################################################################### 2042
2043 -class Analysis(RestrictionBatch, PrintFormat):
2044
def __init__(self, restrictionbatch=RestrictionBatch(),sequence=DNA(''),
             linear=True):
    """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.

    The enzymes of restrictionbatch populate the analysis, and the batch,
    the sequence and the linear flag are kept as self.rb, self.sequence and
    self.linear.  A sequence that evaluates as true is searched at once.

    For most of the methods of this class, if a dictionary is given it will
    be used as the base to calculate the results.
    If no dictionary is given, a new analysis is run using the Restriction
    Batch which was given when the Analysis class was instantiated."""
    RestrictionBatch.__init__(self, restrictionbatch)
    self.rb, self.sequence, self.linear = restrictionbatch, sequence, linear
    if sequence:
        self.search(sequence, linear)
2059
2060 - def __repr__(self):
2061 return 'Analysis(%s,%s,%s)'%\ 2062 (repr(self.rb),repr(self.sequence),self.linear)
2063
2064 - def _sub_set(self, wanted):
2065 """A._sub_set(other_set) -> dict. 2066 2067 Internal use only. 2068 2069 screen the results through wanted set. 2070 Keep only the results for which the enzymes is in wanted set. 2071 """ 2072 return dict([(k,v) for k,v in self.mapping.iteritems() if k in wanted])
2073
2074 - def _boundaries(self, start, end):
2075 """A._boundaries(start, end) -> tuple. 2076 2077 Format the boundaries for use with the methods that limit the 2078 search to only part of the sequence given to analyse. 2079 """ 2080 if not isinstance(start, int): 2081 raise TypeError('expected int, got %s instead' % type(start)) 2082 if not isinstance(end, int): 2083 raise TypeError('expected int, got %s instead' % type(end)) 2084 if start < 1: 2085 start += len(self.sequence) 2086 if end < 1: 2087 end += len(self.sequence) 2088 if start < end: 2089 pass 2090 else: 2091 start, end == end, start 2092 if start < 1: 2093 start == 1 2094 if start < end: 2095 return start, end, self._test_normal 2096 else: 2097 return start, end, self._test_reverse
2098
2099 - def _test_normal(self, start, end, site):
2100 """A._test_normal(start, end, site) -> bool. 2101 2102 Internal use only 2103 Test if site is in between start and end. 2104 """ 2105 return start <= site < end
2106
2107 - def _test_reverse(self, start, end, site):
2108 """A._test_reverse(start, end, site) -> bool. 2109 2110 Internal use only 2111 Test if site is in between end and start (for circular sequences). 2112 """ 2113 return start <= site <= len(self.sequence) or 1 <= site < end
2114
2115 - def print_that(self, dct=None, title='', s1=''):
2116 """A.print_that([dct[, title[, s1]]]) -> print the results from dct. 2117 2118 If dct is not given the full dictionary is used. 2119 """ 2120 if not dct: 2121 dct = self.mapping 2122 print 2123 return PrintFormat.print_that(self, dct, title, s1)
2124
2125 - def change(self, **what):
2126 """A.change(**attribute_name) -> Change attribute of Analysis. 2127 2128 It is possible to change the width of the shell by setting 2129 self.ConsoleWidth to what you want. 2130 self.NameWidth refer to the maximal length of the enzyme name. 2131 2132 Changing one of these parameters here might not give the results 2133 you expect. In which case, you can settle back to a 80 columns shell 2134 or try to change self.Cmodulo and self.PrefWidth in PrintFormat until 2135 you get it right.""" 2136 for k,v in what.iteritems(): 2137 if k in ('NameWidth', 'ConsoleWidth'): 2138 setattr(self, k, v) 2139 self.Cmodulo = self.ConsoleWidth % self.NameWidth 2140 self.PrefWidth = self.ConsoleWidth - self.Cmodulo 2141 elif k is 'sequence': 2142 setattr(self, 'sequence', v) 2143 self.search(self.sequence, self.linear) 2144 elif k is 'rb': 2145 self = Analysis.__init__(self, v, self.sequence, self.linear) 2146 elif k is 'linear': 2147 setattr(self, 'linear', v) 2148 self.search(self.sequence, v) 2149 elif k in ('Indent', 'Maxsize'): 2150 setattr(self, k, v) 2151 elif k in ('Cmodulo', 'PrefWidth'): 2152 raise AttributeError( \ 2153 'To change %s, change NameWidth and/or ConsoleWidth' \ 2154 % name) 2155 else: 2156 raise AttributeError( \ 2157 'Analysis has no attribute %s' % name) 2158 return
2159
2160 - def full(self, linear=True):
2161 """A.full() -> dict. 2162 2163 Full Restriction Map of the sequence.""" 2164 return self.mapping
2165
2166 - def blunt(self, dct = None):
2167 """A.blunt([dct]) -> dict. 2168 2169 Only the enzymes which have a 3'overhang restriction site.""" 2170 if not dct: 2171 dct = self.mapping 2172 return dict([(k,v) for k,v in dct.iteritems() if k.is_blunt()])
2173
2174 - def overhang5(self, dct=None):
2175 """A.overhang5([dct]) -> dict. 2176 2177 Only the enzymes which have a 5' overhang restriction site.""" 2178 if not dct: 2179 dct = self.mapping 2180 return dict([(k,v) for k,v in dct.iteritems() if k.is_5overhang()])
2181 2182
2183 - def overhang3(self, dct=None):
2184 """A.Overhang3([dct]) -> dict. 2185 2186 Only the enzymes which have a 3'overhang restriction site.""" 2187 if not dct: 2188 dct = self.mapping 2189 return dict([(k,v) for k,v in dct.iteritems() if k.is_3overhang()])
2190 2191
2192 - def defined(self, dct=None):
2193 """A.defined([dct]) -> dict. 2194 2195 Only the enzymes that have a defined restriction site in Rebase.""" 2196 if not dct: 2197 dct = self.mapping 2198 return dict([(k,v) for k,v in dct.iteritems() if k.is_defined()])
2199
2200 - def with_sites(self, dct=None):
2201 """A.with_sites([dct]) -> dict. 2202 2203 Enzymes which have at least one site in the sequence.""" 2204 if not dct: 2205 dct = self.mapping 2206 return dict([(k,v) for k,v in dct.iteritems() if v])
2207
2208 - def without_site(self, dct=None):
2209 """A.without_site([dct]) -> dict. 2210 2211 Enzymes which have no site in the sequence.""" 2212 if not dct: 2213 dct = self.mapping 2214 return dict([(k,v) for k,v in dct.iteritems() if not v])
2215
2216 - def with_N_sites(self, N, dct=None):
2217 """A.With_N_Sites(N [, dct]) -> dict. 2218 2219 Enzymes which cut N times the sequence.""" 2220 if not dct: 2221 dct = self.mapping 2222 return dict([(k,v) for k,v in dct.iteritems()if len(v) == N])
2223
2224 - def with_number_list(self, list, dct= None):
2225 if not dct: 2226 dct = self.mapping 2227 return dict([(k,v) for k,v in dct.iteritems() if len(v) in list])
2228
2229 - def with_name(self, names, dct=None):
2230 """A.with_name(list_of_names [, dct]) -> 2231 2232 Limit the search to the enzymes named in list_of_names.""" 2233 for i, enzyme in enumerate(names): 2234 if not enzyme in AllEnzymes: 2235 print "no datas for the enzyme:", str(name) 2236 del names[i] 2237 if not dct: 2238 return RestrictionBatch(names).search(self.sequence) 2239 return dict([(n, dct[n]) for n in names if n in dct])
2240
2241 - def with_site_size(self, site_size, dct=None):
2242 """A.with_site_size(site_size [, dct]) -> 2243 2244 Limit the search to the enzymes whose site is of size <site_size>.""" 2245 sites = [name for name in self if name.size == site_size] 2246 if not dct: 2247 return RestrictionBatch(sites).search(self.sequence) 2248 return dict([(k,v) for k,v in dct.iteritems() if k in site_size])
2249
2250 - def only_between(self, start, end, dct=None):
2251 """A.only_between(start, end[, dct]) -> dict. 2252 2253 Enzymes that cut the sequence only in between start and end.""" 2254 start, end, test = self._boundaries(start, end) 2255 if not dct: 2256 dct = self.mapping 2257 d = dict(dct) 2258 for key, sites in dct.iteritems(): 2259 if not sites: 2260 del d[key] 2261 continue 2262 for site in sites: 2263 if test(start, end, site): 2264 continue 2265 else: 2266 del d[key] 2267 break 2268 return d
2269
2270 - def between(self, start, end, dct=None):
2271 """A.between(start, end [, dct]) -> dict. 2272 2273 Enzymes that cut the sequence at least in between start and end. 2274 They may cut outside as well.""" 2275 start, end, test = self._boundaries(start, end) 2276 d = {} 2277 if not dct: 2278 dct = self.mapping 2279 for key, sites in dct.iteritems(): 2280 for site in sites: 2281 if test(start, end, site): 2282 d[key] = sites 2283 break 2284 continue 2285 return d
2286
2287 - def show_only_between(self, start, end, dct=None):
2288 """A.show_only_between(start, end [, dct]) -> dict. 2289 2290 Enzymes that cut the sequence outside of the region 2291 in between start and end but do not cut inside.""" 2292 d = [] 2293 if start <= end: 2294 d = [(k, [vv for vv in v if start<=vv<=end]) 2295 for v in self.between(start, end, dct)] 2296 else: 2297 d = [(k, [vv for vv in v if start<=vv or vv <= end]) 2298 for v in self.between(start, end, dct)] 2299 return dict(d)
2300
2301 - def only_outside(self, start, end, dct = None):
2302 """A.only_outside(start, end [, dct]) -> dict. 2303 2304 Enzymes that cut the sequence outside of the region 2305 in between start and end but do not cut inside.""" 2306 start, end, test = self._boundaries(start, end) 2307 if not dct : dct = self.mapping 2308 d = dict(dct) 2309 for key, sites in dct.iteritems(): 2310 if not sites: 2311 del d[key] 2312 continue 2313 for site in sites: 2314 if test(start, end, site): 2315 del d[key] 2316 break 2317 else: 2318 continue 2319 return d
2320
2321 - def outside(self, start, end, dct=None):
2322 """A.outside((start, end [, dct]) -> dict. 2323 2324 Enzymes that cut outside the region in between start and end. 2325 No test is made to know if they cut or not inside this region.""" 2326 start, end, test = self._boundaries(start, end) 2327 if not dct: 2328 dct = self.mapping 2329 d = {} 2330 for key, sites in dct.iteritems(): 2331 for site in sites: 2332 if test(start, end, site): 2333 continue 2334 else: 2335 d[key] = sites 2336 break 2337 return d
2338 2339
2340 - def do_not_cut(self, start, end, dct = None):
2341 """A.do_not_cut(start, end [, dct]) -> dict. 2342 2343 Enzymes that do not cut the region in between start and end.""" 2344 if not dct: 2345 dct = self.mapping 2346 d = self.without_site() 2347 d.update(self.only_outside(start, end, dct)) 2348 return d

#
# The restriction enzyme classes are created dynamically when the module is
# imported. Here is the magic which allows the creation of the
# restriction-enzyme classes.
#
# There are two dictionaries in Restriction_Dictionary: one for the types
# (which will be called pseudo-types as they really correspond to the values
# that instances of RestrictionType can take) and one for the enzymes.
# The reason for this split is efficiency, as the bases are evaluated only
# once per pseudo-type.
#
# However Restriction is still a very inefficient module at import. But
# remember that around 660 classes (which is more or less the size of Rebase)
# have to be created dynamically. However, this processing takes place only
# once.
# This inefficiency is however largely compensated by the use of metaclasses
# which provide a very efficient layout for the classes themselves, mostly
# alleviating the need for if/else tests in the class methods.
#
# It is essential to run Restriction with doc string optimisation (-OO switch)
# as the doc strings of 660 classes take a lot of processing.
#
CommOnly = RestrictionBatch()    # commercial enzymes
NonComm = RestrictionBatch()     # not available commercially
# NOTE(review): dict.iteritems() exists only in Python 2 - presumably the
# module is converted (e.g. with 2to3) for Python 3; confirm.
for TYPE, (bases, enzymes) in typedict.iteritems():
    #
    # The keys are the pseudo-types TYPE (stored as type1, type2...)
    # The names are not important and are only present to differentiate
    # the keys in the dict. All the pseudo-types are in fact RestrictionType.
    # These names will not be used afterwards and the pseudo-types are not
    # kept in the locals() dictionary. It is therefore impossible to
    # import them.
    # Now, if you have a look at the dictionary, you will see that not all
    # the types are present, as those without corresponding enzymes have been
    # removed by Dictionary_Builder().
    #
    # The values are tuples which contain
    # as first element a tuple of bases (as strings) and
    # as second element the names of the enzymes.
    #
    # First eval the bases: each string is evaluated in this module's
    # namespace to obtain the base class it names.
    #
    bases = tuple([eval(x) for x in bases])
    #
    # now create the particular value of RestrictionType for the classes
    # in enzymes.
    #
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        #
        # Now, we go through all the enzymes and assign them their type.
        # enzymedict[k] contains the values of the attributes for this
        # particular class (self.site, self.ovhg,....).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        # we add the enzymes to the corresponding batch.
        #
        # No need to verify the enzyme is a RestrictionType -> add_nocheck
        #
        if newenz.is_comm() : CommOnly.add_nocheck(newenz)
        else : NonComm.add_nocheck(newenz)
#
# AllEnzymes is a RestrictionBatch with all the enzymes from Rebase.
#
AllEnzymes = CommOnly | NonComm
#
# Now, place the enzymes in locals so they can be imported.
#
names = [str(x) for x in AllEnzymes]
try:
    del x
except NameError:
    #Scoping changed in Python 3, the variable isn't leaked
    pass
# This runs at module level, where updating locals() adds the enzyme
# classes, under their names, to the namespace of the module.
locals().update(dict(zip(names, AllEnzymes)))
__all__=['FormattedSeq', 'Analysis', 'RestrictionBatch','AllEnzymes','CommOnly','NonComm']+names
# Remove the loop variables and the temporary list so that they do not
# remain in the namespace of the module.
del k, enzymes, TYPE, bases, names