Package Bio :: Module SeqFeature
[hide private]
[frames] | no frames]

Source Code for Module Bio.SeqFeature

  1  # Copyright 2000-2003 Jeff Chang. 
  2  # Copyright 2001-2008 Brad Chapman. 
  3  # Copyright 2005-2010 by Peter Cock. 
  4  # Copyright 2006-2009 Michiel de Hoon. 
  5  # All rights reserved. 
  6  # This code is part of the Biopython distribution and governed by its 
  7  # license.  Please see the LICENSE file that should have been included 
  8  # as part of this package. 
  9  """Represent a Sequence Feature holding info about a part of a sequence. 
 10   
 11  This is heavily modeled after the Biocorba SeqFeature objects, and 
 12  may be pretty biased towards GenBank stuff since I'm writing it 
 13  for the GenBank parser output... 
 14   
 15  What's here: 
 16   
 17  Base class to hold a Feature. 
 18  ---------------------------- 
 19  classes: 
 20  o SeqFeature 
 21   
 22  Hold information about a Reference. 
 23  ---------------------------------- 
 24   
 25  This is an attempt to create a General class to hold Reference type 
 26  information. 
 27   
 28  classes: 
 29  o Reference 
 30   
 31  Specify locations of a feature on a Sequence. 
 32  --------------------------------------------- 
 33   
 34  This aims to handle, in Ewan's words, 'the dreaded fuzziness issue' in 
 35  much the same way as Biocorba. This has the advantages of allowing us 
 36  to handle fuzzy stuff in case anyone needs it, and also be compatible 
 37  with Biocorba. 
 38   
 39  classes: 
 40  o FeatureLocation - Specify the start and end location of a feature. 
 41   
 42  o ExactPosition - Specify the position as being exact. 
 43  o WithinPosition - Specify a position occurring within some range. 
 44  o BetweenPosition - Specify a position occurring between a range (OBSOLETE?). 
 45  o BeforePosition - Specify the position as being found before some base. 
 46  o AfterPosition - Specify the position as being found after some base. 
 47  o OneOfPosition - Specify a position where the location can be multiple positions. 
 48  """ 
 49   
 50  from Bio.Seq import MutableSeq, reverse_complement 
 51   
class SeqFeature(object):
    """Represent a Sequence Feature on an object.

    Attributes:
    o location - the location of the feature on the sequence (FeatureLocation)
    o type - the specified type of the feature (ie. CDS, exon, repeat...)
    o location_operator - a string specifying how this SeqFeature may
    be related to others. For example, in the example GenBank feature
    shown below, the location_operator would be "join"
    o strand - A value specifying on which strand (of a DNA sequence, for
    instance) the feature deals with. 1 indicates the plus strand, -1
    indicates the minus strand, 0 indicates stranded but unknown (? in GFF3),
    while the default of None indicates that strand doesn't apply (dot in GFF3,
    e.g. features on proteins)
    o id - A string identifier for the feature.
    o ref - A reference to another sequence. This could be an accession
    number for some different sequence.
    o ref_db - A different database for the reference accession number.
    o qualifiers - A dictionary of qualifiers on the feature. These are
    analogous to the qualifiers from a GenBank feature table. The keys of
    the dictionary are qualifier names, the values are the qualifier
    values.
    o sub_features - Additional SeqFeatures which fall under this 'parent'
    feature. For instance, if we have something like:

    CDS    join(1..10,30..40,50..60)

    Then the top level feature would be of type 'CDS' from 1 to 60 (actually 0
    to 60 in Python counting) with location_operator='join', and the three sub-
    features would also be of type 'CDS', and would be from 1 to 10, 30 to
    40 and 50 to 60, respectively (although actually using Python counting).

    To get the nucleotide sequence for this CDS, you would need to take the
    parent sequence and do seq[0:10]+seq[29:40]+seq[49:60] (Python counting).
    Things are more complicated with strands and fuzzy positions. To save you
    dealing with all these special cases, the SeqFeature provides an extract
    method to do this for you.
    """

    def __init__(self, location=None, type='', location_operator='',
                 strand=None, id="<unknown id>",
                 qualifiers=None, sub_features=None,
                 ref=None, ref_db=None):
        """Initialize a SeqFeature on a Sequence.

        location can either be a FeatureLocation (with strand argument also
        given if required), or None.

        e.g. With no strand, on the forward strand, and on the reverse strand:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f1 = SeqFeature(FeatureLocation(5,10), type="domain")
        >>> f2 = SeqFeature(FeatureLocation(7,110), strand=1, type="CDS")
        >>> f3 = SeqFeature(FeatureLocation(9,108), strand=-1, type="CDS")

        An invalid strand will trigger an exception:

        >>> f4 = SeqFeature(FeatureLocation(50,60), strand=2)
        Traceback (most recent call last):
           ...
        ValueError: Strand should be +1, -1, 0 or None, not 2

        For exact start/end positions, an integer can be used (as shown above)
        as shorthand for the ExactPosition object. For non-exact locations, the
        FeatureLocation must be specified via the appropriate position objects.

        Raises ValueError for an invalid strand, and TypeError if location is
        neither None nor a FeatureLocation.
        """
        if strand not in [-1, 0, 1, None]:
            raise ValueError("Strand should be +1, -1, 0 or None, not %s"
                             % repr(strand))
        if location is not None and not isinstance(location, FeatureLocation):
            raise TypeError("FeatureLocation (or None) required for the location")
        self.location = location

        self.type = type
        self.location_operator = location_operator
        self.strand = strand
        self.id = id
        # Fresh containers per instance (never share a mutable default)
        if qualifiers is None:
            qualifiers = {}
        self.qualifiers = qualifiers
        if sub_features is None:
            sub_features = []
        self.sub_features = sub_features
        self.ref = ref
        self.ref_db = ref_db

    def __repr__(self):
        """A string representation of the record for debugging."""
        answer = "%s(%s" % (self.__class__.__name__, repr(self.location))
        if self.type:
            answer += ", type=%s" % repr(self.type)
        if self.location_operator:
            answer += ", location_operator=%s" % repr(self.location_operator)
        if self.strand is not None:
            answer += ", strand=%s" % repr(self.strand)
        if self.id and self.id != "<unknown id>":
            answer += ", id=%s" % repr(self.id)
        if self.ref:
            answer += ", ref=%s" % repr(self.ref)
        if self.ref_db:
            answer += ", ref_db=%s" % repr(self.ref_db)
        answer += ")"
        return answer

    def __str__(self):
        """A readable summary of the feature intended to be printed to screen.
        """
        lines = ["type: %s\n" % self.type,
                 "location: %s\n" % self.location]
        if self.id and self.id != "<unknown id>":
            lines.append("id: %s\n" % self.id)
        if self.ref or self.ref_db:
            lines.append("ref: %s:%s\n" % (self.ref, self.ref_db))
        lines.append("strand: %s\n" % self.strand)
        lines.append("qualifiers: \n")
        # Sorted so that the output is reproducible
        for qual_key in sorted(self.qualifiers):
            lines.append(" Key: %s, Value: %s\n"
                         % (qual_key, self.qualifiers[qual_key]))
        if len(self.sub_features) != 0:
            lines.append("Sub-Features\n")
            for sub_feature in self.sub_features:
                lines.append("%s\n" % sub_feature)
        return "".join(lines)

    def _shift(self, offset):
        """Returns a copy of the feature with its location shifted (PRIVATE).

        The annotation qualifiers are copied (a shallow copy of the dict).
        Sub-features are shifted recursively.
        """
        return SeqFeature(location=self.location._shift(offset),
                          type=self.type,
                          location_operator=self.location_operator,
                          strand=self.strand,
                          id=self.id,
                          # dict(mapping) works on Python 2 and 3, unlike
                          # the Python 2 only iteritems() method.
                          qualifiers=dict(self.qualifiers),
                          sub_features=[f._shift(offset)
                                        for f in self.sub_features],
                          ref=self.ref,
                          ref_db=self.ref_db)

    def _flip(self, length):
        """Returns a copy of the feature with its location flipped (PRIVATE).

        The argument length gives the length of the parent sequence. For
        example a location 0..20 (+1 strand) with parent length 30 becomes
        after flipping 10..30 (-1 strand). Strandless (None) or unknown
        strand (0) remain like that - just their end points are changed.

        The annotation qualifiers are copied (a shallow copy of the dict).
        Sub-features are flipped recursively and their order is reversed.
        """
        if self.strand == +1:
            new_strand = -1
        elif self.strand == -1:
            new_strand = +1
        else:
            # When creating the new SeqFeature it will check this is 0 or None
            new_strand = self.strand
        return SeqFeature(location=self.location._flip(length),
                          type=self.type,
                          location_operator=self.location_operator,
                          strand=new_strand,
                          id=self.id,
                          qualifiers=dict(self.qualifiers),
                          sub_features=[f._flip(length)
                                        for f in self.sub_features[::-1]],
                          ref=self.ref,
                          ref_db=self.ref_db)

    def extract(self, parent_sequence):
        """Extract feature sequence from the supplied parent sequence.

        The parent_sequence can be a Seq like object or a string, and will
        generally return an object of the same type. The exception to this is
        a MutableSeq as the parent sequence will return a Seq object.

        This should cope with complex locations including complements, joins
        and fuzzy positions. Even mixed strand features should work! This
        also covers features on protein sequences (e.g. domains), although
        here reverse strand features are not permitted.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import generic_protein
        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL", generic_protein)
        >>> f = SeqFeature(FeatureLocation(8,15), type="domain")
        >>> f.extract(seq)
        Seq('VALIVIC', ProteinAlphabet())

        Note - currently only sub-features of type "join" are supported;
        any other location_operator with sub-features raises ValueError.
        """
        if isinstance(parent_sequence, MutableSeq):
            # This avoids complications with reverse complements
            # (the MutableSeq reverse complement acts in situ)
            parent_sequence = parent_sequence.toseq()
        if self.sub_features:
            if self.location_operator != "join":
                raise ValueError(self.location_operator)
            if self.strand == -1:
                # This is a special case given how the GenBank parser works.
                # Must avoid doing the reverse complement twice, so the
                # parts are sliced as-is and flipped once at the end.
                parts = []
                for f_sub in self.sub_features:
                    assert f_sub.strand == -1
                    parts.append(
                        parent_sequence[f_sub.location.nofuzzy_start:
                                        f_sub.location.nofuzzy_end])
            else:
                # This copes with mixed strand features:
                parts = [f_sub.extract(parent_sequence)
                         for f_sub in self.sub_features]
            # We use addition rather than a join to avoid alphabet issues:
            f_seq = parts[0]
            for part in parts[1:]:
                f_seq += part
        else:
            f_seq = parent_sequence[self.location.nofuzzy_start:
                                    self.location.nofuzzy_end]
        if self.strand == -1:
            # TODO - MutableSeq?
            try:
                f_seq = f_seq.reverse_complement()
            except AttributeError:
                # Plain strings have no reverse_complement method
                assert isinstance(f_seq, str)
                f_seq = reverse_complement(f_seq)
        return f_seq

    def __nonzero__(self):
        """Returns True regardless of the length of the feature.

        This behaviour is for backwards compatibility, since until the
        __len__ method was added, a SeqFeature always evaluated as True.

        Note that in comparison, Seq objects, strings, lists, etc, will all
        evaluate to False if they have length zero.

        WARNING: The SeqFeature may in future evaluate to False when its
        length is zero (in order to better match normal python behaviour)!
        """
        return True

    # Python 3 looks for __bool__ (not __nonzero__); without this alias the
    # truth value would silently fall back to __len__.
    __bool__ = __nonzero__

    def __len__(self):
        """Returns the length of the region described by a feature.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import generic_protein
        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL", generic_protein)
        >>> f = SeqFeature(FeatureLocation(8,15), type="domain")
        >>> len(f)
        7
        >>> f.extract(seq)
        Seq('VALIVIC', ProteinAlphabet())
        >>> len(f.extract(seq))
        7

        For simple features without subfeatures this is the same as the region
        spanned (end position minus start position). However, for a feature
        defined by combining several subfeatures (e.g. a CDS as the join of
        several exons) the gaps are not counted (e.g. introns). This ensures
        that len(f) == len(f.extract(parent_seq)), and also makes sure things
        work properly with features wrapping the origin etc.
        """
        if self.sub_features:
            return sum(len(f) for f in self.sub_features)
        else:
            return len(self.location)

    def __iter__(self):
        """Iterate over the parent positions within the feature.

        The iteration order is strand aware, and can be thought of as moving
        along the feature using the parent sequence coordinates:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f = SeqFeature(FeatureLocation(5,10), type="domain", strand=-1)
        >>> len(f)
        5
        >>> for i in f: print i
        9
        8
        7
        6
        5
        >>> list(f)
        [9, 8, 7, 6, 5]

        For a feature with sub-features, each sub-feature is walked in its
        own strand direction (and the sub-features are visited in reverse
        order if this feature is on the minus strand), so the positions come
        out in the same order as the letters returned by the extract method.
        """
        if self.sub_features:
            if self.strand == -1:
                ordered = self.sub_features[::-1]
            else:
                ordered = self.sub_features
            for f_sub in ordered:
                # Delegate to the sub-feature (not its bare location) so
                # that the sub-feature's own strand is honoured.
                for i in f_sub:
                    yield i
        elif self.strand == -1:
            for i in range(self.location.nofuzzy_end - 1,
                           self.location.nofuzzy_start - 1, -1):
                yield i
        else:
            for i in range(self.location.nofuzzy_start,
                           self.location.nofuzzy_end):
                yield i

    def __contains__(self, value):
        """Check if an integer position is within the feature.

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> f = SeqFeature(FeatureLocation(5,10), type="domain", strand=-1)
        >>> len(f)
        5
        >>> [i for i in range(15) if i in f]
        [5, 6, 7, 8, 9]

        For example, to see which features include a SNP position, you could
        use this:

        >>> from Bio import SeqIO
        >>> record = SeqIO.read("GenBank/NC_000932.gb", "gb")
        >>> for f in record.features:
        ...     if 1750 in f:
        ...         print f.type, f.strand, f.location
        source 1 [0:154478]
        gene -1 [1716:4347]
        tRNA -1 [1716:4347]

        Note that for a feature defined as a join of several subfeatures (e.g.
        the union of several exons) the gaps are not checked (e.g. introns).
        In this example, the tRNA location is defined in the GenBank file as
        complement(join(1717..1751,4311..4347)), so that position 1760 falls
        in the gap:

        >>> for f in record.features:
        ...     if 1760 in f:
        ...         print f.type, f.strand, f.location
        source 1 [0:154478]
        gene -1 [1716:4347]

        Note that additional care may be required with fuzzy locations, for
        example just before a BeforePosition:

        >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition
        >>> f = SeqFeature(FeatureLocation(BeforePosition(3),8), type="domain")
        >>> len(f)
        5
        >>> [i for i in range(10) if i in f]
        [3, 4, 5, 6, 7]

        Raises ValueError if value is not an integer.
        """
        if not isinstance(value, int):
            raise ValueError("Currently we only support checking for integer "
                             "positions being within a SeqFeature.")
        if self.sub_features:
            for f in self.sub_features:
                if value in f:
                    return True
            return False
        else:
            return value in self.location
# --- References

# TODO -- Will this hold PubMed and Medline information decently?
class Reference(object):
    """Represent a Generic Reference object.

    Attributes:
    o location - A list of Location objects specifying regions of
    the sequence that the references correspond to. If no locations are
    specified, the entire sequence is assumed.
    o authors - A big old string, or a list split by author, of authors
    for the reference.
    o consrtm - A string, empty by default; only included in the printed
    summary when non-empty.
    o title - The title of the reference.
    o journal - Journal the reference was published in.
    o medline_id - A medline reference for the article.
    o pubmed_id - A pubmed reference for the article.
    o comment - A place to stick any comments about the reference.
    """

    def __init__(self):
        """Create an empty reference; fill in the attributes afterwards."""
        self.location = []
        # Every text field starts out as the empty string
        self.authors = self.consrtm = self.title = self.journal = ''
        self.medline_id = self.pubmed_id = self.comment = ''

    def __str__(self):
        """Output an informative string for debugging.

        One "name: value" line per field, preceded by one line for each
        entry in the location list.
        """
        pieces = ["location: %s\n" % entry for entry in self.location]
        pieces.append("authors: %s\n" % self.authors)
        if self.consrtm:
            pieces.append("consrtm: %s\n" % self.consrtm)
        pieces.append("title: %s\n" % self.title)
        pieces.append("journal: %s\n" % self.journal)
        pieces.append("medline id: %s\n" % self.medline_id)
        pieces.append("pubmed id: %s\n" % self.pubmed_id)
        pieces.append("comment: %s\n" % self.comment)
        return "".join(pieces)

    def __repr__(self):
        """Short debugging representation showing only the title."""
        # TODO - Update this if __init__ later accepts values
        class_name = self.__class__.__name__
        return "%s(title=%s, ...)" % (class_name, repr(self.title))
# --- Handling feature locations
class FeatureLocation(object):
    """Specify the location of a feature along a sequence.

    This attempts to deal with fuzziness of position ends, but also
    make it easy to get the start and end in the 'normal' case (no
    fuzziness).

    You should access the start and end attributes with
    your_location.start and your_location.end. If the start and
    end are exact, this will return the positions, if not, we'll return
    the appropriate Fuzzy class with info about the position and fuzziness.

    Note that the start and end location numbering follow Python's scheme,
    thus a GenBank entry of 123..150 (one based counting) becomes a location
    of [122:150] (zero based counting).
    """

    def __init__(self, start, end):
        """Specify the start and end of a sequence feature.

        start and end arguments specify the values where the feature begins
        and ends. These can either be any of the *Position objects that
        inherit from AbstractPosition, or can just be integers specifying the
        position. In the case of integers, the values are assumed to be
        exact and are converted in ExactPosition arguments. This is meant
        to make it easy to deal with non-fuzzy ends.

        i.e. Short form:

        >>> from Bio.SeqFeature import FeatureLocation
        >>> loc = FeatureLocation(5,10)

        Explicit form:

        >>> from Bio.SeqFeature import FeatureLocation, ExactPosition
        >>> loc = FeatureLocation(ExactPosition(5),ExactPosition(10))

        Other fuzzy positions are used similarly,

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc2 = FeatureLocation(BeforePosition(5),AfterPosition(10))

        """
        # Anything which is not already a position object is taken to be
        # an exact coordinate.
        if not isinstance(start, AbstractPosition):
            start = ExactPosition(start)
        if not isinstance(end, AbstractPosition):
            end = ExactPosition(end)
        self._start = start
        self._end = end

    def __str__(self):
        """Returns a representation of the location (with python counting).

        For the simple case this uses the python splicing syntax, [122:150]
        (zero based counting) which GenBank would call 123..150 (one based
        counting).
        """
        return "[%s:%s]" % (self._start, self._end)

    def __repr__(self):
        """A string representation of the location for debugging."""
        return "%s(%s,%s)" \
               % (self.__class__.__name__, repr(self.start), repr(self.end))

    def __nonzero__(self):
        """Returns True regardless of the length of the feature.

        This behaviour is for backwards compatibility, since until the
        __len__ method was added, a FeatureLocation always evaluated as True.

        Note that in comparison, Seq objects, strings, lists, etc, will all
        evaluate to False if they have length zero.

        WARNING: The FeatureLocation may in future evaluate to False when its
        length is zero (in order to better match normal python behaviour)!
        """
        return True

    # Python 3 looks for __bool__ (not __nonzero__); without this alias the
    # truth value would silently fall back to __len__.
    __bool__ = __nonzero__

    def __len__(self):
        """Returns the length of the region described by the FeatureLocation.

        Note that extra care may be needed for fuzzy locations, e.g.

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc = FeatureLocation(BeforePosition(5),AfterPosition(10))
        >>> len(loc)
        5
        """
        # TODO - Should we use nofuzzy_start and nofuzzy_end here?
        return self._end.position + self._end.extension - self._start.position

    def __contains__(self, value):
        """Check if an integer position is within the FeatureLocation.

        Note that extra care may be needed for fuzzy locations, e.g.

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc = FeatureLocation(BeforePosition(5),AfterPosition(10))
        >>> len(loc)
        5
        >>> [i for i in range(15) if i in loc]
        [5, 6, 7, 8, 9]

        Raises ValueError if value is not an integer.
        """
        if not isinstance(value, int):
            raise ValueError("Currently we only support checking for integer "
                             "positions being within a FeatureLocation.")
        # TODO - Should we use nofuzzy_start and nofuzzy_end here?
        # Half open interval, just like a Python slice
        upper = self._end.position + self._end.extension
        return self._start.position <= value < upper

    def __iter__(self):
        """Iterate over the parent positions within the FeatureLocation.

        >>> from Bio.SeqFeature import FeatureLocation
        >>> from Bio.SeqFeature import BeforePosition, AfterPosition
        >>> loc = FeatureLocation(BeforePosition(5),AfterPosition(10))
        >>> len(loc)
        5
        >>> for i in loc: print i
        5
        6
        7
        8
        9
        >>> list(loc)
        [5, 6, 7, 8, 9]
        >>> [i for i in range(15) if i in loc]
        [5, 6, 7, 8, 9]
        """
        # TODO - Should we use nofuzzy_start and nofuzzy_end here?
        for i in range(self._start.position,
                       self._end.position + self._end.extension):
            yield i

    def _shift(self, offset):
        """Returns a copy of the location shifted by the offset (PRIVATE)."""
        return FeatureLocation(start=self._start._shift(offset),
                               end=self._end._shift(offset))

    def _flip(self, length):
        """Returns a copy of the location after the parent is reversed (PRIVATE)."""
        # Note this will flip the start and end too!
        return FeatureLocation(start=self._end._flip(length),
                               end=self._start._flip(length))

    @property
    def start(self):
        """Start location (possibly a fuzzy position, read only)."""
        return self._start

    @property
    def end(self):
        """End location (possibly a fuzzy position, read only)."""
        return self._end

    @property
    def nofuzzy_start(self):
        """Start position (integer, approximated if fuzzy, read only).

        To get non-fuzzy attributes (ie. the position only) ask for
        'location.nofuzzy_start', 'location.nofuzzy_end'. These should return
        the largest range of the fuzzy position. So something like:
        (10.20)..(30.40) should return 10 for start, and 40 for end.
        """
        return self._start.position

    @property
    def nofuzzy_end(self):
        """End position (integer, approximated if fuzzy, read only).

        To get non-fuzzy attributes (ie. the position only) ask for
        'location.nofuzzy_start', 'location.nofuzzy_end'. These should return
        the largest range of the fuzzy position. So something like:
        (10.20)..(30.40) should return 10 for start, and 40 for end.
        """
        return self._end.position + self._end.extension
635 636
class AbstractPosition(object):
    """Abstract base class representing a position.

    Holds an integer-like position plus a non-negative extension. All the
    comparison operators and the hash look at the position attribute only.
    """

    def __init__(self, position, extension):
        self.position = position
        assert extension >= 0, extension
        self.extension = extension

    def __repr__(self):
        """String representation of the location for debugging."""
        class_name = self.__class__.__name__
        return "%s(%s,%s)" % (class_name,
                              repr(self.position),
                              repr(self.extension))

    def __hash__(self):
        """Simple position based hash."""
        # Note __hash__ must be implemented on Python 3.x if overriding __eq__
        return hash(self.position)

    def _position_of(self, other):
        """Return other.position after checking other is a position (PRIVATE)."""
        assert isinstance(other, AbstractPosition), \
            "We can only do comparisons between Biopython Position objects."
        return other.position

    def __eq__(self, other):
        """A simple equality for positions.

        This is very simple-minded and just compares the position attribute
        of the features; extensions are not considered at all. This could
        potentially be expanded to try to take advantage of extensions.
        """
        return self.position == self._position_of(other)

    def __ne__(self, other):
        """A simple non-equality for positions.

        Only the position attributes are compared; extensions are ignored.
        """
        return self.position != self._position_of(other)

    def __le__(self, other):
        """A simple less than or equal for positions.

        Only the position attributes are compared; extensions are ignored.
        """
        return self.position <= self._position_of(other)

    def __lt__(self, other):
        """A simple less than for positions.

        Only the position attributes are compared; extensions are ignored.
        """
        return self.position < self._position_of(other)

    def __ge__(self, other):
        """A simple greater than or equal for positions.

        Only the position attributes are compared; extensions are ignored.
        """
        return self.position >= self._position_of(other)

    def __gt__(self, other):
        """A simple greater than for positions.

        Only the position attributes are compared; extensions are ignored.
        """
        return self.position > self._position_of(other)

    def _shift(self, offset):
        """Return a copy moved along by offset (PRIVATE)."""
        # We want this to maintain the subclass when called from a subclass
        position_class = self.__class__
        return position_class(self.position + offset, self.extension)

    def _flip(self, length):
        """Return a copy as seen from the other end of the parent (PRIVATE).

        The argument length is the length of the parent sequence.
        """
        # We want this to maintain the subclass when called from a subclass
        position_class = self.__class__
        flipped = length - self.position - self.extension
        return position_class(flipped, self.extension)
729 730
class ExactPosition(AbstractPosition):
    """Specify the specific position of a boundary.

    o position - The position of the boundary.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    In this case, there is no fuzziness associated with the position.
    """

    def __init__(self, position, extension = 0):
        """Store the position; a non-zero extension raises AttributeError."""
        if extension != 0:
            message = "Non-zero extension %s for exact position." % extension
            raise AttributeError(message)
        AbstractPosition.__init__(self, position, 0)

    def __repr__(self):
        """String representation of the ExactPosition location for debugging."""
        assert self.extension == 0
        class_name = self.__class__.__name__
        return "%s(%s)" % (class_name, repr(self.position))

    def __str__(self):
        """The bare position, e.g. '5'."""
        return "%s" % (self.position,)
754
class UncertainPosition(ExactPosition):
    """Specify a specific position which is uncertain.

    This is used in UniProt, e.g. ?222 for uncertain position 222, or in the
    XML format explicitly marked as uncertain. Does not apply to GenBank/EMBL.

    Adds nothing to ExactPosition; the subclass is a marker only.
    """
762
class UnknownPosition(AbstractPosition):
    """Specify a specific position which is unknown (has no position).

    This is used in UniProt, e.g. ? or in the XML as unknown.

    Both the position and extension attributes are None.
    """

    def __init__(self):
        # Deliberately does not call AbstractPosition.__init__, which
        # checks that the extension is a non-negative number.
        self.position = None
        self.extension = None

    def __repr__(self):
        """String representation of the UnknownPosition location for debugging."""
        return "%s()" % self.__class__.__name__

    def _shift(self, offset):
        """Return a new unknown position; the offset is irrelevant (PRIVATE).

        The inherited implementation would attempt arithmetic on None and
        call the constructor with two arguments, neither of which works here.
        """
        return self.__class__()

    def _flip(self, length):
        """Return a new unknown position; the length is irrelevant (PRIVATE).

        Overridden for the same reason as _shift.
        """
        return self.__class__()
776
class WithinPosition(AbstractPosition):
    """Specify the position of a boundary within some coordinates.

    Arguments:
    o position - The start position of the boundary
    o extension - The range to which the boundary can extend.

    This allows dealing with a position like ((1.4)..100). This
    indicates that the start of the sequence is somewhere between 1
    and 4. To represent that with this class we would set position as
    1 and extension as 3.
    """

    def __init__(self, position, extension = 0):
        """Store the position and extension (which defaults to zero)."""
        AbstractPosition.__init__(self, position, extension)

    def __str__(self):
        """Format as '(low.high)', where high is position plus extension."""
        low = self.position
        high = self.position + self.extension
        return "(%s.%s)" % (low, high)
794 795
class BetweenPosition(AbstractPosition):
    """Specify the position of a boundary between two coordinates (OBSOLETE?).

    Arguments:
    o position - The start position of the boundary.
    o extension - The range to the other position of a boundary.

    This specifies a coordinate which is found between the two positions.
    So this allows us to deal with a position like ((1^2)..100). To
    represent that with this class we set position as 1 and the
    extension as 1.
    """

    def __init__(self, position, extension = 0):
        """Store the position and extension (which defaults to zero)."""
        AbstractPosition.__init__(self, position, extension)

    def __str__(self):
        """Format as '(low^high)', where high is position plus extension."""
        low = self.position
        high = self.position + self.extension
        return "(%s^%s)" % (low, high)
813 814
class BeforePosition(AbstractPosition):
    """Specify a position where the actual location occurs before it.

    Arguments:
    o position - The upper boundary of where the location can occur.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    This is used to specify positions like (<10..100) where the location
    occurs somewhere before position 10.
    """

    def __init__(self, position, extension = 0):
        """Store the position; a non-zero extension raises AttributeError."""
        if extension != 0:
            # The message names this position type (it used to claim to be
            # an exact position, copied from ExactPosition).
            raise AttributeError("Non-zero extension %s for before position."
                                 % extension)
        AbstractPosition.__init__(self, position, 0)

    def __repr__(self):
        """A string representation of the location for debugging."""
        assert self.extension == 0
        return "%s(%s)" % (self.__class__.__name__, repr(self.position))

    def __str__(self):
        """Format in the GenBank style, e.g. '<10'."""
        return "<%s" % self.position

    def _flip(self, length):
        """Return the matching AfterPosition on the reversed parent (PRIVATE).

        The argument length is the length of the parent sequence.
        """
        return AfterPosition(length - self.position)
843
class AfterPosition(AbstractPosition):
    """Specify a position where the actual location is found after it.

    Arguments:
    o position - The lower boundary of where the location can occur.
    o extension - An optional argument which must be zero since we don't
    have an extension. The argument is provided so that the same number of
    arguments can be passed to all position types.

    This is used to specify positions like (>10..100) where the location
    occurs somewhere after position 10.
    """

    def __init__(self, position, extension = 0):
        """Store the position; a non-zero extension raises AttributeError."""
        if extension != 0:
            # The message names this position type (it used to claim to be
            # an exact position, copied from ExactPosition).
            raise AttributeError("Non-zero extension %s for after position."
                                 % extension)
        AbstractPosition.__init__(self, position, 0)

    def __repr__(self):
        """A string representation of the location for debugging."""
        assert self.extension == 0
        return "%s(%s)" % (self.__class__.__name__, repr(self.position))

    def __str__(self):
        """Format in the GenBank style, e.g. '>10'."""
        return ">%s" % self.position

    def _flip(self, length):
        """Return the matching BeforePosition on the reversed parent (PRIVATE).

        The argument length is the length of the parent sequence.
        """
        return BeforePosition(length - self.position)
872 873
class OneOfPosition(AbstractPosition):
    """Specify a position where the location can be multiple positions.

    This models the GenBank 'one-of(1888,1901)' function, and tries
    to make this fit within the Biopython Position models. In our case
    the position of the "one-of" is set as the lowest choice, and the
    extension is the range to the highest choice.
    """

    def __init__(self, position_list):
        """Initialize with a set of possible positions.

        position_list is a list of AbstractPosition derived objects,
        specifying possible locations.

        Raises ValueError if position_list is empty, since there is then
        no lowest or highest choice to use.
        """
        # unique attribute for this type of positions
        self.position_choices = position_list
        # collect the coordinate of each choice, checking types as we go
        coordinates = []
        for position_choice in self.position_choices:
            assert isinstance(position_choice, AbstractPosition), \
                "Expected position objects, got %r" % position_choice
            coordinates.append(position_choice.position)
        if not coordinates:
            raise ValueError("OneOfPosition requires at least one "
                             "position choice")
        smallest = min(coordinates)
        largest = max(coordinates)
        # initialize with our definition of position and extension
        AbstractPosition.__init__(self, smallest, largest - smallest)

    def __repr__(self):
        """String representation of the OneOfPosition location for debugging."""
        return "%s(%s)" % (self.__class__.__name__,
                           repr(self.position_choices))

    def __str__(self):
        """Format in the GenBank style, e.g. 'one-of(1888,1901)'."""
        choices = ",".join([str(position)
                            for position in self.position_choices])
        return "one-of(%s)" % choices

    def _shift(self, offset):
        """Return a copy with every choice moved along by offset (PRIVATE)."""
        return self.__class__([position_choice._shift(offset)
                               for position_choice in self.position_choices])

    def _flip(self, length):
        """Return a copy with every choice flipped, in reverse order (PRIVATE).

        The argument length is the length of the parent sequence.
        """
        # Uses self.__class__ (as _shift does) to maintain any subclass
        return self.__class__([p._flip(length)
                               for p in self.position_choices[::-1]])
925 926
class PositionGap(object):
    """Simple class to hold information about a gap between positions.

    The single attribute gap_size holds whatever was given to the constructor.
    """

    def __init__(self, gap_size):
        """Initialize with a position object containing the gap information.
        """
        self.gap_size = gap_size

    def __repr__(self):
        """A string representation of the position gap for debugging."""
        class_name = self.__class__.__name__
        return "%s(%s)" % (class_name, repr(self.gap_size))

    def __str__(self):
        """Format as 'gap(size)'."""
        return "gap(%s)" % self.gap_size
942
943 -def _test():
944 """Run the Bio.SeqFeature module's doctests (PRIVATE). 945 946 This will try and locate the unit tests directory, and run the doctests 947 from there in order that the relative paths used in the examples work. 948 """ 949 import doctest 950 import os 951 if os.path.isdir(os.path.join("..","Tests")): 952 print "Runing doctests..." 953 cur_dir = os.path.abspath(os.curdir) 954 os.chdir(os.path.join("..","Tests")) 955 doctest.testmod() 956 os.chdir(cur_dir) 957 del cur_dir 958 print "Done" 959 elif os.path.isdir(os.path.join("Tests")) : 960 print "Runing doctests..." 961 cur_dir = os.path.abspath(os.curdir) 962 os.chdir(os.path.join("Tests")) 963 doctest.testmod() 964 os.chdir(cur_dir) 965 del cur_dir 966 print "Done"
# When run directly as a script (rather than imported), run the doctests.
if __name__ == "__main__":
    _test()