1
2
3
4
5
6
7
8
9 """Represent a Sequence Feature holding info about a part of a sequence.
10
11 This is heavily modeled after the Biocorba SeqFeature objects, and
12 may be pretty biased towards GenBank stuff since I'm writing it
13 for the GenBank parser output...
14
15 What's here:
16
17 Base class to hold a Feature.
18 ----------------------------
19 classes:
20 o SeqFeature
21
22 Hold information about a Reference.
23 ----------------------------------
24
25 This is an attempt to create a General class to hold Reference type
26 information.
27
28 classes:
29 o Reference
30
31 Specify locations of a feature on a Sequence.
32 ---------------------------------------------
33
34 This aims to handle, in Ewan's words, 'the dreaded fuzziness issue' in
35 much the same way as Biocorba. This has the advantages of allowing us
36 to handle fuzzy stuff in case anyone needs it, and also be compatible
37 with Biocorba.
38
39 classes:
40 o FeatureLocation - Specify the start and end location of a feature.
41
42 o ExactPosition - Specify the position as being exact.
43 o WithinPosition - Specify a position occurring within some range.
44 o BetweenPosition - Specify a position occurring between a range (OBSOLETE?).
45 o BeforePosition - Specify the position as being found before some base.
46 o AfterPosition - Specify the position as being found after some base.
47 o OneOfPosition - Specify a position where the location can be multiple positions.
48 """
49
50 from Bio.Seq import MutableSeq, reverse_complement
51
53 """Represent a Sequence Feature on an object.
54
55 Attributes:
56 o location - the location of the feature on the sequence (FeatureLocation)
57 o type - the specified type of the feature (ie. CDS, exon, repeat...)
58 o location_operator - a string specifying how this SeqFeature may
59 be related to others. For example, in the example GenBank feature
60 shown below, the location_operator would be "join"
61 o strand - A value specifying on which strand (of a DNA sequence, for
62 instance) the feature deals with. 1 indicates the plus strand, -1
63 indicates the minus strand, 0 indicates stranded but unknown (? in GFF3),
64 while the default of None indicates that strand doesn't apply (dot in GFF3,
65 e.g. features on proteins)
66 o id - A string identifier for the feature.
67 o ref - A reference to another sequence. This could be an accession
68 number for some different sequence.
69 o ref_db - A different database for the reference accession number.
70 o qualifiers - A dictionary of qualifiers on the feature. These are
71 analogous to the qualifiers from a GenBank feature table. The keys of
72 the dictionary are qualifier names, the values are the qualifier
73 values.
74 o sub_features - Additional SeqFeatures which fall under this 'parent'
75 feature. For instance, if we have something like:
76
77 CDS join(1..10,30..40,50..60)
78
79 Then the top level feature would be of type 'CDS' from 1 to 60 (actually 0
80 to 60 in Python counting) with location_operator='join', and the three sub-
81 features would also be of type 'CDS', and would be from 1 to 10, 30 to
82 40 and 50 to 60, respectively (although actually using Python counting).
83
84 To get the nucleotide sequence for this CDS, you would need to take the
85 parent sequence and do seq[0:10]+seq[29:40]+seq[49:60] (Python counting).
86 Things are more complicated with strands and fuzzy positions. To save you
87 dealing with all these special cases, the SeqFeature provides an extract
88 method to do this for you.
89 """
def __init__(self, location=None, type='', location_operator='',
             strand=None, id="<unknown id>",
             qualifiers=None, sub_features=None,
             ref=None, ref_db=None):
    """Initialize a SeqFeature on a Sequence.

    location can either be a FeatureLocation (with strand argument also
    given if required), or None.

    e.g. With no strand, on the forward strand, and on the reverse strand:

    >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
    >>> f1 = SeqFeature(FeatureLocation(5,10), type="domain")
    >>> f2 = SeqFeature(FeatureLocation(7,110), strand=1, type="CDS")
    >>> f3 = SeqFeature(FeatureLocation(9,108), strand=-1, type="CDS")

    An invalid strand will trigger an exception:

    >>> f4 = SeqFeature(FeatureLocation(50,60), strand=2)
    Traceback (most recent call last):
       ...
    ValueError: Strand should be +1, -1, 0 or None, not 2

    For exact start/end positions, an integer can be used (as shown above)
    as shorthand for the ExactPosition object. For non-exact locations, the
    FeatureLocation must be specified via the appropriate position objects.

    Arguments:
     o location - A FeatureLocation, or None.
     o type - Stored unchanged as the feature type.
     o location_operator - Stored unchanged (e.g. "join").
     o strand - One of +1, -1, 0 or None.
     o id - Stored unchanged as the feature identifier.
     o qualifiers - Dictionary of qualifiers; a new empty dictionary is
       created when None is given.
     o sub_features - List of sub-features; a new empty list is created
       when None is given.
     o ref, ref_db - Stored unchanged.

    Raises ValueError for an invalid strand, and TypeError if location is
    neither None nor a FeatureLocation.
    """
    if strand not in (-1, 0, 1, None):
        raise ValueError("Strand should be +1, -1, 0 or None, not %s"
                         % repr(strand))
    if location is not None and not isinstance(location, FeatureLocation):
        raise TypeError("FeatureLocation (or None) required for the location")
    self.location = location

    self.type = type
    self.location_operator = location_operator
    self.strand = strand
    self.id = id
    # The defaults are None rather than {} / [] so that every feature gets
    # its own container instead of sharing one object between instances.
    if qualifiers is None:
        qualifiers = {}
    self.qualifiers = qualifiers
    if sub_features is None:
        sub_features = []
    self.sub_features = sub_features
    self.ref = ref
    self.ref_db = ref_db
136
138 """A string representation of the record for debugging."""
139 answer = "%s(%s" % (self.__class__.__name__, repr(self.location))
140 if self.type:
141 answer += ", type=%s" % repr(self.type)
142 if self.location_operator:
143 answer += ", location_operator=%s" % repr(self.location_operator)
144 if self.strand is not None:
145 answer += ", strand=%s" % repr(self.strand)
146 if self.id and self.id != "<unknown id>":
147 answer += ", id=%s" % repr(self.id)
148 if self.ref:
149 answer += ", ref=%s" % repr(self.ref)
150 if self.ref_db:
151 answer += ", ref_db=%s" % repr(self.ref_db)
152 answer += ")"
153 return answer
154
156 """A readable summary of the feature intended to be printed to screen.
157 """
158 out = "type: %s\n" % self.type
159 out += "location: %s\n" % self.location
160 if self.id and self.id != "<unknown id>":
161 out += "id: %s\n" % self.id
162 if self.ref or self.ref_db:
163 out += "ref: %s:%s\n" % (self.ref, self.ref_db)
164 out += "strand: %s\n" % self.strand
165 out += "qualifiers: \n"
166 for qual_key in sorted(self.qualifiers):
167 out += " Key: %s, Value: %s\n" % (qual_key,
168 self.qualifiers[qual_key])
169 if len(self.sub_features) != 0:
170 out += "Sub-Features\n"
171 for sub_feature in self.sub_features:
172 out +="%s\n" % sub_feature
173 return out
174
176 """Returns a copy of the feature with its location shifted (PRIVATE).
177
178 The annotation qualifiers are copied."""
179 return SeqFeature(location = self.location._shift(offset),
180 type = self.type,
181 location_operator = self.location_operator,
182 strand = self.strand,
183 id = self.id,
184 qualifiers = dict(self.qualifiers.iteritems()),
185 sub_features = [f._shift(offset) for f in self.sub_features],
186 ref = self.ref,
187 ref_db = self.ref_db)
188
def _flip(self, length):
    """Returns a copy of the feature with its location flipped (PRIVATE).

    The argument length gives the length of the parent sequence. For
    example a location 0..20 (+1 strand) with parent length 30 becomes
    after flipping 10..30 (-1 strand). Strandless (None) or unknown
    strand (0) remain like that - just their end points are changed.

    The annotation qualifiers are copied (a shallow copy of the
    dictionary), and the sub-features are flipped as well and returned
    in reverse order.
    """
    if self.strand == +1:
        new_strand = -1
    elif self.strand == -1:
        new_strand = +1
    else:
        # Neither +1 nor -1 (e.g. None or 0): the strand is kept as it is.
        new_strand = self.strand
    return SeqFeature(location=self.location._flip(length),
                      type=self.type,
                      location_operator=self.location_operator,
                      strand=new_strand,
                      id=self.id,
                      # dict(mapping) gives the same shallow copy as
                      # dict(mapping.iteritems()) without relying on the
                      # Python 2 only iteritems method.
                      qualifiers=dict(self.qualifiers),
                      sub_features=[f._flip(length)
                                    for f in self.sub_features[::-1]],
                      ref=self.ref,
                      ref_db=self.ref_db)
215
217 """Extract feature sequence from the supplied parent sequence.
218
219 The parent_sequence can be a Seq like object or a string, and will
220 generally return an object of the same type. The exception to this is
221 a MutableSeq as the parent sequence will return a Seq object.
222
223 This should cope with complex locations including complements, joins
224 and fuzzy positions. Even mixed strand features should work! This
225 also covers features on protein sequences (e.g. domains), although
226 here reverse strand features are not permitted.
227
228 >>> from Bio.Seq import Seq
229 >>> from Bio.Alphabet import generic_protein
230 >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
231 >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL", generic_protein)
232 >>> f = SeqFeature(FeatureLocation(8,15), type="domain")
233 >>> f.extract(seq)
234 Seq('VALIVIC', ProteinAlphabet())
235
236 Note - currently only sub-features of type "join" are supported.
237 """
238 if isinstance(parent_sequence, MutableSeq):
239
240
241 parent_sequence = parent_sequence.toseq()
242 if self.sub_features:
243 if self.location_operator!="join":
244 raise ValueError(self.location_operator)
245 if self.strand == -1:
246
247
248 parts = []
249 for f_sub in self.sub_features:
250 assert f_sub.strand==-1
251 parts.append(parent_sequence[f_sub.location.nofuzzy_start:\
252 f_sub.location.nofuzzy_end])
253 else:
254
255 parts = [f_sub.extract(parent_sequence) \
256 for f_sub in self.sub_features]
257
258 f_seq = parts[0]
259 for part in parts[1:] : f_seq += part
260 else:
261 f_seq = parent_sequence[self.location.nofuzzy_start:\
262 self.location.nofuzzy_end]
263 if self.strand == -1:
264
265 try:
266 f_seq = f_seq.reverse_complement()
267 except AttributeError:
268 assert isinstance(f_seq, str)
269 f_seq = reverse_complement(f_seq)
270 return f_seq
271
273 """Returns True regardless of the length of the feature.
274
275 This behaviour is for backwards compatibility, since until the
276 __len__ method was added, a SeqFeature always evaluated as True.
277
278 Note that in comparison, Seq objects, strings, lists, etc, will all
279 evaluate to False if they have length zero.
280
281 WARNING: The SeqFeature may in future evaluate to False when its
282 length is zero (in order to better match normal python behaviour)!
283 """
284 return True
285
287 """Returns the length of the region described by a feature.
288
289 >>> from Bio.Seq import Seq
290 >>> from Bio.Alphabet import generic_protein
291 >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
292 >>> seq = Seq("MKQHKAMIVALIVICITAVVAAL", generic_protein)
293 >>> f = SeqFeature(FeatureLocation(8,15), type="domain")
294 >>> len(f)
295 7
296 >>> f.extract(seq)
297 Seq('VALIVIC', ProteinAlphabet())
298 >>> len(f.extract(seq))
299 7
300
301 For simple features without subfeatures this is the same as the region
302 spanned (end position minus start position). However, for a feature
303 defined by combining several subfeatures (e.g. a CDS as the join of
304 several exons) the gaps are not counted (e.g. introns). This ensures
305 that len(f) == len(f.extract(parent_seq)), and also makes sure things
306 work properly with features wrapping the origin etc.
307 """
308 if self.sub_features:
309 return sum(len(f) for f in self.sub_features)
310 else:
311 return len(self.location)
312
314 """Iterate over the parent positions within the feature.
315
316 The iteration order is strand aware, and can be thought of as moving
317 along the feature using the parent sequence coordinates:
318
319 >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
320 >>> f = SeqFeature(FeatureLocation(5,10), type="domain", strand=-1)
321 >>> len(f)
322 5
323 >>> for i in f: print i
324 9
325 8
326 7
327 6
328 5
329 >>> list(f)
330 [9, 8, 7, 6, 5]
331 """
332 if self.sub_features:
333 if self.strand == -1:
334 for f in self.sub_features[::-1]:
335 for i in f.location:
336 yield i
337 else:
338 for f in self.sub_features:
339 for i in f.location:
340 yield i
341 elif self.strand == -1:
342 for i in range(self.location.nofuzzy_end-1,
343 self.location.nofuzzy_start-1, -1):
344 yield i
345 else:
346 for i in range(self.location.nofuzzy_start,
347 self.location.nofuzzy_end):
348 yield i
349
351 """Check if an integer position is within the feature.
352
353 >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
354 >>> f = SeqFeature(FeatureLocation(5,10), type="domain", strand=-1)
355 >>> len(f)
356 5
357 >>> [i for i in range(15) if i in f]
358 [5, 6, 7, 8, 9]
359
360 For example, to see which features include a SNP position, you could
361 use this:
362
363 >>> from Bio import SeqIO
364 >>> record = SeqIO.read("GenBank/NC_000932.gb", "gb")
365 >>> for f in record.features:
366 ... if 1750 in f:
367 ... print f.type, f.strand, f.location
368 source 1 [0:154478]
369 gene -1 [1716:4347]
370 tRNA -1 [1716:4347]
371
372 Note that for a feature defined as a join of several subfeatures (e.g.
373 the union of several exons) the gaps are not checked (e.g. introns).
374 In this example, the tRNA location is defined in the GenBank file as
375 complement(join(1717..1751,4311..4347)), so that position 1760 falls
376 in the gap:
377
378 >>> for f in record.features:
379 ... if 1760 in f:
380 ... print f.type, f.strand, f.location
381 source 1 [0:154478]
382 gene -1 [1716:4347]
383
384 Note that additional care may be required with fuzzy locations, for
385 example just before a BeforePosition:
386
387 >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
388 >>> from Bio.SeqFeature import BeforePosition
389 >>> f = SeqFeature(FeatureLocation(BeforePosition(3),8), type="domain")
390 >>> len(f)
391 5
392 >>> [i for i in range(10) if i in f]
393 [3, 4, 5, 6, 7]
394 """
395 if not isinstance(value, int):
396 raise ValueError("Currently we only support checking for integer "
397 "positions being within a SeqFeature.")
398 if self.sub_features:
399 for f in self.sub_features:
400 if value in f:
401 return True
402 return False
403 else:
404 return value in self.location
405
406
407
408
410 """Represent a Generic Reference object.
411
412 Attributes:
413 o location - A list of Location objects specifying regions of
414 the sequence that the references correspond to. If no locations are
415 specified, the entire sequence is assumed.
416 o authors - A big old string, or a list split by author, of authors
417 for the reference.
418 o title - The title of the reference.
419 o journal - Journal the reference was published in.
420 o medline_id - A medline reference for the article.
421 o pubmed_id - A pubmed reference for the article.
422 o comment - A place to stick any comments about the reference.
423 """
433
435 """Output an informative string for debugging.
436 """
437 out = ""
438 for single_location in self.location:
439 out += "location: %s\n" % single_location
440 out += "authors: %s\n" % self.authors
441 if self.consrtm:
442 out += "consrtm: %s\n" % self.consrtm
443 out += "title: %s\n" % self.title
444 out += "journal: %s\n" % self.journal
445 out += "medline id: %s\n" % self.medline_id
446 out += "pubmed id: %s\n" % self.pubmed_id
447 out += "comment: %s\n" % self.comment
448 return out
449
451
452 return "%s(title=%s, ...)" % (self.__class__.__name__,
453 repr(self.title))
454
455
456
458 """Specify the location of a feature along a sequence.
459
460 This attempts to deal with fuzziness of position ends, but also
461 make it easy to get the start and end in the 'normal' case (no
462 fuzziness).
463
464 You should access the start and end attributes with
465 your_location.start and your_location.end. If the start and
466 end are exact, this will return the positions, if not, we'll return
467 the appropriate Fuzzy class with info about the position and fuzziness.
468
469 Note that the start and end location numbering follow Python's scheme,
470 thus a GenBank entry of 123..150 (one based counting) becomes a location
471 of [122:150] (zero based counting).
472 """
474 """Specify the start and end of a sequence feature.
475
476 start and end arguments specify the values where the feature begins
477 and ends. These can either be any of the *Position objects that
478 inherit from AbstractPosition, or can just be integers specifying the
479 position. In the case of integers, the values are assumed to be
480 exact and are converted into ExactPosition arguments. This is meant
481 to make it easy to deal with non-fuzzy ends.
482
483 i.e. Short form:
484
485 >>> from Bio.SeqFeature import FeatureLocation
486 >>> loc = FeatureLocation(5,10)
487
488 Explicit form:
489
490 >>> from Bio.SeqFeature import FeatureLocation, ExactPosition
491 >>> loc = FeatureLocation(ExactPosition(5),ExactPosition(10))
492
493 Other fuzzy positions are used similarly,
494
495 >>> from Bio.SeqFeature import FeatureLocation
496 >>> from Bio.SeqFeature import BeforePosition, AfterPosition
497 >>> loc2 = FeatureLocation(BeforePosition(5),AfterPosition(10))
498
499 """
500 if isinstance(start, AbstractPosition):
501 self._start = start
502 else:
503 self._start = ExactPosition(start)
504
505 if isinstance(end, AbstractPosition):
506 self._end = end
507 else:
508 self._end = ExactPosition(end)
509
511 """Returns a representation of the location (with python counting).
512
513 For the simple case this uses the python slicing syntax, [122:150]
514 (zero based counting) which GenBank would call 123..150 (one based
515 counting).
516 """
517 return "[%s:%s]" % (self._start, self._end)
518
520 """A string representation of the location for debugging."""
521 return "%s(%s,%s)" \
522 % (self.__class__.__name__, repr(self.start), repr(self.end))
523
525 """Returns True regardless of the length of the feature.
526
527 This behaviour is for backwards compatibility, since until the
528 __len__ method was added, a FeatureLocation always evaluated as True.
529
530 Note that in comparison, Seq objects, strings, lists, etc, will all
531 evaluate to False if they have length zero.
532
533 WARNING: The FeatureLocation may in future evaluate to False when its
534 length is zero (in order to better match normal python behaviour)!
535 """
536 return True
537
539 """Returns the length of the region described by the FeatureLocation.
540
541 Note that extra care may be needed for fuzzy locations, e.g.
542
543 >>> from Bio.SeqFeature import FeatureLocation
544 >>> from Bio.SeqFeature import BeforePosition, AfterPosition
545 >>> loc = FeatureLocation(BeforePosition(5),AfterPosition(10))
546 >>> len(loc)
547 5
548 """
549
550 return self._end.position + self._end.extension - self._start.position
551
553 """Check if an integer position is within the FeatureLocation.
554
555 Note that extra care may be needed for fuzzy locations, e.g.
556
557 >>> from Bio.SeqFeature import FeatureLocation
558 >>> from Bio.SeqFeature import BeforePosition, AfterPosition
559 >>> loc = FeatureLocation(BeforePosition(5),AfterPosition(10))
560 >>> len(loc)
561 5
562 >>> [i for i in range(15) if i in loc]
563 [5, 6, 7, 8, 9]
564 """
565 if not isinstance(value, int):
566 raise ValueError("Currently we only support checking for integer "
567 "positions being within a FeatureLocation.")
568
569 if value < self._start.position \
570 or value >= self._end.position + self._end.extension:
571 return False
572 else:
573 return True
574
576 """Iterate over the parent positions within the FeatureLocation.
577
578 >>> from Bio.SeqFeature import FeatureLocation
579 >>> from Bio.SeqFeature import BeforePosition, AfterPosition
580 >>> loc = FeatureLocation(BeforePosition(5),AfterPosition(10))
581 >>> len(loc)
582 5
583 >>> for i in loc: print i
584 5
585 6
586 7
587 8
588 9
589 >>> list(loc)
590 [5, 6, 7, 8, 9]
591 >>> [i for i in range(15) if i in loc]
592 [5, 6, 7, 8, 9]
593 """
594
595 for i in range(self._start.position,
596 self._end.position + self._end.extension):
597 yield i
598
603
604 - def _flip(self, length):
609
610 start = property(fget= lambda self : self._start,
611 doc="Start location (possibly a fuzzy position, read only).")
612
613 end = property(fget= lambda self : self._end,
614 doc="End location (possibly a fuzzy position, read only).")
615
616 nofuzzy_start = property(
617 fget=lambda self: self._start.position,
618 doc="""Start position (integer, approximated if fuzzy, read only).
619
620 To get non-fuzzy attributes (ie. the position only) ask for
621 'location.nofuzzy_start', 'location.nofuzzy_end'. These should return
622 the largest range of the fuzzy position. So something like:
623 (10.20)..(30.40) should return 10 for start, and 40 for end.
624 """)
625
626 nofuzzy_end = property(
627 fget=lambda self: self._end.position + self._end.extension,
628 doc="""End position (integer, approximated if fuzzy, read only).
629
630 To get non-fuzzy attributes (ie. the position only) ask for
631 'location.nofuzzy_start', 'location.nofuzzy_end'. These should return
632 the largest range of the fuzzy position. So something like:
633 (10.20)..(30.40) should return 10 for start, and 40 for end.
634 """)
635
636
638 """Abstract base class representing a position.
639 """
def __init__(self, position, extension):
    """Store the position and the extension of this position object.

    Arguments:
     o position - Stored unchanged as self.position.
     o extension - Stored unchanged as self.extension; it is asserted
       to be greater than or equal to zero.
    """
    self.position = position
    # The assertion message is the offending extension value itself.
    assert extension >= 0, extension
    self.extension = extension
644
646 """String representation of the location for debugging."""
647 return "%s(%s,%s)" % (self.__class__.__name__, \
648 repr(self.position), repr(self.extension))
649
651 """Simple position based hash."""
652
653 return hash(self.position)
654
656 """A simple equality for positions.
657
658 This is very simple-minded and just compares the position attribute
659 of the features; extensions are not considered at all. This could
660 potentially be expanded to try to take advantage of extensions.
661 """
662 assert isinstance(other, AbstractPosition), \
663 "We can only do comparisons between Biopython Position objects."
664 return self.position == other.position
665
667 """A simple non-equality for positions.
668
669 This is very simple-minded and just compares the position attribute
670 of the features; extensions are not considered at all. This could
671 potentially be expanded to try to take advantage of extensions.
672 """
673 assert isinstance(other, AbstractPosition), \
674 "We can only do comparisons between Biopython Position objects."
675 return self.position != other.position
676
678 """A simple less than or equal for positions.
679
680 This is very simple-minded and just compares the position attribute
681 of the features; extensions are not considered at all. This could
682 potentially be expanded to try to take advantage of extensions.
683 """
684 assert isinstance(other, AbstractPosition), \
685 "We can only do comparisons between Biopython Position objects."
686 return self.position <= other.position
687
689 """A simple less than for positions.
690
691 This is very simple-minded and just compares the position attribute
692 of the features; extensions are not considered at all. This could
693 potentially be expanded to try to take advantage of extensions.
694 """
695 assert isinstance(other, AbstractPosition), \
696 "We can only do comparisons between Biopython Position objects."
697 return self.position < other.position
698
700 """A simple greater than or equal for positions.
701
702 This is very simple-minded and just compares the position attribute
703 of the features; extensions are not considered at all. This could
704 potentially be expanded to try to take advantage of extensions.
705 """
706 assert isinstance(other, AbstractPosition), \
707 "We can only do comparisons between Biopython Position objects."
708 return self.position >= other.position
709
711 """A simple greater than for positions.
712
713 This is very simple-minded and just compares the position attribute
714 of the features; extensions are not considered at all. This could
715 potentially be expanded to try to take advantage of extensions.
716 """
717 assert isinstance(other, AbstractPosition), \
718 "We can only do comparisons between Biopython Position objects."
719 return self.position > other.position
720
722
723 return self.__class__(self.position + offset, self.extension)
724
def _flip(self, length):
    """Returns a copy of the position flipped within the parent (PRIVATE).

    The argument length gives the length of the parent sequence. The new
    object is built with the same class as self; its position is
    length - position - extension and its extension is unchanged.
    """
    # position + extension is the far end of the range covered by this
    # position, so measured from the other end of the parent it becomes
    # the new (lower) position.
    return self.__class__(length - self.position - self.extension,
                          self.extension)
729
730
732 """Specify the specific position of a boundary.
733
734 o position - The position of the boundary.
735 o extension - An optional argument which must be zero since we don't
736 have an extension. The argument is provided so that the same number of
737 arguments can be passed to all position types.
738
739 In this case, there is no fuzziness associated with the position.
740 """
def __init__(self, position, extension=0):
    """Initialize an exact position.

    Arguments:
     o position - The position of the boundary.
     o extension - Must be zero; the argument exists only so that the
       same number of arguments can be passed to all position types.

    Raises AttributeError if extension is not zero.
    """
    if extension != 0:
        # Wrapping in a one-tuple keeps the formatting safe even if a
        # tuple is (wrongly) passed as the extension.
        raise AttributeError("Non-zero extension %s for exact position."
                             % (extension,))
    AbstractPosition.__init__(self, position, 0)
746
748 """String representation of the ExactPosition location for debugging."""
749 assert self.extension == 0
750 return "%s(%s)" % (self.__class__.__name__, repr(self.position))
751
753 return str(self.position)
754
756 """Specify a specific position which is uncertain.
757
758 This is used in UniProt, e.g. ?222 for uncertain position 222, or in the
759 XML format explicitly marked as uncertain. Does not apply to GenBank/EMBL.
760 """
761 pass
762
764 """Specify a specific position which is unknown (has no position).
765
766 This is used in UniProt, e.g. ? or in the XML as unknown.
767 """
769 self.position = None
770 self.extension = None
771 pass
772
774 """String representation of the UnknownPosition location for debugging."""
775 return "%s()" % self.__class__.__name__
776
778 """Specify the position of a boundary within some coordinates.
779
780 Arguments:
781 o position - The start position of the boundary
782 o extension - The range to which the boundary can extend.
783
784 This allows dealing with a position like ((1.4)..100). This
785 indicates that the start of the sequence is somewhere between 1
786 and 4. To represent that with this class we would set position as
787 1 and extension as 3.
788 """
789 - def __init__(self, position, extension = 0):
791
793 return "(%s.%s)" % (self.position, self.position + self.extension)
794
795
797 """Specify the position of a boundary between two coordinates (OBSOLETE?).
798
799 Arguments:
800 o position - The start position of the boundary.
801 o extension - The range to the other position of a boundary.
802
803 This specifies a coordinate which is found between the two positions.
804 So this allows us to deal with a position like ((1^2)..100). To
805 represent that with this class we set position as 1 and the
806 extension as 1.
807 """
808 - def __init__(self, position, extension = 0):
810
812 return "(%s^%s)" % (self.position, self.position + self.extension)
813
814
816 """Specify a position where the actual location occurs before it.
817
818 Arguments:
819 o position - The upper boundary of where the location can occur.
820 o extension - An optional argument which must be zero since we don't
821 have an extension. The argument is provided so that the same number of
822 arguments can be passed to all position types.
823
824 This is used to specify positions like (<10..100) where the location
825 occurs somewhere before position 10.
826 """
def __init__(self, position, extension=0):
    """Initialize a position whose actual location occurs before it.

    Arguments:
     o position - The upper boundary of where the location can occur.
     o extension - Must be zero; the argument exists only so that the
       same number of arguments can be passed to all position types.

    Raises AttributeError if extension is not zero.
    """
    if extension != 0:
        # The message names this position type (it previously said
        # "exact position", copied from the exact position class).
        raise AttributeError("Non-zero extension %s for before position."
                             % (extension,))
    AbstractPosition.__init__(self, position, 0)
832
834 """A string representation of the location for debugging."""
835 assert self.extension == 0
836 return "%s(%s)" % (self.__class__.__name__, repr(self.position))
837
839 return "<%s" % self.position
840
841 - def _flip(self, length):
843
845 """Specify a position where the actual location is found after it.
846
847 Arguments:
848 o position - The lower boundary of where the location can occur.
849 o extension - An optional argument which must be zero since we don't
850 have an extension. The argument is provided so that the same number of
851 arguments can be passed to all position types.
852
853 This is used to specify positions like (>10..100) where the location
854 occurs somewhere after position 10.
855 """
def __init__(self, position, extension=0):
    """Initialize a position whose actual location is found after it.

    Arguments:
     o position - The lower boundary of where the location can occur.
     o extension - Must be zero; the argument exists only so that the
       same number of arguments can be passed to all position types.

    Raises AttributeError if extension is not zero.
    """
    if extension != 0:
        # The message names this position type (it previously said
        # "exact position", copied from the exact position class).
        raise AttributeError("Non-zero extension %s for after position."
                             % (extension,))
    AbstractPosition.__init__(self, position, 0)
861
863 """A string representation of the location for debugging."""
864 assert self.extension == 0
865 return "%s(%s)" % (self.__class__.__name__, repr(self.position))
866
868 return ">%s" % self.position
869
870 - def _flip(self, length):
872
873
875 """Specify a position where the location can be multiple positions.
876
877 This models the GenBank 'one-of(1888,1901)' function, and tries
878 to make this fit within the Biopython Position models. In our case
879 the position of the "one-of" is set as the lowest choice, and the
880 extension is the range to the highest choice.
881 """
883 """Initialize with a set of possible positions.
884
885 position_list is a list of AbstractPosition derived objects,
886 specifying possible locations.
887 """
888
889 self.position_choices = position_list
890
891 smallest = None
892 largest = None
893 for position_choice in self.position_choices:
894 assert isinstance(position_choice, AbstractPosition), \
895 "Expected position objects, got %r" % position_choice
896 if smallest is None and largest is None:
897 smallest = position_choice.position
898 largest = position_choice.position
899 elif position_choice.position > largest:
900 largest = position_choice.position
901 elif position_choice.position < smallest:
902 smallest = position_choice.position
903
904 AbstractPosition.__init__(self, smallest, largest - smallest)
905
907 """String representation of the OneOfPosition location for debugging."""
908 return "%s(%s)" % (self.__class__.__name__, \
909 repr(self.position_choices))
910
912 out = "one-of("
913 for position in self.position_choices:
914 out += "%s," % position
915
916 out = out[:-1] + ")"
917 return out
918
920 return self.__class__([position_choice._shift(offset) \
921 for position_choice in self.position_choices])
922
923 - def _flip(self, length):
925
926
928 """Simple class to hold information about a gap between positions.
929 """
931 """Initialize with a position object containing the gap information.
932 """
933 self.gap_size = gap_size
934
936 """A string representation of the position gap for debugging."""
937 return "%s(%s)" % (self.__class__.__name__, repr(self.gap_size))
938
940 out = "gap(%s)" % self.gap_size
941 return out
942
944 """Run the Bio.SeqFeature module's doctests (PRIVATE).
945
946 This will try and locate the unit tests directory, and run the doctests
947 from there in order that the relative paths used in the examples work.
948 """
949 import doctest
950 import os
951 if os.path.isdir(os.path.join("..","Tests")):
952 print "Runing doctests..."
953 cur_dir = os.path.abspath(os.curdir)
954 os.chdir(os.path.join("..","Tests"))
955 doctest.testmod()
956 os.chdir(cur_dir)
957 del cur_dir
958 print "Done"
959 elif os.path.isdir(os.path.join("Tests")) :
960 print "Runing doctests..."
961 cur_dir = os.path.abspath(os.curdir)
962 os.chdir(os.path.join("Tests"))
963 doctest.testmod()
964 os.chdir(cur_dir)
965 del cur_dir
966 print "Done"
967
968
# When the module is executed directly as a script, run its doctests.
if __name__ == "__main__":
    _test()
971