Package Bio :: Package Phylo :: Module BaseTree
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo.BaseTree

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Base classes for Bio.Phylo objects. 
  7   
  8  All object representations for phylogenetic trees should derive from these base 
  9  classes in order to use the common methods defined on them. 
 10  """ 
 11  __docformat__ = "restructuredtext en" 
 12   
 13  import collections 
 14  import copy 
 15  import itertools 
 16  import random 
 17  import re 
 18   
 19  from Bio.Phylo import _sugar 
20 21 # General tree-traversal algorithms 22 23 -def _level_traverse(root, get_children):
24 """Traverse a tree in breadth-first (level) order.""" 25 Q = collections.deque([root]) 26 while Q: 27 v = Q.popleft() 28 yield v 29 Q.extend(get_children(v))
30
31 -def _preorder_traverse(root, get_children):
32 """Traverse a tree in depth-first pre-order (parent before children).""" 33 def dfs(elem): 34 yield elem 35 for v in get_children(elem): 36 for u in dfs(v): 37 yield u
38 for elem in dfs(root): 39 yield elem 40
41 -def _postorder_traverse(root, get_children):
42 """Traverse a tree in depth-first post-order (children before parent).""" 43 def dfs(elem): 44 for v in get_children(elem): 45 for u in dfs(v): 46 yield u 47 yield elem
48 for elem in dfs(root): 49 yield elem 50
51 52 -def _sorted_attrs(elem):
53 """Get a flat list of elem's attributes, sorted for consistency.""" 54 singles = [] 55 lists = [] 56 # Sort attributes for consistent results 57 for attrname, child in sorted(elem.__dict__.iteritems(), 58 key=lambda kv: kv[0]): 59 if child is None: 60 continue 61 if isinstance(child, list): 62 lists.extend(child) 63 else: 64 singles.append(child) 65 return (x for x in singles + lists 66 if isinstance(x, TreeElement))
67
68 69 # Factory functions to generalize searching for clades/nodes 70 71 -def _identity_matcher(target):
72 """Match a node to the target object by identity.""" 73 def match(node): 74 return (node is target)
75 return match 76
77 -def _class_matcher(target_cls):
78 """Match a node if it's an instance of the given class.""" 79 def match(node): 80 return isinstance(node, target_cls)
81 return match 82
83 -def _string_matcher(target):
84 def match(node): 85 return unicode(node) == target
86 return match 87
88 -def _attribute_matcher(kwargs):
89 """Match a node by specified attribute values. 90 91 ``terminal`` is a special case: True restricts the search to external (leaf) 92 nodes, False restricts to internal nodes, and None allows all tree elements 93 to be searched, including phyloXML annotations. 94 95 Otherwise, for a tree element to match the specification (i.e. for the 96 function produced by `_attribute_matcher` to return True when given a tree 97 element), it must have each of the attributes specified by the keys and 98 match each of the corresponding values -- think 'and', not 'or', for 99 multiple keys. 100 """ 101 def match(node): 102 if 'terminal' in kwargs: 103 # Special case: restrict to internal/external/any nodes 104 kwa_copy = kwargs.copy() 105 pattern = kwa_copy.pop('terminal') 106 if (pattern is not None and 107 (not hasattr(node, 'is_terminal') or 108 node.is_terminal() != pattern)): 109 return False 110 else: 111 kwa_copy = kwargs 112 for key, pattern in kwa_copy.iteritems(): 113 # Nodes must match all other specified attributes 114 if not hasattr(node, key): 115 return False 116 target = getattr(node, key) 117 if isinstance(pattern, basestring): 118 return (isinstance(target, basestring) and 119 re.match(pattern+'$', target)) 120 if isinstance(pattern, bool): 121 return (pattern == bool(target)) 122 if isinstance(pattern, int): 123 return (pattern == target) 124 if pattern is None: 125 return (target is None) 126 raise TypeError('invalid query type: %s' % type(pattern)) 127 return True
128 return match 129
130 -def _function_matcher(matcher_func):
131 """Safer attribute lookup -- returns False instead of raising an error.""" 132 def match(node): 133 try: 134 return matcher_func(node) 135 except (LookupError, AttributeError, ValueError, TypeError): 136 return False
137 return match 138
139 -def _object_matcher(obj):
140 """Retrieve a matcher function by passing an arbitrary object. 141 142 i.e. passing a `TreeElement` such as a `Clade` or `Tree` instance returns an 143 identity matcher, passing a type such as the `PhyloXML.Taxonomy` class 144 returns a class matcher, and passing a dictionary returns an attribute 145 matcher. 146 147 The resulting 'match' function returns True when given an object matching 148 the specification (identity, type or attribute values), otherwise False. 149 This is useful for writing functions that search the tree, and probably 150 shouldn't be used directly by the end user. 151 """ 152 if isinstance(obj, TreeElement): 153 return _identity_matcher(obj) 154 if isinstance(obj, type): 155 return _class_matcher(obj) 156 if isinstance(obj, basestring): 157 return _string_matcher(obj) 158 if isinstance(obj, dict): 159 return _attribute_matcher(obj) 160 if callable(obj): 161 return _function_matcher(obj) 162 raise ValueError("%s (type %s) is not a valid type for comparison." 163 % (obj, type(obj)))
164
165 166 -def _combine_matchers(target, kwargs, require_spec):
167 """Merge target specifications with keyword arguments. 168 169 Dispatch the components to the various matcher functions, then merge into a 170 single boolean function. 171 """ 172 if not target: 173 if not kwargs: 174 if require_spec: 175 raise ValueError("you must specify a target object or keyword " 176 "arguments.") 177 return lambda x: True 178 return _attribute_matcher(kwargs) 179 match_obj = _object_matcher(target) 180 if not kwargs: 181 return match_obj 182 match_kwargs = _attribute_matcher(kwargs) 183 return (lambda x: match_obj(x) and match_kwargs(x))
184
185 186 -def _combine_args(first, *rest):
187 """Convert ``[targets]`` or ``*targets`` arguments to a single iterable. 188 189 This helps other functions work like the built-in functions `max` and 190 `min`. 191 """ 192 # Background: is_monophyletic takes a single list or iterable (like the 193 # same method in Bio.Nexus.Trees); root_with_outgroup and common_ancestor 194 # take separate arguments. This mismatch was in the initial release and I 195 # didn't notice the inconsistency until after Biopython 1.55. I can think 196 # of cases where either style is more convenient, so let's support both 197 # (for backward compatibility and consistency between methods). 198 if hasattr(first, '__iter__') and not (isinstance(first, TreeElement) or 199 isinstance(first, type) or isinstance(first, basestring) or 200 isinstance(first, dict)): 201 # `terminals` is an iterable of targets 202 if rest: 203 raise ValueError("Arguments must be either a single list of " 204 "targets, or separately specified targets " 205 "(e.g. foo(t1, t2, t3)), but not both.") 206 return first 207 # `terminals` is a single target -- wrap in a container 208 return itertools.chain([first], rest)
209
210 211 # Class definitions 212 213 -class TreeElement(object):
214 """Base class for all Bio.Phylo classes.""" 215
216 - def __repr__(self):
217 """Show this object's constructor with its primitive arguments.""" 218 def pair_as_kwarg_string(key, val): 219 if isinstance(val, basestring): 220 return "%s='%s'" % (key, _sugar.trim_str(unicode(val))) 221 return "%s=%s" % (key, val)
222 return u'%s(%s)' % (self.__class__.__name__, 223 ', '.join(pair_as_kwarg_string(key, val) 224 for key, val in self.__dict__.iteritems() 225 if val is not None and 226 type(val) in (str, int, float, bool, unicode) 227 ))
228 229 __str__ = __repr__ 230
231 232 -class TreeMixin(object):
233 """Methods for Tree- and Clade-based classes. 234 235 This lets `Tree` and `Clade` support the same traversal and searching 236 operations without requiring Clade to inherit from Tree, so Clade isn't 237 required to have all of Tree's attributes -- just ``root`` (a Clade 238 instance) and ``is_terminal``. 239 """ 240 # Traversal methods 241
242 - def _filter_search(self, filter_func, order, follow_attrs):
243 """Perform a BFS or DFS traversal through all elements in this tree. 244 245 :returns: generator of all elements for which `filter_func` is True. 246 """ 247 order_opts = {'preorder': _preorder_traverse, 248 'postorder': _postorder_traverse, 249 'level': _level_traverse} 250 try: 251 order_func = order_opts[order] 252 except KeyError: 253 raise ValueError("Invalid order '%s'; must be one of: %s" 254 % (order, tuple(order_opts.keys()))) 255 if follow_attrs: 256 get_children = _sorted_attrs 257 root = self 258 else: 259 get_children = lambda elem: elem.clades 260 root = self.root 261 return itertools.ifilter(filter_func, order_func(root, get_children))
262
263 - def find_any(self, *args, **kwargs):
264 """Return the first element found by find_elements(), or None. 265 266 This is also useful for checking whether any matching element exists in 267 the tree, and can be used in a conditional expression. 268 """ 269 hits = self.find_elements(*args, **kwargs) 270 try: 271 return hits.next() 272 except StopIteration: 273 return None
274
275 - def find_elements(self, target=None, terminal=None, order='preorder', 276 **kwargs):
277 """Find all tree elements matching the given attributes. 278 279 The arbitrary keyword arguments indicate the attribute name of the 280 sub-element and the value to match: string, integer or boolean. Strings 281 are evaluated as regular expression matches; integers are compared 282 directly for equality, and booleans evaluate the attribute's truth value 283 (True or False) before comparing. To handle nonzero floats, search with 284 a boolean argument, then filter the result manually. 285 286 If no keyword arguments are given, then just the class type is used for 287 matching. 288 289 The result is an iterable through all matching objects, by depth-first 290 search. (Not necessarily the same order as the elements appear in the 291 source file!) 292 293 :Parameters: 294 target : TreeElement instance, type, dict, or callable 295 Specifies the characteristics to search for. (The default, 296 TreeElement, matches any standard Bio.Phylo type.) 297 terminal : bool 298 A boolean value to select for or against terminal nodes (a.k.a. 299 leaf nodes). True searches for only terminal nodes, False 300 excludes terminal nodes, and the default, None, searches both 301 terminal and non-terminal nodes, as well as any tree elements 302 lacking the ``is_terminal`` method. 303 order : {'preorder', 'postorder', 'level'} 304 Tree traversal order: 'preorder' (default) is depth-first 305 search, 'postorder' is DFS with child nodes preceding parents, 306 and 'level' is breadth-first search. 307 308 Example 309 ------- 310 311 >>> from Bio.Phylo.IO import PhyloXMIO 312 >>> phx = PhyloXMLIO.read('phyloxml_examples.xml') 313 >>> matches = phx.phylogenies[5].find_elements(code='OCTVU') 314 >>> matches.next() 315 Taxonomy(code='OCTVU', scientific_name='Octopus vulgaris') 316 317 """ 318 if terminal is not None: 319 kwargs['terminal'] = terminal 320 is_matching_elem = _combine_matchers(target, kwargs, False) 321 return self._filter_search(is_matching_elem, order, True)
322
323 - def find_clades(self, target=None, terminal=None, order='preorder', 324 **kwargs):
325 """Find each clade containing a matching element. 326 327 That is, find each element as with find_elements(), but return the 328 corresponding clade object. (This is usually what you want.) 329 330 :returns: an iterable through all matching objects, searching 331 depth-first (preorder) by default. 332 """ 333 def match_attrs(elem): 334 orig_clades = elem.__dict__.pop('clades') 335 found = elem.find_any(target, **kwargs) 336 elem.clades = orig_clades 337 return (found is not None)
338 if terminal is None: 339 is_matching_elem = match_attrs 340 else: 341 def is_matching_elem(elem): 342 return ((elem.is_terminal() == terminal) and 343 match_attrs(elem))
344 return self._filter_search(is_matching_elem, order, False) 345
346 - def get_path(self, target=None, **kwargs):
347 """List the clades directly between this root and the given target. 348 349 :returns: list of all clade objects along this path, ending with the 350 given target, but excluding the root clade. 351 """ 352 # Only one path will work -- ignore weights and visits 353 path = [] 354 match = _combine_matchers(target, kwargs, True) 355 def check_in_path(v): 356 if match(v): 357 path.append(v) 358 return True 359 elif v.is_terminal(): 360 return False 361 for child in v: 362 if check_in_path(child): 363 path.append(v) 364 return True 365 return False
366 if not check_in_path(self.root): 367 return None 368 return path[-2::-1] 369
370 - def get_nonterminals(self, order='preorder'):
371 """Get a list of all of this tree's nonterminal (internal) nodes.""" 372 return list(self.find_clades(terminal=False, order=order))
373
374 - def get_terminals(self, order='preorder'):
375 """Get a list of all of this tree's terminal (leaf) nodes.""" 376 return list(self.find_clades(terminal=True, order=order))
377
378 - def trace(self, start, finish):
379 """List of all clade object between two targets in this tree. 380 381 Excluding `start`, including `finish`. 382 """ 383 mrca = self.common_ancestor(start, finish) 384 fromstart = mrca.get_path(start)[-2::-1] 385 to = mrca.get_path(finish) 386 return fromstart + [mrca] + to
387 388 # Information methods 389
390 - def common_ancestor(self, targets, *more_targets):
391 """Most recent common ancestor (clade) of all the given targets. 392 393 Edge cases: 394 - If no target is given, returns self.root 395 - If 1 target is given, returns the target 396 - If any target is not found in this tree, raises a ValueError 397 """ 398 paths = [self.get_path(t) 399 for t in _combine_args(targets, *more_targets)] 400 # Validation -- otherwise izip throws a spooky error below 401 for p, t in zip(paths, targets): 402 if p is None: 403 raise ValueError("target %s is not in this tree" % repr(t)) 404 mrca = self.root 405 for level in itertools.izip(*paths): 406 ref = level[0] 407 for other in level[1:]: 408 if ref is not other: 409 break 410 else: 411 mrca = ref 412 if ref is not mrca: 413 break 414 return mrca
415
416 - def count_terminals(self):
417 """Counts the number of terminal (leaf) nodes within this tree.""" 418 return _sugar.iterlen(self.find_clades(terminal=True))
419
420 - def depths(self, unit_branch_lengths=False):
421 """Create a mapping of tree clades to depths (by branch length). 422 423 :Parameters: 424 unit_branch_lengths : bool 425 If True, count only the number of branches (levels in the tree). 426 By default the distance is the cumulative branch length leading 427 to the clade. 428 429 :returns: dict of {clade: depth}, where keys are all of the Clade 430 instances in the tree, and values are the distance from the root to 431 each clade (including terminals). 432 """ 433 if unit_branch_lengths: 434 depth_of = lambda c: 1 435 else: 436 depth_of = lambda c: c.branch_length or 0 437 depths = {} 438 def update_depths(node, curr_depth): 439 depths[node] = curr_depth 440 for child in node.clades: 441 new_depth = curr_depth + depth_of(child) 442 update_depths(child, new_depth)
443 update_depths(self.root, 0) 444 return depths 445
446 - def distance(self, target1, target2=None):
447 """Calculate the sum of the branch lengths between two targets. 448 449 If only one target is specified, the other is the root of this tree. 450 """ 451 if target2 is None: 452 return sum(n.branch_length for n in self.get_path(target1) 453 if n.branch_length is not None) 454 mrca = self.common_ancestor(target1, target2) 455 return mrca.distance(target1) + mrca.distance(target2)
456
457 - def is_bifurcating(self):
458 """Return True if tree downstream of node is strictly bifurcating. 459 460 I.e., all nodes have either 2 or 0 children (internal or external, 461 respectively). The root may have 3 descendents and still be considered 462 part of a bifurcating tree, because it has no ancestor. 463 """ 464 # Root can be trifurcating 465 if isinstance(self, Tree) and len(self.root) == 3: 466 return (self.root.clades[0].is_bifurcating() and 467 self.root.clades[1].is_bifurcating() and 468 self.root.clades[2].is_bifurcating()) 469 if len(self.root) == 2: 470 return (self.root.clades[0].is_bifurcating() and 471 self.root.clades[1].is_bifurcating()) 472 if len(self.root) == 0: 473 return True 474 return False
475
476 - def is_monophyletic(self, terminals, *more_terminals):
477 """MRCA of terminals if they comprise a complete subclade, or False. 478 479 I.e., there exists a clade such that its terminals are the same set as 480 the given targets. 481 482 The given targets must be terminals of the tree. 483 484 To match both `Bio.Nexus.Trees` and the other multi-target methods in 485 Bio.Phylo, arguments to this method can be specified either of two ways: 486 (i) as a single list of targets, or (ii) separately specified targets, 487 e.g. is_monophyletic(t1, t2, t3) -- but not both. 488 489 For convenience, this method returns the common ancestor (MCRA) of the 490 targets if they are monophyletic (instead of the value True), and False 491 otherwise. 492 493 :returns: common ancestor if terminals are monophyletic, otherwise False. 494 """ 495 target_set = set(_combine_args(terminals, *more_terminals)) 496 current = self.root 497 while True: 498 if set(current.get_terminals()) == target_set: 499 return current 500 # Try a narrower subclade 501 for subclade in current.clades: 502 if set(subclade.get_terminals()).issuperset(target_set): 503 current = subclade 504 break 505 else: 506 return False
507
508 - def is_parent_of(self, target=None, **kwargs):
509 """True if target is a descendent of this tree. 510 511 Not required to be a direct descendent. 512 513 To check only direct descendents of a clade, simply use list membership 514 testing: ``if subclade in clade: ...`` 515 """ 516 return self.get_path(target, **kwargs) is not None
517
518 - def is_preterminal(self):
519 """True if all direct descendents are terminal.""" 520 if self.root.is_terminal(): 521 return False 522 for clade in self.root.clades: 523 if not clade.is_terminal(): 524 return False 525 return True
526
527 - def total_branch_length(self):
528 """Calculate the sum of all the branch lengths in this tree.""" 529 return sum(node.branch_length 530 for node in self.find_clades(branch_length=True))
531 532 # Tree manipulation methods 533
534 - def collapse(self, target=None, **kwargs):
535 """Deletes target from the tree, relinking its children to its parent. 536 537 :returns: the parent clade. 538 """ 539 path = self.get_path(target, **kwargs) 540 if not path: 541 raise ValueError("couldn't collapse %s in this tree" 542 % (target or kwargs)) 543 if len(path) == 1: 544 parent = self.root 545 else: 546 parent = path[-2] 547 popped = parent.clades.pop(parent.clades.index(path[-1])) 548 extra_length = popped.branch_length or 0 549 for child in popped: 550 child.branch_length += extra_length 551 parent.clades.extend(popped.clades) 552 return parent
553
554 - def collapse_all(self, target=None, **kwargs):
555 """Collapse all the descendents of this tree, leaving only terminals. 556 557 Total branch lengths are preserved, i.e. the distance to each terminal 558 stays the same. 559 560 For example, this will safely collapse nodes with poor bootstrap 561 support: 562 563 >>> tree.collapse_all(lambda c: c.confidence is not None and 564 ... c.confidence < 70) 565 566 This implementation avoids strange side-effects by using level-order 567 traversal and testing all clade properties (versus the target 568 specification) up front. In particular, if a clade meets the target 569 specification in the original tree, it will be collapsed. For example, 570 if the condition is: 571 572 >>> tree.collapse_all(lambda c: c.branch_length < 0.1) 573 574 Collapsing a clade's parent node adds the parent's branch length to the 575 child, so during the execution of collapse_all, a clade's branch_length 576 may increase. In this implementation, clades are collapsed according to 577 their properties in the original tree, not the properties when tree 578 traversal reaches the clade. (It's easier to debug.) If you want the 579 other behavior (incremental testing), modifying the source code of this 580 function is straightforward. 581 """ 582 # Read the iterable into a list to protect against in-place changes 583 internals = list(self.find_clades(target, False, 'level', **kwargs)) 584 # Skip the root node -- it can't be collapsed 585 if internals[0] == self.root: 586 internals.pop(0) 587 for clade in internals: 588 self.collapse(clade)
589
590 - def ladderize(self, reverse=False):
591 """Sort clades in-place according to the number of terminal nodes. 592 593 Deepest clades are last by default. Use ``reverse=True`` to sort clades 594 deepest-to-shallowest. 595 """ 596 self.root.clades.sort(key=lambda c: c.count_terminals(), 597 reverse=reverse) 598 for subclade in self.root.clades: 599 subclade.ladderize(reverse=reverse)
600
601 - def prune(self, target=None, **kwargs):
602 """Prunes a terminal clade from the tree. 603 604 If taxon is from a bifurcation, the connecting node will be collapsed 605 and its branch length added to remaining terminal node. This might be no 606 longer be a meaningful value. 607 608 :returns: parent clade of the pruned target 609 """ 610 if 'terminal' in kwargs and kwargs['terminal']: 611 raise ValueError("target must be terminal") 612 path = self.get_path(target, terminal=True, **kwargs) 613 if not path: 614 raise ValueError("can't find a matching target below this root") 615 if len(path) == 1: 616 parent = self.root 617 else: 618 parent = path[-2] 619 parent.clades.remove(path[-1]) 620 if len(parent) == 1: 621 # We deleted a branch from a bifurcation 622 if parent == self.root: 623 # If we're at the root, move the root upwards 624 # NB: This loses the length of the original branch 625 newroot = parent.clades[0] 626 newroot.branch_length = None 627 parent = self.root = newroot 628 else: 629 # If we're not at the root, collapse this parent 630 child = parent.clades[0] 631 if child.branch_length is not None: 632 child.branch_length += (parent.branch_length or 0.0) 633 if len(path) < 3: 634 grandparent = self.root 635 else: 636 grandparent = path[-3] 637 # Replace parent with child at the same place in grandparent 638 index = grandparent.clades.index(parent) 639 grandparent.clades.pop(index) 640 grandparent.clades.insert(index, child) 641 parent = grandparent 642 return parent
643
644 - def split(self, n=2, branch_length=1.0):
645 """Generate n (default 2) new descendants. 646 647 In a species tree, this is a speciation event. 648 649 New clades have the given branch_length and the same name as this 650 clade's root plus an integer suffix (counting from 0). For example, 651 splitting a clade named "A" produces sub-clades named "A0" and "A1". 652 """ 653 clade_cls = type(self.root) 654 base_name = self.root.name or '' 655 for i in range(n): 656 clade = clade_cls(name=base_name+str(i), 657 branch_length=branch_length) 658 self.root.clades.append(clade)
659
660 661 -class Tree(TreeElement, TreeMixin):
662 """A phylogenetic tree, containing global info for the phylogeny. 663 664 The structure and node-specific data is accessible through the 'root' 665 clade attached to the Tree instance. 666 667 :Parameters: 668 root : Clade 669 The starting node of the tree. If the tree is rooted, this will 670 usually be the root node. 671 rooted : bool 672 Whether or not the tree is rooted. By default, a tree is assumed to 673 be rooted. 674 id : str 675 The identifier of the tree, if there is one. 676 name : str 677 The name of the tree, in essence a label. 678 """
679 - def __init__(self, root=None, rooted=True, id=None, name=None):
680 self.root = root or Clade() 681 self.rooted = rooted 682 self.id = id 683 self.name = name
684 685 @classmethod
686 - def from_clade(cls, clade, **kwargs):
687 """Create a new Tree object given a clade. 688 689 Keyword arguments are the usual `Tree` constructor parameters. 690 """ 691 root = copy.deepcopy(clade) 692 return cls(root, **kwargs)
693 694 @classmethod
695 - def randomized(cls, taxa, branch_length=1.0, branch_stdev=None):
696 """Create a randomized bifurcating tree given a list of taxa. 697 698 :param taxa: Either an integer specifying the number of taxa to create 699 (automatically named taxon#), or an iterable of taxon names, as 700 strings. 701 702 :returns: a tree of the same type as this class. 703 """ 704 if isinstance(taxa, int): 705 taxa = ['taxon%s' % (i+1) for i in range(taxa)] 706 elif hasattr(taxa, '__iter__'): 707 taxa = list(taxa) 708 else: 709 raise TypeError("taxa argument must be integer (# taxa) or " 710 "iterable of taxon names.") 711 rtree = cls() 712 terminals = [rtree.root] 713 while len(terminals) < len(taxa): 714 newsplit = random.choice(terminals) 715 newterms = newsplit.split(branch_length=branch_length) 716 if branch_stdev: 717 # Add some noise to the branch lengths 718 for nt in newterms: 719 nt.branch_length = max(0, 720 random.gauss(branch_length, branch_stdev)) 721 terminals.remove(newsplit) 722 terminals.extend(newterms) 723 # Distribute taxon labels randomly 724 random.shuffle(taxa) 725 for node, name in zip(terminals, taxa): 726 node.name = name 727 return rtree
728 729 @property
730 - def clade(self):
731 """The first clade in this tree (not itself).""" 732 return self.root
733
734 - def as_phyloxml(self, **kwargs):
735 """Convert this tree to a PhyloXML-compatible Phylogeny. 736 737 This lets you use the additional annotation types PhyloXML defines, and 738 save this information when you write this tree as 'phyloxml'. 739 """ 740 from Bio.Phylo.PhyloXML import Phylogeny 741 return Phylogeny.from_tree(self, **kwargs)
742
743 - def root_with_outgroup(self, outgroup_targets, *more_targets):
744 """Reroot this tree with the outgroup clade containing outgroup_targets. 745 746 Operates in-place. 747 748 Edge cases: 749 750 - If ``outgroup == self.root``, no change 751 - If outgroup is terminal, create new bifurcating root node with a 752 0-length branch to the outgroup 753 - If outgroup is internal, use the given outgroup node as the new 754 trifurcating root, keeping branches the same 755 - If the original root was bifurcating, drop it from the tree, 756 preserving total branch lengths 757 """ 758 # This raises a ValueError if any target is not in this tree 759 # Otherwise, common_ancestor guarantees outgroup is in this tree 760 outgroup = self.common_ancestor(outgroup_targets, *more_targets) 761 outgroup_path = self.get_path(outgroup) 762 if len(outgroup_path) == 0: 763 # Outgroup is the current root -- no change 764 return 765 766 prev_blen = outgroup.branch_length 767 if outgroup.is_terminal(): 768 # Create a new root with a 0-length branch to the outgroup 769 outgroup.branch_length = 0.0 770 new_root = self.root.__class__( 771 branch_length=self.root.branch_length, clades=[outgroup]) 772 # The first branch reversal (see the upcoming loop) is modified 773 if len(outgroup_path) == 1: 774 # Trivial tree like '(A,B); 775 new_parent = new_root 776 else: 777 parent = outgroup_path.pop(-2) 778 parent.clades.pop(parent.clades.index(outgroup)) 779 prev_blen, parent.branch_length = parent.branch_length, prev_blen 780 new_root.clades.insert(0, parent) 781 new_parent = parent 782 else: 783 # Use the given outgroup node as the new (trifurcating) root 784 new_root = outgroup 785 new_root.branch_length = self.root.branch_length 786 new_parent = new_root 787 788 # Tracing the outgroup lineage backwards, reattach the subclades under a 789 # new root clade. Reverse the branches directly above the outgroup in 790 # the tree, but keep the descendants of those clades as they are. 791 for parent in outgroup_path[-2::-1]: 792 parent.clades.pop(parent.clades.index(new_parent)) 793 prev_blen, parent.branch_length = parent.branch_length, prev_blen 794 new_parent.clades.insert(0, parent) 795 new_parent = parent 796 797 # Finally, handle the original root according to number of descendents 798 old_root = self.root 799 if outgroup in old_root.clades: 800 assert len(outgroup_path) == 1 801 old_root.clades.pop(old_root.clades.index(outgroup)) 802 else: 803 old_root.clades.pop(old_root.clades.index(new_parent)) 804 if len(old_root) == 1: 805 # Delete the old bifurcating root & add branch lengths 806 ingroup = old_root.clades[0] 807 if ingroup.branch_length: 808 ingroup.branch_length += prev_blen 809 else: 810 ingroup.branch_length = prev_blen 811 new_parent.clades.insert(0, ingroup) 812 # ENH: If annotations are attached to old_root, do... something. 813 else: 814 # Keep the old trifurcating/polytomous root as an internal node 815 old_root.branch_length = prev_blen 816 new_parent.clades.insert(0, old_root) 817 818 self.root = new_root 819 self.rooted = True 820 return
821 822 # Method assumed by TreeMixin 823
824 - def is_terminal(self):
825 """True if the root of this tree is terminal.""" 826 return (not self.root.clades)
827 828 # Convention from SeqRecord and Alignment classes 829
830 - def __format__(self, format_spec):
831 """Serialize the tree as a string in the specified file format. 832 833 This method supports the ``format`` built-in function added in Python 834 2.6/3.0. 835 836 :param format_spec: a lower-case string supported by `Bio.Phylo.write` 837 as an output file format. 838 """ 839 if format_spec: 840 from StringIO import StringIO 841 from Bio.Phylo import _io 842 handle = StringIO() 843 _io.write([self], handle, format_spec) 844 return handle.getvalue() 845 else: 846 # Follow python convention and default to using __str__ 847 return str(self)
848
849 - def format(self, format):
850 """Serialize the tree as a string in the specified file format. 851 852 This duplicates the __format__ magic method for pre-2.6 Pythons. 853 """ 854 return self.__format__(format)
855 856 # Pretty-printer for the entire tree hierarchy 857
858 - def __str__(self):
859 """String representation of the entire tree. 860 861 Serializes each sub-clade recursively using ``repr`` to create a summary 862 of the object structure. 863 """ 864 TAB = ' ' 865 textlines = [] 866 def print_tree(obj, indent): 867 """Recursively serialize sub-elements. 868 869 This closes over textlines and modifies it in-place. 870 """ 871 textlines.append(TAB*indent + repr(obj)) 872 indent += 1 873 for attr in obj.__dict__: 874 child = getattr(obj, attr) 875 if isinstance(child, TreeElement): 876 print_tree(child, indent) 877 elif isinstance(child, list): 878 for elem in child: 879 if isinstance(elem, TreeElement): 880 print_tree(elem, indent)
881 print_tree(self, 0) 882 return '\n'.join(textlines)
883
884 885 -class Clade(TreeElement, TreeMixin):
886 """A recursively defined sub-tree. 887 888 :Parameters: 889 branch_length : str 890 The length of the branch leading to the root node of this clade. 891 name : str 892 The clade's name (a label). 893 clades : list 894 Sub-trees rooted directly under this tree's root. 895 """
896 - def __init__(self, branch_length=None, name=None, clades=None, 897 confidence=None):
898 self.branch_length = branch_length 899 self.name = name 900 self.clades = clades or [] 901 self.confidence = confidence
902 903 @property
904 - def root(self):
905 """Allow TreeMixin methods to traverse clades properly.""" 906 return self
907
908 - def is_terminal(self):
909 """True if this is a terminal (leaf) node.""" 910 return (not self.clades)
911 912 # Sequence-type behavior methods 913
914 - def __getitem__(self, index):
915 """Get clades by index (integer or slice).""" 916 if isinstance(index, int) or isinstance(index, slice): 917 return self.clades[index] 918 ref = self 919 for idx in index: 920 ref = ref[idx] 921 return ref
922
923 - def __iter__(self):
924 """Iterate through this tree's direct descendent clades (sub-trees).""" 925 return iter(self.clades)
926
927 - def __len__(self):
928 """Number of clades directy under the root.""" 929 return len(self.clades)
930
931 - def __nonzero__(self):
932 """Boolean value of an instance of this class. 933 934 NB: If this method is not defined, but ``__len__`` is, then the object 935 is considered true if the result of ``__len__()`` is nonzero. We want 936 Clade instances to always be considered True. 937 """ 938 return True
939
940 - def __str__(self):
941 if self.name: 942 return _sugar.trim_str(self.name, maxlen=40) 943 return self.__class__.__name__
944