Package Bio :: Package Nexus :: Module Trees
[hide private]
[frames] | no frames]

Source Code for Module Bio.Nexus.Trees

  1  # Copyright 2005-2008 by Frank Kauff & Cymon J. Cox. All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  # 
  6  # Bug reports welcome: fkauff@biologie.uni-kl.de or on Biopython's bugzilla. 
  7  """Tree class to handle phylogenetic trees. 
  8   
  9  Provides a set of methods to read and write newick-format tree descriptions, 
 10  get information about trees (monphyly of taxon sets, congruence between trees, 
 11  common ancestors,...) and to manipulate trees (reroot trees, split terminal 
 12  nodes). 
 13  """ 
 14   
 15  import sys, random, copy 
 16  import Nodes 
 17   
 18  PRECISION_BRANCHLENGTH=6 
 19  PRECISION_SUPPORT=6 
 20  NODECOMMENT_START='[&' 
 21  NODECOMMENT_END=']' 
 22   
23 -class TreeError(Exception): pass
24
25 -class NodeData(object):
26 """Stores tree-relevant data associated with nodes (e.g. branches or otus)."""
27 - def __init__(self,taxon=None,branchlength=0.0,support=None,comment=None):
28 self.taxon=taxon 29 self.branchlength=branchlength 30 self.support=support 31 self.comment=comment
32
33 -class Tree(Nodes.Chain):
34 """Represents a tree using a chain of nodes with on predecessor (=ancestor) 35 and multiple successors (=subclades). 36 """ 37 # A newick tree is parsed into nested list and then converted to a node list in two stages 38 # mostly due to historical reasons. This could be done in one swoop). Note: parentheses ( ) and 39 # colon : are not allowed in taxon names. This is against NEXUS standard, but makes life much 40 # easier when parsing trees. 41 42 ## NOTE: Tree should store its data class in something like self.dataclass=data, 43 ## so that nodes that are generated have easy access to the data class 44 ## Some routines use automatically NodeData, this needs to be more concise 45
46 - def __init__(self,tree=None,weight=1.0,rooted=False,name='',data=NodeData,values_are_support=False,max_support=1.0):
47 """Ntree(self,tree).""" 48 Nodes.Chain.__init__(self) 49 self.dataclass=data 50 self.__values_are_support=values_are_support 51 self.max_support=max_support 52 self.weight=weight 53 self.rooted=rooted 54 self.name=name 55 root=Nodes.Node(data()) 56 self.root = self.add(root) 57 if tree: # use the tree we have 58 # if Tree is called from outside Nexus parser, we need to get rid of linebreaks, etc 59 tree=tree.strip().replace('\n','').replace('\r','') 60 # there's discrepancy whether newick allows semicolons et the end 61 tree=tree.rstrip(';') 62 subtree_info, base_info = self._parse(tree) 63 root.data = self._add_nodedata(root.data, [[], base_info]) 64 self._add_subtree(parent_id=root.id,tree=subtree_info)
65
66 - def _parse(self,tree):
67 """Parses (a,b,c...)[[[xx]:]yy] into subcomponents and travels down recursively.""" 68 #Remove any leading/trailing white space - want any string starting 69 #with " (..." should be recognised as a leaf, "(..." 70 tree = tree.strip() 71 if tree.count('(')!=tree.count(')'): 72 raise TreeError('Parentheses do not match in (sub)tree: '+tree) 73 if tree.count('(')==0: # a leaf 74 #check if there's a colon, or a special comment, or both after the taxon name 75 nodecomment=tree.find(NODECOMMENT_START) 76 colon=tree.find(':') 77 if colon==-1 and nodecomment==-1: # none 78 return [tree,[None]] 79 elif colon==-1 and nodecomment>-1: # only special comment 80 return [tree[:nodecomment],self._get_values(tree[nodecomment:])] 81 elif colon>-1 and nodecomment==-1: # only numerical values 82 return [tree[:colon],self._get_values(tree[colon+1:])] 83 elif colon < nodecomment: # taxon name ends at first colon or with special comment 84 return [tree[:colon],self._get_values(tree[colon+1:])] 85 else: 86 return [tree[:nodecomment],self._get_values(tree[nodecomment:])] 87 else: 88 closing=tree.rfind(')') 89 val=self._get_values(tree[closing+1:]) 90 if not val: 91 val=[None] 92 subtrees=[] 93 plevel=0 94 prev=1 95 for p in range(1,closing): 96 if tree[p]=='(': 97 plevel+=1 98 elif tree[p]==')': 99 plevel-=1 100 elif tree[p]==',' and plevel==0: 101 subtrees.append(tree[prev:p]) 102 prev=p+1 103 subtrees.append(tree[prev:closing]) 104 subclades=[self._parse(subtree) for subtree in subtrees] 105 return [subclades,val]
106
107 - def _add_subtree(self,parent_id=None,tree=None):
108 """Adds leaf or tree (in newick format) to a parent_id. (self,parent_id,tree).""" 109 if parent_id is None: 110 raise TreeError('Need node_id to connect to.') 111 for st in tree: 112 nd=self.dataclass() 113 nd = self._add_nodedata(nd, st) 114 if type(st[0])==list: # it's a subtree 115 sn=Nodes.Node(nd) 116 self.add(sn,parent_id) 117 self._add_subtree(sn.id,st[0]) 118 else: # it's a leaf 119 nd.taxon=st[0] 120 leaf=Nodes.Node(nd) 121 self.add(leaf,parent_id)
122
123 - def _add_nodedata(self, nd, st):
124 """Add data to the node parsed from the comments, taxon and support. 125 """ 126 if isinstance(st[1][-1],str) and st[1][-1].startswith(NODECOMMENT_START): 127 nd.comment=st[1].pop(-1) 128 # if the first element is a string, it's the subtree node taxon 129 elif isinstance(st[1][0], str): 130 nd.taxon = st[1][0] 131 st[1] = st[1][1:] 132 if len(st)>1: 133 if len(st[1])>=2: # if there's two values, support comes first. Is that always so? 134 nd.support=st[1][0] 135 if st[1][1] is not None: 136 nd.branchlength=st[1][1] 137 elif len(st[1])==1: # otherwise it could be real branchlengths or support as branchlengths 138 if not self.__values_are_support: # default 139 if st[1][0] is not None: 140 nd.branchlength=st[1][0] 141 else: 142 nd.support=st[1][0] 143 return nd
144
145 - def _get_values(self, text):
146 """Extracts values (support/branchlength) from xx[:yyy], xx.""" 147 148 if text=='': 149 return None 150 nodecomment = None 151 if NODECOMMENT_START in text: # if there's a [&....] comment, cut it out 152 nc_start=text.find(NODECOMMENT_START) 153 nc_end=text.find(NODECOMMENT_END) 154 if nc_end==-1: 155 raise TreeError('Error in tree description: Found %s without matching %s' \ 156 % (NODECOMMENT_START, NODECOMMENT_END)) 157 nodecomment=text[nc_start:nc_end+1] 158 text=text[:nc_start]+text[nc_end+1:] 159 160 # pase out supports and branchlengths, with internal node taxa info 161 values = [] 162 taxonomy = None 163 for part in [t.strip() for t in text.split(":")]: 164 if part: 165 try: 166 values.append(float(part)) 167 except ValueError: 168 assert taxonomy is None, "Two string taxonomies?" 169 taxonomy = part 170 if taxonomy: 171 values.insert(0, taxonomy) 172 if nodecomment: 173 values.append(nodecomment) 174 return values
175
176 - def _walk(self,node=None):
177 """Return all node_ids downwards from a node.""" 178 179 if node is None: 180 node=self.root 181 for n in self.node(node).succ: 182 yield n 183 for sn in self._walk(n): 184 yield sn
185
186 - def node(self,node_id):
187 """Return the instance of node_id. 188 189 node = node(self,node_id) 190 """ 191 if node_id not in self.chain: 192 raise TreeError('Unknown node_id: %d' % node_id) 193 return self.chain[node_id]
194
195 - def split(self,parent_id=None,n=2,branchlength=1.0):
196 """Speciation: generates n (default two) descendants of a node. 197 198 [new ids] = split(self,parent_id=None,n=2,branchlength=1.0): 199 """ 200 if parent_id is None: 201 raise TreeError('Missing node_id.') 202 ids=[] 203 parent_data=self.chain[parent_id].data 204 for i in range(n): 205 node=Nodes.Node() 206 if parent_data: 207 node.data=self.dataclass() 208 # each node has taxon and branchlength attribute 209 if parent_data.taxon: 210 node.data.taxon=parent_data.taxon+str(i) 211 node.data.branchlength=branchlength 212 ids.append(self.add(node,parent_id)) 213 return ids
214
215 - def search_taxon(self,taxon):
216 """Returns the first matching taxon in self.data.taxon. Not restricted to terminal nodes. 217 218 node_id = search_taxon(self,taxon) 219 """ 220 for id,node in self.chain.iteritems(): 221 if node.data.taxon==taxon: 222 return id 223 return None
224
225 - def prune(self,taxon):
226 """Prunes a terminal taxon from the tree. 227 228 id_of_previous_node = prune(self,taxon) 229 If taxon is from a bifurcation, the connectiong node will be collapsed 230 and its branchlength added to remaining terminal node. This might be no 231 longer a meaningful value' 232 """ 233 234 id=self.search_taxon(taxon) 235 if id is None: 236 raise TreeError('Taxon not found: %s' % taxon) 237 elif id not in self.get_terminals(): 238 raise TreeError('Not a terminal taxon: %s' % taxon) 239 else: 240 prev=self.unlink(id) 241 self.kill(id) 242 if len(self.node(prev).succ)==1: 243 if prev==self.root: # we deleted one branch of a bifurcating root, then we have to move the root upwards 244 self.root=self.node(self.root).succ[0] 245 self.node(self.root).branchlength=0.0 246 self.kill(prev) 247 else: 248 succ=self.node(prev).succ[0] 249 new_bl=self.node(prev).data.branchlength+self.node(succ).data.branchlength 250 self.collapse(prev) 251 self.node(succ).data.branchlength=new_bl 252 return prev
253
254 - def get_taxa(self,node_id=None):
255 """Return a list of all otus downwards from a node (self, node_id). 256 257 nodes = get_taxa(self,node_id=None) 258 """ 259 260 if node_id is None: 261 node_id=self.root 262 if node_id not in self.chain: 263 raise TreeError('Unknown node_id: %d.' % node_id) 264 if self.chain[node_id].succ==[]: 265 if self.chain[node_id].data: 266 return [self.chain[node_id].data.taxon] 267 else: 268 return None 269 else: 270 list=[] 271 for succ in self.chain[node_id].succ: 272 list.extend(self.get_taxa(succ)) 273 return list
274
275 - def get_terminals(self):
276 """Return a list of all terminal nodes.""" 277 return [i for i in self.all_ids() if self.node(i).succ==[]]
278
279 - def is_terminal(self,node):
280 """Returns True if node is a terminal node.""" 281 return self.node(node).succ==[]
282
283 - def is_internal(self,node):
284 """Returns True if node is an internal node.""" 285 return len(self.node(node).succ)>0
286
287 - def is_preterminal(self,node):
288 """Returns True if all successors of a node are terminal ones.""" 289 if self.is_terminal(node): 290 return False not in [self.is_terminal(n) for n in self.node(node).succ] 291 else: 292 return False
293 - def count_terminals(self,node=None):
294 """Counts the number of terminal nodes that are attached to a node.""" 295 if node is None: 296 node=self.root 297 return len([n for n in self._walk(node) if self.is_terminal(n)])
298
299 - def collapse_genera(self,space_equals_underscore=True):
300 """Collapses all subtrees which belong to the same genus (i.e share the same first word in their taxon name.""" 301 302 while True: 303 for n in self._walk(): 304 if self.is_terminal(n): 305 continue 306 taxa=self.get_taxa(n) 307 genera=[] 308 for t in taxa: 309 if space_equals_underscore: 310 t=t.replace(' ','_') 311 try: 312 genus=t.split('_',1)[0] 313 except: 314 genus='None' 315 if genus not in genera: 316 genera.append(genus) 317 if len(genera)==1: 318 self.node(n).data.taxon=genera[0]+' <collapsed>' 319 #now we kill all nodes downstream 320 nodes2kill=[kn for kn in self._walk(node=n)] 321 for kn in nodes2kill: 322 self.kill(kn) 323 self.node(n).succ=[] 324 break # break out of for loop because node list from _walk will be inconsistent 325 else: # for loop exhausted: no genera to collapse left 326 break # while
327 328
329 - def sum_branchlength(self,root=None,node=None):
330 """Adds up the branchlengths from root (default self.root) to node. 331 332 sum = sum_branchlength(self,root=None,node=None) 333 """ 334 335 if root is None: 336 root=self.root 337 if node is None: 338 raise TreeError('Missing node id.') 339 blen=0.0 340 while node is not None and node is not root: 341 blen+=self.node(node).data.branchlength 342 node=self.node(node).prev 343 return blen
344
345 - def set_subtree(self,node):
346 """Return subtree as a set of nested sets. 347 348 sets = set_subtree(self,node) 349 """ 350 351 if self.node(node).succ==[]: 352 return self.node(node).data.taxon 353 else: 354 try: 355 return frozenset([self.set_subtree(n) for n in self.node(node).succ]) 356 except: 357 print node 358 print self.node(node).succ 359 for n in self.node(node).succ: 360 print n, self.set_subtree(n) 361 print [self.set_subtree(n) for n in self.node(node).succ] 362 raise
363
364 - def is_identical(self,tree2):
365 """Compare tree and tree2 for identity. 366 367 result = is_identical(self,tree2) 368 """ 369 return self.set_subtree(self.root)==tree2.set_subtree(tree2.root)
370
371 - def is_compatible(self,tree2,threshold,strict=True):
372 """Compares branches with support>threshold for compatibility. 373 374 result = is_compatible(self,tree2,threshold) 375 """ 376 377 # check if both trees have the same set of taxa. strict=True enforces this. 378 missing2=set(self.get_taxa())-set(tree2.get_taxa()) 379 missing1=set(tree2.get_taxa())-set(self.get_taxa()) 380 if strict and (missing1 or missing2): 381 if missing1: 382 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing1) , self.name) 383 if missing2: 384 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing2) , tree2.name) 385 raise TreeError('Can\'t compare trees with different taxon compositions.') 386 t1=[(set(self.get_taxa(n)),self.node(n).data.support) for n in self.all_ids() if \ 387 self.node(n).succ and\ 388 (self.node(n).data and self.node(n).data.support and self.node(n).data.support>=threshold)] 389 t2=[(set(tree2.get_taxa(n)),tree2.node(n).data.support) for n in tree2.all_ids() if \ 390 tree2.node(n).succ and\ 391 (tree2.node(n).data and tree2.node(n).data.support and tree2.node(n).data.support>=threshold)] 392 conflict=[] 393 for (st1,sup1) in t1: 394 for (st2,sup2) in t2: 395 if not st1.issubset(st2) and not st2.issubset(st1): # don't hiccup on upstream nodes 396 intersect,notin1,notin2=st1 & st2, st2-st1, st1-st2 # all three are non-empty sets 397 # if notin1==missing1 or notin2==missing2 <==> st1.issubset(st2) or st2.issubset(st1) ??? 398 if intersect and not (notin1.issubset(missing1) or notin2.issubset(missing2)): # omit conflicts due to missing taxa 399 conflict.append((st1,sup1,st2,sup2,intersect,notin1,notin2)) 400 return conflict
401
402 - def common_ancestor(self,node1,node2):
403 """Return the common ancestor that connects two nodes. 404 405 node_id = common_ancestor(self,node1,node2) 406 """ 407 408 l1=[self.root]+self.trace(self.root,node1) 409 l2=[self.root]+self.trace(self.root,node2) 410 return [n for n in l1 if n in l2][-1]
411 412
413 - def distance(self,node1,node2):
414 """Add and return the sum of the branchlengths between two nodes. 415 dist = distance(self,node1,node2) 416 """ 417 418 ca=self.common_ancestor(node1,node2) 419 return self.sum_branchlength(ca,node1)+self.sum_branchlength(ca,node2)
420
421 - def is_monophyletic(self,taxon_list):
422 """Return node_id of common ancestor if taxon_list is monophyletic, -1 otherwise. 423 424 result = is_monophyletic(self,taxon_list) 425 """ 426 if isinstance(taxon_list,str): 427 taxon_set=set([taxon_list]) 428 else: 429 taxon_set=set(taxon_list) 430 node_id=self.root 431 while 1: 432 subclade_taxa=set(self.get_taxa(node_id)) 433 if subclade_taxa==taxon_set: # are we there? 434 return node_id 435 else: # check subnodes 436 for subnode in self.chain[node_id].succ: 437 if set(self.get_taxa(subnode)).issuperset(taxon_set): # taxon_set is downstream 438 node_id=subnode 439 break # out of for loop 440 else: 441 return -1 # taxon set was not with successors, for loop exhausted
442
443 - def is_bifurcating(self,node=None):
444 """Return True if tree downstream of node is strictly bifurcating.""" 445 if node is None: 446 node=self.root 447 if node==self.root and len(self.node(node).succ)==3: #root can be trifurcating, because it has no ancestor 448 return self.is_bifurcating(self.node(node).succ[0]) and \ 449 self.is_bifurcating(self.node(node).succ[1]) and \ 450 self.is_bifurcating(self.node(node).succ[2]) 451 if len(self.node(node).succ)==2: 452 return self.is_bifurcating(self.node(node).succ[0]) and self.is_bifurcating(self.node(node).succ[1]) 453 elif len(self.node(node).succ)==0: 454 return True 455 else: 456 return False
457
458 - def branchlength2support(self):
459 """Move values stored in data.branchlength to data.support, and set branchlength to 0.0 460 461 This is necessary when support has been stored as branchlength (e.g. paup), and has thus 462 been read in as branchlength. 463 """ 464 465 for n in self.chain: 466 self.node(n).data.support=self.node(n).data.branchlength 467 self.node(n).data.branchlength=0.0
468
469 - def convert_absolute_support(self,nrep):
470 """Convert absolute support (clade-count) to rel. frequencies. 471 472 Some software (e.g. PHYLIP consense) just calculate how often clades appear, instead of 473 calculating relative frequencies.""" 474 475 for n in self._walk(): 476 if self.node(n).data.support: 477 self.node(n).data.support/=float(nrep)
478
479 - def has_support(self,node=None):
480 """Returns True if any of the nodes has data.support != None.""" 481 for n in self._walk(node): 482 if self.node(n).data.support: 483 return True 484 else: 485 return False
486
487 - def randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True):
488 """Generates a random tree with ntax taxa and/or taxa from taxlabels. 489 490 new_tree = randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True) 491 Trees are bifurcating by default. (Polytomies not yet supported). 492 """ 493 494 if not ntax and taxon_list: 495 ntax=len(taxon_list) 496 elif not taxon_list and ntax: 497 taxon_list=['taxon'+str(i+1) for i in range(ntax)] 498 elif not ntax and not taxon_list: 499 raise TreeError('Either numer of taxa or list of taxa must be specified.') 500 elif ntax != len(taxon_list): 501 raise TreeError('Length of taxon list must correspond to ntax.') 502 # initiate self with empty root 503 self.__init__() 504 terminals=self.get_terminals() 505 # bifurcate randomly at terminal nodes until ntax is reached 506 while len(terminals)<ntax: 507 newsplit=random.choice(terminals) 508 new_terminals=self.split(parent_id=newsplit,branchlength=branchlength) 509 # if desired, give some variation to the branch length 510 if branchlength_sd: 511 for nt in new_terminals: 512 bl=random.gauss(branchlength,branchlength_sd) 513 if bl<0: 514 bl=0 515 self.node(nt).data.branchlength=bl 516 terminals.extend(new_terminals) 517 terminals.remove(newsplit) 518 # distribute taxon labels randomly 519 random.shuffle(taxon_list) 520 for (node,name) in zip(terminals,taxon_list): 521 self.node(node).data.taxon=name
522
523 - def display(self):
524 """Quick and dirty lists of all nodes.""" 525 table=[('#','taxon','prev','succ','brlen','blen (sum)','support','comment')] 526 #Sort this to be consistent accross CPython, Jython, etc 527 for i in sorted(self.all_ids()): 528 n=self.node(i) 529 if not n.data: 530 table.append((str(i),'-',str(n.prev),str(n.succ),'-','-','-','-')) 531 else: 532 tx=n.data.taxon 533 if not tx: 534 tx='-' 535 blength="%0.2f" % n.data.branchlength 536 if blength is None: 537 blength='-' 538 sum_blength='-' 539 else: 540 sum_blength="%0.2f" % self.sum_branchlength(node=i) 541 support=n.data.support 542 if support is None: 543 support='-' 544 else: 545 support="%0.2f" % support 546 comment=n.data.comment 547 if comment is None: 548 comment='-' 549 table.append((str(i),tx,str(n.prev),str(n.succ), 550 blength, sum_blength, support, comment)) 551 print '\n'.join(['%3s %32s %15s %15s %8s %10s %8s %20s' % l for l in table]) 552 print '\nRoot: ',self.root
553
554 - def to_string(self,support_as_branchlengths=False,branchlengths_only=False,plain=True,plain_newick=False,ladderize=None,ignore_comments=True):
555 """Return a paup compatible tree line. 556 557 to_string(self,support_as_branchlengths=False,branchlengths_only=False,plain=True) 558 """ 559 # if there's a conflict in the arguments, we override plain=True 560 if support_as_branchlengths or branchlengths_only: 561 plain=False 562 self.support_as_branchlengths=support_as_branchlengths 563 self.branchlengths_only=branchlengths_only 564 self.ignore_comments=ignore_comments 565 self.plain=plain 566 567 def make_info_string(data,terminal=False): 568 """Creates nicely formatted support/branchlengths.""" 569 # CHECK FORMATTING 570 if self.plain: # plain tree only. That's easy. 571 info_string= '' 572 elif self.support_as_branchlengths: # support as branchlengths (eg. PAUP), ignore actual branchlengths 573 if terminal: # terminal branches have 100% support 574 info_string= ':%1.2f' % self.max_support 575 elif data.support: 576 info_string= ':%1.2f' % (data.support) 577 else: 578 info_string=':0.00' 579 elif self.branchlengths_only: # write only branchlengths, ignore support 580 info_string= ':%1.5f' % (data.branchlength) 581 else: # write suport and branchlengths (e.g. .con tree of mrbayes) 582 if terminal: 583 info_string= ':%1.5f' % (data.branchlength) 584 else: 585 if data.branchlength is not None and data.support is not None: # we have blen and suppport 586 info_string= '%1.2f:%1.5f' % (data.support,data.branchlength) 587 elif data.branchlength is not None: # we have only blen 588 info_string= '0.00000:%1.5f' % (data.branchlength) 589 elif data.support is not None: # we have only support 590 info_string= '%1.2f:0.00000' % (data.support) 591 else: 592 info_string= '0.00:0.00000' 593 if not ignore_comments and hasattr(data,'nodecomment'): 594 info_string=str(data.nodecomment)+info_string 595 return info_string
596 597 def ladderize_nodes(nodes,ladderize=None): 598 """Sorts node numbers according to the number of terminal nodes.""" 599 if ladderize in ['left','LEFT','right','RIGHT']: 600 succnode_terminals=[(self.count_terminals(node=n),n) for n in nodes] 601 succnode_terminals.sort() 602 if (ladderize=='right' or ladderize=='RIGHT'): 603 succnode_terminals.reverse() 604 if succnode_terminals: 605 succnodes=zip(*succnode_terminals)[1] 606 else: 607 succnodes=[] 608 else: 609 succnodes=nodes 610 return succnodes
611 612 def newickize(node,ladderize=None): 613 """Convert a node tree to a newick tree recursively.""" 614 615 if not self.node(node).succ: #terminal 616 return self.node(node).data.taxon+make_info_string(self.node(node).data,terminal=True) 617 else: 618 succnodes=ladderize_nodes(self.node(node).succ,ladderize=ladderize) 619 subtrees=[newickize(sn,ladderize=ladderize) for sn in succnodes] 620 return '(%s)%s' % (','.join(subtrees),make_info_string(self.node(node).data)) 621 622 treeline=['tree'] 623 if self.name: 624 treeline.append(self.name) 625 else: 626 treeline.append('a_tree') 627 treeline.append('=') 628 if self.weight != 1: 629 treeline.append('[&W%s]' % str(round(float(self.weight),3))) 630 if self.rooted: 631 treeline.append('[&R]') 632 succnodes=ladderize_nodes(self.node(self.root).succ) 633 subtrees=[newickize(sn,ladderize=ladderize) for sn in succnodes] 634 treeline.append('(%s)' % ','.join(subtrees)) 635 if plain_newick: 636 return treeline[-1] 637 else: 638 return ' '.join(treeline)+';' 639
640 - def __str__(self):
641 """Short version of to_string(), gives plain tree""" 642 return self.to_string(plain=True)
643
644 - def unroot(self):
645 """Defines a unrooted Tree structure, using data of a rooted Tree.""" 646 647 # travel down the rooted tree structure and save all branches and the nodes they connect 648 649 def _get_branches(node): 650 branches=[] 651 for b in self.node(node).succ: 652 branches.append([node,b,self.node(b).data.branchlength,self.node(b).data.support]) 653 branches.extend(_get_branches(b)) 654 return branches
655 656 self.unrooted=_get_branches(self.root) 657 # if root is bifurcating, then it is eliminated 658 if len(self.node(self.root).succ)==2: 659 # find the two branches that connect to root 660 rootbranches=[b for b in self.unrooted if self.root in b[:2]] 661 b1=self.unrooted.pop(self.unrooted.index(rootbranches[0])) 662 b2=self.unrooted.pop(self.unrooted.index(rootbranches[1])) 663 # Connect them two each other. If both have support, it should be identical (or one set to None?). 664 # If both have branchlengths, they will be added 665 newbranch=[b1[1],b2[1],b1[2]+b2[2]] 666 if b1[3] is None: 667 newbranch.append(b2[3]) # either None (both rootbranches are unsupported) or some support 668 elif b2[3] is None: 669 newbranch.append(b1[3]) # dito 670 elif b1[3]==b2[3]: 671 newbranch.append(b1[3]) # identical support 672 elif b1[3]==0 or b2[3]==0: 673 newbranch.append(b1[3]+b2[3]) # one is 0, take the other 674 else: 675 raise TreeError('Support mismatch in bifurcating root: %f, %f' \ 676 % (float(b1[3]),float(b2[3]))) 677 self.unrooted.append(newbranch) 678
679 - def root_with_outgroup(self,outgroup=None):
680 681 def _connect_subtree(parent,child): 682 """Hook subtree starting with node child to parent.""" 683 for i,branch in enumerate(self.unrooted): 684 if parent in branch[:2] and child in branch[:2]: 685 branch=self.unrooted.pop(i) 686 break 687 else: 688 raise TreeError('Unable to connect nodes for rooting: nodes %d and %d are not connected' \ 689 % (parent,child)) 690 self.link(parent,child) 691 self.node(child).data.branchlength=branch[2] 692 self.node(child).data.support=branch[3] 693 #now check if there are more branches connected to the child, and if so, connect them 694 child_branches=[b for b in self.unrooted if child in b[:2]] 695 for b in child_branches: 696 if child==b[0]: 697 succ=b[1] 698 else: 699 succ=b[0] 700 _connect_subtree(child,succ)
701 702 # check the outgroup we're supposed to root with 703 if outgroup is None: 704 return self.root 705 outgroup_node=self.is_monophyletic(outgroup) 706 if outgroup_node==-1: 707 return -1 708 # if tree is already rooted with outgroup on a bifurcating root, 709 # or the outgroup includes all taxa on the tree, then we're fine 710 if (len(self.node(self.root).succ)==2 and outgroup_node in self.node(self.root).succ) or outgroup_node==self.root: 711 return self.root 712 713 self.unroot() 714 # now we find the branch that connects outgroup and ingroup 715 #print self.node(outgroup_node).prev 716 for i,b in enumerate(self.unrooted): 717 if outgroup_node in b[:2] and self.node(outgroup_node).prev in b[:2]: 718 root_branch=self.unrooted.pop(i) 719 break 720 else: 721 raise TreeError('Unrooted and rooted Tree do not match') 722 if outgroup_node==root_branch[1]: 723 ingroup_node=root_branch[0] 724 else: 725 ingroup_node=root_branch[1] 726 # now we destroy the old tree structure, but keep node data. Nodes will be reconnected according to new outgroup 727 for n in self.all_ids(): 728 self.node(n).prev=None 729 self.node(n).succ=[] 730 # now we just add both subtrees (outgroup and ingroup) branch for branch 731 root=Nodes.Node(data=NodeData()) # new root 732 self.add(root) # add to tree description 733 self.root=root.id # set as root 734 self.unrooted.append([root.id,ingroup_node,root_branch[2],root_branch[3]]) # add branch to ingroup to unrooted tree 735 self.unrooted.append([root.id,outgroup_node,0.0,0.0]) # add branch to outgroup to unrooted tree 736 _connect_subtree(root.id,ingroup_node) # add ingroup 737 _connect_subtree(root.id,outgroup_node) # add outgroup 738 # if theres still a lonely node in self.chain, then it's the old root, and we delete it 739 oldroot=[i for i in self.all_ids() if self.node(i).prev is None and i!=self.root] 740 if len(oldroot)>1: 741 raise TreeError('Isolated nodes in tree description: %s' \ 742 % ','.join(oldroot)) 743 elif len(oldroot)==1: 744 self.kill(oldroot[0]) 745 return self.root 746
747 - def merge_with_support(self,bstrees=None,constree=None,threshold=0.5,outgroup=None):
748 """Merges clade support (from consensus or list of bootstrap-trees) with phylogeny. 749 750 tree=merge_bootstrap(phylo,bs_tree=<list_of_trees>) 751 or 752 tree=merge_bootstrap(phylo,consree=consensus_tree with clade support) 753 """ 754 755 if bstrees and constree: 756 raise TreeError('Specify either list of boostrap trees or consensus tree, not both') 757 if not (bstrees or constree): 758 raise TreeError('Specify either list of boostrap trees or consensus tree.') 759 # no outgroup specified: use the smallest clade of the root 760 if outgroup is None: 761 try: 762 succnodes=self.node(self.root).succ 763 smallest=min([(len(self.get_taxa(n)),n) for n in succnodes]) 764 outgroup=self.get_taxa(smallest[1]) 765 except: 766 raise TreeError("Error determining outgroup.") 767 else: # root with user specified outgroup 768 self.root_with_outgroup(outgroup) 769 770 if bstrees: # calculate consensus 771 constree=consensus(bstrees,threshold=threshold,outgroup=outgroup) 772 else: 773 if not constree.has_support(): 774 constree.branchlength2support() 775 constree.root_with_outgroup(outgroup) 776 # now we travel all nodes, and add support from consensus, if the clade is present in both 777 for pnode in self._walk(): 778 cnode=constree.is_monophyletic(self.get_taxa(pnode)) 779 if cnode>-1: 780 self.node(pnode).data.support=constree.node(cnode).data.support
781 782
783 -def consensus(trees, threshold=0.5,outgroup=None):
784 """Compute a majority rule consensus tree of all clades with relative frequency>=threshold from a list of trees.""" 785 786 total=len(trees) 787 if total==0: 788 return None 789 # shouldn't we make sure that it's NodeData or subclass?? 790 dataclass=trees[0].dataclass 791 max_support=trees[0].max_support 792 clades={} 793 #countclades={} 794 alltaxa=set(trees[0].get_taxa()) 795 # calculate calde frequencies 796 c=0 797 for t in trees: 798 c+=1 799 #if c%100==0: 800 # print c 801 if alltaxa!=set(t.get_taxa()): 802 raise TreeError('Trees for consensus must contain the same taxa') 803 t.root_with_outgroup(outgroup=outgroup) 804 for st_node in t._walk(t.root): 805 subclade_taxa=t.get_taxa(st_node) 806 subclade_taxa.sort() 807 subclade_taxa=str(subclade_taxa) # lists are not hashable 808 if subclade_taxa in clades: 809 clades[subclade_taxa]+=float(t.weight)/total 810 else: 811 clades[subclade_taxa]=float(t.weight)/total 812 #if subclade_taxa in countclades: 813 # countclades[subclade_taxa]+=t.weight 814 #else: 815 # countclades[subclade_taxa]=t.weight 816 # weed out clades below threshold 817 delclades=[c for c,p in clades.iteritems() if round(p,3)<threshold] # round can be necessary 818 for c in delclades: 819 del clades[c] 820 # create a tree with a root node 821 consensus=Tree(name='consensus_%2.1f' % float(threshold),data=dataclass) 822 # each clade needs a node in the new tree, add them as isolated nodes 823 for c, s in clades.iteritems(): 824 node=Nodes.Node(data=dataclass()) 825 node.data.support=s 826 node.data.taxon=set(eval(c)) 827 consensus.add(node) 828 # set root node data 829 consensus.node(consensus.root).data.support=None 830 consensus.node(consensus.root).data.taxon=alltaxa 831 # we sort the nodes by no. of taxa in the clade, so root will be the last 832 consensus_ids=consensus.all_ids() 833 consensus_ids.sort(lambda x,y:len(consensus.node(x).data.taxon)-len(consensus.node(y).data.taxon)) 834 # now we just have to hook each node to the next smallest node that includes all taxa of the current 835 for i,current in enumerate(consensus_ids[:-1]): # skip the last one which is the root 836 #print '----' 837 #print 'current: ',consensus.node(current).data.taxon 838 # search remaining nodes 839 for parent in consensus_ids[i+1:]: 840 #print 'parent: ',consensus.node(parent).data.taxon 841 if consensus.node(parent).data.taxon.issuperset(consensus.node(current).data.taxon): 842 break 843 else: 844 sys.exit('corrupt tree structure?') 845 # internal nodes don't have taxa 846 if len(consensus.node(current).data.taxon)==1: 847 consensus.node(current).data.taxon=consensus.node(current).data.taxon.pop() 848 # reset the support for terminal nodes to maximum 849 #consensus.node(current).data.support=max_support 850 else: 851 consensus.node(current).data.taxon=None 852 consensus.link(parent,current) 853 # eliminate root taxon name 854 consensus.node(consensus_ids[-1]).data.taxon=None 855 if alltaxa != set(consensus.get_taxa()): 856 raise TreeError('FATAL ERROR: consensus tree is corrupt') 857 return consensus
858