Package Bio :: Package Phylo :: Module _utils
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo._utils

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Utilities for handling, displaying and exporting Phylo trees. 
  7   
  8  Third-party libraries are loaded when the corresponding function is called. 
  9  """ 
 10  __docformat__ = "restructuredtext en" 
 11   
 12  import math 
 13  import sys 
 14   
 15   
16 -def to_networkx(tree):
17 """Convert a Tree object to a networkx graph. 18 19 The result is useful for graph-oriented analysis, and also interactive 20 plotting with pylab, matplotlib or pygraphviz, though the resulting diagram 21 is usually not ideal for displaying a phylogeny. 22 23 Requires NetworkX version 0.99 or 1.0. 24 """ 25 try: 26 import networkx 27 except ImportError: 28 from Bio import MissingPythonDependencyError 29 raise MissingPythonDependencyError( 30 "Install NetworkX if you want to use to_networkx.") 31 32 def add_edge(graph, n1, n2): 33 # NB (1/2010): the networkx API congealed recently 34 # Ubuntu Lucid uses v0.99, newest is v1.0.1, let's support both 35 if networkx.__version__ >= '1.0': 36 graph.add_edge(n1, n2, weight=str(n2.branch_length or 1.0)) 37 # Copy branch color value as hex, if available 38 if hasattr(n2, 'color') and n2.color is not None: 39 graph[n1][n2]['color'] = n2.color.to_hex() 40 elif hasattr(n1, 'color') and n1.color is not None: 41 # Cascading color attributes 42 graph[n1][n2]['color'] = n1.color.to_hex() 43 n2.color = n1.color 44 # Copy branch weight value (float) if available 45 if hasattr(n2, 'width') and n2.width is not None: 46 graph[n1][n2]['width'] = n2.width 47 elif hasattr(n1, 'width') and n1.width is not None: 48 # Cascading width attributes 49 graph[n1][n2]['width'] = n1.width 50 n2.width = n1.width 51 elif networkx.__version__ >= '0.99': 52 graph.add_edge(n1, n2, (n2.branch_length or 1.0)) 53 else: 54 graph.add_edge(n1, n2)
55 56 def build_subgraph(graph, top): 57 """Walk down the Tree, building graphs, edges and nodes.""" 58 for clade in top: 59 graph.add_node(clade.root) 60 add_edge(graph, top.root, clade.root) 61 build_subgraph(graph, clade) 62 63 if tree.rooted: 64 G = networkx.DiGraph() 65 else: 66 G = networkx.Graph() 67 G.add_node(tree.root) 68 build_subgraph(G, tree.root) 69 return G 70 71
72 -def draw_graphviz(tree, label_func=str, prog='twopi', args='', 73 node_color='#c0deff', **kwargs):
74 """Display a tree or clade as a graph, using the graphviz engine. 75 76 Requires NetworkX, matplotlib, Graphviz and either PyGraphviz or pydot. 77 78 The third and fourth parameters apply to Graphviz, and the remaining 79 arbitrary keyword arguments are passed directly to networkx.draw(), which 80 in turn mostly wraps matplotlib/pylab. See the documentation for Graphviz 81 and networkx for detailed explanations. 82 83 The NetworkX/matplotlib parameters are described in the docstrings for 84 networkx.draw() and pylab.scatter(), but the most reasonable options to try 85 are: *alpha, node_color, node_size, node_shape, edge_color, style, 86 font_size, font_color, font_weight, font_family* 87 88 :Parameters: 89 90 label_func : callable 91 A function to extract a label from a node. By default this is str(), 92 but you can use a different function to select another string 93 associated with each node. If this function returns None for a node, 94 no label will be shown for that node. 95 96 The label will also be silently skipped if the throws an exception 97 related to ordinary attribute access (LookupError, AttributeError, 98 ValueError); all other exception types will still be raised. This 99 means you can use a lambda expression that simply attempts to look 100 up the desired value without checking if the intermediate attributes 101 are available: 102 103 >>> Phylo.draw_graphviz(tree, lambda n: n.taxonomies[0].code) 104 105 prog : string 106 The Graphviz program to use when rendering the graph. 'twopi' 107 behaves the best for large graphs, reliably avoiding crossing edges, 108 but for moderate graphs 'neato' looks a bit nicer. For small 109 directed graphs, 'dot' may produce the most normal-looking 110 phylogram, but will cross and distort edges in larger graphs. (The 111 programs 'circo' and 'fdp' are not recommended.) 112 args : string 113 Options passed to the external graphviz program. Normally not 114 needed, but offered here for completeness. 115 116 Example 117 ------- 118 119 >>> import pylab 120 >>> from Bio import Phylo 121 >>> tree = Phylo.read('ex/apaf.xml', 'phyloxml') 122 >>> Phylo.draw_graphviz(tree) 123 >>> pylab.show() 124 >>> pylab.savefig('apaf.png') 125 """ 126 try: 127 import networkx 128 except ImportError: 129 from Bio import MissingPythonDependencyError 130 raise MissingPythonDependencyError( 131 "Install NetworkX if you want to use to_networkx.") 132 133 G = to_networkx(tree) 134 Gi = networkx.convert_node_labels_to_integers(G, discard_old_labels=False) 135 try: 136 posi = networkx.pygraphviz_layout(Gi, prog, args=args) 137 except ImportError: 138 try: 139 posi = networkx.pydot_layout(Gi, prog) 140 except ImportError: 141 raise MissingPythonDependencyError( 142 "Install PyGraphviz or Pydot if you want to use " 143 "draw_graphviz.") 144 posn = dict((n, posi[Gi.node_labels[n]]) for n in G) 145 146 def get_label_mapping(G, selection): 147 for node in G.nodes(): 148 if (selection is None) or (node in selection): 149 try: 150 label = label_func(node) 151 if label not in (None, node.__class__.__name__): 152 yield (node, label) 153 except (LookupError, AttributeError, ValueError): 154 pass
155 156 if 'nodelist' in kwargs: 157 labels = dict(get_label_mapping(G, set(kwargs['nodelist']))) 158 else: 159 labels = dict(get_label_mapping(G, None)) 160 kwargs['nodelist'] = labels.keys() 161 if 'edge_color' not in kwargs: 162 kwargs['edge_color'] = [isinstance(e[2], dict) and 163 e[2].get('color', 'k') or 'k' 164 for e in G.edges(data=True)] 165 if 'width' not in kwargs: 166 kwargs['width'] = [isinstance(e[2], dict) and 167 e[2].get('width', 1.0) or 1.0 168 for e in G.edges(data=True)] 169 networkx.draw(G, posn, labels=labels, node_color=node_color, **kwargs) 170 171
172 -def draw_ascii(tree, file=sys.stdout, column_width=80):
173 """Draw an ascii-art phylogram of the given tree. 174 175 The printed result looks like:: 176 177 _________ Orange 178 ______________| 179 | |______________ Tangerine 180 ______________| 181 | | _________________________ Grapefruit 182 _| |_________| 183 | |______________ Pummelo 184 | 185 |__________________________________ Apple 186 187 188 :Parameters: 189 file : file-like object 190 File handle opened for writing the output drawing. 191 column_width : int 192 Total number of text columns used by the drawing. 193 """ 194 taxa = tree.get_terminals() 195 # Some constants for the drawing calculations 196 max_label_width = max(len(str(taxon)) for taxon in taxa) 197 drawing_width = column_width - max_label_width - 1 198 drawing_height = 2 * len(taxa) - 1 199 200 def get_col_positions(tree): 201 """Create a mapping of each clade to its column position.""" 202 depths = tree.depths() 203 # If there are no branch lengths, assume unit branch lengths 204 if not max(depths.itervalues()): 205 depths = tree.depths(unit_branch_lengths=True) 206 # Potential drawing overflow due to rounding -- 1 char per tree layer 207 fudge_margin = int(math.ceil(math.log(len(taxa), 2))) 208 cols_per_branch_unit = ((drawing_width - fudge_margin) 209 / float(max(depths.itervalues()))) 210 return dict((clade, int(round(blen*cols_per_branch_unit + 0.5))) 211 for clade, blen in depths.iteritems())
212 213 def get_row_positions(tree): 214 positions = dict((taxon, 2*idx) for idx, taxon in enumerate(taxa)) 215 def calc_row(clade): 216 for subclade in clade: 217 if subclade not in positions: 218 calc_row(subclade) 219 positions[clade] = (positions[clade.clades[0]] + 220 positions[clade.clades[-1]]) / 2 221 calc_row(tree.root) 222 return positions 223 224 col_positions = get_col_positions(tree) 225 row_positions = get_row_positions(tree) 226 char_matrix = [[' ' for x in range(drawing_width)] 227 for y in range(drawing_height)] 228 229 def draw_clade(clade, startcol): 230 thiscol = col_positions[clade] 231 thisrow = row_positions[clade] 232 # Draw a horizontal line 233 for col in range(startcol, thiscol): 234 char_matrix[thisrow][col] = '_' 235 if clade.clades: 236 # Draw a vertical line 237 toprow = row_positions[clade.clades[0]] 238 botrow = row_positions[clade.clades[-1]] 239 for row in range(toprow+1, botrow+1): 240 char_matrix[row][thiscol] = '|' 241 # NB: Short terminal branches need something to stop rstrip() 242 if (col_positions[clade.clades[0]] - thiscol) < 2: 243 char_matrix[toprow][thiscol] = ',' 244 # Draw descendents 245 for child in clade: 246 draw_clade(child, thiscol+1) 247 248 draw_clade(tree.root, 0) 249 # Print the complete drawing 250 for idx, row in enumerate(char_matrix): 251 line = ''.join(row).rstrip() 252 # Add labels for terminal taxa in the right margin 253 if idx % 2 == 0: 254 line += ' ' + str(taxa[idx/2]) 255 file.write(line + '\n') 256 file.write('\n') 257 258
259 -def draw(tree, label_func=str, do_show=True, show_confidence=True):
260 """Plot the given tree using matplotlib (or pylab). 261 262 The graphic is a rooted tree, drawn with roughly the same algorithm as 263 draw_ascii. 264 265 :Parameters: 266 label_func : callable 267 A function to extract a label from a node. By default this is str(), 268 but you can use a different function to select another string 269 associated with each node. If this function returns None for a node, 270 no label will be shown for that node. 271 """ 272 try: 273 import matplotlib.pyplot as plt 274 except ImportError: 275 try: 276 import pylab as plt 277 except ImportError: 278 from Bio import MissingPythonDependencyError 279 raise MissingPythonDependencyError( 280 "Install matplotlib or pylab if you want to use draw.") 281 282 def get_x_positions(tree): 283 """Create a mapping of each clade to its horizontal position. 284 285 Dict of {clade: x-coord} 286 """ 287 depths = tree.depths() 288 # If there are no branch lengths, assume unit branch lengths 289 if not max(depths.itervalues()): 290 depths = tree.depths(unit_branch_lengths=True) 291 return depths
292 293 def get_y_positions(tree): 294 """Create a mapping of each clade to its vertical position. 295 296 Dict of {clade: y-coord}. 297 Coordinates are negative, and integers for tips. 298 """ 299 maxheight = tree.count_terminals() 300 # Rows are defined by the tips 301 heights = dict((tip, maxheight - i) 302 for i, tip in enumerate(tree.get_terminals())) 303 # Internal nodes: place at midpoint of children 304 def calc_row(clade): 305 for subclade in clade: 306 if subclade not in heights: 307 calc_row(subclade) 308 # Closure over heights 309 heights[clade] = (heights[clade.clades[0]] + 310 heights[clade.clades[-1]]) / 2.0 311 calc_row(tree.root) 312 return heights 313 314 x_posns = get_x_positions(tree) 315 y_posns = get_y_positions(tree) 316 317 def draw_clade(clade, x_start): 318 """Recursively draw a tree, down from the given clade.""" 319 x_here = x_posns[clade] 320 y_here = y_posns[clade] 321 # phyloXML-only graphics annotations 322 color = clade.__dict__.get('color') or 'k' 323 lw = clade.__dict__.get('width') 324 # Draw a horizontal line from start to here 325 plt.hlines(y_here, x_start, x_here, color=color, lw=lw) 326 # Add node/taxon labels 327 label = label_func(clade) 328 if label not in (None, clade.__class__.__name__): 329 plt.text(x_here, y_here, ' ' + label, 330 fontsize=10, verticalalignment='center') 331 # Add confidence 332 if hasattr(clade, 'confidences'): 333 # phyloXML supports multiple confidences 334 conf_label = ' '.join(map(str, map(float, clade.confidences))) 335 elif clade.confidence is not None: 336 conf_label = str(clade.confidence) 337 else: 338 conf_label = None 339 if conf_label: 340 plt.text(x_start, y_here, str(float(clade.confidence)), fontsize=9) 341 if clade.clades: 342 # Draw a vertical line connecting all children 343 y_top = y_posns[clade.clades[0]] 344 y_bot = y_posns[clade.clades[-1]] 345 plt.vlines(x_here, y_bot, y_top, color=color, lw=lw) 346 # Draw descendents 347 for child in clade: 348 draw_clade(child, x_here) 349 350 draw_clade(tree.root, 0) 351 if hasattr(tree, 'name') and tree.name: 352 plt.title(tree.name) 353 plt.xlabel('branch length') 354 plt.ylabel('taxa') 355 # Add margins around the tree to prevent overlapping the axes 356 xmin, xmax = plt.xlim() 357 pad = 0.05 * xmax 358 plt.xlim(-pad, xmax + pad) 359 # Also invert the y-axis (origin at the top) 360 plt.ylim(max(y_posns.itervalues()) + 1, 0) 361 if do_show: 362 plt.show() 363