1
2
3
4
5
6 """Utilities for handling, displaying and exporting Phylo trees.
7
8 Third-party libraries are loaded when the corresponding function is called.
9 """
10 __docformat__ = "restructuredtext en"
11
12 import math
13 import sys
14
15
17 """Convert a Tree object to a networkx graph.
18
19 The result is useful for graph-oriented analysis, and also interactive
20 plotting with pylab, matplotlib or pygraphviz, though the resulting diagram
21 is usually not ideal for displaying a phylogeny.
22
23 Requires NetworkX version 0.99 or 1.0.
24 """
25 try:
26 import networkx
27 except ImportError:
28 from Bio import MissingPythonDependencyError
29 raise MissingPythonDependencyError(
30 "Install NetworkX if you want to use to_networkx.")
31
32 def add_edge(graph, n1, n2):
33
34
35 if networkx.__version__ >= '1.0':
36 graph.add_edge(n1, n2, weight=str(n2.branch_length or 1.0))
37
38 if hasattr(n2, 'color') and n2.color is not None:
39 graph[n1][n2]['color'] = n2.color.to_hex()
40 elif hasattr(n1, 'color') and n1.color is not None:
41
42 graph[n1][n2]['color'] = n1.color.to_hex()
43 n2.color = n1.color
44
45 if hasattr(n2, 'width') and n2.width is not None:
46 graph[n1][n2]['width'] = n2.width
47 elif hasattr(n1, 'width') and n1.width is not None:
48
49 graph[n1][n2]['width'] = n1.width
50 n2.width = n1.width
51 elif networkx.__version__ >= '0.99':
52 graph.add_edge(n1, n2, (n2.branch_length or 1.0))
53 else:
54 graph.add_edge(n1, n2)
55
56 def build_subgraph(graph, top):
57 """Walk down the Tree, building graphs, edges and nodes."""
58 for clade in top:
59 graph.add_node(clade.root)
60 add_edge(graph, top.root, clade.root)
61 build_subgraph(graph, clade)
62
63 if tree.rooted:
64 G = networkx.DiGraph()
65 else:
66 G = networkx.Graph()
67 G.add_node(tree.root)
68 build_subgraph(G, tree.root)
69 return G
70
71
72 -def draw_graphviz(tree, label_func=str, prog='twopi', args='',
73 node_color='#c0deff', **kwargs):
74 """Display a tree or clade as a graph, using the graphviz engine.
75
76 Requires NetworkX, matplotlib, Graphviz and either PyGraphviz or pydot.
77
78 The third and fourth parameters apply to Graphviz, and the remaining
79 arbitrary keyword arguments are passed directly to networkx.draw(), which
80 in turn mostly wraps matplotlib/pylab. See the documentation for Graphviz
81 and networkx for detailed explanations.
82
83 The NetworkX/matplotlib parameters are described in the docstrings for
84 networkx.draw() and pylab.scatter(), but the most reasonable options to try
85 are: *alpha, node_color, node_size, node_shape, edge_color, style,
86 font_size, font_color, font_weight, font_family*
87
88 :Parameters:
89
90 label_func : callable
91 A function to extract a label from a node. By default this is str(),
92 but you can use a different function to select another string
93 associated with each node. If this function returns None for a node,
94 no label will be shown for that node.
95
96 The label will also be silently skipped if the throws an exception
97 related to ordinary attribute access (LookupError, AttributeError,
98 ValueError); all other exception types will still be raised. This
99 means you can use a lambda expression that simply attempts to look
100 up the desired value without checking if the intermediate attributes
101 are available:
102
103 >>> Phylo.draw_graphviz(tree, lambda n: n.taxonomies[0].code)
104
105 prog : string
106 The Graphviz program to use when rendering the graph. 'twopi'
107 behaves the best for large graphs, reliably avoiding crossing edges,
108 but for moderate graphs 'neato' looks a bit nicer. For small
109 directed graphs, 'dot' may produce the most normal-looking
110 phylogram, but will cross and distort edges in larger graphs. (The
111 programs 'circo' and 'fdp' are not recommended.)
112 args : string
113 Options passed to the external graphviz program. Normally not
114 needed, but offered here for completeness.
115
116 Example
117 -------
118
119 >>> import pylab
120 >>> from Bio import Phylo
121 >>> tree = Phylo.read('ex/apaf.xml', 'phyloxml')
122 >>> Phylo.draw_graphviz(tree)
123 >>> pylab.show()
124 >>> pylab.savefig('apaf.png')
125 """
126 try:
127 import networkx
128 except ImportError:
129 from Bio import MissingPythonDependencyError
130 raise MissingPythonDependencyError(
131 "Install NetworkX if you want to use to_networkx.")
132
133 G = to_networkx(tree)
134 Gi = networkx.convert_node_labels_to_integers(G, discard_old_labels=False)
135 try:
136 posi = networkx.pygraphviz_layout(Gi, prog, args=args)
137 except ImportError:
138 try:
139 posi = networkx.pydot_layout(Gi, prog)
140 except ImportError:
141 raise MissingPythonDependencyError(
142 "Install PyGraphviz or Pydot if you want to use "
143 "draw_graphviz.")
144 posn = dict((n, posi[Gi.node_labels[n]]) for n in G)
145
146 def get_label_mapping(G, selection):
147 for node in G.nodes():
148 if (selection is None) or (node in selection):
149 try:
150 label = label_func(node)
151 if label not in (None, node.__class__.__name__):
152 yield (node, label)
153 except (LookupError, AttributeError, ValueError):
154 pass
155
156 if 'nodelist' in kwargs:
157 labels = dict(get_label_mapping(G, set(kwargs['nodelist'])))
158 else:
159 labels = dict(get_label_mapping(G, None))
160 kwargs['nodelist'] = labels.keys()
161 if 'edge_color' not in kwargs:
162 kwargs['edge_color'] = [isinstance(e[2], dict) and
163 e[2].get('color', 'k') or 'k'
164 for e in G.edges(data=True)]
165 if 'width' not in kwargs:
166 kwargs['width'] = [isinstance(e[2], dict) and
167 e[2].get('width', 1.0) or 1.0
168 for e in G.edges(data=True)]
169 networkx.draw(G, posn, labels=labels, node_color=node_color, **kwargs)
170
171
172 -def draw_ascii(tree, file=sys.stdout, column_width=80):
173 """Draw an ascii-art phylogram of the given tree.
174
175 The printed result looks like::
176
177 _________ Orange
178 ______________|
179 | |______________ Tangerine
180 ______________|
181 | | _________________________ Grapefruit
182 _| |_________|
183 | |______________ Pummelo
184 |
185 |__________________________________ Apple
186
187
188 :Parameters:
189 file : file-like object
190 File handle opened for writing the output drawing.
191 column_width : int
192 Total number of text columns used by the drawing.
193 """
194 taxa = tree.get_terminals()
195
196 max_label_width = max(len(str(taxon)) for taxon in taxa)
197 drawing_width = column_width - max_label_width - 1
198 drawing_height = 2 * len(taxa) - 1
199
200 def get_col_positions(tree):
201 """Create a mapping of each clade to its column position."""
202 depths = tree.depths()
203
204 if not max(depths.itervalues()):
205 depths = tree.depths(unit_branch_lengths=True)
206
207 fudge_margin = int(math.ceil(math.log(len(taxa), 2)))
208 cols_per_branch_unit = ((drawing_width - fudge_margin)
209 / float(max(depths.itervalues())))
210 return dict((clade, int(round(blen*cols_per_branch_unit + 0.5)))
211 for clade, blen in depths.iteritems())
212
213 def get_row_positions(tree):
214 positions = dict((taxon, 2*idx) for idx, taxon in enumerate(taxa))
215 def calc_row(clade):
216 for subclade in clade:
217 if subclade not in positions:
218 calc_row(subclade)
219 positions[clade] = (positions[clade.clades[0]] +
220 positions[clade.clades[-1]]) / 2
221 calc_row(tree.root)
222 return positions
223
224 col_positions = get_col_positions(tree)
225 row_positions = get_row_positions(tree)
226 char_matrix = [[' ' for x in range(drawing_width)]
227 for y in range(drawing_height)]
228
229 def draw_clade(clade, startcol):
230 thiscol = col_positions[clade]
231 thisrow = row_positions[clade]
232
233 for col in range(startcol, thiscol):
234 char_matrix[thisrow][col] = '_'
235 if clade.clades:
236
237 toprow = row_positions[clade.clades[0]]
238 botrow = row_positions[clade.clades[-1]]
239 for row in range(toprow+1, botrow+1):
240 char_matrix[row][thiscol] = '|'
241
242 if (col_positions[clade.clades[0]] - thiscol) < 2:
243 char_matrix[toprow][thiscol] = ','
244
245 for child in clade:
246 draw_clade(child, thiscol+1)
247
248 draw_clade(tree.root, 0)
249
250 for idx, row in enumerate(char_matrix):
251 line = ''.join(row).rstrip()
252
253 if idx % 2 == 0:
254 line += ' ' + str(taxa[idx/2])
255 file.write(line + '\n')
256 file.write('\n')
257
258
259 -def draw(tree, label_func=str, do_show=True, show_confidence=True):
260 """Plot the given tree using matplotlib (or pylab).
261
262 The graphic is a rooted tree, drawn with roughly the same algorithm as
263 draw_ascii.
264
265 :Parameters:
266 label_func : callable
267 A function to extract a label from a node. By default this is str(),
268 but you can use a different function to select another string
269 associated with each node. If this function returns None for a node,
270 no label will be shown for that node.
271 """
272 try:
273 import matplotlib.pyplot as plt
274 except ImportError:
275 try:
276 import pylab as plt
277 except ImportError:
278 from Bio import MissingPythonDependencyError
279 raise MissingPythonDependencyError(
280 "Install matplotlib or pylab if you want to use draw.")
281
282 def get_x_positions(tree):
283 """Create a mapping of each clade to its horizontal position.
284
285 Dict of {clade: x-coord}
286 """
287 depths = tree.depths()
288
289 if not max(depths.itervalues()):
290 depths = tree.depths(unit_branch_lengths=True)
291 return depths
292
293 def get_y_positions(tree):
294 """Create a mapping of each clade to its vertical position.
295
296 Dict of {clade: y-coord}.
297 Coordinates are negative, and integers for tips.
298 """
299 maxheight = tree.count_terminals()
300
301 heights = dict((tip, maxheight - i)
302 for i, tip in enumerate(tree.get_terminals()))
303
304 def calc_row(clade):
305 for subclade in clade:
306 if subclade not in heights:
307 calc_row(subclade)
308
309 heights[clade] = (heights[clade.clades[0]] +
310 heights[clade.clades[-1]]) / 2.0
311 calc_row(tree.root)
312 return heights
313
314 x_posns = get_x_positions(tree)
315 y_posns = get_y_positions(tree)
316
317 def draw_clade(clade, x_start):
318 """Recursively draw a tree, down from the given clade."""
319 x_here = x_posns[clade]
320 y_here = y_posns[clade]
321
322 color = clade.__dict__.get('color') or 'k'
323 lw = clade.__dict__.get('width')
324
325 plt.hlines(y_here, x_start, x_here, color=color, lw=lw)
326
327 label = label_func(clade)
328 if label not in (None, clade.__class__.__name__):
329 plt.text(x_here, y_here, ' ' + label,
330 fontsize=10, verticalalignment='center')
331
332 if hasattr(clade, 'confidences'):
333
334 conf_label = ' '.join(map(str, map(float, clade.confidences)))
335 elif clade.confidence is not None:
336 conf_label = str(clade.confidence)
337 else:
338 conf_label = None
339 if conf_label:
340 plt.text(x_start, y_here, str(float(clade.confidence)), fontsize=9)
341 if clade.clades:
342
343 y_top = y_posns[clade.clades[0]]
344 y_bot = y_posns[clade.clades[-1]]
345 plt.vlines(x_here, y_bot, y_top, color=color, lw=lw)
346
347 for child in clade:
348 draw_clade(child, x_here)
349
350 draw_clade(tree.root, 0)
351 if hasattr(tree, 'name') and tree.name:
352 plt.title(tree.name)
353 plt.xlabel('branch length')
354 plt.ylabel('taxa')
355
356 xmin, xmax = plt.xlim()
357 pad = 0.05 * xmax
358 plt.xlim(-pad, xmax + pad)
359
360 plt.ylim(max(y_posns.itervalues()) + 1, 0)
361 if do_show:
362 plt.show()
363