Package Bio :: Package Phylo :: Module PhyloXMLIO
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.PhyloXMLIO

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """PhyloXML reader/parser, writer, and associated functions. 
  7   
  8  Instantiates tree elements from a parsed PhyloXML file, and constructs an XML 
  9  file from a `Bio.Phylo.PhyloXML` object. 
 10   
 11  About capitalization: 
 12   
 13  - phyloXML means the file format specification 
 14  - PhyloXML means the Biopython module `Bio.Phylo.PhyloXML` and its classes 
 15  - Phyloxml means the top-level class used by `PhyloXMLIO.read` (but not 
 16    `Bio.Phylo.read`!), containing a list of Phylogenies (objects derived from 
 17    `BaseTree.Tree`) 
 18  """ 
 19  __docformat__ = "restructuredtext en" 
 20   
 21  import sys 
 22  import warnings 
 23   
 24  from Bio.Phylo import PhyloXML as PX 
 25   
 26  if (3, 0, 0) <= sys.version_info[:3] <= (3, 1, 3): 
 27      # Workaround for cElementTree regression in python 3.0--3.1.3 
 28      # See http://bugs.python.org/issue9257 
 29      from xml.etree import ElementTree 
 30  else: 
 31      try: 
 32          from xml.etree import cElementTree as ElementTree 
 33      except ImportError: 
 34          # Alternative Python implementation, perhaps? 
 35          try: 
 36              from xml.etree import ElementTree as ElementTree 
 37          except ImportError: 
 38              # Python 2.4 -- check for 3rd-party implementations 
 39              try: 
 40                  from lxml import etree as ElementTree 
 41              except ImportError: 
 42                  try: 
 43                      import cElementTree as ElementTree 
 44                  except ImportError: 
 45                      try: 
 46                          from elementtree import ElementTree 
 47                      except ImportError: 
 48                          from Bio import MissingPythonDependencyError 
 49                          raise MissingPythonDependencyError( 
 50                                  "No ElementTree module was found. " 
 51                                  "Use Python 2.5+, lxml or elementtree if you " 
 52                                  "want to use Bio.PhyloXML.") 
 53   
 54  # Keep the standard namespace prefixes when writing 
 55  # See http://effbot.org/zone/element-namespaces.htm 
 56  NAMESPACES = { 
 57          'phy':  'http://www.phyloxml.org', 
 58          'xs':   'http://www.w3.org/2001/XMLSchema', 
 59          } 
 60   
 61  try: 
 62      register_namespace = ElementTree.register_namespace 
 63  except AttributeError: 
 64      if not hasattr(ElementTree, '_namespace_map'): 
 65          # cElementTree needs the pure-Python xml.etree.ElementTree 
 66          # Py2.4 support: the exception handler can go away when Py2.4 does 
 67          try: 
 68              from xml.etree import ElementTree as ET_py 
 69              ElementTree._namespace_map = ET_py._namespace_map 
 70          except ImportError: 
 71              warnings.warn("Couldn't import xml.etree.ElementTree; " 
 72                      "phyloXML namespaces may have unexpected abbreviations " 
 73                      "in the output.", 
 74                      # NB: ImportWarning was introduced in Py2.5 
 75                      Warning, stacklevel=2) 
 76              ElementTree._namespace_map = {} 
 77   
78 - def register_namespace(prefix, uri):
79 ElementTree._namespace_map[uri] = prefix
80 81 for prefix, uri in NAMESPACES.iteritems(): 82 register_namespace(prefix, uri) 83 84
class PhyloXMLError(Exception):
    """Signal that a PhyloXML object could not be constructed.

    This is reserved for well-formed XML that nevertheless violates the
    phyloXML specification. Plain XML syntax errors are detected and raised
    by the underlying ElementTree module instead.
    """
93 94 95 # --------------------------------------------------------- 96 # Public API 97
def read(file):
    """Parse a phyloXML file or stream into one tree of Biopython objects.

    Everything below the root node is kept: the phylogenies as well as any
    other arbitrary (non-phyloXML) objects found at the top level.

    :returns: a single `Bio.Phylo.PhyloXML.Phyloxml` object.
    """
    parser = Parser(file)
    return parser.read()
107
def parse(file):
    """Iterate over the phylogenetic trees in a phyloXML file.

    Additional data stored at the top level is ignored, but this may be more
    memory-efficient than the `read` function.

    :returns: a generator of `Bio.Phylo.PhyloXML.Phylogeny` objects.
    """
    parser = Parser(file)
    return parser.parse()
117
def write(obj, file, encoding='utf-8', indent=True):
    """Write a phyloXML file.

    :Parameters:
        obj
            an instance of `Phyloxml`, `Phylogeny` or `BaseTree.Tree`, or an
            iterable of either of the latter two. The object will be converted
            to a Phyloxml object before serialization.
        file
            either an open handle or a file name.
        encoding
            character encoding, handed on to `Writer.write`.
        indent
            handed on to `Writer.write`; when true the output is
            pretty-printed.

    :returns: whatever `Writer.write` returns.
    """
    def fix_single(tree):
        # Keep this order: the PX classes are tested before the BaseTree
        # ones (presumably their base classes -- confirm in PhyloXML).
        if isinstance(tree, PX.Phylogeny):
            return tree
        if isinstance(tree, PX.Clade):
            return tree.to_phylogeny()
        if isinstance(tree, PX.BaseTree.Tree):
            return PX.Phylogeny.from_tree(tree)
        if isinstance(tree, PX.BaseTree.Clade):
            return PX.Phylogeny.from_tree(PX.BaseTree.Tree(root=tree))
        raise ValueError("iterable must contain Tree or Clade types")

    if not isinstance(obj, PX.Phyloxml):
        if isinstance(obj, (PX.BaseTree.Tree, PX.BaseTree.Clade)):
            obj = fix_single(obj).to_phyloxml()
        elif hasattr(obj, '__iter__'):
            obj = PX.Phyloxml({}, phylogenies=(fix_single(t) for t in obj))
        else:
            raise ValueError("First argument must be a Phyloxml, Phylogeny, "
                    "Tree, or iterable of Trees or Phylogenies.")
    writer = Writer(obj)
    return writer.write(file, encoding=encoding, indent=indent)


# ---------------------------------------------------------
# Functions I wish ElementTree had

157 -def _local(tag):
158 """Extract the local tag from a namespaced tag name.""" 159 if tag[0] == '{': 160 return tag[tag.index('}')+1:] 161 return tag
162
163 -def _split_namespace(tag):
164 """Split a tag into namespace and local tag strings.""" 165 try: 166 return tag[1:].split('}', 1) 167 except: 168 return ('', tag)
169
def _ns(tag, namespace=NAMESPACES['phy']):
    """Qualify an XML tag as ``{namespace}tag``.

    The namespace defaults to the phyloXML one.
    """
    parts = (namespace, tag)
    return '{%s}%s' % parts
173
def _get_child_as(parent, tag, construct):
    """Build an object from the first child node carrying the given tag.

    The tag is looked up in the phyloXML namespace and the matching node is
    passed through `construct`. Returns None if no matching child is found.
    """
    node = parent.find(_ns(tag))
    if node is None:
        return None
    return construct(node)
182
def _get_child_text(parent, tag, construct=unicode):
    """Convert the text of the first child node carrying the given tag.

    The node's text is passed through `construct`. Returns None if no
    matching child is found, or if the child has no (or empty) text.
    """
    node = parent.find(_ns(tag))
    if node is None or not node.text:
        return None
    return construct(node.text)
191
def _get_children_as(parent, tag, construct):
    """Build an object from every child node carrying the given tag.

    Each matching node is passed through `construct`, in document order.
    Returns an empty list if no matching child is found.
    """
    built = []
    for node in parent.findall(_ns(tag)):
        built.append(construct(node))
    return built
199
def _get_children_text(parent, tag, construct=unicode):
    """Convert the text of every child node carrying the given tag.

    Children without text are skipped; the text of the others is passed
    through `construct`. Returns an empty list if no matching child is found.
    """
    converted = []
    for node in parent.findall(_ns(tag)):
        if node.text:
            converted.append(construct(node.text))
    return converted
208
209 -def _indent(elem, level=0):
210 """Add line breaks and indentation to ElementTree in-place. 211 212 Sources: 213 214 - http://effbot.org/zone/element-lib.htm#prettyprint 215 - http://infix.se/2007/02/06/gentlemen-indent-your-xml 216 """ 217 i = "\n" + level*" " 218 if len(elem): 219 if not elem.text or not elem.text.strip(): 220 elem.text = i + " " 221 for e in elem: 222 _indent(e, level+1) 223 if not e.tail or not e.tail.strip(): 224 e.tail = i + " " 225 if not e.tail or not e.tail.strip(): 226 e.tail = i 227 else: 228 if level and (not elem.tail or not elem.tail.strip()): 229 elem.tail = i
230 231 # --------------------------------------------------------- 232 # INPUT 233 # --------------------------------------------------------- 234
235 -def _str2bool(text):
236 if text == 'true': 237 return True 238 if text == 'false': 239 return False 240 raise ValueError('String could not be converted to boolean: ' + text)
241
242 -def _dict_str2bool(dct, keys):
243 out = dct.copy() 244 for key in keys: 245 if key in out: 246 out[key] = _str2bool(out[key]) 247 return out
248
249 -def _int(text):
250 if text is not None: 251 try: 252 return int(text) 253 except Exception: 254 return None
255
256 -def _float(text):
257 if text is not None: 258 try: 259 return float(text) 260 except Exception: 261 return None
262
263 -def _collapse_wspace(text):
264 """Replace all spans of whitespace with a single space character. 265 266 Also remove leading and trailing whitespace. See "Collapse Whitespace 267 Policy" in the phyloXML spec glossary: 268 http://phyloxml.org/documentation/version_100/phyloxml.xsd.html#Glossary 269 """ 270 if text is not None: 271 return ' '.join(text.split())
272 273 # NB: Not currently used
274 -def _replace_wspace(text):
275 """Replace tab, LF and CR characters with spaces, but don't collapse. 276 277 See "Replace Whitespace Policy" in the phyloXML spec glossary: 278 http://phyloxml.org/documentation/version_100/phyloxml.xsd.html#Glossary 279 """ 280 for char in ('\t', '\n', '\r'): 281 if char in text: 282 text = text.replace(char, ' ') 283 return text
284 285
class Parser(object):
    """Methods for parsing all phyloXML nodes from an XML stream.

    To minimize memory use, the tree of ElementTree parsing events is cleared
    after completing each phylogeny, clade, and top-level 'other' element.
    Elements below the clade level are kept in memory until parsing of the
    current clade is finished -- this shouldn't be a problem because clade is
    the only recursive element, and non-clade nodes below this level are of
    bounded size.

    The incremental ``_parse_*`` methods all pull events from the single
    shared iterator `self.context`. Each one consumes events up to and
    including the 'end' event of the element it was handed, and only acts on
    the direct children of that element; deeper descendants are left for the
    constructor of the child that contains them.
    """

    def __init__(self, file):
        """Start parsing `file` and remember the document's root element.

        :Parameters:
            file
                the XML source, passed straight to `ElementTree.iterparse`.
        """
        # Get an iterable context for XML parsing events
        context = iter(ElementTree.iterparse(file, events=('start', 'end')))
        # Iterators advance with next() on Python 2 and __next__() on Python 3
        try:
            advance = context.next
        except AttributeError:
            advance = context.__next__
        # The first event is the 'start' of the root element
        event, root = advance()
        self.root = root
        self.context = context

    def read(self):
        """Parse the phyloXML file and create a single Phyloxml object."""
        phyloxml = PX.Phyloxml(dict((_local(key), val)
                                for key, val in self.root.items()))
        other_depth = 0
        for event, elem in self.context:
            namespace, localtag = _split_namespace(elem.tag)
            if event == 'start':
                if namespace != NAMESPACES['phy']:
                    other_depth += 1
                    continue
                if localtag == 'phylogeny':
                    phylogeny = self._parse_phylogeny(elem)
                    phyloxml.phylogenies.append(phylogeny)
            if event == 'end' and namespace != NAMESPACES['phy']:
                # Deal with items not specified by phyloXML
                other_depth -= 1
                if other_depth == 0:
                    # We're directly under the root node -- evaluate
                    otr = self.other(elem, namespace, localtag)
                    phyloxml.other.append(otr)
                    self.root.clear()
        return phyloxml

    def parse(self):
        """Parse the phyloXML file incrementally and return each phylogeny."""
        phytag = _ns('phylogeny')
        for event, elem in self.context:
            if event == 'start' and elem.tag == phytag:
                yield self._parse_phylogeny(elem)

    # Special parsing cases -- incremental, using self.context

    def _parse_phylogeny(self, parent):
        """Parse a single phylogeny within the phyloXML tree.

        Recursively builds a phylogenetic tree with help from parse_clade, then
        clears the XML event history for the phylogeny element and returns
        control to the top-level parsing function.

        :raises PhyloXMLError: if a direct child in the phyloXML namespace is
            not a recognized tag.
        """
        phylogeny = PX.Phylogeny(**_dict_str2bool(parent.attrib,
                                                  ['rooted', 'rerootable']))
        list_types = {
                # XML tag, plural attribute
                'confidence':   'confidences',
                'property':     'properties',
                'clade_relation': 'clade_relations',
                'sequence_relation': 'sequence_relations',
                }
        # Number of currently open elements below the phylogeny. Only an
        # element that closes at depth 0 is a direct child; e.g. the 'desc'
        # inside a 'date' must be left to self.date().
        depth = 0
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                if tag == 'clade':
                    assert phylogeny.root is None, \
                            "Phylogeny object should only have 1 clade"
                    # The recursive call consumes the matching 'end' event
                    phylogeny.root = self._parse_clade(elem)
                    continue
                depth += 1
                continue
            if event == 'end':
                if tag == 'phylogeny':
                    parent.clear()
                    break
                depth -= 1
                if depth:
                    # Nested inside another child: not ours to handle
                    continue
                # Handle the other non-recursive children
                if tag in list_types:
                    getattr(phylogeny, list_types[tag]).append(
                            getattr(self, tag)(elem))
                # Complex types
                elif tag in ('date', 'id'):
                    setattr(phylogeny, tag, getattr(self, tag)(elem))
                # Simple types
                elif tag in ('name', 'description'):
                    setattr(phylogeny, tag, _collapse_wspace(elem.text))
                # Unknown tags
                elif namespace != NAMESPACES['phy']:
                    phylogeny.other.append(self.other(elem, namespace, tag))
                    parent.clear()
                else:
                    # NB: This shouldn't happen in valid files
                    raise PhyloXMLError('Misidentified tag: ' + tag)
        return phylogeny

    _clade_complex_types = ['color', 'events', 'binary_characters', 'date']
    _clade_list_types = {
            'confidence':   'confidences',
            'distribution': 'distributions',
            'reference':    'references',
            'property':     'properties',
            }
    # Kept for backward compatibility; _parse_clade now tracks nesting depth
    # instead of consulting this set. list() is used because dict.keys()
    # cannot be concatenated to a list on Python 3.
    _clade_tracked_tags = set(_clade_complex_types + list(_clade_list_types)
                              + ['branch_length', 'name', 'node_id', 'width'])

    def _parse_clade(self, parent):
        """Parse a Clade node and its children, recursively.

        :raises PhyloXMLError: if branch_length is given both as an attribute
            and as a child element, or if a direct child in the phyloXML
            namespace is not a recognized tag.
        """
        clade = PX.Clade(**parent.attrib)
        if clade.branch_length is not None:
            clade.branch_length = float(clade.branch_length)
        # NB: Only evaluate nodes at the current level
        # 'depth' counts the open elements below this clade. Using a counter
        # (rather than a stack of known tags) keeps a 'confidence' nested in
        # 'events' from being recorded as a clade confidence, and lets
        # elements from other namespaces reach the 'other' branch below.
        depth = 0
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                # These three sub-parsers consume their own 'end' event
                if tag == 'clade':
                    clade.clades.append(self._parse_clade(elem))
                    continue
                if tag == 'taxonomy':
                    clade.taxonomies.append(self._parse_taxonomy(elem))
                    continue
                if tag == 'sequence':
                    clade.sequences.append(self._parse_sequence(elem))
                    continue
                depth += 1
                continue
            if event == 'end':
                if tag == 'clade':
                    elem.clear()
                    break
                depth -= 1
                if depth:
                    # Nested inside another child: not ours to handle
                    continue
                # Handle the other non-recursive children
                if tag in self._clade_list_types:
                    getattr(clade, self._clade_list_types[tag]).append(
                            getattr(self, tag)(elem))
                elif tag in self._clade_complex_types:
                    setattr(clade, tag, getattr(self, tag)(elem))
                elif tag == 'branch_length':
                    # NB: possible collision with the attribute
                    if clade.branch_length is not None:
                        raise PhyloXMLError(
                                'Attribute branch_length was already set '
                                'for this Clade.')
                    clade.branch_length = _float(elem.text)
                elif tag == 'width':
                    clade.width = _float(elem.text)
                elif tag == 'name':
                    clade.name = _collapse_wspace(elem.text)
                elif tag == 'node_id':
                    clade.node_id = PX.Id(elem.text.strip(),
                                          elem.attrib.get('provider'))
                elif namespace != NAMESPACES['phy']:
                    clade.other.append(self.other(elem, namespace, tag))
                    elem.clear()
                else:
                    raise PhyloXMLError('Misidentified tag: ' + tag)
        return clade

    def _parse_sequence(self, parent):
        """Parse a sequence node, consuming events through its 'end' event."""
        sequence = PX.Sequence(**parent.attrib)
        # Open elements below this sequence; see _parse_clade. Without it the
        # 'uri' inside an annotation would overwrite the sequence's own uri.
        depth = 0
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                depth += 1
                continue
            if event == 'end':
                if tag == 'sequence':
                    parent.clear()
                    break
                depth -= 1
                if depth:
                    continue
                if tag in ('accession', 'mol_seq', 'uri',
                        'domain_architecture'):
                    setattr(sequence, tag, getattr(self, tag)(elem))
                elif tag == 'annotation':
                    sequence.annotations.append(self.annotation(elem))
                elif tag == 'name':
                    sequence.name = _collapse_wspace(elem.text)
                elif tag in ('symbol', 'location'):
                    setattr(sequence, tag, elem.text)
                elif namespace != NAMESPACES['phy']:
                    sequence.other.append(self.other(elem, namespace, tag))
                    parent.clear()
        return sequence

    def _parse_taxonomy(self, parent):
        """Parse a taxonomy node, consuming events through its 'end' event."""
        taxonomy = PX.Taxonomy(**parent.attrib)
        # Open elements below this taxonomy; see _parse_clade
        depth = 0
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                depth += 1
                continue
            if event == 'end':
                if tag == 'taxonomy':
                    parent.clear()
                    break
                depth -= 1
                if depth:
                    continue
                if tag in ('id', 'uri'):
                    setattr(taxonomy, tag, getattr(self, tag)(elem))
                elif tag == 'common_name':
                    taxonomy.common_names.append(_collapse_wspace(elem.text))
                elif tag == 'synonym':
                    taxonomy.synonyms.append(elem.text)
                elif tag in ('code', 'scientific_name', 'authority', 'rank'):
                    # ENH: check_str on rank
                    setattr(taxonomy, tag, elem.text)
                elif namespace != NAMESPACES['phy']:
                    taxonomy.other.append(self.other(elem, namespace, tag))
                    parent.clear()
        return taxonomy

    def other(self, elem, namespace, localtag):
        """Wrap a non-phyloXML element and, recursively, its children.

        The value is the element's stripped text, or None when that is empty.
        """
        return PX.Other(localtag, namespace, elem.attrib,
                  value=elem.text and elem.text.strip() or None,
                  children=[self.other(child, *_split_namespace(child.tag))
                            for child in elem])

    # Complex types
    # Each method below builds one PhyloXML object from a fully parsed
    # element; the method name is the element's local tag, which is how the
    # incremental parsers above look them up with getattr().

    def accession(self, elem):
        """Build an Accession from the element text and 'source' attribute."""
        return PX.Accession(elem.text.strip(), elem.get('source'))

    def annotation(self, elem):
        """Build an Annotation from the element's attributes and children."""
        return PX.Annotation(
                desc=_collapse_wspace(_get_child_text(elem, 'desc')),
                confidence=_get_child_as(elem, 'confidence', self.confidence),
                properties=_get_children_as(elem, 'property', self.property),
                uri=_get_child_as(elem, 'uri', self.uri),
                **elem.attrib)

    def binary_characters(self, elem):
        """Build a BinaryCharacters object.

        Each of gained/lost/present/absent is None when the corresponding
        child element is missing.
        """
        def bc_getter(elem):
            return _get_children_text(elem, 'bc')
        return PX.BinaryCharacters(
                type=elem.get('type'),
                gained_count=_int(elem.get('gained_count')),
                lost_count=_int(elem.get('lost_count')),
                present_count=_int(elem.get('present_count')),
                absent_count=_int(elem.get('absent_count')),
                # Flatten BinaryCharacterList sub-nodes into lists of strings
                gained=_get_child_as(elem, 'gained', bc_getter),
                lost=_get_child_as(elem, 'lost', bc_getter),
                present=_get_child_as(elem, 'present', bc_getter),
                absent=_get_child_as(elem, 'absent', bc_getter))

    def clade_relation(self, elem):
        """Build a CladeRelation from attributes and an optional confidence."""
        # NOTE(review): 'distance' is passed on as a string here, whereas
        # sequence_relation converts it with _float -- confirm which is meant.
        return PX.CladeRelation(
                elem.get('type'), elem.get('id_ref_0'), elem.get('id_ref_1'),
                distance=elem.get('distance'),
                confidence=_get_child_as(elem, 'confidence', self.confidence))

    def color(self, elem):
        """Build a BranchColor from the red, green and blue child values."""
        red, green, blue = (_get_child_text(elem, color, int) for color in
                            ('red', 'green', 'blue'))
        return PX.BranchColor(red, green, blue)

    def confidence(self, elem):
        """Build a Confidence from the element text and 'type' attribute."""
        return PX.Confidence(
                _float(elem.text),
                elem.get('type'))

    def date(self, elem):
        """Build a Date from the 'unit' attribute and the child elements."""
        return PX.Date(
                unit=elem.get('unit'),
                desc=_collapse_wspace(_get_child_text(elem, 'desc')),
                value=_get_child_text(elem, 'value', float),
                minimum=_get_child_text(elem, 'minimum', float),
                maximum=_get_child_text(elem, 'maximum', float),
                )

    def distribution(self, elem):
        """Build a Distribution with its description, points and polygons."""
        return PX.Distribution(
                desc=_collapse_wspace(_get_child_text(elem, 'desc')),
                points=_get_children_as(elem, 'point', self.point),
                polygons=_get_children_as(elem, 'polygon', self.polygon))

    def domain(self, elem):
        """Build a ProteinDomain.

        The 'from' attribute is lowered by one; `Writer.domain` adds it back.
        """
        return PX.ProteinDomain(elem.text.strip(),
                int(elem.get('from')) - 1,
                int(elem.get('to')),
                confidence=_float(elem.get('confidence')),
                id=elem.get('id'))

    def domain_architecture(self, elem):
        """Build a DomainArchitecture from its length and domain children."""
        return PX.DomainArchitecture(
                length=int(elem.get('length')),
                domains=_get_children_as(elem, 'domain', self.domain))

    def events(self, elem):
        """Build an Events object from the element's children."""
        return PX.Events(
                type=_get_child_text(elem, 'type'),
                duplications=_get_child_text(elem, 'duplications', int),
                speciations=_get_child_text(elem, 'speciations', int),
                losses=_get_child_text(elem, 'losses', int),
                confidence=_get_child_as(elem, 'confidence', self.confidence))

    def id(self, elem):
        """Build an Id; the provider falls back to the 'type' attribute."""
        provider = elem.get('provider') or elem.get('type')
        return PX.Id(elem.text.strip(), provider)

    def mol_seq(self, elem):
        """Build a MolSeq; 'is_aligned' becomes a boolean when present."""
        is_aligned = elem.get('is_aligned')
        if is_aligned is not None:
            is_aligned = _str2bool(is_aligned)
        return PX.MolSeq(elem.text.strip(), is_aligned=is_aligned)

    def point(self, elem):
        """Build a Point from its attributes and lat/long/alt children."""
        return PX.Point(
                elem.get('geodetic_datum'),
                _get_child_text(elem, 'lat', float),
                _get_child_text(elem, 'long', float),
                alt=_get_child_text(elem, 'alt', float),
                alt_unit=elem.get('alt_unit'))

    def polygon(self, elem):
        """Build a Polygon from its point children."""
        return PX.Polygon(
                points=_get_children_as(elem, 'point', self.point))

    def property(self, elem):
        """Build a Property from the element text and attributes."""
        return PX.Property(elem.text.strip(),
                elem.get('ref'), elem.get('applies_to'), elem.get('datatype'),
                unit=elem.get('unit'),
                id_ref=elem.get('id_ref'))

    def reference(self, elem):
        """Build a Reference from the 'doi' attribute and 'desc' child."""
        return PX.Reference(
                doi=elem.get('doi'),
                desc=_get_child_text(elem, 'desc'))

    def sequence_relation(self, elem):
        """Build a SequenceRelation; 'distance' is converted to a float."""
        return PX.SequenceRelation(
                elem.get('type'), elem.get('id_ref_0'), elem.get('id_ref_1'),
                distance=_float(elem.get('distance')),
                confidence=_get_child_as(elem, 'confidence', self.confidence))

    def uri(self, elem):
        """Build a Uri from the element text and desc/type attributes."""
        return PX.Uri(elem.text.strip(),
                desc=_collapse_wspace(elem.get('desc')),
                type=elem.get('type'))
620 621 622 623 # --------------------------------------------------------- 624 # OUTPUT 625 # --------------------------------------------------------- 626
627 -def _serialize(value):
628 """Convert a Python primitive to a phyloXML-compatible Unicode string.""" 629 if isinstance(value, float): 630 return unicode(value).upper() 631 elif isinstance(value, bool): 632 return unicode(value).lower() 633 return unicode(value)
634 635
636 -def _clean_attrib(obj, attrs):
637 """Create a dictionary from an object's specified, non-None attributes.""" 638 out = {} 639 for key in attrs: 640 val = getattr(obj, key) 641 if val is not None: 642 out[key] = _serialize(val) 643 return out
644 645
646 -def _handle_complex(tag, attribs, subnodes, has_text=False):
647 def wrapped(self, obj): 648 elem = ElementTree.Element(tag, _clean_attrib(obj, attribs)) 649 for subn in subnodes: 650 if isinstance(subn, basestring): 651 # singular object: method and attribute names are the same 652 if getattr(obj, subn) is not None: 653 elem.append(getattr(self, subn)(getattr(obj, subn))) 654 else: 655 # list: singular method, pluralized attribute name 656 method, plural = subn 657 for item in getattr(obj, plural): 658 elem.append(getattr(self, method)(item)) 659 if has_text: 660 elem.text = _serialize(obj.value) 661 return elem
662 wrapped.__doc__ = "Serialize a %s and its subnodes, in order." % tag 663 return wrapped 664 665
666 -def _handle_simple(tag):
667 def wrapped(self, obj): 668 elem = ElementTree.Element(tag) 669 elem.text = _serialize(obj) 670 return elem
671 wrapped.__doc__ = "Serialize a simple %s node." % tag 672 return wrapped 673 674
class Writer(object):
    """Methods for serializing a PhyloXML object to XML.

    There is one method per phyloXML element, named after the element's tag,
    because the generated handlers look up sub-node serializers by name with
    getattr(). Most of them are produced by `_handle_complex` and
    `_handle_simple`.
    """

    def __init__(self, phyloxml):
        """Build an ElementTree from a PhyloXML object."""
        assert isinstance(phyloxml, PX.Phyloxml), "Not a Phyloxml object"
        self._tree = ElementTree.ElementTree(self.phyloxml(phyloxml))

    def write(self, file, encoding='utf-8', indent=True):
        """Write the XML tree to `file`.

        :Parameters:
            file
                the destination, passed to the tree's own write() method.
            encoding
                the output character encoding.
            indent
                if true, line breaks and indentation are added first.

        :returns: the number of child elements of the root node.
        """
        if indent:
            _indent(self._tree.getroot())
        self._tree.write(file, encoding)
        return len(self._tree.getroot())

    # Convert classes to ETree elements

    def phyloxml(self, obj):
        """Serialize the top-level object: its phylogenies, then the rest."""
        elem = ElementTree.Element(_ns('phyloxml'),
                # NB: This is for XSD validation, which we don't do
                # {_ns('schemaLocation', NAMESPACES['xsi']):
                #      obj.attributes['schemaLocation'],
                #      }
                )
        for tree in obj.phylogenies:
            elem.append(self.phylogeny(tree))
        for otr in obj.other:
            elem.append(self.other(otr))
        return elem

    def other(self, obj):
        """Serialize a non-phyloXML object and, recursively, its children."""
        elem = ElementTree.Element(_ns(obj.tag, obj.namespace), obj.attributes)
        elem.text = obj.value
        for child in obj.children:
            elem.append(self.other(child))
        return elem

    phylogeny = _handle_complex(_ns('phylogeny'),
            ('rooted', 'rerootable', 'branch_length_unit', 'type'),
            ( 'name',
              'id',
              'description',
              'date',
              ('confidence',        'confidences'),
              'clade',
              ('clade_relation',    'clade_relations'),
              ('sequence_relation', 'sequence_relations'),
              ('property',          'properties'),
              ('other',             'other'),
              ))

    clade = _handle_complex(_ns('clade'), ('id_source',),
            ( 'name',
              'branch_length',
              ('confidence',    'confidences'),
              'width',
              'color',
              'node_id',
              ('taxonomy',      'taxonomies'),
              ('sequence',      'sequences'),
              'events',
              'binary_characters',
              ('distribution',  'distributions'),
              'date',
              ('reference',     'references'),
              ('property',      'properties'),
              ('clade',         'clades'),
              ('other',         'other'),
              ))

    accession = _handle_complex(_ns('accession'), ('source',),
            (), has_text=True)

    annotation = _handle_complex(_ns('annotation'),
            ('ref', 'source', 'evidence', 'type'),
            ( 'desc',
              'confidence',
              ('property',   'properties'),
              'uri',
              ))

    def binary_characters(self, obj):
        """Serialize a binary_characters node and its subnodes.

        A character list that is None (the parser's value for a missing
        child element) is left out instead of raising TypeError.
        """
        elem = ElementTree.Element(_ns('binary_characters'),
                _clean_attrib(obj,
                    ('type', 'gained_count', 'lost_count',
                        'present_count', 'absent_count')))
        for subn in ('gained', 'lost', 'present', 'absent'):
            tokens = getattr(obj, subn)
            if tokens is None:
                continue
            subelem = ElementTree.Element(_ns(subn))
            for token in tokens:
                subelem.append(self.bc(token))
            elem.append(subelem)
        return elem

    clade_relation = _handle_complex(_ns('clade_relation'),
            ('id_ref_0', 'id_ref_1', 'distance', 'type'),
            ('confidence',))

    color = _handle_complex(_ns('color'), (), ('red', 'green', 'blue'))

    confidence = _handle_complex(_ns('confidence'), ('type',),
            (), has_text=True)

    date = _handle_complex(_ns('date'), ('unit',),
            ('desc', 'value', 'minimum', 'maximum'))

    distribution = _handle_complex(_ns('distribution'), (),
            ( 'desc',
              ('point',     'points'),
              ('polygon',   'polygons'),
              ))

    def domain(self, obj):
        """Serialize a domain node.

        The 'from' attribute is written as the object's start plus one.
        """
        elem = ElementTree.Element(_ns('domain'),
                {'from': str(obj.start + 1), 'to': str(obj.end)})
        if obj.confidence is not None:
            elem.set('confidence', _serialize(obj.confidence))
        if obj.id is not None:
            elem.set('id', obj.id)
        elem.text = _serialize(obj.value)
        return elem

    domain_architecture = _handle_complex(_ns('domain_architecture'),
            ('length',),
            (('domain', 'domains'),))

    events = _handle_complex(_ns('events'), (),
            ( 'type',
              'duplications',
              'speciations',
              'losses',
              'confidence',
              ))

    id = _handle_complex(_ns('id'), ('provider',), (), has_text=True)

    # NB: mol_seq must stay a complex type so that the 'is_aligned' attribute
    # is written; it is deliberately not repeated among the simple string
    # types below, where a second definition would replace this one.
    mol_seq = _handle_complex(_ns('mol_seq'), ('is_aligned',),
            (), has_text=True)

    node_id = _handle_complex(_ns('node_id'), ('provider',), (), has_text=True)

    point = _handle_complex(_ns('point'), ('geodetic_datum', 'alt_unit'),
            ('lat', 'long', 'alt'))

    polygon = _handle_complex(_ns('polygon'), (), (('point', 'points'),))

    property = _handle_complex(_ns('property'),
            ('ref', 'unit', 'datatype', 'applies_to', 'id_ref'),
            (), has_text=True)

    reference = _handle_complex(_ns('reference'), ('doi',), ('desc',))

    sequence = _handle_complex(_ns('sequence'),
            ('type', 'id_ref', 'id_source'),
            ( 'symbol',
              'accession',
              'name',
              'location',
              'mol_seq',
              'uri',
              ('annotation', 'annotations'),
              'domain_architecture',
              ('other', 'other'),
              ))

    sequence_relation = _handle_complex(_ns('sequence_relation'),
            ('id_ref_0', 'id_ref_1', 'distance', 'type'),
            ('confidence',))

    taxonomy = _handle_complex(_ns('taxonomy'),
            ('id_source',),
            ( 'id',
              'code',
              'scientific_name',
              'authority',
              ('common_name',   'common_names'),
              ('synonym',   'synonyms'),
              'rank',
              'uri',
              ('other',         'other'),
              ))

    uri = _handle_complex(_ns('uri'), ('desc', 'type'), (), has_text=True)

    # Primitive types

    # Floating point
    alt = _handle_simple(_ns('alt'))
    branch_length = _handle_simple(_ns('branch_length'))
    lat = _handle_simple(_ns('lat'))
    long = _handle_simple(_ns('long'))
    maximum = _handle_simple(_ns('maximum'))
    minimum = _handle_simple(_ns('minimum'))
    value = _handle_simple(_ns('value'))
    width = _handle_simple(_ns('width'))

    # Integers
    blue = _handle_simple(_ns('blue'))
    duplications = _handle_simple(_ns('duplications'))
    green = _handle_simple(_ns('green'))
    losses = _handle_simple(_ns('losses'))
    red = _handle_simple(_ns('red'))
    speciations = _handle_simple(_ns('speciations'))

    # Strings
    bc = _handle_simple(_ns('bc'))
    code = _handle_simple(_ns('code'))
    common_name = _handle_simple(_ns('common_name'))
    desc = _handle_simple(_ns('desc'))
    description = _handle_simple(_ns('description'))
    location = _handle_simple(_ns('location'))
    name = _handle_simple(_ns('name'))
    rank = _handle_simple(_ns('rank'))
    scientific_name = _handle_simple(_ns('scientific_name'))
    symbol = _handle_simple(_ns('symbol'))
    synonym = _handle_simple(_ns('synonym'))
    type = _handle_simple(_ns('type'))
893