Package Bio :: Package Phylo :: Package Applications :: Module _Phyml
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.Applications._Phyml

  1  # Copyright 2011 by Eric Talevich.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its license. 
  3  # Please see the LICENSE file that should have been included as part of this 
  4  # package. 
  5  """Command-line wrapper for the tree inference program PhyML.""" 
  6  __docformat__ = "restructuredtext en" 
  7   
  8  from Bio.Application import _Option, _Switch, AbstractCommandline 
  9   
 10   
class PhymlCommandline(AbstractCommandline):
    """Command-line wrapper for the tree inference program PhyML.

    Homepage: http://www.atgc-montpellier.fr/phyml

    Citations:

    Guindon S, Gascuel O.
    A simple, fast, and accurate algorithm to estimate large phylogenies by maximum
    likelihood.
    Systematic Biology, 2003 Oct;52(5):696-704.
    PubMed PMID: 14530136.

    Guindon S, Dufayard JF, Lefort V, Anisimova M, Hordijk W, Gascuel O.
    New Algorithms and Methods to Estimate Maximum-Likelihood Phylogenies: Assessing
    the Performance of PhyML 3.0.
    Systematic Biology, 2010 59(3):307-21.

    """

    def __init__(self, cmd='phyml', **kwargs):
        """Declare the supported PhyML parameters and initialise the wrapper.

        :Parameters:
            cmd : string
                Name (or path) of the PhyML executable; defaults to 'phyml'.
            kwargs
                Forwarded unchanged to ``AbstractCommandline.__init__``
                after ``self.parameters`` has been populated.
        """
        # 'basestring' only exists on Python 2; fall back to 'str' so the
        # string checkers below do not raise NameError on Python 3.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        named_models = (
            # Nucleotide models:
            'HKY85', 'JC69', 'K80', 'F81', 'F84', 'TN93', 'GTR',
            # Amino acid models:
            'LG', 'WAG', 'JTT', 'MtREV', 'Dayhoff', 'DCMut',
            'RtREV', 'CpREV', 'VT', 'Blosum62', 'MtMam', 'MtArt',
            'HIVw', 'HIVb')

        def is_valid_model(x):
            # Accept a named model, an integer (kept for backward
            # compatibility), or the documented custom form: a string of
            # six digits such as '000000' or '012345'.  The string form is
            # needed because an int cannot preserve leading zeros.
            if isinstance(x, int):
                return True
            if x in named_models:
                return True
            return (isinstance(x, string_types)
                    and len(x) == 6 and x.isdigit())

        self.parameters = [
                _Option(['-i', '--input', 'input'],
                    """Name of the nucleotide or amino-acid sequence file in PHYLIP
                    format.""",
                    filename=True,
                    is_required=True,
                    equate=False,
                    ),

                _Option(['-d', '--datatype', 'datatype'],
                    """Data type is 'nt' for nucleotide (default) and 'aa' for
                    amino-acid sequences.""",
                    checker_function=lambda x: x in ('nt', 'aa'),
                    equate=False,
                    ),

                _Switch(['-q', '--sequential', 'sequential'],
                    "Changes interleaved format (default) to sequential format."
                    ),

                _Option(['-n', '--multiple', 'multiple'],
                    "Number of data sets to analyse (integer).",
                    checker_function=(lambda x:
                        isinstance(x, int) or x.isdigit()),
                    equate=False,
                    ),

                _Switch(['-p', '--pars', 'pars'],
                    """Use a minimum parsimony starting tree.

                    This option is taken into account when the '-u' option is absent
                    and when tree topology modifications are to be done.
                    """
                    ),

                _Option(['-b', '--bootstrap', 'bootstrap'],
                    """Number of bootstrap replicates, if value is > 0.

                    Otherwise:

                    0: neither approximate likelihood ratio test nor bootstrap
                       values are computed.
                    -1: approximate likelihood ratio test returning aLRT statistics.
                    -2: approximate likelihood ratio test returning Chi2-based
                        parametric branch supports.
                    -4: SH-like branch supports alone.
                    """,
                    equate=False,
                    ),

                _Option(['-m', '--model', 'model'],
                    """Substitution model name.

                    Nucleotide-based models:

                    HKY85 (default) | JC69 | K80 | F81 | F84 | TN93 | GTR | custom

                    For the custom option, a string of six digits identifies the
                    model. For instance, 000000 corresponds to F81 (or JC69,
                    provided the distribution of nucleotide frequencies is uniform).
                    012345 corresponds to GTR. This option can be used for encoding
                    any model that is a nested within GTR.

                    Amino-acid based models:

                    LG (default) | WAG | JTT | MtREV | Dayhoff | DCMut | RtREV |
                    CpREV | VT | Blosum62 | MtMam | MtArt | HIVw | HIVb | custom
                    """,
                    checker_function=is_valid_model,
                    equate=False,
                    ),

                _Option(['-f', 'frequencies'],
                    """Character frequencies.

                    -f e, m, or "fA fC fG fT"

                    e : Empirical frequencies, determined as follows :

                        - Nucleotide sequences: (Empirical) the equilibrium base
                          frequencies are estimated by counting the occurence of the
                          different bases in the alignment.
                        - Amino-acid sequences: (Empirical) the equilibrium
                          amino-acid frequencies are estimated by counting the
                          occurence of the different amino-acids in the alignment.

                    m : ML/model-based frequencies, determined as follows :

                        - Nucleotide sequences: (ML) the equilibrium base
                          frequencies are estimated using maximum likelihood
                        - Amino-acid sequences: (Model) the equilibrium amino-acid
                          frequencies are estimated using the frequencies defined by
                          the substitution model.

                    "fA fC fG fT" : only valid for nucleotide-based models.
                        fA, fC, fG and fT are floating-point numbers that correspond
                        to the frequencies of A, C, G and T, respectively.
                    """,
                    filename=True,  # ensure ".25 .25 .25 .25" stays quoted
                    equate=False,
                    ),

                # XXX is the '/' character ok in the long arg name?
                _Option(['-t', '--ts/tv', 'ts_tv_ratio'],
                    """Transition/transversion ratio. (DNA sequences only.)

                    Can be a fixed positive value (ex:4.0) or e to get the
                    maximum-likelihood estimate.
                    """,
                    equate=False,
                    ),

                _Option(['-v', '--pinv', 'prop_invar'],
                    """Proportion of invariable sites.

                    Can be a fixed value in the range [0,1], or 'e' to get the
                    maximum-likelihood estimate.
                    """,
                    equate=False,
                    ),

                _Option(['-c', '--nclasses', 'nclasses'],
                    """Number of relative substitution rate categories.

                    Default 1. Must be a positive integer.
                    """,
                    equate=False,
                    ),

                _Option(['-a', '--alpha', 'alpha'],
                    """Distribution of the gamma distribution shape parameter.

                    Can be a fixed positive value, or 'e' to get the
                    maximum-likelihood estimate.
                    """,
                    equate=False,
                    ),

                _Option(['-s', '--search', 'search'],
                    """Tree topology search operation option.

                    Can be one of:

                    NNI : default, fast
                    SPR : a bit slower than NNI
                    BEST : best of NNI and SPR search
                    """,
                    checker_function=lambda x: x in ('NNI', 'SPR', 'BEST'),
                    equate=False,
                    ),

                # alt name: user_tree_file
                _Option(['-u', '--inputtree', 'input_tree'],
                    "Starting tree filename. The tree must be in Newick format.",
                    filename=True,
                    equate=False,
                    ),

                _Option(['-o', 'optimize'],
                    """Specific parameter optimisation.

                    tlr : tree topology (t), branch length (l) and
                          rate parameters (r) are optimised.
                    tl  : tree topology and branch length are optimised.
                    lr  : branch length and rate parameters are optimised.
                    l   : branch length are optimised.
                    r   : rate parameters are optimised.
                    n   : no parameter is optimised.
                    """,
                    # Pass the value as a separate argument ("-o tlr"), the
                    # same way every other value-taking option here does.
                    equate=False,
                    ),

                _Switch(['--rand_start', 'rand_start'],
                    """Sets the initial tree to random.

                    Only valid if SPR searches are to be performed.
                    """,
                    ),

                _Option(['--n_rand_starts', 'n_rand_starts'],
                    """Number of initial random trees to be used.

                    Only valid if SPR searches are to be performed.
                    """,
                    equate=False,
                    ),

                _Option(['--r_seed', 'r_seed'],
                    """Seed used to initiate the random number generator.

                    Must be an integer.
                    """,
                    equate=False,
                    ),

                _Switch(['--print_site_lnl', 'print_site_lnl'],
                    "Print the likelihood for each site in file *_phyml_lk.txt."
                    ),

                _Switch(['--print_trace', 'print_trace'],
                    """Print each phylogeny explored during the tree search process
                    in file *_phyml_trace.txt."""
                    ),

                _Option(['--run_id', 'run_id'],
                    """Append the given string at the end of each PhyML output file.

                    This option may be useful when running simulations involving
                    PhyML.
                    """,
                    checker_function=lambda x: isinstance(x, string_types),
                    equate=False,
                    ),

                # XXX should this always be set to True?
                _Switch(['--quiet', 'quiet'],
                    "No interactive questions (for running in batch mode)."
                    ),
                ]
        # The base class consumes self.parameters, so it must be set first.
        AbstractCommandline.__init__(self, cmd, **kwargs)
258