1
2
3
4
5 """Command-line wrapper for the tree inference program PhyML."""
6 __docformat__ = "restructuredtext en"
7
8 from Bio.Application import _Option, _Switch, AbstractCommandline
9
10
12 """Command-line wrapper for the tree inference program PhyML.
13
14 Homepage: http://www.atgc-montpellier.fr/phyml
15
16 Citations:
17
18 Guindon S, Gascuel O.
19 A simple, fast, and accurate algorithm to estimate large phylogenies by maximum
20 likelihood.
21 Systematic Biology, 2003 Oct;52(5):696-704.
22 PubMed PMID: 14530136.
23
24 Guindon S, Dufayard JF, Lefort V, Anisimova M, Hordijk W, Gascuel O.
25 New Algorithms and Methods to Estimate Maximum-Likelihood Phylogenies: Assessing
26 the Performance of PhyML 3.0.
27 Systematic Biology, 2010 59(3):307-21.
28
29 """
30
def __init__(self, cmd='phyml', **kwargs):
    """Build the PhyML option list and initialise the base class.

    Fills ``self.parameters`` with one ``_Option`` or ``_Switch`` entry per
    supported PhyML argument (in the order they are listed below) and then
    delegates to ``AbstractCommandline.__init__``.

    :param cmd: forwarded unchanged to ``AbstractCommandline.__init__``;
        defaults to ``'phyml'``.
    :param kwargs: forwarded unchanged to ``AbstractCommandline.__init__``.
    """
    # ``basestring`` only exists on Python 2.  Fall back to ``str`` so the
    # string checkers below do not raise NameError on Python 3.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str

    named_models = (
        # Nucleotide models:
        'HKY85', 'JC69', 'K80', 'F81', 'F84', 'TN93', 'GTR',
        # Amino acid models:
        'LG', 'WAG', 'JTT', 'MtREV', 'Dayhoff', 'DCMut',
        'RtREV', 'CpREV', 'VT', 'Blosum62', 'MtMam', 'MtArt',
        'HIVw', 'HIVb',
    )

    def is_valid_model(value):
        """Accept a named model, an integer, or a six-digit custom code."""
        if value in named_models or isinstance(value, int):
            return True
        # The option text documents custom models as a string of six
        # digits (e.g. '012345'); an int cannot keep the leading zeros,
        # so such strings have to be accepted as well.
        return (isinstance(value, string_types)
                and len(value) == 6
                and all(ch in '0123456789' for ch in value))

    self.parameters = [
        _Option(['-i', '--input', 'input'],
                """Name of the nucleotide or amino-acid sequence file in PHYLIP
                format.""",
                filename=True,
                is_required=True,
                equate=False,
                ),

        _Option(['-d', '--datatype', 'datatype'],
                """Data type is 'nt' for nucleotide (default) and 'aa' for
                amino-acid sequences.""",
                checker_function=lambda x: x in ('nt', 'aa'),
                equate=False,
                ),

        _Switch(['-q', '--sequential', 'sequential'],
                "Changes interleaved format (default) to sequential format."
                ),

        _Option(['-n', '--multiple', 'multiple'],
                "Number of data sets to analyse (integer).",
                checker_function=(lambda x:
                                  isinstance(x, int) or x.isdigit()),
                equate=False,
                ),

        _Switch(['-p', '--pars', 'pars'],
                """Use a minimum parsimony starting tree.

                This option is taken into account when the '-u' option is absent
                and when tree topology modifications are to be done.
                """
                ),

        _Option(['-b', '--bootstrap', 'bootstrap'],
                """Number of bootstrap replicates, if value is > 0.

                Otherwise:

                0: neither approximate likelihood ratio test nor bootstrap
                   values are computed.
                -1: approximate likelihood ratio test returning aLRT statistics.
                -2: approximate likelihood ratio test returning Chi2-based
                    parametric branch supports.
                -4: SH-like branch supports alone.
                """,
                equate=False,
                ),

        _Option(['-m', '--model', 'model'],
                """Substitution model name.

                Nucleotide-based models:

                HKY85 (default) | JC69 | K80 | F81 | F84 | TN93 | GTR | custom

                For the custom option, a string of six digits identifies the
                model. For instance, 000000 corresponds to F81 (or JC69,
                provided the distribution of nucleotide frequencies is uniform).
                012345 corresponds to GTR. This option can be used for encoding
                any model that is a nested within GTR.

                Amino-acid based models:

                LG (default) | WAG | JTT | MtREV | Dayhoff | DCMut | RtREV |
                CpREV | VT | Blosum62 | MtMam | MtArt | HIVw | HIVb | custom
                """,
                checker_function=is_valid_model,
                equate=False,
                ),

        _Option(['-f', 'frequencies'],
                """Character frequencies.

                -f e, m, or "fA fC fG fT"

                e : Empirical frequencies, determined as follows :

                    - Nucleotide sequences: (Empirical) the equilibrium base
                      frequencies are estimated by counting the occurence of the
                      different bases in the alignment.
                    - Amino-acid sequences: (Empirical) the equilibrium
                      amino-acid frequencies are estimated by counting the
                      occurence of the different amino-acids in the alignment.

                m : ML/model-based frequencies, determined as follows :

                    - Nucleotide sequences: (ML) the equilibrium base
                      frequencies are estimated using maximum likelihood
                    - Amino-acid sequences: (Model) the equilibrium amino-acid
                      frequencies are estimated using the frequencies defined by
                      the substitution model.

                "fA fC fG fT" : only valid for nucleotide-based models.
                    fA, fC, fG and fT are floating-point numbers that correspond
                    to the frequencies of A, C, G and T, respectively.
                """,
                # NOTE(review): this value is not a file name; presumably
                # filename=True is set so that a space-separated frequency
                # string is quoted as one argument -- confirm against
                # Bio.Application before changing it.
                filename=True,
                equate=False,
                ),

        _Option(['-t', '--ts/tv', 'ts_tv_ratio'],
                """Transition/transversion ratio. (DNA sequences only.)

                Can be a fixed positive value (ex:4.0) or e to get the
                maximum-likelihood estimate.
                """,
                equate=False,
                ),

        _Option(['-v', '--pinv', 'prop_invar'],
                """Proportion of invariable sites.

                Can be a fixed value in the range [0,1], or 'e' to get the
                maximum-likelihood estimate.
                """,
                equate=False,
                ),

        _Option(['-c', '--nclasses', 'nclasses'],
                """Number of relative substitution rate categories.

                Default 1. Must be a positive integer.
                """,
                equate=False,
                ),

        _Option(['-a', '--alpha', 'alpha'],
                """Distribution of the gamma distribution shape parameter.

                Can be a fixed positive value, or 'e' to get the
                maximum-likelihood estimate.
                """,
                equate=False,
                ),

        _Option(['-s', '--search', 'search'],
                """Tree topology search operation option.

                Can be one of:

                NNI : default, fast
                SPR : a bit slower than NNI
                BEST : best of NNI and SPR search
                """,
                checker_function=lambda x: x in ('NNI', 'SPR', 'BEST'),
                equate=False,
                ),

        _Option(['-u', '--inputtree', 'input_tree'],
                "Starting tree filename. The tree must be in Newick format.",
                filename=True,
                equate=False,
                ),

        _Option(['-o', 'optimize'],
                """Specific parameter optimisation.

                tlr : tree topology (t), branch length (l) and
                      rate parameters (r) are optimised.
                tl : tree topology and branch length are optimised.
                lr : branch length and rate parameters are optimised.
                l : branch length are optimised.
                r : rate parameters are optimised.
                n : no parameter is optimised.
                """,
                # Every other valued option in this list sets equate=False;
                # this one was missing it and so was formatted differently.
                equate=False,
                ),

        _Switch(['--rand_start', 'rand_start'],
                """Sets the initial tree to random.

                Only valid if SPR searches are to be performed.
                """,
                ),

        _Option(['--n_rand_starts', 'n_rand_starts'],
                """Number of initial random trees to be used.

                Only valid if SPR searches are to be performed.
                """,
                equate=False,
                ),

        _Option(['--r_seed', 'r_seed'],
                """Seed used to initiate the random number generator.

                Must be an integer.
                """,
                equate=False,
                ),

        _Switch(['--print_site_lnl', 'print_site_lnl'],
                "Print the likelihood for each site in file *_phyml_lk.txt."
                ),

        _Switch(['--print_trace', 'print_trace'],
                """Print each phylogeny explored during the tree search process
                in file *_phyml_trace.txt."""
                ),

        _Option(['--run_id', 'run_id'],
                """Append the given string at the end of each PhyML output file.

                This option may be useful when running simulations involving
                PhyML.
                """,
                checker_function=lambda x: isinstance(x, string_types),
                equate=False,
                ),

        _Switch(['--quiet', 'quiet'],
                "No interactive questions (for running in batch mode)."
                ),
    ]
    AbstractCommandline.__init__(self, cmd, **kwargs)
258