1
2
3
4
5 """Command line wrapper for the multiple alignment program PRANK.
6 """
7
8 __docformat__ = "epytext en"
9
10 from Bio.Application import _Option, _Switch, AbstractCommandline
11
13 """Command line wrapper for the multiple alignment program PRANK.
14
15 http://www.ebi.ac.uk/goldman-srv/prank/prank/
16
17 Example:
18
19 To align a FASTA file (unaligned.fasta) with the output in aligned
20 FASTA format with the output filename starting with "aligned" (you
can't pick the filename explicitly), no tree output and no XML output,
22 use:
23
24 >>> from Bio.Align.Applications import PrankCommandline
25 >>> prank_cline = PrankCommandline(d="unaligned.fasta",
26 ... o="aligned", #prefix only!
27 ... f=8, #FASTA output
28 ... notree=True, noxml=True)
29 >>> print prank_cline
30 prank -d=unaligned.fasta -o=aligned -f=8 -noxml -notree
31
32 You would typically run the command line with prank_cline() or via
33 the Python subprocess module, as described in the Biopython tutorial.
34
35 Citations:
36
37 Loytynoja, A. and Goldman, N. 2005. An algorithm for progressive
38 multiple alignment of sequences with insertions. Proceedings of
39 the National Academy of Sciences, 102: 10557--10562.
40
41 Loytynoja, A. and Goldman, N. 2008. Phylogeny-aware gap placement
42 prevents errors in sequence alignment and evolutionary analysis.
43 Science, 320: 1632.
44
Last checked against version: 081202
46 """
def __init__(self, cmd="prank", **kwargs):
    """Declare the PRANK options and switches, then initialise the base class.

    Builds C{self.parameters}, the list of C{_Option} / C{_Switch}
    objects describing every supported PRANK argument, and then hands
    over to C{AbstractCommandline.__init__}.

    @param cmd: command to invoke; defaults to C{"prank"}.
    @param kwargs: forwarded unchanged to C{AbstractCommandline.__init__}
        (e.g. C{d="unaligned.fasta", f=8, noxml=True}).
    """
    # Format codes accepted by -f: every integer from 1 to 17 passes the
    # check, including the codes not listed in the help text below.
    valid_formats = list(range(1, 18))

    # Small type checkers shared by the numeric options.  Values must
    # already have the right Python type; nothing is converted here.
    def _is_float(value):
        return isinstance(value, float)

    def _is_int(value):
        return isinstance(value, int)

    def _is_number(value):
        return isinstance(value, (float, int))

    self.parameters = [
        # ---- Input files, output files and reporting ----
        _Option(["-d", "d"],
                "Input filename",
                filename=True,
                is_required=True),
        _Option(["-t", "t"],
                "Input guide tree filename",
                filename=True),
        _Option(["-tree", "tree"],
                "Input guide tree as Newick string"),
        _Option(["-m", "m"],
                "User-defined alignment model filename. Default: "
                "HKY2/WAG"),
        _Option(["-o", "o"],
                "Output filenames prefix. Default: 'output'\n "
                "Will write: output.?.fas (depending on requested "
                "format), output.?.xml and output.?.dnd",
                filename=True),
        _Option(["-f", "f"],
                "Output alignment format. Default: 8 FASTA\n"
                "Option are:\n"
                "1. IG/Stanford 8. Pearson/Fasta\n"
                "2. GenBank/GB 11. Phylip3.2\n"
                "3. NBRF 12. Phylip\n"
                "4. EMBL 14. PIR/CODATA\n"
                "6. DNAStrider 15. MSF\n"
                "7. Fitch 17. PAUP/NEXUS",
                checker_function=lambda x: x in valid_formats),
        _Switch(["-noxml", "noxml"],
                "Do not output XML files"),
        _Switch(["-notree", "notree"],
                "Do not output dnd tree files"),
        _Switch(["-shortnames", "shortnames"],
                "Truncate names at first space"),
        _Switch(["-quiet", "quiet"],
                "Reduce verbosity"),

        # ---- Model parameters ----
        _Switch(["-F", "+F", "F"],
                "Force insertions to be always skipped: same as +F"),
        _Switch(["-dots", "dots"],
                "Show insertion gaps as dots"),
        _Option(["-gaprate", "gaprate"],
                "Gap opening rate. Default: dna 0.025 prot 0.0025",
                checker_function=_is_float),
        _Option(["-gapext", "gapext"],
                "Gap extension probability. Default: dna 0.5 "
                "/ prot 0.5",
                checker_function=_is_float),
        # Checked against str rather than bytes: the two are the same
        # type on Python 2, while on Python 3 a bytes check would reject
        # an ordinary text string such as '25,25,25,25'.
        _Option(["-dnafreqs", "dnafreqs"],
                "DNA frequencies - 'A,C,G,T'. eg '25,25,25,25' as a quote "
                "surrounded string value. Default: empirical",
                checker_function=lambda x: isinstance(x, str)),
        _Option(["-kappa", "kappa"],
                "Transition/transversion ratio. Default: 2",
                checker_function=_is_int),
        _Option(["-rho", "rho"],
                "Purine/pyrimidine ratio. Default: 1",
                checker_function=_is_int),
        _Option(["-codon", "codon"],
                "Codon model filename. Default: empirical codon model"),
        _Switch(["-termgap", "termgap"],
                "Penalise terminal gaps normally"),

        # ---- Other options ----
        _Switch(["-nopost", "nopost"],
                "Do not compute posterior support. Default: compute"),
        _Option(["-pwdist", "pwdist"],
                "Expected pairwise distance for computing guidetree. "
                "Default: dna 0.25 / prot 0.5",
                checker_function=_is_float),
        _Switch(["-once", "once"],
                "Run only once. Default: twice if no guidetree given"),
        _Switch(["-twice", "twice"],
                "Always run twice"),
        _Switch(["-skipins", "skipins"],
                "Skip insertions in posterior support"),
        _Switch(["-uselogs", "uselogs"],
                "Slower but should work for a greater number of sequences"),
        _Switch(["-writeanc", "writeanc"],
                "Output ancestral sequences"),
        _Switch(["-printnodes", "printnodes"],
                "Output each node; mostly for debugging"),
        _Option(["-matresize", "matresize"],
                "Matrix resizing multiplier",
                checker_function=_is_number),
        _Option(["-matinitsize", "matinitsize"],
                "Matrix initial size multiplier",
                checker_function=_is_number),
        _Switch(["-longseq", "longseq"],
                "Save space in pairwise alignments"),
        _Switch(["-pwgenomic", "pwgenomic"],
                "Do pairwise alignment, no guidetree"),
        _Option(["-pwgenomicdist", "pwgenomicdist"],
                "Distance for pairwise alignment. Default: 0.3",
                checker_function=_is_float),
        _Option(["-scalebranches", "scalebranches"],
                "Scale branch lengths. Default: dna 1 / prot 2",
                checker_function=_is_int),
        _Option(["-fixedbranches", "fixedbranches"],
                "Use fixed branch lengths of input value",
                checker_function=_is_float),
        _Option(["-maxbranches", "maxbranches"],
                "Use maximum branch lengths of input value",
                checker_function=_is_float),
        _Switch(["-realbranches", "realbranches"],
                "Disable branch length truncation"),
        _Switch(["-translate", "translate"],
                "Translate to protein"),
        _Switch(["-mttranslate", "mttranslate"],
                "Translate to protein using mt table"),
        _Switch(["-convert", "convert"],
                "Convert input alignment to new format. Do "
                "not perform alignment"),
    ]
    # self.parameters must be in place before the base initialiser runs,
    # as in the original statement order.
    AbstractCommandline.__init__(self, cmd, **kwargs)
197
def _test():
    """Run the module's doctests (PRIVATE).

    Prints a start message, calls C{doctest.testmod()} with no
    arguments, then prints a completion message.
    """
    # Single-argument print(...) emits the same text whether print is a
    # statement (Python 2) or a function (Python 3).
    print("Runing modules doctests...")
    import doctest
    doctest.testmod()
    print("Done")
204
# Only when this file is executed directly (not imported): call _test().
if __name__ == "__main__":
    _test()
207