1
2
3
4
5 """Command line wrapper for the multiple alignment program Clustal W.
6 """
7
8 __docformat__ = "epytext en"
9
10 import os
11 from Bio.Application import _Option, _Switch, AbstractCommandline
12
class ClustalwCommandline(AbstractCommandline):
    """Command line wrapper for clustalw (version one or two).

    http://www.clustal.org/

    Example:

    >>> from Bio.Align.Applications import ClustalwCommandline
    >>> in_file = "unaligned.fasta"
    >>> clustalw_cline = ClustalwCommandline("clustalw2", infile=in_file)
    >>> print(clustalw_cline)
    clustalw2 -infile=unaligned.fasta

    You would typically run the command line with clustalw_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA,
    McWilliam H, Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD,
    Gibson TJ, Higgins DG. (2007). Clustal W and Clustal X version 2.0.
    Bioinformatics, 23, 2947-2948.

    Last checked against versions: 1.83 and 2.0.10
    """

    def __init__(self, cmd="clustalw", **kwargs):
        """Declare the supported ClustalW arguments and initialise the wrapper.

        Arguments:
         - cmd - name (or path) of the executable, "clustalw" by default.
         - kwargs - forwarded unchanged to AbstractCommandline.__init__;
           as in the class example, ``infile="unaligned.fasta"`` ends up as
           ``-infile=unaligned.fasta`` on the command line.

        Every entry in self.parameters lists the accepted spellings of one
        argument, its help text and, where given, a checker_function that
        returns a true value for acceptable input.
        NOTE(review): how and when the base class applies checker_function
        is not visible here - confirm against Bio.Application.
        """
        # Small checker factories so each validation rule is written once.
        def is_int(value):
            return isinstance(value, int)

        def is_number(value):
            return isinstance(value, (int, float))

        def one_of(*keywords):
            # Keywords are accepted fully upper case or fully lower case,
            # exactly as in the hand-written lists this replaces.
            allowed = list(keywords) + [word.lower() for word in keywords]
            return lambda value: value in allowed

        def keyword_or_file(*keywords):
            # Either one of the built-in names or a path that exists.
            is_keyword = one_of(*keywords)
            return lambda value: is_keyword(value) or os.path.exists(value)

        self.parameters = \
            [
            # Input files
            _Option(["-infile", "-INFILE", "INFILE", "infile"],
                    "Input sequences.",
                    filename=True),
            _Option(["-profile1", "-PROFILE1", "PROFILE1", "profile1"],
                    "Profiles (old alignment).",
                    filename=True),
            _Option(["-profile2", "-PROFILE2", "PROFILE2", "profile2"],
                    "Profiles (old alignment).",
                    filename=True),

            # What to do
            _Switch(["-options", "-OPTIONS", "OPTIONS", "options"],
                    "List the command line parameters"),
            _Switch(["-help", "-HELP", "HELP", "help"],
                    "Outline the command line params."),
            _Switch(["-check", "-CHECK", "CHECK", "check"],
                    "Outline the command line params."),
            _Switch(["-fullhelp", "-FULLHELP", "FULLHELP", "fullhelp"],
                    "Output full help content."),
            _Switch(["-align", "-ALIGN", "ALIGN", "align"],
                    "Do full multiple alignment."),
            _Switch(["-tree", "-TREE", "TREE", "tree"],
                    "Calculate NJ tree."),
            _Option(["-bootstrap", "-BOOTSTRAP", "BOOTSTRAP", "bootstrap"],
                    "Bootstrap a NJ tree (n= number of bootstraps; def. = 1000).",
                    checker_function=is_int),
            _Switch(["-convert", "-CONVERT", "CONVERT", "convert"],
                    "Output the input sequences in a different file format."),

            # General settings and output
            _Switch(["-quicktree", "-QUICKTREE", "QUICKTREE", "quicktree"],
                    "Use FAST algorithm for the alignment guide tree"),
            _Option(["-type", "-TYPE", "TYPE", "type"],
                    "PROTEIN or DNA sequences",
                    checker_function=one_of("PROTEIN", "DNA")),
            _Switch(["-negative", "-NEGATIVE", "NEGATIVE", "negative"],
                    "Protein alignment with negative values in matrix"),
            _Option(["-outfile", "-OUTFILE", "OUTFILE", "outfile"],
                    "Output sequence alignment file name",
                    filename=True),
            _Option(["-output", "-OUTPUT", "OUTPUT", "output"],
                    "Output format: GCG, GDE, PHYLIP, PIR or NEXUS",
                    checker_function=one_of("GCG", "GDE", "PHYLIP",
                                            "PIR", "NEXUS")),
            _Option(["-outorder", "-OUTORDER", "OUTORDER", "outorder"],
                    "Output taxon order: INPUT or ALIGNED",
                    checker_function=one_of("INPUT", "ALIGNED")),
            _Option(["-case", "-CASE", "CASE", "case"],
                    "LOWER or UPPER (for GDE output only)",
                    checker_function=one_of("UPPER", "LOWER")),
            _Option(["-seqnos", "-SEQNOS", "SEQNOS", "seqnos"],
                    "OFF or ON (for Clustal output only)",
                    checker_function=one_of("ON", "OFF")),
            _Option(["-seqno_range", "-SEQNO_RANGE", "SEQNO_RANGE", "seqno_range"],
                    "OFF or ON (NEW- for all output formats)",
                    checker_function=one_of("ON", "OFF")),
            _Option(["-range", "-RANGE", "RANGE", "range"],
                    "Sequence range to write starting m to m+n. "
                    "Input as string eg. '24,200'"),
            _Option(["-maxseqlen", "-MAXSEQLEN", "MAXSEQLEN", "maxseqlen"],
                    "Maximum allowed input sequence length",
                    checker_function=is_int),
            _Switch(["-quiet", "-QUIET", "QUIET", "quiet"],
                    "Reduce console output to minimum"),
            _Switch(["-stats", "-STATS", "STATS", "stats"],
                    "Log some alignents statistics to file"),

            # Fast pairwise alignments
            _Option(["-ktuple", "-KTUPLE", "KTUPLE", "ktuple"],
                    "Word size",
                    checker_function=is_number),
            _Option(["-topdiags", "-TOPDIAGS", "TOPDIAGS", "topdiags"],
                    "Number of best diags.",
                    checker_function=is_number),
            _Option(["-window", "-WINDOW", "WINDOW", "window"],
                    "Window around best diags.",
                    checker_function=is_number),
            _Option(["-pairgap", "-PAIRGAP", "PAIRGAP", "pairgap"],
                    "Gap penalty",
                    checker_function=is_number),
            _Option(["-score", "-SCORE", "SCORE", "score"],
                    "Either: PERCENT or ABSOLUTE",
                    checker_function=one_of("PERCENT", "ABSOLUTE")),

            # Slow pairwise alignments
            _Option(["-pwmatrix", "-PWMATRIX", "PWMATRIX", "pwmatrix"],
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                    checker_function=keyword_or_file("BLOSUM", "PAM",
                                                     "GONNET", "ID"),
                    filename=True),
            _Option(["-pwdnamatrix", "-PWDNAMATRIX", "PWDNAMATRIX", "pwdnamatrix"],
                    "DNA weight matrix=IUB, CLUSTALW or filename",
                    checker_function=keyword_or_file("IUB", "CLUSTALW"),
                    filename=True),
            _Option(["-pwgapopen", "-PWGAPOPEN", "PWGAPOPEN", "pwgapopen"],
                    "Gap opening penalty",
                    checker_function=is_number),
            # Help text previously duplicated the -pwgapopen description.
            _Option(["-pwgapext", "-PWGAPEXT", "PWGAPEXT", "pwgapext"],
                    "Gap extension penalty",
                    checker_function=is_number),

            # Multiple alignments
            _Option(["-newtree", "-NEWTREE", "NEWTREE", "newtree"],
                    "Output file name for newly created guide tree",
                    filename=True),
            # The guide tree is an input, so it must already exist. The
            # checker used to return the os.path.exists function object
            # (always true) instead of calling it.
            _Option(["-usetree", "-USETREE", "USETREE", "usetree"],
                    "File name of guide tree",
                    checker_function=os.path.exists,
                    filename=True),
            _Option(["-matrix", "-MATRIX", "MATRIX", "matrix"],
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                    checker_function=keyword_or_file("BLOSUM", "PAM",
                                                     "GONNET", "ID"),
                    filename=True),
            _Option(["-dnamatrix", "-DNAMATRIX", "DNAMATRIX", "dnamatrix"],
                    "DNA weight matrix=IUB, CLUSTALW or filename",
                    checker_function=keyword_or_file("IUB", "CLUSTALW"),
                    filename=True),
            _Option(["-gapopen", "-GAPOPEN", "GAPOPEN", "gapopen"],
                    "Gap opening penalty",
                    checker_function=is_number),
            _Option(["-gapext", "-GAPEXT", "GAPEXT", "gapext"],
                    "Gap extension penalty",
                    checker_function=is_number),
            _Switch(["-endgaps", "-ENDGAPS", "ENDGAPS", "endgaps"],
                    "No end gap separation pen."),
            _Option(["-gapdist", "-GAPDIST", "GAPDIST", "gapdist"],
                    "Gap separation pen. range",
                    checker_function=is_number),
            _Switch(["-nopgap", "-NOPGAP", "NOPGAP", "nopgap"],
                    "Residue-specific gaps off"),
            _Switch(["-nohgap", "-NOHGAP", "NOHGAP", "nohgap"],
                    "Hydrophilic gaps off"),
            _Switch(["-hgapresidues", "-HGAPRESIDUES", "HGAPRESIDUES", "hgapresidues"],
                    "List hydrophilic res."),
            _Option(["-maxdiv", "-MAXDIV", "MAXDIV", "maxdiv"],
                    "% ident. for delay",
                    checker_function=is_number),
            _Option(["-transweight", "-TRANSWEIGHT", "TRANSWEIGHT", "transweight"],
                    "Transitions weighting",
                    checker_function=is_number),
            _Option(["-iteration", "-ITERATION", "ITERATION", "iteration"],
                    "NONE or TREE or ALIGNMENT",
                    checker_function=one_of("NONE", "TREE", "ALIGNMENT")),
            _Option(["-numiter", "-NUMITER", "NUMITER", "numiter"],
                    "maximum number of iterations to perform",
                    checker_function=is_int),
            _Switch(["-noweights", "-NOWEIGHTS", "NOWEIGHTS", "noweights"],
                    "Disable sequence weighting"),

            # Profile alignments
            _Switch(["-profile", "-PROFILE", "PROFILE", "profile"],
                    "Merge two alignments by profile alignment"),
            _Option(["-newtree1", "-NEWTREE1", "NEWTREE1", "newtree1"],
                    "Output file name for new guide tree of profile1",
                    filename=True),
            _Option(["-newtree2", "-NEWTREE2", "NEWTREE2", "newtree2"],
                    "Output file for new guide tree of profile2",
                    filename=True),
            # Same fix as -usetree: actually test that the file exists.
            _Option(["-usetree1", "-USETREE1", "USETREE1", "usetree1"],
                    "File name of guide tree for profile1",
                    checker_function=os.path.exists,
                    filename=True),
            _Option(["-usetree2", "-USETREE2", "USETREE2", "usetree2"],
                    "File name of guide tree for profile2",
                    checker_function=os.path.exists,
                    filename=True),

            # Sequence to profile alignments
            _Switch(["-sequences", "-SEQUENCES", "SEQUENCES", "sequences"],
                    "Sequentially add profile2 sequences to profile1 alignment"),
            _Switch(["-nosecstr1", "-NOSECSTR1", "NOSECSTR1", "nosecstr1"],
                    "Do not use secondary structure-gap penalty mask for profile 1"),
            _Switch(["-nosecstr2", "-NOSECSTR2", "NOSECSTR2", "nosecstr2"],
                    "Do not use secondary structure-gap penalty mask for profile 2"),

            # Structure alignments
            _Option(["-secstrout", "-SECSTROUT", "SECSTROUT", "secstrout"],
                    "STRUCTURE or MASK or BOTH or NONE output in alignment file",
                    checker_function=one_of("STRUCTURE", "MASK",
                                            "BOTH", "NONE")),
            _Option(["-helixgap", "-HELIXGAP", "HELIXGAP", "helixgap"],
                    "Gap penalty for helix core residues",
                    checker_function=is_number),
            _Option(["-strandgap", "-STRANDGAP", "STRANDGAP", "strandgap"],
                    "gap penalty for strand core residues",
                    checker_function=is_number),
            _Option(["-loopgap", "-LOOPGAP", "LOOPGAP", "loopgap"],
                    "Gap penalty for loop regions",
                    checker_function=is_number),
            _Option(["-terminalgap", "-TERMINALGAP", "TERMINALGAP", "terminalgap"],
                    "Gap penalty for structure termini",
                    checker_function=is_number),
            _Option(["-helixendin", "-HELIXENDIN", "HELIXENDIN", "helixendin"],
                    "Number of residues inside helix to be treated as terminal",
                    checker_function=is_int),
            _Option(["-helixendout", "-HELIXENDOUT", "HELIXENDOUT", "helixendout"],
                    "Number of residues outside helix to be treated as terminal",
                    checker_function=is_int),
            _Option(["-strandendin", "-STRANDENDIN", "STRANDENDIN", "strandendin"],
                    "Number of residues inside strand to be treated as terminal",
                    checker_function=is_int),
            _Option(["-strandendout", "-STRANDENDOUT", "STRANDENDOUT", "strandendout"],
                    "number of residues outside strand to be treated as terminal",
                    checker_function=is_int),

            # Trees
            _Option(["-outputtree", "-OUTPUTTREE", "OUTPUTTREE", "outputtree"],
                    "nj OR phylip OR dist OR nexus",
                    checker_function=one_of("NJ", "PHYLIP",
                                            "DIST", "NEXUS")),
            _Option(["-seed", "-SEED", "SEED", "seed"],
                    "Seed number for bootstraps.",
                    checker_function=is_int),
            _Switch(["-kimura", "-KIMURA", "KIMURA", "kimura"],
                    "Use Kimura's correction."),
            _Switch(["-tossgaps", "-TOSSGAPS", "TOSSGAPS", "tossgaps"],
                    "Ignore positions with gaps."),
            _Option(["-bootlabels", "-BOOTLABELS", "BOOTLABELS", "bootlabels"],
                    "Node OR branch position of bootstrap values in tree display",
                    checker_function=one_of("NODE", "BRANCH")),
            _Option(["-clustering", "-CLUSTERING", "CLUSTERING", "clustering"],
                    "NJ or UPGMA",
                    checker_function=one_of("NJ", "UPGMA"))
            ]
        # self.parameters is assigned before delegating, as in the original.
        AbstractCommandline.__init__(self, cmd, **kwargs)
308
def _test():
    """Run the module's doctests (PRIVATE)."""
    # Single-argument print(...) behaves the same as the print statement
    # on Python 2 and is also valid on Python 3.
    print("Runing ClustalW doctests...")
    import doctest
    doctest.testmod()
    print("Done")


# Run the doctests only when this file is executed as a script.
if __name__ == "__main__":
    _test()
318