1
2
3
4
5 """Command line wrapper for the multiple alignment program DIALIGN2-2.
6 """
7
8 __docformat__ = "epytext en"
9
10 from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline
11
13 """Command line wrapper for the multiple alignment program DIALIGN2-2.
14
15 http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html
16
17 Example:
18
To align a FASTA file (unaligned.fasta), writing output files named
aligned.* including a FASTA output file (aligned.fa), use:
21
22 >>> from Bio.Align.Applications import DialignCommandline
23 >>> dialign_cline = DialignCommandline(input="unaligned.fasta",
24 ... fn="aligned", fa=True)
25 >>> print dialign_cline
26 dialign2-2 -fa -fn aligned unaligned.fasta
27
28 You would typically run the command line with dialign_cline() or via
29 the Python subprocess module, as described in the Biopython tutorial.
30
31 Citation:
32
33 B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
34 Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.
35
36 Last checked against version: 2.2
37 """
def __init__(self, cmd="dialign2-2", **kwargs):
    """Declare the DIALIGN2-2 parameters and initialise the wrapper.

    Arguments:
     - cmd - name of the executable to run, "dialign2-2" by default.
       It is stored as self.program_name and passed on to
       AbstractCommandline.__init__.
     - kwargs - forwarded unchanged to AbstractCommandline.__init__
       (presumably parameter values keyed by the second name in each
       parameter's name list, e.g. fa=True or fn="aligned").

    The parameters are declared in a fixed list; the order of that list is
    kept exactly as it was, since it may determine the order in which the
    options appear on the generated command line.
    """
    self.program_name = cmd
    self.parameters = [
        _Switch(["-afc", "afc"],
                "Creates additional output file '*.afc' "
                "containing data of all fragments considered "
                "for alignment WARNING: this file can be HUGE !"),
        _Switch(["-afc_v", "afc_v"],
                "Like '-afc' but verbose: fragments are explicitly "
                "printed. WARNING: this file can be EVEN BIGGER !"),
        _Switch(["-anc", "anc"],
                "Anchored alignment. Requires a file <seq_file>.anc "
                "containing anchor points."),
        _Switch(["-cs", "cs"],
                "If segments are translated, not only the `Watson "
                "strand' but also the `Crick strand' is looked at."),
        _Switch(["-cw", "cw"],
                "Additional output file in CLUSTAL W format."),
        _Switch(["-ds", "ds"],
                "`dna alignment speed up' - non-translated nucleic acid "
                "fragments are taken into account only if they start "
                "with at least two matches. Speeds up DNA alignment at "
                "the expense of sensitivity."),
        _Switch(["-fa", "fa"],
                "Additional output file in FASTA format."),
        _Switch(["-ff", "ff"],
                "Creates file *.frg containing information about all "
                "fragments that are part of the respective optimal "
                "pairwise alignmnets plus information about "
                "consistency in the multiple alignment"),
        _Option(["-fn", "fn"],
                "Output files are named <out_file>.<extension>.",
                equate=False),
        _Switch(["-fop", "fop"],
                "Creates file *.fop containing coordinates of all "
                "fragments that are part of the respective pairwise alignments."),
        _Switch(["-fsm", "fsm"],
                "Creates file *.fsm containing coordinates of all "
                "fragments that are part of the final alignment"),
        _Switch(["-iw", "iw"],
                "Overlap weights switched off (by default, overlap "
                "weights are used if up to 35 sequences are aligned). "
                "This option speeds up the alignment but may lead "
                "to reduced alignment quality."),
        _Switch(["-lgs", "lgs"],
                "`long genomic sequences' - combines the following "
                "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                "-fop, -ff, -cs, -ds, -pst "),
        _Switch(["-lgs_t", "lgs_t"],
                "Like '-lgs' but with all segment pairs assessed "
                "at the peptide level (rather than 'mixed alignments' "
                "as with the '-lgs' option). Therefore faster than "
                "-lgs but not very sensitive for non-coding regions."),
        _Option(["-lmax", "lmax"],
                "Maximum fragment length = x (default: x = 40 or "
                "x = 120 for `translated' fragments). Shorter x "
                "speeds up the program but may affect alignment quality.",
                checker_function=lambda x: isinstance(x, int),
                equate=False),
        _Switch(["-lo", "lo"],
                "(Long Output) Additional file *.log with information "
                "about fragments selected for pairwise alignment and "
                "about consistency in multi-alignment proceedure."),
        _Switch(["-ma", "ma"],
                "`mixed alignments' consisting of P-fragments and "
                "N-fragments if nucleic acid sequences are aligned."),
        _Switch(["-mask", "mask"],
                "Residues not belonging to selected fragments are "
                "replaced by `*' characters in output alignment "
                "(rather than being printed in lower-case characters)"),
        _Switch(["-mat", "mat"],
                "Creates file *mat with substitution counts derived "
                "from the fragments that have been selected for alignment."),
        # NOTE(review): the help text refers to a threshold "t", which
        # suggests this flag may also expect a value -- confirm against
        # the DIALIGN documentation before changing it.
        _Switch(["-mat_thr", "mat_thr"],
                "Like '-mat' but only fragments with weight score "
                "> t are considered"),
        _Switch(["-max_link", "max_link"],
                "'maximum linkage' clustering used to construct "
                "sequence tree (instead of UPGMA)."),
        _Switch(["-min_link", "min_link"],
                "'minimum linkage' clustering used."),
        _Option(["-mot", "mot"],
                "'motif' option.",
                equate=False),
        _Switch(["-msf", "msf"],
                "Separate output file in MSF format."),
        _Switch(["-n", "n"],
                "Input sequences are nucleic acid sequences. "
                "No translation of fragments."),
        _Switch(["-nt", "nt"],
                "Input sequences are nucleic acid sequences and "
                "`nucleic acid segments' are translated to `peptide "
                "segments'."),
        _Switch(["-nta", "nta"],
                "`no textual alignment' - textual alignment suppressed. "
                "This option makes sense if other output files are of "
                "intrest -- e.g. the fragment files created with -ff, "
                "-fop, -fsm or -lo."),
        _Switch(["-o", "o"],
                "Fast version, resulting alignments may be slightly "
                "different."),
        _Switch(["-ow", "ow"],
                "Overlap weights enforced (By default, overlap weights "
                "are used only if up to 35 sequences are aligned since "
                "calculating overlap weights is time consuming)."),
        _Switch(["-pst", "pst"],
                "'print status'. Creates and updates a file *.sta with "
                "information about the current status of the program "
                "run. This option is recommended if large data sets "
                "are aligned since it allows the user to estimate the "
                "remaining running time."),
        # "-smin" carries a value (the "-lgs" help text in this list spells
        # it "-smin 8"), so it is declared as an _Option in the same way as
        # "-lmax" and "-thr"; as a plain switch the value could never be
        # placed on the command line.
        _Option(["-smin", "smin"],
                "Minimum similarity value for first residue pair "
                "(or codon pair) in fragments. Speeds up protein "
                "alignment or alignment of translated DNA fragments "
                "at the expense of sensitivity.",
                checker_function=lambda x: isinstance(x, int),
                equate=False),
        _Option(["-stars", "stars"],
                "Maximum number of `*' characters indicating degree "
                "of local similarity among sequences. By default, no "
                "stars are used but numbers between 0 and 9, instead.",
                checker_function=lambda x: x in range(0, 10),
                equate=False),
        _Switch(["-stdo", "stdo"],
                "Results written to standard output."),
        _Switch(["-ta", "ta"],
                "Standard textual alignment printed (overrides "
                "suppression of textual alignments in special "
                "options, e.g. -lgs)"),
        _Option(["-thr", "thr"],
                "Threshold T = x.",
                checker_function=lambda x: isinstance(x, int),
                equate=False),
        _Switch(["-xfr", "xfr"],
                "'exclude fragments' - list of fragments can be "
                "specified that are NOT considered for pairwise alignment"),
        # The input file is the only parameter marked as required.
        _Argument(["input"],
                  "Input file name. Must be FASTA format",
                  filename=True,
                  is_required=True),
    ]
    # The base class initialiser runs last, after self.parameters is set.
    AbstractCommandline.__init__(self, cmd, **kwargs)
180
182 """Run the module's doctests (PRIVATE)."""
183 print "Runing modules doctests..."
184 import doctest
185 doctest.testmod()
186 print "Done"
187
# Call _test() only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    _test()
190