Package Bio :: Package Align :: Package Applications :: Module _Dialign
[hide private]
[frames | no frames]

Source Code for Module Bio.Align.Applications._Dialign

  1  # Copyright 2009 by Cymon J. Cox.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Command line wrapper for the multiple alignment program DIALIGN2-2. 
  6  """ 
  7   
  8  __docformat__ = "epytext en" #Don't just use plain text in epydoc API pages! 
  9   
 10  from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline 
 11   
class DialignCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program DIALIGN2-2.

    http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html

    Example:

    To align a FASTA file (unaligned.fasta) with the output file names
    aligned.* including a FASTA output file (aligned.fa), use:

    >>> from Bio.Align.Applications import DialignCommandline
    >>> dialign_cline = DialignCommandline(input="unaligned.fasta",
    ...                                    fn="aligned", fa=True)
    >>> print(dialign_cline)
    dialign2-2 -fa -fn aligned unaligned.fasta

    You would typically run the command line with dialign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
    Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.

    Last checked against version: 2.2
    """

    def __init__(self, cmd="dialign2-2", **kwargs):
        """Declare the DIALIGN parameters and initialise the wrapper.

        @param cmd: name of the executable; stored as C{program_name} and
            passed on to C{AbstractCommandline.__init__}.
        @param kwargs: forwarded unchanged to
            C{AbstractCommandline.__init__} (the class example passes
            parameter values such as C{input}, C{fn} and C{fa} this way).
        """
        self.program_name = cmd
        # Each entry is one parameter object built from
        # [command line flag, Python keyword name] (just the keyword name
        # for the input argument) plus a help text.
        self.parameters = [
            _Switch(["-afc", "afc"],
                    "Creates additional output file '*.afc' "
                    "containing data of all fragments considered "
                    "for alignment WARNING: this file can be HUGE !"),
            _Switch(["-afc_v", "afc_v"],
                    "Like '-afc' but verbose: fragments are explicitly "
                    "printed. WARNING: this file can be EVEN BIGGER !"),
            _Switch(["-anc", "anc"],
                    "Anchored alignment. Requires a file <seq_file>.anc "
                    "containing anchor points."),
            _Switch(["-cs", "cs"],
                    "If segments are translated, not only the `Watson "
                    "strand' but also the `Crick strand' is looked at."),
            _Switch(["-cw", "cw"],
                    "Additional output file in CLUSTAL W format."),
            _Switch(["-ds", "ds"],
                    "`dna alignment speed up' - non-translated nucleic acid "
                    "fragments are taken into account only if they start "
                    "with at least two matches. Speeds up DNA alignment at "
                    "the expense of sensitivity."),
            _Switch(["-fa", "fa"],
                    "Additional output file in FASTA format."),
            _Switch(["-ff", "ff"],
                    "Creates file *.frg containing information about all "
                    "fragments that are part of the respective optimal "
                    "pairwise alignmnets plus information about "
                    "consistency in the multiple alignment"),
            _Option(["-fn", "fn"],
                    "Output files are named <out_file>.<extension>.",
                    equate=False),
            _Switch(["-fop", "fop"],
                    "Creates file *.fop containing coordinates of all "
                    "fragments that are part of the respective pairwise alignments."),
            _Switch(["-fsm", "fsm"],
                    "Creates file *.fsm containing coordinates of all "
                    "fragments that are part of the final alignment"),
            _Switch(["-iw", "iw"],
                    "Overlap weights switched off (by default, overlap "
                    "weights are used if up to 35 sequences are aligned). "
                    "This option speeds up the alignment but may lead "
                    "to reduced alignment quality."),
            _Switch(["-lgs", "lgs"],
                    "`long genomic sequences' - combines the following "
                    "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                    "-fop, -ff, -cs, -ds, -pst "),
            _Switch(["-lgs_t", "lgs_t"],
                    "Like '-lgs' but with all segment pairs assessed "
                    "at the peptide level (rather than 'mixed alignments' "
                    "as with the '-lgs' option). Therefore faster than "
                    "-lgs but not very sensitive for non-coding regions."),
            _Option(["-lmax", "lmax"],
                    "Maximum fragment length = x (default: x = 40 or "
                    "x = 120 for `translated' fragments). Shorter x "
                    "speeds up the program but may affect alignment quality.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-lo", "lo"],
                    "(Long Output) Additional file *.log with information "
                    "about fragments selected for pairwise alignment and "
                    "about consistency in multi-alignment proceedure."),
            _Switch(["-ma", "ma"],
                    "`mixed alignments' consisting of P-fragments and "
                    "N-fragments if nucleic acid sequences are aligned."),
            _Switch(["-mask", "mask"],
                    "Residues not belonging to selected fragments are "
                    "replaced by `*' characters in output alignment "
                    "(rather than being printed in lower-case characters)"),
            _Switch(["-mat", "mat"],
                    "Creates file *mat with substitution counts derived "
                    "from the fragments that have been selected for alignment."),
            # NOTE(review): the help text refers to a threshold "t", which
            # suggests the program expects a value after -mat_thr; declared
            # as a switch, no value can be supplied. Left unchanged to keep
            # the keyword interface stable -- confirm against the DIALIGN
            # user guide before converting to an option.
            _Switch(["-mat_thr", "mat_thr"],
                    "Like '-mat' but only fragments with weight score "
                    "> t are considered"),
            _Switch(["-max_link", "max_link"],
                    "'maximum linkage' clustering used to construct "
                    "sequence tree (instead of UPGMA)."),
            _Switch(["-min_link", "min_link"],
                    "'minimum linkage' clustering used."),
            _Option(["-mot", "mot"],
                    "'motif' option.",
                    equate=False),
            _Switch(["-msf", "msf"],
                    "Separate output file in MSF format."),
            _Switch(["-n", "n"],
                    "Input sequences are nucleic acid sequences. "
                    "No translation of fragments."),
            _Switch(["-nt", "nt"],
                    "Input sequences are nucleic acid sequences and "
                    "`nucleic acid segments' are translated to `peptide "
                    "segments'."),
            _Switch(["-nta", "nta"],
                    "`no textual alignment' - textual alignment suppressed. "
                    "This option makes sense if other output files are of "
                    "intrest -- e.g. the fragment files created with -ff, "
                    "-fop, -fsm or -lo."),
            _Switch(["-o", "o"],
                    "Fast version, resulting alignments may be slightly "
                    "different."),
            _Switch(["-ow", "ow"],
                    "Overlap weights enforced (By default, overlap weights "
                    "are used only if up to 35 sequences are aligned since "
                    "calculating overlap weights is time consuming)."),
            _Switch(["-pst", "pst"],
                    "'print status'. Creates and updates a file *.sta with "
                    "information about the current status of the program "
                    "run. This option is recommended if large data sets "
                    "are aligned since it allows the user to estimate the "
                    "remaining running time."),
            # NOTE(review): the -lgs help text above lists "-smin 8", which
            # suggests -smin takes a numeric value; declared as a switch,
            # no value can be supplied. Left unchanged to keep the keyword
            # interface stable -- confirm against the DIALIGN user guide.
            _Switch(["-smin", "smin"],
                    "Minimum similarity value for first residue pair "
                    "(or codon pair) in fragments. Speeds up protein "
                    "alignment or alignment of translated DNA fragments "
                    "at the expense of sensitivity."),
            _Option(["-stars", "stars"],
                    "Maximum number of `*' characters indicating degree "
                    "of local similarity among sequences. By default, no "
                    "stars are used but numbers between 0 and 9, instead.",
                    checker_function=lambda x: x in range(0, 10),
                    equate=False),
            _Switch(["-stdo", "stdo"],
                    "Results written to standard output."),
            _Switch(["-ta", "ta"],
                    "Standard textual alignment printed (overrides "
                    "suppression of textual alignments in special "
                    "options, e.g. -lgs)"),
            _Option(["-thr", "thr"],
                    "Threshold T = x.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-xfr", "xfr"],
                    "'exclude fragments' - list of fragments can be "
                    "specified that are NOT considered for pairwise alignment"),
            _Argument(["input"],
                      "Input file name. Must be FASTA format",
                      filename=True,
                      is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
180
181 -def _test():
182 """Run the module's doctests (PRIVATE).""" 183 print "Runing modules doctests..." 184 import doctest 185 doctest.testmod() 186 print "Done"
187 188 if __name__ == "__main__": 189 _test() 190