Package Bio :: Package Align :: Package Applications :: Module _Clustalw
[hide private]
[frames | no frames]

Source Code for Module Bio.Align.Applications._Clustalw

  1  # Copyright 2009 by Cymon J. Cox.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """Command line wrapper for the multiple alignment program Clustal W. 
  6  """ 
  7   
  8  __docformat__ = "epytext en" #Don't just use plain text in epydoc API pages! 
  9   
 10  import os 
 11  from Bio.Application import _Option, _Switch, AbstractCommandline 
 12   
def _names(key):
    """Return the four accepted spellings of a ClustalW parameter (PRIVATE).

    For key "infile" this gives ["-infile", "-INFILE", "INFILE", "infile"],
    i.e. dashed lower case, dashed upper case, bare upper case and bare
    lower case, in that order.
    """
    upper = key.upper()
    return ["-" + key, "-" + upper, upper, key]


def _is_int(value):
    """Return True if value is an int (PRIVATE)."""
    return isinstance(value, int)


def _is_number(value):
    """Return True if value is an int or a float (PRIVATE)."""
    return isinstance(value, (int, float))


def _keyword(*names):
    """Build a checker accepting the given keywords (PRIVATE).

    The names are given in upper case; the returned function accepts each
    of them either fully upper case or fully lower case (mixed case is
    rejected).
    """
    allowed = tuple(names) + tuple(name.lower() for name in names)
    return lambda value: value in allowed


def _keyword_or_file(*names):
    """Build a checker accepting the given keywords or an existing path (PRIVATE)."""
    is_keyword = _keyword(*names)
    return lambda value: is_keyword(value) or os.path.exists(value)


class ClustalwCommandline(AbstractCommandline):
    """Command line wrapper for clustalw (version one or two).

    http://www.clustal.org/

    Example:

    >>> from Bio.Align.Applications import ClustalwCommandline
    >>> in_file = "unaligned.fasta"
    >>> clustalw_cline = ClustalwCommandline("clustalw2", infile=in_file)
    >>> print(clustalw_cline)
    clustalw2 -infile=unaligned.fasta

    You would typically run the command line with clustalw_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA,
    McWilliam H, Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD,
    Gibson TJ, Higgins DG. (2007). Clustal W and Clustal X version 2.0.
    Bioinformatics, 23, 2947-2948.

    Last checked against versions: 1.83 and 2.0.10
    """
    #TODO - Should we default to cmd="clustalw2" now?
    def __init__(self, cmd="clustalw", **kwargs):
        """Define the ClustalW parameter table and initialise the base class.

        @param cmd: Command used to invoke ClustalW (default "clustalw").
        @param kwargs: Keyword arguments passed through unchanged to
            AbstractCommandline.__init__ (e.g. infile="unaligned.fasta"
            as in the class example).
        """
        self.parameters = [
            _Option(_names("infile"),
                    "Input sequences.",
                    filename=True),
            _Option(_names("profile1"),
                    "Profiles (old alignment).",
                    filename=True),
            _Option(_names("profile2"),
                    "Profiles (old alignment).",
                    filename=True),
            ################## VERBS (do things) #############################
            _Switch(_names("options"),
                    "List the command line parameters"),
            _Switch(_names("help"),
                    "Outline the command line params."),
            _Switch(_names("check"),
                    "Outline the command line params."),
            _Switch(_names("fullhelp"),
                    "Output full help content."),
            _Switch(_names("align"),
                    "Do full multiple alignment."),
            _Switch(_names("tree"),
                    "Calculate NJ tree."),
            _Option(_names("bootstrap"),
                    "Bootstrap a NJ tree (n= number of bootstraps; def. = 1000).",
                    checker_function=_is_int),
            _Switch(_names("convert"),
                    "Output the input sequences in a different file format."),
            ##################### PARAMETERS (set things) #########################
            # ***General settings:****
            # -interactive is deliberately not wrapped: it makes no sense
            # when driving ClustalW from Biopython.
            _Switch(_names("quicktree"),
                    "Use FAST algorithm for the alignment guide tree"),
            _Option(_names("type"),
                    "PROTEIN or DNA sequences",
                    checker_function=_keyword("PROTEIN", "DNA")),
            _Switch(_names("negative"),
                    "Protein alignment with negative values in matrix"),
            _Option(_names("outfile"),
                    "Output sequence alignment file name",
                    filename=True),
            _Option(_names("output"),
                    "Output format: GCG, GDE, PHYLIP, PIR or NEXUS",
                    checker_function=_keyword("GCG", "GDE", "PHYLIP",
                                              "PIR", "NEXUS")),
            _Option(_names("outorder"),
                    "Output taxon order: INPUT or ALIGNED",
                    checker_function=_keyword("INPUT", "ALIGNED")),
            _Option(_names("case"),
                    "LOWER or UPPER (for GDE output only)",
                    checker_function=_keyword("UPPER", "LOWER")),
            _Option(_names("seqnos"),
                    "OFF or ON (for Clustal output only)",
                    checker_function=_keyword("ON", "OFF")),
            _Option(_names("seqno_range"),
                    "OFF or ON (NEW- for all output formats)",
                    checker_function=_keyword("ON", "OFF")),
            _Option(_names("range"),
                    "Sequence range to write starting m to m+n. "
                    "Input as string eg. '24,200'"),
            _Option(_names("maxseqlen"),
                    "Maximum allowed input sequence length",
                    checker_function=_is_int),
            _Switch(_names("quiet"),
                    "Reduce console output to minimum"),
            _Switch(_names("stats"),
                    "Log some alignments statistics to file"),
            # ***Fast Pairwise Alignments:***
            _Option(_names("ktuple"),
                    "Word size",
                    checker_function=_is_number),
            _Option(_names("topdiags"),
                    "Number of best diags.",
                    checker_function=_is_number),
            _Option(_names("window"),
                    "Window around best diags.",
                    checker_function=_is_number),
            _Option(_names("pairgap"),
                    "Gap penalty",
                    checker_function=_is_number),
            _Option(_names("score"),
                    "Either: PERCENT or ABSOLUTE",
                    checker_function=_keyword("PERCENT", "ABSOLUTE")),
            # ***Slow Pairwise Alignments:***
            _Option(_names("pwmatrix"),
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                    checker_function=_keyword_or_file("BLOSUM", "PAM",
                                                      "GONNET", "ID"),
                    filename=True),
            _Option(_names("pwdnamatrix"),
                    "DNA weight matrix=IUB, CLUSTALW or filename",
                    checker_function=_keyword_or_file("IUB", "CLUSTALW"),
                    filename=True),
            _Option(_names("pwgapopen"),
                    "Gap opening penalty",
                    checker_function=_is_number),
            # Description previously read "Gap opening penalty" (copy/paste
            # slip from -pwgapopen).
            _Option(_names("pwgapext"),
                    "Gap extension penalty",
                    checker_function=_is_number),
            # ***Multiple Alignments:***
            _Option(_names("newtree"),
                    "Output file name for newly created guide tree",
                    filename=True),
            # The guide tree is an input file, so it must already exist.
            # The checker used to be "lambda x: os.path.exists", which
            # returned the function itself and therefore accepted anything.
            _Option(_names("usetree"),
                    "File name of guide tree",
                    checker_function=os.path.exists,
                    filename=True),
            _Option(_names("matrix"),
                    "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                    checker_function=_keyword_or_file("BLOSUM", "PAM",
                                                      "GONNET", "ID"),
                    filename=True),
            _Option(_names("dnamatrix"),
                    "DNA weight matrix=IUB, CLUSTALW or filename",
                    checker_function=_keyword_or_file("IUB", "CLUSTALW"),
                    filename=True),
            _Option(_names("gapopen"),
                    "Gap opening penalty",
                    checker_function=_is_number),
            _Option(_names("gapext"),
                    "Gap extension penalty",
                    checker_function=_is_number),
            _Switch(_names("endgaps"),
                    "No end gap separation pen."),
            _Option(_names("gapdist"),
                    "Gap separation pen. range",
                    checker_function=_is_number),
            _Switch(_names("nopgap"),
                    "Residue-specific gaps off"),
            _Switch(_names("nohgap"),
                    "Hydrophilic gaps off"),
            # NOTE(review): ClustalW's own help appears to list this as
            # "-HGAPRESIDUES=" (taking a value); kept as a switch here so
            # existing callers keep working - confirm against clustalw help.
            _Switch(_names("hgapresidues"),
                    "List hydrophilic res."),
            _Option(_names("maxdiv"),
                    "% ident. for delay",
                    checker_function=_is_number),
            _Option(_names("transweight"),
                    "Transitions weighting",
                    checker_function=_is_number),
            _Option(_names("iteration"),
                    "NONE or TREE or ALIGNMENT",
                    checker_function=_keyword("NONE", "TREE", "ALIGNMENT")),
            _Option(_names("numiter"),
                    "maximum number of iterations to perform",
                    checker_function=_is_int),
            _Switch(_names("noweights"),
                    "Disable sequence weighting"),
            # ***Profile Alignments:***
            _Switch(_names("profile"),
                    "Merge two alignments by profile alignment"),
            _Option(_names("newtree1"),
                    "Output file name for new guide tree of profile1",
                    filename=True),
            _Option(_names("newtree2"),
                    "Output file for new guide tree of profile2",
                    filename=True),
            _Option(_names("usetree1"),
                    "File name of guide tree for profile1",
                    checker_function=os.path.exists,
                    filename=True),
            _Option(_names("usetree2"),
                    "File name of guide tree for profile2",
                    checker_function=os.path.exists,
                    filename=True),
            # ***Sequence to Profile Alignments:***
            _Switch(_names("sequences"),
                    "Sequentially add profile2 sequences to profile1 alignment"),
            _Switch(_names("nosecstr1"),
                    "Do not use secondary structure-gap penalty mask for profile 1"),
            _Switch(_names("nosecstr2"),
                    "Do not use secondary structure-gap penalty mask for profile 2"),
            # ***Structure Alignments:***
            _Option(_names("secstrout"),
                    "STRUCTURE or MASK or BOTH or NONE output in alignment file",
                    checker_function=_keyword("STRUCTURE", "MASK",
                                              "BOTH", "NONE")),
            _Option(_names("helixgap"),
                    "Gap penalty for helix core residues",
                    checker_function=_is_number),
            _Option(_names("strandgap"),
                    "gap penalty for strand core residues",
                    checker_function=_is_number),
            _Option(_names("loopgap"),
                    "Gap penalty for loop regions",
                    checker_function=_is_number),
            _Option(_names("terminalgap"),
                    "Gap penalty for structure termini",
                    checker_function=_is_number),
            _Option(_names("helixendin"),
                    "Number of residues inside helix to be treated as terminal",
                    checker_function=_is_int),
            _Option(_names("helixendout"),
                    "Number of residues outside helix to be treated as terminal",
                    checker_function=_is_int),
            _Option(_names("strandendin"),
                    "Number of residues inside strand to be treated as terminal",
                    checker_function=_is_int),
            _Option(_names("strandendout"),
                    "number of residues outside strand to be treated as terminal",
                    checker_function=_is_int),
            # ***Trees:***
            _Option(_names("outputtree"),
                    "nj OR phylip OR dist OR nexus",
                    checker_function=_keyword("NJ", "PHYLIP",
                                              "DIST", "NEXUS")),
            _Option(_names("seed"),
                    "Seed number for bootstraps.",
                    checker_function=_is_int),
            _Switch(_names("kimura"),
                    "Use Kimura's correction."),
            _Switch(_names("tossgaps"),
                    "Ignore positions with gaps."),
            _Option(_names("bootlabels"),
                    "Node OR branch position of bootstrap values in tree display",
                    checker_function=_keyword("NODE", "BRANCH")),
            _Option(_names("clustering"),
                    "NJ or UPGMA",
                    checker_function=_keyword("NJ", "UPGMA")),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
308
def _test():
    """Run the module's doctests (PRIVATE).

    Prints a short banner before and after calling doctest.testmod().
    """
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Running ClustalW doctests...")
    import doctest
    doctest.testmod()
    print("Done")

# Run the doctests only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    _test()