Package Bio :: Package Emboss :: Module Applications
[hide private]
[frames | no frames]

Source Code for Module Bio.Emboss.Applications

   1  # Copyright 2001-2009 Brad Chapman. 
   2  # Revisions copyright 2009-2010 by Peter Cock. 
   3  # Revisions copyright 2009 by David Winter. 
   4  # Revisions copyright 2009-2010 by Leighton Pritchard. 
   5  # All rights reserved. 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  """Code to interact with and run various EMBOSS programs. 
  10   
  11  These classes follow the AbstractCommandline interfaces for running 
  12  programs. 
  13  """ 
  14   
  15  from Bio.Application import _Option, _Switch, AbstractCommandline 
  16   
class _EmbossMinimalCommandLine(AbstractCommandline):
    """Base Commandline object for EMBOSS wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the EMBOSS tools:

     - auto               Turn off prompts
     - stdout             Write standard output
     - filter             Read standard input, write standard output
     - options            Prompt for standard and additional values
     - debug              Write debug output to program.dbg
     - verbose            Report some/full command line options
     - help               Report command line options. More
                          information on associated and general
                          qualifiers can be found with -help -verbose
     - warning            Report warnings
     - error              Report errors
     - fatal              Report fatal errors
     - die                Report dying program messages
    """
    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared EMBOSS switches and initialise the base class.

        cmd is the program name; subclasses are expected to supply it (the
        None default only exists so the assertion below can catch a missing
        value).  Keyword arguments are handed on to AbstractCommandline.
        """
        assert cmd is not None
        extra_parameters = [
            _Switch(["-auto", "auto"],
                    """Turn off prompts.

                    Automatic mode disables prompting, so we recommend you set
                    this argument all the time when calling an EMBOSS tool from
                    Biopython.
                    """),
            _Switch(["-stdout", "stdout"],
                    "Write standard output."),
            _Switch(["-filter", "filter"],
                    "Read standard input, write standard output."),
            _Switch(["-options", "options"],
                    """Prompt for standard and additional values.

                    If you are calling an EMBOSS tool from within Biopython,
                    we DO NOT recommend using this option.
                    """),
            _Switch(["-debug", "debug"],
                    "Write debug output to program.dbg."),
            _Switch(["-verbose", "verbose"],
                    "Report some/full command line options"),
            _Switch(["-help", "help"],
                    """Report command line options.

                    More information on associated and general qualifiers can
                    be found with -help -verbose
                    """),
            _Switch(["-warning", "warning"],
                    "Report warnings."),
            _Switch(["-error", "error"],
                    "Report errors."),
            # The class docstring has always advertised this qualifier, but
            # the switch itself was never declared, so it could not be set.
            _Switch(["-fatal", "fatal"],
                    "Report fatal errors."),
            _Switch(["-die", "die"],
                    "Report dying program messages."),
            ]
        try:
            #Insert extra parameters - at the start just in case there
            #are any arguments which must come last:
            self.parameters = extra_parameters + self.parameters
        except AttributeError:
            #Should we raise an error?  The subclass should have set this up!
            self.parameters = extra_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)
82
class _EmbossCommandLine(_EmbossMinimalCommandLine):
    """Base Commandline object for EMBOSS wrappers (PRIVATE).

    Intended for subclassing.  On top of the options shared by every
    EMBOSS tool (see _EmbossMinimalCommandLine) it adds:

     - outfile            Output filename

    and refuses to validate unless some output destination was chosen.
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the -outfile option and initialise the minimal base.

        cmd is the program name, which the subclass must provide; keyword
        arguments are passed on to _EmbossMinimalCommandLine.
        """
        assert cmd is not None
        outfile_option = _Option(["-outfile", "outfile"],
                                 "Output filename",
                                 filename=True)
        shared = [outfile_option]
        try:
            # Shared options go first, in case any of the subclass
            # arguments must come last on the command line.
            combined = shared + self.parameters
        except AttributeError:
            # The subclass did not define any parameters of its own.
            combined = shared
        self.parameters = combined
        _EmbossMinimalCommandLine.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check an output destination is set, then run the base checks.

        Raises ValueError when none of outfile, filter or stdout is set.
        This is checked here because outfile cannot simply be flagged as
        required: filter or stdout are acceptable alternatives.
        """
        has_destination = self.outfile or self.filter or self.stdout
        if not has_destination:
            raise ValueError("You must either set outfile (output filename), "
                             "or enable filter or stdout (output to stdout).")
        return _EmbossMinimalCommandLine._validate(self)
116
class Primer3Commandline(_EmbossCommandLine):
    """Commandline object for the Primer3 interface from EMBOSS.

    The precise set of supported arguments depends on your version of EMBOSS.
    This version accepts arguments current at EMBOSS 6.1.0, but in order to
    remain backwards compatible also supports the old argument names as well.

    e.g. Using EMBOSS 6.1.0 or later,

    >>> cline = Primer3Commandline(sequence="mysequence.fas", auto=True, hybridprobe=True)
    >>> cline.explainflag = True
    >>> cline.osizeopt=20
    >>> cline.psizeopt=200
    >>> cline.outfile = "myresults.out"
    >>> cline.bogusparameter = 1967 # Invalid parameter
    Traceback (most recent call last):
        ...
    ValueError: Option name bogusparameter was not found.
    >>> print cline
    eprimer3 -auto -outfile=myresults.out -sequence=mysequence.fas -hybridprobe=True -psizeopt=200 -osizeopt=20 -explainflag=True

    The equivalent for anyone still using an older version of EMBOSS would be:

    >>> cline = Primer3Commandline(sequence="mysequence.fas", auto=True, hybridprobe=True)
    >>> cline.explainflag = True
    >>> cline.oligosize=20 # Old EMBOSS, instead of osizeopt
    >>> cline.productosize=200 # Old EMBOSS, instead of psizeopt
    >>> cline.outfile = "myresults.out"
    >>> print cline
    eprimer3 -auto -outfile=myresults.out -sequence=mysequence.fas -hybridprobe=True -productosize=200 -oligosize=20 -explainflag=True

    """
    def __init__(self, cmd="eprimer3", **kwargs):
        """Declare the eprimer3 options (old and new names side by side).

        cmd is the program name to run; keyword arguments are passed on to
        _EmbossCommandLine.  Each option renamed in EMBOSS 6.1.0 appears
        twice: once under its old name (described as OBSOLETE) and once
        under its replacement.
        """
        self.parameters = [
            _Option(["-sequence", "sequence"],
                    "Sequence to choose primers from.",
                    is_required=True),
            _Option(["-task", "task"],
                    "Tell eprimer3 what task to perform."),
            _Option(["-hybridprobe", "hybridprobe"],
                    "Find an internal oligo to use as a hyb probe."),
            _Option(["-numreturn", "numreturn"],
                    "Maximum number of primer pairs to return."),
            _Option(["-includedregion", "includedregion"],
                    "Subregion of the sequence in which to pick primers."),
            _Option(["-target", "target"],
                    "Sequence to target for flanking primers."),
            _Option(["-excludedregion", "excludedregion"],
                    "Regions to exclude from primer picking."),
            _Option(["-forwardinput", "forwardinput"],
                    "Sequence of a forward primer to check."),
            _Option(["-reverseinput", "reverseinput"],
                    "Sequence of a reverse primer to check."),
            _Option(["-gcclamp", "gcclamp"],
                    "The required number of Gs and Cs at the 3' of each primer."),
            _Option(["-osize", "osize"],
                    "Optimum length of a primer oligo."),
            _Option(["-minsize", "minsize"],
                    "Minimum length of a primer oligo."),
            _Option(["-maxsize", "maxsize"],
                    "Maximum length of a primer oligo."),
            _Option(["-otm", "otm"],
                    "Optimum melting temperature for a primer oligo."),
            _Option(["-mintm", "mintm"],
                    "Minimum melting temperature for a primer oligo."),
            _Option(["-maxtm", "maxtm"],
                    "Maximum melting temperature for a primer oligo."),
            _Option(["-maxdifftm", "maxdifftm"],
                    "Maximum difference in melting temperatures between "
                    "forward and reverse primers."),
            _Option(["-ogcpercent", "ogcpercent"],
                    "Optimum GC% for a primer."),
            _Option(["-mingc", "mingc"],
                    "Minimum GC% for a primer."),
            _Option(["-maxgc", "maxgc"],
                    "Maximum GC% for a primer."),
            _Option(["-saltconc", "saltconc"],
                    "Millimolar salt concentration in the PCR."),
            _Option(["-dnaconc", "dnaconc"],
                    "Nanomolar concentration of annealing oligos in the PCR."),
            _Option(["-maxpolyx", "maxpolyx"],
                    "Maximum allowable mononucleotide repeat length in a primer."),
            #Product size:
            _Option(["-productosize", "productosize"],
                    """Optimum size for the PCR product (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -psizeopt
                    """),
            _Option(["-psizeopt", "psizeopt"],
                    """Optimum size for the PCR product.

                    Option added in EMBOSS 6.1.0, replacing -productosize
                    """),
            _Option(["-productsizerange", "productsizerange"],
                    """Acceptable range of length for the PCR product (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -prange
                    """),
            _Option(["-prange", "prange"],
                    """Acceptable range of length for the PCR product.

                    Option added in EMBOSS 6.1.0, replacing -productsizerange
                    """),
            #Product melting temperature:
            _Option(["-productotm", "productotm"],
                    """Optimum melting temperature for the PCR product (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ptmopt
                    """),
            _Option(["-ptmopt", "ptmopt"],
                    """Optimum melting temperature for the PCR product.

                    Option added in EMBOSS 6.1.0, replacing -productotm
                    """),
            _Option(["-productmintm", "productmintm"],
                    """Minimum allowed melting temperature for the amplicon (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ptmmin
                    """),
            _Option(["-ptmmin", "ptmmin"],
                    """Minimum allowed melting temperature for the amplicon.

                    Option added in EMBOSS 6.1.0, replacing -productmintm
                    """),
            _Option(["-productmaxtm", "productmaxtm"],
                    """Maximum allowed melting temperature for the amplicon (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ptmmax
                    """),
            _Option(["-ptmmax", "ptmmax"],
                    """Maximum allowed melting temperature for the amplicon.

                    Option added in EMBOSS 6.1.0, replacing -productmaxtm
                    """),
            #Note to self, should be -oexcludedregion not -oexcluderegion
            _Option(["-oexcludedregion", "oexcludedregion"],
                    """Do not pick internal oligos in this region.

                    Option added in EMBOSS 6.1.0, replacing -oligoexcludedregion.
                    """),
            _Option(["-oligoexcludedregion", "oligoexcludedregion"],
                    """Do not pick internal oligos in this region (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -oexcludedregion.
                    """),
            _Option(["-oligoinput", "oligoinput"],
                    "Sequence of the internal oligo."),
            #Oligo length:
            _Option(["-oligosize", "oligosize"],
                    """Optimum length of internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -osizeopt.
                    """),
            _Option(["-osizeopt", "osizeopt"],
                    """Optimum length of internal oligo.

                    Option added in EMBOSS 6.1.0, replaces -oligosize
                    """),
            _Option(["-oligominsize", "oligominsize"],
                    """Minimum length of internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ominsize.
                    """),
            _Option(["-ominsize", "ominsize"],
                    """Minimum length of internal oligo.

                    Option added in EMBOSS 6.1.0, replaces -oligominsize
                    """),
            _Option(["-oligomaxsize", "oligomaxsize"],
                    """Maximum length of internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -omaxsize.
                    """),
            _Option(["-omaxsize", "omaxsize"],
                    """Maximum length of internal oligo.

                    Option added in EMBOSS 6.1.0, replaces -oligomaxsize
                    """),
            #Oligo melting temperature:
            _Option(["-oligotm", "oligotm"],
                    """Optimum melting temperature of internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -otmopt.
                    """),
            _Option(["-otmopt", "otmopt"],
                    """Optimum melting temperature of internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligotm
                    """),
            _Option(["-oligomintm", "oligomintm"],
                    """Minimum melting temperature of internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -otmmin.
                    """),
            _Option(["-otmmin", "otmmin"],
                    """Minimum melting temperature of internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligomintm
                    """),
            _Option(["-oligomaxtm", "oligomaxtm"],
                    """Maximum melting temperature of internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -otmmax.
                    """),
            _Option(["-otmmax", "otmmax"],
                    """Maximum melting temperature of internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligomaxtm
                    """),
            #Oligo GC percent:
            _Option(["-oligoogcpercent", "oligoogcpercent"],
                    """Optimum GC% for internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ogcopt.
                    """),
            _Option(["-ogcopt", "ogcopt"],
                    """Optimum GC% for internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligoogcpercent
                    """),
            _Option(["-oligomingc", "oligomingc"],
                    """Minimum GC% for internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ogcmin.
                    """),
            _Option(["-ogcmin", "ogcmin"],
                    """Minimum GC% for internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligomingc
                    """),
            _Option(["-oligomaxgc", "oligomaxgc"],
                    """Maximum GC% for internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -ogcmax
                    """),
            _Option(["-ogcmax", "ogcmax"],
                    """Maximum GC% for internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligomaxgc
                    """),
            #Oligo salt concentration:
            _Option(["-oligosaltconc", "oligosaltconc"],
                    """Millimolar concentration of salt in the hybridisation (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -osaltconc
                    """),
            _Option(["-osaltconc", "osaltconc"],
                    """Millimolar concentration of salt in the hybridisation.

                    Option added in EMBOSS 6.1.0, replacing -oligosaltconc
                    """),
            _Option(["-oligodnaconc", "oligodnaconc"],
                    """Nanomolar concentration of internal oligo in the hybridisation (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -odnaconc
                    """),
            _Option(["-odnaconc", "odnaconc"],
                    """Nanomolar concentration of internal oligo in the hybridisation.

                    Option added in EMBOSS 6.1.0, replacing -oligodnaconc
                    """),
            #Oligo self complementarity
            _Option(["-oligoselfany", "oligoselfany"],
                    """Maximum allowable alignment score for self-complementarity (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -oanyself
                    """),
            _Option(["-oanyself", "oanyself"],
                    """Maximum allowable alignment score for self-complementarity.

                    Option added in EMBOSS 6.1.0, replacing -oligoselfany
                    """),
            _Option(["-oligoselfend", "oligoselfend"],
                    """Maximum allowable 3`-anchored global alignment score
                    for self-complementarity (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -oendself
                    """),
            _Option(["-oendself", "oendself"],
                    """Max 3`-anchored self-complementarity global alignment score.

                    Option added in EMBOSS 6.1.0, replacing -oligoselfend
                    """),
            _Option(["-oligomaxpolyx", "oligomaxpolyx"],
                    """Maximum length of mononucleotide repeat in internal oligo (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -opolyxmax
                    """),
            _Option(["-opolyxmax", "opolyxmax"],
                    """Maximum length of mononucleotide repeat in internal oligo.

                    Option added in EMBOSS 6.1.0, replacing -oligomaxpolyx
                    """),
            #Mispriming / mishybridisation libraries:
            _Option(["-mispriminglibraryfile", "mispriminglibraryfile"],
                    "File containing library of sequences to avoid amplifying"),
            _Option(["-maxmispriming", "maxmispriming"],
                    "Maximum allowed similarity of primers to sequences in "
                    "library specified by -mispriminglibraryfile"),
            _Option(["-oligomaxmishyb", "oligomaxmishyb"],
                    """Maximum alignment score for hybridisation of internal oligo to
                    library specified by -oligomishyblibraryfile (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -omishybmax
                    """),
            _Option(["-omishybmax", "omishybmax"],
                    """Maximum alignment score for hybridisation of internal oligo to
                    library specified by -mishyblibraryfile.

                    Option added in EMBOSS 6.1.0, replacing -oligomaxmishyb
                    """),
            _Option(["-oligomishyblibraryfile", "oligomishyblibraryfile"],
                    """Library file of seqs to avoid internal oligo hybridisation (OBSOLETE).

                    Option replaced in EMBOSS 6.1.0 by -mishyblibraryfile
                    """),
            _Option(["-mishyblibraryfile", "mishyblibraryfile"],
                    """Library file of seqs to avoid internal oligo hybridisation.

                    Option added in EMBOSS 6.1.0, replacing -oligomishyblibraryfile
                    """),
            _Option(["-explainflag", "explainflag"],
                    "Produce output tags with eprimer3 statistics"),
            ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
442 443
class PrimerSearchCommandline(_EmbossCommandLine):
    """Commandline object for the primersearch program from EMBOSS.

    The sequences to search, the primer pair file and the permitted
    percentage mismatch are all declared as required options.
    """

    def __init__(self, cmd="primersearch", **kwargs):
        """Declare the primersearch options and initialise the EMBOSS base.

        cmd is the program name to run; keyword arguments are passed on
        to _EmbossCommandLine.
        """
        # When this wrapper was written primersearch used -sequences and
        # -primers as the argument names.  Since at least EMBOSS 5.0 (and
        # perhaps earlier) these have been -seqall and -infile instead,
        # so both spellings are listed for each option.
        required = [
            _Option(["-seqall", "-sequences", "sequences", "seqall"],
                    "Sequence to look for the primer pairs in.",
                    is_required=True),
            _Option(["-infile", "-primers", "primers", "infile"],
                    "File containing the primer pairs to search for.",
                    is_required=True,
                    filename=True),
            _Option(["-mismatchpercent", "mismatchpercent"],
                    "Allowed percentage mismatch (any integer value, default 0).",
                    is_required=True),
        ]
        sequence_type = [
            _Option(["-snucleotide", "snucleotide"],
                    "Sequences are nucleotide (boolean)"),
            _Option(["-sprotein", "sprotein"],
                    "Sequences are protein (boolean)"),
        ]
        self.parameters = required + sequence_type
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
471 472
class FDNADistCommandline(_EmbossCommandLine):
    """Commandline object for the fdnadist program from EMBOSS.

    fdnadist is an EMBOSS wrapper for the PHYLIP program dnadist for
    calculating distance matrices from DNA sequence files.
    """

    def __init__(self, cmd="fdnadist", **kwargs):
        """Declare the fdnadist options and initialise the EMBOSS base.

        cmd is the program name to run; keyword arguments are passed on
        to _EmbossCommandLine.
        """
        required = [
            _Option(["-sequence", "sequence"],
                    "seq file to use (phylip)",
                    is_required=True,
                    filename=True),
            _Option(["-method", "method"],
                    "sub. model [f,k,j,l,s]",
                    is_required=True),
        ]
        # (name, description) pairs for the plain optional arguments; the
        # command line flag is simply the name with a leading dash.
        optional_spec = (
            ("gamma", "gamma [g, i,n]"),
            ("ncategories", "number of rate catergories (1-9)"),
            ("rate", "rate for each category"),
            ("categories", "File of substitution rate categories"),
            ("weights", "weights file"),
            ("gammacoefficient", "value for gamma (> 0.001)"),
            ("invarfrac", "proportoin of invariant sites"),
            ("ttratio", "ts/tv ratio"),
            ("freqsfrom", "use emprical base freqs"),
            ("basefreq", "specify basefreqs"),
            ("lower", "lower triangle matrix (y/N)"),
        )
        optional = [_Option(["-" + name, name], text)
                    for name, text in optional_spec]
        self.parameters = required + optional
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
511 512
class FTreeDistCommandline(_EmbossCommandLine):
    """Commandline object for the ftreedist program from EMBOSS.

    ftreedist is an EMBOSS wrapper for the PHYLIP program treedist used for
    calculating distance measures between phylogenetic trees.
    """

    def __init__(self, cmd="ftreedist", **kwargs):
        """Declare the ftreedist options and initialise the EMBOSS base.

        cmd is the program name to run; keyword arguments are passed on
        to _EmbossCommandLine.
        """
        # The input tree file is the only required option.
        tree_input = _Option(["-intreefile", "intreefile"],
                             "tree file to score (phylip)",
                             is_required=True,
                             filename=True)
        settings = [
            _Option(["-dtype", "dtype"],
                    "distance type ([S]ymetric, [b]ranch score)"),
            _Option(["-pairing", "pairing"],
                    "tree pairing method ([A]djacent pairs, all [p]ossible pairs)"),
            _Option(["-style", "style"],
                    "output style - [V]erbose, [f]ill, [s]parse"),
            _Option(["-noroot", "noroot"],
                    "treat trees as rooted [N/y]"),
            _Option(["-outgrno", "outgrno"],
                    "which taxon to root the trees with (starts from 0)"),
        ]
        self.parameters = [tree_input] + settings
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
536 537
class FNeighborCommandline(_EmbossCommandLine):
    """Commandline object for the fneighbor program from EMBOSS.

    fneighbor is an EMBOSS wrapper for the PHYLIP program neighbor used for
    calculating neighbor-joining or UPGMA trees from distance matrices.
    """

    def __init__(self, cmd="fneighbor", **kwargs):
        """Declare the fneighbor options and initialise the EMBOSS base.

        cmd is the program name to run; keyword arguments are passed on
        to _EmbossCommandLine.
        """
        # The distance matrix file is the only required option.
        options = [
            _Option(["-datafile", "datafile"],
                    "dist file to use (phylip)",
                    is_required=True,
                    filename=True),
        ]
        # (name, description) pairs for the plain optional arguments; the
        # command line flag is simply the name with a leading dash.
        optional_spec = (
            ("matrixtype", "is martrix [S]quare pr [u]pper or [l]ower"),
            ("treetype", "nj or UPGMA tree (n/u)"),
            ("outgrno", "taxon to use as OG"),
            ("jumble", "randommise input order (Y/n)"),
            ("seed", "provide a random seed"),
            ("trout", "write tree (Y/n)"),
            ("outtreefile", "filename for output tree"),
            ("progress", "print progress (Y/n)"),
            ("treeprint", "print tree (Y/n)"),
        )
        for name, text in optional_spec:
            options.append(_Option(["-" + name, name], text))
        self.parameters = options
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
569 570
class FSeqBootCommandline(_EmbossCommandLine):
    """Commandline object for the fseqboot program from EMBOSS.

    fseqboot is an EMBOSS wrapper for the PHYLIP program seqboot used to
    pseudo-sample alignment files.

    Two options were originally declared only under misspelled names
    ("catergories" and "jusweights").  The correctly spelled "categories"
    and "justweights" are now accepted as well; the misspelled names stay
    last in each name list so existing code keeps working.
    """
    def __init__(self, cmd="fseqboot", **kwargs):
        """Declare the fseqboot options and initialise the EMBOSS base.

        cmd is the program name to run; keyword arguments are passed on
        to _EmbossCommandLine.
        """
        self.parameters = [
            _Option(["-sequence", "sequence"],
                    "seq file to sample (phylip)",
                    filename=True,
                    is_required=True),
            # "categories" added as the correct spelling; the legacy
            # "catergories" is deliberately left as the final name.
            _Option(["-categories", "categories", "catergories"],
                    "file of input categories"),
            _Option(["-weights", "weights"],
                    " weights file"),
            _Option(["-test", "test"],
                    "specify operation, default is bootstrap"),
            _Option(["-regular", "regular"],
                    "absolute number to resample"),
            _Option(["-fracsample", "fracsample"],
                    "fraction to resample"),
            _Option(["-rewriteformat", "rewriteformat"],
                    "output format ([P]hyilp, [n]exus, [x]ml"),
            _Option(["-seqtype", "seqtype"],
                    "output format ([D]na, [p]rotein, [r]na"),
            # This description used to read "print progress (Y/n)", which
            # was copied from an unrelated option.
            _Option(["-blocksize", "blocksize"],
                    "block size for bootstrapping"),
            _Option(["-reps", "reps"],
                    "how many replicates, defaults to 100)"),
            # "justweights" added as the correct spelling; the legacy
            # "jusweights" is deliberately left as the final name.
            _Option(["-justweights", "justweights", "jusweights"],
                    "what to write out [D]atasets of just [w]eights"),
            _Option(["-seed", "seed"],
                    "specify random seed"),
            _Option(["-dotdiff", "dotdiff"],
                    "Use dot-differencing? [Y/n]"),
            ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
608 609
class FDNAParsCommandline(_EmbossCommandLine):
    """Commandline object for the fdnapars program from EMBOSS.

    fdnapars is an EMBOSS version of the PHYLIP program dnapars, for
    estimating trees from DNA sequences using parsimony.  Calling this
    command without providing a value for the option "-intreefile" will
    invoke "interactive mode" (and as a result fail if called with
    subprocess) if "-auto" is not set to true.
    """

    def __init__(self, cmd="fdnapars", **kwargs):
        """Declare the fdnapars options and initialise the EMBOSS base.

        cmd is the program name to run; keyword arguments are passed on
        to _EmbossCommandLine.
        """
        # The sequence file is the only required option.
        sequence_input = _Option(["-sequence", "sequence"],
                                 "seq file to use (phylip)",
                                 is_required=True,
                                 filename=True)
        # (name, description) pairs for the plain optional arguments; the
        # command line flag is simply the name with a leading dash.
        optional_spec = (
            ("intreefile", "Phylip tree file"),
            ("weights", "weights file"),
            ("maxtrees", "max trees to save during run"),
            ("thorough", "more thorough search (Y/n)"),
            ("rearrange", "Rearrange on jsut 1 best tree (Y/n)"),
            ("transversion", "Use tranversion parsimony (y/N)"),
            ("njumble",
             "number of times to randomise input order (default is 0)"),
            ("seed", "provde random seed"),
            ("outgrno", "Specify outgroup"),
            ("thresh", "Use threshold parsimony (y/N)"),
            ("threshold", "Threshold value"),
            ("trout", "Write trees to file (Y/n)"),
            ("outtreefile", "filename for output tree"),
            ("dotdiff", "Use dot-differencing? [Y/n]"),
        )
        optional = [_Option(["-" + name, name], text)
                    for name, text in optional_spec]
        self.parameters = [sequence_input] + optional
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
654 655
class FProtParsCommandline(_EmbossCommandLine):
    """Commandline object for the fprotpars program from EMBOSS.

    fprotpars is an EMBOSS version of the PHYLIP program protpars, for
    estimating trees from protein sequences using parsimony.  Calling this
    command without providing a value for the option "-intreefile" will
    invoke "interactive mode" (and as a result fail if called with
    subprocess) if "-auto" is not set to true.
    """

    def __init__(self, cmd="fprotpars", **kwargs):
        """Declare the fprotpars options and initialise the EMBOSS base.

        cmd is the program name to run; keyword arguments are passed on
        to _EmbossCommandLine.
        """
        # The sequence input and the output tree file are both required;
        # the optional input tree sits between them in the declared order.
        options = [
            _Option(["-sequence", "sequence"],
                    "seq file to use (phylip)",
                    is_required=True,
                    filename=True),
            _Option(["-intreefile", "intreefile"],
                    "Phylip tree file to score"),
            _Option(["-outtreefile", "outtreefile"],
                    "phylip tree output file",
                    is_required=True,
                    filename=True),
        ]
        # (name, description) pairs for the remaining plain options; the
        # command line flag is simply the name with a leading dash.
        optional_spec = (
            ("weights", "weights file"),
            ("whichcode", "which genetic code, [U,M,V,F,Y]]"),
            ("njumble",
             "number of times to randomise input order (default is 0)"),
            ("seed", "provde random seed"),
            ("outgrno", "Specify outgroup"),
            ("thresh", "Use threshold parsimony (y/N)"),
            ("threshold", "Threshold value"),
            ("trout", "Write trees to file (Y/n)"),
            ("dotdiff", "Use dot-differencing? [Y/n]"),
        )
        for name, text in optional_spec:
            options.append(_Option(["-" + name, name], text))
        self.parameters = options
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
696 697
class FProtDistCommandline(_EmbossCommandLine):
    """Commandline object for the fprotdist program from EMBOSS.

    fprotdist is an EMBOSS wrapper for the PHYLIP program protdist, which
    works on protein sequence files (the options below select substitution
    models and rate categories).

    The rate categories option used to be declared with the misspelled
    flag "-catergories".  The flag is now "-categories"; the misspelled
    names are kept as aliases, with "catergories" still last in the name
    list, so existing code keeps working.
    """
    def __init__(self, cmd="fprotdist", **kwargs):
        """Declare the fprotdist options and initialise the EMBOSS base.

        cmd is the program name to run; keyword arguments are passed on
        to _EmbossCommandLine.
        """
        self.parameters = [
            _Option(["-sequence", "sequence"],
                    "seq file to use (phylip)",
                    filename=True,
                    is_required=True),
            _Option(["-ncategories", "ncategories"],
                    "number of rate catergories (1-9)"),
            _Option(["-rate", "rate"],
                    "rate for each category"),
            # The correctly spelled flag is listed first.
            # NOTE(review): this assumes the first name is the one written
            # to the command line - confirm against Bio.Application.
            _Option(["-categories", "-catergories",
                     "categories", "catergories"],
                    "file of rates"),
            _Option(["-weights", "weights"],
                    "weights file"),
            _Option(["-method", "method"],
                    "sub. model [j,h,d,k,s,c]"),
            _Option(["-gamma", "gamma"],
                    "gamma [g, i,c]"),
            _Option(["-gammacoefficient", "gammacoefficient"],
                    "value for gamma (> 0.001)"),
            _Option(["-invarcoefficient", "invarcoefficient"],
                    "float for variation of substitution rate among sites"),
            _Option(["-aacateg", "aacateg"],
                    "Choose the category to use [G,C,H]"),
            _Option(["-whichcode", "whichcode"],
                    "genetic code [c,m,v,f,y]"),
            _Option(["-ease", "ease"],
                    "Pob change catergory (float between -0 and 1)"),
            _Option(["-ttratio", "ttratio"],
                    "Transition/transversion ratio (0-1)"),
            _Option(["-basefreq", "basefreq"],
                    "DNA base frequencies (space seperated list)"),
            ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
737 738
class FConsenseCommandline(_EmbossCommandLine):
    """Commandline object for the fconsense program from EMBOSS.

    fconsense is an EMBOSS wrapper for the PHYLIP program consense used to
    calculate consensus trees.
    """
    def __init__(self, cmd="fconsense", **kwargs):
        """Declare the fconsense options and initialise the EMBOSS base.

        cmd is the program name to run; keyword arguments are passed on
        to _EmbossCommandLine.
        """
        self.parameters = [
            _Option(["-intreefile", "intreefile"],
                    "file with phylip trees to make consensus from",
                    filename=True,
                    is_required=True),
            _Option(["-method", "method"],
                    "consensus method [s, mr, MRE, ml]"),
            _Option(["-mlfrac", "mlfrac"],
                    "cut-off freq for a branch to appear in consensus (0.5-1.0)"),
            _Option(["-root", "root"],
                    "treat trees as rooted (YES, no)"),
            _Option(["-outgrno", "outgrno"],
                    "OTU to use as outgroup (starts from 0)"),
            # This description used to repeat the -root text; -trout
            # controls whether trees are written to the tree file.
            _Option(["-trout", "trout"],
                    "write out trees to tree file (YES, no)"),
            _Option(["-outtreefile", "outtreefile"],
                    "Phylip tree output file (optional)"),
            ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
764 765
class WaterCommandline(_EmbossCommandLine):
    """Commandline object for the water program from EMBOSS.

    Both input sequences and both gap penalties are declared as required.
    """

    def __init__(self, cmd="water", **kwargs):
        """Declare the water options and initialise the EMBOSS base.

        cmd is the program name to run; keyword arguments are passed on
        to _EmbossCommandLine.
        """
        sequences = [
            _Option(["-asequence", "asequence"],
                    "First sequence to align",
                    is_required=True,
                    filename=True),
            _Option(["-bsequence", "bsequence"],
                    "Second sequence to align",
                    is_required=True,
                    filename=True),
        ]
        scoring = [
            _Option(["-gapopen", "gapopen"],
                    "Gap open penalty",
                    is_required=True),
            _Option(["-gapextend", "gapextend"],
                    "Gap extension penalty",
                    is_required=True),
            _Option(["-datafile", "datafile"],
                    "Matrix file",
                    filename=True),
        ]
        presentation = [
            _Option(["-similarity", "similarity"],
                    "Display percent identity and similarity"),
            _Option(["-snucleotide", "snucleotide"],
                    "Sequences are nucleotide (boolean)"),
            _Option(["-sprotein", "sprotein"],
                    "Sequences are protein (boolean)"),
            _Option(["-aformat", "aformat"],
                    "Display output in a different specified output format"),
        ]
        self.parameters = sequences + scoring + presentation
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
797 798
class NeedleCommandline(_EmbossCommandLine):
    """Commandline object for the needle program from EMBOSS.

    Both input sequences and both gap penalties are declared as required.
    """

    def __init__(self, cmd="needle", **kwargs):
        """Declare the needle options and initialise the EMBOSS base.

        cmd is the program name to run; keyword arguments are passed on
        to _EmbossCommandLine.
        """
        sequences = [
            _Option(["-asequence", "asequence"],
                    "First sequence to align",
                    is_required=True,
                    filename=True),
            _Option(["-bsequence", "bsequence"],
                    "Second sequence to align",
                    is_required=True,
                    filename=True),
        ]
        scoring = [
            _Option(["-gapopen", "gapopen"],
                    "Gap open penalty",
                    is_required=True),
            _Option(["-gapextend", "gapextend"],
                    "Gap extension penalty",
                    is_required=True),
            _Option(["-datafile", "datafile"],
                    "Matrix file",
                    filename=True),
        ]
        presentation = [
            _Option(["-similarity", "similarity"],
                    "Display percent identity and similarity"),
            _Option(["-snucleotide", "snucleotide"],
                    "Sequences are nucleotide (boolean)"),
            _Option(["-sprotein", "sprotein"],
                    "Sequences are protein (boolean)"),
            _Option(["-aformat", "aformat"],
                    "Display output in a different specified output format"),
        ]
        self.parameters = sequences + scoring + presentation
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
830 831
class FuzznucCommandline(_EmbossCommandLine):
    """Commandline object for the fuzznuc program from EMBOSS.

    The sequence, the search pattern and the number of mismatches are
    declared as required.
    """

    def __init__(self, cmd="fuzznuc", **kwargs):
        """Declare the fuzznuc options and initialise the EMBOSS base.

        cmd is the program name to run; keyword arguments are passed on
        to _EmbossCommandLine.
        """
        required_spec = (
            ("sequence", "Sequence database USA"),
            ("pattern",
             "Search pattern, using standard IUPAC one-letter codes"),
            ("mismatch", "Number of mismatches"),
        )
        required = [_Option(["-" + name, name], text, is_required=True)
                    for name, text in required_spec]
        optional = [
            _Option(["-complement", "complement"],
                    "Search complementary strand"),
            _Option(["-rformat", "rformat"],
                    "Specify the report format to output in."),
        ]
        self.parameters = required + optional
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
851 852
class Est2GenomeCommandline(_EmbossCommandLine):
    """Commandline object for the est2genome program from EMBOSS.

    The EST sequence(s) and the genomic sequence are declared as required.
    """

    def __init__(self, cmd="est2genome", **kwargs):
        """Declare the est2genome options and initialise the EMBOSS base.

        cmd is the program name to run; keyword arguments are passed on
        to _EmbossCommandLine.
        """
        inputs = [
            _Option(["-est", "est"],
                    "EST sequence(s)",
                    is_required=True),
            _Option(["-genome", "genome"],
                    "Genomic sequence",
                    is_required=True),
        ]
        # (name, description) pairs for the plain optional arguments; the
        # command line flag is simply the name with a leading dash.
        optional_spec = (
            ("match", "Score for matching two bases"),
            ("mismatch", "Cost for mismatching two bases"),
            ("gappenalty",
             "Cost for deleting a single base in either sequence, "
             "excluding introns"),
            ("intronpenalty", "Cost for an intron, independent of length."),
            ("splicepenalty",
             "Cost for an intron, independent of length "
             "and starting/ending on donor-acceptor sites"),
            ("minscore",
             "Exclude alignments with scores below this threshold score."),
            ("reverse", "Reverse the orientation of the EST sequence"),
            ("splice", "Use donor and acceptor splice sites."),
            ("mode",
             "This determines the comparion mode. 'both', 'forward' "
             "'reverse'"),
            ("best",
             "You can print out all comparisons instead of just the best"),
            ("space", "for linear-space recursion."),
            ("shuffle", "Shuffle"),
            ("seed", "Random number seed"),
            ("align", "Show the alignment."),
            ("width", "Alignment width"),
        )
        tuning = [_Option(["-" + name, name], text)
                  for name, text in optional_spec]
        self.parameters = inputs + tuning
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
899 900
class ETandemCommandline(_EmbossCommandLine):
    """Wrapper for running the EMBOSS etandem program.

    Required options: sequence (treated as a filename), minrepeat and
    maxrepeat.  Optional: threshold, mismatch, uniform and rformat.
    """
    def __init__(self, cmd="etandem", **kwargs):
        required = [
            _Option(["-sequence","sequence"], "Sequence",
                    filename=True, is_required=True),
            _Option(["-minrepeat","minrepeat"], "Minimum repeat size",
                    is_required=True),
            _Option(["-maxrepeat","maxrepeat"], "Maximum repeat size",
                    is_required=True),
            ]
        optional = [
            _Option(["-threshold","threshold"], "Threshold score"),
            _Option(["-mismatch","mismatch"], "Allow N as a mismatch"),
            _Option(["-uniform","uniform"], "Allow uniform consensus"),
            _Option(["-rformat","rformat"], "Output report format"),
            ]
        #Required options first, then the optional ones.
        self.parameters = required + optional
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
925 926
class EInvertedCommandline(_EmbossCommandLine):
    """Commandline object for the einverted program from EMBOSS.

    Required options: sequence (a filename), gap, threshold, match and
    mismatch.  The maxrepeat option is optional.
    """
    def __init__(self, cmd="einverted", **kwargs):
        self.parameters = [
            _Option(["-sequence","sequence"],
                    "Sequence",
                    filename=True,
                    is_required=True),
            #The gap penalty is a score rather than a path, so unlike the
            #sequence option above it is not flagged as a filename.
            _Option(["-gap","gap"],
                    "Gap penalty",
                    is_required=True),
            _Option(["-threshold","threshold"],
                    "Minimum score threshold",
                    is_required=True),
            _Option(["-match","match"],
                    "Match score",
                    is_required=True),
            _Option(["-mismatch","mismatch"],
                    "Mismatch score",
                    is_required=True),
            _Option(["-maxrepeat","maxrepeat"],
                    "Maximum separation between the start and end of repeat"),
            ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
953 954
class PalindromeCommandline(_EmbossCommandLine):
    """Wrapper for running the EMBOSS palindrome program.

    Every option declared here is required: sequence (treated as a
    filename), minpallen, maxpallen, gaplimit, nummismatches and overlap.
    """
    def __init__(self, cmd="palindrome", **kwargs):
        #Everything after the input file takes the same flags, so those
        #options are generated from (name, help text) pairs.
        settings = [
            ("minpallen", "Minimum palindrome length"),
            ("maxpallen", "Maximum palindrome length"),
            ("gaplimit", "Maximum gap between repeats"),
            ("nummismatches", "Number of mismatches allowed"),
            ("overlap", "Report overlapping matches"),
            ]
        self.parameters = [_Option(["-sequence","sequence"], "Sequence",
                                   filename=True, is_required=True)]
        self.parameters.extend([_Option(["-" + name, name], text,
                                        is_required=True)
                                for name, text in settings])
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
981 982
class TranalignCommandline(_EmbossCommandLine):
    """Wrapper for running the EMBOSS tranalign program.

    The asequence, bsequence and outseq options are required and are all
    treated as filenames; table is optional.
    """
    def __init__(self, cmd="tranalign", **kwargs):
        #The three compulsory file arguments differ only in name and help.
        file_args = [
            ("asequence", "Nucleotide sequences to be aligned."),
            ("bsequence", "Protein sequence alignment"),
            ("outseq", "Output sequence file."),
            ]
        options = [_Option(["-" + name, name], text,
                           filename=True, is_required=True)
                   for name, text in file_args]
        options.append(_Option(["-table","table"], "Code to use"))
        self.parameters = options
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
1003 1004
class DiffseqCommandline(_EmbossCommandLine):
    """Wrapper for running the EMBOSS diffseq program.

    Required options: asequence, bsequence, aoutfeat and boutfeat (all
    treated as filenames) plus wordsize.  The rformat option is optional.
    """
    def __init__(self, cmd="diffseq", **kwargs):
        #The two sequences being compared:
        first_seq = _Option(["-asequence","asequence"],
                            "First sequence to compare",
                            filename=True, is_required=True)
        second_seq = _Option(["-bsequence","bsequence"],
                             "Second sequence to compare",
                             filename=True, is_required=True)
        word_size = _Option(["-wordsize","wordsize"],
                            "Word size to use for comparisons (10 default)",
                            is_required=True)
        #One feature output file per input sequence:
        first_feat = _Option(["-aoutfeat","aoutfeat"],
                             "File for output of first sequence's features",
                             filename=True, is_required=True)
        second_feat = _Option(["-boutfeat","boutfeat"],
                              "File for output of second sequence's features",
                              filename=True, is_required=True)
        report_format = _Option(["-rformat","rformat"],
                                "Output report file format")
        self.parameters = [first_seq, second_seq, word_size,
                           first_feat, second_feat, report_format]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
1033 1034
class IepCommandline(_EmbossCommandLine):
    """Wrapper for EMBOSS iep (isoelectric point and charge calculation).

    Only the sequence option (a filename) is required here; amino,
    carboxyl, lysinemodified, disulphides and notermini are optional.

    Example:

    >>> from Bio.Emboss.Applications import IepCommandline
    >>> iep_cline = IepCommandline(sequence="proteins.faa",
    ...                            outfile="proteins.txt")
    >>> print iep_cline
    iep -outfile=proteins.txt -sequence=proteins.faa

    You would typically run the command line with iep_cline() or via the
    Python subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="iep", **kwargs):
        required = [
            _Option(["-sequence","sequence"],
                    "Protein sequence(s) filename",
                    filename=True,
                    is_required=True),
            ]
        optional = [
            _Option(["-amino","amino"],
                    """Number of N-termini

                    Integer 0 (default) or more.
                    """),
            _Option(["-carboxyl","carboxyl"],
                    """Number of C-termini

                    Integer 0 (default) or more.
                    """),
            _Option(["-lysinemodified","lysinemodified"],
                    """Number of modified lysines

                    Integer 0 (default) or more.
                    """),
            _Option(["-disulphides","disulphides"],
                    """Number of disulphide bridges

                    Integer 0 (default) or more.
                    """),
            #Should we implement the -termini switch as well?
            _Option(["-notermini","notermini"],
                    "Exclude (True) or include (False) charge at N and C terminus."),
            ]
        #The input file option leads, then the optional settings.
        self.parameters = required + optional
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
1080 1081 1082 #seqret uses -outseq, not -outfile, so use the base class:
class SeqretCommandline(_EmbossMinimalCommandLine):
    """Commandline object for the seqret program from EMBOSS.

    This tool allows you to interconvert between different sequence file
    formats (e.g. GenBank to FASTA). Combining Biopython's Bio.SeqIO module
    with seqret using a suitable intermediate file format can allow you to
    read/write to an even wider range of file formats.

    This wrapper currently only supports the core functionality, things like
    feature tables (in EMBOSS 6.1.0 onwards) are not yet included.

    Neither sequence nor outseq is flagged as required, since standard
    input/output may be used instead; the valid combinations are enforced
    by the _validate method.
    """
    def __init__(self, cmd="seqret", **kwargs):
        self.parameters = [
            _Option(["-sequence","sequence"],
                    "Input sequence(s) filename",
                    filename=True),
            _Option(["-outseq","outseq"],
                    "Output sequence file.",
                    filename=True),
            _Option(["-sformat","sformat"],
                    "Input sequence(s) format (e.g. fasta, genbank)"),
            _Option(["-osformat","osformat"],
                    "Output sequence(s) format (e.g. fasta, genbank)"),
            ]
        _EmbossMinimalCommandLine.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check an input source and an output destination were chosen.

        Raises ValueError if none of outseq, filter or stdout is set, or
        if neither sequence nor filter is set.  Otherwise returns the
        result of the base class validation.
        """
        #Check the outseq, filter, or stdout option has been set.
        #We can't simply do this via the required flag for the outseq
        #option - this seems the simplest solution.
        if not (self.outseq or self.filter or self.stdout):
            raise ValueError("You must either set outseq (output filename), "
                             "or enable filter or stdout (output to stdout).")
        #There is no stdin option on this wrapper - reading from standard
        #input is requested via the filter switch - so only sequence and
        #filter are tested (looking up an undeclared attribute here would
        #fail before the intended ValueError could be raised).
        if not (self.sequence or self.filter):
            raise ValueError("You must either set sequence (input filename), "
                             "or enable filter (input from stdin).")
        return _EmbossMinimalCommandLine._validate(self)
1120
class SeqmatchallCommandline(_EmbossCommandLine):
    """Wrapper for running the EMBOSS seqmatchall program.

    The sequence option (a filename) is required; wordsize and aformat
    are optional.

    e.g.
    >>> cline = SeqmatchallCommandline(sequence="opuntia.fasta", outfile="opuntia.txt")
    >>> cline.auto = True
    >>> cline.wordsize = 18
    >>> cline.aformat = "pair"
    >>> print cline
    seqmatchall -auto -outfile=opuntia.txt -sequence=opuntia.fasta -wordsize=18 -aformat=pair

    """
    def __init__(self, cmd="seqmatchall", **kwargs):
        input_option = _Option(["-sequence", "sequence"],
                               "Readable set of sequences",
                               filename=True,
                               is_required=True)
        tuning_options = [
            _Option(["-wordsize", "wordsize"],
                    "Word size (Integer 2 or more, default 4)"),
            _Option(["-aformat","aformat"],
                    "Display output in a different specified output format"),
            ]
        #The input file option leads, then the optional settings.
        self.parameters = [input_option] + tuning_options
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
1145
def _test():
    """Run the doctests embedded in this module's docstrings."""
    #Imported here so doctest is only loaded when the tests are run.
    from doctest import testmod
    testmod()
if __name__ == "__main__":
    #Executed as a script rather than imported: run the doctests.
    _test()