Package Bio :: Package Blast :: Module Applications
[hide private]
[frames] | no frames]

Source Code for Module Bio.Blast.Applications

   1  # Copyright 2001 Brad Chapman. 
   2  # Revisions copyright 2009-2010 by Peter Cock. 
   3  # Revisions copyright 2010 by Phillip Garland. 
   4  # All rights reserved. 
   5  # This code is part of the Biopython distribution and governed by its 
   6  # license.  Please see the LICENSE file that should have been included 
   7  # as part of this package. 
   8  """Definitions for interacting with BLAST related applications. 
   9   
  10  Obsolete wrappers for the old/classic NCBI BLAST tools (written in C): 
  11   
  12  - FastacmdCommandline 
  13  - BlastallCommandline 
  14  - BlastpgpCommandline 
  15  - RpsBlastCommandline 
  16   
  17  Wrappers for the new NCBI BLAST+ tools (written in C++): 
  18   
  19  - NcbiblastpCommandline - Protein-Protein BLAST 
  20  - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST 
  21  - NcbiblastxCommandline - Translated Query-Protein Subject BLAST 
  22  - NcbitblastnCommandline - Protein Query-Translated Subject BLAST 
  23  - NcbitblastxCommandline - Translated Query-Translated Subject BLAST 
  24  - NcbipsiblastCommandline - Position-Specific Initiated BLAST 
  25  - NcbirpsblastCommandline - Reverse Position Specific BLAST 
  26  - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST 
  27  - NcbiblastformatterCommandline - Convert ASN.1 to other BLAST output formats 
  28   
  29  For further details, see: 
  30   
  31  Camacho et al. BLAST+: architecture and applications 
  32  BMC Bioinformatics 2009, 10:421 
  33  doi:10.1186/1471-2105-10-421 
  34  """ 
  35  from Bio.Application import _Option, AbstractCommandline, _Switch 
  36   
class FastacmdCommandline(AbstractCommandline):
    """Build a command line for the classic NCBI fastacmd tool (OBSOLETE).

    Two options are declared, and both are marked as required:
    the database (-d) and the identifier to search for (-s).
    """

    def __init__(self, cmd="fastacmd", **kwargs):
        """Declare the fastacmd options, then defer to the base class.

        cmd - name of the executable to call (default "fastacmd").
        kwargs - forwarded unchanged to AbstractCommandline.__init__.
        """
        database_option = _Option(["-d", "database"],
                                  "The database to retrieve from.",
                                  is_required=True,
                                  equate=False)
        search_option = _Option(["-s", "search_string"],
                                "The id to search for.",
                                is_required=True,
                                equate=False)
        self.parameters = [database_option, search_option]
        AbstractCommandline.__init__(self, cmd, **kwargs)
53 54
class _BlastCommandLine(AbstractCommandline):
    """Shared base for the classic NCBI BLAST wrappers (PRIVATE).

    Intended for subclassing only. It contributes the options that
    blastall, rpsblast and blastpgp have in common.
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared classic BLAST options and initialise.

        cmd - name of the executable; must not be None.
        kwargs - forwarded unchanged to AbstractCommandline.__init__.

        A subclass is expected to have stored its own options in
        self.parameters before calling this method.
        """
        assert cmd is not None
        shared_options = [
            _Switch(["--help", "help"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; ignore other arguments."),
            _Option(["-d", "database"],
                    "The database to BLAST against.",
                    is_required=True,
                    equate=False),
            _Option(["-i", "infile"],
                    "The sequence to search with.",
                    filename=True,
                    is_required=True,
                    equate=False),
            _Option(["-e", "expectation"],
                    "Expectation value cutoff.",
                    equate=False),
            _Option(["-m", "align_view"],
                    "Alignment view. Integer 0-11. Use 7 for XML output.",
                    equate=False),
            _Option(["-o", "align_outfile", "outfile"],
                    "Output file for alignment.",
                    filename=True,
                    equate=False),
            _Option(["-y", "xdrop_extension"],
                    "Dropoff for blast extensions.",
                    equate=False),
            _Option(["-F", "filter"],
                    "Filter query sequence with SEG? T/F",
                    equate=False),
            _Option(["-X", "xdrop"],
                    "Dropoff value (bits) for gapped alignments.",
                    equate=False),
            _Option(["-I", "show_gi"],
                    "Show GI's in deflines? T/F",
                    equate=False),
            _Option(["-J", "believe_query"],
                    "Believe the query defline? T/F",
                    equate=False),
            _Option(["-Z", "xdrop_final"],
                    "X dropoff for final gapped alignment.",
                    equate=False),
            _Option(["-z", "db_length"],
                    "Effective database length.",
                    equate=False),
            _Option(["-O", "seqalign_file"],
                    "seqalign file to output.",
                    filename=True,
                    equate=False),
            _Option(["-v", "descriptions"],
                    "Number of one-line descriptions.",
                    equate=False),
            _Option(["-b", "alignments"],
                    "Number of alignments.",
                    equate=False),
            _Option(["-Y", "search_length"],
                    "Effective length of search space (use zero for the "
                    "real size).",
                    equate=False),
            _Option(["-T", "html"],
                    "Produce HTML output? T/F",
                    equate=False),
            _Option(["-U", "case_filter"],
                    "Use lower case filtering of FASTA sequence? T/F",
                    equate=False),
            _Option(["-a", "nprocessors"],
                    "Number of processors to use.",
                    equate=False),
            _Option(["-g", "gapped"],
                    "Whether to do a gapped alignment. T/F",
                    equate=False),
            ]
        # The shared options go first, in case any of the subclass
        # arguments have to come last on the command line.
        try:
            subclass_options = self.parameters
        except AttributeError:
            # Should we raise an error? The subclass should have set this up!
            subclass_options = []
        self.parameters = shared_options + subclass_options
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Run the base class validation unless help was requested."""
        # With --help set we do not want to insist on the normally
        # mandatory arguments such as the database.
        if not self.help:
            AbstractCommandline._validate(self)
147 148
class _BlastAllOrPgpCommandLine(_BlastCommandLine):
    """Shared base for the blastall and blastpgp wrappers (PRIVATE).

    Intended for subclassing only. It contributes the options common to
    blastall and blastpgp (but not rpsblast).
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the blastall/blastpgp options and initialise.

        cmd - name of the executable; must not be None.
        kwargs - forwarded unchanged to _BlastCommandLine.__init__.

        A subclass is expected to have stored its own options in
        self.parameters before calling this method.
        """
        assert cmd is not None
        shared_options = [
            _Option(["-G", "gap_open"],
                    "Gap open penalty",
                    equate=False),
            _Option(["-E", "gap_extend"],
                    "Gap extension penalty",
                    equate=False),
            _Option(["-A", "window_size"],
                    "Multiple hits window size",
                    equate=False),
            _Option(["-f", "hit_extend"],
                    "Threshold for extending hits.",
                    equate=False),
            _Option(["-K", "keep_hits"],
                    " Number of best hits from a region to keep.",
                    equate=False),
            _Option(["-W", "wordsize"],
                    "Word size",
                    equate=False),
            _Option(["-P", "passes"],
                    "Hits/passes. Integer 0-2. 0 for multiple hit, "
                    "1 for single hit (does not apply to blastn)",
                    equate=False),
            ]
        # The shared options go first, in case any of the subclass
        # arguments have to come last on the command line.
        try:
            subclass_options = self.parameters
        except AttributeError:
            # Should we raise an error? The subclass should have set this up!
            subclass_options = []
        self.parameters = shared_options + subclass_options
        _BlastCommandLine.__init__(self, cmd, **kwargs)
189 190
class BlastallCommandline(_BlastAllOrPgpCommandLine):
    """Create a commandline for the blastall program from NCBI (OBSOLETE).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    are replacing blastall with separate tools blastn, blastp, blastx, tblastn
    and tblastx.

    Like blastall, this wrapper is now obsolete, and will be deprecated and
    removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import BlastallCommandline
    >>> cline = BlastallCommandline(program="blastx", infile="m_cold.fasta",
    ...                             database="nr", expectation=0.001)
    >>> cline
    BlastallCommandline(cmd='blastall', database='nr', infile='m_cold.fasta', expectation=0.001, program='blastx')
    >>> print cline
    blastall -d nr -i m_cold.fasta -e 0.001 -p blastx

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    #TODO - This could use more checking for valid parameters to the program.

    def __init__(self, cmd="blastall",**kwargs):
        """Declare the blastall specific options and initialise.

        cmd - name of the executable to call (default "blastall").
        kwargs - forwarded unchanged to the base class initialiser.

        Issues a PendingDeprecationWarning on every instantiation.
        """
        import warnings
        warnings.warn("Like blastall, this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython.", PendingDeprecationWarning)
        self.parameters = [
            #Sorted in the same order as the output from blastall --help
            #which should make it easier to keep them up to date in future.
            #Note that some arguments are defined in the base classes (above).
            _Option(["-p", "program"],
                    "The blast program to use (e.g. blastp, blastn).",
                    is_required=True,
                    equate=False),
            _Option(["-q", "nuc_mismatch"],
                    "Penalty for a nucleotide mismatch (blastn only).",
                    equate=False),
            _Option(["-r", "nuc_match"],
                    "Reward for a nucleotide match (blastn only).",
                    equate=False),
            _Option(["-Q", "query_genetic_code"],
                    "Query Genetic code to use.",
                    equate=False),
            _Option(["-D", "db_genetic_code"],
                    "DB Genetic code (for tblast[nx] only).",
                    equate=False),
            _Option(["-M", "matrix"],
                    "Matrix to use",
                    equate=False),
            _Option(["-S", "strands"],
                    "Query strands to search against database (for blast[nx], "
                    "and tblastx). 3 is both, 1 is top, 2 is bottom.",
                    equate=False),
            _Option(["-l", "restrict_gi"],
                    "Restrict search of database to list of GI's.",
                    equate=False),
            _Option(["-R", "checkpoint"],
                    "PSI-TBLASTN checkpoint input file.",
                    filename=True,
                    equate=False),
            _Option(["-n", "megablast"],
                    "MegaBlast search T/F.",
                    equate=False),
            #The old name "region_length" is for consistency with our
            #old blastall function wrapper:
            _Option(["-L", "region_length", "range_restriction"],
                    """Location on query sequence (string format start,end).

                    In older versions of BLAST, -L set the length of region
                    used to judge hits (see -K parameter).""",
                    equate=False),
            #The misspelt old name "frame_shit_penalty" is kept as an
            #alias so existing callers keep working; the correctly spelt
            #name is listed last, following the same old-name-then-new-name
            #pattern used for -L above.
            _Option(["-w", "frame_shit_penalty", "frame_shift_penalty"],
                    "Frame shift penalty (OOF algorithm for blastx).",
                    equate=False),
            _Option(["-t", "largest_intron"],
                    "Length of the largest intron allowed in a translated "
                    "nucleotide sequence when linking multiple distinct "
                    "alignments. (0 invokes default behavior; a negative value "
                    "disables linking.)",
                    equate=False),
            _Option(["-B", "num_concatenated_queries"],
                    "Number of concatenated queries, for blastn and tblastn.",
                    equate=False),
            _Option(["-V", "oldengine"],
                    "Force use of the legacy BLAST engine.",
                    equate=False),
            _Option(["-C", "composition_based"],
                    """Use composition-based statistics for tblastn:
                    D or d: default (equivalent to F)
                    0 or F or f: no composition-based statistics
                    1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2: Composition-based score adjustment as in Bioinformatics
                        21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics
                        21:902-911, 2005, unconditionally
                    For programs other than tblastn, must either be absent or be
                    D, F or 0.""",
                    equate=False),
            _Option(["-s", "smith_waterman"],
                    "Compute locally optimal Smith-Waterman alignments (This "
                    "option is only available for gapped tblastn.) T/F",
                    equate=False),
            ]
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
294 295
class BlastpgpCommandline(_BlastAllOrPgpCommandLine):
    """Build a command line for the NCBI blastpgp program (OBSOLETE).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    are replacing blastpgp with a renamed tool psiblast. This module provides
    NcbipsiblastCommandline as a wrapper for the new tool psiblast.

    Like blastpgp (and blastall), this wrapper is now obsolete, and will be
    deprecated and removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import BlastpgpCommandline
    >>> cline = BlastpgpCommandline(help=True)
    >>> cline
    BlastpgpCommandline(cmd='blastpgp', help=True)
    >>> print cline
    blastpgp --help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastpgp",**kwargs):
        """Declare the blastpgp specific options and initialise.

        cmd - name of the executable to call (default "blastpgp").
        kwargs - forwarded unchanged to the base class initialiser.

        Issues a PendingDeprecationWarning on every instantiation.
        """
        import warnings
        message = "Like blastpgp (and blastall), this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython."
        warnings.warn(message, PendingDeprecationWarning)
        blastpgp_options = [
            _Option(["-C", "checkpoint_outfile"],
                    "Output file for PSI-BLAST checkpointing.",
                    filename=True,
                    equate=False),
            _Option(["-R", "restart_infile"],
                    "Input file for PSI-BLAST restart.",
                    filename=True,
                    equate=False),
            _Option(["-k", "hit_infile"],
                    "Hit file for PHI-BLAST.",
                    filename=True,
                    equate=False),
            _Option(["-Q", "matrix_outfile"],
                    "Output file for PSI-BLAST matrix in ASCII.",
                    filename=True,
                    equate=False),
            _Option(["-B", "align_infile"],
                    "Input alignment file for PSI-BLAST restart.",
                    filename=True,
                    equate=False),
            _Option(["-S", "required_start"],
                    "Start of required region in query.",
                    equate=False),
            _Option(["-H", "required_end"],
                    "End of required region in query.",
                    equate=False),
            _Option(["-j", "npasses"],
                    "Number of passes",
                    equate=False),
            _Option(["-N", "nbits_gapping"],
                    "Number of bits to trigger gapping.",
                    equate=False),
            _Option(["-c", "pseudocounts"],
                    "Pseudocounts constants for multiple passes.",
                    equate=False),
            _Option(["-h", "model_threshold"],
                    "E-value threshold to include in multipass model.",
                    equate=False),
            #Does the old name "region_length" for -L make sense?
            _Option(["-L", "region_length"],
                    "Cost to decline alignment (disabled when zero).",
                    equate=False),
            _Option(["-M", "matrix"],
                    "Matrix (string, default BLOSUM62).",
                    equate=False),
            _Option(["-p", "program"],
                    "The blast program to use (e.g blastpgp, patseedp or seedp).",
                    is_required=True,
                    equate=False),
            ]
        self.parameters = blastpgp_options
        _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
371 372
class RpsBlastCommandline(_BlastCommandLine):
    """Build a command line for the classic NCBI rpsblast program (OBSOLETE).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    are replacing the old rpsblast with a new version of the same name plus a
    second tool rpstblastn, both taking different command line arguments. This
    module provides NcbirpsblastCommandline and NcbirpstblastnCommandline as
    wrappers for the new tools.

    Like the old rpsblast (and blastall), this wrapper is now obsolete, and will
    be deprecated and removed in a future release of Biopython.

    >>> from Bio.Blast.Applications import RpsBlastCommandline
    >>> cline = RpsBlastCommandline(help=True)
    >>> cline
    RpsBlastCommandline(cmd='rpsblast', help=True)
    >>> print cline
    rpsblast --help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="rpsblast",**kwargs):
        """Declare the rpsblast specific options and initialise.

        cmd - name of the executable to call (default "rpsblast").
        kwargs - forwarded unchanged to the base class initialiser.

        Issues a PendingDeprecationWarning on every instantiation.
        """
        import warnings
        message = "Like the old rpsblast (and blastall), this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython."
        warnings.warn(message, PendingDeprecationWarning)
        rpsblast_options = [
            #Note -N is also in blastpgp, but not blastall
            _Option(["-N", "nbits_gapping"],
                    "Number of bits to trigger gapping.",
                    equate=False),
            #Note blastall and blastpgp wrappers have -P with name "passes".
            #If this is the same thing, we should be consistent!
            _Option(["-P", "multihit"],
                    "0 for multiple hit, 1 for single hit",
                    equate=False),
            _Option(["-l", "logfile"],
                    "Logfile name.",
                    filename=True,
                    equate=False),
            _Option(["-p", "protein"],
                    "Query sequence is protein. T/F",
                    equate=False),
            _Option(["-L", "range_restriction"],
                    "Location on query sequence (string format start,end).",
                    equate=False),
            ]
        self.parameters = rpsblast_options
        _BlastCommandLine.__init__(self, cmd, **kwargs)
420 421 ############################################################################## 422 # Legacy BLAST wrappers above, (new) BLAST+ wrappers below 423 ############################################################################## 424
class _NcbibaseblastCommandline(AbstractCommandline):
    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc
    AND blast_formatter).
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the options shared by all BLAST+ tools and initialise.

        cmd - name of the executable; must not be None.
        kwargs - forwarded unchanged to AbstractCommandline.__init__.

        A subclass is expected to have stored its own options in
        self.parameters before calling this method.
        """
        assert cmd is not None
        extra_parameters = [
            #Core:
            _Switch(["-h", "h"],
                    "Print USAGE and DESCRIPTION;  ignore other arguments."),
            _Switch(["-help", "help"],
                    "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                    "ignore other arguments."),
            _Switch(["-version", "version"],
                    "Print version number;  ignore other arguments."),
            # Output configuration options
            _Option(["-out", "out"],
                    "Output file for alignment.",
                    filename=True,
                    equate=False),
            #Formatting options:
            _Option(["-outfmt", "outfmt"],
                    "Alignment view.  Integer 0-11.  Use 5 for XML output "
                    "(differs from classic BLAST which used 7 for XML).",
                    equate=False),
            #TODO - Document and test the column options
            _Switch(["-show_gis","show_gis"],
                    "Show NCBI GIs in deflines?"),
            _Option(["-num_descriptions","num_descriptions"],
                    """Number of database sequences to show one-line descriptions for.

                    Integer argument (at least zero). Default is 500.
                    See also num_alignments.""",
                    equate=False),
            #The help text here used to be a copy of the entry above with
            #"num_alignments" pasted in twice; it now describes alignments
            #and cross-references num_descriptions instead of itself.
            _Option(["-num_alignments","num_alignments"],
                    """Number of database sequences to show alignments for.

                    Integer argument (at least zero). Default is 200.
                    See also num_descriptions.""",
                    equate=False),
            _Switch(["-html", "html"],
                    "Produce HTML output? See also the outfmt option."),
            #Miscellaneous options
            _Switch(["-parse_deflines", "parse_deflines"],
                    "Should the query and subject defline(s) be parsed?"),
            ]
        try:
            #Insert extra parameters - at the start just in case there
            #are any arguments which must come last:
            self.parameters = extra_parameters + self.parameters
        except AttributeError:
            #Should we raise an error?  The subclass should have set this up!
            self.parameters = extra_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)

    def _validate_incompatibilities(self, incompatibles):
        """Used by the BLAST+ _validate method (PRIVATE).

        incompatibles - dictionary mapping an option name to a list of
        option names which must not be used together with it.

        Raises ValueError for the first clashing pair where both
        options currently have a true value.
        """
        for a in incompatibles:
            if not self._get_parameter(a):
                #Option a is not in use, so it cannot clash with anything.
                continue
            for b in incompatibles[a]:
                if self._get_parameter(b):
                    raise ValueError("Options %s and %s are incompatible."
                                     % (a,b))
491 492
class _NcbiblastCommandline(_NcbibaseblastCommandline):
    """Shared base for the BLAST+ search tool wrappers (PRIVATE).

    Intended for subclassing only. It contributes the options common to
    all the BLAST+ search tools (blastn, rpsblast, rpstblastn, etc).
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared BLAST+ search options and initialise.

        cmd - name of the executable; must not be None.
        kwargs - forwarded unchanged to the base class initialiser.

        A subclass is expected to have stored its own options in
        self.parameters before calling this method.
        """
        assert cmd is not None
        shared_options = [
            #Input query options:
            _Option(["-query", "query"],
                    "The sequence to search with.",
                    filename=True,
                    equate=False), #Should this be required?
            _Option(["-query_loc", "query_loc"],
                    "Location on the query sequence (Format: start-stop)",
                    equate=False),
            #General search options:
            _Option(["-db", "db"],
                    "The database to BLAST against.",
                    equate=False),
            _Option(["-evalue", "evalue"],
                    "Expectation value cutoff.",
                    equate=False),
            _Option(["-word_size","word_size"],
                    """Word size for wordfinder algorithm.

                    Integer. Minimum 2.""",
                    equate=False),
            #BLAST-2-Sequences options:
            # - see subclass
            #Formatting options:
            # - see baseclass
            #Query filtering options
            # TODO -soft_masking <Boolean>, is this a switch or an option?
            #_Switch(["-soft_masking", "soft_masking"],
            #        "Apply filtering locations as soft masks?"),
            _Switch(["-lcase_masking", "lcase_masking"],
                    "Use lower case filtering in query and subject sequence(s)?"),
            #Restrict search or results
            _Option(["-gilist", "gilist"],
                    """Restrict search of database to list of GI's.

                    Incompatible with: negative_gilist, seqidlist, remote, subject, subject_loc""",
                    filename=True,
                    equate=False),
            _Option(["-negative_gilist", "negative_gilist"],
                    """Restrict search of database to everything except the listed GIs.

                    Incompatible with: gilist, seqidlist, remote, subject, subject_loc""",
                    filename=True,
                    equate=False),
            _Option(["-seqidlist", "seqidlist"],
                    """Restrict search of database to list of SeqID's.

                    Incompatible with: gilist, negative_gilist, remote, subject, subject_loc""",
                    filename=True,
                    equate=False),
            _Option(["-entrez_query", "entrez_query"],
                    "Restrict search with the given Entrez query (requires remote).",
                    equate=False),
            _Option(["-max_target_seqs", "max_target_seqs"],
                    """Maximum number of aligned sequences to keep.

                    Integer argument (at least one).""",
                    equate=False),
            #Statistical options
            _Option(["-dbsize", "dbsize"],
                    "Effective length of the database (integer)",
                    equate=False),
            _Option(["-searchsp", "searchsp"],
                    "Effective length of the search space (integer)",
                    equate=False),
            #Extension options
            _Option(["-xdrop_ungap", "xdrop_ungap"],
                    "X-dropoff value (in bits) for ungapped extensions. Float.",
                    equate=False),
            _Option(["-xdrop_gap", "xdrop_gap"],
                    "X-dropoff value (in bits) for preliminary gapped extensions. Float.",
                    equate=False),
            _Option(["-xdrop_gap_final", "xdrop_gap_final"],
                    "X-dropoff value (in bits) for final gapped alignment. Float.",
                    equate=False),
            _Option(["-window_size", "window_size"],
                    "Multiple hits window size, use 0 to specify 1-hit algorithm. Integer.",
                    equate=False),
            # Search strategy options
            _Option(["-import_search_strategy", "import_search_strategy"],
                    """Search strategy to use.

                    Incompatible with: export_search_strategy""",
                    filename=True,
                    equate=False),
            _Option(["-export_search_strategy", "export_search_strategy"],
                    """File name to record the search strategy used.

                    Incompatible with: import_search_strategy""",
                    filename=True,
                    equate=False),
            #Miscellaneous options
            _Option(["-num_threads", "num_threads"],
                    """Number of threads to use in the BLAST search.

                    Integer of at least one. Default is one.
                    Incompatible with: remote""",
                    equate=False),
            _Switch(["-remote", "remote"],
                    """Execute search remotely?

                    Incompatible with: gilist, negative_gilist, subject_loc, num_threads, ..."""),
            ]
        # The shared options go first, in case any of the subclass
        # arguments have to come last on the command line.
        try:
            subclass_options = self.parameters
        except AttributeError:
            # Should we raise an error? The subclass should have set this up!
            subclass_options = []
        self.parameters = shared_options + subclass_options
        _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check option combinations, then run the generic validation.

        Raises ValueError if two mutually exclusive options are both set,
        or if entrez_query is used without remote.
        """
        clashes = {"remote":["gilist", "negative_gilist", "num_threads"],
                   "import_search_strategy" : ["export_search_strategy"],
                   "gilist":["negative_gilist"],
                   "seqidlist":["gilist", "negative_gilist", "remote"]}
        self._validate_incompatibilities(clashes)
        entrez_without_remote = self.entrez_query and not self.remote
        if entrez_without_remote:
            raise ValueError("Option entrez_query requires remote option.")
        AbstractCommandline._validate(self)
622 623
class _Ncbiblast2SeqCommandline(_NcbiblastCommandline):
    """Shared base for BLAST+ tools with two-sequence support (PRIVATE).

    Intended for subclassing only. It contributes the options common to
    the BLAST+ tools supporting two-sequence BLAST (blastn, psiblast, etc)
    but not rpsblast or rpstblastn.
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the two-sequence BLAST options and initialise.

        cmd - name of the executable; must not be None.
        kwargs - forwarded unchanged to the base class initialiser.

        A subclass is expected to have stored its own options in
        self.parameters before calling this method.
        """
        assert cmd is not None
        shared_options = [
            #General search options:
            _Option(["-gapopen", "gapopen"],
                    "Cost to open a gap (integer).",
                    equate=False),
            _Option(["-gapextend", "gapextend"],
                    "Cost to extend a gap (integer).",
                    equate=False),
            #BLAST-2-Sequences options:
            _Option(["-subject", "subject"],
                    """Subject sequence(s) to search.

                    Incompatible with: db, gilist, negative_gilist.
                    See also subject_loc.""",
                    filename=True,
                    equate=False),
            _Option(["-subject_loc", "subject_loc"],
                    """Location on the subject sequence (Format: start-stop)

                    Incompatible with: db, gilist, seqidlist, negative_gilist,
                    db_soft_mask, db_hard_mask, remote.

                    See also subject.""",
                    equate=False),
            #Restrict search or results:
            _Option(["-culling_limit", "culling_limit"],
                    """Hit culling limit (integer).

                    If the query range of a hit is enveloped by that of at
                    least this many higher-scoring hits, delete the hit.

                    Incompatible with: best_hit_overhang, best_hit_score_edge.
                    """,
                    equate=False),
            _Option(["-best_hit_overhang", "best_hit_overhang"],
                    """Best Hit algorithm overhang value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
            _Option(["-best_hit_score_edge", "best_hit_score_edge"],
                    """Best Hit algorithm score edge value (recommended value: 0.1)

                    Float between 0.0 and 0.5 inclusive.

                    Incompatible with: culling_limit.""",
                    equate=False),
            ]
        # The shared options go first, in case any of the subclass
        # arguments have to come last on the command line.
        try:
            subclass_options = self.parameters
        except AttributeError:
            # Should we raise an error? The subclass should have set this up!
            subclass_options = []
        self.parameters = shared_options + subclass_options
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the two-sequence option clashes, then defer to the parent.

        Raises ValueError if two mutually exclusive options are both set.
        """
        clashes = {"subject_loc":["db", "gilist", "negative_gilist", "seqidlist", "remote"],
                   "culling_limit":["best_hit_overhang","best_hit_score_edge"],
                   "subject":["db", "gilist", "negative_gilist", "seqidlist"]}
        self._validate_incompatibilities(clashes)
        _NcbiblastCommandline._validate(self)
698 699
class _NcbiblastMain2SeqCommandline(_Ncbiblast2SeqCommandline):
    """Shared base for the main BLAST+ search tool wrappers (PRIVATE).

    Intended for subclassing only. It contributes the options common to
    the main BLAST tools blastp, blastn, blastx, tblastx, tblastn
    but not psiblast, rpsblast or rpstblastn.
    """

    def __init__(self, cmd=None, **kwargs):
        """Prepend the database masking options and initialise.

        cmd - name of the executable; must not be None.
        kwargs - forwarded unchanged to the base class initialiser.

        A subclass is expected to have stored its own options in
        self.parameters before calling this method.
        """
        assert cmd is not None
        shared_options = [
            #Restrict search or results:
            _Option(["-db_soft_mask", "db_soft_mask"],
                    """Filtering algorithm for soft masking (integer).

                    Filtering algorithm ID to apply to the BLAST database as soft masking.

                    Incompatible with: db_hard_mask, subject, subject_loc""",
                    equate=False),
            _Option(["-db_hard_mask", "db_hard_mask"],
                    """Filtering algorithm for hard masking (integer).

                    Filtering algorithm ID to apply to the BLAST database as hard masking.

                    Incompatible with: db_soft_mask, subject, subject_loc""",
                    equate=False),
            ]
        # The shared options go first, in case any of the subclass
        # arguments have to come last on the command line.
        try:
            subclass_options = self.parameters
        except AttributeError:
            # Should we raise an error? The subclass should have set this up!
            subclass_options = []
        self.parameters = shared_options + subclass_options
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the masking option clashes, then defer to the parent.

        Raises ValueError if two mutually exclusive options are both set.
        """
        clashes = {"db_soft_mask":["db_hard_mask", "subject", "subject_loc"],
                   "db_hard_mask":["db_soft_mask", "subject", "subject_loc"]}
        self._validate_incompatibilities(clashes)
        _Ncbiblast2SeqCommandline._validate(self)
740
class NcbiblastpCommandline(_NcbiblastMain2SeqCommandline):
    """Create a commandline for the NCBI BLAST+ program blastp (for proteins).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastp.

    >>> from Bio.Blast.Applications import NcbiblastpCommandline
    >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
    ...                               evalue=0.001, remote=True, ungapped=True)
    >>> cline
    NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
    >>> print cline
    blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """

    def __init__(self, cmd="blastp", **kwargs):
        """Declare the blastp specific options and initialise.

        cmd - name of the executable to call (default "blastp").
        kwargs - forwarded unchanged to the base class initialiser.
        """
        #The values documented for -comp_based_stats.  They are compared
        #as whole strings: the previous substring test against "0Ft2TtDd"
        #rejected the documented value "f", accepted things like "" or
        #"Ft", and raised a TypeError for integer arguments such as 2.
        comp_based_values = ["0", "F", "f", "2", "T", "t", "D", "d"]
        self.parameters = [
            #General search options:
            _Option(["-task", "task"],
                    "Task to execute (string, blastp (default) or blastp-short).",
                    checker_function=lambda value : value in ["blastp",
                                                              "blastp-short"],
                    equate=False),
            #Note equate=False, like every other option in this module.
            #It was missing here, which left -matrix as the only option
            #using the _Option default for joining the flag and its value.
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    2, T or t, D or d : Composition-based score adjustment as in
                    Bioinformatics 21:902-911, 2005, conditioned on sequence properties

                    Note that tblastn also supports values of 1 and 3.""",
                    checker_function=lambda value : \
                        str(value) in comp_based_values,
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            #Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            #Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
798 799
800 -class NcbiblastnCommandline(_NcbiblastMain2SeqCommandline):
801 """Wrapper for the NCBI BLAST+ program blastn (for nucleotides). 802 803 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI 804 replaced the old blastall tool with separate tools for each of the searches. 805 This wrapper therefore replaces BlastallCommandline with option -p blastn. 806 807 For example, to run a search against the "nt" nucleotide database using the 808 FASTA nucleotide file "m_code.fasta" as the query, with an expectation value 809 cut off of 0.001, saving the output to a file in XML format: 810 811 >>> from Bio.Blast.Applications import NcbiblastnCommandline 812 >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus", 813 ... evalue=0.001, out="m_cold.xml", outfmt=5) 814 >>> cline 815 NcbiblastnCommandline(cmd='blastn', out='m_cold.xml', outfmt=5, query='m_cold.fasta', db='nt', evalue=0.001, strand='plus') 816 >>> print cline 817 blastn -out m_cold.xml -outfmt 5 -query m_cold.fasta -db nt -evalue 0.001 -strand plus 818 819 You would typically run the command line with cline() or via the Python 820 subprocess module, as described in the Biopython tutorial. 821 """
def __init__(self, cmd="blastn", **kwargs):
    """Declare the blastn-specific options and initialise the wrapper.

    Arguments:
     - cmd - the executable to call, "blastn" unless overridden.
     - kwargs - passed on untouched to the parent class initialiser.

    Only the options particular to blastn are listed here, gathered by
    category; self.parameters is set to the combined list before
    _NcbiblastMain2SeqCommandline.__init__ is called.
    """
    def one_of(*allowed):
        # Build a checker that accepts exactly the listed values.
        return lambda candidate: candidate in allowed

    # Input query options.
    query_input = [
        _Option(["-strand", "strand"],
                """Query strand(s) to search against database/subject.

                Values allowed are "both" (default), "minus", "plus".""",
                checker_function=one_of("both", "minus", "plus"),
                equate=False),
    ]

    # General search options.
    general_search = [
        _Option(["-task", "task"],
                """Task to execute (string, default 'megablast')

                Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast'
                (the default), or 'vecscreen'.""",
                checker_function=one_of('blastn', 'blastn-short',
                                        'dc-megablast', 'megablast',
                                        'vecscreen'),
                equate=False),
        _Option(["-penalty", "penalty"],
                "Penalty for a nucleotide mismatch (integer, at most zero).",
                equate=False),
        _Option(["-reward", "reward"],
                "Reward for a nucleotide match (integer, at least zero).",
                equate=False),
        #TODO - Does this need an argument or is it a switch?
        #_Option(["-use_index", "use_index"],
        #        "Use MegaBLAST database index (boolean).",
        #        equate=False),
        _Option(["-index_name", "index_name"],
                "MegaBLAST database index name.",
                equate=False),
    ]

    # Query filtering options.
    query_filtering = [
        _Option(["-dust", "dust"],
                """Filter query sequence with DUST (string).

                Format: 'yes', 'level window linker', or 'no' to disable.
                Default = '20 64 1'.
                """,
                equate=False),
        _Option(["-filtering_db", "filtering_db"],
                "BLAST database containing filtering elements (i.e. repeats).",
                equate=False),
        _Option(["-window_masker_taxid", "window_masker_taxid"],
                "Enable WindowMasker filtering using a Taxonomic ID (integer).",
                equate=False),
        _Option(["-window_masker_db", "window_masker_db"],
                "Enable WindowMasker filtering using this repeats database (string).",
                equate=False),
    ]

    # Restrict search or results.
    restrictions = [
        _Option(["-perc_identity", "perc_identity"],
                "Percent identity (real, 0 to 100 inclusive).",
                equate=False),
    ]

    # Discontiguous MegaBLAST options; _validate insists that the two
    # template settings are supplied together.
    discontiguous = [
        _Option(["-template_type", "template_type"],
                """Discontiguous MegaBLAST template type (string).

                Allowed values: 'coding', 'coding_and_optimal' or 'optimal'
                Requires: template_length.""",
                checker_function=one_of('coding', 'coding_and_optimal',
                                        'optimal'),
                equate=False),
        _Option(["-template_length", "template_length"],
                """Discontiguous MegaBLAST template length (integer).

                Allowed values: 16, 18, 21

                Requires: template_type.""",
                # Both the integer and the string spelling are accepted.
                checker_function=one_of(16, 18, 21, '16', '18', '21'),
                equate=False),
    ]

    # Extension options.
    extension = [
        _Switch(["-no_greedy", "no_greedy"],
                "Use non-greedy dynamic programming extension"),
        _Option(["-min_raw_gapped_score", "min_raw_gapped_score"],
                "Minimum raw gapped score to keep an alignment in the "
                "preliminary gapped and traceback stages (integer).",
                equate=False),
        _Switch(["-ungapped", "ungapped"],
                "Perform ungapped alignment only?"),
        _Option(["-off_diagonal_range", "off_diagonal_range"],
                """Number of off-diagonals to search for the 2nd hit (integer).

                Expects a positive integer, or 0 (default) to turn off.

                Added in BLAST 2.2.23+
                """,
                equate=False),
    ]

    self.parameters = (query_input + general_search + query_filtering
                       + restrictions + discontiguous + extension)
    _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
914
def _validate(self):
    """Check that the discontiguous MegaBLAST template options are paired.

    template_type and template_length are only meaningful together, so
    a ValueError is raised when exactly one of the two is set. All other
    checks are left to _NcbiblastMain2SeqCommandline._validate.
    """
    # Compare truthiness rather than the raw values: the rule is simply
    # "both set or neither set".
    has_type = bool(self.template_type)
    has_length = bool(self.template_length)
    if has_type != has_length:
        # Name both options; the message used to say template_type twice.
        raise ValueError("Options template_type and template_length "
                         "require each other.")
    _NcbiblastMain2SeqCommandline._validate(self)
920 921
class NcbiblastxCommandline(_NcbiblastMain2SeqCommandline):
    """Command line wrapper for blastx from the NCBI BLAST+ suite.

    blastx takes a nucleotide query and searches a protein database.
    BLAST+ is the C++ rewrite of BLAST, in which the NCBI replaced the old
    blastall tool with one program per kind of search, so this class takes
    over from BlastallCommandline used with option -p blastx.

    >>> from Bio.Blast.Applications import NcbiblastxCommandline
    >>> cline = NcbiblastxCommandline(query="m_cold.fasta", db="nr", evalue=0.001)
    >>> cline
    NcbiblastxCommandline(cmd='blastx', query='m_cold.fasta', db='nr', evalue=0.001)
    >>> print cline
    blastx -query m_cold.fasta -db nr -evalue 0.001

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="blastx", **kwargs):
        """Declare the blastx-specific options and initialise the wrapper.

        Arguments:
         - cmd - the executable to call, "blastx" unless overridden.
         - kwargs - passed on untouched to the parent class initialiser.
        """
        strands = ("both", "minus", "plus")

        # Input query options.
        query_input = [
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=lambda choice: choice in strands,
                    equate=False),
            _Option(["-query_gencode", "query_gencode"],
                    """Genetic code to use to translate query

                    Integer. Default is one.""",
                    equate=False),
        ]

        # General search options.
        general_search = [
            _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                    "Frame shift penalty (integer, at least 1, default ignored).",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
        ]

        # Query filtering options.
        query_filtering = [
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
        ]

        # Extension options.
        extension = [
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
        ]

        self.parameters = (query_input + general_search
                           + query_filtering + extension)
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
984 985
class NcbitblastnCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program tblastn.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastn.

    >>> from Bio.Blast.Applications import NcbitblastnCommandline
    >>> cline = NcbitblastnCommandline(help=True)
    >>> cline
    NcbitblastnCommandline(cmd='tblastn', help=True)
    >>> print cline
    tblastn -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="tblastn", **kwargs):
        """Declare the tblastn-specific options and initialise the wrapper.

        Arguments:
         - cmd - the executable to call, "tblastn" unless overridden.
         - kwargs - passed on untouched to the parent class initialiser.
        """
        # Each documented comp_based_stats setting is a separate token.
        # Testing membership in the string "0Ft12TtDd3" instead would
        # reject the documented lower case "f", accept the empty string
        # and multi-character runs such as "Ft1", and raise TypeError for
        # integers. str() lets the integers 0 to 3 be given directly.
        comp_based_stats_values = ("0", "F", "f", "1", "2", "T", "t",
                                   "D", "d", "3")
        self.parameters = [
            #General search options:
            # db_gencode concerns the subject side of the search; the
            # query of tblastn is protein and is not translated.
            _Option(["-db_gencode", "db_gencode"],
                    """Genetic code to use to translate subject sequences

                    Integer. Default is one.""",
                    equate=False),
            _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                    "Frame shift penalty (integer, at least 1, default ignored).",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the BLAST lookup table (float)",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    1: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2, T or t, D or d : Composition-based score adjustment as in
                    Bioinformatics 21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics 21:902-911,
                    2005, unconditionally

                    Note that only tblastn supports values of 1 and 3.""",
                    checker_function=lambda value: str(value) in comp_based_stats_values,
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            #Extension options:
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"),
            #Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            #PSI-TBLASTN options:
            # NOTE(review): the incompatibilities listed for in_pssm are
            # not checked anywhere in this class - confirm whether the
            # parent class enforces them.
            _Option(["-in_pssm", "in_pssm"],
                    """PSI-BLAST checkpoint file

                    Incompatible with: remote, query""",
                    filename=True,
                    equate=False),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
1062 1063
class NcbitblastxCommandline(_NcbiblastMain2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program tblastx.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastx.

    >>> from Bio.Blast.Applications import NcbitblastxCommandline
    >>> cline = NcbitblastxCommandline(help=True)
    >>> cline
    NcbitblastxCommandline(cmd='tblastx', help=True)
    >>> print cline
    tblastx -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="tblastx", **kwargs):
        """Declare the tblastx-specific options and initialise the wrapper.

        Arguments:
         - cmd - the executable to call, "tblastx" unless overridden.
         - kwargs - passed on untouched to the parent class initialiser.
        """
        self.parameters = [
            #Input query options:
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=lambda value: value in ["both", "minus", "plus"],
                    equate=False),
            _Option(["-query_gencode", "query_gencode"],
                    """Genetic code to use to translate query

                    Integer. Default is one.""",
                    equate=False),
            #General search options:
            # db_gencode is the subject-side counterpart of query_gencode;
            # its help text must not repeat the query wording.
            _Option(["-db_gencode", "db_gencode"],
                    """Genetic code to use to translate subject sequences

                    Integer. Default is one.""",
                    equate=False),
            _Option(["-max_intron_length", "max_intron_length"],
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            ]
        _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
1125 1126
class NcbipsiblastCommandline(_Ncbiblast2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program psiblast.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastpgp tool with a similar tool psiblast. This wrapper
    therefore replaces BlastpgpCommandline, the wrapper for blastpgp.

    >>> from Bio.Blast.Applications import NcbipsiblastCommandline
    >>> cline = NcbipsiblastCommandline(help=True)
    >>> cline
    NcbipsiblastCommandline(cmd='psiblast', help=True)
    >>> print cline
    psiblast -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="psiblast", **kwargs):
        """Declare the psiblast-specific options and initialise the wrapper.

        Arguments:
         - cmd - the executable to call, "psiblast" unless overridden.
         - kwargs - passed on untouched to the parent class initialiser.
        """
        # Each documented comp_based_stats setting is a separate token.
        # Testing membership in the string "0Ft2TtDd" instead would reject
        # the documented lower case "f", accept the empty string and
        # multi-character runs such as "Ft2", and raise TypeError for
        # integers. str() lets the integers 0 and 2 be given directly.
        comp_based_stats_values = ("0", "F", "f", "2", "T", "t", "D", "d")
        self.parameters = [
            #General search options:
            _Option(["-matrix", "matrix"],
                    "Scoring matrix name (default BLOSUM62).",
                    equate=False),
            _Option(["-threshold", "threshold"],
                    "Minimum word score such that the word is added to the "
                    "BLAST lookup table (float)",
                    equate=False),
            _Option(["-comp_based_stats", "comp_based_stats"],
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    2, T or t, D or d : Composition-based score adjustment
                    as in Bioinformatics 21:902-911, 2005, conditioned on
                    sequence properties

                    Note that tblastn also supports values of 1 and 3.""",
                    checker_function=lambda value: str(value) in comp_based_stats_values,
                    equate=False),
            #Query filtering options:
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False),
            #Extension options:
            _Option(["-gap_trigger", "gap_trigger"],
                    "Number of bits to trigger gapping (float, default 22)",
                    equate=False),
            #Miscellaneous options:
            _Switch(["-use_sw_tback", "use_sw_tback"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            #PSI-BLAST options:
            _Option(["-num_iterations", "num_iterations"],
                    """Number of iterations to perform, integer

                    Integer of at least one. Default is one.
                    Incompatible with: remote""",
                    equate=False),
            _Option(["-out_pssm", "out_pssm"],
                    "File name to store checkpoint file",
                    filename=True,
                    equate=False),
            _Option(["-out_ascii_pssm", "out_ascii_pssm"],
                    "File name to store ASCII version of PSSM",
                    filename=True,
                    equate=False),
            _Option(["-in_msa", "in_msa"],
                    """File name of multiple sequence alignment to restart
                    PSI-BLAST

                    Incompatible with: in_pssm, query""",
                    filename=True,
                    equate=False),
            _Option(["-msa_master_idx", "msa_master_idx"],
                    """Index of sequence to use as master in MSA.

                    Index (1-based) of sequence to use as the master in the
                    multiple sequence alignment. If not specified, the first
                    sequence is used.""",
                    equate=False),
            _Option(["-in_pssm", "in_pssm"],
                    """PSI-BLAST checkpoint file

                    Incompatible with: in_msa, query, phi_pattern""",
                    filename=True,
                    equate=False),
            #PSSM engine options:
            _Option(["-pseudocount", "pseudocount"],
                    """Pseudo-count value used when constructing PSSM

                    Integer. Default is zero.""",
                    equate=False),
            _Option(["-inclusion_ethresh", "inclusion_ethresh"],
                    """E-value inclusion threshold for pairwise alignments

                    Float. Default is 0.002.""",
                    equate=False),
            #PHI-BLAST options:
            _Option(["-phi_pattern", "phi_pattern"],
                    """File name containing pattern to search

                    Incompatible with: in_pssm""",
                    filename=True,
                    equate=False),
            ]
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check the documented option incompatibilities, then the parent's.

        The mapping pairs each option with the options its help text
        lists as incompatible; the checking itself is done by
        self._validate_incompatibilities.
        """
        incompatibles = {"num_iterations":["remote"],
                         "in_msa":["in_pssm", "query"],
                         "in_pssm":["in_msa","query","phi_pattern"]}
        self._validate_incompatibilities(incompatibles)
        _Ncbiblast2SeqCommandline._validate(self)
1241 1242
class NcbirpsblastCommandline(_NcbiblastCommandline):
    """Command line wrapper for rpsblast from the NCBI BLAST+ suite.

    BLAST+ is the C++ rewrite of BLAST; in it the NCBI replaced the old
    rpsblast tool with a similar tool of the same name. This class takes
    the place of RpsBlastCommandline, which wrapped the old rpsblast.

    >>> from Bio.Blast.Applications import NcbirpsblastCommandline
    >>> cline = NcbirpsblastCommandline(help=True)
    >>> cline
    NcbirpsblastCommandline(cmd='rpsblast', help=True)
    >>> print cline
    rpsblast -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="rpsblast", **kwargs):
        """Declare the rpsblast-specific options and initialise the wrapper.

        Arguments:
         - cmd - the executable to call, "rpsblast" unless overridden.
         - kwargs - passed on untouched to the parent class initialiser.
        """
        # Query filtering option.
        seg = _Option(["-seg", "seg"],
                      """Filter query sequence with SEG (string).

                      Format: "yes", "window locut hicut", or "no" to disable.
                      Default is "12 2.2 2.5""",
                      equate=False)

        # Options restricting the search or its results.
        culling_limit = _Option(["-culling_limit", "culling_limit"],
                                """Hit culling limit (integer).

                                If the query range of a hit is enveloped by that of at
                                least this many higher-scoring hits, delete the hit.

                                Incompatible with: best_hit_overhang, best_hit_score_edge.
                                """,
                                equate=False)
        best_hit_overhang = _Option(["-best_hit_overhang", "best_hit_overhang"],
                                    """Best Hit algorithm overhang value (recommended value: 0.1)

                                    Float between 0.0 and 0.5 inclusive.

                                    Incompatible with: culling_limit.""",
                                    equate=False)
        best_hit_score_edge = _Option(["-best_hit_score_edge", "best_hit_score_edge"],
                                      """Best Hit algorithm score edge value (recommended value: 0.1)

                                      Float between 0.0 and 0.5 inclusive.

                                      Incompatible with: culling_limit.""",
                                      equate=False)

        self.parameters = [seg, culling_limit, best_hit_overhang,
                           best_hit_score_edge]
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check culling_limit against the best hit options, then the parent's checks."""
        clashes = {}
        clashes["culling_limit"] = ["best_hit_overhang", "best_hit_score_edge"]
        self._validate_incompatibilities(clashes)
        _NcbiblastCommandline._validate(self)
1300 1301
class NcbirpstblastnCommandline(_NcbiblastCommandline):
    """Command line wrapper for rpstblastn from the NCBI BLAST+ suite.

    BLAST+ is the C++ rewrite of BLAST; in it the NCBI replaced the old
    rpsblast tool with a similar tool of the same name, and added the
    separate tool rpstblastn for Translated Reverse Position Specific BLAST.

    >>> from Bio.Blast.Applications import NcbirpstblastnCommandline
    >>> cline = NcbirpstblastnCommandline(help=True)
    >>> cline
    NcbirpstblastnCommandline(cmd='rpstblastn', help=True)
    >>> print cline
    rpstblastn -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    """
    def __init__(self, cmd="rpstblastn", **kwargs):
        """Declare the rpstblastn-specific options and initialise the wrapper.

        Arguments:
         - cmd - the executable to call, "rpstblastn" unless overridden.
         - kwargs - passed on untouched to the parent class initialiser.
        """
        def known_strand(choice):
            # Only these three spellings are accepted for -strand.
            return choice in ["both", "minus", "plus"]

        options = []
        # Input query options.
        options.append(
            _Option(["-strand", "strand"],
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    checker_function=known_strand,
                    equate=False))
        options.append(
            _Option(["-query_gencode", "query_gencode"],
                    """Genetic code to use to translate query

                    Integer. Default is one.""",
                    equate=False))
        # Query filtering options.
        options.append(
            _Option(["-seg", "seg"],
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5""",
                    equate=False))
        # Extension options.
        options.append(
            _Switch(["-ungapped", "ungapped"],
                    "Perform ungapped alignment only?"))

        self.parameters = options
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)
1348 1349
class NcbiblastformatterCommandline(_NcbibaseblastCommandline):
    """Wrapper for the NCBI BLAST+ program blast_formatter.

    With the release of BLAST 2.2.24+ (i.e. the BLAST suite rewritten in C++
    instead of C), the NCBI added the ASN.1 output format option to all the
    search tools, and extended the blast_formatter to support this as input.

    The blast_formatter command allows you to convert the ASN.1 output into
    the other output formats (XML, tabular, plain text, HTML).

    >>> from Bio.Blast.Applications import NcbiblastformatterCommandline
    >>> cline = NcbiblastformatterCommandline(archive="example.asn", outfmt=5, out="example.xml")
    >>> cline
    NcbiblastformatterCommandline(cmd='blast_formatter', out='example.xml', outfmt=5, archive='example.asn')
    >>> print cline
    blast_formatter -out example.xml -outfmt 5 -archive example.asn

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.

    Note that this wrapper is for the version of blast_formatter from BLAST
    2.2.24+ (or later) which is when the NCBI first announced the inclusion
    of this tool. There was actually an early version in BLAST 2.2.23+ (and
    possibly in older releases) but this did not have the -archive option
    (instead -rid is a mandatory argument), and is not supported by this
    wrapper.
    """
    def __init__(self, cmd="blast_formatter", **kwargs):
        """Declare the blast_formatter options and initialise the wrapper.

        Arguments:
         - cmd - the executable to call, "blast_formatter" unless overridden.
         - kwargs - passed on untouched to the parent class initialiser.
        """
        def at_least_one(value):
            # Convert before comparing so that "10" and 10 are treated
            # alike. Comparing the raw value with >= lets any string
            # through on Python 2 and raises TypeError on Python 3.
            try:
                return int(value) >= 1
            except (TypeError, ValueError, OverflowError):
                # Not a usable number: report it as a failed check.
                return False

        self.parameters = [
            # Input options
            _Option(["-rid", "rid"],
                    "BLAST Request ID (RID), not compatible with archive arg",
                    equate=False),
            _Option(["-archive", "archive"],
                    "Archive file of results, not compatible with rid arg.",
                    filename=True,
                    equate=False),
            # Restrict search or results
            _Option(["-max_target_seqs", "max_target_seqs"],
                    "Maximum number of aligned sequences to keep",
                    checker_function=at_least_one,
                    equate=False),
            ]
        _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check that rid and archive are not combined, then the parent's checks.

        The checking itself is done by self._validate_incompatibilities.
        """
        incompatibles = {"rid":["archive"]}
        self._validate_incompatibilities(incompatibles)
        _NcbibaseblastCommandline._validate(self)
1399 1400
def _test():
    """Execute the doctests of Bio.Blast.Applications with verbose output."""
    from doctest import testmod
    testmod(verbose=1)


if __name__ == "__main__":
    # Executed as a script rather than imported: run the docstring examples.
    _test()