1
2
3
4
5
6
7
8 """Definitions for interacting with BLAST related applications.
9
10 Obsolete wrappers for the old/classic NCBI BLAST tools (written in C):
11
12 - FastacmdCommandline
13 - BlastallCommandline
14 - BlastpgpCommandline
15 - RpsBlastCommandline
16
17 Wrappers for the new NCBI BLAST+ tools (written in C++):
18
19 - NcbiblastpCommandline - Protein-Protein BLAST
20 - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST
21 - NcbiblastxCommandline - Translated Query-Protein Subject BLAST
22 - NcbitblastnCommandline - Protein Query-Translated Subject BLAST
- NcbitblastxCommandline - Translated Query-Translated Subject BLAST
24 - NcbipsiblastCommandline - Position-Specific Initiated BLAST
25 - NcbirpsblastCommandline - Reverse Position Specific BLAST
26 - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST
27 - NcbiblastformatterCommandline - Convert ASN.1 to other BLAST output formats
28
29 For further details, see:
30
31 Camacho et al. BLAST+: architecture and applications
32 BMC Bioinformatics 2009, 10:421
33 doi:10.1186/1471-2105-10-421
34 """
35 from Bio.Application import _Option, AbstractCommandline, _Switch
36
38 """Create a commandline for the fasta program from NCBI (OBSOLETE).
39
40 """
def __init__(self, cmd="fastacmd", **kwargs):
    """Declare the two mandatory fastacmd options, then defer to the base.

    cmd is the executable name (default "fastacmd"); all remaining
    keyword arguments are passed on to AbstractCommandline.__init__.
    """
    database_option = _Option(["-d", "database"],
                              "The database to retrieve from.",
                              equate=False,
                              is_required=True)
    search_option = _Option(["-s", "search_string"],
                            "The id to search for.",
                            equate=False,
                            is_required=True)
    self.parameters = [database_option, search_option]
    AbstractCommandline.__init__(self, cmd, **kwargs)
53
54
56 """Base Commandline object for (classic) NCBI BLAST wrappers (PRIVATE).
57
58 This is provided for subclassing, it deals with shared options
59 common to all the BLAST tools (blastall, rpsblast, blastpgp).
60 """
62 assert cmd is not None
63 extra_parameters = [
64 _Switch(["--help", "help"],
65 "Print USAGE, DESCRIPTION and ARGUMENTS description; ignore other arguments."),
66 _Option(["-d", "database"],
67 "The database to BLAST against.",
68 is_required=True,
69 equate=False),
70 _Option(["-i", "infile"],
71 "The sequence to search with.",
72 filename=True,
73 is_required=True,
74 equate=False),
75 _Option(["-e", "expectation"],
76 "Expectation value cutoff.",
77 equate=False),
78 _Option(["-m", "align_view"],
79 "Alignment view. Integer 0-11. Use 7 for XML output.",
80 equate=False),
81 _Option(["-o", "align_outfile", "outfile"],
82 "Output file for alignment.",
83 filename=True,
84 equate=False),
85 _Option(["-y", "xdrop_extension"],
86 "Dropoff for blast extensions.",
87 equate=False),
88 _Option(["-F", "filter"],
89 "Filter query sequence with SEG? T/F",
90 equate=False),
91 _Option(["-X", "xdrop"],
92 "Dropoff value (bits) for gapped alignments.",
93 equate=False),
94 _Option(["-I", "show_gi"],
95 "Show GI's in deflines? T/F",
96 equate=False),
97 _Option(["-J", "believe_query"],
98 "Believe the query defline? T/F",
99 equate=False),
100 _Option(["-Z", "xdrop_final"],
101 "X dropoff for final gapped alignment.",
102 equate=False),
103 _Option(["-z", "db_length"],
104 "Effective database length.",
105 equate=False),
106 _Option(["-O", "seqalign_file"],
107 "seqalign file to output.",
108 filename=True,
109 equate=False),
110 _Option(["-v", "descriptions"],
111 "Number of one-line descriptions.",
112 equate=False),
113 _Option(["-b", "alignments"],
114 "Number of alignments.",
115 equate=False),
116 _Option(["-Y", "search_length"],
117 "Effective length of search space (use zero for the "
118 "real size).",
119 equate=False),
120 _Option(["-T", "html"],
121 "Produce HTML output? T/F",
122 equate=False),
123 _Option(["-U", "case_filter"],
124 "Use lower case filtering of FASTA sequence? T/F",
125 equate=False),
126 _Option(["-a", "nprocessors"],
127 "Number of processors to use.",
128 equate=False),
129 _Option(["-g", "gapped"],
130 "Whether to do a gapped alignment. T/F",
131 equate=False),
132 ]
133 try:
134
135
136 self.parameters = extra_parameters + self.parameters
137 except AttributeError:
138
139 self.parameters = extra_parameters
140 AbstractCommandline.__init__(self, cmd, **kwargs)
141
147
148
150 """Base Commandline object for NCBI BLAST wrappers (PRIVATE).
151
152 This is provided for subclassing, it deals with shared options
153 common to all the blastall and blastpgp tools (but not rpsblast).
154 """
def __init__(self, cmd=None, **kwargs):
    """Add the options shared by blastall and blastpgp, then chain upwards.

    If a subclass has already stored its own option list in
    self.parameters, the shared options defined here are placed in front
    of it.  cmd must not be None; it is handed to
    _BlastCommandLine.__init__ together with the keyword arguments.
    """
    assert cmd is not None
    shared_options = [
        _Option(["-G", "gap_open"], "Gap open penalty", equate=False),
        _Option(["-E", "gap_extend"], "Gap extension penalty", equate=False),
        _Option(["-A", "window_size"], "Multiple hits window size",
                equate=False),
        _Option(["-f", "hit_extend"], "Threshold for extending hits.",
                equate=False),
        # NOTE: this description string starts with a space.
        _Option(["-K", "keep_hits"],
                " Number of best hits from a region to keep.",
                equate=False),
        _Option(["-W", "wordsize"], "Word size", equate=False),
        _Option(["-P", "passes"],
                "Hits/passes. Integer 0-2. 0 for multiple hit, "
                "1 for single hit (does not apply to blastn)",
                equate=False),
    ]
    # Fall back to an empty list when no subclass has set self.parameters.
    self.parameters = shared_options + getattr(self, "parameters", [])
    _BlastCommandLine.__init__(self, cmd, **kwargs)
189
190
192 """Create a commandline for the blastall program from NCBI (OBSOLETE).
193
194 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
195 are replacing blastall with separate tools blastn, blastp, blastx, tblastn
196 and tblastx.
197
198 Like blastall, this wrapper is now obsolete, and will be deprecated and
199 removed in a future release of Biopython.
200
201 >>> from Bio.Blast.Applications import BlastallCommandline
202 >>> cline = BlastallCommandline(program="blastx", infile="m_cold.fasta",
203 ... database="nr", expectation=0.001)
204 >>> cline
205 BlastallCommandline(cmd='blastall', database='nr', infile='m_cold.fasta', expectation=0.001, program='blastx')
206 >>> print cline
207 blastall -d nr -i m_cold.fasta -e 0.001 -p blastx
208
209 You would typically run the command line with cline() or via the Python
210 subprocess module, as described in the Biopython tutorial.
211 """
212
def __init__(self, cmd="blastall", **kwargs):
    """Declare the blastall-specific options and chain to the shared base.

    Emits a PendingDeprecationWarning first, because this wrapper is
    obsolete.  cmd is the executable name (default "blastall"); the
    keyword arguments are passed on to
    _BlastAllOrPgpCommandLine.__init__.
    """
    import warnings
    warnings.warn("Like blastall, this wrapper is now obsolete, and will be "
                  "deprecated and removed in a future release of Biopython.",
                  PendingDeprecationWarning)
    self.parameters = [
        _Option(["-p", "program"],
                "The blast program to use (e.g. blastp, blastn).",
                is_required=True,
                equate=False),
        _Option(["-q", "nuc_mismatch"],
                "Penalty for a nucleotide mismatch (blastn only).",
                equate=False),
        _Option(["-r", "nuc_match"],
                "Reward for a nucleotide match (blastn only).",
                equate=False),
        _Option(["-Q", "query_genetic_code"],
                "Query Genetic code to use.",
                equate=False),
        _Option(["-D", "db_genetic_code"],
                "DB Genetic code (for tblast[nx] only).",
                equate=False),
        _Option(["-M", "matrix"],
                "Matrix to use",
                equate=False),
        _Option(["-S", "strands"],
                "Query strands to search against database (for blast[nx], "
                "and tblastx). 3 is both, 1 is top, 2 is bottom.",
                equate=False),
        _Option(["-l", "restrict_gi"],
                "Restrict search of database to list of GI's.",
                equate=False),
        _Option(["-R", "checkpoint"],
                "PSI-TBLASTN checkpoint input file.",
                filename=True,
                equate=False),
        _Option(["-n", "megablast"],
                "MegaBlast search T/F.",
                equate=False),
        _Option(["-L", "region_length", "range_restriction"],
                "Location on query sequence (string format start,end).\n"
                "\n"
                "In older versions of BLAST, -L set the length of region\n"
                "used to judge hits (see -K parameter).",
                equate=False),
        # The historical (misspelled) name stays last so existing callers
        # keep working; the correctly spelled name is offered as an extra
        # alias, in the same way -L above carries two names.
        # NOTE(review): confirm Bio.Application accepts every listed name
        # as a keyword and uses the last one as the attribute name.
        _Option(["-w", "frame_shift_penalty", "frame_shit_penalty"],
                "Frame shift penalty (OOF algorithm for blastx).",
                equate=False),
        _Option(["-t", "largest_intron"],
                "Length of the largest intron allowed in a translated "
                "nucleotide sequence when linking multiple distinct "
                "alignments. (0 invokes default behavior; a negative value "
                "disables linking.)",
                equate=False),
        _Option(["-B", "num_concatenated_queries"],
                "Number of concatenated queries, for blastn and tblastn.",
                equate=False),
        _Option(["-V", "oldengine"],
                "Force use of the legacy BLAST engine.",
                equate=False),
        _Option(["-C", "composition_based"],
                "Use composition-based statistics for tblastn:\n"
                "D or d: default (equivalent to F)\n"
                "0 or F or f: no composition-based statistics\n"
                "1 or T or t: Composition-based statistics as in "
                "NAR 29:2994-3005, 2001\n"
                "2: Composition-based score adjustment as in Bioinformatics\n"
                "21:902-911, 2005, conditioned on sequence properties\n"
                "3: Composition-based score adjustment as in Bioinformatics\n"
                "21:902-911, 2005, unconditionally\n"
                "For programs other than tblastn, must either be absent "
                "or be\n"
                "D, F or 0.",
                equate=False),
        _Option(["-s", "smith_waterman"],
                "Compute locally optimal Smith-Waterman alignments (This "
                "option is only available for gapped tblastn.) T/F",
                equate=False),
    ]
    _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
294
295
297 """Create a commandline for the blastpgp program from NCBI (OBSOLETE).
298
299 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
300 are replacing blastpgp with a renamed tool psiblast. This module provides
301 NcbipsiblastCommandline as a wrapper for the new tool psiblast.
302
303 Like blastpgp (and blastall), this wrapper is now obsolete, and will be
304 deprecated and removed in a future release of Biopython.
305
306 >>> from Bio.Blast.Applications import BlastpgpCommandline
307 >>> cline = BlastpgpCommandline(help=True)
308 >>> cline
309 BlastpgpCommandline(cmd='blastpgp', help=True)
310 >>> print cline
311 blastpgp --help
312
313 You would typically run the command line with cline() or via the Python
314 subprocess module, as described in the Biopython tutorial.
315 """
def __init__(self, cmd="blastpgp", **kwargs):
    """Declare the blastpgp-specific options and chain to the shared base.

    A PendingDeprecationWarning is issued before anything else, since
    this wrapper is obsolete.  cmd is the executable name (default
    "blastpgp"); keyword arguments go to
    _BlastAllOrPgpCommandLine.__init__.
    """
    import warnings
    obsolete_notice = ("Like blastpgp (and blastall), this wrapper is now "
                       "obsolete, and will be deprecated and removed in a "
                       "future release of Biopython.")
    warnings.warn(obsolete_notice, PendingDeprecationWarning)

    # Options whose values are file names.
    file_options = [
        _Option(["-C", "checkpoint_outfile"],
                "Output file for PSI-BLAST checkpointing.",
                equate=False, filename=True),
        _Option(["-R", "restart_infile"],
                "Input file for PSI-BLAST restart.",
                equate=False, filename=True),
        _Option(["-k", "hit_infile"],
                "Hit file for PHI-BLAST.",
                equate=False, filename=True),
        _Option(["-Q", "matrix_outfile"],
                "Output file for PSI-BLAST matrix in ASCII.",
                equate=False, filename=True),
        _Option(["-B", "align_infile"],
                "Input alignment file for PSI-BLAST restart.",
                equate=False, filename=True),
    ]
    # Remaining options; only the program name is mandatory.
    plain_options = [
        _Option(["-S", "required_start"],
                "Start of required region in query.", equate=False),
        _Option(["-H", "required_end"],
                "End of required region in query.", equate=False),
        _Option(["-j", "npasses"],
                "Number of passes", equate=False),
        _Option(["-N", "nbits_gapping"],
                "Number of bits to trigger gapping.", equate=False),
        _Option(["-c", "pseudocounts"],
                "Pseudocounts constants for multiple passes.", equate=False),
        _Option(["-h", "model_threshold"],
                "E-value threshold to include in multipass model.",
                equate=False),
        _Option(["-L", "region_length"],
                "Cost to decline alignment (disabled when zero).",
                equate=False),
        _Option(["-M", "matrix"],
                "Matrix (string, default BLOSUM62).", equate=False),
        _Option(["-p", "program"],
                "The blast program to use (e.g blastpgp, patseedp or seedp).",
                equate=False, is_required=True),
    ]
    self.parameters = file_options + plain_options
    _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
371
372
374 """Create a commandline for the classic rpsblast program from NCBI (OBSOLETE).
375
376 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
377 are replacing the old rpsblast with a new version of the same name plus a
378 second tool rpstblastn, both taking different command line arguments. This
module provides NcbirpsblastCommandline and NcbirpstblastnCommandline as
380 wrappers for the new tools.
381
382 Like the old rpsblast (and blastall), this wrapper is now obsolete, and will
383 be deprecated and removed in a future release of Biopython.
384
385 >>> from Bio.Blast.Applications import RpsBlastCommandline
386 >>> cline = RpsBlastCommandline(help=True)
387 >>> cline
388 RpsBlastCommandline(cmd='rpsblast', help=True)
389 >>> print cline
390 rpsblast --help
391
392 You would typically run the command line with cline() or via the Python
393 subprocess module, as described in the Biopython tutorial.
394 """
def __init__(self, cmd="rpsblast", **kwargs):
    """Declare the classic rpsblast options and chain to _BlastCommandLine.

    A PendingDeprecationWarning is raised through the warnings module
    first, since this wrapper is obsolete.  cmd is the executable name
    (default "rpsblast"); keyword arguments go to
    _BlastCommandLine.__init__.
    """
    import warnings
    obsolete_notice = ("Like the old rpsblast (and blastall), this wrapper "
                       "is now obsolete, and will be deprecated and removed "
                       "in a future release of Biopython.")
    warnings.warn(obsolete_notice, PendingDeprecationWarning)

    gapping = _Option(["-N", "nbits_gapping"],
                      "Number of bits to trigger gapping.",
                      equate=False)
    multihit = _Option(["-P", "multihit"],
                       "0 for multiple hit, 1 for single hit",
                       equate=False)
    logfile = _Option(["-l", "logfile"],
                      "Logfile name.",
                      equate=False,
                      filename=True)
    protein = _Option(["-p", "protein"],
                      "Query sequence is protein. T/F",
                      equate=False)
    query_range = _Option(["-L", "range_restriction"],
                          "Location on query sequence "
                          "(string format start,end).",
                          equate=False)
    self.parameters = [gapping, multihit, logfile, protein, query_range]
    _BlastCommandLine.__init__(self, cmd, **kwargs)
420
421
422
423
424
426 """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
427
428 This is provided for subclassing, it deals with shared options
common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc
430 AND blast_formatter).
431 """
def __init__(self, cmd=None, **kwargs):
    """Add the help/version switches and output-formatting options.

    These options are put in front of any list a subclass has already
    stored in self.parameters.  cmd must not be None; it is passed with
    the keyword arguments to AbstractCommandline.__init__.
    """
    assert cmd is not None
    extra_parameters = [
        # Core:
        _Switch(["-h", "h"],
                "Print USAGE and DESCRIPTION;  ignore other arguments."
                .replace(";  ", "; ")),
        _Switch(["-help", "help"],
                "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                "ignore other arguments."),
        _Switch(["-version", "version"],
                "Print version number; ignore other arguments."),
        # Output configuration options
        _Option(["-out", "out"],
                "Output file for alignment.",
                filename=True,
                equate=False),
        # Formatting options:
        _Option(["-outfmt", "outfmt"],
                "Alignment view. Integer 0-11. Use 5 for XML output "
                "(differs from classic BLAST which used 7 for XML).",
                equate=False),
        _Switch(["-show_gis", "show_gis"],
                "Show NCBI GIs in deflines?"),
        _Option(["-num_descriptions", "num_descriptions"],
                "Number of database sequences to show one-line "
                "descriptions for.\n"
                "\n"
                "Integer argument (at least zero). Default is 500.\n"
                "See also num_alignments.",
                equate=False),
        # The help text used to read "show num_alignments for" and to
        # cross-reference itself; it now describes alignments and points
        # at the sibling num_descriptions option.
        _Option(["-num_alignments", "num_alignments"],
                "Number of database sequences to show alignments for.\n"
                "\n"
                "Integer argument (at least zero). Default is 200.\n"
                "See also num_descriptions.",
                equate=False),
        _Switch(["-html", "html"],
                "Produce HTML output? See also the outfmt option."),
        # Miscellaneous options
        _Switch(["-parse_deflines", "parse_deflines"],
                "Should the query and subject defline(s) be parsed?"),
    ]
    try:
        # Shared options go first, ahead of whatever the subclass set.
        self.parameters = extra_parameters + self.parameters
    except AttributeError:
        # No subclass list was defined before this constructor ran.
        self.parameters = extra_parameters
    AbstractCommandline.__init__(self, cmd, **kwargs)
482
484 """Used by the BLAST+ _validate method (PRIVATE)."""
485 for a in incompatibles:
486 if self._get_parameter(a):
487 for b in incompatibles[a]:
488 if self._get_parameter(b):
489 raise ValueError("Options %s and %s are incompatible." \
490 % (a,b))
491
492
494 """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
495
496 This is provided for subclassing, it deals with shared options
common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc).
498 """
def __init__(self, cmd=None, **kwargs):
    """Add the query, database and search options shared by BLAST+ tools.

    The options are grouped below by purpose and then joined in their
    original order in front of any list a subclass has already stored in
    self.parameters.  cmd must not be None; it is passed with the keyword
    arguments to _NcbibaseblastCommandline.__init__.
    """
    assert cmd is not None

    input_options = [
        _Option(["-query", "query"],
                "The sequence to search with.",
                equate=False, filename=True),
        _Option(["-query_loc", "query_loc"],
                "Location on the query sequence (Format: start-stop)",
                equate=False),
    ]
    general_options = [
        _Option(["-db", "db"],
                "The database to BLAST against.",
                equate=False),
        _Option(["-evalue", "evalue"],
                "Expectation value cutoff.",
                equate=False),
        _Option(["-word_size", "word_size"],
                "Word size for wordfinder algorithm.\n"
                "\n"
                "Integer. Minimum 2.",
                equate=False),
    ]
    filtering_options = [
        _Switch(["-lcase_masking", "lcase_masking"],
                "Use lower case filtering in query and subject "
                "sequence(s)?"),
    ]
    restriction_options = [
        _Option(["-gilist", "gilist"],
                "Restrict search of database to list of GI's.\n"
                "\n"
                "Incompatible with: negative_gilist, seqidlist, remote, "
                "subject, subject_loc",
                equate=False, filename=True),
        _Option(["-negative_gilist", "negative_gilist"],
                "Restrict search of database to everything except the "
                "listed GIs.\n"
                "\n"
                "Incompatible with: gilist, seqidlist, remote, subject, "
                "subject_loc",
                equate=False, filename=True),
        _Option(["-seqidlist", "seqidlist"],
                "Restrict search of database to list of SeqID's.\n"
                "\n"
                "Incompatible with: gilist, negative_gilist, remote, "
                "subject, subject_loc",
                equate=False, filename=True),
        _Option(["-entrez_query", "entrez_query"],
                "Restrict search with the given Entrez query "
                "(requires remote).",
                equate=False),
        _Option(["-max_target_seqs", "max_target_seqs"],
                "Maximum number of aligned sequences to keep.\n"
                "\n"
                "Integer argument (at least one).",
                equate=False),
    ]
    statistics_options = [
        _Option(["-dbsize", "dbsize"],
                "Effective length of the database (integer)",
                equate=False),
        _Option(["-searchsp", "searchsp"],
                "Effective length of the search space (integer)",
                equate=False),
    ]
    extension_options = [
        _Option(["-xdrop_ungap", "xdrop_ungap"],
                "X-dropoff value (in bits) for ungapped extensions. Float.",
                equate=False),
        _Option(["-xdrop_gap", "xdrop_gap"],
                "X-dropoff value (in bits) for preliminary gapped "
                "extensions. Float.",
                equate=False),
        _Option(["-xdrop_gap_final", "xdrop_gap_final"],
                "X-dropoff value (in bits) for final gapped alignment. "
                "Float.",
                equate=False),
        _Option(["-window_size", "window_size"],
                "Multiple hits window size, use 0 to specify 1-hit "
                "algorithm. Integer.",
                equate=False),
    ]
    strategy_options = [
        _Option(["-import_search_strategy", "import_search_strategy"],
                "Search strategy to use.\n"
                "\n"
                "Incompatible with: export_search_strategy",
                equate=False, filename=True),
        _Option(["-export_search_strategy", "export_search_strategy"],
                "File name to record the search strategy used.\n"
                "\n"
                "Incompatible with: import_search_strategy",
                equate=False, filename=True),
    ]
    misc_options = [
        _Option(["-num_threads", "num_threads"],
                "Number of threads to use in the BLAST search.\n"
                "\n"
                "Integer of at least one. Default is one.\n"
                "Incompatible with: remote",
                equate=False),
        _Switch(["-remote", "remote"],
                "Execute search remotely?\n"
                "\n"
                "Incompatible with: gilist, negative_gilist, subject_loc, "
                "num_threads, ..."),
    ]

    shared_options = (input_options + general_options + filtering_options
                      + restriction_options + statistics_options
                      + extension_options + strategy_options + misc_options)
    # Fall back to an empty list when no subclass has set self.parameters.
    self.parameters = shared_options + getattr(self, "parameters", [])
    _NcbibaseblastCommandline.__init__(self, cmd, **kwargs)
612
614 incompatibles = {"remote":["gilist", "negative_gilist", "num_threads"],
615 "import_search_strategy" : ["export_search_strategy"],
616 "gilist":["negative_gilist"],
617 "seqidlist":["gilist", "negative_gilist", "remote"]}
618 self._validate_incompatibilities(incompatibles)
619 if self.entrez_query and not self.remote :
620 raise ValueError("Option entrez_query requires remote option.")
621 AbstractCommandline._validate(self)
622
623
625 """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
626
627 This is provided for subclassing, it deals with shared options
628 common to all the BLAST tools supporting two-sequence BLAST
629 (blastn, psiblast, etc) but not rpsblast or rpstblastn.
630 """
def __init__(self, cmd=None, **kwargs):
    """Add the gap-cost, subject-sequence and hit-restriction options.

    The options defined here are placed in front of any list a subclass
    has already stored in self.parameters.  cmd must not be None; it is
    passed with the keyword arguments to _NcbiblastCommandline.__init__.
    """
    assert cmd is not None

    # Both best-hit options share the tail of their help text.
    best_hit_tail = ("\n"
                     "\n"
                     "Float between 0.0 and 0.5 inclusive.\n"
                     "\n"
                     "Incompatible with: culling_limit.")

    two_sequence_options = [
        # General search options:
        _Option(["-gapopen", "gapopen"],
                "Cost to open a gap (integer).",
                equate=False),
        _Option(["-gapextend", "gapextend"],
                "Cost to extend a gap (integer).",
                equate=False),
        # BLAST-2-Sequences options:
        _Option(["-subject", "subject"],
                "Subject sequence(s) to search.\n"
                "\n"
                "Incompatible with: db, gilist, negative_gilist.\n"
                "See also subject_loc.",
                equate=False, filename=True),
        _Option(["-subject_loc", "subject_loc"],
                "Location on the subject sequence (Format: start-stop)\n"
                "\n"
                "Incompatible with: db, gilist, seqidlist, negative_gilist,\n"
                "db_soft_mask, db_hard_mask, remote.\n"
                "\n"
                "See also subject.",
                equate=False),
        # Restrict search or results:
        _Option(["-culling_limit", "culling_limit"],
                "Hit culling limit (integer).\n"
                "\n"
                "If the query range of a hit is enveloped by that of at\n"
                "least this many higher-scoring hits, delete the hit.\n"
                "\n"
                "Incompatible with: best_hit_overhang, "
                "best_hit_score_edge.\n",
                equate=False),
        _Option(["-best_hit_overhang", "best_hit_overhang"],
                "Best Hit algorithm overhang value "
                "(recommended value: 0.1)" + best_hit_tail,
                equate=False),
        _Option(["-best_hit_score_edge", "best_hit_score_edge"],
                "Best Hit algorithm score edge value "
                "(recommended value: 0.1)" + best_hit_tail,
                equate=False),
    ]
    # Fall back to an empty list when no subclass has set self.parameters.
    self.parameters = two_sequence_options + getattr(self, "parameters", [])
    _NcbiblastCommandline.__init__(self, cmd, **kwargs)
690
691
693 incompatibles = {"subject_loc":["db", "gilist", "negative_gilist", "seqidlist", "remote"],
694 "culling_limit":["best_hit_overhang","best_hit_score_edge"],
695 "subject":["db", "gilist", "negative_gilist", "seqidlist"]}
696 self._validate_incompatibilities(incompatibles)
697 _NcbiblastCommandline._validate(self)
698
699
class _NcbiblastMain2SeqCommandline(_Ncbiblast2SeqCommandline):
    """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    Intended for subclassing only.  It contributes the database masking
    options common to the main BLAST tools blastp, blastn, blastx,
    tblastx and tblastn (but not psiblast, rpsblast or rpstblastn).
    """

    def __init__(self, cmd=None, **kwargs):
        """Add db_soft_mask and db_hard_mask, then chain to the parent.

        The two options go in front of any list a subclass has already
        stored in self.parameters.  cmd must not be None.
        """
        assert cmd is not None
        soft_mask = _Option(
            ["-db_soft_mask", "db_soft_mask"],
            "Filtering algorithm for soft masking (integer).\n"
            "\n"
            "Filtering algorithm ID to apply to the BLAST database as "
            "soft masking.\n"
            "\n"
            "Incompatible with: db_hard_mask, subject, subject_loc",
            equate=False)
        hard_mask = _Option(
            ["-db_hard_mask", "db_hard_mask"],
            "Filtering algorithm for hard masking (integer).\n"
            "\n"
            "Filtering algorithm ID to apply to the BLAST database as "
            "hard masking.\n"
            "\n"
            "Incompatible with: db_soft_mask, subject, subject_loc",
            equate=False)
        masking_options = [soft_mask, hard_mask]
        # Fall back to an empty list when self.parameters is not set yet.
        self.parameters = masking_options + getattr(self, "parameters", [])
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Reject conflicting masking options, then run the parent checks."""
        incompatibles = {
            "db_soft_mask": ["db_hard_mask", "subject", "subject_loc"],
            "db_hard_mask": ["db_soft_mask", "subject", "subject_loc"],
        }
        self._validate_incompatibilities(incompatibles)
        _Ncbiblast2SeqCommandline._validate(self)
740
742 """Create a commandline for the NCBI BLAST+ program blastp (for proteins).
743
744 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
745 replaced the old blastall tool with separate tools for each of the searches.
746 This wrapper therefore replaces BlastallCommandline with option -p blastp.
747
748 >>> from Bio.Blast.Applications import NcbiblastpCommandline
749 >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
750 ... evalue=0.001, remote=True, ungapped=True)
751 >>> cline
752 NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
753 >>> print cline
754 blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped
755
756 You would typically run the command line with cline() or via the Python
757 subprocess module, as described in the Biopython tutorial.
758 """
def __init__(self, cmd="blastp", **kwargs):
    """Declare the blastp-specific options and chain to the shared base.

    cmd is the executable name (default "blastp"); keyword arguments
    are passed on to _NcbiblastMain2SeqCommandline.__init__.
    """
    # Single-character codes listed in the comp_based_stats help text.
    # An explicit tuple is used rather than a substring test against one
    # string, which rejected "f", accepted "" and multi-character runs
    # such as "Ft", and raised TypeError for integer values.
    comp_based_codes = ("0", "F", "f", "2", "T", "t", "D", "d")

    self.parameters = [
        # General search options:
        _Option(["-task", "task"],
                "Task to execute (string, blastp (default) or blastp-short).",
                checker_function=lambda value: value in ["blastp",
                                                         "blastp-short"],
                equate=False),
        # equate=False added for consistency with every other option in
        # this file (including the blastx -matrix option).
        _Option(["-matrix", "matrix"],
                "Scoring matrix name (default BLOSUM62).",
                equate=False),
        _Option(["-threshold", "threshold"],
                "Minimum word score such that the word is added to the "
                "BLAST lookup table (float)",
                equate=False),
        _Option(["-comp_based_stats", "comp_based_stats"],
                "Use composition-based statistics "
                "(string, default 2, i.e. True).\n"
                "\n"
                "0, F or f: no composition-based statistics\n"
                "2, T or t, D or d : Composition-based score adjustment "
                "as in\n"
                "Bioinformatics 21:902-911, 2005, conditioned on sequence "
                "properties\n"
                "\n"
                "Note that tblastn also supports values of 1 and 3.",
                # str() lets the integers 0 and 2 pass as well.
                checker_function=lambda value: str(value) in comp_based_codes,
                equate=False),
        # Query filtering options:
        _Option(["-seg", "seg"],
                "Filter query sequence with SEG (string).\n"
                "\n"
                'Format: "yes", "window locut hicut", or "no" to disable.\n'
                'Default is "12 2.2 2.5"',
                equate=False),
        # Extension options:
        _Switch(["-ungapped", "ungapped"],
                "Perform ungapped alignment only?"),
        # Miscellaneous options:
        _Switch(["-use_sw_tback", "use_sw_tback"],
                "Compute locally optimal Smith-Waterman alignments?"),
    ]
    _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
798
799
801 """Wrapper for the NCBI BLAST+ program blastn (for nucleotides).
802
803 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
804 replaced the old blastall tool with separate tools for each of the searches.
805 This wrapper therefore replaces BlastallCommandline with option -p blastn.
806
807 For example, to run a search against the "nt" nucleotide database using the
808 FASTA nucleotide file "m_code.fasta" as the query, with an expectation value
809 cut off of 0.001, saving the output to a file in XML format:
810
811 >>> from Bio.Blast.Applications import NcbiblastnCommandline
812 >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus",
813 ... evalue=0.001, out="m_cold.xml", outfmt=5)
814 >>> cline
815 NcbiblastnCommandline(cmd='blastn', out='m_cold.xml', outfmt=5, query='m_cold.fasta', db='nt', evalue=0.001, strand='plus')
816 >>> print cline
817 blastn -out m_cold.xml -outfmt 5 -query m_cold.fasta -db nt -evalue 0.001 -strand plus
818
819 You would typically run the command line with cline() or via the Python
820 subprocess module, as described in the Biopython tutorial.
821 """
def __init__(self, cmd="blastn", **kwargs):
    """Declare the blastn-specific options and chain to the shared base.

    cmd is the executable name (default "blastn"); keyword arguments
    are passed on to _NcbiblastMain2SeqCommandline.__init__.
    """
    # Accepted values for the options that carry a checker function.
    strand_choices = ("both", "minus", "plus")
    task_choices = ("blastn", "blastn-short", "dc-megablast", "megablast",
                    "vecscreen")
    template_types = ("coding", "coding_and_optimal", "optimal")
    # Template lengths are accepted either as integers or as strings.
    template_lengths = (16, 18, 21, "16", "18", "21")

    self.parameters = [
        # Input query options:
        _Option(["-strand", "strand"],
                "Query strand(s) to search against database/subject.\n"
                "\n"
                'Values allowed are "both" (default), "minus", "plus".',
                equate=False,
                checker_function=lambda value: value in strand_choices),
        # General search options:
        _Option(["-task", "task"],
                "Task to execute (string, default 'megablast')\n"
                "\n"
                "Allowed values 'blastn', 'blastn-short', 'dc-megablast', "
                "'megablast'\n"
                "(the default), or 'vecscreen'.",
                equate=False,
                checker_function=lambda value: value in task_choices),
        _Option(["-penalty", "penalty"],
                "Penalty for a nucleotide mismatch (integer, at most zero).",
                equate=False),
        _Option(["-reward", "reward"],
                "Reward for a nucleotide match (integer, at least zero).",
                equate=False),
        _Option(["-index_name", "index_name"],
                "MegaBLAST database index name.",
                equate=False),
        # Query filtering options:
        _Option(["-dust", "dust"],
                "Filter query sequence with DUST (string).\n"
                "\n"
                "Format: 'yes', 'level window linker', or 'no' to disable.\n"
                "Default = '20 64 1'.\n",
                equate=False),
        _Option(["-filtering_db", "filtering_db"],
                "BLAST database containing filtering elements "
                "(i.e. repeats).",
                equate=False),
        _Option(["-window_masker_taxid", "window_masker_taxid"],
                "Enable WindowMasker filtering using a Taxonomic ID "
                "(integer).",
                equate=False),
        _Option(["-window_masker_db", "window_masker_db"],
                "Enable WindowMasker filtering using this repeats database "
                "(string).",
                equate=False),
        # Restrict search or results:
        _Option(["-perc_identity", "perc_identity"],
                "Percent identity (real, 0 to 100 inclusive).",
                equate=False),
        # Discontiguous MegaBLAST options:
        _Option(["-template_type", "template_type"],
                "Discontiguous MegaBLAST template type (string).\n"
                "\n"
                "Allowed values: 'coding', 'coding_and_optimal' or "
                "'optimal'\n"
                "Requires: template_length.",
                equate=False,
                checker_function=lambda value: value in template_types),
        _Option(["-template_length", "template_length"],
                "Discontiguous MegaBLAST template length (integer).\n"
                "\n"
                "Allowed values: 16, 18, 21\n"
                "\n"
                "Requires: template_type.",
                equate=False,
                checker_function=lambda value: value in template_lengths),
        # Extension options:
        _Switch(["-no_greedy", "no_greedy"],
                "Use non-greedy dynamic programming extension"),
        _Option(["-min_raw_gapped_score", "min_raw_gapped_score"],
                "Minimum raw gapped score to keep an alignment in the "
                "preliminary gapped and traceback stages (integer).",
                equate=False),
        _Switch(["-ungapped", "ungapped"],
                "Perform ungapped alignment only?"),
        _Option(["-off_diagonal_range", "off_diagonal_range"],
                "Number of off-diagonals to search for the 2nd hit "
                "(integer).\n"
                "\n"
                "Expects a positive integer, or 0 (default) to turn off.\n"
                "\n"
                "Added in BLAST 2.2.23+\n",
                equate=False),
    ]
    _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
914
# template_type and template_length are only valid as a pair: reject the
# case where exactly one of them has been set.  (The message previously
# named template_type twice.)
if bool(self.template_type) != bool(self.template_length):
    raise ValueError("Options template_type and template_length "
                     "require each other.")
_NcbiblastMain2SeqCommandline._validate(self)
920
921
923 """Wrapper for the NCBI BLAST+ program blastx (nucleotide query, protein database).
924
925 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
926 replaced the old blastall tool with separate tools for each of the searches.
927 This wrapper therefore replaces BlastallCommandline with option -p blastx.
928
929 >>> from Bio.Blast.Applications import NcbiblastxCommandline
930 >>> cline = NcbiblastxCommandline(query="m_cold.fasta", db="nr", evalue=0.001)
931 >>> cline
932 NcbiblastxCommandline(cmd='blastx', query='m_cold.fasta', db='nr', evalue=0.001)
933 >>> print cline
934 blastx -query m_cold.fasta -db nr -evalue 0.001
935
936 You would typically run the command line with cline() or via the Python
937 subprocess module, as described in the Biopython tutorial.
938 """
def __init__(self, cmd="blastx", **kwargs):
    """Declare the blastx-specific options and chain to the shared base.

    cmd is the executable name (default "blastx"); keyword arguments
    are passed on to _NcbiblastMain2SeqCommandline.__init__.
    """
    strand_choices = ("both", "minus", "plus")

    # Input query options:
    strand = _Option(
        ["-strand", "strand"],
        "Query strand(s) to search against database/subject.\n"
        "\n"
        'Values allowed are "both" (default), "minus", "plus".',
        equate=False,
        checker_function=lambda value: value in strand_choices)
    query_gencode = _Option(
        ["-query_gencode", "query_gencode"],
        "Genetic code to use to translate query\n"
        "\n"
        "Integer. Default is one.",
        equate=False)

    # General search options:
    frame_shift_penalty = _Option(
        ["-frame_shift_penalty", "frame_shift_penalty"],
        "Frame shift penalty (integer, at least 1, default ignored).",
        equate=False)
    max_intron_length = _Option(
        ["-max_intron_length", "max_intron_length"],
        "Maximum intron length (integer).\n"
        "\n"
        "Length of the largest intron allowed in a translated nucleotide\n"
        "sequence when linking multiple distinct alignments (a negative\n"
        "value disables linking). Default zero.",
        equate=False)
    matrix = _Option(
        ["-matrix", "matrix"],
        "Scoring matrix name (default BLOSUM62).",
        equate=False)
    threshold = _Option(
        ["-threshold", "threshold"],
        "Minimum word score such that the word is added to the "
        "BLAST lookup table (float)",
        equate=False)

    # Query filtering options:
    # NOTE: the help text ends without a closing double quote; it is
    # reproduced verbatim.
    seg = _Option(
        ["-seg", "seg"],
        "Filter query sequence with SEG (string).\n"
        "\n"
        'Format: "yes", "window locut hicut", or "no" to disable.\n'
        'Default is "12 2.2 2.5',
        equate=False)

    # Extension options:
    ungapped = _Switch(["-ungapped", "ungapped"],
                       "Perform ungapped alignment only?")

    self.parameters = [strand, query_gencode, frame_shift_penalty,
                       max_intron_length, matrix, threshold, seg, ungapped]
    _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
984
985
987 """Wrapper for the NCBI BLAST+ program tblastn.
988
989 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
990 replaced the old blastall tool with separate tools for each of the searches.
991 This wrapper therefore replaces BlastallCommandline with option -p tblastn.
992
993 >>> from Bio.Blast.Applications import NcbitblastnCommandline
994 >>> cline = NcbitblastnCommandline(help=True)
995 >>> cline
996 NcbitblastnCommandline(cmd='tblastn', help=True)
997 >>> print cline
998 tblastn -help
999
1000 You would typically run the command line with cline() or via the Python
1001 subprocess module, as described in the Biopython tutorial.
1002 """
def __init__(self, cmd="tblastn", **kwargs):
    """Register the tblastn-specific options and initialise the base class.

    Arguments:
     - cmd - command placed at the start of the command line
       (default "tblastn").
     - kwargs - passed through unchanged to the base class initialiser,
       e.g. help=True.
    """
    # Every value the comp_based_stats help text lists for tblastn.  An
    # exact-match test against this tuple replaces the old substring test
    # on "0Ft12TtDd3", which rejected the documented "f" and accepted ""
    # as well as multi-character substrings such as "Ft" or "12".
    comp_based_stats_values = ("0", "F", "f", "1", "2", "T", "t", "D", "d", "3")

    # self.parameters is filled in first; the base class initialiser is
    # called afterwards with cmd and the keyword arguments.
    self.parameters = [
        # tblastn translates the subject side, so this help text refers to
        # subject sequences (it previously said "query").
        _Option(["-db_gencode", "db_gencode"],
                """Genetic code to use to translate subject sequences

                Integer. Default is one.""",
                equate=False),
        _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                "Frame shift penalty (integer, at least 1, default ignored).",
                equate=False),
        _Option(["-max_intron_length", "max_intron_length"],
                """Maximum intron length (integer).

                Length of the largest intron allowed in a translated nucleotide
                sequence when linking multiple distinct alignments (a negative
                value disables linking). Default zero.""",
                equate=False),
        _Option(["-matrix", "matrix"],
                "Scoring matrix name (default BLOSUM62).",
                equate=False),
        _Option(["-threshold", "threshold"],
                "Minimum word score such that the word is added to the "
                "BLAST lookup table (float)",
                equate=False),
        _Option(["-comp_based_stats", "comp_based_stats"],
                """Use composition-based statistics (string, default 2, i.e. True).

                0, F or f: no composition-based statistics
                1: Composition-based statistics as in NAR 29:2994-3005, 2001
                2, T or t, D or d : Composition-based score adjustment as in
                Bioinformatics 21:902-911, 2005, conditioned on sequence properties
                3: Composition-based score adjustment as in Bioinformatics 21:902-911,
                2005, unconditionally

                Note that only tblastn supports values of 1 and 3.""",
                checker_function=lambda value: value in comp_based_stats_values,
                equate=False),
        # The closing quote after the default value was missing from this
        # help text.
        _Option(["-seg", "seg"],
                """Filter query sequence with SEG (string).

                Format: "yes", "window locut hicut", or "no" to disable.
                Default is "12 2.2 2.5".""",
                equate=False),
        _Switch(["-ungapped", "ungapped"],
                "Perform ungapped alignment only?"),
        _Switch(["-use_sw_tback", "use_sw_tback"],
                "Compute locally optimal Smith-Waterman alignments?"),
        _Option(["-in_pssm", "in_pssm"],
                """PSI-BLAST checkpoint file

                Incompatible with: remote, query""",
                filename=True,
                equate=False),
    ]
    _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
1062
1063
1065 """Wrapper for the NCBI BLAST+ program tblastx.
1066
1067 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
1068 replaced the old blastall tool with separate tools for each of the searches.
1069 This wrapper therefore replaces BlastallCommandline with option -p tblastx.
1070
1071 >>> from Bio.Blast.Applications import NcbitblastxCommandline
1072 >>> cline = NcbitblastxCommandline(help=True)
1073 >>> cline
1074 NcbitblastxCommandline(cmd='tblastx', help=True)
1075 >>> print cline
1076 tblastx -help
1077
1078 You would typically run the command line with cline() or via the Python
1079 subprocess module, as described in the Biopython tutorial.
1080 """
def __init__(self, cmd="tblastx", **kwargs):
    """Register the tblastx-specific options and initialise the base class.

    Arguments:
     - cmd - command placed at the start of the command line
       (default "tblastx").
     - kwargs - passed through unchanged to the base class initialiser,
       e.g. help=True.
    """
    # self.parameters is filled in first; the base class initialiser is
    # called afterwards with cmd and the keyword arguments.
    self.parameters = [
        _Option(["-strand", "strand"],
                """Query strand(s) to search against database/subject.

                Values allowed are "both" (default), "minus", "plus".""",
                checker_function=lambda value: value in ["both", "minus", "plus"],
                equate=False),
        _Option(["-query_gencode", "query_gencode"],
                """Genetic code to use to translate query

                Integer. Default is one.""",
                equate=False),
        # Distinct from query_gencode above: this help text now refers to
        # subject sequences (it was a copy of the query_gencode text).
        _Option(["-db_gencode", "db_gencode"],
                """Genetic code to use to translate subject sequences

                Integer. Default is one.""",
                equate=False),
        _Option(["-max_intron_length", "max_intron_length"],
                """Maximum intron length (integer).

                Length of the largest intron allowed in a translated nucleotide
                sequence when linking multiple distinct alignments (a negative
                value disables linking). Default zero.""",
                equate=False),
        _Option(["-matrix", "matrix"],
                "Scoring matrix name (default BLOSUM62).",
                equate=False),
        _Option(["-threshold", "threshold"],
                "Minimum word score such that the word is added to the "
                "BLAST lookup table (float)",
                equate=False),
        # The closing quote after the default value was missing from this
        # help text.
        _Option(["-seg", "seg"],
                """Filter query sequence with SEG (string).

                Format: "yes", "window locut hicut", or "no" to disable.
                Default is "12 2.2 2.5".""",
                equate=False),
    ]
    _NcbiblastMain2SeqCommandline.__init__(self, cmd, **kwargs)
1125
1126
1128 """Wrapper for the NCBI BLAST+ program psiblast.
1129
1130 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
1131 replaced the old blastpgp tool with a similar tool psiblast. This wrapper
1132 therefore replaces BlastpgpCommandline, the wrapper for blastpgp.
1133
1134 >>> from Bio.Blast.Applications import NcbipsiblastCommandline
1135 >>> cline = NcbipsiblastCommandline(help=True)
1136 >>> cline
1137 NcbipsiblastCommandline(cmd='psiblast', help=True)
1138 >>> print cline
1139 psiblast -help
1140
1141 You would typically run the command line with cline() or via the Python
1142 subprocess module, as described in the Biopython tutorial.
1143 """
def __init__(self, cmd="psiblast", **kwargs):
    """Register the psiblast-specific options and initialise the base class.

    Arguments:
     - cmd - command placed at the start of the command line
       (default "psiblast").
     - kwargs - passed through unchanged to the base class initialiser,
       e.g. help=True.
    """
    # Every value the comp_based_stats help text lists for psiblast.  An
    # exact-match test against this tuple replaces the old substring test
    # on "0Ft2TtDd", which rejected the documented "f" and accepted "" as
    # well as multi-character substrings such as "Ft".
    comp_based_stats_values = ("0", "F", "f", "2", "T", "t", "D", "d")

    # self.parameters is filled in first; the base class initialiser is
    # called afterwards with cmd and the keyword arguments.
    self.parameters = [
        _Option(["-matrix", "matrix"],
                "Scoring matrix name (default BLOSUM62).",
                equate=False),
        _Option(["-threshold", "threshold"],
                "Minimum word score such that the word is added to the "
                "BLAST lookup table (float)",
                equate=False),
        _Option(["-comp_based_stats", "comp_based_stats"],
                """Use composition-based statistics (string, default 2, i.e. True).

                0, F or f: no composition-based statistics
                2, T or t, D or d : Composition-based score adjustment
                as in Bioinformatics 21:902-911, 2005, conditioned on
                sequence properties

                Note that tblastn also supports values of 1 and 3.""",
                checker_function=lambda value: value in comp_based_stats_values,
                equate=False),
        # The closing quote after the default value was missing from this
        # help text.
        _Option(["-seg", "seg"],
                """Filter query sequence with SEG (string).

                Format: "yes", "window locut hicut", or "no" to disable.
                Default is "12 2.2 2.5".""",
                equate=False),
        _Option(["-gap_trigger", "gap_trigger"],
                "Number of bits to trigger gapping (float, default 22)",
                equate=False),
        _Switch(["-use_sw_tback", "use_sw_tback"],
                "Compute locally optimal Smith-Waterman alignments?"),
        _Option(["-num_iterations", "num_iterations"],
                """Number of iterations to perform, integer

                Integer of at least one. Default is one.
                Incompatible with: remote""",
                equate=False),
        _Option(["-out_pssm", "out_pssm"],
                "File name to store checkpoint file",
                filename=True,
                equate=False),
        _Option(["-out_ascii_pssm", "out_ascii_pssm"],
                "File name to store ASCII version of PSSM",
                filename=True,
                equate=False),
        _Option(["-in_msa", "in_msa"],
                """File name of multiple sequence alignment to restart
                PSI-BLAST

                Incompatible with: in_pssm, query""",
                filename=True,
                equate=False),
        _Option(["-msa_master_idx", "msa_master_idx"],
                """Index of sequence to use as master in MSA.

                Index (1-based) of sequence to use as the master in the
                multiple sequence alignment. If not specified, the first
                sequence is used.""",
                equate=False),
        _Option(["-in_pssm", "in_pssm"],
                """PSI-BLAST checkpoint file

                Incompatible with: in_msa, query, phi_pattern""",
                filename=True,
                equate=False),
        _Option(["-pseudocount", "pseudocount"],
                """Pseudo-count value used when constructing PSSM

                Integer. Default is zero.""",
                equate=False),
        _Option(["-inclusion_ethresh", "inclusion_ethresh"],
                """E-value inclusion threshold for pairwise alignments

                Float. Default is 0.002.""",
                equate=False),
        _Option(["-phi_pattern", "phi_pattern"],
                """File name containing pattern to search

                Incompatible with: in_pssm""",
                filename=True,
                equate=False),
    ]
    _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
1234
1241
1242
1244 """Wrapper for the NCBI BLAST+ program rpsblast.
1245
1246 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
1247 replaced the old rpsblast tool with a similar tool of the same name. This
1248 wrapper replaces RpsBlastCommandline, the wrapper for the old rpsblast.
1249
1250 >>> from Bio.Blast.Applications import NcbirpsblastCommandline
1251 >>> cline = NcbirpsblastCommandline(help=True)
1252 >>> cline
1253 NcbirpsblastCommandline(cmd='rpsblast', help=True)
1254 >>> print cline
1255 rpsblast -help
1256
1257 You would typically run the command line with cline() or via the Python
1258 subprocess module, as described in the Biopython tutorial.
1259 """
def __init__(self, cmd="rpsblast", **kwargs):
    """Register the rpsblast-specific options and initialise the base class.

    Arguments:
     - cmd - command placed at the start of the command line
       (default "rpsblast").
     - kwargs - passed through unchanged to the base class initialiser,
       e.g. help=True.
    """
    # self.parameters is filled in first; the base class initialiser is
    # called afterwards with cmd and the keyword arguments.
    self.parameters = [
        # The closing quote after the default value was missing from this
        # help text.
        _Option(["-seg", "seg"],
                """Filter query sequence with SEG (string).

                Format: "yes", "window locut hicut", or "no" to disable.
                Default is "12 2.2 2.5".""",
                equate=False),
        _Option(["-culling_limit", "culling_limit"],
                """Hit culling limit (integer).

                If the query range of a hit is enveloped by that of at
                least this many higher-scoring hits, delete the hit.

                Incompatible with: best_hit_overhang, best_hit_score_edge.
                """,
                equate=False),
        _Option(["-best_hit_overhang", "best_hit_overhang"],
                """Best Hit algorithm overhang value (recommended value: 0.1)

                Float between 0.0 and 0.5 inclusive.

                Incompatible with: culling_limit.""",
                equate=False),
        _Option(["-best_hit_score_edge", "best_hit_score_edge"],
                """Best Hit algorithm score edge value (recommended value: 0.1)

                Float between 0.0 and 0.5 inclusive.

                Incompatible with: culling_limit.""",
                equate=False),
    ]
    _NcbiblastCommandline.__init__(self, cmd, **kwargs)
1295
1300
1301
1303 """Wrapper for the NCBI BLAST+ program rpstblastn.
1304
1305 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
1306 replaced the old rpsblast tool with a similar tool of the same name, and a
1307 separate tool rpstblastn for Translated Reverse Position Specific BLAST.
1308
1309 >>> from Bio.Blast.Applications import NcbirpstblastnCommandline
1310 >>> cline = NcbirpstblastnCommandline(help=True)
1311 >>> cline
1312 NcbirpstblastnCommandline(cmd='rpstblastn', help=True)
1313 >>> print cline
1314 rpstblastn -help
1315
1316 You would typically run the command line with cline() or via the Python
1317 subprocess module, as described in the Biopython tutorial.
1318 """
def __init__(self, cmd="rpstblastn", **kwargs):
    """Register the rpstblastn-specific options and initialise the base class.

    Arguments:
     - cmd - command placed at the start of the command line
       (default "rpstblastn").
     - kwargs - passed through unchanged to the base class initialiser,
       e.g. help=True.
    """
    # self.parameters is filled in first; the base class initialiser is
    # called afterwards with cmd and the keyword arguments.
    self.parameters = [
        _Option(["-strand", "strand"],
                """Query strand(s) to search against database/subject.

                Values allowed are "both" (default), "minus", "plus".""",
                checker_function=lambda value: value in ["both", "minus", "plus"],
                equate=False),
        _Option(["-query_gencode", "query_gencode"],
                """Genetic code to use to translate query

                Integer. Default is one.""",
                equate=False),
        # The closing quote after the default value was missing from this
        # help text.
        _Option(["-seg", "seg"],
                """Filter query sequence with SEG (string).

                Format: "yes", "window locut hicut", or "no" to disable.
                Default is "12 2.2 2.5".""",
                equate=False),
        _Switch(["-ungapped", "ungapped"],
                "Perform ungapped alignment only?"),
    ]
    _NcbiblastCommandline.__init__(self, cmd, **kwargs)
1348
1349
1399
1400
1402 """Run the Bio.Blast.Applications module's doctests."""
1403 import doctest
1404 doctest.testmod(verbose=1)
1405
# Script entry point: when this module is executed directly (rather than
# imported), call _test().
if __name__ == "__main__":

    _test()
1409