1
2
3
4
5
6
7 """General mechanisms to access applications in Biopython.
8
9 This module is not intended for direct use. It provides the basic objects which
10 are subclassed by our command line wrappers, such as:
11
12 - Bio.Align.Applications
13 - Bio.Blast.Applications
14 - Bio.Emboss.Applications
15 - Bio.Sequencing.Applications
16
17 These modules provide wrapper classes for command line tools to help you
18 construct command line strings by setting the values of each parameter.
19 The finished command line strings are then normally invoked via the built-in
20 Python module subprocess.
21 """
22 import os, sys
23 import StringIO
24 import subprocess
25 import re
26
27
try:
    from subprocess import CalledProcessError as _ProcessCalledError
except ImportError:
    # subprocess.CalledProcessError does not exist on Python 2.4; fall back
    # to the generic Exception class so that ApplicationError can still be
    # defined.  Only ImportError is caught so that any other problem is not
    # silently hidden.
    _ProcessCalledError = Exception
33
34 from Bio import File
35
36
37
# Final parameter names are turned into Python properties and keyword
# arguments, so they must look like identifiers starting with a letter.
# re.match() only anchors the START of the string, so the pattern is
# explicitly anchored at the end with \Z; without it a name such as
# "test-name" would be accepted on the strength of its "test" prefix.
_re_prop_name = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*\Z")
assert _re_prop_name.match("t")
assert _re_prop_name.match("test")
assert _re_prop_name.match("_test") is None
assert _re_prop_name.match("-test") is None
assert _re_prop_name.match("test_name")
assert _re_prop_name.match("test2")
assert _re_prop_name.match("test-name") is None
assert _re_prop_name.match("test name") is None

# Python keywords: these can never be used as keyword arguments or
# property names, so they are rejected as final parameter names.
_reserved_names = ["and", "del", "from", "not", "while", "as", "elif",
                   "global", "or", "with", "assert", "else", "if", "pass",
                   "yield", "break", "except", "import", "print", "class",
                   "exec", "in", "raise", "continue", "finally", "is",
                   "return", "def", "for", "lambda", "try"]

# Names rejected as final parameter names because the command line wrapper
# base class needs them for itself.
_local_reserved_names = ["set_parameter"]
53
54
class ApplicationError(_ProcessCalledError):
    """Raised when an application returns a non-zero exit status.

    The exit status will be stored in the returncode attribute, similarly
    the command line string used in the cmd attribute, and (if captured)
    stdout and stderr as strings.

    This exception is a subclass of subprocess.CalledProcessError
    (unless run on Python 2.4 where that does not exist).

    >>> err = ApplicationError(-11, "helloworld", "", "Some error text")
    >>> err.returncode, err.cmd, err.stdout, err.stderr
    (-11, 'helloworld', '', 'Some error text')
    >>> print err
    Command 'helloworld' returned non-zero exit status -11, 'Some error text'

    """

    def __init__(self, returncode, cmd, stdout="", stderr=""):
        """Store the exit status, the command line and any captured output."""
        self.returncode = returncode
        self.cmd = cmd
        self.stdout = stdout
        self.stderr = stderr

    def __str__(self):
        """Return a one line summary, quoting the first line of stderr."""
        try:
            # Only the first non-blank line of stderr goes in the message.
            msg = self.stderr.lstrip().split("\n", 1)[0].rstrip()
        except (AttributeError, TypeError):
            # stderr is not a text string (for example None when it was
            # not captured), so there is nothing to quote.
            msg = ""
        if msg:
            return "Command '%s' returned non-zero exit status %d, %r" \
                   % (self.cmd, self.returncode, msg)
        else:
            return "Command '%s' returned non-zero exit status %d" \
                   % (self.cmd, self.returncode)

    def __repr__(self):
        """Return an unambiguous representation for debugging.

        The string fields are shown with %r so that empty strings, embedded
        commas and new lines remain distinguishable.
        """
        return "ApplicationError(%i, %r, %r, %r)" \
               % (self.returncode, self.cmd, self.stdout, self.stderr)
94
95
class AbstractCommandline(object):
    """Generic interface for constructing command line strings.

    This class shouldn't be called directly; it should be subclassed to
    provide an implementation for a specific application.

    For a usage example we'll show one of the EMBOSS wrappers.  You can set
    options when creating the wrapper object using keyword arguments - or
    later using their corresponding properties:

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> cline
    WaterCommandline(cmd='water', gapopen=10, gapextend=0.5)

    You can instead manipulate the parameters via their properties, e.g.

    >>> cline.gapopen
    10
    >>> cline.gapopen = 20
    >>> cline
    WaterCommandline(cmd='water', gapopen=20, gapextend=0.5)

    You can clear a parameter you have already added by 'deleting' the
    corresponding property:

    >>> del cline.gapopen
    >>> cline.gapopen
    >>> cline
    WaterCommandline(cmd='water', gapextend=0.5)

    Once you have set the parameters you need, turn the object into a string:

    >>> str(cline)
    Traceback (most recent call last):
    ...
    ValueError: You must either set outfile (output filename), or enable filter or stdout (output to stdout).

    In this case the wrapper knows certain arguments are required to construct
    a valid command line for the tool.  For a complete example,

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> water_cmd = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> water_cmd.asequence = "asis:ACCCGGGCGCGGT"
    >>> water_cmd.bsequence = "asis:ACCCGAGCGCGGT"
    >>> water_cmd.outfile = "temp_water.txt"
    >>> print water_cmd
    water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
    >>> water_cmd
    WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)

    You would typically run the command line via a standard Python operating
    system call using the subprocess module for full control.  For the simple
    case where you just want to run the command and get the output:

    stdout, stderr = water_cmd()
    """

    def __init__(self, cmd, **kwargs):
        """Create a new instance of a command line wrapper object.

        cmd is stored as the program name which starts the command line.
        Any keyword arguments are applied with set_parameter, so each key
        must be a name (or alias) of one of the parameters.

        The subclass must assign self.parameters (the list of parameter
        objects) BEFORE calling this method, otherwise an AttributeError
        is raised.  A ValueError is raised if an alias is defined twice or
        if the final name of a parameter cannot be used as a Python
        property / keyword argument name.
        """
        self.program_name = cmd
        try:
            parameters = self.parameters
        except AttributeError:
            raise AttributeError("Subclass should have defined self.parameters")

        # Factories for the property accessors.  Each call binds its own
        # copy of the name, which avoids the late binding problem of
        # building the lambdas directly inside the loop below.
        def getter(name):
            return lambda x: x._get_parameter(name)

        def setter(name):
            return lambda x, value: x.set_parameter(name, value)

        def deleter(name):
            return lambda x: x._clear_parameter(name)

        aliases = set()
        for p in parameters:
            for name in p.names:
                if name in aliases:
                    raise ValueError("Parameter alias %s multiply defined"
                                     % name)
                aliases.add(name)
            # The last name is the one exposed as a property / keyword.
            name = p.names[-1]
            if _re_prop_name.match(name) is None:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name in python"
                                 % repr(name))
            if name in _reserved_names:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name because it is "
                                 "a reserved word in python" % repr(name))
            if name in _local_reserved_names:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name due to the "
                                 "way the AbstractCommandline class works"
                                 % repr(name))

            doc = p.description
            if isinstance(p, _Switch):
                doc += "\n\nThis property controls the addition of the %s " \
                       "switch, treat this property as a boolean." % p.names[0]
            else:
                doc += "\n\nThis controls the addition of the %s parameter " \
                       "and its associated value.  Set this property to the " \
                       "argument value required." % p.names[0]
            prop = property(getter(name), setter(name), deleter(name), doc)
            # Properties only work when attached to the class, not to the
            # instance, hence self.__class__ here.
            setattr(self.__class__, name, prop)
        for key, value in kwargs.items():
            self.set_parameter(key, value)

    def _validate(self):
        """Make sure the required parameters have been set (PRIVATE).

        No return value - it either works or raises a ValueError.

        This is a separate method (called from __str__) so that subclasses may
        override it.
        """
        for p in self.parameters:
            if p.is_required and not p.is_set:
                raise ValueError("Parameter %s is not set."
                                 % p.names[-1])

    def __str__(self):
        """Make the commandline string with the currently set options.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print cline
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> str(cline)
        'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5'
        """
        self._validate()
        # Each parameter's str() already ends with a space, so the pieces
        # are simply concatenated and the final trailing space stripped.
        pieces = ["%s " % self.program_name]
        for parameter in self.parameters:
            if parameter.is_set:
                pieces.append(str(parameter))
        return "".join(pieces).strip()

    def __repr__(self):
        """Return a representation of the command line object for debugging.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print cline
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> cline
        WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
        """
        pieces = ["cmd=%s" % repr(self.program_name)]
        for parameter in self.parameters:
            if parameter.is_set:
                if isinstance(parameter, _Switch):
                    pieces.append("%s=True" % parameter.names[-1])
                else:
                    pieces.append("%s=%s" % (parameter.names[-1],
                                             repr(parameter.value)))
        return "%s(%s)" % (self.__class__.__name__, ", ".join(pieces))

    def _get_parameter(self, name):
        """Get a commandline option value.

        For a switch the boolean is_set flag is returned, otherwise the
        stored value.  Raises ValueError if no parameter has that name.
        """
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    return parameter.is_set
                else:
                    return parameter.value
        raise ValueError("Option name %s was not found." % name)

    def _clear_parameter(self, name):
        """Reset or clear a commandline option value.

        Raises ValueError if no parameter has that name.
        """
        cleared_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                # Switches deliberately have no value attribute (their
                # __str__ asserts this), so only reset the value on
                # parameters which actually carry one.  Creating the
                # attribute here would make a later str() of a re-enabled
                # switch fail.
                if hasattr(parameter, "value"):
                    parameter.value = None
                parameter.is_set = False
                cleared_option = True
        if not cleared_option:
            raise ValueError("Option name %s was not found." % name)

    # NOTE(review): the def line of set_parameter was not visible in the
    # reviewed text.  value defaults to None here so that both the one and
    # two argument call forms are accepted - confirm against callers.
    def set_parameter(self, name, value=None):
        """Set a commandline option for a program.

        For a switch the value is treated as a boolean (None gives a
        warning and counts as False).  For other parameters a value other
        than None is first validated with the parameter's checker function.
        Raises ValueError if no parameter has that name.
        """
        set_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    if value is None:
                        import warnings
                        warnings.warn("For a switch type argument like %s, "
                                      "we expect a boolean.  None is treated "
                                      "as FALSE!" % parameter.names[-1])
                    parameter.is_set = bool(value)
                    set_option = True
                else:
                    if value is not None:
                        self._check_value(value, name,
                                          parameter.checker_function)
                        parameter.value = value
                    parameter.is_set = True
                    set_option = True
        if not set_option:
            raise ValueError("Option name %s was not found." % name)

    def _check_value(self, value, name, check_function):
        """Check whether the given value is valid.

        No return value - it either works or raises a ValueError.

        This uses the passed function 'check_function', which can either
        return a [0, 1] (bad, good) value or raise an error.  Either way
        this function will raise an error if the value is not valid, or
        finish silently otherwise.
        """
        if check_function is not None:
            is_good = check_function(value)
            # Sanity check on the checker function itself.
            assert is_good in [0, 1, True, False]
            if not is_good:
                raise ValueError("Invalid parameter value %r for parameter %s"
                                 % (value, name))

    def __setattr__(self, name, value):
        """Set attribute name to value (PRIVATE).

        This code implements a workaround for a user interface issue.
        Without this __setattr__ attribute-based assignment of parameters
        will silently accept invalid parameters, leading to known instances
        of the user assuming that parameters for the application are set,
        when they are not.

        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True)
        >>> cline.asequence = "a.fasta"
        >>> cline.bsequence = "b.fasta"
        >>> cline.csequence = "c.fasta"
        Traceback (most recent call last):
        ...
        ValueError: Option name csequence was not found.
        >>> print cline
        water -stdout -asequence=a.fasta -bsequence=b.fasta -gapopen=10 -gapextend=0.5

        This workaround uses a whitelist of object attributes, and sets the
        object attribute list as normal, for these.  Other attributes are
        assumed to be parameters, and passed to the self.set_parameter method
        for validation and assignment.
        """
        if name in ['parameters', 'program_name']:
            self.__dict__[name] = value
        else:
            self.set_parameter(name, value)

    def __call__(self, stdin=None, stdout=True, stderr=True,
                 cwd=None, env=None):
        """Executes the command, waits for it to finish, and returns output.

        Runs the command line tool and waits for it to finish.  If it returns
        a non-zero error level, an exception is raised.  Otherwise two strings
        are returned containing stdout and stderr.

        The optional stdin argument should be a string of data which will be
        passed to the tool as standard input.

        The optional stdout and stderr arguments are treated as booleans, and
        control if the output should be captured (True, default), or ignored
        by sending it to /dev/null to avoid wasting memory (False).  In the
        latter case empty string(s) are returned.

        The optional cwd argument is a string giving the working directory
        to run the command from.  See Python's subprocess module
        documentation for more details.

        The optional env argument is a dictionary setting the environment
        variables to be used in the new process.  By default the current
        process' environment variables are used.  See Python's subprocess
        module documentation for more details.

        Default example usage:

        from Bio.Emboss.Applications import WaterCommandline
        water_cmd = WaterCommandline(gapopen=10, gapextend=0.5,
                                     stdout=True, auto=True,
                                     asequence="a.fasta", bsequence="b.fasta")
        print "About to run:\n%s" % water_cmd
        std_output, err_output = water_cmd()

        This functionality is similar to subprocess.check_output() added in
        Python 2.7.  In general if you require more control over running the
        command, use subprocess directly.

        As of Biopython 1.56, when the program called returns a non-zero error
        level, a custom ApplicationError exception is raised.  This includes
        any stdout and stderr strings captured as attributes of the exception
        object, since they may be useful for diagnosing what went wrong.
        """
        # Handles opened on the null device, closed again once the child
        # has finished (even if starting it fails).
        devnull_handles = []
        try:
            if stdout:
                stdout_arg = subprocess.PIPE
            else:
                # Must be opened for WRITING, the child writes to it.
                stdout_arg = open(os.devnull, "w")
                devnull_handles.append(stdout_arg)
            if stderr:
                stderr_arg = subprocess.PIPE
            else:
                stderr_arg = open(os.devnull, "w")
                devnull_handles.append(stderr_arg)

            # The command is passed as a single string, and is run via the
            # shell on every platform except Windows.
            # NOTE: because of the shell, parameter values end up being
            # interpreted by it - do not build command lines from
            # untrusted input.
            child_process = subprocess.Popen(str(self), stdin=subprocess.PIPE,
                                             stdout=stdout_arg,
                                             stderr=stderr_arg,
                                             universal_newlines=True,
                                             cwd=cwd, env=env,
                                             shell=(sys.platform != "win32"))
            stdout_str, stderr_str = child_process.communicate(stdin)
        finally:
            for handle in devnull_handles:
                handle.close()

        # communicate() gives None for a stream which was not piped; the
        # documented contract is to return an empty string instead.
        if stdout_str is None:
            stdout_str = ""
        if stderr_str is None:
            stderr_str = ""

        return_code = child_process.returncode
        if return_code:
            raise ApplicationError(return_code, str(self),
                                   stdout_str, stderr_str)
        return stdout_str, stderr_str
445
446
# NOTE(review): the class and method header lines of this block were not
# visible in the reviewed text.  The class name and the two method names
# below are assumed - confirm against the upstream file before merging.
class _AbstractParameter:
    """A class to hold information about a parameter for a commandline.

    Do not use this directly, instead use one of the subclasses.
    """

    def __init__(self):
        """Always raise NotImplementedError; use a subclass instead."""
        raise NotImplementedError

    def __str__(self):
        """Always raise NotImplementedError; use a subclass instead."""
        raise NotImplementedError
457
# NOTE(review): the class header line was not visible in the reviewed text;
# the class name and the base class name are assumed - confirm.
class _Option(_AbstractParameter):
    """Represent an option that can be set for a program.

    This holds UNIXish options like --append=yes and -a yes,
    where a value (here "yes") is generally expected.

    For UNIXish options like -kimura in clustalw which don't
    take a value, use the _Switch object instead.

    Attributes:

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o description -- a description of the option.

    o filename -- True if this argument is a filename and should be
    automatically quoted if it contains spaces.

    o checker_function -- a reference to a function that will determine
    if a given value is valid for this parameter. This function can either
    raise an error when given a bad value, or return a [0, 1] decision on
    whether the value is correct.

    o equate -- should an equals sign be inserted if a value is used?

    o is_required -- a flag to indicate if the parameter must be set for
    the program to be run.

    o is_set -- if the parameter has been set

    o value -- the value of a parameter
    """

    def __init__(self, names, description, filename=False,
                 checker_function=None, is_required=False, equate=True):
        """Store the option's settings; it starts out unset with no value."""
        self.names = names
        assert isinstance(description, basestring), \
               "%r for %s" % (description, names[-1])
        self.is_filename = filename
        self.checker_function = checker_function
        self.description = description
        self.equate = equate
        self.is_required = is_required
        # Nothing has been set by the user yet.
        self.is_set = False
        self.value = None

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space.
        """
        flag = self.names[0]
        if self.value is None:
            # No value available, so only the option name is written.
            return "%s " % flag
        if self.is_filename:
            text = _escape_filename(self.value)
        else:
            text = str(self.value)
        # Option name and value are joined by "=" or by a space.
        if self.equate:
            separator = "="
        else:
            separator = " "
        return "%s%s%s " % (flag, separator, text)
528
# NOTE(review): the class header line was not visible in the reviewed text;
# the base class name is assumed - confirm.
class _Switch(_AbstractParameter):
    """Represent an optional argument switch for a program.

    This holds UNIXish options like -kimura in clustalw which don't
    take a value, they are either included in the command string
    or omitted.

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o description -- a description of the option.

    o is_set -- if the parameter has been set

    NOTE - There is no value attribute, see is_set instead,
    """

    def __init__(self, names, description):
        """Store the names and description; the switch starts out off."""
        # NOTE(review): the statements of this initialiser were not visible
        # in the reviewed text.  These four assignments are the attributes
        # the rest of the module reads from a switch (names, description,
        # is_set, is_required) - confirm against the upstream file.
        self.names = names
        self.description = description
        self.is_set = False
        self.is_required = False

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space.
        """
        # A switch must never carry a value, only the is_set flag.
        assert not hasattr(self, "value")
        if not self.is_set:
            return ""
        return "%s " % self.names[0]
565
# NOTE(review): the class header line was not visible in the reviewed text;
# the class name and the base class name are assumed - confirm.
class _Argument(_AbstractParameter):
    """Represent an argument on a commandline.
    """

    def __init__(self, names, description, filename=False,
                 checker_function=None, is_required=False):
        """Store the argument's settings; it starts out unset with no value."""
        # NOTE(review): the statements of this initialiser were not visible
        # in the reviewed text.  They are written to mirror the option
        # class (without the equals sign setting), which provides every
        # attribute that is read elsewhere - confirm against the upstream
        # file.
        self.names = names
        assert isinstance(description, basestring), \
               "%r for %s" % (description, names[-1])
        self.is_filename = filename
        self.checker_function = checker_function
        self.description = description
        self.is_required = is_required
        self.is_set = False
        self.value = None

    def __str__(self):
        """Return the argument for the commandline, with a trailing space.

        Unlike an option, only the value itself is written (no name).
        """
        if self.value is None:
            return " "
        if self.is_filename:
            return "%s " % _escape_filename(self.value)
        return "%s " % self.value
588
def _escape_filename(filename):
    """Escape filenames with spaces by adding quotes (PRIVATE).

    Note this will not add quotes if they are already included:

    >>> print _escape_filename('example with spaces')
    "example with spaces"
    >>> print _escape_filename('"example with spaces"')
    "example with spaces"
    """
    # Quotes are only added when there is a space AND the name is not
    # already wrapped in double quotes; anything else is returned as is.
    if " " in filename and not (filename.startswith('"')
                                and filename.endswith('"')):
        return '"%s"' % filename
    return filename
620
def _test():
    """Run the Bio.Application module's doctests."""
    # Imported here so that doctest is only loaded when the tests are run.
    from doctest import testmod
    testmod(verbose=1)


if __name__ == "__main__":
    # Run the self-tests when this file is executed as a script.
    _test()
629