Package Bio :: Package Application
[hide private]
[frames | no frames]

Source Code for Package Bio.Application

  1  # Copyright 2001-2004 Brad Chapman. 
  2  # Revisions copyright 2009-2010 by Peter Cock. 
  3  # All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7  """General mechanisms to access applications in Biopython. 
  8   
  9  This module is not intended for direct use. It provides the basic objects which 
 10  are subclassed by our command line wrappers, such as: 
 11   
 12   - Bio.Align.Applications 
 13   - Bio.Blast.Applications 
 14   - Bio.Emboss.Applications 
 15   - Bio.Sequencing.Applications 
 16   
 17  These modules provide wrapper classes for command line tools to help you 
 18  construct command line strings by setting the values of each parameter. 
 19  The finished command line strings are then normally invoked via the built-in 
 20  Python module subprocess. 
 21  """ 
 22  import os, sys 
 23  import StringIO 
 24  import subprocess 
 25  import re 
 26   
 27  #TODO - Remove this hack once we drop Python 2.4 support. 
 28  try: 
 29      from subprocess import CalledProcessError as _ProcessCalledError 
 30  except: 
 31      #For Python 2.4 use Exception as base class 
 32      _ProcessCalledError = Exception 
 33   
 34  from Bio import File 
 35   
 36  #Use this regular expresion to test the property names are going to 
 37  #be valid as Python properties or arguments 
 38  _re_prop_name = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*") 
 39  assert _re_prop_name.match("t") 
 40  assert _re_prop_name.match("test") 
 41  assert _re_prop_name.match("_test") is None # we don't want private names 
 42  assert _re_prop_name.match("-test") is None 
 43  assert _re_prop_name.match("test_name") 
 44  assert _re_prop_name.match("test2") 
 45  #These are reserved names in Python itself, 
 46  _reserved_names = ["and", "del", "from", "not", "while", "as", "elif", 
 47                     "global", "or", "with", "assert", "else", "if", "pass", 
 48                     "yield", "break", "except", "import", "print", "class", 
 49                     "exec", "in", "raise", "continue", "finally", "is", 
 50                     "return", "def", "for", "lambda", "try"] 
 51  #These are reserved names due to the way the wrappers work 
 52  _local_reserved_names = ["set_parameter"] 
 53   
 54   
class ApplicationError(_ProcessCalledError):
    """Raised when an application returns a non-zero exit status.

    The exit status will be stored in the returncode attribute, similarly
    the command line string used in the cmd attribute, and (if captured)
    stdout and stderr as strings.

    This exception is a subclass of subprocess.CalledProcessError
    (unless run on Python 2.4 where that does not exist).

    >>> err = ApplicationError(-11, "helloworld", "", "Some error text")
    >>> err.returncode, err.cmd, err.stdout, err.stderr
    (-11, 'helloworld', '', 'Some error text')
    >>> print err
    Command 'helloworld' returned non-zero exit status -11, 'Some error text'

    """

    def __init__(self, returncode, cmd, stdout="", stderr=""):
        """Store the exit status, command line and any captured output.

        Arguments:
         - returncode -- integer exit status of the command.
         - cmd -- the command line string which was run.
         - stdout -- captured standard output (default empty string).
         - stderr -- captured standard error (default empty string).
        """
        self.returncode = returncode
        self.cmd = cmd
        self.stdout = stdout
        self.stderr = stderr

    def __str__(self):
        """Return a one line summary, with the first line of stderr if any."""
        # Get the first line of any stderr message. The stderr attribute
        # may not be a string (e.g. None), in which case there is simply
        # no message to show.
        try:
            msg = self.stderr.lstrip().split("\n", 1)[0].rstrip()
        except (AttributeError, TypeError):
            msg = ""
        if msg:
            return "Command '%s' returned non-zero exit status %d, %r" \
                   % (self.cmd, self.returncode, msg)
        else:
            return "Command '%s' returned non-zero exit status %d" \
                   % (self.cmd, self.returncode)

    def __repr__(self):
        """Return a debugging representation mirroring the constructor call."""
        # Use %r so the string arguments are quoted; with %s empty strings
        # vanished and embedded commas made the output ambiguous.
        return "ApplicationError(%i, %r, %r, %r)" \
               % (self.returncode, self.cmd, self.stdout, self.stderr)
94 95
class AbstractCommandline(object):
    """Generic interface for constructing command line strings.

    This class shouldn't be called directly; it should be subclassed to
    provide an implementation for a specific application.

    For a usage example we'll show one of the EMBOSS wrappers.  You can set
    options when creating the wrapper object using keyword arguments - or
    later using their corresponding properties:

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> cline
    WaterCommandline(cmd='water', gapopen=10, gapextend=0.5)

    You can instead manipulate the parameters via their properties, e.g.

    >>> cline.gapopen
    10
    >>> cline.gapopen = 20
    >>> cline
    WaterCommandline(cmd='water', gapopen=20, gapextend=0.5)

    You can clear a parameter you have already added by 'deleting' the
    corresponding property:

    >>> del cline.gapopen
    >>> cline.gapopen
    >>> cline
    WaterCommandline(cmd='water', gapextend=0.5)

    Once you have set the parameters you need, turn the object into a string:

    >>> str(cline)
    Traceback (most recent call last):
    ...
    ValueError: You must either set outfile (output filename), or enable filter or stdout (output to stdout).

    In this case the wrapper knows certain arguments are required to construct
    a valid command line for the tool.  For a complete example,

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> water_cmd = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> water_cmd.asequence = "asis:ACCCGGGCGCGGT"
    >>> water_cmd.bsequence = "asis:ACCCGAGCGCGGT"
    >>> water_cmd.outfile = "temp_water.txt"
    >>> print water_cmd
    water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
    >>> water_cmd
    WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)

    You would typically run the command line via a standard Python operating
    system call using the subprocess module for full control. For the simple
    case where you just want to run the command and get the output:

    stdout, stderr = water_cmd()
    """
    # Note the call example above is not a doctest as we can't handle EMBOSS
    # (or any other tool) being missing in the unit tests.

    def __init__(self, cmd, **kwargs):
        """Create a new instance of a command line wrapper object.

        Arguments:
         - cmd -- the program name (or path), stored as program_name.
         - kwargs -- parameter names and values, each passed to
           set_parameter.

        Raises AttributeError if the subclass did not define
        self.parameters, and ValueError for duplicated aliases, unusable
        final parameter names, or unknown/invalid keyword arguments.
        """
        # Init method - should be subclassed!
        #
        # The subclass methods should look like this:
        #
        # def __init__(self, cmd="muscle", **kwargs):
        #     self.parameters = [...]
        #     AbstractCommandline.__init__(self, cmd, **kwargs)
        #
        # i.e. There should have an optional argument "cmd" to set the location
        # of the executable (with a sensible default which should work if the
        # command is on the path on Unix), and keyword arguments.  It should
        # then define a list of parameters, all objects derived from the base
        # class _AbstractParameter.
        #
        # The keyword arguments should be any valid parameter name, and will
        # be used to set the associated parameter.
        self.program_name = cmd
        try:
            parameters = self.parameters
        except AttributeError:
            raise AttributeError("Subclass should have defined self.parameters")

        # Build the property via a helper function so that each lambda
        # closes over its own 'name' (beware of binding-versus-assignment
        # confusion if the lambdas were written directly in the loop).
        def make_property(name, doc):
            return property(lambda x: x._get_parameter(name),
                            lambda x, value: x.set_parameter(name, value),
                            lambda x: x._clear_parameter(name),
                            doc)

        # Create properties for each parameter at run time
        aliases = set()
        for p in parameters:
            for name in p.names:
                if name in aliases:
                    raise ValueError("Parameter alias %s multiply defined"
                                     % name)
                aliases.add(name)
            # The last name is the human readable one used for the property
            name = p.names[-1]
            if _re_prop_name.match(name) is None:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name in python"
                                 % repr(name))
            if name in _reserved_names:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name because it is "
                                 "a reserved word in python" % repr(name))
            if name in _local_reserved_names:
                raise ValueError("Final parameter name %s cannot be used as "
                                 "an argument or property name due to the "
                                 "way the AbstractCommandline class works"
                                 % repr(name))
            doc = p.description
            if isinstance(p, _Switch):
                doc += "\n\nThis property controls the addition of the %s " \
                       "switch, treat this property as a boolean." % p.names[0]
            else:
                doc += "\n\nThis controls the addition of the %s parameter " \
                       "and its associated value. Set this property to the " \
                       "argument value required." % p.names[0]
            # The property lives on the class (not the instance), magic!
            setattr(self.__class__, name, make_property(name, doc))
        # items() rather than iteritems() so this runs on Python 2 and 3
        for key, value in kwargs.items():
            self.set_parameter(key, value)

    def _validate(self):
        """Make sure the required parameters have been set (PRIVATE).

        No return value - it either works or raises a ValueError.

        This is a separate method (called from __str__) so that subclasses may
        override it.
        """
        for p in self.parameters:
            # Check for missing required parameters:
            if p.is_required and not p.is_set:
                raise ValueError("Parameter %s is not set."
                                 % p.names[-1])
            # Also repeat the parameter validation here, just in case?

    def __str__(self):
        """Make the commandline string with the currently set options.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print cline
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> str(cline)
        'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5'
        """
        self._validate()
        pieces = ["%s " % self.program_name]
        for parameter in self.parameters:
            if parameter.is_set:
                # This will include a trailing space:
                pieces.append(str(parameter))
        return "".join(pieces).strip()  # remove trailing space

    def __repr__(self):
        """Return a representation of the command line object for debugging.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print cline
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> cline
        WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
        """
        pieces = ["%s(cmd=%s" % (self.__class__.__name__,
                                 repr(self.program_name))]
        for parameter in self.parameters:
            if parameter.is_set:
                if isinstance(parameter, _Switch):
                    pieces.append(", %s=True" % parameter.names[-1])
                else:
                    pieces.append(", %s=%s"
                                  % (parameter.names[-1],
                                     repr(parameter.value)))
        pieces.append(")")
        return "".join(pieces)

    def _get_parameter(self, name):
        """Get a commandline option value.

        For a switch the boolean is_set flag is returned, otherwise the
        stored value.  Raises ValueError if no parameter has this name.
        """
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    return parameter.is_set
                else:
                    return parameter.value
        raise ValueError("Option name %s was not found." % name)

    def _clear_parameter(self, name):
        """Reset or clear a commandline option value.

        Raises ValueError if no parameter has this name.
        """
        cleared_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                # Switches deliberately have no value attribute (their
                # __str__ asserts this), so only reset it for the others.
                # Otherwise clearing a switch and later re-enabling it
                # would break building the command line.
                if not isinstance(parameter, _Switch):
                    parameter.value = None
                parameter.is_set = False
                cleared_option = True
        if not cleared_option:
            raise ValueError("Option name %s was not found." % name)

    def set_parameter(self, name, value=None):
        """Set a commandline option for a program.

        Arguments:
         - name -- any of the names (aliases) of the parameter.
         - value -- for a switch this is treated as a boolean, otherwise
           the value for the option or argument (checked with the
           parameter's checker function unless None).

        Raises ValueError if no parameter has this name, or if the value
        is rejected by the checker function.
        """
        set_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    if value is None:
                        import warnings
                        warnings.warn("For a switch type argument like %s, "
                                      "we expect a boolean. None is treated "
                                      "as FALSE!" % parameter.names[-1])
                    parameter.is_set = bool(value)
                    set_option = True
                else:
                    if value is not None:
                        self._check_value(value, name,
                                          parameter.checker_function)
                        parameter.value = value
                    parameter.is_set = True
                    set_option = True
        if not set_option:
            raise ValueError("Option name %s was not found." % name)

    def _check_value(self, value, name, check_function):
        """Check whether the given value is valid.

        No return value - it either works or raises a ValueError.

        This uses the passed function 'check_function', which can either
        return a [0, 1] (bad, good) value or raise an error. Either way
        this function will raise an error if the value is not valid, or
        finish silently otherwise.
        """
        if check_function is not None:
            is_good = check_function(value)  # May raise an exception
            assert is_good in [0, 1, True, False]
            if not is_good:
                raise ValueError("Invalid parameter value %r for parameter %s"
                                 % (value, name))

    def __setattr__(self, name, value):
        """Set attribute name to value (PRIVATE).

        This code implements a workaround for a user interface issue.
        Without this __setattr__ attribute-based assignment of parameters
        will silently accept invalid parameters, leading to known instances
        of the user assuming that parameters for the application are set,
        when they are not.

        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True)
        >>> cline.asequence = "a.fasta"
        >>> cline.bsequence = "b.fasta"
        >>> cline.csequence = "c.fasta"
        Traceback (most recent call last):
        ...
        ValueError: Option name csequence was not found.
        >>> print cline
        water -stdout -asequence=a.fasta -bsequence=b.fasta -gapopen=10 -gapextend=0.5

        This workaround uses a whitelist of object attributes, and sets the
        object attribute list as normal, for these.  Other attributes are
        assumed to be parameters, and passed to the self.set_parameter method
        for validation and assignment.
        """
        if name in ['parameters', 'program_name']:  # Allowed attributes
            self.__dict__[name] = value
        else:
            self.set_parameter(name, value)  # treat as a parameter

    def __call__(self, stdin=None, stdout=True, stderr=True,
                 cwd=None, env=None):
        """Executes the command, waits for it to finish, and returns output.

        Runs the command line tool and waits for it to finish. If it returns
        a non-zero error level, an exception is raised. Otherwise two strings
        are returned containing stdout and stderr.

        The optional stdin argument should be a string of data which will be
        passed to the tool as standard input.

        The optional stdout and stderr arguments are treated as booleans, and
        control if the output should be captured (True, default), or ignored
        by sending it to /dev/null to avoid wasting memory (False). In the
        latter case empty string(s) are returned.

        The optional cwd argument is a string giving the working directory
        to run the command from. See Python's subprocess module documentation
        for more details.

        The optional env argument is a dictionary setting the environment
        variables to be used in the new process. By default the current
        process' environment variables are used. See Python's subprocess
        module documentation for more details.

        Default example usage:

        from Bio.Emboss.Applications import WaterCommandline
        water_cmd = WaterCommandline(gapopen=10, gapextend=0.5,
                                     stdout=True, auto=True,
                                     asequence="a.fasta", bsequence="b.fasta")
        print "About to run:\n%s" % water_cmd
        std_output, err_output = water_cmd()

        This functionality is similar to subprocess.check_output() added in
        Python 2.7. In general if you require more control over running the
        command, use subprocess directly.

        As of Biopython 1.56, when the program called returns a non-zero error
        level, a custom ApplicationError exception is raised. This includes
        any stdout and stderr strings captured as attributes of the exception
        object, since they may be useful for diagnosing what went wrong.
        """
        # Build (and validate) the command line once; it is needed both to
        # launch the process and for any error report.
        cmd = str(self)
        # Handles on the null device which we opened and so must close.
        devnull_handles = []
        try:
            # The null device must be opened for *writing*, since the child
            # process writes its output to it.
            if stdout:
                stdout_arg = subprocess.PIPE
            else:
                stdout_arg = open(os.devnull, "w")
                devnull_handles.append(stdout_arg)
            if stderr:
                stderr_arg = subprocess.PIPE
            else:
                stderr_arg = open(os.devnull, "w")
                devnull_handles.append(stderr_arg)
            # We may not need to supply any piped input, but we setup the
            # standard input pipe anyway as a work around for a python
            # bug if this is called from a Windows GUI program.  For
            # details, see http://bugs.python.org/issue1124861
            #
            # Using universal newlines is important on Python 3, this
            # gives unicode handles rather than bytes handles.
            #
            # NOTE(review): except on Windows the command string is run via
            # the shell, so parameter values containing shell metacharacters
            # will be interpreted by it - do not build command lines from
            # untrusted input.
            child_process = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                             stdout=stdout_arg,
                                             stderr=stderr_arg,
                                             universal_newlines=True,
                                             cwd=cwd, env=env,
                                             shell=(sys.platform != "win32"))
            # Use .communicate as can get deadlocks with .wait(), see Bug 2804
            stdout_str, stderr_str = child_process.communicate(stdin)
        finally:
            for handle in devnull_handles:
                handle.close()
        if not stdout:
            assert not stdout_str
        if not stderr:
            assert not stderr_str
        # communicate() gives None for a stream which was not captured;
        # return empty strings instead, as documented.
        if stdout_str is None:
            stdout_str = ""
        if stderr_str is None:
            stderr_str = ""
        return_code = child_process.returncode
        if return_code:
            raise ApplicationError(return_code, cmd,
                                   stdout_str, stderr_str)
        return stdout_str, stderr_str
445 446
class _AbstractParameter:
    """Base class holding information about one commandline parameter.

    This is an interface only; do not use it directly, use one of the
    subclasses instead.
    """

    def __init__(self):
        """Always raises NotImplementedError; subclasses define their own."""
        raise NotImplementedError()

    def __str__(self):
        """Always raises NotImplementedError; subclasses define their own."""
        raise NotImplementedError()
457
class _Option(_AbstractParameter):
    """Represent an option that can be set for a program.

    This holds UNIXish options like --append=yes and -a yes,
    where a value (here "yes") is generally expected.

    For UNIXish options like -kimura in clustalw which don't
    take a value, use the _Switch object instead.

    Attributes:

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o description -- a description of the option.

    o filename -- True if this argument is a filename and should be
    automatically quoted if it contains spaces.

    o checker_function -- a reference to a function that will determine
    if a given value is valid for this parameter. This function can either
    raise an error when given a bad value, or return a [0, 1] decision on
    whether the value is correct.

    o equate -- should an equals sign be inserted if a value is used?

    o is_required -- a flag to indicate if the parameter must be set for
    the program to be run.

    o is_set -- if the parameter has been set

    o value -- the value of a parameter
    """

    def __init__(self, names, description, filename=False,
                 checker_function=None, is_required=False, equate=True):
        """Record the option's settings; it starts unset with no value."""
        assert isinstance(description, basestring), \
               "%r for %s" % (description, names[-1])
        # Static description of the option
        self.names = names
        self.description = description
        self.is_filename = filename
        self.checker_function = checker_function
        self.is_required = is_required
        self.equate = equate
        # Mutable state, updated as the user sets or clears the option
        self.value = None
        self.is_set = False

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space.
        """
        # Note: Before equate was handled explicitly, the old
        # code would do either "--name " or "--name=value ",
        # or " -name " or " -name value ".  This choice is now
        # made explicitly when setting up the option.
        flag = self.names[0]
        if self.value is None:
            # No value, so just the option name on its own
            return "%s " % flag
        if self.is_filename:
            text = _escape_filename(self.value)
        else:
            text = str(self.value)
        if self.equate:
            template = "%s=%s "
        else:
            template = "%s %s "
        return template % (flag, text)
528
class _Switch(_AbstractParameter):
    """Represent an optional argument switch for a program.

    This holds UNIXish options like -kimura in clustalw which don't
    take a value, they are either included in the command string
    or omitted.

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o description -- a description of the option.

    o is_set -- if the parameter has been set

    NOTE - There is no value attribute, see is_set instead,
    """

    def __init__(self, names, description):
        """Record the names and description; the switch starts off."""
        self.names = names
        self.description = description
        # A switch is never required, and is off until explicitly set
        self.is_required = False
        self.is_set = False

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space.
        """
        assert not hasattr(self, "value")
        if not self.is_set:
            return ""
        return "%s " % self.names[0]
565
class _Argument(_AbstractParameter):
    """Represent an argument on a commandline.

    Unlike an option, only the value itself is written to the command
    line (the names are not printed).
    """

    def __init__(self, names, description, filename=False,
                 checker_function=None, is_required=False):
        """Record the argument's settings; it starts unset with no value."""
        assert isinstance(description, basestring), \
               "%r for %s" % (description, names[-1])
        # Static description of the argument
        self.names = names
        self.description = description
        self.is_filename = filename
        self.checker_function = checker_function
        self.is_required = is_required
        # Mutable state, updated as the user sets or clears the argument
        self.value = None
        self.is_set = False

    def __str__(self):
        """Return the argument's value for the commandline.

        Includes a trailing space (a lone space if there is no value).
        """
        if self.value is None:
            return " "
        if self.is_filename:
            return "%s " % _escape_filename(self.value)
        return "%s " % self.value
588
def _escape_filename(filename):
    """Escape filenames with spaces by adding quotes (PRIVATE).

    Note this will not add quotes if they are already included:

    >>> print _escape_filename('example with spaces')
    "example with spaces"
    >>> print _escape_filename('"example with spaces"')
    "example with spaces"
    """
    # An alternative considered for Windows was to ask win32api for the
    # DOS style 8.3 short name of an existing file (which has no spaces),
    # but that name is not portable between machines, or even folders.
    #
    # Instead we just quote it - works on Windows, Mac OS X etc. Quotes are
    # only needed when there is a space and the name is not already quoted.
    needs_quotes = " " in filename and not (filename.startswith('"')
                                            and filename.endswith('"'))
    if needs_quotes:
        return '"%s"' % filename
    return filename
620
def _test():
    """Run the Bio.Application module's doctests."""
    from doctest import testmod
    testmod(verbose=1)


if __name__ == "__main__":
    # Executed as a script, so run the doctests
    _test()