Package Bio :: Package Phylo :: Package PAML :: Module codeml
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.PAML.codeml

  1  # Copyright (C) 2011 by Brandon Invergo (b.invergo@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  import os 
  7  import os.path 
  8  from _paml import Paml, PamlError, _relpath 
  9  import _parse_codeml 
 10   
 11  #TODO - Restore use of with statement for closing handles automatically 
 12  #after dropping Python 2.4 
 13   
class CodemlError(EnvironmentError):
    """CODEML has failed.

    Run with verbose = True to view CODEML's error message.
    """
17
class Codeml(Paml):
    """This class implements an interface to CODEML, part of the PAML package."""

    def __init__(self, alignment=None, tree=None, working_dir=None,
                 out_file=None):
        """Initialize the codeml instance.

        The user may optionally pass in strings specifying the locations
        of the input alignment and tree files, the working directory and
        the final output file.  The alignment, working directory and
        output file are handed to Paml.__init__; the tree is stored on
        this instance.

        Every CODEML control option starts out as None (unset).  Unset
        options are left out of the control file by write_ctl_file().

        Raises IOError if a tree path is given but does not exist.
        """
        Paml.__init__(self, alignment, working_dir, out_file)
        if tree is not None:
            if not os.path.exists(tree):
                raise IOError("The specified tree file does not exist.")
        # Always set the attribute (possibly to None): run() and
        # _set_rel_paths() both test ``self.tree is None``.
        self.tree = tree
        self.ctl_file = "codeml.ctl"
        self._options = {
            "noisy": None,
            "verbose": None,
            "runmode": None,
            "seqtype": None,
            "CodonFreq": None,
            "ndata": None,
            "clock": None,
            "aaDist": None,
            "aaRatefile": None,
            "model": None,
            "NSsites": None,
            "icode": None,
            "Mgene": None,
            "fix_kappa": None,
            "kappa": None,
            "fix_omega": None,
            "omega": None,
            "fix_alpha": None,
            "alpha": None,
            "Malpha": None,
            "ncatG": None,
            "getSE": None,
            "RateAncestor": None,
            "Small_Diff": None,
            "cleandata": None,
            "fix_blength": None,
            "method": None,
        }

    def write_ctl_file(self):
        """Dynamically build a CODEML control file from the options.

        The control file is written to the location specified by the
        ctl_file property of the codeml class.  Options whose value is
        None are skipped.
        """
        # Make sure all paths are relative to the working directory
        self._set_rel_paths()
        ctl_handle = open(self.ctl_file, 'w')
        # try/finally (rather than ``with``) keeps Python 2.4 compatibility
        # while still closing the handle if a write fails.
        try:
            ctl_handle.write("seqfile = %s\n" % self._rel_alignment)
            ctl_handle.write("outfile = %s\n" % self._rel_out_file)
            # NOTE(review): _rel_tree is only assigned by _set_rel_paths()
            # when self.tree is not None; unless the Paml base class also
            # sets it, this line raises AttributeError when no tree is set.
            ctl_handle.write("treefile = %s\n" % self._rel_tree)
            for option, value in self._options.items():
                if value is None:
                    # If an option has a value of None, there's no need
                    # to write it in the control file; it's normally just
                    # commented out.
                    continue
                if option == "NSsites":
                    # NSsites is stored in Python as a list but in the
                    # control file it is specified as a series of numbers
                    # separated by spaces.
                    value = " ".join([str(site) for site in value])
                ctl_handle.write("%s = %s\n" % (option, value))
        finally:
            ctl_handle.close()

    def read_ctl_file(self, ctl_file):
        """Parse a control file and load the options into the Codeml instance.

        Text after a ``*`` on a line is treated as a comment.  The
        seqfile, treefile and outfile entries are stored on the
        alignment, tree and out_file attributes; NSsites is stored as a
        list of ints; any other known option is converted to float (if
        it contains a ".") or int where possible, otherwise kept as a
        string.  Options absent from the file are reset to None.

        Raises IOError if the file does not exist, AttributeError for a
        non-empty line without "=", TypeError for a non-integer NSsites
        entry and KeyError for an unknown option name.
        """
        temp_options = {}
        if not os.path.isfile(ctl_file):
            raise IOError("File not found: %r" % ctl_file)
        ctl_handle = open(ctl_file)
        # try/finally guarantees the handle is closed on every error path
        # (``with`` is avoided for Python 2.4 compatibility).
        try:
            for line in ctl_handle:
                line = line.strip()
                uncommented = line.split("*", 1)[0]
                if uncommented == "":
                    continue
                if "=" not in uncommented:
                    raise AttributeError(
                        "Malformed line in control file:\n%r" % line)
                # Split on the first "=" only so that values which
                # themselves contain "=" (e.g. file paths) are kept whole.
                option, value = uncommented.split("=", 1)
                option = option.strip()
                value = value.strip()
                if option == "seqfile":
                    self.alignment = value
                elif option == "treefile":
                    self.tree = value
                elif option == "outfile":
                    self.out_file = value
                elif option == "NSsites":
                    # split() tolerates tabs and repeated spaces
                    fields = value.split()
                    if not fields:
                        # An empty value is still an invalid site class
                        raise TypeError("Invalid site class: %s" % value)
                    site_classes = []
                    for field in fields:
                        try:
                            site_classes.append(int(field))
                        except ValueError:
                            raise TypeError("Invalid site class: %s" % field)
                    temp_options["NSsites"] = site_classes
                elif option not in self._options:
                    raise KeyError("Invalid option: %s" % option)
                else:
                    if "." in value:
                        try:
                            converted_value = float(value)
                        except ValueError:
                            converted_value = value
                    else:
                        try:
                            converted_value = int(value)
                        except ValueError:
                            converted_value = value
                    temp_options[option] = converted_value
        finally:
            ctl_handle.close()
        # Only commit once the whole file parsed; options not mentioned
        # in the file are reset to None.
        for option in self._options:
            self._options[option] = temp_options.get(option)

    def print_options(self):
        """Print out all of the options and their current settings."""
        for option, value in self._options.items():
            if option == "NSsites" and value is not None:
                # NSsites is stored in Python as a list but in the
                # control file it is specified as a series of numbers
                # separated by spaces.
                value = " ".join([str(site) for site in value])
            # Single-argument call form prints identically on Python 2 and 3
            print("%s = %s" % (option, value))

    def _set_rel_paths(self):
        """Convert all file/directory locations to paths relative to the current working directory.

        CODEML requires that all paths specified in the control file be
        relative to the directory from which it is called rather than
        absolute paths.
        """
        Paml._set_rel_paths(self)
        if self.tree is not None:
            self._rel_tree = _relpath(self.tree, self.working_dir)

    def run(self, ctl_file=None, verbose=False, command="codeml",
            parse=True):
        """Run codeml using the current configuration and then parse the results.

        Execution is delegated to Paml.run with the given ctl_file,
        verbose and command arguments.  If parse is true, the output
        file is then parsed with read() and the parsed results are
        returned; otherwise None is returned.

        The arguments may be passed as either absolute or relative
        paths, despite the fact that CODEML requires relative paths.

        Raises ValueError if no tree file has been specified and IOError
        if the specified tree file does not exist.
        """
        if self.tree is None:
            raise ValueError("Tree file not specified.")
        if not os.path.exists(self.tree):
            raise IOError("The specified tree file does not exist.")
        Paml.run(self, ctl_file, verbose, command)
        if parse:
            results = read(self.out_file)
        else:
            results = None
        return results
194
def read(results_file):
    """Parse a CODEML results file.

    The file is read fully into memory and passed through the
    _parse_codeml parsers (basics, NSsites, pairwise, distances), each
    of which adds to the results.

    Returns the accumulated results.  Raises IOError if results_file
    does not exist and ValueError if nothing could be parsed from it.
    """
    if not os.path.exists(results_file):
        raise IOError("Results file does not exist.")
    handle = open(results_file)
    # try/finally (not ``with``) for Python 2.4 compatibility; ensures the
    # handle is closed even if reading fails.
    try:
        lines = handle.readlines()
    finally:
        handle.close()
    results = {}
    (results, multi_models) = _parse_codeml.parse_basics(lines, results)
    results = _parse_codeml.parse_nssites(lines, results, multi_models)
    results = _parse_codeml.parse_pairwise(lines, results)
    results = _parse_codeml.parse_distances(lines, results)
    if not results:
        raise ValueError("Invalid results file")
    return results
210