Package Bio :: Package PopGen :: Package FDist :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.FDist.Controller

  1  # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6   
  7   
  8  """ 
  9  This module allows to control fdist. 
 10   
 11  This will allow to call fdist and associated program (cplot, datacal, pv). 
 12   
 13  http://www.rubic.rdg.ac.uk/~mab/software.html 
 14  """ 
 15   
 16  import os 
 17  import tempfile 
 18  import sys 
 19  from shutil import copyfile 
 20  from random import randint, random 
 21  from time import strftime, clock 
 22  #from logging import debug 
 23   
 24  if sys.version_info[0] == 3: 
 25      maxint = sys.maxsize 
 26  else: 
 27      maxint = sys.maxint 
 28   
29 -def my_float(f):
30 #Because of Jython, mostly 31 if f=="-nan": f="nan" 32 return float(f)
33
34 -class FDistController(object):
35 - def __init__(self, fdist_dir = '', ext = None):
36 """Initializes the controller. 37 38 fdist_dir is the directory where fdist2 is. 39 ext is the extension of binaries (.exe on windows, 40 none on Unix) 41 42 """ 43 self.tmp_idx = 0 44 self.fdist_dir = fdist_dir 45 self.os_name = os.name 46 if sys.platform=='win32': 47 py_ext = '.exe' 48 else: 49 py_ext = '' 50 if ext == None: 51 self.ext = py_ext 52 else: 53 self.ext = ext 54 exec_counts = 0
55
56 - def _get_path(self, app):
57 """Returns the path to an fdist application. 58 59 Includes Path where fdist can be found plus executable extension. 60 """ 61 if self.fdist_dir == '': 62 return app + self.ext 63 else: 64 return os.sep.join([self.fdist_dir, app]) + self.ext
65
66 - def _get_temp_file(self):
67 """Gets a temporary file name. 68 69 Returns a temporary file name, if executing inside jython 70 tries to replace unexisting tempfile.mkstemp(). 71 """ 72 self.tmp_idx += 1 73 return strftime("%H%M%S") + str(int(clock()*100)) + str(randint(0,1000)) + str(self.tmp_idx)
74
75 - def run_datacal(self, data_dir='.', version=1, 76 crit_freq = 0.99, p = 0.5, beta= (0.25, 0.25)):
77 """Executes datacal. 78 79 data_dir - Where the data is found. 80 """ 81 in_name = self._get_temp_file() 82 out_name = self._get_temp_file() 83 f = open(data_dir + os.sep + in_name, 'w') 84 if version==1: 85 f.write('a\n') 86 datacal_name = "datacal" 87 else: 88 f.write('%f\n%f\n%f %f\na\n' % (crit_freq, p, beta[0], beta[1])) 89 datacal_name = "Ddatacal" 90 f.close() 91 curr_dir = os.getcwd() 92 os.system('cd ' + data_dir + ' && ' + 93 self._get_path(datacal_name) + ' < ' + in_name + ' > ' + out_name) 94 f = open(data_dir + os.sep + out_name) 95 if version == 1: 96 fst_line = f.readline().rstrip().split(' ') 97 fst = my_float(fst_line[4]) 98 sample_line = f.readline().rstrip().split(' ') 99 sample = int(sample_line[9]) 100 else: 101 l = f.readline().rstrip().split(" ") 102 loci, pops = int(l[-5]), int(l[-2]) 103 fst_line = f.readline().rstrip().split(' ') 104 fst = my_float(fst_line[4]) 105 sample_line = f.readline().rstrip().split(' ') 106 sample = int(sample_line[9]) 107 F_line = f.readline().rstrip().split(' ') 108 F, obs = my_float(F_line[5]), int (F_line[8]) 109 f.close() 110 os.remove(data_dir + os.sep + in_name) 111 os.remove(data_dir + os.sep + out_name) 112 if version==1: 113 return fst, sample 114 else: 115 return fst, sample, loci, pops, F, obs
116
117 - def _generate_intfile(self, data_dir):
118 """Generates an INTFILE. 119 120 Parameter: 121 data_dir - data directory 122 """ 123 inf = open(data_dir + os.sep + 'INTFILE', 'w') 124 for i in range(98): 125 inf.write(str(randint(-maxint+1,maxint-1)) + '\n') 126 inf.write('8\n') 127 inf.close()
128
129 - def run_fdist(self, npops, nsamples, fst, sample_size, 130 mut = 0, num_sims = 50000, data_dir='.', 131 is_dominant = False, theta = 0.06, beta = (0.25, 0.25), 132 max_freq = 0.99):
133 """Executes (d)fdist. 134 135 Parameters: 136 npops - Number of populations 137 nsamples - Number of populations sampled 138 fst - expected Fst 139 sample_size - Sample size per population 140 For dfdist: if zero a sample size file has to be provided 141 mut - 1=Stepwise, 0=Infinite allele 142 num_sims - number of simulations 143 data_dir - Where the data is found 144 is_dominant - If true executes dfdist 145 theta - Theta (=2Nmu) 146 beta - Parameters for the beta prior 147 max_freq - Maximum allowed frequency of the commonest allele 148 149 Returns: 150 fst - Average Fst 151 152 Important Note: This can take quite a while to run! 153 """ 154 if fst >= 0.9: 155 #Lets not joke 156 fst = 0.899 157 if fst <= 0.0: 158 #0 will make fdist run forever 159 fst = 0.001 160 in_name = 'input.fd' 161 out_name = 'output.fd' 162 #print 'writing', data_dir + os.sep + in_name 163 f = open(data_dir + os.sep + in_name, 'w') 164 f.write('y\n\n') 165 f.close() 166 if is_dominant: 167 config_name = "Dfdist_params" 168 else: 169 config_name = "fdist_params2.dat" 170 171 f = open(data_dir + os.sep + config_name, 'w') 172 f.write(str(npops) + '\n') 173 f.write(str(nsamples) + '\n') 174 f.write(str(fst) + '\n') 175 f.write(str(sample_size) + '\n') 176 if is_dominant: 177 f.write(str(theta) + '\n') 178 else: 179 f.write(str(mut) + '\n') 180 f.write(str(num_sims) + '\n') 181 if is_dominant: 182 f.write("%f %f\n" % beta) 183 f.write("%f\n" % max_freq) 184 f.close() 185 self._generate_intfile(data_dir) 186 187 if is_dominant: 188 bin_name = "Dfdist" 189 else: 190 bin_name = "fdist2" 191 os.system('cd ' + data_dir + ' && ' + 192 self._get_path(bin_name) + ' < ' + in_name + ' > ' + out_name) 193 f = open(data_dir + os.sep + out_name) 194 lines = f.readlines() 195 f.close() 196 for line in lines: 197 if line.startswith('average Fst'): 198 fst = my_float(line.rstrip().split(' ')[-1]) 199 os.remove(data_dir + os.sep + in_name) 200 os.remove(data_dir + os.sep + out_name) 201 return fst
202
203 - def run_fdist_force_fst(self, npops, nsamples, fst, sample_size, 204 mut = 0, num_sims = 50000, data_dir='.', 205 try_runs = 5000, limit=0.001, 206 is_dominant = False, theta = 0.06, beta = (0.25, 0.25), 207 max_freq = 0.99):
208 """Executes fdist trying to force Fst. 209 210 Parameters: 211 try_runs - Number of simulations on the part trying to get 212 Fst correct 213 limit - Interval limit 214 Other parameters can be seen on run_fdist. 215 """ 216 max_run_fst = 1 217 min_run_fst = 0 218 current_run_fst = fst 219 old_fst = fst 220 while True: 221 #debug('testing fst ' + str(current_run_fst)) 222 real_fst = self.run_fdist(npops, nsamples, 223 current_run_fst, sample_size, 224 mut, try_runs, data_dir, 225 is_dominant, theta, beta, max_freq) 226 #debug('got real fst ' + str(real_fst)) 227 if abs(real_fst - fst) < limit: 228 #debug('We are OK') 229 return self.run_fdist(npops, nsamples, current_run_fst, 230 sample_size, 231 mut, num_sims, data_dir, 232 is_dominant, theta, beta, max_freq) 233 old_fst = current_run_fst 234 if real_fst > fst: 235 max_run_fst = current_run_fst 236 if current_run_fst < min_run_fst + limit: 237 #we can do no better 238 #debug('Lower limit is ' + str(min_run_fst)) 239 return self.run_fdist(npops, nsamples, current_run_fst, 240 sample_size, mut, num_sims, data_dir) 241 current_run_fst = (min_run_fst + current_run_fst)/2 242 else: 243 min_run_fst = current_run_fst 244 if current_run_fst > max_run_fst - limit: 245 #we can do no better 246 #debug('Upper limit is ' + str(max_run_fst)) 247 return self.run_fdist(npops, nsamples, current_run_fst, 248 sample_size, mut, num_sims, data_dir, 249 is_dominant, theta, beta, max_freq) 250 current_run_fst = (max_run_fst + current_run_fst)/2
251
252 - def run_cplot(self, ci= 0.95, data_dir='.', version = 1, smooth=0.04):
253 """Executes cplot. 254 255 ci - Confidence interval. 256 data_dir - Where the data is found. 257 """ 258 in_name = self._get_temp_file() 259 out_name = self._get_temp_file() 260 f = open(data_dir + os.sep + in_name, 'w') 261 if version == 1: 262 f.write('out.dat out.cpl\n' + str(ci) + '\n') 263 else: 264 f.write("\n".join([ 265 "data_fst_outfile out.cpl out.dat", 266 str(ci), str(smooth)])) 267 f.close() 268 curr_dir = os.getcwd() 269 self._generate_intfile(data_dir) 270 if version == 1: 271 cplot_name = "cplot" 272 else: 273 cplot_name = "cplot2" 274 os.system('cd ' + data_dir + ' && ' + 275 self._get_path(cplot_name) + ' < ' + in_name + ' > ' + out_name) 276 os.remove(data_dir + os.sep + in_name) 277 os.remove(data_dir + os.sep + out_name) 278 f = open(data_dir + os.sep + 'out.cpl') 279 conf_lines = [] 280 l = f.readline() 281 try: 282 while l!='': 283 conf_lines.append( 284 tuple(map(lambda x : my_float(x), l.rstrip().split(' '))) 285 ) 286 l = f.readline() 287 except ValueError: 288 f.close() 289 return [] 290 f.close() 291 return conf_lines
292
293 - def run_pv(self, out_file='probs.dat', data_dir='.', 294 version = 1, smooth=0.04):
295 """Executes pv. 296 297 out_file - Name of output file. 298 data_dir - Where the data is found. 299 """ 300 in_name = self._get_temp_file() 301 out_name = self._get_temp_file() 302 f = open(data_dir + os.sep + in_name, 'w') 303 f.write('data_fst_outfile ' + out_file + ' out.dat\n') 304 f.write(str(smooth) + '\n') 305 f.close() 306 self._generate_intfile(data_dir) 307 if version == 1: 308 pv_name = "pv" 309 else: 310 pv_name = "pv2" 311 os.system('cd ' + data_dir + ' && ' + 312 self._get_path(pv_name) + ' < ' + in_name + ' > ' + out_name) 313 pvf = open(data_dir + os.sep + out_file, 'r') 314 result = map(lambda x: tuple(map(lambda y: my_float(y), x.rstrip().split(' '))), 315 pvf.readlines()) 316 pvf.close() 317 os.remove(data_dir + os.sep + in_name) 318 os.remove(data_dir + os.sep + out_name) 319 return result
320