Package Bio :: Package PopGen :: Package GenePop :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.GenePop.Controller

  1  # Copyright 2009 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6   
  7   
  8  """ 
  9  This module allows to control GenePop. 
 10   
 11  """ 
 12   
 13  import os 
 14  import re 
 15  import shutil 
 16  import subprocess 
 17  import sys 
 18  import tempfile 
 19   
 20  from Bio.Application import AbstractCommandline, _Argument, _Option 
 21   
22 -def _gp_float(tok):
23 """Gets a float from a token, if it fails, returns the string. 24 """ 25 try: 26 return float(tok) 27 except ValueError: 28 return str(tok)
29
30 -def _gp_int(tok):
31 """Gets a int from a token, if it fails, returns the string. 32 """ 33 try: 34 return int(tok) 35 except ValueError: 36 return str(tok)
37 38
39 -def _read_allele_freq_table(f):
40 l = f.readline() 41 while l.find(" --")==-1: 42 if l == "": 43 raise StopIteration 44 if l.find("No data")>-1: 45 return None, None 46 l = f.readline() 47 alleles = filter(lambda x: x != '', f.readline().rstrip().split(" ")) 48 alleles = map(lambda x: _gp_int(x), alleles) 49 l = f.readline().rstrip() 50 table = [] 51 while l != "": 52 line = filter(lambda x: x != '', l.split(" ")) 53 try: 54 table.append( 55 (line[0], 56 map(lambda x: _gp_float(x), line[1:-1]), 57 _gp_int(line[-1]))) 58 except ValueError: 59 table.append( 60 (line[0], 61 [None] * len(alleles), 62 0)) 63 l = f.readline().rstrip() 64 return alleles, table
65
66 -def _read_table(f, funs):
67 table = [] 68 l = f.readline().rstrip() 69 while l.find("---")==-1: 70 l = f.readline().rstrip() 71 l = f.readline().rstrip() 72 while l.find("===")==-1 and l.find("---")==-1 and l != "": 73 toks = filter(lambda x: x != "", l.split(" ")) 74 line = [] 75 for i in range(len(toks)): 76 try: 77 line.append(funs[i](toks[i])) 78 except ValueError: 79 line.append(toks[i]) # Could not cast 80 table.append(tuple(line)) 81 l = f.readline().rstrip() 82 return table
83
84 -def _read_triangle_matrix(f):
85 matrix = [] 86 l = f.readline().rstrip() 87 while l != "": 88 matrix.append( 89 map(lambda x: _gp_float(x), 90 filter(lambda y: y != "", l.split(" ")))) 91 l = f.readline().rstrip() 92 return matrix
93
94 -def _read_headed_triangle_matrix(f):
95 matrix = {} 96 header = f.readline().rstrip() 97 if header.find("---")>-1 or header.find("===")>-1: 98 header = f.readline().rstrip() 99 nlines = len(filter(lambda x:x != '', header.split(' '))) - 1 100 for line_pop in range(nlines): 101 l = f.readline().rstrip() 102 vals = filter(lambda x:x != '', l.split(' ')[1:]) 103 clean_vals = [] 104 for val in vals: 105 try: 106 clean_vals.append(_gp_float(val)) 107 except ValueError: 108 clean_vals.append(None) 109 for col_pop in range(len(clean_vals)): 110 matrix[(line_pop+1, col_pop)] = clean_vals[col_pop] 111 return matrix
112
113 -def _hw_func(stream, is_locus, has_fisher = False):
114 l = stream.readline() 115 if is_locus: 116 hook = "Locus " 117 else: 118 hook = " Pop : " 119 while l != "": 120 if l.startswith(hook): 121 stream.readline() 122 stream.readline() 123 stream.readline() 124 table = _read_table(stream,[str,_gp_float,_gp_float,_gp_float,_gp_float,_gp_int,str]) 125 #loci might mean pop if hook="Locus " 126 loci = {} 127 for entry in table: 128 if len(entry) < 3: 129 loci[entry[0]] = None 130 else: 131 locus, p, se, fis_wc, fis_rh, steps = entry[:-1] 132 if se == "-": se = None 133 loci[locus] = p, se, fis_wc, fis_rh, steps 134 return loci 135 l = stream.readline() 136 #self.done = True 137 raise StopIteration 138
139 -class _FileIterator:
140 """Iterator which crawls over a stream of lines with a function. 141 142 The generator function is expected to yield a tuple, while 143 consuming input 144 """
145 - def __init__(self, func, stream, fname):
146 self.func = func 147 self.stream = stream 148 self.fname = fname 149 self.done = False
150
151 - def __iter__(self):
152 if self.done: 153 self.done = True 154 raise StopIteration 155 return self
156
157 - def next(self):
158 return self.func(self)
159
160 - def __del__(self):
161 self.stream.close() 162 try: 163 os.remove(self.fname) 164 except OSError: 165 #Jython seems to call the iterator twice 166 pass
167
168 -class _GenePopCommandline(AbstractCommandline):
169 """ Command Line Wrapper for GenePop. 170 """
171 - def __init__(self, genepop_dir=None, cmd='Genepop', **kwargs):
172 self.parameters = [ 173 _Argument(["command"], 174 "GenePop option to be called", 175 is_required=True), 176 _Argument(["mode"], 177 "Should allways be batch", 178 is_required=True), 179 _Argument(["input"], 180 "Input file", 181 is_required=True), 182 _Argument(["Dememorization"], 183 "Dememorization step"), 184 _Argument(["BatchNumber"], 185 "Number of MCMC batches"), 186 _Argument(["BatchLength"], 187 "Length of MCMC chains"), 188 _Argument(["HWtests"], 189 "Enumeration or MCMC"), 190 _Argument(["IsolBDstatistic"], 191 "IBD statistic (a or e)"), 192 _Argument(["MinimalDistance"], 193 "Minimal IBD distance"), 194 _Argument(["GeographicScale"], 195 "Log or Linear"), 196 ] 197 AbstractCommandline.__init__(self, cmd, **kwargs) 198 self.set_parameter("mode", "Mode=Batch")
199
200 - def set_menu(self, option_list):
201 """Sets the menu option. 202 203 Example set_menu([6,1]) = get all F statistics (menu 6.1) 204 """ 205 self.set_parameter("command", "MenuOptions="+ 206 ".".join(map(lambda x:str(x),option_list)))
207
208 - def set_input(self, fname):
209 """Sets the input file name. 210 """ 211 self.set_parameter("input", "InputFile="+fname)
212
213 -class GenePopController(object):
214 - def __init__(self, genepop_dir = None):
215 """Initializes the controller. 216 217 genepop_dir is the directory where GenePop is. 218 219 The binary should be called Genepop (capital G) 220 221 """ 222 self.controller = _GenePopCommandline(genepop_dir)
223
224 - def _remove_garbage(self, fname_out):
225 try: 226 if fname_out != None: os.remove(fname_out) 227 except OSError: 228 pass # safe 229 try: 230 os.remove("genepop.txt") 231 except OSError: 232 pass # safe 233 try: 234 os.remove("fichier.in") 235 except OSError: 236 pass # safe 237 try: 238 os.remove("cmdline.txt") 239 except OSError: 240 pass # safe
241
242 - def _get_opts(self, dememorization, batches, iterations, enum_test=None):
243 opts = {} 244 opts["Dememorization"]=dememorization 245 opts["BatchNumber"]=batches 246 opts["BatchLength"]=iterations 247 if enum_test != None: 248 if enum_test == True: 249 opts["HWtests"]="Enumeration" 250 else: 251 opts["HWtests"]="MCMC" 252 return opts
253
254 - def _run_genepop(self, extensions, option, fname, opts={}):
255 for extension in extensions: 256 self._remove_garbage(fname + extension) 257 self.controller.set_menu(option) 258 self.controller.set_input(fname) 259 for opt in opts: 260 self.controller.set_parameter(opt, opt+"="+str(opts[opt])) 261 self.controller() #checks error level is zero 262 self._remove_garbage(None) 263 return
264 265
266 - def _test_pop_hz_both(self, fname, type, ext, enum_test = True, 267 dememorization = 10000, batches = 20, iterations = 5000):
268 """Hardy-Weinberg test for heterozygote deficiency/excess. 269 270 Returns a population iterator containg 271 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 272 Some loci have a None if the info is not available 273 SE might be none (for enumerations) 274 """ 275 opts = self._get_opts(dememorization, batches, iterations, enum_test) 276 self._run_genepop([ext], [1, type], fname, opts) 277 f = open(fname + ext) 278 def hw_func(self): 279 return _hw_func(self.stream, False)
280 return _FileIterator(hw_func, f, fname + ext)
281
282 - def _test_global_hz_both(self, fname, type, ext, enum_test = True, 283 dememorization = 10000, batches = 20, iterations = 5000):
284 """Global Hardy-Weinberg test for heterozygote deficiency/excess. 285 286 Returns a triple with: 287 A list per population containg 288 (pop_name, P-val, SE, switches) 289 Some pops have a None if the info is not available 290 SE might be none (for enumerations) 291 A list per loci containg 292 (locus_name, P-val, SE, switches) 293 Some loci have a None if the info is not available 294 SE might be none (for enumerations) 295 Overall results (P-val, SE, switches) 296 297 """ 298 opts = self._get_opts(dememorization, batches, iterations, enum_test) 299 self._run_genepop([ext], [1, type], fname, opts) 300 def hw_pop_func(self): 301 return _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
302 f1 = open(fname + ext) 303 l = f1.readline() 304 while l.find("by population") == -1: 305 l = f1.readline() 306 pop_p = _read_table(f1, [str, _gp_float, _gp_float, _gp_float]) 307 f2 = open(fname + ext) 308 l = f2.readline() 309 while l.find("by locus") == -1: 310 l = f2.readline() 311 loc_p = _read_table(f2, [str, _gp_float, _gp_float, _gp_float]) 312 f = open(fname + ext) 313 l = f.readline() 314 while l.find("all locus") == -1: 315 l = f.readline() 316 f.readline() 317 f.readline() 318 f.readline() 319 f.readline() 320 l = f.readline().rstrip() 321 p, se, switches = tuple(map(lambda x: _gp_float(x), 322 filter(lambda y: y != "",l.split(" ")))) 323 f.close() 324 return pop_p, loc_p, (p, se, switches) 325 326 #1.1
327 - def test_pop_hz_deficiency(self, fname, enum_test = True, 328 dememorization = 10000, batches = 20, iterations = 5000):
329 """Hardy-Weinberg test for heterozygote deficiency. 330 331 Returns a population iterator containg 332 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 333 Some loci have a None if the info is not available 334 SE might be none (for enumerations) 335 """ 336 return self._test_pop_hz_both(fname, 1, ".D", enum_test, 337 dememorization, batches, iterations)
338 339 #1.2
340 - def test_pop_hz_excess(self, fname, enum_test = True, 341 dememorization = 10000, batches = 20, iterations = 5000):
342 """Hardy-Weinberg test for heterozygote deficiency. 343 344 Returns a population iterator containg 345 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 346 Some loci have a None if the info is not available 347 SE might be none (for enumerations) 348 """ 349 return self._test_pop_hz_both(fname, 2, ".E", enum_test, 350 dememorization, batches, iterations)
351 352 #1.3 P file
353 - def test_pop_hz_prob(self, fname, ext, enum_test = False, 354 dememorization = 10000, batches = 20, iterations = 5000):
355 """Hardy-Weinberg test based on probability. 356 357 Returns 2 iterators and a final tuple: 358 359 1. Returns a loci iterator containing 360 b. A dictionary[pop_pos]=(P-val, SE, Fis-WC, Fis-RH, steps) 361 Some pops have a None if the info is not available 362 SE might be none (for enumerations) 363 c. Result of Fisher's test (Chi2, deg freedom, prob) 364 2. Returns a population iterator containg 365 a. A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 366 Some loci have a None if the info is not available 367 SE might be none (for enumerations) 368 b. Result of Fisher's test (Chi2, deg freedom, prob) 369 3. (Chi2, deg freedom, prob) 370 """ 371 opts = self._get_opts(dememorization, batches, iterations, enum_test) 372 self._run_genepop([ext], [1, 3], fname, opts) 373 def hw_prob_loci_func(self): 374 return _hw_func(self.stream, True, True)
375 def hw_prob_pop_func(self): 376 return _hw_func(self.stream, False, True) 377 shutil.copyfile(fname+".P", fname+".P2") 378 f1 = open(fname + ".P") 379 f2 = open(fname + ".P2") 380 return _FileIterator(hw_prob_loci_func, f1, fname + ".P"), _FileIterator(hw_prob_pop_func, f2, fname + ".P2") 381 382 #1.4
383 - def test_global_hz_deficiency(self, fname, enum_test = True, 384 dememorization = 10000, batches = 20, iterations = 5000):
385 """Global Hardy-Weinberg test for heterozygote deficiency. 386 387 Returns a triple with: 388 An list per population containg 389 (pop_name, P-val, SE, switches) 390 Some pops have a None if the info is not available 391 SE might be none (for enumerations) 392 An list per loci containg 393 (locus_name, P-val, SE, switches) 394 Some loci have a None if the info is not available 395 SE might be none (for enumerations) 396 Overall results (P-val, SE, switches) 397 """ 398 return self._test_global_hz_both(fname, 4, ".DG", enum_test, 399 dememorization, batches, iterations)
400 401 402 #1.5
403 - def test_global_hz_excess(self, fname, enum_test = True, 404 dememorization = 10000, batches = 20, iterations = 5000):
405 """Global Hardy-Weinberg test for heterozygote excess. 406 407 Returns a triple with: 408 An list per population containg 409 (pop_name, P-val, SE, switches) 410 Some pops have a None if the info is not available 411 SE might be none (for enumerations) 412 An list per loci containg 413 (locus_name, P-val, SE, switches) 414 Some loci have a None if the info is not available 415 SE might be none (for enumerations) 416 Overall results (P-val, SE, switches) 417 """ 418 return self._test_global_hz_both(fname, 5, ".EG", enum_test, 419 dememorization, batches, iterations)
420 421 #2.1
422 - def test_ld(self, fname, 423 dememorization = 10000, batches = 20, iterations = 5000):
424 opts = self._get_opts(dememorization, batches, iterations) 425 self._run_genepop([".DIS"], [2, 1], fname, opts) 426 def ld_pop_func(self): 427 current_pop = None 428 l = self.stream.readline().rstrip() 429 if l == "": 430 self.done = True 431 raise StopIteration 432 toks = filter(lambda x: x != "", l.split(" ")) 433 pop, locus1, locus2 = toks[0], toks[1], toks[2] 434 if not hasattr(self, "start_locus1"): 435 start_locus1, start_locus2 = locus1, locus2 436 current_pop = -1 437 if locus1 == start_locus1 and locus2 == start_locus2: 438 current_pop += 1 439 if toks[3] == "No": 440 return current_pop, pop, (locus1, locus2), None 441 p, se, switches = _gp_float(toks[3]), _gp_float(toks[4]), _gp_int(toks[5]) 442 return current_pop, pop, (locus1, locus2), (p, se, switches)
443 def ld_func(self): 444 l = self.stream.readline().rstrip() 445 if l == "": 446 self.done = True 447 raise StopIteration 448 toks = filter(lambda x: x != "", l.split(" ")) 449 locus1, locus2 = toks[0], toks[2] 450 try: 451 chi2, df, p = _gp_float(toks[3]), _gp_int(toks[4]), _gp_float(toks[5]) 452 except ValueError: 453 return (locus1, locus2), None 454 return (locus1, locus2), (chi2, df, p) 455 f1 = open(fname + ".DIS") 456 l = f1.readline() 457 while l.find("----")==-1: 458 l = f1.readline() 459 shutil.copyfile(fname + ".DIS", fname + ".DI2") 460 f2 = open(fname + ".DI2") 461 l = f2.readline() 462 while l.find("Locus pair")==-1: 463 l = f2.readline() 464 while l.find("----")==-1: 465 l = f2.readline() 466 return _FileIterator(ld_pop_func, f1, fname+".DIS"), _FileIterator(ld_func, f2, fname + ".DI2") 467 468 #2.2
469 - def create_contingency_tables(self, fname):
470 raise NotImplementedError
471 472 #3.1 PR/GE files
473 - def test_genic_diff_all(self, fname, 474 dememorization = 10000, batches = 20, iterations = 5000):
475 raise NotImplementedError
476 477 #3.2 PR2/GE2 files
478 - def test_genic_diff_pair(self, fname, 479 dememorization = 10000, batches = 20, iterations = 5000):
480 raise NotImplementedError
481 482 #3.3 G files
483 - def test_genotypic_diff_all(self, fname, 484 dememorization = 10000, batches = 20, iterations = 5000):
485 raise NotImplementedError
486 487 #3.4 2G2 files
488 - def test_genotypic_diff_pair(self, fname, 489 dememorization = 10000, batches = 20, iterations = 5000):
490 raise NotImplementedError
491 492 #4
493 - def estimate_nm(self, fname):
494 self._run_genepop(["PRI"], [4], fname) 495 f = open(fname + ".PRI") 496 lines = f.readlines() # Small file, it is ok 497 f.close() 498 for line in lines: 499 m = re.search("Mean sample size: ([.0-9]+)", line) 500 if m != None: 501 mean_sample_size = _gp_float(m.group(1)) 502 m = re.search("Mean frequency of private alleles p\(1\)= ([.0-9]+)", line) 503 if m != None: 504 mean_priv_alleles = _gp_float(m.group(1)) 505 m = re.search("N=10: ([.0-9]+)", line) 506 if m != None: 507 mig10 = _gp_float(m.group(1)) 508 m = re.search("N=25: ([.0-9]+)", line) 509 if m != None: 510 mig25 = _gp_float(m.group(1)) 511 m = re.search("N=50: ([.0-9]+)", line) 512 if m != None: 513 mig50 = _gp_float(m.group(1)) 514 m = re.search("for size= ([.0-9]+)", line) 515 if m != None: 516 mig_corrected = _gp_float(m.group(1)) 517 os.remove(fname + ".PRI") 518 return mean_sample_size, mean_priv_alleles, mig10, mig25, mig50, mig_corrected
519 520 #5.1
521 - def calc_allele_genotype_freqs(self, fname):
522 """Calculates allele and genotype frequencies per locus and per sample. 523 524 Parameters: 525 fname - file name 526 527 Returns tuple with 2 elements: 528 Population iterator with 529 population name 530 Locus dictionary with key = locus name and content tuple as 531 Genotype List with 532 (Allele1, Allele2, observed, expected) 533 (expected homozygotes, observed hm, 534 expected heterozygotes, observed ht) 535 Allele frequency/Fis dictionary with allele as key and 536 (count, frequency, Fis Weir & Cockerham) 537 Totals as a pair 538 count 539 Fis Weir & Cockerham, 540 Fis Robertson & Hill 541 Locus iterator with 542 Locus name 543 allele list 544 Population list with a triple 545 population name 546 list of allele frequencies in the same order as allele list above 547 number of genes 548 549 550 Will create a file called fname.INF 551 """ 552 self._run_genepop(["INF"], [5,1], fname) 553 #First pass, general information 554 #num_loci = None 555 #num_pops = None 556 #f = open(fname + ".INF") 557 #l = f.readline() 558 #while (num_loci == None or num_pops == None) and l != '': 559 # m = re.search("Number of populations detected : ([0-9+])", l) 560 # if m != None: 561 # num_pops = _gp_int(m.group(1)) 562 # m = re.search("Number of loci detected : ([0-9+])", l) 563 # if m != None: 564 # num_loci = _gp_int(m.group(1)) 565 # l = f.readline() 566 #f.close() 567 def pop_parser(self): 568 if hasattr(self, "old_line"): 569 l = self.old_line 570 del self.old_line 571 else: 572 l = self.stream.readline() 573 loci_content = {} 574 while l != '': 575 l = l.rstrip() 576 if l.find("Tables of allelic frequencies for each locus")>-1: 577 return self.curr_pop, loci_content 578 match = re.match(".*Pop: (.+) Locus: (.+)", l) 579 if match != None: 580 pop = match.group(1) 581 locus = match.group(2) 582 if not hasattr(self, "first_locus"): 583 self.first_locus = locus 584 if hasattr(self, "curr_pop"): 585 if self.first_locus == locus: 586 old_pop = self.curr_pop 587 #self.curr_pop = pop 588 self.old_line = l 589 del self.first_locus 590 del self.curr_pop 591 return old_pop, loci_content 592 self.curr_pop = pop 593 else: 594 l = self.stream.readline() 595 continue 596 geno_list = [] 597 l = self.stream.readline() 598 if l.find("No data")>-1: continue 599 600 while l.find("Genotypes Obs.")==-1: 601 l = self.stream.readline() 602 603 while l != "\n": 604 m2 = re.match(" +([0-9]+) , ([0-9]+) *([0-9]+) *(.+)",l) 605 if m2 != None: 606 geno_list.append((_gp_int(m2.group(1)), _gp_int(m2.group(2)), 607 _gp_int(m2.group(3)), _gp_float(m2.group(4)))) 608 else: 609 l = self.stream.readline() 610 continue 611 l = self.stream.readline() 612 613 while l.find("Expected number of ho")==-1: 614 l = self.stream.readline() 615 expHo = _gp_float(l[38:]) 616 l = self.stream.readline() 617 obsHo = _gp_int(l[38:]) 618 l = self.stream.readline() 619 expHe = _gp_float(l[38:]) 620 l = self.stream.readline() 621 obsHe = _gp_int(l[38:]) 622 l = self.stream.readline() 623 624 while l.find("Sample count")==-1: 625 l = self.stream.readline() 626 l = self.stream.readline() 627 freq_fis={} 628 overall_fis = None 629 while l.find("----")==-1: 630 vals = filter(lambda x: x!='', 631 l.rstrip().split(' ')) 632 if vals[0]=="Tot": 633 overall_fis = _gp_int(vals[1]), \ 634 _gp_float(vals[2]), _gp_float(vals[3]) 635 else: 636 freq_fis[_gp_int(vals[0])] = _gp_int(vals[1]), \ 637 _gp_float(vals[2]), _gp_float(vals[3]) 638 l = self.stream.readline() 639 loci_content[locus] = geno_list, \ 640 (expHo, obsHo, expHe, obsHe), \ 641 freq_fis, overall_fis 642 self.done = True 643 raise StopIteration
644 def locus_parser(self): 645 l = self.stream.readline() 646 while l != "": 647 l = l.rstrip() 648 match = re.match(" Locus: (.+)", l) 649 if match != None: 650 locus = match.group(1) 651 alleles, table = _read_allele_freq_table(self.stream) 652 return locus, alleles, table 653 l = self.stream.readline() 654 self.done = True 655 raise StopIteration 656 657 popf = open(fname + ".INF") 658 shutil.copyfile(fname + ".INF", fname + ".IN2") 659 locf = open(fname + ".IN2") 660 pop_iter = _FileIterator(pop_parser, popf, fname + ".INF") 661 locus_iter = _FileIterator(locus_parser, locf, fname + ".IN2") 662 return (pop_iter, locus_iter) 663
664 - def _calc_diversities_fis(self, fname, ext):
665 self._run_genepop([ext], [5,2], fname) 666 f = open(fname + ext) 667 l = f.readline() 668 while l != "": 669 l = l.rstrip() 670 if l.startswith("Statistics per sample over all loci with at least two individuals typed"): 671 avg_fis = _read_table(f, [str, _gp_float, _gp_float, _gp_float]) 672 avg_Qintra = _read_table(f, [str, _gp_float]) 673 l = f.readline() 674 f.close() 675 def fis_func(self): 676 l = self.stream.readline() 677 while l != "": 678 l = l.rstrip() 679 m = re.search("Locus: (.+)", l) 680 if m != None: 681 locus = m.group(1) 682 self.stream.readline() 683 if self.stream.readline().find("No complete")>-1: return locus, None 684 self.stream.readline() 685 fis_table = _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float]) 686 self.stream.readline() 687 avg_qinter, avg_fis = tuple(map (lambda x: _gp_float(x), 688 filter(lambda y:y != "", self.stream.readline().split(" ")))) 689 return locus, fis_table, avg_qinter, avg_fis 690 l = self.stream.readline() 691 self.done = True 692 raise StopIteration
693 dvf = open(fname + ext) 694 return _FileIterator(fis_func, dvf, fname + ext), avg_fis, avg_Qintra 695 696 #5.2
697 - def calc_diversities_fis_with_identity(self, fname):
698 return self._calc_diversities_fis(fname, ".DIV")
699 700 #5.3
701 - def calc_diversities_fis_with_size(self, fname):
702 raise NotImplementedError
703 704 #6.1 Less genotype frequencies
705 - def calc_fst_all(self, fname):
706 """Executes GenePop and gets Fst/Fis/Fit (all populations) 707 708 Parameters: 709 fname - file name 710 711 Returns: 712 (multiLocusFis, multiLocusFst, multiLocus Fit), 713 Iterator of tuples 714 (Locus name, Fis, Fst, Fit, Qintra, Qinter) 715 716 Will create a file called fname.FST . 717 718 This does not return the genotype frequencies. 719 720 """ 721 self._run_genepop([".FST"], [6,1], fname) 722 f = open(fname + ".FST") 723 l = f.readline() 724 while l != '': 725 if l.startswith(' All:'): 726 toks=filter(lambda x:x!="", l.rstrip().split(' ')) 727 try: 728 allFis = _gp_float(toks[1]) 729 except ValueError: 730 allFis = None 731 try: 732 allFst = _gp_float(toks[2]) 733 except ValueError: 734 allFst = None 735 try: 736 allFit = _gp_float(toks[3]) 737 except ValueError: 738 allFit = None 739 l = f.readline() 740 f.close() 741 f = open(fname + ".FST") 742 def proc(self): 743 if hasattr(self, "last_line"): 744 l = self.last_line 745 del self.last_line 746 else: 747 l = self.stream.readline() 748 locus = None 749 fis = None 750 fst = None 751 fit = None 752 qintra = None 753 qinter = None 754 while l != '': 755 l = l.rstrip() 756 if l.startswith(' Locus:'): 757 if locus != None: 758 self.last_line = l 759 return locus, fis, fst, fit, qintra, qinter 760 else: 761 locus = l.split(':')[1].lstrip() 762 elif l.startswith('Fis^='): 763 fis = _gp_float(l.split(' ')[1]) 764 elif l.startswith('Fst^='): 765 fst = _gp_float(l.split(' ')[1]) 766 elif l.startswith('Fit^='): 767 fit = _gp_float(l.split(' ')[1]) 768 elif l.startswith('1-Qintra^='): 769 qintra = _gp_float(l.split(' ')[1]) 770 elif l.startswith('1-Qinter^='): 771 qinter = _gp_float(l.split(' ')[1]) 772 return locus, fis, fst, fit, qintra, qinter 773 l = self.stream.readline() 774 if locus != None: 775 return locus, fis, fst, fit, qintra, qinter 776 self.stream.close() 777 self.done = True 778 raise StopIteration
779 return (allFis, allFst, allFit), _FileIterator(proc , f, fname + ".FST") 780 781 #6.2
782 - def calc_fst_pair(self, fname):
783 self._run_genepop([".ST2", ".MIG"], [6,2], fname) 784 f = open(fname + ".ST2") 785 l = f.readline() 786 while l != "": 787 l = l.rstrip() 788 if l.startswith("Estimates for all loci"): 789 avg_fst = _read_headed_triangle_matrix(f) 790 l = f.readline() 791 f.close() 792 def loci_func(self): 793 l = self.stream.readline() 794 while l != "": 795 l = l.rstrip() 796 m = re.search(" Locus: (.+)", l) 797 if m != None: 798 locus = m.group(1) 799 matrix = _read_headed_triangle_matrix(self.stream) 800 return locus, matrix 801 l = self.stream.readline() 802 self.done = True 803 raise StopIteration
804 stf = open(fname + ".ST2") 805 os.remove(fname + ".MIG") 806 return _FileIterator(loci_func, stf, fname + ".ST2"), avg_fst 807 808 #6.3
809 - def calc_rho_all(self, fname):
810 raise NotImplementedError
811 812 #6.4
813 - def calc_rho_pair(self, fname):
814 raise NotImplementedError
815
816 - def _calc_ibd(self, fname, sub, stat="a", scale="Log", min_dist=0.00001):
817 """Calculates isolation by distance statistics 818 """ 819 self._run_genepop([".GRA", ".MIG", ".ISO"], [6,sub], 820 fname, opts = { 821 "MinimalDistance" : min_dist, 822 "GeographicScale" : scale, 823 "IsolBDstatistic" : stat, 824 }) 825 f = open(fname + ".ISO") 826 f.readline() 827 f.readline() 828 f.readline() 829 f.readline() 830 estimate = _read_triangle_matrix(f) 831 f.readline() 832 f.readline() 833 distance = _read_triangle_matrix(f) 834 f.readline() 835 match = re.match("a = (.+), b = (.+)", f.readline().rstrip()) 836 a = _gp_float(match.group(1)) 837 b = _gp_float(match.group(2)) 838 f.readline() 839 f.readline() 840 match = re.match(" b=(.+)", f.readline().rstrip()) 841 bb = _gp_float(match.group(1)) 842 match = re.match(".*\[(.+) ; (.+)\]", f.readline().rstrip()) 843 bblow = _gp_float(match.group(1)) 844 bbhigh = _gp_float(match.group(2)) 845 f.close() 846 os.remove(fname + ".MIG") 847 os.remove(fname + ".GRA") 848 os.remove(fname + ".ISO") 849 return estimate, distance, (a, b), (bb, bblow, bbhigh)
850 851 #6.5
852 - def calc_ibd_diplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
853 """Calculates isolation by distance statistics for diploid data. 854 855 See _calc_ibd for parameter details. 856 Note that each pop can only have a single individual and 857 the individual name has to be the sample coordinates. 858 """ 859 return self._calc_ibd(fname, 5, stat, scale, min_dist)
860 861 #6.6
862 - def calc_ibd_haplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
863 """Calculates isolation by distance statistics for haploid data. 864 865 See _calc_ibd for parameter details. 866 Note that each pop can only have a single individual and 867 the individual name has to be the sample coordinates. 868 """ 869 return self._calc_ibd(fname, 6, stat, scale, min_dist)
870