1
2
3
4
5
6 import re
7
# Matches decimal numbers that contain a decimal point, with an optional
# leading minus sign (e.g. "-2021.348300", "0.25"). Integers and exponent
# notation are deliberately not matched, so tokens such as "19" or "6..7"
# are ignored. At most one "-" is captured so that every match can be handed
# to float(); a run such as "--1.5" yields "-1.5" instead of an unparsable
# token. The pattern is a raw string so the backslashes reach the regex
# engine without relying on invalid string escape sequences.
line_floats_re = re.compile(r"-?\d+\.\d+")
9
def parse_basics(lines, results):
    """Parse the basics that should be present in most baseml results files.

    Every line is scanned for the program version, the maximum and the
    estimated log-likelihood, the number of parameters, the tree length and
    the estimated tree.

    Arguments:
     - lines - iterable of text lines from a baseml results file.
     - results - dict that is updated in place with whichever of the keys
       "version", "lnL max", "lnL", "tree length" and "tree" are found.

    Returns a ``(results, num_params)`` tuple. ``num_params`` is the "np"
    value of the "lnL(ntime: ... np: ...)" line, or -1 when no such value
    was found.
    """
    version_re = re.compile(r"BASEML \(in paml version (\d+\.\d+[a-z]*).*")
    np_re = re.compile(r"lnL\(ntime:\s+\d+\s+np:\s+(\d+)\)")
    num_params = -1
    for line in lines:
        # All decimal numbers that appear on this line.
        line_floats = [float(val) for val in line_floats_re.findall(line)]

        # The version check is independent of the chain below: a line may
        # carry the version and still be examined for the other fields.
        version_res = version_re.match(line)
        if version_res is not None:
            results["version"] = version_res.group(1)

        if "ln Lmax" in line and len(line_floats) == 1:
            results["lnL max"] = line_floats[0]
        elif "lnL(ntime:" in line and line_floats:
            results["lnL"] = line_floats[0]
            # search() rather than match(): the guard above accepts the
            # marker anywhere in the line, so the parameter count must not
            # be lost just because the line is indented.
            np_res = np_re.search(line)
            if np_res is not None:
                num_params = int(np_res.group(1))
        elif "tree length" in line and len(line_floats) == 1:
            results["tree length"] = line_floats[0]
        elif line.startswith("(") and ":" in line:
            # Only keep a tree that contains ":" separators; a line that
            # opens with "(" but has none is ignored.
            results["tree"] = line.strip()
    return (results, num_params)
49
def parse_parameters(lines, results, num_params):
    """Collect every parameter section of the file under results["parameters"].

    A fresh dict is threaded through the individual section parsers
    (parameter list, kappas, rates, frequencies, in that order), stored in
    ``results`` under the key "parameters", and ``results`` is returned.
    """
    collected = parse_parameter_list(lines, {}, num_params)
    # The remaining parsers share the (lines, parameters) calling convention.
    for parse_section in (parse_kappas, parse_rates, parse_freqs):
        collected = parse_section(lines, collected)
    results["parameters"] = collected
    return results
60
def parse_parameter_list(lines, parameters, num_params):
    """Parse the parameter list, an unlabeled line of numeric values.

    The first line holding exactly ``num_params`` decimal numbers is taken
    as the parameter list and stored verbatim (stripped) as a string under
    "parameter list". If the next line contains "SEs for parameters:", the
    line after that is stored the same way under "SEs".

    Arguments:
     - lines - indexable sequence of text lines from the results file.
     - parameters - dict that is updated in place.
     - num_params - number of values expected on the parameter line.

    Returns ``parameters``.
    """
    # Without a positive count there is nothing meaningful to look for; a
    # count of zero would otherwise match the first line without numbers.
    if num_params < 1:
        return parameters
    line_count = len(lines)
    for line_num, line in enumerate(lines):
        if len(line_floats_re.findall(line)) != num_params:
            continue
        parameters["parameter list"] = line.strip()
        # The SE header and its values occupy the next two lines; make sure
        # both exist so a parameter line near the end of the file does not
        # raise IndexError.
        if (line_num + 2 < line_count
                and "SEs for parameters:" in lines[line_num + 1]):
            parameters["SEs"] = lines[line_num + 2].strip()
        # Only the first matching line is used.
        break
    return parameters
88
def parse_kappas(lines, parameters):
    """Parse out the kappa parameters.

    Two layouts are recognised:

     - a "Parameters (kappa)" header followed by lines of numbers. A line
       that starts with a branch label such as " 12..13" is stored under
       parameters["branches"][label] as a dict with the keys "t", "kappa",
       "TS" and "TV"; the first numeric line without a branch label is
       stored under "kappa" and ends the section.
     - a single line containing "kappa under" together with its values.

    "kappa" is a float when one value is present and a list of floats
    otherwise.

    Arguments:
     - lines - iterable of text lines from the results file.
     - parameters - dict that is updated in place.

    Returns ``parameters``.
    """
    # NOTE(review): the pattern requires exactly one leading whitespace
    # character before the branch label - confirm that this matches the
    # column layout baseml writes for all node numbers.
    branch_re = re.compile(r"\s(\d+\.\.\d+)")
    kappa_found = False
    for line in lines:
        # All decimal numbers that appear on this line.
        line_floats = [float(val) for val in line_floats_re.findall(line)]

        if "Parameters (kappa)" in line:
            kappa_found = True
        elif kappa_found and line_floats:
            branch_res = branch_re.match(line)
            if branch_res is None:
                if len(line_floats) == 1:
                    parameters["kappa"] = line_floats[0]
                else:
                    parameters["kappa"] = line_floats
                kappa_found = False
            else:
                if parameters.get("branches") is None:
                    parameters["branches"] = {}
                # A branch row needs four values; shorter rows are skipped
                # instead of raising IndexError.
                if len(line_floats) >= 4:
                    parameters["branches"][branch_res.group(1)] = {
                        "t": line_floats[0],
                        "kappa": line_floats[1],
                        "TS": line_floats[2],
                        "TV": line_floats[3],
                    }
        elif "kappa under" in line and line_floats:
            if len(line_floats) == 1:
                parameters["kappa"] = line_floats[0]
            else:
                parameters["kappa"] = line_floats
    return parameters
128
def parse_rates(lines, parameters):
    """Extract the rate-related values from the results lines.

    Depending on what the lines contain, the following keys are written to
    ``parameters`` (which is also returned):

     - "rate parameters" - numbers on a "Rate parameters:" line.
     - "rates" - numbers on a line containing "rate: ".
     - "Q matrix" - dict with a "matrix" list that receives the numeric
       lines following a "matrix Q" header until four rows are collected,
       plus "average Ts/Tv" when the header line itself carries a number.
     - "alpha" - first number on a line containing "alpha".

    Each line is handled by the first rule that applies to it.
    """
    reading_q_rows = False
    for line in lines:
        values = [float(token) for token in line_floats_re.findall(line)]
        has_values = bool(values)

        if has_values and "Rate parameters:" in line:
            parameters["rate parameters"] = values
            continue

        if has_values and "rate: " in line:
            parameters["rates"] = values
            continue

        if "matrix Q" in line:
            # The header starts a new matrix whether or not it has a number.
            q_entry = {"matrix": []}
            if has_values:
                q_entry["average Ts/Tv"] = values[0]
            parameters["Q matrix"] = q_entry
            reading_q_rows = True
            continue

        if reading_q_rows and has_values:
            q_rows = parameters["Q matrix"]["matrix"]
            q_rows.append(values)
            if len(q_rows) == 4:
                reading_q_rows = False
            continue

        if has_values and "alpha" in line:
            parameters["alpha"] = values[0]
    return parameters
169
def parse_freqs(lines, parameters):
    """Parse the basepair frequencies.

    Depending on what the lines contain, the following keys are written to
    ``parameters``:

     - "base frequencies" - dict with the keys "T", "C", "A" and "G", taken
       from the first four numbers of a "Base frequencies" line.
     - "rate frequencies" - numbers on a line containing "freq: ".
     - "nodes" - dict keyed by node number, filled from the "Node #<n>"
       lines that follow a "(frequency parameters for branches)" header.
       Each node is a dict with "root" (bool), "frequency parameters" (up
       to the first four numbers) and, when at least eight numbers are
       present, "base frequencies" (numbers five to eight as T/C/A/G).
       A "Note: node <n> is root." line marks that node as root and ends
       the section.

    Arguments:
     - lines - iterable of text lines from the results file.
     - parameters - dict that is updated in place.

    Returns ``parameters``.
    """
    bases = ("T", "C", "A", "G")
    root_re = re.compile(r"Note: node (\d+) is root.")
    node_re = re.compile(r"Node \#(\d+)")
    branch_freqs_found = False
    for line in lines:
        # All decimal numbers that appear on this line.
        line_floats = [float(val) for val in line_floats_re.findall(line)]

        if "Base frequencies" in line and line_floats:
            # Four values are required; an incomplete line is skipped
            # instead of raising IndexError.
            if len(line_floats) >= 4:
                parameters["base frequencies"] = dict(zip(bases, line_floats))
        elif "freq: " in line and line_floats:
            parameters["rate frequencies"] = line_floats
        elif "(frequency parameters for branches)" in line:
            parameters["nodes"] = {}
            branch_freqs_found = True
        elif branch_freqs_found:
            if line_floats:
                node_res = node_re.match(line)
                if node_res is None:
                    # A numeric line inside the section that is not a
                    # "Node #<n>" row carries no node number; ignore it.
                    continue
                node = {"root": False}
                node["frequency parameters"] = line_floats[:4]
                # The per-node base frequencies are only complete when all
                # four values after the frequency parameters are present.
                if len(line_floats) >= 8:
                    node["base frequencies"] = dict(
                        zip(bases, line_floats[4:8]))
                parameters["nodes"][int(node_res.group(1))] = node
            else:
                root_res = root_re.match(line)
                if root_res is not None:
                    root_node = int(root_res.group(1))
                    # Only flag a node that was actually recorded above.
                    if root_node in parameters["nodes"]:
                        parameters["nodes"][root_node]["root"] = True
                    branch_freqs_found = False
    return parameters
225