1
2
# One-letter codes of the 20 standard amino acids.
protein_letters = "ACDEFGHIKLMNPQRSTVWY"
# The 20 standard letters followed by the extra codes B, X, Z, J, U and O.
extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO"
5
6
7
8
9
10
# Nucleotide alphabets.  The "unambiguous" strings hold only the four bases;
# the "ambiguous" strings add the ambiguity codes R, Y, W, S, M, K, H, B, V,
# D and N.  The RNA strings are the DNA strings with U in place of T.
ambiguous_dna_letters = "GATCRYWSMKHBVDN"
unambiguous_dna_letters = "GATC"
ambiguous_rna_letters = "GAUCRYWSMKHBVDN"
unambiguous_rna_letters = "GAUC"
15
16
17
18
19
# The four DNA bases followed by the extra letters B, D, S and W.
# NOTE(review): in this alphabet the extra letters presumably stand for
# modified bases rather than the ambiguity codes of the same name - confirm.
extended_dna_letters = "GATCBDSW"
21
22
23
24
# Maps every DNA letter, ambiguous or not, to the string of unambiguous
# bases it can stand for.  "X" and "N" both stand for any of the four bases.
ambiguous_dna_values = {
    "A": "A",
    "C": "C",
    "G": "G",
    "T": "T",
    "M": "AC",
    "R": "AG",
    "W": "AT",
    "S": "CG",
    "Y": "CT",
    "K": "GT",
    "V": "ACG",
    "H": "ACT",
    "D": "AGT",
    "B": "CGT",
    "X": "GATC",
    "N": "GATC",
}
# Maps every RNA letter, ambiguous or not, to the string of unambiguous
# bases it can stand for.  "X" and "N" both stand for any of the four bases.
ambiguous_rna_values = {
    "A": "A",
    "C": "C",
    "G": "G",
    "U": "U",
    "M": "AC",
    "R": "AG",
    "W": "AU",
    "S": "CG",
    "Y": "CU",
    "K": "GU",
    "V": "ACG",
    "H": "ACU",
    "D": "AGU",
    "B": "CGU",
    "X": "GAUC",
    "N": "GAUC",
}
61
# Maps every DNA letter to the letter of its complement.  An ambiguity code
# maps to the code covering the complements of the bases it stands for
# (e.g. M = A or C, so its complement is K = T or G).
ambiguous_dna_complement = {
    "A": "T",
    "C": "G",
    "G": "C",
    "T": "A",
    "M": "K",
    "R": "Y",
    "W": "W",
    "S": "S",
    "Y": "R",
    "K": "M",
    "V": "B",
    "H": "D",
    "D": "H",
    "B": "V",
    "X": "X",
    "N": "N",
}
80
# Maps every RNA letter to the letter of its complement.  An ambiguity code
# maps to the code covering the complements of the bases it stands for
# (e.g. M = A or C, so its complement is K = U or G).
ambiguous_rna_complement = {
    "A": "U",
    "C": "G",
    "G": "C",
    "U": "A",
    "M": "K",
    "R": "Y",
    "W": "W",
    "S": "S",
    "Y": "R",
    "K": "M",
    "V": "B",
    "H": "D",
    "D": "H",
    "B": "V",
    "X": "X",
    "N": "N",
}
99
100
106
107
# Weight of each unambiguous DNA letter.
# NOTE(review): presumably molecular weights in Daltons - confirm the unit
# and which chemical form (e.g. monophosphate) the figures refer to.
unambiguous_dna_weights = {
    "A": 347.,
    "C": 323.,
    "G": 363.,
    "T": 322.,
}
# Derived from unambiguous_dna_weights by the module-private _make_ranges
# helper.  NOTE(review): the helper is defined outside this excerpt;
# presumably it turns each single weight into a range entry - confirm.
unambiguous_dna_weight_ranges = _make_ranges(unambiguous_dna_weights)
115
# Weight of each unambiguous RNA letter.  A, C and G are the corresponding
# DNA weights plus 16; U has its own figure.
# NOTE(review): the +16 presumably accounts for the extra oxygen of ribose
# compared with deoxyribose - confirm.
unambiguous_rna_weights = {
    "A": unambiguous_dna_weights["A"] + 16.,
    "C": unambiguous_dna_weights["C"] + 16.,
    "G": unambiguous_dna_weights["G"] + 16.,
    "U": 340.,
}
# Derived from unambiguous_rna_weights by the module-private _make_ranges
# helper.  NOTE(review): the helper is defined outside this excerpt;
# presumably it turns each single weight into a range entry - confirm.
unambiguous_rna_weight_ranges = _make_ranges(unambiguous_rna_weights)
123
125 range_d = {}
126 avg_d = {}
127 for letter, values in mydict.iteritems():
128
129 if len(values)==1 and values[0] not in weight_table : continue
130 weights = map(weight_table.get, values)
131 range_d[letter] = (min(weights), max(weights))
132 total_w = 0.0
133 for w in weights:
134 total_w = total_w + w
135 avg_d[letter] = total_w / len(weights)
136 return range_d, avg_d
137
# (min, max) weight range and average weight for every DNA letter, computed
# from the bases each letter can stand for.
ambiguous_dna_weight_ranges, avg_ambiguous_dna_weights = \
    _make_ambiguous_ranges(ambiguous_dna_values,
                           unambiguous_dna_weights)

# The same two tables for every RNA letter.
ambiguous_rna_weight_ranges, avg_ambiguous_rna_weights = \
    _make_ambiguous_ranges(ambiguous_rna_values,
                           unambiguous_rna_weights)
145
# Weight of each of the 20 standard amino acids, keyed by one-letter code.
# There are no entries for the extended letters (B, J, O, U, X, Z).
# NOTE(review): presumably molecular weights of the free amino acids in
# Daltons - confirm.
protein_weights = {
    "A": 89.09,
    "C": 121.16,
    "D": 133.10,
    "E": 147.13,
    "F": 165.19,
    "G": 75.07,
    "H": 155.16,
    "I": 131.18,
    "K": 146.19,
    "L": 131.18,
    "M": 149.21,
    "N": 132.12,
    "P": 115.13,
    "Q": 146.15,
    "R": 174.20,
    "S": 105.09,
    "T": 119.12,
    "V": 117.15,
    "W": 204.23,
    "Y": 181.19,
}
170
# Maps every letter of the extended protein alphabet to the string of
# letters it can stand for.  B is N or D, J is I or L, Z is Q or E and X is
# any of the 20 standard amino acids; every other letter, including O and U,
# stands only for itself.
extended_protein_values = {
    "A": "A",
    "B": "ND",
    "C": "C",
    "D": "D",
    "E": "E",
    "F": "F",
    "G": "G",
    "H": "H",
    "I": "I",
    "J": "IL",
    "K": "K",
    "L": "L",
    "M": "M",
    "N": "N",
    "O": "O",
    "P": "P",
    "Q": "Q",
    "R": "R",
    "S": "S",
    "T": "T",
    "U": "U",
    "V": "V",
    "W": "W",
    "X": "ACDEFGHIKLMNPQRSTVWY",
    "Y": "Y",
    "Z": "QE",
}
201
# Derived from protein_weights by the module-private _make_ranges helper.
# NOTE(review): the helper is defined outside this excerpt; presumably it
# turns each single weight into a range entry - confirm.
protein_weight_ranges = _make_ranges(protein_weights)

# (min, max) weight range and average weight for the extended protein
# letters, computed from the amino acids each letter can stand for.  Letters
# that stand only for themselves and have no entry in protein_weights
# (O and U) get no entry in either table.
extended_protein_weight_ranges, avg_extended_protein_weights = \
    _make_ambiguous_ranges(extended_protein_values,
                           protein_weights)
207
208
209
210
# Atomic weight of each chemical element, keyed by element symbol, from
# hydrogen ("H") through meitnerium ("Mt") - 109 entries in all.
# NOTE(review): presumably standard atomic weights in unified atomic mass
# units - confirm the source and edition of the figures.
atom_weights = {
    "H": 1.00794,
    "He": 4.002602,
    "Li": 6.941,
    "Be": 9.012182,
    "B": 10.811,
    "C": 12.0107,
    "N": 14.0067,
    "O": 15.9994,
    "F": 18.9984032,
    "Ne": 20.1797,
    "Na": 22.989770,
    "Mg": 24.3050,
    "Al": 26.981538,
    "Si": 28.0855,
    "P": 30.973761,
    "S": 32.065,
    "Cl": 35.453,
    "Ar": 39.948,
    "K": 39.0983,
    "Ca": 40.078,
    "Sc": 44.955910,
    "Ti": 47.867,
    "V": 50.9415,
    "Cr": 51.9961,
    "Mn": 54.938049,
    "Fe": 55.845,
    "Co": 58.933200,
    "Ni": 58.6934,
    "Cu": 63.546,
    "Zn": 65.39,
    "Ga": 69.723,
    "Ge": 72.64,
    "As": 74.92160,
    "Se": 78.96,
    "Br": 79.904,
    "Kr": 83.80,
    "Rb": 85.4678,
    "Sr": 87.62,
    "Y": 88.90585,
    "Zr": 91.224,
    "Nb": 92.90638,
    "Mo": 95.94,
    "Tc": 98.0,
    "Ru": 101.07,
    "Rh": 102.90550,
    "Pd": 106.42,
    "Ag": 107.8682,
    "Cd": 112.411,
    "In": 114.818,
    "Sn": 118.710,
    "Sb": 121.760,
    "Te": 127.60,
    "I": 126.90447,
    "Xe": 131.293,
    "Cs": 132.90545,
    "Ba": 137.327,
    "La": 138.9055,
    "Ce": 140.116,
    "Pr": 140.90765,
    "Nd": 144.24,
    "Pm": 145.0,
    "Sm": 150.36,
    "Eu": 151.964,
    "Gd": 157.25,
    "Tb": 158.92534,
    "Dy": 162.50,
    "Ho": 164.93032,
    "Er": 167.259,
    "Tm": 168.93421,
    "Yb": 173.04,
    "Lu": 174.967,
    "Hf": 178.49,
    "Ta": 180.9479,
    "W": 183.84,
    "Re": 186.207,
    "Os": 190.23,
    "Ir": 192.217,
    "Pt": 195.078,
    "Au": 196.96655,
    "Hg": 200.59,
    "Tl": 204.3833,
    "Pb": 207.2,
    "Bi": 208.98038,
    "Po": 208.98,
    "At": 209.99,
    "Rn": 222.02,
    "Fr": 223.02,
    "Ra": 226.03,
    "Ac": 227.03,
    "Th": 232.0381,
    "Pa": 231.03588,
    "U": 238.02891,
    "Np": 237.05,
    "Pu": 244.06,
    "Am": 243.06,
    "Cm": 247.07,
    "Bk": 247.07,
    "Cf": 251.08,
    "Es": 252.08,
    "Fm": 257.10,
    "Md": 258.10,
    "No": 259.10,
    "Lr": 262.11,
    "Rf": 261.11,
    "Db": 262.11,
    "Sg": 266.12,
    "Bh": 264.12,
    "Hs": 269.13,
    "Mt": 268.14,
}
322