Package Bio :: Package Data :: Module IUPACData
[hide private]
[frames | no frames]

Source Code for Module Bio.Data.IUPACData

  1  # Information about the IUPAC alphabets 
  2   
  3  protein_letters = "ACDEFGHIKLMNPQRSTVWY" 
  4  extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZJUO" 
  5  #   B = "Asx";  aspartic acid or asparagine (D or N) 
  6  #   X = "Xxx";  unknown or 'other' amino acid 
  7  #   Z = "Glx";  glutamic acid or glutamine (E or Q) 
  8  #   J = "Xle";  leucine or isoleucine (L or I, used in mass-spec) 
  9  #   U = "Sec";  selenocysteine 
 10  #   O = "Pyl";  pyrrolysine 
 11  ambiguous_dna_letters = "GATCRYWSMKHBVDN" 
 12  unambiguous_dna_letters = "GATC" 
 13  ambiguous_rna_letters = "GAUCRYWSMKHBVDN" 
 14  unambiguous_rna_letters = "GAUC" 
 15   
 16  #   B == 5-bromouridine 
 17  #   D == 5,6-dihydrouridine 
 18  #   S == thiouridine 
 19  #   W == wyosine 
 20  extended_dna_letters = "GATCBDSW" 
 21   
 22  # are there extended forms? 
 23  #extended_rna_letters = "GAUCBDSW" 
 24   
 25  ambiguous_dna_values = { 
 26      "A": "A", 
 27      "C": "C", 
 28      "G": "G", 
 29      "T": "T", 
 30      "M": "AC", 
 31      "R": "AG", 
 32      "W": "AT", 
 33      "S": "CG", 
 34      "Y": "CT", 
 35      "K": "GT", 
 36      "V": "ACG", 
 37      "H": "ACT", 
 38      "D": "AGT", 
 39      "B": "CGT", 
 40      "X": "GATC", 
 41      "N": "GATC", 
 42      } 
 43  ambiguous_rna_values = { 
 44      "A": "A", 
 45      "C": "C", 
 46      "G": "G", 
 47      "U": "U", 
 48      "M": "AC", 
 49      "R": "AG", 
 50      "W": "AU", 
 51      "S": "CG", 
 52      "Y": "CU", 
 53      "K": "GU", 
 54      "V": "ACG", 
 55      "H": "ACU", 
 56      "D": "AGU", 
 57      "B": "CGU", 
 58      "X": "GAUC", 
 59      "N": "GAUC", 
 60      } 
 61   
 62  ambiguous_dna_complement = { 
 63      "A": "T", 
 64      "C": "G", 
 65      "G": "C", 
 66      "T": "A", 
 67      "M": "K", 
 68      "R": "Y", 
 69      "W": "W", 
 70      "S": "S", 
 71      "Y": "R", 
 72      "K": "M", 
 73      "V": "B", 
 74      "H": "D", 
 75      "D": "H", 
 76      "B": "V", 
 77      "X": "X", 
 78      "N": "N", 
 79      } 
 80   
 81  ambiguous_rna_complement = { 
 82      "A": "U", 
 83      "C": "G", 
 84      "G": "C", 
 85      "U": "A", 
 86      "M": "K", 
 87      "R": "Y", 
 88      "W": "W", 
 89      "S": "S", 
 90      "Y": "R", 
 91      "K": "M", 
 92      "V": "B", 
 93      "H": "D", 
 94      "D": "H", 
 95      "B": "V", 
 96      "X": "X", 
 97      "N": "N", 
 98      } 
 99   
100   
101 -def _make_ranges(mydict):
102 d = {} 103 for key, value in mydict.iteritems(): 104 d[key] = (value, value) 105 return d
# From bioperl's SeqStats.pm
unambiguous_dna_weights = {
    "A": 347.,
    "C": 323.,
    "G": 363.,
    "T": 322.,
}
# Same keys, each weight expressed as a degenerate (weight, weight) range.
unambiguous_dna_weight_ranges = _make_ranges(unambiguous_dna_weights)

# A, C and G are derived from the DNA weights; U has its own value.
unambiguous_rna_weights = {
    "A": unambiguous_dna_weights["A"] + 16.,  # 16 for the oxygen
    "C": unambiguous_dna_weights["C"] + 16.,
    "G": unambiguous_dna_weights["G"] + 16.,
    "U": 340.,
}
unambiguous_rna_weight_ranges = _make_ranges(unambiguous_rna_weights)
124 -def _make_ambiguous_ranges(mydict, weight_table):
125 range_d = {} 126 avg_d = {} 127 for letter, values in mydict.iteritems(): 128 #Following line is a quick hack to skip undefined weights for U and O 129 if len(values)==1 and values[0] not in weight_table : continue 130 weights = map(weight_table.get, values) 131 range_d[letter] = (min(weights), max(weights)) 132 total_w = 0.0 133 for w in weights: 134 total_w = total_w + w 135 avg_d[letter] = total_w / len(weights) 136 return range_d, avg_d
# (min, max) weight ranges and average weights for every DNA / RNA letter,
# derived from the unambiguous weights via the ambiguity tables.
ambiguous_dna_weight_ranges, avg_ambiguous_dna_weights = \
    _make_ambiguous_ranges(ambiguous_dna_values,
                           unambiguous_dna_weights)

ambiguous_rna_weight_ranges, avg_ambiguous_rna_weights = \
    _make_ambiguous_ranges(ambiguous_rna_values,
                           unambiguous_rna_weights)

# Weights keyed by one-letter amino acid code.  O and U have no entry yet
# (see the commented-out lines).
protein_weights = {
    "A": 89.09,
    "C": 121.16,
    "D": 133.10,
    "E": 147.13,
    "F": 165.19,
    "G": 75.07,
    "H": 155.16,
    "I": 131.18,
    "K": 146.19,
    "L": 131.18,
    "M": 149.21,
    "N": 132.12,
    #"O": 0.0,   # Needs to be recorded!
    "P": 115.13,
    "Q": 146.15,
    "R": 174.20,
    "S": 105.09,
    "T": 119.12,
    #"U": 168.05, # To be confirmed
    "V": 117.15,
    "W": 204.23,
    "Y": 181.19
}

# Maps every extended protein letter to the string of letters it may stand
# for.  O and U map to themselves; since they have no weight they are
# skipped when the weight ranges below are built.
extended_protein_values = {
    "A": "A",
    "B": "ND",
    "C": "C",
    "D": "D",
    "E": "E",
    "F": "F",
    "G": "G",
    "H": "H",
    "I": "I",
    "J": "IL",
    "K": "K",
    "L": "L",
    "M": "M",
    "N": "N",
    "O": "O",
    "P": "P",
    "Q": "Q",
    "R": "R",
    "S": "S",
    "T": "T",
    "U": "U",
    "V": "V",
    "W": "W",
    "X": "ACDEFGHIKLMNPQRSTVWY",
    #TODO - Include U and O in the possible values of X?
    #This could alter the extended_protein_weight_ranges ...
    "Y": "Y",
    "Z": "QE",
}

protein_weight_ranges = _make_ranges(protein_weights)

extended_protein_weight_ranges, avg_extended_protein_weights = \
    _make_ambiguous_ranges(extended_protein_values,
                           protein_weights)


# For Center of Mass Calculation.
# Taken from http://www.chem.qmul.ac.uk/iupac/AtWt/ & PyMol
# Atomic weights keyed by element symbol, H through Mt.
atom_weights = {
    'H': 1.00794,
    'He': 4.002602,
    'Li': 6.941,
    'Be': 9.012182,
    'B': 10.811,
    'C': 12.0107,
    'N': 14.0067,
    'O': 15.9994,
    'F': 18.9984032,
    'Ne': 20.1797,
    'Na': 22.989770,
    'Mg': 24.3050,
    'Al': 26.981538,
    'Si': 28.0855,
    'P': 30.973761,
    'S': 32.065,
    'Cl': 35.453,
    'Ar': 39.948,
    'K': 39.0983,
    'Ca': 40.078,
    'Sc': 44.955910,
    'Ti': 47.867,
    'V': 50.9415,
    'Cr': 51.9961,
    'Mn': 54.938049,
    'Fe': 55.845,
    'Co': 58.933200,
    'Ni': 58.6934,
    'Cu': 63.546,
    'Zn': 65.39,
    'Ga': 69.723,
    'Ge': 72.64,
    'As': 74.92160,
    'Se': 78.96,
    'Br': 79.904,
    'Kr': 83.80,
    'Rb': 85.4678,
    'Sr': 87.62,
    'Y': 88.90585,
    'Zr': 91.224,
    'Nb': 92.90638,
    'Mo': 95.94,
    'Tc': 98.0,
    'Ru': 101.07,
    'Rh': 102.90550,
    'Pd': 106.42,
    'Ag': 107.8682,
    'Cd': 112.411,
    'In': 114.818,
    'Sn': 118.710,
    'Sb': 121.760,
    'Te': 127.60,
    'I': 126.90447,
    'Xe': 131.293,
    'Cs': 132.90545,
    'Ba': 137.327,
    'La': 138.9055,
    'Ce': 140.116,
    'Pr': 140.90765,
    'Nd': 144.24,
    'Pm': 145.0,
    'Sm': 150.36,
    'Eu': 151.964,
    'Gd': 157.25,
    'Tb': 158.92534,
    'Dy': 162.50,
    'Ho': 164.93032,
    'Er': 167.259,
    'Tm': 168.93421,
    'Yb': 173.04,
    'Lu': 174.967,
    'Hf': 178.49,
    'Ta': 180.9479,
    'W': 183.84,
    'Re': 186.207,
    'Os': 190.23,
    'Ir': 192.217,
    'Pt': 195.078,
    'Au': 196.96655,
    'Hg': 200.59,
    'Tl': 204.3833,
    'Pb': 207.2,
    'Bi': 208.98038,
    'Po': 208.98,
    'At': 209.99,
    'Rn': 222.02,
    'Fr': 223.02,
    'Ra': 226.03,
    'Ac': 227.03,
    'Th': 232.0381,
    'Pa': 231.03588,
    'U': 238.02891,
    'Np': 237.05,
    'Pu': 244.06,
    'Am': 243.06,
    'Cm': 247.07,
    'Bk': 247.07,
    'Cf': 251.08,
    'Es': 252.08,
    'Fm': 257.10,
    'Md': 258.10,
    'No': 259.10,
    'Lr': 262.11,
    'Rf': 261.11,
    'Db': 262.11,
    'Sg': 266.12,
    'Bh': 264.12,
    'Hs': 269.13,
    'Mt': 268.14,
}