Package Bio :: Package Alphabet :: Module IUPAC
[hide private]
[frames | no frames]

Source Code for Module Bio.Alphabet.IUPAC

 1  # Copyright 2000-2001 by Andrew Dalke. 
 2  # Revisions copyright 2008 by Peter Cock. 
 3  # All rights reserved. 
 4  # This code is part of the Biopython distribution and governed by its 
 5  # license.  Please see the LICENSE file that should have been included 
 6  # as part of this package. 
 7   
 8  """Standard nucleotide and protein alphabets defined by IUPAC.""" 
 9   
10  from Bio import Alphabet 
11  from Bio.Data import IUPACData 
12   
13  ##################### Protein 
14   
15  # From the IUPAC definition at: 
16  #   http://www.chem.qmw.ac.uk/iupac/AminoAcid/A2021.html#AA21 
17   
18  assert IUPACData.extended_protein_letters == IUPACData.extended_protein_letters.upper() 
class ExtendedIUPACProtein(Alphabet.ProteinAlphabet):
    """Extended uppercase IUPAC protein single letter alphabet including X etc.

    In addition to the standard 20 single letter protein codes, this includes:

    B = "Asx";  Aspartic acid (D) or Asparagine (N)
    X = "Xxx";  Unknown or 'other' amino acid
    Z = "Glx";  Glutamic acid (E) or Glutamine (Q)
    J = "Xle";  Leucine (L) or Isoleucine (I), used in mass-spec (NMR)
    U = "Sec";  Selenocysteine
    O = "Pyl";  Pyrrolysine

    This alphabet is not intended to be used with X for Selenocysteine
    (an ad-hoc standard prior to the IUPAC adoption of U instead).
    """

    # The permitted letters come straight from the shared IUPAC data tables.
    letters = IUPACData.extended_protein_letters


# Ready-made module-level instance of the extended protein alphabet.
extended_protein = ExtendedIUPACProtein()

# Sanity check: the standard protein letters must already be uppercase.
assert IUPACData.protein_letters == IUPACData.protein_letters.upper()
class IUPACProtein(ExtendedIUPACProtein):
    """Uppercase IUPAC protein single letter alphabet of the 20 standard amino acids."""

    # Overrides the inherited letters with the standard protein letter set.
    letters = IUPACData.protein_letters


# Ready-made module-level instance of the standard protein alphabet.
protein = IUPACProtein()

##################### DNA

# The next two are the IUPAC definitions, from:
#   http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html
class IUPACAmbiguousDNA(Alphabet.DNAAlphabet):
    """Uppercase IUPAC ambiguous DNA."""

    # The permitted letters come straight from the shared IUPAC data tables.
    letters = IUPACData.ambiguous_dna_letters


# Ready-made module-level instance of the ambiguous DNA alphabet.
ambiguous_dna = IUPACAmbiguousDNA()

class IUPACUnambiguousDNA(IUPACAmbiguousDNA):
    """Uppercase IUPAC unambiguous DNA (letters GATC only)."""

    # Overrides the inherited letters with the unambiguous DNA letter set.
    letters = IUPACData.unambiguous_dna_letters


# Ready-made module-level instance of the unambiguous DNA alphabet.
unambiguous_dna = IUPACUnambiguousDNA()


# Also from the URL, but not part of the standard
class ExtendedIUPACDNA(Alphabet.DNAAlphabet):
    """Extended IUPAC DNA alphabet.

    In addition to the standard letter codes GATC, this includes:

    B = 5-bromouridine
    D = 5,6-dihydrouridine
    S = thiouridine
    W = wyosine
    """

    # The permitted letters come straight from the shared IUPAC data tables.
    letters = IUPACData.extended_dna_letters


# Ready-made module-level instance of the extended DNA alphabet.
extended_dna = ExtendedIUPACDNA()

##################### RNA

class IUPACAmbiguousRNA(Alphabet.RNAAlphabet):
    """Uppercase IUPAC ambiguous RNA."""

    # The permitted letters come straight from the shared IUPAC data tables.
    letters = IUPACData.ambiguous_rna_letters


# Ready-made module-level instance of the ambiguous RNA alphabet.
ambiguous_rna = IUPACAmbiguousRNA()

class IUPACUnambiguousRNA(IUPACAmbiguousRNA):
    """Uppercase IUPAC unambiguous RNA (letters GAUC only)."""

    # Overrides the inherited letters with the unambiguous RNA letter set.
    letters = IUPACData.unambiguous_rna_letters


# Ready-made module-level instance of the unambiguous RNA alphabet.
unambiguous_rna = IUPACUnambiguousRNA()

# are there extended forms?
#class ExtendedIUPACRNA(Alphabet.RNAAlphabet):
#    letters = extended_rna_letters
#    # B == 5-bromouridine
#    # D == 5,6-dihydrouridine
#    # S == thiouridine
#    # W == wyosine
