Package Bio :: Package SubsMat :: Module FreqTable
[hide private]
[frames] | no frames]

Source Code for Module Bio.SubsMat.FreqTable

 1  from Bio import Alphabet 
 2  COUNT = 1  # dict_type flag for FreqTable: in_dict holds letter counts; frequencies are derived from them
 3  FREQ = 2  # dict_type flag for FreqTable: in_dict already holds letter frequencies
 4  ################################################################## 
 5  # A class to handle frequency tables 
 6  # Copyright Iddo Friedberg idoerg@cc.huji.ac.il 
 7  # Biopython (http://biopython.org) license applies 
 8  # Methods to read a letter frequency or a letter count file: 
 9  # Example files for a DNA alphabet: 
10  # 
11  # A count file (whitespace separated): 
12  # 
13  # A  50 
14  # C  37 
15  # G  23 
16  # T  58 
17  # 
18  # The same info as a frequency file: 
19  # 
20  # A 0.2976 
21  # C 0.2202 
22  # G 0.1369 
23  # T 0.3452 
24  #  
25  # Functions: 
26  #   read_count(f): read a count file from stream f. Then convert to 
27  #   frequencies 
28  #   read_freq(f): read a frequency data file from stream f. Of course, we then 
29  #   don't have the counts, but it is usually the letter frequencies which are 
30  #   interesting. 
31  # 
32  # Methods: 
33  #   (all internal) 
34  # Attributes: 
35  #   alphabet: The IUPAC alphabet set (or any other) whose letters you are 
36  #   using. Common sets are: IUPAC.protein (20-letter protein), 
37  #   IUPAC.unambiguous_dna (4-letter DNA). See Bio/alphabet for more. 
38  #   data: frequency dictionary. 
39  #   count: count dictionary. Empty if no counts are provided. 
40  # 
41  # Example of use: 
42  #   >>> from Bio.SubsMat import FreqTable 
43  #   >>> ftab = FreqTable.FreqTable(my_frequency_dictionary,FreqTable.FREQ) 
44  #   >>> ftab = FreqTable.FreqTable(my_count_dictionary,FreqTable.COUNT) 
45  #   >>> ftab = FreqTable.read_count(open('myDNACountFile')) 
46  # 
47  #   
48  ################################################################## 
class FreqTable(dict):
    """Letter-frequency table: a dict mapping each letter to its frequency.

    Attributes:
        alphabet: the alphabet passed to the constructor. When none is given
            (or it is falsy), a fresh ``Alphabet.Alphabet()`` whose
            ``letters`` attribute is the sorted concatenation of the table's
            keys.
        count: the count dictionary the frequencies were derived from, or an
            empty dict when the table was built directly from frequencies.
    """

    def _freq_from_count(self):
        """Populate the table with ``count / total`` for every counted letter.

        Reads ``self.count``. An empty count dict leaves the table empty.

        Raises:
            ZeroDivisionError: if the counts are non-empty but sum to zero.
        """
        total = float(sum(self.count.values()))
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only
        # and raises AttributeError on Python 3.
        for letter, n in self.count.items():
            self[letter] = n / total

    def _alphabet_from_input(self):
        """Return the table's keys, sorted and concatenated into one string."""
        return "".join(sorted(self))

    def __init__(self, in_dict, dict_type, alphabet=None):
        """Build a frequency table from a count or a frequency dictionary.

        Args:
            in_dict: mapping of letter -> count (``dict_type == COUNT``) or
                letter -> frequency (``dict_type == FREQ``).
            dict_type: one of the module constants ``COUNT`` or ``FREQ``.
            alphabet: optional alphabet object. If omitted (or falsy), one is
                created from the letters found in the table.

        Raises:
            ValueError: if ``dict_type`` is neither ``COUNT`` nor ``FREQ``.
        """
        dict.__init__(self)
        self.alphabet = alphabet
        if dict_type == COUNT:
            # The caller's dict is kept by reference, not copied.
            self.count = in_dict
            self._freq_from_count()
        elif dict_type == FREQ:
            self.count = {}
            self.update(in_dict)
        else:
            raise ValueError("bad dict_type")
        if not alphabet:
            # Derive the alphabet only after the table is filled, since its
            # letters come from the table's keys.
            self.alphabet = Alphabet.Alphabet()
            self.alphabet.letters = self._alphabet_from_input()
75
def read_count(f):
    """Read a letter-count file from stream ``f`` and return a FreqTable.

    Each non-blank line must hold exactly two whitespace-separated fields:
    a letter and its integer count. Blank lines are skipped.

    Args:
        f: an iterable of text lines (e.g. an open file).

    Returns:
        A ``FreqTable`` built with ``dict_type`` ``COUNT``.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields, or
            its second field is not an integer.
    """
    count = {}
    for line in f:
        fields = line.split()
        if not fields:
            # A blank line (typically a trailing newline at end of file)
            # carries no data; skip it rather than fail on the unpack below.
            continue
        key, value = fields
        count[key] = int(value)
    return FreqTable(count, COUNT)
83
def read_freq(f):
    """Read a letter-frequency file from stream ``f`` and return a FreqTable.

    Each non-blank line must hold exactly two whitespace-separated fields:
    a letter and its frequency as a float. Blank lines are skipped.

    Args:
        f: an iterable of text lines (e.g. an open file).

    Returns:
        A ``FreqTable`` built with ``dict_type`` ``FREQ``.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields, or
            its second field is not a float.
    """
    freq_dict = {}
    for line in f:
        fields = line.split()
        if not fields:
            # A blank line (typically a trailing newline at end of file)
            # carries no data; skip it rather than fail on the unpack below.
            continue
        key, value = fields
        freq_dict[key] = float(value)
    return FreqTable(freq_dict, FREQ)
90