1
2
3
4
5
6
7 """
8 This module provides code to work with the KEGG Ligand/Compound database.
9
10 Functions:
11 parse - Returns an iterator giving Record objects.
12
13 Classes:
14 Record - A representation of a KEGG Ligand/Compound.
15 """
16
17
18 from Bio.KEGG import _write_kegg
19 from Bio.KEGG import _wrap_kegg
20
21
22
# Wrap rules handed to _wrap_kegg() through its ``wrap_rule`` argument.
# Each rule is a list whose first element is the indent, followed by an
# empty string and one or more 4-tuples.
# NOTE(review): the meaning of the individual tuple fields is defined by
# Bio.KEGG._wrap_kegg, which is not visible from this module -- confirm
# there before changing any of the values below.
name_wrap = [0, "",
             (" ", "$", 1, 1),
             ("-", "$", 1, 1)]


def id_wrap(indent):
    """Return the wrap rule used for the PATHWAY, ENZYME and DBLINKS lines.

    indent -- value stored as the first element of the returned rule.
    """
    return [indent, "",
            (" ", "", 1, 0)]


def struct_wrap(indent):
    """Return the wrap rule used for the STRUCTURES lines.

    indent -- value stored as the first element of the returned rule.
    """
    return [indent, "",
            (" ", "", 1, 1)]
30
class Record(object):
    """Holds info from a KEGG Ligand/Compound record.

    Members:
    entry       The entry identifier.
    name        A list of the compound names.
    formula     The chemical formula for the compound
    mass        The molecular weight for the compound
    pathway     A list of 3-tuples: (database, id, pathway)
    enzyme      A list of 2-tuples: (enzyme id, role)
    structures  A list of 2-tuples: (database, list of struct ids)
    dblinks     A list of 2-tuples: (database, list of link ids)

    """

    # NOTE(review): the listing this class was recovered from had lost its
    # ``class``/``def`` header lines and skips listing lines 58-72, 75-81
    # and 83-85.  Anything defined on those lines is not reproduced here,
    # and the formatter method names below are inferred from the KEGG field
    # each one writes -- confirm both against the original module.

    def __init__(self):
        """__init__(self)

        Create a new Record with every field empty.
        """
        self.entry = ""
        self.name = []
        self.formula = ""
        self.mass = ""
        self.pathway = []
        self.enzyme = []
        self.structures = []
        self.dblinks = []

    def _entry(self):
        """Return the ENTRY section built by _write_kegg."""
        return _write_kegg("ENTRY",
                           [self.entry])

    def _pathway(self):
        """Return the PATHWAY section, one wrapped line per pathway tuple."""
        lines = [entry[0] + ": " + entry[1] + " " + entry[2]
                 for entry in self.pathway]
        return _write_kegg("PATHWAY",
                           [_wrap_kegg(line, wrap_rule=id_wrap(16))
                            for line in lines])

    def _enzyme(self):
        """Return the ENZYME section.

        Every (enzyme id, role) pair becomes one cell padded to 16
        characters; a non-empty role is appended in parentheses.  The cells
        are joined into a single string before wrapping.
        """
        cells = []
        for entry in self.enzyme:
            if entry[1]:
                cell = entry[0] + " (" + entry[1] + ")"
            else:
                cell = entry[0]
            cells.append(cell.ljust(16))
        # Join once instead of growing a string inside the loop.
        return _write_kegg("ENZYME",
                           [_wrap_kegg("".join(cells), wrap_rule=id_wrap(0))])

    def _structures(self):
        """Return the STRUCTURES section, one wrapped line per database."""
        lines = [entry[0] + ": " + " ".join(entry[1]) + " "
                 for entry in self.structures]
        return _write_kegg("STRUCTURES",
                           [_wrap_kegg(line, wrap_rule=struct_wrap(5))
                            for line in lines])

    def _dblinks(self):
        """Return the DBLINKS section, one wrapped line per database."""
        lines = [entry[0] + ": " + " ".join(entry[1])
                 for entry in self.dblinks]
        return _write_kegg("DBLINKS",
                           [_wrap_kegg(line, wrap_rule=id_wrap(9))
                            for line in lines])
118
119
def parse(handle):
    """Parse a KEGG Ligand/Compound file, returning Record objects.

    This is an iterator function, typically used in a for loop.  For
    example, using one of the example KEGG files in the Biopython
    test suite,

    >>> handle = open("KEGG/compound.sample")
    >>> for record in parse(handle):
    ...     print("%s %s" % (record.entry, record.name[0]))
    ...
    C00023 Iron
    C00017 Protein
    C00099 beta-Alanine
    C00294 Inosine
    C00298 Trypsin
    C00348 Undecaprenyl phosphate
    C00349 2-Methyl-3-oxopropanoate
    C01386 NH2Mec
    >>> handle.close()

    Arguments:
    handle -- any iterable of text lines (for example an open file).

    A Record is yielded each time a line starting with "///" is read, so
    trailing data that is not followed by "///" is not returned.  Only the
    ENTRY, NAME, ENZYME, PATHWAY, FORMULA, MASS and DBLINKS fields are
    stored; lines belonging to any other keyword are ignored.
    """
    keyword_width = 12   # the keyword occupies the first 12 columns
    enzyme_width = 16    # ENZYME data is read in 16-character cells

    record = Record()
    # Start with no active keyword so that a stray continuation line before
    # the first keyword is ignored instead of raising a NameError.
    keyword = ""
    for line in handle:
        if line[:3] == "///":
            yield record
            record = Record()
            keyword = ""
            continue
        if not line.strip():
            # Blank lines carry no data and must not clobber a field.
            continue

        # A non-blank keyword column starts a new field; a blank one means
        # the line continues the field that is currently active.  Comparing
        # the stripped keyword keeps this independent of the exact padding.
        field = line[:keyword_width].strip()
        if field:
            keyword = field
        data = line[keyword_width:].strip()

        if keyword == "ENTRY":
            words = data.split()
            if words:
                record.entry = words[0]
        elif keyword == "NAME":
            record.name.append(data.strip(";"))
        elif keyword == "ENZYME":
            while data:
                column = data[:enzyme_width]
                data = data[enzyme_width:]
                if "(" in column:
                    parts = column.split()
                    # Second token is "(role)": drop the parentheses.
                    enzyme = (parts[0], parts[1][1:-1])
                else:
                    enzyme = (column.strip(), "")
                record.enzyme.append(enzyme)
        elif keyword == "PATHWAY":
            if data[:5] == "PATH:":
                path, map_id, name = data.split(None, 2)
                # path[:-1] removes the trailing ":" of "PATH:".
                record.pathway.append((path[:-1], map_id, name))
            else:
                # Continuation line: extend the name of the last pathway.
                path, map_id, name = record.pathway[-1]
                record.pathway[-1] = (path, map_id, name + " " + data)
        elif keyword == "FORMULA":
            record.formula = data
        elif keyword == "MASS":
            record.mass = data
        elif keyword == "DBLINKS":
            if ":" in data:
                # Split on the first colon only so that link ids which
                # themselves contain ":" do not break the unpacking.
                key, values = data.split(":", 1)
                record.dblinks.append((key, values.split()))
            else:
                # Continuation line: the id list of the last entry is
                # extended in place.
                record.dblinks[-1][1].extend(data.split())
192
194 """Run the Bio.KEGG.Compound module's doctests.
195
196 This will try and locate the unit tests directory, and run the doctests
197 from there in order that the relative paths used in the examples work.
198 """
199 import doctest
200 import os
201 if os.path.isdir(os.path.join("..","..","..","Tests")):
202 print "Runing doctests..."
203 cur_dir = os.path.abspath(os.curdir)
204 os.chdir(os.path.join("..","..","..","Tests"))
205 doctest.testmod()
206 os.chdir(cur_dir)
207 del cur_dir
208 print "Done"
209
210 if __name__ == "__main__":
211 _test()
212