1
2
3
4
5
6 """Code to parse the keywlist.txt file from SwissProt/UniProt
7
8 See:
9 http://www.expasy.ch/sprot/sprot-top.html
10 ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/docs/keywlist.txt
11
12 Classes:
13 Record Stores the information about one keyword or one category
14 in the keywlist.txt file.
15
16 Functions:
17 parse Parses the keywlist.txt file and returns an iterator to
18 the records it contains.
19 """
20
21
class Record(dict):
    """Store one keyword or category entry of keywlist.txt as a dictionary.

    The keys in this dictionary are the line codes that can appear in the
    keywlist.txt file:

    ---------  ---------------------------  ----------------------
    Line code  Content                      Occurrence in an entry
    ---------  ---------------------------  ----------------------
    ID         Identifier (keyword)         Once; starts a keyword entry
    IC         Identifier (category)        Once; starts a category entry
    AC         Accession (KW-xxxx)          Once
    DE         Definition                   Once or more
    SY         Synonyms                     Optional; once or more
    GO         Gene ontology (GO) mapping   Optional; once or more
    HI         Hierarchy                    Optional; once or more
    WW         Relevant WWW site            Optional; once or more
    CA         Category                     Once per keyword entry; absent
                                            in category entries

    A freshly created record holds an empty list under each of the
    repeatable codes DE, SY, GO, HI and WW; the single-valued codes
    (ID, IC, AC, CA) are only present once they have been read.
    """

    def __init__(self):
        """Create an empty record with a list for every repeatable code."""
        dict.__init__(self)
        for keyword in ("DE", "SY", "GO", "HI", "WW"):
            self[keyword] = []


def parse(handle):
    """Iterate over the entries of a keywlist.txt file.

    Arguments:
     - handle - an open file handle, or any other iterable of text lines.

    Yields one Record per entry, each entry being terminated by a "//"
    line.  In a yielded record the DE and SY values have been joined into
    a single space-separated string, while GO, HI and WW stay lists of
    strings.  Everything before the first ID/IC line (the file header)
    and everything from the dashed separator line onwards (the footer)
    is skipped.  Lines carrying an unknown line code are reported on
    standard output and otherwise ignored.
    """
    # The three loops below must continue where the previous one stopped,
    # so work on a single iterator.  For a file object this is a no-op;
    # for a list it prevents the second loop restarting at the header.
    lines = iter(handle)
    record = Record()

    # Skip the header: everything before the first keyword/category line.
    # A data line is a two-letter code followed by three blanks, with the
    # value starting at column 5.
    for line in lines:
        if line.startswith("ID   "):
            record["ID"] = line[5:].strip()
            break
        if line.startswith("IC   "):
            record["IC"] = line[5:].strip()
            break

    # Parse the entries.
    for line in lines:
        if line.startswith("-------------------------------------"):
            # The dashed line marks the start of the footer.
            break
        key = line[:2]
        if key == "//":
            # End of entry: collapse the multi-line free-text fields.
            record["DE"] = " ".join(record["DE"])
            record["SY"] = " ".join(record["SY"])
            yield record
            record = Record()
        elif line[2:5] == "   ":
            value = line[5:].strip()
            if key in ("ID", "IC", "AC", "CA"):
                record[key] = value
            elif key in ("DE", "SY", "GO", "HI", "WW"):
                record[key].append(value)
            else:
                # Parenthesized single argument: same output on Python 2 and 3.
                print("Ignoring: %s" % line.strip())

    # Read the footer and throw it away, so the handle is fully consumed.
    for line in lines:
        pass
81