1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 """ SCOP: Structural Classification of Proteins.
18
19 The SCOP database aims to provide a manually constructed classification of
20 all know protein structures into a hierarchy, the main levels of which
21 are family, superfamily and fold.
22
23 * "SCOP":http://scop.mrc-lmb.cam.ac.uk/scop/
24
25 * "Introduction":http://scop.mrc-lmb.cam.ac.uk/scop/intro.html
26
27 * "SCOP parsable files":http://scop.mrc-lmb.cam.ac.uk/scop/parse/
28
29 The Scop object in this module represents the entire SCOP classification. It
30 can be built from the three SCOP parsable files, modified is so desired, and
31 converted back to the same file formats. A single SCOP domain (represented
32 by the Domain class) can be obtained from Scop using the domain's SCOP
33 identifier (sid).
34
35
36 nodeCodeDict -- A mapping between known 2 letter node codes and a longer
37 description. The known node types are 'cl' (class), 'cf'
38 (fold), 'sf' (superfamily), 'fa' (family), 'dm' (domain),
39 'sp' (species), 'px' (domain). Additional node types may
40 be added in the future.
41
42 This module also provides code to access SCOP over the WWW.
43
44 Functions:
45 search -- Access the main CGI script.
46 _open -- Internally used function.
47
48 """
49
50 from types import *
51 import os
52
53 import Des
54 import Cla
55 import Hie
56 from Residues import *
57 from Bio import SeqIO
58 from Bio.Seq import Seq
59
60
61 nodeCodeDict = { 'cl':'class', 'cf':'fold', 'sf':'superfamily',
62 'fa':'family', 'dm':'protein', 'sp':'species', 'px':'domain'}
63
64
65 _nodetype_to_code= { 'class': 'cl', 'fold': 'cf', 'superfamily': 'sf',
66 'family': 'fa', 'protein': 'dm', 'species': 'sp', 'domain': 'px'}
67
68 nodeCodeOrder = [ 'ro', 'cl', 'cf', 'sf', 'fa', 'dm', 'sp', 'px' ]
69
70 astralBibIds = [10,20,25,30,35,40,50,70,90,95,100]
71
72 astralEvs = [10, 5, 1, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001, 1e-4, 1e-5, 1e-10, 1e-15,
73 1e-20, 1e-25, 1e-50]
74
75 astralEv_to_file = { 10: 'e+1', 5: 'e+0,7', 1: 'e+0', 0.5: 'e-0,3', 0.1: 'e-1',
76 0.05: 'e-1,3', 0.01: 'e-2', 0.005: 'e-2,3', 0.001: 'e-3',
77 1e-4: 'e-4', 1e-5: 'e-5', 1e-10: 'e-10', 1e-15: 'e-15',
78 1e-20: 'e-20', 1e-25: 'e-25', 1e-50: 'e-50' }
79
80 astralEv_to_sql = { 10: 'e1', 5: 'e0_7', 1: 'e0', 0.5: 'e_0_3', 0.1: 'e_1',
81 0.05: 'e_1_3', 0.01: 'e_2', 0.005: 'e_2_3', 0.001: 'e_3',
82 1e-4: 'e_4', 1e-5: 'e_5', 1e-10: 'e_10', 1e-15: 'e_15',
83 1e-20: 'e_20', 1e-25: 'e_25', 1e-50: 'e_50' }
84
85 try:
86
87 _cmp = cmp
88 except NameError:
90 """Implementation of cmp(x,y) for Python 3 (PRIVATE).
91
92 Based on Python 3 docs which say if you really need the cmp()
93 functionality, you could use the expression (a > b) - (a < b)
94 as the equivalent for cmp(a, b)
95 """
96 return (a > b) - (a < b)
97
99 """Order SCOP concise classification strings (sccs).
100
101 a.4.5.1 < a.4.5.11 < b.1.1.1
102
103 A sccs (e.g. a.4.5.11) compactly represents a domain's classification.
104 The letter represents the class, and the numbers are the fold,
105 superfamily, and family, respectively.
106
107 """
108
109 s1 = sccs1.split(".")
110 s2 = sccs2.split(".")
111
112 if s1[0] != s2[0]: return _cmp(s1[0], s2[0])
113
114 s1 = list(map(int, s1[1:]))
115 s2 = list(map(int, s2[1:]))
116
117 return _cmp(s1,s2)
118
119
120 _domain_re = re.compile(r">?([\w_\.]*)\s+([\w\.]*)\s+\(([^)]*)\) (.*)")
121
122 -def parse_domain(str):
123 """Convert an ASTRAL header string into a Scop domain.
124
125 An ASTRAL (http://astral.stanford.edu/) header contains a concise
126 description of a SCOP domain. A very similar format is used when a
127 Domain object is converted into a string. The Domain returned by this
128 method contains most of the SCOP information, but it will not be located
129 within the SCOP hierarchy (i.e. The parent node will be None). The
130 description is composed of the SCOP protein and species descriptions.
131
132 A typical ASTRAL header looks like --
133 >d1tpt_1 a.46.2.1 (1-70) Thymidine phosphorylase {Escherichia coli}
134 """
135
136 m = _domain_re.match(str)
137 if (not m) : raise ValueError("Domain: "+ str)
138
139 dom = Domain()
140 dom.sid = m.group(1)
141 dom.sccs = m.group(2)
142 dom.residues = Residues(m.group(3))
143 if not dom.residues.pdbid:
144 dom.residues.pdbid= dom.sid[1:5]
145 dom.description = m.group(4).strip()
146
147 return dom
148
150 filename = "dir.%s.scop.txt_%s" % (filetype,version)
151 handle = open(os.path.join( scop_dir_path, filename))
152 return handle
153
154
156 """The entire SCOP hierarchy.
157
158 root -- The root node of the hierarchy
159 """
160 - def __init__(self, cla_handle=None, des_handle=None, hie_handle=None,
161 dir_path=None, db_handle=None, version=None):
162 """Build the SCOP hierarchy from the SCOP parsable files, or a sql backend.
163
164 If no file handles are given, then a Scop object with a single
165 empty root node is returned.
166
167 If a directory and version are given (with dir_path=.., version=...) or
168 file handles for each file, the whole scop tree will be built in memory.
169
170 If a MySQLdb database handle is given, the tree will be built as needed,
171 minimising construction times. To build the SQL database to the methods
172 write_xxx_sql to create the tables.
173
174 """
175 self._sidDict = {}
176 self._sunidDict = {}
177
178 if cla_handle==des_handle==hie_handle==dir_path==db_handle==None: return
179
180 if dir_path is None and db_handle is None:
181 if cla_handle == None or des_handle==None or hie_handle==None:
182 raise RuntimeError("Need CLA, DES and HIE files to build SCOP")
183
184 sunidDict = {}
185
186 self.db_handle = db_handle
187 try:
188
189 if db_handle:
190
191 pass
192
193 else:
194
195 if dir_path:
196 if not version:
197 raise RuntimeError("Need SCOP version to find parsable files in directory")
198 if cla_handle or des_handle or hie_handle:
199 raise RuntimeError("Cannot specify SCOP directory and specific files")
200
201 cla_handle = _open_scop_file( dir_path, version, 'cla')
202 des_handle = _open_scop_file( dir_path, version, 'des')
203 hie_handle = _open_scop_file( dir_path, version, 'hie')
204
205 root = Node()
206 domains = []
207 root.sunid=0
208 root.type='ro'
209 sunidDict[root.sunid] = root
210 self.root = root
211 root.description = 'SCOP Root'
212
213
214 records = Des.parse(des_handle)
215 for record in records:
216 if record.nodetype =='px':
217 n = Domain()
218 n.sid = record.name
219 domains.append(n)
220 else :
221 n = Node()
222 n.sunid = record.sunid
223 n.type = record.nodetype
224 n.sccs = record.sccs
225 n.description = record.description
226
227 sunidDict[n.sunid] = n
228
229
230 records = Hie.parse(hie_handle)
231 for record in records:
232 if record.sunid not in sunidDict:
233 print record.sunid
234
235 n = sunidDict[record.sunid]
236
237 if record.parent != '' :
238
239 if record.parent not in sunidDict:
240 raise ValueError("Incomplete data?")
241
242 n.parent = sunidDict[record.parent]
243
244 for c in record.children:
245 if c not in sunidDict:
246 raise ValueError("Incomplete data?")
247 n.children.append(sunidDict[c])
248
249
250
251 sidDict = {}
252 records = Cla.parse(cla_handle)
253 for record in records:
254 n = sunidDict[record.sunid]
255 assert n.sccs == record.sccs
256 assert n.sid == record.sid
257 n.residues = record.residues
258 sidDict[n.sid] = n
259
260
261 self._sunidDict = sunidDict
262 self._sidDict = sidDict
263 self._domains = tuple(domains)
264
265 finally:
266 if dir_path:
267
268 if cla_handle : cla_handle.close()
269 if des_handle : des_handle.close()
270 if hie_handle : hie_handle.close()
271
272
275
276
277 - def getDomainBySid(self, sid):
278 """Return a domain from its sid"""
279 if sid in self._sidDict:
280 return self._sidDict[sid]
281 if self.db_handle:
282 self.getDomainFromSQL(sid=sid)
283 if sid in self._sidDict:
284 return self._sidDict[sid]
285 else:
286 return None
287
288
290 """Return a node from its sunid"""
291 if sunid in self._sunidDict:
292 return self._sunidDict[sunid]
293 if self.db_handle:
294 self.getDomainFromSQL(sunid=sunid)
295 if sunid in self._sunidDict:
296 return self._sunidDict[sunid]
297 else:
298 return None
299
300
301 - def getDomains(self):
302 """Returns an ordered tuple of all SCOP Domains"""
303 if self.db_handle:
304 return self.getRoot().getDescendents('px')
305 else:
306 return self._domains
307
308
309
317
318
327
328
336
337
338 - def getDomainFromSQL(self, sunid=None, sid=None):
339 """Load a node from the SQL backend using sunid or sid"""
340 if sunid==sid==None: return None
341
342 cur = self.db_handle.cursor()
343
344 if sid:
345 cur.execute("SELECT sunid FROM cla WHERE sid=%s", sid)
346 res = cur.fetchone()
347 if res is None:
348 return None
349 sunid = res[0]
350
351 cur.execute("SELECT * FROM des WHERE sunid=%s", sunid)
352 data = cur.fetchone()
353
354 if data is not None:
355 n = None
356
357
358 if data[1] != "px":
359 n = Node(scop=self)
360
361 cur.execute("SELECT child FROM hie WHERE parent=%s", sunid)
362 children = []
363 for c in cur.fetchall():
364 children.append(c[0])
365 n.children = children
366
367
368 else:
369 n = Domain(scop=self)
370 cur.execute("select sid, residues, pdbid from cla where sunid=%s",
371 sunid)
372
373 [n.sid,n.residues,pdbid] = cur.fetchone()
374 n.residues = Residues(n.residues)
375 n.residues.pdbid=pdbid
376 self._sidDict[n.sid] = n
377
378 [n.sunid,n.type,n.sccs,n.description] = data
379
380 if data[1] != 'ro':
381 cur.execute("SELECT parent FROM hie WHERE child=%s", sunid)
382 n.parent = cur.fetchone()[0]
383
384 n.sunid = int(n.sunid)
385
386 self._sunidDict[n.sunid] = n
387
388
400
401
403 """Get descendents of a node using the database backend. This avoids
404 repeated iteration of SQL calls and is therefore much quicker than
405 repeatedly calling node.getChildren().
406 """
407 if nodeCodeOrder.index(type) <= nodeCodeOrder.index(node.type): return []
408
409 des_list = []
410
411
412
413 if node.type == 'ro':
414 for c in node.getChildren():
415 for d in self.getDescendentsFromSQL(c,type):
416 des_list.append(d)
417 return des_list
418
419 cur = self.db_handle.cursor()
420
421
422 if type != 'px':
423 cur.execute("SELECT DISTINCT des.sunid,des.type,des.sccs,description FROM \
424 cla,des WHERE cla."+node.type+"=%s AND cla."+type+"=des.sunid", (node.sunid))
425 data = cur.fetchall()
426 for d in data:
427 if int(d[0]) not in self._sunidDict:
428 n = Node(scop=self)
429 [n.sunid,n.type,n.sccs,n.description] = d
430 n.sunid=int(n.sunid)
431 self._sunidDict[n.sunid] = n
432
433 cur.execute("SELECT parent FROM hie WHERE child=%s", n.sunid)
434 n.parent = cur.fetchone()[0]
435
436 cur.execute("SELECT child FROM hie WHERE parent=%s", n.sunid)
437 children = []
438 for c in cur.fetchall():
439 children.append(c[0])
440 n.children = children
441
442
443 des_list.append( self._sunidDict[int(d[0])] )
444
445 else:
446 cur.execute("SELECT cla.sunid,sid,pdbid,residues,cla.sccs,type,description,sp\
447 FROM cla,des where cla.sunid=des.sunid and cla."+node.type+"=%s",
448 node.sunid)
449
450 data = cur.fetchall()
451 for d in data:
452 if int(d[0]) not in self._sunidDict:
453 n = Domain(scop=self)
454
455
456 [n.sunid,n.sid, pdbid,n.residues,n.sccs,n.type,n.description,
457 n.parent] = d[0:8]
458 n.residues = Residues(n.residues)
459 n.residues.pdbid = pdbid
460 n.sunid = int(n.sunid)
461 self._sunidDict[n.sunid] = n
462 self._sidDict[n.sid] = n
463
464
465 des_list.append( self._sunidDict[int(d[0])] )
466
467 return des_list
468
469
470
471
473 """Write HIE data to SQL database"""
474 cur = handle.cursor()
475
476 cur.execute("DROP TABLE IF EXISTS hie")
477 cur.execute("CREATE TABLE hie (parent INT, child INT, PRIMARY KEY (child),\
478 INDEX (parent) )")
479
480 for p in self._sunidDict.itervalues():
481 for c in p.children:
482 cur.execute("INSERT INTO hie VALUES (%s,%s)" % (p.sunid, c.sunid))
483
484
486 """Write CLA data to SQL database"""
487 cur = handle.cursor()
488
489 cur.execute("DROP TABLE IF EXISTS cla")
490 cur.execute("CREATE TABLE cla (sunid INT, sid CHAR(8), pdbid CHAR(4),\
491 residues VARCHAR(50), sccs CHAR(10), cl INT, cf INT, sf INT, fa INT,\
492 dm INT, sp INT, px INT, PRIMARY KEY (sunid), INDEX (SID) )")
493
494 for n in self._sidDict.itervalues():
495 c = n.toClaRecord()
496 cur.execute( "INSERT INTO cla VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
497 (n.sunid, n.sid, c.residues.pdbid, c.residues, n.sccs,
498 n.getAscendent('cl').sunid, n.getAscendent('cf').sunid,
499 n.getAscendent('sf').sunid, n.getAscendent('fa').sunid,
500 n.getAscendent('dm').sunid, n.getAscendent('sp').sunid,
501 n.sunid ))
502
503
505 """Write DES data to SQL database"""
506 cur = handle.cursor()
507
508 cur.execute("DROP TABLE IF EXISTS des")
509 cur.execute("CREATE TABLE des (sunid INT, type CHAR(2), sccs CHAR(10),\
510 description VARCHAR(255),\
511 PRIMARY KEY (sunid) )")
512
513 for n in self._sunidDict.itervalues():
514 cur.execute( "INSERT INTO des VALUES (%s,%s,%s,%s)",
515 ( n.sunid, n.type, n.sccs, n.description ) )
516
517
518
520 """ A node in the Scop hierarchy
521
522 sunid -- SCOP unique identifiers. e.g. '14986'
523
524 parent -- The parent node
525
526 children -- A list of child nodes
527
528 sccs -- SCOP concise classification string. e.g. 'a.1.1.2'
529
530 type -- A 2 letter node type code. e.g. 'px' for domains
531
532 description --
533
534 """
536 """Create a Node in the scop hierarchy. If a Scop instance is provided to the
537 constructor, this will be used to lookup related references using the SQL
538 methods. If no instance is provided, it is assumed the whole tree exists
539 and is connected."""
540 self.sunid=''
541 self.parent = None
542 self.children=[]
543 self.sccs = ''
544 self.type =''
545 self.description =''
546 self.scop=scop
547
556
558 """Return an Hie.Record"""
559 rec = Hie.Record()
560 rec.sunid = str(self.sunid)
561 if self.getParent() :
562 rec.parent = str(self.getParent().sunid)
563 else:
564 rec.parent = '-'
565 for c in self.getChildren():
566 rec.children.append(str(c.sunid))
567 return rec
568
570 """Return a Des.Record"""
571 rec = Des.Record()
572 rec.sunid = str(self.sunid)
573 rec.nodetype = self.type
574 rec.sccs = self.sccs
575 rec.description = self.description
576 return rec
577
579 """Return a list of children of this Node"""
580 if self.scop is None:
581 return self.children
582 else:
583 return map ( self.scop.getNodeBySunid, self.children )
584
586 """Return the parent of this Node"""
587 if self.scop is None:
588 return self.parent
589 else:
590 return self.scop.getNodeBySunid( self.parent )
591
593 """ Return a list of all decendent nodes of the given type. Node type can a
594 two letter code or longer description. e.g. 'fa' or 'family'
595 """
596 if node_type in _nodetype_to_code:
597 node_type = _nodetype_to_code[node_type]
598
599 nodes = [self]
600 if self.scop:
601 return self.scop.getDescendentsFromSQL(self,node_type)
602 while nodes[0].type != node_type:
603 if nodes[0].type == 'px' : return []
604 child_list = []
605 for n in nodes:
606 for child in n.getChildren():
607 child_list.append( child )
608 nodes = child_list
609
610 return nodes
611
612
614 """ Return the ancenstor node of the given type, or None.Node type can a
615 two letter code or longer description. e.g. 'fa' or 'family'"""
616 if node_type in _nodetype_to_code:
617 node_type = _nodetype_to_code[node_type]
618
619 if self.scop:
620 return self.scop.getAscendentFromSQL(self,node_type)
621 else:
622 n = self
623 if n.type == node_type: return None
624 while n.type != node_type:
625 if n.type == 'ro': return None
626 n = n.getParent()
627
628 return n
629
630
631
633 """ A SCOP domain. A leaf node in the Scop hierarchy.
634
635 sid -- The SCOP domain identifier. e.g. 'd5hbib_'
636
637 residues -- A Residue object. It defines the collection
638 of PDB atoms that make up this domain.
639 """
640 - def __init__(self,scop=None):
641 Node.__init__(self,scop=scop)
642 self.sid = ''
643 self.residues = None
644
646 s = []
647 s.append(self.sid)
648 s.append(self.sccs)
649 s.append("("+str(self.residues)+")")
650
651 if not self.getParent():
652 s.append(self.description)
653 else:
654 sp = self.getParent()
655 dm = sp.getParent()
656 s.append(dm.description)
657 s.append("{"+sp.description+"}")
658
659 return " ".join(s)
660
661 - def toDesRecord(self):
662 """Return a Des.Record"""
663 rec = Node.toDesRecord(self)
664 rec.name = self.sid
665 return rec
666
667 - def toClaRecord(self):
668 """Return a Cla.Record"""
669 rec = Cla.Record()
670 rec.sid = self.sid
671 rec.residues = self.residues
672 rec.sccs = self.sccs
673 rec.sunid = self.sunid
674
675 n = self
676 while n.sunid != 0:
677 rec.hierarchy[n.type] = str(n.sunid)
678 n = n.getParent()
679
680
681
682
683
684 return rec
685
687 """Abstraction of the ASTRAL database, which has sequences for all the SCOP domains,
688 as well as clusterings by percent id or evalue.
689 """
690
691 - def __init__( self, dir_path=None, version=None, scop=None,
692 astral_file=None, db_handle=None):
693 """
694 Initialise the astral database.
695
696 You must provide either a directory of SCOP files:
697
698 dir_path - string, the path to location of the scopseq-x.xx directory
699 (not the directory itself), and
700 version -a version number.
701
702 or, a FASTA file:
703
704 astral_file - string, a path to a fasta file (which will be loaded in memory)
705
706 or, a MYSQL database:
707
708 db_handle - a database handle for a MYSQL database containing a table
709 'astral' with the astral data in it. This can be created
710 using writeToSQL.
711 """
712
713 if astral_file==dir_path==db_handle==None:
714 raise RuntimeError("Need either file handle, or (dir_path + "\
715 + "version) or database handle to construct Astral")
716 if not scop:
717 raise RuntimeError("Must provide a Scop instance to construct")
718
719 self.scop = scop
720 self.db_handle = db_handle
721
722
723 if not astral_file and not db_handle:
724 if dir_path == None or version == None:
725 raise RuntimeError("must provide dir_path and version")
726
727 self.version = version
728 self.path = os.path.join( dir_path, "scopseq-%s" % version)
729 astral_file = "astral-scopdom-seqres-all-%s.fa" % self.version
730 astral_file = os.path.join (self.path, astral_file)
731
732 if astral_file:
733
734 self.fasta_dict = SeqIO.to_dict(SeqIO.parse(open(astral_file), "fasta"))
735
736 self.astral_file = astral_file
737 self.EvDatasets = {}
738 self.EvDatahash = {}
739 self.IdDatasets = {}
740 self.IdDatahash = {}
741
742
744 """get domains clustered by evalue"""
745 if id not in self.EvDatasets:
746 if self.db_handle:
747 self.EvDatasets[id] = self.getAstralDomainsFromSQL(astralEv_to_sql[id])
748
749 else:
750 if not self.path:
751 raise RuntimeError("No scopseq directory specified")
752
753 file_prefix = "astral-scopdom-seqres-sel-gs"
754 filename = "%s-e100m-%s-%s.id" % (file_prefix, astralEv_to_file[id] ,
755 self.version)
756 filename = os.path.join(self.path,filename)
757 self.EvDatasets[id] = self.getAstralDomainsFromFile(filename)
758 return self.EvDatasets[id]
759
760
762 """get domains clustered by percent id"""
763 if id not in self.IdDatasets:
764 if self.db_handle:
765 self.IdDatasets[id] = self.getAstralDomainsFromSQL("id"+str(id))
766
767 else:
768 if not self.path:
769 raise RuntimeError("No scopseq directory specified")
770
771 file_prefix = "astral-scopdom-seqres-sel-gs"
772 filename = "%s-bib-%s-%s.id" % (file_prefix, id, self.version)
773 filename = os.path.join(self.path,filename)
774 self.IdDatasets[id] = self.getAstralDomainsFromFile(filename)
775 return self.IdDatasets[id]
776
777
778 - def getAstralDomainsFromFile(self,filename=None,file_handle=None):
779 """Get the scop domains from a file containing a list of sids"""
780 if file_handle == filename == None:
781 raise RuntimeError("You must provide a filename or handle")
782 if not file_handle:
783 file_handle = open(filename)
784 doms = []
785 while 1:
786 line = file_handle.readline()
787 if not line:
788 break
789 line = line.rstrip()
790 doms.append(line)
791 if filename:
792 file_handle.close()
793
794 doms = filter( lambda a: a[0]=='d', doms )
795 doms = map( self.scop.getDomainBySid, doms )
796 return doms
797
798 - def getAstralDomainsFromSQL(self, column):
799 """Load a set of astral domains from a column in the astral table of a MYSQL
800 database (which can be created with writeToSQL(...)"""
801 cur = self.db_handle.cursor()
802 cur.execute("SELECT sid FROM astral WHERE "+column+"=1")
803 data = cur.fetchall()
804 data = map( lambda x: self.scop.getDomainBySid(x[0]), data)
805
806 return data
807
808
810 """get the seq record of a given domain from its sid"""
811 if self.db_handle is None:
812 return self.fasta_dict[domain].seq
813
814 else:
815 cur = self.db_handle.cursor()
816 cur.execute("SELECT seq FROM astral WHERE sid=%s", domain)
817 return Seq(cur.fetchone()[0])
818
822
823
824 - def hashedDomainsById(self,id):
825 """Get domains clustered by sequence identity in a dict"""
826 if id not in self.IdDatahash:
827 self.IdDatahash[id] = {}
828 for d in self.domainsClusteredById(id):
829 self.IdDatahash[id][d] = 1
830 return self.IdDatahash[id]
831
832 - def hashedDomainsByEv(self,id):
833 """Get domains clustered by evalue in a dict"""
834 if id not in self.EvDatahash:
835 self.EvDatahash[id] = {}
836 for d in self.domainsClusteredByEv(id):
837 self.EvDatahash[id][d] = 1
838 return self.EvDatahash[id]
839
840
841 - def isDomainInId(self,dom,id):
842 """Returns true if the domain is in the astral clusters for percent ID"""
843 return dom in self.hashedDomainsById(id)
844
845 - def isDomainInEv(self,dom,id):
846 """Returns true if the domain is in the ASTRAL clusters for evalues"""
847 return dom in self.hashedDomainsByEv(id)
848
849
851 """Write the ASTRAL database to a MYSQL database"""
852 cur = db_handle.cursor()
853
854 cur.execute("DROP TABLE IF EXISTS astral")
855 cur.execute("CREATE TABLE astral (sid CHAR(8), seq TEXT, PRIMARY KEY (sid))")
856
857 for dom in self.fasta_dict:
858 cur.execute("INSERT INTO astral (sid,seq) values (%s,%s)",
859 (dom, self.fasta_dict[dom].seq.data))
860
861 for i in astralBibIds:
862 cur.execute("ALTER TABLE astral ADD (id"+str(i)+" TINYINT)")
863
864 for d in self.domainsClusteredById(i):
865 cur.execute("UPDATE astral SET id"+str(i)+"=1 WHERE sid=%s",
866 d.sid)
867
868 for ev in astralEvs:
869 cur.execute("ALTER TABLE astral ADD ("+astralEv_to_sql[ev]+" TINYINT)")
870
871 for d in self.domainsClusteredByEv(ev):
872
873 cur.execute("UPDATE astral SET "+astralEv_to_sql[ev]+"=1 WHERE sid=%s",
874 d.sid)
875
876 -def search(pdb=None, key=None, sid=None, disp=None, dir=None, loc=None,
877 cgi='http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi', **keywds):
878 """search(pdb=None, key=None, sid=None, disp=None, dir=None, loc=None,
879 cgi='http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi', **keywds)
880
881 Access search.cgi and return a handle to the results. See the
882 online help file for an explanation of the parameters:
883 http://scop.mrc-lmb.cam.ac.uk/scop/help.html
884
885 Raises an IOError if there's a network error.
886
887 """
888 params = {'pdb' : pdb, 'key' : key, 'sid' : sid, 'disp' : disp,
889 'dir' : dir, 'loc' : loc}
890 variables = {}
891 for k, v in params.iteritmes():
892 if v is not None:
893 variables[k] = v
894 variables.update(keywds)
895 return _open(cgi, variables)
896
897 -def _open(cgi, params={}, get=1):
898 """_open(cgi, params={}, get=1) -> UndoHandle
899
900 Open a handle to SCOP. cgi is the URL for the cgi script to access.
901 params is a dictionary with the options to pass to it. get is a boolean
902 that describes whether a GET should be used. Does some
903 simple error checking, and will raise an IOError if it encounters one.
904
905 """
906 import urllib
907 from Bio import File
908
909 options = urllib.urlencode(params)
910 if get:
911 fullcgi = cgi
912 if options:
913 fullcgi = "%s?%s" % (cgi, options)
914 handle = urllib.urlopen(fullcgi)
915 else:
916 handle = urllib.urlopen(cgi, options)
917
918
919 uhandle = File.UndoHandle(handle)
920
921 return uhandle
922