1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Copy of John Aycock's SPARK parser included with Biopython (DEPRECATED).
23
24 To clarify, when we say deprecated we just mean to warn you that Biopython will
25 in a future release no longer include this module. This does not affect the
26 status of John Aycock's SPARK parser which can be installed separately from:
27 http://pages.cpsc.ucalgary.ca/~aycock/spark/
28
29 Biopython included this copy of SPARK purely for parsing GenBank/EMBL feature
30 location strings using Bio.GenBank.LocationParser, and that code has now been
31 replaced with something simpler and faster using regular expressions.
32 """
33
34
35
36 __version__ = 'SPARK-0.6.1'
37
38 import re
39 import sys
40
42 namelist, namedict, classlist = [], {}, [instance.__class__]
43 for c in classlist:
44 for b in c.__bases__:
45 classlist.append(b)
46 for name in dir(c):
47 if name not in namedict:
48 namelist.append(name)
49 namedict[name] = 1
50 return namelist
51
54 pattern = self.reflect()
55 self.re = re.compile(pattern, re.VERBOSE)
56
57 self.index2func = {}
58 for name, number in self.re.groupindex.items():
59 self.index2func[number-1] = getattr(self, 't_' + name)
60
62 doc = getattr(self, name).__doc__
63 rv = '(?P<%s>%s)' % (name[2:], doc)
64 return rv
65
74
76 print >>sys.stderr, "Lexical error at position %s" % pos
77 raise SystemExit
78
80 pos = 0
81 n = len(s)
82 while pos < n:
83 m = self.re.match(s, pos)
84 if m is None:
85 self.error(s, pos)
86
87 groups = m.groups()
88 for i in range(len(groups)):
89 if groups[i] and i in self.index2func:
90 self.index2func[i](groups[i])
91 pos = m.end()
92
94 r'( . | \n )+'
95 pass
96
99 self.rules = {}
100 self.rule2func = {}
101 self.rule2name = {}
102 self.collectRules()
103 self.startRule = self.augment(start)
104 self.ruleschanged = 1
105
106 _START = 'START'
107 _EOF = 'EOF'
108
109
110
111
112 - def preprocess(self, rule, func): return rule, func
113
115 rules = doc.split()
116
117 index = []
118 for i in range(len(rules)):
119 if rules[i] == '::=':
120 index.append(i-1)
121 index.append(len(rules))
122
123 for i in range(len(index)-1):
124 lhs = rules[index[i]]
125 rhs = rules[index[i]+2:index[i+1]]
126 rule = (lhs, tuple(rhs))
127
128 rule, fn = self.preprocess(rule, func)
129
130 if lhs in self.rules:
131 self.rules[lhs].append(rule)
132 else:
133 self.rules[lhs] = [ rule ]
134 self.rule2func[rule] = fn
135 self.rule2name[rule] = func.__name__[2:]
136 self.ruleschanged = 1
137
144
146
147
148
149
150
151 startRule = (self._START, ( start, self._EOF ))
152 self.rule2func[startRule] = lambda args: args[0]
153 self.rules[self._START] = [ startRule ]
154 self.rule2name[startRule] = ''
155 return startRule
156
158 union = {}
159 self.first = {}
160
161 for rulelist in self.rules.values():
162 for lhs, rhs in rulelist:
163 if lhs not in self.first:
164 self.first[lhs] = {}
165
166 if len(rhs) == 0:
167 self.first[lhs][None] = 1
168 continue
169
170 sym = rhs[0]
171 if sym not in self.rules:
172 self.first[lhs][sym] = 1
173 else:
174 union[(sym, lhs)] = 1
175 changes = 1
176 while changes:
177 changes = 0
178 for src, dest in union.keys():
179 destlen = len(self.first[dest])
180 self.first[dest].update(self.first[src])
181 if len(self.first[dest]) != destlen:
182 changes = 1
183
184
185
186
187
188
189
190
193
195 print >>sys.stderr, "Syntax error at or near `%s' token" % token
196 raise SystemExit
197
198 - def parse(self, tokens):
199 tree = {}
200 tokens.append(self._EOF)
201 states = { 0: [ (self.startRule, 0, 0) ] }
202
203 if self.ruleschanged:
204 self.makeFIRST()
205
206 for i in xrange(len(tokens)):
207 states[i+1] = []
208
209 if states[i] == []:
210 break
211 self.buildState(tokens[i], states, i, tree)
212
213
214
215 if i < len(tokens)-1 or states[i+1] != [(self.startRule, 2, 0)]:
216 del tokens[-1]
217 self.error(tokens[i-1])
218 rv = self.buildTree(tokens, tree, ((self.startRule, 2, 0), i+1))
219 del tokens[-1]
220 return rv
221
223 needsCompletion = {}
224 state = states[i]
225 predicted = {}
226
227 for item in state:
228 rule, pos, parent = item
229 lhs, rhs = rule
230
231
232
233
234 if pos == len(rhs):
235 if len(rhs) == 0:
236 needsCompletion[lhs] = (item, i)
237
238 for pitem in states[parent]:
239 if pitem is item:
240 break
241
242 prule, ppos, pparent = pitem
243 plhs, prhs = prule
244
245 if prhs[ppos:ppos+1] == (lhs,):
246 new = (prule,
247 ppos+1,
248 pparent)
249 if new not in state:
250 state.append(new)
251 tree[(new, i)] = [(item, i)]
252 else:
253 tree[(new, i)].append((item, i))
254 continue
255
256 nextSym = rhs[pos]
257
258
259
260
261 if nextSym in self.rules:
262
263
264
265
266
267
268
269 if nextSym in needsCompletion:
270 new = (rule, pos+1, parent)
271 olditem_i = needsCompletion[nextSym]
272 if new not in state:
273 state.append(new)
274 tree[(new, i)] = [olditem_i]
275 else:
276 tree[(new, i)].append(olditem_i)
277
278
279
280
281 if nextSym in predicted:
282 continue
283 predicted[nextSym] = 1
284
285 ttype = token is not self._EOF and \
286 self.typestring(token) or \
287 None
288 if ttype is not None:
289
290
291
292
293
294
295
296
297
298 for prule in self.rules[nextSym]:
299 new = (prule, 0, i)
300 prhs = prule[1]
301 if len(prhs) == 0:
302 state.append(new)
303 continue
304 prhs0 = prhs[0]
305 if prhs0 not in self.rules:
306 if prhs0 != ttype:
307 continue
308 else:
309 state.append(new)
310 continue
311 first = self.first[prhs0]
312 if None not in first and \
313 ttype not in first:
314 continue
315 state.append(new)
316 continue
317
318 for prule in self.rules[nextSym]:
319
320
321
322
323
324 prhs = prule[1]
325 if len(prhs) > 0 and \
326 prhs[0] not in self.rules and \
327 token != prhs[0]:
328 continue
329 state.append((prule, 0, i))
330
331
332
333
334 elif token == nextSym:
335
336 states[i+1].append((rule, pos+1, parent))
337
339 stack = []
340 self.buildTree_r(stack, tokens, -1, tree, root)
341 return stack[0]
342
343 - def buildTree_r(self, stack, tokens, tokpos, tree, root):
344 (rule, pos, parent), state = root
345
346 while pos > 0:
347 want = ((rule, pos, parent), state)
348 if want not in tree:
349
350
351
352
353
354
355 pos = pos - 1
356 state = state - 1
357 stack.insert(0, tokens[tokpos])
358 tokpos = tokpos - 1
359 else:
360
361
362
363
364
365
366
367
368
369 children = tree[want]
370 if len(children) > 1:
371 child = self.ambiguity(children)
372 else:
373 child = children[0]
374
375 tokpos = self.buildTree_r(stack,
376 tokens, tokpos,
377 tree, child)
378 pos = pos - 1
379 (crule, cpos, cparent), cstate = child
380 state = cparent
381
382 lhs, rhs = rule
383 result = self.rule2func[rule](stack[:len(rhs)])
384 stack[:len(rhs)] = [result]
385 return tokpos
386
388
389
390
391
392
393
394
395 sortlist = []
396 name2index = {}
397 for i in range(len(children)):
398 ((rule, pos, parent), index) = children[i]
399 lhs, rhs = rule
400 name = self.rule2name[rule]
401 sortlist.append((len(rhs), name))
402 name2index[name] = i
403 sortlist.sort()
404 list = map(lambda (a,b): b, sortlist)
405 return children[name2index[self.resolve(list)]]
406
408
409
410
411
412
413 return list[0]
414
415
416
417
418
419
420
421
422
427
429 rebind = lambda lhs, self=self: \
430 lambda args, lhs=lhs, self=self: \
431 self.buildASTNode(args, lhs)
432 lhs, rhs = rule
433 return rule, rebind(lhs)
434
443
444 - def terminal(self, token): return token
445
447 rv = self.AST(type)
448 rv[:len(args)] = args
449 return rv
450
451
452
453
454
455
456
457
458
459
460
463
513
514
515
516
517
518
519
520
525
527 rebind = lambda func, self=self: \
528 lambda args, func=func, self=self: \
529 self.foundMatch(args, func)
530 lhs, rhs = rule
531 rhslist = list(rhs)
532 rhslist.reverse()
533
534 return (lhs, tuple(rhslist)), rebind(func)
535
537 func(args[-1])
538 return args[-1]
539
552
553 - def match(self, ast=None):
554 if ast is None:
555 ast = self.ast
556 self.input = []
557
558 self.match_r(ast)
559 self.parse(self.input)
560
562
563
564
565 return list[-1]
566
567 -def _dump(tokens, states):
568 for i in range(len(states)):
569 print 'state', i
570 for (lhs, rhs), pos, parent in states[i]:
571 print '\t', lhs, '::=',
572 print ' '.join(rhs[:pos]),
573 print '.',
574 print ' '.join(rhs[pos:]),
575 print ',', parent
576 if i < len(tokens):
577 print
578 print 'token', str(tokens[i])
579 print
580