Package Bio :: Package GenBank :: Module LocationParser
[hide private]
[frames | no frames]

Source Code for Module Bio.GenBank.LocationParser

  1  # Copyright 2001 Brad Chapman. 
  2  # All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """Code used for parsing GenBank/EMBL feature location strings (DEPRECATED).""" 
  7  # Don't issue a deprecation warning here, but via Bio.Parsers instead 
  8  # This avoids the user seeing multiple deprecation warnings. 
  9   
 10  # First pass at a parser for the location fields of a feature table. 
 11  # Everything likely to change. 
 12  # 
 13  # This does NOT cope with the Gap(), Gap(X), or Gap(unkXXX) tokens used 
 14  # in CONTIG lines, which are otherwise similar to feature locations. 
 15  # 
# Based on the DDBJ/EMBL/GenBank Feature Table Definition Version 2.2
# (Dec 15 1999), available from EBI. That documentation is not completely
# internally consistent, much less in agreement with real-life examples;
# conflicts were resolved to agree with the real examples.
 20  # 
 21  # This does NOT cope with the Gap(), Gap(X), or Gap(unkXXX) tokens used 
 22  # in CONTIG lines, which are otherwise similar to feature locations. 
 23  # 
 24  # Uses John Aycock's SPARK for parsing 
 25  from Bio.Parsers.spark import GenericScanner, GenericParser 
 26   
class Token(object):
    """A scanned token that carries nothing but its grammar type name.

    A token compares equal to its ``type`` string, and therefore to any
    other object that compares equal to that string.

    The rich comparison methods make this work on Python 3 as well, where
    ``__cmp__`` is no longer consulted by the ``==`` operator.
    """

    def __init__(self, type):
        # The parameter name shadows the builtin, but it is part of the
        # public signature and is kept for keyword callers.
        self.type = type

    def __eq__(self, other):
        # Delegating to the string comparison also handles ``other`` being
        # another token: the reflected comparison then compares both types.
        return self.type == other

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        # Defining __eq__ disables the inherited hash on Python 3; hash by
        # type so that objects comparing equal also hash equal.
        return hash(self.type)

    def __cmp__(self, other):
        # Python 2 three-way comparison; never called implicitly on Python 3.
        return cmp(self.type, other)

    def __repr__(self):
        # Report the real class name (the original printed "Tokens").
        return "Token(%r)" % (self.type,)
# "38"
class Integer(object):
    """Token for a (possibly negative) integer literal.

    ``val`` holds the value given to the constructor. The instance compares
    equal to its ``type`` string ("integer"), not to its value.

    The rich comparison methods make this work on Python 3 as well, where
    ``__cmp__`` is no longer consulted by the ``==`` operator.
    """

    type = "integer"

    def __init__(self, val):
        self.val = val

    def __eq__(self, other):
        # Compare by token type; the reflected comparison covers the case
        # where ``other`` is itself a token object.
        return self.type == other

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        # Defining __eq__ disables the inherited hash on Python 3; hash by
        # type so that objects comparing equal also hash equal.
        return hash(self.type)

    def __cmp__(self, other):
        # Python 2 three-way comparison; never called implicitly on Python 3.
        return cmp(self.type, other)

    def __str__(self):
        return str(self.val)

    def __repr__(self):
        # Wrap the operand so that a tuple value cannot be mistaken for the
        # argument list of the % operator.
        return "Integer(%s)" % (self.val,)
# From the BNF definition, this isn't needed.  Does that mean
# that bases can be referred to with negative numbers?
class UnsignedInteger(Integer):
    """Integer token whose grammar type is "unsigned_integer".

    Everything except the type name and the repr is inherited from
    ``Integer``.
    """

    type = "unsigned_integer"

    def __repr__(self):
        # Wrap the operand so that a tuple value cannot be mistaken for the
        # argument list of the % operator.
        return "UnsignedInteger(%s)" % (self.val,)
class Symbol(object):
    """Token for a bare name (identifier-like text) in a location string.

    ``name`` holds the matched text. The instance compares equal to its
    ``type`` string ("symbol"), not to its name.

    The rich comparison methods make this work on Python 3 as well, where
    ``__cmp__`` is no longer consulted by the ``==`` operator.
    """

    type = "symbol"

    def __init__(self, name):
        self.name = name

    def __eq__(self, other):
        # Compare by token type; the reflected comparison covers the case
        # where ``other`` is itself a token object.
        return self.type == other

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        # Defining __eq__ disables the inherited hash on Python 3; hash by
        # type so that objects comparing equal also hash equal.
        return hash(self.type)

    def __cmp__(self, other):
        # Python 2 three-way comparison; never called implicitly on Python 3.
        return cmp(self.type, other)

    def __str__(self):
        return str(self.name)

    def __repr__(self):
        return "Symbol(%r)" % (self.name,)
# ">38" -- The BNF says ">" is for the lower bound, which seems wrong to me.
class LowBound(object):
    """Position written with a leading ">" (for example ">38").

    ``base`` is stored exactly as passed in.
    """

    def __init__(self, base):
        self.base = base

    def __repr__(self):
        # Wrap the operand in a tuple, as Token.__repr__ does, so that a
        # tuple-valued base is formatted instead of raising TypeError.
        return "LowBound(%r)" % (self.base,)
# "<38"
class HighBound(object):
    """Position written with a leading "<" (for example "<38").

    ``base`` is stored exactly as passed in.
    """

    def __init__(self, base):
        self.base = base

    def __repr__(self):
        # Wrap the operand in a tuple, as Token.__repr__ does, so that a
        # tuple-valued base is formatted instead of raising TypeError.
        return "HighBound(%r)" % (self.base,)
# "12.34"
class TwoBound(object):
    """Pair of positions joined by a single dot, as in "12.34"."""

    def __init__(self, low, high):
        self.low, self.high = low, high

    def __repr__(self):
        bounds = (self.low, self.high)
        return "TwoBound(%r, %r)" % bounds
# "12^34"
class Between(object):
    """Pair of positions joined by a caret, as in "12^34"."""

    def __init__(self, low, high):
        self.low, self.high = low, high

    def __repr__(self):
        ends = (self.low, self.high)
        return "Between(%r, %r)" % ends
# "12..34"
class Range(object):
    """Pair of endpoints joined by a double dot, as in "12..34"."""

    def __init__(self, low, high):
        self.low, self.high = low, high

    def __repr__(self):
        endpoints = (self.low, self.high)
        return "Range(%r, %r)" % endpoints
class Function(object):
    """A named operator applied to a list of arguments.

    ``name`` and ``args`` are stored exactly as passed in.
    """

    def __init__(self, name, args):
        self.name, self.args = name, args

    def __repr__(self):
        parts = (self.name, self.args)
        return "Function(%r, %r)" % parts
class AbsoluteLocation(object):
    """A local location, optionally qualified by a path.

    ``path`` may be None when the location carries no qualifier.
    """

    def __init__(self, path, local_location):
        self.path, self.local_location = path, local_location

    def __repr__(self):
        parts = (self.path, self.local_location)
        return "AbsoluteLocation(%r, %r)" % parts
class Path(object):
    """An accession, optionally qualified by a database name.

    ``database`` may be None when only the accession was given.
    """

    def __init__(self, database, accession):
        self.database, self.accession = database, accession

    def __repr__(self):
        parts = (self.database, self.accession)
        return "Path(%r, %r)" % parts
class FeatureName(object):
    """A feature label, optionally qualified by a path.

    ``path`` may be None when only the label was given.
    """

    def __init__(self, path, label):
        self.path, self.label = path, label

    def __repr__(self):
        parts = (self.path, self.label)
        return "FeatureName(%r, %r)" % parts
class LocationScanner(GenericScanner):
    """Scanner that turns a feature location string into a list of tokens.

    NOTE(review): the string at the top of every ``t_*`` method is not
    documentation. Per SPARK's convention it is presumably the regular
    expression GenericScanner uses to recognise that token, so it must stay
    exactly as written. Confirm against Bio.Parsers.spark before editing.
    """

    def __init__(self):
        # Run the base-class initialiser, as LocationParser.__init__ does
        # for GenericParser; this call was missing from the method body.
        GenericScanner.__init__(self)

    def tokenize(self, input):
        # The t_* callbacks append to self.rv while the base class scans,
        # so a fresh list is installed for every call.
        self.rv = []
        GenericScanner.tokenize(self, input)
        return self.rv

    def t_double_colon(self, input):
        r" :: "
        self.rv.append(Token("double_colon"))

    def t_double_dot(self, input):
        r" \.\. "
        self.rv.append(Token("double_dot"))

    def t_dot(self, input):
        r" \.(?!\.) "
        self.rv.append(Token("dot"))

    def t_caret(self, input):
        r" \^ "
        self.rv.append(Token("caret"))

    def t_comma(self, input):
        r" \, "
        self.rv.append(Token("comma"))

    def t_integer(self, input):
        r" -?[0-9]+ "
        self.rv.append(Integer(int(input)))

    def t_unsigned_integer(self, input):
        r" [0-9]+ "
        self.rv.append(UnsignedInteger(int(input)))

    def t_colon(self, input):
        r" :(?!:) "
        self.rv.append(Token("colon"))

    def t_open_paren(self, input):
        r" \( "
        self.rv.append(Token("open_paren"))

    def t_close_paren(self, input):
        r" \) "
        self.rv.append(Token("close_paren"))

    def t_symbol(self, input):
        r" [A-Za-z0-9_'*-][A-Za-z0-9_'*.-]* "
        # Needed an extra '.'
        self.rv.append(Symbol(input))

    def t_less_than(self, input):
        r" < "
        self.rv.append(Token("less_than"))

    def t_greater_than(self, input):
        r" > "
        self.rv.append(Token("greater_than"))

    # punctuation .. hmm, isn't needed for location
    #        r''' [ !#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~] '''
class LocationParser(GenericParser):
    """Parser that builds location objects from a scanned token list.

    NOTE(review): the string at the top of every ``p_*`` method is not
    documentation. Per SPARK's convention it is presumably the set of
    grammar productions GenericParser associates with that method, so the
    productions must stay exactly as written. Confirm against
    Bio.Parsers.spark before editing.
    """

    def __init__(self, start='location'):
        GenericParser.__init__(self, start)
        # Not read anywhere in the visible source.
        self.begin_pos = 0

    def p_location(self, args):
        """
        location ::= absolute_location
        location ::= feature_name
        location ::= function
        """
        # Every alternative has one symbol; pass its value straight through.
        return args[0]

    def p_function(self, args):
        """
        function ::= functional_operator open_paren location_list close_paren
        """
        operator = args[0]
        locations = args[2]
        return Function(operator.name, locations)

    def p_absolute_location(self, args):
        """
        absolute_location ::= local_location
        absolute_location ::= path colon local_location
        """
        # A single matched symbol means no path qualifier was given.
        path = None if len(args) == 1 else args[0]
        return AbsoluteLocation(path, args[-1])

    def p_path(self, args):
        """
        path ::= database double_colon primary_accession
        path ::= primary_accession
        """
        if len(args) == 3:
            database, accession = args[0], args[2]
        else:
            database, accession = None, args[0]
        return Path(database, accession)

    def p_feature_name(self, args):
        """
        feature_name ::= path colon feature_label
        feature_name ::= feature_label
        """
        if len(args) == 3:
            path, label = args[0], args[2]
        else:
            path, label = None, args[0]
        return FeatureName(path, label)

    # NOTE(review): the production below defines "label", while
    # p_feature_name refers to "feature_label". Confirm whether this
    # mismatch is intentional before changing either side.
    def p_feature_label(self, args):
        """
        label ::= symbol
        """
        symbol = args[0]
        return symbol.name

    def p_local_location(self, args):
        """
        local_location ::= base_position
        local_location ::= between_position
        local_location ::= base_range
        """
        return args[0]

    def p_location_list(self, args):
        """
        location_list ::= location
        location_list ::= location_list comma location
        """
        if len(args) != 1:
            # Extend the list built so far with the location after the comma.
            return args[0] + [args[2]]
        # A single location: the argument list itself is the one-item result.
        return args

    def p_functional_operator(self, args):
        """
        functional_operator ::= symbol
        """
        return args[0]

    def p_base_position(self, args):
        """
        base_position ::= integer
        base_position ::= low_base_bound
        base_position ::= high_base_bound
        base_position ::= two_base_bound
        """
        return args[0]

    def p_low_base_bound(self, args):
        """
        low_base_bound ::= greater_than integer
        """
        position = args[1]
        return LowBound(position)

    def p_high_base_bound(self, args):
        """
        high_base_bound ::= less_than integer
        """
        position = args[1]
        return HighBound(position)

    def p_two_base_bound_1(self, args):
        """
        two_base_bound ::= open_paren base_position dot base_position close_paren
        """
        # main example doesn't have parens but others do.. (?)
        low, high = args[1], args[3]
        return TwoBound(low, high)

    def p_two_base_bound_2(self, args):
        """
        two_base_bound ::= base_position dot base_position
        """
        # two_base_bound with no parentheses like 1.6
        low, high = args[0], args[2]
        return TwoBound(low, high)

    def p_between_position(self, args):
        """
        between_position ::= base_position caret base_position
        """
        low, high = args[0], args[2]
        return Between(low, high)

    def p_base_range(self, args):
        """
        base_range ::= base_position double_dot base_position
        base_range ::= function double_dot base_position
        base_range ::= base_position double_dot function
        base_range ::= function double_dot function
        """
        low, high = args[0], args[2]
        return Range(low, high)

    def p_database(self, args):
        """
        database ::= symbol
        """
        symbol = args[0]
        return symbol.name

    def p_primary_accession(self, args):
        """
        primary_accession ::= symbol
        """
        symbol = args[0]
        return symbol.name
# One scanner instance is built at import time and shared by every call.
_cached_scanner = LocationScanner()


def scan(input):
    """Tokenize a location string and return the resulting token list."""
    tokens = _cached_scanner.tokenize(input)
    return tokens
# One parser instance is built at import time and shared by every call.
_cached_parser = LocationParser()


def parse(tokens):
    """Turn a token list into its object representation."""
    result = _cached_parser.parse(tokens)
    return result