Package Bio :: Package GenBank
[hide private]
[frames] | [no frames]

Source Code for Package Bio.GenBank

   1  # Copyright 2000 by Jeffrey Chang, Brad Chapman.  All rights reserved. 
   2  # Copyright 2006-2008 by Peter Cock.  All rights reserved. 
   3  # This code is part of the Biopython distribution and governed by its 
   4  # license.  Please see the LICENSE file that should have been included 
   5  # as part of this package. 
   6   
   7  """Code to work with GenBank formatted files. 
   8   
   9  Rather than using Bio.GenBank, you are now encouraged to use Bio.SeqIO with 
  10  the "genbank" or "embl" format names to parse GenBank or EMBL files into 
  11  SeqRecord and SeqFeature objects (see the Biopython tutorial for details). 
  12   
  13  Also, rather than using Bio.GenBank to search or download files from the NCBI, 
  14  you are now encouraged to use Bio.Entrez instead (again, see the Biopython 
  15  tutorial for details). 
  16   
  17  Currently the ONLY reason to use Bio.GenBank directly is for the RecordParser 
  18  which turns a GenBank file into GenBank-specific Record objects.  This is a 
   19  much closer representation to the raw file contents than the SeqRecord 
   20  alternative from the FeatureParser (used in Bio.SeqIO). 
  21   
  22  Classes: 
  23  Iterator              Iterate through a file of GenBank entries 
  24  ErrorFeatureParser    Catch errors caused during parsing. 
  25  FeatureParser         Parse GenBank data in SeqRecord and SeqFeature objects. 
  26  RecordParser          Parse GenBank data into a Record object. 
  27   
  28  Exceptions: 
  29  ParserFailureError    Exception indicating a failure in the parser (ie. 
  30                        scanner or consumer) 
   31  LocationParserError   Exception indicating a problem with the spark based 
   32                        location parser. 
  33   
  34   
  35  17-MAR-2009: added wgs, wgs_scafld for GenBank whole genome shotgun master records. 
  36  These are GenBank files that summarize the content of a project, and provide lists of 
  37  scaffold and contig files in the project. These will be in annotations['wgs'] and 
  38  annotations['wgs_scafld']. These GenBank files do not have sequences. See 
  39  http://groups.google.com/group/bionet.molbio.genbank/browse_thread/thread/51fb88bf39e7dc36 
  40   
  41  http://is.gd/nNgk 
  42  for more details of this format, and an example. 
  43  Added by Ying Huang & Iddo Friedberg 
  44  """ 
  45  import cStringIO 
  46  import re 
  47   
  48  # other Biopython stuff 
  49  from Bio import SeqFeature 
  50  from Bio.ParserSupport import AbstractConsumer 
  51  from Bio import Entrez 
  52   
  53  # other Bio.GenBank stuff 
  54  from utils import FeatureValueCleaner 
  55  from Scanner import GenBankScanner 
  56   
#Constants used to parse GenBank header lines
#(header keywords occupy the first 12 columns; continuation lines are
#padded with spaces to the same column)
GENBANK_INDENT = 12
GENBANK_SPACER = " " * GENBANK_INDENT

#Constants for parsing GenBank feature lines
#(feature keys start at column 5, their qualifiers at column 21)
FEATURE_KEY_INDENT = 5
FEATURE_QUALIFIER_INDENT = 21
FEATURE_KEY_SPACER = " " * FEATURE_KEY_INDENT
FEATURE_QUALIFIER_SPACER = " " * FEATURE_QUALIFIER_INDENT
  66   
  67  #Regular expresions for location parsing 
  68  _solo_location = r"[<>]?\d+" 
  69  _pair_location = r"[<>]?\d+\.\.[<>]?\d+" 
  70  _between_location = r"\d+\^\d+" 
  71   
  72  _within_position = r"\(\d+\.\d+\)" 
  73  _re_within_position = re.compile(_within_position) 
  74  _within_location = r"([<>]?\d+|%s)\.\.([<>]?\d+|%s)" \ 
  75                     % (_within_position,_within_position) 
  76  assert _re_within_position.match("(3.9)") 
  77  assert re.compile(_within_location).match("(3.9)..10") 
  78  assert re.compile(_within_location).match("26..(30.33)") 
  79  assert re.compile(_within_location).match("(13.19)..(20.28)") 
  80   
  81  _oneof_position = r"one\-of\(\d+(,\d+)+\)" 
  82  _re_oneof_position = re.compile(_oneof_position) 
  83  _oneof_location = r"([<>]?\d+|%s)\.\.([<>]?\d+|%s)" \ 
  84                     % (_oneof_position,_oneof_position) 
  85  assert _re_oneof_position.match("one-of(6,9)") 
  86  assert re.compile(_oneof_location).match("one-of(6,9)..101") 
  87  assert re.compile(_oneof_location).match("one-of(6,9)..one-of(101,104)") 
  88  assert re.compile(_oneof_location).match("6..one-of(101,104)") 
  89   
  90  assert not _re_oneof_position.match("one-of(3)") 
  91  assert _re_oneof_position.match("one-of(3,6)") 
  92  assert _re_oneof_position.match("one-of(3,6,9)") 
  93   
  94   
  95  _simple_location = r"\d+\.\.\d+" 
  96  _re_simple_location = re.compile(_simple_location) 
  97  _re_simple_compound = re.compile(r"^(join|order|bond)\(%s(,%s)*\)$" \ 
  98                                   % (_simple_location, _simple_location)) 
  99  _complex_location = r"([a-zA-z][a-zA-Z0-9]*(\.[a-zA-Z0-9]+)?\:)?(%s|%s|%s|%s|%s)" \ 
 100                      % (_pair_location, _solo_location, _between_location, 
 101                         _within_location, _oneof_location) 
 102  _re_complex_location = re.compile(r"^%s$" % _complex_location) 
 103  _possibly_complemented_complex_location = r"(%s|complement\(%s\))" \ 
 104                                            % (_complex_location, _complex_location) 
 105  _re_complex_compound = re.compile(r"^(join|order|bond)\(%s(,%s)*\)$" \ 
 106                                   % (_possibly_complemented_complex_location, 
 107                                      _possibly_complemented_complex_location)) 
 108   
 109  assert _re_simple_location.match("104..160") 
 110  assert not _re_simple_location.match("<104..>160") 
 111  assert not _re_simple_location.match("104") 
 112  assert not _re_simple_location.match("<1") 
 113  assert not _re_simple_location.match(">99999") 
 114  assert not _re_simple_location.match("join(104..160,320..390,504..579)") 
 115  assert not _re_simple_compound.match("bond(12,63)") 
 116  assert _re_simple_compound.match("join(104..160,320..390,504..579)") 
 117  assert _re_simple_compound.match("order(1..69,1308..1465)") 
 118  assert not _re_simple_compound.match("order(1..69,1308..1465,1524)") 
 119  assert not _re_simple_compound.match("join(<1..442,992..1228,1524..>1983)") 
 120  assert not _re_simple_compound.match("join(<1..181,254..336,422..497,574..>590)") 
 121  assert not _re_simple_compound.match("join(1475..1577,2841..2986,3074..3193,3314..3481,4126..>4215)") 
 122  assert not _re_simple_compound.match("test(1..69,1308..1465)") 
 123  assert not _re_simple_compound.match("complement(1..69)") 
 124  assert not _re_simple_compound.match("(1..69)") 
 125  assert _re_complex_location.match("(3.9)..10") 
 126  assert _re_complex_location.match("26..(30.33)") 
 127  assert _re_complex_location.match("(13.19)..(20.28)") 
 128  assert _re_complex_location.match("41^42") #between 
 129  assert _re_complex_location.match("AL121804:41^42") 
 130  assert _re_complex_location.match("AL121804:41..610") 
 131  assert _re_complex_location.match("AL121804.2:41..610") 
 132  assert _re_complex_location.match("one-of(3,6)..101") 
 133  assert _re_complex_compound.match("join(153490..154269,AL121804.2:41..610,AL121804.2:672..1487)") 
 134  assert not _re_simple_compound.match("join(153490..154269,AL121804.2:41..610,AL121804.2:672..1487)") 
 135  assert _re_complex_compound.match("join(complement(69611..69724),139856..140650)") 
 136   
def _pos(pos_str, offset=0):
    """Turn a GenBank position string into a Position object (PRIVATE).

    Use offset=0 (the default) for an end position, and offset=-1 for a
    start position (shifting to Python's zero based counting):

    >>> _pos("5")
    ExactPosition(5)
    >>> _pos("5", -1)
    ExactPosition(4)

    Fuzzy positions are also handled:

    >>> _pos("<5")
    BeforePosition(5)
    >>> _pos(">5")
    AfterPosition(5)
    >>> _pos("one-of(5,8,11)")
    OneOfPosition([ExactPosition(5), ExactPosition(8), ExactPosition(11)])
    >>> _pos("(8.10)")
    WithinPosition(8,2)
    """
    if pos_str.startswith("<"):
        return SeqFeature.BeforePosition(int(pos_str[1:]) + offset)
    if pos_str.startswith(">"):
        return SeqFeature.AfterPosition(int(pos_str[1:]) + offset)
    if _re_within_position.match(pos_str):
        # e.g. "(8.10)" - somewhere between 8 and 10
        low, high = [int(x) for x in pos_str[1:-1].split(".")]
        return SeqFeature.WithinPosition(low + offset, high - low)
    if _re_oneof_position.match(pos_str):
        # e.g. "one-of(5,8,11)" - a choice of exact positions
        assert pos_str.startswith("one-of(")
        assert pos_str[-1] == ")"
        choices = [SeqFeature.ExactPosition(int(p) + offset)
                   for p in pos_str[7:-1].split(",")]
        return SeqFeature.OneOfPosition(choices)
    # Plain number, e.g. "5"
    return SeqFeature.ExactPosition(int(pos_str) + offset)
176
def _loc(loc_str, expected_seq_length):
    """FeatureLocation from a non-compound non-complement location (PRIVATE).

    Simple examples,

    >>> _loc("123..456", 1000)
    FeatureLocation(ExactPosition(122),ExactPosition(456))
    >>> _loc("<123..>456", 1000)
    FeatureLocation(BeforePosition(122),AfterPosition(456))

    A more complex location using within positions,

    >>> _loc("(9.10)..(20.25)", 1000)
    FeatureLocation(WithinPosition(8,1),WithinPosition(20,5))

    Zero length between feature,

    >>> _loc("123^124", 1000)
    FeatureLocation(ExactPosition(123),ExactPosition(123))

    The expected sequence length is needed for a special case, a between
    position at the start/end of a circular genome:

    >>> _loc("1000^1", 1000)
    FeatureLocation(ExactPosition(1000),ExactPosition(1000))

    Apart from this special case, between positions P^Q must have P+1==Q,

    >>> _loc("123^456", 1000)
    Traceback (most recent call last):
    ...
    ValueError: Invalid between location '123^456'
    """
    if ".." not in loc_str:
        if "^" in loc_str:
            # A between location like "67^68" (one based counting) is a
            # special case (note it has zero length). In python slice
            # notation this is 67:67, a zero length slice. See Bug 2622
            # Further more, on a circular genome of length N you can have
            # a location N^1 meaning the junction at the origin. See Bug 3098.
            # NOTE - We can imagine between locations like "2^4", but this
            # is just "3". Similarly, "2^5" is just "3..4"
            start, end = loc_str.split("^")
            if int(start) + 1 == int(end):
                pos = _pos(start)
            elif int(start) == expected_seq_length and end == "1":
                pos = _pos(start)
            else:
                raise ValueError("Invalid between location %s" % repr(loc_str))
            return SeqFeature.FeatureLocation(pos, pos)
        # A single base location, e.g. "123"
        start = end = loc_str
    else:
        # A range, e.g. "123..456"
        start, end = loc_str.split("..")
    return SeqFeature.FeatureLocation(_pos(start, -1), _pos(end))
235
236 -def _split_compound_loc(compound_loc):
237 """Split a tricky compound location string (PRIVATE). 238 239 >>> list(_split_compound_loc("123..145")) 240 ['123..145'] 241 >>> list(_split_compound_loc("123..145,200..209")) 242 ['123..145', '200..209'] 243 >>> list(_split_compound_loc("one-of(200,203)..300")) 244 ['one-of(200,203)..300'] 245 >>> list(_split_compound_loc("complement(123..145),200..209")) 246 ['complement(123..145)', '200..209'] 247 >>> list(_split_compound_loc("123..145,one-of(200,203)..209")) 248 ['123..145', 'one-of(200,203)..209'] 249 >>> list(_split_compound_loc("123..145,one-of(200,203)..one-of(209,211),300")) 250 ['123..145', 'one-of(200,203)..one-of(209,211)', '300'] 251 >>> list(_split_compound_loc("123..145,complement(one-of(200,203)..one-of(209,211)),300")) 252 ['123..145', 'complement(one-of(200,203)..one-of(209,211))', '300'] 253 >>> list(_split_compound_loc("123..145,200..one-of(209,211),300")) 254 ['123..145', '200..one-of(209,211)', '300'] 255 >>> list(_split_compound_loc("123..145,200..one-of(209,211)")) 256 ['123..145', '200..one-of(209,211)'] 257 """ 258 if "one-of(" in compound_loc: 259 #Hard case 260 while "," in compound_loc: 261 assert compound_loc[0] != "," 262 assert compound_loc[0:2] != ".." 
263 i = compound_loc.find(",") 264 part = compound_loc[:i] 265 compound_loc = compound_loc[i:] #includes the comma 266 while part.count("(") > part.count(")"): 267 assert "one-of(" in part, (part, compound_loc) 268 i = compound_loc.find(")") 269 part += compound_loc[:i+1] 270 compound_loc = compound_loc[i+1:] 271 if compound_loc.startswith(".."): 272 i = compound_loc.find(",") 273 if i==-1: 274 part += compound_loc 275 compound_loc = "" 276 else: 277 part += compound_loc[:i] 278 compound_loc = compound_loc[i:] #includes the comma 279 while part.count("(") > part.count(")"): 280 assert part.count("one-of(") == 2 281 i = compound_loc.find(")") 282 part += compound_loc[:i+1] 283 compound_loc = compound_loc[i+1:] 284 if compound_loc.startswith(","): 285 compound_loc = compound_loc[1:] 286 assert part 287 yield part 288 if compound_loc: 289 yield compound_loc 290 else: 291 #Easy case 292 for part in compound_loc.split(","): 293 yield part
294
class Iterator(object):
    """Iterator interface to move over a file of GenBank entries one at a time.
    """
    def __init__(self, handle, parser = None):
        """Initialize the iterator.

        Arguments:
        o handle - A handle with GenBank entries to iterate through.
        o parser - An optional parser to pass the entries through before
        returning them. If None, then the raw entry will be returned.
        """
        self.handle = handle
        self._parser = parser

    def next(self):
        """Return the next GenBank record from the handle.

        Will return None if we ran out of records.
        """
        if self._parser is not None:
            try:
                return self._parser.parse(self.handle)
            except StopIteration:
                return None
        # No parser given - accumulate raw lines up to the "//" terminator
        record_lines = []
        while True:
            line = self.handle.readline()
            if not line:
                return None  # Premature end of file?
            record_lines.append(line)
            if line.rstrip() == "//":
                return "".join(record_lines)

    def __iter__(self):
        # iter(callable, sentinel) stops when next() returns None
        return iter(self.next, None)
329
class ParserFailureError(Exception):
    """Failure caused by some kind of problem in the parser (scanner or consumer)."""
    pass
334
class LocationParserError(Exception):
    """Could not properly parse out a location from a GenBank file."""
    pass
339
class FeatureParser(object):
    """Parse GenBank files into Seq + Feature objects.
    """
    def __init__(self, debug_level = 0, use_fuzziness = 1,
                 feature_cleaner = FeatureValueCleaner()):
        """Initialize a GenBank parser and Feature consumer.

        Arguments:
        o debug_level - An optional argument that specifies the amount of
        debugging information the parser should spit out. By default we have
        no debugging info (the fastest way to do things), but if you want
        you can set this as high as two and see exactly where a parse fails.
        o use_fuzziness - Specify whether or not to use fuzzy representations.
        The default is 1 (use fuzziness).
        o feature_cleaner - A class which will be used to clean out the
        values of features. This class must implement the function
        clean_value. GenBank.utils has a "standard" cleaner class, which
        is used by default.
        """
        # NOTE(review): the default cleaner instance is created once at
        # class definition time and shared by every parser - assumed
        # harmless as FeatureValueCleaner looks stateless, but confirm.
        self.use_fuzziness = use_fuzziness
        self._cleaner = feature_cleaner
        self._scanner = GenBankScanner(debug_level)

    def parse(self, handle):
        """Parse the specified handle, returning a SeqRecord."""
        consumer = _FeatureConsumer(self.use_fuzziness, self._cleaner)
        self._consumer = consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
370
class RecordParser(object):
    """Parse GenBank files into Record objects.
    """
    def __init__(self, debug_level = 0):
        """Initialize the parser.

        Arguments:
        o debug_level - An optional argument that specifies the amount of
        debugging information the parser should spit out. By default we have
        no debugging info (the fastest way to do things), but if you want
        you can set this as high as two and see exactly where a parse fails.
        """
        self._scanner = GenBankScanner(debug_level)

    def parse(self, handle):
        """Parse the specified handle into a GenBank record."""
        consumer = _RecordConsumer()
        self._consumer = consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
391
class _BaseGenBankConsumer(AbstractConsumer):
    """Abstract GenBank consumer providing useful general functions.

    This just helps to eliminate some duplication in things that most
    GenBank consumers want to do.
    """
    # Special keys in GenBank records that we should remove spaces from
    # For instance, \translation keys have values which are proteins and
    # should have spaces and newlines removed from them. This class
    # attribute gives us more control over specific formatting problems.
    remove_space_keys = ["translation"]

    def __init__(self):
        pass

    def _split_keywords(self, keyword_string):
        """Split a semi-colon separated keyword string into a clean list."""
        # A lone "." (or empty string) means no keywords
        if keyword_string == "" or keyword_string == ".":
            keywords = ""
        elif keyword_string.endswith('.'):
            keywords = keyword_string[:-1]
        else:
            keywords = keyword_string
        return [word.strip() for word in keywords.split(';')]

    def _split_accessions(self, accession_string):
        """Split a string of accession numbers into a list."""
        # Newlines act as separators, and EMBL style accessions
        # are split with ';'
        accessions = accession_string.replace("\n", " ").replace(";", " ")
        return [acc.strip() for acc in accessions.split() if acc.strip()]

    def _split_taxonomy(self, taxonomy_string):
        """Split a semi-colon separated taxonomy lineage into a list."""
        if not taxonomy_string or taxonomy_string == ".":
            # Missing data, no taxonomy
            return []

        if taxonomy_string.endswith('.'):
            tax_info = taxonomy_string[:-1]
        else:
            tax_info = taxonomy_string
        # Entries may be wrapped over several lines
        tax_list = []
        for tax_item in tax_info.split(';'):
            tax_list.extend(tax_item.split("\n"))
        while '' in tax_list:
            tax_list.remove('')
        return [tax.strip() for tax in tax_list]

    def _clean_location(self, location_string):
        """Clean whitespace out of a location string.

        The location parser isn't a fan of whitespace, so we clean it out
        before feeding it into the parser.
        """
        # Splitting on (any) whitespace and rejoining avoids importing
        # the string module - see Bug 2684.
        return ''.join(location_string.split())

    def _remove_newlines(self, text):
        """Remove any newline characters from the passed text."""
        for newline in ("\n", "\r"):
            text = text.replace(newline, "")
        return text

    def _normalize_spaces(self, text):
        """Replace multiple spaces in the passed text with single spaces."""
        return ' '.join(filter(None, text.split(" ")))

    def _remove_spaces(self, text):
        """Remove all spaces from the passed text."""
        return text.replace(" ", "")

    def _convert_to_python_numbers(self, start, end):
        """Convert a start and end range to python notation.

        In GenBank, starts and ends are defined in "biological" coordinates,
        where 1 is the first base and [i, j] means to include both i and j.

        In python, 0 is the first base and [i, j] means to include i, but
        not j.

        So only the start needs shifting down by one; the end is unchanged.
        """
        return start - 1, end
503
504 -class _FeatureConsumer(_BaseGenBankConsumer):
505 """Create a SeqRecord object with Features to return. 506 507 Attributes: 508 o use_fuzziness - specify whether or not to parse with fuzziness in 509 feature locations. 510 o feature_cleaner - a class that will be used to provide specialized 511 cleaning-up of feature values. 512 """
    def __init__(self, use_fuzziness, feature_cleaner = None):
        """Set up an empty SeqRecord and the parse state (PRIVATE)."""
        # Local import, presumably to avoid a circular import at module
        # load time - TODO confirm
        from Bio.SeqRecord import SeqRecord
        _BaseGenBankConsumer.__init__(self)
        # The SeqRecord being built up by the consumer event methods
        self.data = SeqRecord(None, id = None)
        self.data.id = None
        self.data.description = ""

        self._use_fuzziness = use_fuzziness
        self._feature_cleaner = feature_cleaner

        self._seq_type = ''           # e.g. "DNA", set by residue_type()
        self._seq_data = []           # accumulated sequence chunks
        self._cur_reference = None    # Reference currently being built
        self._cur_feature = None      # SeqFeature currently being built
        self._cur_qualifier_key = None
        self._cur_qualifier_value = None
        self._expected_size = None    # declared sequence length, see size()
530
    def locus(self, locus_name):
        """Use the locus name as the name of the SeqRecord."""
        self.data.name = locus_name
535
    def size(self, content):
        """Record the sequence length."""
        # Also needed later for the circular origin special case in _loc
        self._expected_size = int(content)
539
    def residue_type(self, type):
        """Record the sequence type so we can choose an appropriate alphabet.
        """
        # NOTE: parameter name shadows the builtin "type", but renaming
        # would change the public signature, so it is left as-is.
        self._seq_type = type
544
    def data_file_division(self, division):
        """Store the data file division code in the annotations."""
        self.data.annotations['data_file_division'] = division
547
    def date(self, submit_date):
        """Store the date string (as-is) in the annotations."""
        self.data.annotations['date'] = submit_date
550
551 - def definition(self, definition):
552 """Set the definition as the description of the sequence. 553 """ 554 if self.data.description: 555 #Append to any existing description 556 #e.g. EMBL files with two DE lines. 557 self.data.description += " " + definition 558 else: 559 self.data.description = definition
560
561 - def accession(self, acc_num):
562 """Set the accession number as the id of the sequence. 563 564 If we have multiple accession numbers, the first one passed is 565 used. 566 """ 567 new_acc_nums = self._split_accessions(acc_num) 568 569 #Also record them ALL in the annotations 570 try: 571 #On the off chance there was more than one accession line: 572 for acc in new_acc_nums: 573 #Prevent repeat entries 574 if acc not in self.data.annotations['accessions']: 575 self.data.annotations['accessions'].append(acc) 576 except KeyError: 577 self.data.annotations['accessions'] = new_acc_nums 578 579 # if we haven't set the id information yet, add the first acc num 580 if self.data.id is None: 581 if len(new_acc_nums) > 0: 582 #self.data.id = new_acc_nums[0] 583 #Use the FIRST accession as the ID, not the first on this line! 584 self.data.id = self.data.annotations['accessions'][0]
585
    def wgs(self, content):
        """Store the WGS accession range, split on "-", in annotations['wgs'].

        See the module docstring - whole genome shotgun master records.
        """
        self.data.annotations['wgs'] = content.split('-')
588
    def add_wgs_scafld(self, content):
        """Append a WGS_SCAFLD accession range (split on "-") to the annotations."""
        self.data.annotations.setdefault('wgs_scafld',[]).append(content.split('-'))
591
    def nid(self, content):
        """Store the NID line content in the annotations."""
        self.data.annotations['nid'] = content
594
    def pid(self, content):
        """Store the PID line content in the annotations."""
        self.data.annotations['pid'] = content
597
598 - def version(self, version_id):
599 #Want to use the versioned accession as the record.id 600 #This comes from the VERSION line in GenBank files, or the 601 #obsolete SV line in EMBL. For the new EMBL files we need 602 #both the version suffix from the ID line and the accession 603 #from the AC line. 604 if version_id.count(".")==1 and version_id.split(".")[1].isdigit(): 605 self.accession(version_id.split(".")[0]) 606 self.version_suffix(version_id.split(".")[1]) 607 else: 608 #For backwards compatibility... 609 self.data.id = version_id
610
611 - def project(self, content):
612 """Handle the information from the PROJECT line as a list of projects. 613 614 e.g. 615 PROJECT GenomeProject:28471 616 617 or: 618 PROJECT GenomeProject:13543 GenomeProject:99999 619 620 This is stored as dbxrefs in the SeqRecord to be consistent with the 621 projected switch of this line to DBLINK in future GenBank versions. 622 Note the NCBI plan to replace "GenomeProject:28471" with the shorter 623 "Project:28471" as part of this transition. 624 """ 625 content = content.replace("GenomeProject:", "Project:") 626 self.data.dbxrefs.extend([p for p in content.split() if p])
627 653
    def version_suffix(self, version):
        """Set the version to overwrite the id.

        Since the version provides the same information as the accession
        number, plus some extra info, we set this as the id if we have
        a version.
        """
        #e.g. GenBank line:
        #VERSION   U49845.1  GI:1293613
        #or the obsolete EMBL line:
        #SV   U49845.1
        #Scanner calls consumer.version("U49845.1")
        #which then calls consumer.version_suffix(1)
        #
        #e.g. EMBL new line:
        #ID   X56734; SV 1; linear; mRNA; STD; PLN; 1859 BP.
        #Scanner calls consumer.version_suffix(1)
        #
        #The suffix must be a plain (decimal) integer
        assert version.isdigit()
        self.data.annotations['sequence_version'] = int(version)
673
    def db_source(self, content):
        """Store the DBSOURCE line (trailing whitespace removed)."""
        self.data.annotations['db_source'] = content.rstrip()
676
    def gi(self, content):
        """Store the GI number in the annotations."""
        self.data.annotations['gi'] = content
679
    def keywords(self, content):
        """Split the keyword string into a list and store it."""
        self.data.annotations['keywords'] = self._split_keywords(content)
682
    def segment(self, content):
        """Store the SEGMENT line content in the annotations."""
        self.data.annotations['segment'] = content
685
686 - def source(self, content):
687 #Note that some software (e.g. VectorNTI) may produce an empty 688 #source (rather than using a dot/period as might be expected). 689 if content == "": 690 source_info = "" 691 elif content[-1] == '.': 692 source_info = content[:-1] 693 else: 694 source_info = content 695 self.data.annotations['source'] = source_info
696
    def organism(self, content):
        """Store the organism name in the annotations."""
        self.data.annotations['organism'] = content
699
700 - def taxonomy(self, content):
701 """Records (another line of) the taxonomy lineage. 702 """ 703 lineage = self._split_taxonomy(content) 704 try: 705 self.data.annotations['taxonomy'].extend(lineage) 706 except KeyError: 707 self.data.annotations['taxonomy'] = lineage
708
    def reference_num(self, content):
        """Signal the beginning of a new reference object.

        Note the reference number itself (content) is not stored.
        """
        # if we have a current reference that hasn't been added to
        # the list of references, add it.
        if self._cur_reference is not None:
            self.data.annotations['references'].append(self._cur_reference)
        else:
            # First reference seen - create the list
            self.data.annotations['references'] = []

        self._cur_reference = SeqFeature.Reference()
720
    def reference_bases(self, content):
        """Attempt to determine the sequence region the reference entails.

        Possible types of information we may have to deal with:

        (bases 1 to 86436)
        (sites)
        (bases 1 to 105654; 110423 to 111122)
        1  (residues 1 to 182)

        The result is stored as a list of FeatureLocation objects on
        the current reference's location attribute.
        """
        # first remove the parentheses or other junk
        ref_base_info = content[1:-1]

        all_locations = []
        # parse if we've got 'bases' and 'to'
        if ref_base_info.find('bases') != -1 and \
            ref_base_info.find('to') != -1:
            # get rid of the beginning 'bases'
            ref_base_info = ref_base_info[5:]
            locations = self._split_reference_locations(ref_base_info)
            all_locations.extend(locations)
        elif (ref_base_info.find("residues") >= 0 and
              ref_base_info.find("to") >= 0):
            residues_start = ref_base_info.find("residues")
            # get only the information after "residues"
            ref_base_info = ref_base_info[(residues_start + len("residues ")):]
            locations = self._split_reference_locations(ref_base_info)
            all_locations.extend(locations)

        # make sure if we are not finding information then we have
        # the string 'sites' or the string 'bases'
        elif (ref_base_info == 'sites' or
              ref_base_info.strip() == 'bases'):
            pass
        # otherwise raise an error
        else:
            raise ValueError("Could not parse base info %s in record %s" %
                             (ref_base_info, self.data.id))

        self._cur_reference.location = all_locations
761
762 - def _split_reference_locations(self, location_string):
763 """Get reference locations out of a string of reference information 764 765 The passed string should be of the form: 766 767 1 to 20; 20 to 100 768 769 This splits the information out and returns a list of location objects 770 based on the reference locations. 771 """ 772 # split possibly multiple locations using the ';' 773 all_base_info = location_string.split(';') 774 775 new_locations = [] 776 for base_info in all_base_info: 777 start, end = base_info.split('to') 778 new_start, new_end = \ 779 self._convert_to_python_numbers(int(start.strip()), 780 int(end.strip())) 781 this_location = SeqFeature.FeatureLocation(new_start, new_end) 782 new_locations.append(this_location) 783 return new_locations
784
785 - def authors(self, content):
786 if self._cur_reference.authors: 787 self._cur_reference.authors += ' ' + content 788 else: 789 self._cur_reference.authors = content
790
791 - def consrtm(self, content):
792 if self._cur_reference.consrtm: 793 self._cur_reference.consrtm += ' ' + content 794 else: 795 self._cur_reference.consrtm = content
796
    def title(self, content):
        """Set (or extend) the title of the current reference.

        Unlike the other reference handlers, this tolerates a TITLE line
        with no preceding REFERENCE line, warning instead of failing.
        """
        if self._cur_reference is None:
            import warnings
            from Bio import BiopythonParserWarning
            warnings.warn("GenBank TITLE line without REFERENCE line.",
                          BiopythonParserWarning)
        elif self._cur_reference.title:
            self._cur_reference.title += ' ' + content
        else:
            self._cur_reference.title = content
807
808 - def journal(self, content):
809 if self._cur_reference.journal: 810 self._cur_reference.journal += ' ' + content 811 else: 812 self._cur_reference.journal = content
813
    def medline_id(self, content):
        """Set the Medline id of the current reference."""
        self._cur_reference.medline_id = content
816
    def pubmed_id(self, content):
        """Set the PubMed id of the current reference."""
        self._cur_reference.pubmed_id = content
819
820 - def remark(self, content):
821 """Deal with a reference comment.""" 822 if self._cur_reference.comment: 823 self._cur_reference.comment += ' ' + content 824 else: 825 self._cur_reference.comment = content
826
    def comment(self, content):
        """Store a comment (given as a list of lines) in the annotations.

        Multiple comment blocks are joined with newlines.
        """
        try:
            self.data.annotations['comment'] += "\n" + "\n".join(content)
        except KeyError:
            self.data.annotations['comment'] = "\n".join(content)
832
    def features_line(self, content):
        """Get ready for the feature table when we reach the FEATURE line.
        """
        # The line's own content is not needed
        self.start_feature_table()
837
    def start_feature_table(self):
        """Indicate we've got to the start of the feature table.
        """
        # make sure we've added on our last reference object
        if self._cur_reference is not None:
            self.data.annotations['references'].append(self._cur_reference)
            self._cur_reference = None
845
    def _add_feature(self):
        """Utility function to add a feature to the SeqRecord.

        This does all of the appropriate checking to make sure we haven't
        left any info behind, and that we are only adding info if it
        exists.
        """
        if self._cur_feature:
            # if we have a left over qualifier, add it to the qualifiers
            # on the current feature (_add_qualifier is defined elsewhere
            # in this class, outside the visible chunk)
            self._add_qualifier()

            self._cur_qualifier_key = ''
            self._cur_qualifier_value = ''
            self.data.features.append(self._cur_feature)
861
    def feature_key(self, content):
        """Start a new feature of the given type, saving any previous one."""
        # if we already have a feature, add it on
        self._add_feature()

        # start a new feature
        self._cur_feature = SeqFeature.SeqFeature()
        self._cur_feature.type = content

        # assume positive strand to start with if we have DNA or cDNA
        # (labelled as mRNA). The complement in the location will
        # change this later if something is on the reverse strand
        if self._seq_type.find("DNA") >= 0 or self._seq_type.find("mRNA") >= 0:
            self._cur_feature.strand = 1
875
def location(self, content):
    """Parse out location information from the location string.

    This uses simple Python code with some regular expressions to do the
    parsing, and then translates the results into appropriate objects
    (FeatureLocation, with sub_features for compound locations).
    """
    # clean up newlines and other whitespace inside the location before
    # parsing - locations should have no whitespace whatsoever
    location_line = self._clean_location(content)

    # Older records have junk like replace(266,"c") in the
    # location line. Newer records just replace this with
    # the number 266 and have the information in a more reasonable
    # place. So we'll just grab out the number and feed this to the
    # parser. We shouldn't really be losing any info this way.
    # (len("replace(") == 8, hence the [8:...] slice below.)
    if location_line.find('replace') != -1:
        comma_pos = location_line.find(',')
        location_line = location_line[8:comma_pos]

    cur_feature = self._cur_feature

    # Handle a top level complement(...) wrapper here for speed;
    # len("complement(") == 11, hence the [11:-1] slice.
    if location_line.startswith("complement("):
        assert location_line.endswith(")")
        location_line = location_line[11:-1]
        cur_feature.strand = -1
        # And continue parsing the unwrapped location...

    # Special case handling of the most common cases for speed
    if _re_simple_location.match(location_line):
        # e.g. "123..456" - GenBank counts from one, FeatureLocation
        # from zero, hence the int(s)-1 on the start position.
        s, e = location_line.split("..")
        cur_feature.location = SeqFeature.FeatureLocation(int(s)-1,
                                                          int(e))
        return
    if _re_simple_compound.match(location_line):
        # e.g. join(<123..456,480..>500)
        i = location_line.find("(")
        cur_feature.location_operator = location_line[:i]
        # we can split on the comma because these are simple locations
        for part in location_line[i+1:-1].split(","):
            s, e = part.split("..")
            f = SeqFeature.SeqFeature(SeqFeature.FeatureLocation(int(s)-1,
                                                                 int(e)),
                    location_operator=cur_feature.location_operator,
                    strand=cur_feature.strand, type=cur_feature.type)
            cur_feature.sub_features.append(f)
        # parent location spans from the start of the first part to
        # the end of the last part
        s = cur_feature.sub_features[0].location.start
        e = cur_feature.sub_features[-1].location.end
        cur_feature.location = SeqFeature.FeatureLocation(s, e)
        return

    # Handle the general case with more complex regular expressions
    if _re_complex_location.match(location_line):
        # e.g. "AL121804.2:41..610" - may reference another sequence
        if ":" in location_line:
            cur_feature.ref, location_line = location_line.split(":")
        cur_feature.location = _loc(location_line, self._expected_size)
        return
    if _re_complex_compound.match(location_line):
        i = location_line.find("(")
        cur_feature.location_operator = location_line[:i]
        # Can't split on the comma because of positions like one-of(1,2,3)
        for part in _split_compound_loc(location_line[i+1:-1]):
            if part.startswith("complement("):
                assert part[-1]==")"
                part = part[11:-1]
                # a complement() inside an already complemented location
                # would make no sense
                assert cur_feature.strand != -1, "Double complement?"
                strand = -1
            else:
                strand = cur_feature.strand
            # individual parts may also reference another sequence
            if ":" in part:
                ref, part = part.split(":")
            else:
                ref = None
            try:
                loc = _loc(part, self._expected_size)
            except ValueError, err:
                # debugging aid for malformed locations
                print location_line
                print part
                raise err
            f = SeqFeature.SeqFeature(location=loc, ref=ref,
                    location_operator=cur_feature.location_operator,
                    strand=strand, type=cur_feature.type)
            cur_feature.sub_features.append(f)
        # Historically a join on the reverse strand has been represented
        # in Biopython with both the parent SeqFeature and its children
        # (the exons for a CDS) all given a strand of -1.  Likewise, for
        # a join feature on the forward strand they all have strand +1.
        # However, we must also consider evil mixed strand examples like
        # this, join(complement(69611..69724),139856..140087,140625..140650)
        strands = set(sf.strand for sf in cur_feature.sub_features)
        if len(strands)==1:
            cur_feature.strand = cur_feature.sub_features[0].strand
        else:
            cur_feature.strand = None # i.e. mixed strands
        s = cur_feature.sub_features[0].location.start
        e = cur_feature.sub_features[-1].location.end
        cur_feature.location = SeqFeature.FeatureLocation(s, e)
        return
    # Not recognised
    if "order" in location_line and "join" in location_line:
        # See Bug 3197
        msg = 'Combinations of "join" and "order" within the same ' + \
              'location (nested operators) are illegal:\n' + location_line
        raise LocationParserError(msg)
    # unrecognised location syntax
    raise LocationParserError(location_line)
983
def _add_qualifier(self):
    """Store the qualifier gathered so far on the current feature.

    Values for a repeated qualifier key are accumulated in a list under
    that key, so no information is lost when a key occurs several times.
    Does nothing if no qualifier key has been seen yet.
    """
    key = self._cur_qualifier_key
    if not key:
        return
    value = "".join(self._cur_qualifier_value)
    if self._feature_cleaner is not None:
        value = self._feature_cleaner.clean_value(key, value)
    # append to the existing list for this key, or start a new one
    self._cur_feature.qualifiers.setdefault(key, []).append(value)
1004
def feature_qualifier_name(self, content_list):
    """Record each qualifier key in turn, flushing any pending qualifier.

    A list of keys arrives because value-less keys such as /pseudo are
    passed along together with the following key (nothing else separates
    them in the file).
    """
    for key in content_list:
        # store the qualifier gathered so far before switching keys
        self._add_qualifier()

        # the scanner should have stripped the / and = and trimmed it
        assert '/' not in key and '=' not in key and key == key.strip(), key

        self._cur_qualifier_key = key
        self._cur_qualifier_value = []
1022
def feature_qualifier_description(self, content):
    """Accumulate one line of qualifier value, stripping double quotes."""
    # drop the quotes surrounding the qualifier value, if present
    self._cur_qualifier_value.append(content.replace('"', ''))
1028
def contig_location(self, content):
    """Store the CONTIG location as a plain string annotation.

    Historically this was stored as a SeqFeature object under
    record.annotations["contig"], but CONTIG lines can contain tokens
    like Gap(), Gap(100) or Gap(unk100) that feature locations never
    use, so a string is kept instead (this also matches how BioPerl
    records the CONTIG line for BioSQL).

    NOTE - assumes the scanner has already combined all the CONTIG
    lines into one long string.
    """
    self.data.annotations["contig"] = content
1047
def origin_name(self, content):
    """Text on the ORIGIN line is intentionally discarded by this consumer."""
1050
def base_count(self, content):
    """The BASE COUNT line is intentionally discarded by this consumer."""
1053
def base_number(self, content):
    """Base numbers within the sequence block are intentionally ignored."""
1056
def sequence(self, content):
    """Accumulate one uppercased chunk of sequence.

    Chunks are collected in a list and joined once at the end of the
    record, which is much faster than repeated string concatenation.
    """
    assert ' ' not in content
    chunk = content.upper()
    self._seq_data.append(chunk)
1066
def record_end(self, content):
    """Clean up when we've finished the record.

    Finalizes the record id (appending the version number when known),
    flushes the last feature, chooses an alphabet from the declared
    sequence type, and builds the final Seq (or UnknownSeq) object.
    """
    from Bio import Alphabet
    from Bio.Alphabet import IUPAC
    from Bio.Seq import Seq, UnknownSeq

    #Try and append the version number to the accession for the full id
    if self.data.id is None:
        assert 'accessions' not in self.data.annotations, \
               self.data.annotations['accessions']
        self.data.id = self.data.name #Good fall back?
    elif self.data.id.count('.') == 0:
        try:
            self.data.id += '.%i' % self.data.annotations['sequence_version']
        except KeyError:
            pass

    # add the last feature in the table which hasn't been added yet
    self._add_feature()

    # add the sequence information
    # first, determine the alphabet
    # we default to an generic alphabet if we don't have a
    # seq type or have strange sequence information.
    seq_alphabet = Alphabet.generic_alphabet

    # now set the sequence
    sequence = "".join(self._seq_data)

    if self._expected_size is not None \
    and len(sequence) != 0 \
    and self._expected_size != len(sequence):
        import warnings
        from Bio import BiopythonParserWarning
        warnings.warn("Expected sequence length %i, found %i (%s)." \
                      % (self._expected_size, len(sequence), self.data.id),
                      BiopythonParserWarning)

    if self._seq_type:
        # mRNA is really also DNA, since it is actually cDNA
        if self._seq_type.find('DNA') != -1 or \
           self._seq_type.find('mRNA') != -1:
            seq_alphabet = IUPAC.ambiguous_dna
        # are there ever really RNA sequences in GenBank?
        elif self._seq_type.find('RNA') != -1:
            #Even for data which was from RNA, the sequence string
            #is usually given as DNA (T not U).  See Bug 2408
            if "T" in sequence and "U" not in sequence:
                seq_alphabet = IUPAC.ambiguous_dna
            else:
                seq_alphabet = IUPAC.ambiguous_rna
        elif self._seq_type.upper().find('PROTEIN') != -1:
            seq_alphabet = IUPAC.protein  # or extended protein?
        # work around ugly GenBank records which have circular or
        # linear but no indication of sequence type
        elif self._seq_type in ["circular", "linear"]:
            pass
        # we have a bug if we get here
        else:
            raise ValueError("Could not determine alphabet for seq_type %s"
                             % self._seq_type)

    # BUGFIX: this previously read self.__expected_size, which Python
    # name-mangles to a non-existent attribute and so raised an
    # AttributeError for any record with no sequence data (e.g. a
    # CONTIG-only entry).  The attribute is self._expected_size,
    # exactly as used in the length check above.
    if not sequence and self._expected_size:
        self.data.seq = UnknownSeq(self._expected_size, seq_alphabet)
    else:
        self.data.seq = Seq(sequence, seq_alphabet)
1134
1135 -class _RecordConsumer(_BaseGenBankConsumer):
1136 """Create a GenBank Record object from scanner generated information. 1137 """
def __init__(self):
    """Prepare a fresh Record plus the per-record parsing state."""
    _BaseGenBankConsumer.__init__(self)
    import Record
    self.data = Record.Record()

    # sequence chunks, joined into one string at record_end
    self._seq_data = []
    # objects currently being built; each is flushed when the next starts
    self._cur_reference = None
    self._cur_feature = None
    self._cur_qualifier = None
1147
def wgs(self, content):
    """Store the WGS line as a [first, last] accession range."""
    self.data.wgs = content.split('-')
1150
def add_wgs_scafld(self, content):
    """Append one WGS_SCAFLD accession range, split on the dash."""
    self.data.wgs_scafld.append(content.split('-'))
1153
def locus(self, content):
    """Store the LOCUS name verbatim."""
    self.data.locus = content
1156
def size(self, content):
    """Store the declared sequence size (kept as a string)."""
    self.data.size = content
1159
def residue_type(self, content):
    """Store the residue type text from the LOCUS line."""
    self.data.residue_type = content
1162
def data_file_division(self, content):
    """Store the GenBank data file division code (e.g. PLN, PRI)."""
    self.data.data_file_division = content
1165
def date(self, content):
    """Store the record date verbatim."""
    self.data.date = content
1168
def definition(self, content):
    """Store the DEFINITION text verbatim."""
    self.data.definition = content
1171
def accession(self, content):
    """Add each accession on the line, skipping duplicates (order kept)."""
    seen = self.data.accession
    for acc in self._split_accessions(content):
        if acc not in seen:
            seen.append(acc)
1176
def nid(self, content):
    """Store the (legacy) NID identifier verbatim."""
    self.data.nid = content
1179
def pid(self, content):
    """Store the (legacy) PID identifier verbatim."""
    self.data.pid = content
1182
def version(self, content):
    """Store the VERSION (accession.version) string verbatim."""
    self.data.version = content
1185
def db_source(self, content):
    """Store the DBSOURCE line with trailing whitespace removed."""
    self.data.db_source = content.rstrip()
1188
def gi(self, content):
    """Store the GI number verbatim."""
    self.data.gi = content
1191
def keywords(self, content):
    """Store the KEYWORDS line, split into individual keywords."""
    self.data.keywords = self._split_keywords(content)
1194
def project(self, content):
    """Collect whitespace-separated PROJECT tokens onto the record."""
    tokens = [p for p in content.split() if p]
    self.data.projects.extend(tokens)
1197 1200
def segment(self, content):
    """Store the SEGMENT line verbatim."""
    self.data.segment = content
1203
def source(self, content):
    """Store the SOURCE description verbatim."""
    self.data.source = content
1206
def organism(self, content):
    """Store the ORGANISM name verbatim."""
    self.data.organism = content
1209
def taxonomy(self, content):
    """Store the taxonomy lineage, split into its components."""
    self.data.taxonomy = self._split_taxonomy(content)
1212
def reference_num(self, content):
    """Start a new reference, flushing any reference in progress."""
    # a previous reference means its lines are complete - store it
    if self._cur_reference is not None:
        self.data.references.append(self._cur_reference)

    import Record
    reference = Record.Reference()
    reference.number = content
    self._cur_reference = reference
1223
def reference_bases(self, content):
    """Store the bases-covered text for the current reference."""
    self._cur_reference.bases = content
1226
def authors(self, content):
    """Store the AUTHORS text on the current reference."""
    self._cur_reference.authors = content
1229
def consrtm(self, content):
    """Store the CONSRTM (consortium) text on the current reference."""
    self._cur_reference.consrtm = content
1232
def title(self, content):
    """Attach the TITLE text to the current reference, if there is one."""
    reference = self._cur_reference
    if reference is None:
        # A TITLE with no preceding REFERENCE line is malformed input;
        # warn and skip it rather than crash.
        import warnings
        from Bio import BiopythonParserWarning
        warnings.warn("GenBank TITLE line without REFERENCE line.",
                      BiopythonParserWarning)
        return
    reference.title = content
1241
def journal(self, content):
    """Store the JOURNAL text on the current reference."""
    self._cur_reference.journal = content
1244
def medline_id(self, content):
    """Store the MEDLINE id on the current reference."""
    self._cur_reference.medline_id = content
1247
def pubmed_id(self, content):
    """Store the PUBMED id on the current reference."""
    self._cur_reference.pubmed_id = content
1250
def remark(self, content):
    """Store the REMARK text on the current reference."""
    self._cur_reference.remark = content
1253
def comment(self, content):
    """Append the COMMENT lines (joined with newlines) to any existing comment."""
    self.data.comment += "\n".join(content)
1256
def primary_ref_line(self, content):
    """Collect one data row from the PRIMARY table."""
    self.data.primary.append(content)
1260
def primary(self, content):
    """The PRIMARY header line itself carries nothing to store."""
1263
def features_line(self, content):
    """The FEATURES header just signals the start of the feature table."""
    self.start_feature_table()
1268
def start_feature_table(self):
    """Flush the final reference before the feature table begins."""
    if self._cur_reference is not None:
        self.data.references.append(self._cur_reference)
1275
def feature_key(self, content):
    """Begin a new feature with the given key, flushing the previous one."""
    # store any feature information collected so far
    self._add_feature()

    import Record
    feature = Record.Feature()
    feature.key = content
    self._cur_feature = feature
1285
def _add_feature(self):
    """Move the in-progress feature (plus its pending qualifier) onto the record.

    Safe to call when no feature is being built - it then does nothing -
    so both feature_key and record_end can use it unconditionally.
    """
    feature = self._cur_feature
    if feature is None:
        return
    # a left-over qualifier belongs to this feature
    if self._cur_qualifier is not None:
        feature.qualifiers.append(self._cur_qualifier)
    self._cur_qualifier = None
    self.data.features.append(feature)
1301
def location(self, content):
    """Store the feature location as a whitespace-free string."""
    self._cur_feature.location = self._clean_location(content)
1304
def feature_qualifier_name(self, content_list):
    """Start a new qualifier for each key, flushing any pending qualifier.

    Several keys can arrive at once because value-less keys such as
    /pseudo are passed along together with the following key.
    """
    import Record
    for key in content_list:
        # the record parser keeps the leading slash -- restore it if absent
        if key.find("/") != 0:
            key = "/%s" % key
        # store the qualifier gathered so far before starting a new one
        if self._cur_qualifier is not None:
            self._cur_feature.qualifiers.append(self._cur_qualifier)

        qualifier = Record.Qualifier()
        qualifier.key = key
        self._cur_qualifier = qualifier
1323
def feature_qualifier_description(self, content):
    """Attach the value text to the current qualifier.

    Since the qualifier carries a value, its stored key must end with
    an '='.  The value itself has newlines removed, spaces stripped for
    keys where spaces are meaningless (remove_space_keys), and any
    remaining whitespace runs normalized.
    """
    qualifier = self._cur_qualifier
    # a qualifier with a value should have an = in its key
    if qualifier.key.find("=") == -1:
        qualifier.key = "%s=" % qualifier.key
    value = self._remove_newlines(content)
    for remove_space_key in self.__class__.remove_space_keys:
        if qualifier.key.find(remove_space_key) >= 0:
            value = self._remove_spaces(value)
    qualifier.value = self._normalize_spaces(value)
1335
def base_count(self, content):
    """Store the BASE COUNT line verbatim."""
    self.data.base_counts = content
1338
def origin_name(self, content):
    """Store any text found after ORIGIN."""
    self.data.origin = content
1341
def contig_location(self, content):
    """Store the CONTIG assembly instructions as a cleaned string."""
    self.data.contig = self._clean_location(content)
1346
def sequence(self, content):
    """Accumulate one uppercased, space-free chunk of sequence.

    Chunks are gathered in a list and joined once at record_end,
    which is far faster than repeated string concatenation.
    """
    assert ' ' not in content
    chunk = content.upper()
    self._seq_data.append(chunk)
1357
def record_end(self, content):
    """Finish the record: join the sequence chunks, flush the last feature."""
    self.data.sequence = "".join(self._seq_data)
    self._add_feature()
1366 1367
1368 -def _test():
1369 """Run the Bio.GenBank module's doctests.""" 1370 print "Runing doctests..." 1371 import doctest 1372 doctest.testmod() 1373 print "Done"
1374 1375 if __name__ == "__main__": 1376 _test() 1377