Trees | Indices | Help |
---|
|
1 # Copyright 1999 by Jeffrey Chang. All rights reserved. 2 # This code is part of the Biopython distribution and governed by its 3 # license. Please see the LICENSE file that should have been included 4 # as part of this package. 5 6 """Index.py 7 8 This module provides a way to create indexes to text files. 9 10 Classes: 11 Index Dictionary-like class used to store index information. 12 13 _ShelveIndex An Index class based on the shelve module. 14 _InMemoryIndex An in-memory Index class. 15 16 """ 17 import os 18 import array 19 import cPickle 20 import shelve 2123 """An index file wrapped around shelve. 24 25 """ 26 # Without a good dbm module installed, this is pretty slow and 27 # generates large files. When generating an index on a FASTA- 28 # formatted file with 82000 sequences (37Mb), the 29 # index 'dat' file is 42Mb and 'dir' file is 8Mb. 30 31 __version = 2 32 __version_key = '__version' 336535 dict.__init__(self) 36 try: 37 if truncate: 38 # In python 1.52 and before, dumbdbm (under shelve) 39 # doesn't clear the old database. 40 files = [indexname + '.dir', 41 indexname + '.dat', 42 indexname + '.bak' 43 ] 44 for file in files: 45 if os.path.exists(file): 46 os.unlink(file) 47 raise Exception("open a new shelf") 48 self.data = shelve.open(indexname, flag='r') 49 except: 50 # No database exists. 51 self.data = shelve.open(indexname, flag='n') 52 self.data[self.__version_key] = self.__version 53 else: 54 # Check to make sure the database is the correct version. 55 version = self.data.get(self.__version_key, None) 56 if version is None: 57 raise IOError("Unrecognized index format") 58 elif version != self.__version: 59 raise IOError("Version %s doesn't match my version %s" \ 60 % (version, self.__version))6167 """This creates an in-memory index file. 68 69 """ 70 # File Format: 71 # version 72 # key value 73 # [...] 74 75 __version = 3 76 __version_key = '__version' 77141 142 Index = _InMemoryIndex 14379 self._indexname = indexname 80 dict.__init__(self) 81 self.__changed = 0 # the index hasn't changed 82 83 # Remove the database if truncate is true. 84 if truncate and os.path.exists(indexname): 85 os.unlink(indexname) 86 self.__changed = 1 87 88 # Load the database if it exists 89 if os.path.exists(indexname): 90 handle = open(indexname) 91 version = self._toobj(handle.readline().rstrip()) 92 if version != self.__version: 93 raise IOError("Version %s doesn't match my version %s" \ 94 % (version, self.__version)) 95 for line in handle: 96 key, value = line.split() 97 key, value = self._toobj(key), self._toobj(value) 98 self[key] = value 99 self.__changed = 0100 113115 if self.__changed: 116 handle = open(self._indexname, 'w') 117 handle.write("%s\n" % self._tostr(self.__version)) 118 for key, value in self.items(): 119 handle.write("%s %s\n" % 120 (self._tostr(key), self._tostr(value))) 121 handle.close()122124 # I need a representation of the object that's saveable to 125 # a file that uses whitespace as delimiters. Thus, I'm 126 # going to pickle the object, and then convert each character of 127 # the string to its ASCII integer value. Then, I'm going to convert 128 # the integers into strings and join them together with commas. 129 # It's not the most efficient way of storing things, but it's 130 # relatively fast. 131 s = cPickle.dumps(obj) 132 intlist = array.array('b', s) 133 strlist = map(str, intlist) 134 return ','.join(strlist)135137 intlist = map(int, str.split(',')) 138 intlist = array.array('b', intlist) 139 strlist = map(chr, intlist) 140 return cPickle.loads(''.join(strlist))
Trees | Indices | Help |
---|
Generated by Epydoc 3.0.1 on Sat Aug 20 10:39:19 2011 | http://epydoc.sourceforge.net |