X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=roundup%2Fbackends%2Fback_anydbm.py;h=4e1ee10f0642763492c2fb62f801d2fd948f0f79;hb=ac36b06befed0636bc8f90776482714cff863f0c;hp=b6226aac7de490ad1ef17b067a53638f0bf0ef9a;hpb=52c4073ff97774929896dfc1c347c42c78f46dbb;p=roundup.git diff --git a/roundup/backends/back_anydbm.py b/roundup/backends/back_anydbm.py index b6226aa..4e1ee10 100644 --- a/roundup/backends/back_anydbm.py +++ b/roundup/backends/back_anydbm.py @@ -15,7 +15,7 @@ # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE, # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # -#$Id: back_anydbm.py,v 1.20 2001-12-18 15:30:34 rochecompaan Exp $ +#$Id: back_anydbm.py,v 1.43 2002-07-10 06:30:30 richard Exp $ ''' This module defines a backend that saves the hyperdatabase in a database chosen by anydbm. It is guaranteed to always be available in python @@ -24,14 +24,15 @@ serious bugs, and is not available) ''' import whichdb, anydbm, os, marshal -from roundup import hyperdb, date, password - -DEBUG=os.environ.get('HYPERDBDEBUG', '') +from roundup import hyperdb, date +from blobfiles import FileStorage +from roundup.indexer import Indexer +from locking import acquire_lock, release_lock # # Now the database # -class Database(hyperdb.Database): +class Database(FileStorage, hyperdb.Database): """A database for storing records containing flexible data types. Transaction stuff TODO: @@ -40,9 +41,10 @@ class Database(hyperdb.Database): . perhaps detect write collisions (related to above)? """ - def __init__(self, storagelocator, journaltag=None): + def __init__(self, config, journaltag=None): """Open a hyperdatabase given a specifier to some storage. + The 'storagelocator' is obtained from config.DATABASE. The meaning of 'storagelocator' depends on the particular implementation of the hyperdatabase. It could be a file name, a directory path, a socket descriptor for a connection to a @@ -53,12 +55,28 @@ class Database(hyperdb.Database): None, the database is opened in read-only mode: the Class.create(), Class.set(), and Class.retire() methods are disabled. """ - self.dir, self.journaltag = storagelocator, journaltag + self.config, self.journaltag = config, journaltag + self.dir = config.DATABASE self.classes = {} self.cache = {} # cache of nodes loaded or created self.dirtynodes = {} # keep track of the dirty nodes by class self.newnodes = {} # keep track of the new nodes by class self.transactions = [] + self.indexer = Indexer(self.dir) + # ensure files are group readable and writable + os.umask(0002) + + def post_init(self): + """Called once the schema initialisation has finished.""" + # reindex the db if necessary + if self.indexer.should_reindex(): + self.reindex() + + def reindex(self): + for klass in self.classes.values(): + for nodeid in klass.list(): + klass.index(nodeid) + self.indexer.save_index() def __repr__(self): return ''%id(self) @@ -69,14 +87,14 @@ class Database(hyperdb.Database): def __getattr__(self, classname): """A convenient way of calling self.getclass(classname).""" if self.classes.has_key(classname): - if DEBUG: - print '__getattr__', (self, classname) + if __debug__: + print >>hyperdb.DEBUG, '__getattr__', (self, classname) return self.classes[classname] raise AttributeError, classname def addclass(self, cl): - if DEBUG: - print 'addclass', (self, cl) + if __debug__: + print >>hyperdb.DEBUG, 'addclass', (self, cl) cn = cl.classname if self.classes.has_key(cn): raise ValueError, cn @@ -84,8 +102,8 @@ class Database(hyperdb.Database): def getclasses(self): """Return a list of the names of all existing classes.""" - if DEBUG: - print 'getclasses', (self,) + if __debug__: + print >>hyperdb.DEBUG, 'getclasses', (self,) l = self.classes.keys() l.sort() return l @@ -95,8 +113,8 @@ class Database(hyperdb.Database): If 'classname' is not a valid class name, a KeyError is raised. """ - if DEBUG: - print 'getclass', (self, classname) + if __debug__: + print >>hyperdb.DEBUG, 'getclass', (self, classname) return self.classes[classname] # @@ -105,10 +123,10 @@ class Database(hyperdb.Database): def clear(self): '''Delete all database contents ''' - if DEBUG: - print 'clear', (self,) + if __debug__: + print >>hyperdb.DEBUG, 'clear', (self,) for cn in self.classes.keys(): - for type in 'nodes', 'journals': + for dummy in 'nodes', 'journals': path = os.path.join(self.dir, 'journals.%s'%cn) if os.path.exists(path): os.remove(path) @@ -119,16 +137,17 @@ class Database(hyperdb.Database): ''' grab a connection to the class db that will be used for multiple actions ''' - if DEBUG: - print 'getclassdb', (self, classname, mode) + if __debug__: + print >>hyperdb.DEBUG, 'getclassdb', (self, classname, mode) return self._opendb('nodes.%s'%classname, mode) def _opendb(self, name, mode): '''Low-level database opener that gets around anydbm/dbm eccentricities. ''' - if DEBUG: - print '_opendb', (self, name, mode) + if __debug__: + print >>hyperdb.DEBUG, '_opendb', (self, name, mode) + # determine which DB wrote the class file db_type = '' path = os.path.join(os.getcwd(), self.dir, name) @@ -143,8 +162,8 @@ class Database(hyperdb.Database): # new database? let anydbm pick the best dbm if not db_type: - if DEBUG: - print "_opendb anydbm.open(%r, 'n')"%path + if __debug__: + print >>hyperdb.DEBUG, "_opendb anydbm.open(%r, 'n')"%path return anydbm.open(path, 'n') # open the database with the correct module @@ -153,19 +172,45 @@ class Database(hyperdb.Database): except ImportError: raise hyperdb.DatabaseError, \ "Couldn't open database - the required module '%s'"\ - "is not available"%db_type - if DEBUG: - print "_opendb %r.open(%r, %r)"%(db_type, path, mode) + " is not available"%db_type + if __debug__: + print >>hyperdb.DEBUG, "_opendb %r.open(%r, %r)"%(db_type, path, + mode) return dbm.open(path, mode) + def _lockdb(self, name): + ''' Lock a database file + ''' + path = os.path.join(os.getcwd(), self.dir, '%s.lock'%name) + return acquire_lock(path) + + # + # Node IDs + # + def newid(self, classname): + ''' Generate a new id for the given class + ''' + # open the ids DB - create if if doesn't exist + lock = self._lockdb('_ids') + db = self._opendb('_ids', 'c') + if db.has_key(classname): + newid = db[classname] = str(int(db[classname]) + 1) + else: + # the count() bit is transitional - older dbs won't start at 1 + newid = str(self.getclass(classname).count()+1) + db[classname] = newid + db.close() + release_lock(lock) + return newid + # # Nodes # def addnode(self, classname, nodeid, node): ''' add the specified node to its class's db ''' - if DEBUG: - print 'addnode', (self, classname, nodeid, node) + if __debug__: + print >>hyperdb.DEBUG, 'addnode', (self, classname, nodeid, node) self.newnodes.setdefault(classname, {})[nodeid] = 1 self.cache.setdefault(classname, {})[nodeid] = node self.savenode(classname, nodeid, node) @@ -173,9 +218,10 @@ class Database(hyperdb.Database): def setnode(self, classname, nodeid, node): ''' change the specified node ''' - if DEBUG: - print 'setnode', (self, classname, nodeid, node) + if __debug__: + print >>hyperdb.DEBUG, 'setnode', (self, classname, nodeid, node) self.dirtynodes.setdefault(classname, {})[nodeid] = 1 + # can't set without having already loaded the node self.cache[classname][nodeid] = node self.savenode(classname, nodeid, node) @@ -183,38 +229,59 @@ class Database(hyperdb.Database): def savenode(self, classname, nodeid, node): ''' perform the saving of data specified by the set/addnode ''' - if DEBUG: - print 'savenode', (self, classname, nodeid, node) + if __debug__: + print >>hyperdb.DEBUG, 'savenode', (self, classname, nodeid, node) self.transactions.append((self._doSaveNode, (classname, nodeid, node))) - def getnode(self, classname, nodeid, db=None): + def getnode(self, classname, nodeid, db=None, cache=1): ''' get a node from the database ''' - if DEBUG: - print 'getnode', (self, classname, nodeid, cldb) - # try the cache - cache = self.cache.setdefault(classname, {}) - if cache.has_key(nodeid): - return cache[nodeid] + if __debug__: + print >>hyperdb.DEBUG, 'getnode', (self, classname, nodeid, db) + if cache: + # try the cache + cache_dict = self.cache.setdefault(classname, {}) + if cache_dict.has_key(nodeid): + if __debug__: + print >>hyperdb.TRACE, 'get %s %s cached'%(classname, + nodeid) + return cache_dict[nodeid] + + if __debug__: + print >>hyperdb.TRACE, 'get %s %s'%(classname, nodeid) # get from the database and save in the cache if db is None: db = self.getclassdb(classname) if not db.has_key(nodeid): - raise IndexError, nodeid + raise IndexError, "no such %s %s"%(classname, nodeid) + + # decode res = marshal.loads(db[nodeid]) - cache[nodeid] = res + + # reverse the serialisation + res = self.unserialise(classname, res) + + # store off in the cache dict + if cache: + cache_dict[nodeid] = res + return res def hasnode(self, classname, nodeid, db=None): ''' determine if the database has a given node ''' - if DEBUG: - print 'hasnode', (self, classname, nodeid, cldb) + if __debug__: + print >>hyperdb.DEBUG, 'hasnode', (self, classname, nodeid, db) + # try the cache cache = self.cache.setdefault(classname, {}) if cache.has_key(nodeid): + if __debug__: + print >>hyperdb.TRACE, 'has %s %s cached'%(classname, nodeid) return 1 + if __debug__: + print >>hyperdb.TRACE, 'has %s %s'%(classname, nodeid) # not in the cache - check the database if db is None: @@ -223,8 +290,8 @@ class Database(hyperdb.Database): return res def countnodes(self, classname, db=None): - if DEBUG: - print 'countnodes', (self, classname, cldb) + if __debug__: + print >>hyperdb.DEBUG, 'countnodes', (self, classname, db) # include the new nodes not saved to the DB yet count = len(self.newnodes.get(classname, {})) @@ -235,8 +302,8 @@ class Database(hyperdb.Database): return count def getnodeids(self, classname, db=None): - if DEBUG: - print 'getnodeids', (self, classname, db) + if __debug__: + print >>hyperdb.DEBUG, 'getnodeids', (self, classname, db) # start off with the new nodes res = self.newnodes.get(classname, {}).keys() @@ -248,36 +315,7 @@ class Database(hyperdb.Database): # # Files - special node properties - # - def filename(self, classname, nodeid, property=None): - '''Determine what the filename for the given node and optionally property is. - ''' - # TODO: split into multiple files directories - if property: - return os.path.join(self.dir, 'files', '%s%s.%s'%(classname, - nodeid, property)) - else: - # roundupdb.FileClass never specified the property name, so don't include it - return os.path.join(self.dir, 'files', '%s%s'%(classname, - nodeid)) - - def storefile(self, classname, nodeid, property, content): - '''Store the content of the file in the database. The property may be None, in - which case the filename does not indicate which property is being saved. - ''' - name = self.filename(classname, nodeid, property) - open(name + '.tmp', 'wb').write(content) - self.transactions.append((self._doStoreFile, (name, ))) - - def getfile(self, classname, nodeid, property): - '''Store the content of the file in the database. - ''' - filename = self.filename(classname, nodeid, property) - try: - return open(filename, 'rb').read() - except: - return open(filename+'.tmp', 'rb').read() - + # inherited from FileStorage # # Journal @@ -290,16 +328,17 @@ class Database(hyperdb.Database): 'link' or 'unlink' -- 'params' is (classname, nodeid, propname) 'retire' -- 'params' is None ''' - if DEBUG: - print 'addjournal', (self, classname, nodeid, action, params) + if __debug__: + print >>hyperdb.DEBUG, 'addjournal', (self, classname, nodeid, + action, params) self.transactions.append((self._doSaveJournal, (classname, nodeid, action, params))) def getjournal(self, classname, nodeid): ''' get the journal for id ''' - if DEBUG: - print 'getjournal', (self, classname, nodeid) + if __debug__: + print >>hyperdb.DEBUG, 'getjournal', (self, classname, nodeid) # attempt to open the journal - in some rare cases, the journal may # not exist try: @@ -308,14 +347,67 @@ class Database(hyperdb.Database): if str(error) == "need 'c' or 'n' flag to open new db": return [] elif error.args[0] != 2: raise return [] - journal = marshal.loads(db[nodeid]) + try: + journal = marshal.loads(db[nodeid]) + except KeyError: + db.close() + raise KeyError, 'no such %s %s'%(classname, nodeid) + db.close() res = [] for entry in journal: - (nodeid, date_stamp, self.journaltag, action, params) = entry + (nodeid, date_stamp, user, action, params) = entry date_obj = date.Date(date_stamp) - res.append((nodeid, date_obj, self.journaltag, action, params)) + res.append((nodeid, date_obj, user, action, params)) return res + def pack(self, pack_before): + ''' delete all journal entries before 'pack_before' ''' + if __debug__: + print >>hyperdb.DEBUG, 'packjournal', (self, pack_before) + + pack_before = pack_before.get_tuple() + + classes = self.getclasses() + + # TODO: factor this out to method - we're already doing it in + # _opendb. + db_type = '' + path = os.path.join(os.getcwd(), self.dir, classes[0]) + if os.path.exists(path): + db_type = whichdb.whichdb(path) + if not db_type: + raise hyperdb.DatabaseError, "Couldn't identify database type" + elif os.path.exists(path+'.db'): + db_type = 'dbm' + + for classname in classes: + db_name = 'journals.%s'%classname + db = self._opendb(db_name, 'w') + + for key in db.keys(): + journal = marshal.loads(db[key]) + l = [] + last_set_entry = None + for entry in journal: + (nodeid, date_stamp, self.journaltag, action, + params) = entry + if date_stamp > pack_before or action == 'create': + l.append(entry) + elif action == 'set': + # grab the last set entry to keep information on + # activity + last_set_entry = entry + if last_set_entry: + date_stamp = last_set_entry[1] + # if the last set entry was made after the pack date + # then it is already in the list + if date_stamp < pack_before: + l.append(last_set_entry) + db[key] = marshal.dumps(l) + if db_type == 'gdbm': + db.reorganize() + db.close() + # # Basic transaction support @@ -323,16 +415,17 @@ class Database(hyperdb.Database): def commit(self): ''' Commit the current transactions. ''' - if DEBUG: - print 'commit', (self,) + if __debug__: + print >>hyperdb.DEBUG, 'commit', (self,) # TODO: lock the DB # keep a handle to all the database files opened self.databases = {} # now, do all the transactions + reindex = {} for method, args in self.transactions: - method(*args) + reindex[method(*args)] = 1 # now close all the database files for db in self.databases.values(): @@ -340,6 +433,14 @@ class Database(hyperdb.Database): del self.databases # TODO: unlock the DB + # reindex the nodes that request it + for classname, nodeid in filter(None, reindex.keys()): + print >>hyperdb.DEBUG, 'commit.reindex', (classname, nodeid) + self.getclass(classname).index(nodeid) + + # save the indexer state + self.indexer.save_index() + # all transactions committed, back to normal self.cache = {} self.dirtynodes = {} @@ -347,8 +448,9 @@ class Database(hyperdb.Database): self.transactions = [] def _doSaveNode(self, classname, nodeid, node): - if DEBUG: - print '_doSaveNode', (self, classname, nodeid, node) + if __debug__: + print >>hyperdb.DEBUG, '_doSaveNode', (self, classname, nodeid, + node) # get the database handle db_name = 'nodes.%s'%classname @@ -358,14 +460,23 @@ class Database(hyperdb.Database): db = self.databases[db_name] = self.getclassdb(classname, 'c') # now save the marshalled data - db[nodeid] = marshal.dumps(node) + db[nodeid] = marshal.dumps(self.serialise(classname, node)) + + # return the classname, nodeid so we reindex this content + return (classname, nodeid) def _doSaveJournal(self, classname, nodeid, action, params): - if DEBUG: - print '_doSaveJournal', (self, classname, nodeid, action, params) + # serialise first + if action in ('set', 'create'): + params = self.serialise(classname, params) + + # create the journal entry entry = (nodeid, date.Date().get_tuple(), self.journaltag, action, params) + if __debug__: + print >>hyperdb.DEBUG, '_doSaveJournal', entry + # get the database handle db_name = 'journals.%s'%classname if self.databases.has_key(db_name): @@ -375,26 +486,24 @@ class Database(hyperdb.Database): # now insert the journal entry if db.has_key(nodeid): + # append to existing s = db[nodeid] - l = marshal.loads(db[nodeid]) + l = marshal.loads(s) l.append(entry) else: l = [entry] - db[nodeid] = marshal.dumps(l) - def _doStoreFile(self, name, **databases): - # the file is currently ".tmp" - move it to its real name to commit - os.rename(name+".tmp", name) + db[nodeid] = marshal.dumps(l) def rollback(self): ''' Reverse all actions from the current transaction. ''' - if DEBUG: - print 'rollback', (self, ) + if __debug__: + print >>hyperdb.DEBUG, 'rollback', (self, ) for method, args in self.transactions: # delete temporary files if method == self._doStoreFile: - os.remove(args[0]+".tmp") + self._rollbackStoreFile(*args) self.cache = {} self.dirtynodes = {} self.newnodes = {} @@ -402,6 +511,145 @@ class Database(hyperdb.Database): # #$Log: not supported by cvs2svn $ +#Revision 1.42 2002/07/10 06:21:38 richard +#Be extra safe +# +#Revision 1.41 2002/07/10 00:21:45 richard +#explicit database closing +# +#Revision 1.40 2002/07/09 04:19:09 richard +#Added reindex command to roundup-admin. +#Fixed reindex on first access. +#Also fixed reindexing of entries that change. +# +#Revision 1.39 2002/07/09 03:02:52 richard +#More indexer work: +#- all String properties may now be indexed too. Currently there's a bit of +# "issue" specific code in the actual searching which needs to be +# addressed. In a nutshell: +# + pass 'indexme="yes"' as a String() property initialisation arg, eg: +# file = FileClass(db, "file", name=String(), type=String(), +# comment=String(indexme="yes")) +# + the comment will then be indexed and be searchable, with the results +# related back to the issue that the file is linked to +#- as a result of this work, the FileClass has a default MIME type that may +# be overridden in a subclass, or by the use of a "type" property as is +# done in the default templates. +#- the regeneration of the indexes (if necessary) is done once the schema is +# set up in the dbinit. +# +#Revision 1.38 2002/07/08 06:58:15 richard +#cleaned up the indexer code: +# - it splits more words out (much simpler, faster splitter) +# - removed code we'll never use (roundup.roundup_indexer has the full +# implementation, and replaces roundup.indexer) +# - only index text/plain and rfc822/message (ideas for other text formats to +# index are welcome) +# - added simple unit test for indexer. Needs more tests for regression. +# +#Revision 1.37 2002/06/20 23:52:35 richard +#More informative error message +# +#Revision 1.36 2002/06/19 03:07:19 richard +#Moved the file storage commit into blobfiles where it belongs. +# +#Revision 1.35 2002/05/25 07:16:24 rochecompaan +#Merged search_indexing-branch with HEAD +# +#Revision 1.34 2002/05/15 06:21:21 richard +# . node caching now works, and gives a small boost in performance +# +#As a part of this, I cleaned up the DEBUG output and implemented TRACE +#output (HYPERDBTRACE='file to trace to') with checkpoints at the start of +#CGI requests. Run roundup with python -O to skip all the DEBUG/TRACE stuff +#(using if __debug__ which is compiled out with -O) +# +#Revision 1.33 2002/04/24 10:38:26 rochecompaan +#All database files are now created group readable and writable. +# +#Revision 1.32 2002/04/15 23:25:15 richard +#. node ids are now generated from a lockable store - no more race conditions +# +#We're using the portalocker code by Jonathan Feinberg that was contributed +#to the ASPN Python cookbook. This gives us locking across Unix and Windows. +# +#Revision 1.31 2002/04/03 05:54:31 richard +#Fixed serialisation problem by moving the serialisation step out of the +#hyperdb.Class (get, set) into the hyperdb.Database. +# +#Also fixed htmltemplate after the showid changes I made yesterday. +# +#Unit tests for all of the above written. +# +#Revision 1.30.2.1 2002/04/03 11:55:57 rochecompaan +# . Added feature #526730 - search for messages capability +# +#Revision 1.30 2002/02/27 03:40:59 richard +#Ran it through pychecker, made fixes +# +#Revision 1.29 2002/02/25 14:34:31 grubert +# . use blobfiles in back_anydbm which is used in back_bsddb. +# change test_db as dirlist does not work for subdirectories. +# ATTENTION: blobfiles now creates subdirectories for files. +# +#Revision 1.28 2002/02/16 09:14:17 richard +# . #514854 ] History: "User" is always ticket creator +# +#Revision 1.27 2002/01/22 07:21:13 richard +#. fixed back_bsddb so it passed the journal tests +# +#... it didn't seem happy using the back_anydbm _open method, which is odd. +#Yet another occurrance of whichdb not being able to recognise older bsddb +#databases. Yadda yadda. Made the HYPERDBDEBUG stuff more sane in the +#process. +# +#Revision 1.26 2002/01/22 05:18:38 rochecompaan +#last_set_entry was referenced before assignment +# +#Revision 1.25 2002/01/22 05:06:08 rochecompaan +#We need to keep the last 'set' entry in the journal to preserve +#information on 'activity' for nodes. +# +#Revision 1.24 2002/01/21 16:33:20 rochecompaan +#You can now use the roundup-admin tool to pack the database +# +#Revision 1.23 2002/01/18 04:32:04 richard +#Rollback was breaking because a message hadn't actually been written to the file. Needs +#more investigation. +# +#Revision 1.22 2002/01/14 02:20:15 richard +# . changed all config accesses so they access either the instance or the +# config attriubute on the db. This means that all config is obtained from +# instance_config instead of the mish-mash of classes. This will make +# switching to a ConfigParser setup easier too, I hope. +# +#At a minimum, this makes migration a _little_ easier (a lot easier in the +#0.5.0 switch, I hope!) +# +#Revision 1.21 2002/01/02 02:31:38 richard +#Sorry for the huge checkin message - I was only intending to implement #496356 +#but I found a number of places where things had been broken by transactions: +# . modified ROUNDUPDBSENDMAILDEBUG to be SENDMAILDEBUG and hold a filename +# for _all_ roundup-generated smtp messages to be sent to. +# . the transaction cache had broken the roundupdb.Class set() reactors +# . newly-created author users in the mailgw weren't being committed to the db +# +#Stuff that made it into CHANGES.txt (ie. the stuff I was actually working +#on when I found that stuff :): +# . #496356 ] Use threading in messages +# . detectors were being registered multiple times +# . added tests for mailgw +# . much better attaching of erroneous messages in the mail gateway +# +#Revision 1.20 2001/12/18 15:30:34 rochecompaan +#Fixed bugs: +# . Fixed file creation and retrieval in same transaction in anydbm +# backend +# . Cgi interface now renders new issue after issue creation +# . Could not set issue status to resolved through cgi interface +# . Mail gateway was changing status back to 'chatting' if status was +# omitted as an argument +# #Revision 1.19 2001/12/17 03:52:48 richard #Implemented file store rollback. As a bonus, the hyperdb is now capable of #storing more than one file per node - if a property name is supplied,