X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=roundup%2Fbackends%2Fback_anydbm.py;h=5f30b1ad6cffb3d2511a2d17070c84cdce4a9dd1;hb=e4c89796238027912e80acdab4c8c7a67fc21a9e;hp=eb2c143062432fb51bf410b10785a85dcd57cb8a;hpb=dbefb5b8876f101fc52733479352a0667d8ec62d;p=roundup.git
diff --git a/roundup/backends/back_anydbm.py b/roundup/backends/back_anydbm.py
index eb2c143..5f30b1a 100644
--- a/roundup/backends/back_anydbm.py
+++ b/roundup/backends/back_anydbm.py
@@ -15,20 +15,33 @@
 # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
 # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 #
-#$Id: back_anydbm.py,v 1.11 2001-11-21 02:34:18 richard Exp $
+#$Id: back_anydbm.py,v 1.28 2002-02-16 09:14:17 richard Exp $
+'''
+This module defines a backend that saves the hyperdatabase in a database
+chosen by anydbm. It is guaranteed to always be available in python
+versions >2.1.1 (the dumbdbm fallback in 2.1.1 and earlier has several
+serious bugs, and is not available)
+'''
 
-import anydbm, os, marshal
+import whichdb, anydbm, os, marshal
 from roundup import hyperdb, date, password
 
 #
 # Now the database
 #
 class Database(hyperdb.Database):
-    """A database for storing records containing flexible data types."""
+    """A database for storing records containing flexible data types.
 
-    def __init__(self, storagelocator, journaltag=None):
+    Transaction stuff TODO:
+        . check the timestamp of the class file and nuke the cache if it's
+          modified. Do some sort of conflict checking on the dirty stuff.
+        . perhaps detect write collisions (related to above)?
+
+    """
+    def __init__(self, config, journaltag=None):
         """Open a hyperdatabase given a specifier to some storage.
 
+        The 'storagelocator' is obtained from config.DATABASE.
         The meaning of 'storagelocator' depends on the particular
         implementation of the hyperdatabase. It could be a file name,
         a directory path, a socket descriptor for a connection to a
@@ -39,18 +52,31 @@ class Database(hyperdb.Database):
         None, the database is opened in read-only mode: the Class.create(),
         Class.set(), and Class.retire() methods are disabled.
         """
-        self.dir, self.journaltag = storagelocator, journaltag
+        self.config, self.journaltag = config, journaltag
+        self.dir = config.DATABASE
         self.classes = {}
+        self.cache = {}         # cache of nodes loaded or created
+        self.dirtynodes = {}    # keep track of the dirty nodes by class
+        self.newnodes = {}      # keep track of the new nodes by class
         self.transactions = []
 
+    def __repr__(self):
+        return '<back_anydbm instance at %x>'%id(self)
+
     #
     # Classes
     #
     def __getattr__(self, classname):
         """A convenient way of calling self.getclass(classname)."""
-        return self.classes[classname]
+        if self.classes.has_key(classname):
+            if hyperdb.DEBUG:
+                print '__getattr__', (self, classname)
+            return self.classes[classname]
+        raise AttributeError, classname
 
     def addclass(self, cl):
+        if hyperdb.DEBUG:
+            print 'addclass', (self, cl)
         cn = cl.classname
         if self.classes.has_key(cn):
             raise ValueError, cn
@@ -58,6 +84,8 @@ class Database(hyperdb.Database):
 
     def getclasses(self):
         """Return a list of the names of all existing classes."""
+        if hyperdb.DEBUG:
+            print 'getclasses', (self,)
         l = self.classes.keys()
         l.sort()
         return l
@@ -67,70 +95,192 @@ class Database(hyperdb.Database):
 
         If 'classname' is not a valid class name, a KeyError is raised.
""" + if hyperdb.DEBUG: + print 'getclass', (self, classname) return self.classes[classname] # # Class DBs # def clear(self): + '''Delete all database contents + ''' + if hyperdb.DEBUG: + print 'clear', (self,) for cn in self.classes.keys(): - db = os.path.join(self.dir, 'nodes.%s'%cn) - anydbm.open(db, 'n') - db = os.path.join(self.dir, 'journals.%s'%cn) - anydbm.open(db, 'n') + for type in 'nodes', 'journals': + path = os.path.join(self.dir, 'journals.%s'%cn) + if os.path.exists(path): + os.remove(path) + elif os.path.exists(path+'.db'): # dbm appends .db + os.remove(path+'.db') def getclassdb(self, classname, mode='r'): ''' grab a connection to the class db that will be used for multiple actions ''' - path = os.path.join(os.getcwd(), self.dir, 'nodes.%s'%classname) + if hyperdb.DEBUG: + print 'getclassdb', (self, classname, mode) + return self._opendb('nodes.%s'%classname, mode) + + def _opendb(self, name, mode): + '''Low-level database opener that gets around anydbm/dbm + eccentricities. + ''' + if hyperdb.DEBUG: + print '_opendb', (self, name, mode) + # determine which DB wrote the class file + db_type = '' + path = os.path.join(os.getcwd(), self.dir, name) if os.path.exists(path): - return anydbm.open(path, mode) - else: + db_type = whichdb.whichdb(path) + if not db_type: + raise hyperdb.DatabaseError, "Couldn't identify database type" + elif os.path.exists(path+'.db'): + # if the path ends in '.db', it's a dbm database, whether + # anydbm says it's dbhash or not! + db_type = 'dbm' + + # new database? let anydbm pick the best dbm + if not db_type: + if hyperdb.DEBUG: + print "_opendb anydbm.open(%r, 'n')"%path return anydbm.open(path, 'n') + # open the database with the correct module + try: + dbm = __import__(db_type) + except ImportError: + raise hyperdb.DatabaseError, \ + "Couldn't open database - the required module '%s'"\ + "is not available"%db_type + if hyperdb.DEBUG: + print "_opendb %r.open(%r, %r)"%(db_type, path, mode) + return dbm.open(path, mode) + # # Nodes # def addnode(self, classname, nodeid, node): ''' add the specified node to its class's db ''' - db = self.getclassdb(classname, 'c') - # now save the marshalled data - db[nodeid] = marshal.dumps(node) - db.close() - setnode = addnode + if hyperdb.DEBUG: + print 'addnode', (self, classname, nodeid, node) + self.newnodes.setdefault(classname, {})[nodeid] = 1 + self.cache.setdefault(classname, {})[nodeid] = node + self.savenode(classname, nodeid, node) - def getnode(self, classname, nodeid, cldb=None): - ''' add the specified node to its class's db + def setnode(self, classname, nodeid, node): + ''' change the specified node ''' - db = cldb or self.getclassdb(classname) + if hyperdb.DEBUG: + print 'setnode', (self, classname, nodeid, node) + self.dirtynodes.setdefault(classname, {})[nodeid] = 1 + # can't set without having already loaded the node + self.cache[classname][nodeid] = node + self.savenode(classname, nodeid, node) + + def savenode(self, classname, nodeid, node): + ''' perform the saving of data specified by the set/addnode + ''' + if hyperdb.DEBUG: + print 'savenode', (self, classname, nodeid, node) + self.transactions.append((self._doSaveNode, (classname, nodeid, node))) + + def getnode(self, classname, nodeid, db=None, cache=1): + ''' get a node from the database + ''' + if hyperdb.DEBUG: + print 'getnode', (self, classname, nodeid, db) + if cache: + # try the cache + cache = self.cache.setdefault(classname, {}) + if cache.has_key(nodeid): + return cache[nodeid] + + # get from the database and save in the 
cache + if db is None: + db = self.getclassdb(classname) if not db.has_key(nodeid): - raise IndexError, nodeid + raise IndexError, "no such %s %s"%(classname, nodeid) res = marshal.loads(db[nodeid]) - if not cldb: db.close() + if cache: + cache[nodeid] = res return res - def hasnode(self, classname, nodeid, cldb=None): - ''' add the specified node to its class's db + def hasnode(self, classname, nodeid, db=None): + ''' determine if the database has a given node ''' - db = cldb or self.getclassdb(classname) + if hyperdb.DEBUG: + print 'hasnode', (self, classname, nodeid, db) + # try the cache + cache = self.cache.setdefault(classname, {}) + if cache.has_key(nodeid): + return 1 + + # not in the cache - check the database + if db is None: + db = self.getclassdb(classname) res = db.has_key(nodeid) - if not cldb: db.close() return res - def countnodes(self, classname, cldb=None): - db = cldb or self.getclassdb(classname) - return len(db.keys()) - if not cldb: db.close() - return res + def countnodes(self, classname, db=None): + if hyperdb.DEBUG: + print 'countnodes', (self, classname, db) + # include the new nodes not saved to the DB yet + count = len(self.newnodes.get(classname, {})) + + # and count those in the DB + if db is None: + db = self.getclassdb(classname) + count = count + len(db.keys()) + return count - def getnodeids(self, classname, cldb=None): - db = cldb or self.getclassdb(classname) - res = db.keys() - if not cldb: db.close() + def getnodeids(self, classname, db=None): + if hyperdb.DEBUG: + print 'getnodeids', (self, classname, db) + # start off with the new nodes + res = self.newnodes.get(classname, {}).keys() + + if db is None: + db = self.getclassdb(classname) + res = res + db.keys() return res + + # + # Files - special node properties + # + def filename(self, classname, nodeid, property=None): + '''Determine what the filename for the given node and optionally property is. + ''' + # TODO: split into multiple files directories + if property: + return os.path.join(self.dir, 'files', '%s%s.%s'%(classname, + nodeid, property)) + else: + # roundupdb.FileClass never specified the property name, so don't include it + return os.path.join(self.dir, 'files', '%s%s'%(classname, + nodeid)) + + def storefile(self, classname, nodeid, property, content): + '''Store the content of the file in the database. The property may be None, in + which case the filename does not indicate which property is being saved. + ''' + name = self.filename(classname, nodeid, property) + open(name + '.tmp', 'wb').write(content) + self.transactions.append((self._doStoreFile, (name, ))) + + def getfile(self, classname, nodeid, property): + '''Store the content of the file in the database. 
+ ''' + filename = self.filename(classname, nodeid, property) + try: + return open(filename, 'rb').read() + except: + return open(filename+'.tmp', 'rb').read() + + # # Journal # @@ -142,45 +292,80 @@ class Database(hyperdb.Database): 'link' or 'unlink' -- 'params' is (classname, nodeid, propname) 'retire' -- 'params' is None ''' - entry = (nodeid, date.Date().get_tuple(), self.journaltag, action, - params) - db = anydbm.open(os.path.join(self.dir, 'journals.%s'%classname), 'c') - if db.has_key(nodeid): - s = db[nodeid] - l = marshal.loads(db[nodeid]) - l.append(entry) - else: - l = [entry] - db[nodeid] = marshal.dumps(l) - db.close() + if hyperdb.DEBUG: + print 'addjournal', (self, classname, nodeid, action, params) + self.transactions.append((self._doSaveJournal, (classname, nodeid, + action, params))) def getjournal(self, classname, nodeid): ''' get the journal for id ''' + if hyperdb.DEBUG: + print 'getjournal', (self, classname, nodeid) # attempt to open the journal - in some rare cases, the journal may # not exist try: - db = anydbm.open(os.path.join(self.dir, 'journals.%s'%classname), - 'r') - except anydbm.open, error: - if error.args[0] != 2: raise + db = self._opendb('journals.%s'%classname, 'r') + except anydbm.error, error: + if str(error) == "need 'c' or 'n' flag to open new db": return [] + elif error.args[0] != 2: raise return [] journal = marshal.loads(db[nodeid]) res = [] for entry in journal: - (nodeid, date_stamp, self.journaltag, action, params) = entry + (nodeid, date_stamp, user, action, params) = entry date_obj = date.Date(date_stamp) - res.append((nodeid, date_obj, self.journaltag, action, params)) - db.close() + res.append((nodeid, date_obj, user, action, params)) return res - def close(self): - ''' Close the Database - we must release the circular refs so that - we can be del'ed and the underlying anydbm connections closed - cleanly. - ''' - self.classes = {} + def pack(self, pack_before): + ''' delete all journal entries before 'pack_before' ''' + if hyperdb.DEBUG: + print 'packjournal', (self, pack_before) + + pack_before = pack_before.get_tuple() + + classes = self.getclasses() + + # TODO: factor this out to method - we're already doing it in + # _opendb. + db_type = '' + path = os.path.join(os.getcwd(), self.dir, classes[0]) + if os.path.exists(path): + db_type = whichdb.whichdb(path) + if not db_type: + raise hyperdb.DatabaseError, "Couldn't identify database type" + elif os.path.exists(path+'.db'): + db_type = 'dbm' + + for classname in classes: + db_name = 'journals.%s'%classname + db = self._opendb(db_name, 'w') + for key in db.keys(): + journal = marshal.loads(db[key]) + l = [] + last_set_entry = None + for entry in journal: + (nodeid, date_stamp, self.journaltag, action, + params) = entry + if date_stamp > pack_before or action == 'create': + l.append(entry) + elif action == 'set': + # grab the last set entry to keep information on + # activity + last_set_entry = entry + if last_set_entry: + date_stamp = last_set_entry[1] + # if the last set entry was made after the pack date + # then it is already in the list + if date_stamp < pack_before: + l.append(last_set_entry) + db[key] = marshal.dumps(l) + if db_type == 'gdbm': + db.reorganize() + db.close() + # # Basic transaction support @@ -188,19 +373,205 @@ class Database(hyperdb.Database): def commit(self): ''' Commit the current transactions. ''' - # lock the DB - for action, classname, entry in self.transactions: - # write the node, figure what's changed for the journal. 
- pass - # unlock the DB + if hyperdb.DEBUG: + print 'commit', (self,) + # TODO: lock the DB + + # keep a handle to all the database files opened + self.databases = {} + + # now, do all the transactions + for method, args in self.transactions: + method(*args) + + # now close all the database files + for db in self.databases.values(): + db.close() + del self.databases + # TODO: unlock the DB + + # all transactions committed, back to normal + self.cache = {} + self.dirtynodes = {} + self.newnodes = {} + self.transactions = [] + + def _doSaveNode(self, classname, nodeid, node): + if hyperdb.DEBUG: + print '_doSaveNode', (self, classname, nodeid, node) + + # get the database handle + db_name = 'nodes.%s'%classname + if self.databases.has_key(db_name): + db = self.databases[db_name] + else: + db = self.databases[db_name] = self.getclassdb(classname, 'c') + + # now save the marshalled data + db[nodeid] = marshal.dumps(node) + + def _doSaveJournal(self, classname, nodeid, action, params): + entry = (nodeid, date.Date().get_tuple(), self.journaltag, action, + params) + if hyperdb.DEBUG: + print '_doSaveJournal', entry + + # get the database handle + db_name = 'journals.%s'%classname + if self.databases.has_key(db_name): + db = self.databases[db_name] + else: + db = self.databases[db_name] = self._opendb(db_name, 'c') + + # now insert the journal entry + if db.has_key(nodeid): + s = db[nodeid] + l = marshal.loads(db[nodeid]) + l.append(entry) + else: + l = [entry] + db[nodeid] = marshal.dumps(l) + + def _doStoreFile(self, name, **databases): + # the file is currently ".tmp" - move it to its real name to commit + os.rename(name+".tmp", name) def rollback(self): ''' Reverse all actions from the current transaction. ''' + if hyperdb.DEBUG: + print 'rollback', (self, ) + for method, args in self.transactions: + # delete temporary files + if method == self._doStoreFile: + if os.path.exists(args[0]+".tmp"): + os.remove(args[0]+".tmp") + self.cache = {} + self.dirtynodes = {} + self.newnodes = {} self.transactions = [] # #$Log: not supported by cvs2svn $ +#Revision 1.27 2002/01/22 07:21:13 richard +#. fixed back_bsddb so it passed the journal tests +# +#... it didn't seem happy using the back_anydbm _open method, which is odd. +#Yet another occurrance of whichdb not being able to recognise older bsddb +#databases. Yadda yadda. Made the HYPERDBDEBUG stuff more sane in the +#process. +# +#Revision 1.26 2002/01/22 05:18:38 rochecompaan +#last_set_entry was referenced before assignment +# +#Revision 1.25 2002/01/22 05:06:08 rochecompaan +#We need to keep the last 'set' entry in the journal to preserve +#information on 'activity' for nodes. +# +#Revision 1.24 2002/01/21 16:33:20 rochecompaan +#You can now use the roundup-admin tool to pack the database +# +#Revision 1.23 2002/01/18 04:32:04 richard +#Rollback was breaking because a message hadn't actually been written to the file. Needs +#more investigation. +# +#Revision 1.22 2002/01/14 02:20:15 richard +# . changed all config accesses so they access either the instance or the +# config attriubute on the db. This means that all config is obtained from +# instance_config instead of the mish-mash of classes. This will make +# switching to a ConfigParser setup easier too, I hope. +# +#At a minimum, this makes migration a _little_ easier (a lot easier in the +#0.5.0 switch, I hope!) 
+# +#Revision 1.21 2002/01/02 02:31:38 richard +#Sorry for the huge checkin message - I was only intending to implement #496356 +#but I found a number of places where things had been broken by transactions: +# . modified ROUNDUPDBSENDMAILDEBUG to be SENDMAILDEBUG and hold a filename +# for _all_ roundup-generated smtp messages to be sent to. +# . the transaction cache had broken the roundupdb.Class set() reactors +# . newly-created author users in the mailgw weren't being committed to the db +# +#Stuff that made it into CHANGES.txt (ie. the stuff I was actually working +#on when I found that stuff :): +# . #496356 ] Use threading in messages +# . detectors were being registered multiple times +# . added tests for mailgw +# . much better attaching of erroneous messages in the mail gateway +# +#Revision 1.20 2001/12/18 15:30:34 rochecompaan +#Fixed bugs: +# . Fixed file creation and retrieval in same transaction in anydbm +# backend +# . Cgi interface now renders new issue after issue creation +# . Could not set issue status to resolved through cgi interface +# . Mail gateway was changing status back to 'chatting' if status was +# omitted as an argument +# +#Revision 1.19 2001/12/17 03:52:48 richard +#Implemented file store rollback. As a bonus, the hyperdb is now capable of +#storing more than one file per node - if a property name is supplied, +#the file is called designator.property. +#I decided not to migrate the existing files stored over to the new naming +#scheme - the FileClass just doesn't specify the property name. +# +#Revision 1.18 2001/12/16 10:53:38 richard +#take a copy of the node dict so that the subsequent set +#operation doesn't modify the oldvalues structure +# +#Revision 1.17 2001/12/14 23:42:57 richard +#yuck, a gdbm instance tests false :( +#I've left the debugging code in - it should be removed one day if we're ever +#_really_ anal about performace :) +# +#Revision 1.16 2001/12/12 03:23:14 richard +#Cor blimey this anydbm/whichdb stuff is yecchy. Turns out that whichdb +#incorrectly identifies a dbm file as a dbhash file on my system. This has +#been submitted to the python bug tracker as issue #491888: +#https://sourceforge.net/tracker/index.php?func=detail&aid=491888&group_id=5470&atid=105470 +# +#Revision 1.15 2001/12/12 02:30:51 richard +#I fixed the problems with people whose anydbm was using the dbm module at the +#backend. It turns out the dbm module modifies the file name to append ".db" +#and my check to determine if we're opening an existing or new db just +#tested os.path.exists() on the filename. Well, no longer! We now perform a +#much better check _and_ cope with the anydbm implementation module changing +#too! +#I also fixed the backends __init__ so only ImportError is squashed. +# +#Revision 1.14 2001/12/10 22:20:01 richard +#Enabled transaction support in the bsddb backend. It uses the anydbm code +#where possible, only replacing methods where the db is opened (it uses the +#btree opener specifically.) +#Also cleaned up some change note generation. +#Made the backends package work with pydoc too. +# +#Revision 1.13 2001/12/02 05:06:16 richard +#. We now use weakrefs in the Classes to keep the database reference, so +# the close() method on the database is no longer needed. +# I bumped the minimum python requirement up to 2.1 accordingly. +#. #487480 ] roundup-server +#. #487476 ] INSTALL.txt +# +#I also cleaned up the change message / post-edit stuff in the cgi client. 
+#There's now a clearly marked "TODO: append the change note" where I believe +#the change note should be added there. The "changes" list will obviously +#have to be modified to be a dict of the changes, or somesuch. +# +#More testing needed. +# +#Revision 1.12 2001/12/01 07:17:50 richard +#. We now have basic transaction support! Information is only written to +# the database when the commit() method is called. Only the anydbm +# backend is modified in this way - neither of the bsddb backends have been. +# The mail, admin and cgi interfaces all use commit (except the admin tool +# doesn't have a commit command, so interactive users can't commit...) +#. Fixed login/registration forwarding the user to the right page (or not, +# on a failure) +# +#Revision 1.11 2001/11/21 02:34:18 richard +#Added a target version field to the extended issue schema +# #Revision 1.10 2001/10/09 23:58:10 richard #Moved the data stringification up into the hyperdb.Class class' get, set #and create methods. This means that the data is also stringified for the
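
The core of this revision is the deferred-write transaction support: mutating calls such as savenode(), addjournal() and storefile() no longer hit the dbm files directly; instead they append (method, args) tuples to self.transactions, commit() replays the queue against database handles that are opened once and closed at the end, and rollback() throws the queue away, removing any '.tmp' files that storefile() left behind. The sketch below is a minimal, self-contained illustration of that pattern, written for modern Python with hypothetical names (TxStore, the demo path); it is not the actual roundup.backends.back_anydbm API.

import dbm, marshal, os

class TxStore:
    '''Toy store that queues writes and only applies them on commit().'''
    def __init__(self, directory):
        self.dir = directory
        self.transactions = []          # queued (method, args) tuples

    # -- mutators only queue work; nothing touches the dbm files yet
    def savenode(self, classname, nodeid, node):
        self.transactions.append((self._do_save_node, (classname, nodeid, node)))

    def storefile(self, name, content):
        # content goes to a '.tmp' file now; the rename is deferred to commit()
        with open(name + '.tmp', 'wb') as f:
            f.write(content)
        self.transactions.append((self._do_store_file, (name,)))

    # -- deferred actions, replayed by commit()
    def _do_save_node(self, classname, nodeid, node):
        db_name = 'nodes.%s' % classname
        if db_name not in self.databases:   # one handle per dbm file per commit
            self.databases[db_name] = dbm.open(os.path.join(self.dir, db_name), 'c')
        self.databases[db_name][nodeid] = marshal.dumps(node)

    def _do_store_file(self, name):
        os.rename(name + '.tmp', name)      # the rename is the real commit

    # -- transaction boundaries
    def commit(self):
        self.databases = {}
        try:
            for method, args in self.transactions:
                method(*args)
        finally:
            for db in self.databases.values():
                db.close()
            del self.databases
        self.transactions = []

    def rollback(self):
        for method, args in self.transactions:
            # the only on-disk side effect of a queued action is the temp file
            if method == self._do_store_file and os.path.exists(args[0] + '.tmp'):
                os.remove(args[0] + '.tmp')
        self.transactions = []

# usage sketch: nothing reaches the dbm files until commit()
if __name__ == '__main__':
    os.makedirs('/tmp/txstore-demo', exist_ok=True)
    store = TxStore('/tmp/txstore-demo')
    store.savenode('issue', '1', {'title': 'test', 'status': '1'})
    store.commit()                          # or store.rollback() to discard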