index 9f3124081171957316d70e5a67caf0f5064bac45..7c95841eeca0f54bd23b84f0d19f7d6ca65456be 100644 (file)
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
-#$Id: back_anydbm.py,v 1.8 2001-09-29 13:27:00 richard Exp $
+#$Id: back_anydbm.py,v 1.29 2002-02-25 14:34:31 grubert Exp $
+'''
+This module defines a backend that saves the hyperdatabase in a database
+chosen by anydbm. It is guaranteed to always be available in python
+versions >2.1.1 (the dumbdbm fallback in 2.1.1 and earlier has several
+serious bugs, and is not available)
+'''
-import anydbm, os, marshal
-from roundup import hyperdb, date
+import whichdb, anydbm, os, marshal
+from roundup import hyperdb, date, password
+from blobfiles import FileStorage
#
# Now the database
#
-class Database(hyperdb.Database):
- """A database for storing records containing flexible data types."""
+class Database(FileStorage, hyperdb.Database):
+ """A database for storing records containing flexible data types.
- def __init__(self, storagelocator, journaltag=None):
+ Transaction stuff TODO:
+ . check the timestamp of the class file and nuke the cache if it's
+ modified. Do some sort of conflict checking on the dirty stuff.
+ . perhaps detect write collisions (related to above)?
+
+ """
+ def __init__(self, config, journaltag=None):
"""Open a hyperdatabase given a specifier to some storage.
+ The 'storagelocator' is obtained from config.DATABASE.
The meaning of 'storagelocator' depends on the particular
implementation of the hyperdatabase. It could be a file name,
a directory path, a socket descriptor for a connection to a
None, the database is opened in read-only mode: the Class.create(),
Class.set(), and Class.retire() methods are disabled.
"""
- self.dir, self.journaltag = storagelocator, journaltag
+ self.config, self.journaltag = config, journaltag
+ self.dir = config.DATABASE
self.classes = {}
+ self.cache = {} # cache of nodes loaded or created
+ self.dirtynodes = {} # keep track of the dirty nodes by class
+ self.newnodes = {} # keep track of the new nodes by class
self.transactions = []
+ def __repr__(self):
+ return '<back_anydbm instance at %x>'%id(self)
+
#
# Classes
#
def __getattr__(self, classname):
"""A convenient way of calling self.getclass(classname)."""
- return self.classes[classname]
+ if self.classes.has_key(classname):
+ if hyperdb.DEBUG:
+ print '__getattr__', (self, classname)
+ return self.classes[classname]
+ raise AttributeError, classname
def addclass(self, cl):
+ if hyperdb.DEBUG:
+ print 'addclass', (self, cl)
cn = cl.classname
if self.classes.has_key(cn):
raise ValueError, cn
def getclasses(self):
"""Return a list of the names of all existing classes."""
+ if hyperdb.DEBUG:
+ print 'getclasses', (self,)
l = self.classes.keys()
l.sort()
return l
If 'classname' is not a valid class name, a KeyError is raised.
"""
+ if hyperdb.DEBUG:
+ print 'getclass', (self, classname)
return self.classes[classname]
#
# Class DBs
#
def clear(self):
+ '''Delete all database contents
+ '''
+ if hyperdb.DEBUG:
+ print 'clear', (self,)
for cn in self.classes.keys():
- db = os.path.join(self.dir, 'nodes.%s'%cn)
- anydbm.open(db, 'n')
- db = os.path.join(self.dir, 'journals.%s'%cn)
- anydbm.open(db, 'n')
+ for type in 'nodes', 'journals':
+ path = os.path.join(self.dir, 'journals.%s'%cn)
+ if os.path.exists(path):
+ os.remove(path)
+ elif os.path.exists(path+'.db'): # dbm appends .db
+ os.remove(path+'.db')
def getclassdb(self, classname, mode='r'):
''' grab a connection to the class db that will be used for
multiple actions
'''
- path = os.path.join(os.getcwd(), self.dir, 'nodes.%s'%classname)
+ if hyperdb.DEBUG:
+ print 'getclassdb', (self, classname, mode)
+ return self._opendb('nodes.%s'%classname, mode)
+
+ def _opendb(self, name, mode):
+ '''Low-level database opener that gets around anydbm/dbm
+ eccentricities.
+ '''
+ if hyperdb.DEBUG:
+ print '_opendb', (self, name, mode)
+ # determine which DB wrote the class file
+ db_type = ''
+ path = os.path.join(os.getcwd(), self.dir, name)
if os.path.exists(path):
- return anydbm.open(path, mode)
- else:
+ db_type = whichdb.whichdb(path)
+ if not db_type:
+ raise hyperdb.DatabaseError, "Couldn't identify database type"
+ elif os.path.exists(path+'.db'):
+ # if the path ends in '.db', it's a dbm database, whether
+ # anydbm says it's dbhash or not!
+ db_type = 'dbm'
+
+ # new database? let anydbm pick the best dbm
+ if not db_type:
+ if hyperdb.DEBUG:
+ print "_opendb anydbm.open(%r, 'n')"%path
return anydbm.open(path, 'n')
+ # open the database with the correct module
+ try:
+ dbm = __import__(db_type)
+ except ImportError:
+ raise hyperdb.DatabaseError, \
+ "Couldn't open database - the required module '%s'"\
+ "is not available"%db_type
+ if hyperdb.DEBUG:
+ print "_opendb %r.open(%r, %r)"%(db_type, path, mode)
+ return dbm.open(path, mode)
+
#
# Nodes
#
def addnode(self, classname, nodeid, node):
''' add the specified node to its class's db
'''
- db = self.getclassdb(classname, 'c')
+ if hyperdb.DEBUG:
+ print 'addnode', (self, classname, nodeid, node)
+ self.newnodes.setdefault(classname, {})[nodeid] = 1
+ self.cache.setdefault(classname, {})[nodeid] = node
+ self.savenode(classname, nodeid, node)
- # convert the instance data to builtin types
- properties = self.classes[classname].properties
- for key in properties.keys():
- if isinstance(properties[key], hyperdb.Date):
- node[key] = node[key].get_tuple()
- elif isinstance(properties[key], hyperdb.Interval):
- node[key] = node[key].get_tuple()
+ def setnode(self, classname, nodeid, node):
+ ''' change the specified node
+ '''
+ if hyperdb.DEBUG:
+ print 'setnode', (self, classname, nodeid, node)
+ self.dirtynodes.setdefault(classname, {})[nodeid] = 1
+ # can't set without having already loaded the node
+ self.cache[classname][nodeid] = node
+ self.savenode(classname, nodeid, node)
- # now save the marshalled data
- db[nodeid] = marshal.dumps(node)
- db.close()
- setnode = addnode
+ def savenode(self, classname, nodeid, node):
+ ''' perform the saving of data specified by the set/addnode
+ '''
+ if hyperdb.DEBUG:
+ print 'savenode', (self, classname, nodeid, node)
+ self.transactions.append((self._doSaveNode, (classname, nodeid, node)))
- def getnode(self, classname, nodeid, cldb=None):
- ''' add the specified node to its class's db
+ def getnode(self, classname, nodeid, db=None, cache=1):
+ ''' get a node from the database
'''
- db = cldb or self.getclassdb(classname)
+ if hyperdb.DEBUG:
+ print 'getnode', (self, classname, nodeid, db)
+ if cache:
+ # try the cache
+ cache = self.cache.setdefault(classname, {})
+ if cache.has_key(nodeid):
+ return cache[nodeid]
+
+ # get from the database and save in the cache
+ if db is None:
+ db = self.getclassdb(classname)
if not db.has_key(nodeid):
- raise IndexError, nodeid
+ raise IndexError, "no such %s %s"%(classname, nodeid)
res = marshal.loads(db[nodeid])
-
- # convert the marshalled data to instances
- properties = self.classes[classname].properties
- for key in properties.keys():
- if key == self.RETIRED_FLAG: continue
- if isinstance(properties[key], hyperdb.Date):
- res[key] = date.Date(res[key])
- elif isinstance(properties[key], hyperdb.Interval):
- res[key] = date.Interval(res[key])
-
- if not cldb: db.close()
+ if cache:
+ cache[nodeid] = res
return res
- def hasnode(self, classname, nodeid, cldb=None):
- ''' add the specified node to its class's db
+ def hasnode(self, classname, nodeid, db=None):
+ ''' determine if the database has a given node
'''
- db = cldb or self.getclassdb(classname)
+ if hyperdb.DEBUG:
+ print 'hasnode', (self, classname, nodeid, db)
+ # try the cache
+ cache = self.cache.setdefault(classname, {})
+ if cache.has_key(nodeid):
+ return 1
+
+ # not in the cache - check the database
+ if db is None:
+ db = self.getclassdb(classname)
res = db.has_key(nodeid)
- if not cldb: db.close()
return res
- def countnodes(self, classname, cldb=None):
- db = cldb or self.getclassdb(classname)
- return len(db.keys())
- if not cldb: db.close()
- return res
+ def countnodes(self, classname, db=None):
+ if hyperdb.DEBUG:
+ print 'countnodes', (self, classname, db)
+ # include the new nodes not saved to the DB yet
+ count = len(self.newnodes.get(classname, {}))
+
+ # and count those in the DB
+ if db is None:
+ db = self.getclassdb(classname)
+ count = count + len(db.keys())
+ return count
+
+ def getnodeids(self, classname, db=None):
+ if hyperdb.DEBUG:
+ print 'getnodeids', (self, classname, db)
+ # start off with the new nodes
+ res = self.newnodes.get(classname, {}).keys()
- def getnodeids(self, classname, cldb=None):
- db = cldb or self.getclassdb(classname)
- res = db.keys()
- if not cldb: db.close()
+ if db is None:
+ db = self.getclassdb(classname)
+ res = res + db.keys()
return res
+
+ #
+ # Files - special node properties
+ # inherited from FileStorage
+
#
# Journal
#
'link' or 'unlink' -- 'params' is (classname, nodeid, propname)
'retire' -- 'params' is None
'''
- entry = (nodeid, date.Date().get_tuple(), self.journaltag, action,
- params)
- db = anydbm.open(os.path.join(self.dir, 'journals.%s'%classname), 'c')
- if db.has_key(nodeid):
- s = db[nodeid]
- l = marshal.loads(db[nodeid])
- l.append(entry)
- else:
- l = [entry]
- db[nodeid] = marshal.dumps(l)
- db.close()
+ if hyperdb.DEBUG:
+ print 'addjournal', (self, classname, nodeid, action, params)
+ self.transactions.append((self._doSaveJournal, (classname, nodeid,
+ action, params)))
def getjournal(self, classname, nodeid):
''' get the journal for id
'''
+ if hyperdb.DEBUG:
+ print 'getjournal', (self, classname, nodeid)
# attempt to open the journal - in some rare cases, the journal may
# not exist
try:
- db = anydbm.open(os.path.join(self.dir, 'journals.%s'%classname),
- 'r')
- except anydbm.open, error:
- if error.args[0] != 2: raise
+ db = self._opendb('journals.%s'%classname, 'r')
+ except anydbm.error, error:
+ if str(error) == "need 'c' or 'n' flag to open new db": return []
+ elif error.args[0] != 2: raise
return []
journal = marshal.loads(db[nodeid])
res = []
for entry in journal:
- (nodeid, date_stamp, self.journaltag, action, params) = entry
+ (nodeid, date_stamp, user, action, params) = entry
date_obj = date.Date(date_stamp)
- res.append((nodeid, date_obj, self.journaltag, action, params))
- db.close()
+ res.append((nodeid, date_obj, user, action, params))
return res
- def close(self):
- ''' Close the Database - we must release the circular refs so that
- we can be del'ed and the underlying anydbm connections closed
- cleanly.
- '''
- self.classes = None
+ def pack(self, pack_before):
+ ''' delete all journal entries before 'pack_before' '''
+ if hyperdb.DEBUG:
+ print 'packjournal', (self, pack_before)
+
+ pack_before = pack_before.get_tuple()
+
+ classes = self.getclasses()
+
+ # TODO: factor this out to method - we're already doing it in
+ # _opendb.
+ db_type = ''
+ path = os.path.join(os.getcwd(), self.dir, classes[0])
+ if os.path.exists(path):
+ db_type = whichdb.whichdb(path)
+ if not db_type:
+ raise hyperdb.DatabaseError, "Couldn't identify database type"
+ elif os.path.exists(path+'.db'):
+ db_type = 'dbm'
+
+ for classname in classes:
+ db_name = 'journals.%s'%classname
+ db = self._opendb(db_name, 'w')
+ for key in db.keys():
+ journal = marshal.loads(db[key])
+ l = []
+ last_set_entry = None
+ for entry in journal:
+ (nodeid, date_stamp, self.journaltag, action,
+ params) = entry
+ if date_stamp > pack_before or action == 'create':
+ l.append(entry)
+ elif action == 'set':
+ # grab the last set entry to keep information on
+ # activity
+ last_set_entry = entry
+ if last_set_entry:
+ date_stamp = last_set_entry[1]
+ # if the last set entry was made after the pack date
+ # then it is already in the list
+ if date_stamp < pack_before:
+ l.append(last_set_entry)
+ db[key] = marshal.dumps(l)
+ if db_type == 'gdbm':
+ db.reorganize()
+ db.close()
+
#
# Basic transaction support
def commit(self):
''' Commit the current transactions.
'''
- # lock the DB
- for action, classname, entry in self.transactions:
- # write the node, figure what's changed for the journal.
- pass
- # unlock the DB
+ if hyperdb.DEBUG:
+ print 'commit', (self,)
+ # TODO: lock the DB
+
+ # keep a handle to all the database files opened
+ self.databases = {}
+
+ # now, do all the transactions
+ for method, args in self.transactions:
+ method(*args)
+
+ # now close all the database files
+ for db in self.databases.values():
+ db.close()
+ del self.databases
+ # TODO: unlock the DB
+
+ # all transactions committed, back to normal
+ self.cache = {}
+ self.dirtynodes = {}
+ self.newnodes = {}
+ self.transactions = []
+
+ def _doSaveNode(self, classname, nodeid, node):
+ if hyperdb.DEBUG:
+ print '_doSaveNode', (self, classname, nodeid, node)
+
+ # get the database handle
+ db_name = 'nodes.%s'%classname
+ if self.databases.has_key(db_name):
+ db = self.databases[db_name]
+ else:
+ db = self.databases[db_name] = self.getclassdb(classname, 'c')
+
+ # now save the marshalled data
+ db[nodeid] = marshal.dumps(node)
+
+ def _doSaveJournal(self, classname, nodeid, action, params):
+ entry = (nodeid, date.Date().get_tuple(), self.journaltag, action,
+ params)
+ if hyperdb.DEBUG:
+ print '_doSaveJournal', entry
+
+ # get the database handle
+ db_name = 'journals.%s'%classname
+ if self.databases.has_key(db_name):
+ db = self.databases[db_name]
+ else:
+ db = self.databases[db_name] = self._opendb(db_name, 'c')
+
+ # now insert the journal entry
+ if db.has_key(nodeid):
+ s = db[nodeid]
+ l = marshal.loads(db[nodeid])
+ l.append(entry)
+ else:
+ l = [entry]
+ db[nodeid] = marshal.dumps(l)
+
+ def _doStoreFile(self, name, **databases):
+ # the file is currently ".tmp" - move it to its real name to commit
+ os.rename(name+".tmp", name)
def rollback(self):
''' Reverse all actions from the current transaction.
'''
+ if hyperdb.DEBUG:
+ print 'rollback', (self, )
+ for method, args in self.transactions:
+ # delete temporary files
+ if method == self._doStoreFile:
+ if os.path.exists(args[0]+".tmp"):
+ os.remove(args[0]+".tmp")
+ self.cache = {}
+ self.dirtynodes = {}
+ self.newnodes = {}
self.transactions = []
#
#$Log: not supported by cvs2svn $
+#Revision 1.28 2002/02/16 09:14:17 richard
+# . #514854 ] History: "User" is always ticket creator
+#
+#Revision 1.27 2002/01/22 07:21:13 richard
+#. fixed back_bsddb so it passed the journal tests
+#
+#... it didn't seem happy using the back_anydbm _open method, which is odd.
+#Yet another occurrence of whichdb not being able to recognise older bsddb
+#databases. Yadda yadda. Made the HYPERDBDEBUG stuff more sane in the
+#process.
+#
+#Revision 1.26 2002/01/22 05:18:38 rochecompaan
+#last_set_entry was referenced before assignment
+#
+#Revision 1.25 2002/01/22 05:06:08 rochecompaan
+#We need to keep the last 'set' entry in the journal to preserve
+#information on 'activity' for nodes.
+#
+#Revision 1.24 2002/01/21 16:33:20 rochecompaan
+#You can now use the roundup-admin tool to pack the database
+#
+#Revision 1.23 2002/01/18 04:32:04 richard
+#Rollback was breaking because a message hadn't actually been written to the file. Needs
+#more investigation.
+#
+#Revision 1.22 2002/01/14 02:20:15 richard
+# . changed all config accesses so they access either the instance or the
+# config attribute on the db. This means that all config is obtained from
+# instance_config instead of the mish-mash of classes. This will make
+# switching to a ConfigParser setup easier too, I hope.
+#
+#At a minimum, this makes migration a _little_ easier (a lot easier in the
+#0.5.0 switch, I hope!)
+#
+#Revision 1.21 2002/01/02 02:31:38 richard
+#Sorry for the huge checkin message - I was only intending to implement #496356
+#but I found a number of places where things had been broken by transactions:
+# . modified ROUNDUPDBSENDMAILDEBUG to be SENDMAILDEBUG and hold a filename
+# for _all_ roundup-generated smtp messages to be sent to.
+# . the transaction cache had broken the roundupdb.Class set() reactors
+# . newly-created author users in the mailgw weren't being committed to the db
+#
+#Stuff that made it into CHANGES.txt (ie. the stuff I was actually working
+#on when I found that stuff :):
+# . #496356 ] Use threading in messages
+# . detectors were being registered multiple times
+# . added tests for mailgw
+# . much better attaching of erroneous messages in the mail gateway
+#
+#Revision 1.20 2001/12/18 15:30:34 rochecompaan
+#Fixed bugs:
+# . Fixed file creation and retrieval in same transaction in anydbm
+# backend
+# . Cgi interface now renders new issue after issue creation
+# . Could not set issue status to resolved through cgi interface
+# . Mail gateway was changing status back to 'chatting' if status was
+# omitted as an argument
+#
+#Revision 1.19 2001/12/17 03:52:48 richard
+#Implemented file store rollback. As a bonus, the hyperdb is now capable of
+#storing more than one file per node - if a property name is supplied,
+#the file is called designator.property.
+#I decided not to migrate the existing files stored over to the new naming
+#scheme - the FileClass just doesn't specify the property name.
+#
+#Revision 1.18 2001/12/16 10:53:38 richard
+#take a copy of the node dict so that the subsequent set
+#operation doesn't modify the oldvalues structure
+#
+#Revision 1.17 2001/12/14 23:42:57 richard
+#yuck, a gdbm instance tests false :(
+#I've left the debugging code in - it should be removed one day if we're ever
+#_really_ anal about performance :)
+#
+#Revision 1.16 2001/12/12 03:23:14 richard
+#Cor blimey this anydbm/whichdb stuff is yecchy. Turns out that whichdb
+#incorrectly identifies a dbm file as a dbhash file on my system. This has
+#been submitted to the python bug tracker as issue #491888:
+#https://sourceforge.net/tracker/index.php?func=detail&aid=491888&group_id=5470&atid=105470
+#
+#Revision 1.15 2001/12/12 02:30:51 richard
+#I fixed the problems with people whose anydbm was using the dbm module at the
+#backend. It turns out the dbm module modifies the file name to append ".db"
+#and my check to determine if we're opening an existing or new db just
+#tested os.path.exists() on the filename. Well, no longer! We now perform a
+#much better check _and_ cope with the anydbm implementation module changing
+#too!
+#I also fixed the backends __init__ so only ImportError is squashed.
+#
+#Revision 1.14 2001/12/10 22:20:01 richard
+#Enabled transaction support in the bsddb backend. It uses the anydbm code
+#where possible, only replacing methods where the db is opened (it uses the
+#btree opener specifically.)
+#Also cleaned up some change note generation.
+#Made the backends package work with pydoc too.
+#
+#Revision 1.13 2001/12/02 05:06:16 richard
+#. We now use weakrefs in the Classes to keep the database reference, so
+# the close() method on the database is no longer needed.
+# I bumped the minimum python requirement up to 2.1 accordingly.
+#. #487480 ] roundup-server
+#. #487476 ] INSTALL.txt
+#
+#I also cleaned up the change message / post-edit stuff in the cgi client.
+#There's now a clearly marked "TODO: append the change note" where I believe
+#the change note should be added there. The "changes" list will obviously
+#have to be modified to be a dict of the changes, or somesuch.
+#
+#More testing needed.
+#
+#Revision 1.12 2001/12/01 07:17:50 richard
+#. We now have basic transaction support! Information is only written to
+# the database when the commit() method is called. Only the anydbm
+# backend is modified in this way - neither of the bsddb backends have been.
+# The mail, admin and cgi interfaces all use commit (except the admin tool
+# doesn't have a commit command, so interactive users can't commit...)
+#. Fixed login/registration forwarding the user to the right page (or not,
+# on a failure)
+#
+#Revision 1.11 2001/11/21 02:34:18 richard
+#Added a target version field to the extended issue schema
+#
+#Revision 1.10 2001/10/09 23:58:10 richard
+#Moved the data stringification up into the hyperdb.Class class' get, set
+#and create methods. This means that the data is also stringified for the
+#journal call, and removes duplication of code from the backends. The
+#backend code now only sees strings.
+#
+#Revision 1.9 2001/10/09 07:25:59 richard
+#Added the Password property type. See "pydoc roundup.password" for
+#implementation details. Have updated some of the documentation too.
+#
+#Revision 1.8 2001/09/29 13:27:00 richard
+#CGI interfaces now spit up a top-level index of all the instances they can
+#serve.
+#
#Revision 1.7 2001/08/12 06:32:36 richard
#using isinstance(blah, Foo) now instead of isFooType
#