...except of course it's nice to use valid Python syntax

[roundup.git] / roundup / backends / back_anydbm.py
diff --git a/roundup/backends/back_anydbm.py b/roundup/backends/back_anydbm.py

index be4bd4ef639ad4603a555e117a4df405a96bea7d..4e1ee10f0642763492c2fb62f801d2fd948f0f79 100644 (file)
--- a/roundup/backends/back_anydbm.py
+++ b/roundup/backends/back_anydbm.py
@@ -15,7 +15,7 @@
  # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
  # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  # 
-#$Id: back_anydbm.py,v 1.33 2002-04-24 10:38:26 rochecompaan Exp $
+#$Id: back_anydbm.py,v 1.43 2002-07-10 06:30:30 richard Exp $
  '''
  This module defines a backend that saves the hyperdatabase in a database
  chosen by anydbm. It is guaranteed to always be available in python
@@ -26,6 +26,7 @@ serious bugs, and is not available)
  import whichdb, anydbm, os, marshal
  from roundup import hyperdb, date
  from blobfiles import FileStorage
+from roundup.indexer import Indexer
  from locking import acquire_lock, release_lock
  
  #
@@ -61,9 +62,22 @@ class Database(FileStorage, hyperdb.Database):
          self.dirtynodes = {}    # keep track of the dirty nodes by class
          self.newnodes = {}      # keep track of the new nodes by class
          self.transactions = []
+        self.indexer = Indexer(self.dir)
          # ensure files are group readable and writable
          os.umask(0002)
  
+    def post_init(self):
+        """Called once the schema initialisation has finished."""
+        # reindex the db if necessary
+        if self.indexer.should_reindex():
+            self.reindex()
+
+    def reindex(self):
+        for klass in self.classes.values():
+            for nodeid in klass.list():
+                klass.index(nodeid)
+        self.indexer.save_index()
+
      def __repr__(self):
          return '<back_anydbm instance at %x>'%id(self) 
  
@@ -73,14 +87,14 @@ class Database(FileStorage, hyperdb.Database):
      def __getattr__(self, classname):
          """A convenient way of calling self.getclass(classname)."""
          if self.classes.has_key(classname):
-            if hyperdb.DEBUG:
-                print '__getattr__', (self, classname)
+            if __debug__:
+                print >>hyperdb.DEBUG, '__getattr__', (self, classname)
              return self.classes[classname]
          raise AttributeError, classname
  
      def addclass(self, cl):
-        if hyperdb.DEBUG:
-            print 'addclass', (self, cl)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'addclass', (self, cl)
          cn = cl.classname
          if self.classes.has_key(cn):
              raise ValueError, cn
@@ -88,8 +102,8 @@ class Database(FileStorage, hyperdb.Database):
  
      def getclasses(self):
          """Return a list of the names of all existing classes."""
-        if hyperdb.DEBUG:
-            print 'getclasses', (self,)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'getclasses', (self,)
          l = self.classes.keys()
          l.sort()
          return l
@@ -99,8 +113,8 @@ class Database(FileStorage, hyperdb.Database):
  
          If 'classname' is not a valid class name, a KeyError is raised.
          """
-        if hyperdb.DEBUG:
-            print 'getclass', (self, classname)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'getclass', (self, classname)
          return self.classes[classname]
  
      #
@@ -109,8 +123,8 @@ class Database(FileStorage, hyperdb.Database):
      def clear(self):
          '''Delete all database contents
          '''
-        if hyperdb.DEBUG:
-            print 'clear', (self,)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'clear', (self,)
          for cn in self.classes.keys():
              for dummy in 'nodes', 'journals':
                  path = os.path.join(self.dir, 'journals.%s'%cn)
@@ -123,16 +137,16 @@ class Database(FileStorage, hyperdb.Database):
          ''' grab a connection to the class db that will be used for
              multiple actions
          '''
-        if hyperdb.DEBUG:
-            print 'getclassdb', (self, classname, mode)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'getclassdb', (self, classname, mode)
          return self._opendb('nodes.%s'%classname, mode)
  
      def _opendb(self, name, mode):
          '''Low-level database opener that gets around anydbm/dbm
             eccentricities.
          '''
-        if hyperdb.DEBUG:
-            print '_opendb', (self, name, mode)
+        if __debug__:
+            print >>hyperdb.DEBUG, '_opendb', (self, name, mode)
  
          # determine which DB wrote the class file
          db_type = ''
@@ -148,8 +162,8 @@ class Database(FileStorage, hyperdb.Database):
  
          # new database? let anydbm pick the best dbm
          if not db_type:
-            if hyperdb.DEBUG:
-                print "_opendb anydbm.open(%r, 'n')"%path
+            if __debug__:
+                print >>hyperdb.DEBUG, "_opendb anydbm.open(%r, 'n')"%path
              return anydbm.open(path, 'n')
  
          # open the database with the correct module
@@ -158,9 +172,10 @@ class Database(FileStorage, hyperdb.Database):
          except ImportError:
              raise hyperdb.DatabaseError, \
                  "Couldn't open database - the required module '%s'"\
-                "is not available"%db_type
-        if hyperdb.DEBUG:
-            print "_opendb %r.open(%r, %r)"%(db_type, path, mode)
+                " is not available"%db_type
+        if __debug__:
+            print >>hyperdb.DEBUG, "_opendb %r.open(%r, %r)"%(db_type, path,
+                mode)
          return dbm.open(path, mode)
  
      def _lockdb(self, name):
@@ -194,8 +209,8 @@ class Database(FileStorage, hyperdb.Database):
      def addnode(self, classname, nodeid, node):
          ''' add the specified node to its class's db
          '''
-        if hyperdb.DEBUG:
-            print 'addnode', (self, classname, nodeid, node)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'addnode', (self, classname, nodeid, node)
          self.newnodes.setdefault(classname, {})[nodeid] = 1
          self.cache.setdefault(classname, {})[nodeid] = node
          self.savenode(classname, nodeid, node)
@@ -203,8 +218,8 @@ class Database(FileStorage, hyperdb.Database):
      def setnode(self, classname, nodeid, node):
          ''' change the specified node
          '''
-        if hyperdb.DEBUG:
-            print 'setnode', (self, classname, nodeid, node)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'setnode', (self, classname, nodeid, node)
          self.dirtynodes.setdefault(classname, {})[nodeid] = 1
  
          # can't set without having already loaded the node
@@ -214,20 +229,26 @@ class Database(FileStorage, hyperdb.Database):
      def savenode(self, classname, nodeid, node):
          ''' perform the saving of data specified by the set/addnode
          '''
-        if hyperdb.DEBUG:
-            print 'savenode', (self, classname, nodeid, node)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'savenode', (self, classname, nodeid, node)
          self.transactions.append((self._doSaveNode, (classname, nodeid, node)))
  
      def getnode(self, classname, nodeid, db=None, cache=1):
          ''' get a node from the database
          '''
-        if hyperdb.DEBUG:
-            print 'getnode', (self, classname, nodeid, db)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'getnode', (self, classname, nodeid, db)
          if cache:
              # try the cache
-            cache = self.cache.setdefault(classname, {})
-            if cache.has_key(nodeid):
-                return cache[nodeid]
+            cache_dict = self.cache.setdefault(classname, {})
+            if cache_dict.has_key(nodeid):
+                if __debug__:
+                    print >>hyperdb.TRACE, 'get %s %s cached'%(classname,
+                        nodeid)
+                return cache_dict[nodeid]
+
+        if __debug__:
+            print >>hyperdb.TRACE, 'get %s %s'%(classname, nodeid)
  
          # get from the database and save in the cache
          if db is None:
@@ -241,21 +262,26 @@ class Database(FileStorage, hyperdb.Database):
          # reverse the serialisation
          res = self.unserialise(classname, res)
  
-        # store off in the cache
+        # store off in the cache dict
          if cache:
-            cache[nodeid] = res
+            cache_dict[nodeid] = res
  
          return res
  
      def hasnode(self, classname, nodeid, db=None):
          ''' determine if the database has a given node
          '''
-        if hyperdb.DEBUG:
-            print 'hasnode', (self, classname, nodeid, db)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'hasnode', (self, classname, nodeid, db)
+
          # try the cache
          cache = self.cache.setdefault(classname, {})
          if cache.has_key(nodeid):
+            if __debug__:
+                print >>hyperdb.TRACE, 'has %s %s cached'%(classname, nodeid)
              return 1
+        if __debug__:
+            print >>hyperdb.TRACE, 'has %s %s'%(classname, nodeid)
  
          # not in the cache - check the database
          if db is None:
@@ -264,8 +290,8 @@ class Database(FileStorage, hyperdb.Database):
          return res
  
      def countnodes(self, classname, db=None):
-        if hyperdb.DEBUG:
-            print 'countnodes', (self, classname, db)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'countnodes', (self, classname, db)
          # include the new nodes not saved to the DB yet
          count = len(self.newnodes.get(classname, {}))
  
@@ -276,8 +302,8 @@ class Database(FileStorage, hyperdb.Database):
          return count
  
      def getnodeids(self, classname, db=None):
-        if hyperdb.DEBUG:
-            print 'getnodeids', (self, classname, db)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'getnodeids', (self, classname, db)
          # start off with the new nodes
          res = self.newnodes.get(classname, {}).keys()
  
@@ -302,16 +328,17 @@ class Database(FileStorage, hyperdb.Database):
              'link' or 'unlink' -- 'params' is (classname, nodeid, propname)
              'retire' -- 'params' is None
          '''
-        if hyperdb.DEBUG:
-            print 'addjournal', (self, classname, nodeid, action, params)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'addjournal', (self, classname, nodeid,
+                action, params)
          self.transactions.append((self._doSaveJournal, (classname, nodeid,
              action, params)))
  
      def getjournal(self, classname, nodeid):
          ''' get the journal for id
          '''
-        if hyperdb.DEBUG:
-            print 'getjournal', (self, classname, nodeid)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'getjournal', (self, classname, nodeid)
          # attempt to open the journal - in some rare cases, the journal may
          # not exist
          try:
@@ -320,7 +347,12 @@ class Database(FileStorage, hyperdb.Database):
              if str(error) == "need 'c' or 'n' flag to open new db": return []
              elif error.args[0] != 2: raise
              return []
-        journal = marshal.loads(db[nodeid])
+        try:
+            journal = marshal.loads(db[nodeid])
+        except KeyError:
+            db.close()
+            raise KeyError, 'no such %s %s'%(classname, nodeid)
+        db.close()
          res = []
          for entry in journal:
              (nodeid, date_stamp, user, action, params) = entry
@@ -330,8 +362,8 @@ class Database(FileStorage, hyperdb.Database):
  
      def pack(self, pack_before):
          ''' delete all journal entries before 'pack_before' '''
-        if hyperdb.DEBUG:
-            print 'packjournal', (self, pack_before)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'packjournal', (self, pack_before)
  
          pack_before = pack_before.get_tuple()
  
@@ -383,16 +415,17 @@ class Database(FileStorage, hyperdb.Database):
      def commit(self):
          ''' Commit the current transactions.
          '''
-        if hyperdb.DEBUG:
-            print 'commit', (self,)
+        if __debug__:
+            print >>hyperdb.DEBUG, 'commit', (self,)
          # TODO: lock the DB
  
          # keep a handle to all the database files opened
          self.databases = {}
  
          # now, do all the transactions
+        reindex = {}
          for method, args in self.transactions:
-            method(*args)
+            reindex[method(*args)] = 1
  
          # now close all the database files
          for db in self.databases.values():
@@ -400,6 +433,14 @@ class Database(FileStorage, hyperdb.Database):
          del self.databases
          # TODO: unlock the DB
  
+        # reindex the nodes that request it
+        for classname, nodeid in filter(None, reindex.keys()):
+            print >>hyperdb.DEBUG, 'commit.reindex', (classname, nodeid)
+            self.getclass(classname).index(nodeid)
+
+        # save the indexer state
+        self.indexer.save_index()
+
          # all transactions committed, back to normal
          self.cache = {}
          self.dirtynodes = {}
@@ -407,8 +448,9 @@ class Database(FileStorage, hyperdb.Database):
          self.transactions = []
  
      def _doSaveNode(self, classname, nodeid, node):
-        if hyperdb.DEBUG:
-            print '_doSaveNode', (self, classname, nodeid, node)
+        if __debug__:
+            print >>hyperdb.DEBUG, '_doSaveNode', (self, classname, nodeid,
+                node)
  
          # get the database handle
          db_name = 'nodes.%s'%classname
@@ -420,6 +462,9 @@ class Database(FileStorage, hyperdb.Database):
          # now save the marshalled data
          db[nodeid] = marshal.dumps(self.serialise(classname, node))
  
+        # return the classname, nodeid so we reindex this content
+        return (classname, nodeid)
+
      def _doSaveJournal(self, classname, nodeid, action, params):
          # serialise first
          if action in ('set', 'create'):
@@ -429,8 +474,8 @@ class Database(FileStorage, hyperdb.Database):
          entry = (nodeid, date.Date().get_tuple(), self.journaltag, action,
              params)
  
-        if hyperdb.DEBUG:
-            print '_doSaveJournal', entry
+        if __debug__:
+            print >>hyperdb.DEBUG, '_doSaveJournal', entry
  
          # get the database handle
          db_name = 'journals.%s'%classname
@@ -450,20 +495,15 @@ class Database(FileStorage, hyperdb.Database):
  
          db[nodeid] = marshal.dumps(l)
  
-    def _doStoreFile(self, name, **databases):
-        # the file is currently ".tmp" - move it to its real name to commit
-        os.rename(name+".tmp", name)
-
      def rollback(self):
          ''' Reverse all actions from the current transaction.
          '''
-        if hyperdb.DEBUG:
-            print 'rollback', (self, )
+        if __debug__:
+            print >>hyperdb.DEBUG, 'rollback', (self, )
          for method, args in self.transactions:
              # delete temporary files
              if method == self._doStoreFile:
-                if os.path.exists(args[0]+".tmp"):
-                    os.remove(args[0]+".tmp")
+                self._rollbackStoreFile(*args)
          self.cache = {}
          self.dirtynodes = {}
          self.newnodes = {}
@@ -471,6 +511,62 @@ class Database(FileStorage, hyperdb.Database):
  
  #
  #$Log: not supported by cvs2svn $
+#Revision 1.42  2002/07/10 06:21:38  richard
+#Be extra safe
+#
+#Revision 1.41  2002/07/10 00:21:45  richard
+#explicit database closing
+#
+#Revision 1.40  2002/07/09 04:19:09  richard
+#Added reindex command to roundup-admin.
+#Fixed reindex on first access.
+#Also fixed reindexing of entries that change.
+#
+#Revision 1.39  2002/07/09 03:02:52  richard
+#More indexer work:
+#- all String properties may now be indexed too. Currently there's a bit of
+#  "issue" specific code in the actual searching which needs to be
+#  addressed. In a nutshell:
+#  + pass 'indexme="yes"' as a String() property initialisation arg, eg:
+#        file = FileClass(db, "file", name=String(), type=String(),
+#            comment=String(indexme="yes"))
+#  + the comment will then be indexed and be searchable, with the results
+#    related back to the issue that the file is linked to
+#- as a result of this work, the FileClass has a default MIME type that may
+#  be overridden in a subclass, or by the use of a "type" property as is
+#  done in the default templates.
+#- the regeneration of the indexes (if necessary) is done once the schema is
+#  set up in the dbinit.
+#
+#Revision 1.38  2002/07/08 06:58:15  richard
+#cleaned up the indexer code:
+# - it splits more words out (much simpler, faster splitter)
+# - removed code we'll never use (roundup.roundup_indexer has the full
+#   implementation, and replaces roundup.indexer)
+# - only index text/plain and rfc822/message (ideas for other text formats to
+#   index are welcome)
+# - added simple unit test for indexer. Needs more tests for regression.
+#
+#Revision 1.37  2002/06/20 23:52:35  richard
+#More informative error message
+#
+#Revision 1.36  2002/06/19 03:07:19  richard
+#Moved the file storage commit into blobfiles where it belongs.
+#
+#Revision 1.35  2002/05/25 07:16:24  rochecompaan
+#Merged search_indexing-branch with HEAD
+#
+#Revision 1.34  2002/05/15 06:21:21  richard
+# . node caching now works, and gives a small boost in performance
+#
+#As a part of this, I cleaned up the DEBUG output and implemented TRACE
+#output (HYPERDBTRACE='file to trace to') with checkpoints at the start of
+#CGI requests. Run roundup with python -O to skip all the DEBUG/TRACE stuff
+#(using if __debug__ which is compiled out with -O)
+#
+#Revision 1.33  2002/04/24 10:38:26  rochecompaan
+#All database files are now created group readable and writable.
+#
  #Revision 1.32  2002/04/15 23:25:15  richard
  #. node ids are now generated from a lockable store - no more race conditions
  #
@@ -485,6 +581,9 @@ class Database(FileStorage, hyperdb.Database):
  #
  #Unit tests for all of the above written.
  #
+#Revision 1.30.2.1  2002/04/03 11:55:57  rochecompaan
+# . Added feature #526730 - search for messages capability
+#
  #Revision 1.30  2002/02/27 03:40:59  richard
  #Ran it through pychecker, made fixes
  #