Added reindex command to roundup-admin.

author richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>

Tue, 9 Jul 2002 04:19:09 +0000 (04:19 +0000)

committer richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>

Tue, 9 Jul 2002 04:19:09 +0000 (04:19 +0000)
author richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>
Tue, 9 Jul 2002 04:19:09 +0000 (04:19 +0000)
committer richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>
Tue, 9 Jul 2002 04:19:09 +0000 (04:19 +0000)
diff --git a/roundup/admin.py b/roundup/admin.py

index 481ca46882b93ad9f53ea72579b1526c7bf216db..e54ef7c609ebfbb4ddcf646311e0922bfa1b4c40 100644 (file)
--- a/roundup/admin.py
+++ b/roundup/admin.py
@@ -16,7 +16,7 @@
  # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
  # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  # 
-# $Id: admin.py,v 1.15 2002-06-17 23:14:44 richard Exp $
+# $Id: admin.py,v 1.16 2002-07-09 04:19:09 richard Exp $
  
  import sys, os, getpass, getopt, re, UserDict, shlex, shutil
  try:
@@ -964,6 +964,17 @@ Date format is "YYYY-MM-DD" eg:
          self.db.pack(pack_before)
          return 0
  
+    def do_reindex(self, args):
+        '''Usage: reindex
+        Re-generate an instance's search indexes.
+
+        This will re-generate the search indexes for an instance. This will
+        typically happen automatically.
+        '''
+        self.db.indexer.force_reindex()
+        self.db.reindex()
+        return 0
+
      def run_command(self, args):
          '''Run a single command
          '''
@@ -1114,6 +1125,9 @@ if __name__ == '__main__':
  
  #
  # $Log: not supported by cvs2svn $
+# Revision 1.15  2002/06/17 23:14:44  richard
+# . #569415 ] {version}
+#
  # Revision 1.14  2002/06/11 06:41:50  richard
  # Removed prompt for admin email in initialisation.
  #
diff --git a/roundup/backends/back_anydbm.py b/roundup/backends/back_anydbm.py

index d2473dbb8e387c18d745b2be35a59d3f80268220..b341c4432713ab004ff7276607c8c4b96e303287 100644 (file)
--- a/roundup/backends/back_anydbm.py
+++ b/roundup/backends/back_anydbm.py
@@ -15,7 +15,7 @@
  # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
  # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  # 
-#$Id: back_anydbm.py,v 1.39 2002-07-09 03:02:52 richard Exp $
+#$Id: back_anydbm.py,v 1.40 2002-07-09 04:19:09 richard Exp $
  '''
  This module defines a backend that saves the hyperdatabase in a database
  chosen by anydbm. It is guaranteed to always be available in python
@@ -69,8 +69,10 @@ class Database(FileStorage, hyperdb.Database):
      def post_init(self):
          """Called once the schema initialisation has finished."""
          # reindex the db if necessary
-        if not self.indexer.should_reindex():
-            return
+        if self.indexer.should_reindex():
+            self.reindex()
+
+    def reindex(self):
          for klass in self.classes.values():
              for nodeid in klass.list():
                  klass.index(nodeid)
@@ -507,6 +509,22 @@ class Database(FileStorage, hyperdb.Database):
  
  #
  #$Log: not supported by cvs2svn $
+#Revision 1.39  2002/07/09 03:02:52  richard
+#More indexer work:
+#- all String properties may now be indexed too. Currently there's a bit of
+#  "issue" specific code in the actual searching which needs to be
+#  addressed. In a nutshell:
+#  + pass 'indexme="yes"' as a String() property initialisation arg, eg:
+#        file = FileClass(db, "file", name=String(), type=String(),
+#            comment=String(indexme="yes"))
+#  + the comment will then be indexed and be searchable, with the results
+#    related back to the issue that the file is linked to
+#- as a result of this work, the FileClass has a default MIME type that may
+#  be overridden in a subclass, or by the use of a "type" property as is
+#  done in the default templates.
+#- the regeneration of the indexes (if necessary) is done once the schema is
+#  set up in the dbinit.
+#
  #Revision 1.38  2002/07/08 06:58:15  richard
  #cleaned up the indexer code:
  # - it splits more words out (much simpler, faster splitter)
diff --git a/roundup/cgi_client.py b/roundup/cgi_client.py

index 83380f6685b580444de4784ad87bca0fe29279d6..b471c1b206be9611455fc61719447ae5f0440447 100644 (file)
--- a/roundup/cgi_client.py
+++ b/roundup/cgi_client.py
@@ -15,7 +15,7 @@
  # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
  # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  # 
-# $Id: cgi_client.py,v 1.133 2002-07-08 15:32:05 gmcm Exp $
+# $Id: cgi_client.py,v 1.134 2002-07-09 04:19:09 richard Exp $
  
  __doc__ = """
  WWW request handler (also used in the stand-alone server).
@@ -26,7 +26,6 @@ import binascii, Cookie, time, random
  
  import roundupdb, htmltemplate, date, hyperdb, password
  from roundup.i18n import _
-from roundup.indexer import Indexer
  
  class Unauthorised(ValueError):
      pass
@@ -73,10 +72,6 @@ class Client:
              # someone gave us a non-int debug level, turn it off
              self.debug = 0
  
-        # used for searching the indexes
-        self.indexer = Indexer('%s/db'%instance.INSTANCE_HOME)
-
-
      def getuid(self):
          try:
              return self.db.user.lookup(self.user)
@@ -1459,6 +1454,10 @@ def parsePropsFromForm(db, cl, form, nodeid=0, num_re=re.compile('^\d+$')):
  
  #
  # $Log: not supported by cvs2svn $
+# Revision 1.133  2002/07/08 15:32:05  gmcm
+# Pagination of index pages.
+# New search form.
+#
  # Revision 1.132  2002/07/08 07:26:14  richard
  # ehem
  #
diff --git a/roundup/htmltemplate.py b/roundup/htmltemplate.py

index 2541ee348dc0bf0a8318f2b95e582859476765c4..65e9992c6d2e8f6fd3eb998285f573573ee3730b 100644 (file)
--- a/roundup/htmltemplate.py
+++ b/roundup/htmltemplate.py
@@ -15,7 +15,7 @@
  # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
  # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  # 
-# $Id: htmltemplate.py,v 1.95 2002-07-08 15:32:06 gmcm Exp $
+# $Id: htmltemplate.py,v 1.96 2002-07-09 04:19:09 richard Exp $
  
  __doc__ = """
  Template engine.
@@ -874,7 +874,7 @@ class IndexTemplate(TemplateFunctions):
              matches = None
              if nodeids is None:
                  if search_text != '':
-                    matches = self.client.indexer.search(
+                    matches = self.db.indexer.search(
                          search_text.split(' '), self.cl)
                  nodeids = self.cl.filter(matches, filterspec, sort, group)
              for nodeid in nodeids[startwith:startwith+pagesize]:
@@ -1237,6 +1237,10 @@ class NewItemTemplate(TemplateFunctions):
  
  #
  # $Log: not supported by cvs2svn $
+# Revision 1.95  2002/07/08 15:32:06  gmcm
+# Pagination of index pages.
+# New search form.
+#
  # Revision 1.94  2002/06/27 15:38:53  gmcm
  # Fix the cycles (a clear method, called after render, that removes
  # the bound methods from the globals dict).
diff --git a/roundup/indexer.py b/roundup/indexer.py

index d82560ce7d4ad2fb007ccf17c18384b030f93c5f..096b6c6dda28b589db9d29c62a2b510269f6aaf0 100644 (file)
--- a/roundup/indexer.py
+++ b/roundup/indexer.py
@@ -14,7 +14,7 @@
  #     that promote freedom, but obviously am giving up any rights
  #     to compel such.
  # 
-#$Id: indexer.py,v 1.4 2002-07-09 03:02:52 richard Exp $
+#$Id: indexer.py,v 1.5 2002-07-09 04:19:09 richard Exp $
  '''
  This module provides an indexer class, RoundupIndexer, that stores text
  indices in a roundup instance.  This class makes searching the content of
@@ -25,29 +25,35 @@ import os, shutil, re, mimetypes, marshal, zlib, errno
  class Indexer:
      ''' Indexes information from roundup's hyperdb to allow efficient
          searching.
+
+        Three structures are created by the indexer:
+          files   {identifier: (fileid, wordcount)}
+          words   {word: {fileid: count}}
+          fileids {fileid: identifier}
      '''
      def __init__(self, db_path):
-        indexdb_path = os.path.join(db_path, 'indexes')
-        self.indexdb = os.path.join(indexdb_path, 'index.db')
+        self.indexdb_path = os.path.join(db_path, 'indexes')
+        self.indexdb = os.path.join(self.indexdb_path, 'index.db')
          self.reindex = 0
          self.casesensitive = 0
          self.quiet = 9
  
          # see if we need to reindex because of a change in code
-        if (not os.path.exists(indexdb_path) or
-                not os.path.exists(os.path.join(indexdb_path, 'version'))):
+        if (not os.path.exists(self.indexdb_path) or
+                not os.path.exists(os.path.join(self.indexdb_path, 'version'))):
              # TODO: if the version file exists (in the future) we'll want to
              # check the value in it - for now the file itself is a flag
-            if os.path.exists(indexdb_path):
-                shutil.rmtree(indexdb_path)
-            os.makedirs(indexdb_path)
-            os.chmod(indexdb_path, 0775)
-            open(os.path.join(indexdb_path, 'version'), 'w').write('1\n')
-
-            # we need to reindex
-            self.reindex = 1
-        else:
-            self.reindex = 0
+            self.force_reindex()
+
+    def force_reindex(self):
+        '''Force a reindex condition
+        '''
+        if os.path.exists(self.indexdb_path):
+            shutil.rmtree(self.indexdb_path)
+        os.makedirs(self.indexdb_path)
+        os.chmod(self.indexdb_path, 0775)
+        open(os.path.join(self.indexdb_path, 'version'), 'w').write('1\n')
+        self.reindex = 1
  
      def should_reindex(self):
          '''Should we reindex?
@@ -61,16 +67,9 @@ class Indexer:
          # make sure the index is loaded
          self.load_index()
  
-        # Is file eligible for (re)indexing?
+        # remove old entries for this identifier
          if self.files.has_key(identifier):
-            # Reindexing enabled, cleanup dicts
-            if self.reindex:
-                self.purge_entry(identifier, self.files, self.words)
-            else:
-                # DO NOT reindex this file
-                if self.quiet < 5:
-                    print "Not reindexing", identifier
-                return 0
+            self.purge_entry(identifier)
  
          # split into words
          words = self.splitter(text, mime_type)
@@ -281,19 +280,20 @@ class Indexer:
              pickle_fh.write(zlib.compress(pickle_str))
              os.chmod(filename, 0664)
  
-    def purge_entry(self, fname, file_dct, word_dct):
+    def purge_entry(self, identifier):
          ''' Remove a file from file index and word index
          '''
-        try:        # The easy part, cleanup the file index
-            file_index = file_dct[fname]
-            del file_dct[fname]
-        except KeyError:
-            pass    # We'll assume we only encounter KeyError's
+        if not self.files.has_key(identifier):
+            return
+
+        file_index = self.files[identifier][0]
+        del self.files[identifier]
+        del self.fileids[file_index]
+
          # The much harder part, cleanup the word index
-        for word, occurs in word_dct.items():
+        for key, occurs in self.words.items():
              if occurs.has_key(file_index):
                  del occurs[file_index]
-                word_dct[word] = occurs
  
      def index_loaded(self):
          return (hasattr(self,'fileids') and hasattr(self,'files') and
@@ -301,6 +301,22 @@ class Indexer:
  
  #
  #$Log: not supported by cvs2svn $
+#Revision 1.4  2002/07/09 03:02:52  richard
+#More indexer work:
+#- all String properties may now be indexed too. Currently there's a bit of
+#  "issue" specific code in the actual searching which needs to be
+#  addressed. In a nutshell:
+#  + pass 'indexme="yes"' as a String() property initialisation arg, eg:
+#        file = FileClass(db, "file", name=String(), type=String(),
+#            comment=String(indexme="yes"))
+#  + the comment will then be indexed and be searchable, with the results
+#    related back to the issue that the file is linked to
+#- as a result of this work, the FileClass has a default MIME type that may
+#  be overridden in a subclass, or by the use of a "type" property as is
+#  done in the default templates.
+#- the regeneration of the indexes (if necessary) is done once the schema is
+#  set up in the dbinit.
+#
  #Revision 1.3  2002/07/08 06:58:15  richard
  #cleaned up the indexer code:
  # - it splits more words out (much simpler, faster splitter)
diff --git a/roundup/roundupdb.py b/roundup/roundupdb.py

index 03bde0d1d3c486942c016a466f872e3b0a2fcd0f..dc181a4f3a78e60c3fc67e02c68f528129ddbdda 100644 (file)
--- a/roundup/roundupdb.py
+++ b/roundup/roundupdb.py
@@ -15,7 +15,7 @@
  # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
  # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  # 
-# $Id: roundupdb.py,v 1.60 2002-07-09 03:02:52 richard Exp $
+# $Id: roundupdb.py,v 1.61 2002-07-09 04:19:09 richard Exp $
  
  __doc__ = """
  Extending hyperdb with types specific to issue-tracking.
@@ -313,7 +313,7 @@ class IssueClass(Class):
          dictionary attempts to specify any of these properties or a
          "creation" or "activity" property, a ValueError is raised."""
          if not properties.has_key('title'):
-            properties['title'] = hyperdb.String()
+            properties['title'] = hyperdb.String(indexme='yes')
          if not properties.has_key('messages'):
              properties['messages'] = hyperdb.Multilink("msg")
          if not properties.has_key('files'):
@@ -691,6 +691,22 @@ class IssueClass(Class):
  
  #
  # $Log: not supported by cvs2svn $
+# Revision 1.60  2002/07/09 03:02:52  richard
+# More indexer work:
+# - all String properties may now be indexed too. Currently there's a bit of
+#   "issue" specific code in the actual searching which needs to be
+#   addressed. In a nutshell:
+#   + pass 'indexme="yes"' as a String() property initialisation arg, eg:
+#         file = FileClass(db, "file", name=String(), type=String(),
+#             comment=String(indexme="yes"))
+#   + the comment will then be indexed and be searchable, with the results
+#     related back to the issue that the file is linked to
+# - as a result of this work, the FileClass has a default MIME type that may
+#   be overridden in a subclass, or by the use of a "type" property as is
+#   done in the default templates.
+# - the regeneration of the indexes (if necessary) is done once the schema is
+#   set up in the dbinit.
+#
  # Revision 1.59  2002/06/18 03:55:25  dman13
  # Fixed name/address display problem introduced by an earlier change.
  # (instead of "name<addr>" display "name <addr>")
diff --git a/test/test_db.py b/test/test_db.py

index 814c17a8274b080c949691c76ebc6a2714cdf971..e8a51b0b93f2a74050465a5b3bdb3899f49071c1 100644 (file)
--- a/test/test_db.py
+++ b/test/test_db.py
@@ -15,7 +15,7 @@
  # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
  # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  # 
-# $Id: test_db.py,v 1.24 2002-07-09 03:02:53 richard Exp $ 
+# $Id: test_db.py,v 1.25 2002-07-09 04:19:09 richard Exp $ 
  
  import unittest, os, shutil
  
@@ -314,8 +314,29 @@ class anydbmDBTestCase(MyTestCase):
              {'2': {}})
          self.assertEquals(self.db.indexer.search(['flebble'], self.db.issue),
              {'2': {}, '1': {}})
-        self.assertEquals(self.db.indexer.search(['blah'], self.db.issue),
-            {'1': {'files': ['2']}})
+
+    def testReindexing(self):
+        self.db.issue.create(title="frooz")
+        self.db.commit()
+        self.assertEquals(self.db.indexer.search(['frooz'], self.db.issue),
+            {'1': {}})
+        self.db.issue.set('1', title="dooble")
+        self.db.commit()
+        self.assertEquals(self.db.indexer.search(['dooble'], self.db.issue),
+            {'1': {}})
+        self.assertEquals(self.db.indexer.search(['frooz'], self.db.issue), {})
+
+    def testForcedReindexing(self):
+        self.db.issue.create(title="flebble frooz")
+        self.db.commit()
+        self.assertEquals(self.db.indexer.search(['flebble'], self.db.issue),
+            {'1': {}})
+        self.db.indexer.quiet = 1
+        self.db.indexer.force_reindex()
+        self.db.post_init()
+        self.db.indexer.quiet = 9
+        self.assertEquals(self.db.indexer.search(['flebble'], self.db.issue),
+            {'1': {}})
  
  class anydbmReadOnlyDBTestCase(MyTestCase):
      def setUp(self):
@@ -419,6 +440,22 @@ def suite():
  
  #
  # $Log: not supported by cvs2svn $
+# Revision 1.24  2002/07/09 03:02:53  richard
+# More indexer work:
+# - all String properties may now be indexed too. Currently there's a bit of
+#   "issue" specific code in the actual searching which needs to be
+#   addressed. In a nutshell:
+#   + pass 'indexme="yes"' as a String() property initialisation arg, eg:
+#         file = FileClass(db, "file", name=String(), type=String(),
+#             comment=String(indexme="yes"))
+#   + the comment will then be indexed and be searchable, with the results
+#     related back to the issue that the file is linked to
+# - as a result of this work, the FileClass has a default MIME type that may
+#   be overridden in a subclass, or by the use of a "type" property as is
+#   done in the default templates.
+# - the regeneration of the indexes (if necessary) is done once the schema is
+#   set up in the dbinit.
+#
  # Revision 1.23  2002/06/20 23:51:48  richard
  # Cleaned up the hyperdb tests
  #
author	richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>
	Tue, 9 Jul 2002 04:19:09 +0000 (04:19 +0000)
committer	richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>
	Tue, 9 Jul 2002 04:19:09 +0000 (04:19 +0000)
roundup/admin.py		patch | blob | history
roundup/backends/back_anydbm.py		patch | blob | history
roundup/cgi_client.py		patch | blob | history
roundup/htmltemplate.py		patch | blob | history
roundup/indexer.py		patch | blob | history
roundup/roundupdb.py		patch | blob | history
test/test_db.py		patch | blob | history