From: richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>
Date: Tue, 9 Jul 2002 03:02:53 +0000 (+0000)
Subject: More indexer work:
X-Git-Url: https://git.tokkee.org/?a=commitdiff_plain;h=657d984178aa961818013d2ac0ab89dc81778a37;p=roundup.git

More indexer work:
- all String properties may now be indexed too. Currently there's a bit of
  "issue" specific code in the actual searching which needs to be
  addressed. In a nutshell:
  + pass 'indexme="yes"' as a String() property initialisation arg, eg:
        file = FileClass(db, "file", name=String(), type=String(),
            comment=String(indexme="yes"))
  + the comment will then be indexed and be searchable, with the results
    related back to the issue that the file is linked to
- as a result of this work, the FileClass has a default MIME type that may
  be overridden in a subclass, or by the use of a "type" property as is
  done in the default templates.
- the regeneration of the indexes (if necessary) is done once the schema is
  set up in the dbinit.


git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/trunk@839 57a73879-2fb5-44c3-a270-3262357dd7e2
---

diff --git a/CHANGES.txt b/CHANGES.txt
index 7b602e3..18e9f87 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -12,6 +12,19 @@ Fixed:
    - only index text/plain and rfc822/message (ideas for other text formats to
      index are welcome)
    - added simple unit test for indexer. Needs more tests for regression.
+   - all String properties may now be indexed too. Currently there's a bit of
+     "issue" specific code in the actual searching which needs to be
+     addressed. In a nutshell:
+     + pass 'indexme="yes"' as a String() property initialisation arg, eg:
+           file = FileClass(db, "file", name=String(), type=String(),
+               comment=String(indexme="yes"))
+     + the comment will then be indexed and be searchable, with the results
+       related back to the issue that the file is linked to
+   - as a result of this work, the FileClass has a default MIME type that may
+     be overridden in a subclass, or by the use of a "type" property as is
+     done in the default templates.
+   - the regeneration of the indexes (if necessary) is done once the schema is
+     set up in the dbinit.
  . made the unit tests run again - they were quite b0rken
 
 
diff --git a/doc/upgrading.txt b/doc/upgrading.txt
index f7aba78..2be9d30 100644
--- a/doc/upgrading.txt
+++ b/doc/upgrading.txt
@@ -7,6 +7,17 @@ accordingly.
 
 .. contents::
 
+Migrating from 0.4.x to 0.5.0
+=============================
+
+0.5.0 Configuration
+-------------------
+
+TODO: document the indexing of String properties (indexme="yes")
+TODO: document that the dbinit open() must call db.post_init() once the
+schema is set up, so that the indexes are regenerated when necessary
+
+
 Migrating from 0.4.1 to 0.4.2
 =============================
 
diff --git a/roundup/backends/back_anydbm.py b/roundup/backends/back_anydbm.py
index 5fb8b65..d2473db 100644
--- a/roundup/backends/back_anydbm.py
+++ b/roundup/backends/back_anydbm.py
@@ -15,7 +15,7 @@
 # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
 # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 # 
-#$Id: back_anydbm.py,v 1.38 2002-07-08 06:58:15 richard Exp $
+#$Id: back_anydbm.py,v 1.39 2002-07-09 03:02:52 richard Exp $
 '''
 This module defines a backend that saves the hyperdatabase in a database
 chosen by anydbm. It is guaranteed to always be available in python
@@ -66,6 +66,16 @@ class Database(FileStorage, hyperdb.Database):
         # ensure files are group readable and writable
         os.umask(0002)
 
+    def post_init(self):
+        """Called once the schema initialisation has finished."""
+        # reindex the db if necessary
+        if not self.indexer.should_reindex():
+            return
+        for klass in self.classes.values():
+            for nodeid in klass.list():
+                klass.index(nodeid)
+        self.indexer.save_index()
+
     def __repr__(self):
         return '<back_anydbm instance at %x>'%id(self) 
 
@@ -409,8 +419,9 @@ class Database(FileStorage, hyperdb.Database):
         self.databases = {}
 
         # now, do all the transactions
+        reindex = {}
         for method, args in self.transactions:
-            method(*args)
+            reindex[method(*args)] = 1
 
         # now close all the database files
         for db in self.databases.values():
@@ -418,6 +429,14 @@ class Database(FileStorage, hyperdb.Database):
         del self.databases
         # TODO: unlock the DB
 
+        # reindex the nodes that request it
+        for classname, nodeid in filter(None, reindex.keys()):
+            print >>hyperdb.DEBUG, 'commit.reindex', (classname, nodeid)
+            self.getclass(classname).index(nodeid)
+
+        # save the indexer state
+        self.indexer.save_index()
+
         # all transactions committed, back to normal
         self.cache = {}
         self.dirtynodes = {}
@@ -439,6 +458,9 @@ class Database(FileStorage, hyperdb.Database):
         # now save the marshalled data
         db[nodeid] = marshal.dumps(self.serialise(classname, node))
 
+        # return the classname, nodeid so we reindex this content
+        return (classname, nodeid)
+
     def _doSaveJournal(self, classname, nodeid, action, params):
         # serialise first
         if action in ('set', 'create'):
@@ -477,8 +499,7 @@ class Database(FileStorage, hyperdb.Database):
         for method, args in self.transactions:
             # delete temporary files
             if method == self._doStoreFile:
-                if os.path.exists(args[0]+".tmp"):
-                    os.remove(args[0]+".tmp")
+                self._rollbackStoreFile(*args)
         self.cache = {}
         self.dirtynodes = {}
         self.newnodes = {}
@@ -486,6 +507,15 @@ class Database(FileStorage, hyperdb.Database):
 
 #
 #$Log: not supported by cvs2svn $
+#Revision 1.38  2002/07/08 06:58:15  richard
+#cleaned up the indexer code:
+# - it splits more words out (much simpler, faster splitter)
+# - removed code we'll never use (roundup.roundup_indexer has the full
+#   implementation, and replaces roundup.indexer)
+# - only index text/plain and rfc822/message (ideas for other text formats to
+#   index are welcome)
+# - added simple unit test for indexer. Needs more tests for regression.
+#
 #Revision 1.37  2002/06/20 23:52:35  richard
 #More informative error message
 #
diff --git a/roundup/backends/blobfiles.py b/roundup/backends/blobfiles.py
index 86ff228..62da5fa 100644
--- a/roundup/backends/blobfiles.py
+++ b/roundup/backends/blobfiles.py
@@ -15,7 +15,7 @@
 # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
 # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 # 
-#$Id: blobfiles.py,v 1.5 2002-07-08 06:58:15 richard Exp $
+#$Id: blobfiles.py,v 1.6 2002-07-09 03:02:52 richard Exp $
 '''
 This module exports file storage for roundup backends.
 Files are stored into a directory hierarchy.
@@ -74,12 +74,19 @@ class FileStorage:
            None, in which case the filename does not indicate which property
            is being saved.
         '''
+        # determine the name of the file to write to
         name = self.filename(classname, nodeid, property)
+
+        # make sure the file storage dir exists
         if not os.path.exists(os.path.dirname(name)):
             os.makedirs(os.path.dirname(name))
+
+        # open the temp file for writing
         open(name + '.tmp', 'wb').write(content)
-        self.transactions.append((self._doStoreFile, (name, )))
 
+        # save off the commit action
+        self.transactions.append((self._doStoreFile, (classname, nodeid,
+            property)))
 
     def getfile(self, classname, nodeid, property):
         '''Get the content of the file in the database.
@@ -88,6 +95,7 @@ class FileStorage:
         try:
             return open(filename, 'rb').read()
         except:
+            # now try the temp pre-commit filename
             try:
                 return open(filename+'.tmp', 'rb').read()
             except:
@@ -101,15 +109,36 @@ class FileStorage:
         files_dir = os.path.join(self.dir, 'files')
         return files_in_dir(files_dir)
 
-    def _doStoreFile(self, name, **databases):
+    def _doStoreFile(self, classname, nodeid, property, **databases):
         '''Store the file as part of a transaction commit.
         '''
+        # determine the name of the file to write to
+        name = self.filename(classname, nodeid, property)
+
         # the file is currently ".tmp" - move it to its real name to commit
         os.rename(name+".tmp", name)
-        self.indexer.add_file(name)
-        self.indexer.save_index()
+
+        # return the classname, nodeid so we reindex this content
+        return (classname, nodeid)
+
+    def _rollbackStoreFile(self, classname, nodeid, property, **databases):
+        '''Remove the temp file as a part of a rollback
+        '''
+        # determine the name of the file to delete
+        name = self.filename(classname, nodeid, property)
+        if os.path.exists(name+".tmp"):
+            os.remove(name+".tmp")
 
 # $Log: not supported by cvs2svn $
+# Revision 1.5  2002/07/08 06:58:15  richard
+# cleaned up the indexer code:
+#  - it splits more words out (much simpler, faster splitter)
+#  - removed code we'll never use (roundup.roundup_indexer has the full
+#    implementation, and replaces roundup.indexer)
+#  - only index text/plain and rfc822/message (ideas for other text formats to
+#    index are welcome)
+#  - added simple unit test for indexer. Needs more tests for regression.
+#
 # Revision 1.4  2002/06/19 03:07:19  richard
 # Moved the file storage commit into blobfiles where it belongs.
 #
diff --git a/roundup/hyperdb.py b/roundup/hyperdb.py
index 874fcbe..efbc716 100644
--- a/roundup/hyperdb.py
+++ b/roundup/hyperdb.py
@@ -15,7 +15,7 @@
 # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
 # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 # 
-# $Id: hyperdb.py,v 1.70 2002-06-27 12:06:20 gmcm Exp $
+# $Id: hyperdb.py,v 1.71 2002-07-09 03:02:52 richard Exp $
 
 __doc__ = """
 Hyperdatabase implementation, especially field types.
@@ -56,6 +56,8 @@ del Sink
 #
 class String:
     """An object designating a String property."""
+    def __init__(self, indexme='no'):
+        self.indexme = indexme == 'yes'
     def __repr__(self):
         ' more useful for dumps '
         return '<%s>'%self.__class__
@@ -157,6 +159,10 @@ transaction.
         """
         raise NotImplementedError
 
+    def post_init(self):
+        """Called once the schema initialisation has finished."""
+        raise NotImplementedError
+
     def __getattr__(self, classname):
         """A convenient way of calling self.getclass(classname)."""
         raise NotImplementedError
@@ -1111,6 +1117,16 @@ class Class:
                 raise ValueError, key
         self.properties.update(properties)
 
+    def index(self, nodeid):
+        '''Add (or refresh) the node's indexme String properties in the
+        search indexes, each under (classname, nodeid, property).'''
+        for prop, propclass in self.getprops().items():
+            if isinstance(propclass, String) and propclass.indexme:
+                # an unset property reads back as None - index it as ''
+                # so the literal word "None" is never made searchable
+                self.db.indexer.add_text((self.classname, nodeid, prop),
+                    str(self.get(nodeid, prop) or ''))
+
 # XXX not in spec
 class Node:
     ''' A convenience wrapper for the given node
@@ -1169,6 +1185,9 @@ def Choice(name, db, *options):
 
 #
 # $Log: not supported by cvs2svn $
+# Revision 1.70  2002/06/27 12:06:20  gmcm
+# Improve an error message.
+#
 # Revision 1.69  2002/06/17 23:15:29  richard
 # Can debug to stdout now
 #
diff --git a/roundup/indexer.py b/roundup/indexer.py
index 8b2f615..d82560c 100644
--- a/roundup/indexer.py
+++ b/roundup/indexer.py
@@ -14,7 +14,7 @@
 #     that promote freedom, but obviously am giving up any rights
 #     to compel such.
 # 
-#$Id: indexer.py,v 1.3 2002-07-08 06:58:15 richard Exp $
+#$Id: indexer.py,v 1.4 2002-07-09 03:02:52 richard Exp $
 '''
 This module provides an indexer class, RoundupIndexer, that stores text
 indices in a roundup instance.  This class makes searching the content of
@@ -23,112 +23,44 @@ messages and text files possible.
 import os, shutil, re, mimetypes, marshal, zlib, errno
 
 class Indexer:
-    ''' Indexes messages and files.
-
-        This implements a new splitter based on re.findall '\w+' and the
-        add_othertext method.
+    ''' Indexes information from roundup's hyperdb to allow efficient
+        searching.
     '''
     def __init__(self, db_path):
         indexdb_path = os.path.join(db_path, 'indexes')
+        self.indexdb = os.path.join(indexdb_path, 'index.db')
+        self.reindex = 0
+        self.casesensitive = 0
+        self.quiet = 9
 
         # see if we need to reindex because of a change in code
-        if (os.path.exists(indexdb_path) and
+        if (not os.path.exists(indexdb_path) or
                 not os.path.exists(os.path.join(indexdb_path, 'version'))):
-            shutil.rmtree(indexdb_path)
-
-        # see if the index exists
-        index_exists = 0
-        if not os.path.exists(indexdb_path):
+            # TODO: if the version file exists (in the future) we'll want to
+            # check the value in it - for now the file itself is a flag
+            if os.path.exists(indexdb_path):
+                shutil.rmtree(indexdb_path)
             os.makedirs(indexdb_path)
             os.chmod(indexdb_path, 0775)
             open(os.path.join(indexdb_path, 'version'), 'w').write('1\n')
-        else:
-            index_exists = 1
 
-        # save off the path to the indexdb
-        self.indexdb = os.path.join(indexdb_path, 'index.db')
-        self.reindex = 0
-        self.casesensitive = 0
-        self.quiet = 9
-
-        if not index_exists:
-            # index everything
-            files_path = os.path.join(db_path, 'files')
-            self.add_files(dir=files_path)
-            self.save_index()
+            # we need to reindex
+            self.reindex = 1
+        else:
+            self.reindex = 0
 
-    # override add_files so it's a little smarter about file types
-    def add_files(self, dir):
-        if not hasattr(self, 'files'):
-            self.load_index()
-        os.path.walk(dir, self.walk_add_file, None)
-        # Rebuild the fileid index
-        self.fileids = {}
-        for fname in self.files.keys():
-            fileid = self.files[fname][0]
-            self.fileids[fileid] = fname
-
-    # override add_file so it can be a little smarter about determining the
-    # file type
-    def walk_add_file(self, arg, dname, names, ftype=None):
-        for name in names:
-            name = os.path.join(dname, name)
-            if os.path.isfile(name):
-                self.add_file(name)
-            elif os.path.isdir(name):
-                os.path.walk(name, self.walk_add_file, None)
-    def add_file(self, fname, ftype=None):
-        ''' Index the contents of a regular file
+    def should_reindex(self):
+        '''Should we reindex?
         '''
-        if not hasattr(self, 'files'):
-            self.load_index()
-        # Is file eligible for (re)indexing?
-        if self.files.has_key(fname):
-            if self.reindex:
-                # Reindexing enabled, cleanup dicts
-                self.purge_entry(fname, self.files, self.words)
-            else:
-                # DO NOT reindex this file
-                if self.quiet < 5:
-                    print "Skipping", fname
-                return 0
-
-        # guess the file type
-        if ftype is None:
-            ftype = mimetypes.guess_type(fname)
-
-        # read in the file
-        text = open(fname).read()
-        if self.quiet < 5: print "Indexing", fname
-        words = self.splitter(text, ftype)
-
-        # Find new file index, and assign it to filename
-        # (_TOP uses trick of negative to avoid conflict with file index)
-        self.files['_TOP'] = (self.files['_TOP'][0]-1, None)
-        file_index =  abs(self.files['_TOP'][0])
-        self.files[fname] = (file_index, len(words))
+        return self.reindex
 
-        filedict = {}
-        for word in words:
-            if filedict.has_key(word):
-                filedict[word] = filedict[word]+1
-            else:
-                filedict[word] = 1
-
-        for word in filedict.keys():
-            if self.words.has_key(word):
-                entry = self.words[word]
-            else:
-                entry = {}
-            entry[file_index] = filedict[word]
-            self.words[word] = entry
-
-    # NOTE: this method signature deviates from the one specified in
-    # indexer - I'm not entirely sure where it was expected to the text
-    # from otherwise...
-    def add_othertext(self, identifier, text):
-        ''' Add some text associated with the identifier
+    def add_text(self, identifier, text, mime_type='text/plain'):
+        ''' Add some text associated with the (classname, nodeid, property)
+            identifier.
         '''
+        # make sure the index is loaded
+        self.load_index()
+
         # Is file eligible for (re)indexing?
         if self.files.has_key(identifier):
             # Reindexing enabled, cleanup dicts
@@ -141,7 +73,7 @@ class Indexer:
                 return 0
 
         # split into words
-        words = self.splitter(text, 'text/plain')
+        words = self.splitter(text, mime_type)
 
         # Find new file index, and assign it to identifier
         # (_TOP uses trick of negative to avoid conflict with file index)
@@ -174,7 +106,7 @@ class Indexer:
     def splitter(self, text, ftype):
         ''' Split the contents of a text string into a list of 'words'
         '''
-        if ftype in ('text/plain', 'message/rfc822'):
+        if ftype == 'text/plain':
             words = self.text_splitter(text, self.casesensitive)
         else:
             return []
@@ -193,37 +125,49 @@ class Indexer:
         # place
         return re.findall(r'\b\w{2,25}\b', text)
 
-    def search(self, search_terms, klass):
-        ''' display search results
+    def search(self, search_terms, klass, ignore={},
+            dre=re.compile(r'([^\d]+)(\d+)')):
+        ''' Look up [search, terms] and return {nodeid: {linkprop: [linked
+            nodeids]}} for the matching nodes of the hyperdb Class "klass".
+            Hits whose (classname, property) is a key of "ignore" are skipped.
+            "dre" is a helper, not an argument.
         '''
+        # do the index lookup
         hits = self.find(search_terms)
-        links = []
-        nodeids = {}
+        if not hits:
+            return {}
+
+        # this is specific to "issue" klass ... eugh
         designator_propname = {'msg': 'messages', 'file': 'files'}
-        if hits:
-            hitcount = len(hits)
-            # build a dictionary of nodes and their associated messages
-            # and files
-            for hit in hits.keys():
-                filename = hits[hit].split('/')[-1]
-                for designator, propname in designator_propname.items():
-                    if not filename.startswith(designator):
-                        continue
-                    nodeid = filename[len(designator):]
-                    result = apply(klass.find, (), {propname:nodeid})
-                    if not result:
-                        continue
-
-                    id = str(result[0])
-                    if not nodeids.has_key(id):
-                        nodeids[id] = {}
-
-                    node_dict = nodeids[id]
-                    if not node_dict.has_key(propname):
-                        node_dict[propname] = [nodeid]
-                    elif node_dict.has_key(propname):
-                        node_dict[propname].append(nodeid)
 
+        # build a dictionary of nodes and their associated messages
+        # and files
+        nodeids = {}
+        for classname, nodeid, property in hits.values():
+            # skip this result if we don't care about this class/property
+            if ignore.has_key((classname, property)):
+                continue
+
+            # if it's a property on klass, it's easy
+            if classname == klass.classname:
+                if not nodeids.has_key(nodeid):
+                    nodeids[nodeid] = {}
+                continue
+
+            # it's a linked class - find the klass entries linked to it
+            linkprop = designator_propname[classname]
+            for resid in klass.find(**{linkprop: nodeid}):
+                resid = str(resid)
+                # key on resid (not the "id" builtin) so that links already
+                # recorded for this klass node are kept
+                if not nodeids.has_key(resid):
+                    nodeids[resid] = {}
+                # update the links for this klass nodeid
+                node_dict = nodeids[resid]
+                if not node_dict.has_key(linkprop):
+                    node_dict[linkprop] = [nodeid]
+                else:
+                    node_dict[linkprop].append(nodeid)
         return nodeids
 
     # we override this to ignore not 2 < word < 25 and also to fix a bug -
@@ -303,6 +247,9 @@ class Indexer:
         self.fileids = db['FILEIDS']
 
     def save_index(self):
+        # make sure we're loaded
+        self.load_index()
+
         # brutal space saver... delete all the small segments
         for segment in self.segments:
             try:
@@ -354,6 +301,15 @@ class Indexer:
 
 #
 #$Log: not supported by cvs2svn $
+#Revision 1.3  2002/07/08 06:58:15  richard
+#cleaned up the indexer code:
+# - it splits more words out (much simpler, faster splitter)
+# - removed code we'll never use (roundup.roundup_indexer has the full
+#   implementation, and replaces roundup.indexer)
+# - only index text/plain and rfc822/message (ideas for other text formats to
+#   index are welcome)
+# - added simple unit test for indexer. Needs more tests for regression.
+#
 #Revision 1.2  2002/05/25 07:16:24  rochecompaan
 #Merged search_indexing-branch with HEAD
 #
diff --git a/roundup/roundupdb.py b/roundup/roundupdb.py
index 3f5620b..03bde0d 100644
--- a/roundup/roundupdb.py
+++ b/roundup/roundupdb.py
@@ -15,7 +15,7 @@
 # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
 # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 # 
-# $Id: roundupdb.py,v 1.59 2002-06-18 03:55:25 dman13 Exp $
+# $Id: roundupdb.py,v 1.60 2002-07-09 03:02:52 richard Exp $
 
 __doc__ = """
 Extending hyperdb with types specific to issue-tracking.
@@ -227,6 +227,16 @@ class Class(hyperdb.Class):
             react(self.db, self, nodeid, oldvalues)
 
 class FileClass(Class):
+    '''This class defines a large chunk of data. To support this, it has a
+       mandatory String property "content" which is typically saved off
+       externally to the hyperdb.
+
+       The default MIME type of this data is defined by the
+       "default_mime_type" class attribute, which may be overridden by each
+       node if the class defines a "type" String property.
+    '''
+    default_mime_type = 'text/plain'
+
     def create(self, **propvalues):
         ''' snaffle the file propvalue and store in a file
         '''
@@ -264,6 +274,28 @@ class FileClass(Class):
             d['content'] = hyperdb.String()
         return d
 
+    def index(self, nodeid):
+        ''' Index the node in the search index.
+
+            We want to index the content in addition to the normal String
+            property indexing.
+        '''
+        # perform normal indexing
+        Class.index(self, nodeid)
+
+        # get the content to index
+        content = self.get(nodeid, 'content')
+
+        # figure the mime type - a node with no "type" value uses the default
+        mime_type = None
+        if self.properties.has_key('type'):
+            mime_type = self.get(nodeid, 'type')
+        mime_type = mime_type or self.default_mime_type
+
+        # and index!
+        self.db.indexer.add_text((self.classname, nodeid, 'content'), content,
+            mime_type)
+
 class MessageSendError(RuntimeError):
     pass
 
@@ -659,6 +691,10 @@ class IssueClass(Class):
 
 #
 # $Log: not supported by cvs2svn $
+# Revision 1.59  2002/06/18 03:55:25  dman13
+# Fixed name/address display problem introduced by an earlier change.
+# (instead of "name<addr>" display "name <addr>")
+#
 # Revision 1.58  2002/06/16 01:05:15  dman13
 # Removed temporary workaround -- it seems it was a bug in the
 # nosyreaction detector in the 0.4.1 extended template and has already
diff --git a/roundup/templates/classic/dbinit.py b/roundup/templates/classic/dbinit.py
index 5e708e9..f6c1829 100644
--- a/roundup/templates/classic/dbinit.py
+++ b/roundup/templates/classic/dbinit.py
@@ -15,7 +15,7 @@
 # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
 # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 # 
-# $Id: dbinit.py,v 1.17 2002-05-24 04:03:23 richard Exp $
+# $Id: dbinit.py,v 1.18 2002-07-09 03:02:53 richard Exp $
 
 import os
 
@@ -101,6 +101,8 @@ def open(name=None):
     import detectors
     detectors.init(db)
 
+    # schema is set up - run any post-initialisation
+    db.post_init()
     return db
  
 def init(adminpw): 
@@ -141,6 +143,10 @@ def init(adminpw):
 
 #
 # $Log: not supported by cvs2svn $
+# Revision 1.17  2002/05/24 04:03:23  richard
+# Added commentage to the dbinit files to help people with their
+# customisation.
+#
 # Revision 1.16  2002/02/16 08:06:14  richard
 # Removed the key property restriction on title of the classic issue class.
 #
diff --git a/roundup/templates/extended/dbinit.py b/roundup/templates/extended/dbinit.py
index 7bddfee..fb8f875 100644
--- a/roundup/templates/extended/dbinit.py
+++ b/roundup/templates/extended/dbinit.py
@@ -15,7 +15,7 @@
 # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
 # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 # 
-# $Id: dbinit.py,v 1.21 2002-05-24 04:03:23 richard Exp $
+# $Id: dbinit.py,v 1.22 2002-07-09 03:02:53 richard Exp $
 
 import os
 
@@ -131,6 +131,8 @@ def open(name=None):
     import detectors
     detectors.init(db)
 
+    # schema is set up - run any post-initialisation
+    db.post_init()
     return db
  
 def init(adminpw): 
@@ -193,6 +195,10 @@ def init(adminpw):
 
 #
 # $Log: not supported by cvs2svn $
+# Revision 1.21  2002/05/24 04:03:23  richard
+# Added commentage to the dbinit files to help people with their
+# customisation.
+#
 # Revision 1.20  2002/02/15 07:08:44  richard
 #  . Alternate email addresses are now available for users. See the MIGRATION
 #    file for info on how to activate the feature.
diff --git a/test/test_db.py b/test/test_db.py
index c1bc92d..814c17a 100644
--- a/test/test_db.py
+++ b/test/test_db.py
@@ -15,7 +15,7 @@
 # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
 # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 # 
-# $Id: test_db.py,v 1.23 2002-06-20 23:51:48 richard Exp $ 
+# $Id: test_db.py,v 1.24 2002-07-09 03:02:53 richard Exp $ 
 
 import unittest, os, shutil
 
@@ -23,14 +23,18 @@ from roundup.hyperdb import String, Password, Link, Multilink, Date, \
     Interval, Class, DatabaseError
 from roundup.roundupdb import FileClass
 from roundup import date, password
+from roundup.indexer import Indexer
 
 def setupSchema(db, create):
     status = Class(db, "status", name=String())
     status.setkey("name")
     user = Class(db, "user", username=String(), password=Password())
-    file = FileClass(db, "file", name=String(), type=String())
-    issue = Class(db, "issue", title=String(), status=Link("status"),
-        nosy=Multilink("user"), deadline=Date(), foo=Interval())
+    file = FileClass(db, "file", name=String(), type=String(),
+        comment=String(indexme="yes"))
+    issue = Class(db, "issue", title=String(indexme="yes"),
+        status=Link("status"), nosy=Multilink("user"), deadline=Date(),
+        foo=Interval(), files=Multilink("file"))
+    db.post_init()
     if create:
         status.create(name="unread")
         status.create(name="in-progress")
@@ -112,8 +116,8 @@ class anydbmDBTestCase(MyTestCase):
         props = self.db.issue.getprops()
         keys = props.keys()
         keys.sort()
-        self.assertEqual(keys, ['deadline', 'fixer', 'foo', 'id', 'nosy',
-            'status', 'title'])
+        self.assertEqual(keys, ['deadline', 'files', 'fixer', 'foo', 'id',
+            'nosy', 'status', 'title'])
         self.assertEqual(self.db.issue.get('1', "fixer"), None)
 
     def testRetire(self):
@@ -246,7 +250,7 @@ class anydbmDBTestCase(MyTestCase):
         self.assertEqual(action, 'create')
         keys = params.keys()
         keys.sort()
-        self.assertEqual(keys, ['deadline', 'fixer', 'foo', 'nosy', 
+        self.assertEqual(keys, ['deadline', 'files', 'fixer', 'foo', 'nosy', 
             'status', 'title'])
         self.assertEqual(None,params['deadline'])
         self.assertEqual(None,params['fixer'])
@@ -296,6 +300,22 @@ class anydbmDBTestCase(MyTestCase):
         id2 = self.db2.issue.create(title="eggs", status='2')
         self.assertNotEqual(id1, id2)
 
+    def testSearching(self):
+        self.db.file.create(content='hello', type="text/plain")
+        self.db.file.create(content='world', type="text/frozz",
+            comment='blah blah')
+        self.db.issue.create(files=['1', '2'], title="flebble plop")
+        self.db.issue.create(title="flebble frooz")
+        self.db.commit()
+        self.assertEquals(self.db.indexer.search(['hello'], self.db.issue),
+            {'1': {'files': ['1']}})
+        self.assertEquals(self.db.indexer.search(['world'], self.db.issue), {})
+        self.assertEquals(self.db.indexer.search(['frooz'], self.db.issue),
+            {'2': {}})
+        self.assertEquals(self.db.indexer.search(['flebble'], self.db.issue),
+            {'2': {}, '1': {}})
+        self.assertEquals(self.db.indexer.search(['blah'], self.db.issue),
+            {'1': {'files': ['2']}})
 
 class anydbmReadOnlyDBTestCase(MyTestCase):
     def setUp(self):
@@ -399,6 +419,9 @@ def suite():
 
 #
 # $Log: not supported by cvs2svn $
+# Revision 1.23  2002/06/20 23:51:48  richard
+# Cleaned up the hyperdb tests
+#
 # Revision 1.22  2002/05/21 05:52:11  richard
 # Well whadya know, bsddb3 works again.
 # The backend is implemented _exactly_ the same as bsddb - so there's no