summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 657d984)
author | richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2> | Tue, 9 Jul 2002 04:19:09 +0000 (04:19 +0000)
committer | richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2> | Tue, 9 Jul 2002 04:19:09 +0000 (04:19 +0000)
Fixed reindex on first access.
Also fixed reindexing of entries that change.
git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/trunk@840 57a73879-2fb5-44c3-a270-3262357dd7e2
diff --git a/roundup/admin.py b/roundup/admin.py
index 481ca46882b93ad9f53ea72579b1526c7bf216db..e54ef7c609ebfbb4ddcf646311e0922bfa1b4c40 100644 (file)
--- a/roundup/admin.py
+++ b/roundup/admin.py
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
-# $Id: admin.py,v 1.15 2002-06-17 23:14:44 richard Exp $
+# $Id: admin.py,v 1.16 2002-07-09 04:19:09 richard Exp $
import sys, os, getpass, getopt, re, UserDict, shlex, shutil
try:
self.db.pack(pack_before)
return 0
+ def do_reindex(self, args):
+ '''Usage: reindex
+ Re-generate an instance's search indexes.
+
+ This will re-generate the search indexes for an instance. This will
+ typically happen automatically.
+ '''
+ self.db.indexer.force_reindex()
+ self.db.reindex()
+ return 0
+
def run_command(self, args):
'''Run a single command
'''
#
# $Log: not supported by cvs2svn $
+# Revision 1.15 2002/06/17 23:14:44 richard
+# . #569415 ] {version}
+#
# Revision 1.14 2002/06/11 06:41:50 richard
# Removed prompt for admin email in initialisation.
#
index d2473dbb8e387c18d745b2be35a59d3f80268220..b341c4432713ab004ff7276607c8c4b96e303287 100644 (file)
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
-#$Id: back_anydbm.py,v 1.39 2002-07-09 03:02:52 richard Exp $
+#$Id: back_anydbm.py,v 1.40 2002-07-09 04:19:09 richard Exp $
'''
This module defines a backend that saves the hyperdatabase in a database
chosen by anydbm. It is guaranteed to always be available in python
def post_init(self):
"""Called once the schema initialisation has finished."""
# reindex the db if necessary
- if not self.indexer.should_reindex():
- return
+ if self.indexer.should_reindex():
+ self.reindex()
+
+ def reindex(self):
for klass in self.classes.values():
for nodeid in klass.list():
klass.index(nodeid)
#
#$Log: not supported by cvs2svn $
+#Revision 1.39 2002/07/09 03:02:52 richard
+#More indexer work:
+#- all String properties may now be indexed too. Currently there's a bit of
+# "issue" specific code in the actual searching which needs to be
+# addressed. In a nutshell:
+# + pass 'indexme="yes"' as a String() property initialisation arg, eg:
+# file = FileClass(db, "file", name=String(), type=String(),
+# comment=String(indexme="yes"))
+# + the comment will then be indexed and be searchable, with the results
+# related back to the issue that the file is linked to
+#- as a result of this work, the FileClass has a default MIME type that may
+# be overridden in a subclass, or by the use of a "type" property as is
+# done in the default templates.
+#- the regeneration of the indexes (if necessary) is done once the schema is
+# set up in the dbinit.
+#
#Revision 1.38 2002/07/08 06:58:15 richard
#cleaned up the indexer code:
# - it splits more words out (much simpler, faster splitter)
diff --git a/roundup/cgi_client.py b/roundup/cgi_client.py
index 83380f6685b580444de4784ad87bca0fe29279d6..b471c1b206be9611455fc61719447ae5f0440447 100644 (file)
--- a/roundup/cgi_client.py
+++ b/roundup/cgi_client.py
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
-# $Id: cgi_client.py,v 1.133 2002-07-08 15:32:05 gmcm Exp $
+# $Id: cgi_client.py,v 1.134 2002-07-09 04:19:09 richard Exp $
__doc__ = """
WWW request handler (also used in the stand-alone server).
import roundupdb, htmltemplate, date, hyperdb, password
from roundup.i18n import _
-from roundup.indexer import Indexer
class Unauthorised(ValueError):
pass
# someone gave us a non-int debug level, turn it off
self.debug = 0
- # used for searching the indexes
- self.indexer = Indexer('%s/db'%instance.INSTANCE_HOME)
-
-
def getuid(self):
try:
return self.db.user.lookup(self.user)
#
# $Log: not supported by cvs2svn $
+# Revision 1.133 2002/07/08 15:32:05 gmcm
+# Pagination of index pages.
+# New search form.
+#
# Revision 1.132 2002/07/08 07:26:14 richard
# ehem
#
diff --git a/roundup/htmltemplate.py b/roundup/htmltemplate.py
index 2541ee348dc0bf0a8318f2b95e582859476765c4..65e9992c6d2e8f6fd3eb998285f573573ee3730b 100644 (file)
--- a/roundup/htmltemplate.py
+++ b/roundup/htmltemplate.py
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
-# $Id: htmltemplate.py,v 1.95 2002-07-08 15:32:06 gmcm Exp $
+# $Id: htmltemplate.py,v 1.96 2002-07-09 04:19:09 richard Exp $
__doc__ = """
Template engine.
matches = None
if nodeids is None:
if search_text != '':
- matches = self.client.indexer.search(
+ matches = self.db.indexer.search(
search_text.split(' '), self.cl)
nodeids = self.cl.filter(matches, filterspec, sort, group)
for nodeid in nodeids[startwith:startwith+pagesize]:
#
# $Log: not supported by cvs2svn $
+# Revision 1.95 2002/07/08 15:32:06 gmcm
+# Pagination of index pages.
+# New search form.
+#
# Revision 1.94 2002/06/27 15:38:53 gmcm
# Fix the cycles (a clear method, called after render, that removes
# the bound methods from the globals dict).
diff --git a/roundup/indexer.py b/roundup/indexer.py
index d82560ce7d4ad2fb007ccf17c18384b030f93c5f..096b6c6dda28b589db9d29c62a2b510269f6aaf0 100644 (file)
--- a/roundup/indexer.py
+++ b/roundup/indexer.py
# that promote freedom, but obviously am giving up any rights
# to compel such.
#
-#$Id: indexer.py,v 1.4 2002-07-09 03:02:52 richard Exp $
+#$Id: indexer.py,v 1.5 2002-07-09 04:19:09 richard Exp $
'''
This module provides an indexer class, RoundupIndexer, that stores text
indices in a roundup instance. This class makes searching the content of
class Indexer:
''' Indexes information from roundup's hyperdb to allow efficient
searching.
+
+ Three structures are created by the indexer:
+ files {identifier: (fileid, wordcount)}
+ words {word: {fileid: count}}
+ fileids {fileid: identifier}
'''
def __init__(self, db_path):
- indexdb_path = os.path.join(db_path, 'indexes')
- self.indexdb = os.path.join(indexdb_path, 'index.db')
+ self.indexdb_path = os.path.join(db_path, 'indexes')
+ self.indexdb = os.path.join(self.indexdb_path, 'index.db')
self.reindex = 0
self.casesensitive = 0
self.quiet = 9
# see if we need to reindex because of a change in code
- if (not os.path.exists(indexdb_path) or
- not os.path.exists(os.path.join(indexdb_path, 'version'))):
+ if (not os.path.exists(self.indexdb_path) or
+ not os.path.exists(os.path.join(self.indexdb_path, 'version'))):
# TODO: if the version file exists (in the future) we'll want to
# check the value in it - for now the file itself is a flag
- if os.path.exists(indexdb_path):
- shutil.rmtree(indexdb_path)
- os.makedirs(indexdb_path)
- os.chmod(indexdb_path, 0775)
- open(os.path.join(indexdb_path, 'version'), 'w').write('1\n')
-
- # we need to reindex
- self.reindex = 1
- else:
- self.reindex = 0
+ self.force_reindex()
+
+ def force_reindex(self):
+ '''Force a reindex condition
+ '''
+ if os.path.exists(self.indexdb_path):
+ shutil.rmtree(self.indexdb_path)
+ os.makedirs(self.indexdb_path)
+ os.chmod(self.indexdb_path, 0775)
+ open(os.path.join(self.indexdb_path, 'version'), 'w').write('1\n')
+ self.reindex = 1
def should_reindex(self):
'''Should we reindex?
# make sure the index is loaded
self.load_index()
- # Is file eligible for (re)indexing?
+ # remove old entries for this identifier
if self.files.has_key(identifier):
- # Reindexing enabled, cleanup dicts
- if self.reindex:
- self.purge_entry(identifier, self.files, self.words)
- else:
- # DO NOT reindex this file
- if self.quiet < 5:
- print "Not reindexing", identifier
- return 0
+ self.purge_entry(identifier)
# split into words
words = self.splitter(text, mime_type)
pickle_fh.write(zlib.compress(pickle_str))
os.chmod(filename, 0664)
- def purge_entry(self, fname, file_dct, word_dct):
+ def purge_entry(self, identifier):
''' Remove a file from file index and word index
'''
- try: # The easy part, cleanup the file index
- file_index = file_dct[fname]
- del file_dct[fname]
- except KeyError:
- pass # We'll assume we only encounter KeyError's
+ if not self.files.has_key(identifier):
+ return
+
+ file_index = self.files[identifier][0]
+ del self.files[identifier]
+ del self.fileids[file_index]
+
# The much harder part, cleanup the word index
- for word, occurs in word_dct.items():
+ for key, occurs in self.words.items():
if occurs.has_key(file_index):
del occurs[file_index]
- word_dct[word] = occurs
def index_loaded(self):
return (hasattr(self,'fileids') and hasattr(self,'files') and
#
#$Log: not supported by cvs2svn $
+#Revision 1.4 2002/07/09 03:02:52 richard
+#More indexer work:
+#- all String properties may now be indexed too. Currently there's a bit of
+# "issue" specific code in the actual searching which needs to be
+# addressed. In a nutshell:
+# + pass 'indexme="yes"' as a String() property initialisation arg, eg:
+# file = FileClass(db, "file", name=String(), type=String(),
+# comment=String(indexme="yes"))
+# + the comment will then be indexed and be searchable, with the results
+# related back to the issue that the file is linked to
+#- as a result of this work, the FileClass has a default MIME type that may
+# be overridden in a subclass, or by the use of a "type" property as is
+# done in the default templates.
+#- the regeneration of the indexes (if necessary) is done once the schema is
+# set up in the dbinit.
+#
#Revision 1.3 2002/07/08 06:58:15 richard
#cleaned up the indexer code:
# - it splits more words out (much simpler, faster splitter)
diff --git a/roundup/roundupdb.py b/roundup/roundupdb.py
index 03bde0d1d3c486942c016a466f872e3b0a2fcd0f..dc181a4f3a78e60c3fc67e02c68f528129ddbdda 100644 (file)
--- a/roundup/roundupdb.py
+++ b/roundup/roundupdb.py
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
-# $Id: roundupdb.py,v 1.60 2002-07-09 03:02:52 richard Exp $
+# $Id: roundupdb.py,v 1.61 2002-07-09 04:19:09 richard Exp $
__doc__ = """
Extending hyperdb with types specific to issue-tracking.
dictionary attempts to specify any of these properties or a
"creation" or "activity" property, a ValueError is raised."""
if not properties.has_key('title'):
- properties['title'] = hyperdb.String()
+ properties['title'] = hyperdb.String(indexme='yes')
if not properties.has_key('messages'):
properties['messages'] = hyperdb.Multilink("msg")
if not properties.has_key('files'):
#
# $Log: not supported by cvs2svn $
+# Revision 1.60 2002/07/09 03:02:52 richard
+# More indexer work:
+# - all String properties may now be indexed too. Currently there's a bit of
+# "issue" specific code in the actual searching which needs to be
+# addressed. In a nutshell:
+# + pass 'indexme="yes"' as a String() property initialisation arg, eg:
+# file = FileClass(db, "file", name=String(), type=String(),
+# comment=String(indexme="yes"))
+# + the comment will then be indexed and be searchable, with the results
+# related back to the issue that the file is linked to
+# - as a result of this work, the FileClass has a default MIME type that may
+# be overridden in a subclass, or by the use of a "type" property as is
+# done in the default templates.
+# - the regeneration of the indexes (if necessary) is done once the schema is
+# set up in the dbinit.
+#
# Revision 1.59 2002/06/18 03:55:25 dman13
# Fixed name/address display problem introduced by an earlier change.
# (instead of "name<addr>" display "name <addr>")
diff --git a/test/test_db.py b/test/test_db.py
index 814c17a8274b080c949691c76ebc6a2714cdf971..e8a51b0b93f2a74050465a5b3bdb3899f49071c1 100644 (file)
--- a/test/test_db.py
+++ b/test/test_db.py
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
-# $Id: test_db.py,v 1.24 2002-07-09 03:02:53 richard Exp $
+# $Id: test_db.py,v 1.25 2002-07-09 04:19:09 richard Exp $
import unittest, os, shutil
{'2': {}})
self.assertEquals(self.db.indexer.search(['flebble'], self.db.issue),
{'2': {}, '1': {}})
- self.assertEquals(self.db.indexer.search(['blah'], self.db.issue),
- {'1': {'files': ['2']}})
+
+ def testReindexing(self):
+ self.db.issue.create(title="frooz")
+ self.db.commit()
+ self.assertEquals(self.db.indexer.search(['frooz'], self.db.issue),
+ {'1': {}})
+ self.db.issue.set('1', title="dooble")
+ self.db.commit()
+ self.assertEquals(self.db.indexer.search(['dooble'], self.db.issue),
+ {'1': {}})
+ self.assertEquals(self.db.indexer.search(['frooz'], self.db.issue), {})
+
+ def testForcedReindexing(self):
+ self.db.issue.create(title="flebble frooz")
+ self.db.commit()
+ self.assertEquals(self.db.indexer.search(['flebble'], self.db.issue),
+ {'1': {}})
+ self.db.indexer.quiet = 1
+ self.db.indexer.force_reindex()
+ self.db.post_init()
+ self.db.indexer.quiet = 9
+ self.assertEquals(self.db.indexer.search(['flebble'], self.db.issue),
+ {'1': {}})
class anydbmReadOnlyDBTestCase(MyTestCase):
def setUp(self):
#
# $Log: not supported by cvs2svn $
+# Revision 1.24 2002/07/09 03:02:53 richard
+# More indexer work:
+# - all String properties may now be indexed too. Currently there's a bit of
+# "issue" specific code in the actual searching which needs to be
+# addressed. In a nutshell:
+# + pass 'indexme="yes"' as a String() property initialisation arg, eg:
+# file = FileClass(db, "file", name=String(), type=String(),
+# comment=String(indexme="yes"))
+# + the comment will then be indexed and be searchable, with the results
+# related back to the issue that the file is linked to
+# - as a result of this work, the FileClass has a default MIME type that may
+# be overridden in a subclass, or by the use of a "type" property as is
+# done in the default templates.
+# - the regeneration of the indexes (if necessary) is done once the schema is
+# set up in the dbinit.
+#
# Revision 1.23 2002/06/20 23:51:48 richard
# Cleaned up the hyperdb tests
#