index 2547300f693d22e3bb7ac825a89937ba37da470a..b992c70d09a0f86c9d0e7beb70c5f19b5e9033eb 100644 (file)
#$Id: indexer_rdbms.py,v 1.18 2008-09-01 00:43:02 richard Exp $
-''' This implements the full-text indexer over two RDBMS tables. The first
+""" This implements the full-text indexer over two RDBMS tables. The first
is a mapping of words to occurrence IDs. The second maps the IDs to (Class,
propname, itemid) instances.
-'''
-import re, sets
+"""
+import re
+# Python 2.3 ... 2.6 compatibility:
+from roundup.anypy.sets_ import set
from roundup.backends.indexer_common import Indexer as IndexerBase
self.reindex = 0
def close(self):
- '''close the indexing database'''
+ """close the indexing database"""
# just nuke the circular reference
self.db = None
def save_index(self):
- '''Save the changes to the index.'''
+ """Save the changes to the index."""
# not necessary - the RDBMS connection will handle this for us
pass
def force_reindex(self):
- '''Force a reindexing of the database. This essentially
+ """Force a reindexing of the database. This essentially
empties the tables ids and index and sets a flag so
- that the databases are reindexed'''
+ that the databases are reindexed"""
self.reindex = 1
def should_reindex(self):
- '''returns True if the indexes need to be rebuilt'''
+ """returns True if the indexes need to be rebuilt"""
return self.reindex
def add_text(self, identifier, text, mime_type='text/plain'):
- ''' "identifier" is (classname, itemid, property) '''
+ """ "identifier" is (classname, itemid, property) """
if mime_type != 'text/plain':
return
text = unicode(text, "utf-8", "replace").upper()
wordlist = [w.encode("utf-8", "replace")
for w in re.findall(r'(?u)\b\w{2,25}\b', text)]
- words = sets.Set()
+ words = set()
for word in wordlist:
if self.is_stopword(word): continue
if len(word) > 25: continue
self.db.cursor.executemany(sql, words)
def find(self, wordlist):
- '''look up all the words in the wordlist.
+ """look up all the words in the wordlist.
If none are found return an empty dictionary
* more rules here
- '''
+ """
if not wordlist:
return []