From: ber
Date: Fri, 1 Jul 2011 13:23:09 +0000 (+0000)
Subject: Xapian indexing improved: Slightly faster and slightly smaller database.
X-Git-Url: https://git.tokkee.org/?a=commitdiff_plain;h=1f788c2c704bbfe25af49b8049178cc58433ee9b;p=roundup.git

Xapian indexing improved: Slightly faster and slightly smaller database.
Closes issue2550687. Thanks to Olly Betts for the patch.

git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/roundup/trunk@4623 57a73879-2fb5-44c3-a270-3262357dd7e2
---

diff --git a/CHANGES.txt b/CHANGES.txt
index c1f7d8b..7a8597a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -5,6 +5,11 @@ Entries without name were done by Richard Jones.
 
 2011-XX-XX 1.4.19 (r46XX)
 
+Features:
+
+- Xapian indexing improved: Slightly faster and slightly smaller database.
+  Closes issue2550687. Thanks to Olly Betts for the patch. (Bernhard Reiter)
+
 Fixed:
 
 - Updated the url to point to www.roundup-tracker.org in two places in the
diff --git a/roundup/backends/indexer_xapian.py b/roundup/backends/indexer_xapian.py
index 4116b1c..f071baa 100644
--- a/roundup/backends/indexer_xapian.py
+++ b/roundup/backends/indexer_xapian.py
@@ -72,20 +72,10 @@ class Indexer(IndexerBase):
         # indexed so we know what we're matching when we get results
         identifier = '%s:%s:%s'%identifier
 
-        # see if the id is in the database
-        enquire = xapian.Enquire(database)
-        query = xapian.Query(xapian.Query.OP_AND, [identifier])
-        enquire.set_query(query)
-        matches = enquire.get_mset(0, 10)
-        if len(matches):
-            docid = matches[0].docid
-        else:
-            docid = None
-
         # create the new document
         doc = xapian.Document()
         doc.set_data(identifier)
-        doc.add_posting(identifier, 0)
+        doc.add_term(identifier, 0)
 
         for match in re.finditer(r'\b\w{%d,%d}\b'
                                  % (self.minlength, self.maxlength),
@@ -95,10 +85,8 @@ class Indexer(IndexerBase):
                 continue
             term = stemmer(word)
             doc.add_posting(term, match.start(0))
-        if docid:
-            database.replace_document(docid, doc)
-        else:
-            database.add_document(doc)
+
+        database.replace_document(identifier, doc)
 
     def find(self, wordlist):
         '''look up all the words in the wordlist.