From 0afdb59a1aeb366ae1132f3d3d7e00410e962345 Mon Sep 17 00:00:00 2001 From: richard Date: Sun, 21 Mar 2004 23:39:08 +0000 Subject: [PATCH] That's the last of the RDBMS migration steps done! Yay! Note that the code currently has some unit testing issues: - Metakit needs some attention in a couple of areas - RDBMS backends are having trouble ordering their journal entries correctly. I'm going to be migrating them to use TIMESTAMP for the date column, but that's not necessarily going to fix things as mysql and postgresql both appear to have second granularity. Sqlite will ignore the data type as usual ;) Next up is the datatype-ification of the RDBMS backends. Part of that will involve the migration to numeric IDs, which will also be done in the *dbm backends (already done in metakit). The ID exposed *above* the hyperdb will be String, since so many things assume a string ID now. git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/trunk@2161 57a73879-2fb5-44c3-a270-3262357dd7e2 --- TODO.txt | 9 ++- roundup/admin.py | 4 +- roundup/backends/back_metakit.py | 8 +-- roundup/backends/back_mysql.py | 19 ++++- roundup/backends/back_postgresql.py | 12 ++++ roundup/backends/back_sqlite.py | 11 ++- roundup/backends/indexer_rdbms.py | 104 ++++++++++++++++++++++++++++ roundup/backends/rdbms_common.py | 11 +-- 8 files changed, 159 insertions(+), 19 deletions(-) create mode 100644 roundup/backends/indexer_rdbms.py diff --git a/TODO.txt b/TODO.txt index b16f115..a962ce7 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,10 +1,9 @@ This file has been re-purposed to contain specifically the items that need doing before the next release: -- indexing in RDBMSes -- add tests for group-by-multilink so I finally implement it for the RDBMSes -- s/getnode/getitem in backends (and s/Node/Item) - have rdbms backends look up the journal for actor if it's not set -- ensure index creation is triggered by the version 1->2 update - +- migrate rdbms backends to use typed columns - migrate to numeric ID values (fixes bug 817217) + +- ensure index creation is triggered by the version 1->2 update + (and other upgrade tests) diff --git a/roundup/admin.py b/roundup/admin.py index c08f0e8..e89f864 100644 --- a/roundup/admin.py +++ b/roundup/admin.py @@ -16,7 +16,7 @@ # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE, # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # -# $Id: admin.py,v 1.62 2004-02-11 23:55:08 richard Exp $ +# $Id: admin.py,v 1.63 2004-03-21 23:39:08 richard Exp $ '''Administration commands for maintaining Roundup trackers. ''' @@ -748,7 +748,7 @@ Command help: # convert types for propname, value in props.items(): try: - props[key] = hyperdb.rawToHyperdb(self.db, cl, None, + props[propname] = hyperdb.rawToHyperdb(self.db, cl, None, propname, value) except hyperdb.HyperdbValueError, message: raise UsageError, message diff --git a/roundup/backends/back_metakit.py b/roundup/backends/back_metakit.py index dc94edf..a0d8757 100755 --- a/roundup/backends/back_metakit.py +++ b/roundup/backends/back_metakit.py @@ -1,4 +1,4 @@ -# $Id: back_metakit.py,v 1.65 2004-03-19 05:27:55 richard Exp $ +# $Id: back_metakit.py,v 1.66 2004-03-21 23:39:08 richard Exp $ '''Metakit backend for Roundup, originally by Gordon McMillan. Known Current Bugs: @@ -1873,12 +1873,12 @@ class Indexer(Indexer): ids[oldpos].ignore = 1 self.changed = 1 pos = ids.append(tblid=tblid,nodeid=nodeid,propid=propid) - + wordlist = re.findall(r'\b\w{2,25}\b', text.upper()) words = {} for word in wordlist: - if not self.disallows.has_key(word): - words[word] = 1 + if not self.disallows.has_key(word): + words[word] = 1 words = words.keys() index = self.db.view('index').ordered(1) diff --git a/roundup/backends/back_mysql.py b/roundup/backends/back_mysql.py index 21f17d8..c1d1680 100644 --- a/roundup/backends/back_mysql.py +++ b/roundup/backends/back_mysql.py @@ -124,14 +124,29 @@ class Database(Database): self.create_version_2_tables() def create_version_2_tables(self): + # OTK store self.cursor.execute('CREATE TABLE otks (otk_key VARCHAR(255), ' - 'otk_value VARCHAR(255), otk_time FLOAT(20))') + 'otk_value VARCHAR(255), otk_time FLOAT(20)) ' + 'TYPE=%s'%self.mysql_backend) self.cursor.execute('CREATE INDEX otks_key_idx ON otks(otk_key)') + + # Sessions store self.cursor.execute('CREATE TABLE sessions (session_key VARCHAR(255), ' - 'session_time FLOAT(20), session_value VARCHAR(255))') + 'session_time FLOAT(20), session_value VARCHAR(255)) ' + 'TYPE=%s'%self.mysql_backend) self.cursor.execute('CREATE INDEX sessions_key_idx ON ' 'sessions(session_key)') + # full-text indexing store + self.cursor.execute('CREATE TABLE _textids (_class VARCHAR(255), ' + '_itemid VARCHAR(255), _prop VARCHAR(255), _textid INT) ' + 'TYPE=%s'%self.mysql_backend) + self.cursor.execute('CREATE TABLE _words (_word VARCHAR(30), ' + '_textid INT) TYPE=%s'%self.mysql_backend) + self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)') + sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg) + self.cursor.execute(sql, ('_textids', 1)) + def add_actor_column(self): # update existing tables to have the new actor column tables = self.database_schema['tables'] diff --git a/roundup/backends/back_postgresql.py b/roundup/backends/back_postgresql.py index 195bfa7..a8537df 100644 --- a/roundup/backends/back_postgresql.py +++ b/roundup/backends/back_postgresql.py @@ -113,14 +113,26 @@ class Database(rdbms_common.Database): self.create_version_2_tables() def create_version_2_tables(self): + # OTK store self.cursor.execute('CREATE TABLE otks (otk_key VARCHAR(255), ' 'otk_value VARCHAR(255), otk_time FLOAT(20))') self.cursor.execute('CREATE INDEX otks_key_idx ON otks(otk_key)') + + # Sessions store self.cursor.execute('CREATE TABLE sessions (session_key VARCHAR(255), ' 'session_time FLOAT(20), session_value VARCHAR(255))') self.cursor.execute('CREATE INDEX sessions_key_idx ON ' 'sessions(session_key)') + # full-text indexing store + self.cursor.execute('CREATE TABLE _textids (_class VARCHAR(255), ' + '_itemid VARCHAR(255), _prop VARCHAR(255), _textid INT4) ') + self.cursor.execute('CREATE TABLE _words (_word VARCHAR(30), ' + '_textid INT4)') + self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)') + sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg) + self.cursor.execute(sql, ('_textids', 1)) + def add_actor_column(self): # update existing tables to have the new actor column tables = self.database_schema['tables'] diff --git a/roundup/backends/back_sqlite.py b/roundup/backends/back_sqlite.py index e1105d3..4c883c9 100644 --- a/roundup/backends/back_sqlite.py +++ b/roundup/backends/back_sqlite.py @@ -1,4 +1,4 @@ -# $Id: back_sqlite.py,v 1.17 2004-03-18 01:58:45 richard Exp $ +# $Id: back_sqlite.py,v 1.18 2004-03-21 23:39:08 richard Exp $ '''Implements a backend for SQLite. See https://pysqlite.sourceforge.net/ for pysqlite info @@ -65,6 +65,15 @@ class Database(rdbms_common.Database): self.cursor.execute('create index sessions_key_idx on ' 'sessions(session_key)') + # full-text indexing store + self.cursor.execute('CREATE TABLE _textids (_class varchar, ' + '_itemid varchar, _prop varchar, _textid integer) ') + self.cursor.execute('CREATE TABLE _words (_word varchar, ' + '_textid integer)') + self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)') + sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg) + self.cursor.execute(sql, ('_textids', 1)) + def add_actor_column(self): # update existing tables to have the new actor column tables = self.database_schema['tables'] diff --git a/roundup/backends/indexer_rdbms.py b/roundup/backends/indexer_rdbms.py new file mode 100644 index 0000000..3528083 --- /dev/null +++ b/roundup/backends/indexer_rdbms.py @@ -0,0 +1,104 @@ +''' This implements the full-text indexer over two RDBMS tables. The first +is a mapping of words to occurance IDs. The second maps the IDs to (Class, +propname, itemid) instances. +''' +import re + +from indexer_dbm import Indexer + +class Indexer(Indexer): + disallows = {'THE':1, 'THIS':1, 'ZZZ':1, 'THAT':1, 'WITH':1} + def __init__(self, db): + self.db = db + self.reindex = 0 + + def close(self): + '''close the indexing database''' + # just nuke the circular reference + self.db = None + + def force_reindex(self): + '''Force a reindexing of the database. This essentially + empties the tables ids and index and sets a flag so + that the databases are reindexed''' + self.reindex = 1 + + def should_reindex(self): + '''returns True if the indexes need to be rebuilt''' + return self.reindex + + def add_text(self, identifier, text, mime_type='text/plain'): + ''' "identifier" is (classname, itemid, property) ''' + if mime_type != 'text/plain': + return + + # first, find the id of the (classname, itemid, property) + a = self.db.arg + sql = 'select _textid from _textids where _class=%s and '\ + '_itemid=%s and _prop=%s'%(a, a, a) + self.db.cursor.execute(sql, identifier) + r = self.db.cursor.fetchone() + if not r: + id = self.db.newid('_textids') + sql = 'insert into _textids (_textid, _class, _itemid, _prop)'\ + ' values (%s, %s, %s, %s)'%(a, a, a, a) + self.db.cursor.execute(sql, (id, ) + identifier) + else: + id = int(r[0]) + # clear out any existing indexed values + sql = 'delete from _words where _textid=%s'%a + self.db.cursor.execute(sql, (id, )) + + # ok, find all the words in the text + wordlist = re.findall(r'\b\w{2,25}\b', str(text).upper()) + words = {} + for word in wordlist: + if not self.disallows.has_key(word): + words[word] = 1 + words = words.keys() + + # for each word, add an entry in the db + for word in words: + # don't dupe + sql = 'select * from _words where _word=%s and _textid=%s'%(a, a) + self.db.cursor.execute(sql, (word, id)) + if self.db.cursor.fetchall(): + continue + sql = 'insert into _words (_word, _textid) values (%s, %s)'%(a, a) + self.db.cursor.execute(sql, (word, id)) + + def find(self, wordlist): + '''look up all the words in the wordlist. + If none are found return an empty dictionary + * more rules here + ''' + l = [word.upper() for word in wordlist if 26 > len(word) > 2] + + a = ','.join([self.db.arg] * len(l)) + sql = 'select distinct(_textid) from _words where _word in (%s)'%a + self.db.cursor.execute(sql, tuple(l)) + r = self.db.cursor.fetchall() + if not r: + return {} + a = ','.join([self.db.arg] * len(r)) + sql = 'select _class, _itemid, _prop from _textids '\ + 'where _textid in (%s)'%a + self.db.cursor.execute(sql, tuple([int(id) for (id,) in r])) + # self.search_index has the results as {some id: identifier} ... + # sigh + r = {} + k = 0 + for c,n,p in self.db.cursor.fetchall(): + key = (str(c), str(n), str(p)) + r[k] = key + k += 1 + return r + + def save_index(self): + # the normal RDBMS backend transaction mechanisms will handle this + pass + + def rollback(self): + # the normal RDBMS backend transaction mechanisms will handle this + pass + diff --git a/roundup/backends/rdbms_common.py b/roundup/backends/rdbms_common.py index 8064c3e..e7c4f42 100644 --- a/roundup/backends/rdbms_common.py +++ b/roundup/backends/rdbms_common.py @@ -1,4 +1,4 @@ -# $Id: rdbms_common.py,v 1.82 2004-03-19 04:47:59 richard Exp $ +# $Id: rdbms_common.py,v 1.83 2004-03-21 23:39:08 richard Exp $ ''' Relational database (SQL) backend common code. Basics: @@ -39,7 +39,7 @@ from roundup.backends import locking # support from blobfiles import FileStorage -from indexer_dbm import Indexer +from indexer_rdbms import Indexer from sessions_rdbms import Sessions, OneTimeKeys from roundup.date import Range @@ -59,7 +59,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database): self.config, self.journaltag = config, journaltag self.dir = config.DATABASE self.classes = {} - self.indexer = Indexer(self.dir) + self.indexer = Indexer(self) self.security = security.Security(self) # additional transaction support for external files and the like @@ -177,7 +177,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database): self.reindex() # commit - self.conn.commit() + self.sql_commit() # update this number when we need to make changes to the SQL structure # of the backen database @@ -591,7 +591,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database): if __debug__: print >>hyperdb.DEBUG, 'newid', (self, sql, classname) self.cursor.execute(sql, (classname, )) - newid = self.cursor.fetchone()[0] + newid = int(self.cursor.fetchone()[0]) # update the counter sql = 'update ids set num=%s where name=%s'%(self.arg, self.arg) @@ -1066,6 +1066,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database): def close(self): ''' Close off the connection. ''' + self.indexer.close() self.sql_close() # -- 2.30.2