That's the last of the RDBMS migration steps done! Yay!

author richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>

Sun, 21 Mar 2004 23:39:08 +0000 (23:39 +0000)

committer richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>

Sun, 21 Mar 2004 23:39:08 +0000 (23:39 +0000)
author richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>
Sun, 21 Mar 2004 23:39:08 +0000 (23:39 +0000)
committer richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>
Sun, 21 Mar 2004 23:39:08 +0000 (23:39 +0000)
diff --git a/TODO.txt b/TODO.txt

index b16f1153f7e41ab93207d4a50186e9cffcf0b361..a962ce7bbcd4a96e3b4e8498872939780227c129 100644 (file)
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,10 +1,9 @@
  This file has been re-purposed to contain specifically the items that need
  doing before the next release:
  
-- indexing in RDBMSes
-- add tests for group-by-multilink so I finally implement it for the RDBMSes
-- s/getnode/getitem in backends (and s/Node/Item)
  - have rdbms backends look up the journal for actor if it's not set
-- ensure index creation is triggered by the version 1->2 update
-
+- migrate rdbms backends to use typed columns
  - migrate to numeric ID values (fixes bug 817217)
+
+- ensure index creation is triggered by the version 1->2 update
+  (and other upgrade tests)
diff --git a/roundup/admin.py b/roundup/admin.py

index c08f0e8bf8c12a94cc8b7c33976bccb5a87a3391..e89f8643df326727cf15b1142a8be69d564fa09b 100644 (file)
--- a/roundup/admin.py
+++ b/roundup/admin.py
@@ -16,7 +16,7 @@
  # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
  # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  # 
-# $Id: admin.py,v 1.62 2004-02-11 23:55:08 richard Exp $
+# $Id: admin.py,v 1.63 2004-03-21 23:39:08 richard Exp $
  
  '''Administration commands for maintaining Roundup trackers.
  '''
@@ -748,7 +748,7 @@ Command help:
          # convert types
          for propname, value in props.items():
              try:
-                props[key] = hyperdb.rawToHyperdb(self.db, cl, None,
+                props[propname] = hyperdb.rawToHyperdb(self.db, cl, None,
                      propname, value)
              except hyperdb.HyperdbValueError, message:
                  raise UsageError, message
diff --git a/roundup/backends/back_metakit.py b/roundup/backends/back_metakit.py

index dc94edf33aac1929319792a589befc2b0df151c7..a0d87577be6872d34e89e3e947c15538d148c18c 100755 (executable)
--- a/roundup/backends/back_metakit.py
+++ b/roundup/backends/back_metakit.py
@@ -1,4 +1,4 @@
-# $Id: back_metakit.py,v 1.65 2004-03-19 05:27:55 richard Exp $
+# $Id: back_metakit.py,v 1.66 2004-03-21 23:39:08 richard Exp $
  '''Metakit backend for Roundup, originally by Gordon McMillan.
  
  Known Current Bugs:
@@ -1873,12 +1873,12 @@ class Indexer(Indexer):
              ids[oldpos].ignore = 1
              self.changed = 1
          pos = ids.append(tblid=tblid,nodeid=nodeid,propid=propid)
-        
+
          wordlist = re.findall(r'\b\w{2,25}\b', text.upper())
          words = {}
          for word in wordlist:
-           if not self.disallows.has_key(word):
-               words[word] = 1
+            if not self.disallows.has_key(word):
+                words[word] = 1
          words = words.keys()
          
          index = self.db.view('index').ordered(1)
diff --git a/roundup/backends/back_mysql.py b/roundup/backends/back_mysql.py

index 21f17d8761361e4a993dea1e60b001b0ee92ea9d..c1d1680aa714d66b3253afd0649f054054821ccc 100644 (file)
--- a/roundup/backends/back_mysql.py
+++ b/roundup/backends/back_mysql.py
@@ -124,14 +124,29 @@ class Database(Database):
              self.create_version_2_tables()
  
      def create_version_2_tables(self):
+        # OTK store
          self.cursor.execute('CREATE TABLE otks (otk_key VARCHAR(255), '
-            'otk_value VARCHAR(255), otk_time FLOAT(20))')
+            'otk_value VARCHAR(255), otk_time FLOAT(20)) '
+            'TYPE=%s'%self.mysql_backend)
          self.cursor.execute('CREATE INDEX otks_key_idx ON otks(otk_key)')
+
+        # Sessions store
          self.cursor.execute('CREATE TABLE sessions (session_key VARCHAR(255), '
-            'session_time FLOAT(20), session_value VARCHAR(255))')
+            'session_time FLOAT(20), session_value VARCHAR(255)) '
+            'TYPE=%s'%self.mysql_backend)
          self.cursor.execute('CREATE INDEX sessions_key_idx ON '
              'sessions(session_key)')
  
+        # full-text indexing store
+        self.cursor.execute('CREATE TABLE _textids (_class VARCHAR(255), '
+            '_itemid VARCHAR(255), _prop VARCHAR(255), _textid INT) '
+            'TYPE=%s'%self.mysql_backend)
+        self.cursor.execute('CREATE TABLE _words (_word VARCHAR(30), '
+            '_textid INT) TYPE=%s'%self.mysql_backend)
+        self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)')
+        sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg)
+        self.cursor.execute(sql, ('_textids', 1))
+
      def add_actor_column(self):
          # update existing tables to have the new actor column
          tables = self.database_schema['tables']
diff --git a/roundup/backends/back_postgresql.py b/roundup/backends/back_postgresql.py

index 195bfa76ef2a15834552490497ea09ff95b18ec3..a8537dfe2704ba75f4a82e2b0d00de6eb89e35c7 100644 (file)
--- a/roundup/backends/back_postgresql.py
+++ b/roundup/backends/back_postgresql.py
@@ -113,14 +113,26 @@ class Database(rdbms_common.Database):
              self.create_version_2_tables()
  
      def create_version_2_tables(self):
+        # OTK store
          self.cursor.execute('CREATE TABLE otks (otk_key VARCHAR(255), '
              'otk_value VARCHAR(255), otk_time FLOAT(20))')
          self.cursor.execute('CREATE INDEX otks_key_idx ON otks(otk_key)')
+
+        # Sessions store
          self.cursor.execute('CREATE TABLE sessions (session_key VARCHAR(255), '
              'session_time FLOAT(20), session_value VARCHAR(255))')
          self.cursor.execute('CREATE INDEX sessions_key_idx ON '
              'sessions(session_key)')
  
+        # full-text indexing store
+        self.cursor.execute('CREATE TABLE _textids (_class VARCHAR(255), '
+            '_itemid VARCHAR(255), _prop VARCHAR(255), _textid INT4) ')
+        self.cursor.execute('CREATE TABLE _words (_word VARCHAR(30), '
+            '_textid INT4)')
+        self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)')
+        sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg)
+        self.cursor.execute(sql, ('_textids', 1))
+
      def add_actor_column(self):
          # update existing tables to have the new actor column
          tables = self.database_schema['tables']
diff --git a/roundup/backends/back_sqlite.py b/roundup/backends/back_sqlite.py

index e1105d3488484e85709872189091dbdd7ac09dac..4c883c9ed923c02337904d91b9845117af7e6211 100644 (file)
--- a/roundup/backends/back_sqlite.py
+++ b/roundup/backends/back_sqlite.py
@@ -1,4 +1,4 @@
-# $Id: back_sqlite.py,v 1.17 2004-03-18 01:58:45 richard Exp $
+# $Id: back_sqlite.py,v 1.18 2004-03-21 23:39:08 richard Exp $
  '''Implements a backend for SQLite.
  
  See https://pysqlite.sourceforge.net/ for pysqlite info
@@ -65,6 +65,15 @@ class Database(rdbms_common.Database):
          self.cursor.execute('create index sessions_key_idx on '
                  'sessions(session_key)')
  
+        # full-text indexing store
+        self.cursor.execute('CREATE TABLE _textids (_class varchar, '
+            '_itemid varchar, _prop varchar, _textid integer) ')
+        self.cursor.execute('CREATE TABLE _words (_word varchar, '
+            '_textid integer)')
+        self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)')
+        sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg)
+        self.cursor.execute(sql, ('_textids', 1))
+
      def add_actor_column(self):
          # update existing tables to have the new actor column
          tables = self.database_schema['tables']
diff --git a/roundup/backends/indexer_rdbms.py b/roundup/backends/indexer_rdbms.py

new file mode 100644 (file)

index 0000000..3528083
--- /dev/null
+++ b/roundup/backends/indexer_rdbms.py
@@ -0,0 +1,104 @@
+''' This implements the full-text indexer over two RDBMS tables. The first
+is a mapping of words to occurance IDs. The second maps the IDs to (Class,
+propname, itemid) instances.
+'''
+import re
+
+from indexer_dbm import Indexer
+
+class Indexer(Indexer):
+    disallows = {'THE':1, 'THIS':1, 'ZZZ':1, 'THAT':1, 'WITH':1}
+    def __init__(self, db):
+        self.db = db
+        self.reindex = 0
+
+    def close(self):
+        '''close the indexing database'''
+        # just nuke the circular reference
+        self.db = None
+  
+    def force_reindex(self):
+        '''Force a reindexing of the database.  This essentially
+        empties the tables ids and index and sets a flag so
+        that the databases are reindexed'''
+        self.reindex = 1
+
+    def should_reindex(self):
+        '''returns True if the indexes need to be rebuilt'''
+        return self.reindex
+
+    def add_text(self, identifier, text, mime_type='text/plain'):
+        ''' "identifier" is  (classname, itemid, property) '''
+        if mime_type != 'text/plain':
+            return
+
+        # first, find the id of the (classname, itemid, property)
+        a = self.db.arg
+        sql = 'select _textid from _textids where _class=%s and '\
+            '_itemid=%s and _prop=%s'%(a, a, a)
+        self.db.cursor.execute(sql, identifier)
+        r = self.db.cursor.fetchone()
+        if not r:
+            id = self.db.newid('_textids')
+            sql = 'insert into _textids (_textid, _class, _itemid, _prop)'\
+                ' values (%s, %s, %s, %s)'%(a, a, a, a)
+            self.db.cursor.execute(sql, (id, ) + identifier)
+        else:
+            id = int(r[0])
+            # clear out any existing indexed values
+            sql = 'delete from _words where _textid=%s'%a
+            self.db.cursor.execute(sql, (id, ))
+
+        # ok, find all the words in the text
+        wordlist = re.findall(r'\b\w{2,25}\b', str(text).upper())
+        words = {}
+        for word in wordlist:
+            if not self.disallows.has_key(word):
+                words[word] = 1
+        words = words.keys()
+
+        # for each word, add an entry in the db
+        for word in words:
+            # don't dupe
+            sql = 'select * from _words where _word=%s and _textid=%s'%(a, a)
+            self.db.cursor.execute(sql, (word, id))
+            if self.db.cursor.fetchall():
+                continue
+            sql = 'insert into _words (_word, _textid) values (%s, %s)'%(a, a)
+            self.db.cursor.execute(sql, (word, id))
+
+    def find(self, wordlist):
+        '''look up all the words in the wordlist.
+        If none are found return an empty dictionary
+        * more rules here
+        '''        
+        l = [word.upper() for word in wordlist if 26 > len(word) > 2]
+
+        a = ','.join([self.db.arg] * len(l))
+        sql = 'select distinct(_textid) from _words where _word in (%s)'%a
+        self.db.cursor.execute(sql, tuple(l))
+        r = self.db.cursor.fetchall()
+        if not r:
+            return {}
+        a = ','.join([self.db.arg] * len(r))
+        sql = 'select _class, _itemid, _prop from _textids '\
+            'where _textid in (%s)'%a
+        self.db.cursor.execute(sql, tuple([int(id) for (id,) in r]))
+        # self.search_index has the results as {some id: identifier} ...
+        # sigh
+        r = {}
+        k = 0
+        for c,n,p in self.db.cursor.fetchall():
+            key = (str(c), str(n), str(p))
+            r[k] = key
+            k += 1
+        return r
+
+    def save_index(self):
+        # the normal RDBMS backend transaction mechanisms will handle this
+        pass
+
+    def rollback(self):
+        # the normal RDBMS backend transaction mechanisms will handle this
+        pass
+
diff --git a/roundup/backends/rdbms_common.py b/roundup/backends/rdbms_common.py

index 8064c3ecd0c75f5b52f04a6fae41248d6946ca2d..e7c4f42c2f165976a3a83f1c3a33fe4291bdeb53 100644 (file)
--- a/roundup/backends/rdbms_common.py
+++ b/roundup/backends/rdbms_common.py
@@ -1,4 +1,4 @@
-# $Id: rdbms_common.py,v 1.82 2004-03-19 04:47:59 richard Exp $
+# $Id: rdbms_common.py,v 1.83 2004-03-21 23:39:08 richard Exp $
  ''' Relational database (SQL) backend common code.
  
  Basics:
@@ -39,7 +39,7 @@ from roundup.backends import locking
  
  # support
  from blobfiles import FileStorage
-from indexer_dbm import Indexer
+from indexer_rdbms import Indexer
  from sessions_rdbms import Sessions, OneTimeKeys
  from roundup.date import Range
  
@@ -59,7 +59,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          self.config, self.journaltag = config, journaltag
          self.dir = config.DATABASE
          self.classes = {}
-        self.indexer = Indexer(self.dir)
+        self.indexer = Indexer(self)
          self.security = security.Security(self)
  
          # additional transaction support for external files and the like
@@ -177,7 +177,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
              self.reindex()
  
          # commit
-        self.conn.commit()
+        self.sql_commit()
  
      # update this number when we need to make changes to the SQL structure
      # of the backen database
@@ -591,7 +591,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
          if __debug__:
              print >>hyperdb.DEBUG, 'newid', (self, sql, classname)
          self.cursor.execute(sql, (classname, ))
-        newid = self.cursor.fetchone()[0]
+        newid = int(self.cursor.fetchone()[0])
  
          # update the counter
          sql = 'update ids set num=%s where name=%s'%(self.arg, self.arg)
@@ -1066,6 +1066,7 @@ class Database(FileStorage, hyperdb.Database, roundupdb.Database):
      def close(self):
          ''' Close off the connection.
          '''
+        self.indexer.close()
          self.sql_close()
  
  #
author	richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>
	Sun, 21 Mar 2004 23:39:08 +0000 (23:39 +0000)
committer	richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>
	Sun, 21 Mar 2004 23:39:08 +0000 (23:39 +0000)
TODO.txt		patch \| blob \| history
roundup/admin.py		patch \| blob \| history
roundup/backends/back_metakit.py		patch \| blob \| history
roundup/backends/back_mysql.py		patch \| blob \| history
roundup/backends/back_postgresql.py		patch \| blob \| history
roundup/backends/back_sqlite.py		patch \| blob \| history
roundup/backends/indexer_rdbms.py	[new file with mode: 0644]	patch \| blob
roundup/backends/rdbms_common.py		patch \| blob \| history