Code

force sqlite3 in py2.6+ to treat our stored text as UTF-8
author: richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>
Thu, 12 Mar 2009 04:29:16 +0000 (04:29 +0000)
committer: richard <richard@57a73879-2fb5-44c3-a270-3262357dd7e2>
Thu, 12 Mar 2009 04:29:16 +0000 (04:29 +0000)
git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/roundup/trunk@4183 57a73879-2fb5-44c3-a270-3262357dd7e2

roundup/anypy/TODO.txt
roundup/backends/back_sqlite.py
roundup/backends/indexer_rdbms.py

index 12103ae167013e738b65eb22df795bf4d0068f2d..028058e622be3fb0d2f946470b0dfd4efd829c3f 100644 (file)
@@ -22,6 +22,4 @@ Python compatiblity TODO
   MimeWriter.MimeWrite.addheader
   -> email.Message.Message.add_header     (2.3)
 
-- test.test_sqlite.sqliteDBTest.testStringUnicode fails
-
 # vim: si
index 90a5190a95e8283d4d4184a6cd858a76414e58ed..c0447810777be863b1de136aa2c4761fa1723b29 100644 (file)
@@ -108,6 +108,13 @@ class Database(rdbms_common.Database):
         else:
             conn = sqlite.connect(db, timeout=30)
             conn.row_factory = sqlite.Row
+
+        # sqlite3 wants us to store Unicode in the db but that's not what's
+        # been done historically and it's definitely not what the other
+        # backends do, so we'll stick with UTF-8
+        if sqlite_version == 3:
+            conn.text_factory = str
+
         cursor = conn.cursor()
         return (conn, cursor)
 
index b992c70d09a0f86c9d0e7beb70c5f19b5e9033eb..3e4a7de78987e2af8dab931d17507fd7c5f04bee 100644 (file)
@@ -65,8 +65,8 @@ class Indexer(IndexerBase):
 
         # ok, find all the unique words in the text
         text = unicode(text, "utf-8", "replace").upper()
-        wordlist = [w.encode("utf-8", "replace")
-                for w in re.findall(r'(?u)\b\w{2,25}\b', text)]
+        wordlist = [w.encode("utf-8")
+            for w in re.findall(r'(?u)\b\w{2,25}\b', text)]
         words = set()
         for word in wordlist:
             if self.is_stopword(word): continue