From a995573a382c85a9438c40cb1962db72fd28ca3f Mon Sep 17 00:00:00 2001 From: richard Date: Thu, 12 Mar 2009 04:29:16 +0000 Subject: [PATCH] force sqlite3 in py2.6+ to treat our stored text as UTF-8 git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/roundup/trunk@4183 57a73879-2fb5-44c3-a270-3262357dd7e2 --- roundup/anypy/TODO.txt | 2 -- roundup/backends/back_sqlite.py | 7 +++++++ roundup/backends/indexer_rdbms.py | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/roundup/anypy/TODO.txt b/roundup/anypy/TODO.txt index 12103ae..028058e 100644 --- a/roundup/anypy/TODO.txt +++ b/roundup/anypy/TODO.txt @@ -22,6 +22,4 @@ Python compatiblity TODO MimeWriter.MimeWrite.addheader -> email.Message.Message.add_header (2.3) -- test.test_sqlite.sqliteDBTest.testStringUnicode fails - # vim: si diff --git a/roundup/backends/back_sqlite.py b/roundup/backends/back_sqlite.py index 90a5190..c044781 100644 --- a/roundup/backends/back_sqlite.py +++ b/roundup/backends/back_sqlite.py @@ -108,6 +108,13 @@ class Database(rdbms_common.Database): else: conn = sqlite.connect(db, timeout=30) conn.row_factory = sqlite.Row + + # sqlite3 wants us to store Unicode in the db but that's not what's + # been done historically and it's definitely not what the other + # backends do, so we'll stick with UTF-8 + if sqlite_version == 3: + conn.text_factory = str + cursor = conn.cursor() return (conn, cursor) diff --git a/roundup/backends/indexer_rdbms.py b/roundup/backends/indexer_rdbms.py index b992c70..3e4a7de 100644 --- a/roundup/backends/indexer_rdbms.py +++ b/roundup/backends/indexer_rdbms.py @@ -65,8 +65,8 @@ class Indexer(IndexerBase): # ok, find all the unique words in the text text = unicode(text, "utf-8", "replace").upper() - wordlist = [w.encode("utf-8", "replace") - for w in re.findall(r'(?u)\b\w{2,25}\b', text)] + wordlist = [w.encode("utf-8") + for w in re.findall(r'(?u)\b\w{2,25}\b', text)] words = set() for word in wordlist: if self.is_stopword(word): continue -- 2.30.2