Fix Issue2550609.

author stefan <stefan@57a73879-2fb5-44c3-a270-3262357dd7e2>

Tue, 24 Nov 2009 20:12:52 +0000 (20:12 +0000)

committer stefan <stefan@57a73879-2fb5-44c3-a270-3262357dd7e2>

Tue, 24 Nov 2009 20:12:52 +0000 (20:12 +0000)
author stefan <stefan@57a73879-2fb5-44c3-a270-3262357dd7e2>
Tue, 24 Nov 2009 20:12:52 +0000 (20:12 +0000)
committer stefan <stefan@57a73879-2fb5-44c3-a270-3262357dd7e2>
Tue, 24 Nov 2009 20:12:52 +0000 (20:12 +0000)
diff --git a/roundup/backends/indexer_rdbms.py b/roundup/backends/indexer_rdbms.py

index 83d91ae9f4ddf8c324d4f8589ff7433cd62bf45c..3155c0257daeee2fd655128244941357fb229917 100644 (file)
--- a/roundup/backends/indexer_rdbms.py
+++ b/roundup/backends/indexer_rdbms.py
@@ -64,10 +64,14 @@ class Indexer(IndexerBase):
              self.db.cursor.execute(sql, (id, ))
  
          # ok, find all the unique words in the text
-        text = unicode(text, "utf-8", "replace").upper()
-        wordlist = [w.encode("utf-8")
-            for w in re.findall(r'(?u)\b\w{%d,%d}\b'
-                                % (self.minlength, self.maxlength), text)]
+        def tryencode(str):
+            if not isinstance(str, unicode):
+                str = str.encode("utf-8", "replace")
+            return str
+        text = tryencode(text).upper()
+        wordlist = [tryencode(w)
+                    for w in re.findall(r'(?u)\b\w{%d,%d}\b'
+                                        % (self.minlength, self.maxlength), text)]
          words = set()
          for word in wordlist:
              if self.is_stopword(word): continue
author	stefan <stefan@57a73879-2fb5-44c3-a270-3262357dd7e2>
	Tue, 24 Nov 2009 20:12:52 +0000 (20:12 +0000)
committer	stefan <stefan@57a73879-2fb5-44c3-a270-3262357dd7e2>
	Tue, 24 Nov 2009 20:12:52 +0000 (20:12 +0000)