From: stefan
Date: Tue, 24 Nov 2009 20:12:52 +0000 (+0000)
Subject: Fix Issue2550609.
X-Git-Url: https://git.tokkee.org/?a=commitdiff_plain;h=7938787f80219ecbc959464086dcf65256e809c7;p=roundup.git

Fix Issue2550609.

git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/roundup/trunk@4386 57a73879-2fb5-44c3-a270-3262357dd7e2
---

diff --git a/roundup/backends/indexer_rdbms.py b/roundup/backends/indexer_rdbms.py
index 83d91ae..3155c02 100644
--- a/roundup/backends/indexer_rdbms.py
+++ b/roundup/backends/indexer_rdbms.py
@@ -64,10 +64,14 @@ class Indexer(IndexerBase):
             self.db.cursor.execute(sql, (id, ))
 
         # ok, find all the unique words in the text
-        text = unicode(text, "utf-8", "replace").upper()
-        wordlist = [w.encode("utf-8")
-                    for w in re.findall(r'(?u)\b\w{%d,%d}\b'
-                                        % (self.minlength, self.maxlength), text)]
+        def tryencode(str):
+            if not isinstance(str, unicode):
+                str = str.encode("utf-8", "replace")
+            return str
+        text = tryencode(text).upper()
+        wordlist = [tryencode(w)
+                    for w in re.findall(r'(?u)\b\w{%d,%d}\b'
+                                        % (self.minlength, self.maxlength), text)]
         words = set()
         for word in wordlist:
             if self.is_stopword(word): continue