summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 52bd1cc)
raw | patch | inline | side by side (parent: 52bd1cc)
author | stefan <stefan@57a73879-2fb5-44c3-a270-3262357dd7e2> | Tue, 24 Nov 2009 20:12:52 +0000 (20:12 +0000)
committer | stefan <stefan@57a73879-2fb5-44c3-a270-3262357dd7e2> | Tue, 24 Nov 2009 20:12:52 +0000 (20:12 +0000)
git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/roundup/trunk@4386 57a73879-2fb5-44c3-a270-3262357dd7e2
roundup/backends/indexer_rdbms.py | patch | blob | history |
index 83d91ae9f4ddf8c324d4f8589ff7433cd62bf45c..3155c0257daeee2fd655128244941357fb229917 100644 (file)
self.db.cursor.execute(sql, (id, ))
# ok, find all the unique words in the text
- text = unicode(text, "utf-8", "replace").upper()
- wordlist = [w.encode("utf-8")
- for w in re.findall(r'(?u)\b\w{%d,%d}\b'
- % (self.minlength, self.maxlength), text)]
+ def tryencode(str):
+ if not isinstance(str, unicode):
+ str = str.encode("utf-8", "replace")
+ return str
+ text = tryencode(text).upper()
+ wordlist = [tryencode(w)
+ for w in re.findall(r'(?u)\b\w{%d,%d}\b'
+ % (self.minlength, self.maxlength), text)]
words = set()
for word in wordlist:
if self.is_stopword(word): continue