From: kedder Date: Wed, 15 Jan 2003 22:17:20 +0000 (+0000) Subject: applied unicode patch X-Git-Url: https://git.tokkee.org/?a=commitdiff_plain;h=7eb0f490663b4aa11644b9878451911910daec24;p=roundup.git applied unicode patch git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/trunk@1460 57a73879-2fb5-44c3-a270-3262357dd7e2 --- diff --git a/CHANGES.txt b/CHANGES.txt index d52a001..cf47373 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -15,6 +15,9 @@ are given with the most recent entry first. - fix StringHTMLProperty hyperlinking - added mysql backend - fixes to CGI form handling (NEEDS BACKPORTING TO 0.5) +- applied unicode patch. All data is stored in utf-8. Incoming messages + converted from any encoding to utf-8, outgoing messages are encoded + according to rfc2822 (sf bug 568873) 2003-??-?? 0.5.5 diff --git a/roundup/backends/back_anydbm.py b/roundup/backends/back_anydbm.py index 14b20f9..2e1bd9b 100644 --- a/roundup/backends/back_anydbm.py +++ b/roundup/backends/back_anydbm.py @@ -15,7 +15,7 @@ # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE, # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # -#$Id: back_anydbm.py,v 1.96 2003-01-08 05:39:40 richard Exp $ +#$Id: back_anydbm.py,v 1.97 2003-01-15 22:17:19 kedder Exp $ ''' This module defines a backend that saves the hyperdatabase in a database chosen by anydbm. It is guaranteed to always be available in python @@ -847,7 +847,7 @@ class Class(hyperdb.Class): (self.classname, newid, key)) elif isinstance(prop, String): - if type(value) != type(''): + if type(value) != type('') and type(value) != type(u''): raise TypeError, 'new property "%s" not a string'%key elif isinstance(prop, Password): @@ -1244,7 +1244,7 @@ class Class(hyperdb.Class): journalvalues[propname] = tuple(l) elif isinstance(prop, String): - if value is not None and type(value) != type(''): + if value is not None and type(value) != type('') and type(value) != type(u''): raise TypeError, 'new property "%s" not a string'%propname elif isinstance(prop, Password): diff --git a/roundup/backends/rdbms_common.py b/roundup/backends/rdbms_common.py index 550b51b..30fd371 100644 --- a/roundup/backends/rdbms_common.py +++ b/roundup/backends/rdbms_common.py @@ -1,4 +1,4 @@ -# $Id: rdbms_common.py,v 1.28 2003-01-12 23:53:20 richard Exp $ +# $Id: rdbms_common.py,v 1.29 2003-01-15 22:17:19 kedder Exp $ ''' Relational database (SQL) backend common code. Basics: @@ -1070,7 +1070,7 @@ class Class(hyperdb.Class): (self.classname, newid, key)) elif isinstance(prop, String): - if type(value) != type(''): + if type(value) != type('') and type(value) != type(u''): raise TypeError, 'new property "%s" not a string'%key elif isinstance(prop, Password): @@ -1432,7 +1432,7 @@ class Class(hyperdb.Class): journalvalues[propname] = tuple(l) elif isinstance(prop, String): - if value is not None and type(value) != type(''): + if value is not None and type(value) != type('') and type(value) != type(u''): raise TypeError, 'new property "%s" not a string'%propname elif isinstance(prop, Password): diff --git a/roundup/mailgw.py b/roundup/mailgw.py index b9dd886..a12f909 100644 --- a/roundup/mailgw.py +++ b/roundup/mailgw.py @@ -73,7 +73,7 @@ are calling the create() method to create a new node). If an auditor raises an exception, the original message is bounced back to the sender with the explanatory message given in the exception. -$Id: mailgw.py,v 1.106 2003-01-12 00:03:10 richard Exp $ +$Id: mailgw.py,v 1.107 2003-01-15 22:17:19 kedder Exp $ ''' import string, re, os, mimetools, cStringIO, smtplib, socket, binascii, quopri @@ -81,6 +81,8 @@ import time, random, sys import traceback, MimeWriter import hyperdb, date, password +import rfc2822 + SENDMAILDEBUG = os.environ.get('SENDMAILDEBUG', '') class MailGWError(ValueError): @@ -134,6 +136,10 @@ class Message(mimetools.Message): s.seek(0) return Message(s) + def getheader(self, name, default=None): + hdr = mimetools.Message.getheader(self, name, default) + return rfc2822.decode_header(hdr) + subject_re = re.compile(r'(?P\s*\W?\s*(fw|fwd|re|aw)\W\s*)*' r'\s*(?P")?(\[(?P[^\d\s]+)(?P\d+)?\])?' r'\s*(?P[^[]+)?"?(\[(?P<args>.+?)\])?', re.I) @@ -339,7 +345,7 @@ class MailGW: writer.addheader('MIME-Version', '1.0') part = writer.startmultipartbody('mixed') part = writer.nextpart() - body = part.startbody('text/plain') + body = part.startbody('text/plain; charset=utf-8') body.write('\n'.join(error)) # attach the original message to the returned message @@ -377,7 +383,19 @@ class MailGW: else: # take it as text data = part.fp.read() - return data + + # Encode message to unicode + charset = rfc2822.unaliasCharset(part.getparam("charset")) + if charset: + # Do conversion only if charset specified + edata = unicode(data, charset).encode('utf-8') + # Convert from dos eol to unix + edata = edata.replace('\r\n', '\n') + else: + # Leave message content as is + edata = data + + return edata def handle_message(self, message): ''' message - a Message instance diff --git a/roundup/rfc2822.py b/roundup/rfc2822.py new file mode 100644 index 0000000..7cee715 --- /dev/null +++ b/roundup/rfc2822.py @@ -0,0 +1,160 @@ +import re +from binascii import b2a_base64, a2b_base64 + +ecre = re.compile(r''' + =\? # literal =? + (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset + \? # literal ? + (?P<encoding>[qb]) # either a "q" or a "b", case insensitive + \? # literal ? + (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string + \?= # literal ?= + ''', re.VERBOSE | re.IGNORECASE) + +hqre = re.compile(r'^[-a-zA-Z0-9!*+/\[\]., ]+$') + +def base64_decode(s, convert_eols=None): + """Decode a raw base64 string. + + If convert_eols is set to a string value, all canonical email linefeeds, + e.g. "\\r\\n", in the decoded text will be converted to the value of + convert_eols. os.linesep is a good choice for convert_eols if you are + decoding a text attachment. + + This function does not parse a full MIME header value encoded with + base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high + level email.Header class for that functionality. + + Taken from 'email' module + """ + if not s: + return s + + dec = a2b_base64(s) + if convert_eols: + return dec.replace(CRLF, convert_eols) + return dec + +def unquote_match(match): + """Turn a match in the form =AB to the ASCII character with value 0xab + + Taken from 'email' module + """ + s = match.group(0) + return chr(int(s[1:3], 16)) + +def qp_decode(s): + """Decode a string encoded with RFC 2045 MIME header `Q' encoding. + + This function does not parse a full MIME header value encoded with + quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use + the high level email.Header class for that functionality. + + Taken from 'email' module + """ + s = s.replace('_', ' ') + return re.sub(r'=\w{2}', unquote_match, s) + +def _decode_header(header): + """Decode a message header value without converting charset. + + Returns a list of (decoded_string, charset) pairs containing each of the + decoded parts of the header. Charset is None for non-encoded parts of the + header, otherwise a lower-case string containing the name of the character + set specified in the encoded string. + + Taken from 'email' module + """ + # If no encoding, just return the header + header = str(header) + if not ecre.search(header): + return [(header, None)] + + decoded = [] + dec = '' + for line in header.splitlines(): + # This line might not have an encoding in it + if not ecre.search(line): + decoded.append((line, None)) + continue + + parts = ecre.split(line) + while parts: + unenc = parts.pop(0) + if unenc: + if unenc.strip(): + decoded.append((unenc, None)) + if parts: + charset, encoding = [s.lower() for s in parts[0:2]] + encoded = parts[2] + dec = '' + if encoding == 'q': + dec = qp_decode(encoded) + elif encoding == 'b': + dec = base64_decode(encoded) + else: + dec = encoded + + if decoded and decoded[-1][1] == charset: + decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1]) + else: + decoded.append((dec, charset)) + del parts[0:3] + return decoded + +def decode_header(hdr): + """ Decodes rfc2822 encoded header and return utf-8 encoded string + """ + if not hdr: + return None + outs = u"" + for section in _decode_header(hdr): + charset = unaliasCharset(section[1]) + outs += unicode(section[0], charset or 'iso-8859-1', 'replace') + return outs.encode('utf-8') + +def encode_header(header): + """ Will encode in quoted-printable encoding only if header + contains non latin characters + """ + + # Return empty headers unchanged + if not header: + return header + + global hqre + # return plain header if it is not contains non-ascii characters + if hqre.match(header): + return header + + charset = 'utf-8' + quoted = '' + #max_encoded = 76 - len(charset) - 7 + for c in header: + # Space may be represented as _ instead of =20 for readability + if c == ' ': + quoted += '_' + # These characters can be included verbatim + elif hqre.match(c): + quoted += c + # Otherwise, replace with hex value like =E2 + else: + quoted += "=%02X" % ord(c) + plain = 0 + + return '=?%s?q?%s?=' % (charset, quoted) + +def unaliasCharset(charset): + if charset: + return charset.lower().replace("windows-", 'cp') + #return charset_table.get(charset.lower(), charset) + return None + +def test(): + print encode_header("Contrary, Mary") + #print unaliasCharset('Windows-1251') + +if __name__ == '__main__': + test() + +# vim: et diff --git a/roundup/roundupdb.py b/roundup/roundupdb.py index a45ec16..4b3761a 100644 --- a/roundup/roundupdb.py +++ b/roundup/roundupdb.py @@ -15,7 +15,7 @@ # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE, # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # -# $Id: roundupdb.py,v 1.77 2003-01-14 22:19:27 richard Exp $ +# $Id: roundupdb.py,v 1.78 2003-01-15 22:17:19 kedder Exp $ __doc__ = """ Extending hyperdb with types specific to issue-tracking. @@ -24,6 +24,9 @@ Extending hyperdb with types specific to issue-tracking. import re, os, smtplib, socket, time, random import MimeWriter, cStringIO import base64, quopri, mimetypes + +from rfc2822 import encode_header + # if available, use the 'email' module, otherwise fallback to 'rfc822' try : from email.Utils import formataddr as straddr @@ -243,9 +246,10 @@ class IssueClass: # create the message message = cStringIO.StringIO() writer = MimeWriter.MimeWriter(message) - writer.addheader('Subject', '[%s%s] %s'%(cn, nodeid, title)) + writer.addheader('Subject', '[%s%s] %s'%(cn, nodeid, encode_header(title))) writer.addheader('To', ', '.join(sendto)) - writer.addheader('From', straddr((authname + from_tag, from_address))) + writer.addheader('From', straddr((encode_header(authname) + + from_tag, from_address))) writer.addheader('Reply-To', straddr((self.db.config.TRACKER_NAME, from_address))) writer.addheader('Date', time.strftime("%a, %d %b %Y %H:%M:%S +0000", @@ -267,7 +271,7 @@ class IssueClass: part = writer.startmultipartbody('mixed') part = writer.nextpart() part.addheader('Content-Transfer-Encoding', 'quoted-printable') - body = part.startbody('text/plain') + body = part.startbody('text/plain; charset=utf-8') body.write(content_encoded) for fileid in message_files: name = files.get(fileid, 'name') @@ -295,7 +299,7 @@ class IssueClass: writer.lastpart() else: writer.addheader('Content-Transfer-Encoding', 'quoted-printable') - body = writer.startbody('text/plain') + body = writer.startbody('text/plain; charset=utf-8') body.write(content_encoded) # now try to send the message diff --git a/roundup/templates/classic/html/_generic.help b/roundup/templates/classic/html/_generic.help index 0197597..9cb1535 100644 --- a/roundup/templates/classic/html/_generic.help +++ b/roundup/templates/classic/html/_generic.help @@ -1,6 +1,7 @@ <html> <head> <link rel="stylesheet" type="text/css" href="_file/style.css"> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8;"> </head> <body class="body" marginwidth="0" marginheight="0"> diff --git a/roundup/templates/classic/html/page b/roundup/templates/classic/html/page index 8966ce6..033a379 100644 --- a/roundup/templates/classic/html/page +++ b/roundup/templates/classic/html/page @@ -1,6 +1,7 @@ <html metal:define-macro="icing"> <head> <title metal:define-slot="head_title">title goes here + diff --git a/roundup/templates/minimal/html/_generic.help b/roundup/templates/minimal/html/_generic.help index bced017..ff4c7a3 100644 --- a/roundup/templates/minimal/html/_generic.help +++ b/roundup/templates/minimal/html/_generic.help @@ -1,5 +1,6 @@ + diff --git a/roundup/templates/minimal/html/page b/roundup/templates/minimal/html/page index 3c139cf..219a52a 100644 --- a/roundup/templates/minimal/html/page +++ b/roundup/templates/minimal/html/page @@ -1,6 +1,7 @@ title goes here + diff --git a/test/test_mailgw.py b/test/test_mailgw.py index a680dfb..5fed002 100644 --- a/test/test_mailgw.py +++ b/test/test_mailgw.py @@ -8,7 +8,7 @@ # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # -# $Id: test_mailgw.py,v 1.37 2002-12-18 00:42:03 richard Exp $ +# $Id: test_mailgw.py,v 1.38 2003-01-15 22:17:20 kedder Exp $ import unittest, cStringIO, tempfile, os, shutil, errno, imp, sys, difflib @@ -197,7 +197,7 @@ This is a test submission of a new issue. self.compareStrings(open(os.environ['SENDMAILDEBUG']).read(), '''FROM: roundup-admin@your.tracker.email.domain.example TO: chef@bork.bork.bork, mary@test, richard@test -Content-Type: text/plain +Content-Type: text/plain; charset=utf-8 Subject: [issue1] Testing... To: chef@bork.bork.bork, mary@test, richard@test From: "Bork, Chef" @@ -253,7 +253,7 @@ This is a second followup self.compareStrings(open(os.environ['SENDMAILDEBUG']).read(), '''FROM: roundup-admin@your.tracker.email.domain.example TO: chef@bork.bork.bork, richard@test -Content-Type: text/plain +Content-Type: text/plain; charset=utf-8 Subject: [issue1] Testing... To: chef@bork.bork.bork, richard@test From: "Contrary, Mary" @@ -302,7 +302,7 @@ This is a followup self.compareStrings(open(os.environ['SENDMAILDEBUG']).read(), '''FROM: roundup-admin@your.tracker.email.domain.example TO: chef@bork.bork.bork, john@test, mary@test -Content-Type: text/plain +Content-Type: text/plain; charset=utf-8 Subject: [issue1] Testing... To: chef@bork.bork.bork, john@test, mary@test From: richard @@ -349,7 +349,7 @@ This is a followup self.compareStrings(open(os.environ['SENDMAILDEBUG']).read(), '''FROM: roundup-admin@your.tracker.email.domain.example TO: chef@bork.bork.bork, john@test, mary@test -Content-Type: text/plain +Content-Type: text/plain; charset=utf-8 Subject: [issue1] Testing... To: chef@bork.bork.bork, john@test, mary@test From: richard @@ -397,7 +397,7 @@ This is a followup self.compareStrings(open(os.environ['SENDMAILDEBUG']).read(), '''FROM: roundup-admin@your.tracker.email.domain.example TO: chef@bork.bork.bork, richard@test -Content-Type: text/plain +Content-Type: text/plain; charset=utf-8 Subject: [issue1] Testing... To: chef@bork.bork.bork, richard@test From: John Doe @@ -446,7 +446,7 @@ This is a followup self.compareStrings(open(os.environ['SENDMAILDEBUG']).read(), '''FROM: roundup-admin@your.tracker.email.domain.example TO: chef@bork.bork.bork -Content-Type: text/plain +Content-Type: text/plain; charset=utf-8 Subject: [issue1] Testing... To: chef@bork.bork.bork From: richard @@ -495,7 +495,7 @@ This is a followup self.compareStrings(open(os.environ['SENDMAILDEBUG']).read(), '''FROM: roundup-admin@your.tracker.email.domain.example TO: chef@bork.bork.bork, john@test, richard@test -Content-Type: text/plain +Content-Type: text/plain; charset=utf-8 Subject: [issue1] Testing... To: chef@bork.bork.bork, john@test, richard@test From: John Doe @@ -543,7 +543,7 @@ This is a followup self.compareStrings(open(os.environ['SENDMAILDEBUG']).read(), '''FROM: roundup-admin@your.tracker.email.domain.example TO: chef@bork.bork.bork, richard@test -Content-Type: text/plain +Content-Type: text/plain; charset=utf-8 Subject: [issue1] Testing... To: chef@bork.bork.bork, richard@test From: John Doe @@ -591,7 +591,7 @@ This is a followup self.compareStrings(open(os.environ['SENDMAILDEBUG']).read(), '''FROM: roundup-admin@your.tracker.email.domain.example TO: chef@bork.bork.bork -Content-Type: text/plain +Content-Type: text/plain; charset=utf-8 Subject: [issue1] Testing... To: chef@bork.bork.bork From: richard @@ -700,7 +700,7 @@ A message with encoding (encoded oe =F6) self.compareStrings(open(os.environ['SENDMAILDEBUG']).read(), '''FROM: roundup-admin@your.tracker.email.domain.example TO: chef@bork.bork.bork, richard@test -Content-Type: text/plain +Content-Type: text/plain; charset=utf-8 Subject: [issue1] Testing... To: chef@bork.bork.bork, richard@test From: "Contrary, Mary" @@ -715,7 +715,7 @@ Content-Transfer-Encoding: quoted-printable Contrary, Mary added the comment: -A message with encoding (encoded oe =F6) +A message with encoding (encoded oe =C3=B6) ---------- status: unread -> chatting @@ -755,7 +755,7 @@ A message with first part encoded (encoded oe =F6) self.compareStrings(open(os.environ['SENDMAILDEBUG']).read(), '''FROM: roundup-admin@your.tracker.email.domain.example TO: chef@bork.bork.bork, richard@test -Content-Type: text/plain +Content-Type: text/plain; charset=utf-8 Subject: [issue1] Testing... To: chef@bork.bork.bork, richard@test From: "Contrary, Mary" @@ -770,7 +770,7 @@ Content-Transfer-Encoding: quoted-printable Contrary, Mary added the comment: -A message with first part encoded (encoded oe =F6) +A message with first part encoded (encoded oe =C3=B6) ---------- status: unread -> chatting @@ -800,7 +800,7 @@ This is a followup self.compareStrings(open(os.environ['SENDMAILDEBUG']).read(), '''FROM: roundup-admin@your.tracker.email.domain.example TO: chef@bork.bork.bork -Content-Type: text/plain +Content-Type: text/plain; charset=utf-8 Subject: [issue1] Testing... To: chef@bork.bork.bork From: richard