summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 7d26165)
raw | patch | inline | side by side (parent: 7d26165)
author | jlgijsbers <jlgijsbers@57a73879-2fb5-44c3-a270-3262357dd7e2> | |
Sat, 17 Jan 2004 13:49:06 +0000 (13:49 +0000) | ||
committer | jlgijsbers <jlgijsbers@57a73879-2fb5-44c3-a270-3262357dd7e2> | |
Sat, 17 Jan 2004 13:49:06 +0000 (13:49 +0000) |
git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/trunk@2043 57a73879-2fb5-44c3-a270-3262357dd7e2
CHANGES.txt | patch | blob | history | |
roundup/mailgw.py | patch | blob | history | |
test/test_multipart.py | patch | blob | history |
diff --git a/CHANGES.txt b/CHANGES.txt
index 4bca3e6b070172f5f529d9c269e2d2fb2246c02e..0de8a88ba4687a50b8c9bec8fd3d44bea3a71770 100644 (file)
--- a/CHANGES.txt
+++ b/CHANGES.txt
- allowed negative ids (ie. new item markers) in HTMLClass.getItem,
allowing "db/file_with_status/-1/status/menu" to generate a useful
widget
+- The mail gateway now searches recursively for the text/plain part and
+ the attachments of a message (sf bug 841241).
Cleanup:
- replace curuserid attribute on Database with the extended getuid() method.
diff --git a/roundup/mailgw.py b/roundup/mailgw.py
index af2cde4628176225ac69a1eda6b94881f38a6d96..c76706c4f586414e459e1aa23fcb0a0720e77b0b 100644 (file)
--- a/roundup/mailgw.py
+++ b/roundup/mailgw.py
an exception, the original message is bounced back to the sender with the
explanatory message given in the exception.
-$Id: mailgw.py,v 1.140 2003-12-19 01:50:19 richard Exp $
+$Id: mailgw.py,v 1.141 2004-01-17 13:49:06 jlgijsbers Exp $
"""
import string, re, os, mimetools, cStringIO, smtplib, socket, binascii, quopri
''' subclass mimetools.Message so we can retrieve the parts of the
message...
'''
- def getPart(self):
+ def getpart(self):
''' Get a single part of a multipart message and return it as a new
Message instance.
'''
s.seek(0)
return Message(s)
+ def getparts(self):
+ """Get all parts of this multipart message."""
+ # skip over the intro to the first boundary
+ self.getpart()
+
+ # accumulate the other parts
+ parts = []
+ while 1:
+ part = self.getpart()
+ if part is None:
+ break
+ parts.append(part)
+ return parts
+
def getheader(self, name, default=None):
hdr = mimetools.Message.getheader(self, name, default)
if hdr:
hdr = hdr.replace('\n','') # Inserted by rfc822.readheaders
return rfc2822.decode_header(hdr)
-
+
+ def getname(self):
+ """Find an appropriate name for this message."""
+ if self.gettype() == 'message/rfc822':
+ # handle message/rfc822 specially - the name should be
+ # the subject of the actual e-mail embedded here
+ self.fp.seek(0)
+ name = Message(self.fp).getheader('subject')
+ else:
+ # try name on Content-Type
+ name = self.getparam('name')
+ if not name:
+ disp = self.getheader('content-disposition', None)
+ if disp:
+ name = getparam(disp, 'filename')
+
+ if name:
+ return name.strip()
+
+ def getbody(self):
+ """Get the decoded message body."""
+ self.rewindbody()
+ encoding = self.getencoding()
+ data = None
+ if encoding == 'base64':
+ # BUG: is base64 really used for text encoding, or
+ # are we inserting zip files here?
+ data = binascii.a2b_base64(self.fp.read())
+ elif encoding == 'quoted-printable':
+ # the quopri module wants to work with files
+ decoded = cStringIO.StringIO()
+ quopri.decode(self.fp, decoded)
+ data = decoded.getvalue()
+ elif encoding == 'uuencoded':
+ data = binascii.a2b_uu(self.fp.read())
+ else:
+ # take it as text
+ data = self.fp.read()
+
+ # Encode message to unicode
+ charset = rfc2822.unaliasCharset(self.getparam("charset"))
+ if charset:
+ # Do conversion only if charset specified
+ edata = unicode(data, charset).encode('utf-8')
+ # Convert from dos eol to unix
+ edata = edata.replace('\r\n', '\n')
+ else:
+ # Leave message content as is
+ edata = data
+
+ return edata
+
+ # General multipart handling:
+ # Take the first text/plain part, anything else is considered an
+ # attachment.
+ # multipart/mixed: multiple "unrelated" parts.
+ # multipart/signed (rfc 1847):
+ # The control information is carried in the second of the two
+ # required body parts.
+ # ACTION: Default, so if content is text/plain we get it.
+ # multipart/encrypted (rfc 1847):
+ # The control information is carried in the first of the two
+ # required body parts.
+ # ACTION: Not handleable as the content is encrypted.
+ # multipart/related (rfc 1872, 2112, 2387):
+ # The Multipart/Related content-type addresses the MIME
+ # representation of compound objects.
+ # ACTION: Default. If we are lucky there is a text/plain.
+ # TODO: One should use the start part and look for an Alternative
+ # that is text/plain.
+ # multipart/Alternative (rfc 1872, 1892):
+ # only in "related" ?
+ # multipart/report (rfc 1892):
+ # e.g. mail system delivery status reports.
+ # ACTION: Default. Could be ignored or used for Delivery Notification
+ # flagging.
+ # multipart/form-data:
+ # For web forms only.
+
+ def extract_content(self, parent_type=None):
+ """Extract the body and the attachments recursively."""
+ content_type = self.gettype()
+ content = None
+ attachments = []
+
+ if content_type == 'text/plain':
+ content = self.getbody()
+ elif content_type[:10] == 'multipart/':
+ for part in self.getparts():
+ new_content, new_attach = part.extract_content(content_type)
+
+ # If we haven't found a text/plain part yet, take this one,
+ # otherwise make it an attachment.
+ if not content:
+ content = new_content
+ elif new_content:
+ attachments.append(part.as_attachment())
+
+ attachments.extend(new_attach)
+ elif (parent_type == 'multipart/signed' and
+ content_type == 'application/pgp-signature'):
+ # ignore it so it won't be saved as an attachment
+ pass
+ else:
+ attachments.append(self.as_attachment())
+ return content, attachments
+
+ def as_attachment(self):
+ """Return this message as an attachment."""
+ return (self.getname(), self.gettype(), self.getbody())
+
class MailGW:
# Matches subjects like:
m.append(s.getvalue())
self.mailer.bounce_message(message, sendto, m)
- def get_part_data_decoded(self,part):
- encoding = part.getencoding()
- data = None
- if encoding == 'base64':
- # BUG: is base64 really used for text encoding or
- # are we inserting zip files here.
- data = binascii.a2b_base64(part.fp.read())
- elif encoding == 'quoted-printable':
- # the quopri module wants to work with files
- decoded = cStringIO.StringIO()
- quopri.decode(part.fp, decoded)
- data = decoded.getvalue()
- elif encoding == 'uuencoded':
- data = binascii.a2b_uu(part.fp.read())
- else:
- # take it as text
- data = part.fp.read()
-
- # Encode message to unicode
- charset = rfc2822.unaliasCharset(part.getparam("charset"))
- if charset:
- # Do conversion only if charset specified
- edata = unicode(data, charset).encode('utf-8')
- # Convert from dos eol to unix
- edata = edata.replace('\r\n', '\n')
- else:
- # Leave message content as is
- edata = data
-
- return edata
-
def handle_message(self, message):
''' message - a Message instance
messageid = "<%s.%s.%s%s@%s>"%(time.time(), random.random(),
classname, nodeid, self.instance.config.MAIL_DOMAIN)
- #
# now handle the body - find the message
- #
- content_type = message.gettype()
- attachments = []
- # General multipart handling:
- # Take the first text/plain part, anything else is considered an
- # attachment.
- # multipart/mixed: multiple "unrelated" parts.
- # multipart/signed (rfc 1847):
- # The control information is carried in the second of the two
- # required body parts.
- # ACTION: Default, so if content is text/plain we get it.
- # multipart/encrypted (rfc 1847):
- # The control information is carried in the first of the two
- # required body parts.
- # ACTION: Not handleable as the content is encrypted.
- # multipart/related (rfc 1872, 2112, 2387):
- # The Multipart/Related content-type addresses the MIME
- # representation of compound objects.
- # ACTION: Default. If we are lucky there is a text/plain.
- # TODO: One should use the start part and look for an Alternative
- # that is text/plain.
- # multipart/Alternative (rfc 1872, 1892):
- # only in "related" ?
- # multipart/report (rfc 1892):
- # e.g. mail system delivery status reports.
- # ACTION: Default. Could be ignored or used for Delivery Notification
- # flagging.
- # multipart/form-data:
- # For web forms only.
- if content_type == 'multipart/mixed':
- # skip over the intro to the first boundary
- part = message.getPart()
- content = None
- while 1:
- # get the next part
- part = message.getPart()
- if part is None:
- break
- # parse it
- subtype = part.gettype()
- if subtype == 'text/plain' and not content:
- # The first text/plain part is the message content.
- content = self.get_part_data_decoded(part)
- elif subtype == 'message/rfc822':
- # handle message/rfc822 specially - the name should be
- # the subject of the actual e-mail embedded here
- i = part.fp.tell()
- mailmess = Message(part.fp)
- name = mailmess.getheader('subject')
- part.fp.seek(i)
- attachments.append((name, 'message/rfc822', part.fp.read()))
- elif subtype == 'multipart/alternative':
- # Search for text/plain in message with attachment and
- # alternative text representation
- # skip over intro to first boundary
- part.getPart()
- while 1:
- # get the next part
- subpart = part.getPart()
- if subpart is None:
- break
- # parse it
- if subpart.gettype() == 'text/plain' and not content:
- content = self.get_part_data_decoded(subpart)
- else:
- # try name on Content-Type
- name = part.getparam('name')
- if name:
- name = name.strip()
- if not name:
- disp = part.getheader('content-disposition', None)
- if disp:
- name = getparam(disp, 'filename')
- if name:
- name = name.strip()
- # this is just an attachment
- data = self.get_part_data_decoded(part)
- attachments.append((name, part.gettype(), data))
- if content is None:
- raise MailUsageError, '''
-Roundup requires the submission to be plain text. The message parser could
-not find a text/plain part to use.
-'''
-
- elif content_type[:10] == 'multipart/':
- # skip over the intro to the first boundary
- message.getPart()
- content = None
- while 1:
- # get the next part
- part = message.getPart()
- if part is None:
- break
- # parse it
- if part.gettype() == 'text/plain' and not content:
- content = self.get_part_data_decoded(part)
- if content is None:
- raise MailUsageError, '''
-Roundup requires the submission to be plain text. The message parser could
-not find a text/plain part to use.
-'''
-
- elif content_type != 'text/plain':
+ content, attachments = message.extract_content()
+ if content is None:
raise MailUsageError, '''
Roundup requires the submission to be plain text. The message parser could
not find a text/plain part to use.
'''
-
- else:
- content = self.get_part_data_decoded(message)
# figure how much we should muck around with the email body
keep_citations = getattr(self.instance.config, 'EMAIL_KEEP_QUOTED_TEXT',
diff --git a/test/test_multipart.py b/test/test_multipart.py
index 9022d8674ee9b6b176f75eff58862af73703fca0..99e0bf40427c5eea920b0dc8c08f9ba951c862fe 100644 (file)
--- a/test/test_multipart.py
+++ b/test/test_multipart.py
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
-# $Id: test_multipart.py,v 1.6 2003-10-25 22:53:26 richard Exp $
+# $Id: test_multipart.py,v 1.7 2004-01-17 13:49:06 jlgijsbers Exp $
-import unittest, cStringIO
+import unittest
+from cStringIO import StringIO
from roundup.mailgw import Message
+class TestMessage(Message):
+ table = {'multipart/signed': ' boundary="boundary-%(indent)s";\n',
+ 'multipart/mixed': ' boundary="boundary-%(indent)s";\n',
+ 'multipart/alternative': ' boundary="boundary-%(indent)s";\n',
+ 'text/plain': ' name="foo.txt"\nfoo\n',
+ 'application/pgp-signature': ' name="foo.gpg"\nfoo\n',
+ 'application/pdf': ' name="foo.pdf"\nfoo\n',
+ 'message/rfc822': 'Subject: foo\n\nfoo\n'}
+
+ def __init__(self, spec):
+ """Create a basic MIME message according to 'spec'.
+
+ Each line of a spec has one content-type, which is optionally indented.
+ The indentation signifies how deep in the MIME hierarchy the
+ content-type is.
+
+ """
+ parts = []
+ for line in spec.splitlines():
+ content_type = line.strip()
+ if not content_type:
+ continue
+
+ indent = self.getIndent(line)
+ if indent:
+ parts.append('--boundary-%s\n' % indent)
+ parts.append('Content-type: %s;\n' % content_type)
+ parts.append(self.table[content_type] % {'indent': indent + 1})
+
+ Message.__init__(self, StringIO(''.join(parts)))
+
+ def getIndent(self, line):
+ """Get the current line's indentation, using four-space indents."""
+ count = 0
+ for char in line:
+ if char != ' ':
+ break
+ count += 1
+ return count / 4
+
class MultipartTestCase(unittest.TestCase):
def setUp(self):
- self.fp = cStringIO.StringIO()
+ self.fp = StringIO()
w = self.fp.write
w('Content-Type: multipart/mixed; boundary="foo"\r\n\r\n')
w('This is a multipart message. Ignore this bit.\r\n')
self.assert_(m is not None)
# skip the first bit
- p = m.getPart()
+ p = m.getpart()
self.assert_(p is not None)
self.assertEqual(p.fp.read(),
'This is a multipart message. Ignore this bit.\r\n')
# first text/plain
- p = m.getPart()
+ p = m.getpart()
self.assert_(p is not None)
self.assertEqual(p.gettype(), 'text/plain')
self.assertEqual(p.fp.read(),
'Hello, world!\r\n\r\nBlah blah\r\nfoo\r\n-foo\r\n')
# sub-multipart
- p = m.getPart()
+ p = m.getpart()
self.assert_(p is not None)
self.assertEqual(p.gettype(), 'multipart/alternative')
# sub-multipart text/plain
- q = p.getPart()
+ q = p.getpart()
self.assert_(q is not None)
- q = p.getPart()
+ q = p.getpart()
self.assert_(q is not None)
self.assertEqual(q.gettype(), 'text/plain')
self.assertEqual(q.fp.read(), 'Hello, world!\r\n\r\nBlah blah\r\n')
# sub-multipart text/html
- q = p.getPart()
+ q = p.getpart()
self.assert_(q is not None)
self.assertEqual(q.gettype(), 'text/html')
self.assertEqual(q.fp.read(), '<b>Hello, world!</b>\r\n')
# sub-multipart end
- q = p.getPart()
+ q = p.getpart()
self.assert_(q is None)
# final text/plain
- p = m.getPart()
+ p = m.getpart()
self.assert_(p is not None)
self.assertEqual(p.gettype(), 'text/plain')
self.assertEqual(p.fp.read(),
'Last bit\n')
# end
- p = m.getPart()
+ p = m.getpart()
self.assert_(p is None)
+ def TestExtraction(self, spec, expected):
+ self.assertEqual(TestMessage(spec).extract_content(), expected)
+
+ def testTextPlain(self):
+ self.TestExtraction('text/plain', ('foo\n', []))
+
+ def testAttachedTextPlain(self):
+ self.TestExtraction("""
+multipart/mixed
+ text/plain
+ text/plain""",
+ ('foo\n',
+ [('foo.txt', 'text/plain', 'foo\n')]))
+
+ def testMultipartMixed(self):
+ self.TestExtraction("""
+multipart/mixed
+ text/plain
+ application/pdf""",
+ ('foo\n',
+ [('foo.pdf', 'application/pdf', 'foo\n')]))
+
+ def testMultipartAlternative(self):
+ self.TestExtraction("""
+multipart/alternative
+ text/plain
+ application/pdf
+""", ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')]))
+
+ def testDeepMultipartAlternative(self):
+ self.TestExtraction("""
+multipart/mixed
+ multipart/alternative
+ text/plain
+ application/pdf
+""", ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')]))
+
+ def testSignedText(self):
+ self.TestExtraction("""
+multipart/signed
+ text/plain
+ application/pgp-signature""", ('foo\n', []))
+
+ def testSignedAttachments(self):
+ self.TestExtraction("""
+multipart/signed
+ multipart/mixed
+ text/plain
+ application/pdf
+ application/pgp-signature""",
+ ('foo\n',
+ [('foo.pdf', 'application/pdf', 'foo\n')]))
+
+ def testAttachedSignature(self):
+ self.TestExtraction("""
+multipart/mixed
+ text/plain
+ application/pgp-signature""",
+ ('foo\n',
+ [('foo.gpg', 'application/pgp-signature', 'foo\n')]))
+
+ def testMessageRfc822(self):
+ self.TestExtraction("""
+multipart/mixed
+ message/rfc822""",
+ (None,
+ [('foo', 'message/rfc822', 'foo\n')]))
+
def test_suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(MultipartTestCase))