From 1b73c1c7e9031022781ee6b56750e1b647f72adf Mon Sep 17 00:00:00 2001
From: jlgijsbers <jlgijsbers@57a73879-2fb5-44c3-a270-3262357dd7e2>
Date: Sat, 17 Jan 2004 13:49:06 +0000
Subject: [PATCH] *** empty log message ***

git-svn-id: http://svn.roundup-tracker.org/svnroot/roundup/trunk@2043 57a73879-2fb5-44c3-a270-3262357dd7e2
---
 CHANGES.txt            |   2 +
 roundup/mailgw.py      | 270 ++++++++++++++++++++---------------------
 test/test_multipart.py | 133 ++++++++++++++++++--
 3 files changed, 252 insertions(+), 153 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 4bca3e6..0de8a88 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -47,6 +47,8 @@ Fixed:
 - allowed negative ids (ie. new item markers) in HTMLClass.getItem,
   allowing "db/file_with_status/-1/status/menu" to generate a useful
   widget
+- The mail gateway now searches recursively for the text/plain part and the
+  attachments of a message (sf bug 841241).
 
 Cleanup:
 - replace curuserid attribute on Database with the extended getuid() method.
diff --git a/roundup/mailgw.py b/roundup/mailgw.py
index af2cde4..c76706c 100644
--- a/roundup/mailgw.py
+++ b/roundup/mailgw.py
@@ -73,7 +73,7 @@ are calling the create() method to create a new node). If an auditor raises
 an exception, the original message is bounced back to the sender with the
 explanatory message given in the exception. 
 
-$Id: mailgw.py,v 1.140 2003-12-19 01:50:19 richard Exp $
+$Id: mailgw.py,v 1.141 2004-01-17 13:49:06 jlgijsbers Exp $
 """
 
 import string, re, os, mimetools, cStringIO, smtplib, socket, binascii, quopri
@@ -145,7 +145,7 @@ class Message(mimetools.Message):
     ''' subclass mimetools.Message so we can retrieve the parts of the
         message...
     '''
-    def getPart(self):
+    def getpart(self):
         ''' Get a single part of a multipart message and return it as a new
             Message instance.
         '''
@@ -164,12 +164,136 @@ class Message(mimetools.Message):
         s.seek(0)
         return Message(s)
 
+    def getparts(self):
+        """Get all parts of this multipart message."""
+        # skip over the intro to the first boundary
+        self.getpart()
+
+        # accumulate the other parts
+        parts = []
+        while 1:
+            part = self.getpart()
+            if part is None:
+                break
+            parts.append(part)
+        return parts
+
     def getheader(self, name, default=None):
         hdr = mimetools.Message.getheader(self, name, default)
         if hdr:
             hdr = hdr.replace('\n','') # Inserted by rfc822.readheaders
         return rfc2822.decode_header(hdr)
- 
+
+    def getname(self):
+        """Find an appropriate name for this message."""
+        if self.gettype() == 'message/rfc822':
+            # handle message/rfc822 specially - the name should be
+            # the subject of the actual e-mail embedded here
+            self.fp.seek(0)
+            name = Message(self.fp).getheader('subject')
+        else:
+            # try name on Content-Type
+            name = self.getparam('name')
+            if not name:
+                disp = self.getheader('content-disposition', None)
+                if disp:
+                    name = getparam(disp, 'filename')
+
+        if name:
+            return name.strip()
+
+    def getbody(self):
+        """Get the decoded message body."""
+        self.rewindbody()
+        encoding = self.getencoding()
+        data = None
+        if encoding == 'base64':
+            # BUG: is base64 really used for text encoding, or
+            # are we inserting zip files here?
+            data = binascii.a2b_base64(self.fp.read())
+        elif encoding == 'quoted-printable':
+            # the quopri module wants to work with files
+            decoded = cStringIO.StringIO()
+            quopri.decode(self.fp, decoded)
+            data = decoded.getvalue()
+        elif encoding == 'uuencoded':
+            data = binascii.a2b_uu(self.fp.read())
+        else:
+            # take it as text
+            data = self.fp.read()
+        
+        # Re-encode the message as UTF-8
+        charset = rfc2822.unaliasCharset(self.getparam("charset"))
+        if charset:
+            # Do conversion only if charset specified
+            edata = unicode(data, charset).encode('utf-8')
+            # Convert from dos eol to unix
+            edata = edata.replace('\r\n', '\n')
+        else:
+            # Leave message content as is
+            edata = data
+                
+        return edata
+
+    # General multipart handling:
+    #   Take the first text/plain part, anything else is considered an 
+    #   attachment.
+    # multipart/mixed: multiple "unrelated" parts.
+    # multipart/signed (rfc 1847): 
+    #   The control information is carried in the second of the two 
+    #   required body parts.
+    #   ACTION: Default, so if content is text/plain we get it.
+    # multipart/encrypted (rfc 1847): 
+    #   The control information is carried in the first of the two 
+    #   required body parts.
+    #   ACTION: Cannot be handled, as the content is encrypted.
+    # multipart/related (rfc 1872, 2112, 2387):
+    #   The Multipart/Related content-type addresses the MIME
+    #   representation of compound objects.
+    #   ACTION: Default. If we are lucky there is a text/plain.
+    #   TODO: One should use the start part and look for an Alternative
+    #   that is text/plain.
+    # multipart/alternative (rfc 2046):
+    #   does this occur only inside "related"?
+    # multipart/report (rfc 1892):
+    #   e.g. mail system delivery status reports.
+    #   ACTION: Default. Could be ignored or used for Delivery Notification 
+    #   flagging.
+    # multipart/form-data:
+    #   For web forms only.
+
+    def extract_content(self, parent_type=None):
+        """Extract the body and the attachments recursively."""
+        content_type = self.gettype()
+        content = None
+        attachments = []
+        
+        if content_type == 'text/plain':
+            content = self.getbody()
+        elif content_type[:10] == 'multipart/':
+            for part in self.getparts():
+                new_content, new_attach = part.extract_content(content_type)
+
+                # If we haven't found a text/plain part yet, take this one,
+                # otherwise make it an attachment.
+                if not content:
+                    content = new_content
+                elif new_content:
+                    attachments.append(part.as_attachment())
+                    
+                attachments.extend(new_attach)
+        elif (parent_type == 'multipart/signed' and
+              content_type == 'application/pgp-signature'):
+            # ignore it so it won't be saved as an attachment
+            pass
+        else:
+            attachments.append(self.as_attachment())
+        return content, attachments
+
+    def as_attachment(self):
+        """Return this message as an attachment."""
+        return (self.getname(), self.gettype(), self.getbody())
+
 class MailGW:
 
     # Matches subjects like:
@@ -371,37 +495,6 @@ class MailGW:
             m.append(s.getvalue())
             self.mailer.bounce_message(message, sendto, m)
 
-    def get_part_data_decoded(self,part):
-        encoding = part.getencoding()
-        data = None
-        if encoding == 'base64':
-            # BUG: is base64 really used for text encoding or
-            # are we inserting zip files here. 
-            data = binascii.a2b_base64(part.fp.read())
-        elif encoding == 'quoted-printable':
-            # the quopri module wants to work with files
-            decoded = cStringIO.StringIO()
-            quopri.decode(part.fp, decoded)
-            data = decoded.getvalue()
-        elif encoding == 'uuencoded':
-            data = binascii.a2b_uu(part.fp.read())
-        else:
-            # take it as text
-            data = part.fp.read()
-        
-        # Encode message to unicode
-        charset = rfc2822.unaliasCharset(part.getparam("charset"))
-        if charset:
-            # Do conversion only if charset specified
-            edata = unicode(data, charset).encode('utf-8')
-            # Convert from dos eol to unix
-            edata = edata.replace('\r\n', '\n')
-        else:
-            # Leave message content as is
-            edata = data
-                
-        return edata
-
     def handle_message(self, message):
         ''' message - a Message instance
 
@@ -684,118 +777,13 @@ Subject was: "%s"
             messageid = "<%s.%s.%s%s@%s>"%(time.time(), random.random(),
                 classname, nodeid, self.instance.config.MAIL_DOMAIN)
 
-        #
         # now handle the body - find the message
-        #
-        content_type =  message.gettype()
-        attachments = []
-        # General multipart handling:
-        #   Take the first text/plain part, anything else is considered an 
-        #   attachment.
-        # multipart/mixed: multiple "unrelated" parts.
-        # multipart/signed (rfc 1847): 
-        #   The control information is carried in the second of the two 
-        #   required body parts.
-        #   ACTION: Default, so if content is text/plain we get it.
-        # multipart/encrypted (rfc 1847): 
-        #   The control information is carried in the first of the two 
-        #   required body parts.
-        #   ACTION: Not handleable as the content is encrypted.
-        # multipart/related (rfc 1872, 2112, 2387):
-        #   The Multipart/Related content-type addresses the MIME
-        #   representation of compound objects.
-        #   ACTION: Default. If we are lucky there is a text/plain.
-        #   TODO: One should use the start part and look for an Alternative
-        #   that is text/plain.
-        # multipart/Alternative (rfc 1872, 1892):
-        #   only in "related" ?
-        # multipart/report (rfc 1892):
-        #   e.g. mail system delivery status reports.
-        #   ACTION: Default. Could be ignored or used for Delivery Notification 
-        #   flagging.
-        # multipart/form-data:
-        #   For web forms only.
-        if content_type == 'multipart/mixed':
-            # skip over the intro to the first boundary
-            part = message.getPart()
-            content = None
-            while 1:
-                # get the next part
-                part = message.getPart()
-                if part is None:
-                    break
-                # parse it
-                subtype = part.gettype()
-                if subtype == 'text/plain' and not content:
-                    # The first text/plain part is the message content.
-                    content = self.get_part_data_decoded(part) 
-                elif subtype == 'message/rfc822':
-                    # handle message/rfc822 specially - the name should be
-                    # the subject of the actual e-mail embedded here
-                    i = part.fp.tell()
-                    mailmess = Message(part.fp)
-                    name = mailmess.getheader('subject')
-                    part.fp.seek(i)
-                    attachments.append((name, 'message/rfc822', part.fp.read()))
-                elif subtype == 'multipart/alternative':
-                    # Search for text/plain in message with attachment and
-                    # alternative text representation
-                    # skip over intro to first boundary
-                    part.getPart()
-                    while 1:
-                        # get the next part
-                        subpart = part.getPart()
-                        if subpart is None:
-                            break
-                        # parse it
-                        if subpart.gettype() == 'text/plain' and not content:
-                            content = self.get_part_data_decoded(subpart) 
-                else:
-                    # try name on Content-Type
-                    name = part.getparam('name')
-                    if name:
-                        name = name.strip()
-                    if not name:
-                        disp = part.getheader('content-disposition', None)
-                        if disp:
-                            name = getparam(disp, 'filename')
-                            if name:
-                                name = name.strip()
-                    # this is just an attachment
-                    data = self.get_part_data_decoded(part) 
-                    attachments.append((name, part.gettype(), data))
-            if content is None:
-                raise MailUsageError, '''
-Roundup requires the submission to be plain text. The message parser could
-not find a text/plain part to use.
-'''
-
-        elif content_type[:10] == 'multipart/':
-            # skip over the intro to the first boundary
-            message.getPart()
-            content = None
-            while 1:
-                # get the next part
-                part = message.getPart()
-                if part is None:
-                    break
-                # parse it
-                if part.gettype() == 'text/plain' and not content:
-                    content = self.get_part_data_decoded(part) 
-            if content is None:
-                raise MailUsageError, '''
-Roundup requires the submission to be plain text. The message parser could
-not find a text/plain part to use.
-'''
-
-        elif content_type != 'text/plain':
+        content, attachments = message.extract_content()
+        if content is None:
             raise MailUsageError, '''
 Roundup requires the submission to be plain text. The message parser could
 not find a text/plain part to use.
 '''
-
-        else:
-            content = self.get_part_data_decoded(message) 
  
         # figure how much we should muck around with the email body
         keep_citations = getattr(self.instance.config, 'EMAIL_KEEP_QUOTED_TEXT',
diff --git a/test/test_multipart.py b/test/test_multipart.py
index 9022d86..99e0bf4 100644
--- a/test/test_multipart.py
+++ b/test/test_multipart.py
@@ -15,15 +15,56 @@
 # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
 # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 # 
-# $Id: test_multipart.py,v 1.6 2003-10-25 22:53:26 richard Exp $ 
+# $Id: test_multipart.py,v 1.7 2004-01-17 13:49:06 jlgijsbers Exp $ 
 
-import unittest, cStringIO
+import unittest
+from cStringIO import StringIO
 
 from roundup.mailgw import Message
 
+class TestMessage(Message):
+    table = {'multipart/signed': '    boundary="boundary-%(indent)s";\n',
+             'multipart/mixed': '    boundary="boundary-%(indent)s";\n',
+             'multipart/alternative': '    boundary="boundary-%(indent)s";\n',
+             'text/plain': '    name="foo.txt"\nfoo\n',
+             'application/pgp-signature': '    name="foo.gpg"\nfoo\n',
+             'application/pdf': '    name="foo.pdf"\nfoo\n',
+             'message/rfc822': 'Subject: foo\n\nfoo\n'}
+    
+    def __init__(self, spec):
+        """Create a basic MIME message according to 'spec'.
+
+        Each line of a spec has one content-type, which is optionally indented.
+        The indentation signifies how deep in the MIME hierarchy the
+        content-type is.
+
+        """
+        parts = []
+        for line in spec.splitlines():
+            content_type = line.strip()
+            if not content_type:
+                continue
+            
+            indent = self.getIndent(line)
+            if indent:
+                parts.append('--boundary-%s\n' % indent)
+            parts.append('Content-type: %s;\n' % content_type)
+            parts.append(self.table[content_type] % {'indent': indent + 1})
+
+        Message.__init__(self, StringIO(''.join(parts)))
+
+    def getIndent(self, line):
+        """Get the line's indentation level, counting four spaces per level."""
+        count = 0
+        for char in line:
+            if char != ' ':
+                break
+            count += 1
+        return count / 4
+
 class MultipartTestCase(unittest.TestCase):
     def setUp(self):
-        self.fp = cStringIO.StringIO()
+        self.fp = StringIO()
         w = self.fp.write
         w('Content-Type: multipart/mixed; boundary="foo"\r\n\r\n')
         w('This is a multipart message. Ignore this bit.\r\n')
@@ -62,52 +103,120 @@ class MultipartTestCase(unittest.TestCase):
         self.assert_(m is not None)
 
         # skip the first bit
-        p = m.getPart()
+        p = m.getpart()
         self.assert_(p is not None)
         self.assertEqual(p.fp.read(),
             'This is a multipart message. Ignore this bit.\r\n')
 
         # first text/plain
-        p = m.getPart()
+        p = m.getpart()
         self.assert_(p is not None)
         self.assertEqual(p.gettype(), 'text/plain')
         self.assertEqual(p.fp.read(),
             'Hello, world!\r\n\r\nBlah blah\r\nfoo\r\n-foo\r\n')
 
         # sub-multipart
-        p = m.getPart()
+        p = m.getpart()
         self.assert_(p is not None)
         self.assertEqual(p.gettype(), 'multipart/alternative')
 
         # sub-multipart text/plain
-        q = p.getPart()
+        q = p.getpart()
         self.assert_(q is not None)
-        q = p.getPart()
+        q = p.getpart()
         self.assert_(q is not None)
         self.assertEqual(q.gettype(), 'text/plain')
         self.assertEqual(q.fp.read(), 'Hello, world!\r\n\r\nBlah blah\r\n')
 
         # sub-multipart text/html
-        q = p.getPart()
+        q = p.getpart()
         self.assert_(q is not None)
         self.assertEqual(q.gettype(), 'text/html')
         self.assertEqual(q.fp.read(), '<b>Hello, world!</b>\r\n')
 
         # sub-multipart end
-        q = p.getPart()
+        q = p.getpart()
         self.assert_(q is None)
 
         # final text/plain
-        p = m.getPart()
+        p = m.getpart()
         self.assert_(p is not None)
         self.assertEqual(p.gettype(), 'text/plain')
         self.assertEqual(p.fp.read(),
             'Last bit\n')
 
         # end
-        p = m.getPart()
+        p = m.getpart()
         self.assert_(p is None)
 
+    def TestExtraction(self, spec, expected):
+        self.assertEqual(TestMessage(spec).extract_content(), expected)
+
+    def testTextPlain(self):
+        self.TestExtraction('text/plain', ('foo\n', []))
+
+    def testAttachedTextPlain(self):
+        self.TestExtraction("""
+multipart/mixed
+    text/plain
+    text/plain""",
+                  ('foo\n',
+                   [('foo.txt', 'text/plain', 'foo\n')]))
+
+    def testMultipartMixed(self):
+        self.TestExtraction("""
+multipart/mixed
+    text/plain
+    application/pdf""",
+                  ('foo\n',
+                   [('foo.pdf', 'application/pdf', 'foo\n')]))
+
+    def testMultipartAlternative(self):
+        self.TestExtraction("""
+multipart/alternative
+    text/plain
+    application/pdf
+""", ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')]))
+
+    def testDeepMultipartAlternative(self):
+        self.TestExtraction("""
+multipart/mixed
+    multipart/alternative
+        text/plain
+        application/pdf
+""", ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')]))
+    
+    def testSignedText(self):
+        self.TestExtraction("""
+multipart/signed
+    text/plain
+    application/pgp-signature""", ('foo\n', []))
+
+    def testSignedAttachments(self):
+        self.TestExtraction("""
+multipart/signed
+    multipart/mixed
+        text/plain
+        application/pdf
+    application/pgp-signature""",
+                  ('foo\n',
+                   [('foo.pdf', 'application/pdf', 'foo\n')]))
+
+    def testAttachedSignature(self):
+        self.TestExtraction("""
+multipart/mixed
+    text/plain
+    application/pgp-signature""",
+                  ('foo\n',
+                   [('foo.gpg', 'application/pgp-signature', 'foo\n')]))
+
+    def testMessageRfc822(self):
+        self.TestExtraction("""
+multipart/mixed
+    message/rfc822""",
+                  (None,
+                   [('foo', 'message/rfc822', 'foo\n')]))
+
 def test_suite():
     suite = unittest.TestSuite()
     suite.addTest(unittest.makeSuite(MultipartTestCase))
-- 
2.39.5