Code

Add test results directory to t/.gitignore
[git.git] / builtin-mailinfo.c
index 489c2c58c01514ac3d967d1c3f46f1243f853580..97c1ff97440ec107e336df774f86aadc28f26b89 100644 (file)
@@ -237,8 +237,6 @@ static int eatspace(char *line)
 
 static char *cleanup_subject(char *subject)
 {
-       if (keep_subject)
-               return subject;
        for (;;) {
                char *p;
                int len, remove;
@@ -289,12 +287,12 @@ static void cleanup_space(char *buf)
        }
 }
 
-static void decode_header(char *it);
-static char *header[MAX_HDR_PARSED] = {
+static void decode_header(char *it, unsigned itsize);
+static const char *header[MAX_HDR_PARSED] = {
        "From","Subject","Date",
 };
 
-static int check_header(char *line, char **hdr_data, int overwrite)
+static int check_header(char *line, unsigned linesize, char **hdr_data, int overwrite)
 {
        int i;
 
@@ -307,7 +305,7 @@ static int check_header(char *line, char **hdr_data, int overwrite)
                        /* Unwrap inline B and Q encoding, and optionally
                         * normalize the meta information to utf8.
                         */
-                       decode_header(line + len + 2);
+                       decode_header(line + len + 2, linesize - len - 2);
                        hdr_data[i] = xmalloc(1000 * sizeof(char));
                        if (! handle_header(line, hdr_data[i], len + 2)) {
                                return 1;
@@ -318,14 +316,14 @@ static int check_header(char *line, char **hdr_data, int overwrite)
        /* Content stuff */
        if (!strncasecmp(line, "Content-Type", 12) &&
                line[12] == ':' && isspace(line[12 + 1])) {
-               decode_header(line + 12 + 2);
+               decode_header(line + 12 + 2, linesize - 12 - 2);
                if (! handle_content_type(line)) {
                        return 1;
                }
        }
        if (!strncasecmp(line, "Content-Transfer-Encoding", 25) &&
                line[25] == ':' && isspace(line[25 + 1])) {
-               decode_header(line + 25 + 2);
+               decode_header(line + 25 + 2, linesize - 25 - 2);
                if (! handle_content_transfer_encoding(line)) {
                        return 1;
                }
@@ -425,6 +423,7 @@ static int read_one_header_line(char *line, int sz, FILE *in)
                        if (addlen >= sz - len)
                                addlen = sz - len - 1;
                        memcpy(line + len, continuation, addlen);
+                       line[len] = '\n';
                        len += addlen;
                }
        }
@@ -433,10 +432,16 @@ static int read_one_header_line(char *line, int sz, FILE *in)
        return 1;
 }
 
-static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047)
+static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047)
 {
+       char *otbegin = ot;
+       char *otend = ot + otsize;
        int c;
        while ((c = *in++) != 0 && (in <= ep)) {
+               if (ot == otend) {
+                       *--ot = '\0';
+                       return -1;
+               }
                if (c == '=') {
                        int d = *in++;
                        if (d == '\n' || !d)
@@ -449,15 +454,21 @@ static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047)
                *ot++ = c;
        }
        *ot = 0;
-       return 0;
+       return (ot - otbegin);
 }
 
-static int decode_b_segment(char *in, char *ot, char *ep)
+static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
 {
        /* Decode in..ep, possibly in-place to ot */
        int c, pos = 0, acc = 0;
+       char *otbegin = ot;
+       char *otend = ot + otsize;
 
        while ((c = *in++) != 0 && (in <= ep)) {
+               if (ot == otend) {
+                       *--ot = '\0';
+                       return -1;
+               }
                if (c == '+')
                        c = 62;
                else if (c == '/')
@@ -496,23 +507,50 @@ static int decode_b_segment(char *in, char *ot, char *ep)
                }
        }
        *ot = 0;
-       return 0;
+       return (ot - otbegin);
 }
 
-static void convert_to_utf8(char *line, const char *charset)
+/*
+ * When there is no known charset, guess.
+ *
+ * Right now we assume that if the target is UTF-8 (the default),
+ * and it already looks like UTF-8 (which includes US-ASCII as its
+ * subset, of course) then that is what it is and there is nothing
+ * to do.
+ *
+ * Otherwise, we default to assuming it is Latin1 for historical
+ * reasons.
+ */
+static const char *guess_charset(const char *line, const char *target_charset)
 {
-       static const char latin_one[] = "latin1";
-       const char *input_charset = *charset ? charset : latin_one;
-       char *out = reencode_string(line, metainfo_charset, input_charset);
+       if (is_encoding_utf8(target_charset)) {
+               if (is_utf8(line))
+                       return NULL;
+       }
+       return "latin1";
+}
 
+static void convert_to_utf8(char *line, unsigned linesize, const char *charset)
+{
+       char *out;
+
+       if (!charset || !*charset) {
+               charset = guess_charset(line, metainfo_charset);
+               if (!charset)
+                       return;
+       }
+
+       if (!strcmp(metainfo_charset, charset))
+               return;
+       out = reencode_string(line, metainfo_charset, charset);
        if (!out)
                die("cannot convert from %s to %s\n",
-                   input_charset, metainfo_charset);
-       strcpy(line, out);
+                   charset, metainfo_charset);
+       strlcpy(line, out, linesize);
        free(out);
 }
 
-static int decode_header_bq(char *it)
+static int decode_header_bq(char *it, unsigned itsize)
 {
        char *in, *out, *ep, *cp, *sp;
        char outbuf[1000];
@@ -552,56 +590,59 @@ static int decode_header_bq(char *it)
                default:
                        return rfc2047; /* no munging */
                case 'b':
-                       sz = decode_b_segment(cp + 3, piecebuf, ep);
+                       sz = decode_b_segment(cp + 3, piecebuf, sizeof(piecebuf), ep);
                        break;
                case 'q':
-                       sz = decode_q_segment(cp + 3, piecebuf, ep, 1);
+                       sz = decode_q_segment(cp + 3, piecebuf, sizeof(piecebuf), ep, 1);
                        break;
                }
                if (sz < 0)
                        return rfc2047;
                if (metainfo_charset)
-                       convert_to_utf8(piecebuf, charset_q);
+                       convert_to_utf8(piecebuf, sizeof(piecebuf), charset_q);
+
+               sz = strlen(piecebuf);
+               if (outbuf + sizeof(outbuf) <= out + sz)
+                       return rfc2047; /* no munging */
                strcpy(out, piecebuf);
-               out += strlen(out);
+               out += sz;
                in = ep + 2;
        }
        strcpy(out, in);
-       strcpy(it, outbuf);
+       strlcpy(it, outbuf, itsize);
        return rfc2047;
 }
 
-static void decode_header(char *it)
+static void decode_header(char *it, unsigned itsize)
 {
 
-       if (decode_header_bq(it))
+       if (decode_header_bq(it, itsize))
                return;
        /* otherwise "it" is a straight copy of the input.
         * This can be binary guck but there is no charset specified.
         */
        if (metainfo_charset)
-               convert_to_utf8(it, "");
+               convert_to_utf8(it, itsize, "");
 }
 
-static void decode_transfer_encoding(char *line)
+static int decode_transfer_encoding(char *line, unsigned linesize, int inputlen)
 {
        char *ep;
 
        switch (transfer_encoding) {
        case TE_QP:
-               ep = line + strlen(line);
-               decode_q_segment(line, line, ep, 0);
-               break;
+               ep = line + inputlen;
+               return decode_q_segment(line, line, linesize, ep, 0);
        case TE_BASE64:
-               ep = line + strlen(line);
-               decode_b_segment(line, line, ep);
-               break;
+               ep = line + inputlen;
+               return decode_b_segment(line, line, linesize, ep);
        case TE_DONTCARE:
-               break;
+       default:
+               return inputlen;
        }
 }
 
-static int handle_filter(char *line);
+static int handle_filter(char *line, unsigned linesize, int linelen);
 
 static int find_boundary(void)
 {
@@ -629,7 +670,7 @@ again:
                                        "can't recover\n");
                        exit(1);
                }
-               handle_filter(newline);
+               handle_filter(newline, sizeof(newline), strlen(newline));
 
                /* skip to the next boundary */
                if (!find_boundary())
@@ -644,7 +685,7 @@ again:
 
        /* slurp in this section's info */
        while (read_one_header_line(line, sizeof(line), fin))
-               check_header(line, p_hdr_data, 0);
+               check_header(line, sizeof(line), p_hdr_data, 0);
 
        /* eat the blank line after section info */
        return (fgets(line, sizeof(line), fin) != NULL);
@@ -683,9 +724,10 @@ static inline int patchbreak(const char *line)
 }
 
 
-static int handle_commit_msg(char *line)
+static int handle_commit_msg(char *line, unsigned linesize)
 {
        static int still_looking = 1;
+       char *endline = line + linesize;
 
        if (!cmitmsg)
                return 0;
@@ -700,13 +742,13 @@ static int handle_commit_msg(char *line)
                        if (!*cp)
                                return 0;
                }
-               if ((still_looking = check_header(cp, s_hdr_data, 0)) != 0)
+               if ((still_looking = check_header(cp, endline - cp, s_hdr_data, 0)) != 0)
                        return 0;
        }
 
        /* normalize the log message to UTF-8. */
        if (metainfo_charset)
-               convert_to_utf8(line, charset);
+               convert_to_utf8(line, endline - line, charset);
 
        if (patchbreak(line)) {
                fclose(cmitmsg);
@@ -718,14 +760,14 @@ static int handle_commit_msg(char *line)
        return 0;
 }
 
-static int handle_patch(char *line)
+static int handle_patch(char *line, int len)
 {
-       fputs(line, patchfile);
+       fwrite(line, 1, len, patchfile);
        patch_lines++;
        return 0;
 }
 
-static int handle_filter(char *line)
+static int handle_filter(char *line, unsigned linesize, int linelen)
 {
        static int filter = 0;
 
@@ -734,11 +776,11 @@ static int handle_filter(char *line)
         */
        switch (filter) {
        case 0:
-               if (!handle_commit_msg(line))
+               if (!handle_commit_msg(line, linesize))
                        break;
                filter++;
        case 1:
-               if (!handle_patch(line))
+               if (!handle_patch(line, linelen))
                        break;
                filter++;
        default:
@@ -753,6 +795,7 @@ static void handle_body(void)
        int rc = 0;
        static char newline[2000];
        static char *np = newline;
+       int len = strlen(line);
 
        /* Skip up to the first boundary */
        if (content_top->boundary) {
@@ -764,61 +807,82 @@ static void handle_body(void)
                /* process any boundary lines */
                if (content_top->boundary && is_multipart_boundary(line)) {
                        /* flush any leftover */
-                       if ((transfer_encoding == TE_BASE64)  &&
-                           (np != newline)) {
-                               handle_filter(newline);
-                       }
+                       if (np != newline)
+                               handle_filter(newline, sizeof(newline),
+                                             np - newline);
                        if (!handle_boundary())
                                return;
                }
 
                /* Unwrap transfer encoding */
-               decode_transfer_encoding(line);
+               len = decode_transfer_encoding(line, sizeof(line), len);
+               if (len < 0) {
+                       error("Malformed input line");
+                       return;
+               }
 
                switch (transfer_encoding) {
                case TE_BASE64:
+               case TE_QP:
                {
                        char *op = line;
 
                        /* binary data most likely doesn't have newlines */
                        if (message_type != TYPE_TEXT) {
-                               rc = handle_filter(line);
+                               rc = handle_filter(line, sizeof(line), len);
                                break;
                        }
 
-                       /* this is a decoded line that may contain
+                       /*
+                        * This is a decoded line that may contain
                         * multiple new lines.  Pass only one chunk
                         * at a time to handle_filter()
                         */
-
                        do {
-                               while (*op != '\n' && *op != 0)
+                               while (op < line + len && *op != '\n')
                                        *np++ = *op++;
                                *np = *op;
                                if (*np != 0) {
                                        /* should be sitting on a new line */
                                        *(++np) = 0;
                                        op++;
-                                       rc = handle_filter(newline);
+                                       rc = handle_filter(newline, sizeof(newline), np - newline);
                                        np = newline;
                                }
-                       } while (*op != 0);
-                       /* the partial chunk is saved in newline and
-                        * will be appended by the next iteration of fgets
+                       } while (op < line + len);
+                       /*
+                        * The partial chunk is saved in newline and will be
+                        * appended by the next iteration of read_line_with_nul().
                         */
                        break;
                }
                default:
-                       rc = handle_filter(line);
+                       rc = handle_filter(line, sizeof(line), len);
                }
                if (rc)
                        /* nothing left to filter */
                        break;
-       } while (fgets(line, sizeof(line), fin));
+       } while ((len = read_line_with_nul(line, sizeof(line), fin)));
 
        return;
 }
 
+static void output_header_lines(FILE *fout, const char *hdr, char *data)
+{
+       while (1) {
+               char *ep = strchr(data, '\n');
+               int len;
+               if (!ep)
+                       len = strlen(data);
+               else
+                       len = ep - data;
+               fprintf(fout, "%s: %.*s\n", hdr, len, data);
+               if (!ep)
+                       break;
+               data = ep + 1;
+       }
+}
+
 static void handle_info(void)
 {
        char *sub;
@@ -836,9 +900,13 @@ static void handle_info(void)
                        continue;
 
                if (!memcmp(header[i], "Subject", 7)) {
-                       sub = cleanup_subject(hdr);
-                       cleanup_space(sub);
-                       fprintf(fout, "Subject: %s\n", sub);
+                       if (keep_subject)
+                               sub = hdr;
+                       else {
+                               sub = cleanup_subject(hdr);
+                               cleanup_space(sub);
+                       }
+                       output_header_lines(fout, "Subject", sub);
                } else if (!memcmp(header[i], "From", 4)) {
                        handle_from(hdr);
                        fprintf(fout, "Author: %s\n", name);
@@ -854,6 +922,7 @@ static void handle_info(void)
 static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
                    const char *msg, const char *patch)
 {
+       int peek;
        keep_subject = ks;
        metainfo_charset = encoding;
        fin = in;
@@ -874,9 +943,14 @@ static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding,
        p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
        s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *));
 
+       do {
+               peek = fgetc(in);
+       } while (isspace(peek));
+       ungetc(peek, in);
+
        /* process the email header */
        while (read_one_header_line(line, sizeof(line), fin))
-               check_header(line, p_hdr_data, 1);
+               check_header(line, sizeof(line), p_hdr_data, 1);
 
        handle_body();
        handle_info();
@@ -894,7 +968,7 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
        /* NEEDSWORK: might want to do the optional .git/ directory
         * discovery
         */
-       git_config(git_default_config);
+       git_config(git_default_config, NULL);
 
        def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8");
        metainfo_charset = def_charset;