X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=builtin-mailinfo.c;h=fa6e8f90a4d54eb07f2af80620e35e617c94d605;hb=1db4a75c8d18d8827ac710e87219b349558d9196;hp=489c2c58c01514ac3d967d1c3f46f1243f853580;hpb=68fb4650497d6acbf6d407513cd2e2d960442e3b;p=git.git diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c index 489c2c58c..fa6e8f90a 100644 --- a/builtin-mailinfo.c +++ b/builtin-mailinfo.c @@ -237,8 +237,6 @@ static int eatspace(char *line) static char *cleanup_subject(char *subject) { - if (keep_subject) - return subject; for (;;) { char *p; int len, remove; @@ -289,12 +287,12 @@ static void cleanup_space(char *buf) } } -static void decode_header(char *it); -static char *header[MAX_HDR_PARSED] = { +static void decode_header(char *it, unsigned itsize); +static const char *header[MAX_HDR_PARSED] = { "From","Subject","Date", }; -static int check_header(char *line, char **hdr_data, int overwrite) +static int check_header(char *line, unsigned linesize, char **hdr_data, int overwrite) { int i; @@ -307,7 +305,7 @@ static int check_header(char *line, char **hdr_data, int overwrite) /* Unwrap inline B and Q encoding, and optionally * normalize the meta information to utf8. */ - decode_header(line + len + 2); + decode_header(line + len + 2, linesize - len - 2); hdr_data[i] = xmalloc(1000 * sizeof(char)); if (! handle_header(line, hdr_data[i], len + 2)) { return 1; @@ -318,14 +316,14 @@ static int check_header(char *line, char **hdr_data, int overwrite) /* Content stuff */ if (!strncasecmp(line, "Content-Type", 12) && line[12] == ':' && isspace(line[12 + 1])) { - decode_header(line + 12 + 2); + decode_header(line + 12 + 2, linesize - 12 - 2); if (! handle_content_type(line)) { return 1; } } if (!strncasecmp(line, "Content-Transfer-Encoding", 25) && line[25] == ':' && isspace(line[25 + 1])) { - decode_header(line + 25 + 2); + decode_header(line + 25 + 2, linesize - 25 - 2); if (! handle_content_transfer_encoding(line)) { return 1; } @@ -425,6 +423,7 @@ static int read_one_header_line(char *line, int sz, FILE *in) if (addlen >= sz - len) addlen = sz - len - 1; memcpy(line + len, continuation, addlen); + line[len] = '\n'; len += addlen; } } @@ -433,10 +432,16 @@ static int read_one_header_line(char *line, int sz, FILE *in) return 1; } -static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047) +static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047) { + char *otbegin = ot; + char *otend = ot + otsize; int c; while ((c = *in++) != 0 && (in <= ep)) { + if (ot == otend) { + *--ot = '\0'; + return -1; + } if (c == '=') { int d = *in++; if (d == '\n' || !d) @@ -449,15 +454,21 @@ static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047) *ot++ = c; } *ot = 0; - return 0; + return (ot - otbegin); } -static int decode_b_segment(char *in, char *ot, char *ep) +static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep) { /* Decode in..ep, possibly in-place to ot */ int c, pos = 0, acc = 0; + char *otbegin = ot; + char *otend = ot + otsize; while ((c = *in++) != 0 && (in <= ep)) { + if (ot == otend) { + *--ot = '\0'; + return -1; + } if (c == '+') c = 62; else if (c == '/') @@ -496,23 +507,50 @@ static int decode_b_segment(char *in, char *ot, char *ep) } } *ot = 0; - return 0; + return (ot - otbegin); } -static void convert_to_utf8(char *line, const char *charset) +/* + * When there is no known charset, guess. + * + * Right now we assume that if the target is UTF-8 (the default), + * and it already looks like UTF-8 (which includes US-ASCII as its + * subset, of course) then that is what it is and there is nothing + * to do. + * + * Otherwise, we default to assuming it is Latin1 for historical + * reasons. + */ +static const char *guess_charset(const char *line, const char *target_charset) { - static const char latin_one[] = "latin1"; - const char *input_charset = *charset ? charset : latin_one; - char *out = reencode_string(line, metainfo_charset, input_charset); + if (is_encoding_utf8(target_charset)) { + if (is_utf8(line)) + return NULL; + } + return "latin1"; +} +static void convert_to_utf8(char *line, unsigned linesize, const char *charset) +{ + char *out; + + if (!charset || !*charset) { + charset = guess_charset(line, metainfo_charset); + if (!charset) + return; + } + + if (!strcmp(metainfo_charset, charset)) + return; + out = reencode_string(line, metainfo_charset, charset); if (!out) die("cannot convert from %s to %s\n", - input_charset, metainfo_charset); - strcpy(line, out); + charset, metainfo_charset); + strlcpy(line, out, linesize); free(out); } -static int decode_header_bq(char *it) +static int decode_header_bq(char *it, unsigned itsize) { char *in, *out, *ep, *cp, *sp; char outbuf[1000]; @@ -552,56 +590,59 @@ static int decode_header_bq(char *it) default: return rfc2047; /* no munging */ case 'b': - sz = decode_b_segment(cp + 3, piecebuf, ep); + sz = decode_b_segment(cp + 3, piecebuf, sizeof(piecebuf), ep); break; case 'q': - sz = decode_q_segment(cp + 3, piecebuf, ep, 1); + sz = decode_q_segment(cp + 3, piecebuf, sizeof(piecebuf), ep, 1); break; } if (sz < 0) return rfc2047; if (metainfo_charset) - convert_to_utf8(piecebuf, charset_q); + convert_to_utf8(piecebuf, sizeof(piecebuf), charset_q); + + sz = strlen(piecebuf); + if (outbuf + sizeof(outbuf) <= out + sz) + return rfc2047; /* no munging */ strcpy(out, piecebuf); - out += strlen(out); + out += sz; in = ep + 2; } strcpy(out, in); - strcpy(it, outbuf); + strlcpy(it, outbuf, itsize); return rfc2047; } -static void decode_header(char *it) +static void decode_header(char *it, unsigned itsize) { - if (decode_header_bq(it)) + if (decode_header_bq(it, itsize)) return; /* otherwise "it" is a straight copy of the input. * This can be binary guck but there is no charset specified. */ if (metainfo_charset) - convert_to_utf8(it, ""); + convert_to_utf8(it, itsize, ""); } -static void decode_transfer_encoding(char *line) +static int decode_transfer_encoding(char *line, unsigned linesize, int inputlen) { char *ep; switch (transfer_encoding) { case TE_QP: - ep = line + strlen(line); - decode_q_segment(line, line, ep, 0); - break; + ep = line + inputlen; + return decode_q_segment(line, line, linesize, ep, 0); case TE_BASE64: - ep = line + strlen(line); - decode_b_segment(line, line, ep); - break; + ep = line + inputlen; + return decode_b_segment(line, line, linesize, ep); case TE_DONTCARE: - break; + default: + return inputlen; } } -static int handle_filter(char *line); +static int handle_filter(char *line, unsigned linesize, int linelen); static int find_boundary(void) { @@ -629,7 +670,7 @@ again: "can't recover\n"); exit(1); } - handle_filter(newline); + handle_filter(newline, sizeof(newline), strlen(newline)); /* skip to the next boundary */ if (!find_boundary()) @@ -644,7 +685,7 @@ again: /* slurp in this section's info */ while (read_one_header_line(line, sizeof(line), fin)) - check_header(line, p_hdr_data, 0); + check_header(line, sizeof(line), p_hdr_data, 0); /* eat the blank line after section info */ return (fgets(line, sizeof(line), fin) != NULL); @@ -683,9 +724,10 @@ static inline int patchbreak(const char *line) } -static int handle_commit_msg(char *line) +static int handle_commit_msg(char *line, unsigned linesize) { static int still_looking = 1; + char *endline = line + linesize; if (!cmitmsg) return 0; @@ -700,13 +742,13 @@ static int handle_commit_msg(char *line) if (!*cp) return 0; } - if ((still_looking = check_header(cp, s_hdr_data, 0)) != 0) + if ((still_looking = check_header(cp, endline - cp, s_hdr_data, 0)) != 0) return 0; } /* normalize the log message to UTF-8. */ if (metainfo_charset) - convert_to_utf8(line, charset); + convert_to_utf8(line, endline - line, charset); if (patchbreak(line)) { fclose(cmitmsg); @@ -718,14 +760,14 @@ static int handle_commit_msg(char *line) return 0; } -static int handle_patch(char *line) +static int handle_patch(char *line, int len) { - fputs(line, patchfile); + fwrite(line, 1, len, patchfile); patch_lines++; return 0; } -static int handle_filter(char *line) +static int handle_filter(char *line, unsigned linesize, int linelen) { static int filter = 0; @@ -734,11 +776,11 @@ static int handle_filter(char *line) */ switch (filter) { case 0: - if (!handle_commit_msg(line)) + if (!handle_commit_msg(line, linesize)) break; filter++; case 1: - if (!handle_patch(line)) + if (!handle_patch(line, linelen)) break; filter++; default: @@ -753,6 +795,7 @@ static void handle_body(void) int rc = 0; static char newline[2000]; static char *np = newline; + int len = strlen(line); /* Skip up to the first boundary */ if (content_top->boundary) { @@ -764,61 +807,83 @@ static void handle_body(void) /* process any boundary lines */ if (content_top->boundary && is_multipart_boundary(line)) { /* flush any leftover */ - if ((transfer_encoding == TE_BASE64) && - (np != newline)) { - handle_filter(newline); - } + if (np != newline) + handle_filter(newline, sizeof(newline), + np - newline); if (!handle_boundary()) return; + len = strlen(line); } /* Unwrap transfer encoding */ - decode_transfer_encoding(line); + len = decode_transfer_encoding(line, sizeof(line), len); + if (len < 0) { + error("Malformed input line"); + return; + } switch (transfer_encoding) { case TE_BASE64: + case TE_QP: { char *op = line; /* binary data most likely doesn't have newlines */ if (message_type != TYPE_TEXT) { - rc = handle_filter(line); + rc = handle_filter(line, sizeof(line), len); break; } - /* this is a decoded line that may contain + /* + * This is a decoded line that may contain * multiple new lines. Pass only one chunk * at a time to handle_filter() */ - do { - while (*op != '\n' && *op != 0) + while (op < line + len && *op != '\n') *np++ = *op++; *np = *op; if (*np != 0) { /* should be sitting on a new line */ *(++np) = 0; op++; - rc = handle_filter(newline); + rc = handle_filter(newline, sizeof(newline), np - newline); np = newline; } - } while (*op != 0); - /* the partial chunk is saved in newline and - * will be appended by the next iteration of fgets + } while (op < line + len); + /* + * The partial chunk is saved in newline and will be + * appended by the next iteration of read_line_with_nul(). */ break; } default: - rc = handle_filter(line); + rc = handle_filter(line, sizeof(line), len); } if (rc) /* nothing left to filter */ break; - } while (fgets(line, sizeof(line), fin)); + } while ((len = read_line_with_nul(line, sizeof(line), fin))); return; } +static void output_header_lines(FILE *fout, const char *hdr, char *data) +{ + while (1) { + char *ep = strchr(data, '\n'); + int len; + if (!ep) + len = strlen(data); + else + len = ep - data; + fprintf(fout, "%s: %.*s\n", hdr, len, data); + if (!ep) + break; + data = ep + 1; + } +} + static void handle_info(void) { char *sub; @@ -836,9 +901,13 @@ static void handle_info(void) continue; if (!memcmp(header[i], "Subject", 7)) { - sub = cleanup_subject(hdr); - cleanup_space(sub); - fprintf(fout, "Subject: %s\n", sub); + if (keep_subject) + sub = hdr; + else { + sub = cleanup_subject(hdr); + cleanup_space(sub); + } + output_header_lines(fout, "Subject", sub); } else if (!memcmp(header[i], "From", 4)) { handle_from(hdr); fprintf(fout, "Author: %s\n", name); @@ -854,6 +923,7 @@ static void handle_info(void) static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, const char *msg, const char *patch) { + int peek; keep_subject = ks; metainfo_charset = encoding; fin = in; @@ -874,9 +944,14 @@ static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); + do { + peek = fgetc(in); + } while (isspace(peek)); + ungetc(peek, in); + /* process the email header */ while (read_one_header_line(line, sizeof(line), fin)) - check_header(line, p_hdr_data, 1); + check_header(line, sizeof(line), p_hdr_data, 1); handle_body(); handle_info(); @@ -894,7 +969,7 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) /* NEEDSWORK: might want to do the optional .git/ directory * discovery */ - git_config(git_default_config); + git_config(git_default_config, NULL); def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8"); metainfo_charset = def_charset;