X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=builtin-mailinfo.c;h=d7cb11dc0d6339dbea51c89f3cd4966e8f6b4c3d;hb=5433235daec11550973f1e290f069ce0c27f53ab;hp=d94578cb4ac0649913db1542f876d5010ece7f0f;hpb=bb95e19c5f1e470d2efe1c0e4e04c291019e4b25;p=git.git diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c index d94578cb4..d7cb11dc0 100644 --- a/builtin-mailinfo.c +++ b/builtin-mailinfo.c @@ -237,8 +237,6 @@ static int eatspace(char *line) static char *cleanup_subject(char *subject) { - if (keep_subject) - return subject; for (;;) { char *p; int len, remove; @@ -289,25 +287,25 @@ static void cleanup_space(char *buf) } } -static void decode_header(char *it); +static void decode_header(char *it, unsigned itsize); static char *header[MAX_HDR_PARSED] = { "From","Subject","Date", }; -static int check_header(char *line, char **hdr_data) +static int check_header(char *line, unsigned linesize, char **hdr_data, int overwrite) { int i; /* search for the interesting parts */ for (i = 0; header[i]; i++) { int len = strlen(header[i]); - if (!hdr_data[i] && + if ((!hdr_data[i] || overwrite) && !strncasecmp(line, header[i], len) && line[len] == ':' && isspace(line[len + 1])) { /* Unwrap inline B and Q encoding, and optionally * normalize the meta information to utf8. */ - decode_header(line + len + 2); + decode_header(line + len + 2, linesize - len - 2); hdr_data[i] = xmalloc(1000 * sizeof(char)); if (! handle_header(line, hdr_data[i], len + 2)) { return 1; @@ -318,14 +316,14 @@ static int check_header(char *line, char **hdr_data) /* Content stuff */ if (!strncasecmp(line, "Content-Type", 12) && line[12] == ':' && isspace(line[12 + 1])) { - decode_header(line + 12 + 2); + decode_header(line + 12 + 2, linesize - 12 - 2); if (! handle_content_type(line)) { return 1; } } if (!strncasecmp(line, "Content-Transfer-Encoding", 25) && line[25] == ':' && isspace(line[25 + 1])) { - decode_header(line + 25 + 2); + decode_header(line + 25 + 2, linesize - 25 - 2); if (! handle_content_transfer_encoding(line)) { return 1; } @@ -425,6 +423,7 @@ static int read_one_header_line(char *line, int sz, FILE *in) if (addlen >= sz - len) addlen = sz - len - 1; memcpy(line + len, continuation, addlen); + line[len] = '\n'; len += addlen; } } @@ -433,10 +432,15 @@ static int read_one_header_line(char *line, int sz, FILE *in) return 1; } -static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047) +static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047) { + char *otend = ot + otsize; int c; while ((c = *in++) != 0 && (in <= ep)) { + if (ot == otend) { + *--ot = '\0'; + return -1; + } if (c == '=') { int d = *in++; if (d == '\n' || !d) @@ -452,12 +456,17 @@ static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047) return 0; } -static int decode_b_segment(char *in, char *ot, char *ep) +static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep) { /* Decode in..ep, possibly in-place to ot */ int c, pos = 0, acc = 0; + char *otend = ot + otsize; while ((c = *in++) != 0 && (in <= ep)) { + if (ot == otend) { + *--ot = '\0'; + return -1; + } if (c == '+') c = 62; else if (c == '/') @@ -499,20 +508,47 @@ static int decode_b_segment(char *in, char *ot, char *ep) return 0; } -static void convert_to_utf8(char *line, const char *charset) +/* + * When there is no known charset, guess. + * + * Right now we assume that if the target is UTF-8 (the default), + * and it already looks like UTF-8 (which includes US-ASCII as its + * subset, of course) then that is what it is and there is nothing + * to do. + * + * Otherwise, we default to assuming it is Latin1 for historical + * reasons. + */ +static const char *guess_charset(const char *line, const char *target_charset) +{ + if (is_encoding_utf8(target_charset)) { + if (is_utf8(line)) + return NULL; + } + return "latin1"; +} + +static void convert_to_utf8(char *line, unsigned linesize, const char *charset) { - static const char latin_one[] = "latin1"; - const char *input_charset = *charset ? charset : latin_one; - char *out = reencode_string(line, metainfo_charset, input_charset); + char *out; + if (!charset || !*charset) { + charset = guess_charset(line, metainfo_charset); + if (!charset) + return; + } + + if (!strcmp(metainfo_charset, charset)) + return; + out = reencode_string(line, metainfo_charset, charset); if (!out) die("cannot convert from %s to %s\n", - input_charset, metainfo_charset); - strcpy(line, out); + charset, metainfo_charset); + strlcpy(line, out, linesize); free(out); } -static int decode_header_bq(char *it) +static int decode_header_bq(char *it, unsigned itsize) { char *in, *out, *ep, *cp, *sp; char outbuf[1000]; @@ -552,56 +588,60 @@ static int decode_header_bq(char *it) default: return rfc2047; /* no munging */ case 'b': - sz = decode_b_segment(cp + 3, piecebuf, ep); + sz = decode_b_segment(cp + 3, piecebuf, sizeof(piecebuf), ep); break; case 'q': - sz = decode_q_segment(cp + 3, piecebuf, ep, 1); + sz = decode_q_segment(cp + 3, piecebuf, sizeof(piecebuf), ep, 1); break; } if (sz < 0) return rfc2047; if (metainfo_charset) - convert_to_utf8(piecebuf, charset_q); + convert_to_utf8(piecebuf, sizeof(piecebuf), charset_q); + + sz = strlen(piecebuf); + if (outbuf + sizeof(outbuf) <= out + sz) + return rfc2047; /* no munging */ strcpy(out, piecebuf); - out += strlen(out); + out += sz; in = ep + 2; } strcpy(out, in); - strcpy(it, outbuf); + strlcpy(it, outbuf, itsize); return rfc2047; } -static void decode_header(char *it) +static void decode_header(char *it, unsigned itsize) { - if (decode_header_bq(it)) + if (decode_header_bq(it, itsize)) return; /* otherwise "it" is a straight copy of the input. * This can be binary guck but there is no charset specified. */ if (metainfo_charset) - convert_to_utf8(it, ""); + convert_to_utf8(it, itsize, ""); } -static void decode_transfer_encoding(char *line) +static void decode_transfer_encoding(char *line, unsigned linesize) { char *ep; switch (transfer_encoding) { case TE_QP: ep = line + strlen(line); - decode_q_segment(line, line, ep, 0); + decode_q_segment(line, line, linesize, ep, 0); break; case TE_BASE64: ep = line + strlen(line); - decode_b_segment(line, line, ep); + decode_b_segment(line, line, linesize, ep); break; case TE_DONTCARE: break; } } -static int handle_filter(char *line); +static int handle_filter(char *line, unsigned linesize); static int find_boundary(void) { @@ -614,6 +654,7 @@ static int find_boundary(void) static int handle_boundary(void) { + char newline[]="\n"; again: if (!memcmp(line+content_top->boundary_len, "--", 2)) { /* we hit an end boundary */ @@ -628,7 +669,7 @@ again: "can't recover\n"); exit(1); } - handle_filter("\n"); + handle_filter(newline, sizeof(newline)); /* skip to the next boundary */ if (!find_boundary()) @@ -643,7 +684,7 @@ again: /* slurp in this section's info */ while (read_one_header_line(line, sizeof(line), fin)) - check_header(line, p_hdr_data); + check_header(line, sizeof(line), p_hdr_data, 0); /* eat the blank line after section info */ return (fgets(line, sizeof(line), fin) != NULL); @@ -682,9 +723,10 @@ static inline int patchbreak(const char *line) } -static int handle_commit_msg(char *line) +static int handle_commit_msg(char *line, unsigned linesize) { static int still_looking = 1; + char *endline = line + linesize; if (!cmitmsg) return 0; @@ -699,10 +741,14 @@ static int handle_commit_msg(char *line) if (!*cp) return 0; } - if ((still_looking = check_header(cp, s_hdr_data)) != 0) + if ((still_looking = check_header(cp, endline - cp, s_hdr_data, 0)) != 0) return 0; } + /* normalize the log message to UTF-8. */ + if (metainfo_charset) + convert_to_utf8(line, endline - line, charset); + if (patchbreak(line)) { fclose(cmitmsg); cmitmsg = NULL; @@ -720,7 +766,7 @@ static int handle_patch(char *line) return 0; } -static int handle_filter(char *line) +static int handle_filter(char *line, unsigned linesize) { static int filter = 0; @@ -729,7 +775,7 @@ static int handle_filter(char *line) */ switch (filter) { case 0: - if (!handle_commit_msg(line)) + if (!handle_commit_msg(line, linesize)) break; filter++; case 1: @@ -761,18 +807,14 @@ static void handle_body(void) /* flush any leftover */ if ((transfer_encoding == TE_BASE64) && (np != newline)) { - handle_filter(newline); + handle_filter(newline, sizeof(newline)); } if (!handle_boundary()) return; } - /* Unwrap transfer encoding and optionally - * normalize the log message to UTF-8. - */ - decode_transfer_encoding(line); - if (metainfo_charset) - convert_to_utf8(line, charset); + /* Unwrap transfer encoding */ + decode_transfer_encoding(line, sizeof(line)); switch (transfer_encoding) { case TE_BASE64: @@ -781,7 +823,7 @@ static void handle_body(void) /* binary data most likely doesn't have newlines */ if (message_type != TYPE_TEXT) { - rc = handle_filter(line); + rc = handle_filter(line, sizeof(newline)); break; } @@ -798,7 +840,7 @@ static void handle_body(void) /* should be sitting on a new line */ *(++np) = 0; op++; - rc = handle_filter(newline); + rc = handle_filter(newline, sizeof(newline)); np = newline; } } while (*op != 0); @@ -808,7 +850,7 @@ static void handle_body(void) break; } default: - rc = handle_filter(line); + rc = handle_filter(line, sizeof(newline)); } if (rc) /* nothing left to filter */ @@ -818,6 +860,22 @@ static void handle_body(void) return; } +static void output_header_lines(FILE *fout, const char *hdr, char *data) +{ + while (1) { + char *ep = strchr(data, '\n'); + int len; + if (!ep) + len = strlen(data); + else + len = ep - data; + fprintf(fout, "%s: %.*s\n", hdr, len, data); + if (!ep) + break; + data = ep + 1; + } +} + static void handle_info(void) { char *sub; @@ -835,9 +893,13 @@ static void handle_info(void) continue; if (!memcmp(header[i], "Subject", 7)) { - sub = cleanup_subject(hdr); - cleanup_space(sub); - fprintf(fout, "Subject: %s\n", sub); + if (keep_subject) + sub = hdr; + else { + sub = cleanup_subject(hdr); + cleanup_space(sub); + } + output_header_lines(fout, "Subject", sub); } else if (!memcmp(header[i], "From", 4)) { handle_from(hdr); fprintf(fout, "Author: %s\n", name); @@ -850,8 +912,8 @@ static void handle_info(void) fprintf(fout, "\n"); } -int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, - const char *msg, const char *patch) +static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, + const char *msg, const char *patch) { keep_subject = ks; metainfo_charset = encoding; @@ -875,7 +937,7 @@ int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, /* process the email header */ while (read_one_header_line(line, sizeof(line), fin)) - check_header(line, p_hdr_data); + check_header(line, sizeof(line), p_hdr_data, 1); handle_body(); handle_info();