X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=builtin-mailinfo.c;h=d7cb11dc0d6339dbea51c89f3cd4966e8f6b4c3d;hb=4fb5fd5d301;hp=f54e8752fb6dd0f7f251fe6bfe7bf3b2f8cd91f3;hpb=7f09ac4714214d1dc12877cc98fb69cdee22e33e;p=git.git diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c index f54e8752f..d7cb11dc0 100644 --- a/builtin-mailinfo.c +++ b/builtin-mailinfo.c @@ -11,19 +11,22 @@ static FILE *cmitmsg, *patchfile, *fin, *fout; static int keep_subject; static const char *metainfo_charset; static char line[1000]; -static char date[1000]; static char name[1000]; static char email[1000]; -static char subject[1000]; static enum { TE_DONTCARE, TE_QP, TE_BASE64, } transfer_encoding; -static char charset[256]; +static enum { + TYPE_TEXT, TYPE_OTHER, +} message_type; -static char multipart_boundary[1000]; -static int multipart_boundary_len; +static char charset[256]; static int patch_lines; +static char **p_hdr_data, **s_hdr_data; + +#define MAX_HDR_PARSED 10 +#define MAX_BOUNDARIES 5 static char *sanity_check(char *name, char *email) { @@ -137,15 +140,13 @@ static int handle_from(char *in_line) return 1; } -static int handle_date(char *line) +static int handle_header(char *line, char *data, int ofs) { - strcpy(date, line); - return 0; -} + if (!line || !data) + return 1; + + strcpy(data, line+ofs); -static int handle_subject(char *line) -{ - strcpy(subject, line); return 0; } @@ -177,17 +178,32 @@ static int slurp_attr(const char *line, const char *name, char *attr) return 1; } -static int handle_subcontent_type(char *line) +struct content_type { + char *boundary; + int boundary_len; +}; + +static struct content_type content[MAX_BOUNDARIES]; + +static struct content_type *content_top = content; + +static int handle_content_type(char *line) { - /* We do not want to mess with boundary. Note that we do not - * handle nested multipart. - */ - if (strcasestr(line, "boundary=")) { - fprintf(stderr, "Not handling nested multipart message.\n"); - exit(1); + char boundary[256]; + + if (strcasestr(line, "text/") == NULL) + message_type = TYPE_OTHER; + if (slurp_attr(line, "boundary=", boundary + 2)) { + memcpy(boundary, "--", 2); + if (content_top++ >= &content[MAX_BOUNDARIES]) { + fprintf(stderr, "Too many boundaries to handle\n"); + exit(1); + } + content_top->boundary_len = strlen(boundary); + content_top->boundary = xmalloc(content_top->boundary_len+1); + strcpy(content_top->boundary, boundary); } - slurp_attr(line, "charset=", charset); - if (*charset) { + if (slurp_attr(line, "charset=", charset)) { int i, c; for (i = 0; (c = charset[i]) != 0; i++) charset[i] = tolower(c); @@ -195,17 +211,6 @@ static int handle_subcontent_type(char *line) return 0; } -static int handle_content_type(char *line) -{ - *multipart_boundary = 0; - if (slurp_attr(line, "boundary=", multipart_boundary + 2)) { - memcpy(multipart_boundary, "--", 2); - multipart_boundary_len = strlen(multipart_boundary); - } - slurp_attr(line, "charset=", charset); - return 0; -} - static int handle_content_transfer_encoding(char *line) { if (strcasestr(line, "base64")) @@ -219,7 +224,7 @@ static int handle_content_transfer_encoding(char *line) static int is_multipart_boundary(const char *line) { - return (!memcmp(line, multipart_boundary, multipart_boundary_len)); + return (!memcmp(line, content_top->boundary, content_top->boundary_len)); } static int eatspace(char *line) @@ -230,73 +235,15 @@ static int eatspace(char *line) return len; } -#define SEEN_FROM 01 -#define SEEN_DATE 02 -#define SEEN_SUBJECT 04 -#define SEEN_BOGUS_UNIX_FROM 010 -#define SEEN_PREFIX 020 - -/* First lines of body can have From:, Date:, and Subject: or empty */ -static void handle_inbody_header(int *seen, char *line) -{ - if (*seen & SEEN_PREFIX) - return; - if (isspace(*line)) { - char *cp; - for (cp = line + 1; *cp; cp++) { - if (!isspace(*cp)) - break; - } - if (!*cp) - return; - } - if (!memcmp(">From", line, 5) && isspace(line[5])) { - if (!(*seen & SEEN_BOGUS_UNIX_FROM)) { - *seen |= SEEN_BOGUS_UNIX_FROM; - return; - } - } - if (!memcmp("From:", line, 5) && isspace(line[5])) { - if (!(*seen & SEEN_FROM) && handle_from(line+6)) { - *seen |= SEEN_FROM; - return; - } - } - if (!memcmp("Date:", line, 5) && isspace(line[5])) { - if (!(*seen & SEEN_DATE)) { - handle_date(line+6); - *seen |= SEEN_DATE; - return; - } - } - if (!memcmp("Subject:", line, 8) && isspace(line[8])) { - if (!(*seen & SEEN_SUBJECT)) { - handle_subject(line+9); - *seen |= SEEN_SUBJECT; - return; - } - } - if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) { - if (!(*seen & SEEN_SUBJECT)) { - handle_subject(line); - *seen |= SEEN_SUBJECT; - return; - } - } - *seen |= SEEN_PREFIX; -} - static char *cleanup_subject(char *subject) { - if (keep_subject) - return subject; for (;;) { char *p; int len, remove; switch (*subject) { case 'r': case 'R': if (!memcmp("e:", subject+1, 2)) { - subject +=3; + subject += 3; continue; } break; @@ -340,58 +287,63 @@ static void cleanup_space(char *buf) } } -static void decode_header(char *it); -typedef int (*header_fn_t)(char *); -struct header_def { - const char *name; - header_fn_t func; - int namelen; +static void decode_header(char *it, unsigned itsize); +static char *header[MAX_HDR_PARSED] = { + "From","Subject","Date", }; -static void check_header(char *line, struct header_def *header) +static int check_header(char *line, unsigned linesize, char **hdr_data, int overwrite) { int i; - if (header[0].namelen <= 0) { - for (i = 0; header[i].name; i++) - header[i].namelen = strlen(header[i].name); - } - for (i = 0; header[i].name; i++) { - int len = header[i].namelen; - if (!strncasecmp(line, header[i].name, len) && + /* search for the interesting parts */ + for (i = 0; header[i]; i++) { + int len = strlen(header[i]); + if ((!hdr_data[i] || overwrite) && + !strncasecmp(line, header[i], len) && line[len] == ':' && isspace(line[len + 1])) { /* Unwrap inline B and Q encoding, and optionally * normalize the meta information to utf8. */ - decode_header(line + len + 2); - header[i].func(line + len + 2); - break; + decode_header(line + len + 2, linesize - len - 2); + hdr_data[i] = xmalloc(1000 * sizeof(char)); + if (! handle_header(line, hdr_data[i], len + 2)) { + return 1; + } } } -} -static void check_subheader_line(char *line) -{ - static struct header_def header[] = { - { "Content-Type", handle_subcontent_type }, - { "Content-Transfer-Encoding", - handle_content_transfer_encoding }, - { NULL }, - }; - check_header(line, header); -} -static void check_header_line(char *line) -{ - static struct header_def header[] = { - { "From", handle_from }, - { "Date", handle_date }, - { "Subject", handle_subject }, - { "Content-Type", handle_content_type }, - { "Content-Transfer-Encoding", - handle_content_transfer_encoding }, - { NULL }, - }; - check_header(line, header); + /* Content stuff */ + if (!strncasecmp(line, "Content-Type", 12) && + line[12] == ':' && isspace(line[12 + 1])) { + decode_header(line + 12 + 2, linesize - 12 - 2); + if (! handle_content_type(line)) { + return 1; + } + } + if (!strncasecmp(line, "Content-Transfer-Encoding", 25) && + line[25] == ':' && isspace(line[25 + 1])) { + decode_header(line + 25 + 2, linesize - 25 - 2); + if (! handle_content_transfer_encoding(line)) { + return 1; + } + } + + /* for inbody stuff */ + if (!memcmp(">From", line, 5) && isspace(line[5])) + return 1; + if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) { + for (i = 0; header[i]; i++) { + if (!memcmp("Subject: ", header[i], 9)) { + if (! handle_header(line, hdr_data[i], 0)) { + return 1; + } + } + } + } + + /* no match */ + return 0; } static int is_rfc2822_header(char *line) @@ -471,6 +423,7 @@ static int read_one_header_line(char *line, int sz, FILE *in) if (addlen >= sz - len) addlen = sz - len - 1; memcpy(line + len, continuation, addlen); + line[len] = '\n'; len += addlen; } } @@ -479,10 +432,15 @@ static int read_one_header_line(char *line, int sz, FILE *in) return 1; } -static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047) +static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047) { + char *otend = ot + otsize; int c; while ((c = *in++) != 0 && (in <= ep)) { + if (ot == otend) { + *--ot = '\0'; + return -1; + } if (c == '=') { int d = *in++; if (d == '\n' || !d) @@ -498,12 +456,17 @@ static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047) return 0; } -static int decode_b_segment(char *in, char *ot, char *ep) +static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep) { /* Decode in..ep, possibly in-place to ot */ int c, pos = 0, acc = 0; + char *otend = ot + otsize; while ((c = *in++) != 0 && (in <= ep)) { + if (ot == otend) { + *--ot = '\0'; + return -1; + } if (c == '+') c = 62; else if (c == '/') @@ -545,20 +508,47 @@ static int decode_b_segment(char *in, char *ot, char *ep) return 0; } -static void convert_to_utf8(char *line, const char *charset) +/* + * When there is no known charset, guess. + * + * Right now we assume that if the target is UTF-8 (the default), + * and it already looks like UTF-8 (which includes US-ASCII as its + * subset, of course) then that is what it is and there is nothing + * to do. + * + * Otherwise, we default to assuming it is Latin1 for historical + * reasons. + */ +static const char *guess_charset(const char *line, const char *target_charset) +{ + if (is_encoding_utf8(target_charset)) { + if (is_utf8(line)) + return NULL; + } + return "latin1"; +} + +static void convert_to_utf8(char *line, unsigned linesize, const char *charset) { - static const char latin_one[] = "latin1"; - const char *input_charset = *charset ? charset : latin_one; - char *out = reencode_string(line, metainfo_charset, input_charset); + char *out; + + if (!charset || !*charset) { + charset = guess_charset(line, metainfo_charset); + if (!charset) + return; + } + if (!strcmp(metainfo_charset, charset)) + return; + out = reencode_string(line, metainfo_charset, charset); if (!out) die("cannot convert from %s to %s\n", - input_charset, metainfo_charset); - strcpy(line, out); + charset, metainfo_charset); + strlcpy(line, out, linesize); free(out); } -static int decode_header_bq(char *it) +static int decode_header_bq(char *it, unsigned itsize) { char *in, *out, *ep, *cp, *sp; char outbuf[1000]; @@ -598,200 +588,332 @@ static int decode_header_bq(char *it) default: return rfc2047; /* no munging */ case 'b': - sz = decode_b_segment(cp + 3, piecebuf, ep); + sz = decode_b_segment(cp + 3, piecebuf, sizeof(piecebuf), ep); break; case 'q': - sz = decode_q_segment(cp + 3, piecebuf, ep, 1); + sz = decode_q_segment(cp + 3, piecebuf, sizeof(piecebuf), ep, 1); break; } if (sz < 0) return rfc2047; if (metainfo_charset) - convert_to_utf8(piecebuf, charset_q); + convert_to_utf8(piecebuf, sizeof(piecebuf), charset_q); + + sz = strlen(piecebuf); + if (outbuf + sizeof(outbuf) <= out + sz) + return rfc2047; /* no munging */ strcpy(out, piecebuf); - out += strlen(out); + out += sz; in = ep + 2; } strcpy(out, in); - strcpy(it, outbuf); + strlcpy(it, outbuf, itsize); return rfc2047; } -static void decode_header(char *it) +static void decode_header(char *it, unsigned itsize) { - if (decode_header_bq(it)) + if (decode_header_bq(it, itsize)) return; /* otherwise "it" is a straight copy of the input. * This can be binary guck but there is no charset specified. */ if (metainfo_charset) - convert_to_utf8(it, ""); + convert_to_utf8(it, itsize, ""); } -static void decode_transfer_encoding(char *line) +static void decode_transfer_encoding(char *line, unsigned linesize) { char *ep; switch (transfer_encoding) { case TE_QP: ep = line + strlen(line); - decode_q_segment(line, line, ep, 0); + decode_q_segment(line, line, linesize, ep, 0); break; case TE_BASE64: ep = line + strlen(line); - decode_b_segment(line, line, ep); + decode_b_segment(line, line, linesize, ep); break; case TE_DONTCARE: break; } } -static void handle_info(void) +static int handle_filter(char *line, unsigned linesize); + +static int find_boundary(void) { - char *sub; + while(fgets(line, sizeof(line), fin) != NULL) { + if (is_multipart_boundary(line)) + return 1; + } + return 0; +} + +static int handle_boundary(void) +{ + char newline[]="\n"; +again: + if (!memcmp(line+content_top->boundary_len, "--", 2)) { + /* we hit an end boundary */ + /* pop the current boundary off the stack */ + free(content_top->boundary); - sub = cleanup_subject(subject); - cleanup_space(name); - cleanup_space(date); - cleanup_space(email); - cleanup_space(sub); + /* technically won't happen as is_multipart_boundary() + will fail first. But just in case.. + */ + if (content_top-- < content) { + fprintf(stderr, "Detected mismatched boundaries, " + "can't recover\n"); + exit(1); + } + handle_filter(newline, sizeof(newline)); - fprintf(fout, "Author: %s\nEmail: %s\nSubject: %s\nDate: %s\n\n", - name, email, sub, date); + /* skip to the next boundary */ + if (!find_boundary()) + return 0; + goto again; + } + + /* set some defaults */ + transfer_encoding = TE_DONTCARE; + charset[0] = 0; + message_type = TYPE_TEXT; + + /* slurp in this section's info */ + while (read_one_header_line(line, sizeof(line), fin)) + check_header(line, sizeof(line), p_hdr_data, 0); + + /* eat the blank line after section info */ + return (fgets(line, sizeof(line), fin) != NULL); } -/* We are inside message body and have read line[] already. - * Spit out the commit log. - */ -static int handle_commit_msg(int *seen) +static inline int patchbreak(const char *line) +{ + /* Beginning of a "diff -" header? */ + if (!memcmp("diff -", line, 6)) + return 1; + + /* CVS "Index: " line? */ + if (!memcmp("Index: ", line, 7)) + return 1; + + /* + * "--- " starts patches without headers + * "---*" is a manual separator + */ + if (!memcmp("---", line, 3)) { + line += 3; + /* space followed by a filename? */ + if (line[0] == ' ' && !isspace(line[1])) + return 1; + /* Just whitespace? */ + for (;;) { + unsigned char c = *line++; + if (c == '\n') + return 1; + if (!isspace(c)) + break; + } + return 0; + } + return 0; +} + + +static int handle_commit_msg(char *line, unsigned linesize) { + static int still_looking = 1; + char *endline = line + linesize; + if (!cmitmsg) return 0; - do { - if (!memcmp("diff -", line, 6) || - !memcmp("---", line, 3) || - !memcmp("Index: ", line, 7)) - break; - if ((multipart_boundary[0] && is_multipart_boundary(line))) { - /* We come here when the first part had only - * the commit message without any patch. We - * pretend we have not seen this line yet, and - * go back to the loop. - */ - return 1; + + if (still_looking) { + char *cp = line; + if (isspace(*line)) { + for (cp = line + 1; *cp; cp++) { + if (!isspace(*cp)) + break; + } + if (!*cp) + return 0; } + if ((still_looking = check_header(cp, endline - cp, s_hdr_data, 0)) != 0) + return 0; + } - /* Unwrap transfer encoding and optionally - * normalize the log message to UTF-8. - */ - decode_transfer_encoding(line); - if (metainfo_charset) - convert_to_utf8(line, charset); + /* normalize the log message to UTF-8. */ + if (metainfo_charset) + convert_to_utf8(line, endline - line, charset); - handle_inbody_header(seen, line); - if (!(*seen & SEEN_PREFIX)) - continue; + if (patchbreak(line)) { + fclose(cmitmsg); + cmitmsg = NULL; + return 1; + } - fputs(line, cmitmsg); - } while (fgets(line, sizeof(line), fin) != NULL); - fclose(cmitmsg); - cmitmsg = NULL; + fputs(line, cmitmsg); return 0; } -/* We have done the commit message and have the first - * line of the patch in line[]. - */ -static void handle_patch(void) +static int handle_patch(char *line) { - do { - if (multipart_boundary[0] && is_multipart_boundary(line)) - break; - /* Only unwrap transfer encoding but otherwise do not - * do anything. We do *NOT* want UTF-8 conversion - * here; we are dealing with the user payload. - */ - decode_transfer_encoding(line); - fputs(line, patchfile); - patch_lines++; - } while (fgets(line, sizeof(line), fin) != NULL); + fputs(line, patchfile); + patch_lines++; + return 0; } -/* multipart boundary and transfer encoding are set up for us, and we - * are at the end of the sub header. do equivalent of handle_body up - * to the next boundary without closing patchfile --- we will expect - * that the first part to contain commit message and a patch, and - * handle other parts as pure patches. - */ -static int handle_multipart_one_part(int *seen) +static int handle_filter(char *line, unsigned linesize) { - int n = 0; + static int filter = 0; - while (fgets(line, sizeof(line), fin) != NULL) { - again: - n++; - if (is_multipart_boundary(line)) + /* filter tells us which part we left off on + * a non-zero return indicates we hit a filter point + */ + switch (filter) { + case 0: + if (!handle_commit_msg(line, linesize)) break; - if (handle_commit_msg(seen)) - goto again; - handle_patch(); - break; + filter++; + case 1: + if (!handle_patch(line)) + break; + filter++; + default: + return 1; } - if (n == 0) - return -1; + return 0; } -static void handle_multipart_body(void) +static void handle_body(void) { - int seen = 0; - int part_num = 0; + int rc = 0; + static char newline[2000]; + static char *np = newline; /* Skip up to the first boundary */ - while (fgets(line, sizeof(line), fin) != NULL) - if (is_multipart_boundary(line)) { - part_num = 1; + if (content_top->boundary) { + if (!find_boundary()) + return; + } + + do { + /* process any boundary lines */ + if (content_top->boundary && is_multipart_boundary(line)) { + /* flush any leftover */ + if ((transfer_encoding == TE_BASE64) && + (np != newline)) { + handle_filter(newline, sizeof(newline)); + } + if (!handle_boundary()) + return; + } + + /* Unwrap transfer encoding */ + decode_transfer_encoding(line, sizeof(line)); + + switch (transfer_encoding) { + case TE_BASE64: + { + char *op = line; + + /* binary data most likely doesn't have newlines */ + if (message_type != TYPE_TEXT) { + rc = handle_filter(line, sizeof(newline)); + break; + } + + /* this is a decoded line that may contain + * multiple new lines. Pass only one chunk + * at a time to handle_filter() + */ + + do { + while (*op != '\n' && *op != 0) + *np++ = *op++; + *np = *op; + if (*np != 0) { + /* should be sitting on a new line */ + *(++np) = 0; + op++; + rc = handle_filter(newline, sizeof(newline)); + np = newline; + } + } while (*op != 0); + /* the partial chunk is saved in newline and + * will be appended by the next iteration of fgets + */ break; } - if (!part_num) - return; - /* We are on boundary line. Start slurping the subhead. */ - while (1) { - int hdr = read_one_header_line(line, sizeof(line), fin); - if (!hdr) { - if (handle_multipart_one_part(&seen) < 0) - return; - /* Reset per part headers */ - transfer_encoding = TE_DONTCARE; - charset[0] = 0; + default: + rc = handle_filter(line, sizeof(newline)); } + if (rc) + /* nothing left to filter */ + break; + } while (fgets(line, sizeof(line), fin)); + + return; +} + +static void output_header_lines(FILE *fout, const char *hdr, char *data) +{ + while (1) { + char *ep = strchr(data, '\n'); + int len; + if (!ep) + len = strlen(data); else - check_subheader_line(line); - } - fclose(patchfile); - if (!patch_lines) { - fprintf(stderr, "No patch found\n"); - exit(1); + len = ep - data; + fprintf(fout, "%s: %.*s\n", hdr, len, data); + if (!ep) + break; + data = ep + 1; } } -/* Non multipart message */ -static void handle_body(void) +static void handle_info(void) { - int seen = 0; + char *sub; + char *hdr; + int i; + + for (i = 0; header[i]; i++) { - handle_commit_msg(&seen); - handle_patch(); - fclose(patchfile); - if (!patch_lines) { - fprintf(stderr, "No patch found\n"); - exit(1); + /* only print inbody headers if we output a patch file */ + if (patch_lines && s_hdr_data[i]) + hdr = s_hdr_data[i]; + else if (p_hdr_data[i]) + hdr = p_hdr_data[i]; + else + continue; + + if (!memcmp(header[i], "Subject", 7)) { + if (keep_subject) + sub = hdr; + else { + sub = cleanup_subject(hdr); + cleanup_space(sub); + } + output_header_lines(fout, "Subject", sub); + } else if (!memcmp(header[i], "From", 4)) { + handle_from(hdr); + fprintf(fout, "Author: %s\n", name); + fprintf(fout, "Email: %s\n", email); + } else { + cleanup_space(hdr); + fprintf(fout, "%s: %s\n", header[i], hdr); + } } + fprintf(fout, "\n"); } -int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, - const char *msg, const char *patch) +static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, + const char *msg, const char *patch) { keep_subject = ks; metainfo_charset = encoding; @@ -809,18 +931,16 @@ int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, fclose(cmitmsg); return -1; } - while (1) { - int hdr = read_one_header_line(line, sizeof(line), fin); - if (!hdr) { - if (multipart_boundary[0]) - handle_multipart_body(); - else - handle_body(); - handle_info(); - break; - } - check_header_line(line); - } + + p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); + s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); + + /* process the email header */ + while (read_one_header_line(line, sizeof(line), fin)) + check_header(line, sizeof(line), p_hdr_data, 1); + + handle_body(); + handle_info(); return 0; }