X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=builtin-mailinfo.c;h=583da38b6750185eb38f04d91555aa75ee4a77b0;hb=8860fd42fcf5a7853f7d7c2198793183320293ff;hp=821642a7af97e7afa13a187af5ec68c8a67ae93c;hpb=b19beecd9456a2e6282634e5df751206b972604a;p=git.git diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c index 821642a7a..583da38b6 100644 --- a/builtin-mailinfo.c +++ b/builtin-mailinfo.c @@ -2,22 +2,14 @@ * Another stupid program, this one parsing the headers of an * email to figure out authorship and subject */ -#define _GNU_SOURCE -#include -#include -#include -#include -#ifndef NO_ICONV -#include -#endif -#include "git-compat-util.h" #include "cache.h" #include "builtin.h" +#include "utf8.h" static FILE *cmitmsg, *patchfile, *fin, *fout; -static int keep_subject = 0; -static const char *metainfo_charset = NULL; +static int keep_subject; +static const char *metainfo_charset; static char line[1000]; static char date[1000]; static char name[1000]; @@ -31,7 +23,7 @@ static char charset[256]; static char multipart_boundary[1000]; static int multipart_boundary_len; -static int patch_lines = 0; +static int patch_lines; static char *sanity_check(char *name, char *email) { @@ -165,7 +157,7 @@ static int handle_subject(char *line) static int slurp_attr(const char *line, const char *name, char *attr) { - char *ends, *ap = strcasestr(line, name); + const char *ends, *ap = strcasestr(line, name); size_t sz; if (!ap) { @@ -348,7 +340,7 @@ static void cleanup_space(char *buf) } } -static void decode_header_bq(char *it); +static void decode_header(char *it); typedef int (*header_fn_t)(char *); struct header_def { const char *name; @@ -371,7 +363,7 @@ static void check_header(char *line, struct header_def *header) /* Unwrap inline B and Q encoding, and optionally * normalize the meta information to utf8. */ - decode_header_bq(line + len + 2); + decode_header(line + len + 2); header[i].func(line + len + 2); break; } @@ -446,22 +438,11 @@ static int read_one_header_line(char *line, int sz, FILE *in) break; } /* Count mbox From headers as headers */ - if (!ofs && !memcmp(line, "From ", 5)) + if (!ofs && (!memcmp(line, "From ", 5) || !memcmp(line, ">From ", 6))) ofs = 1; return ofs; } -static unsigned hexval(int c) -{ - if (c >= '0' && c <= '9') - return c - '0'; - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - if (c >= 'A' && c <= 'F') - return c - 'A' + 10; - return ~0; -} - static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047) { int c; @@ -530,52 +511,30 @@ static int decode_b_segment(char *in, char *ot, char *ep) static void convert_to_utf8(char *line, char *charset) { -#ifndef NO_ICONV - char *in, *out; - size_t insize, outsize, nrc; - char outbuf[4096]; /* cheat */ static char latin_one[] = "latin1"; char *input_charset = *charset ? charset : latin_one; - iconv_t conv = iconv_open(metainfo_charset, input_charset); - - if (conv == (iconv_t) -1) { - static int warned_latin1_once = 0; - if (input_charset != latin_one) { - fprintf(stderr, "cannot convert from %s to %s\n", - input_charset, metainfo_charset); - *charset = 0; - } - else if (!warned_latin1_once) { - warned_latin1_once = 1; - fprintf(stderr, "tried to convert from %s to %s, " - "but your iconv does not work with it.\n", - input_charset, metainfo_charset); - } - return; - } - in = line; - insize = strlen(in); - out = outbuf; - outsize = sizeof(outbuf); - nrc = iconv(conv, &in, &insize, &out, &outsize); - iconv_close(conv); - if (nrc == (size_t) -1) - return; - *out = 0; - strcpy(line, outbuf); -#endif + char *out = reencode_string(line, metainfo_charset, input_charset); + + if (!out) + die("cannot convert from %s to %s\n", + input_charset, metainfo_charset); + strcpy(line, out); + free(out); } -static void decode_header_bq(char *it) +static int decode_header_bq(char *it) { char *in, *out, *ep, *cp, *sp; char outbuf[1000]; + int rfc2047 = 0; in = it; out = outbuf; while ((ep = strstr(in, "=?")) != NULL) { int sz, encoding; char charset_q[256], piecebuf[256]; + rfc2047 = 1; + if (in != ep) { sz = ep - in; memcpy(out, in, sz); @@ -589,19 +548,19 @@ static void decode_header_bq(char *it) ep += 2; cp = strchr(ep, '?'); if (!cp) - return; /* no munging */ + return rfc2047; /* no munging */ for (sp = ep; sp < cp; sp++) charset_q[sp - ep] = tolower(*sp); charset_q[cp - ep] = 0; encoding = cp[1]; if (!encoding || cp[2] != '?') - return; /* no munging */ + return rfc2047; /* no munging */ ep = strstr(cp + 3, "?="); if (!ep) - return; /* no munging */ + return rfc2047; /* no munging */ switch (tolower(encoding)) { default: - return; /* no munging */ + return rfc2047; /* no munging */ case 'b': sz = decode_b_segment(cp + 3, piecebuf, ep); break; @@ -610,7 +569,7 @@ static void decode_header_bq(char *it) break; } if (sz < 0) - return; + return rfc2047; if (metainfo_charset) convert_to_utf8(piecebuf, charset_q); strcpy(out, piecebuf); @@ -619,6 +578,19 @@ static void decode_header_bq(char *it) } strcpy(out, in); strcpy(it, outbuf); + return rfc2047; +} + +static void decode_header(char *it) +{ + + if (decode_header_bq(it)) + return; + /* otherwise "it" is a straight copy of the input. + * This can be binary guck but there is no charset specified. + */ + if (metainfo_charset) + convert_to_utf8(it, ""); } static void decode_transfer_encoding(char *line) @@ -820,18 +792,25 @@ int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, static const char mailinfo_usage[] = "git-mailinfo [-k] [-u | --encoding=] msg patch info"; -int cmd_mailinfo(int argc, const char **argv, char **envp) +int cmd_mailinfo(int argc, const char **argv, const char *prefix) { + const char *def_charset; + /* NEEDSWORK: might want to do the optional .git/ directory * discovery */ git_config(git_default_config); + def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8"); + metainfo_charset = def_charset; + while (1 < argc && argv[1][0] == '-') { if (!strcmp(argv[1], "-k")) keep_subject = 1; else if (!strcmp(argv[1], "-u")) - metainfo_charset = git_commit_encoding; + metainfo_charset = def_charset; + else if (!strcmp(argv[1], "-n")) + metainfo_charset = NULL; else if (!strncmp(argv[1], "--encoding=", 11)) metainfo_charset = argv[1] + 11; else