From 650e4be59b9f385f56e5829d97d09e8440f174b8 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 27 Nov 2005 16:22:16 -0800 Subject: [PATCH] mailinfo: allow -u to fall back on latin1 to utf8 conversion. When the message body does not identify what encoding it is in, -u assumes it is in latin-1 and converts it to utf8, which is the recommended encoding for git commit log messages. With -u=, the conversion is made into the specified one, instead of utf8, to allow project-local policies. Signed-off-by: Junio C Hamano --- mailinfo.c | 59 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/mailinfo.c b/mailinfo.c index cb853df99..6d8c93360 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -16,7 +16,7 @@ extern char *gitstrcasestr(const char *haystack, const char *needle); static FILE *cmitmsg, *patchfile; static int keep_subject = 0; -static int metainfo_utf8 = 0; +static char *metainfo_charset = NULL; static char line[1000]; static char date[1000]; static char name[1000]; @@ -441,29 +441,38 @@ static int decode_b_segment(char *in, char *ot, char *ep) static void convert_to_utf8(char *line, char *charset) { - if (*charset) { - char *in, *out; - size_t insize, outsize, nrc; - char outbuf[4096]; /* cheat */ - iconv_t conv = iconv_open("utf-8", charset); - - if (conv == (iconv_t) -1) { - fprintf(stderr, "cannot convert from %s to utf-8\n", - charset); + char *in, *out; + size_t insize, outsize, nrc; + char outbuf[4096]; /* cheat */ + static char latin_one[] = "latin-1"; + char *input_charset = *charset ? charset : latin_one; + iconv_t conv = iconv_open(metainfo_charset, input_charset); + + if (conv == (iconv_t) -1) { + static int warned_latin1_once = 0; + if (input_charset != latin_one) { + fprintf(stderr, "cannot convert from %s to %s\n", + input_charset, metainfo_charset); *charset = 0; - return; } - in = line; - insize = strlen(in); - out = outbuf; - outsize = sizeof(outbuf); - nrc = iconv(conv, &in, &insize, &out, &outsize); - iconv_close(conv); - if (nrc == (size_t) -1) - return; - *out = 0; - strcpy(line, outbuf); + else if (!warned_latin1_once) { + warned_latin1_once = 1; + fprintf(stderr, "tried to convert from %s to %s, " + "but your iconv does not work with it.\n", + input_charset, metainfo_charset); + } + return; } + in = line; + insize = strlen(in); + out = outbuf; + outsize = sizeof(outbuf); + nrc = iconv(conv, &in, &insize, &out, &outsize); + iconv_close(conv); + if (nrc == (size_t) -1) + return; + *out = 0; + strcpy(line, outbuf); } static void decode_header_bq(char *it) @@ -511,7 +520,7 @@ static void decode_header_bq(char *it) } if (sz < 0) return; - if (metainfo_utf8) + if (metainfo_charset) convert_to_utf8(piecebuf, charset_q); strcpy(out, piecebuf); out += strlen(out); @@ -590,7 +599,7 @@ static int handle_commit_msg(void) * normalize the log message to UTF-8. */ decode_transfer_encoding(line); - if (metainfo_utf8) + if (metainfo_charset) convert_to_utf8(line, charset); fputs(line, cmitmsg); } while (fgets(line, sizeof(line), stdin) != NULL); @@ -720,7 +729,9 @@ int main(int argc, char **argv) if (!strcmp(argv[1], "-k")) keep_subject = 1; else if (!strcmp(argv[1], "-u")) - metainfo_utf8 = 1; + metainfo_charset = "utf-8"; + else if (!strncmp(argv[1], "-u=", 3)) + metainfo_charset = argv[1] + 3; else usage(); argc--; argv++; -- 2.30.2