Code

UTF-8: introduce i18n.logoutputencoding.
author Junio C Hamano <junkio@cox.net>
Thu, 28 Dec 2006 00:41:33 +0000 (16:41 -0800)
committer Junio C Hamano <junkio@cox.net>
Thu, 28 Dec 2006 00:41:33 +0000 (16:41 -0800)
It is plausible for somebody to want to view the commit log in a
different encoding from i18n.commitencoding -- the project's
policy may be UTF-8 and the user may be using a commit message
hook to run iconv to conform to that policy (and either leave
i18n.commitencoding unset so that it defaults to UTF-8, or have
it explicitly set to UTF-8).  Even then, Latin-1 may be more
convenient for the usual pager and the terminal the user uses.

The new variable i18n.logoutputencoding is used in preference to
i18n.commitencoding to decide what encoding to recode the log
output in when git-log and friends format the commit log message.

Signed-off-by: Junio C Hamano <junkio@cox.net>
15 files changed:
Documentation/config.txt
builtin-commit-tree.c
builtin-log.c
builtin-mailinfo.c
cache.h
commit.c
config.c
contrib/completion/git-completion.bash
environment.c
t/t3900-i18n-commit.sh [new file with mode: 0755]
t/t3900/1-UTF-8.txt [new file with mode: 0644]
t/t3900/2-UTF-8.txt [new file with mode: 0644]
t/t3900/EUCJP.txt [new file with mode: 0644]
t/t3900/ISO-8859-1.txt [new file with mode: 0644]
t/t3900/ISO2022JP.txt [new file with mode: 0644]

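diff --git a/Documentation/config.txt b/Documentation/config.txt
index 22482d6a94e88544935eee5f560089b9a4bf0dfa..ffef3abfb6c9381e84a9d3ee11789c6ea78d9283 100644 (file)
--- a/Documentation/config.txt
+++ b/Documentation/config.txt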
@@ -248,6 +248,10 @@ i18n.commitEncoding::
        browser (and possibly at other places in the future or in other
        porcelains). See e.g. gitlink:git-mailinfo[1]. Defaults to 'utf-8'.
 
+i18n.logOutputEncoding::
+       Character encoding the commit messages are converted to when
+       running `git-log` and friends.
+
 log.showroot::
        If true, the initial commit will be shown as a big creation event.
        This is equivalent to a diff against an empty tree.
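diff --git a/builtin-commit-tree.c b/builtin-commit-tree.c
index 33c29f7495afad0342545a7e6854c3e3032a69e2..146aaffd282987454c0910477cfe7a047f478e94 100644 (file)
--- a/builtin-commit-tree.c
+++ b/builtin-commit-tree.c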
@@ -118,7 +118,9 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
                        parents++;
        }
 
-       encoding_is_utf8 = !strcmp(git_commit_encoding, "utf-8");
+       /* Not having i18n.commitencoding is the same as having utf-8 */
+       encoding_is_utf8 = (!git_commit_encoding ||
+                           !strcmp(git_commit_encoding, "utf-8"));
 
        init_buffer(&buffer, &size);
        add_buffer(&buffer, &size, "tree %s\n", sha1_to_hex(tree_sha1));
diff --git a/builtin-log.c b/builtin-log.c
index b7e47cb5fe980374fa0d328830a524c6f6d65b66..a59b4acef1bc45f07f15e10c0501aa23de3eed7d 100644 (file)
--- a/builtin-log.c
+++ b/builtin-log.c
@@ -33,13 +33,10 @@ static void cmd_log_init(int argc, const char **argv, const char *prefix,
                const char *arg = argv[i];
                if (!strncmp(arg, "--encoding=", 11)) {
                        arg += 11;
-                       if (MAX_ENCODING_LENGTH <= strlen(arg))
-                               die(" Value of output encoding '%s' too long",
-                                   arg);
                        if (strcmp(arg, "none"))
-                               strcpy(git_commit_encoding, arg);
+                               git_log_output_encoding = strdup(arg);
                        else
-                               git_commit_encoding[0] = 0;
+                               git_log_output_encoding = "";
                }
                else
                        die("unrecognized argument: %s", arg);
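diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c
index 507b93f6a707a2e44ee5f5b2abc55f0e6b096367..a67f3eb90b6f715714c6fa7bb931044630c74111 100644 (file)
--- a/builtin-mailinfo.c
+++ b/builtin-mailinfo.c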
@@ -806,7 +806,8 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
                if (!strcmp(argv[1], "-k"))
                        keep_subject = 1;
                else if (!strcmp(argv[1], "-u"))
-                       metainfo_charset = git_commit_encoding;
+                       metainfo_charset = (git_commit_encoding
+                                           ? git_commit_encoding : "utf-8");
                else if (!strncmp(argv[1], "--encoding=", 11))
                        metainfo_charset = argv[1] + 11;
                else
diff --git a/cache.h b/cache.h
index 4943056c19ffb72a7cfb994daaa788ec1b01d60b..29dd290c9253bd96f086432e24d98cb0b43fa096 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -416,8 +416,8 @@ extern int check_repository_format_version(const char *var, const char *value);
 extern char git_default_email[MAX_GITNAME];
 extern char git_default_name[MAX_GITNAME];
 
-#define MAX_ENCODING_LENGTH 64
-extern char git_commit_encoding[MAX_ENCODING_LENGTH];
+extern char *git_commit_encoding;
+extern char *git_log_output_encoding;
 
 extern int copy_fd(int ifd, int ofd);
 extern void write_or_die(int fd, const void *buf, size_t count);
index df4bc0775a5042a353cb37228ff66210f35ed97b..6f2839a5cd3708fca9ef845375d3b16373fd3266 100644 (file)
--- a/commit.c
+++ b/commit.c
@@ -592,12 +592,20 @@ static char *get_header(const struct commit *commit, const char *key)
 
 static char *logmsg_reencode(const struct commit *commit)
 {
-       char *encoding = get_header(commit, "encoding");
+       char *encoding;
        char *out;
+       char *output_encoding = (git_log_output_encoding
+                                ? git_log_output_encoding
+                                : git_commit_encoding);
 
-       if (!encoding || !strcmp(encoding, git_commit_encoding))
+       if (!output_encoding)
                return NULL;
-       out = reencode_string(commit->buffer, git_commit_encoding, encoding);
+       encoding = get_header(commit, "encoding");
+       if (!encoding || !strcmp(encoding, output_encoding)) {
+               free(encoding);
+               return NULL;
+       }
+       out = reencode_string(commit->buffer, output_encoding, encoding);
        free(encoding);
        if (!out)
                return NULL;
@@ -618,15 +626,10 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
        int parents_shown = 0;
        const char *msg = commit->buffer;
        int plain_non_ascii = 0;
-       char *reencoded = NULL;
+       char *reencoded = logmsg_reencode(commit);
 
-       if (*git_commit_encoding) {
-               reencoded = logmsg_reencode(commit);
-               if (reencoded) {
-                       msg = reencoded;
-                       len = strlen(msg);
-               }
-       }
+       if (reencoded)
+               msg = reencoded;
 
        if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
                indent = 0;
@@ -643,7 +646,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
                for (in_body = i = 0; (ch = msg[i]) && i < len; i++) {
                        if (!in_body) {
                                /* author could be non 7-bit ASCII but
-                                * the log may so; skip over the
+                                * the log may be so; skip over the
                                 * header part first.
                                 */
                                if (ch == '\n' &&
index 1662a4626e569b07d96c622b357928216a24538c..fcccf7e2a4f3b7487af10d4f7b505c7ef492b9e8 100644 (file)
--- a/config.c
+++ b/config.c
@@ -309,10 +309,16 @@ int git_default_config(const char *var, const char *value)
        }
 
        if (!strcmp(var, "i18n.commitencoding")) {
-               strlcpy(git_commit_encoding, value, sizeof(git_commit_encoding));
+               git_commit_encoding = strdup(value);
                return 0;
        }
 
+       if (!strcmp(var, "i18n.logoutputencoding")) {
+               git_log_output_encoding = strdup(value);
+               return 0;
+       }
+
+
        if (!strcmp(var, "pager.color") || !strcmp(var, "color.pager")) {
                pager_use_color = git_config_bool(var,value);
                return 0;
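diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 234cd0954b888d814d8d4d86bb41983b80fddade..7c7520ea29dfb26ffafc355ea739046b6d821e28 100755 (executable)
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash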
@@ -711,6 +711,7 @@ _git_repo_config ()
                core.compression
                core.legacyHeaders
                i18n.commitEncoding
+               i18n.logOutputEncoding
                diff.color
                color.diff
                diff.renameLimit
diff --git a/environment.c b/environment.c
index f8c7dbceadf2190997816f9a21b211a6a97efe48..a1502c4e87c0067c8cc276006317005a0da21a49 100644 (file)
--- a/environment.c
+++ b/environment.c
@@ -18,7 +18,8 @@ int prefer_symlink_refs;
 int log_all_ref_updates;
 int warn_ambiguous_refs = 1;
 int repository_format_version;
-char git_commit_encoding[MAX_ENCODING_LENGTH] = "utf-8";
+char *git_commit_encoding;
+char *git_log_output_encoding;
 int shared_repository = PERM_UMASK;
 const char *apply_default_whitespace;
 int zlib_compression_level = Z_DEFAULT_COMPRESSION;
diff --git a/t/t3900-i18n-commit.sh b/t/t3900-i18n-commit.sh
new file mode 100755 (executable)
index 0000000..3606ed2
--- /dev/null
@@ -0,0 +1,104 @@
+#!/bin/sh
+#
+# Copyright (c) 2006 Junio C Hamano
+#
+
+test_description='commit and log output encodings'
+
+. ./test-lib.sh
+
+compare_with () {
+       git-show -s "$1" | sed -e '1,/^$/d' -e 's/^    //' -e '$d' >current &&
+       diff -u current "$2"
+}
+
+test_expect_success setup '
+       : >F &&
+       git-add F &&
+       T=$(git-write-tree) &&
+       C=$(git-commit-tree $T <../t3900/1-UTF-8.txt) &&
+       git-update-ref HEAD $C &&
+       git-tag C0
+'
+
+test_expect_success 'no encoding header for base case' '
+       E=$(git-cat-file commit C0 | sed -ne "s/^encoding //p") &&
+       test z = "z$E"
+'
+
+for H in ISO-8859-1 EUCJP ISO2022JP
+do
+       test_expect_success "$H setup" '
+               git-repo-config i18n.commitencoding $H &&
+               git-checkout -b $H C0 &&
+               echo $H >F &&
+               git-commit -a -F ../t3900/$H.txt
+       '
+done
+
+for H in ISO-8859-1 EUCJP ISO2022JP
+do
+       test_expect_success "check encoding header for $H" '
+               E=$(git-cat-file commit '$H' | sed -ne "s/^encoding //p") &&
+               test "z$E" = "z'$H'"
+       '
+done
+
+test_expect_success 'repo-config to remove customization' '
+       git-repo-config --unset-all i18n.commitencoding &&
+       if Z=$(git-repo-config --get-all i18n.commitencoding)
+       then
+               echo Oops, should have failed.
+               false
+       else
+               test z = "z$Z"
+       fi &&
+       git-repo-config i18n.commitencoding utf-8
+'
+
+test_expect_success 'ISO-8859-1 should be shown in UTF-8 now' '
+       compare_with ISO-8859-1 ../t3900/1-UTF-8.txt
+'
+
+for H in EUCJP ISO2022JP
+do
+       test_expect_success "$H should be shown in UTF-8 now" '
+               compare_with '$H' ../t3900/2-UTF-8.txt
+       '
+done
+
+test_expect_success 'repo-config to add customization' '
+       git-repo-config --unset-all i18n.commitencoding &&
+       if Z=$(git-repo-config --get-all i18n.commitencoding)
+       then
+               echo Oops, should have failed.
+               false
+       else
+               test z = "z$Z"
+       fi
+'
+
+for H in ISO-8859-1 EUCJP ISO2022JP
+do
+       test_expect_success "$H should be shown in itself now" '
+               git-repo-config i18n.commitencoding '$H' &&
+               compare_with '$H' ../t3900/'$H'.txt
+       '
+done
+
+test_expect_success 'repo-config to tweak customization' '
+       git-repo-config i18n.logoutputencoding utf-8
+'
+
+test_expect_success 'ISO-8859-1 should be shown in UTF-8 now' '
+       compare_with ISO-8859-1 ../t3900/1-UTF-8.txt
+'
+
+for H in EUCJP ISO2022JP
+do
+       test_expect_success "$H should be shown in UTF-8 now" '
+               compare_with '$H' ../t3900/2-UTF-8.txt
+       '
+done
+
+test_done
diff --git a/t/t3900/1-UTF-8.txt b/t/t3900/1-UTF-8.txt
new file mode 100644 (file)
index 0000000..ee31e19
--- /dev/null
@@ -0,0 +1,3 @@
+ÄËÑÏÖ
+
+Ábçdèfg
diff --git a/t/t3900/2-UTF-8.txt b/t/t3900/2-UTF-8.txt
new file mode 100644 (file)
index 0000000..63f4f8f
--- /dev/null
@@ -0,0 +1,4 @@
+はれひほふ
+
+しているのが、いるので。
+濱浜ほれぷりぽれまびぐりろへ。
diff --git a/t/t3900/EUCJP.txt b/t/t3900/EUCJP.txt
new file mode 100644 (file)
index 0000000..546f2aa
--- /dev/null
@@ -0,0 +1,4 @@
+¤Ï¤ì¤Ò¤Û¤Õ
+
+¤·¤Æ¤¤¤ë¤Î¤¬¡¢¤¤¤ë¤Î¤Ç¡£
+ßÀÉͤۤì¤×¤ê¤Ý¤ì¤Þ¤Ó¤°¤ê¤í¤Ø¡£
diff --git a/t/t3900/ISO-8859-1.txt b/t/t3900/ISO-8859-1.txt
new file mode 100644 (file)
index 0000000..7cbef0e
--- /dev/null
@@ -0,0 +1,3 @@
+ÄËÑÏÖ
+
+Ábçdèfg
diff --git a/t/t3900/ISO2022JP.txt b/t/t3900/ISO2022JP.txt
new file mode 100644 (file)
index 0000000..74b5330
--- /dev/null
@@ -0,0 +1,4 @@
+\e$B$O$l$R$[$U\e(B
+
+\e$B$7$F$$$k$N$,!"$$$k$N$G!#\e(B
+\e$B_@IM$[$l$W$j$]$l$^$S$0$j$m$X!#\e(B