Code

Encode everything internally as UTF-8
author Jonas Fonseca <fonseca@diku.dk>
Wed, 18 Feb 2009 22:14:56 +0000 (23:14 +0100)
committer Jonas Fonseca <fonseca@diku.dk>
Sun, 29 Nov 2009 16:52:32 +0000 (11:52 -0500)
Store all strings internally as UTF-8 and convert them when they are
displayed if the locale is non-UTF-8. Improve the install documentation
related to proper support for UTF-8 via ncursesw.

INSTALL
NEWS
tig.c

diff --git a/INSTALL b/INSTALL
index c762a8a207b41019f0a8bfe6fed5e15208cde334..3f4844dc01cdf37a0cafaf648a1f96536f7641cd 100644 (file)
--- a/INSTALL
+++ b/INSTALL
@@ -28,6 +28,11 @@ need to first make the configure script:
 
        $ make configure
 
+Build settings are read from the file "config.make". To manually configure tig
+to use the ncurses library with wide character support, add the line:
+
+       LDLIBS = -lncursesw
+
 Release notes documenting notable changes are available in the NEWS file. When
 upgrading, you are advised to also read them after you have completed the
 installation.
@@ -38,9 +43,12 @@ The following tools and packages are needed:
 |=============================================================================
 |Tool                          |Description
 |git-core                      |Tig is just a frontend for git.
-|ncurses                       |Be sure to also have development files
+|ncurses or ncursesw           |Be sure to have the development files
                                 installed. Usually they are available in a
                                 separate package ending with `-dev`.
+                                Note also that ncurses with wide character
+                                support (ncursesw) is required to properly
+                                handle UTF-8 encoded strings.
 |iconv                         |If iconv is not provided by the c library
                                 you need to change the Makefile to link it
                                 into the binary.
diff --git a/NEWS b/NEWS
index 23445504a2b8a48a21cc2c8e15c01a932fb81550..67ebad8ef0504b370972774dc252b72880646192 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,13 @@ Release notes
 master
 ------
 
+Incompatibilities:
+
+ - Encode everything internally as UTF-8. This can affect performance,
+   but should in general improve handling of character lengths etc.
+   Also, to properly handle UTF-8 environments use ncurses with wide
+   character support.
+
 Improvements:
 
  - Status view: update the file variable when a line is selected so
diff --git a/tig.c b/tig.c
index 86c82db53ed679a284c8b385cd566f963036cea5..9710d6fd0dd6ca3d2dd73c828e4ce9009caeed48 100644 (file)
--- a/tig.c
+++ b/tig.c
@@ -994,9 +994,9 @@ static char opt_head[SIZEOF_REF]    = "";
 static char opt_head_rev[SIZEOF_REV]   = "";
 static char opt_remote[SIZEOF_REF]     = "";
 static char opt_encoding[20]           = "UTF-8";
-static bool opt_utf8                   = TRUE;
 static char opt_codeset[20]            = "UTF-8";
-static iconv_t opt_iconv               = ICONV_NONE;
+static iconv_t opt_iconv_in            = ICONV_NONE;
+static iconv_t opt_iconv_out           = ICONV_NONE;
 static char opt_search[SIZEOF_STR]     = "";
 static char opt_cdup[SIZEOF_STR]       = "";
 static char opt_prefix[SIZEOF_STR]     = "";
@@ -2060,6 +2060,7 @@ static int
 draw_chars(struct view *view, enum line_type type, const char *string,
           int max_len, bool use_tilde)
 {
+       static char out_buffer[BUFSIZ * 2];
        int len = 0;
        int col = 0;
        int trimmed = FALSE;
@@ -2068,22 +2069,28 @@ draw_chars(struct view *view, enum line_type type, const char *string,
        if (max_len <= 0)
                return 0;
 
-       if (opt_utf8) {
-               len = utf8_length(&string, skip, &col, max_len, &trimmed, use_tilde);
-       } else {
-               col = len = strlen(string);
-               if (len > max_len) {
-                       if (use_tilde) {
-                               max_len -= 1;
+       len = utf8_length(&string, skip, &col, max_len, &trimmed, use_tilde);
+
+       set_view_attr(view, type);
+       if (len > 0) {
+               if (opt_iconv_out != ICONV_NONE) {
+                       ICONV_CONST char *inbuf = (ICONV_CONST char *) string;
+                       size_t inlen = len + 1;
+
+                       char *outbuf = out_buffer;
+                       size_t outlen = sizeof(out_buffer);
+
+                       size_t ret;
+
+                       ret = iconv(opt_iconv_out, &inbuf, &inlen, &outbuf, &outlen);
+                       if (ret != (size_t) -1) {
+                               string = out_buffer;
+                               len = sizeof(out_buffer) - outlen;
                        }
-                       col = len = max_len;
-                       trimmed = TRUE;
                }
-       }
 
-       set_view_attr(view, type);
-       if (len > 0)
                waddnstr(view->win, string, len);
+       }
        if (trimmed && use_tilde) {
                set_view_attr(view, LINE_DELIMITER);
                waddch(view->win, '~');
@@ -3056,7 +3063,7 @@ update_view(struct view *view)
        }
 
        for (; (line = io_get(view->pipe, '\n', can_read)); can_read = FALSE) {
-               if (opt_iconv != ICONV_NONE) {
+               if (opt_iconv_in != ICONV_NONE) {
                        ICONV_CONST char *inbuf = line;
                        size_t inlen = strlen(line) + 1;
 
@@ -3065,7 +3072,7 @@ update_view(struct view *view)
 
                        size_t ret;
 
-                       ret = iconv(opt_iconv, &inbuf, &inlen, &outbuf, &outlen);
+                       ret = iconv(opt_iconv_in, &inbuf, &inlen, &outbuf, &outlen);
                        if (ret != (size_t) -1)
                                line = out_buffer;
                }
@@ -7698,12 +7705,15 @@ main(int argc, const char *argv[])
        if (!opt_git_dir[0] && request != REQ_VIEW_PAGER)
                die("Not a git repository");
 
-       if (*opt_encoding && strcasecmp(opt_encoding, "UTF-8"))
-               opt_utf8 = FALSE;
+       if (*opt_encoding && strcmp(opt_codeset, "UTF-8")) {
+               opt_iconv_in = iconv_open("UTF-8", opt_encoding);
+               if (opt_iconv_in == ICONV_NONE)
+                       die("Failed to initialize character set conversion");
+       }
 
-       if (*opt_codeset && strcmp(opt_codeset, opt_encoding)) {
-               opt_iconv = iconv_open(opt_codeset, opt_encoding);
-               if (opt_iconv == ICONV_NONE)
+       if (*opt_codeset && strcmp(opt_codeset, "UTF-8")) {
+               opt_iconv_out = iconv_open(opt_codeset, "UTF-8");
+               if (opt_iconv_out == ICONV_NONE)
                        die("Failed to initialize character set conversion");
        }