X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=utf8.c;h=a2965c9c1106b7e772d9b53c8885d86320d18f07;hb=ff06c743dcd7f451381b41aeac4288179eaec428;hp=8fa62571aa959897275e694a4370e99c46cccd6a;hpb=9e83266525aad5c6210b9a21be9b1c6996d5544f;p=git.git diff --git a/utf8.c b/utf8.c index 8fa62571a..a2965c9c1 100644 --- a/utf8.c +++ b/utf8.c @@ -3,13 +3,15 @@ /* This code is originally from http://www.cl.cam.ac.uk/~mgk25/ucs/ */ +typedef unsigned int ucs_char_t; /* assuming 32bit int */ + struct interval { int first; int last; }; /* auxiliary function for binary search in interval table */ -static int bisearch(wchar_t ucs, const struct interval *table, int max) { +static int bisearch(ucs_char_t ucs, const struct interval *table, int max) { int min = 0; int mid; @@ -56,11 +58,11 @@ static int bisearch(wchar_t ucs, const struct interval *table, int max) { * ISO 8859-1 and WGL4 characters, Unicode control characters, * etc.) have a column width of 1. * - * This implementation assumes that wchar_t characters are encoded + * This implementation assumes that ucs_char_t characters are encoded * in ISO 10646. */ -static int wcwidth(wchar_t ch) +static int wcwidth(ucs_char_t ch) { /* * Sorted list of non-overlapping intervals of non-spacing characters, @@ -157,7 +159,7 @@ static int wcwidth(wchar_t ch) int utf8_width(const char **start) { unsigned char *s = (unsigned char *)*start; - wchar_t ch; + ucs_char_t ch; if (*s < 0x80) { /* 0xxxxxxx */ @@ -235,12 +237,19 @@ static void print_spaces(int count) /* * Wrap the text, if necessary. The variable indent is the indent for the * first line, indent2 is the indent for all other lines. + * If indent is negative, assume that already -indent columns have been + * consumed (and no extra indent is necessary for the first line). */ -void print_wrapped_text(const char *text, int indent, int indent2, int width) +int print_wrapped_text(const char *text, int indent, int indent2, int width) { int w = indent, assume_utf8 = is_utf8(text); const char *bol = text, *space = NULL; + if (indent < 0) { + w = -indent; + space = text; + } + for (;;) { char c = *text; if (!c || isspace(c)) { @@ -251,10 +260,9 @@ void print_wrapped_text(const char *text, int indent, int indent2, int width) else print_spaces(indent); fwrite(start, text - start, 1, stdout); - if (!c) { - putchar('\n'); - return; - } else if (c == '\t') + if (!c) + return w; + else if (c == '\t') w |= 0x07; space = text; w++; @@ -262,7 +270,7 @@ void print_wrapped_text(const char *text, int indent, int indent2, int width) } else { putchar('\n'); - text = bol = space + 1; + text = bol = space + isspace(*space); space = NULL; w = indent = indent2; } @@ -275,4 +283,74 @@ void print_wrapped_text(const char *text, int indent, int indent2, int width) text++; } } + return w; +} + +int is_encoding_utf8(const char *name) +{ + if (!name) + return 1; + if (!strcasecmp(name, "utf-8") || !strcasecmp(name, "utf8")) + return 1; + return 0; +} + +/* + * Given a buffer and its encoding, return it re-encoded + * with iconv. If the conversion fails, returns NULL. + */ +#ifndef NO_ICONV +#ifdef OLD_ICONV + typedef const char * iconv_ibp; +#else + typedef char * iconv_ibp; +#endif +char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding) +{ + iconv_t conv; + size_t insz, outsz, outalloc; + char *out, *outpos; + iconv_ibp cp; + + if (!in_encoding) + return NULL; + conv = iconv_open(out_encoding, in_encoding); + if (conv == (iconv_t) -1) + return NULL; + insz = strlen(in); + outsz = insz; + outalloc = outsz + 1; /* for terminating NUL */ + out = xmalloc(outalloc); + outpos = out; + cp = (iconv_ibp)in; + + while (1) { + size_t cnt = iconv(conv, &cp, &insz, &outpos, &outsz); + + if (cnt == -1) { + size_t sofar; + if (errno != E2BIG) { + free(out); + iconv_close(conv); + return NULL; + } + /* insz has remaining number of bytes. + * since we started outsz the same as insz, + * it is likely that insz is not enough for + * converting the rest. + */ + sofar = outpos - out; + outalloc = sofar + insz * 2 + 32; + out = xrealloc(out, outalloc); + outpos = out + sofar; + outsz = outalloc - sofar - 1; + } + else { + *outpos = '\0'; + break; + } + } + iconv_close(conv); + return out; } +#endif