X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/f67e3671c985ad8095dc6fcc8be7ba6dd25cdf63..1f07835ddb14fe99fa3ea4b6a1e39e8d5ba7df20:/test/getcwidth.c?ds=inline diff --git a/test/getcwidth.c b/test/getcwidth.c index 2df307c9..c1717e17 100644 --- a/test/getcwidth.c +++ b/test/getcwidth.c @@ -1,7 +1,7 @@ /* - * getcwidth - Get the OS's idea of the width of a combining character + * getcwidth - Get the OS's idea of the width of Unicode codepoints * - * This code is Copyright (c) 2012, by the authors of nmh. See the + * This code is Copyright (c) 2013, by the authors of nmh. See the * COPYRIGHT file in the root directory of the nmh distribution for * complete copyright information. */ @@ -20,61 +20,149 @@ #include #endif +#ifdef MULTIBYTE_SUPPORT +static void usage(char *); +static void dumpwidth(void); +static void getwidth(const char *); +#endif /* MULTIBYTE_SUPPORT */ + int main(int argc, char *argv[]) { +#ifndef MULTIBYTE_SUPPORT + (void) argc; + (void) argv; + fprintf(stderr, "Nmh was not configured with multibyte support\n"); + exit(1); +#else /* MULTIBYTE_SUPPORT */ wchar_t c; - int charlen; - char *p; + int i; + + if (! setlocale(LC_ALL, "")) { + fprintf(stderr, "setlocale failed, check your LC_ALL, " + "LC_CTYPE, and LANG environment variables\n"); + } + + if (argc < 2) + usage(argv[0]); + + if (strcmp(argv[1], "--dump") == 0) { + if (argc == 2) { + dumpwidth(); + exit(0); + } else { + fprintf(stderr, "--dump cannot be combined with " + "other arguments\n"); + exit(1); + } + } /* - * This is the UTF-8 for "n" + U+0308 (Combining Diaeresis) + * Process each argument. If it begins with "U+", then try to + * convert it to a Unicode codepoint. Otherwise, take each + * string and get the total width */ - unsigned char string[] = "n\xcc\x88"; + for (i = 1; i < argc; i++) { + if (strncmp(argv[i], "U+", 2) == 0) { + /* + * We're making a big assumption here that + * wchar_t represents a Unicode codepoint. + * That technically isn't valid unless the + * C compiler defines __STDC_ISO_10646__, but + * we're going to assume now that it works. + */ + errno = 0; + c = strtoul(argv[i] + 2, NULL, 16); + if (errno) { + fprintf(stderr, "Codepoint %s invalid\n", + argv[i]); + continue; + } + printf("%d\n", wcwidth(c)); + } else { + getwidth(argv[i]); + } + } - setlocale(LC_ALL, ""); + exit(0); +} - if (argc != 1) { - fprintf(stderr, "Usage: %s\n", argv[0]); - fprintf(stderr, "Returns the column width of a UTF-8 " - "multibyte character\n"); - exit(1); - } +static void +usage(char *argv0) +{ + fprintf(stderr, "Usage: %s [--dump]\n", argv0); + fprintf(stderr, " %s U+XXXX [...]\n", argv0); + fprintf(stderr, " %s utf-8-sequence [...]\n", argv0); + fprintf(stderr, "Returns the column width of a Unicode codepoint " + "or UTF-8 character sequence\n"); + fprintf(stderr, "\t--dump\tDump complete width table\n"); -#ifndef MULTIBYTE_SUPPORT - fprintf(stderr, "Nmh was not configured with multibyte support\n"); exit(1); -#else +} + +static void +getwidth(const char *string) +{ + wchar_t c; + int charlen, charleft = strlen(string); + int length = 0; + /* - * It's not clear to me that we can just call mbtowc() with a - * combining character; just to be safe, feed it in a base - * character first. + * In theory we should be able to use wcswidth(), but since we're + * testing out how the format libraries behave we'll do it a character + * at a time. */ - mbtowc(NULL, NULL, 0); + if (mbtowc(NULL, NULL, 0)) {} - charlen = mbtowc(&c, string, strlen(string)); + while (charleft > 0) { + int clen; - if (charlen != 1) { - fprintf(stderr, "We expected a beginning character length " - "of 1, got %d instead\n", charlen); - exit(1); - } + charlen = mbtowc(&c, string, charleft); + + if (charlen == 0) + break; - p = string + charlen; + if (charlen < 0) { + fprintf(stderr, "Unable to convert string \"%s\"\n", + string); + return; + } - charlen = mbtowc(&c, p, strlen(p)); + if ((clen = wcwidth(c)) < 0) { + fprintf(stderr, "U+%04lX non-printable\n", + (unsigned long int) c); + return; + } - if (charlen != 2) { - fprintf(stderr, "We expected a multibyte character length " - "of 2, got %d instead\n", charlen); - fprintf(stderr, "Are you using a UTF-8 locale?\n"); - exit(1); + length += clen; + string += charlen; + charleft -= charlen; } - printf("%d\n", wcwidth(c)); + printf("%d\n", length); +} - exit(0); +static void +dumpwidth(void) +{ + wchar_t wc, low; + int width, lastwidth; + + for (wc = 0, low = 1, lastwidth = wcwidth(1); wc < 0xffff; wc++) { + width = wcwidth(wc+1); + if (width != lastwidth) { + printf("%04lX - %04lX = %d\n", (unsigned long int) low, + (unsigned long int) (wc), lastwidth); + low = wc+1; + } + lastwidth = width; + } + + width = wcwidth(wc); + if (width == lastwidth) + printf("%04lX - %04lX = %d\n", (unsigned long int) low, + (unsigned long int) (wc), width); #endif /* MULTIBYTE_SUPPORT */ }