X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/f74f462c6a0bb4e203789da0a181e3fea9ac4a3b..9291a5f82480f2458e04cb9ea7e6749bc952b308:/test/getcwidth.c?ds=sidebyside diff --git a/test/getcwidth.c b/test/getcwidth.c index 264d5831..165f12ab 100644 --- a/test/getcwidth.c +++ b/test/getcwidth.c @@ -1,5 +1,4 @@ -/* - * getcwidth - Get the OS's idea of the width of Unicode codepoints +/* getcwidth - Get the OS's idea of the width of Unicode codepoints * * This code is Copyright (c) 2013, by the authors of nmh. See the * COPYRIGHT file in the root directory of the nmh distribution for @@ -18,11 +17,13 @@ #ifdef MULTIBYTE_SUPPORT #include #include +#include #endif #ifdef MULTIBYTE_SUPPORT static void usage(char *); static void dumpwidth(void); +static void dumpctype(void); static void getwidth(const char *); #endif /* MULTIBYTE_SUPPORT */ @@ -57,6 +58,15 @@ main(int argc, char *argv[]) } } + if (strcmp(argv[1], "--ctype") == 0) { + if (argc != 2) { + fprintf(stderr, "--ctype cannot be combined with other arguments\n"); + exit(1); + } + dumpctype(); + exit(0); + } + /* * Process each argument. If it begins with "U+", then try to * convert it to a Unicode codepoint. Otherwise, take each @@ -92,11 +102,13 @@ static void usage(char *argv0) { fprintf(stderr, "Usage: %s [--dump]\n", argv0); + fprintf(stderr, " %s [--ctype]\n", argv0); fprintf(stderr, " %s U+XXXX [...]\n", argv0); fprintf(stderr, " %s utf-8-sequence [...]\n", argv0); fprintf(stderr, "Returns the column width of a Unicode codepoint " "or UTF-8 character sequence\n"); fprintf(stderr, "\t--dump\tDump complete width table\n"); + fprintf(stderr, "\t--ctype\tPrint wctype(3) table.\n"); exit(1); } @@ -144,22 +156,31 @@ getwidth(const char *string) printf("%d\n", length); } +typedef struct { + wchar_t min, max; +} unicode_range; + +static unicode_range range[] = { + /* https://en.wikipedia.org/wiki/Unicode#Code_point_planes_and_blocks */ + { L'\x0000', L'\xff' }, +#if WCHAR_MAX >= 0xffff + { L'\x0100', L'\xffff' }, +#if WCHAR_MAX >= 0xfffff + { L'\x10000', L'\x14fff' }, + { L'\x16000', L'\x18fff' }, + { L'\x1b000', L'\x1bfff' }, + { L'\x1d000', L'\x1ffff' }, + { L'\x20000', L'\x2ffff' }, + { L'\xe0000', L'\xe0fff' }, +#endif +#endif + { L'\0', L'\0' }, /* Terminates list. */ +}; + static void dumpwidth(void) { - static struct { - wchar_t min, max; - } range[] = { - /* https://en.wikipedia.org/wiki/Unicode#Code_point_planes_and_blocks */ - { L'\x0000', L'\xffff' }, - { L'\x10000', L'\x14fff' }, - { L'\x16000', L'\x18fff' }, - { L'\x1b000', L'\x1bfff' }, - { L'\x1d000', L'\x1ffff' }, - { L'\x20000', L'\x2ffff' }, - { L'\xe0000', L'\xe0fff' }, - { L'\0', L'\0' }, /* Terminates list. */ - }, *r; + unicode_range *r; int first; wchar_t wc, start; int width, lastwidth; @@ -180,9 +201,42 @@ dumpwidth(void) start = wc; lastwidth = width; } - if (wc == r->max) + if (wc == r->max) { printf("%04lX - %04lX = %d\n", (unsigned long)start, (unsigned long int)wc, lastwidth); + /* wchar_t can be a 16-bit unsigned short. */ + break; + } + } + } +} + +static void +dumpctype(void) +{ + unicode_range *r; + wchar_t wc; + + for (r = range; r->max; r++) { + for (wc = r->min; wc <= r->max; wc++) { + printf("%6x %2d %c%c%c%c%c%c%c%c%c%c%c%c\n", + wc, wcwidth(wc), + iswcntrl(wc) ? 'c' : '-', + iswprint(wc) ? 'p' : '-', + iswgraph(wc) ? 'g' : '-', + iswalpha(wc) ? 'a' : '-', + iswupper(wc) ? 'u' : '-', + iswlower(wc) ? 'l' : '-', + iswdigit(wc) ? 'd' : '-', + iswxdigit(wc) ? 'x' : '-', + iswalnum(wc) ? 'N' : '-', + iswpunct(wc) ? '@' : '-', + iswspace(wc) ? 's' : '-', + iswblank(wc) ? 'b' : '-'); + + if (wc == r->max) + /* wchar_t can be a 16-bit unsigned short. */ + break; } } #endif /* MULTIBYTE_SUPPORT */