-/*
- * getcwidth - Get the OS's idea of the width of Unicode codepoints
+/* getcwidth - Get the OS's idea of the width of Unicode codepoints
*
* This code is Copyright (c) 2013, by the authors of nmh. See the
* COPYRIGHT file in the root directory of the nmh distribution for
#ifdef MULTIBYTE_SUPPORT
#include <locale.h>
#include <wchar.h>
+#include <wctype.h>
#endif
#ifdef MULTIBYTE_SUPPORT
static void usage(char *);
static void dumpwidth(void);
+static void dumpctype(void);
static void getwidth(const char *);
#endif /* MULTIBYTE_SUPPORT */
main(int argc, char *argv[])
{
#ifndef MULTIBYTE_SUPPORT
+ (void) argc;
+ (void) argv;
fprintf(stderr, "Nmh was not configured with multibyte support\n");
exit(1);
#else /* MULTIBYTE_SUPPORT */
wchar_t c;
int i;
- setlocale(LC_ALL, "");
+ if (! setlocale(LC_ALL, "")) {
+ fprintf(stderr, "setlocale failed, check your LC_ALL, "
+ "LC_CTYPE, and LANG environment variables\n");
+ }
if (argc < 2)
usage(argv[0]);
}
}
+ if (strcmp(argv[1], "--ctype") == 0) {
+ if (argc != 2) {
+ fprintf(stderr, "--ctype cannot be combined with other arguments\n");
+ exit(1);
+ }
+ dumpctype();
+ exit(0);
+ }
+
/*
* Process each argument. If it begins with "U+", then try to
* convert it to a Unicode codepoint. Otherwise, take each
usage(char *argv0)
{
fprintf(stderr, "Usage: %s [--dump]\n", argv0);
+ fprintf(stderr, " %s [--ctype]\n", argv0);
fprintf(stderr, " %s U+XXXX [...]\n", argv0);
fprintf(stderr, " %s utf-8-sequence [...]\n", argv0);
fprintf(stderr, "Returns the column width of a Unicode codepoint "
"or UTF-8 character sequence\n");
fprintf(stderr, "\t--dump\tDump complete width table\n");
+ fprintf(stderr, "\t--ctype\tPrint wctype(3) table.\n");
exit(1);
}
* at a time.
*/
- mbtowc(NULL, NULL, 0);
+ if (mbtowc(NULL, NULL, 0)) {}
while (charleft > 0) {
int clen;
printf("%d\n", length);
}
+typedef struct {
+ wchar_t min, max; /* First and last codepoint of a range; the loops
+ * over range[] test wc <= max, so both ends are included. */
+} unicode_range;
+
+static unicode_range range[] = {
+ /* Codepoint ranges walked by dumpwidth() and dumpctype().  Entries
+ * above 0xff are compiled in only when WCHAR_MAX shows that wchar_t
+ * can hold them.  See
+ * https://en.wikipedia.org/wiki/Unicode#Code_point_planes_and_blocks */
+ { L'\x0000', L'\xff' },
+#if WCHAR_MAX >= 0xffff
+ { L'\x0100', L'\xffff' },
+#if WCHAR_MAX >= 0xfffff
+ { L'\x10000', L'\x14fff' },
+ { L'\x16000', L'\x18fff' },
+ { L'\x1b000', L'\x1bfff' },
+ { L'\x1d000', L'\x1ffff' },
+ { L'\x20000', L'\x2ffff' },
+ { L'\xe0000', L'\xe0fff' },
+#endif
+#endif
+ { L'\0', L'\0' }, /* Terminates list: the loops stop at max == 0. */
+};
+
static void
dumpwidth(void)
{
- wchar_t wc, low;
+ unicode_range *r; /* Current entry of the range[] table. */
+ int first; /* Non-zero until the first codepoint of a range is seen. */
+ wchar_t wc, start; /* start: first codepoint of the run being built. */
int width, lastwidth;
- for (wc = 0, low = 1, lastwidth = wcwidth(1); wc < 0xffff; wc++) {
- width = wcwidth(wc+1);
- if (width != lastwidth) {
- printf("%04lX - %04lX = %d\n", (unsigned long int) low,
- (unsigned long int) (wc), lastwidth);
- low = wc+1;
+ for (r = range; r->max; r++) { /* For every range, print each run of
+ * consecutive codepoints sharing one wcwidth() value, formatted
+ * as "START - END = WIDTH" in hex. */
+ first = 1;
+ for (wc = r->min; wc <= r->max; wc++) {
+ width = wcwidth(wc);
+ if (first) { /* Open the first run of this range. */
+ start = wc;
+ lastwidth = width;
+ first = 0;
+ continue;
+ }
+ if (width != lastwidth) { /* Width changed: print the finished
+ * run, which ends at wc - 1, and open a new one at wc. */
+ printf("%04lX - %04lX = %d\n", (unsigned long)start,
+ (unsigned long int)wc - 1, lastwidth);
+ start = wc;
+ lastwidth = width;
+ }
+ if (wc == r->max) { /* End of range: print the run still open. */
+ printf("%04lX - %04lX = %d\n", (unsigned long)start,
+ (unsigned long int)wc, lastwidth);
+ /* Leave the loop explicitly: wchar_t can be a 16-bit unsigned
+ * short, in which case wc <= r->max can never become false
+ * for a range that ends at 0xffff. */
+ break;
+ }
}
- lastwidth = width;
}
-
- width = wcwidth(wc);
- if (width == lastwidth)
- printf("%04lX - %04lX = %d\n", (unsigned long int) low,
- (unsigned long int) (wc), width);
}
+
+/*
+ * Print one line for every codepoint covered by the range[] table: the
+ * codepoint in hex, its wcwidth() value, and twelve flag columns, one
+ * per isw*() classification.  A column shows the class letter when the
+ * test succeeds and '-' when it fails.  Column order: cntrl, print,
+ * graph, alpha, upper, lower, digit, xdigit, alnum (N), punct (@),
+ * space, blank.
+ */
+static void
+dumpctype(void)
+{
+ unicode_range *r;
+ wchar_t wc;
+
+ for (r = range; r->max; r++) {
+ for (wc = r->min; wc <= r->max; wc++) {
+ /* wchar_t has no printf conversion of its own and its underlying
+ * type varies by platform, so cast it to match %lx explicitly. */
+ printf("%6lx %2d %c%c%c%c%c%c%c%c%c%c%c%c\n",
+ (unsigned long)wc, wcwidth(wc),
+ iswcntrl(wc) ? 'c' : '-',
+ iswprint(wc) ? 'p' : '-',
+ iswgraph(wc) ? 'g' : '-',
+ iswalpha(wc) ? 'a' : '-',
+ iswupper(wc) ? 'u' : '-',
+ iswlower(wc) ? 'l' : '-',
+ iswdigit(wc) ? 'd' : '-',
+ iswxdigit(wc) ? 'x' : '-',
+ iswalnum(wc) ? 'N' : '-',
+ iswpunct(wc) ? '@' : '-',
+ iswspace(wc) ? 's' : '-',
+ iswblank(wc) ? 'b' : '-');
+
+ if (wc == r->max)
+ /* Leave the loop explicitly: wchar_t can be a 16-bit unsigned
+ * short, in which case wc <= r->max can never become false
+ * for a range that ends at 0xffff. */
+ break;
+ }
+ }
+}
#endif /* MULTIBYTE_SUPPORT */