X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/f74f462c6a0bb4e203789da0a181e3fea9ac4a3b..9291a5f82480f2458e04cb9ea7e6749bc952b308:/test/getcwidth.c?ds=sidebyside

diff --git a/test/getcwidth.c b/test/getcwidth.c
index 264d5831..165f12ab 100644
--- a/test/getcwidth.c
+++ b/test/getcwidth.c
@@ -1,5 +1,4 @@
-/*
- * getcwidth - Get the OS's idea of the width of Unicode codepoints
+/* getcwidth - Get the OS's idea of the width of Unicode codepoints
  *
  * This code is Copyright (c) 2013, by the authors of nmh.  See the
  * COPYRIGHT file in the root directory of the nmh distribution for
@@ -18,11 +17,13 @@
 #ifdef MULTIBYTE_SUPPORT
 #include <locale.h>
 #include <wchar.h>
+#include <wctype.h>
 #endif
 
 #ifdef MULTIBYTE_SUPPORT
 static void usage(char *);
 static void dumpwidth(void);
+static void dumpctype(void);
 static void getwidth(const char *);
 #endif /* MULTIBYTE_SUPPORT */
 
@@ -57,6 +58,15 @@ main(int argc, char *argv[])
 		}
 	}
 
+	if (strcmp(argv[1], "--ctype") == 0) {
+		if (argc != 2) {
+			fprintf(stderr, "--ctype cannot be combined with other arguments\n");
+			exit(1);
+		}
+		dumpctype();
+		exit(0);
+	}
+
 	/*
 	 * Process each argument.  If it begins with "U+", then try to
 	 * convert it to a Unicode codepoint.  Otherwise, take each
@@ -92,11 +102,13 @@ static void
 usage(char *argv0)
 {
 	fprintf(stderr, "Usage: %s [--dump]\n", argv0);
+	fprintf(stderr, "       %s [--ctype]\n", argv0);
 	fprintf(stderr, "       %s U+XXXX [...]\n", argv0);
 	fprintf(stderr, "       %s utf-8-sequence [...]\n", argv0);
 	fprintf(stderr, "Returns the column width of a Unicode codepoint "
 		"or UTF-8 character sequence\n");
 	fprintf(stderr, "\t--dump\tDump complete width table\n");
+	fprintf(stderr, "\t--ctype\tPrint wctype(3) table.\n");
 
 	exit(1);
 }
@@ -144,22 +156,31 @@ getwidth(const char *string)
 	printf("%d\n", length);
 }
 
+typedef struct {
+	wchar_t min, max;
+} unicode_range;
+
+static unicode_range range[] = {
+	/* https://en.wikipedia.org/wiki/Unicode#Code_point_planes_and_blocks */
+	{  L'\x0000',    L'\xff' },
+#if WCHAR_MAX >= 0xffff
+	{  L'\x0100',  L'\xffff' },
+#if WCHAR_MAX >= 0xfffff
+	{ L'\x10000', L'\x14fff' },
+	{ L'\x16000', L'\x18fff' },
+	{ L'\x1b000', L'\x1bfff' },
+	{ L'\x1d000', L'\x1ffff' },
+	{ L'\x20000', L'\x2ffff' },
+	{ L'\xe0000', L'\xe0fff' },
+#endif
+#endif
+	{ L'\0', L'\0' }, /* Terminates list. */
+};
+
 static void
 dumpwidth(void)
 {
-	static struct {
-		wchar_t min, max;
-	} range[] = {
-		/* https://en.wikipedia.org/wiki/Unicode#Code_point_planes_and_blocks */
-		{  L'\x0000',  L'\xffff' },
-		{ L'\x10000', L'\x14fff' },
-		{ L'\x16000', L'\x18fff' },
-		{ L'\x1b000', L'\x1bfff' },
-		{ L'\x1d000', L'\x1ffff' },
-		{ L'\x20000', L'\x2ffff' },
-		{ L'\xe0000', L'\xe0fff' },
-		{ L'\0', L'\0' }, /* Terminates list. */
-	}, *r;
+	unicode_range *r;
 	int first;
 	wchar_t wc, start;
 	int width, lastwidth;
@@ -180,9 +201,52 @@ dumpwidth(void)
 				start = wc;
 				lastwidth = width;
 			}
-			if (wc == r->max)
+			if (wc == r->max) {
 				printf("%04lX - %04lX = %d\n", (unsigned long)start,
 					   (unsigned long int)wc, lastwidth);
+				/* wchar_t can be a 16-bit unsigned short. */
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * Print one line for every codepoint in each range[] entry: the
+ * codepoint in hex, its wcwidth(3) result, and one flag letter per
+ * isw*(3) class that matches ('-' where the class does not match).
+ */
+static void
+dumpctype(void)
+{
+	unicode_range *r;
+	wchar_t wc;
+
+	for (r = range; r->max; r++) {
+		for (wc = r->min; wc <= r->max; wc++) {
+			/* wchar_t's underlying type varies, so cast it for %lx. */
+			printf("%6lx  %2d  %c%c%c%c%c%c%c%c%c%c%c%c\n",
+				(unsigned long)wc, wcwidth(wc),
+				iswcntrl(wc) ? 'c' : '-',
+				iswprint(wc) ? 'p' : '-',
+				iswgraph(wc) ? 'g' : '-',
+				iswalpha(wc) ? 'a' : '-',
+				iswupper(wc) ? 'u' : '-',
+				iswlower(wc) ? 'l' : '-',
+				iswdigit(wc) ? 'd' : '-',
+				iswxdigit(wc) ? 'x' : '-',
+				iswalnum(wc) ? 'N' : '-',
+				iswpunct(wc) ? '@' : '-',
+				iswspace(wc) ? 's' : '-',
+				iswblank(wc) ? 'b' : '-');
+
+			/*
+			 * Leave the loop explicitly at the last codepoint: where
+			 * wchar_t is a 16-bit unsigned short, wc++ past 0xffff
+			 * wraps to 0 and "wc <= r->max" would never be false.
+			 */
+			if (wc == r->max)
+				break;
 		}
 	}
 #endif /* MULTIBYTE_SUPPORT */