]> diplodocus.org Git - nmh/blob - test/getcwidth.c
Added context_find_prefix().
[nmh] / test / getcwidth.c
1 /*
2 * getcwidth - Get the OS's idea of the width of Unicode codepoints
3 *
4 * This code is Copyright (c) 2013, by the authors of nmh. See the
5 * COPYRIGHT file in the root directory of the nmh distribution for
6 * complete copyright information.
7 */
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <errno.h>
13
14 #ifdef HAVE_CONFIG_H
15 #include <config.h>
16 #endif
17
18 #ifdef MULTIBYTE_SUPPORT
19 #include <locale.h>
20 #include <wchar.h>
21 #endif
22
#ifdef MULTIBYTE_SUPPORT
/*
 * Forward declarations for the helpers defined after main().  They are
 * only declared when multibyte support is configured, because main()
 * only calls them in that configuration.
 */
static void usage(char *argv0);
static void getwidth(const char *string);
static void dumpwidth(void);
#endif /* MULTIBYTE_SUPPORT */
28
29 int
30 main(int argc, char *argv[])
31 {
32 #ifndef MULTIBYTE_SUPPORT
33 (void) argc;
34 (void) argv;
35 fprintf(stderr, "Nmh was not configured with multibyte support\n");
36 exit(1);
37 #else /* MULTIBYTE_SUPPORT */
38 wchar_t c;
39 int i;
40
41 if (! setlocale(LC_ALL, "")) {
42 fprintf(stderr, "setlocale failed, check your LC_ALL, "
43 "LC_CTYPE, and LANG environment variables\n");
44 }
45
46 if (argc < 2)
47 usage(argv[0]);
48
49 if (strcmp(argv[1], "--dump") == 0) {
50 if (argc == 2) {
51 dumpwidth();
52 exit(0);
53 } else {
54 fprintf(stderr, "--dump cannot be combined with "
55 "other arguments\n");
56 exit(1);
57 }
58 }
59
60 /*
61 * Process each argument. If it begins with "U+", then try to
62 * convert it to a Unicode codepoint. Otherwise, take each
63 * string and get the total width
64 */
65
66 for (i = 1; i < argc; i++) {
67 if (strncmp(argv[i], "U+", 2) == 0) {
68 /*
69 * We're making a big assumption here that
70 * wchar_t represents a Unicode codepoint.
71 * That technically isn't valid unless the
72 * C compiler defines __STDC_ISO_10646__, but
73 * we're going to assume now that it works.
74 */
75 errno = 0;
76 c = strtoul(argv[i] + 2, NULL, 16);
77 if (errno) {
78 fprintf(stderr, "Codepoint %s invalid\n",
79 argv[i]);
80 continue;
81 }
82 printf("%d\n", wcwidth(c));
83 } else {
84 getwidth(argv[i]);
85 }
86 }
87
88 exit(0);
89 }
90
/*
 * Print the command synopsis to stderr and terminate the program with
 * exit status 1.  This function does not return.
 *
 * argv0 is the program name substituted into each synopsis line.
 */
static void
usage(char *argv0)
{
    /* One call emits the same text the synopsis always had, line by line. */
    fprintf(stderr,
            "Usage: %s [--dump]\n"
            " %s U+XXXX [...]\n"
            " %s utf-8-sequence [...]\n"
            "Returns the column width of a Unicode codepoint "
            "or UTF-8 character sequence\n"
            "\t--dump\tDump complete width table\n",
            argv0, argv0, argv0);

    exit(1);
}
103
/*
 * Print, on stdout, the total column width of a multibyte string as
 * decoded by mbtowc() in the current locale.
 *
 * The string is decoded one character at a time and the wcwidth() of
 * every character is summed.  Two failures end the function early with a
 * message on stderr and nothing on stdout:
 *   - mbtowc() cannot decode the next character; the message quotes the
 *     part of the string that was not yet consumed;
 *   - wcwidth() reports a character as non-printable.
 */
static void
getwidth(const char *string)
{
    const char *cursor = string;        /* next undecoded byte */
    int remaining = strlen(string);     /* bytes left to decode */
    int total = 0;                      /* accumulated column width */

    /*
     * In theory we should be able to use wcswidth(), but since we're
     * testing out how the format libraries behave we'll do it a character
     * at a time.
     */

    /* Reset mbtowc()'s conversion state; the return value is irrelevant. */
    (void) mbtowc(NULL, NULL, 0);

    for (; remaining > 0; ) {
        wchar_t wc;
        int cols;
        int nbytes = mbtowc(&wc, cursor, remaining);

        if (nbytes == 0) {
            /* Decoded a NUL character: nothing more to measure. */
            break;
        }

        if (nbytes < 0) {
            fprintf(stderr, "Unable to convert string \"%s\"\n",
                    cursor);
            return;
        }

        cols = wcwidth(wc);
        if (cols < 0) {
            fprintf(stderr, "U+%04lX non-printable\n",
                    (unsigned long int) wc);
            return;
        }

        total += cols;
        cursor += nbytes;
        remaining -= nbytes;
    }

    printf("%d\n", total);
}
146
147 static void
148 dumpwidth(void)
149 {
150 wchar_t wc, low;
151 int width, lastwidth;
152
153 for (wc = 0, low = 1, lastwidth = wcwidth(1); wc < 0xffff; wc++) {
154 width = wcwidth(wc+1);
155 if (width != lastwidth) {
156 printf("%04lX - %04lX = %d\n", (unsigned long int) low,
157 (unsigned long int) (wc), lastwidth);
158 low = wc+1;
159 }
160 lastwidth = width;
161 }
162
163 width = wcwidth(wc);
164 if (width == lastwidth)
165 printf("%04lX - %04lX = %d\n", (unsigned long int) low,
166 (unsigned long int) (wc), width);
167 #endif /* MULTIBYTE_SUPPORT */
168 }