]> diplodocus.org Git - nmh/blob - test/getcwidth.c
getpass.c: Move interface to own file.
[nmh] / test / getcwidth.c
1 /* getcwidth - Get the OS's idea of the width of Unicode codepoints
2 *
3 * This code is Copyright (c) 2013, by the authors of nmh. See the
4 * COPYRIGHT file in the root directory of the nmh distribution for
5 * complete copyright information.
6 */
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12
13 #ifdef HAVE_CONFIG_H
14 #include <config.h>
15 #endif
16
17 #ifdef MULTIBYTE_SUPPORT
18 #include <locale.h>
19 #include <wchar.h>
20 #include <wctype.h>
21 #endif
22
#ifdef MULTIBYTE_SUPPORT
/*
 * Forward declarations for the helpers defined after main().  They only
 * exist when multibyte support is compiled in.
 */
static void usage(char *argv0);
static void getwidth(const char *string);
static void dumpwidth(void);
static void dumpctype(void);
#endif /* MULTIBYTE_SUPPORT */
29
30 int
31 main(int argc, char *argv[])
32 {
33 #ifndef MULTIBYTE_SUPPORT
34 (void) argc;
35 (void) argv;
36 fprintf(stderr, "Nmh was not configured with multibyte support\n");
37 exit(1);
38 #else /* MULTIBYTE_SUPPORT */
39 wchar_t c;
40 int i;
41
42 if (! setlocale(LC_ALL, "")) {
43 fprintf(stderr, "setlocale failed, check your LC_ALL, "
44 "LC_CTYPE, and LANG environment variables\n");
45 }
46
47 if (argc < 2)
48 usage(argv[0]);
49
50 if (strcmp(argv[1], "--dump") == 0) {
51 if (argc == 2) {
52 dumpwidth();
53 exit(0);
54 } else {
55 fprintf(stderr, "--dump cannot be combined with "
56 "other arguments\n");
57 exit(1);
58 }
59 }
60
61 if (strcmp(argv[1], "--ctype") == 0) {
62 if (argc != 2) {
63 fprintf(stderr, "--ctype cannot be combined with other arguments\n");
64 exit(1);
65 }
66 dumpctype();
67 exit(0);
68 }
69
70 /*
71 * Process each argument. If it begins with "U+", then try to
72 * convert it to a Unicode codepoint. Otherwise, take each
73 * string and get the total width
74 */
75
76 for (i = 1; i < argc; i++) {
77 if (strncmp(argv[i], "U+", 2) == 0) {
78 /*
79 * We're making a big assumption here that
80 * wchar_t represents a Unicode codepoint.
81 * That technically isn't valid unless the
82 * C compiler defines __STDC_ISO_10646__, but
83 * we're going to assume now that it works.
84 */
85 errno = 0;
86 c = strtoul(argv[i] + 2, NULL, 16);
87 if (errno) {
88 fprintf(stderr, "Codepoint %s invalid\n",
89 argv[i]);
90 continue;
91 }
92 printf("%d\n", wcwidth(c));
93 } else {
94 getwidth(argv[i]);
95 }
96 }
97
98 exit(0);
99 }
100
/*
 * Write the command synopsis to standard error and terminate the
 * program with exit status 1.  argv0 is the program name substituted
 * into each synopsis line.
 */
static void
usage(char *argv0)
{
    /* The four invocation forms, each prefixed with the program name. */
    fprintf(stderr,
            "Usage: %s [--dump]\n"
            " %s [--ctype]\n"
            " %s U+XXXX [...]\n"
            " %s utf-8-sequence [...]\n",
            argv0, argv0, argv0, argv0);

    /* Fixed explanatory text; no substitutions needed. */
    fputs("Returns the column width of a Unicode codepoint "
          "or UTF-8 character sequence\n", stderr);
    fputs("\t--dump\tDump complete width table\n", stderr);
    fputs("\t--ctype\tPrint wctype(3) table.\n", stderr);

    exit(1);
}
115
/*
 * Print, on standard output, the sum of the wcwidth() values of the
 * characters in the multibyte string `string`.
 *
 * If a byte sequence cannot be converted, or a character has a negative
 * wcwidth(), a diagnostic is written to standard error and nothing is
 * printed on standard output.
 */
static void
getwidth(const char *string)
{
    const char *cursor;
    wchar_t wc;
    int remaining, nbytes;
    int total = 0;

    /*
     * wcswidth() could in principle do all of this in one call, but
     * the point is to test how the format libraries behave, so the
     * string is walked one character at a time.
     */

    /* Reset mbtowc()'s conversion state; the result is deliberately unused. */
    if (mbtowc(NULL, NULL, 0)) {}

    cursor = string;
    for (remaining = strlen(string); remaining > 0; remaining -= nbytes) {
        int cols;

        nbytes = mbtowc(&wc, cursor, remaining);

        /* Zero means a NUL character was converted: end of string. */
        if (nbytes == 0)
            break;

        if (nbytes < 0) {
            /* Reports the unconverted tail, starting at the bad sequence. */
            fprintf(stderr, "Unable to convert string \"%s\"\n",
                    cursor);
            return;
        }

        cols = wcwidth(wc);
        if (cols < 0) {
            fprintf(stderr, "U+%04lX non-printable\n",
                    (unsigned long int) wc);
            return;
        }

        total += cols;
        cursor += nbytes;
    }

    printf("%d\n", total);
}
158
/* An inclusive span of wide-character values, min through max. */
typedef struct {
    wchar_t min;
    wchar_t max;
} unicode_range;

/*
 * Spans of codepoints to examine.  The list ends at the first entry
 * whose max is zero.  Spans above 0xff are only compiled in when
 * WCHAR_MAX says wchar_t is wide enough to hold them.
 *
 * https://en.wikipedia.org/wiki/Unicode#Code_point_planes_and_blocks
 */
static unicode_range range[] = {
    { L'\x0000',  L'\xff' },
#if WCHAR_MAX >= 0xffff
    { L'\x0100',  L'\xffff' },
#if WCHAR_MAX >= 0xfffff
    { L'\x10000', L'\x14fff' },
    { L'\x16000', L'\x18fff' },
    { L'\x1b000', L'\x1bfff' },
    { L'\x1d000', L'\x1ffff' },
    { L'\x20000', L'\x2ffff' },
    { L'\xe0000', L'\xe0fff' },
#endif
#endif
    { L'\0', L'\0' }, /* Terminates list. */
};
179
180 static void
181 dumpwidth(void)
182 {
183 unicode_range *r;
184 int first;
185 wchar_t wc, start;
186 int width, lastwidth;
187
188 for (r = range; r->max; r++) {
189 first = 1;
190 for (wc = r->min; wc <= r->max; wc++) {
191 width = wcwidth(wc);
192 if (first) {
193 start = wc;
194 lastwidth = width;
195 first = 0;
196 continue;
197 }
198 if (width != lastwidth) {
199 printf("%04lX - %04lX = %d\n", (unsigned long)start,
200 (unsigned long int)wc - 1, lastwidth);
201 start = wc;
202 lastwidth = width;
203 }
204 if (wc == r->max) {
205 printf("%04lX - %04lX = %d\n", (unsigned long)start,
206 (unsigned long int)wc, lastwidth);
207 /* wchar_t can be a 16-bit unsigned short. */
208 break;
209 }
210 }
211 }
212 }
213
214 static void
215 dumpctype(void)
216 {
217 unicode_range *r;
218 wchar_t wc;
219
220 for (r = range; r->max; r++) {
221 for (wc = r->min; wc <= r->max; wc++) {
222 printf("%6x %2d %c%c%c%c%c%c%c%c%c%c%c%c\n",
223 wc, wcwidth(wc),
224 iswcntrl(wc) ? 'c' : '-',
225 iswprint(wc) ? 'p' : '-',
226 iswgraph(wc) ? 'g' : '-',
227 iswalpha(wc) ? 'a' : '-',
228 iswupper(wc) ? 'u' : '-',
229 iswlower(wc) ? 'l' : '-',
230 iswdigit(wc) ? 'd' : '-',
231 iswxdigit(wc) ? 'x' : '-',
232 iswalnum(wc) ? 'N' : '-',
233 iswpunct(wc) ? '@' : '-',
234 iswspace(wc) ? 's' : '-',
235 iswblank(wc) ? 'b' : '-');
236
237 if (wc == r->max)
238 /* wchar_t can be a 16-bit unsigned short. */
239 break;
240 }
241 }
242 #endif /* MULTIBYTE_SUPPORT */
243 }