diplodocus.org Git - nmh/blob - test/getcwidth.c
cpstripped: Split into two versions, each with a single #ifdef.
[nmh] / test / getcwidth.c
1 /*
2 * getcwidth - Get the OS's idea of the width of Unicode codepoints
3 *
4 * This code is Copyright (c) 2013, by the authors of nmh. See the
5 * COPYRIGHT file in the root directory of the nmh distribution for
6 * complete copyright information.
7 */
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <errno.h>
13
14 #ifdef HAVE_CONFIG_H
15 #include <config.h>
16 #endif
17
18 #ifdef MULTIBYTE_SUPPORT
19 #include <locale.h>
20 #include <wchar.h>
21 #include <wctype.h>
22 #endif
23
#ifdef MULTIBYTE_SUPPORT
/* Print the usage summary on stderr and exit with status 1. */
static void usage(char *argv0);
/* Print runs of consecutive codepoints that share a wcwidth() value. */
static void dumpwidth(void);
/* Print width and wide-character class flags for each tabled codepoint. */
static void dumpctype(void);
/* Print the total column width of one multibyte string. */
static void getwidth(const char *string);
#endif /* MULTIBYTE_SUPPORT */
30
31 int
32 main(int argc, char *argv[])
33 {
34 #ifndef MULTIBYTE_SUPPORT
35 (void) argc;
36 (void) argv;
37 fprintf(stderr, "Nmh was not configured with multibyte support\n");
38 exit(1);
39 #else /* MULTIBYTE_SUPPORT */
40 wchar_t c;
41 int i;
42
43 if (! setlocale(LC_ALL, "")) {
44 fprintf(stderr, "setlocale failed, check your LC_ALL, "
45 "LC_CTYPE, and LANG environment variables\n");
46 }
47
48 if (argc < 2)
49 usage(argv[0]);
50
51 if (strcmp(argv[1], "--dump") == 0) {
52 if (argc == 2) {
53 dumpwidth();
54 exit(0);
55 } else {
56 fprintf(stderr, "--dump cannot be combined with "
57 "other arguments\n");
58 exit(1);
59 }
60 }
61
62 if (strcmp(argv[1], "--ctype") == 0) {
63 if (argc != 2) {
64 fprintf(stderr, "--ctype cannot be combined with other arguments\n");
65 exit(1);
66 }
67 dumpctype();
68 exit(0);
69 }
70
71 /*
72 * Process each argument. If it begins with "U+", then try to
73 * convert it to a Unicode codepoint. Otherwise, take each
74 * string and get the total width
75 */
76
77 for (i = 1; i < argc; i++) {
78 if (strncmp(argv[i], "U+", 2) == 0) {
79 /*
80 * We're making a big assumption here that
81 * wchar_t represents a Unicode codepoint.
82 * That technically isn't valid unless the
83 * C compiler defines __STDC_ISO_10646__, but
84 * we're going to assume now that it works.
85 */
86 errno = 0;
87 c = strtoul(argv[i] + 2, NULL, 16);
88 if (errno) {
89 fprintf(stderr, "Codepoint %s invalid\n",
90 argv[i]);
91 continue;
92 }
93 printf("%d\n", wcwidth(c));
94 } else {
95 getwidth(argv[i]);
96 }
97 }
98
99 exit(0);
100 }
101
/*
 * Print the usage summary on standard error and exit with status 1.
 *
 * argv0 is the program name shown in each synopsis line.
 * This function does not return.
 */
static void
usage(char *argv0)
{
    fprintf(stderr, "Usage: %s [--dump]\n", argv0);
    /* main() also accepts --ctype, so list it alongside --dump. */
    fprintf(stderr, " %s --ctype\n", argv0);
    fprintf(stderr, " %s U+XXXX [...]\n", argv0);
    fprintf(stderr, " %s utf-8-sequence [...]\n", argv0);
    fprintf(stderr, "Returns the column width of a Unicode codepoint "
            "or UTF-8 character sequence\n");
    fprintf(stderr, "\t--dump\tDump complete width table\n");
    fprintf(stderr, "\t--ctype\tDump width and character classes "
            "of each codepoint\n");

    exit(1);
}
114
/*
 * Print, on standard output, the sum of the wcwidth() values of every
 * character in a multibyte string.
 *
 * The string is decoded one character at a time with mbtowc() instead
 * of using wcswidth(), because the point of this program is to see how
 * the per-character approach used by the format libraries behaves.
 *
 * On a decoding failure, or when a character has no defined width, a
 * diagnostic goes to standard error and nothing is printed on standard
 * output.
 */
static void
getwidth(const char *string)
{
    const char *cursor = string;
    int remaining = strlen(string);
    int total = 0;

    /*
     * A null string argument resets mbtowc()'s conversion state.  The
     * result is not needed; the empty if presumably just keeps
     * unused-result warnings quiet.
     */
    if (mbtowc(NULL, NULL, 0)) {}

    while (remaining > 0) {
        wchar_t wc;
        int width;
        int nbytes = mbtowc(&wc, cursor, remaining);

        /* A zero return means the terminating null character was hit. */
        if (nbytes == 0)
            break;

        if (nbytes < 0) {
            /* Reports the part of the string that is still unconverted. */
            fprintf(stderr, "Unable to convert string \"%s\"\n", cursor);
            return;
        }

        width = wcwidth(wc);
        if (width < 0) {
            fprintf(stderr, "U+%04lX non-printable\n",
                    (unsigned long int) wc);
            return;
        }

        total += width;
        cursor += nbytes;
        remaining -= nbytes;
    }

    printf("%d\n", total);
}
157
/* An inclusive span of codepoints, [min, max]. */
typedef struct {
    wchar_t min, max;
} unicode_range;

/*
 * Codepoint spans walked by the dump functions.  The list ends at the
 * first entry whose max is zero.
 *
 * https://en.wikipedia.org/wiki/Unicode#Code_point_planes_and_blocks
 */
static unicode_range range[] = {
    { .min = L'\x0000',  .max = L'\xffff'  },
    { .min = L'\x10000', .max = L'\x14fff' },
    { .min = L'\x16000', .max = L'\x18fff' },
    { .min = L'\x1b000', .max = L'\x1bfff' },
    { .min = L'\x1d000', .max = L'\x1ffff' },
    { .min = L'\x20000', .max = L'\x2ffff' },
    { .min = L'\xe0000', .max = L'\xe0fff' },
    { .min = L'\0',      .max = L'\0'      },  /* Terminates list. */
};
173
174 static void
175 dumpwidth(void)
176 {
177 unicode_range *r;
178 int first;
179 wchar_t wc, start;
180 int width, lastwidth;
181
182 for (r = range; r->max; r++) {
183 first = 1;
184 for (wc = r->min; wc <= r->max; wc++) {
185 width = wcwidth(wc);
186 if (first) {
187 start = wc;
188 lastwidth = width;
189 first = 0;
190 continue;
191 }
192 if (width != lastwidth) {
193 printf("%04lX - %04lX = %d\n", (unsigned long)start,
194 (unsigned long int)wc - 1, lastwidth);
195 start = wc;
196 lastwidth = width;
197 }
198 if (wc == r->max)
199 printf("%04lX - %04lX = %d\n", (unsigned long)start,
200 (unsigned long int)wc, lastwidth);
201 }
202 }
203 }
204
205 static void
206 dumpctype(void)
207 {
208 unicode_range *r;
209 wchar_t wc;
210
211 for (r = range; r->max; r++) {
212 for (wc = r->min; wc <= r->max; wc++) {
213 printf("%6x %2d %c%c%c%c%c%c%c%c%c%c%c%c\n",
214 wc, wcwidth(wc),
215 iswcntrl(wc) ? 'c' : '-',
216 iswprint(wc) ? 'p' : '-',
217 iswgraph(wc) ? 'g' : '-',
218 iswalpha(wc) ? 'a' : '-',
219 iswupper(wc) ? 'u' : '-',
220 iswlower(wc) ? 'l' : '-',
221 iswdigit(wc) ? 'd' : '-',
222 iswxdigit(wc) ? 'x' : '-',
223 iswalnum(wc) ? 'N' : '-',
224 iswpunct(wc) ? '@' : '-',
225 iswspace(wc) ? 's' : '-',
226 iswblank(wc) ? 'b' : '-');
227 }
228 }
229 #endif /* MULTIBYTE_SUPPORT */
230 }