]>
diplodocus.org Git - nmh/blob - test/getcwidth.c
1 /* getcwidth - Get the OS's idea of the width of Unicode codepoints
3 * This code is Copyright (c) 2013, by the authors of nmh. See the
4 * COPYRIGHT file in the root directory of the nmh distribution for
5 * complete copyright information.
17 #ifdef MULTIBYTE_SUPPORT
23 #ifdef MULTIBYTE_SUPPORT
24 static void usage(char *);
25 static void dumpwidth(void);
26 static void dumpctype(void);
27 static void getwidth(const char *);
28 #endif /* MULTIBYTE_SUPPORT */
31 main(int argc
, char *argv
[])
33 #ifndef MULTIBYTE_SUPPORT
36 fprintf(stderr
, "Nmh was not configured with multibyte support\n");
38 #else /* MULTIBYTE_SUPPORT */
42 if (! setlocale(LC_ALL
, "")) {
43 fprintf(stderr
, "setlocale failed, check your LC_ALL, "
44 "LC_CTYPE, and LANG environment variables\n");
50 if (strcmp(argv
[1], "--dump") == 0) {
55 fprintf(stderr
, "--dump cannot be combined with "
61 if (strcmp(argv
[1], "--ctype") == 0) {
63 fprintf(stderr
, "--ctype cannot be combined with other arguments\n");
71 * Process each argument. If it begins with "U+", then try to
72 * convert it to a Unicode codepoint. Otherwise, take each
73 * string and get the total width
76 for (i
= 1; i
< argc
; i
++) {
77 if (strncmp(argv
[i
], "U+", 2) == 0) {
79 * We're making a big assumption here that
80 * wchar_t represents a Unicode codepoint.
81 * That technically isn't valid unless the
82 * C compiler defines __STDC_ISO_10646__, but
83 * we're going to assume now that it works.
86 c
= strtoul(argv
[i
] + 2, NULL
, 16);
88 fprintf(stderr
, "Codepoint %s invalid\n",
92 printf("%d\n", wcwidth(c
));
104 fprintf(stderr
, "Usage: %s [--dump]\n", argv0
);
105 fprintf(stderr
, " %s [--ctype]\n", argv0
);
106 fprintf(stderr
, " %s U+XXXX [...]\n", argv0
);
107 fprintf(stderr
, " %s utf-8-sequence [...]\n", argv0
);
108 fprintf(stderr
, "Returns the column width of a Unicode codepoint "
109 "or UTF-8 character sequence\n");
110 fprintf(stderr
, "\t--dump\tDump complete width table\n");
111 fprintf(stderr
, "\t--ctype\tPrint wctype(3) table.\n");
117 getwidth(const char *string
)
120 int charlen
, charleft
= strlen(string
);
124 * In theory we should be able to use wcswidth(), but since we're
125 * testing out how the format libraries behave we'll do it a character
129 if (mbtowc(NULL
, NULL
, 0)) {}
131 while (charleft
> 0) {
134 charlen
= mbtowc(&c
, string
, charleft
);
140 fprintf(stderr
, "Unable to convert string \"%s\"\n",
145 if ((clen
= wcwidth(c
)) < 0) {
146 fprintf(stderr
, "U+%04lX non-printable\n",
147 (unsigned long int) c
);
156 printf("%d\n", length
);
163 static unicode_range range
[] = {
164 /* https://en.wikipedia.org/wiki/Unicode#Code_point_planes_and_blocks */
165 { L
'\x0000', L
'\xff' },
166 #if WCHAR_MAX >= 0xffff
167 { L
'\x0100', L
'\xffff' },
168 #if WCHAR_MAX >= 0xfffff
169 { L
'\x10000', L
'\x14fff' },
170 { L
'\x16000', L
'\x18fff' },
171 { L
'\x1b000', L
'\x1bfff' },
172 { L
'\x1d000', L
'\x1ffff' },
173 { L
'\x20000', L
'\x2ffff' },
174 { L
'\xe0000', L
'\xe0fff' },
177 { L
'\0', L
'\0' }, /* Terminates list. */
186 int width
, lastwidth
;
188 for (r
= range
; r
->max
; r
++) {
190 for (wc
= r
->min
; wc
<= r
->max
; wc
++) {
198 if (width
!= lastwidth
) {
199 printf("%04lX - %04lX = %d\n", (unsigned long)start
,
200 (unsigned long int)wc
- 1, lastwidth
);
205 printf("%04lX - %04lX = %d\n", (unsigned long)start
,
206 (unsigned long int)wc
, lastwidth
);
207 /* wchar_t can be a 16-bit unsigned short. */
220 for (r
= range
; r
->max
; r
++) {
221 for (wc
= r
->min
; wc
<= r
->max
; wc
++) {
222 printf("%6x %2d %c%c%c%c%c%c%c%c%c%c%c%c\n",
224 iswcntrl(wc
) ? 'c' : '-',
225 iswprint(wc
) ? 'p' : '-',
226 iswgraph(wc
) ? 'g' : '-',
227 iswalpha(wc
) ? 'a' : '-',
228 iswupper(wc
) ? 'u' : '-',
229 iswlower(wc
) ? 'l' : '-',
230 iswdigit(wc
) ? 'd' : '-',
231 iswxdigit(wc
) ? 'x' : '-',
232 iswalnum(wc
) ? 'N' : '-',
233 iswpunct(wc
) ? '@' : '-',
234 iswspace(wc
) ? 's' : '-',
235 iswblank(wc
) ? 'b' : '-');
238 /* wchar_t can be a 16-bit unsigned short. */
242 #endif /* MULTIBYTE_SUPPORT */