]>
diplodocus.org Git - nmh/blob - test/getcwidth.c
/*
 * getcwidth - Get the OS's idea of the width of Unicode codepoints
 *
 * This code is Copyright (c) 2013, by the authors of nmh.  See the
 * COPYRIGHT file in the root directory of the nmh distribution for
 * complete copyright information.
 */
18 #ifdef MULTIBYTE_SUPPORT
#ifdef MULTIBYTE_SUPPORT
/*
 * Forward declarations for the file-local helpers.  They are only
 * declared when nmh is configured with multibyte support, matching the
 * MULTIBYTE_SUPPORT guard used around the rest of this file.
 */
static void usage(char *argv0);            /* print the usage text to stderr */
static void dumpwidth(void);               /* print "start - end = width" runs */
static void dumpctype(void);
static void getwidth(const char *string);  /* print a width for one string */
#endif /* MULTIBYTE_SUPPORT */
32 main(int argc
, char *argv
[])
34 #ifndef MULTIBYTE_SUPPORT
37 fprintf(stderr
, "Nmh was not configured with multibyte support\n");
39 #else /* MULTIBYTE_SUPPORT */
43 if (! setlocale(LC_ALL
, "")) {
44 fprintf(stderr
, "setlocale failed, check your LC_ALL, "
45 "LC_CTYPE, and LANG environment variables\n");
51 if (strcmp(argv
[1], "--dump") == 0) {
56 fprintf(stderr
, "--dump cannot be combined with "
62 if (strcmp(argv
[1], "--ctype") == 0) {
64 fprintf(stderr
, "--ctype cannot be combined with other arguments\n");
/*
 * Process each argument.  If it begins with "U+", then try to
 * convert it to a Unicode codepoint.  Otherwise, take each
 * string and get the total width.
 */
77 for (i
= 1; i
< argc
; i
++) {
78 if (strncmp(argv
[i
], "U+", 2) == 0) {
/*
 * We're making a big assumption here that
 * wchar_t represents a Unicode codepoint.
 * That technically isn't valid unless the
 * C compiler defines __STDC_ISO_10646__, but
 * we're going to assume for now that it works.
 */
87 c
= strtoul(argv
[i
] + 2, NULL
, 16);
89 fprintf(stderr
, "Codepoint %s invalid\n",
93 printf("%d\n", wcwidth(c
));
105 fprintf(stderr
, "Usage: %s [--dump]\n", argv0
);
106 fprintf(stderr
, " %s [--ctype]\n", argv0
);
107 fprintf(stderr
, " %s U+XXXX [...]\n", argv0
);
108 fprintf(stderr
, " %s utf-8-sequence [...]\n", argv0
);
109 fprintf(stderr
, "Returns the column width of a Unicode codepoint "
110 "or UTF-8 character sequence\n");
111 fprintf(stderr
, "\t--dump\tDump complete width table\n");
112 fprintf(stderr
, "\t--ctype\tPrint wctype(3) table.\n");
118 getwidth(const char *string
)
121 int charlen
, charleft
= strlen(string
);
/*
 * In theory we should be able to use wcswidth(), but since we're
 * testing out how the format libraries behave we'll do it a character
 * at a time.
 */
130 if (mbtowc(NULL
, NULL
, 0)) {}
132 while (charleft
> 0) {
135 charlen
= mbtowc(&c
, string
, charleft
);
141 fprintf(stderr
, "Unable to convert string \"%s\"\n",
146 if ((clen
= wcwidth(c
)) < 0) {
147 fprintf(stderr
, "U+%04lX non-printable\n",
148 (unsigned long int) c
);
157 printf("%d\n", length
);
164 static unicode_range range
[] = {
165 /* https://en.wikipedia.org/wiki/Unicode#Code_point_planes_and_blocks */
166 { L
'\x0000', L
'\xff' },
167 #if WCHAR_MAX >= 0xffff
168 { L
'\x0100', L
'\xffff' },
169 #if WCHAR_MAX >= 0xfffff
170 { L
'\x10000', L
'\x14fff' },
171 { L
'\x16000', L
'\x18fff' },
172 { L
'\x1b000', L
'\x1bfff' },
173 { L
'\x1d000', L
'\x1ffff' },
174 { L
'\x20000', L
'\x2ffff' },
175 { L
'\xe0000', L
'\xe0fff' },
178 { L
'\0', L
'\0' }, /* Terminates list. */
187 int width
, lastwidth
;
189 for (r
= range
; r
->max
; r
++) {
191 for (wc
= r
->min
; wc
<= r
->max
; wc
++) {
199 if (width
!= lastwidth
) {
200 printf("%04lX - %04lX = %d\n", (unsigned long)start
,
201 (unsigned long int)wc
- 1, lastwidth
);
206 printf("%04lX - %04lX = %d\n", (unsigned long)start
,
207 (unsigned long int)wc
, lastwidth
);
/* wchar_t can be a 16-bit unsigned short. */
221 for (r
= range
; r
->max
; r
++) {
222 for (wc
= r
->min
; wc
<= r
->max
; wc
++) {
223 printf("%6x %2d %c%c%c%c%c%c%c%c%c%c%c%c\n",
225 iswcntrl(wc
) ? 'c' : '-',
226 iswprint(wc
) ? 'p' : '-',
227 iswgraph(wc
) ? 'g' : '-',
228 iswalpha(wc
) ? 'a' : '-',
229 iswupper(wc
) ? 'u' : '-',
230 iswlower(wc
) ? 'l' : '-',
231 iswdigit(wc
) ? 'd' : '-',
232 iswxdigit(wc
) ? 'x' : '-',
233 iswalnum(wc
) ? 'N' : '-',
234 iswpunct(wc
) ? '@' : '-',
235 iswspace(wc
) ? 's' : '-',
236 iswblank(wc
) ? 'b' : '-');
/* wchar_t can be a 16-bit unsigned short. */
243 #endif /* MULTIBYTE_SUPPORT */