]>
diplodocus.org Git - nmh/blob - test/getcwidth.c
2 * getcwidth - Get the OS's idea of the width of Unicode codepoints
4 * This code is Copyright (c) 2013, by the authors of nmh. See the
5 * COPYRIGHT file in the root directory of the nmh distribution for
6 * complete copyright information.
18 #ifdef MULTIBYTE_SUPPORT
24 #ifdef MULTIBYTE_SUPPORT
25 static void usage(char *);
26 static void dumpwidth(void);
27 static void dumpctype(void);
28 static void getwidth(const char *);
29 #endif /* MULTIBYTE_SUPPORT */
32 main(int argc
, char *argv
[])
34 #ifndef MULTIBYTE_SUPPORT
37 fprintf(stderr
, "Nmh was not configured with multibyte support\n");
39 #else /* MULTIBYTE_SUPPORT */
43 if (! setlocale(LC_ALL
, "")) {
44 fprintf(stderr
, "setlocale failed, check your LC_ALL, "
45 "LC_CTYPE, and LANG environment variables\n");
51 if (strcmp(argv
[1], "--dump") == 0) {
56 fprintf(stderr
, "--dump cannot be combined with "
62 if (strcmp(argv
[1], "--ctype") == 0) {
64 fprintf(stderr
, "--ctype cannot be combined with other arguments\n");
72 * Process each argument. If it begins with "U+", then try to
73 * convert it to a Unicode codepoint. Otherwise, take each
74 * string and get the total width
77 for (i
= 1; i
< argc
; i
++) {
78 if (strncmp(argv
[i
], "U+", 2) == 0) {
80 * We're making a big assumption here that
81 * wchar_t represents a Unicode codepoint.
82 * That technically isn't valid unless the
83 * C compiler defines __STDC_ISO_10646__, but
84 * we're going to assume now that it works.
87 c
= strtoul(argv
[i
] + 2, NULL
, 16);
89 fprintf(stderr
, "Codepoint %s invalid\n",
93 printf("%d\n", wcwidth(c
));
105 fprintf(stderr
, "Usage: %s [--dump]\n", argv0
);
106 fprintf(stderr
, " %s U+XXXX [...]\n", argv0
);
107 fprintf(stderr
, " %s utf-8-sequence [...]\n", argv0
);
108 fprintf(stderr
, "Returns the column width of a Unicode codepoint "
109 "or UTF-8 character sequence\n");
110 fprintf(stderr
, "\t--dump\tDump complete width table\n");
116 getwidth(const char *string
)
119 int charlen
, charleft
= strlen(string
);
123 * In theory we should be able to use wcswidth(), but since we're
124 * testing out how the format libraries behave we'll do it a character
128 if (mbtowc(NULL
, NULL
, 0)) {}
130 while (charleft
> 0) {
133 charlen
= mbtowc(&c
, string
, charleft
);
139 fprintf(stderr
, "Unable to convert string \"%s\"\n",
144 if ((clen
= wcwidth(c
)) < 0) {
145 fprintf(stderr
, "U+%04lX non-printable\n",
146 (unsigned long int) c
);
155 printf("%d\n", length
);
162 static unicode_range range
[] = {
163 /* https://en.wikipedia.org/wiki/Unicode#Code_point_planes_and_blocks */
164 { L
'\x0000', L
'\xffff' },
165 { L
'\x10000', L
'\x14fff' },
166 { L
'\x16000', L
'\x18fff' },
167 { L
'\x1b000', L
'\x1bfff' },
168 { L
'\x1d000', L
'\x1ffff' },
169 { L
'\x20000', L
'\x2ffff' },
170 { L
'\xe0000', L
'\xe0fff' },
171 { L
'\0', L
'\0' }, /* Terminates list. */
180 int width
, lastwidth
;
182 for (r
= range
; r
->max
; r
++) {
184 for (wc
= r
->min
; wc
<= r
->max
; wc
++) {
192 if (width
!= lastwidth
) {
193 printf("%04lX - %04lX = %d\n", (unsigned long)start
,
194 (unsigned long int)wc
- 1, lastwidth
);
199 printf("%04lX - %04lX = %d\n", (unsigned long)start
,
200 (unsigned long int)wc
, lastwidth
);
211 for (r
= range
; r
->max
; r
++) {
212 for (wc
= r
->min
; wc
<= r
->max
; wc
++) {
213 printf("%6x %2d %c%c%c%c%c%c%c%c%c%c%c%c\n",
215 iswcntrl(wc
) ? 'c' : '-',
216 iswprint(wc
) ? 'p' : '-',
217 iswgraph(wc
) ? 'g' : '-',
218 iswalpha(wc
) ? 'a' : '-',
219 iswupper(wc
) ? 'u' : '-',
220 iswlower(wc
) ? 'l' : '-',
221 iswdigit(wc
) ? 'd' : '-',
222 iswxdigit(wc
) ? 'x' : '-',
223 iswalnum(wc
) ? 'N' : '-',
224 iswpunct(wc
) ? '@' : '-',
225 iswspace(wc
) ? 's' : '-',
226 iswblank(wc
) ? 'b' : '-');
229 #endif /* MULTIBYTE_SUPPORT */