diplodocus.org Git - nmh/blob - test/getcwidth.c
add $(kibi) function, as complement to %(kilo)
[nmh] / test / getcwidth.c
1 /*
2 * getcwidth - Get the OS's idea of the width of Unicode codepoints
3 *
4 * This code is Copyright (c) 2013, by the authors of nmh. See the
5 * COPYRIGHT file in the root directory of the nmh distribution for
6 * complete copyright information.
7 */
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <errno.h>
13
14 #ifdef HAVE_CONFIG_H
15 #include <config.h>
16 #endif
17
18 #ifdef MULTIBYTE_SUPPORT
19 #include <locale.h>
20 #include <wchar.h>
21 #endif
22
23 #ifdef MULTIBYTE_SUPPORT
24 static void usage(char *);
25 static void dumpwidth(void);
26 static void getwidth(const char *);
27 #endif /* MULTIBYTE_SUPPORT */
28
29 int
30 main(int argc, char *argv[])
31 {
32 #ifndef MULTIBYTE_SUPPORT
33 (void) argc;
34 (void) argv;
35 fprintf(stderr, "Nmh was not configured with multibyte support\n");
36 exit(1);
37 #else /* MULTIBYTE_SUPPORT */
38 wchar_t c;
39 int i;
40
41 setlocale(LC_ALL, "");
42
43 if (argc < 2)
44 usage(argv[0]);
45
46 if (strcmp(argv[1], "--dump") == 0) {
47 if (argc == 2) {
48 dumpwidth();
49 exit(0);
50 } else {
51 fprintf(stderr, "--dump cannot be combined with "
52 "other arguments\n");
53 exit(1);
54 }
55 }
56
57 /*
58 * Process each argument. If it begins with "U+", then try to
59 * convert it to a Unicode codepoint. Otherwise, take each
60 * string and get the total width
61 */
62
63 for (i = 1; i < argc; i++) {
64 if (strncmp(argv[i], "U+", 2) == 0) {
65 /*
66 * We're making a big assumption here that
67 * wchar_t represents a Unicode codepoint.
68 * That technically isn't valid unless the
69 * C compiler defines __STDC_ISO_10646__, but
70 * we're going to assume now that it works.
71 */
72 errno = 0;
73 c = strtoul(argv[i] + 2, NULL, 16);
74 if (errno) {
75 fprintf(stderr, "Codepoint %s invalid\n",
76 argv[i]);
77 continue;
78 }
79 printf("%d\n", wcwidth(c));
80 } else {
81 getwidth(argv[i]);
82 }
83 }
84
85 exit(0);
86 }
87
/*
 * Write the command synopsis to stderr and terminate the process with
 * exit status 1.  argv0 is the program name shown on each synopsis line.
 * This function does not return.
 */
static void
usage(char *argv0)
{
    fprintf(stderr,
            "Usage: %s [--dump]\n"
            " %s U+XXXX [...]\n"
            " %s utf-8-sequence [...]\n"
            "Returns the column width of a Unicode codepoint "
            "or UTF-8 character sequence\n"
            "\t--dump\tDump complete width table\n",
            argv0, argv0, argv0);

    exit(1);
}
100
/*
 * Print, on stdout, the total column width of the multibyte string
 * `string`, computed one character at a time with mbtowc() and
 * wcwidth().
 *
 * If a character cannot be converted, or wcwidth() reports it as
 * non-printable, a diagnostic is written to stderr and nothing is
 * printed on stdout.
 */
static void
getwidth(const char *string)
{
    const char *cursor = string;
    int remaining = strlen(string);
    int total = 0;
    wchar_t wc;

    /*
     * In theory wcswidth() could do all of this, but since the point is
     * to test how the format libraries behave, go a character at a time.
     */

    /* Put mbtowc() back into its initial shift state. */
    mbtowc(NULL, NULL, 0);

    while (remaining > 0) {
        int nbytes = mbtowc(&wc, cursor, remaining);
        int cols;

        if (nbytes < 0) {
            /* Reports the part of the input not yet consumed. */
            fprintf(stderr, "Unable to convert string \"%s\"\n",
                    cursor);
            return;
        }

        /* A zero return means the terminating NUL was reached. */
        if (nbytes == 0)
            break;

        cols = wcwidth(wc);
        if (cols < 0) {
            fprintf(stderr, "U+%04lX non-printable\n",
                    (unsigned long int) wc);
            return;
        }

        total += cols;
        cursor += nbytes;
        remaining -= nbytes;
    }

    printf("%d\n", total);
}
143
144 static void
145 dumpwidth(void)
146 {
147 wchar_t wc, low;
148 int width, lastwidth;
149
150 for (wc = 0, low = 1, lastwidth = wcwidth(1); wc < 0xffff; wc++) {
151 width = wcwidth(wc+1);
152 if (width != lastwidth) {
153 printf("%04lX - %04lX = %d\n", (unsigned long int) low,
154 (unsigned long int) (wc), lastwidth);
155 low = wc+1;
156 }
157 lastwidth = width;
158 }
159
160 width = wcwidth(wc);
161 if (width == lastwidth)
162 printf("%04lX - %04lX = %d\n", (unsigned long int) low,
163 (unsigned long int) (wc), width);
164 #endif /* MULTIBYTE_SUPPORT */
165 }