]> diplodocus.org Git - nmh/blob - test/getcwidth.c
Make sure we don't modify the original text when processing the disposition
[nmh] / test / getcwidth.c
1 /*
2 * getcwidth - Get the OS's idea of the width of Unicode codepoints
3 *
4 * This code is Copyright (c) 2013, by the authors of nmh. See the
5 * COPYRIGHT file in the root directory of the nmh distribution for
6 * complete copyright information.
7 */
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <errno.h>
13
14 #ifdef HAVE_CONFIG_H
15 #include <config.h>
16 #endif
17
18 #ifdef MULTIBYTE_SUPPORT
19 #include <locale.h>
20 #include <wchar.h>
21 #endif
22
#ifdef MULTIBYTE_SUPPORT
/*
 * Forward declarations for the helpers defined after main().  They are
 * only compiled (and only called) when multibyte support is enabled.
 */
static void usage(char *argv0);
static void dumpwidth(void);
static void getwidth(const char *string);
#endif /* MULTIBYTE_SUPPORT */
28
29 int
30 main(int argc, char *argv[])
31 {
32 #ifndef MULTIBYTE_SUPPORT
33 fprintf(stderr, "Nmh was not configured with multibyte support\n");
34 exit(1);
35 #else /* MULTIBYTE_SUPPORT */
36 wchar_t c;
37 int i;
38
39 setlocale(LC_ALL, "");
40
41 if (argc < 2)
42 usage(argv[0]);
43
44 if (strcmp(argv[1], "--dump") == 0) {
45 if (argc == 2) {
46 dumpwidth();
47 exit(0);
48 } else {
49 fprintf(stderr, "--dump cannot be combined with "
50 "other arguments\n");
51 exit(1);
52 }
53 }
54
55 /*
56 * Process each argument. If it begins with "U+", then try to
57 * convert it to a Unicode codepoint. Otherwise, take each
58 * string and get the total width
59 */
60
61 for (i = 1; i < argc; i++) {
62 if (strncmp(argv[i], "U+", 2) == 0) {
63 /*
64 * We're making a big assumption here that
65 * wchar_t represents a Unicode codepoint.
66 * That technically isn't valid unless the
67 * C compiler defines __STDC_ISO_10646__, but
68 * we're going to assume now that it works.
69 */
70 errno = 0;
71 c = strtoul(argv[i] + 2, NULL, 16);
72 if (errno) {
73 fprintf(stderr, "Codepoint %s invalid\n",
74 argv[i]);
75 continue;
76 }
77 printf("%d\n", wcwidth(c));
78 } else {
79 getwidth(argv[i]);
80 }
81 }
82
83 exit(0);
84 }
85
/*
 * Print the command synopsis to standard error and terminate the
 * process with exit status 1.  argv0 is the program name shown in the
 * synopsis lines.  This function does not return.
 */
static void
usage(char *argv0)
{
	/* The three invocation forms, each prefixed by the program name. */
	fprintf(stderr,
		"Usage: %s [--dump]\n"
		" %s U+XXXX [...]\n"
		" %s utf-8-sequence [...]\n",
		argv0, argv0, argv0);

	/* Fixed explanatory text; no formatting needed. */
	fputs("Returns the column width of a Unicode codepoint "
	      "or UTF-8 character sequence\n", stderr);
	fputs("\t--dump\tDump complete width table\n", stderr);

	exit(1);
}
98
/*
 * Print the total column width of a multibyte string on standard output.
 *
 * The string is decoded one character at a time with mbtowc() and the
 * wcwidth() of each character is summed.  If a character cannot be
 * decoded, or wcwidth() reports it as non-printable, a message is
 * written to standard error and nothing is printed on standard output.
 */
static void
getwidth(const char *string)
{
	const char *cursor = string;	/* next undecoded byte */
	int remaining = strlen(string);	/* bytes left after cursor */
	int total = 0;			/* accumulated column width */

	/*
	 * In theory wcswidth() could do all of this in one call, but the
	 * point is to see how the format libraries behave, so the string
	 * is walked a character at a time.
	 */

	/* Reset mbtowc()'s internal conversion state before starting. */
	mbtowc(NULL, NULL, 0);

	while (remaining > 0) {
		wchar_t wc;
		int nbytes = mbtowc(&wc, cursor, remaining);
		int cols;

		/* A zero return means the terminating NUL was reached. */
		if (nbytes == 0)
			break;

		if (nbytes < 0) {
			/* Reports the text from the failing position on. */
			fprintf(stderr, "Unable to convert string \"%s\"\n",
				cursor);
			return;
		}

		cols = wcwidth(wc);
		if (cols < 0) {
			fprintf(stderr, "U+%04lX non-printable\n",
				(unsigned long int) wc);
			return;
		}

		total += cols;
		cursor += nbytes;
		remaining -= nbytes;
	}

	printf("%d\n", total);
}
141
/*
 * Print a run-length table of wcwidth() results for U+0001 through
 * U+FFFF on standard output.  Each line has the form
 * "LOW - HIGH = WIDTH" and covers a maximal range of consecutive
 * codepoints that share the same width.
 */
static void
dumpwidth(void)
{
	wchar_t first = 1;		/* start of the current run */
	wchar_t cur = 0;		/* codepoint before the one examined */
	int prev = wcwidth(1);		/* width shared by the current run */

	while (cur < 0xffff) {
		int next = wcwidth(cur + 1);

		/* The width changed: the run ended at cur, so report it. */
		if (next != prev) {
			printf("%04lX - %04lX = %d\n",
			       (unsigned long int) first,
			       (unsigned long int) cur, prev);
			first = cur + 1;
		}

		prev = next;
		cur++;
	}

	/* cur is now 0xffff; emit the run that is still open. */
	if (wcwidth(cur) == prev)
		printf("%04lX - %04lX = %d\n", (unsigned long int) first,
		       (unsigned long int) cur, prev);
}
163 #endif /* MULTIBYTE_SUPPORT */