diplodocus.org Git - nmh/blob - test/getcwidth.c

   1 /*
   2  * getcwidth - Get the OS's idea of the width of Unicode codepoints
   3  *
   4  * This code is Copyright (c) 2013, by the authors of nmh.  See the
   5  * COPYRIGHT file in the root directory of the nmh distribution for
   6  * complete copyright information.
   7  */
   8
   9 #include <stdio.h>
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <errno.h>
  13
  14 #ifdef HAVE_CONFIG_H
  15 #include <config.h>
  16 #endif
  17
  18 #ifdef MULTIBYTE_SUPPORT
  19 #include <locale.h>
  20 #include <wchar.h>
  21 #endif
  22
  23 #ifdef MULTIBYTE_SUPPORT
  24 static void usage(char *);
  25 static void dumpwidth(void);
  26 static void getwidth(const char *);
  27 #endif /* MULTIBYTE_SUPPORT */
  28
  29 int
  30 main(int argc, char *argv[])
  31 {
  32 #ifndef MULTIBYTE_SUPPORT
  33         (void) argc;
  34         (void) argv;
  35         fprintf(stderr, "Nmh was not configured with multibyte support\n");
  36         exit(1);
  37 #else /* MULTIBYTE_SUPPORT */
  38         wchar_t c;
  39         int i;
  40
  41         setlocale(LC_ALL, "");
  42
  43         if (argc < 2)
  44                 usage(argv[0]);
  45
  46         if (strcmp(argv[1], "--dump") == 0) {
  47                 if (argc == 2) {
  48                         dumpwidth();
  49                         exit(0);
  50                 } else {
  51                         fprintf(stderr, "--dump cannot be combined with "
  52                                 "other arguments\n");
  53                         exit(1);
  54                 }
  55         }
  56
  57         /*
  58          * Process each argument.  If it begins with "U+", then try to
  59          * convert it to a Unicode codepoint.  Otherwise, take each
  60          * string and get the total width
  61          */
  62
  63         for (i = 1; i < argc; i++) {
  64                 if (strncmp(argv[i], "U+", 2) == 0) {
  65                         /*
  66                          * We're making a big assumption here that
  67                          * wchar_t represents a Unicode codepoint.
  68                          * That technically isn't valid unless the
  69                          * C compiler defines __STDC_ISO_10646__, but
  70                          * we're going to assume now that it works.
  71                          */
  72                         errno = 0;
  73                         c = strtoul(argv[i] + 2, NULL, 16);
  74                         if (errno) {
  75                                 fprintf(stderr, "Codepoint %s invalid\n",
  76                                         argv[i]);
  77                                 continue;
  78                         }
  79                         printf("%d\n", wcwidth(c));
  80                 } else {
  81                         getwidth(argv[i]);
  82                 }
  83         }
  84
  85         exit(0);
  86 }
  87
  88 static void
  89 usage(char *argv0)
  90 {
  91         fprintf(stderr, "Usage: %s [--dump]\n", argv0);
  92         fprintf(stderr, "       %s U+XXXX [...]\n", argv0);
  93         fprintf(stderr, "       %s utf-8-sequence [...]\n", argv0);
  94         fprintf(stderr, "Returns the column width of a Unicode codepoint "
  95                 "or UTF-8 character sequence\n");
  96         fprintf(stderr, "\t--dump\tDump complete width table\n");
  97
  98         exit(1);
  99 }
 100
 101 static void
 102 getwidth(const char *string)
 103 {
 104         wchar_t c;
 105         int charlen, charleft = strlen(string);
 106         int length = 0;
 107
 108         /*
 109          * In theory we should be able to use wcswidth(), but since we're
 110          * testing out how the format libraries behave we'll do it a character
 111          * at a time.
 112          */
 113
 114         mbtowc(NULL, NULL, 0);
 115
 116         while (charleft > 0) {
 117                 int clen;
 118
 119                 charlen = mbtowc(&c, string, charleft);
 120
 121                 if (charlen == 0)
 122                         break;
 123
 124                 if (charlen < 0) {
 125                         fprintf(stderr, "Unable to convert string \"%s\"\n",
 126                                 string);
 127                         return;
 128                 }
 129
 130                 if ((clen = wcwidth(c)) < 0) {
 131                         fprintf(stderr, "U+%04lX non-printable\n",
 132                                 (unsigned long int) c);
 133                         return;
 134                 }
 135
 136                 length += clen;
 137                 string += charlen;
 138                 charleft -= charlen;
 139         }
 140
 141         printf("%d\n", length);
 142 }
 143
 144 static void
 145 dumpwidth(void)
 146 {
 147         wchar_t wc, low;
 148         int width, lastwidth;
 149
 150         for (wc = 0, low = 1, lastwidth = wcwidth(1); wc < 0xffff; wc++) {
 151                 width = wcwidth(wc+1);
 152                 if (width != lastwidth) {
 153                         printf("%04lX - %04lX = %d\n", (unsigned long int) low,
 154                                (unsigned long int) (wc), lastwidth);
 155                         low = wc+1;
 156                 }
 157                 lastwidth = width;
 158         }
 159
 160         width = wcwidth(wc);
 161         if (width == lastwidth)
 162                 printf("%04lX - %04lX = %d\n", (unsigned long int) low,
 163                        (unsigned long int) (wc), width);
 164 #endif /* MULTIBYTE_SUPPORT */
 165 }