diplodocus.org Git - nmh/blob - test/getcwidth.c

   1 /*
   2  * getcwidth - Get the OS's idea of the width of Unicode codepoints
   3  *
   4  * This code is Copyright (c) 2013, by the authors of nmh.  See the
   5  * COPYRIGHT file in the root directory of the nmh distribution for
   6  * complete copyright information.
   7  */
   8
   9 #include <stdio.h>
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <errno.h>
  13
  14 #ifdef HAVE_CONFIG_H
  15 #include <config.h>
  16 #endif
  17
  18 #ifdef MULTIBYTE_SUPPORT
  19 #include <locale.h>
  20 #include <wchar.h>
  21 #endif
  22
  23 #ifdef MULTIBYTE_SUPPORT
  24 static void usage(char *);
  25 static void dumpwidth(void);
  26 static void getwidth(const char *);
  27 #endif /* MULTIBYTE_SUPPORT */
  28
  29 int
  30 main(int argc, char *argv[])
  31 {
  32 #ifndef MULTIBYTE_SUPPORT
  33         fprintf(stderr, "Nmh was not configured with multibyte support\n");
  34         exit(1);
  35 #else /* MULTIBYTE_SUPPORT */
  36         wchar_t c;
  37         int i;
  38
  39         setlocale(LC_ALL, "");
  40
  41         if (argc < 2)
  42                 usage(argv[0]);
  43
  44         if (strcmp(argv[1], "--dump") == 0) {
  45                 if (argc == 2) {
  46                         dumpwidth();
  47                         exit(0);
  48                 } else {
  49                         fprintf(stderr, "--dump cannot be combined with "
  50                                 "other arguments\n");
  51                         exit(1);
  52                 }
  53         }
  54
  55         /*
  56          * Process each argument.  If it begins with "U+", then try to
  57          * convert it to a Unicode codepoint.  Otherwise, take each
  58          * string and get the total width
  59          */
  60
  61         for (i = 1; i < argc; i++) {
  62                 if (strncmp(argv[i], "U+", 2) == 0) {
  63                         /*
  64                          * We're making a big assumption here that
  65                          * wchar_t represents a Unicode codepoint.
  66                          * That technically isn't valid unless the
  67                          * C compiler defines __STDC_ISO_10646__, but
  68                          * we're going to assume now that it works.
  69                          */
  70                         errno = 0;
  71                         c = strtoul(argv[i] + 2, NULL, 16);
  72                         if (errno) {
  73                                 fprintf(stderr, "Codepoint %s invalid\n",
  74                                         argv[i]);
  75                                 continue;
  76                         }
  77                         printf("%d\n", wcwidth(c));
  78                 } else {
  79                         getwidth(argv[i]);
  80                 }
  81         }
  82
  83         exit(0);
  84 }
  85
  86 static void
  87 usage(char *argv0)
  88 {
  89         fprintf(stderr, "Usage: %s [--dump]\n", argv0);
  90         fprintf(stderr, "       %s U+XXXX [...]\n", argv0);
  91         fprintf(stderr, "       %s utf-8-sequence [...]\n", argv0);
  92         fprintf(stderr, "Returns the column width of a Unicode codepoint "
  93                 "or UTF-8 character sequence\n");
  94         fprintf(stderr, "\t--dump\tDump complete width table\n");
  95
  96         exit(1);
  97 }
  98
  99 static void
 100 getwidth(const char *string)
 101 {
 102         wchar_t c;
 103         int charlen, charleft = strlen(string);
 104         int length = 0;
 105
 106         /*
 107          * In theory we should be able to use wcswidth(), but since we're
 108          * testing out how the format libraries behave we'll do it a character
 109          * at a time.
 110          */
 111
 112         mbtowc(NULL, NULL, 0);
 113
 114         while (charleft > 0) {
 115                 int clen;
 116
 117                 charlen = mbtowc(&c, string, charleft);
 118
 119                 if (charlen == 0)
 120                         break;
 121
 122                 if (charlen < 0) {
 123                         fprintf(stderr, "Unable to convert string \"%s\"\n",
 124                                 string);
 125                         return;
 126                 }
 127
 128                 if ((clen = wcwidth(c)) < 0) {
 129                         fprintf(stderr, "U+%04lX non-printable\n",
 130                                 (unsigned long int) c);
 131                         return;
 132                 }
 133
 134                 length += clen;
 135                 string += charlen;
 136                 charleft -= charlen;
 137         }
 138
 139         printf("%d\n", length);
 140 }
 141
 142 static void
 143 dumpwidth(void)
 144 {
 145         wchar_t wc, low;
 146         int width, lastwidth;
 147
 148         for (wc = 0, low = 1, lastwidth = wcwidth(1); wc < 0xffff; wc++) {
 149                 width = wcwidth(wc+1);
 150                 if (width != lastwidth) {
 151                         printf("%04lX - %04lX = %d\n", (unsigned long int) low,
 152                                (unsigned long int) (wc), lastwidth);
 153                         low = wc+1;
 154                 }
 155                 lastwidth = width;
 156         }
 157
 158         width = wcwidth(wc);
 159         if (width == lastwidth)
 160                 printf("%04lX - %04lX = %d\n", (unsigned long int) low,
 161                        (unsigned long int) (wc), width);
 162 }
 163 #endif /* MULTIBYTE_SUPPORT */