X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/2a4e2f15af1a697152104af8f46f0ba6aab472ff..b7a676587f92187d2270be73a1ede5be0af9f104:/sbr/fmt_scan.c diff --git a/sbr/fmt_scan.c b/sbr/fmt_scan.c index 39d549a5..8e59669d 100644 --- a/sbr/fmt_scan.c +++ b/sbr/fmt_scan.c @@ -14,6 +14,7 @@ #include #include #include +#include "unquote.h" #ifdef HAVE_SYS_TIME_H # include @@ -217,96 +218,123 @@ cptrimmed(charstring_t dest, char *str, int wid, char fill, size_t max) { } } +#ifdef MULTIBYTE_SUPPORT static void cpstripped (charstring_t dest, size_t max, char *str) { - int prevCtrl = 1; /* This is 1 so we strip out leading spaces */ - int len; -#ifdef MULTIBYTE_SUPPORT - int char_len, w; - wchar_t wide_char; - char *altstr = NULL; -#endif /* MULTIBYTE_SUPPORT */ + static bool deja_vu; + static char *oddchar; + static size_t oddlen; + static char *spacechar; + static size_t spacelen; + char *end; + bool squash; + char *src; + int srclen; + wchar_t rune; + int w; - if (!str) { - return; - } + if (!deja_vu) { + size_t two; - len = strlen(str); + deja_vu = true; -#ifdef MULTIBYTE_SUPPORT - if (mbtowc(NULL, NULL, 0)) {} /* Reset shift state */ -#endif /* MULTIBYTE_SUPPORT */ + two = MB_CUR_MAX * 2; /* Varies at run-time. */ - /* - * Process each character at a time; if we have multibyte support - * then deal with that here. - */ + oddchar = mh_xmalloc(two); + oddlen = wcstombs(oddchar, L"?", two); + assert(oddlen > 0); - while (*str != '\0' && len > 0 && max > 0) { -#ifdef MULTIBYTE_SUPPORT - char_len = mbtowc(&wide_char, str, len); + assert(wcwidth(L' ') == 1); /* Need to pad in ones. */ + spacechar = mh_xmalloc(two); + spacelen = wcstombs(spacechar, L" ", two); + assert(spacelen > 0); + } - /* - * If mbrtowc() failed, then we have a character that isn't valid - * in the current encoding, or len wasn't enough for the whole - * multi-byte rune to be read. Replace it with a '?'. We do that by - * setting the alstr variable to the value of the replacement string; - * altstr is used below when the bytes are copied into the output - * buffer. - */ - if (char_len < 0) { - altstr = "?"; - char_len = mbtowc(&wide_char, altstr, 1); - } + if (!str) + return; /* It's unclear why no padding in this case. */ + end = str + strlen(str); + + if (mbtowc(NULL, NULL, 0)) + {} /* Reset shift state. */ + + squash = true; /* Trim `space' or `cntrl' from the start. */ + while (max) { + if (!*str) + return; /* It's unclear why no padding in this case. */ + + srclen = mbtowc(&rune, str, end - str); + if (srclen == -1) { + /* Invalid rune, or not enough bytes to finish it. */ + rune = L'?'; + src = oddchar; + srclen = oddlen; + str++; /* Skip one byte. */ + } else { + src = str; + str += srclen; + } - if (char_len <= 0) { - break; - } + if (iswspace(rune) || iswcntrl(rune)) { + if (squash) + continue; /* Amidst a run of these. */ + rune = L' '; + src = spacechar; + srclen = spacelen; + squash = true; + } else + squash = false; + + w = wcwidth(rune); + if (w == -1) { + rune = L'?'; + w = wcwidth(rune); + assert(w != -1); + src = oddchar; + srclen = oddlen; + } - len -= char_len; - - if (iswcntrl(wide_char) || iswspace(wide_char)) { - str += char_len; -#else /* MULTIBYTE_SUPPORT */ - int c = (unsigned char) *str; - len--; - if (iscntrl(c) || isspace(c)) { - str++; -#endif /* MULTIBYTE_SUPPORT */ - if (! prevCtrl) { - charstring_push_back (dest, ' '); - --max; - } + if ((size_t)w > max) { + /* No room for rune; pad. */ + while (max--) + charstring_push_back_chars(dest, spacechar, spacelen, 1); + return; + } - prevCtrl = 1; - continue; - } + charstring_push_back_chars(dest, src, srclen, w); + max -= w; + } +} +#endif + +#ifndef MULTIBYTE_SUPPORT +static void +cpstripped (charstring_t dest, size_t max, char *str) +{ + bool squash; + int c; - prevCtrl = 0; + if (!str) + return; -#ifdef MULTIBYTE_SUPPORT - w = wcwidth(wide_char); - assert(w >= 0); - if (max >= (size_t) w) { - charstring_push_back_chars (dest, altstr ? altstr : str, char_len, w); - max -= w; - str += char_len; - altstr = NULL; - } else { - /* Not enough width available for the last character. Output - space(s) to fill. */ - while (max-- > 0) { - charstring_push_back (dest, ' '); - } - break; - } -#else /* MULTIBYE_SUPPORT */ - charstring_push_back (dest, *str++); - --max; -#endif /* MULTIBYTE_SUPPORT */ + squash = true; /* Strip leading cases. */ + while (max--) { + c = (unsigned char)*str++; + if (!c) + return; + + if (isspace(c) || iscntrl(c)) { + if (squash) + continue; + c = ' '; + squash = true; + } else + squash = false; + + charstring_push_back(dest, (char)c); } } +#endif static char *lmonth[] = { "January", "February","March", "April", "May", "June", "July", "August",