- /* skip any initial control characters or spaces */
- while ((c = (unsigned char) *s) &&
-#ifdef LOCALE
- (iscntrl(c) || isspace(c)))
-#else
- (c <= 32))
-#endif
- s++;
-
- /* compact repeated control characters and spaces into a single space */
- while((c = (unsigned char) *s++) && *start < end)
- if (!iscntrl(c) && !isspace(c))
- *(*start)++ = c;
- else {
- while ((c = (unsigned char) *s) &&
-#ifdef LOCALE
- (iscntrl(c) || isspace(c)))
-#else
- (c <= 32))
-#endif
- s++;
- *(*start)++ = ' ';
+ len = strlen(str);
+
+#ifdef MULTIBYTE_SUPPORT
+ if (mbtowc(NULL, NULL, 0)) {} /* Reset shift state */
+#endif /* MULTIBYTE_SUPPORT */
+
+ /*
+ * Process one character at a time; if we have multibyte support
+ * then deal with that here.
+ */
+
+ while (*str != '\0' && len > 0 && max > 0) {
+#ifdef MULTIBYTE_SUPPORT
+ char_len = mbtowc(&wide_char, str, len);
+
+ /*
+ * If mbtowc() failed, then we have a character that isn't valid
+ * in the current encoding, or len wasn't enough for the whole
+ * multi-byte rune to be read. Replace it with a '?'. We do that by
+ * setting the altstr variable to the value of the replacement string;
+ * altstr is used below when the bytes are copied into the output
+ * buffer.
+ */
+ if (char_len < 0) {
+ altstr = "?";
+ char_len = mbtowc(&wide_char, altstr, 1);
+ }
+
+ if (char_len <= 0) {
+ break;
+ }
+
+ len -= char_len;
+
+ if (iswcntrl(wide_char) || iswspace(wide_char)) {
+ str += char_len;
+#else /* MULTIBYTE_SUPPORT */
+ int c = (unsigned char) *str;
+ len--;
+ if (iscntrl(c) || isspace(c)) {
+ str++;
+#endif /* MULTIBYTE_SUPPORT */
+ if (! prevCtrl) {
+ charstring_push_back (dest, ' ');
+ --max;
+ }
+
+ prevCtrl = 1;
+ continue;
+ }
+
+ prevCtrl = 0;
+
+#ifdef MULTIBYTE_SUPPORT
+ w = wcwidth(wide_char);
+ assert(w >= 0);
+ if (max >= (size_t) w) {
+ charstring_push_back_chars (dest, altstr ? altstr : str, char_len, w);
+ max -= w;
+ str += char_len;
+ altstr = NULL;
+ } else {
+ /* Not enough width available for the last character. Output
+ space(s) to fill. */
+ while (max-- > 0) {
+ charstring_push_back (dest, ' ');
+ }
+ break;