]> diplodocus.org Git - nmh/blobdiff - uip/mhparse.c
Change things so we only supply a form filename; most programs don't do
[nmh] / uip / mhparse.c
index a9ad26e78eefe5c175d4e5ef81ee47da6dd2da23..444bf7d563caccf7b3c6bab236cd028092736c0e 100644 (file)
@@ -23,8 +23,6 @@
 
 extern int debugsw;
 
-extern pid_t xpid;     /* mhshowsbr.c  */
-
 /* cache policies */
 extern int rcachesw;   /* mhcachesbr.c */
 extern int wcachesw;   /* mhcachesbr.c */
@@ -138,6 +136,8 @@ static int readDigest (CT, char *);
 static int get_leftover_mp_content (CT, int);
 static int InitURL (CT);
 static int openURL (CT, char **);
+static int parse_header_attrs (const char *, const char *, char **, PM *,
+                              PM *, char **);
 static size_t param_len(PM, int, size_t, int *, int *, size_t *);
 static size_t encode_param(PM, char *, size_t, size_t, size_t, int);
 static size_t normal_param(PM, char *, size_t, size_t, size_t);
@@ -181,19 +181,6 @@ struct str2init str2methods[] = {
 };
 
 
-int
-pidcheck (int status)
-{
-    if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT)
-       return status;
-
-    fflush (stdout);
-    fflush (stderr);
-    done (1);
-    return 1;
-}
-
-
 /*
  * Main entry point for parsing a MIME message or file.
  * It returns the Content structure for the top level
@@ -595,88 +582,6 @@ add_header (CT ct, char *name, char *value)
 }
 
 
-/* Make sure that buf contains at least one appearance of name,
-   followed by =.  If not, insert both name and value, just after
-   first semicolon, if any.  Note that name should not contain a
-   trailing =. And quotes will be added around the value.  Typical
-   usage:  make sure that a Content-Disposition header contains
-   filename="foo".  If it doesn't and value does, use value from
-   that. */
-static char *
-incl_name_value (char *buf, char *name, char *value) {
-    char *newbuf = buf;
-
-    /* Assume that name is non-null. */
-    if (buf && value) {
-       char *name_plus_equal = concat (name, "=", NULL);
-
-       if (! strstr (buf, name_plus_equal)) {
-           char *insertion;
-           char *cp, *prefix, *suffix;
-
-           /* Trim trailing space, esp. newline. */
-           for (cp = &buf[strlen (buf) - 1];
-                cp >= buf && isspace ((unsigned char) *cp);
-                --cp) {
-               *cp = '\0';
-           }
-
-           insertion = concat ("; ", name, "=", "\"", value, "\"", NULL);
-
-           /* Insert at first semicolon, if any.  If none, append to
-              end. */
-           prefix = add (buf, NULL);
-           if ((cp = strchr (prefix, ';'))) {
-               suffix = concat (cp, NULL);
-               *cp = '\0';
-               newbuf = concat (prefix, insertion, suffix, "\n", NULL);
-               free (suffix);
-           } else {
-               /* Append to end. */
-               newbuf = concat (buf, insertion, "\n", NULL);
-           }
-
-           free (prefix);
-           free (insertion);
-           free (buf);
-       }
-
-       free (name_plus_equal);
-    }
-
-    return newbuf;
-}
-
-/* Extract just name_suffix="foo", if any, from value. If there isn't
-   one, return the entire value.  Note that, for example, a name_suffix
-   of name will match filename="foo", and return foo. */
-static char *
-extract_name_value (char *name_suffix, char *value) {
-    char *extracted_name_value = value;
-    char *name_suffix_plus_quote = concat (name_suffix, "=\"", NULL);
-    char *name_suffix_equals = strstr (value, name_suffix_plus_quote);
-    char *cp;
-
-    free (name_suffix_plus_quote);
-    if (name_suffix_equals) {
-       char *name_suffix_begin;
-
-       /* Find first \". */
-       for (cp = name_suffix_equals; *cp != '"'; ++cp) /* empty */;
-       name_suffix_begin = ++cp;
-       /* Find second \". */
-       for (; *cp != '"'; ++cp) /* empty */;
-
-       extracted_name_value = mh_xmalloc (cp - name_suffix_begin + 1);
-       memcpy (extracted_name_value,
-               name_suffix_begin,
-               cp - name_suffix_begin);
-       extracted_name_value[cp - name_suffix_begin] = '\0';
-    }
-
-    return extracted_name_value;
-}
-
 /*
  * Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
  * directives.  Fills in the information of the CTinfo structure.
@@ -915,14 +820,11 @@ magic_skip:
                have a *filename=, extract it from the magic contents.
                The r1bindex call skips any leading directory
                components. */
-            if (ct->c_dispo)
-                ct->c_dispo =
-                    incl_name_value (ct->c_dispo,
-                                     "filename",
-                                     r1bindex (extract_name_value ("name",
-                                                                   ci->
-                                                                   ci_magic),
-                                               '/'));
+            if (ct->c_dispo_type &&
+               !get_param(ct->c_dispo_first, "filename", '_', 1)) {
+               add_param(&ct->c_dispo_first, &ct->c_dispo_last, "filename",
+                         r1bindex(ci->ci_magic, '/'), 0);
+           }
         }
        else
            advise (NULL,
@@ -2561,13 +2463,6 @@ openFTP (CT ct, char **file)
        return NOTOK;
     }
 
-    if (xpid) {
-       if (xpid < 0)
-           xpid = -xpid;
-       pidcheck (pidwait (xpid, NOTOK));
-       xpid = 0;
-    }
-
     /* Get the buffer ready to go */
     bp = buffer;
     buflen = sizeof(buffer);
@@ -2767,13 +2662,6 @@ openMail (CT ct, char **file)
        return NOTOK;
     }
 
-    if (xpid) {
-       if (xpid < 0)
-           xpid = -xpid;
-       pidcheck (pidwait (xpid, NOTOK));
-       xpid = 0;
-    }
-
     /* Get buffer ready to go */
     bp = buffer;
     buflen = sizeof(buffer);
@@ -2904,13 +2792,6 @@ openURL (CT ct, char **file)
        return NOTOK;
     }
 
-    if (xpid) {
-       if (xpid < 0)
-           xpid = -xpid;
-       pidcheck (pidwait (xpid, NOTOK));
-       xpid = 0;
-    }
-
     ce->ce_unlink = (*file == NULL);
     caching = 0;
     cachefile[0] = '\0';
@@ -3309,16 +3190,49 @@ get_ce_method (const char *method) {
     return NULL;
 }
 
-int
+/*
+ * Parse a series of MIME attributes (or parameters) given a header as
+ * input.
+ *
+ * Arguments include:
+ *
+ * filename     - Name of input file (for error messages)
+ * fieldname    - Name of field being processed
+ * header_attrp - Pointer to pointer of the beginning of the MIME attributes.
+ *                Updated to point to end of attributes when finished.
+ * param_head   - Pointer to head of parameter list
+ * param_tail   - Pointer to tail of parameter list
+ * commentp     - Pointer to header comment pointer (may be NULL)
+ *
+ * Returns OK if parsing was successful, NOTOK if parsing failed, and
+ * DONE to indicate a benign error (minor parsing error, but the program
+ * should continue).
+ */
+
+static int
 parse_header_attrs (const char *filename, const char *fieldname,
                    char **header_attrp, PM *param_head, PM *param_tail,
                    char **commentp)
 {
     char *cp = *header_attrp;
     PM pm;
+    struct sectlist {
+       char *value;
+       int index;
+       int len;
+       struct sectlist *next;
+    } *sp, *sp2;
+    struct parmlist {
+       char *name;
+       char *charset;
+       char *lang;
+       struct sectlist *sechead;
+       struct parmlist *next;
+    } *pp, *pp2, *phead = NULL;
 
     while (*cp == ';') {
-       char *dp, *vp, *up, c;
+       char *dp, *vp, *up, *nameptr, *valptr, *charset = NULL, *lang = NULL;
+       int encoded = 0, partial = 0, len = 0, index = 0;
 
        cp++;
        while (isspace ((unsigned char) *cp))
@@ -3352,60 +3266,297 @@ parse_header_attrs (const char *filename, const char *fieldname,
            return NOTOK;
        }
 
-       pm = mh_xmalloc(sizeof(*pm));
-       memset(pm, 0, sizeof(*pm));
+       /*
+        * To handle RFC 2231, we have to deal with the following extensions:
+        *
+        * name*=encoded-value
+        * name*<N>=part-N-of-a-parameter-value
+        * name*<N>*=encoded-part-N-of-a-parameter-value
+        *
+        * So the rule is:
+        * If there's a * right before the equal sign, it's encoded.
+        * If there's a * and one or more digits, then it's section N.
+        *
+        * Remember we can have one or the other, or both.  cp points to
+        * beginning of name, up points past the last character in the
+        * parameter name.
+        */
+
+       for (vp = cp; vp < up; vp++) {
+           if (*vp == '*' && vp < up - 1) {
+               partial = 1;
+               continue;
+           } else if (*vp == '*' && vp == up - 1) {
+               encoded = 1;
+           } else if (partial) {
+               if (isdigit((unsigned char) *vp))
+                   index = *vp - '0' + index * 10;
+               else {
+                   advise (NULL, "invalid parameter index in message %s's "
+                           "%s: field\n%*s(parameter %s)", filename,
+                           fieldname, strlen(invo_name) + 2, "", cp);
+                   return NOTOK;
+               }
+           } else {
+               len++;
+           }
+       }
+
+       /*
+        * Break out the parameter name and value sections and allocate
+        * memory for each.
+        */
+
+       nameptr = mh_xmalloc(len + 1);
+       strncpy(nameptr, cp, len);
+       nameptr[len] = '\0';
 
-       /* This is all mega-bozo and needs cleanup */
-       vp = (pm->pm_name = add (cp, NULL)) + (up - cp);
-       *vp = '\0';
        for (dp++; isspace ((unsigned char) *dp);)
            dp++;
 
-       /* Now store the attribute value. */
+       if (encoded) {
+           /*
+            * Single quotes delimit the character set and language tag.
+            * They are required on the first section (or a complete
+            * parameter).
+            */
+           if (index == 0) {
+               vp = dp;
+               while (*vp != '\'' && !isspace((unsigned char) *vp) &&
+                                                       *vp != '\0')
+                   vp++;
+               if (*vp == '\'') {
+                   if (vp != dp) {
+                       len = vp - dp;
+                       charset = mh_xmalloc(len + 1);
+                       strncpy(charset, dp, len);
+                       charset[len] = '\0';
+                   } else {
+                       charset = NULL;
+                   }
+                   vp++;
+               } else {
+                   advise(NULL, "missing charset in message %s's %s: "
+                          "field\n%*s(parameter %s)", filename, fieldname,
+                          strlen(invo_name) + 2, "", nameptr);
+                   free(nameptr);
+                   return NOTOK;
+               }
+               dp = vp;
+
+               while (*vp != '\'' && !isspace((unsigned char) *vp) &&
+                                                       *vp != '\0')
+                   vp++;
+
+               if (*vp == '\'') {
+                   if (vp != dp) {
+                       len = vp - dp;
+                       lang = mh_xmalloc(len + 1);
+                       strncpy(lang, dp, len);
+                       lang[len] = '\0';
+                   } else {
+                       lang = NULL;
+                   }
+                   vp++;
+               } else {
+                   advise(NULL, "missing language tag in message %s's %s: "
+                          "field\n%*s(parameter %s)", filename, fieldname,
+                          strlen(invo_name) + 2, "", nameptr);
+                   free(nameptr);
+                   if (charset)
+                       free(charset);
+                   return NOTOK;
+               }
 
-       vp = pm->pm_name + (dp - cp);
+               dp = vp;
+           }
+
+           /*
+            * At this point vp should be pointing at the beginning
+            * of the encoded value/section.  Continue until we reach
+            * the end or get whitespace.  But first, calculate the
+            * length so we can allocate the correct buffer size.
+            */
 
-       if (*dp == '"') {
-           for (cp = ++dp, dp = vp;;) {
-               switch (c = *cp++) {
+           for (vp = dp, len = 0; istoken(*vp); vp++) {
+               if (*vp == '%') {
+                    if (*(vp + 1) == '\0' ||
+                               !isxdigit((unsigned char) *(vp + 1)) ||
+                               *(vp + 2) == '\0' ||
+                               !isxdigit((unsigned char) *(vp + 2))) {
+                       advise(NULL, "invalid encoded sequence in message "
+                              "%s's %s: field\n%*s(parameter %s)",
+                              filename, fieldname, strlen(invo_name) + 2,
+                              "", nameptr);
+                       free(nameptr);
+                       if (charset)
+                           free(charset);
+                       if (lang)
+                           free(lang);
+                       return NOTOK;
+                   }
+                   vp += 2;
+               }
+               len++;
+           }
+
+           up = valptr = mh_xmalloc(len + 1);
+
+           for (vp = dp; istoken(*vp); vp++) {
+               if (*vp == '%') {
+                   *up++ = decode_qp(*(vp + 1), *(vp + 2));
+                   vp += 2;
+               } else {
+                   *up++ = *vp;
+               }
+           }
+
+           *up = '\0';
+           cp = vp;
+       } else {
+           /*
+            * A "normal" string.  If it's got a leading quote, then we
+            * strip the quotes out.  Otherwise go until we reach the end
+            * or get whitespace.  Note we scan it twice; once to get the
+            * length, then the second time copies it into the destination
+            * buffer.
+            */
+
+           len = 0;
+
+           if (*dp == '"') {
+               for (cp = dp + 1;;) {
+                   switch (*cp++) {
                    case '\0':
 bad_quote:
                        advise (NULL,
                                "invalid quoted-string in message %s's %s: "
                                 "field\n%*s(parameter %s)",
                                filename, fieldname, strlen(invo_name) + 2, "",
-                               pm->pm_name);
+                               nameptr);
+                       free(nameptr);
+                       if (charset)
+                           free(charset);
+                       if (lang)
+                           free(lang);
                        return NOTOK;
+                   case '"':
+                       break;
 
                    case '\\':
-                       *dp++ = c;
-                       if ((c = *cp++) == '\0')
+                       if (*++cp == '\0')
                            goto bad_quote;
-                       /* else fall... */
-
+                       /* FALL THROUGH */
                    default:
-                       *dp++ = c;
+                       len++;
                        continue;
+                   }
+                   break;
+               }
 
-                   case '"':
-                       *dp = '\0';
+           } else {
+               for (cp = dp; istoken (*cp); cp++) {
+                   len++;
+               }
+           }
+
+           valptr = mh_xmalloc(len + 1);
+
+           if (*dp == '"') {
+               int i;
+               for (cp = dp + 1, vp = valptr, i = 0; i < len; i++) {
+                   if (*cp == '\\') {
+                       cp++;
+                   }
+                   *vp++ = *cp++;
+               }
+               cp++;
+           } else {
+               strncpy(valptr, cp = dp, len);
+               cp += len;
+           }
+
+           valptr[len] = '\0';
+       }
+
+       /*
+        * If 'partial' is set, we don't allocate a parameter now.  We
+        * put it on the parameter linked list to be reassembled later.
+        *
+        * "phead" points to a list of all parameters we need to reassemble.
+        * Each parameter has a list of sections. We insert the sections in
+        * order.
+        */
+
+       if (partial) {
+           for (pp = phead; pp != NULL; pp = pp->next) {
+               if (strcasecmp(nameptr, pp->name) == 0)
+                   break;
+           }
+
+           if (pp == NULL) {
+               pp = mh_xmalloc(sizeof(*pp));
+               memset(pp, 0, sizeof(*pp));
+               pp->name = nameptr;
+               pp->next = phead;
+               phead = pp;
+           }
+
+           /*
+            * Insert this into the section linked list
+            */
+
+           sp = mh_xmalloc(sizeof(*sp));
+           memset(sp, 0, sizeof(*sp));
+           sp->value = valptr;
+           sp->index = index;
+           sp->len = len;
+
+           if (pp->sechead == NULL || pp->sechead->index > index) {
+               sp->next = pp->sechead;
+               pp->sechead = sp;
+           } else {
+               for (sp2 = pp->sechead; sp2 != NULL; sp2 = sp2->next) {
+                   if (sp2->index == sp->index) {
+                       advise (NULL, "duplicate index (%d) in message "
+                               "%s's %s: field\n%*s(parameter %s)", sp->index,
+                               filename, fieldname, strlen(invo_name) + 2, "",
+                               nameptr);
+                       return NOTOK;
+                   }
+                   if (sp2->index < sp->index &&
+                       (sp2->next == NULL || sp2->next->index > sp->index)) {
+                       sp->next = sp2->next;
+                       sp2->next = sp;
                        break;
+                   }
+               }
+
+               if (sp2 == NULL) {
+                   advise(NULL, "Internal error: cannot insert partial "
+                          "param in message %s's %s: field\n%*s(parameter %s)",
+                          filename, fieldname, strlen(invo_name) + 2, "",
+                          nameptr);
+                   return NOTOK;
                }
-               break;
+           }
+
+           /*
+            * Save our charset and lang tags.
+            */
+
+           if (index == 0 && encoded) {
+               if (pp->charset)
+                   free(pp->charset);
+               pp->charset = charset;
+               if (pp->lang)
+                   free(pp->lang);
+               pp->lang = lang;
            }
        } else {
-           for (cp = dp, dp = vp; istoken (*cp); cp++, dp++)
-               continue;
-           *dp = '\0';
-       }
-       pm->pm_value = getcpy(vp); 
-       if (!*vp) {
-           advise (NULL,
-                   "invalid parameter in message %s's %s: "
-                    "field\n%*s(parameter %s)",
-                   filename, fieldname, strlen(invo_name) + 2, "",
-                   pm->pm_name);
-           return NOTOK;
+           pm = add_param(param_head, param_tail, nameptr, valptr, 1);
+           pm->pm_charset = charset;
+           pm->pm_lang = lang;
        }
 
        while (isspace ((unsigned char) *cp))
@@ -3415,20 +3566,70 @@ bad_quote:
             get_comment (filename, fieldname, &cp, commentp) == NOTOK) {
            return NOTOK;
         }
+    }
 
-       if (*param_head == NULL) {
-           *param_head = pm;
-           *param_tail = pm;
-       } else {
-           (*param_tail)->pm_next = pm;
-           *param_tail = pm;
+    /*
+     * Now that we're done, reassemble all of the partial parameters.
+     */
+
+    for (pp = phead; pp != NULL; ) {
+       char *p, *q;
+       size_t tlen = 0;
+       int pindex = 0;
+       for (sp = pp->sechead; sp != NULL; sp = sp->next) {
+           if (sp->index != pindex++) {
+               advise(NULL, "missing section %d for parameter in "
+                      "message %s's %s: field\n%*s(parameter %s)", pindex - 1,
+                      filename, fieldname, strlen(invo_name) + 2, "",
+                      pp->name);
+               return NOTOK;
+           }
+           tlen += sp->len;
+       }
+
+       p = q = mh_xmalloc(tlen + 1);
+       for (sp = pp->sechead; sp != NULL; ) {
+           memcpy(q, sp->value, sp->len);
+           q += sp->len;
+           free(sp->value);
+           sp2 = sp->next;
+           free(sp);
+           sp = sp2;
        }
+
+       p[tlen] = '\0';
+
+       pm = add_param(param_head, param_tail, pp->name, p, 1);
+       pm->pm_charset = pp->charset;
+       pm->pm_lang = pp->lang;
+       pp2 = pp->next;
+       free(pp);
+       pp = pp2;
     }
 
     *header_attrp = cp;
     return OK;
 }
 
+/*
+ * Look up the "charset" parameter in a content's Content-Type parameter
+ * list.  When get_param() finds no such parameter, the literal string
+ * "US-ASCII" is returned instead.
+ *
+ * The string obtained from get_param() is remembered in a static pointer
+ * and freed on the next call, so the returned pointer is only valid
+ * until content_charset() is called again.
+ */
+
+char *
+content_charset (CT ct)
+{
+    static char *last_charset = NULL;
+
+    /* Release the previous call's result; free(NULL) is a no-op. */
+    free(last_charset);
+
+    last_charset = get_param(ct->c_ctinfo.ci_first_pm, "charset", '?', 0);
+    if (last_charset == NULL)
+       return "US-ASCII";
+
+    return last_charset;
+}
+
+
 /*
  * Create a string based on a list of output parameters.  Assume that this
  * parameter string will be appended to an existing header, so start out
@@ -3836,14 +4037,14 @@ normal_param(PM pm, char *output, size_t len, size_t valuelen,
  */
 
 PM
-add_param(PM *first, PM *last, const char *name, const char *value)
+add_param(PM *first, PM *last, char *name, char *value, int nocopy)
 {
     PM pm = mh_xmalloc(sizeof(*pm));
 
     memset(pm, 0, sizeof(*pm));
 
-    pm->pm_name = getcpy(name);
-    pm->pm_value = getcpy(value);
+    pm->pm_name = nocopy ? name : getcpy(name);
+    pm->pm_value = nocopy ? value : getcpy(value);
 
     if (*first) {
        (*last)->pm_next = pm;
@@ -3856,6 +4057,33 @@ add_param(PM *first, PM *last, const char *name, const char *value)
     return pm;
 }
 
+/*
+ * Set the value of the parameter called "name" (compared without regard
+ * to case).  If the list starting at *first already holds such a
+ * parameter, its old value is freed and replaced and that entry is
+ * returned; otherwise the work is handed to add_param() and its result
+ * is returned.
+ *
+ * With nocopy set, name and value are taken over as-is rather than
+ * duplicated; when an existing entry is updated the passed-in name is
+ * not needed and is freed here.
+ */
+
+PM
+replace_param(PM *first, PM *last, char *name, char *value, int nocopy)
+{
+    PM cur = *first;
+
+    /* Walk the list until a matching name turns up or we run off the end. */
+    while (cur != NULL && strcasecmp(name, cur->pm_name) != 0)
+       cur = cur->pm_next;
+
+    if (cur == NULL)
+       return add_param(first, last, name, value, nocopy);
+
+    if (nocopy) {
+       /* We own name but the existing entry already has one. */
+       free(name);
+       free(cur->pm_value);
+       cur->pm_value = value;
+    } else {
+       free(cur->pm_value);
+       cur->pm_value = getcpy(value);
+    }
+
+    return cur;
+}
+
 /*
  * Retrieve a parameter value from a parameter linked list.  If the parameter
  * value needs converted to the local character set, do that now.
@@ -3868,95 +4096,125 @@ get_param(PM first, const char *name, char replace, int fetchonly)
        if (strcasecmp(name, first->pm_name) == 0) {
            if (fetchonly)
                return first->pm_value;
-           else {
-               char convbuf[BUFSIZ];
-               size_t inbytes, outbytes = sizeof(convbuf);
+           else
+               return getcpy(get_param_value(first, replace));
+       }
+       first = first->pm_next;
+    }
+
+    return NULL;
+}
+
+/*
+ * Return a parameter value, converting to the local character set if
+ * necessary.
+ *
+ * pm      - Parameter whose pm_value (and optional pm_charset) is used.
+ * replace - Character substituted for anything that cannot be converted.
+ *
+ * When pm has no charset, or check_charset() accepts it, pm->pm_value
+ * itself is returned.  Otherwise the result is built in a static buffer
+ * that is overwritten by the next call; values that do not fit in that
+ * buffer are truncated.
+ */
+
+char *get_param_value(PM pm, char replace)
+{
+    static char buffer[4096];          /* I hope no parameters are larger */
+    size_t bufsize = sizeof(buffer);
 #ifdef HAVE_ICONV
-               int utf8;
-               iconv_t cd;
-               ICONV_CONST char *p;
-               char *q;
+    size_t inbytes;
+    int utf8;
+    iconv_t cd;
+    ICONV_CONST char *p;
+#else /* HAVE_ICONV */
+    char *p;
 #endif /* HAVE_ICONV */
-               if (!first->pm_charset ||
-                               check_charset(first->pm_charset,
-                                             strlen(first->pm_charset))) {
-                   /*
-                    * No conversion necessary
-                    */
-                   return getcpy(first->pm_value);
-               }
-#ifdef HAVE_ICONV
-               utf8 = strcasecmp(first->pm_charset, "UTF-8") == 0;
 
-               cd = iconv_open(get_charset(), first->pm_charset);
-               if (cd == (iconv_t) -1) {
-                   goto noconvert;
-               }
+    char *q;
 
-               inbytes = strlen(first->pm_value);
-               outbytes = sizeof(convbuf);
-               p = first->pm_value;
-               q = convbuf;
+    /*
+     * If we don't have a character set indicated, it's assumed to be
+     * US-ASCII.  If it matches our character set, we don't need to convert
+     * anything.
+     */
 
-               while (inbytes) {
-                   if (iconv(cd, &p, &inbytes, &q, &outbytes) == -1) {
-                       if (errno != EILSEQ) {
-                           iconv_close(cd);
-                           goto noconvert;
-                       }
-                       /*
-                        * Reset shift state, substitute our character,
-                        * try to restart conversion.
-                        */
-                       iconv(cd, NULL, NULL, &q, &outbytes);
-                       if (outbytes == 0) {
-                           iconv_close(cd);
-                           goto noconvert;
-                       }
-                       *q++ = replace;
-                       outbytes--;
-                       if (outbytes == 0) {
-                           iconv_close(cd);
-                           goto noconvert;
-                       }
-                       if (utf8) {
-                           for (++p, --inbytes; inbytes > 0 &&
-                                       (((unsigned char) *q) & 0xc0) == 0x80;
-                                ++p, --inbytes)
-                               continue;
-                       } else {
-                           p++;
-                           inbytes--;
-                       }
-                   }
-               }
+    if (!pm->pm_charset || check_charset(pm->pm_charset,
+                                        strlen(pm->pm_charset))) {
+       return pm->pm_value;
+    }
 
-               iconv_close(cd);
+    /*
+     * In this case, we need to convert.  If we have iconv support, use
+     * that.  Otherwise, go through and simply replace every non-ASCII
+     * character with the substitution character.
+     */
 
-               if (outbytes == 0)
-                   q--;
-               *q = '\0';
+#ifdef HAVE_ICONV
+    q = buffer;
+    bufsize = sizeof(buffer);
+    utf8 = strcasecmp(pm->pm_charset, "UTF-8") == 0;
 
-               return getcpy(convbuf);
-#endif /* HAVE_ICONV */
-noconvert:
-               for (p = first->pm_value, q = convbuf; *p && outbytes > 1;
-                    p++, q++, outbytes--) {
-                   if (isascii((unsigned char) p) &&
-                                       isprint((unsigned char) p))
-                       *q = *p;
-                   else
-                       *q = replace;
-               }
+    cd = iconv_open(get_charset(), pm->pm_charset);
+    if (cd == (iconv_t) -1) {
+       goto noiconv;
+    }
 
-               *q = '\0';
+    inbytes = strlen(pm->pm_value);
+    p = pm->pm_value;
 
-               return getcpy(convbuf);
+    while (inbytes) {
+       if (iconv(cd, &p, &inbytes, &q, &bufsize) == (size_t)-1) {
+           /*
+            * Only an invalid input sequence is repaired in place; any
+            * other failure abandons iconv and uses the fallback below.
+            */
+           if (errno != EILSEQ) {
+               iconv_close(cd);
+               goto noiconv;
+           }
+           /*
+            * Reset shift state, substitute our character,
+            * try to restart conversion.
+            */
+
+           iconv(cd, NULL, NULL, &q, &bufsize);
+
+           if (bufsize == 0) {
+               iconv_close(cd);
+               goto noiconv;
+           }
+           *q++ = replace;
+           bufsize--;
+           if (bufsize == 0) {
+               iconv_close(cd);
+               goto noiconv;
+           }
+           if (utf8) {
+               /*
+                * Skip the remaining continuation bytes (10xxxxxx) of
+                * the rejected sequence.  The test has to look at the
+                * input pointer p, not the output pointer q.
+                */
+               for (++p, --inbytes;
+                    inbytes > 0 && (((unsigned char) *p) & 0xc0) == 0x80;
+                    ++p, --inbytes)
+                   continue;
+           } else {
+               p++;
+               inbytes--;
             }
         }
+    }
 
-       first = first->pm_next;
+    iconv_close(cd);
+
+    /* Output buffer completely full: give up one byte for the NUL. */
+    if (bufsize == 0)
+       q--;
+    *q = '\0';
+
+    return buffer;
+
+noiconv:
+#endif /* HAVE_ICONV */
+
+    /*
+     * Take everything that is non-ASCII or a control character and
+     * substitute the replacement character.  Copying stops when the
+     * buffer has room left only for the terminating NUL.
+     */
+
+    q = buffer;
+    bufsize = sizeof(buffer);
+    for (p = pm->pm_value; *p != '\0' && bufsize > 1; p++, q++, bufsize--) {
+       if (isascii((unsigned char) *p) && !iscntrl((unsigned char) *p))
+           *q = *p;
+       else
+           *q = replace;
     }
 
-    return NULL;
+    *q = '\0';
+
+    return buffer;
 }