X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/e6a9609ee92dd8d01ae2524ea193bbcd88b5be73..d2520ac7054ad75d60342606bf13c821305d958c:/uip/mhparse.c

diff --git a/uip/mhparse.c b/uip/mhparse.c
index 50510856..59f8b221 100644
--- a/uip/mhparse.c
+++ b/uip/mhparse.c
@@ -16,6 +16,9 @@
 #include <h/mime.h>
 #include <h/mhparse.h>
 #include <h/utils.h>
+#ifdef HAVE_ICONV
+# include <iconv.h>
+#endif /* HAVE_ICONV */
 
 
 extern int debugsw;
@@ -135,6 +138,8 @@ static int readDigest (CT, char *);
 static int get_leftover_mp_content (CT, int);
 static int InitURL (CT);
 static int openURL (CT, char **);
+static int parse_header_attrs (const char *, const char *, char **, PM *,
+			       PM *, char **);
 static size_t param_len(PM, int, size_t, int *, int *, size_t *);
 static size_t encode_param(PM, char *, size_t, size_t, size_t, int);
 static size_t normal_param(PM, char *, size_t, size_t, size_t);
@@ -592,88 +597,6 @@ add_header (CT ct, char *name, char *value)
 }
 
 
-/* Make sure that buf contains at least one appearance of name,
-   followed by =.  If not, insert both name and value, just after
-   first semicolon, if any.  Note that name should not contain a
-   trailing =.	And quotes will be added around the value.  Typical
-   usage:  make sure that a Content-Disposition header contains
-   filename="foo".  If it doesn't and value does, use value from
-   that. */
-static char *
-incl_name_value (char *buf, char *name, char *value) {
-    char *newbuf = buf;
-
-    /* Assume that name is non-null. */
-    if (buf && value) {
-	char *name_plus_equal = concat (name, "=", NULL);
-
-	if (! strstr (buf, name_plus_equal)) {
-	    char *insertion;
-	    char *cp, *prefix, *suffix;
-
-	    /* Trim trailing space, esp. newline. */
-	    for (cp = &buf[strlen (buf) - 1];
-		 cp >= buf && isspace ((unsigned char) *cp);
-		 --cp) {
-		*cp = '\0';
-	    }
-
-	    insertion = concat ("; ", name, "=", "\"", value, "\"", NULL);
-
-	    /* Insert at first semicolon, if any.  If none, append to
-	       end. */
-	    prefix = add (buf, NULL);
-	    if ((cp = strchr (prefix, ';'))) {
-		suffix = concat (cp, NULL);
-		*cp = '\0';
-		newbuf = concat (prefix, insertion, suffix, "\n", NULL);
-		free (suffix);
-	    } else {
-		/* Append to end. */
-		newbuf = concat (buf, insertion, "\n", NULL);
-	    }
-
-	    free (prefix);
-	    free (insertion);
-	    free (buf);
-	}
-
-	free (name_plus_equal);
-    }
-
-    return newbuf;
-}
-
-/* Extract just name_suffix="foo", if any, from value.	If there isn't
-   one, return the entire value.  Note that, for example, a name_suffix
-   of name will match filename="foo", and return foo. */
-static char *
-extract_name_value (char *name_suffix, char *value) {
-    char *extracted_name_value = value;
-    char *name_suffix_plus_quote = concat (name_suffix, "=\"", NULL);
-    char *name_suffix_equals = strstr (value, name_suffix_plus_quote);
-    char *cp;
-
-    free (name_suffix_plus_quote);
-    if (name_suffix_equals) {
-	char *name_suffix_begin;
-
-	/* Find first \". */
-	for (cp = name_suffix_equals; *cp != '"'; ++cp) /* empty */;
-	name_suffix_begin = ++cp;
-	/* Find second \". */
-	for (; *cp != '"'; ++cp) /* empty */;
-
-	extracted_name_value = mh_xmalloc (cp - name_suffix_begin + 1);
-	memcpy (extracted_name_value,
-		name_suffix_begin,
-		cp - name_suffix_begin);
-	extracted_name_value[cp - name_suffix_begin] = '\0';
-    }
-
-    return extracted_name_value;
-}
-
 /*
  * Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
  * directives.  Fills in the information of the CTinfo structure.
@@ -912,14 +835,11 @@ magic_skip:
                have a *filename=, extract it from the magic contents.
                The r1bindex call skips any leading directory
                components. */
-            if (ct->c_dispo)
-                ct->c_dispo =
-                    incl_name_value (ct->c_dispo,
-                                     "filename",
-                                     r1bindex (extract_name_value ("name",
-                                                                   ci->
-                                                                   ci_magic),
-                                               '/'));
+            if (ct->c_dispo_type &&
+		!get_param(ct->c_dispo_first, "filename", '_', 1)) {
+		add_param(&ct->c_dispo_first, &ct->c_dispo_last, "filename",
+			  r1bindex(ci->ci_magic, '/'), 0);
+	    }
         }
 	else
 	    advise (NULL,
@@ -2298,7 +2218,7 @@ open7Bit (CT ct, char **file)
 	fprintf (ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type, ci->ci_subtype);
 	len += strlen (TYPE_FIELD) + 2 + strlen (ci->ci_type)
 	    + 1 + strlen (ci->ci_subtype);
-	buffer = output_params(len, ci->ci_first_pm, &len);
+	buffer = output_params(len, ci->ci_first_pm, &len, 0);
 
 	if (buffer) {
 	    fputs (buffer, ce->ce_fp);
@@ -3306,16 +3226,49 @@ get_ce_method (const char *method) {
     return NULL;
 }
 
-int
+/*
+ * Parse a series of MIME attributes (or parameters) given a header as
+ * input.
+ *
+ * Arguments include:
+ *
+ * filename	- Name of input file (for error messages)
+ * fieldname	- Name of field being processed
+ * header_attrp	- Pointer to pointer of the beginning of the MIME attributes.
+ *		  Updated to point to end of attributes when finished.
+ * param_head	- Pointer to head of parameter list
+ * param_tail	- Pointer to tail of parameter list
+ * commentp	- Pointer to header comment pointer (may be NULL)
+ *
+ * Returns OK if parsing was successful, NOTOK if parsing failed, and
+ * DONE to indicate a benign error (minor parsing error, but the program
+ * should continue).
+ */
+
+static int
 parse_header_attrs (const char *filename, const char *fieldname,
 		    char **header_attrp, PM *param_head, PM *param_tail,
 		    char **commentp)
 {
     char *cp = *header_attrp;
     PM pm;
+    struct sectlist {
+	char *value;
+	int index;
+	int len;
+	struct sectlist *next;
+    } *sp, *sp2;
+    struct parmlist {
+	char *name;
+	char *charset;
+	char *lang;
+	struct sectlist *sechead;
+	struct parmlist *next;
+    } *pp, *pp2, *phead = NULL;
 
     while (*cp == ';') {
-	char *dp, *vp, *up, c;
+	char *dp, *vp, *up, *nameptr, *valptr, *charset = NULL, *lang = NULL;
+	int encoded = 0, partial = 0, len = 0, index = 0;
 
 	cp++;
 	while (isspace ((unsigned char) *cp))
@@ -3349,60 +3302,297 @@ parse_header_attrs (const char *filename, const char *fieldname,
 	    return NOTOK;
 	}
 
-	pm = mh_xmalloc(sizeof(*pm));
-	memset(pm, 0, sizeof(*pm));
+	/*
+	 * To handle RFC 2231, we have to deal with the following extensions:
+	 *
+	 * name*=encoded-value
+	 * name*<N>=part-N-of-a-parameter-value
+	 * name*<N>*=encoded-part-N-of-a-parameter-value
+	 *
+	 * So the rule is:
+	 * If there's a * right before the equal sign, it's encoded.
+	 * If there's a * and one or more digits, then it's section N.
+	 *
+	 * Remember we can have one or the other, or both.  cp points to
+	 * beginning of name, up points past the last character in the
+	 * parameter name.
+	 */
+
+	for (vp = cp; vp < up; vp++) {
+	    if (*vp == '*' && vp < up - 1) {
+		partial = 1;
+		continue;
+	    } else if (*vp == '*' && vp == up - 1) {
+	    	encoded = 1;
+	    } else if (partial) {
+		if (isdigit((unsigned char) *vp))
+		    index = *vp - '0' + index * 10;
+		else {
+		    advise (NULL, "invalid parameter index in message %s's "
+			    "%s: field\n%*s(parameter %s)", filename,
+			    fieldname, strlen(invo_name) + 2, "", cp);
+		    return NOTOK;
+		}
+	    } else {
+		len++;
+	    }
+	}
+
+	/*
+	 * Break out the parameter name and value sections and allocate
+	 * memory for each.
+	 */
+
+	nameptr = mh_xmalloc(len + 1);
+	strncpy(nameptr, cp, len);
+	nameptr[len] = '\0';
 
-	/* This is all mega-bozo and needs cleanup */
-	vp = (pm->pm_name = add (cp, NULL)) + (up - cp);
-	*vp = '\0';
 	for (dp++; isspace ((unsigned char) *dp);)
 	    dp++;
 
-	/* Now store the attribute value. */
+	if (encoded) {
+	    /*
+	     * Single quotes delimit the character set and language tag.
+	     * They are required on the first section (or a complete
+	     * parameter).
+	     */
+	    if (index == 0) {
+	    	vp = dp;
+		while (*vp != '\'' && !isspace((unsigned char) *vp) &&
+							*vp != '\0')
+		    vp++;
+		if (*vp == '\'') {
+		    if (vp != dp) {
+			len = vp - dp;
+			charset = mh_xmalloc(len + 1);
+			strncpy(charset, dp, len);
+			charset[len] = '\0';
+		    } else {
+			charset = NULL;
+		    }
+		    vp++;
+		} else {
+		    advise(NULL, "missing charset in message %s's %s: "
+			   "field\n%*s(parameter %s)", filename, fieldname,
+			   strlen(invo_name) + 2, "", nameptr);
+		    free(nameptr);
+		    return NOTOK;
+		}
+		dp = vp;
+
+		while (*vp != '\'' && !isspace((unsigned char) *vp) &&
+							*vp != '\0')
+		    vp++;
+
+		if (*vp == '\'') {
+		    if (vp != dp) {
+			len = vp - dp;
+			lang = mh_xmalloc(len + 1);
+			strncpy(lang, dp, len);
+			lang[len] = '\0';
+		    } else {
+			lang = NULL;
+		    }
+		    vp++;
+		} else {
+		    advise(NULL, "missing language tag in message %s's %s: "
+			   "field\n%*s(parameter %s)", filename, fieldname,
+			   strlen(invo_name) + 2, "", nameptr);
+		    free(nameptr);
+		    if (charset)
+			free(charset);
+		    return NOTOK;
+		}
+
+		dp = vp;
+	    }
+
+	    /*
+	     * At this point vp should be pointing at the beginning
+	     * of the encoded value/section.  Continue until we reach
+	     * the end or get whitespace.  But first, calculate the
+	     * length so we can allocate the correct buffer size.
+	     */
+
+	    for (vp = dp, len = 0; istoken(*vp); vp++) {
+		if (*vp == '%') {
+		     if (*(vp + 1) == '\0' ||
+				!isxdigit((unsigned char) *(vp + 1)) ||
+				*(vp + 2) == '\0' ||
+				!isxdigit((unsigned char) *(vp + 2))) {
+			advise(NULL, "invalid encoded sequence in message "
+			       "%s's %s: field\n%*s(parameter %s)",
+			       filename, fieldname, strlen(invo_name) + 2,
+			       "", nameptr);
+			free(nameptr);
+			if (charset)
+			    free(charset);
+			if (lang)
+			    free(lang);
+			return NOTOK;
+		    }
+		    vp += 2;
+		}
+		len++;
+	    }
+
+	    up = valptr = mh_xmalloc(len + 1);
+
+	    for (vp = dp; istoken(*vp); vp++) {
+		if (*vp == '%') {
+		    *up++ = decode_qp(*(vp + 1), *(vp + 2));
+		    vp += 2;
+		} else {
+		    *up++ = *vp;
+		}
+	    }
 
-	vp = pm->pm_name + (dp - cp);
+	    *up = '\0';
+	    cp = vp;
+	} else {
+	    /*
+	     * A "normal" string.  If it's got a leading quote, then we
+	     * strip the quotes out.  Otherwise go until we reach the end
+	     * or get whitespace.  Note we scan it twice; once to get the
+	     * length, then the second time copies it into the destination
+	     * buffer.
+	     */
 
-	if (*dp == '"') {
-	    for (cp = ++dp, dp = vp;;) {
-		switch (c = *cp++) {
+	    len = 0;
+
+	    if (*dp == '"') {
+		for (cp = dp + 1;;) {
+		    switch (*cp++) {
 		    case '\0':
 bad_quote:
 		        advise (NULL,
 				"invalid quoted-string in message %s's %s: "
                                 "field\n%*s(parameter %s)",
 				filename, fieldname, strlen(invo_name) + 2, "",
-				pm->pm_name);
+				nameptr);
+			free(nameptr);
+			if (charset)
+			    free(charset);
+			if (lang)
+			    free(lang);
 			return NOTOK;
+		    case '"':
+			break;
 
 		    case '\\':
-			*dp++ = c;
-			if ((c = *cp++) == '\0')
+			if (*++cp == '\0')
 			    goto bad_quote;
-			/* else fall... */
-
+			/* FALL THROUGH */
 		    default:
-			*dp++ = c;
+			len++;
 			continue;
+		    }
+		    break;
+		}
 
-		    case '"':
-			*dp = '\0';
+	    } else {
+		for (cp = dp; istoken (*cp); cp++) {
+		    len++;
+		}
+	    }
+
+	    valptr = mh_xmalloc(len + 1);
+
+	    if (*dp == '"') {
+	    	int i;
+		for (cp = dp + 1, vp = valptr, i = 0; i < len; i++) {
+		    if (*cp == '\\') {
+			cp++;
+		    }
+		    *vp++ = *cp++;
+		}
+		cp++;
+	    } else {
+	    	strncpy(valptr, cp = dp, len);
+		cp += len;
+	    }
+
+	    valptr[len] = '\0';
+	}
+
+	/*
+	 * If 'partial' is set, we don't allocate a parameter now.  We
+	 * put it on the parameter linked list to be reassembled later.
+	 *
+	 * "phead" points to a list of all parameters we need to reassemble.
+	 * Each parameter has a list of sections. We insert the sections in
+	 * order.
+	 */
+
+	if (partial) {
+	    for (pp = phead; pp != NULL; pp = pp->next) {
+		if (strcasecmp(nameptr, pp->name) == 0)
+		    break;
+	    }
+
+	    if (pp == NULL) {
+		pp = mh_xmalloc(sizeof(*pp));
+		memset(pp, 0, sizeof(*pp));
+		pp->name = nameptr;
+		pp->next = phead;
+		phead = pp;
+	    }
+
+	    /*
+	     * Insert this into the section linked list
+	     */
+
+	    sp = mh_xmalloc(sizeof(*sp));
+	    memset(sp, 0, sizeof(*sp));
+	    sp->value = valptr;
+	    sp->index = index;
+	    sp->len = len;
+
+	    if (pp->sechead == NULL || pp->sechead->index > index) {
+		sp->next = pp->sechead;
+		pp->sechead = sp;
+	    } else {
+		for (sp2 = pp->sechead; sp2 != NULL; sp2 = sp2->next) {
+		    if (sp2->index == sp->index) {
+			advise (NULL, "duplicate index (%d) in message "
+				"%s's %s: field\n%*s(parameter %s)", sp->index,
+				filename, fieldname, strlen(invo_name) + 2, "",
+				nameptr);
+			return NOTOK;
+		    }
+		    if (sp2->index < sp->index &&
+			(sp2->next == NULL || sp2->next->index > sp->index)) {
+			sp->next = sp2->next;
+			sp2->next = sp;
 			break;
+		    }
 		}
-		break;
+
+		if (sp2 == NULL) {
+		    advise(NULL, "Internal error: cannot insert partial "
+		    	   "param in message %s's %s: field\n%*s(parameter %s)",
+			   filename, fieldname, strlen(invo_name) + 2, "",
+			   nameptr);
+		    return NOTOK;
+		}
+	    }
+
+	    /*
+	     * Save our charset and lang tags.
+	     */
+
+	    if (index == 0 && encoded) {
+		if (pp->charset)
+		    free(pp->charset);
+	    	pp->charset = charset;
+		if (pp->lang)
+		    free(pp->lang);
+		pp->lang = lang;
 	    }
 	} else {
-	    for (cp = dp, dp = vp; istoken (*cp); cp++, dp++)
-		continue;
-	    *dp = '\0';
-	}
-	pm->pm_value = getcpy(vp); 
-	if (!*vp) {
-	    advise (NULL,
-		    "invalid parameter in message %s's %s: "
-                    "field\n%*s(parameter %s)",
-		    filename, fieldname, strlen(invo_name) + 2, "",
-		    pm->pm_name);
-	    return NOTOK;
+	    pm = add_param(param_head, param_tail, nameptr, valptr, 1);
+	    pm->pm_charset = charset;
+	    pm->pm_lang = lang;
 	}
 
 	while (isspace ((unsigned char) *cp))
@@ -3412,20 +3602,70 @@ bad_quote:
             get_comment (filename, fieldname, &cp, commentp) == NOTOK) {
 	    return NOTOK;
         }
+    }
 
-	if (*param_head == NULL) {
-	    *param_head = pm;
-	    *param_tail = pm;
-	} else {
-	    (*param_tail)->pm_next = pm;
-	    *param_tail = pm;
+    /*
+     * Now that we're done, reassemble all of the partial parameters.
+     */
+
+    for (pp = phead; pp != NULL; ) {
+    	char *p, *q;
+	size_t tlen = 0;
+	int pindex = 0;
+	for (sp = pp->sechead; sp != NULL; sp = sp->next) {
+	    if (sp->index != pindex++) {
+		advise(NULL, "missing section %d for parameter in "
+		       "message %s's %s: field\n%*s(parameter %s)", pindex - 1,
+		       filename, fieldname, strlen(invo_name) + 2, "",
+		       pp->name);
+		return NOTOK;
+	    }
+	    tlen += sp->len;
 	}
+
+	p = q = mh_xmalloc(tlen + 1);
+	for (sp = pp->sechead; sp != NULL; ) {
+	    memcpy(q, sp->value, sp->len);
+	    q += sp->len;
+	    free(sp->value);
+	    sp2 = sp->next;
+	    free(sp);
+	    sp = sp2;
+	}
+
+	p[tlen] = '\0';
+
+	pm = add_param(param_head, param_tail, pp->name, p, 1);
+	pm->pm_charset = pp->charset;
+	pm->pm_lang = pp->lang;
+	pp2 = pp->next;
+	free(pp);
+	pp = pp2;
     }
 
     *header_attrp = cp;
     return OK;
 }
 
+/*
+ * Look up the "charset" parameter of a content's type, defaulting to
+ * "US-ASCII".  The result is only valid until the next call to
+ * content_charset(), which releases the previously returned copy.
+ */
+char *
+content_charset (CT ct) {
+    static char *cached = NULL;	/* copy handed out by the last call */
+
+    free(cached);			/* free(NULL) is harmless */
+    cached = get_param(ct->c_ctinfo.ci_first_pm, "charset", '?', 0);
+
+    if (cached == NULL) {
+	return "US-ASCII";
+    }
+    return cached;
+}
+
+
 /*
  * Create a string based on a list of output parameters.  Assume that this
  * parameter string will be appended to an existing header, so start out
@@ -3433,7 +3673,7 @@ bad_quote:
  */
 
 char *
-output_params(size_t initialwidth, PM params, int *offsetout)
+output_params(size_t initialwidth, PM params, int *offsetout, int external)
 {
     char *paramout = NULL;
     char line[CPERLIN * 2], *q;
@@ -3446,6 +3686,9 @@ output_params(size_t initialwidth, PM params, int *offsetout)
 	valoff = 0;
 	q = line;
 
+	if (external && strcasecmp(params->pm_name, "body") == 0)
+	    continue;
+
 	if (strlen(params->pm_name) > CPERLIN) {
 	    advise(NULL, "Parameter name \"%s\" is too long", params->pm_name);
 	    if (paramout)
@@ -3618,17 +3861,17 @@ param_len(PM pm, int index, size_t valueoff, int *encode, int *cont,
      *   section.
      * - There are 8-bit characters within N bytes of our section start.
      *   N is calculated based on the number of bytes it would take to
-     *   reach CPERLIN - 1.  Specifically:
+     *   reach CPERLIN.  Specifically:
      *		8 (starting tab) +
      *		strlen(param name) +
      *		4 ('* for section marker, '=', opening/closing '"')
      *		strlen (index)
      *	is the number of bytes used by everything that isn't part of the
-     *  value.  So that gets subtracted from CPERLIN - 1.
+     *  value.  So that gets subtracted from CPERLIN.
      */
 
     snprintf(indexchar, sizeof(indexchar), "%d", index);
-    maxfit = CPERLIN - (13 + len + strlen(indexchar));
+    maxfit = CPERLIN - (12 + len + strlen(indexchar));
     if ((eightbit && index == 0) || contains8bit(start, start + maxfit)) {
 	*encode = 1;
     }
@@ -3655,11 +3898,15 @@ param_len(PM pm, int index, size_t valueoff, int *encode, int *cont,
 	    pm->pm_lang = getcpy(NULL);	/* Default to a blank lang tag */
 
 	len++;		/* For the encoding marker */
+	maxfit--;
 	if (index == 0) {
-	    len += strlen(pm->pm_charset) + strlen(pm->pm_lang) + 2;
+	    int enclen = strlen(pm->pm_charset) + strlen(pm->pm_lang) + 2;
+	    len += enclen;
+	    maxfit-= enclen;
 	} else {
 	    /*
-	     * We know we definitely need to include an index.
+	     * We know we definitely need to include an index.  maxfit already
+	     * includes the section marker.
 	     */
 	    len += strlen(indexchar);
 	}
@@ -3826,14 +4073,14 @@ normal_param(PM pm, char *output, size_t len, size_t valuelen,
  */
 
 PM
-add_param(PM *first, PM *last, const char *name, const char *value)
+add_param(PM *first, PM *last, char *name, char *value, int nocopy)
 {
     PM pm = mh_xmalloc(sizeof(*pm));
 
     memset(pm, 0, sizeof(*pm));
 
-    pm->pm_name = getcpy(name);
-    pm->pm_value = getcpy(value);
+    pm->pm_name = nocopy ? name : getcpy(name);
+    pm->pm_value = nocopy ? value : getcpy(value);
 
     if (*first) {
 	(*last)->pm_next = pm;
@@ -3845,3 +4092,165 @@ add_param(PM *first, PM *last, const char *name, const char *value)
 
     return pm;
 }
+
+/*
+ * Set parameter "name" to "value": overwrite the value of the first entry
+ * whose name matches (ignoring case), or append a new entry if none does.
+ */
+
+PM
+replace_param(PM *first, PM *last, char *name, char *value, int nocopy)
+{
+    PM cur = *first;
+
+    while (cur != NULL && strcasecmp(name, cur->pm_name) != 0)
+	cur = cur->pm_next;
+
+    if (cur == NULL)
+	return add_param(first, last, name, value, nocopy);
+
+    /*
+     * With nocopy it is assumed that we own both name and value;
+     * the entry keeps its existing name, so the passed one is discarded.
+     */
+    if (nocopy)
+	free(name);
+    free(cur->pm_value);
+    cur->pm_value = nocopy ? value : getcpy(value);
+    return cur;
+}
+
+/*
+ * Find the first parameter called "name" (ignoring case) in a list.
+ * With fetchonly set, return the stored value itself; otherwise return a
+ * getcpy() of what get_param_value() yields.  NULL if there is no match.
+ */
+char *
+get_param(PM first, const char *name, char replace, int fetchonly)
+{
+    PM pm;
+
+    for (pm = first; pm != NULL; pm = pm->pm_next) {
+	if (strcasecmp(name, pm->pm_name) != 0)
+	    continue;
+
+	return fetchonly ? pm->pm_value
+			 : getcpy(get_param_value(pm, replace));
+    }
+
+    return NULL;
+}
+
+/*
+ * Return a parameter value, converting to the local character set if
+ * necessary.  The result is pm->pm_value itself or a static buffer that
+ * the next call overwrites; "replace" stands in for unconvertible bytes.
+ */
+char *get_param_value(PM pm, char replace)
+{
+    static char buffer[4096];		/* I hope no parameters are larger */
+    size_t bufsize = sizeof(buffer);
+#ifdef HAVE_ICONV
+    size_t inbytes;
+    int utf8;
+    iconv_t cd;
+    ICONV_CONST char *p;
+#else /* HAVE_ICONV */
+    char *p;
+#endif /* HAVE_ICONV */
+
+    char *q;
+
+    /*
+     * If we don't have a character set indicated, it's assumed to be
+     * US-ASCII.  If it matches our character set, we don't need to convert
+     * anything.
+     */
+
+    if (!pm->pm_charset || check_charset(pm->pm_charset,
+    					 strlen(pm->pm_charset))) {
+	return pm->pm_value;
+    }
+
+    /*
+     * In this case, we need to convert.  If we have iconv support, use
+     * that.  Otherwise, go through and simply replace every non-ASCII
+     * character with the substitution character.
+     */
+
+#ifdef HAVE_ICONV
+    q = buffer;
+    bufsize = sizeof(buffer);
+    utf8 = strcasecmp(pm->pm_charset, "UTF-8") == 0;
+
+    cd = iconv_open(get_charset(), pm->pm_charset);
+    if (cd == (iconv_t) -1) {
+	goto noiconv;
+    }
+
+    inbytes = strlen(pm->pm_value);
+    p = pm->pm_value;
+
+    while (inbytes) {
+	if (iconv(cd, &p, &inbytes, &q, &bufsize) == (size_t)-1) {
+	    if (errno != EILSEQ) {
+		iconv_close(cd);
+		goto noiconv;
+	    }
+	    /*
+	     * Reset shift state, substitute our character, and restart
+	     * conversion after the bad input: for UTF-8 skip the lead byte
+	     * plus its continuation bytes (10xxxxxx), otherwise one byte.
+	     */
+	    iconv(cd, NULL, NULL, &q, &bufsize);
+
+	    if (bufsize == 0) {
+		iconv_close(cd);
+		goto noiconv;
+	    }
+	    *q++ = replace;
+	    bufsize--;
+	    if (bufsize == 0) {
+		iconv_close(cd);
+		goto noiconv;
+	    }
+	    if (utf8) {
+		for (++p, --inbytes;
+		     inbytes > 0 && (((unsigned char) *p) & 0xc0) == 0x80;
+		     ++p, --inbytes)
+		    continue;
+	    } else {
+		p++;
+		inbytes--;
+	    }
+	}
+    }
+
+    iconv_close(cd);
+
+    if (bufsize == 0)
+	q--;
+    *q = '\0';
+
+    return buffer;
+
+noiconv:
+#endif /* HAVE_ICONV */
+
+    /*
+     * Substitute the replacement character for every non-ASCII or control
+     * byte, truncating the value if it does not fit in the buffer.
+     */
+    q = buffer;
+    bufsize = sizeof(buffer);
+    for (p = pm->pm_value; *p != '\0' && bufsize > 1; p++, q++, bufsize--) {
+	if (isascii((unsigned char) *p) && !iscntrl((unsigned char) *p))
+	    *q = *p;
+	else
+	    *q = replace;
+    }
+
+    *q = '\0';
+
+    return buffer;
+}