+
+/*
+ * Parse a series of MIME attributes (or parameters) given a header as
+ * input.  The RFC 2231 extensions are handled here: encoded values
+ * (name*=...), values split into numbered sections (name*N=...), and
+ * the combination of the two (name*N*=...).
+ *
+ * Arguments include:
+ *
+ * filename     - Name of input file (for error messages)
+ * fieldname    - Name of field being processed
+ * header_attrp - Pointer to pointer of the beginning of the MIME attributes.
+ *                Updated to point to end of attributes when OK is returned;
+ *                it is left untouched on the NOTOK and DONE returns.
+ * param_head   - Pointer to head of parameter list
+ * param_tail   - Pointer to tail of parameter list
+ * commentp     - Pointer to header comment pointer (may be NULL)
+ *
+ * The attribute names in the input buffer are lower-cased in place.
+ *
+ * Returns OK if parsing was successful, NOTOK if parsing failed, and
+ * DONE to indicate a benign error (minor parsing error, but the program
+ * should continue).
+ *
+ * NOTE(review): the DONE return (trailing ';') happens before the
+ * reassembly loop at the bottom, so sectioned parameters collected up to
+ * that point are not added to the parameter list.  Confirm that this is
+ * intended.
+ */
+
+static int
+parse_header_attrs (const char *filename, const char *fieldname,
+                    char **header_attrp, PM *param_head, PM *param_tail,
+                    char **commentp)
+{
+    char *cp = *header_attrp;
+    PM pm;
+    /* One section (name*N=...) of a parameter whose value was split up. */
+    struct sectlist {
+        char *value;
+        int index;
+        int len;
+        struct sectlist *next;
+    } *sp, *sp2;
+    /* A parameter waiting to be reassembled from its sections. */
+    struct parmlist {
+        char *name;
+        char *charset;
+        char *lang;
+        struct sectlist *sechead;
+        struct parmlist *next;
+    } *pp, *pp2, *phead = NULL;
+
+    while (*cp == ';') {
+        char *dp, *vp, *up, *nameptr, *valptr, *charset = NULL, *lang = NULL;
+        int encoded = 0, partial = 0, len = 0, index = 0;
+
+        cp++;
+        while (isspace ((unsigned char) *cp))
+            cp++;
+
+        if (*cp == '(' &&
+                get_comment (filename, fieldname, &cp, commentp) == NOTOK) {
+            return NOTOK;
+        }
+
+        if (*cp == 0) {
+            if (! suppress_extraneous_trailing_semicolon_warning) {
+                inform("extraneous trailing ';' in message %s's %s: "
+                    "parameter list", filename, fieldname);
+            }
+            return DONE;
+        }
+
+        /* down case the attribute name */
+        for (dp = cp; istoken ((unsigned char) *dp); dp++)
+            *dp = tolower ((unsigned char) *dp);
+
+        /* up remembers the end of the name; dp moves on to the '='. */
+        for (up = dp; isspace ((unsigned char) *dp);)
+            dp++;
+        if (dp == cp || *dp != '=') {
+            inform("invalid parameter in message %s's %s: field\n"
+                " parameter %s (error detected at offset %ld)",
+                filename, fieldname, cp, (long)(dp - cp));
+            return NOTOK;
+        }
+
+        /*
+         * To handle RFC 2231, we have to deal with the following extensions:
+         *
+         * name*=encoded-value
+         * name*<N>=part-N-of-a-parameter-value
+         * name*<N>*=encoded-part-N-of-a-parameter-value
+         *
+         * So the rule is:
+         * If there's a * right before the equal sign, it's encoded.
+         * If there's a * and one or more digits, then it's section N.
+         *
+         * Remember we can have one or the other, or both.  cp points to
+         * beginning of name, up points past the last character in the
+         * parameter name.
+         *
+         * len ends up as the length of the bare name (everything before
+         * the first '*').
+         */
+
+        for (vp = cp; vp < up; vp++) {
+            if (*vp == '*' && vp < up - 1) {
+                partial = 1;
+                continue;
+            }
+            if (*vp == '*' && vp == up - 1) {
+                encoded = 1;
+            } else if (partial) {
+                if (isdigit((unsigned char) *vp))
+                    index = *vp - '0' + index * 10;
+                else {
+                    inform("invalid parameter index in message %s's %s: field"
+                        "\n (parameter %s)", filename, fieldname, cp);
+                    return NOTOK;
+                }
+            } else {
+                len++;
+            }
+        }
+
+        /*
+         * Break out the parameter name and value sections and allocate
+         * memory for each.
+         */
+
+        nameptr = mh_xmalloc(len + 1);
+        strncpy(nameptr, cp, len);
+        nameptr[len] = '\0';
+
+        /* Step over the '=' and any whitespace that follows it. */
+        for (dp++; isspace ((unsigned char) *dp);)
+            dp++;
+
+        if (encoded) {
+            /*
+             * Single quotes delimit the character set and language tag.
+             * They are required on the first section (or a complete
+             * parameter).
+             */
+            if (index == 0) {
+                vp = dp;
+                while (*vp != '\'' && !isspace((unsigned char) *vp) &&
+                        *vp != '\0')
+                    vp++;
+                if (*vp == '\'') {
+                    if (vp != dp) {
+                        len = vp - dp;
+                        charset = mh_xmalloc(len + 1);
+                        strncpy(charset, dp, len);
+                        charset[len] = '\0';
+                    } else {
+                        charset = NULL;
+                    }
+                    vp++;
+                } else {
+                    inform("missing charset in message %s's %s: field\n"
+                        " (parameter %s)", filename, fieldname, nameptr);
+                    free(nameptr);
+                    return NOTOK;
+                }
+                dp = vp;
+
+                while (*vp != '\'' && !isspace((unsigned char) *vp) &&
+                        *vp != '\0')
+                    vp++;
+
+                if (*vp == '\'') {
+                    if (vp != dp) {
+                        len = vp - dp;
+                        lang = mh_xmalloc(len + 1);
+                        strncpy(lang, dp, len);
+                        lang[len] = '\0';
+                    } else {
+                        lang = NULL;
+                    }
+                    vp++;
+                } else {
+                    inform("missing language tag in message %s's %s: field\n"
+                        " (parameter %s)", filename, fieldname, nameptr);
+                    free(nameptr);
+                    mh_xfree(charset);
+                    return NOTOK;
+                }
+
+                dp = vp;
+            }
+
+            /*
+             * At this point vp should be pointing at the beginning
+             * of the encoded value/section.  Continue until we reach
+             * the end or get whitespace.  But first, calculate the
+             * length so we can allocate the correct buffer size.
+             * Each %XX triple decodes to a single byte.
+             */
+
+            for (vp = dp, len = 0; istoken(*vp); vp++) {
+                if (*vp == '%') {
+                    if (*(vp + 1) == '\0' ||
+                            !isxdigit((unsigned char) *(vp + 1)) ||
+                            *(vp + 2) == '\0' ||
+                            !isxdigit((unsigned char) *(vp + 2))) {
+                        inform("invalid encoded sequence in message %s's %s: field\n"
+                            " (parameter %s)", filename, fieldname, nameptr);
+                        free(nameptr);
+                        mh_xfree(charset);
+                        mh_xfree(lang);
+                        return NOTOK;
+                    }
+                    vp += 2;
+                }
+                len++;
+            }
+
+            up = valptr = mh_xmalloc(len + 1);
+
+            /* Second pass: copy the value, decoding %XX as we go. */
+            for (vp = dp; istoken(*vp); vp++) {
+                if (*vp == '%') {
+                    *up++ = decode_qp(*(vp + 1), *(vp + 2));
+                    vp += 2;
+                } else {
+                    *up++ = *vp;
+                }
+            }
+
+            *up = '\0';
+            cp = vp;
+        } else {
+            /*
+             * A "normal" string.  If it's got a leading quote, then we
+             * strip the quotes out.  Otherwise go until we reach the end
+             * or get whitespace.  Note we scan it twice; once to get the
+             * length, then the second time copies it into the destination
+             * buffer.
+             */
+
+            len = 0;
+
+            if (*dp == '"') {
+                for (cp = dp + 1;;) {
+                    switch (*cp++) {
+                    case '\0':
+bad_quote:
+                        inform("invalid quoted-string in message %s's %s: field\n"
+                            " (parameter %s)", filename, fieldname, nameptr);
+                        free(nameptr);
+                        mh_xfree(charset);
+                        mh_xfree(lang);
+                        return NOTOK;
+                    case '"':
+                        break;
+
+                    case '\\':
+                        /*
+                         * Consume the escaped character.  It is tested
+                         * before stepping past it, so a backslash that is
+                         * directly followed by the terminating NUL is
+                         * rejected without reading beyond the string.
+                         */
+                        if (*cp++ == '\0')
+                            goto bad_quote;
+                        /* FALLTHRU */
+                    default:
+                        len++;
+                        continue;
+                    }
+                    break;
+                }
+
+            } else {
+                for (cp = dp; istoken (*cp); cp++) {
+                    len++;
+                }
+            }
+
+            valptr = mh_xmalloc(len + 1);
+
+            if (*dp == '"') {
+                int i;
+                for (cp = dp + 1, vp = valptr, i = 0; i < len; i++) {
+                    if (*cp == '\\') {
+                        cp++;
+                    }
+                    *vp++ = *cp++;
+                }
+                cp++;   /* step over the closing quote */
+            } else {
+                strncpy(valptr, cp = dp, len);
+                cp += len;
+            }
+
+            valptr[len] = '\0';
+        }
+
+        /*
+         * If 'partial' is set, we don't allocate a parameter now.  We
+         * put it on the parameter linked list to be reassembled later.
+         *
+         * "phead" points to a list of all parameters we need to reassemble.
+         * Each parameter has a list of sections.  We insert the sections in
+         * order.
+         */
+
+        if (partial) {
+            for (pp = phead; pp != NULL; pp = pp->next) {
+                if (strcasecmp(nameptr, pp->name) == 0) {
+                    /* Already known: share the name owned by the list. */
+                    free (nameptr);
+                    nameptr = pp->name;
+                    break;
+                }
+            }
+
+            if (pp == NULL) {
+                NEW0(pp);
+                pp->name = nameptr;
+                pp->next = phead;
+                phead = pp;
+            }
+
+            /*
+             * Insert this into the section linked list
+             */
+
+            NEW0(sp);
+            sp->value = valptr;
+            sp->index = index;
+            sp->len = len;
+
+            if (pp->sechead == NULL || pp->sechead->index > index) {
+                sp->next = pp->sechead;
+                pp->sechead = sp;
+            } else {
+                for (sp2 = pp->sechead; sp2 != NULL; sp2 = sp2->next) {
+                    if (sp2->index == sp->index) {
+                        inform("duplicate index (%d) in message %s's %s: field"
+                            "\n (parameter %s)", sp->index, filename,
+                            fieldname, nameptr);
+                        /*
+                         * The new section was never linked in, so release
+                         * it and everything allocated for it.  nameptr is
+                         * owned by the parameter list at this point.
+                         */
+                        free(valptr);
+                        free(sp);
+                        mh_xfree(charset);
+                        mh_xfree(lang);
+                        return NOTOK;
+                    }
+                    if (sp2->index < sp->index &&
+                            (sp2->next == NULL || sp2->next->index > sp->index)) {
+                        sp->next = sp2->next;
+                        sp2->next = sp;
+                        break;
+                    }
+                }
+
+                if (sp2 == NULL) {
+                    inform("Internal error: cannot insert partial param "
+                        "in message %s's %s: field\n (parameter %s)",
+                        filename, fieldname, nameptr);
+                    /* Same clean-up as for a duplicate index. */
+                    free(valptr);
+                    free(sp);
+                    mh_xfree(charset);
+                    mh_xfree(lang);
+                    return NOTOK;
+                }
+            }
+
+            /*
+             * Save our charset and lang tags.
+             */
+
+            if (index == 0 && encoded) {
+                mh_xfree(pp->charset);
+                pp->charset = charset;
+                mh_xfree(pp->lang);
+                pp->lang = lang;
+            }
+        } else {
+            pm = add_param(param_head, param_tail, nameptr, valptr, 1);
+            pm->pm_charset = charset;
+            pm->pm_lang = lang;
+        }
+
+        while (isspace ((unsigned char) *cp))
+            cp++;
+
+        if (*cp == '(' &&
+                get_comment (filename, fieldname, &cp, commentp) == NOTOK) {
+            return NOTOK;
+        }
+    }
+
+    /*
+     * Now that we're done, reassemble all of the partial parameters.
+     * The sections of each parameter must be numbered 0, 1, 2, ... with
+     * no gaps.
+     */
+
+    for (pp = phead; pp != NULL; ) {
+        char *p, *q;
+        size_t tlen = 0;
+        int pindex = 0;
+        for (sp = pp->sechead; sp != NULL; sp = sp->next) {
+            if (sp->index != pindex++) {
+                inform("missing section %d for parameter in message "
+                    "%s's %s: field\n (parameter %s)", pindex - 1,
+                    filename, fieldname, pp->name);
+                return NOTOK;
+            }
+            tlen += sp->len;
+        }
+
+        /* Concatenate the sections, freeing each one as it is consumed. */
+        p = q = mh_xmalloc(tlen + 1);
+        for (sp = pp->sechead; sp != NULL; ) {
+            memcpy(q, sp->value, sp->len);
+            q += sp->len;
+            free(sp->value);
+            sp2 = sp->next;
+            free(sp);
+            sp = sp2;
+        }
+
+        p[tlen] = '\0';
+
+        /* Ownership of name, value, charset and lang moves to the PM. */
+        pm = add_param(param_head, param_tail, pp->name, p, 1);
+        pm->pm_charset = pp->charset;
+        pm->pm_lang = pp->lang;
+        pp2 = pp->next;
+        free(pp);
+        pp = pp2;
+    }
+
+    *header_attrp = cp;
+    return OK;
+}
+
+/*
+ * Return the charset for a particular content type.
+ *
+ * The "charset" parameter of the content's parameter list is looked up
+ * through get_param() (replacement character '?', not fetch-only).  If
+ * the lookup yields NULL, a freshly duplicated "US-ASCII" is returned
+ * instead.
+ */
+
+char *
+content_charset (CT ct) {
+    char *cs = get_param(ct->c_ctinfo.ci_first_pm, "charset", '?', 0);
+
+    if (cs == NULL)
+        cs = mh_xstrdup("US-ASCII");
+
+    return cs;
+}
+
+
+/*
+ * Create a string based on a list of output parameters.  Assume that this
+ * parameter string will be appended to an existing header, so start out
+ * with the separator (;).  Perform RFC 2231 encoding when necessary.
+ *
+ * Arguments include:
+ *
+ * initialwidth - Width already used on the current output line.
+ * params       - Head of the parameter list to output.
+ * offsetout    - If not NULL, receives the width used on the last output
+ *                line.
+ * external     - If set, a parameter named "body" is skipped.
+ *
+ * Returns the string accumulated with add(), or NULL if a parameter name
+ * is longer than CPERLIN, if a value could not be formatted, or if no
+ * parameter was output at all.
+ */
+
+char *
+output_params(size_t initialwidth, PM params, int *offsetout, int external)
+{
+    char *paramout = NULL;
+    char line[CPERLIN * 2], *q;
+    int curlen, index, cont, encode, i;
+    size_t valoff, numchars;
+
+    /*
+     * The list is advanced in the for-increment so that the "continue"
+     * used to skip a parameter still moves on to the next one.
+     */
+    for (; params != NULL; params = params->pm_next) {
+        encode = 0;
+        index = 0;
+        valoff = 0;
+        q = line;
+
+        if (external && strcasecmp(params->pm_name, "body") == 0)
+            continue;
+
+        if (strlen(params->pm_name) > CPERLIN) {
+            inform("Parameter name \"%s\" is too long", params->pm_name);
+            mh_xfree(paramout);
+            return NULL;
+        }
+
+        curlen = param_len(params, index, valoff, &encode, &cont, &numchars);
+
+        /*
+         * Loop until we get a parameter that fits within a line.  We
+         * assume new lines start with a tab, so check our overflow based
+         * on that.
+         */
+
+        while (cont) {
+            *q++ = ';';
+            *q++ = '\n';
+            *q++ = '\t';
+
+            /*
+             * At this point we're definitely continuing the line, so
+             * be sure to include the parameter name and section index.
+             */
+
+            q += snprintf(q, sizeof(line) - (q - line), "%s*%d",
+                          params->pm_name, index);
+
+            /*
+             * Both of these functions do a NUL termination
+             */
+
+            if (encode)
+                i = encode_param(params, q, sizeof(line) - (q - line),
+                                 numchars, valoff, index);
+            else
+                i = normal_param(params, q, sizeof(line) - (q - line),
+                                 numchars, valoff);
+
+            if (i == 0) {
+                mh_xfree(paramout);
+                return NULL;
+            }
+
+            valoff += numchars;
+            index++;
+            curlen = param_len(params, index, valoff, &encode, &cont,
+                               &numchars);
+            q = line;
+
+            /*
+             * "line" starts with a ;\n\t, so that doesn't count against
+             * the length.  But add 8 since it starts with a tab; that's
+             * how we end up with 5.
+             */
+
+            initialwidth = strlen(line) + 5;
+
+            /*
+             * At this point the line should be built, so add it to our
+             * current output buffer.
+             */
+
+            paramout = add(line, paramout);
+        }
+
+        /*
+         * If this won't fit on the line, start a new one.  Save room in
+         * case we need a semicolon on the end
+         */
+
+        if (initialwidth + curlen > CPERLIN - 1) {
+            *q++ = ';';
+            *q++ = '\n';
+            *q++ = '\t';
+            initialwidth = 8;
+        } else {
+            *q++ = ';';
+            *q++ = ' ';
+            initialwidth += 2;
+        }
+
+        /*
+         * At this point, we're either finishing a continued parameter, or
+         * we're working on a new one.
+         */
+
+        if (index > 0) {
+            q += snprintf(q, sizeof(line) - (q - line), "%s*%d",
+                          params->pm_name, index);
+        } else {
+            /* The name was checked against CPERLIN above, so it fits. */
+            strncpy(q, params->pm_name, sizeof(line) - (q - line));
+            q += strlen(q);
+        }
+
+        /* Whatever is left of the value goes out in one piece. */
+        if (encode)
+            i = encode_param(params, q, sizeof(line) - (q - line),
+                             strlen(params->pm_value + valoff), valoff, index);
+        else
+            i = normal_param(params, q, sizeof(line) - (q - line),
+                             strlen(params->pm_value + valoff), valoff);
+
+        if (i == 0) {
+            mh_xfree(paramout);
+            return NULL;
+        }
+
+        paramout = add(line, paramout);
+        initialwidth += strlen(line);
+    }
+
+    if (offsetout)
+        *offsetout = initialwidth;
+
+    return paramout;
+}
+
+/*
+ * Calculate the size of a parameter.
+ *
+ * Arguments include
+ *
+ * pm       - The parameter being output
+ * index    - If continuing the parameter, the index of the section
+ *            we're on.
+ * valueoff - The current offset into the parameter value that we're
+ *            working on (previous sections have consumed valueoff bytes).
+ * encode   - Set if we should perform encoding on this parameter section
+ *            (given that we're consuming bytesfit bytes).
+ * cont     - Set if the remaining data in value will not fit on a single
+ *            line and will need to be continued.
+ * bytesfit - The number of bytes that we can consume from the parameter
+ *            value and still fit on a completely new line.  The
+ *            calculation assumes the new line starts with a tab,
+ *            includes the parameter name and any encoding, and fits
+ *            within CPERLIN bytes.  Will always be at least 1.
+ *
+ * Returns the output length of the parameter name plus the rest of the
+ * value.  When encoding is required and the parameter has no charset or
+ * language tag yet, they are filled in here.
+ */
+
+static size_t
+param_len(PM pm, int index, size_t valueoff, int *encode, int *cont,
+          size_t *bytesfit)
+{
+    char *value = pm->pm_value + valueoff, *scan, idxbuf[32];
+    size_t total = 0, consumed = 0;
+    int overflowed = 0, has8bit, room;
+
+    *encode = 0;
+
+    /*
+     * The total starts out as the length of the parameter name.
+     */
+
+    total = strlen(pm->pm_name);
+
+    /*
+     * Look for 8-bit characters anywhere in the rest of the value.
+     */
+
+    has8bit = contains8bit(value, NULL);
+
+    /*
+     * Decide whether this section has to be encoded.  That is the case if:
+     *
+     * - There are any 8-bit characters at all and we're on the first
+     *   section.
+     * - There are 8-bit characters within N bytes of our section start.
+     *   N is calculated based on the number of bytes it would take to
+     *   reach CPERLIN.  Specifically:
+     *     8 (starting tab) +
+     *     strlen(param name) +
+     *     4 ('* for section marker, '=', opening/closing '"')
+     *     strlen (index)
+     *   is the number of bytes used by everything that isn't part of the
+     *   value.  So that gets subtracted from CPERLIN.
+     */
+
+    snprintf(idxbuf, sizeof(idxbuf), "%d", index);
+    room = CPERLIN - (12 + total + strlen(idxbuf));
+    if ((has8bit && index == 0) || contains8bit(value, value + room)) {
+        *encode = 1;
+    }
+
+    total++;    /* the equal sign */
+
+    if (! *encode) {
+        /*
+         * Plain quoted value: every character costs one output byte,
+         * except \ and " which need a quoting backslash as well.
+         */
+        for (scan = value; *scan != '\0'; scan++) {
+            int cost = (*scan == '"' || *scan == '\\') ? 2 : 1;
+
+            total += cost;
+            room -= cost;
+
+            if (! overflowed) {
+                if (room >= 0)
+                    consumed++;
+                else
+                    overflowed = 1;
+            }
+        }
+
+        total += 2;     /* the quotes at beginning and end */
+    } else {
+        /*
+         * room is our marker for how many characters still fit into the
+         * line.  Encoded values are not quoted, so the two bytes that
+         * were reserved for the quotes are handed back.
+         */
+
+        room += 2;
+
+        /*
+         * Supply a charset and language tag if this parameter does not
+         * have them yet.
+         */
+
+        if (! pm->pm_charset) {
+            pm->pm_charset = mh_xstrdup(write_charset_8bit());
+            if (strcasecmp(pm->pm_charset, "US-ASCII") == 0)
+                adios(NULL, "8-bit characters in parameter \"%s\", but "
+                      "local character set is US-ASCII", pm->pm_name);
+        }
+        if (! pm->pm_lang)
+            pm->pm_lang = mh_xstrdup(""); /* Default to a blank lang tag */
+
+        total++;        /* the encoding marker */
+        room--;
+
+        if (index != 0) {
+            /*
+             * An index is definitely needed here.  room already accounts
+             * for the section marker.
+             */
+            total += strlen(idxbuf);
+        } else {
+            int enclen = strlen(pm->pm_charset) + strlen(pm->pm_lang) + 2;
+            total += enclen;
+            room -= enclen;
+        }
+
+        for (scan = value; *scan != '\0'; scan++) {
+            int cost = isparamencode(*scan) ? 3 : 1;
+
+            total += cost;
+            room -= cost;
+
+            /*
+             * To be clear: room counts OUTPUT characters (post-encoding),
+             * consumed counts INPUT characters.
+             */
+            if (! overflowed) {
+                if (room >= 0)
+                    consumed++;
+                else
+                    overflowed = 1;
+            }
+        }
+    }
+
+    *cont = overflowed;
+    *bytesfit = consumed < 1 ? 1 : consumed;
+
+    return total;
+}
+
+/*
+ * Output an encoded parameter string.
+ *
+ * Arguments include:
+ *
+ * pm       - The parameter being output
+ * output   - Buffer that receives "*=", the optional charset'lang'
+ *            prefix, the encoded value and a terminating NUL
+ * len      - Size of the output buffer in bytes
+ * valuelen - Number of bytes of the parameter value to output
+ * valueoff - Offset into the parameter value to start at
+ * index    - Section index; the charset and language tag are only
+ *            output for index 0
+ *
+ * Returns the number of characters written (not counting the NUL), or 0
+ * if the result, including the NUL, does not fit into the buffer.
+ */
+
+size_t
+encode_param(PM pm, char *output, size_t len, size_t valuelen,
+             size_t valueoff, int index)
+{
+    size_t outlen = 0, n, room;
+    char *p;
+
+    /*
+     * First, output the marker for an encoded string.  We need room for
+     * the two marker bytes and the terminating NUL.
+     */
+
+    if (len < 3) {
+        inform("Internal error: parameter buffer overflow");
+        return 0;
+    }
+
+    *output++ = '*';
+    *output++ = '=';
+    outlen += 2;
+
+    /*
+     * If the index is 0, output the character set and language tag.
+     * If these were NULL, they should have already been filled in
+     * by param_len().
+     */
+
+    if (index == 0) {
+        room = len - outlen;
+        n = snprintf(output, room, "%s'%s'", pm->pm_charset,
+                     pm->pm_lang);
+        /*
+         * snprintf() reports the untruncated length, so n >= room means
+         * the text was cut short.  A negative return converts to a huge
+         * size_t and is caught here as well.
+         */
+        if (n >= room) {
+            inform("Internal error: parameter buffer overflow");
+            return 0;
+        }
+        output += n;
+        outlen += n;
+    }
+
+    /*
+     * Copy over the value, encoding if necessary.  Every step is checked
+     * before the output pointer is advanced, and always leaves one byte
+     * for the terminating NUL.
+     */
+
+    p = pm->pm_value + valueoff;
+    while (valuelen-- > 0) {
+        room = len - outlen;
+        if (isparamencode(*p)) {
+            n = snprintf(output, room, "%%%02X", (unsigned char) *p++);
+            if (n >= room) {
+                inform("Internal error: parameter buffer overflow");
+                return 0;
+            }
+            output += n;
+            outlen += n;
+        } else {
+            if (room < 2) {
+                inform("Internal error: parameter buffer overflow");
+                return 0;
+            }
+            *output++ = *p++;
+            outlen++;
+        }
+    }
+
+    *output = '\0';
+
+    return outlen;
+}
+
+/*
+ * Output a "normal" parameter, without encoding.  Be sure to escape
+ * quotes and backslashes if necessary.
+ *
+ * Arguments include:
+ *
+ * pm       - The parameter being output
+ * output   - Buffer that receives ="value" and a terminating NUL
+ * len      - Size of the output buffer in bytes
+ * valuelen - Number of bytes of the parameter value to output
+ * valueoff - Offset into the parameter value to start at
+ *
+ * Returns the number of characters written (the equal sign, both quotes
+ * and the escaped value, not counting the NUL), or 0 if the result does
+ * not fit into the buffer.
+ */
+
+static size_t
+normal_param(PM pm, char *output, size_t len, size_t valuelen,
+             size_t valueoff)
+{
+    size_t outlen = 0;
+    char *endptr = output + len, *p;
+
+    /* The minimum output is =, two quotes and the NUL. */
+    if (len < 4) {
+        inform("Internal error: parameter buffer overflow");
+        return 0;
+    }
+
+    *output++ = '=';
+    *output++ = '"';
+    outlen += 2;
+
+    p = pm->pm_value + valueoff;
+
+    while (valuelen-- > 0) {
+        size_t need = (*p == '\\' || *p == '"') ? 2 : 1;
+
+        /*
+         * Check before writing, and keep two bytes in reserve for the
+         * closing quote and the terminating NUL.
+         */
+        if ((size_t) (endptr - output) < need + 2) {
+            inform("Internal error: parameter buffer overflow");
+            return 0;
+        }
+
+        if (need == 2) {
+            *output++ = '\\';
+            outlen++;
+        }
+        *output++ = *p++;
+        outlen++;
+    }
+
+    *output++ = '"';
+    *output = '\0';
+
+    return outlen + 1;
+}
+
+/*
+ * Append a new parameter to the parameter linked list.
+ *
+ * first and last point to the list's head and tail pointers; both are
+ * updated as needed.  If nocopy is set, name and value are stored as
+ * given; otherwise copies made with getcpy() are stored.
+ *
+ * Returns the newly created parameter.
+ */
+
+PM
+add_param(PM *first, PM *last, char *name, char *value, int nocopy)
+{
+    PM node;
+
+    NEW0(node);
+    if (nocopy) {
+        node->pm_name = name;
+        node->pm_value = value;
+    } else {
+        node->pm_name = getcpy(name);
+        node->pm_value = getcpy(value);
+    }
+
+    /* An empty list gets a new head; otherwise hang it off the tail. */
+    if (*first == NULL)
+        *first = node;
+    else
+        (*last)->pm_next = node;
+    *last = node;
+
+    return node;
+}
+
+/*
+ * Give a parameter a new value.  If a parameter with this name (compared
+ * without regard to case) is already on the list, its value is replaced;
+ * otherwise the parameter is added with add_param().
+ *
+ * Returns the parameter that was updated or added.
+ */
+
+PM
+replace_param(PM *first, PM *last, char *name, char *value, int nocopy)
+{
+    PM cur = *first;
+
+    while (cur != NULL && strcasecmp(name, cur->pm_name) != 0)
+        cur = cur->pm_next;
+
+    if (cur == NULL)
+        return add_param(first, last, name, value, nocopy);
+
+    /*
+     * With nocopy set we are assumed to own both name and value.  The
+     * existing entry keeps its own name, so ours is discarded.
+     */
+    if (nocopy)
+        free(name);
+
+    free(cur->pm_value);
+    if (nocopy)
+        cur->pm_value = value;
+    else
+        cur->pm_value = getcpy(value);
+
+    return cur;
+}
+
+/*
+ * Look up a parameter by name (compared without regard to case) in a
+ * parameter linked list and return its value.
+ *
+ * If fetchonly is set, the stored value itself is returned.  Otherwise
+ * the value is passed through get_param_value(), which converts it to the
+ * local character set if needed, and a getcpy() copy of the result is
+ * returned.
+ *
+ * Returns NULL if no parameter with that name is on the list.
+ */
+
+char *
+get_param(PM first, const char *name, char replace, int fetchonly)
+{
+    PM pm;
+
+    for (pm = first; pm != NULL; pm = pm->pm_next) {
+        if (strcasecmp(name, pm->pm_name) != 0)
+            continue;
+
+        if (fetchonly)
+            return pm->pm_value;
+        return getcpy(get_param_value(pm, replace));
+    }
+
+    return NULL;
+}
+
+/*
+ * Return a parameter value, converting to the local character set if
+ * necessary
+ *
+ * pm - The parameter whose value is wanted
+ * replace - Character substituted for anything that cannot be converted
+ *
+ * Returns either pm->pm_value itself (no conversion needed) or a pointer
+ * to a static buffer holding the converted value. The static buffer is
+ * overwritten by the next call that converts, and a converted value is
+ * cut off at the size of that buffer.
+ */
+
+char *get_param_value(PM pm, char replace)
+{
+ static char buffer[4096]; /* I hope no parameters are larger */
+ size_t bufsize = sizeof(buffer);
+#ifdef HAVE_ICONV
+ size_t inbytes;
+ int utf8;
+ iconv_t cd;
+ ICONV_CONST char *p;
+#else /* HAVE_ICONV */
+ char *p;
+#endif /* HAVE_ICONV */
+
+ char *q;
+
+ /*
+ * If we don't have a character set indicated, it's assumed to be
+ * US-ASCII. If it matches our character set, we don't need to convert
+ * anything.
+ */
+
+ if (!pm->pm_charset || check_charset(pm->pm_charset,
+ strlen(pm->pm_charset))) {
+ return pm->pm_value;
+ }
+
+ /*
+ * In this case, we need to convert. If we have iconv support, use
+ * that. Otherwise, go through and simply replace every non-ASCII
+ * character with the substitution character.
+ */
+
+#ifdef HAVE_ICONV
+ q = buffer;
+ bufsize = sizeof(buffer);
+ /* Remembered so that a bad UTF-8 sequence can be skipped as a whole. */
+ utf8 = strcasecmp(pm->pm_charset, "UTF-8") == 0;
+
+ cd = iconv_open(get_charset(), pm->pm_charset);
+ if (cd == (iconv_t) -1) {
+ goto noiconv;
+ }
+
+ inbytes = strlen(pm->pm_value);
+ p = pm->pm_value;
+
+ while (inbytes) {
+ if (iconv(cd, &p, &inbytes, &q, &bufsize) == (size_t)-1) {
+ /*
+ * Only an invalid input sequence (EILSEQ) is recovered from
+ * here; any other failure falls back to plain substitution.
+ */
+ if (errno != EILSEQ) {
+ iconv_close(cd);
+ goto noiconv;
+ }
+ /*
+ * Reset shift state, substitute our character,
+ * try to restart conversion.
+ */
+
+ iconv(cd, NULL, NULL, &q, &bufsize);
+
+ if (bufsize == 0) {
+ iconv_close(cd);
+ goto noiconv;
+ }
+ *q++ = replace;
+ bufsize--;
+ if (bufsize == 0) {
+ iconv_close(cd);
+ goto noiconv;
+ }
+ /*
+ * Step over the offending input. For UTF-8 that is the lead
+ * byte plus any continuation bytes (10xxxxxx) that follow it;
+ * for everything else it is a single byte.
+ */
+ if (utf8) {
+ for (++p, --inbytes;
+ inbytes > 0 && (((unsigned char) *p) & 0xc0) == 0x80;
+ ++p, --inbytes)
+ continue;
+ } else {
+ p++;
+ inbytes--;
+ }
+ }
+ }
+
+ iconv_close(cd);
+
+ /* If the buffer is completely full, give up the last byte for the NUL. */
+ if (bufsize == 0)
+ q--;
+ *q = '\0';
+
+ return buffer;
+
+noiconv:
+#endif /* HAVE_ICONV */
+
+ /*
+ * Take everything non-ASCII and substitute the replacement character
+ * (non-printable characters are replaced as well). The loop stops
+ * one byte short of the buffer size so the NUL always fits.
+ */
+
+ q = buffer;
+ bufsize = sizeof(buffer);
+ for (p = pm->pm_value; *p != '\0' && bufsize > 1; p++, q++, bufsize--) {
+ if (isascii((unsigned char) *p) && isprint((unsigned char) *p))
+ *q = *p;
+ else
+ *q = replace;
+ }
+
+ *q = '\0';
+
+ return buffer;
+}