+
+/*
+ * Look up the "charset" parameter attached to a content's type
+ * information.
+ *
+ * The lookup goes through get_param() with '?' as the replacement
+ * character and fetchonly set to 0.  If get_param() returns NULL, a
+ * copy of "US-ASCII" made with mh_xstrdup() is returned instead.
+ */
+
+char *
+content_charset (CT ct) {
+    char *charset = get_param(ct->c_ctinfo.ci_first_pm, "charset", '?', 0);
+
+    if (charset == NULL)
+        charset = mh_xstrdup("US-ASCII");
+
+    return charset;
+}
+
+
+/*
+ * Create a string based on a list of output parameters.  Assume that this
+ * parameter string will be appended to an existing header, so start out
+ * with the separator (;).  Perform RFC 2231 encoding when necessary.
+ *
+ * Arguments include
+ *
+ * initialwidth - Number of columns already used on the header line the
+ *                result will be appended to.
+ * params       - First entry of the parameter list to output.
+ * offsetout    - If not NULL, receives the running width of the last
+ *                output line when the function succeeds.
+ * external     - If nonzero, parameters named "body" (compared without
+ *                regard to case) are left out of the output.
+ *
+ * Returns the string accumulated with add(), or NULL if a parameter name
+ * is longer than CPERLIN or a value could not be formatted (any partial
+ * output is released with mh_xfree() in that case).  NULL is also
+ * returned when no parameter produced any output.
+ */
+
+char *
+output_params(size_t initialwidth, PM params, int *offsetout, int external)
+{
+    char *paramout = NULL;
+    char line[CPERLIN * 2], *q;
+    int curlen, index, cont, encode, i;
+    size_t valoff, numchars;
+
+    while (params != NULL) {
+        encode = 0;
+        index = 0;
+        valoff = 0;
+        q = line;
+
+        /*
+         * Skip the "body" parameter for external output.  We must step
+         * to the next parameter before continuing, otherwise the loop
+         * would examine the same entry forever.
+         */
+
+        if (external && strcasecmp(params->pm_name, "body") == 0) {
+            params = params->pm_next;
+            continue;
+        }
+
+        if (strlen(params->pm_name) > CPERLIN) {
+            advise(NULL, "Parameter name \"%s\" is too long", params->pm_name);
+            mh_xfree(paramout);
+            return NULL;
+        }
+
+        curlen = param_len(params, index, valoff, &encode, &cont, &numchars);
+
+        /*
+         * Loop until we get a parameter that fits within a line.  We
+         * assume new lines start with a tab, so check our overflow based
+         * on that.
+         */
+
+        while (cont) {
+            *q++ = ';';
+            *q++ = '\n';
+            *q++ = '\t';
+
+            /*
+             * At this point we're definitely continuing the line, so
+             * be sure to include the parameter name and section index.
+             */
+
+            q += snprintf(q, sizeof(line) - (q - line), "%s*%d",
+                          params->pm_name, index);
+
+            /*
+             * Both of these functions do a NUL termination
+             */
+
+            if (encode)
+                i = encode_param(params, q, sizeof(line) - (q - line),
+                                 numchars, valoff, index);
+            else
+                i = normal_param(params, q, sizeof(line) - (q - line),
+                                 numchars, valoff);
+
+            if (i == 0) {
+                mh_xfree(paramout);
+                return NULL;
+            }
+
+            valoff += numchars;
+            index++;
+            curlen = param_len(params, index, valoff, &encode, &cont,
+                               &numchars);
+            q = line;
+
+            /*
+             * "line" starts with a ;\n\t, so those three characters
+             * don't count against the length.  But the tab occupies 8
+             * columns, so add 8 and subtract 3; that's how we end up
+             * with 5.
+             */
+
+            initialwidth = strlen(line) + 5;
+
+            /*
+             * At this point the line should be built, so add it to our
+             * current output buffer.
+             */
+
+            paramout = add(line, paramout);
+        }
+
+        /*
+         * If this won't fit on the line, start a new one.  Save room in
+         * case we need a semicolon on the end
+         */
+
+        if (initialwidth + curlen > CPERLIN - 1) {
+            *q++ = ';';
+            *q++ = '\n';
+            *q++ = '\t';
+            initialwidth = 8;
+        } else {
+            *q++ = ';';
+            *q++ = ' ';
+            initialwidth += 2;
+        }
+
+        /*
+         * At this point, we're either finishing a continued parameter, or
+         * we're working on a new one.
+         */
+
+        if (index > 0) {
+            q += snprintf(q, sizeof(line) - (q - line), "%s*%d",
+                          params->pm_name, index);
+        } else {
+            /*
+             * The name was checked against CPERLIN above, so it fits;
+             * snprintf() is used because it always NUL-terminates.
+             */
+            q += snprintf(q, sizeof(line) - (q - line), "%s",
+                          params->pm_name);
+        }
+
+        if (encode)
+            i = encode_param(params, q, sizeof(line) - (q - line),
+                             strlen(params->pm_value + valoff), valoff, index);
+        else
+            i = normal_param(params, q, sizeof(line) - (q - line),
+                             strlen(params->pm_value + valoff), valoff);
+
+        if (i == 0) {
+            mh_xfree(paramout);
+            return NULL;
+        }
+
+        paramout = add(line, paramout);
+        initialwidth += strlen(line);
+
+        params = params->pm_next;
+    }
+
+    if (offsetout)
+        *offsetout = initialwidth;
+
+    return paramout;
+}
+
+/*
+ * Work out how much output space a parameter (or the rest of one) needs.
+ *
+ * Arguments include
+ *
+ * pm       - The parameter being output
+ * index    - If continuing the parameter, the index of the section
+ *            we're on.
+ * valueoff - The current offset into the parameter value that we're
+ *            working on (previous sections have consumed valueoff bytes).
+ * encode   - Set if we should perform encoding on this parameter section
+ *            (given that we're consuming bytesfit bytes).
+ * cont     - Set if the remaining data in value will not fit on a single
+ *            line and will need to be continued.
+ * bytesfit - The number of bytes that we can consume from the parameter
+ *            value and still fit on a completely new line.  The
+ *            calculation assumes the new line starts with a tab,
+ *            includes the parameter name and any encoding, and fits
+ *            within CPERLIN bytes.  Will always be at least 1.
+ *
+ * The return value is the computed output length for the name plus the
+ * remaining value.  As a side effect, when encoding is required and the
+ * parameter has no charset or language tag, pm->pm_charset and
+ * pm->pm_lang are filled in here.
+ */
+
+static size_t
+param_len(PM pm, int index, size_t valueoff, int *encode, int *cont,
+          size_t *bytesfit)
+{
+    char *start = pm->pm_value + valueoff;
+    char idxbuf[32];
+    const char *cp;
+    size_t total, consumed = 0;
+    int budget, has8bit, enc, overflowed = 0;
+
+    *encode = 0;
+
+    /*
+     * The running total starts out as the length of the parameter name.
+     */
+
+    total = strlen(pm->pm_name);
+
+    /*
+     * Does the remaining value contain any 8-bit characters at all?
+     */
+
+    has8bit = contains8bit(start, NULL);
+
+    /*
+     * Encoding is necessary if:
+     *
+     * - There are any 8-bit characters at all and we're on the first
+     *   section.
+     * - There are 8-bit characters within N bytes of our section start.
+     *   N is the number of value bytes it would take to reach CPERLIN,
+     *   i.e. CPERLIN minus everything that isn't part of the value:
+     *       8 (starting tab) +
+     *       strlen(param name) +
+     *       4 ('*' for section marker, '=', opening/closing '"') +
+     *       strlen(index)
+     */
+
+    snprintf(idxbuf, sizeof(idxbuf), "%d", index);
+    budget = CPERLIN - (12 + total + strlen(idxbuf));
+    if ((has8bit && index == 0) || contains8bit(start, start + budget))
+        *encode = 1;
+    enc = *encode;
+
+    total++;        /* Add in equal sign */
+
+    if (enc) {
+        /*
+         * budget tracks how many more output characters fit on the
+         * line.  Encoded values are not quoted, so hand back the two
+         * columns that were reserved for the quotes.
+         */
+
+        budget += 2;
+
+        /*
+         * If we don't have a charset or language tag in this parameter,
+         * add them now.
+         */
+
+        if (pm->pm_charset == NULL) {
+            pm->pm_charset = mh_xstrdup(write_charset_8bit());
+            if (strcasecmp(pm->pm_charset, "US-ASCII") == 0)
+                adios(NULL, "8-bit characters in parameter \"%s\", but "
+                      "local character set is US-ASCII", pm->pm_name);
+        }
+        if (pm->pm_lang == NULL)
+            pm->pm_lang = mh_xstrdup("");   /* Default to a blank lang tag */
+
+        total++;    /* For the encoding marker */
+        budget--;
+
+        if (index == 0) {
+            int enclen = strlen(pm->pm_charset) + strlen(pm->pm_lang) + 2;
+
+            total += enclen;
+            budget -= enclen;
+        } else {
+            /*
+             * We know we definitely need to include an index.  budget
+             * already accounts for the section marker.
+             */
+            total += strlen(idxbuf);
+        }
+    }
+
+    /*
+     * Walk the value one input character at a time.  Each character
+     * costs 3 output columns if it has to be %XX-encoded, 2 if it is a
+     * quote or backslash that needs escaping in a quoted string, and 1
+     * otherwise.
+     *
+     * Just so there's no confusion: budget counts OUTPUT characters
+     * (after encoding/escaping); consumed counts INPUT characters.
+     */
+
+    for (cp = start; *cp != '\0'; cp++) {
+        int cost;
+
+        if (enc)
+            cost = isparamencode(*cp) ? 3 : 1;
+        else
+            cost = (*cp == '"' || *cp == '\\') ? 2 : 1;
+
+        total += cost;
+        budget -= cost;
+
+        if (! overflowed) {
+            if (budget >= 0)
+                consumed++;
+            else
+                overflowed = 1;
+        }
+    }
+
+    /*
+     * Unencoded values are wrapped in quotes.
+     */
+
+    if (! enc)
+        total += 2;
+
+    if (consumed < 1)
+        consumed = 1;
+
+    *cont = overflowed;
+    *bytesfit = consumed;
+
+    return total;
+}
+
+/*
+ * Output an encoded (RFC 2231 style) parameter section.
+ *
+ * Arguments include
+ *
+ * pm       - The parameter being output.
+ * output   - Buffer that receives "*=", the optional charset'lang'
+ *            prefix, and the percent-encoded value.
+ * len      - Size of the output buffer in bytes.
+ * valuelen - Number of bytes of the parameter value to consume.
+ * valueoff - Offset into the parameter value to start from.
+ * index    - Section index; the charset and language tag are only
+ *            output when this is 0.
+ *
+ * The result is always NUL-terminated.  Returns the number of characters
+ * written (not counting the NUL), or 0 after reporting an error if the
+ * output, including the terminating NUL, would not fit in len bytes.
+ * Space is checked BEFORE each write so nothing is ever stored at or
+ * beyond output + len.
+ */
+
+size_t
+encode_param(PM pm, char *output, size_t len, size_t valuelen,
+             size_t valueoff, int index)
+{
+    size_t outlen = 0;
+    const char *p;
+    int n;
+
+    /*
+     * First, output the marker for an encoded string.  We need room
+     * for the two marker characters plus the terminating NUL.
+     */
+
+    if (len < 3)
+        goto overflow;
+
+    output[outlen++] = '*';
+    output[outlen++] = '=';
+
+    /*
+     * If the index is 0, output the character set and language tag.
+     * If these were NULL, they should have already been filled in
+     * by param_len().  A return value >= the available space means
+     * snprintf() truncated the output.
+     */
+
+    if (index == 0) {
+        n = snprintf(output + outlen, len - outlen, "%s'%s'", pm->pm_charset,
+                     pm->pm_lang);
+        if (n < 0 || (size_t) n >= len - outlen)
+            goto overflow;
+        outlen += n;
+    }
+
+    /*
+     * Copy over the value, encoding if necessary.  From here on there
+     * is always at least one byte left for the terminating NUL.
+     */
+
+    p = pm->pm_value + valueoff;
+    while (valuelen-- > 0) {
+        if (isparamencode(*p)) {
+            /* "%XX" plus the NUL */
+            if (len - outlen < 4)
+                goto overflow;
+            n = snprintf(output + outlen, len - outlen, "%%%02X",
+                         (unsigned char) *p++);
+            if (n < 0)
+                goto overflow;
+            outlen += n;
+        } else {
+            /* One character plus the NUL */
+            if (len - outlen < 2)
+                goto overflow;
+            output[outlen++] = *p++;
+        }
+    }
+
+    output[outlen] = '\0';
+
+    return outlen;
+
+overflow:
+    advise(NULL, "Internal error: parameter buffer overflow");
+    return 0;
+}
+
+/*
+ * Output a "normal" parameter section, without encoding: an equal sign
+ * followed by the value as a quoted string.  Quotes and backslashes in
+ * the value are escaped with a backslash.
+ *
+ * Arguments include
+ *
+ * pm       - The parameter being output.
+ * output   - Buffer that receives the text.
+ * len      - Size of the output buffer in bytes.
+ * valuelen - Number of bytes of the parameter value to consume.
+ * valueoff - Offset into the parameter value to start from.
+ *
+ * The result is always NUL-terminated.  Returns the number of characters
+ * written (not counting the NUL), or 0 after reporting an error if the
+ * output would not fit in len bytes.  Space is checked BEFORE each
+ * write, and two bytes are always held back for the closing quote and
+ * the NUL, so nothing is ever stored at or beyond output + len.
+ */
+
+static size_t
+normal_param(PM pm, char *output, size_t len, size_t valuelen,
+             size_t valueoff)
+{
+    size_t outlen = 0;
+    const char *p = pm->pm_value + valueoff;
+
+    /*
+     * The shortest possible output is ="" followed by the NUL.
+     */
+
+    if (len < 4)
+        goto overflow;
+
+    output[outlen++] = '=';
+    output[outlen++] = '"';
+
+    while (valuelen-- > 0) {
+        /* Escaped characters take two output bytes, others one */
+        size_t need = (*p == '\\' || *p == '"') ? 2 : 1;
+
+        /* Keep two bytes back for the closing quote and the NUL */
+        if (len - outlen < need + 2)
+            goto overflow;
+
+        if (need == 2)
+            output[outlen++] = '\\';
+        output[outlen++] = *p++;
+    }
+
+    output[outlen++] = '"';
+    output[outlen] = '\0';
+
+    return outlen;
+
+overflow:
+    advise(NULL, "Internal error: parameter buffer overflow");
+    return 0;
+}
+
+/*
+ * Append a new parameter to the end of a parameter linked list.
+ *
+ * first and last point at the list's head and tail pointers; both are
+ * updated as needed.  When nocopy is zero, name and value are duplicated
+ * with getcpy(); otherwise the pointers passed in are stored directly in
+ * the new entry.
+ *
+ * Returns the newly created entry.
+ */
+
+PM
+add_param(PM *first, PM *last, char *name, char *value, int nocopy)
+{
+    PM node;
+
+    NEW0(node);
+
+    if (nocopy) {
+        node->pm_name = name;
+        node->pm_value = value;
+    } else {
+        node->pm_name = getcpy(name);
+        node->pm_value = getcpy(value);
+    }
+
+    /* Hook the entry in after the current tail, or start a new list */
+    if (*first == NULL)
+        *first = node;
+    else
+        (*last)->pm_next = node;
+    *last = node;
+
+    return node;
+}
+
+/*
+ * Give the named parameter a new value.  If a parameter with that name
+ * (compared without regard to case) is already on the list, its old
+ * value is freed and replaced; otherwise the parameter is appended via
+ * add_param().
+ *
+ * Returns the entry that now holds the value.
+ */
+
+PM
+replace_param(PM *first, PM *last, char *name, char *value, int nocopy)
+{
+    PM cur = *first;
+
+    while (cur != NULL && strcasecmp(name, cur->pm_name) != 0)
+        cur = cur->pm_next;
+
+    if (cur == NULL)
+        return add_param(first, last, name, value, nocopy);
+
+    /*
+     * If nocopy is set, it's assumed that we own both name and value.
+     * The existing entry keeps its own name, so ours is discarded now.
+     */
+    if (nocopy)
+        free(name);
+
+    free(cur->pm_value);
+    cur->pm_value = nocopy ? value : getcpy(value);
+
+    return cur;
+}
+
+/*
+ * Find a parameter by name (compared without regard to case) in a
+ * parameter linked list and return its value.
+ *
+ * If fetchonly is nonzero, the stored pm_value pointer is returned as
+ * is.  Otherwise the value is passed through get_param_value(), which
+ * converts it to the local character set if necessary, and a getcpy()
+ * copy of the result is returned.
+ *
+ * Returns NULL if no parameter with that name is on the list.
+ */
+
+char *
+get_param(PM first, const char *name, char replace, int fetchonly)
+{
+    PM pm;
+
+    for (pm = first; pm != NULL; pm = pm->pm_next) {
+        if (strcasecmp(name, pm->pm_name) != 0)
+            continue;
+
+        return fetchonly ? pm->pm_value
+                         : getcpy(get_param_value(pm, replace));
+    }
+
+    return NULL;
+}
+
+/*
+ * Return a parameter value, converting to the local character set if
+ * necessary.
+ *
+ * pm - The parameter whose value is wanted.
+ * replace - Character substituted for anything that cannot be
+ * represented after conversion.
+ *
+ * The returned pointer is either pm->pm_value itself (when no conversion
+ * is needed) or the function's static buffer. In the latter case the
+ * contents are overwritten by the next call that converts, and values
+ * that do not fit in the buffer are truncated by the fallback path.
+ * Callers that need to keep the result should copy it.
+ */
+
+char *get_param_value(PM pm, char replace)
+{
+ static char buffer[4096]; /* I hope no parameters are larger */
+ size_t bufsize = sizeof(buffer);
+#ifdef HAVE_ICONV
+ size_t inbytes;
+ int utf8;
+ iconv_t cd;
+ ICONV_CONST char *p;
+#else /* HAVE_ICONV */
+ char *p;
+#endif /* HAVE_ICONV */
+
+ char *q;
+
+ /*
+ * If we don't have a character set indicated, it's assumed to be
+ * US-ASCII. If it matches our character set, we don't need to convert
+ * anything.
+ */
+
+ if (!pm->pm_charset || check_charset(pm->pm_charset,
+ strlen(pm->pm_charset))) {
+ return pm->pm_value;
+ }
+
+ /*
+ * In this case, we need to convert. If we have iconv support, use
+ * that. Otherwise, go through and simply replace every non-ASCII
+ * character with the substitution character.
+ */
+
+#ifdef HAVE_ICONV
+ q = buffer;
+ bufsize = sizeof(buffer);
+ utf8 = strcasecmp(pm->pm_charset, "UTF-8") == 0;
+
+ /*
+ * If no converter is available for this pair of character sets,
+ * fall back to plain substitution.
+ */
+
+ cd = iconv_open(get_charset(), pm->pm_charset);
+ if (cd == (iconv_t) -1) {
+ goto noiconv;
+ }
+
+ inbytes = strlen(pm->pm_value);
+ p = pm->pm_value;
+
+ while (inbytes) {
+ if (iconv(cd, &p, &inbytes, &q, &bufsize) == (size_t)-1) {
+ /*
+ * Only an invalid input sequence (EILSEQ) is handled here;
+ * any other failure abandons the conversion and falls back
+ * to plain substitution.
+ */
+ if (errno != EILSEQ) {
+ iconv_close(cd);
+ goto noiconv;
+ }
+ /*
+ * Reset shift state, substitute our character,
+ * try to restart conversion.
+ */
+
+ iconv(cd, NULL, NULL, &q, &bufsize);
+
+ /*
+ * Give up on iconv if there is no room for the replacement
+ * character, or no room left after storing it.
+ */
+
+ if (bufsize == 0) {
+ iconv_close(cd);
+ goto noiconv;
+ }
+ *q++ = replace;
+ bufsize--;
+ if (bufsize == 0) {
+ iconv_close(cd);
+ goto noiconv;
+ }
+ /*
+ * Step over the offending input. For UTF-8 input, skip the
+ * first byte plus any following continuation bytes (those of
+ * the form 10xxxxxx); for other character sets skip a single
+ * byte.
+ */
+ if (utf8) {
+ for (++p, --inbytes;
+ inbytes > 0 && (((unsigned char) *p) & 0xc0) == 0x80;
+ ++p, --inbytes)
+ continue;
+ } else {
+ p++;
+ inbytes--;
+ }
+ }
+ }
+
+ iconv_close(cd);
+
+ /*
+ * If the buffer is completely full, overwrite the last byte so the
+ * result is still NUL-terminated.
+ */
+
+ if (bufsize == 0)
+ q--;
+ *q = '\0';
+
+ return buffer;
+
+noiconv:
+#endif /* HAVE_ICONV */
+
+ /*
+ * Take everything non-ASCII and substitute the replacement character.
+ * The output is restarted from the beginning of the buffer, and the
+ * loop stops early enough to leave room for the terminating NUL.
+ */
+
+ q = buffer;
+ bufsize = sizeof(buffer);
+ for (p = pm->pm_value; *p != '\0' && bufsize > 1; p++, q++, bufsize--) {
+ /* FIXME: !iscntrl should perhaps be isprint as that allows all
+ * classes bar cntrl, whereas the cntrl class can include those
+ * in space and blank.
+ * http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html */
+ if (isascii((unsigned char) *p) && !iscntrl((unsigned char) *p))
+ *q = *p;
+ else
+ *q = replace;
+ }
+
+ *q = '\0';
+
+ return buffer;
+}