X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/50d4d5ca6977bf7224f5ae0aa31f8ee7964df5a3..7711f3fc00259e55f630cfe6104eff3083dc9d77:/uip/mhparse.c diff --git a/uip/mhparse.c b/uip/mhparse.c index ebfd67d8..00fa61bd 100644 --- a/uip/mhparse.c +++ b/uip/mhparse.c @@ -23,8 +23,6 @@ extern int debugsw; -extern pid_t xpid; /* mhshowsbr.c */ - /* cache policies */ extern int rcachesw; /* mhcachesbr.c */ extern int wcachesw; /* mhcachesbr.c */ @@ -36,10 +34,20 @@ int checksw = 0; /* check Content-MD5 field */ * 1) Instruct parser not to detect invalid Content-Transfer-Encoding * in a multipart. * 2) Suppress the warning about bogus multipart content, and report it. + * 3) Suppress the warning about extraneous trailing ';' in header parameter + * lists, and report it. */ int skip_mp_cte_check; int suppress_bogus_mp_content_warning; int bogus_mp_content; +int suppress_extraneous_trailing_semicolon_warning; +int extraneous_trailing_semicolon; + +/* list of preferred type/subtype pairs, for -prefer */ +char *preferred_types[NPREFS], + *preferred_subtypes[NPREFS]; +int npreferred; + /* * Structures for TEXT messages @@ -61,6 +69,7 @@ struct k2v SubMultiPart[] = { { "alternative", MULTI_ALTERNATE }, { "digest", MULTI_DIGEST }, { "parallel", MULTI_PARALLEL }, + { "related", MULTI_RELATED }, { NULL, MULTI_UNKNOWN } /* this one must be last! */ }; @@ -101,7 +110,7 @@ static struct k2v EncodingType[] = { int find_cache (CT, int, int *, char *, char *, int); /* mhmisc.c */ -int part_ok (CT, int); +int part_ok (CT); int type_ok (CT, int); void content_error (char *, CT, char *, ...); @@ -117,7 +126,8 @@ static int get_comment (const char *, const char *, char **, char **); static int InitGeneric (CT); static int InitText (CT); static int InitMultiPart (CT); -void reverse_parts (CT); +static void reverse_parts (CT); +static void prefer_parts(CT ct); static int InitMessage (CT); static int InitApplication (CT); static int init_encoding (CT, OpenCEFunc); @@ -183,19 +193,6 @@ struct str2init str2methods[] = { }; -int -pidcheck (int status) -{ - if ((status & 0xff00) == 0xff00 || (status & 0x007f) != SIGQUIT) - return status; - - fflush (stdout); - fflush (stderr); - done (1); - return 1; -} - - /* * Main entry point for parsing a MIME message or file. * It returns the Content structure for the top level @@ -209,6 +206,7 @@ parse_mime (char *file) char buffer[BUFSIZ]; FILE *fp; CT ct; + size_t n; /* * Check if file is actually standard input @@ -222,8 +220,13 @@ parse_mime (char *file) } file = add (tfile, NULL); - while (fgets (buffer, sizeof(buffer), stdin)) - fputs (buffer, fp); + while ((n = fread(buffer, 1, sizeof(buffer), stdin)) > 0) { + if (fwrite(buffer, 1, n, fp) != n) { + (void) m_unlink (file); + advise (file, "error copying to temporary file"); + return NULL; + } + } fflush (fp); if (ferror (stdin)) { @@ -292,7 +295,7 @@ get_content (FILE *in, char *file, int toplevel) m_getfld_state_t gstate = 0; /* allocate the content structure */ - if (!(ct = (CT) calloc (1, sizeof(*ct)))) + if (!(ct = (CT) mh_xcalloc (1, sizeof(*ct)))) adios (NULL, "out of memory"); ct->c_fp = in; @@ -838,7 +841,7 @@ magic_skip: if (ct->c_dispo_type && !get_param(ct->c_dispo_first, "filename", '_', 1)) { add_param(&ct->c_dispo_first, &ct->c_dispo_last, "filename", - r1bindex(ci->ci_magic, '/')); + r1bindex(ci->ci_magic, '/'), 0); } } else @@ -1012,7 +1015,6 @@ InitText (CT ct) char *chset = NULL; char *cp; PM pm; - struct k2v *kv; struct text *t; CI ci = &ct->c_ctinfo; @@ -1021,13 +1023,10 @@ InitText (CT ct) ci->ci_subtype = add ("plain", ci->ci_subtype); /* match subtype */ - for (kv = SubText; kv->kv_key; kv++) - if (!strcasecmp (ci->ci_subtype, kv->kv_key)) - break; - ct->c_subtype = kv->kv_value; + ct->c_subtype = ct_str_subtype (CT_TEXT, ci->ci_subtype); /* allocate text character set structure */ - if ((t = (struct text *) calloc (1, sizeof(*t))) == NULL) + if ((t = (struct text *) mh_xcalloc (1, sizeof(*t))) == NULL) adios (NULL, "out of memory"); ct->c_ctparams = (void *) t; @@ -1072,9 +1071,11 @@ InitMultiPart (CT ct) long last, pos; char *cp, *dp; PM pm; - char *bp, buffer[BUFSIZ]; + char *bp; + char *bufp = NULL; + size_t buflen; + ssize_t gotlen; struct multipart *m; - struct k2v *kv; struct part *part, **next; CI ci = &ct->c_ctinfo; CT p; @@ -1106,10 +1107,7 @@ InitMultiPart (CT ct) } /* match subtype */ - for (kv = SubMultiPart; kv->kv_key; kv++) - if (!strcasecmp (ci->ci_subtype, kv->kv_key)) - break; - ct->c_subtype = kv->kv_value; + ct->c_subtype = ct_str_subtype (CT_MULTIPART, ci->ci_subtype); /* * Check for "boundary" parameter, which is @@ -1132,7 +1130,7 @@ InitMultiPart (CT ct) } /* allocate primary structure for multipart info */ - if ((m = (struct multipart *) calloc (1, sizeof(*m))) == NULL) + if ((m = (struct multipart *) mh_xcalloc (1, sizeof(*m))) == NULL) adios (NULL, "out of memory"); ct->c_ctparams = (void *) m; @@ -1166,24 +1164,25 @@ InitMultiPart (CT ct) part = NULL; inout = 1; - while (fgets (buffer, sizeof(buffer) - 1, fp)) { + while ((gotlen = getline(&bufp, &buflen, fp)) != -1) { if (pos > last) break; - pos += strlen (buffer); - if (buffer[0] != '-' || buffer[1] != '-') + pos += gotlen; + if (bufp[0] != '-' || bufp[1] != '-') continue; if (inout) { - if (strcmp (buffer + 2, m->mp_start)) + if (strcmp (bufp + 2, m->mp_start)) continue; next_part: - if ((part = (struct part *) calloc (1, sizeof(*part))) == NULL) + if ((part = (struct part *) mh_xcalloc (1, sizeof(*part))) == NULL) adios (NULL, "out of memory"); *next = part; next = &part->mp_next; if (!(p = get_content (fp, ct->c_file, ct->c_subtype == MULTI_DIGEST ? -1 : 0))) { + free(bufp); ct->c_fp = NULL; return NOTOK; } @@ -1193,18 +1192,18 @@ next_part: fseek (fp, pos, SEEK_SET); inout = 0; } else { - if (strcmp (buffer + 2, m->mp_start) == 0) { + if (strcmp (bufp + 2, m->mp_start) == 0) { inout = 1; end_part: p = part->mp_part; - p->c_end = ftell(fp) - (strlen(buffer) + 1); + p->c_end = ftell(fp) - (gotlen + 1); if (p->c_end < p->c_begin) p->c_begin = p->c_end; if (inout) goto next_part; goto last_part; } else { - if (strcmp (buffer + 2, m->mp_stop) == 0) + if (strcmp (bufp + 2, m->mp_stop) == 0) goto end_part; } } @@ -1231,8 +1230,10 @@ end_part: last_part: /* reverse the order of the parts for multipart/alternative */ - if (ct->c_subtype == MULTI_ALTERNATE) + if (ct->c_subtype == MULTI_ALTERNATE) { reverse_parts (ct); + prefer_parts (ct); + } /* * label all subparts with part number, and @@ -1259,6 +1260,7 @@ last_part: /* initialize the content of the subparts */ if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) { + free(bufp); fclose (ct->c_fp); ct->c_fp = NULL; return NOTOK; @@ -1269,6 +1271,7 @@ last_part: get_leftover_mp_content (ct, 1); get_leftover_mp_content (ct, 0); + free(bufp); fclose (ct->c_fp); ct->c_fp = NULL; return OK; @@ -1276,10 +1279,16 @@ last_part: /* - * reverse the order of the parts of a multipart/alternative + * reverse the order of the parts of a multipart/alternative, + * presumably to put the "most favored" alternative first, for + * ease of choosing/displaying it later on. from a mail message on + * nmh-workers, from kenh: + * "Stock" MH 6.8.5 did not have a reverse_parts() function, but I + * see code in mhn that did the same thing... Acccording to the RCS + * logs, that code was around from the initial checkin of mhn.c by + * John Romine in 1992, which is as far back as we have." */ - -void +static void reverse_parts (CT ct) { struct multipart *m = (struct multipart *) ct->c_ctparams; @@ -1295,6 +1304,84 @@ reverse_parts (CT ct) } } +static void +move_preferred_part (CT ct, char *type, char *subtype) +{ + struct multipart *m = (struct multipart *) ct->c_ctparams; + struct part *part, *next, *prev, *head, *nhead, *ntail; + struct part h, n; + CI ci; + + /* move the matching part(s) to the head of the list: walk the + * list of parts, move matching parts to a new list (maintaining + * their order), and finally, concatenate the old list onto the + * new. + */ + + head = &h; + nhead = &n; + + head->mp_next = m->mp_parts; + nhead->mp_next = NULL; + ntail = nhead; + + prev = head; + part = head->mp_next; + while (part != NULL) { + ci = &part->mp_part->c_ctinfo; + if (!strcasecmp(ci->ci_type, type) && + (!subtype || !strcasecmp(ci->ci_subtype, subtype))) { + prev->mp_next = part->mp_next; + part->mp_next = NULL; + ntail->mp_next = part; + ntail = part; + part = prev->mp_next; + } else { + prev = part; + part = prev->mp_next; + } + } + ntail->mp_next = head->mp_next; + m->mp_parts = nhead->mp_next; + +} + +/* + * move parts that match the user's preferences (-prefer) to the head + * of the line. process preferences in reverse so first one given + * ends up first in line + */ +static void +prefer_parts(CT ct) +{ + int i; + for (i = npreferred-1; i >= 0; i--) + move_preferred_part(ct, preferred_types[i], preferred_subtypes[i]); +} + + + +/* parse_mime() arranges alternates in reverse (priority) order. This + function can be used to reverse them back. This will put, for + example, a text/plain part before a text/html part in a + multipart/alternative part, for example, where it belongs. */ +void +reverse_alternative_parts (CT ct) { + if (ct->c_type == CT_MULTIPART) { + struct multipart *m = (struct multipart *) ct->c_ctparams; + struct part *part; + + if (ct->c_subtype == MULTI_ALTERNATE) { + reverse_parts (ct); + } + + /* And call recursively on each part of a multipart. */ + for (part = m->mp_parts; part; part = part->mp_next) { + reverse_alternative_parts (part->mp_part); + } + } +} + /* * MESSAGE @@ -1303,7 +1390,6 @@ reverse_parts (CT ct) static int InitMessage (CT ct) { - struct k2v *kv; CI ci = &ct->c_ctinfo; if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) { @@ -1318,10 +1404,7 @@ InitMessage (CT ct) ci->ci_subtype = add ("rfc822", ci->ci_subtype); /* match subtype */ - for (kv = SubMessage; kv->kv_key; kv++) - if (!strcasecmp (ci->ci_subtype, kv->kv_key)) - break; - ct->c_subtype = kv->kv_value; + ct->c_subtype = ct_str_subtype (CT_MESSAGE, ci->ci_subtype); switch (ct->c_subtype) { case MESSAGE_RFC822: @@ -1332,7 +1415,7 @@ InitMessage (CT ct) PM pm; struct partial *p; - if ((p = (struct partial *) calloc (1, sizeof(*p))) == NULL) + if ((p = (struct partial *) mh_xcalloc (1, sizeof(*p))) == NULL) adios (NULL, "out of memory"); ct->c_ctparams = (void *) p; @@ -1381,7 +1464,7 @@ invalid_param: CT p; FILE *fp; - if ((e = (struct exbody *) calloc (1, sizeof(*e))) == NULL) + if ((e = (struct exbody *) mh_xcalloc (1, sizeof(*e))) == NULL) adios (NULL, "out of memory"); ct->c_ctparams = (void *) e; @@ -1571,14 +1654,10 @@ params_external (CT ct, int composing) static int InitApplication (CT ct) { - struct k2v *kv; CI ci = &ct->c_ctinfo; /* match subtype */ - for (kv = SubApplication; kv->kv_key; kv++) - if (!strcasecmp (ci->ci_subtype, kv->kv_key)) - break; - ct->c_subtype = kv->kv_value; + ct->c_subtype = ct_str_subtype (CT_APPLICATION, ci->ci_subtype); return OK; } @@ -1713,15 +1792,7 @@ openBase64 (CT ct, char **file) /* sbeck@cise.ufl.edu -- handle suffixes */ ci = &ct->c_ctinfo; - snprintf (buffer, sizeof(buffer), "%s-suffix-%s/%s", - invo_name, ci->ci_type, ci->ci_subtype); - cp = context_find (buffer); - if (cp == NULL || *cp == '\0') { - snprintf (buffer, sizeof(buffer), "%s-suffix-%s", invo_name, - ci->ci_type); - cp = context_find (buffer); - } - if (cp != NULL && *cp != '\0') { + if ((cp = context_find_by_type ("suffix", ci->ci_type, ci->ci_subtype))) { if (ce->ce_unlink) { /* Create temporary file with filename extension. */ if ((ce->ce_file = m_mktemps(invo_name, cp, NULL, NULL)) == NULL) { @@ -1923,7 +1994,9 @@ openQuoted (CT ct, char **file) { int cc, digested, len, quoted, own_ct_fp = 0; char *cp, *ep; - char buffer[BUFSIZ]; + char *bufp = NULL; + size_t buflen; + ssize_t gotlen; unsigned char mask; CE ce = &ct->c_cefile; /* sbeck -- handle suffixes */ @@ -1952,15 +2025,7 @@ openQuoted (CT ct, char **file) /* sbeck@cise.ufl.edu -- handle suffixes */ ci = &ct->c_ctinfo; - snprintf (buffer, sizeof(buffer), "%s-suffix-%s/%s", - invo_name, ci->ci_type, ci->ci_subtype); - cp = context_find (buffer); - if (cp == NULL || *cp == '\0') { - snprintf (buffer, sizeof(buffer), "%s-suffix-%s", invo_name, - ci->ci_type); - cp = context_find (buffer); - } - if (cp != NULL && *cp != '\0') { + if ((cp = context_find_by_type ("suffix", ci->ci_type, ci->ci_subtype))) { if (ce->ce_unlink) { /* Create temporary file with filename extension. */ if ((ce->ce_file = m_mktemps(invo_name, cp, NULL, NULL)) == NULL) { @@ -2005,16 +2070,16 @@ openQuoted (CT ct, char **file) fseek (ct->c_fp, ct->c_begin, SEEK_SET); while (len > 0) { - if (fgets (buffer, sizeof(buffer) - 1, ct->c_fp) == NULL) { + if ((gotlen = getline(&bufp, &buflen, ct->c_fp)) == -1) { content_error (NULL, ct, "premature eof"); goto clean_up; } - if ((cc = strlen (buffer)) > len) + if ((cc = gotlen) > len) cc = len; len -= cc; - for (ep = (cp = buffer) + cc - 1; cp <= ep; ep--) + for (ep = (cp = bufp) + cc - 1; cp <= ep; ep--) if (!isspace ((unsigned char) *ep)) break; *++ep = '\n', ep++; @@ -2119,6 +2184,7 @@ ready_to_go: fclose (ct->c_fp); ct->c_fp = NULL; } + free (bufp); return fileno (ce->ce_fp); clean_up: @@ -2127,6 +2193,7 @@ clean_up: fclose (ct->c_fp); ct->c_fp = NULL; } + free (bufp); return NOTOK; } @@ -2178,15 +2245,7 @@ open7Bit (CT ct, char **file) /* sbeck@cise.ufl.edu -- handle suffixes */ ci = &ct->c_ctinfo; - snprintf (buffer, sizeof(buffer), "%s-suffix-%s/%s", - invo_name, ci->ci_type, ci->ci_subtype); - cp = context_find (buffer); - if (cp == NULL || *cp == '\0') { - snprintf (buffer, sizeof(buffer), "%s-suffix-%s", invo_name, - ci->ci_type); - cp = context_find (buffer); - } - if (cp != NULL && *cp != '\0') { + if ((cp = context_find_by_type ("suffix", ci->ci_type, ci->ci_subtype))) { if (ce->ce_unlink) { /* Create temporary file with filename extension. */ if ((ce->ce_file = m_mktemps(invo_name, cp, NULL, NULL)) == NULL) { @@ -2274,7 +2333,9 @@ open7Bit (CT ct, char **file) cc = len; len -= cc; - fwrite (buffer, sizeof(*buffer), cc, ce->ce_fp); + if ((int) fwrite (buffer, sizeof(*buffer), cc, ce->ce_fp) < cc) { + advise ("open7Bit", "fwrite"); + } if (ferror (ce->ce_fp)) { content_error (ce->ce_file, ct, "error writing to"); goto clean_up; @@ -2341,6 +2402,7 @@ openExternal (CT ct, CT cb, CE ce, char **file, int *fd) } } + *fd = fileno (ce->ce_fp); return OK; ready_already: @@ -2408,7 +2470,9 @@ openFile (CT ct, char **file) while ((cc = fread (buffer, sizeof(*buffer), sizeof(buffer), gp)) > 0) - fwrite (buffer, sizeof(*buffer), cc, fp); + if ((int) fwrite (buffer, sizeof(*buffer), cc, fp) < cc) { + advise ("openFile", "fwrite"); + } fflush (fp); if (ferror (gp)) { @@ -2478,13 +2542,6 @@ openFTP (CT ct, char **file) return NOTOK; } - if (xpid) { - if (xpid < 0) - xpid = -xpid; - pidcheck (pidwait (xpid, NOTOK)); - xpid = 0; - } - /* Get the buffer ready to go */ bp = buffer; buflen = sizeof(buffer); @@ -2624,7 +2681,9 @@ openFTP (CT ct, char **file) while ((cc= fread (buffer, sizeof(*buffer), sizeof(buffer), gp)) > 0) - fwrite (buffer, sizeof(*buffer), cc, fp); + if ((int) fwrite (buffer, sizeof(*buffer), cc, fp) < cc) { + advise ("openFTP", "fwrite"); + } fflush (fp); if (ferror (gp)) { @@ -2684,13 +2743,6 @@ openMail (CT ct, char **file) return NOTOK; } - if (xpid) { - if (xpid < 0) - xpid = -xpid; - pidcheck (pidwait (xpid, NOTOK)); - xpid = 0; - } - /* Get buffer ready to go */ bp = buffer; buflen = sizeof(buffer); @@ -2821,13 +2873,6 @@ openURL (CT ct, char **file) return NOTOK; } - if (xpid) { - if (xpid < 0) - xpid = -xpid; - pidcheck (pidwait (xpid, NOTOK)); - xpid = 0; - } - ce->ce_unlink = (*file == NULL); caching = 0; cachefile[0] = '\0'; @@ -2898,7 +2943,9 @@ openURL (CT ct, char **file) while ((cc = fread(buffer, sizeof(*buffer), sizeof(buffer), gp)) > 0) - fwrite(buffer, sizeof(*buffer), cc, fp); + if ((int) fwrite(buffer, sizeof(*buffer), cc, fp) < cc) { + advise ("openURL", "fwrite"); + } fflush(fp); @@ -2996,12 +3043,15 @@ invalid_digest: after the last subpart that hasn't been stored anywhere else, so do that. */ int -get_leftover_mp_content (CT ct, int before /* or after */) { +get_leftover_mp_content (CT ct, int before /* or after */) +{ struct multipart *m = (struct multipart *) ct->c_ctparams; char *boundary; int found_boundary = 0; - char buffer[BUFSIZ]; int max = BUFSIZ; + char *bufp = NULL; + size_t buflen; + ssize_t gotlen; int read = 0; char *content = NULL; @@ -3034,18 +3084,18 @@ get_leftover_mp_content (CT ct, int before /* or after */) { } /* Back up by 1 to pick up the newline. */ - while (fgets (buffer, sizeof(buffer) - 1, ct->c_fp)) { - read += strlen (buffer); + while ((gotlen = getline(&bufp, &buflen, ct->c_fp)) != -1) { + read += gotlen; /* Don't look beyond beginning of first subpart (before) or next part (after). */ - if (read > max) buffer[read-max] = '\0'; + if (read > max) bufp[read-max] = '\0'; if (before) { - if (! strcmp (buffer, boundary)) { + if (! strcmp (bufp, boundary)) { found_boundary = 1; } } else { - if (! found_boundary && ! strcmp (buffer, boundary)) { + if (! found_boundary && ! strcmp (bufp, boundary)) { found_boundary = 1; continue; } @@ -3054,12 +3104,12 @@ get_leftover_mp_content (CT ct, int before /* or after */) { if ((before && ! found_boundary) || (! before && found_boundary)) { if (content) { char *old_content = content; - content = concat (content, buffer, NULL); + content = concat (content, bufp, NULL); free (old_content); } else { content = before - ? concat ("\n", buffer, NULL) - : concat (buffer, NULL); + ? concat ("\n", bufp, NULL) + : concat (bufp, NULL); } } @@ -3093,6 +3143,7 @@ get_leftover_mp_content (CT ct, int before /* or after */) { } free (boundary); + free (bufp); return OK; } @@ -3156,6 +3207,8 @@ ct_subtype_str (int type, int subtype) { return "digest"; case MULTI_PARALLEL: return "parallel"; + case MULTI_RELATED: + return "related"; default: return "unknown_multipart_subtype"; } @@ -3176,6 +3229,62 @@ ct_subtype_str (int type, int subtype) { } +int +ct_str_type (const char *type) { + struct str2init *s2i; + + for (s2i = str2cts; s2i->si_key; ++s2i) { + if (! strcasecmp (type, s2i->si_key)) { + break; + } + } + if (! s2i->si_key && ! uprf (type, "X-")) { + ++s2i; + } + + return s2i->si_val; +} + + +int +ct_str_subtype (int type, const char *subtype) { + struct k2v *kv; + + switch (type) { + case CT_APPLICATION: + for (kv = SubApplication; kv->kv_key; ++kv) { + if (! strcasecmp (subtype, kv->kv_key)) { + break; + } + } + return kv->kv_value; + case CT_MESSAGE: + for (kv = SubMessage; kv->kv_key; ++kv) { + if (! strcasecmp (subtype, kv->kv_key)) { + break; + } + } + return kv->kv_value; + case CT_MULTIPART: + for (kv = SubMultiPart; kv->kv_key; ++kv) { + if (! strcasecmp (subtype, kv->kv_key)) { + break; + } + } + return kv->kv_value; + case CT_TEXT: + for (kv = SubText; kv->kv_key; ++kv) { + if (! strcasecmp (subtype, kv->kv_key)) { + break; + } + } + return kv->kv_value; + default: + return 0; + } +} + + /* Find the content type and InitFunc for the CT. */ const struct str2init * get_ct_init (int type) { @@ -3252,9 +3361,23 @@ parse_header_attrs (const char *filename, const char *fieldname, { char *cp = *header_attrp; PM pm; + struct sectlist { + char *value; + int index; + int len; + struct sectlist *next; + } *sp, *sp2; + struct parmlist { + char *name; + char *charset; + char *lang; + struct sectlist *sechead; + struct parmlist *next; + } *pp, *pp2, *phead = NULL; while (*cp == ';') { - char *dp, *vp, *up, c; + char *dp, *vp, *up, *nameptr, *valptr, *charset = NULL, *lang = NULL; + int encoded = 0, partial = 0, len = 0, index = 0; cp++; while (isspace ((unsigned char) *cp)) @@ -3266,10 +3389,13 @@ parse_header_attrs (const char *filename, const char *fieldname, } if (*cp == 0) { - advise (NULL, - "extraneous trailing ';' in message %s's %s: " - "parameter list", - filename, fieldname); + if (! suppress_extraneous_trailing_semicolon_warning) { + advise (NULL, + "extraneous trailing ';' in message %s's %s: " + "parameter list", + filename, fieldname); + } + extraneous_trailing_semicolon = 1; return DONE; } @@ -3288,60 +3414,299 @@ parse_header_attrs (const char *filename, const char *fieldname, return NOTOK; } - pm = mh_xmalloc(sizeof(*pm)); - memset(pm, 0, sizeof(*pm)); + /* + * To handle RFC 2231, we have to deal with the following extensions: + * + * name*=encoded-value + * name*=part-N-of-a-parameter-value + * name**=encoded-part-N-of-a-parameter-value + * + * So the rule is: + * If there's a * right before the equal sign, it's encoded. + * If there's a * and one or more digits, then it's section N. + * + * Remember we can have one or the other, or both. cp points to + * beginning of name, up points past the last character in the + * parameter name. + */ + + for (vp = cp; vp < up; vp++) { + if (*vp == '*' && vp < up - 1) { + partial = 1; + continue; + } else if (*vp == '*' && vp == up - 1) { + encoded = 1; + } else if (partial) { + if (isdigit((unsigned char) *vp)) + index = *vp - '0' + index * 10; + else { + advise (NULL, "invalid parameter index in message %s's " + "%s: field\n%*s(parameter %s)", filename, + fieldname, strlen(invo_name) + 2, "", cp); + return NOTOK; + } + } else { + len++; + } + } + + /* + * Break out the parameter name and value sections and allocate + * memory for each. + */ + + nameptr = mh_xmalloc(len + 1); + strncpy(nameptr, cp, len); + nameptr[len] = '\0'; - /* This is all mega-bozo and needs cleanup */ - vp = (pm->pm_name = add (cp, NULL)) + (up - cp); - *vp = '\0'; for (dp++; isspace ((unsigned char) *dp);) dp++; - /* Now store the attribute value. */ + if (encoded) { + /* + * Single quotes delimit the character set and language tag. + * They are required on the first section (or a complete + * parameter). + */ + if (index == 0) { + vp = dp; + while (*vp != '\'' && !isspace((unsigned char) *vp) && + *vp != '\0') + vp++; + if (*vp == '\'') { + if (vp != dp) { + len = vp - dp; + charset = mh_xmalloc(len + 1); + strncpy(charset, dp, len); + charset[len] = '\0'; + } else { + charset = NULL; + } + vp++; + } else { + advise(NULL, "missing charset in message %s's %s: " + "field\n%*s(parameter %s)", filename, fieldname, + strlen(invo_name) + 2, "", nameptr); + free(nameptr); + return NOTOK; + } + dp = vp; + + while (*vp != '\'' && !isspace((unsigned char) *vp) && + *vp != '\0') + vp++; + + if (*vp == '\'') { + if (vp != dp) { + len = vp - dp; + lang = mh_xmalloc(len + 1); + strncpy(lang, dp, len); + lang[len] = '\0'; + } else { + lang = NULL; + } + vp++; + } else { + advise(NULL, "missing language tag in message %s's %s: " + "field\n%*s(parameter %s)", filename, fieldname, + strlen(invo_name) + 2, "", nameptr); + free(nameptr); + if (charset) + free(charset); + return NOTOK; + } + + dp = vp; + } + + /* + * At this point vp should be pointing at the beginning + * of the encoded value/section. Continue until we reach + * the end or get whitespace. But first, calculate the + * length so we can allocate the correct buffer size. + */ + + for (vp = dp, len = 0; istoken(*vp); vp++) { + if (*vp == '%') { + if (*(vp + 1) == '\0' || + !isxdigit((unsigned char) *(vp + 1)) || + *(vp + 2) == '\0' || + !isxdigit((unsigned char) *(vp + 2))) { + advise(NULL, "invalid encoded sequence in message " + "%s's %s: field\n%*s(parameter %s)", + filename, fieldname, strlen(invo_name) + 2, + "", nameptr); + free(nameptr); + if (charset) + free(charset); + if (lang) + free(lang); + return NOTOK; + } + vp += 2; + } + len++; + } + + up = valptr = mh_xmalloc(len + 1); - vp = pm->pm_name + (dp - cp); + for (vp = dp; istoken(*vp); vp++) { + if (*vp == '%') { + *up++ = decode_qp(*(vp + 1), *(vp + 2)); + vp += 2; + } else { + *up++ = *vp; + } + } - if (*dp == '"') { - for (cp = ++dp, dp = vp;;) { - switch (c = *cp++) { + *up = '\0'; + cp = vp; + } else { + /* + * A "normal" string. If it's got a leading quote, then we + * strip the quotes out. Otherwise go until we reach the end + * or get whitespace. Note we scan it twice; once to get the + * length, then the second time copies it into the destination + * buffer. + */ + + len = 0; + + if (*dp == '"') { + for (cp = dp + 1;;) { + switch (*cp++) { case '\0': bad_quote: advise (NULL, "invalid quoted-string in message %s's %s: " "field\n%*s(parameter %s)", filename, fieldname, strlen(invo_name) + 2, "", - pm->pm_name); + nameptr); + free(nameptr); + if (charset) + free(charset); + if (lang) + free(lang); return NOTOK; + case '"': + break; case '\\': - *dp++ = c; - if ((c = *cp++) == '\0') + if (*++cp == '\0') goto bad_quote; - /* else fall... */ - + /* FALL THROUGH */ default: - *dp++ = c; + len++; continue; + } + break; + } - case '"': - *dp = '\0'; + } else { + for (cp = dp; istoken (*cp); cp++) { + len++; + } + } + + valptr = mh_xmalloc(len + 1); + + if (*dp == '"') { + int i; + for (cp = dp + 1, vp = valptr, i = 0; i < len; i++) { + if (*cp == '\\') { + cp++; + } + *vp++ = *cp++; + } + cp++; + } else { + strncpy(valptr, cp = dp, len); + cp += len; + } + + valptr[len] = '\0'; + } + + /* + * If 'partial' is set, we don't allocate a parameter now. We + * put it on the parameter linked list to be reassembled later. + * + * "phead" points to a list of all parameters we need to reassemble. + * Each parameter has a list of sections. We insert the sections in + * order. + */ + + if (partial) { + for (pp = phead; pp != NULL; pp = pp->next) { + if (strcasecmp(nameptr, pp->name) == 0) + break; + } + + if (pp == NULL) { + pp = mh_xmalloc(sizeof(*pp)); + memset(pp, 0, sizeof(*pp)); + pp->name = nameptr; + pp->next = phead; + phead = pp; + } + + /* + * Insert this into the section linked list + */ + + sp = mh_xmalloc(sizeof(*sp)); + memset(sp, 0, sizeof(*sp)); + sp->value = valptr; + sp->index = index; + sp->len = len; + + if (pp->sechead == NULL || pp->sechead->index > index) { + sp->next = pp->sechead; + pp->sechead = sp; + } else { + for (sp2 = pp->sechead; sp2 != NULL; sp2 = sp2->next) { + if (sp2->index == sp->index) { + advise (NULL, "duplicate index (%d) in message " + "%s's %s: field\n%*s(parameter %s)", sp->index, + filename, fieldname, strlen(invo_name) + 2, "", + nameptr); + free (nameptr); + return NOTOK; + } + if (sp2->index < sp->index && + (sp2->next == NULL || sp2->next->index > sp->index)) { + sp->next = sp2->next; + sp2->next = sp; break; + } + } + + if (sp2 == NULL) { + advise(NULL, "Internal error: cannot insert partial " + "param in message %s's %s: field\n%*s(parameter %s)", + filename, fieldname, strlen(invo_name) + 2, "", + nameptr); + free (nameptr); + return NOTOK; } - break; + } + + /* + * Save our charset and lang tags. + */ + + if (index == 0 && encoded) { + if (pp->charset) + free(pp->charset); + pp->charset = charset; + if (pp->lang) + free(pp->lang); + pp->lang = lang; } } else { - for (cp = dp, dp = vp; istoken (*cp); cp++, dp++) - continue; - *dp = '\0'; - } - pm->pm_value = getcpy(vp); - if (!*vp) { - advise (NULL, - "invalid parameter in message %s's %s: " - "field\n%*s(parameter %s)", - filename, fieldname, strlen(invo_name) + 2, "", - pm->pm_name); - return NOTOK; + pm = add_param(param_head, param_tail, nameptr, valptr, 1); + pm->pm_charset = charset; + pm->pm_lang = lang; } while (isspace ((unsigned char) *cp)) @@ -3351,20 +3716,65 @@ bad_quote: get_comment (filename, fieldname, &cp, commentp) == NOTOK) { return NOTOK; } + } - if (*param_head == NULL) { - *param_head = pm; - *param_tail = pm; - } else { - (*param_tail)->pm_next = pm; - *param_tail = pm; + /* + * Now that we're done, reassemble all of the partial parameters. + */ + + for (pp = phead; pp != NULL; ) { + char *p, *q; + size_t tlen = 0; + int pindex = 0; + for (sp = pp->sechead; sp != NULL; sp = sp->next) { + if (sp->index != pindex++) { + advise(NULL, "missing section %d for parameter in " + "message %s's %s: field\n%*s(parameter %s)", pindex - 1, + filename, fieldname, strlen(invo_name) + 2, "", + pp->name); + return NOTOK; + } + tlen += sp->len; } + + p = q = mh_xmalloc(tlen + 1); + for (sp = pp->sechead; sp != NULL; ) { + memcpy(q, sp->value, sp->len); + q += sp->len; + free(sp->value); + sp2 = sp->next; + free(sp); + sp = sp2; + } + + p[tlen] = '\0'; + + pm = add_param(param_head, param_tail, pp->name, p, 1); + pm->pm_charset = pp->charset; + pm->pm_lang = pp->lang; + pp2 = pp->next; + free(pp); + pp = pp2; } *header_attrp = cp; return OK; } +/* + * Return the charset for a particular content type. + */ + +char * +content_charset (CT ct) { + char *ret_charset = NULL; + + ret_charset = get_param(ct->c_ctinfo.ci_first_pm, "charset", '?', 0); + + return ret_charset ? ret_charset : getcpy ("US-ASCII"); +} + + /* * Create a string based on a list of output parameters. Assume that this * parameter string will be appended to an existing header, so start out @@ -3591,8 +4001,12 @@ param_len(PM pm, int index, size_t valueoff, int *encode, int *cont, * add them now. */ - if (! pm->pm_charset) + if (! pm->pm_charset) { pm->pm_charset = getcpy(write_charset_8bit()); + if (strcasecmp(pm->pm_charset, "US-ASCII") == 0) + adios(NULL, "8-bit characters in parameter \"%s\", but " + "local character set is US-ASCII", pm->pm_name); + } if (! pm->pm_lang) pm->pm_lang = getcpy(NULL); /* Default to a blank lang tag */ @@ -3772,14 +4186,14 @@ normal_param(PM pm, char *output, size_t len, size_t valuelen, */ PM -add_param(PM *first, PM *last, const char *name, const char *value) +add_param(PM *first, PM *last, char *name, char *value, int nocopy) { PM pm = mh_xmalloc(sizeof(*pm)); memset(pm, 0, sizeof(*pm)); - pm->pm_name = getcpy(name); - pm->pm_value = getcpy(value); + pm->pm_name = nocopy ? name : getcpy(name); + pm->pm_value = nocopy ? value : getcpy(value); if (*first) { (*last)->pm_next = pm; @@ -3792,6 +4206,33 @@ add_param(PM *first, PM *last, const char *name, const char *value) return pm; } +/* + * Either replace a current parameter with a new value, or add the parameter + * to the parameter linked list. + */ + +PM +replace_param(PM *first, PM *last, char *name, char *value, int nocopy) +{ + PM pm; + + for (pm = *first; pm != NULL; pm = pm->pm_next) { + if (strcasecmp(name, pm->pm_name) == 0) { + /* + * If nocopy is set, it's assumed that we own both name + * and value. We don't need name, so we discard it now. + */ + if (nocopy) + free(name); + free(pm->pm_value); + pm->pm_value = nocopy ? value : getcpy(value); + return pm; + } + } + + return add_param(first, last, name, value, nocopy); +} + /* * Retrieve a parameter value from a parameter linked list. If the parameter * value needs converted to the local character set, do that now. @@ -3827,7 +4268,10 @@ char *get_param_value(PM pm, char replace) int utf8; iconv_t cd; ICONV_CONST char *p; +#else /* HAVE_ICONV */ + char *p; #endif /* HAVE_ICONV */ + char *q; /* @@ -3902,9 +4346,10 @@ char *get_param_value(PM pm, char replace) *q = '\0'; return buffer; -#endif /* HAVE_ICONV */ noiconv: +#endif /* HAVE_ICONV */ + /* * Take everything non-ASCII and substituite the replacement character */