X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/8a14191c0a0ad15bb8f35b49748c75c1e0a8c2f4..9ffabd06f0dc8e7478b7484fceee4906990d44e2:/uip/mhparse.c?ds=inline diff --git a/uip/mhparse.c b/uip/mhparse.c index 2be97603..ca01545f 100644 --- a/uip/mhparse.c +++ b/uip/mhparse.c @@ -1,6 +1,4 @@ - -/* - * mhparse.c -- routines to parse the contents of MIME messages +/* mhparse.c -- routines to parse the contents of MIME messages * * This code is Copyright (c) 2002, by the authors of nmh. See the * COPYRIGHT file in the root directory of the nmh distribution for @@ -15,6 +13,9 @@ #include #include #include +#include +#include "../sbr/m_mktemp.h" +#include "mhfree.h" #ifdef HAVE_ICONV # include #endif /* HAVE_ICONV */ @@ -22,10 +23,6 @@ extern int debugsw; -/* cache policies */ -extern int rcachesw; /* mhcachesbr.c */ -extern int wcachesw; /* mhcachesbr.c */ - int checksw = 0; /* check Content-MD5 field */ /* @@ -57,8 +54,8 @@ int npreferred; */ struct k2v SubText[] = { { "plain", TEXT_PLAIN }, - { "richtext", TEXT_RICHTEXT }, /* defined in RFC-1341 */ - { "enriched", TEXT_ENRICHED }, /* defined in RFC-1896 */ + { "richtext", TEXT_RICHTEXT }, /* defined in RFC 1341 */ + { "enriched", TEXT_ENRICHED }, /* defined in RFC 1896 */ { NULL, TEXT_UNKNOWN } /* this one must be last! */ }; @@ -109,17 +106,11 @@ static struct k2v EncodingType[] = { }; -/* mhcachesbr.c */ -int find_cache (CT, int, int *, char *, char *, int); - /* mhmisc.c */ int part_ok (CT); int type_ok (CT, int); void content_error (char *, CT, char *, ...); -/* mhfree.c */ -void free_encoding (CT, int); - /* * static prototypes */ @@ -182,7 +173,7 @@ struct str2init str2ces[] = { /* * NOTE WELL: si_key MUST NOT have value of NOTOK * - * si_key is 1 if access method is anonymous. + * si_val is 1 if access method is anonymous. */ struct str2init str2methods[] = { { "afs", 1, InitFile }, @@ -245,8 +236,8 @@ parse_mime (char *file) return NULL; } fseek (fp, 0L, SEEK_SET); - } else if (lstat (file, &statbuf) == NOTOK) { - advise (file, "unable to lstat"); + } else if (stat (file, &statbuf) == NOTOK) { + advise (file, "unable to stat"); return NULL; } else if (S_ISDIR(statbuf.st_mode)) { /* Don't try to parse a directory. */ @@ -300,11 +291,11 @@ static CT get_content (FILE *in, char *file, int toplevel) { int compnum, state; - char buf[BUFSIZ], name[NAMESZ]; + char buf[NMH_BUFSIZ], name[NAMESZ]; char *np, *vp; CT ct; HF hp; - m_getfld_state_t gstate = 0; + m_getfld_state_t gstate; /* allocate the content structure */ NEW0(ct); @@ -316,10 +307,11 @@ get_content (FILE *in, char *file, int toplevel) * Parse the header fields for this * content into a linked list. */ - m_getfld_track_filepos (&gstate, in); + gstate = m_getfld_state_init(in); + m_getfld_track_filepos2(&gstate); for (compnum = 1;;) { int bufsz = sizeof buf; - switch (state = m_getfld (&gstate, name, buf, &bufsz, in)) { + switch (state = m_getfld2(&gstate, name, buf, &bufsz)) { case FLD: case FLDPLUS: compnum++; @@ -331,7 +323,7 @@ get_content (FILE *in, char *file, int toplevel) /* if necessary, get rest of field */ while (state == FLDPLUS) { bufsz = sizeof buf; - state = m_getfld (&gstate, name, buf, &bufsz, in); + state = m_getfld2(&gstate, name, buf, &bufsz); vp = add (buf, vp); /* add to previous value */ } @@ -343,12 +335,25 @@ get_content (FILE *in, char *file, int toplevel) continue; case BODY: + /* There are two cases. The unusual one is when there is no + * blank line between the headers and the body. This is + * indicated by the name of the header starting with `:'. + * + * For both cases, normal first, `1' is the desired c_begin + * file position for the start of the body, and `2' is the + * file position when buf is returned. + * + * f o o : b a r \n \n b o d y \n bufsz = 6 + * 1 2 move -5 + * f o o : b a r \n b o d y \n bufsz = 4 + * 1 2 move -4 + * + * For the normal case, bufsz includes the + * header-terminating `\n', even though it is not in buf, + * but bufsz isn't affected when it's missing in the unusual + * case. */ if (name[0] == ':') { - /* Special case: no blank line between header and body. The - file position indicator is on the newline at the end of the - line, but it needs to be one prior to the beginning of the - line. So subtract the length of the line, bufsz, plus 1. */ - ct->c_begin = ftell (in) - (bufsz + 1); + ct->c_begin = ftell(in) - bufsz; } else { ct->c_begin = ftell (in) - (bufsz - 1); } @@ -413,7 +418,7 @@ get_content (FILE *in, char *file, int toplevel) ucmp = !strcasecmp (cp, VRSN_VALUE); *dp = c; if (!ucmp) { - admonish (NULL, "message %s has unknown value for %s: field (%s)", + inform("message %s has unknown value for %s: field (%s), continuing...", ct->c_file, VRSN_FIELD, cp); } if (!ct->c_vrsn) { @@ -661,9 +666,11 @@ get_ctinfo (char *cp, CT ct, int magic) for (dp = cp; istoken (*dp); dp++) continue; - c = *dp, *dp = '\0'; + c = *dp; + *dp = '\0'; ci->ci_type = mh_xstrdup(cp); /* store content type */ - *dp = c, cp = dp; + *dp = c; + cp = dp; if (!*ci->ci_type) { inform("invalid %s: field in message %s (empty type)", @@ -695,9 +702,11 @@ get_ctinfo (char *cp, CT ct, int magic) for (dp = cp; istoken (*dp); dp++) continue; - c = *dp, *dp = '\0'; + c = *dp; + *dp = '\0'; ci->ci_subtype = mh_xstrdup(cp); /* store the content subtype */ - *dp = c, cp = dp; + *dp = c; + cp = dp; if (!*ci->ci_subtype) { inform("invalid %s: field in message %s (empty subtype for \"%s\")", @@ -856,8 +865,8 @@ magic_skip: } } else - inform("extraneous information in message %s's %s: field\n%*s(%s)", - ct->c_file, TYPE_FIELD, strlen(invo_name) + 2, "", cp); + inform("extraneous information in message %s's %s: field\n" + " (%s)", ct->c_file, TYPE_FIELD, cp); } return OK; @@ -908,9 +917,11 @@ get_dispo (char *cp, CT ct, int buildflag) for (dp = cp; istoken (*dp); dp++) continue; - c = *dp, *dp = '\0'; + c = *dp; + *dp = '\0'; ct->c_dispo_type = mh_xstrdup(cp); /* store disposition type */ - *dp = c, cp = dp; + *dp = c; + cp = dp; if (*cp == '(' && get_comment (ct->c_file, DISPO_FIELD, &cp, NULL) == NOTOK) return NOTOK; @@ -923,8 +934,8 @@ get_dispo (char *cp, CT ct, int buildflag) return NOTOK; } } else if (*cp) { - inform("extraneous information in message %s's %s: field\n%*s(%s)", - ct->c_file, DISPO_FIELD, strlen(invo_name) + 2, "", cp); + inform("extraneous information in message %s's %s: field\n (%s)", + ct->c_file, DISPO_FIELD, cp); } if (buildflag) @@ -1091,7 +1102,7 @@ InitMultiPart (CT ct) /* * The encoding for multipart messages must be either - * 7bit, 8bit, or binary (per RFC2045). + * 7bit, 8bit, or binary (per RFC 2045). */ if (! skip_mp_cte_check && ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT && ct->c_encoding != CE_BINARY) { @@ -1103,13 +1114,12 @@ InitMultiPart (CT ct) while (bp >= cte && isspace ((unsigned char) *bp)) *bp-- = '\0'; for (bp = cte; *bp && isblank ((unsigned char) *bp); ++bp) continue; - admonish (NULL, - "\"%s/%s\" type in message %s must be encoded in\n" - "7bit, 8bit, or binary, per RFC 2045 (6.4). " - "mhfixmsg -fixcte can fix it, or\n" - "manually edit the file and change the \"%s\"\n" - "Content-Transfer-Encoding to one of those. For now", - ci->ci_type, ci->ci_subtype, ct->c_file, bp); + inform("\"%s/%s\" type in message %s must be encoded in\n" + "7bit, 8bit, or binary, per RFC 2045 (6.4). " + "mhfixmsg -fixcte can fix it, or\n" + "manually edit the file and change the \"%s\"\n" + "Content-Transfer-Encoding to one of those. For now, continuing...", + ci->ci_type, ci->ci_subtype, ct->c_file, bp); free (cte); return NOTOK; @@ -1289,7 +1299,7 @@ last_part: * ease of choosing/displaying it later on. from a mail message on * nmh-workers, from kenh: * "Stock" MH 6.8.5 did not have a reverse_parts() function, but I - * see code in mhn that did the same thing... Acccording to the RCS + * see code in mhn that did the same thing... According to the RCS * logs, that code was around from the initial checkin of mhn.c by * John Romine in 1992, which is as far back as we have." */ @@ -1398,9 +1408,9 @@ InitMessage (CT ct) CI ci = &ct->c_ctinfo; if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) { - admonish (NULL, - "\"%s/%s\" type in message %s should be encoded in 7bit or 8bit", - ci->ci_type, ci->ci_subtype, ct->c_file); + inform("\"%s/%s\" type in message %s should be encoded in " + "7bit or 8bit, continuing...", ci->ci_type, ci->ci_subtype, + ct->c_file); return NOTOK; } @@ -1831,7 +1841,10 @@ openBase64 (CT ct, char **file) *cp = '\0'; if (decodeBase64 (buffer, &decoded, &decoded_len, ct->c_type == CT_TEXT, - ct->c_digested ? digest : NULL) == OK) { + ct->c_digested ? digest : NULL) != OK) + goto clean_up; + + { size_t i; unsigned char *decoded_p = decoded; for (i = 0; i < decoded_len; ++i) { @@ -1845,7 +1858,7 @@ openBase64 (CT ct, char **file) if (ct->c_digested) { if (memcmp(digest, ct->c_digest, - sizeof(digest) / sizeof(digest[0]))) { + sizeof digest)) { content_error (NULL, ct, "content integrity suspect (digest mismatch) -- continuing"); } else { @@ -1854,8 +1867,6 @@ openBase64 (CT ct, char **file) } } } - } else { - goto clean_up; } fseek (ct->c_fp, 0L, SEEK_SET); @@ -2011,7 +2022,8 @@ openQuoted (CT ct, char **file) for (ep = (cp = bufp) + cc - 1; cp <= ep; ep--) if (!isspace ((unsigned char) *ep)) break; - *++ep = '\n', ep++; + *++ep = '\n'; + ep++; for (; cp < ep; cp++) { if (quoted > 0) { @@ -2057,11 +2069,10 @@ openQuoted (CT ct, char **file) * sequence; let's decode it (above). */ quoted = 1; continue; - } else { - /* One or both of the next 2 is out of range, making this - * an invalid escape sequence; just show the raw bytes - * (below). */ } + /* One or both of the next 2 is out of range, making this + * an invalid escape sequence; just show the raw bytes + * (below). */ } /* Just show the raw byte. */ @@ -2097,12 +2108,11 @@ openQuoted (CT ct, char **file) MD5Final (digest, &mdContext); if (memcmp((char *) digest, (char *) ct->c_digest, - sizeof(digest) / sizeof(digest[0]))) + sizeof digest)) content_error (NULL, ct, "content integrity suspect (digest mismatch) -- continuing"); - else - if (debugsw) - fprintf (stderr, "content integrity confirmed\n"); + else if (debugsw) + fprintf (stderr, "content integrity confirmed\n"); } fseek (ce->ce_fp, 0L, SEEK_SET); @@ -2330,7 +2340,7 @@ openExternal (CT ct, CT cb, CE ce, char **file, int *fd) admonish (cachefile, "unable to fopen for reading"); } - *fd = fileno (ce->ce_fp); + *fd = ce->ce_fp ? fileno (ce->ce_fp) : -1; return OK; ready_already: @@ -2406,12 +2416,10 @@ openFile (CT ct, char **file) if (ferror (gp)) { admonish (ce->ce_file, "error reading"); (void) m_unlink (cachefile); - } - else - if (ferror (fp)) { - admonish (cachefile, "error writing"); - (void) m_unlink (cachefile); - } + } else if (ferror (fp)) { + admonish (cachefile, "error writing"); + (void) m_unlink (cachefile); + } fclose (fp); } umask (mask); @@ -2617,12 +2625,10 @@ openFTP (CT ct, char **file) if (ferror (gp)) { admonish (ce->ce_file, "error reading"); (void) m_unlink (cachefile); - } - else - if (ferror (fp)) { - admonish (cachefile, "error writing"); - (void) m_unlink (cachefile); - } + } else if (ferror (fp)) { + admonish (cachefile, "error writing"); + (void) m_unlink (cachefile); + } fclose (fp); } umask (mask); @@ -2887,7 +2893,7 @@ openURL (CT ct, char **file) fseeko(ce->ce_fp, 0, SEEK_SET); *file = ce->ce_file; - return fd; + return fileno(ce->ce_fp); } @@ -2902,7 +2908,7 @@ readDigest (CT ct, char *cp) size_t len; if (decodeBase64 (cp, &digest, &len, 0, NULL) == OK) { - const size_t maxlen = sizeof ct->c_digest / sizeof ct->c_digest[0]; + const size_t maxlen = sizeof ct->c_digest; if (strlen ((char *) digest) <= maxlen) { memcpy (ct->c_digest, digest, maxlen); @@ -3295,9 +3301,9 @@ parse_header_attrs (const char *filename, const char *fieldname, for (up = dp; isspace ((unsigned char) *dp);) dp++; if (dp == cp || *dp != '=') { - inform("invalid parameter in message %s's %s: " - "field\n%*sparameter %s (error detected at offset %d)", - filename, fieldname, strlen(invo_name) + 2, "",cp, dp - cp); + inform("invalid parameter in message %s's %s: field\n" + " parameter %s (error detected at offset %ld)", + filename, fieldname, cp, (long)(dp - cp)); return NOTOK; } @@ -3321,15 +3327,15 @@ parse_header_attrs (const char *filename, const char *fieldname, if (*vp == '*' && vp < up - 1) { partial = 1; continue; - } else if (*vp == '*' && vp == up - 1) { + } + if (*vp == '*' && vp == up - 1) { encoded = 1; } else if (partial) { if (isdigit((unsigned char) *vp)) index = *vp - '0' + index * 10; else { - inform("invalid parameter index in message %s's " - "%s: field\n%*s(parameter %s)", filename, - fieldname, strlen(invo_name) + 2, "", cp); + inform("invalid parameter index in message %s's %s: field" + "\n (parameter %s)", filename, fieldname, cp); return NOTOK; } } else { @@ -3371,9 +3377,8 @@ parse_header_attrs (const char *filename, const char *fieldname, } vp++; } else { - inform("missing charset in message %s's %s: " - "field\n%*s(parameter %s)", filename, fieldname, - strlen(invo_name) + 2, "", nameptr); + inform("missing charset in message %s's %s: field\n" + " (parameter %s)", filename, fieldname, nameptr); free(nameptr); return NOTOK; } @@ -3394,9 +3399,8 @@ parse_header_attrs (const char *filename, const char *fieldname, } vp++; } else { - inform("missing language tag in message %s's %s: " - "field\n%*s(parameter %s)", filename, fieldname, - strlen(invo_name) + 2, "", nameptr); + inform("missing language tag in message %s's %s: field\n" + " (parameter %s)", filename, fieldname, nameptr); free(nameptr); mh_xfree(charset); return NOTOK; @@ -3418,10 +3422,8 @@ parse_header_attrs (const char *filename, const char *fieldname, !isxdigit((unsigned char) *(vp + 1)) || *(vp + 2) == '\0' || !isxdigit((unsigned char) *(vp + 2))) { - inform("invalid encoded sequence in message " - "%s's %s: field\n%*s(parameter %s)", - filename, fieldname, strlen(invo_name) + 2, - "", nameptr); + inform("invalid encoded sequence in message %s's %s: field\n" + " (parameter %s)", filename, fieldname, nameptr); free(nameptr); mh_xfree(charset); mh_xfree(lang); @@ -3461,9 +3463,8 @@ parse_header_attrs (const char *filename, const char *fieldname, switch (*cp++) { case '\0': bad_quote: - inform("invalid quoted-string in message %s's %s: " - "field\n%*s(parameter %s)", filename, - fieldname, strlen(invo_name) + 2, "", nameptr); + inform("invalid quoted-string in message %s's %s: field\n" + " (parameter %s)", filename, fieldname, nameptr); free(nameptr); mh_xfree(charset); mh_xfree(lang); @@ -3547,10 +3548,9 @@ bad_quote: } else { for (sp2 = pp->sechead; sp2 != NULL; sp2 = sp2->next) { if (sp2->index == sp->index) { - inform("duplicate index (%d) in message " - "%s's %s: field\n%*s(parameter %s)", sp->index, - filename, fieldname, strlen(invo_name) + 2, "", - nameptr); + inform("duplicate index (%d) in message %s's %s: field" + "\n (parameter %s)", sp->index, filename, + fieldname, nameptr); return NOTOK; } if (sp2->index < sp->index && @@ -3562,10 +3562,9 @@ bad_quote: } if (sp2 == NULL) { - inform("Internal error: cannot insert partial " - "param in message %s's %s: field\n%*s(parameter %s)", - filename, fieldname, strlen(invo_name) + 2, "", - nameptr); + inform("Internal error: cannot insert partial param " + "in message %s's %s: field\n (parameter %s)", + filename, fieldname, nameptr); return NOTOK; } } @@ -3605,10 +3604,9 @@ bad_quote: int pindex = 0; for (sp = pp->sechead; sp != NULL; sp = sp->next) { if (sp->index != pindex++) { - inform("missing section %d for parameter in " - "message %s's %s: field\n%*s(parameter %s)", pindex - 1, - filename, fieldname, strlen(invo_name) + 2, "", - pp->name); + inform("missing section %d for parameter in message " + "%s's %s: field\n (parameter %s)", pindex - 1, + filename, fieldname, pp->name); return NOTOK; } tlen += sp->len; @@ -3757,7 +3755,7 @@ output_params(size_t initialwidth, PM params, int *offsetout, int external) } /* - * At this point, we're either finishing a contined parameter, or + * At this point, we're either finishing a continued parameter, or * we're working on a new one. */