X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/afea511a759408f44ac37493df8944d6c5cf2d69..c02f66668d32980a3770e450061c5b96b9f31690:/uip/mhfixmsg.c diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c index 8d67e827..ff3e383a 100644 --- a/uip/mhfixmsg.c +++ b/uip/mhfixmsg.c @@ -1,5 +1,5 @@ /* - * mhfixmsg.c -- rewrite a message with various tranformations + * mhfixmsg.c -- rewrite a message with various transformations * * This code is Copyright (c) 2002 and 2013, by the authors of nmh. * See the COPYRIGHT file in the root directory of the nmh @@ -14,8 +14,11 @@ #include #define MHFIXMSG_SWITCHES \ - X("decodetext 8bit|7bit", 0, DECODETEXTSW) \ + X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \ X("nodecodetext", 0, NDECODETEXTSW) \ + X("decodetypes", 0, DECODETYPESW) \ + X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \ + X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \ X("textcharset", 0, TEXTCHARSETSW) \ X("notextcharset", 0, NTEXTCHARSETSW) \ X("reformat", 0, REFORMATSW) \ @@ -24,8 +27,8 @@ X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \ X("fixboundary", 0, FIXBOUNDARYSW) \ X("nofixboundary", 0, NFIXBOUNDARYSW) \ - X("fixcte", 0, FIXCTESW) \ - X("nofixcte", 0, NFIXCTESW) \ + X("fixcte", 0, FIXCOMPOSITECTESW) \ + X("nofixcte", 0, NFIXCOMPOSITECTESW) \ X("fixtype mimetype", 0, FIXTYPESW) \ X("file file", 0, FILESW) \ X("outfile file", 0, OUTFILESW) \ @@ -58,39 +61,42 @@ extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */ extern int bogus_mp_content; /* flag from InitMultiPart */ /* flags to/from parse_header_attrs */ extern int suppress_extraneous_trailing_semicolon_warning; -extern int extraneous_trailing_semicolon; /* mhoutsbr.c */ -int output_message (CT, char *); +int output_message_fp (CT, FILE *, char *); /* mhmisc.c */ void flush_errors (void); /* mhfree.c */ extern CT *cts; -void freects_done (int) NORETURN; /* * static prototypes */ typedef struct fix_transformations { int fixboundary; - int fixcte; + int fixcompositecte; svector_t fixtypes; int reformat; int replacetextplain; int decodetext; + char *decodetypes; + /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */ + int lf_line_endings; char *textcharset; } fix_transformations; -int mhfixmsgsbr (CT *, const fix_transformations *, char *); +int mhfixmsgsbr (CT *, char *, const fix_transformations *, FILE **, char *, + FILE **); static int fix_boundary (CT *, int *); +static int copy_input_to_output (const char *, FILE *, const char *, FILE *); static int get_multipart_boundary (CT, char **); static int replace_boundary (CT, char *, char *); static int fix_types (CT, svector_t, int *); static char *replace_substring (char **, const char *, const char *); static char *remove_parameter (char *, const char *); -static int fix_multipart_cte (CT, int *); +static int fix_composite_cte (CT, int *); static int set_ce (CT, int); static int ensure_text_plain (CT *, CT, int *, int); static int find_textplain_sibling (CT, int, int *); @@ -101,18 +107,24 @@ static CT divide_part (CT); static void copy_ctinfo (CI, CI); static int decode_part (CT); static int reformat_part (CT, char *, char *, char *, int); -static int charset_encoding (CT); static CT build_multipart_alt (CT, CT, int, int); static int boundary_in_content (FILE **, char *, const char *); static void transfer_noncontent_headers (CT, CT); static int set_ct_type (CT, int type, int subtype, int encoding); -static int decode_text_parts (CT, int, int *); +static int decode_text_parts (CT, int, const char *, int *); +static int should_decode(const char *, const char *, const char *); static int content_encoding (CT, const char **); static int strip_crs (CT, int *); +static void update_cte (CT); +static int least_restrictive_encoding (CT); +static int less_restrictive (int, int); static int convert_charsets (CT, char *, int *); static int fix_always (CT, int *); -static int write_content (CT, char *, char *, int, int); -static int remove_file (char *); +static int fix_filename_param (char *, char *, PM *, PM *); +static int fix_filename_encoding (CT); +static int write_content (CT, const char *, char *, FILE *, int, int); +static void set_text_ctparams(CT, char *, int); +static int remove_file (const char *); static void report (char *, char *, char *, char *, ...); static void pipeser (int); @@ -121,25 +133,25 @@ int main (int argc, char **argv) { int msgnum; char *cp, *file = NULL, *folder = NULL; - char *maildir, buf[100], *outfile = NULL; + char *maildir = NULL, buf[100], *outfile = NULL; char **argp, **arguments; struct msgs_array msgs = { 0, 0, NULL }; struct msgs *mp = NULL; CT *ctp; - FILE *fp; + FILE *fp, *infp = NULL, *outfp = NULL; int using_stdin = 0; int chgflag = 1; int status = OK; fix_transformations fx; - fx.reformat = fx.fixcte = fx.fixboundary = 1; + fx.reformat = fx.fixcompositecte = fx.fixboundary = 1; fx.fixtypes = NULL; fx.replacetextplain = 0; fx.decodetext = CE_8BIT; + fx.decodetypes = "text,application/ics"; /* Default, per man page. */ + fx.lf_line_endings = 0; fx.textcharset = NULL; - if (nmh_init(argv[0], 1)) { return 1; } - - done = freects_done; + if (nmh_init(argv[0], 2)) { return 1; } arguments = getarguments (invo_name, argc, argv, 1); argp = arguments; @@ -166,12 +178,15 @@ main (int argc, char **argv) { done (0); case DECODETEXTSW: - if (! (cp = *argp++) || *cp == '-') + if (! (cp = *argp++) || *cp == '-') { adios (NULL, "missing argument to %s", argp[-2]); + } if (! strcasecmp (cp, "8bit")) { fx.decodetext = CE_8BIT; } else if (! strcasecmp (cp, "7bit")) { fx.decodetext = CE_7BIT; + } else if (! strcasecmp (cp, "binary")) { + fx.decodetext = CE_BINARY; } else { adios (NULL, "invalid argument to %s", argp[-2]); } @@ -179,9 +194,22 @@ main (int argc, char **argv) { case NDECODETEXTSW: fx.decodetext = 0; continue; + case DECODETYPESW: + if (! (cp = *argp++) || *cp == '-') { + adios (NULL, "missing argument to %s", argp[-2]); + } + fx.decodetypes = cp; + continue; + case CRLFLINEBREAKSSW: + fx.lf_line_endings = 0; + continue; + case NCRLFLINEBREAKSSW: + fx.lf_line_endings = 1; + continue; case TEXTCHARSETSW: - if (! (cp = *argp++) || (*cp == '-' && cp[1])) + if (! (cp = *argp++) || (*cp == '-' && cp[1])) { adios (NULL, "missing argument to %s", argp[-2]); + } fx.textcharset = cp; continue; case NTEXTCHARSETSW: @@ -193,15 +221,16 @@ main (int argc, char **argv) { case NFIXBOUNDARYSW: fx.fixboundary = 0; continue; - case FIXCTESW: - fx.fixcte = 1; + case FIXCOMPOSITECTESW: + fx.fixcompositecte = 1; continue; - case NFIXCTESW: - fx.fixcte = 0; + case NFIXCOMPOSITECTESW: + fx.fixcompositecte = 0; continue; case FIXTYPESW: - if (! (cp = *argp++) || (*cp == '-' && cp[1])) + if (! (cp = *argp++) || (*cp == '-' && cp[1])) { adios (NULL, "missing argument to %s", argp[-2]); + } if (! strncasecmp (cp, "multipart/", 10) || ! strncasecmp (cp, "message/", 8)) { adios (NULL, "-fixtype %s not allowed", cp); @@ -224,18 +253,21 @@ main (int argc, char **argv) { fx.replacetextplain = 0; continue; case FILESW: - if (! (cp = *argp++) || (*cp == '-' && cp[1])) + if (! (cp = *argp++) || (*cp == '-' && cp[1])) { adios (NULL, "missing argument to %s", argp[-2]); - file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE); + } + file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE); continue; case OUTFILESW: - if (! (cp = *argp++) || (*cp == '-' && cp[1])) + if (! (cp = *argp++) || (*cp == '-' && cp[1])) { adios (NULL, "missing argument to %s", argp[-2]); - outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE); + } + outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE); continue; case RPROCSW: - if (!(rmmproc = *argp++) || *rmmproc == '-') + if (!(rmmproc = *argp++) || *rmmproc == '-') { adios (NULL, "missing argument to %s", argp[-2]); + } continue; case NRPRCSW: rmmproc = NULL; @@ -255,14 +287,15 @@ main (int argc, char **argv) { } } if (*cp == '+' || *cp == '@') { - if (folder) + if (folder) { adios (NULL, "only one folder at a time!"); - else + } else { folder = pluspath (cp); + } } else { if (*cp == '/') { /* Interpret a full path as a filename, not a message. */ - file = add (cp, NULL); + file = mh_xstrdup (cp); } else { app_msgarg (&msgs, cp); } @@ -283,11 +316,23 @@ main (int argc, char **argv) { suppress_bogus_mp_content_warning = skip_mp_cte_check = 1; suppress_extraneous_trailing_semicolon_warning = 1; - if (! context_find ("path")) + if (! context_find ("path")) { free (path ("./", TFOLDER)); + } - if (file && msgs.size) + if (file && msgs.size) { adios (NULL, "cannot specify msg and file at same time!"); + } + + if (outfile) { + /* Open the outfile now, so we don't have to risk opening it + after running out of fds. */ + if (strcmp (outfile, "-") == 0) { + outfp = stdout; + } else if ((outfp = fopen (outfile, "w")) == NULL) { + adios (outfile, "unable to open for writing"); + } + } /* * check if message is coming from file @@ -310,7 +355,7 @@ main (int argc, char **argv) { get_temp_dir()); } else { free (file); - file = add (cp, NULL); + file = mh_xstrdup (cp); cpydata (STDIN_FILENO, fd, "-", file); } @@ -320,53 +365,117 @@ main (int argc, char **argv) { } } - if (! (cts = (CT *) mh_xcalloc ((size_t) 2, sizeof *cts))) { - adios (NULL, "out of memory"); - } + cts = mh_xcalloc(2, sizeof *cts); ctp = cts; - if ((ct = parse_mime (file))) { *ctp++ = ct; } + if ((ct = parse_mime (file))) { + set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings); + *ctp++ = ct; + } else { + advise (NULL, "unable to parse message from file %s", file); + status = NOTOK; + + /* If there's an outfile, pass the input message unchanged, so the + message won't get dropped from a pipeline. */ + if (outfile) { + /* Something went wrong. Output might be expected, such as if + this were run as a filter. Just copy the input to the + output. */ + if ((infp = fopen (file, "r")) == NULL) { + adios (file, "unable to open for reading"); + } + + if (copy_input_to_output (file, infp, outfile, outfp) != OK) { + advise (NULL, "unable to copy message to %s, " + "it might be lost\n", outfile); + } + + fclose (infp); + infp = NULL; + } + } } else { /* * message(s) are coming from a folder */ CT ct; - if (! msgs.size) + if (! msgs.size) { app_msgarg(&msgs, "cur"); - if (! folder) + } + if (! folder) { folder = getfolder (1); + } maildir = m_maildir (folder); - if (chdir (maildir) == NOTOK) + /* chdir so that error messages, esp. from MIME parser, just + refer to the message and not its path. */ + if (chdir (maildir) == NOTOK) { adios (maildir, "unable to change directory to"); + } /* read folder and create message structure */ - if (! (mp = folder_read (folder, 1))) + if (! (mp = folder_read (folder, 1))) { adios (NULL, "unable to read folder %s", folder); + } /* check for empty folder */ - if (mp->nummsg == 0) + if (mp->nummsg == 0) { adios (NULL, "no messages in %s", folder); + } /* parse all the message ranges/sequences and set SELECTED */ for (msgnum = 0; msgnum < msgs.size; msgnum++) - if (! m_convert (mp, msgs.msgs[msgnum])) + if (! m_convert (mp, msgs.msgs[msgnum])) { done (1); + } seq_setprev (mp); /* set the previous-sequence */ - if (! (cts = - (CT *) mh_xcalloc ((size_t) (mp->numsel + 1), sizeof *cts))) { - adios (NULL, "out of memory"); - } + cts = mh_xcalloc(mp->numsel + 1, sizeof *cts); ctp = cts; for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) { if (is_selected(mp, msgnum)) { - char *msgnam; + char *msgnam = m_name (msgnum); + + if ((ct = parse_mime (msgnam))) { + set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings); + *ctp++ = ct; + } else { + advise (NULL, "unable to parse message %s", msgnam); + status = NOTOK; + + /* If there's an outfile, pass the input message + unchanged, so the message won't get dropped from a + pipeline. */ + if (outfile) { + /* Something went wrong. Output might be expected, + such as if this were run as a filter. Just copy + the input to the output. */ + /* Can't use path() here because 1) it might have been + called before and it caches the pwd, and 2) we call + chdir() after that. */ + char *input_filename = + concat (maildir, "/", msgnam, NULL); + + if ((infp = fopen (input_filename, "r")) == NULL) { + adios (input_filename, + "unable to open for reading"); + } + + if (copy_input_to_output (input_filename, infp, + outfile, outfp) != OK) { + advise (NULL, + "unable to copy message to %s, " + "it might be lost\n", + outfile); + } - msgnam = m_name (msgnum); - if ((ct = parse_mime (msgnam))) { *ctp++ = ct; } + fclose (infp); + infp = NULL; + free (input_filename); + } + } } } @@ -380,7 +489,8 @@ main (int argc, char **argv) { if (*cts) { for (ctp = cts; *ctp; ++ctp) { - status += mhfixmsgsbr (ctp, &fx, outfile); + status += mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp); + free_content (*ctp); if (using_stdin) { (void) m_unlink (file); @@ -395,39 +505,59 @@ main (int argc, char **argv) { status = 1; } + free (cts); + if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); } + if (infp) { fclose (infp); } /* even if stdin */ + if (outfp) { fclose (outfp); } /* even if stdout */ free (outfile); free (file); + free (folder); + free (arguments); - /* done is freects_done, which will clean up all of cts. */ done (status); return NOTOK; } +/* + * Apply transformations to one message. + */ int -mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { +mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx, + FILE **infp, char *outfile, FILE **outfp) { /* Store input filename in case one of the transformations, i.e., fix_boundary(), rewrites to a tmp file. */ - char *input_filename = add ((*ctp)->c_file, NULL); + char *input_filename = maildir + ? concat (maildir, "/", (*ctp)->c_file, NULL) + : mh_xstrdup ((*ctp)->c_file); int modify_inplace = 0; int message_mods = 0; int status = OK; + /* Though the input file won't need to be opened if everything goes + well, do it here just in case there's a failure, and that failure is + running out of file descriptors. */ + if ((*infp = fopen (input_filename, "r")) == NULL) { + adios (input_filename, "unable to open for reading"); + } + if (outfile == NULL) { modify_inplace = 1; if ((*ctp)->c_file) { char *tempfile; - if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) { + /* outfp will be closed by the caller */ + if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) == + NULL) { adios (NULL, "unable to create temporary file in %s", get_temp_dir()); } - outfile = add (tempfile, NULL); + outfile = mh_xstrdup (tempfile); } else { adios (NULL, "missing both input and output filenames\n"); } - } + } /* else *outfp was defined by caller */ reverse_alternative_parts (*ctp); status = fix_always (*ctp, &message_mods); @@ -437,15 +567,17 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { if (status == OK && fx->fixtypes != NULL) { status = fix_types (*ctp, fx->fixtypes, &message_mods); } - if (status == OK && fx->fixcte) { - status = fix_multipart_cte (*ctp, &message_mods); + if (status == OK && fx->fixcompositecte) { + status = fix_composite_cte (*ctp, &message_mods); } if (status == OK && fx->reformat) { status = ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain); } if (status == OK && fx->decodetext) { - status = decode_text_parts (*ctp, fx->decodetext, &message_mods); + status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes, + &message_mods); + update_cte (*ctp); } if (status == OK && fx->textcharset != NULL) { status = convert_charsets (*ctp, fx->textcharset, &message_mods); @@ -467,25 +599,17 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { * Write the content to a file */ if (status == OK) { - status = write_content (*ctp, input_filename, outfile, modify_inplace, - message_mods); + status = write_content (*ctp, input_filename, outfile, *outfp, + modify_inplace, message_mods); } else if (! modify_inplace) { /* Something went wrong. Output might be expected, such as if this were run as a filter. Just copy the input to the output. */ - int in = open (input_filename, O_RDONLY); - int out = strcmp (outfile, "-") - ? open (outfile, O_WRONLY | O_CREAT, m_gmprot ()) - : STDOUT_FILENO; - - if (in != -1 && out != -1) { - cpydata (in, out, input_filename, outfile); - } else { - status = NOTOK; + if (copy_input_to_output (input_filename, *infp, outfile, + *outfp) != OK) { + advise (NULL, "unable to copy message to %s, it might be lost\n", + outfile); } - - close (out); - close (in); } if (modify_inplace) { @@ -494,12 +618,38 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { outfile = NULL; } + fclose (*infp); + *infp = NULL; free (input_filename); return status; } +/* + * Copy input message to output. Assumes not modifying in place, so this + * might be running as part of a pipeline. + */ +static int +copy_input_to_output (const char *input_filename, FILE *infp, + const char *output_filename, FILE *outfp) { + int in = fileno (infp); + int out = fileno (outfp); + int status = OK; + + if (in != -1 && out != -1) { + cpydata (in, out, input_filename, output_filename); + } else { + status = NOTOK; + } + + return status; +} + + +/* + * Fix mismatched outer level boundary. + */ static int fix_boundary (CT *ct, int *message_mods) { struct multipart *mp; @@ -526,7 +676,7 @@ fix_boundary (CT *ct, int *message_mods) { if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) { if (replace_boundary (*ct, fixed, part_boundary) == OK) { - char *filename = add ((*ct)->c_file, NULL); + char *filename = mh_xstrdup ((*ct)->c_file); CT fixed_ct; free_content (*ct); @@ -556,7 +706,16 @@ fix_boundary (CT *ct, int *message_mods) { } free (part_boundary); + } else { + /* Couldn't fix the boundary. Report failure so that mhfixmsg + doesn't modify the message. */ + status = NOTOK; } + } else { + /* No multipart struct, even though the content type is + CT_MULTIPART. Report failure so that mhfixmsg doesn't modify + the message. */ + status = NOTOK; } } @@ -564,9 +723,12 @@ fix_boundary (CT *ct, int *message_mods) { } +/* + * Find boundary at end of multipart. + */ static int get_multipart_boundary (CT ct, char **part_boundary) { - char buffer[BUFSIZ]; + char buffer[NMH_BUFSIZ]; char *end_boundary = NULL; off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer) ? (off_t) (ct->c_end - sizeof buffer) @@ -574,10 +736,10 @@ get_multipart_boundary (CT ct, char **part_boundary) { size_t bytes_read; int status = OK; - /* This will fail if the boundary spans fread() calls. BUFSIZ should + /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should be big enough, even if it's just 1024, to make that unlikely. */ - /* free_content() will close ct->c_fp. */ + /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */ if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) { advise (ct->c_file, "unable to open for reading"); return NOTOK; @@ -597,7 +759,7 @@ get_multipart_boundary (CT ct, char **part_boundary) { if ((end = rfind_str (buffer, cp - buffer, "\n"))) { if (strlen (end) > 3 && *end++ == '\n' && *end++ == '-' && *end++ == '-') { - end_boundary = add (end, NULL); + end_boundary = mh_xstrdup (end); break; } } @@ -634,6 +796,11 @@ get_multipart_boundary (CT ct, char **part_boundary) { status = NOTOK; } + if (ct->c_fp) { + fclose (ct->c_fp); + ct->c_fp = NULL; + } + if (status == OK) { *part_boundary = end_boundary; } else { @@ -645,12 +812,14 @@ get_multipart_boundary (CT ct, char **part_boundary) { } -/* Open and copy ct->c_file to file, replacing the multipart boundary. */ +/* + * Open and copy ct->c_file to file, replacing the multipart boundary. + */ static int replace_boundary (CT ct, char *file, char *boundary) { FILE *fpin, *fpout; int compnum, state; - char buf[BUFSIZ], name[NAMESZ]; + char buf[NMH_BUFSIZ], name[NAMESZ]; char *np, *vp; m_getfld_state_t gstate = 0; int status = OK; @@ -680,8 +849,8 @@ replace_boundary (CT ct, char *file, char *boundary) { compnum++; /* get copies of the buffers */ - np = add (name, NULL); - vp = add (buf, NULL); + np = mh_xstrdup (name); + vp = mh_xstrdup (buf); /* if necessary, get rest of field */ while (state == FLDPLUS) { @@ -707,8 +876,7 @@ replace_boundary (CT ct, char *file, char *boundary) { fprintf (fpout, "%s:%s%s\n", np, new_ctline, new_params ? new_params : ""); free(new_ctline); - if (new_params) - free(new_params); + mh_xfree(new_params); } free (vp); @@ -717,7 +885,7 @@ replace_boundary (CT ct, char *file, char *boundary) { continue; case BODY: - fputs ("\n", fpout); + putc('\n', fpout); /* buf will have a terminating NULL, skip it. */ if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) { advise (file, "fwrite"); @@ -750,6 +918,9 @@ replace_boundary (CT ct, char *file, char *boundary) { } +/* + * Fix Content-Type header to reflect the content of its part. + */ static int fix_types (CT ct, svector_t fixtypes, int *message_mods) { int status = OK; @@ -808,10 +979,10 @@ fix_types (CT ct, svector_t fixtypes, int *message_mods) { * c_ctline */ /* Extract type and subtype from type/subtype. */ - ct_type = getcpy (ct_type_subtype); + ct_type = mh_xstrdup(ct_type_subtype); if ((cp = strchr (ct_type, '/'))) { *cp = '\0'; - ct_subtype = getcpy (++cp); + ct_subtype = mh_xstrdup(++cp); } else { advise (NULL, "missing / in MIME type of %s %s", ct->c_file, ct->c_partno); @@ -862,6 +1033,10 @@ fix_types (CT ct, svector_t fixtypes, int *message_mods) { return status; } + +/* + * Replace a substring, allocating space to hold the new one. + */ char * replace_substring (char **str, const char *old, const char *new) { char *cp; @@ -871,7 +1046,7 @@ replace_substring (char **str, const char *old, const char *new) { char *prefix, *new_str; if (cp - *str) { - prefix = getcpy (*str); + prefix = mh_xstrdup(*str); *(prefix + (cp - *str)) = '\0'; new_str = concat (prefix, new, remainder, NULL); free (prefix); @@ -882,11 +1057,12 @@ replace_substring (char **str, const char *old, const char *new) { free (*str); return *str = new_str; - } else { - return NULL; } + + return NULL; } + /* * Remove a name=value parameter, given just its name, from a header value. */ @@ -933,14 +1109,17 @@ remove_parameter (char *str, const char *name) { return str; } + +/* + * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part. + * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to + * 8 bit. + */ static int -fix_multipart_cte (CT ct, int *message_mods) { +fix_composite_cte (CT ct, int *message_mods) { int status = OK; - if (ct->c_type == CT_MULTIPART) { - struct multipart *m; - struct part *part; - + if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) { if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT && ct->c_encoding != CE_BINARY) { HF hf; @@ -954,9 +1133,10 @@ fix_multipart_cte (CT ct, int *message_mods) { if (! strncasecmp (name, ENCODING_FIELD, strlen (ENCODING_FIELD))) { char *prefix = "Nmh-REPLACED-INVALID-"; - HF h = mh_xmalloc (sizeof *h); + HF h; - h->name = add (hf->name, NULL); + NEW(h); + h->name = mh_xstrdup (hf->name); h->hf_encoding = hf->hf_encoding; h->next = hf->next; hf->next = h; @@ -974,7 +1154,7 @@ fix_multipart_cte (CT ct, int *message_mods) { free (encoding); } - h->value = add (" 8bit\n", NULL); + h->value = mh_xstrdup (" 8bit\n"); /* Don't need to warn for multiple C-T-E header fields, parse_mime() already does that. But @@ -986,11 +1166,16 @@ fix_multipart_cte (CT ct, int *message_mods) { set_ce (ct, CE_8BIT); } - m = (struct multipart *) ct->c_ctparams; - for (part = m->mp_parts; part; part = part->mp_next) { - if (fix_multipart_cte (part->mp_part, message_mods) != OK) { - status = NOTOK; - break; + if (ct->c_type == CT_MULTIPART) { + struct multipart *m; + struct part *part; + + m = (struct multipart *) ct->c_ctparams; + for (part = m->mp_parts; part; part = part->mp_next) { + if (fix_composite_cte (part->mp_part, message_mods) != OK) { + status = NOTOK; + break; + } } } } @@ -999,6 +1184,9 @@ fix_multipart_cte (CT ct, int *message_mods) { } +/* + * Set content encoding. + */ static int set_ce (CT ct, int encoding) { const char *ce = ce_str (encoding); @@ -1023,6 +1211,10 @@ set_ce (CT ct, int encoding) { ct->c_cefile.ce_file to the name of the file containing the contents. */ + if (ct->c_ceclosefnx) { + (*ct->c_ceclosefnx) (ct); + } + /* Restore the cefile. */ ct->c_cefile = decoded_content_info; @@ -1035,21 +1227,23 @@ set_ce (CT ct, int encoding) { } } if (! found_cte) { - add_header (ct, add (ENCODING_FIELD, NULL), cte); + add_header (ct, mh_xstrdup (ENCODING_FIELD), cte); } /* Update c_celine. It's used only by mhlist -debug. */ free (ct->c_celine); - ct->c_celine = add (cte, NULL); + ct->c_celine = mh_xstrdup (cte); return OK; - } else { - return NOTOK; } + + return NOTOK; } -/* Make sure each text part has a corresponding text/plain part. */ +/* + * Make sure each text part has a corresponding text/plain part. + */ static int ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) { int status = OK; @@ -1138,7 +1332,8 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) { HF hf; parent->c_subtype = MULTI_ALTERNATE; - parent->c_ctinfo.ci_subtype = getcpy ("alternative"); + free (parent->c_ctinfo.ci_subtype); + parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative"); if (! replace_substring (&parent->c_ctline, "/related", "/alternative")) { advise (NULL, @@ -1209,7 +1404,9 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) { } -/* See if there is a sibling text/plain. */ +/* + * See if there is a sibling text/plain, and return its subpart number. + */ static int find_textplain_sibling (CT parent, int replacetextplain, int *new_subpart_number) { @@ -1249,11 +1446,15 @@ find_textplain_sibling (CT parent, int replacetextplain, } +/* + * Insert a new text/plain part. + */ static int insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) { struct multipart *mp = (struct multipart *) parent->c_ctparams; - struct part *new_part = mh_xmalloc (sizeof *new_part); + struct part *new_part; + NEW(new_part); if ((new_part->mp_part = build_text_plain_part (ct))) { char buffer[16]; snprintf (buffer, sizeof buffer, "%d", new_subpart_number); @@ -1265,15 +1466,18 @@ insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) { buffer, NULL); return 1; - } else { - free_content (new_part->mp_part); - free (new_part); - - return 0; } + + free_content (new_part->mp_part); + free (new_part); + + return 0; } +/* + * Create a text/plain part to go along with non-plain sibling part. + */ static CT build_text_plain_part (CT encoded_part) { CT tp_part = divide_part (encoded_part); @@ -1286,11 +1490,12 @@ build_text_plain_part (CT encoded_part) { be unlinked by free_content (). */ char *tempfile; + /* This m_mktemp2() call closes the temp file. */ if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) { advise (NULL, "unable to create temporary file in %s", get_temp_dir()); } else { - tmp_plain_file = add (tempfile, NULL); + tmp_plain_file = mh_xstrdup (tempfile); if (reformat_part (tp_part, tmp_plain_file, tp_part->c_ctinfo.ci_type, tp_part->c_ctinfo.ci_subtype, @@ -1308,7 +1513,9 @@ build_text_plain_part (CT encoded_part) { } -/* Slip new text/plain part into a new multipart/alternative. */ +/* + * Slip new text/plain part into a new multipart/alternative. + */ static int insert_into_new_mp_alt (CT *ct, int *message_mods) { CT tp_part = build_text_plain_part (*ct); @@ -1318,8 +1525,7 @@ insert_into_new_mp_alt (CT *ct, int *message_mods) { CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART, MULTI_ALTERNATE); if (mp_alt) { - struct multipart *mp = - (struct multipart *) mp_alt->c_ctparams; + struct multipart *mp = (struct multipart *) mp_alt->c_ctparams; if (mp && mp->mp_parts) { mp->mp_parts->mp_part = tp_part; @@ -1340,22 +1546,24 @@ insert_into_new_mp_alt (CT *ct, int *message_mods) { status = NOTOK; } } else { - status = NOTOK; + /* Not an error if text/plain couldn't be built. */ } return status; } + +/* + * Clone a MIME part. + */ static CT divide_part (CT ct) { CT new_part; - if ((new_part = (CT) mh_xcalloc (1, sizeof *new_part)) == NULL) - adios (NULL, "out of memory"); - + NEW0(new_part); /* Just copy over what is needed for decoding. c_vrsn and c_celine aren't necessary. */ - new_part->c_file = add (ct->c_file, NULL); + new_part->c_file = mh_xstrdup (ct->c_file); new_part->c_begin = ct->c_begin; new_part->c_end = ct->c_end; copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo); @@ -1369,59 +1577,75 @@ divide_part (CT ct) { /* c_ctline is used by reformat__part(), so it can preserve anything after the type/subtype. */ - new_part->c_ctline = add (ct->c_ctline, NULL); + new_part->c_ctline = mh_xstrdup (ct->c_ctline); return new_part; } +/* + * Copy the content info from one part to another. + */ static void copy_ctinfo (CI dest, CI src) { PM s_pm, d_pm; - dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL; - dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL; + dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL; + dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL; for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) { d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name, s_pm->pm_value, 0); - if (s_pm->pm_charset) - d_pm->pm_charset = getcpy(s_pm->pm_charset); - if (s_pm->pm_lang) - d_pm->pm_lang = getcpy(s_pm->pm_lang); + if (s_pm->pm_charset) { + d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset); + } + if (s_pm->pm_lang) { + d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang); + } } - dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL; - dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL; + dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL; + dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL; } +/* + * Decode content. + */ static int decode_part (CT ct) { char *tmp_decoded; int status; + FILE *file; char *tempfile; - if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) { + if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) { adios (NULL, "unable to create temporary file in %s", get_temp_dir()); } - tmp_decoded = add (tempfile, NULL); + tmp_decoded = mh_xstrdup (tempfile); /* The following call will load ct->c_cefile.ce_file with the tmp filename of the decoded content. tmp_decoded will contain the encoded output, get rid of that. */ - status = output_message (ct, tmp_decoded); + status = output_message_fp (ct, file, tmp_decoded); (void) m_unlink (tmp_decoded); free (tmp_decoded); + if (fclose (file)) { + admonish (NULL, "unable to close temporary file %s", tempfile); + } return status; } -/* Some of the arguments aren't really needed now, but maybe will - be in the future for other than text types. */ +/* + * Reformat content as plain text. + * Some of the arguments aren't really needed now, but maybe will + * be in the future for other than text types. + */ static int reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { int output_subtype, output_encoding; + const char *reason = NULL; char *cp, *cf; int status; @@ -1438,14 +1662,13 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { ct->c_file, invo_name, type, subtype); } return NOTOK; - } else { - if (strchr (cf, '>')) { - advise (NULL, "'>' prohibited in \"%s\",\nplease fix your " - "%s-format-%s/%s profile entry", cf, invo_name, type, - subtype ? subtype : ""); + } + if (strchr (cf, '>')) { + advise (NULL, "'>' prohibited in \"%s\",\nplease fix your " + "%s-format-%s/%s profile entry", cf, invo_name, type, + subtype ? subtype : ""); - return NOTOK; - } + return NOTOK; } cp = concat (cf, " >", file, NULL); @@ -1467,9 +1690,10 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { /* Set subtype to 0, which is always an UNKNOWN subtype. */ output_subtype = 0; } - output_encoding = charset_encoding (ct); - if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) { + output_encoding = content_encoding (ct, &reason); + if (status == OK && + set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) { ct->c_cefile.ce_file = file; ct->c_cefile.ce_unlink = 1; } else { @@ -1481,18 +1705,9 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { } -/* Identifies 7bit or 8bit content based on charset. */ -static int -charset_encoding (CT ct) { - char *ct_charset = content_charset (ct); - int encoding = strcasecmp (ct_charset, "US-ASCII") ? CE_8BIT : CE_7BIT; - - free (ct_charset); - - return encoding; -} - - +/* + * Fill in a multipart/alternative part. + */ static CT build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { char *boundary_prefix = "----=_nmh-multipart"; @@ -1504,8 +1719,7 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { struct multipart *m; const struct str2init *ctinit; - if ((ct = (CT) mh_xcalloc (1, sizeof *ct)) == NULL) - adios (NULL, "out of memory"); + NEW0(ct); /* Set up the multipart/alternative part. These fields of *ct were initialized to 0 by mh_xcalloc(): @@ -1520,7 +1734,7 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { c_showproc, c_termproc, c_storeproc, c_storage, c_folder */ - ct->c_file = add (first_alt->c_file, NULL); + ct->c_file = mh_xstrdup (first_alt->c_file); ct->c_type = type; ct->c_subtype = subtype; @@ -1542,8 +1756,8 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { if ((found_boundary = boundary_in_content (&new_part->c_cefile.ce_fp, new_part->c_cefile.ce_file, - boundary)) == -1) { - free (ct); + boundary)) == NOTOK) { + free_content (ct); return NULL; } } @@ -1551,10 +1765,11 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { /* Ensure that the boundary doesn't appear in the encoded content. */ if (! found_boundary && new_part->c_file) { - if ((found_boundary = boundary_in_content (&new_part->c_fp, - new_part->c_file, - boundary)) == -1) { - free (ct); + if ((found_boundary = + boundary_in_content (&new_part->c_fp, + new_part->c_file, + boundary)) == NOTOK) { + free_content (ct); return NULL; } } @@ -1575,7 +1790,7 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { if (found_boundary) { advise (NULL, "giving up trying to find a unique boundary"); - free (ct); + free_content (ct); return NULL; } } @@ -1585,36 +1800,34 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { /* Load c_first_hf and c_last_hf. */ transfer_noncontent_headers (first_alt, ct); - add_header (ct, add (TYPE_FIELD, NULL), concat (name, "\n", NULL)); + add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL)); free (name); /* Load c_partno. */ if (first_alt->c_partno) { - ct->c_partno = add (first_alt->c_partno, NULL); + ct->c_partno = mh_xstrdup (first_alt->c_partno); free (first_alt->c_partno); first_alt->c_partno = concat (ct->c_partno, ".1", NULL); new_part->c_partno = concat (ct->c_partno, ".2", NULL); } else { - first_alt->c_partno = add ("1", NULL); - new_part->c_partno = add ("2", NULL); + first_alt->c_partno = mh_xstrdup ("1"); + new_part->c_partno = mh_xstrdup ("2"); } if (ctinit) { - ct->c_ctinfo.ci_type = add (typename, NULL); - ct->c_ctinfo.ci_subtype = add (subtypename, NULL); + ct->c_ctinfo.ci_type = mh_xstrdup (typename); + ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename); } add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm, "boundary", boundary, 0); - p = (struct part *) mh_xmalloc (sizeof *p); - p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next); + NEW(p); + NEW(p->mp_next); p->mp_next->mp_next = NULL; p->mp_next->mp_part = first_alt; - if ((m = (struct multipart *) mh_xcalloc (1, sizeof (struct multipart))) == - NULL) - adios (NULL, "out of memory"); + NEW0(m); m->mp_start = concat (boundary, "\n", NULL); m->mp_stop = concat (boundary, "--\n", NULL); m->mp_parts = p; @@ -1626,10 +1839,12 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { } -/* Check that the boundary does not appear in the content. */ +/* + * Check that the boundary does not appear in the content. + */ static int boundary_in_content (FILE **fp, char *file, const char *boundary) { - char buffer[BUFSIZ]; + char buffer[NMH_BUFSIZ]; size_t bytes_read; int found_boundary = 0; @@ -1651,7 +1866,9 @@ boundary_in_content (FILE **fp, char *file, const char *boundary) { } -/* Remove all non-Content headers. */ +/* + * Remove all non-Content headers. + */ static void transfer_noncontent_headers (CT old, CT new) { HF hp, hp_prev; @@ -1693,6 +1910,9 @@ transfer_noncontent_headers (CT old, CT new) { } +/* + * Set content type. + */ static int set_ct_type (CT ct, int type, int subtype, int encoding) { char *typename = ct_type_str (type); @@ -1714,14 +1934,14 @@ set_ct_type (CT ct, int type, int subtype, int encoding) { free (hf->value); hf->value = (cp = strchr (ct->c_ctline, ';')) ? concat (type_subtypename, cp, "\n", NULL) - : add (name_plus_nl, NULL); + : mh_xstrdup (name_plus_nl); } } if (! found_content_type) { - add_header (ct, add (TYPE_FIELD, NULL), + add_header (ct, mh_xstrdup (TYPE_FIELD), (cp = strchr (ct->c_ctline, ';')) ? concat (type_subtypename, cp, "\n", NULL) - : add (name_plus_nl, NULL)); + : mh_xstrdup (name_plus_nl)); } /* Some of these might not be used, but set them anyway. */ @@ -1732,9 +1952,9 @@ set_ct_type (CT ct, int type, int subtype, int encoding) { ct->c_ctline = ctline; /* Leave other ctinfo members as they were. */ free (ct->c_ctinfo.ci_type); - ct->c_ctinfo.ci_type = add (typename, NULL); + ct->c_ctinfo.ci_type = mh_xstrdup (typename); free (ct->c_ctinfo.ci_subtype); - ct->c_ctinfo.ci_subtype = add (subtypename, NULL); + ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename); ct->c_type = type; ct->c_subtype = subtype; @@ -1747,12 +1967,49 @@ set_ct_type (CT ct, int type, int subtype, int encoding) { } +/* + * It's not necessary to update the charset parameter of a Content-Type + * header for a text part. According to RFC 2045 Sec. 6.4, the body + * (content) was originally in the specified charset, "and will be in + * that character set again after decoding." + */ static int -decode_text_parts (CT ct, int encoding, int *message_mods) { +decode_text_parts (CT ct, int encoding, const char *decodetypes, + int *message_mods) { int status = OK; + int lf_line_endings = 0; switch (ct->c_type) { - case CT_TEXT: + case CT_MULTIPART: { + struct multipart *m = (struct multipart *) ct->c_ctparams; + struct part *part; + + /* Should check to see if the body for this part is encoded? + For now, it gets passed along as-is by InitMultiPart(). */ + for (part = m->mp_parts; status == OK && part; part = part->mp_next) { + status = decode_text_parts (part->mp_part, encoding, decodetypes, + message_mods); + } + break; + } + + case CT_MESSAGE: + if (ct->c_subtype == MESSAGE_EXTERNAL) { + struct exbody *e = (struct exbody *) ct->c_ctparams; + + status = decode_text_parts (e->eb_content, encoding, decodetypes, + message_mods); + } + break; + + default: + if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) { + break; + } + + lf_line_endings = + ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings; + switch (ct->c_encoding) { case CE_BASE64: case CE_QUOTED: { @@ -1792,19 +2049,23 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { ct->c_cefile.ce_file = NULL; } else { int enc; - if (ct_encoding == CE_BINARY) + + if (ct_encoding == CE_BINARY) { enc = CE_BINARY; - else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) + } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) { enc = CE_QUOTED; - else - enc = charset_encoding (ct); + } else { + enc = ct_encoding; + } if (set_ce (ct, enc) == OK) { ++*message_mods; if (verbosw) { report (NULL, ct->c_partno, ct->c_file, "decode%s", ct->c_ctline ? ct->c_ctline : ""); } - strip_crs (ct, message_mods); + if (lf_line_endings) { + strip_crs (ct, message_mods); + } } else { status = NOTOK; } @@ -1816,46 +2077,60 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { } case CE_8BIT: case CE_7BIT: - strip_crs (ct, message_mods); + if (lf_line_endings) { + strip_crs (ct, message_mods); + } break; default: break; } break; - - case CT_MULTIPART: { - struct multipart *m = (struct multipart *) ct->c_ctparams; - struct part *part; - - /* Should check to see if the body for this part is encoded? - For now, it gets passed along as-is by InitMultiPart(). */ - for (part = m->mp_parts; status == OK && part; part = part->mp_next) { - status = decode_text_parts (part->mp_part, encoding, message_mods); - } - break; } - case CT_MESSAGE: - if (ct->c_subtype == MESSAGE_EXTERNAL) { - struct exbody *e = (struct exbody *) ct->c_ctparams; + return status; +} - status = decode_text_parts (e->eb_content, encoding, message_mods); - } - break; - default: - break; +/* + * Determine if the part with type[/subtype] should be decoded, according to + * decodetypes (which came from the -decodetypes switch). + */ +static int +should_decode(const char *decodetypes, const char *type, const char *subtype) { + /* Quick search for matching type[/subtype] in decodetypes: bracket + decodetypes with commas, then search for ,type, and ,type/subtype, in + it. */ + + int found_match = 0; + char *delimited_decodetypes = concat(",", decodetypes, ",", NULL); + char *delimited_type = concat(",", type, ",", NULL); + + if (nmh_strcasestr(delimited_decodetypes, delimited_type)) { + found_match = 1; + } else if (subtype != NULL) { + char *delimited_type_subtype = + concat(",", type, "/", subtype, ",", NULL); + + if (nmh_strcasestr(delimited_decodetypes, delimited_type_subtype)) { + found_match = 1; + } + free(delimited_type_subtype); } - return status; + free(delimited_type); + free(delimited_decodetypes); + + return found_match; } -/* See if the decoded content is 7bit, 8bit, or binary. It's binary - if it has any NUL characters, a CR not followed by a LF, or lines - greater than 998 characters in length. If binary, reason is set - to a string explaining why. */ +/* + * See if the decoded content is 7bit, 8bit, or binary. It's binary + * if it has any NUL characters, a CR not followed by a LF, or lines + * greater than 998 characters in length. If binary, reason is set + * to a string explaining why. + */ static int content_encoding (CT ct, const char **reason) { CE ce = &ct->c_cefile; @@ -1863,7 +2138,7 @@ content_encoding (CT ct, const char **reason) { if (ce->ce_file) { size_t line_len = 0; - char buffer[BUFSIZ]; + char buffer[NMH_BUFSIZ]; size_t inbytes; if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) { @@ -1911,6 +2186,9 @@ content_encoding (CT ct, const char **reason) { } +/* + * Strip carriage returns from content. + */ static int strip_crs (CT ct, int *message_mods) { char *charset = content_charset (ct); @@ -1955,7 +2233,7 @@ strip_crs (CT ct, int *message_mods) { } if (fp && *fp) { - char buffer[BUFSIZ]; + char buffer[NMH_BUFSIZ]; size_t bytes_read; size_t bytes_to_read = end > 0 && end > begin ? end - begin : sizeof buffer; @@ -1992,7 +2270,7 @@ strip_crs (CT ct, int *message_mods) { adios (NULL, "unable to create temporary file in %s", get_temp_dir()); } - stripped_content_file = add (tempfile, NULL); + stripped_content_file = mh_xstrdup (tempfile); /* Strip each CR before a LF from the content. */ fseeko (*fp, begin, SEEK_SET); @@ -2031,12 +2309,10 @@ strip_crs (CT ct, int *message_mods) { status = NOTOK; } else { /* Replace the decoded file with the converted one. */ - if (ct->c_cefile.ce_file) { - if (ct->c_cefile.ce_unlink) { - (void) m_unlink (ct->c_cefile.ce_file); - } - free (ct->c_cefile.ce_file); - } + if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink) + (void) m_unlink (ct->c_cefile.ce_file); + + mh_xfree(ct->c_cefile.ce_file); ct->c_cefile.ce_file = stripped_content_file; ct->c_cefile.ce_unlink = 1; @@ -2062,6 +2338,106 @@ strip_crs (CT ct, int *message_mods) { } +/* + * Add/update, if necessary, the message C-T-E, based on the least restrictive + * of the part C-T-E's. + */ +static void +update_cte (CT ct) { + const int least_restrictive_enc = least_restrictive_encoding (ct); + + if (least_restrictive_enc != CE_UNKNOWN && + least_restrictive_enc != CE_7BIT) { + char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL); + HF hf; + int found_cte = 0; + + /* Update/add Content-Transfer-Encoding header field. */ + for (hf = ct->c_first_hf; hf; hf = hf->next) { + if (! strcasecmp (ENCODING_FIELD, hf->name)) { + found_cte = 1; + free (hf->value); + hf->value = cte; + } + } + if (! found_cte) { + add_header (ct, mh_xstrdup (ENCODING_FIELD), cte); + } + } +} + + +/* + * Find the least restrictive encoding (7bit, 8bit, binary) of the parts + * within a message. + */ +static int +least_restrictive_encoding (CT ct) { + int encoding = CE_UNKNOWN; + + switch (ct->c_type) { + case CT_MULTIPART: { + struct multipart *m = (struct multipart *) ct->c_ctparams; + struct part *part; + + for (part = m->mp_parts; part; part = part->mp_next) { + const int part_encoding = + least_restrictive_encoding (part->mp_part); + + if (less_restrictive (encoding, part_encoding)) { + encoding = part_encoding; + } + } + break; + } + + case CT_MESSAGE: + if (ct->c_subtype == MESSAGE_EXTERNAL) { + struct exbody *e = (struct exbody *) ct->c_ctparams; + const int part_encoding = + least_restrictive_encoding (e->eb_content); + + if (less_restrictive (encoding, part_encoding)) { + encoding = part_encoding; + } + } + break; + + default: { + if (less_restrictive (encoding, ct->c_encoding)) { + encoding = ct->c_encoding; + } + }} + + return encoding; +} + + +/* + * Return whether the second encoding is less restrictive than the first, where + * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So, + * CE_BINARY is less restrictive than CE_8BIT and + * CE_8BIT is less restrictive than CE_7BIT. + */ +static int +less_restrictive (int encoding, int second_encoding) { + switch (second_encoding) { + case CE_BINARY: + return encoding != CE_BINARY; + case CE_8BIT: + return encoding != CE_BINARY && encoding != CE_8BIT; + case CE_7BIT: + return encoding != CE_BINARY && encoding != CE_8BIT && + encoding != CE_7BIT; + default : + return 0; + } +} + + +/* + * Convert character set of each part. + */ static int convert_charsets (CT ct, char *dest_charset, int *message_mods) { int status = OK; @@ -2121,8 +2497,11 @@ convert_charsets (CT ct, char *dest_charset, int *message_mods) { /* * Fix various problems that aren't handled elsewhere. These * are fixed unconditionally: there are no switches to disable - * them. (Currently, "problems" is just one: an extraneous - * semicolon at the end of a header parameter list.) + * them. Currently, "problems" are these: + * 1) remove extraneous semicolon at the end of a header parameter list + * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and + * filename parameters in Content-Type and Content-Disposition + * headers, respectively. */ static int fix_always (CT ct, int *message_mods) { @@ -2150,6 +2529,10 @@ fix_always (CT ct, int *message_mods) { default: { HF hf; + if (ct->c_first_hf) { + fix_filename_encoding (ct); + } + for (hf = ct->c_first_hf; hf; hf = hf->next) { size_t len = strlen (hf->value); @@ -2199,17 +2582,111 @@ fix_always (CT ct, int *message_mods) { } +/* + * Factor out common code for loops in fix_filename_encoding(). + */ +static int +fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) { + int fixed = 0; + + if (HasPrefix(value, "=?") && HasSuffix(value, "?=")) { + /* Looks like an RFC 2047 encoded parameter. */ + char decoded[PATH_MAX + 1]; + + if (decode_rfc2047 (value, decoded, sizeof decoded)) { + /* Encode using RFC 2231. */ + replace_param (first_pm, last_pm, name, decoded, 0); + fixed = 1; + } else { + advise (NULL, "failed to decode %s parameter %s", name, value); + } + } + + return fixed; +} + + +/* + * Replace RFC 2047 encoding with RFC 2231 encoding of name and + * filename parameters in Content-Type and Content-Disposition + * headers, respectively. + */ static int -write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, - int message_mods) { +fix_filename_encoding (CT ct) { + PM pm; + HF hf; + int fixed = 0; + + for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) { + if (pm->pm_name && pm->pm_value && + strcasecmp (pm->pm_name, "name") == 0) { + fixed = fix_filename_param (pm->pm_name, pm->pm_value, + &ct->c_ctinfo.ci_first_pm, + &ct->c_ctinfo.ci_last_pm); + } + } + + for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) { + if (pm->pm_name && pm->pm_value && + strcasecmp (pm->pm_name, "filename") == 0) { + fixed = fix_filename_param (pm->pm_name, pm->pm_value, + &ct->c_dispo_first, + &ct->c_dispo_last); + } + } + + /* Fix hf values to correspond. */ + for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) { + enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER; + + if (strcasecmp (hf->name, TYPE_FIELD) == 0) { + field = TYPE_HEADER; + } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) { + field = DISPO_HEADER; + } + + if (field != OTHER) { + const char *const semicolon_loc = strchr (hf->value, ';'); + + if (semicolon_loc) { + const size_t len = + strlen (hf->name) + 1 + semicolon_loc - hf->value; + const char *const params = + output_params (len, + field == TYPE_HEADER + ? ct->c_ctinfo.ci_first_pm + : ct->c_dispo_first, + NULL, 0); + const char *const new_params = concat (params, "\n", NULL); + + replace_substring (&hf->value, semicolon_loc, new_params); + free((void *)new_params); /* Cast away const. Sigh. */ + free((void *)params); + } else { + advise (NULL, "did not find semicolon in %s:%s\n", + hf->name, hf->value); + } + } + } + + return OK; +} + + +/* + * Output content in input file to output file. + */ +static int +write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp, + int modify_inplace, int message_mods) { int status = OK; if (modify_inplace) { if (message_mods > 0) { - if ((status = output_message (ct, outfile)) == OK) { + if ((status = output_message_fp (ct, outfp, outfile)) == OK) { char *infile = input_filename - ? add (input_filename, NULL) - : add (ct->c_file ? ct->c_file : "-", NULL); + ? mh_xstrdup (input_filename) + : mh_xstrdup (ct->c_file ? ct->c_file : "-"); if (remove_file (infile) == OK) { if (rename (outfile, infile)) { @@ -2222,7 +2699,7 @@ write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, int i = -1; if (old != -1 && new != -1) { - char buffer[BUFSIZ]; + char buffer[NMH_BUFSIZ]; while ((i = read (old, buffer, sizeof buffer)) > 0) { @@ -2265,7 +2742,7 @@ write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, } else { /* Output is going to some file. Produce it whether or not there were modifications. */ - status = output_message (ct, outfile); + status = output_message_fp (ct, outfp, outfile); } flush_errors (); @@ -2273,27 +2750,65 @@ write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, } +/* + * parse_mime() does not set lf_line_endings in struct text, so use this + * function to do it. It touches the parts the decodetypes identifies. + */ +static void +set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) { + switch (ct->c_type) { + case CT_MULTIPART: { + struct multipart *m = (struct multipart *) ct->c_ctparams; + struct part *part; + + for (part = m->mp_parts; part; part = part->mp_next) { + set_text_ctparams(part->mp_part, decodetypes, lf_line_endings); + } + break; + } + + case CT_MESSAGE: + if (ct->c_subtype == MESSAGE_EXTERNAL) { + struct exbody *e = (struct exbody *) ct->c_ctparams; + + set_text_ctparams(e->eb_content, decodetypes, lf_line_endings); + } + break; + + default: + if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) { + if (ct->c_ctparams == NULL) { + ct->c_ctparams = mh_xcalloc(1, sizeof (struct text)); + } + ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings; + } + } +} + + /* * If "rmmproc" is defined, call that to remove the file. Otherwise, * use the standard MH backup file. */ static int -remove_file (char *file) { +remove_file (const char *file) { if (rmmproc) { char *rmm_command = concat (rmmproc, " ", file, NULL); int status = system (rmm_command); free (rmm_command); return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK; - } else { - /* This is OK for a non-message file, it still uses the - BACKUP_PREFIX form. The backup file will be in the same - directory as file. */ - return rename (file, m_backup (file)); } + /* This is OK for a non-message file, it still uses the + BACKUP_PREFIX form. The backup file will be in the same + directory as file. */ + return rename (file, m_backup (file)); } +/* + * Output formatted message to user. + */ static void report (char *what, char *partno, char *filename, char *message, ...) { va_list args;