X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/a30e7865b46712fe346d8a91894ab68aac664901..ddf3a8574f657dcb8c53fc5908e7bebbde1994b5:/uip/mhfixmsg.c?ds=sidebyside diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c index 4262276d..f410ecb4 100644 --- a/uip/mhfixmsg.c +++ b/uip/mhfixmsg.c @@ -1,22 +1,57 @@ -/* - * mhfixmsg.c -- rewrite a message with various transformations +/* mhfixmsg.c -- rewrite a message with various transformations * * This code is Copyright (c) 2002 and 2013, by the authors of nmh. * See the COPYRIGHT file in the root directory of the nmh * distribution for complete copyright information. */ -#include -#include -#include -#include -#include +#include "h/mh.h" +#include "sbr/m_name.h" +#include "sbr/m_gmprot.h" +#include "sbr/m_getfld.h" +#include "sbr/getarguments.h" +#include "sbr/concat.h" +#include "sbr/seq_setprev.h" +#include "sbr/seq_setcur.h" +#include "sbr/seq_save.h" +#include "sbr/smatch.h" +#include "sbr/fmt_rfc2047.h" +#include "sbr/cpydata.h" +#include "sbr/trimcpy.h" +#include "sbr/m_convert.h" +#include "sbr/m_backup.h" +#include "sbr/getfolder.h" +#include "sbr/folder_read.h" +#include "sbr/context_save.h" +#include "sbr/context_replace.h" +#include "sbr/context_find.h" +#include "sbr/readconfig.h" +#include "sbr/ambigsw.h" +#include "sbr/path.h" +#include "sbr/print_version.h" +#include "sbr/print_help.h" +#include "sbr/error.h" +#include "h/fmt_scan.h" +#include "h/mime.h" +#include "h/mhparse.h" +#include "h/done.h" +#include "h/utils.h" +#include "h/signals.h" +#include "sbr/m_maildir.h" +#include "sbr/m_mktemp.h" +#include "sbr/mime_type.h" +#include "mhmisc.h" +#include "mhfree.h" +#include "mhoutsbr.h" +#include "mhshowsbr.h" #include #define MHFIXMSG_SWITCHES \ X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \ X("nodecodetext", 0, NDECODETEXTSW) \ X("decodetypes", 0, DECODETYPESW) \ + X("decodeheaderfieldbodies utf-8", 0, DECODEHEADERFIELDBODIESSW) \ + X("nodecodeheaderfieldbodies", 0, NDECODEHEADERFIELDBODIESSW) \ X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \ X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \ X("textcharset", 0, TEXTCHARSETSW) \ @@ -27,8 +62,10 @@ X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \ X("fixboundary", 0, FIXBOUNDARYSW) \ X("nofixboundary", 0, NFIXBOUNDARYSW) \ - X("fixcte", 0, FIXCTESW) \ - X("nofixcte", 0, NFIXCTESW) \ + X("fixcte", 0, FIXCOMPOSITECTESW) \ + X("nofixcte", 0, NFIXCOMPOSITECTESW) \ + X("checkbase64", 0, CHECKBASE64SW) \ + X("nocheckbase64", 0, NCHECKBASE64SW) \ X("fixtype mimetype", 0, FIXTYPESW) \ X("file file", 0, FILESW) \ X("outfile file", 0, OUTFILESW) \ @@ -55,61 +92,49 @@ int debugsw; /* Needed by mhparse.c. */ #define quitser pipeser -/* mhparse.c */ -extern int skip_mp_cte_check; /* flag to InitMultiPart */ -extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */ -extern int bogus_mp_content; /* flag from InitMultiPart */ -/* flags to/from parse_header_attrs */ -extern int suppress_extraneous_trailing_semicolon_warning; -extern int extraneous_trailing_semicolon; - -/* mhoutsbr.c */ -int output_message (CT, char *); - -/* mhmisc.c */ -void flush_errors (void); - -/* mhfree.c */ -extern CT *cts; -void freects_done (int) NORETURN; - /* * static prototypes */ -typedef struct fix_transformations { +typedef struct { int fixboundary; - int fixcte; + int fixcompositecte; svector_t fixtypes; int reformat; int replacetextplain; int decodetext; char *decodetypes; + char *decodeheaderfieldbodies; /* Either NULL or "utf-8". */ /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */ int lf_line_endings; char *textcharset; + bool checkbase64; } fix_transformations; -int mhfixmsgsbr (CT *, const fix_transformations *, char *); +static int mhfixmsgsbr (CT *, char *, const fix_transformations *, + FILE **, char *, FILE **); static int fix_boundary (CT *, int *); -static int copy_input_to_output (const char *, const char *); +static int copy_input_to_output (const char *, FILE *, const char *, FILE *); static int get_multipart_boundary (CT, char **); static int replace_boundary (CT, char *, char *); static int fix_types (CT, svector_t, int *); static char *replace_substring (char **, const char *, const char *); static char *remove_parameter (char *, const char *); -static int fix_multipart_cte (CT, int *); +static int fix_composite_cte (CT, int *); static int set_ce (CT, int); static int ensure_text_plain (CT *, CT, int *, int); static int find_textplain_sibling (CT, int, int *); static int insert_new_text_plain_part (CT, int, CT); static CT build_text_plain_part (CT); static int insert_into_new_mp_alt (CT *, int *); +static int insert_into_new_mp_mixed (CT *, const char *, int *); static CT divide_part (CT); static void copy_ctinfo (CI, CI); static int decode_part (CT); +static size_t get_valid_base64 (CT, char **); +static size_t find_invalid_base64_pos (const char *); +static int check_base64_encoding (CT *); static int reformat_part (CT, char *, char *, char *, int); -static int charset_encoding (CT); -static CT build_multipart_alt (CT, CT, int, int); +static CT build_multipart (CT, CT, int, int); static int boundary_in_content (FILE **, char *, const char *); static void transfer_noncontent_headers (CT, CT); static int set_ct_type (CT, int type, int subtype, int encoding); @@ -117,40 +142,48 @@ static int decode_text_parts (CT, int, const char *, int *); static int should_decode(const char *, const char *, const char *); static int content_encoding (CT, const char **); static int strip_crs (CT, int *); +static void update_cte (CT); +static int least_restrictive_encoding (CT) PURE; +static int less_restrictive (int, int); static int convert_charsets (CT, char *, int *); -static int fix_always (CT, int *); -static int write_content (CT, const char *, char *, int, int); +static int fix_always (CT *, const fix_transformations *, int *); +static int decode_header_field_bodies (CT, int *); +static int fix_filename_param (char *, char *, PM *, PM *); +static int fix_filename_encoding (CT); +static int write_content (CT, const char *, char *, FILE *, int, int); static void set_text_ctparams(CT, char *, int); static int remove_file (const char *); -static void report (char *, char *, char *, char *, ...); +static void report (char *, char *, char *, char *, ...) + CHECK_PRINTF(4, 5); static void pipeser (int); int -main (int argc, char **argv) { +main (int argc, char **argv) +{ int msgnum; char *cp, *file = NULL, *folder = NULL; - char *maildir, buf[100], *outfile = NULL; + char *maildir = NULL, buf[100], *outfile = NULL; char **argp, **arguments; struct msgs_array msgs = { 0, 0, NULL }; struct msgs *mp = NULL; CT *ctp; - FILE *fp; - int using_stdin = 0; - int chgflag = 1; + FILE *fp, *infp = NULL, *outfp = NULL; + bool using_stdin = false; + bool chgflag = true; int status = OK; fix_transformations fx; - fx.reformat = fx.fixcte = fx.fixboundary = 1; + fx.reformat = fx.fixcompositecte = fx.fixboundary = 1; fx.fixtypes = NULL; fx.replacetextplain = 0; fx.decodetext = CE_8BIT; fx.decodetypes = "text,application/ics"; /* Default, per man page. */ + fx.decodeheaderfieldbodies = NULL; fx.lf_line_endings = 0; fx.textcharset = NULL; + fx.checkbase64 = true; - if (nmh_init(argv[0], 2)) { return 1; } - - done = freects_done; + if (nmh_init(argv[0], true, false)) { return 1; } arguments = getarguments (invo_name, argc, argv, 1); argp = arguments; @@ -165,7 +198,7 @@ main (int argc, char **argv) { ambigsw (cp, switches); done (1); case UNKWNSW: - adios (NULL, "-%s unknown", cp); + die("-%s unknown", cp); case HELPSW: snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]", @@ -178,7 +211,7 @@ main (int argc, char **argv) { case DECODETEXTSW: if (! (cp = *argp++) || *cp == '-') { - adios (NULL, "missing argument to %s", argp[-2]); + die("missing argument to %s", argp[-2]); } if (! strcasecmp (cp, "8bit")) { fx.decodetext = CE_8BIT; @@ -187,7 +220,7 @@ main (int argc, char **argv) { } else if (! strcasecmp (cp, "binary")) { fx.decodetext = CE_BINARY; } else { - adios (NULL, "invalid argument to %s", argp[-2]); + die("invalid argument to %s", argp[-2]); } continue; case NDECODETEXTSW: @@ -195,10 +228,25 @@ main (int argc, char **argv) { continue; case DECODETYPESW: if (! (cp = *argp++) || *cp == '-') { - adios (NULL, "missing argument to %s", argp[-2]); + die("missing argument to %s", argp[-2]); } fx.decodetypes = cp; continue; + case DECODEHEADERFIELDBODIESSW: + if (! (cp = *argp++) || *cp == '-') { + die("missing argument to %s", argp[-2]); + } + fx.decodeheaderfieldbodies = cp; + if (strcasecmp (cp, "utf-8") && strcasecmp (cp, "utf8")) { + /* Because UTF-8 strings can't have embedded nulls. Other + encodings support that, too, but we won't bother to + enumerate them. */ + die("-decodeheaderfieldbodies only supports utf-8"); + } + continue; + case NDECODEHEADERFIELDBODIESSW: + fx.decodeheaderfieldbodies = NULL; + continue; case CRLFLINEBREAKSSW: fx.lf_line_endings = 0; continue; @@ -207,35 +255,40 @@ main (int argc, char **argv) { continue; case TEXTCHARSETSW: if (! (cp = *argp++) || (*cp == '-' && cp[1])) { - adios (NULL, "missing argument to %s", argp[-2]); + die("missing argument to %s", argp[-2]); } fx.textcharset = cp; continue; case NTEXTCHARSETSW: fx.textcharset = 0; continue; + case CHECKBASE64SW: + fx.checkbase64 = true; + continue; + case NCHECKBASE64SW: + fx.checkbase64 = false; + continue; case FIXBOUNDARYSW: fx.fixboundary = 1; continue; case NFIXBOUNDARYSW: fx.fixboundary = 0; continue; - case FIXCTESW: - fx.fixcte = 1; + case FIXCOMPOSITECTESW: + fx.fixcompositecte = 1; continue; - case NFIXCTESW: - fx.fixcte = 0; + case NFIXCOMPOSITECTESW: + fx.fixcompositecte = 0; continue; case FIXTYPESW: if (! (cp = *argp++) || (*cp == '-' && cp[1])) { - adios (NULL, "missing argument to %s", argp[-2]); + die("missing argument to %s", argp[-2]); } if (! strncasecmp (cp, "multipart/", 10) || - ! strncasecmp (cp, "message/", 8)) { - adios (NULL, "-fixtype %s not allowed", cp); - } else if (! strchr (cp, '/')) { - adios (NULL, "-fixtype requires type/subtype"); - } + ! strncasecmp (cp, "message/", 8)) + die("-fixtype %s not allowed", cp); + if (! strchr (cp, '/')) + die("-fixtype requires type/subtype"); if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); } svector_push_back (fx.fixtypes, cp); continue; @@ -253,29 +306,29 @@ main (int argc, char **argv) { continue; case FILESW: if (! (cp = *argp++) || (*cp == '-' && cp[1])) { - adios (NULL, "missing argument to %s", argp[-2]); + die("missing argument to %s", argp[-2]); } - file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE); + file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE); continue; case OUTFILESW: if (! (cp = *argp++) || (*cp == '-' && cp[1])) { - adios (NULL, "missing argument to %s", argp[-2]); + die("missing argument to %s", argp[-2]); } - outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE); + outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE); continue; case RPROCSW: if (!(rmmproc = *argp++) || *rmmproc == '-') { - adios (NULL, "missing argument to %s", argp[-2]); + die("missing argument to %s", argp[-2]); } continue; case NRPRCSW: rmmproc = NULL; continue; case CHGSW: - chgflag = 1; + chgflag = true; continue; case NCHGSW: - chgflag = 0; + chgflag = false; continue; case VERBSW: verbosw = 1; @@ -286,15 +339,13 @@ main (int argc, char **argv) { } } if (*cp == '+' || *cp == '@') { - if (folder) { - adios (NULL, "only one folder at a time!"); - } else { - folder = pluspath (cp); - } + if (folder) + die("only one folder at a time!"); + folder = pluspath (cp); } else { if (*cp == '/') { /* Interpret a full path as a filename, not a message. */ - file = add (cp, NULL); + file = mh_xstrdup (cp); } else { app_msgarg (&msgs, cp); } @@ -308,19 +359,29 @@ main (int argc, char **argv) { * Read the standard profile setup */ if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) { - readconfig ((struct node **) 0, fp, cp, 0); + readconfig(NULL, fp, cp, 0); fclose (fp); } - suppress_bogus_mp_content_warning = skip_mp_cte_check = 1; - suppress_extraneous_trailing_semicolon_warning = 1; + suppress_bogus_mp_content_warning = skip_mp_cte_check = true; + suppress_extraneous_trailing_semicolon_warning = true; if (! context_find ("path")) { free (path ("./", TFOLDER)); } if (file && msgs.size) { - adios (NULL, "cannot specify msg and file at same time!"); + die("cannot specify msg and file at same time!"); + } + + if (outfile) { + /* Open the outfile now, so we don't have to risk opening it + after running out of fds. */ + if (strcmp (outfile, "-") == 0) { + outfp = stdout; + } else if ((outfp = fopen (outfile, "w")) == NULL) { + adios (outfile, "unable to open for writing"); + } } /* @@ -337,43 +398,50 @@ main (int argc, char **argv) { int fd; char *cp; - using_stdin = 1; + using_stdin = true; if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) { - adios (NULL, "unable to create temporary file in %s", + die("unable to create temporary file in %s", get_temp_dir()); } else { free (file); - file = add (cp, NULL); + file = mh_xstrdup (cp); cpydata (STDIN_FILENO, fd, "-", file); } if (close (fd)) { (void) m_unlink (file); - adios (NULL, "failed to write temporary file"); + die("failed to write temporary file"); } } - if (! (cts = (CT *) mh_xcalloc ((size_t) 2, sizeof *cts))) { - adios (NULL, "out of memory"); - } + cts = mh_xcalloc(2, sizeof *cts); ctp = cts; if ((ct = parse_mime (file))) { set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings); *ctp++ = ct; } else { - advise (NULL, "unable to parse message from file %s", file); + inform("unable to parse message from file %s", file); status = NOTOK; - /* If there's an outfile, pass the input message unchanged, so the message won't - get dropped from a pipeline. */ + /* If there's an outfile, pass the input message unchanged, so the + message won't get dropped from a pipeline. */ if (outfile) { - /* Something went wrong. Output might be expected, such as if this were run - as a filter. Just copy the input to the output. */ - if (copy_input_to_output (file, outfile) != OK) { - advise (NULL, "unable to copy message to %s, it might be lost\n", outfile); + /* Something went wrong. Output might be expected, such as if + this were run as a filter. Just copy the input to the + output. */ + if ((infp = fopen (file, "r")) == NULL) { + adios (file, "unable to open for reading"); } + + if (copy_input_to_output (file, infp, outfile, outfp) != OK) { + inform("unable to copy message to %s, " + "it might be lost\n", outfile); + } + + fclose (infp); + infp = NULL; } } } else { @@ -388,20 +456,22 @@ main (int argc, char **argv) { if (! folder) { folder = getfolder (1); } - maildir = m_maildir (folder); + maildir = mh_xstrdup(m_maildir (folder)); + /* chdir so that error messages, esp. from MIME parser, just + refer to the message and not its path. */ if (chdir (maildir) == NOTOK) { adios (maildir, "unable to change directory to"); } /* read folder and create message structure */ if (! (mp = folder_read (folder, 1))) { - adios (NULL, "unable to read folder %s", folder); + die("unable to read folder %s", folder); } /* check for empty folder */ if (mp->nummsg == 0) { - adios (NULL, "no messages in %s", folder); + die("no messages in %s", folder); } /* parse all the message ranges/sequences and set SELECTED */ @@ -411,34 +481,47 @@ main (int argc, char **argv) { } seq_setprev (mp); /* set the previous-sequence */ - if (! (cts = - (CT *) mh_xcalloc ((size_t) (mp->numsel + 1), sizeof *cts))) { - adios (NULL, "out of memory"); - } + cts = mh_xcalloc(mp->numsel + 1, sizeof *cts); ctp = cts; for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) { if (is_selected(mp, msgnum)) { - char *msgnam; + char *msgnam = m_name (msgnum); - msgnam = m_name (msgnum); if ((ct = parse_mime (msgnam))) { set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings); *ctp++ = ct; } else { - advise (NULL, "unable to parse message %s", msgnam); + inform("unable to parse message %s", msgnam); status = NOTOK; - /* If there's an outfile, pass the input message unchanged, so the message won't - get dropped from a pipeline. */ + /* If there's an outfile, pass the input message + unchanged, so the message won't get dropped from a + pipeline. */ if (outfile) { - /* Something went wrong. Output might be expected, such as if this were run - as a filter. Just copy the input to the output. */ - const char *input_filename = path (msgnam, TFILE); + /* Something went wrong. Output might be expected, + such as if this were run as a filter. Just copy + the input to the output. */ + /* Can't use path() here because 1) it might have been + called before and it caches the pwd, and 2) we call + chdir() after that. */ + char *input_filename = + concat (maildir, "/", msgnam, NULL); + + if ((infp = fopen (input_filename, "r")) == NULL) { + adios (input_filename, + "unable to open for reading"); + } - if (copy_input_to_output (input_filename, outfile) != OK) { - advise (NULL, "unable to copy message to %s, it might be lost\n", outfile); + if (copy_input_to_output (input_filename, infp, + outfile, outfp) != OK) { + inform("unable to copy message to %s, " + "it might be lost\n", outfile); } + + fclose (infp); + infp = NULL; + free (input_filename); } } } @@ -454,7 +537,11 @@ main (int argc, char **argv) { if (*cts) { for (ctp = cts; *ctp; ++ctp) { - status += mhfixmsgsbr (ctp, &fx, outfile); + status = + mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp) == OK + ? 0 + : 1; + free_content (*ctp); if (using_stdin) { (void) m_unlink (file); @@ -469,59 +556,84 @@ main (int argc, char **argv) { status = 1; } + free(maildir); + free (cts); + if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); } + if (infp) { fclose (infp); } /* even if stdin */ + if (outfp) { fclose (outfp); } /* even if stdout */ free (outfile); free (file); free (folder); free (arguments); - /* done is freects_done, which will clean up all of cts. */ - done (status); + done (status == OK ? 0 : 1); return NOTOK; } -int -mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { +/* + * Apply transformations to one message. + */ +static int +mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx, + FILE **infp, char *outfile, FILE **outfp) +{ /* Store input filename in case one of the transformations, i.e., fix_boundary(), rewrites to a tmp file. */ - char *input_filename = add ((*ctp)->c_file, NULL); - int modify_inplace = 0; + char *input_filename = maildir + ? concat (maildir, "/", (*ctp)->c_file, NULL) + : mh_xstrdup ((*ctp)->c_file); + bool modify_inplace = false; int message_mods = 0; int status = OK; + /* Though the input file won't need to be opened if everything goes + well, do it here just in case there's a failure, and that failure is + running out of file descriptors. */ + if ((*infp = fopen (input_filename, "r")) == NULL) { + adios (input_filename, "unable to open for reading"); + } + if (outfile == NULL) { - modify_inplace = 1; + modify_inplace = true; if ((*ctp)->c_file) { char *tempfile; - if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) { - adios (NULL, "unable to create temporary file in %s", + /* outfp will be closed by the caller */ + if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) == + NULL) { + die("unable to create temporary file in %s", get_temp_dir()); } - outfile = add (tempfile, NULL); + outfile = mh_xstrdup (tempfile); } else { - adios (NULL, "missing both input and output filenames\n"); + die("missing both input and output filenames\n"); } - } + } /* else *outfp was defined by caller */ reverse_alternative_parts (*ctp); - status = fix_always (*ctp, &message_mods); + status = fix_always (ctp, fx, &message_mods); if (status == OK && fx->fixboundary) { status = fix_boundary (ctp, &message_mods); } if (status == OK && fx->fixtypes != NULL) { status = fix_types (*ctp, fx->fixtypes, &message_mods); } - if (status == OK && fx->fixcte) { - status = fix_multipart_cte (*ctp, &message_mods); + if (status == OK && fx->fixcompositecte) { + status = fix_composite_cte (*ctp, &message_mods); } if (status == OK && fx->reformat) { status = ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain); } if (status == OK && fx->decodetext) { - status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes, &message_mods); + status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes, + &message_mods); + update_cte (*ctp); + } + if (status == OK && fx->decodeheaderfieldbodies) { + status = decode_header_field_bodies(*ctp, &message_mods); } if (status == OK && fx->textcharset != NULL) { status = convert_charsets (*ctp, fx->textcharset, &message_mods); @@ -543,14 +655,16 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { * Write the content to a file */ if (status == OK) { - status = write_content (*ctp, input_filename, outfile, modify_inplace, - message_mods); + status = write_content (*ctp, input_filename, outfile, *outfp, + modify_inplace, message_mods); } else if (! modify_inplace) { /* Something went wrong. Output might be expected, such as if this were run as a filter. Just copy the input to the output. */ - if (copy_input_to_output (input_filename, outfile) != OK) { - advise (NULL, "unable to copy message to %s, it might be lost\n", outfile); + if (copy_input_to_output (input_filename, *infp, outfile, + *outfp) != OK) { + inform("unable to copy message to %s, it might be lost\n", + outfile); } } @@ -560,20 +674,24 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { outfile = NULL; } + fclose (*infp); + *infp = NULL; free (input_filename); return status; } -/* Copy input message to output. Assumes not modifying in place, so this - might be running as part of a pipeline. */ +/* + * Copy input message to output. Assumes not modifying in place, so this + * might be running as part of a pipeline. + */ static int -copy_input_to_output (const char *input_filename, const char *output_filename) { - int in = open (input_filename, O_RDONLY); - int out = strcmp (output_filename, "-") - ? open (output_filename, O_WRONLY | O_CREAT, m_gmprot ()) - : STDOUT_FILENO; +copy_input_to_output (const char *input_filename, FILE *infp, + const char *output_filename, FILE *outfp) +{ + int in = fileno (infp); + int out = fileno (outfp); int status = OK; if (in != -1 && out != -1) { @@ -582,15 +700,16 @@ copy_input_to_output (const char *input_filename, const char *output_filename) { status = NOTOK; } - close (out); - close (in); - return status; } +/* + * Fix mismatched outer level boundary. + */ static int -fix_boundary (CT *ct, int *message_mods) { +fix_boundary (CT *ct, int *message_mods) +{ struct multipart *mp; int status = OK; @@ -615,7 +734,7 @@ fix_boundary (CT *ct, int *message_mods) { if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) { if (replace_boundary (*ct, fixed, part_boundary) == OK) { - char *filename = add ((*ct)->c_file, NULL); + char *filename = mh_xstrdup ((*ct)->c_file); CT fixed_ct; free_content (*ct); @@ -630,16 +749,16 @@ fix_boundary (CT *ct, int *message_mods) { } } else { *ct = NULL; - advise (NULL, "unable to parse fixed part"); + inform("unable to parse fixed part"); status = NOTOK; } free (filename); } else { - advise (NULL, "unable to replace broken boundary"); + inform("unable to replace broken boundary"); status = NOTOK; } } else { - advise (NULL, "unable to create temporary file in %s", + inform("unable to create temporary file in %s", get_temp_dir()); status = NOTOK; } @@ -662,9 +781,13 @@ fix_boundary (CT *ct, int *message_mods) { } +/* + * Find boundary at end of multipart. + */ static int -get_multipart_boundary (CT ct, char **part_boundary) { - char buffer[BUFSIZ]; +get_multipart_boundary (CT ct, char **part_boundary) +{ + char buffer[NMH_BUFSIZ]; char *end_boundary = NULL; off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer) ? (off_t) (ct->c_end - sizeof buffer) @@ -672,10 +795,10 @@ get_multipart_boundary (CT ct, char **part_boundary) { size_t bytes_read; int status = OK; - /* This will fail if the boundary spans fread() calls. BUFSIZ should + /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should be big enough, even if it's just 1024, to make that unlikely. */ - /* free_content() will close ct->c_fp. */ + /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */ if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) { advise (ct->c_file, "unable to open for reading"); return NOTOK; @@ -695,18 +818,16 @@ get_multipart_boundary (CT ct, char **part_boundary) { if ((end = rfind_str (buffer, cp - buffer, "\n"))) { if (strlen (end) > 3 && *end++ == '\n' && *end++ == '-' && *end++ == '-') { - end_boundary = add (end, NULL); + end_boundary = mh_xstrdup (end); break; } } } } - if (! end_boundary && begin > (off_t) (ct->c_begin + sizeof buffer)) { - begin -= sizeof buffer; - } else { + if (end_boundary || begin <= (off_t) (ct->c_begin + sizeof buffer)) break; - } + begin -= sizeof buffer; } /* Get boundary at beginning of multipart. */ @@ -732,6 +853,11 @@ get_multipart_boundary (CT ct, char **part_boundary) { status = NOTOK; } + if (ct->c_fp) { + fclose (ct->c_fp); + ct->c_fp = NULL; + } + if (status == OK) { *part_boundary = end_boundary; } else { @@ -743,18 +869,21 @@ get_multipart_boundary (CT ct, char **part_boundary) { } -/* Open and copy ct->c_file to file, replacing the multipart boundary. */ +/* + * Open and copy ct->c_file to file, replacing the multipart boundary. + */ static int -replace_boundary (CT ct, char *file, char *boundary) { +replace_boundary (CT ct, char *file, char *boundary) +{ FILE *fpin, *fpout; int compnum, state; - char buf[BUFSIZ], name[NAMESZ]; + char buf[NMH_BUFSIZ], name[NAMESZ]; char *np, *vp; - m_getfld_state_t gstate = 0; + m_getfld_state_t gstate; int status = OK; if (ct->c_file == NULL) { - advise (NULL, "missing input filename"); + inform("missing input filename"); return NOTOK; } @@ -769,45 +898,44 @@ replace_boundary (CT ct, char *file, char *boundary) { return NOTOK; } + gstate = m_getfld_state_init(fpin); for (compnum = 1;;) { int bufsz = (int) sizeof buf; - switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) { + switch (state = m_getfld2(&gstate, name, buf, &bufsz)) { case FLD: case FLDPLUS: compnum++; /* get copies of the buffers */ - np = add (name, NULL); - vp = add (buf, NULL); + np = mh_xstrdup (name); + vp = mh_xstrdup (buf); /* if necessary, get rest of field */ while (state == FLDPLUS) { bufsz = sizeof buf; - state = m_getfld (&gstate, name, buf, &bufsz, fpin); + state = m_getfld2(&gstate, name, buf, &bufsz); vp = add (buf, vp); /* add to previous value */ } if (strcasecmp (TYPE_FIELD, np)) { fprintf (fpout, "%s:%s", np, vp); } else { - char *new_ctline, *new_params; + char *new_ctline, *new_params; - replace_param(&ct->c_ctinfo.ci_first_pm, - &ct->c_ctinfo.ci_last_pm, "boundary", - boundary, 0); + replace_param(&ct->c_ctinfo.ci_first_pm, + &ct->c_ctinfo.ci_last_pm, "boundary", + boundary, 0); - new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/", - ct->c_ctinfo.ci_subtype, NULL); - new_params = output_params(strlen(TYPE_FIELD) + - strlen(new_ctline) + 1, - ct->c_ctinfo.ci_first_pm, NULL, 0); + new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/", + ct->c_ctinfo.ci_subtype, NULL); + new_params = output_params(LEN(TYPE_FIELD) + + strlen(new_ctline) + 1, + ct->c_ctinfo.ci_first_pm, NULL, 0); fprintf (fpout, "%s:%s%s\n", np, new_ctline, - new_params ? new_params : ""); - free(new_ctline); - if (new_params) { - free(new_params); - } + FENDNULL(new_params)); + free(new_ctline); + free(new_params); } free (vp); @@ -816,7 +944,7 @@ replace_boundary (CT ct, char *file, char *boundary) { continue; case BODY: - fputs ("\n", fpout); + putc('\n', fpout); /* buf will have a terminating NULL, skip it. */ if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) { advise (file, "fwrite"); @@ -828,12 +956,12 @@ replace_boundary (CT ct, char *file, char *boundary) { case LENERR: case FMTERR: - advise (NULL, "message format error in component #%d", compnum); + inform("message format error in component #%d", compnum); status = NOTOK; break; default: - advise (NULL, "getfld() returned %d", state); + inform("getfld() returned %d", state); status = NOTOK; break; } @@ -849,8 +977,12 @@ replace_boundary (CT ct, char *file, char *boundary) { } +/* + * Fix Content-Type header to reflect the content of its part. + */ static int -fix_types (CT ct, svector_t fixtypes, int *message_mods) { +fix_types (CT ct, svector_t fixtypes, int *message_mods) +{ int status = OK; switch (ct->c_type) { @@ -907,12 +1039,12 @@ fix_types (CT ct, svector_t fixtypes, int *message_mods) { * c_ctline */ /* Extract type and subtype from type/subtype. */ - ct_type = getcpy (ct_type_subtype); + ct_type = mh_xstrdup(ct_type_subtype); if ((cp = strchr (ct_type, '/'))) { *cp = '\0'; - ct_subtype = getcpy (++cp); + ct_subtype = mh_xstrdup(++cp); } else { - advise (NULL, "missing / in MIME type of %s %s", + inform("missing / in MIME type of %s %s", ct->c_file, ct->c_partno); free (ct_type); return NOTOK; @@ -927,7 +1059,7 @@ fix_types (CT ct, svector_t fixtypes, int *message_mods) { ct->c_ctinfo.ci_subtype = ct_subtype; if (! replace_substring (&ct->c_ctline, type, ct_type_subtype)) { - advise (NULL, "did not find %s in %s", + inform("did not find %s in %s", type, ct->c_ctline); } @@ -944,10 +1076,8 @@ fix_types (CT ct, svector_t fixtypes, int *message_mods) { type, ct_type_subtype); } break; - } else { - advise (NULL, "did not find %s in %s", - type, hf->value); } + inform("did not find %s in %s", type, hf->value); } } } @@ -961,8 +1091,13 @@ fix_types (CT ct, svector_t fixtypes, int *message_mods) { return status; } + +/* + * Replace a substring, allocating space to hold the new one. + */ char * -replace_substring (char **str, const char *old, const char *new) { +replace_substring (char **str, const char *old, const char *new) +{ char *cp; if ((cp = strstr (*str, old))) { @@ -970,7 +1105,7 @@ replace_substring (char **str, const char *old, const char *new) { char *prefix, *new_str; if (cp - *str) { - prefix = getcpy (*str); + prefix = mh_xstrdup(*str); *(prefix + (cp - *str)) = '\0'; new_str = concat (prefix, new, remainder, NULL); free (prefix); @@ -981,18 +1116,20 @@ replace_substring (char **str, const char *old, const char *new) { free (*str); return *str = new_str; - } else { - return NULL; } + + return NULL; } + /* * Remove a name=value parameter, given just its name, from a header value. */ char * -remove_parameter (char *str, const char *name) { +remove_parameter (char *str, const char *name) +{ /* It looks to me, based on the BNF in RFC 2045, than there can't - be whitespace betwwen the parameter name and the "=", or + be whitespace between the parameter name and the "=", or between the "=" and the parameter value. */ char *param_name = concat (name, "=", NULL); char *cp; @@ -1032,31 +1169,35 @@ remove_parameter (char *str, const char *name) { return str; } + +/* + * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part. + * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to + * 8 bit. + */ static int -fix_multipart_cte (CT ct, int *message_mods) { +fix_composite_cte (CT ct, int *message_mods) +{ int status = OK; - if (ct->c_type == CT_MULTIPART) { - struct multipart *m; - struct part *part; - + if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) { if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT && ct->c_encoding != CE_BINARY) { HF hf; for (hf = ct->c_first_hf; hf; hf = hf->next) { char *name = hf->name; - for (; *name && isspace ((unsigned char) *name); ++name) { + for (; isspace((unsigned char)*name); ++name) { continue; } if (! strncasecmp (name, ENCODING_FIELD, - strlen (ENCODING_FIELD))) { + LEN(ENCODING_FIELD))) { char *prefix = "Nmh-REPLACED-INVALID-"; - HF h = mh_xmalloc (sizeof *h); + HF h; - h->name = add (hf->name, NULL); - h->hf_encoding = hf->hf_encoding; + NEW(h); + h->name = mh_xstrdup (hf->name); h->next = hf->next; hf->next = h; @@ -1073,7 +1214,7 @@ fix_multipart_cte (CT ct, int *message_mods) { free (encoding); } - h->value = add (" 8bit\n", NULL); + h->value = mh_xstrdup (" 8bit\n"); /* Don't need to warn for multiple C-T-E header fields, parse_mime() already does that. But @@ -1085,11 +1226,16 @@ fix_multipart_cte (CT ct, int *message_mods) { set_ce (ct, CE_8BIT); } - m = (struct multipart *) ct->c_ctparams; - for (part = m->mp_parts; part; part = part->mp_next) { - if (fix_multipart_cte (part->mp_part, message_mods) != OK) { - status = NOTOK; - break; + if (ct->c_type == CT_MULTIPART) { + struct multipart *m; + struct part *part; + + m = (struct multipart *) ct->c_ctparams; + for (part = m->mp_parts; part; part = part->mp_next) { + if (fix_composite_cte (part->mp_part, message_mods) != OK) { + status = NOTOK; + break; + } } } } @@ -1098,14 +1244,18 @@ fix_multipart_cte (CT ct, int *message_mods) { } +/* + * Set content encoding. + */ static int -set_ce (CT ct, int encoding) { +set_ce (CT ct, int encoding) +{ const char *ce = ce_str (encoding); const struct str2init *ctinit = get_ce_method (ce); if (ctinit) { char *cte = concat (" ", ce, "\n", NULL); - int found_cte = 0; + bool found_cte = false; HF hf; /* Decoded contents might be in ct->c_cefile.ce_file, if the caller is decode_text_parts (). Save because we'll @@ -1122,35 +1272,42 @@ set_ce (CT ct, int encoding) { ct->c_cefile.ce_file to the name of the file containing the contents. */ + if (ct->c_ceclosefnx) { + (*ct->c_ceclosefnx) (ct); + } + /* Restore the cefile. */ ct->c_cefile = decoded_content_info; /* Update/add Content-Transfer-Encoding header field. */ for (hf = ct->c_first_hf; hf; hf = hf->next) { if (! strcasecmp (ENCODING_FIELD, hf->name)) { - found_cte = 1; + found_cte = true; free (hf->value); hf->value = cte; } } if (! found_cte) { - add_header (ct, add (ENCODING_FIELD, NULL), cte); + add_header (ct, mh_xstrdup (ENCODING_FIELD), cte); } /* Update c_celine. It's used only by mhlist -debug. */ free (ct->c_celine); - ct->c_celine = add (cte, NULL); + ct->c_celine = mh_xstrdup (cte); return OK; - } else { - return NOTOK; } + + return NOTOK; } -/* Make sure each text part has a corresponding text/plain part. */ +/* + * Make sure each text part has a corresponding text/plain part. + */ static int -ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) { +ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) +{ int status = OK; switch ((*ct)->c_type) { @@ -1237,12 +1394,12 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) { HF hf; parent->c_subtype = MULTI_ALTERNATE; - parent->c_ctinfo.ci_subtype = getcpy ("alternative"); + free (parent->c_ctinfo.ci_subtype); + parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative"); if (! replace_substring (&parent->c_ctline, "/related", "/alternative")) { - advise (NULL, - "did not find multipart/related in %s", - parent->c_ctline); + inform("did not find multipart/related in %s", + parent->c_ctline); } /* Update Content-Type header field. */ @@ -1261,11 +1418,9 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) { multipart/alternative. */ remove_parameter (hf->value, "type"); break; - } else { - advise (NULL, "did not find multipart/" - "related in header %s", - hf->value); } + inform("did not find multipart/" + "related in header %s", hf->value); } } } else { @@ -1308,13 +1463,16 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) { } -/* See if there is a sibling text/plain. */ +/* + * See if there is a sibling text/plain, and return its subpart number. + */ static int find_textplain_sibling (CT parent, int replacetextplain, - int *new_subpart_number) { + int *new_subpart_number) +{ struct multipart *mp = (struct multipart *) parent->c_ctparams; struct part *part, *prev; - int has_text_plain = 0; + bool has_text_plain = false; for (prev = part = mp->mp_parts; part; part = part->mp_next) { ++*new_subpart_number; @@ -1337,7 +1495,7 @@ find_textplain_sibling (CT parent, int replacetextplain, free_content (old_part->mp_part); free (old_part); } else { - has_text_plain = 1; + has_text_plain = true; } break; } @@ -1348,11 +1506,16 @@ find_textplain_sibling (CT parent, int replacetextplain, } +/* + * Insert a new text/plain part in a multipart part. + */ static int -insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) { +insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) +{ struct multipart *mp = (struct multipart *) parent->c_ctparams; - struct part *new_part = mh_xmalloc (sizeof *new_part); + struct part *new_part; + NEW(new_part); if ((new_part->mp_part = build_text_plain_part (ct))) { char buffer[16]; snprintf (buffer, sizeof buffer, "%d", new_subpart_number); @@ -1364,17 +1527,21 @@ insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) { buffer, NULL); return 1; - } else { - free_content (new_part->mp_part); - free (new_part); - - return 0; } + + free_content (new_part->mp_part); + free (new_part); + + return 0; } +/* + * Create a text/plain part to go along with non-plain sibling part. + */ static CT -build_text_plain_part (CT encoded_part) { +build_text_plain_part (CT encoded_part) +{ CT tp_part = divide_part (encoded_part); char *tmp_plain_file = NULL; @@ -1385,11 +1552,12 @@ build_text_plain_part (CT encoded_part) { be unlinked by free_content (). */ char *tempfile; + /* This m_mktemp2() call closes the temp file. */ if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) { - advise (NULL, "unable to create temporary file in %s", + inform("unable to create temporary file in %s", get_temp_dir()); } else { - tmp_plain_file = add (tempfile, NULL); + tmp_plain_file = mh_xstrdup (tempfile); if (reformat_part (tp_part, tmp_plain_file, tp_part->c_ctinfo.ci_type, tp_part->c_ctinfo.ci_subtype, @@ -1407,15 +1575,19 @@ build_text_plain_part (CT encoded_part) { } -/* Slip new text/plain part into a new multipart/alternative. */ +/* + * Slip new text/plain part into a new multipart/alternative. + */ static int -insert_into_new_mp_alt (CT *ct, int *message_mods) { +insert_into_new_mp_alt (CT *ct, int *message_mods) +{ + /* The following will call decode_part(). */ CT tp_part = build_text_plain_part (*ct); int status = OK; if (tp_part) { - CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART, - MULTI_ALTERNATE); + CT mp_alt = build_multipart (*ct, tp_part, CT_MULTIPART, + MULTI_ALTERNATE); if (mp_alt) { struct multipart *mp = (struct multipart *) mp_alt->c_ctparams; @@ -1444,16 +1616,92 @@ insert_into_new_mp_alt (CT *ct, int *message_mods) { return status; } + +/* + * Slip new text/plain part into a new multipart/mixed. + */ +static int +insert_into_new_mp_mixed (CT *ct, const char *content, int *message_mods) +{ + CT main_part = divide_part (*ct); + const char *reason = NULL; + const int encoding = content_encoding (main_part, &reason); + int status = OK; + + if (set_ct_type(main_part, (*ct)->c_type, (*ct)->c_subtype, + main_part->c_encoding) != OK) { + inform("failed to set Content-Type of main part"); + return NOTOK; + } + if (set_ct_type(*ct, (*ct)->c_type, (*ct)->c_subtype, encoding) != OK) { + inform("failed to set Content-Type of new part"); + return NOTOK; + } + + if (main_part) { + /* Load remainder into the new part. */ + CE cefile = &(*ct)->c_cefile; + CT mp_alt; + + cefile->ce_file = + mh_xstrdup(m_mktemp2 (NULL, invo_name, NULL, &cefile->ce_fp)); + if (cefile->ce_file == NULL) { + die("unable to create temporary file in %s", get_temp_dir()); + } + cefile->ce_unlink = 1; + fprintf (cefile->ce_fp, "%s", content); + + /* Put both parts into a new multipart. */ + mp_alt = build_multipart (*ct, main_part, CT_MULTIPART, MULTI_MIXED); + if (mp_alt) { + struct multipart *mp = (struct multipart *) mp_alt->c_ctparams; + + /* So fix_composite_cte doesn't try to overwrite the encoding. If + the content needs to be decoded, c_encoding will be properly + set. */ + mp_alt->c_encoding = encoding; + + if (mp && mp->mp_parts) { + mp->mp_parts->mp_part = main_part; + /* Make the new multipart/alternative the parent. */ + *ct = mp_alt; + + ++*message_mods; + if (verbosw) { + report (NULL, (*ct)->c_partno, (*ct)->c_file, + "insert text/plain part"); + } + } else { + free_content (main_part); + free_content (mp_alt); + status = NOTOK; + } + } else { + inform("failed to build multipart/alternate"); + status = NOTOK; + } + } else { + /* Should never happen. */ + inform("failed to insert new text part into multipart/related"); + status = NOTOK; + } + + return status; +} + + +/* + * Clone a MIME part. + */ static CT -divide_part (CT ct) { +divide_part (CT ct) +{ CT new_part; - if ((new_part = (CT) mh_xcalloc (1, sizeof *new_part)) == NULL) - adios (NULL, "out of memory"); - + NEW0(new_part); /* Just copy over what is needed for decoding. c_vrsn and c_celine aren't necessary. */ - new_part->c_file = add (ct->c_file, NULL); + new_part->c_file = mh_xstrdup (ct->c_file); new_part->c_begin = ct->c_begin; new_part->c_end = ct->c_end; copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo); @@ -1467,59 +1715,205 @@ divide_part (CT ct) { /* c_ctline is used by reformat__part(), so it can preserve anything after the type/subtype. */ - new_part->c_ctline = add (ct->c_ctline, NULL); + new_part->c_ctline = mh_xstrdup (ct->c_ctline); return new_part; } +/* + * Copy the content info from one part to another. + */ static void -copy_ctinfo (CI dest, CI src) { +copy_ctinfo (CI dest, CI src) +{ PM s_pm, d_pm; - dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL; - dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL; + dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL; + dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL; for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) { - d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name, - s_pm->pm_value, 0); - if (s_pm->pm_charset) - d_pm->pm_charset = getcpy(s_pm->pm_charset); - if (s_pm->pm_lang) - d_pm->pm_lang = getcpy(s_pm->pm_lang); + d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name, + s_pm->pm_value, 0); + if (s_pm->pm_charset) { + d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset); + } + if (s_pm->pm_lang) { + d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang); + } } - dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL; - dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL; + dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL; + dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL; } +/* + * Decode content. + */ static int -decode_part (CT ct) { - char *tmp_decoded; +decode_part (CT ct) +{ + char *tempfile, *tmp_decoded; + FILE *file; int status; - char *tempfile; - if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) { - adios (NULL, "unable to create temporary file in %s", get_temp_dir()); + if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) { + die("unable to create temporary file in %s", get_temp_dir()); } - tmp_decoded = add (tempfile, NULL); + tmp_decoded = mh_xstrdup (tempfile); /* The following call will load ct->c_cefile.ce_file with the tmp filename of the decoded content. tmp_decoded will contain the encoded output, get rid of that. */ - status = output_message (ct, tmp_decoded); + status = output_message_fp (ct, file, tmp_decoded); (void) m_unlink (tmp_decoded); free (tmp_decoded); + if (fclose (file)) { + inform("unable to close temporary file %s, continuing...", tempfile); + } + + return status; +} + + +/* + * If base64-encoded content has a text trailer, return the location, relative + * to c->c_begin, where the valid base64 ends. And return the trailer in the + * addresses pointed to by remainderp. The caller is responsible for + * deallocating that. If no text trailer, return ct->c_end - ct->c_begin and + * leave remainderp unchanged. + */ +static size_t +get_valid_base64 (CT ct, char **remainderp) { + const size_t len = ct->c_end - ct->c_begin; + char *buf, format[16]; + size_t pos; + int fd; + + if (! ct->c_fp && ((ct->c_fp = fopen (ct->c_file, "r")) == NULL)) { + advise (ct->c_file, "unable to open for reading"); + return NOTOK; + } + if ((fd = fileno (ct->c_fp)) == -1 || + lseek (fd, ct->c_begin, SEEK_SET) == (off_t) -1) { + advise (ct->c_file, "unable to seek in"); + return NOTOK; + } + buf = mh_xmalloc(len + 1); + snprintf(format, sizeof format, "%%%luc", (unsigned long) len); + if (fscanf(ct->c_fp, format, buf) == EOF) { + advise (ct->c_file, "unable to read"); + return NOTOK; + } + buf[len] = '\0'; + + pos = find_invalid_base64_pos(buf); + + if (ct->c_begin + pos < (size_t) ct->c_end) { + *remainderp = mh_xstrdup(&buf[pos]); + } else { + pos = ct->c_end - ct->c_begin; + } + free(buf); + + return pos; +} + + +/* + * Find position in byte string of invalid base64 code. Skip individual + * invalid characters because RFC 2045 Sec 6.8 says they should be ignored. + * The motivating use case is a text footer that was mistakenly applied to + * base64 content. Therefore, if any of these is found, return the position + * of: + * 1. The byte (or end) after one or two consecutive pad ('=') bytes. + * 2. The first of a pair of invalid base64 bytes. + * + * If the base64 code is valid, return the position of the null terminator. + * + * encoded - the base64-encoded string + */ +static size_t +find_invalid_base64_pos (const char *encoded) { + const char *cp; + size_t pos; + bool found_pad = false; + unsigned int found_invalid = 0; + + for (cp = encoded, pos = 0; + *cp && ! found_pad && found_invalid < 2; + ++cp, ++pos) { + if (isspace ((unsigned char) *cp) || + isalnum ((unsigned char) *cp) || + *cp == '+' || *cp == '/' || *cp == '=') { + /* Valid base64 byte. */ + if (*cp == '=') { + /* "evidence that the end of the data has been reached" + according to RFC 2045 */ + found_pad = true; + } + /* Require consecutive invalid bytes. Let decodeBase64() handle + individual ones. */ + found_invalid = 0; + } else { + ++found_invalid; + } + } + + if (found_pad && *cp && *cp == '=') { + /* Skip over last in pair of ==. */ + ++cp, ++pos; + } else if (found_invalid == 2) { + /* If a pair of consecutive invalid bytes, back up to first one. */ + --cp, --pos; + --cp, --pos; + } + + /* Skip over any trailing whitespace. */ + while (*cp && isspace((unsigned char) *cp)) { + ++cp, ++pos; + } + + return pos; +} + + +/* + * Check for valid base64 encoding, and "fix" if invalid. + */ +static int +check_base64_encoding (CT *ctp) +{ + char *remainder = NULL; + int status = OK; + + /* If there's a footer after base64 content, set c_end to before it, and + store the footer in remainder. */ + (*ctp)->c_end = (*ctp)->c_begin + get_valid_base64(*ctp, &remainder); + + if (remainder != NULL) { + /* Move ct to a subpart of a new multipart/related, and add the + remainder as a new text/plain subpart of it. */ + int ignore_message_mods = 0; + + status = insert_into_new_mp_mixed(ctp, remainder, &ignore_message_mods); + free(remainder); + } return status; } -/* Some of the arguments aren't really needed now, but maybe will - be in the future for other than text types. */ +/* + * Reformat content as plain text. + * Some of the arguments aren't really needed now, but maybe will + * be in the future for other than text types. + */ static int -reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { +reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) +{ int output_subtype, output_encoding; + const char *reason = NULL; char *cp, *cf; int status; @@ -1531,19 +1925,18 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { /* Check for invo_name-format-type/subtype. */ if ((cf = context_find_by_type ("format", type, subtype)) == NULL) { if (verbosw) { - advise (NULL, "Don't know how to convert %s, there is no " + inform("Don't know how to convert %s, there is no " "%s-format-%s/%s profile entry", ct->c_file, invo_name, type, subtype); } return NOTOK; - } else { - if (strchr (cf, '>')) { - advise (NULL, "'>' prohibited in \"%s\",\nplease fix your " - "%s-format-%s/%s profile entry", cf, invo_name, type, - subtype ? subtype : ""); + } + if (strchr (cf, '>')) { + inform("'>' prohibited in \"%s\",\nplease fix your " + "%s-format-%s/%s profile entry", cf, invo_name, type, + FENDNULL(subtype)); - return NOTOK; - } + return NOTOK; } cp = concat (cf, " >", file, NULL); @@ -1565,9 +1958,10 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { /* Set subtype to 0, which is always an UNKNOWN subtype. */ output_subtype = 0; } - output_encoding = charset_encoding (ct); - if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) { + output_encoding = content_encoding (ct, &reason); + if (status == OK && + set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) { ct->c_cefile.ce_file = file; ct->c_cefile.ce_unlink = 1; } else { @@ -1579,22 +1973,16 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { } -/* Identifies 7bit or 8bit content based on charset. */ -static int -charset_encoding (CT ct) { - char *ct_charset = content_charset (ct); - int encoding = strcasecmp (ct_charset, "US-ASCII") ? CE_8BIT : CE_7BIT; - - free (ct_charset); - - return encoding; -} - - +/* + * Create and fill in a multipart part. + */ static CT -build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { +build_multipart (CT first_part, CT new_part, int type, int subtype) +{ char *boundary_prefix = "----=_nmh-multipart"; - char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL); + static unsigned int bp_uses = 0; + char bp_uses_buf[16]; + char *boundary; char *boundary_indicator = "; boundary="; char *typename, *subtypename, *name; CT ct; @@ -1602,10 +1990,17 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { struct multipart *m; const struct str2init *ctinit; - if ((ct = (CT) mh_xcalloc (1, sizeof *ct)) == NULL) - adios (NULL, "out of memory"); + NEW0(ct); + + if (bp_uses > 0) { + snprintf(bp_uses_buf, sizeof bp_uses_buf - 1, "-%d", bp_uses++); + } else { + bp_uses_buf[0] = '\0'; + } + boundary = + concat (boundary_prefix, bp_uses_buf, first_part->c_partno, NULL); - /* Set up the multipart/alternative part. These fields of *ct were + /* Set up the multipart part. These fields of *ct were initialized to 0 by mh_xcalloc(): c_fp, c_unlink, c_begin, c_end, c_vrsn, c_ctline, c_celine, @@ -1618,7 +2013,7 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { c_showproc, c_termproc, c_storeproc, c_storage, c_folder */ - ct->c_file = add (first_alt->c_file, NULL); + ct->c_file = mh_xstrdup (first_part->c_file); ct->c_type = type; ct->c_subtype = subtype; @@ -1640,20 +2035,19 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { if ((found_boundary = boundary_in_content (&new_part->c_cefile.ce_fp, new_part->c_cefile.ce_file, - boundary)) == -1) { - free (ct); - return NULL; + boundary)) == NOTOK) { + goto return_null; } } /* Ensure that the boundary doesn't appear in the encoded content. */ if (! found_boundary && new_part->c_file) { - if ((found_boundary = boundary_in_content (&new_part->c_fp, - new_part->c_file, - boundary)) == -1) { - free (ct); - return NULL; + if ((found_boundary = + boundary_in_content (&new_part->c_fp, + new_part->c_file, + boundary)) == NOTOK) { + goto return_null; } } @@ -1666,15 +2060,14 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { snprintf (buffer2, sizeof buffer2, "%d", serial); boundary = concat (boundary_prefix, - first_alt->c_partno ? first_alt->c_partno : "", + FENDNULL(first_part->c_partno), "-", buffer2, NULL); } } if (found_boundary) { - advise (NULL, "giving up trying to find a unique boundary"); - free (ct); - return NULL; + inform("giving up trying to find a unique boundary"); + goto return_null; } } @@ -1682,37 +2075,35 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { boundary, "\"", NULL); /* Load c_first_hf and c_last_hf. */ - transfer_noncontent_headers (first_alt, ct); - add_header (ct, add (TYPE_FIELD, NULL), concat (name, "\n", NULL)); + transfer_noncontent_headers (first_part, ct); + add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL)); free (name); /* Load c_partno. */ - if (first_alt->c_partno) { - ct->c_partno = add (first_alt->c_partno, NULL); - free (first_alt->c_partno); - first_alt->c_partno = concat (ct->c_partno, ".1", NULL); + if (first_part->c_partno) { + ct->c_partno = mh_xstrdup (first_part->c_partno); + free (first_part->c_partno); + first_part->c_partno = concat (ct->c_partno, ".1", NULL); new_part->c_partno = concat (ct->c_partno, ".2", NULL); } else { - first_alt->c_partno = add ("1", NULL); - new_part->c_partno = add ("2", NULL); + first_part->c_partno = mh_xstrdup ("1"); + new_part->c_partno = mh_xstrdup ("2"); } if (ctinit) { - ct->c_ctinfo.ci_type = add (typename, NULL); - ct->c_ctinfo.ci_subtype = add (subtypename, NULL); + ct->c_ctinfo.ci_type = mh_xstrdup (typename); + ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename); } add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm, "boundary", boundary, 0); - p = (struct part *) mh_xmalloc (sizeof *p); - p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next); + NEW(p); + NEW(p->mp_next); p->mp_next->mp_next = NULL; - p->mp_next->mp_part = first_alt; + p->mp_next->mp_part = first_part; - if ((m = (struct multipart *) mh_xcalloc (1, sizeof (struct multipart))) == - NULL) - adios (NULL, "out of memory"); + NEW0(m); m->mp_start = concat (boundary, "\n", NULL); m->mp_stop = concat (boundary, "--\n", NULL); m->mp_parts = p; @@ -1721,15 +2112,23 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { free (boundary); return ct; + +return_null: + free_content(ct); + free(boundary); + return NULL; } -/* Check that the boundary does not appear in the content. */ +/* + * Check that the boundary does not appear in the content. + */ static int -boundary_in_content (FILE **fp, char *file, const char *boundary) { - char buffer[BUFSIZ]; +boundary_in_content (FILE **fp, char *file, const char *boundary) +{ + char buffer[NMH_BUFSIZ]; size_t bytes_read; - int found_boundary = 0; + bool found_boundary = false; /* free_content() will close *fp if we fopen it here. */ if (! *fp && (*fp = fopen (file, "r")) == NULL) { @@ -1740,7 +2139,7 @@ boundary_in_content (FILE **fp, char *file, const char *boundary) { fseeko (*fp, 0L, SEEK_SET); while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) { if (find_str (buffer, bytes_read, boundary)) { - found_boundary = 1; + found_boundary = true; break; } } @@ -1749,16 +2148,19 @@ boundary_in_content (FILE **fp, char *file, const char *boundary) { } -/* Remove all non-Content headers. */ +/* + * Remove all non-Content headers. + */ static void -transfer_noncontent_headers (CT old, CT new) { +transfer_noncontent_headers (CT old, CT new) +{ HF hp, hp_prev; hp_prev = hp = old->c_first_hf; while (hp) { HF next = hp->next; - if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) { + if (strncasecmp (XXX_FIELD_PRF, hp->name, LEN(XXX_FIELD_PRF))) { if (hp == old->c_last_hf) { if (hp == old->c_first_hf) { old->c_last_hf = old->c_first_hf = NULL; @@ -1791,15 +2193,19 @@ transfer_noncontent_headers (CT old, CT new) { } +/* + * Set content type. + */ static int -set_ct_type (CT ct, int type, int subtype, int encoding) { +set_ct_type (CT ct, int type, int subtype, int encoding) +{ char *typename = ct_type_str (type); char *subtypename = ct_subtype_str (type, subtype); /* E.g, " text/plain" */ char *type_subtypename = concat (" ", typename, "/", subtypename, NULL); /* E.g, " text/plain\n" */ char *name_plus_nl = concat (type_subtypename, "\n", NULL); - int found_content_type = 0; + bool found_content_type = false; HF hf; const char *cp = NULL; char *ctline; @@ -1808,18 +2214,18 @@ set_ct_type (CT ct, int type, int subtype, int encoding) { /* Update/add Content-Type header field. */ for (hf = ct->c_first_hf; hf; hf = hf->next) { if (! strcasecmp (TYPE_FIELD, hf->name)) { - found_content_type = 1; + found_content_type = true; free (hf->value); hf->value = (cp = strchr (ct->c_ctline, ';')) ? concat (type_subtypename, cp, "\n", NULL) - : add (name_plus_nl, NULL); + : mh_xstrdup (name_plus_nl); } } if (! found_content_type) { - add_header (ct, add (TYPE_FIELD, NULL), + add_header (ct, mh_xstrdup (TYPE_FIELD), (cp = strchr (ct->c_ctline, ';')) ? concat (type_subtypename, cp, "\n", NULL) - : add (name_plus_nl, NULL)); + : mh_xstrdup (name_plus_nl)); } /* Some of these might not be used, but set them anyway. */ @@ -1830,9 +2236,9 @@ set_ct_type (CT ct, int type, int subtype, int encoding) { ct->c_ctline = ctline; /* Leave other ctinfo members as they were. */ free (ct->c_ctinfo.ci_type); - ct->c_ctinfo.ci_type = add (typename, NULL); + ct->c_ctinfo.ci_type = mh_xstrdup (typename); free (ct->c_ctinfo.ci_subtype); - ct->c_ctinfo.ci_subtype = add (subtypename, NULL); + ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename); ct->c_type = type; ct->c_subtype = subtype; @@ -1852,7 +2258,9 @@ set_ct_type (CT ct, int type, int subtype, int encoding) { * that character set again after decoding." */ static int -decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mods) { +decode_text_parts (CT ct, int encoding, const char *decodetypes, + int *message_mods) +{ int status = OK; int lf_line_endings = 0; @@ -1864,7 +2272,8 @@ decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mo /* Should check to see if the body for this part is encoded? For now, it gets passed along as-is by InitMultiPart(). */ for (part = m->mp_parts; status == OK && part; part = part->mp_next) { - status = decode_text_parts (part->mp_part, encoding, decodetypes, message_mods); + status = decode_text_parts (part->mp_part, encoding, decodetypes, + message_mods); } break; } @@ -1873,7 +2282,8 @@ decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mo if (ct->c_subtype == MESSAGE_EXTERNAL) { struct exbody *e = (struct exbody *) ct->c_ctparams; - status = decode_text_parts (e->eb_content, encoding, decodetypes, message_mods); + status = decode_text_parts (e->eb_content, encoding, decodetypes, + message_mods); } break; @@ -1901,8 +2311,7 @@ decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mo report (NULL, ct->c_partno, ct->c_file, "will not decode%s because it is binary (%s)", ct->c_partno ? "" - : ct->c_ctline ? ct->c_ctline - : "", + : (FENDNULL(ct->c_ctline)), reason); } (void) m_unlink (ct->c_cefile.ce_file); @@ -1916,26 +2325,26 @@ decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mo report (NULL, ct->c_partno, ct->c_file, "will not decode%s because it is 8bit", ct->c_partno ? "" - : ct->c_ctline ? ct->c_ctline - : ""); + : (FENDNULL(ct->c_ctline))); } (void) m_unlink (ct->c_cefile.ce_file); free (ct->c_cefile.ce_file); ct->c_cefile.ce_file = NULL; } else { int enc; + if (ct_encoding == CE_BINARY) { enc = CE_BINARY; } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) { enc = CE_QUOTED; } else { - enc = charset_encoding (ct); + enc = ct_encoding; } if (set_ce (ct, enc) == OK) { ++*message_mods; if (verbosw) { report (NULL, ct->c_partno, ct->c_file, "decode%s", - ct->c_ctline ? ct->c_ctline : ""); + FENDNULL(ct->c_ctline)); } if (lf_line_endings) { strip_crs (ct, message_mods); @@ -1966,26 +2375,29 @@ decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mo } -/* Determine if the part with type[/subtype] should be decoded, according to - decodetypes (which came from the -decodetypes switch). */ +/* + * Determine if the part with type[/subtype] should be decoded, according to + * decodetypes (which came from the -decodetypes switch). + */ static int -should_decode(const char *decodetypes, const char *type, const char *subtype) { +should_decode(const char *decodetypes, const char *type, const char *subtype) +{ /* Quick search for matching type[/subtype] in decodetypes: bracket decodetypes with commas, then search for ,type, and ,type/subtype, in it. */ - int found_match = 0; + bool found_match = false; char *delimited_decodetypes = concat(",", decodetypes, ",", NULL); char *delimited_type = concat(",", type, ",", NULL); if (nmh_strcasestr(delimited_decodetypes, delimited_type)) { - found_match = 1; + found_match = true; } else if (subtype != NULL) { char *delimited_type_subtype = concat(",", type, "/", subtype, ",", NULL); if (nmh_strcasestr(delimited_decodetypes, delimited_type_subtype)) { - found_match = 1; + found_match = true; } free(delimited_type_subtype); } @@ -1997,18 +2409,21 @@ should_decode(const char *decodetypes, const char *type, const char *subtype) { } -/* See if the decoded content is 7bit, 8bit, or binary. It's binary - if it has any NUL characters, a CR not followed by a LF, or lines - greater than 998 characters in length. If binary, reason is set - to a string explaining why. */ +/* + * See if the decoded content is 7bit, 8bit, or binary. It's binary + * if it has any NUL characters, a CR not followed by a LF, or lines + * greater than 998 characters in length. If binary, reason is set + * to a string explaining why. + */ static int -content_encoding (CT ct, const char **reason) { +content_encoding (CT ct, const char **reason) +{ CE ce = &ct->c_cefile; int encoding = CE_7BIT; if (ce->ce_file) { size_t line_len = 0; - char buffer[BUFSIZ]; + char buffer[NMH_BUFSIZ]; size_t inbytes; if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) { @@ -2038,13 +2453,14 @@ content_encoding (CT ct, const char **reason) { *reason = ""; } break; - } else if (*cp == '\n') { + } + if (*cp == '\n') { line_len = 0; } else if (! isascii ((unsigned char) *cp)) { encoding = CE_8BIT; } - last_char_was_cr = *cp == '\r' ? 1 : 0; + last_char_was_cr = *cp == '\r'; } } @@ -2056,8 +2472,12 @@ content_encoding (CT ct, const char **reason) { } +/* + * Strip carriage returns from content. + */ static int -strip_crs (CT ct, int *message_mods) { +strip_crs (CT ct, int *message_mods) +{ char *charset = content_charset (ct); int status = OK; @@ -2074,8 +2494,8 @@ strip_crs (CT ct, int *message_mods) { FILE **fp = NULL; size_t begin; size_t end; - int has_crs = 0; - int opened_input_file = 0; + bool has_crs = false; + bool opened_input_file = false; if (ct->c_cefile.ce_file) { file = &ct->c_cefile.ce_file; @@ -2094,13 +2514,13 @@ strip_crs (CT ct, int *message_mods) { advise (*file, "unable to open for reading"); status = NOTOK; } else { - opened_input_file = 1; + opened_input_file = true; } } } if (fp && *fp) { - char buffer[BUFSIZ]; + char buffer[NMH_BUFSIZ]; size_t bytes_read; size_t bytes_to_read = end > 0 && end > begin ? end - begin : sizeof buffer; @@ -2114,17 +2534,17 @@ strip_crs (CT ct, int *message_mods) { modify the content. */ char *cp; size_t i; - int last_char_was_cr = 0; + bool last_char_was_cr = false; if (end > 0) { bytes_to_read -= bytes_read; } for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) { if (*cp == '\n' && last_char_was_cr) { - has_crs = 1; + has_crs = true; break; } - last_char_was_cr = *cp == '\r' ? 1 : 0; + last_char_was_cr = *cp == '\r'; } } @@ -2134,10 +2554,10 @@ strip_crs (CT ct, int *message_mods) { char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL); if (tempfile == NULL) { - adios (NULL, "unable to create temporary file in %s", + die("unable to create temporary file in %s", get_temp_dir()); } - stripped_content_file = add (tempfile, NULL); + stripped_content_file = mh_xstrdup (tempfile); /* Strip each CR before a LF from the content. */ fseeko (*fp, begin, SEEK_SET); @@ -2145,11 +2565,11 @@ strip_crs (CT ct, int *message_mods) { 0) { char *cp; size_t i; - int last_char_was_cr = 0; + bool last_char_was_cr = false; for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) { if (*cp == '\r') { - last_char_was_cr = 1; + last_char_was_cr = true; } else if (last_char_was_cr) { if (*cp != '\n') { if (write (fd, "\r", 1) < 0) { @@ -2159,29 +2579,28 @@ strip_crs (CT ct, int *message_mods) { if (write (fd, cp, 1) < 0) { advise (tempfile, "write"); } - last_char_was_cr = 0; + last_char_was_cr = false; } else { if (write (fd, cp, 1) < 0) { advise (tempfile, "write"); } - last_char_was_cr = 0; + last_char_was_cr = false; } } } if (close (fd)) { - admonish (NULL, "unable to write temporary file %s", + inform("unable to write temporary file %s, continuing...", stripped_content_file); (void) m_unlink (stripped_content_file); + free(stripped_content_file); status = NOTOK; } else { /* Replace the decoded file with the converted one. */ - if (ct->c_cefile.ce_file) { - if (ct->c_cefile.ce_unlink) { - (void) m_unlink (ct->c_cefile.ce_file); - } - free (ct->c_cefile.ce_file); - } + if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink) + (void) m_unlink (ct->c_cefile.ce_file); + + free(ct->c_cefile.ce_file); ct->c_cefile.ce_file = stripped_content_file; ct->c_cefile.ce_unlink = 1; @@ -2207,8 +2626,112 @@ strip_crs (CT ct, int *message_mods) { } +/* + * Add/update, if necessary, the message C-T-E, based on the least restrictive + * of the part C-T-E's. + */ +static void +update_cte (CT ct) +{ + const int least_restrictive_enc = least_restrictive_encoding (ct); + + if (least_restrictive_enc != CE_UNKNOWN && + least_restrictive_enc != CE_7BIT) { + char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL); + HF hf; + bool found_cte = false; + + /* Update/add Content-Transfer-Encoding header field. */ + for (hf = ct->c_first_hf; hf; hf = hf->next) { + if (! strcasecmp (ENCODING_FIELD, hf->name)) { + found_cte = true; + free (hf->value); + hf->value = cte; + } + } + if (! found_cte) { + add_header (ct, mh_xstrdup (ENCODING_FIELD), cte); + } + } +} + + +/* + * Find the least restrictive encoding (7bit, 8bit, binary) of the parts + * within a message. + */ static int -convert_charsets (CT ct, char *dest_charset, int *message_mods) { +least_restrictive_encoding (CT ct) +{ + int encoding = CE_UNKNOWN; + + switch (ct->c_type) { + case CT_MULTIPART: { + struct multipart *m = (struct multipart *) ct->c_ctparams; + struct part *part; + + for (part = m->mp_parts; part; part = part->mp_next) { + const int part_encoding = + least_restrictive_encoding (part->mp_part); + + if (less_restrictive (encoding, part_encoding)) { + encoding = part_encoding; + } + } + break; + } + + case CT_MESSAGE: + if (ct->c_subtype == MESSAGE_EXTERNAL) { + struct exbody *e = (struct exbody *) ct->c_ctparams; + const int part_encoding = + least_restrictive_encoding (e->eb_content); + + if (less_restrictive (encoding, part_encoding)) { + encoding = part_encoding; + } + } + break; + + default: { + if (less_restrictive (encoding, ct->c_encoding)) { + encoding = ct->c_encoding; + } + }} + + return encoding; +} + + +/* + * Return whether the second encoding is less restrictive than the first, where + * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So, + * CE_BINARY is less restrictive than CE_8BIT and + * CE_8BIT is less restrictive than CE_7BIT. + */ +static int +less_restrictive (int encoding, int second_encoding) +{ + switch (second_encoding) { + case CE_BINARY: + return encoding != CE_BINARY; + case CE_8BIT: + return encoding != CE_BINARY && encoding != CE_8BIT; + case CE_7BIT: + return encoding != CE_BINARY && encoding != CE_8BIT && + encoding != CE_7BIT; + default : + return 0; + } +} + + +/* + * Convert character set of each part. + */ +static int +convert_charsets (CT ct, char *dest_charset, int *message_mods) +{ int status = OK; switch (ct->c_type) { @@ -2266,36 +2789,44 @@ convert_charsets (CT ct, char *dest_charset, int *message_mods) { /* * Fix various problems that aren't handled elsewhere. These * are fixed unconditionally: there are no switches to disable - * them. (Currently, "problems" is just one: an extraneous - * semicolon at the end of a header parameter list.) + * them. Currently, "problems" are these: + * 1) remove extraneous semicolon at the end of a header parameter list + * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and + * filename parameters in Content-Type and Content-Disposition + * headers, respectively. */ static int -fix_always (CT ct, int *message_mods) { +fix_always (CT *ctp, const fix_transformations *fx, int *message_mods) +{ int status = OK; - switch (ct->c_type) { + switch ((*ctp)->c_type) { case CT_MULTIPART: { - struct multipart *m = (struct multipart *) ct->c_ctparams; + struct multipart *m = (struct multipart *) (*ctp)->c_ctparams; struct part *part; for (part = m->mp_parts; status == OK && part; part = part->mp_next) { - status = fix_always (part->mp_part, message_mods); + status = fix_always (&part->mp_part, fx, message_mods); } break; } case CT_MESSAGE: - if (ct->c_subtype == MESSAGE_EXTERNAL) { - struct exbody *e = (struct exbody *) ct->c_ctparams; + if ((*ctp)->c_subtype == MESSAGE_EXTERNAL) { + struct exbody *e = (struct exbody *) (*ctp)->c_ctparams; - status = fix_always (e->eb_content, message_mods); + status = fix_always (&e->eb_content, fx, message_mods); } break; default: { HF hf; - for (hf = ct->c_first_hf; hf; hf = hf->next) { + if ((*ctp)->c_first_hf) { + fix_filename_encoding (*ctp); + } + + for (hf = (*ctp)->c_first_hf; hf; hf = hf->next) { size_t len = strlen (hf->value); if (strcasecmp (hf->name, TYPE_FIELD) != 0 && @@ -2319,42 +2850,203 @@ fix_always (CT ct, int *message_mods) { hf->value[len - 1] = '\0'; /* Also, if Content-Type parameter, remove trailing ';' - from ct->c_ctline. This probably isn't necessary + from (*ctp)->c_ctline. This probably isn't necessary but can't hurt. */ - if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) { - size_t l = strlen(ct->c_ctline) - 1; - while (isspace((unsigned char)(ct->c_ctline[l])) || - ct->c_ctline[l] == ';') { - ct->c_ctline[l--] = '\0'; + if (strcasecmp(hf->name, TYPE_FIELD) == 0 && (*ctp)->c_ctline) { + size_t l = strlen((*ctp)->c_ctline) - 1; + while (isspace((unsigned char)((*ctp)->c_ctline[l])) || + (*ctp)->c_ctline[l] == ';') { + (*ctp)->c_ctline[l--] = '\0'; if (l == 0) { break; } } } ++*message_mods; if (verbosw) { - report (NULL, ct->c_partno, ct->c_file, + report (NULL, (*ctp)->c_partno, (*ctp)->c_file, "remove trailing ; from %s parameter value", hf->name); } } } + + if (fx->checkbase64 && (*ctp)->c_encoding == CE_BASE64) { + status = check_base64_encoding (ctp); + } }} return status; } +/* + * Decodes UTF-8 encoded header values. Similar to fix_filename_param(), but + * does not modify any MIME parameter values. + */ static int -write_content (CT ct, const char *input_filename, char *outfile, int modify_inplace, - int message_mods) { +decode_header_field_bodies (CT ct, int *message_mods) +{ + int status = OK; + + switch (ct->c_type) { + case CT_MULTIPART: { + struct multipart *m = (struct multipart *) ct->c_ctparams; + struct part *part; + + for (part = m->mp_parts; status == OK && part; part = part->mp_next) { + status = decode_header_field_bodies (part->mp_part, message_mods); + } + break; + } + + case CT_MESSAGE: + if (ct->c_subtype == MESSAGE_EXTERNAL) { + struct exbody *e = (struct exbody *) ct->c_ctparams; + + status = decode_header_field_bodies (e->eb_content, message_mods); + } + break; + } + + HF hf; + + for (hf = ct->c_first_hf; hf; hf = hf->next) { + /* Only decode UTF-8 values. */ + if (hf->value && has_suffix(hf->value, "?=\n") && + (! strncasecmp (hf->value, " =?utf8?", 8) || + ! strncasecmp (hf->value, " =?utf-8?", 9))) { + /* Looks like an RFC 2047 encoded parameter. */ + char decoded[PATH_MAX + 1]; + + if (decode_rfc2047 (hf->value, decoded, sizeof decoded)) { + const size_t len = strlen(decoded); + + /* decode_rfc2047() could truncate if the buffer fills up. + Detect and discard if that happened. */ + if (len < sizeof(decoded) - 1 && strcmp(hf->value, decoded)) { + hf->value = mh_xrealloc (hf->value, len + 1); + strncpy (hf->value, decoded, len + 1); + ++*message_mods; + } + } else { + inform("failed to decode %s parameter %s", hf->name, hf->value); + status = NOTOK; + } + } + } + + return status; +} + + +/* + * Factor out common code for loops in fix_filename_encoding(). + */ +static int +fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) +{ + bool fixed = false; + + if (has_prefix(value, "=?") && has_suffix(value, "?=")) { + /* Looks like an RFC 2047 encoded parameter. */ + char decoded[PATH_MAX + 1]; + + if (decode_rfc2047 (value, decoded, sizeof decoded)) { + /* Encode using RFC 2231. */ + replace_param (first_pm, last_pm, name, decoded, 0); + fixed = true; + } else { + inform("failed to decode %s parameter %s", name, value); + } + } + + return fixed; +} + + +/* + * Replace RFC 2047 encoding with RFC 2231 encoding of name and + * filename parameters in Content-Type and Content-Disposition + * headers, respectively. + */ +static int +fix_filename_encoding (CT ct) +{ + PM pm; + HF hf; + int fixed = 0; + + for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) { + if (pm->pm_name && pm->pm_value && + strcasecmp (pm->pm_name, "name") == 0) { + fixed = fix_filename_param (pm->pm_name, pm->pm_value, + &ct->c_ctinfo.ci_first_pm, + &ct->c_ctinfo.ci_last_pm); + } + } + + for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) { + if (pm->pm_name && pm->pm_value && + strcasecmp (pm->pm_name, "filename") == 0) { + fixed = fix_filename_param (pm->pm_name, pm->pm_value, + &ct->c_dispo_first, + &ct->c_dispo_last); + } + } + + /* Fix hf values to correspond. */ + for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) { + enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER; + + if (strcasecmp (hf->name, TYPE_FIELD) == 0) { + field = TYPE_HEADER; + } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) { + field = DISPO_HEADER; + } + + if (field != OTHER) { + const char *const semicolon_loc = strchr (hf->value, ';'); + + if (semicolon_loc) { + const size_t len = + strlen (hf->name) + 1 + semicolon_loc - hf->value; + const char *const params = + output_params (len, + field == TYPE_HEADER + ? ct->c_ctinfo.ci_first_pm + : ct->c_dispo_first, + NULL, 0); + const char *const new_params = concat (params, "\n", NULL); + + replace_substring (&hf->value, semicolon_loc, new_params); + free((void *)new_params); /* Cast away const. Sigh. */ + free((void *)params); + } else { + inform("did not find semicolon in %s:%s\n", + hf->name, hf->value); + } + } + } + + return OK; +} + + +/* + * Output content in input file to output file. + */ +static int +write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp, + int modify_inplace, int message_mods) +{ int status = OK; if (modify_inplace) { if (message_mods > 0) { - if ((status = output_message (ct, outfile)) == OK) { + if ((status = output_message_fp (ct, outfp, outfile)) == OK) { char *infile = input_filename - ? add (input_filename, NULL) - : add (ct->c_file ? ct->c_file : "-", NULL); + ? mh_xstrdup (input_filename) + : mh_xstrdup (ct->c_file ? ct->c_file : "-"); if (remove_file (infile) == OK) { if (rename (outfile, infile)) { @@ -2367,7 +3059,7 @@ write_content (CT ct, const char *input_filename, char *outfile, int modify_inpl int i = -1; if (old != -1 && new != -1) { - char buffer[BUFSIZ]; + char buffer[NMH_BUFSIZ]; while ((i = read (old, buffer, sizeof buffer)) > 0) { @@ -2386,15 +3078,15 @@ write_content (CT ct, const char *input_filename, char *outfile, int modify_inpl expand filename to absolute path. */ int file = ct->c_file && ct->c_file[0] == '/'; - admonish (NULL, "unable to rename %s %s to %s", + inform("unable to rename %s %s to %s, continuing...", file ? "file" : "message", outfile, infile); status = NOTOK; } } } else { - admonish (NULL, "unable to remove input file %s, " - "not modifying it", infile); + inform("unable to remove input file %s, " + "not modifying it, continuing...", infile); (void) m_unlink (outfile); status = NOTOK; } @@ -2410,7 +3102,7 @@ write_content (CT ct, const char *input_filename, char *outfile, int modify_inpl } else { /* Output is going to some file. Produce it whether or not there were modifications. */ - status = output_message (ct, outfile); + status = output_message_fp (ct, outfp, outfile); } flush_errors (); @@ -2419,11 +3111,12 @@ write_content (CT ct, const char *input_filename, char *outfile, int modify_inpl /* - * parse_mime() does not set lf_line_endings in struct text, so use this function to do it. - * It touches the parts the decodetypes identifies. + * parse_mime() does not set lf_line_endings in struct text, so use this + * function to do it. It touches the parts the decodetypes identifies. */ static void -set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) { +set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) +{ switch (ct->c_type) { case CT_MULTIPART: { struct multipart *m = (struct multipart *) ct->c_ctparams; @@ -2446,9 +3139,7 @@ set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) { default: if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) { if (ct->c_ctparams == NULL) { - if ((ct->c_ctparams = (struct text *) mh_xcalloc (1, sizeof (struct text))) == NULL) { - adios (NULL, "out of memory"); - } + ct->c_ctparams = mh_xcalloc(1, sizeof (struct text)); } ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings; } @@ -2461,31 +3152,35 @@ set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) { * use the standard MH backup file. */ static int -remove_file (const char *file) { +remove_file (const char *file) +{ if (rmmproc) { char *rmm_command = concat (rmmproc, " ", file, NULL); int status = system (rmm_command); free (rmm_command); return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK; - } else { - /* This is OK for a non-message file, it still uses the - BACKUP_PREFIX form. The backup file will be in the same - directory as file. */ - return rename (file, m_backup (file)); } + /* This is OK for a non-message file, it still uses the + BACKUP_PREFIX form. The backup file will be in the same + directory as file. */ + return rename (file, m_backup (file)); } +/* + * Output formatted message to user. + */ static void -report (char *what, char *partno, char *filename, char *message, ...) { +report (char *what, char *partno, char *filename, char *message, ...) +{ va_list args; char *fmt; if (verbosw) { va_start (args, message); fmt = concat (filename, partno ? " part " : ", ", - partno ? partno : "", partno ? ", " : "", message, NULL); + FENDNULL(partno), partno ? ", " : "", message, NULL); advertise (what, NULL, fmt, args);