X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/902e700b1d5ba0112fb92c25e6092a88d4789f6a..9374a77fec170451968e67eb90fce2c331b5040c:/uip/mhfixmsg.c?ds=sidebyside diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c index 2714024d..372dbb9c 100644 --- a/uip/mhfixmsg.c +++ b/uip/mhfixmsg.c @@ -1,5 +1,5 @@ /* - * mhfixmsg.c -- rewrite a message with various tranformations + * mhfixmsg.c -- rewrite a message with various transformations * * This code is Copyright (c) 2002 and 2013, by the authors of nmh. * See the COPYRIGHT file in the root directory of the nmh @@ -12,25 +12,30 @@ #include #include #include -#ifdef HAVE_ICONV -# include -#endif #define MHFIXMSG_SWITCHES \ - X("decodetext 8bit|7bit", 0, DECODETEXTSW) \ + X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \ X("nodecodetext", 0, NDECODETEXTSW) \ - X("textcodeset", 0, TEXTCODESETSW) \ - X("notextcodeset", 0, NTEXTCODESETSW) \ + X("decodetypes", 0, DECODETYPESW) \ + X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \ + X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \ + X("textcharset", 0, TEXTCHARSETSW) \ + X("notextcharset", 0, NTEXTCHARSETSW) \ X("reformat", 0, REFORMATSW) \ X("noreformat", 0, NREFORMATSW) \ + X("replacetextplain", 0, REPLACETEXTPLAINSW) \ + X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \ X("fixboundary", 0, FIXBOUNDARYSW) \ X("nofixboundary", 0, NFIXBOUNDARYSW) \ - X("fixcte", 0, FIXCTESW) \ - X("nofixcte", 0, NFIXCTESW) \ + X("fixcte", 0, FIXCOMPOSITECTESW) \ + X("nofixcte", 0, NFIXCOMPOSITECTESW) \ + X("fixtype mimetype", 0, FIXTYPESW) \ X("file file", 0, FILESW) \ X("outfile file", 0, OUTFILESW) \ X("rmmproc program", 0, RPROCSW) \ X("normmproc", 0, NRPRCSW) \ + X("changecur", 0, CHGSW) \ + X("nochangecur", 0, NCHGSW) \ X("verbose", 0, VERBSW) \ X("noverbose", 0, NVERBSW) \ X("version", 0, VERSIONSW) \ @@ -51,19 +56,16 @@ int debugsw; /* Needed by mhparse.c. */ #define quitser pipeser /* mhparse.c */ -extern char *tmp; /* directory to place tmp files */ extern int skip_mp_cte_check; /* flag to InitMultiPart */ extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */ extern int bogus_mp_content; /* flag from InitMultiPart */ -CT parse_mime (char *); -void reverse_parts (CT); +/* flags to/from parse_header_attrs */ +extern int suppress_extraneous_trailing_semicolon_warning; +extern int extraneous_trailing_semicolon; /* mhoutsbr.c */ int output_message (CT, char *); -/* mhshowsbr.c */ -int show_content_aux (CT, int, int, char *, char *); - /* mhmisc.c */ void flush_errors (void); @@ -76,41 +78,55 @@ void freects_done (int) NORETURN; */ typedef struct fix_transformations { int fixboundary; - int fixcte; + int fixcompositecte; + svector_t fixtypes; int reformat; + int replacetextplain; int decodetext; - char *textcodeset; + char *decodetypes; + /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */ + int lf_line_endings; + char *textcharset; } fix_transformations; int mhfixmsgsbr (CT *, const fix_transformations *, char *); -static void reverse_alternative_parts (CT); static int fix_boundary (CT *, int *); +static int copy_input_to_output (const char *, const char *); static int get_multipart_boundary (CT, char **); -static int replace_boundary (CT, char *, const char *); -static char *update_attr (char *, const char *, const char *e); -static int fix_multipart_cte (CT, int *); +static int replace_boundary (CT, char *, char *); +static int fix_types (CT, svector_t, int *); +static char *replace_substring (char **, const char *, const char *); +static char *remove_parameter (char *, const char *); +static int fix_composite_cte (CT, int *); static int set_ce (CT, int); -static int ensure_text_plain (CT *, CT, int *); +static int ensure_text_plain (CT *, CT, int *, int); +static int find_textplain_sibling (CT, int, int *); +static int insert_new_text_plain_part (CT, int, CT); static CT build_text_plain_part (CT); +static int insert_into_new_mp_alt (CT *, int *); static CT divide_part (CT); static void copy_ctinfo (CI, CI); static int decode_part (CT); static int reformat_part (CT, char *, char *, char *, int); -static int charset_encoding (CT); static CT build_multipart_alt (CT, CT, int, int); static int boundary_in_content (FILE **, char *, const char *); static void transfer_noncontent_headers (CT, CT); static int set_ct_type (CT, int type, int subtype, int encoding); -static int decode_text_parts (CT, int, int *); -static int content_encoding (CT); +static int decode_text_parts (CT, int, const char *, int *); +static int should_decode(const char *, const char *, const char *); +static int content_encoding (CT, const char **); static int strip_crs (CT, int *); -static int convert_codesets (CT, char *, int *); -static int convert_codeset (CT, char *, int *); -static char *content_codeset (CT); -static int write_content (CT, char *, char *, int, int); -static int remove_file (char *); -static void report (char *, char *, char *, ...); -static char *upcase (char *); +static void update_cte (CT); +static int least_restrictive_encoding (CT); +static int less_restrictive (int, int); +static int convert_charsets (CT, char *, int *); +static int fix_always (CT, int *); +static int fix_filename_param (char *, char *, PM *, PM *); +static int fix_filename_encoding (CT); +static int write_content (CT, const char *, char *, int, int); +static void set_text_ctparams(CT, char *, int); +static int remove_file (const char *); +static void report (char *, char *, char *, char *, ...); static void pipeser (int); @@ -125,21 +141,20 @@ main (int argc, char **argv) { CT *ctp; FILE *fp; int using_stdin = 0; + int chgflag = 1; int status = OK; fix_transformations fx; - fx.reformat = fx.fixcte = fx.fixboundary = 1; + fx.reformat = fx.fixcompositecte = fx.fixboundary = 1; + fx.fixtypes = NULL; + fx.replacetextplain = 0; fx.decodetext = CE_8BIT; - fx.textcodeset = NULL; - - done = freects_done; + fx.decodetypes = "text,application/ics"; /* Default, per man page. */ + fx.lf_line_endings = 0; + fx.textcharset = NULL; -#ifdef LOCALE - setlocale(LC_ALL, ""); -#endif - invo_name = r1bindex (argv[0], '/'); + if (nmh_init(argv[0], 2)) { return 1; } - /* read user profile/context */ - context_read(); + done = freects_done; arguments = getarguments (invo_name, argc, argv, 1); argp = arguments; @@ -166,12 +181,15 @@ main (int argc, char **argv) { done (0); case DECODETEXTSW: - if (! (cp = *argp++) || *cp == '-') + if (! (cp = *argp++) || *cp == '-') { adios (NULL, "missing argument to %s", argp[-2]); + } if (! strcasecmp (cp, "8bit")) { fx.decodetext = CE_8BIT; } else if (! strcasecmp (cp, "7bit")) { fx.decodetext = CE_7BIT; + } else if (! strcasecmp (cp, "binary")) { + fx.decodetext = CE_BINARY; } else { adios (NULL, "invalid argument to %s", argp[-2]); } @@ -179,13 +197,26 @@ main (int argc, char **argv) { case NDECODETEXTSW: fx.decodetext = 0; continue; - case TEXTCODESETSW: - if (! (cp = *argp++) || (*cp == '-' && cp[1])) + case DECODETYPESW: + if (! (cp = *argp++) || *cp == '-') { + adios (NULL, "missing argument to %s", argp[-2]); + } + fx.decodetypes = cp; + continue; + case CRLFLINEBREAKSSW: + fx.lf_line_endings = 0; + continue; + case NCRLFLINEBREAKSSW: + fx.lf_line_endings = 1; + continue; + case TEXTCHARSETSW: + if (! (cp = *argp++) || (*cp == '-' && cp[1])) { adios (NULL, "missing argument to %s", argp[-2]); - fx.textcodeset = cp; + } + fx.textcharset = cp; continue; - case NTEXTCODESETSW: - fx.textcodeset = 0; + case NTEXTCHARSETSW: + fx.textcharset = 0; continue; case FIXBOUNDARYSW: fx.fixboundary = 1; @@ -193,11 +224,24 @@ main (int argc, char **argv) { case NFIXBOUNDARYSW: fx.fixboundary = 0; continue; - case FIXCTESW: - fx.fixcte = 1; + case FIXCOMPOSITECTESW: + fx.fixcompositecte = 1; + continue; + case NFIXCOMPOSITECTESW: + fx.fixcompositecte = 0; continue; - case NFIXCTESW: - fx.fixcte = 0; + case FIXTYPESW: + if (! (cp = *argp++) || (*cp == '-' && cp[1])) { + adios (NULL, "missing argument to %s", argp[-2]); + } + if (! strncasecmp (cp, "multipart/", 10) || + ! strncasecmp (cp, "message/", 8)) { + adios (NULL, "-fixtype %s not allowed", cp); + } else if (! strchr (cp, '/')) { + adios (NULL, "-fixtype requires type/subtype"); + } + if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); } + svector_push_back (fx.fixtypes, cp); continue; case REFORMATSW: fx.reformat = 1; @@ -205,27 +249,38 @@ main (int argc, char **argv) { case NREFORMATSW: fx.reformat = 0; continue; - + case REPLACETEXTPLAINSW: + fx.replacetextplain = 1; + continue; + case NREPLACETEXTPLAINSW: + fx.replacetextplain = 0; + continue; case FILESW: - if (! (cp = *argp++) || (*cp == '-' && cp[1])) + if (! (cp = *argp++) || (*cp == '-' && cp[1])) { adios (NULL, "missing argument to %s", argp[-2]); + } file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE); continue; - case OUTFILESW: - if (! (cp = *argp++) || (*cp == '-' && cp[1])) + if (! (cp = *argp++) || (*cp == '-' && cp[1])) { adios (NULL, "missing argument to %s", argp[-2]); + } outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE); continue; - case RPROCSW: - if (!(rmmproc = *argp++) || *rmmproc == '-') + if (!(rmmproc = *argp++) || *rmmproc == '-') { adios (NULL, "missing argument to %s", argp[-2]); + } continue; case NRPRCSW: rmmproc = NULL; continue; - + case CHGSW: + chgflag = 1; + continue; + case NCHGSW: + chgflag = 0; + continue; case VERBSW: verbosw = 1; continue; @@ -235,12 +290,19 @@ main (int argc, char **argv) { } } if (*cp == '+' || *cp == '@') { - if (folder) + if (folder) { adios (NULL, "only one folder at a time!"); - else + } else { folder = pluspath (cp); - } else - app_msgarg(&msgs, cp); + } + } else { + if (*cp == '/') { + /* Interpret a full path as a filename, not a message. */ + file = add (cp, NULL); + } else { + app_msgarg (&msgs, cp); + } + } } SIGNAL (SIGQUIT, quitser); @@ -254,23 +316,16 @@ main (int argc, char **argv) { fclose (fp); } - /* - * Check for storage directory. If specified, - * then store temporary files there. Else we - * store them in standard nmh directory. - */ - if ((cp = context_find (nmhstorage)) && *cp) - tmp = concat (cp, "/", invo_name, NULL); - else - tmp = add (m_maildir (invo_name), NULL); - suppress_bogus_mp_content_warning = skip_mp_cte_check = 1; + suppress_extraneous_trailing_semicolon_warning = 1; - if (! context_find ("path")) + if (! context_find ("path")) { free (path ("./", TFOLDER)); + } - if (file && msgs.size) + if (file && msgs.size) { adios (NULL, "cannot specify msg and file at same time!"); + } /* * check if message is coming from file @@ -288,57 +343,77 @@ main (int argc, char **argv) { using_stdin = 1; - if ((cp = m_mktemp2 (tmp, invo_name, &fd, NULL)) == NULL) { - adios (NULL, "unable to create temporary file"); + if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) { + adios (NULL, "unable to create temporary file in %s", + get_temp_dir()); } else { free (file); file = add (cp, NULL); - chmod (file, 0600); cpydata (STDIN_FILENO, fd, "-", file); } if (close (fd)) { - unlink (file); + (void) m_unlink (file); adios (NULL, "failed to write temporary file"); } } - if (! (cts = (CT *) calloc ((size_t) 2, sizeof *cts))) - adios (NULL, "out of memory"); + cts = mh_xcalloc(2, sizeof *cts); ctp = cts; - if ((ct = parse_mime (file))) *ctp++ = ct; + if ((ct = parse_mime (file))) { + set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings); + *ctp++ = ct; + } else { + advise (NULL, "unable to parse message from file %s", file); + status = NOTOK; + + /* If there's an outfile, pass the input message unchanged, so the message won't + get dropped from a pipeline. */ + if (outfile) { + /* Something went wrong. Output might be expected, such as if this were run + as a filter. Just copy the input to the output. */ + if (copy_input_to_output (file, outfile) != OK) { + advise (NULL, "unable to copy message to %s, it might be lost\n", outfile); + } + } + } } else { /* * message(s) are coming from a folder */ CT ct; - if (! msgs.size) + if (! msgs.size) { app_msgarg(&msgs, "cur"); - if (! folder) + } + if (! folder) { folder = getfolder (1); + } maildir = m_maildir (folder); - if (chdir (maildir) == NOTOK) + if (chdir (maildir) == NOTOK) { adios (maildir, "unable to change directory to"); + } /* read folder and create message structure */ - if (! (mp = folder_read (folder, 1))) + if (! (mp = folder_read (folder, 1))) { adios (NULL, "unable to read folder %s", folder); + } /* check for empty folder */ - if (mp->nummsg == 0) + if (mp->nummsg == 0) { adios (NULL, "no messages in %s", folder); + } /* parse all the message ranges/sequences and set SELECTED */ for (msgnum = 0; msgnum < msgs.size; msgnum++) - if (! m_convert (mp, msgs.msgs[msgnum])) + if (! m_convert (mp, msgs.msgs[msgnum])) { done (1); + } seq_setprev (mp); /* set the previous-sequence */ - if (! (cts = (CT *) calloc ((size_t) (mp->numsel + 1), sizeof *cts))) - adios (NULL, "out of memory"); + cts = mh_xcalloc(mp->numsel + 1, sizeof *cts); ctp = cts; for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) { @@ -346,11 +421,31 @@ main (int argc, char **argv) { char *msgnam; msgnam = m_name (msgnum); - if ((ct = parse_mime (msgnam))) *ctp++ = ct; + if ((ct = parse_mime (msgnam))) { + set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings); + *ctp++ = ct; + } else { + advise (NULL, "unable to parse message %s", msgnam); + status = NOTOK; + + /* If there's an outfile, pass the input message unchanged, so the message won't + get dropped from a pipeline. */ + if (outfile) { + /* Something went wrong. Output might be expected, such as if this were run + as a filter. Just copy the input to the output. */ + const char *input_filename = path (msgnam, TFILE); + + if (copy_input_to_output (input_filename, outfile) != OK) { + advise (NULL, "unable to copy message to %s, it might be lost\n", outfile); + } + } + } } } - seq_setcur (mp, mp->hghsel); /* update current message */ + if (chgflag) { + seq_setcur (mp, mp->hghsel); /* update current message */ + } seq_save (mp); /* synchronize sequences */ context_replace (pfolder, folder);/* update current folder */ context_save (); /* save the context file */ @@ -361,7 +456,7 @@ main (int argc, char **argv) { status += mhfixmsgsbr (ctp, &fx, outfile); if (using_stdin) { - unlink (file); + (void) m_unlink (file); if (! outfile) { /* Just calling m_backup() unlinks the backup file. */ @@ -373,9 +468,11 @@ main (int argc, char **argv) { status = 1; } + if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); } free (outfile); - free (tmp); free (file); + free (folder); + free (arguments); /* done is freects_done, which will clean up all of cts. */ done (status); @@ -383,6 +480,9 @@ main (int argc, char **argv) { } +/* + * Apply transformations to one message. + */ int mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { /* Store input filename in case one of the transformations, i.e., @@ -396,30 +496,42 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { modify_inplace = 1; if ((*ctp)->c_file) { - outfile = add (m_mktemp2 (tmp, invo_name, NULL, NULL), NULL); + char *tempfile; + if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) { + adios (NULL, "unable to create temporary file in %s", + get_temp_dir()); + } + outfile = add (tempfile, NULL); } else { adios (NULL, "missing both input and output filenames\n"); } } reverse_alternative_parts (*ctp); + status = fix_always (*ctp, &message_mods); if (status == OK && fx->fixboundary) { status = fix_boundary (ctp, &message_mods); } - if (status == OK && fx->fixcte) { - status = fix_multipart_cte (*ctp, &message_mods); + if (status == OK && fx->fixtypes != NULL) { + status = fix_types (*ctp, fx->fixtypes, &message_mods); + } + if (status == OK && fx->fixcompositecte) { + status = fix_composite_cte (*ctp, &message_mods); } if (status == OK && fx->reformat) { - status = ensure_text_plain (ctp, NULL, &message_mods); + status = + ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain); } if (status == OK && fx->decodetext) { - status = decode_text_parts (*ctp, fx->decodetext, &message_mods); + status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes, + &message_mods); + update_cte (*ctp); } - if (status == OK && fx->textcodeset != NULL) { - status = convert_codesets (*ctp, fx->textcodeset, &message_mods); + if (status == OK && fx->textcharset != NULL) { + status = convert_charsets (*ctp, fx->textcharset, &message_mods); } - if (! (*ctp)->c_umask) { + if (status == OK && ! (*ctp)->c_umask) { /* Set the umask for the contents file. This currently isn't used but just in case it is in the future. */ struct stat st; @@ -441,23 +553,13 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { /* Something went wrong. Output might be expected, such as if this were run as a filter. Just copy the input to the output. */ - int in = open (input_filename, O_RDONLY); - int out = strcmp (outfile, "-") - ? open (outfile, O_WRONLY | O_CREAT, m_gmprot ()) - : STDOUT_FILENO; - - if (in != -1 && out != -1) { - cpydata (in, out, input_filename, outfile); - } else { - status = NOTOK; + if (copy_input_to_output (input_filename, outfile) != OK) { + advise (NULL, "unable to copy message to %s, it might be lost\n", outfile); } - - close (out); - close (in); } if (modify_inplace) { - if (status != OK) unlink (outfile); + if (status != OK) { (void) m_unlink (outfile); } free (outfile); outfile = NULL; } @@ -468,33 +570,40 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { } -/* parse_mime() arranges alternates in reverse (priority) order, so - reverse them back. This will put a text/plain part at the front of - a multipart/alternative part, for example, where it belongs. */ -static void -reverse_alternative_parts (CT ct) { - if (ct->c_type == CT_MULTIPART) { - struct multipart *m = (struct multipart *) ct->c_ctparams; - struct part *part; - - if (ct->c_subtype == MULTI_ALTERNATE) { - reverse_parts (ct); - } +/* + * Copy input message to output. Assumes not modifying in place, so this + * might be running as part of a pipeline. + */ +static int +copy_input_to_output (const char *input_filename, const char *output_filename) { + int in = open (input_filename, O_RDONLY); + int out = strcmp (output_filename, "-") + ? open (output_filename, O_WRONLY | O_CREAT, m_gmprot ()) + : STDOUT_FILENO; + int status = OK; - /* And call recursively on each part of a multipart. */ - for (part = m->mp_parts; part; part = part->mp_next) { - reverse_alternative_parts (part->mp_part); - } + if (in != -1 && out != -1) { + cpydata (in, out, input_filename, output_filename); + } else { + status = NOTOK; } + + close (out); + close (in); + + return status; } +/* + * Fix mismatched outer level boundary. + */ static int fix_boundary (CT *ct, int *message_mods) { struct multipart *mp; int status = OK; - if (bogus_mp_content) { + if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) { mp = (struct multipart *) (*ct)->c_ctparams; /* @@ -513,19 +622,25 @@ fix_boundary (CT *ct, int *message_mods) { if (get_multipart_boundary (*ct, &part_boundary) == OK) { char *fixed; - if ((fixed = m_mktemp2 (tmp, invo_name, NULL, &(*ct)->c_fp))) { + if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) { if (replace_boundary (*ct, fixed, part_boundary) == OK) { char *filename = add ((*ct)->c_file, NULL); + CT fixed_ct; free_content (*ct); - if ((*ct = parse_mime (fixed))) { + if ((fixed_ct = parse_mime (fixed))) { + *ct = fixed_ct; (*ct)->c_unlink = 1; ++*message_mods; if (verbosw) { - report (NULL, filename, + report (NULL, NULL, filename, "fix multipart boundary"); } + } else { + *ct = NULL; + advise (NULL, "unable to parse fixed part"); + status = NOTOK; } free (filename); } else { @@ -533,12 +648,22 @@ fix_boundary (CT *ct, int *message_mods) { status = NOTOK; } } else { - advise (NULL, "unable to create temporary file"); + advise (NULL, "unable to create temporary file in %s", + get_temp_dir()); status = NOTOK; } free (part_boundary); + } else { + /* Couldn't fix the boundary. Report failure so that mhfixmsg + doesn't modify the message. */ + status = NOTOK; } + } else { + /* No multipart struct, even though the content type is + CT_MULTIPART. Report failure so that mhfixmsg doesn't modify + the message. */ + status = NOTOK; } } @@ -546,6 +671,9 @@ fix_boundary (CT *ct, int *message_mods) { } +/* + * Find boundary at end of multipart. + */ static int get_multipart_boundary (CT ct, char **part_boundary) { char buffer[BUFSIZ]; @@ -569,10 +697,11 @@ get_multipart_boundary (CT ct, char **part_boundary) { while (begin >= (off_t) ct->c_begin) { fseeko (ct->c_fp, begin, SEEK_SET); while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) { - char *end = buffer + bytes_read - 1; - char *cp; + char *cp = rfind_str (buffer, bytes_read, "--"); + + if (cp) { + char *end; - if ((cp = rfind_str (buffer, bytes_read, "--"))) { /* Trim off trailing "--" and anything beyond. */ *cp-- = '\0'; if ((end = rfind_str (buffer, cp - buffer, "\n"))) { @@ -626,9 +755,11 @@ get_multipart_boundary (CT ct, char **part_boundary) { } -/* Open and copy ct->c_file to file, replacing the multipart boundary. */ +/* + * Open and copy ct->c_file to file, replacing the multipart boundary. + */ static int -replace_boundary (CT ct, char *file, const char *boundary) { +replace_boundary (CT ct, char *file, char *boundary) { FILE *fpin, *fpout; int compnum, state; char buf[BUFSIZ], name[NAMESZ]; @@ -674,10 +805,23 @@ replace_boundary (CT ct, char *file, const char *boundary) { if (strcasecmp (TYPE_FIELD, np)) { fprintf (fpout, "%s:%s", np, vp); } else { - char *new_boundary = update_attr (vp, "boundary=", boundary); - - fprintf (fpout, "%s:%s\n", np, new_boundary); - free (new_boundary); + char *new_ctline, *new_params; + + replace_param(&ct->c_ctinfo.ci_first_pm, + &ct->c_ctinfo.ci_last_pm, "boundary", + boundary, 0); + + new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/", + ct->c_ctinfo.ci_subtype, NULL); + new_params = output_params(strlen(TYPE_FIELD) + + strlen(new_ctline) + 1, + ct->c_ctinfo.ci_first_pm, NULL, 0); + fprintf (fpout, "%s:%s%s\n", np, new_ctline, + new_params ? new_params : ""); + free(new_ctline); + if (new_params) { + free(new_params); + } } free (vp); @@ -686,9 +830,11 @@ replace_boundary (CT ct, char *file, const char *boundary) { continue; case BODY: - fputs ("\n", fpout); + putc('\n', fpout); /* buf will have a terminating NULL, skip it. */ - fwrite (buf, 1, bufsz-1, fpout); + if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) { + advise (file, "fwrite"); + } continue; case FILEEOF: @@ -717,52 +863,208 @@ replace_boundary (CT ct, char *file, const char *boundary) { } -/* Change the value of a name=value pair in a header field body. - If the name isn't there, append them. In any case, a new - string will be allocated and must be free'd by the caller. - Trims any trailing newlines. */ -static char * -update_attr (char *body, const char *name, const char *value) { - char *bp = nmh_strcasestr (body, name); - char *new_body; +/* + * Fix Content-Type header to reflect the content of its part. + */ +static int +fix_types (CT ct, svector_t fixtypes, int *message_mods) { + int status = OK; - if (bp) { - char *other_attrs = strchr (bp, ';'); + switch (ct->c_type) { + case CT_MULTIPART: { + struct multipart *m = (struct multipart *) ct->c_ctparams; + struct part *part; - *(bp + strlen (name)) = '\0'; - new_body = concat (body, "\"", value, "\"", NULL); + for (part = m->mp_parts; status == OK && part; part = part->mp_next) { + status = fix_types (part->mp_part, fixtypes, message_mods); + } + break; + } - if (other_attrs) { - char *cp; + case CT_MESSAGE: + if (ct->c_subtype == MESSAGE_EXTERNAL) { + struct exbody *e = (struct exbody *) ct->c_ctparams; - /* Trim any trailing newlines. */ - for (cp = &other_attrs[strlen (other_attrs) - 1]; - cp > other_attrs && *cp == '\n'; - *cp-- = '\0') continue; - new_body = add (other_attrs, new_body); + status = fix_types (e->eb_content, fixtypes, message_mods); } - } else { - char *cp; + break; + + default: { + char **typep, *type; + + if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) { + for (typep = svector_strs (fixtypes); + typep && (type = *typep); + ++typep) { + char *type_subtype = + concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype, + NULL); + + if (! strcasecmp (type, type_subtype) && + decode_part (ct) == OK && + ct->c_cefile.ce_file != NULL) { + char *ct_type_subtype = mime_type (ct->c_cefile.ce_file); + char *cp; + + if ((cp = strchr (ct_type_subtype, ';'))) { + /* Truncate to remove any parameter list from + mime_type () result. */ + *cp = '\0'; + } + + if (strcasecmp (type, ct_type_subtype)) { + char *ct_type, *ct_subtype; + HF hf; + + /* The Content-Type header does not match the + content, so update these struct Content + fields to match: + * c_type, c_subtype + * c_ctinfo.ci_type, c_ctinfo.ci_subtype + * c_ctline + */ + /* Extract type and subtype from type/subtype. */ + ct_type = mh_xstrdup(ct_type_subtype); + if ((cp = strchr (ct_type, '/'))) { + *cp = '\0'; + ct_subtype = mh_xstrdup(++cp); + } else { + advise (NULL, "missing / in MIME type of %s %s", + ct->c_file, ct->c_partno); + free (ct_type); + return NOTOK; + } + + ct->c_type = ct_str_type (ct_type); + ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype); + + free (ct->c_ctinfo.ci_type); + ct->c_ctinfo.ci_type = ct_type; + free (ct->c_ctinfo.ci_subtype); + ct->c_ctinfo.ci_subtype = ct_subtype; + if (! replace_substring (&ct->c_ctline, type, + ct_type_subtype)) { + advise (NULL, "did not find %s in %s", + type, ct->c_ctline); + } + + /* Update Content-Type header field. */ + for (hf = ct->c_first_hf; hf; hf = hf->next) { + if (! strcasecmp (TYPE_FIELD, hf->name)) { + if (replace_substring (&hf->value, type, + ct_type_subtype)) { + ++*message_mods; + if (verbosw) { + report (NULL, ct->c_partno, ct->c_file, + "change Content-Type in header " + "from %s to %s", + type, ct_type_subtype); + } + break; + } else { + advise (NULL, "did not find %s in %s", + type, hf->value); + } + } + } + } + free (ct_type_subtype); + } + free (type_subtype); + } + } + }} + + return status; +} + + +/* + * Replace a substring, allocating space to hold the new one. + */ +char * +replace_substring (char **str, const char *old, const char *new) { + char *cp; + + if ((cp = strstr (*str, old))) { + char *remainder = cp + strlen (old); + char *prefix, *new_str; + + if (cp - *str) { + prefix = mh_xstrdup(*str); + *(prefix + (cp - *str)) = '\0'; + new_str = concat (prefix, new, remainder, NULL); + free (prefix); + } else { + new_str = concat (new, remainder, NULL); + } + + free (*str); + + return *str = new_str; + } + + return NULL; +} + + +/* + * Remove a name=value parameter, given just its name, from a header value. + */ +char * +remove_parameter (char *str, const char *name) { + /* It looks to me, based on the BNF in RFC 2045, than there can't + be whitespace betwwen the parameter name and the "=", or + between the "=" and the parameter value. */ + char *param_name = concat (name, "=", NULL); + char *cp; + + if ((cp = strstr (str, param_name))) { + char *start, *end; + size_t count = 1; - /* Append name/value pair, after first removing a final newline - and (extraneous) semicolon. */ - if (*(cp = &body[strlen (body) - 1]) == '\n') *cp = '\0'; - if (*(cp = &body[strlen (body) - 1]) == ';') *cp = '\0'; - new_body = concat (body, "; ", name, "\"", value, "\"", NULL); + /* Remove any leading spaces, before the parameter name. */ + for (start = cp; + start > str && isspace ((unsigned char) *(start-1)); + --start) { + continue; + } + /* Remove a leading semicolon. */ + if (start > str && *(start-1) == ';') { --start; } + + end = cp + strlen (name) + 1; + if (*end == '"') { + /* Skip past the quoted value, and then the final quote. */ + for (++end ; *end && *end != '"'; ++end) { continue; } + ++end; + } else { + /* Skip past the value. */ + for (++end ; *end && ! isspace ((unsigned char) *end); ++end) {} + } + + /* Count how many characters need to be moved. Include + trailing null, which is accounted for by the + initialization of count to 1. */ + for (cp = end; *cp; ++cp) { ++count; } + (void) memmove (start, end, count); } - return new_body; + free (param_name); + + return str; } +/* + * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part. + * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to + * 8 bit. + */ static int -fix_multipart_cte (CT ct, int *message_mods) { +fix_composite_cte (CT ct, int *message_mods) { int status = OK; - if (ct->c_type == CT_MULTIPART) { - struct multipart *m; - struct part *part; - + if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) { if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT && ct->c_encoding != CE_BINARY) { HF hf; @@ -776,8 +1078,9 @@ fix_multipart_cte (CT ct, int *message_mods) { if (! strncasecmp (name, ENCODING_FIELD, strlen (ENCODING_FIELD))) { char *prefix = "Nmh-REPLACED-INVALID-"; - HF h = mh_xmalloc (sizeof *h); + HF h; + NEW(h); h->name = add (hf->name, NULL); h->hf_encoding = hf->hf_encoding; h->next = hf->next; @@ -790,7 +1093,7 @@ fix_multipart_cte (CT ct, int *message_mods) { ++*message_mods; if (verbosw) { char *encoding = cpytrim (hf->value); - report (ct->c_partno, ct->c_file, + report (NULL, ct->c_partno, ct->c_file, "replace Content-Transfer-Encoding of %s " "with 8 bit", encoding); free (encoding); @@ -808,11 +1111,16 @@ fix_multipart_cte (CT ct, int *message_mods) { set_ce (ct, CE_8BIT); } - m = (struct multipart *) ct->c_ctparams; - for (part = m->mp_parts; part; part = part->mp_next) { - if (fix_multipart_cte (part->mp_part, message_mods) != OK) { - status = NOTOK; - break; + if (ct->c_type == CT_MULTIPART) { + struct multipart *m; + struct part *part; + + m = (struct multipart *) ct->c_ctparams; + for (part = m->mp_parts; part; part = part->mp_next) { + if (fix_composite_cte (part->mp_part, message_mods) != OK) { + status = NOTOK; + break; + } } } } @@ -821,6 +1129,9 @@ fix_multipart_cte (CT ct, int *message_mods) { } +/* + * Set content encoding. + */ static int set_ce (CT ct, int encoding) { const char *ce = ce_str (encoding); @@ -845,6 +1156,10 @@ set_ce (CT ct, int encoding) { ct->c_cefile.ce_file to the name of the file containing the contents. */ + if (ct->c_ceclosefnx) { + (*ct->c_ceclosefnx) (ct); + } + /* Restore the cefile. */ ct->c_cefile = decoded_content_info; @@ -865,96 +1180,141 @@ set_ce (CT ct, int encoding) { ct->c_celine = add (cte, NULL); return OK; - } else { - return NOTOK; } + + return NOTOK; } -/* Make sure each text part has a corresponding text/plain part. */ +/* + * Make sure each text part has a corresponding text/plain part. + */ static int -ensure_text_plain (CT *ct, CT parent, int *message_mods) { +ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) { int status = OK; switch ((*ct)->c_type) { case CT_TEXT: { - int has_text_plain = 0; - /* Nothing to do for text/plain. */ - if ((*ct)->c_subtype == TEXT_PLAIN) return OK; + if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; } if (parent && parent->c_type == CT_MULTIPART && parent->c_subtype == MULTI_ALTERNATE) { - struct multipart *mp = (struct multipart *) parent->c_ctparams; - struct part *part; int new_subpart_number = 1; - - /* See if there is a sibling text/plain. */ - for (part = mp->mp_parts; part; part = part->mp_next) { - ++new_subpart_number; - if (part->mp_part->c_type == CT_TEXT && - part->mp_part->c_subtype == TEXT_PLAIN) { - has_text_plain = 1; - break; - } - } + int has_text_plain = + find_textplain_sibling (parent, replacetextplain, + &new_subpart_number); if (! has_text_plain) { /* Parent is a multipart/alternative. Insert a new text/plain subpart. */ - struct part *new_part = mh_xmalloc (sizeof *new_part); - - if ((new_part->mp_part = build_text_plain_part (*ct))) { - char buffer[16]; - snprintf (buffer, sizeof buffer, "%d", new_subpart_number); - - new_part->mp_next = mp->mp_parts; - mp->mp_parts = new_part; - new_part->mp_part->c_partno = - concat (parent->c_partno ? parent->c_partno : "1", ".", - buffer, NULL); - + const int inserted = + insert_new_text_plain_part (*ct, new_subpart_number, + parent); + if (inserted) { ++*message_mods; if (verbosw) { - report (parent->c_partno, parent->c_file, + report (NULL, parent->c_partno, parent->c_file, "insert text/plain part"); } } else { - free_content (new_part->mp_part); - free (new_part); status = NOTOK; } } - } else { - /* Slip new text/plain part into a new multipart/alternative. */ - CT tp_part = build_text_plain_part (*ct); + } else if (parent && parent->c_type == CT_MULTIPART && + parent->c_subtype == MULTI_RELATED) { + char *type_subtype = + concat ((*ct)->c_ctinfo.ci_type, "/", + (*ct)->c_ctinfo.ci_subtype, NULL); + const char *parent_type = + get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1); + int new_subpart_number = 1; + int has_text_plain = 0; + + /* Have to do string comparison on the subtype because we + don't enumerate all of them in c_subtype values. + parent_type will be NULL if the multipart/related part + doesn't have a type parameter. The type parameter must + be specified according to RFC 2387 Sec. 3.1 but not all + messages comply. */ + if (parent_type && strcasecmp (type_subtype, parent_type) == 0) { + /* The type of this part matches the root type of the + parent multipart/related. Look to see if there's + text/plain sibling. */ + has_text_plain = + find_textplain_sibling (parent, replacetextplain, + &new_subpart_number); + } + + free (type_subtype); - if (tp_part) { - CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART, - MULTI_ALTERNATE); - if (mp_alt) { - struct multipart *mp = - (struct multipart *) mp_alt->c_ctparams; + if (! has_text_plain) { + struct multipart *mp = (struct multipart *) parent->c_ctparams; + struct part *part; + int siblings = 0; - if (mp && mp->mp_parts) { - mp->mp_parts->mp_part = tp_part; - /* Make the new multipart/alternative the parent. */ - *ct = mp_alt; + for (part = mp->mp_parts; part; part = part->mp_next) { + if (*ct != part->mp_part) { + ++siblings; + } + } - ++*message_mods; - if (verbosw) { - report ((*ct)->c_partno, (*ct)->c_file, - "insert text/plain part"); + if (siblings) { + /* Parent is a multipart/related. Insert a new + text/plain subpart in a new multipart/alternative. */ + if (insert_into_new_mp_alt (ct, message_mods)) { + /* Not an error if text/plain couldn't be added. */ + } + } else { + /* There are no siblings, so insert a new text/plain + subpart, and change the parent type from + multipart/related to multipart/alternative. */ + const int inserted = + insert_new_text_plain_part (*ct, new_subpart_number, + parent); + + if (inserted) { + HF hf; + + parent->c_subtype = MULTI_ALTERNATE; + parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative"); + if (! replace_substring (&parent->c_ctline, "/related", + "/alternative")) { + advise (NULL, + "did not find multipart/related in %s", + parent->c_ctline); + } + + /* Update Content-Type header field. */ + for (hf = parent->c_first_hf; hf; hf = hf->next) { + if (! strcasecmp (TYPE_FIELD, hf->name)) { + if (replace_substring (&hf->value, "/related", + "/alternative")) { + ++*message_mods; + if (verbosw) { + report (NULL, parent->c_partno, + parent->c_file, + "insert text/plain part"); + } + + /* Remove, e.g., type="text/html" from + multipart/alternative. */ + remove_parameter (hf->value, "type"); + break; + } else { + advise (NULL, "did not find multipart/" + "related in header %s", + hf->value); + } + } } } else { - free_content (tp_part); - free_content (mp_alt); - status = NOTOK; + /* Not an error if text/plain couldn't be inserted. */ } - } else { - status = NOTOK; } - } else { + } + } else { + if (insert_into_new_mp_alt (ct, message_mods)) { status = NOTOK; } } @@ -967,7 +1327,8 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { for (part = mp->mp_parts; status == OK && part; part = part->mp_next) { if ((*ct)->c_type == CT_MULTIPART) { - status = ensure_text_plain (&part->mp_part, *ct, message_mods); + status = ensure_text_plain (&part->mp_part, *ct, message_mods, + replacetextplain); } } break; @@ -975,10 +1336,10 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { case CT_MESSAGE: if ((*ct)->c_subtype == MESSAGE_EXTERNAL) { - struct exbody *e; + struct exbody *e = (struct exbody *) (*ct)->c_ctparams; - e = (struct exbody *) (*ct)->c_ctparams; - status = ensure_text_plain (&e->eb_content, *ct, message_mods); + status = ensure_text_plain (&e->eb_content, *ct, message_mods, + replacetextplain); } break; } @@ -987,6 +1348,80 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { } +/* + * See if there is a sibling text/plain, and return its subpart number. + */ +static int +find_textplain_sibling (CT parent, int replacetextplain, + int *new_subpart_number) { + struct multipart *mp = (struct multipart *) parent->c_ctparams; + struct part *part, *prev; + int has_text_plain = 0; + + for (prev = part = mp->mp_parts; part; part = part->mp_next) { + ++*new_subpart_number; + if (part->mp_part->c_type == CT_TEXT && + part->mp_part->c_subtype == TEXT_PLAIN) { + if (replacetextplain) { + struct part *old_part; + if (part == mp->mp_parts) { + old_part = mp->mp_parts; + mp->mp_parts = part->mp_next; + } else { + old_part = prev->mp_next; + prev->mp_next = part->mp_next; + } + if (verbosw) { + report (NULL, parent->c_partno, parent->c_file, + "remove text/plain part %s", + old_part->mp_part->c_partno); + } + free_content (old_part->mp_part); + free (old_part); + } else { + has_text_plain = 1; + } + break; + } + prev = part; + } + + return has_text_plain; +} + + +/* + * Insert a new text/plain part. + */ +static int +insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) { + struct multipart *mp = (struct multipart *) parent->c_ctparams; + struct part *new_part; + + NEW(new_part); + if ((new_part->mp_part = build_text_plain_part (ct))) { + char buffer[16]; + snprintf (buffer, sizeof buffer, "%d", new_subpart_number); + + new_part->mp_next = mp->mp_parts; + mp->mp_parts = new_part; + new_part->mp_part->c_partno = + concat (parent->c_partno ? parent->c_partno : "1", ".", + buffer, NULL); + + return 1; + } + + free_content (new_part->mp_part); + free (new_part); + + return 0; +} + + +/* + * Create a text/plain part to go along with non-plain sibling part. + */ static CT build_text_plain_part (CT encoded_part) { CT tp_part = divide_part (encoded_part); @@ -997,31 +1432,79 @@ build_text_plain_part (CT encoded_part) { contains the decoded contents. And the decoding function, such as openQuoted, will have set ...->ce_unlink to 1 so that it will be unlinked by free_content (). */ - tmp_plain_file = add (m_mktemp2 (tmp, invo_name, NULL, NULL), NULL); - if (reformat_part (tp_part, tmp_plain_file, - tp_part->c_ctinfo.ci_type, - tp_part->c_ctinfo.ci_subtype, - tp_part->c_type) == OK) { - return tp_part; + char *tempfile; + + if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) { + advise (NULL, "unable to create temporary file in %s", + get_temp_dir()); + } else { + tmp_plain_file = add (tempfile, NULL); + if (reformat_part (tp_part, tmp_plain_file, + tp_part->c_ctinfo.ci_type, + tp_part->c_ctinfo.ci_subtype, + tp_part->c_type) == OK) { + return tp_part; + } } } free_content (tp_part); - unlink (tmp_plain_file); + if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); } free (tmp_plain_file); return NULL; } -static CT -divide_part (CT ct) { - CT new_part; - - if ((new_part = (CT) calloc (1, sizeof *new_part)) == NULL) - adios (NULL, "out of memory"); +/* + * Slip new text/plain part into a new multipart/alternative. + */ +static int +insert_into_new_mp_alt (CT *ct, int *message_mods) { + CT tp_part = build_text_plain_part (*ct); + int status = OK; - /* Just copy over what is needed for decoding. c_vrsn and + if (tp_part) { + CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART, + MULTI_ALTERNATE); + if (mp_alt) { + struct multipart *mp = (struct multipart *) mp_alt->c_ctparams; + + if (mp && mp->mp_parts) { + mp->mp_parts->mp_part = tp_part; + /* Make the new multipart/alternative the parent. */ + *ct = mp_alt; + + ++*message_mods; + if (verbosw) { + report (NULL, (*ct)->c_partno, (*ct)->c_file, + "insert text/plain part"); + } + } else { + free_content (tp_part); + free_content (mp_alt); + status = NOTOK; + } + } else { + status = NOTOK; + } + } else { + /* Not an error if text/plain couldn't be built. */ + } + + return status; +} + + +/* + * Clone a MIME part. + */ +static CT +divide_part (CT ct) { + CT new_part; + + NEW0(new_part); + /* Just copy over what is needed for decoding. c_vrsn and c_celine aren't necessary. */ new_part->c_file = add (ct->c_file, NULL); new_part->c_begin = ct->c_begin; @@ -1043,49 +1526,65 @@ divide_part (CT ct) { } +/* + * Copy the content info from one part to another. + */ static void copy_ctinfo (CI dest, CI src) { - char **s_ap, **d_ap, **s_vp, **d_vp; + PM s_pm, d_pm; dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL; dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL; - for (s_ap = src->ci_attrs, d_ap = dest->ci_attrs, - s_vp = src->ci_values, d_vp = dest->ci_values; - *s_ap; - ++s_ap, ++d_ap, ++s_vp, ++d_vp) { - *d_ap = add (*s_ap, NULL); - *d_vp = *s_vp; + for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) { + d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name, + s_pm->pm_value, 0); + if (s_pm->pm_charset) { + d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset); + } + if (s_pm->pm_lang) { + d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang); + } } - *d_ap = NULL; dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL; dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL; } +/* + * Decode content. + */ static int decode_part (CT ct) { char *tmp_decoded; int status; + char *tempfile; - tmp_decoded = add (m_mktemp2 (tmp, invo_name, NULL, NULL), NULL); + if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) { + adios (NULL, "unable to create temporary file in %s", get_temp_dir()); + } + tmp_decoded = add (tempfile, NULL); /* The following call will load ct->c_cefile.ce_file with the tmp filename of the decoded content. tmp_decoded will contain the encoded output, get rid of that. */ status = output_message (ct, tmp_decoded); - unlink (tmp_decoded); + (void) m_unlink (tmp_decoded); free (tmp_decoded); return status; } -/* Some of the arguments aren't really needed now, but maybe will - be in the future for other than text types. */ +/* + * Reformat content as plain text. + * Some of the arguments aren't really needed now, but maybe will + * be in the future for other than text types. + */ static int reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { int output_subtype, output_encoding; + const char *reason = NULL; char *cp, *cf; int status; @@ -1095,46 +1594,30 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { Could show_multi() in mhshowsbr.c avoid this? */ /* Check for invo_name-format-type/subtype. */ - cp = concat (invo_name, "-format-", type, "/", subtype, NULL); - if ((cf = context_find (cp)) && *cf != '\0') { - if (strchr (cf, '>')) { - free (cp); - advise (NULL, "'>' prohibited in \"%s\",\nplease fix your " - "%s-format-%s/%s profile entry", cf, invo_name, type, - subtype); - return NOTOK; - } - } else { - free (cp); - - /* Check for invo_name-format-type. */ - cp = concat (invo_name, "-format-", type, NULL); - if (! (cf = context_find (cp)) || *cf == '\0') { - free (cp); - if (verbosw) { - advise (NULL, "Don't know how to convert %s, there is no " - "%s-format-%s/%s profile entry", - ct->c_file, invo_name, type, subtype); - } - return NOTOK; + if ((cf = context_find_by_type ("format", type, subtype)) == NULL) { + if (verbosw) { + advise (NULL, "Don't know how to convert %s, there is no " + "%s-format-%s/%s profile entry", + ct->c_file, invo_name, type, subtype); } + return NOTOK; + } + if (strchr (cf, '>')) { + advise (NULL, "'>' prohibited in \"%s\",\nplease fix your " + "%s-format-%s/%s profile entry", cf, invo_name, type, + subtype ? subtype : ""); - if (strchr (cf, '>')) { - free (cp); - advise (NULL, "'>' prohibited in \"%s\"", cf); - return NOTOK; - } + return NOTOK; } - free (cp); cp = concat (cf, " >", file, NULL); - status = show_content_aux (ct, 1, 0, cp, NULL); + status = show_content_aux (ct, 0, cp, NULL, NULL); free (cp); /* Unlink decoded content tmp file and free its filename to avoid leaks. The file stream should already have been closed. */ if (ct->c_cefile.ce_unlink) { - unlink (ct->c_cefile.ce_file); + (void) m_unlink (ct->c_cefile.ce_file); free (ct->c_cefile.ce_file); ct->c_cefile.ce_file = NULL; ct->c_cefile.ce_unlink = 0; @@ -1146,8 +1629,8 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { /* Set subtype to 0, which is always an UNKNOWN subtype. */ output_subtype = 0; } - output_encoding = charset_encoding (ct); + output_encoding = content_encoding (ct, &reason); if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) { ct->c_cefile.ce_file = file; ct->c_cefile.ce_unlink = 1; @@ -1160,19 +1643,9 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { } -/* Identifies 7bit or 8bit content based on charset. */ -static int -charset_encoding (CT ct) { - /* norm_charmap() is case sensitive. */ - char *codeset = upcase (content_codeset (ct)); - int encoding = - strcmp (norm_charmap (codeset), "US-ASCII") ? CE_8BIT : CE_7BIT; - - free (codeset); - return encoding; -} - - +/* + * Fill in a multipart/alternative part. + */ static CT build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { char *boundary_prefix = "----=_nmh-multipart"; @@ -1182,14 +1655,12 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { CT ct; struct part *p; struct multipart *m; - char *cp; const struct str2init *ctinit; - if ((ct = (CT) calloc (1, sizeof *ct)) == NULL) - adios (NULL, "out of memory"); + NEW0(ct); /* Set up the multipart/alternative part. These fields of *ct were - initialized to 0 by calloc(): + initialized to 0 by mh_xcalloc(): c_fp, c_unlink, c_begin, c_end, c_vrsn, c_ctline, c_celine, c_id, c_descr, c_dispo, c_partno, @@ -1197,7 +1668,7 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { c_cefile, c_encoding, c_digested, c_digest[16], c_ctexbody, c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx, - c_umask, c_pid, c_rfc934, + c_umask, c_rfc934, c_showproc, c_termproc, c_storeproc, c_storage, c_folder */ @@ -1224,6 +1695,7 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { boundary_in_content (&new_part->c_cefile.ce_fp, new_part->c_cefile.ce_file, boundary)) == -1) { + free (ct); return NULL; } } @@ -1234,6 +1706,7 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { if ((found_boundary = boundary_in_content (&new_part->c_fp, new_part->c_file, boundary)) == -1) { + free (ct); return NULL; } } @@ -1254,6 +1727,7 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { if (found_boundary) { advise (NULL, "giving up trying to find a unique boundary"); + free (ct); return NULL; } } @@ -1282,27 +1756,19 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { ct->c_ctinfo.ci_subtype = add (subtypename, NULL); } - name = concat (" ", typename, "/", subtypename, boundary_indicator, - boundary, NULL); - if ((cp = strstr (name, boundary_indicator))) { - ct->c_ctinfo.ci_attrs[0] = name; - ct->c_ctinfo.ci_attrs[1] = NULL; - /* ci_values don't get free'd, so point into ci_attrs. */ - ct->c_ctinfo.ci_values[0] = cp + strlen (boundary_indicator); - } + add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm, + "boundary", boundary, 0); - p = (struct part *) mh_xmalloc (sizeof *p); - p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next); + NEW(p); + NEW(p->mp_next); p->mp_next->mp_next = NULL; p->mp_next->mp_part = first_alt; - if ((m = (struct multipart *) calloc (1, sizeof (struct multipart))) == - NULL) - adios (NULL, "out of memory"); + NEW0(m); m->mp_start = concat (boundary, "\n", NULL); m->mp_stop = concat (boundary, "--\n", NULL); m->mp_parts = p; - ct->c_ctparams = (void *) m; + ct->c_ctparams = m; free (boundary); @@ -1310,7 +1776,9 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { } -/* Check that the boundary does not appear in the content. */ +/* + * Check that the boundary does not appear in the content. + */ static int boundary_in_content (FILE **fp, char *file, const char *boundary) { char buffer[BUFSIZ]; @@ -1335,7 +1803,9 @@ boundary_in_content (FILE **fp, char *file, const char *boundary) { } -/* Remove all non-Content headers. */ +/* + * Remove all non-Content headers. + */ static void transfer_noncontent_headers (CT old, CT new) { HF hp, hp_prev; @@ -1377,6 +1847,9 @@ transfer_noncontent_headers (CT old, CT new) { } +/* + * Set content type. + */ static int set_ct_type (CT ct, int type, int subtype, int encoding) { char *typename = ct_type_str (type); @@ -1431,30 +1904,70 @@ set_ct_type (CT ct, int type, int subtype, int encoding) { } +/* + * It's not necessary to update the charset parameter of a Content-Type + * header for a text part. According to RFC 2045 Sec. 6.4, the body + * (content) was originally in the specified charset, "and will be in + * that character set again after decoding." + */ static int -decode_text_parts (CT ct, int encoding, int *message_mods) { +decode_text_parts (CT ct, int encoding, const char *decodetypes, + int *message_mods) { int status = OK; + int lf_line_endings = 0; switch (ct->c_type) { - case CT_TEXT: + case CT_MULTIPART: { + struct multipart *m = (struct multipart *) ct->c_ctparams; + struct part *part; + + /* Should check to see if the body for this part is encoded? + For now, it gets passed along as-is by InitMultiPart(). */ + for (part = m->mp_parts; status == OK && part; part = part->mp_next) { + status = decode_text_parts (part->mp_part, encoding, decodetypes, + message_mods); + } + break; + } + + case CT_MESSAGE: + if (ct->c_subtype == MESSAGE_EXTERNAL) { + struct exbody *e = (struct exbody *) ct->c_ctparams; + + status = decode_text_parts (e->eb_content, encoding, decodetypes, + message_mods); + } + break; + + default: + if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) { + break; + } + + lf_line_endings = + ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings; + switch (ct->c_encoding) { case CE_BASE64: case CE_QUOTED: { int ct_encoding; if (decode_part (ct) == OK && ct->c_cefile.ce_file) { - if ((ct_encoding = content_encoding (ct)) == CE_BINARY && - encoding != CE_BINARY) { + const char *reason = NULL; + + if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY + && encoding != CE_BINARY) { /* The decoding isn't acceptable so discard it. Leave status as OK to allow other transformations. */ if (verbosw) { - report (ct->c_partno, ct->c_file, - "will not decode%s because it is binary", + report (NULL, ct->c_partno, ct->c_file, + "will not decode%s because it is binary (%s)", ct->c_partno ? "" : ct->c_ctline ? ct->c_ctline - : ""); + : "", + reason); } - unlink (ct->c_cefile.ce_file); + (void) m_unlink (ct->c_cefile.ce_file); free (ct->c_cefile.ce_file); ct->c_cefile.ce_file = NULL; } else if (ct->c_encoding == CE_QUOTED && @@ -1462,30 +1975,34 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { /* The decoding isn't acceptable so discard it. Leave status as OK to allow other transformations. */ if (verbosw) { - report (ct->c_partno, ct->c_file, + report (NULL, ct->c_partno, ct->c_file, "will not decode%s because it is 8bit", ct->c_partno ? "" : ct->c_ctline ? ct->c_ctline : ""); } - unlink (ct->c_cefile.ce_file); + (void) m_unlink (ct->c_cefile.ce_file); free (ct->c_cefile.ce_file); ct->c_cefile.ce_file = NULL; } else { int enc; - if (ct_encoding == CE_BINARY) + + if (ct_encoding == CE_BINARY) { enc = CE_BINARY; - else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) + } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) { enc = CE_QUOTED; - else - enc = charset_encoding (ct); + } else { + enc = ct_encoding; + } if (set_ce (ct, enc) == OK) { ++*message_mods; if (verbosw) { - report (ct->c_partno, ct->c_file, "decode%s", + report (NULL, ct->c_partno, ct->c_file, "decode%s", ct->c_ctline ? ct->c_ctline : ""); } - strip_crs (ct, message_mods); + if (lf_line_endings) { + strip_crs (ct, message_mods); + } } else { status = NOTOK; } @@ -1497,52 +2014,67 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { } case CE_8BIT: case CE_7BIT: - strip_crs (ct, message_mods); + if (lf_line_endings) { + strip_crs (ct, message_mods); + } break; default: break; } break; - - case CT_MULTIPART: { - struct multipart *m = (struct multipart *) ct->c_ctparams; - struct part *part; - - /* Should check to see if the body for this part is encoded? - For now, it gets passed along as-is by InitMultiPart(). */ - for (part = m->mp_parts; status == OK && part; part = part->mp_next) { - status = decode_text_parts (part->mp_part, encoding, message_mods); - } - break; } - case CT_MESSAGE: - if (ct->c_subtype == MESSAGE_EXTERNAL) { - struct exbody *e; + return status; +} - e = (struct exbody *) ct->c_ctparams; - status = decode_text_parts (e->eb_content, encoding, message_mods); - } - break; - default: - break; +/* + * Determine if the part with type[/subtype] should be decoded, according to + * decodetypes (which came from the -decodetypes switch). + */ +static int +should_decode(const char *decodetypes, const char *type, const char *subtype) { + /* Quick search for matching type[/subtype] in decodetypes: bracket + decodetypes with commas, then search for ,type, and ,type/subtype, in + it. */ + + int found_match = 0; + char *delimited_decodetypes = concat(",", decodetypes, ",", NULL); + char *delimited_type = concat(",", type, ",", NULL); + + if (nmh_strcasestr(delimited_decodetypes, delimited_type)) { + found_match = 1; + } else if (subtype != NULL) { + char *delimited_type_subtype = + concat(",", type, "/", subtype, ",", NULL); + + if (nmh_strcasestr(delimited_decodetypes, delimited_type_subtype)) { + found_match = 1; + } + free(delimited_type_subtype); } - return status; + free(delimited_type); + free(delimited_decodetypes); + + return found_match; } -/* See if the decoded content is 7bit, 8bit, or binary. It's binary - if it has any NUL characters, a CR not followed by a LF, or lines - greater than 998 characters in length. */ +/* + * See if the decoded content is 7bit, 8bit, or binary. It's binary + * if it has any NUL characters, a CR not followed by a LF, or lines + * greater than 998 characters in length. If binary, reason is set + * to a string explaining why. + */ static int -content_encoding (CT ct) { +content_encoding (CT ct, const char **reason) { CE ce = &ct->c_cefile; int encoding = CE_7BIT; if (ce->ce_file) { + size_t line_len = 0; char buffer[BUFSIZ]; size_t inbytes; @@ -1556,13 +2088,22 @@ content_encoding (CT ct) { (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) { char *cp; size_t i; - size_t line_len = 0; int last_char_was_cr = 0; for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) { if (*cp == '\0' || ++line_len > 998 || (*cp != '\n' && last_char_was_cr)) { encoding = CE_BINARY; + if (*cp == '\0') { + *reason = "null character"; + } else if (line_len > 998) { + *reason = "line length > 998"; + } else if (*cp != '\n' && last_char_was_cr) { + *reason = "CR not followed by LF"; + } else { + /* Should not reach this. */ + *reason = ""; + } break; } else if (*cp == '\n') { line_len = 0; @@ -1582,14 +2123,23 @@ content_encoding (CT ct) { } +/* + * Strip carriage returns from content. + */ static int strip_crs (CT ct, int *message_mods) { - /* norm_charmap() is case sensitive. */ - char *codeset = upcase (content_codeset (ct)); + char *charset = content_charset (ct); int status = OK; - /* Only strip carriage returns if content is ASCII. */ - if (! strcmp (norm_charmap (codeset), "US-ASCII")) { + /* Only strip carriage returns if content is ASCII or another + charset that has the same readily recognizable CR followed by a + LF. We can include UTF-8 here because if the high-order bit of + a UTF-8 byte is 0, then it must be a single-byte ASCII + character. */ + if (! strcasecmp (charset, "US-ASCII") || + ! strcasecmp (charset, "UTF-8") || + ! strncasecmp (charset, "ISO-8859-", 9) || + ! strncasecmp (charset, "WINDOWS-12", 10)) { char **file = NULL; FILE **fp = NULL; size_t begin; @@ -1636,7 +2186,7 @@ strip_crs (CT ct, int *message_mods) { size_t i; int last_char_was_cr = 0; - if (end > 0) bytes_to_read -= bytes_read; + if (end > 0) { bytes_to_read -= bytes_read; } for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) { if (*cp == '\n' && last_char_was_cr) { @@ -1650,8 +2200,14 @@ strip_crs (CT ct, int *message_mods) { if (has_crs) { int fd; - char *stripped_content_file = - add (m_mktemp2 (tmp, invo_name, &fd, NULL), NULL); + char *stripped_content_file; + char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL); + + if (tempfile == NULL) { + adios (NULL, "unable to create temporary file in %s", + get_temp_dir()); + } + stripped_content_file = add (tempfile, NULL); /* Strip each CR before a LF from the content. */ fseeko (*fp, begin, SEEK_SET); @@ -1665,27 +2221,34 @@ strip_crs (CT ct, int *message_mods) { if (*cp == '\r') { last_char_was_cr = 1; } else if (last_char_was_cr) { - if (*cp != '\n') write (fd, "\r", 1); - write (fd, cp, 1); + if (*cp != '\n') { + if (write (fd, "\r", 1) < 0) { + advise (tempfile, "CR write"); + } + } + if (write (fd, cp, 1) < 0) { + advise (tempfile, "write"); + } last_char_was_cr = 0; } else { - write (fd, cp, 1); + if (write (fd, cp, 1) < 0) { + advise (tempfile, "write"); + } last_char_was_cr = 0; } - } } if (close (fd)) { - admonish (NULL, "unable to write temporaty file %s", + admonish (NULL, "unable to write temporary file %s", stripped_content_file); - unlink (stripped_content_file); + (void) m_unlink (stripped_content_file); status = NOTOK; } else { /* Replace the decoded file with the converted one. */ if (ct->c_cefile.ce_file) { if (ct->c_cefile.ce_unlink) { - unlink (ct->c_cefile.ce_file); + (void) m_unlink (ct->c_cefile.ce_file); } free (ct->c_cefile.ce_file); } @@ -1694,7 +2257,9 @@ strip_crs (CT ct, int *message_mods) { ++*message_mods; if (verbosw) { - report (NULL, *file, "stripped CRs"); + report (NULL, ct->c_partno, + begin == 0 && end == 0 ? "" : *file, + "stripped CRs"); } } } @@ -1706,42 +2271,135 @@ strip_crs (CT ct, int *message_mods) { } } - free (codeset); + free (charset); + return status; } -char * -content_codeset (CT ct) { - const char *const charset = "charset"; - char *default_codeset = NULL; - CI ctinfo = &ct->c_ctinfo; - char **ap, **vp; - char **src_codeset = NULL; - - for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) { - if (! strcasecmp (*ap, charset)) { - src_codeset = vp; - break; +/* + * Add/update, if necessary, the message C-T-E, based on the least restrictive + * of the part C-T-E's. + */ +static void +update_cte (CT ct) { + const int least_restrictive_enc = least_restrictive_encoding (ct); + + if (least_restrictive_enc != CE_UNKNOWN && + least_restrictive_enc != CE_7BIT) { + char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL); + HF hf; + int found_cte = 0; + + /* Update/add Content-Transfer-Encoding header field. */ + for (hf = ct->c_first_hf; hf; hf = hf->next) { + if (! strcasecmp (ENCODING_FIELD, hf->name)) { + found_cte = 1; + free (hf->value); + hf->value = cte; + } + } + if (! found_cte) { + add_header (ct, add (ENCODING_FIELD, NULL), cte); + } + } +} + + +/* + * Find the least restrictive encoding (7bit, 8bit, binary) of the parts + * within a message. + */ +static int +least_restrictive_encoding (CT ct) { + int encoding = CE_UNKNOWN; + + switch (ct->c_type) { + case CT_MULTIPART: { + struct multipart *m = (struct multipart *) ct->c_ctparams; + struct part *part; + + for (part = m->mp_parts; part; part = part->mp_next) { + const int part_encoding = + least_restrictive_encoding (part->mp_part); + + if (less_restrictive (encoding, part_encoding)) { + encoding = part_encoding; + } } + break; } - /* RFC 2045, Sec. 5.2: default to us-ascii. */ - if (src_codeset == NULL) src_codeset = &default_codeset; - if (*src_codeset == NULL) *src_codeset = "US-ASCII"; + case CT_MESSAGE: + if (ct->c_subtype == MESSAGE_EXTERNAL) { + struct exbody *e = (struct exbody *) ct->c_ctparams; + const int part_encoding = + least_restrictive_encoding (e->eb_content); + + if (less_restrictive (encoding, part_encoding)) { + encoding = part_encoding; + } + } + break; + + default: { + if (less_restrictive (encoding, ct->c_encoding)) { + encoding = ct->c_encoding; + } + }} + + return encoding; +} + - return *src_codeset; +/* + * Return whether the second encoding is less restrictive than the first, where + * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So, + * CE_BINARY is less restrictive than CE_8BIT and + * CE_8BIT is less restrictive than CE_7BIT. + */ +static int +less_restrictive (int encoding, int second_encoding) { + switch (second_encoding) { + case CE_BINARY: + return encoding != CE_BINARY; + case CE_8BIT: + return encoding != CE_BINARY && encoding != CE_8BIT; + case CE_7BIT: + return encoding != CE_BINARY && encoding != CE_8BIT && + encoding != CE_7BIT; + default : + return 0; + } } +/* + * Convert character set of each part. + */ static int -convert_codesets (CT ct, char *dest_codeset, int *message_mods) { +convert_charsets (CT ct, char *dest_charset, int *message_mods) { int status = OK; switch (ct->c_type) { case CT_TEXT: if (ct->c_subtype == TEXT_PLAIN) { - status = convert_codeset (ct, dest_codeset, message_mods); + status = convert_charset (ct, dest_charset, message_mods); + if (status == OK) { + if (verbosw) { + char *ct_charset = content_charset (ct); + + report (NULL, ct->c_partno, ct->c_file, + "convert %s to %s", ct_charset, dest_charset); + free (ct_charset); + } + } else { + char *ct_charset = content_charset (ct); + + report ("iconv", ct->c_partno, ct->c_file, + "failed to convert %s to %s", ct_charset, dest_charset); + free (ct_charset); + } } break; @@ -1753,18 +2411,17 @@ convert_codesets (CT ct, char *dest_codeset, int *message_mods) { For now, it gets passed along as-is by InitMultiPart(). */ for (part = m->mp_parts; status == OK && part; part = part->mp_next) { status = - convert_codesets (part->mp_part, dest_codeset, message_mods); + convert_charsets (part->mp_part, dest_charset, message_mods); } break; } case CT_MESSAGE: if (ct->c_subtype == MESSAGE_EXTERNAL) { - struct exbody *e; + struct exbody *e = (struct exbody *) ct->c_ctparams; - e = (struct exbody *) ct->c_ctparams; status = - convert_codesets (e->eb_content, dest_codeset, message_mods); + convert_charsets (e->eb_content, dest_charset, message_mods); } break; @@ -1776,157 +2433,193 @@ convert_codesets (CT ct, char *dest_codeset, int *message_mods) { } +/* + * Fix various problems that aren't handled elsewhere. These + * are fixed unconditionally: there are no switches to disable + * them. Currently, "problems" are these: + * 1) remove extraneous semicolon at the end of a header parameter list + * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and + * filename parameters in Content-Type and Content-Disposition + * headers, respectively. + */ static int -convert_codeset (CT ct, char *dest_codeset, int *message_mods) { - char *src_codeset = content_codeset (ct); +fix_always (CT ct, int *message_mods) { int status = OK; - /* norm_charmap() is case sensitive. */ - char *src_codeset_u = upcase (src_codeset); - char *dest_codeset_u = upcase (dest_codeset); - int different_codesets = - strcmp (norm_charmap (src_codeset), norm_charmap (dest_codeset)); + switch (ct->c_type) { + case CT_MULTIPART: { + struct multipart *m = (struct multipart *) ct->c_ctparams; + struct part *part; + + for (part = m->mp_parts; status == OK && part; part = part->mp_next) { + status = fix_always (part->mp_part, message_mods); + } + break; + } - free (dest_codeset_u); - free (src_codeset_u); + case CT_MESSAGE: + if (ct->c_subtype == MESSAGE_EXTERNAL) { + struct exbody *e = (struct exbody *) ct->c_ctparams; - if (different_codesets) { -#ifdef HAVE_ICONV - iconv_t conv_desc = NULL; - char *dest; - int fd = -1; - char **file = NULL; - FILE **fp = NULL; - size_t begin; - size_t end; - int opened_input_file = 0; - char src_buffer[BUFSIZ]; + status = fix_always (e->eb_content, message_mods); + } + break; + + default: { HF hf; - if ((conv_desc = iconv_open (dest_codeset, src_codeset)) == - (iconv_t) -1) { - advise (NULL, "Can't convert %s to %s", src_codeset, dest_codeset); - return -1; + if (ct->c_first_hf) { + fix_filename_encoding (ct); } - dest = add (m_mktemp2 (tmp, invo_name, &fd, NULL), NULL); + for (hf = ct->c_first_hf; hf; hf = hf->next) { + size_t len = strlen (hf->value); - if (ct->c_cefile.ce_file) { - file = &ct->c_cefile.ce_file; - fp = &ct->c_cefile.ce_fp; - begin = end = 0; - } else if (ct->c_file) { - file = &ct->c_file; - fp = &ct->c_fp; - begin = (size_t) ct->c_begin; - end = (size_t) ct->c_end; - } /* else no input file: shouldn't happen */ + if (strcasecmp (hf->name, TYPE_FIELD) != 0 && + strcasecmp (hf->name, DISPO_FIELD) != 0) { + /* Only do this for Content-Type and + Content-Disposition fields because those are the + only headers that parse_mime() warns about. */ + continue; + } - if (file && *file && fp) { - if (! *fp) { - if ((*fp = fopen (*file, "r")) == NULL) { - advise (*file, "unable to open for reading"); - status = NOTOK; - } else { - opened_input_file = 1; + /* whitespace following a trailing ';' will be nuked as well */ + if (hf->value[len - 1] == '\n') { + while (isspace((unsigned char)(hf->value[len - 2]))) { + if (len-- == 0) { break; } } } - } - if (fp && *fp) { - size_t inbytes; - size_t bytes_to_read = - end > 0 && end > begin ? end - begin : sizeof src_buffer; + if (hf->value[len - 2] == ';') { + /* Remove trailing ';' from parameter value. */ + hf->value[len - 2] = '\n'; + hf->value[len - 1] = '\0'; + + /* Also, if Content-Type parameter, remove trailing ';' + from ct->c_ctline. This probably isn't necessary + but can't hurt. */ + if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) { + size_t l = strlen(ct->c_ctline) - 1; + while (isspace((unsigned char)(ct->c_ctline[l])) || + ct->c_ctline[l] == ';') { + ct->c_ctline[l--] = '\0'; + if (l == 0) { break; } + } + } - fseeko (*fp, begin, SEEK_SET); - while ((inbytes = fread (src_buffer, 1, - min (bytes_to_read, sizeof src_buffer), - *fp)) > 0) { - char dest_buffer[BUFSIZ]; - char *ib = src_buffer, *ob = dest_buffer; - size_t outbytes = sizeof dest_buffer; - size_t outbytes_before = outbytes; - - if (end > 0) bytes_to_read -= inbytes; - - if (iconv (conv_desc, &ib, &inbytes, &ob, &outbytes) == - (size_t) -1) { - status = NOTOK; - break; - } else { - write (fd, dest_buffer, outbytes_before - outbytes); + ++*message_mods; + if (verbosw) { + report (NULL, ct->c_partno, ct->c_file, + "remove trailing ; from %s parameter value", + hf->name); } } + } + }} - if (opened_input_file) { - fclose (*fp); - *fp = NULL; - } + return status; +} + + +/* + * Factor out common code for loops in fix_filename_encoding(). + */ +static int +fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) { + size_t value_len; + int fixed = 0; + + if (((value_len = strlen (value)) > 0) && + strncmp (value, "=?", 2) == 0 && + strncmp (&value[value_len - 2], "?=", 2) == 0) { + /* Looks like an RFC 2047 encoded parameter. */ + char decoded[PATH_MAX + 1]; + + if (decode_rfc2047 (value, decoded, sizeof decoded)) { + /* Encode using RFC 2231. */ + replace_param (first_pm, last_pm, name, decoded, 0); + fixed = 1; + } else { + advise (NULL, "failed to decode %s parameter %s", name, value); } + } - iconv_close (conv_desc); - close (fd); + return fixed; +} - if (status == OK) { - /* Replace the decoded file with the converted one. */ - if (ct->c_cefile.ce_file) { - if (ct->c_cefile.ce_unlink) { - unlink (ct->c_cefile.ce_file); - } - free (ct->c_cefile.ce_file); - } - ct->c_cefile.ce_file = dest; - ct->c_cefile.ce_unlink = 1; - ++*message_mods; - if (verbosw) { - report (ct->c_partno, ct->c_file, "convert %s to %s", - src_codeset, dest_codeset); - } +/* + * Replace RFC 2047 encoding with RFC 2231 encoding of name and + * filename parameters in Content-Type and Content-Disposition + * headers, respectively. + */ +static int +fix_filename_encoding (CT ct) { + PM pm; + HF hf; + int fixed = 0; + + for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) { + if (pm->pm_name && pm->pm_value && + strcasecmp (pm->pm_name, "name") == 0) { + fixed = fix_filename_param (pm->pm_name, pm->pm_value, + &ct->c_ctinfo.ci_first_pm, + &ct->c_ctinfo.ci_last_pm); + } + } - /* Update ci_attrs. */ - src_codeset = dest_codeset; + for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) { + if (pm->pm_name && pm->pm_value && + strcasecmp (pm->pm_name, "filename") == 0) { + fixed = fix_filename_param (pm->pm_name, pm->pm_value, + &ct->c_dispo_first, + &ct->c_dispo_last); + } + } - /* Update ct->c_ctline. */ - if (ct->c_ctline) { - char *ctline = - update_attr (ct->c_ctline, "charset=", dest_codeset); + /* Fix hf values to correspond. */ + for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) { + enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER; - free (ct->c_ctline); - ct->c_ctline = ctline; - } /* else no CT line, which is odd */ + if (strcasecmp (hf->name, TYPE_FIELD) == 0) { + field = TYPE_HEADER; + } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) { + field = DISPO_HEADER; + } - /* Update Content-Type header field. */ - for (hf = ct->c_first_hf; hf; hf = hf->next) { - if (! strcasecmp (TYPE_FIELD, hf->name)) { - char *ctline_less_newline = - update_attr (hf->value, "charset=", dest_codeset); - char *ctline = concat (ctline_less_newline, "\n", NULL); - free (ctline_less_newline); - - free (hf->value); - hf->value = ctline; - break; - } + if (field != OTHER) { + const char *const semicolon_loc = strchr (hf->value, ';'); + + if (semicolon_loc) { + const size_t len = + strlen (hf->name) + 1 + semicolon_loc - hf->value; + const char *const params = + output_params (len, + field == TYPE_HEADER + ? ct->c_ctinfo.ci_first_pm + : ct->c_dispo_first, + NULL, 0); + const char *const new_params = concat (params, "\n", NULL); + + replace_substring (&hf->value, semicolon_loc, new_params); + free ((char *) new_params); + free ((char *) params); + } else { + advise (NULL, "did not find semicolon in %s:%s\n", + hf->name, hf->value); } - } else { - unlink (dest); } -#else /* ! HAVE_ICONV */ - NMH_UNUSED (message_mods); - - advise (NULL, "Can't convert %s to %s without iconv", src_codeset, - dest_codeset); - status = NOTOK; -#endif /* ! HAVE_ICONV */ } - return status; + return OK; } +/* + * Output content in input file to output file. + */ static int -write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, +write_content (CT ct, const char *input_filename, char *outfile, int modify_inplace, int message_mods) { int status = OK; @@ -1958,9 +2651,9 @@ write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, } } } - if (new != -1) close (new); - if (old != -1) close (old); - unlink (outfile); + if (new != -1) { close (new); } + if (old != -1) { close (old); } + (void) m_unlink (outfile); if (i < 0) { /* The -file argument processing used path() to @@ -1976,7 +2669,7 @@ write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, } else { admonish (NULL, "unable to remove input file %s, " "not modifying it", infile); - unlink (outfile); + (void) m_unlink (outfile); status = NOTOK; } @@ -1986,7 +2679,7 @@ write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, } } else { /* No modifications and didn't need the tmp outfile. */ - unlink (outfile); + (void) m_unlink (outfile); } } else { /* Output is going to some file. Produce it whether or not @@ -1999,29 +2692,67 @@ write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, } +/* + * parse_mime() does not set lf_line_endings in struct text, so use this + * function to do it. It touches the parts the decodetypes identifies. + */ +static void +set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) { + switch (ct->c_type) { + case CT_MULTIPART: { + struct multipart *m = (struct multipart *) ct->c_ctparams; + struct part *part; + + for (part = m->mp_parts; part; part = part->mp_next) { + set_text_ctparams(part->mp_part, decodetypes, lf_line_endings); + } + break; + } + + case CT_MESSAGE: + if (ct->c_subtype == MESSAGE_EXTERNAL) { + struct exbody *e = (struct exbody *) ct->c_ctparams; + + set_text_ctparams(e->eb_content, decodetypes, lf_line_endings); + } + break; + + default: + if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) { + if (ct->c_ctparams == NULL) { + ct->c_ctparams = mh_xcalloc(1, sizeof (struct text)); + } + ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings; + } + } +} + + /* * If "rmmproc" is defined, call that to remove the file. Otherwise, * use the standard MH backup file. */ static int -remove_file (char *file) { +remove_file (const char *file) { if (rmmproc) { char *rmm_command = concat (rmmproc, " ", file, NULL); int status = system (rmm_command); free (rmm_command); return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK; - } else { - /* This is OK for a non-message file, it still uses the - BACKUP_PREFIX form. The backup file will be in the same - directory as file. */ - return rename (file, m_backup (file)); } + /* This is OK for a non-message file, it still uses the + BACKUP_PREFIX form. The backup file will be in the same + directory as file. */ + return rename (file, m_backup (file)); } +/* + * Output formatted message to user. + */ static void -report (char *partno, char *filename, char *message, ...) { +report (char *what, char *partno, char *filename, char *message, ...) { va_list args; char *fmt; @@ -2030,7 +2761,7 @@ report (char *partno, char *filename, char *message, ...) { fmt = concat (filename, partno ? " part " : ", ", partno ? partno : "", partno ? ", " : "", message, NULL); - advertise (NULL, NULL, fmt, args); + advertise (what, NULL, fmt, args); free (fmt); va_end (args); @@ -2038,17 +2769,6 @@ report (char *partno, char *filename, char *message, ...) { } -static char * -upcase (char *str) { - char *up = cpytrim (str); - char *cp; - - for (cp = up; *cp; ++cp) *cp = toupper ((unsigned char) *cp); - - return up; -} - - static void pipeser (int i) {