X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/43242c623638e8b4fc23b46d61dea80765c7a332..c9794733:/uip/mhfixmsg.c?ds=inline diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c index 8c5268ef..069e3284 100644 --- a/uip/mhfixmsg.c +++ b/uip/mhfixmsg.c @@ -12,17 +12,16 @@ #include #include #include -#ifdef HAVE_ICONV -# include -#endif #define MHFIXMSG_SWITCHES \ X("decodetext 8bit|7bit", 0, DECODETEXTSW) \ X("nodecodetext", 0, NDECODETEXTSW) \ - X("textcodeset", 0, TEXTCODESETSW) \ - X("notextcodeset", 0, NTEXTCODESETSW) \ + X("textcharset", 0, TEXTCHARSETSW) \ + X("notextcharset", 0, NTEXTCHARSETSW) \ X("reformat", 0, REFORMATSW) \ X("noreformat", 0, NREFORMATSW) \ + X("replacetextplain", 0, REPLACETEXTPLAINSW) \ + X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \ X("fixboundary", 0, FIXBOUNDARYSW) \ X("nofixboundary", 0, NFIXBOUNDARYSW) \ X("fixcte", 0, FIXCTESW) \ @@ -51,7 +50,6 @@ int debugsw; /* Needed by mhparse.c. */ #define quitser pipeser /* mhparse.c */ -extern char *tmp; /* directory to place tmp files */ extern int skip_mp_cte_check; /* flag to InitMultiPart */ extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */ extern int bogus_mp_content; /* flag from InitMultiPart */ @@ -62,7 +60,7 @@ void reverse_parts (CT); int output_message (CT, char *); /* mhshowsbr.c */ -int show_content_aux (CT, int, int, char *, char *); +int show_content_aux (CT, int, char *, char *); /* mhmisc.c */ void flush_errors (void); @@ -78,19 +76,19 @@ typedef struct fix_transformations { int fixboundary; int fixcte; int reformat; + int replacetextplain; int decodetext; - char *textcodeset; + char *textcharset; } fix_transformations; int mhfixmsgsbr (CT *, const fix_transformations *, char *); static void reverse_alternative_parts (CT); static int fix_boundary (CT *, int *); static int get_multipart_boundary (CT, char **); -static int replace_boundary (CT, char *, const char *); -static char *update_attr (char *, const char *, const char *e); +static int replace_boundary (CT, char *, char *); static int fix_multipart_cte (CT, int *); static int set_ce (CT, int); -static int ensure_text_plain (CT *, CT, int *); +static int ensure_text_plain (CT *, CT, int *, int); static CT build_text_plain_part (CT); static CT divide_part (CT); static void copy_ctinfo (CI, CI); @@ -102,13 +100,12 @@ static int boundary_in_content (FILE **, char *, const char *); static void transfer_noncontent_headers (CT, CT); static int set_ct_type (CT, int type, int subtype, int encoding); static int decode_text_parts (CT, int, int *); -static int content_encoding (CT); -static int convert_codesets (CT, char *, int *); -static int convert_codeset (CT, char *, int *); +static int content_encoding (CT, const char **); +static int strip_crs (CT, int *); +static int convert_charsets (CT, char *, int *); static int write_content (CT, char *, char *, int, int); static int remove_file (char *); -static void report (char *, char *, char *, ...); -static char *upcase (char *); +static void report (char *, char *, char *, char *, ...); static void pipeser (int); @@ -126,18 +123,13 @@ main (int argc, char **argv) { int status = OK; fix_transformations fx; fx.reformat = fx.fixcte = fx.fixboundary = 1; + fx.replacetextplain = 0; fx.decodetext = CE_8BIT; - fx.textcodeset = NULL; + fx.textcharset = NULL; - done = freects_done; - -#ifdef LOCALE - setlocale(LC_ALL, ""); -#endif - invo_name = r1bindex (argv[0], '/'); + if (nmh_init(argv[0], 1)) { return 1; } - /* read user profile/context */ - context_read(); + done = freects_done; arguments = getarguments (invo_name, argc, argv, 1); argp = arguments; @@ -177,13 +169,13 @@ main (int argc, char **argv) { case NDECODETEXTSW: fx.decodetext = 0; continue; - case TEXTCODESETSW: + case TEXTCHARSETSW: if (! (cp = *argp++) || (*cp == '-' && cp[1])) adios (NULL, "missing argument to %s", argp[-2]); - fx.textcodeset = cp; + fx.textcharset = cp; continue; - case NTEXTCODESETSW: - fx.textcodeset = 0; + case NTEXTCHARSETSW: + fx.textcharset = 0; continue; case FIXBOUNDARYSW: fx.fixboundary = 1; @@ -203,19 +195,22 @@ main (int argc, char **argv) { case NREFORMATSW: fx.reformat = 0; continue; - + case REPLACETEXTPLAINSW: + fx.replacetextplain = 1; + continue; + case NREPLACETEXTPLAINSW: + fx.replacetextplain = 0; + continue; case FILESW: if (! (cp = *argp++) || (*cp == '-' && cp[1])) adios (NULL, "missing argument to %s", argp[-2]); file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE); continue; - case OUTFILESW: if (! (cp = *argp++) || (*cp == '-' && cp[1])) adios (NULL, "missing argument to %s", argp[-2]); outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE); continue; - case RPROCSW: if (!(rmmproc = *argp++) || *rmmproc == '-') adios (NULL, "missing argument to %s", argp[-2]); @@ -223,7 +218,6 @@ main (int argc, char **argv) { case NRPRCSW: rmmproc = NULL; continue; - case VERBSW: verbosw = 1; continue; @@ -237,8 +231,14 @@ main (int argc, char **argv) { adios (NULL, "only one folder at a time!"); else folder = pluspath (cp); - } else - app_msgarg(&msgs, cp); + } else { + if (*cp == '/') { + /* Interpret a full path as a filename, not a message. */ + file = add (cp, NULL); + } else { + app_msgarg (&msgs, cp); + } + } } SIGNAL (SIGQUIT, quitser); @@ -252,16 +252,6 @@ main (int argc, char **argv) { fclose (fp); } - /* - * Check for storage directory. If specified, - * then store temporary files there. Else we - * store them in standard nmh directory. - */ - if ((cp = context_find (nmhstorage)) && *cp) - tmp = concat (cp, "/", invo_name, NULL); - else - tmp = add (m_maildir (invo_name), NULL); - suppress_bogus_mp_content_warning = skip_mp_cte_check = 1; if (! context_find ("path")) @@ -286,17 +276,17 @@ main (int argc, char **argv) { using_stdin = 1; - if ((cp = m_mktemp2 (tmp, invo_name, &fd, NULL)) == NULL) { - adios (NULL, "unable to create temporary file"); + if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) { + adios (NULL, "unable to create temporary file in %s", + get_temp_dir()); } else { free (file); file = add (cp, NULL); - chmod (file, 0600); cpydata (STDIN_FILENO, fd, "-", file); } if (close (fd)) { - unlink (file); + (void) m_unlink (file); adios (NULL, "failed to write temporary file"); } } @@ -359,7 +349,7 @@ main (int argc, char **argv) { status += mhfixmsgsbr (ctp, &fx, outfile); if (using_stdin) { - unlink (file); + (void) m_unlink (file); if (! outfile) { /* Just calling m_backup() unlinks the backup file. */ @@ -372,7 +362,6 @@ main (int argc, char **argv) { } free (outfile); - free (tmp); free (file); /* done is freects_done, which will clean up all of cts. */ @@ -394,7 +383,12 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { modify_inplace = 1; if ((*ctp)->c_file) { - outfile = add (m_mktemp2 (tmp, invo_name, NULL, NULL), NULL); + char *tempfile; + if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) { + adios (NULL, "unable to create temporary file in %s", + get_temp_dir()); + } + outfile = add (tempfile, NULL); } else { adios (NULL, "missing both input and output filenames\n"); } @@ -408,13 +402,14 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { status = fix_multipart_cte (*ctp, &message_mods); } if (status == OK && fx->reformat) { - status = ensure_text_plain (ctp, NULL, &message_mods); + status = + ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain); } if (status == OK && fx->decodetext) { status = decode_text_parts (*ctp, fx->decodetext, &message_mods); } - if (status == OK && fx->textcodeset != NULL) { - status = convert_codesets (*ctp, fx->textcodeset, &message_mods); + if (status == OK && fx->textcharset != NULL) { + status = convert_charsets (*ctp, fx->textcharset, &message_mods); } if (! (*ctp)->c_umask) { @@ -455,7 +450,7 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { } if (modify_inplace) { - if (status != OK) unlink (outfile); + if (status != OK) (void) m_unlink (outfile); free (outfile); outfile = NULL; } @@ -511,7 +506,7 @@ fix_boundary (CT *ct, int *message_mods) { if (get_multipart_boundary (*ct, &part_boundary) == OK) { char *fixed; - if ((fixed = m_mktemp2 (tmp, invo_name, NULL, &(*ct)->c_fp))) { + if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) { if (replace_boundary (*ct, fixed, part_boundary) == OK) { char *filename = add ((*ct)->c_file, NULL); @@ -521,7 +516,7 @@ fix_boundary (CT *ct, int *message_mods) { ++*message_mods; if (verbosw) { - report (NULL, filename, + report (NULL, NULL, filename, "fix multipart boundary"); } } @@ -531,7 +526,8 @@ fix_boundary (CT *ct, int *message_mods) { status = NOTOK; } } else { - advise (NULL, "unable to create temporary file"); + advise (NULL, "unable to create temporary file in %s", + get_temp_dir()); status = NOTOK; } @@ -567,10 +563,11 @@ get_multipart_boundary (CT ct, char **part_boundary) { while (begin >= (off_t) ct->c_begin) { fseeko (ct->c_fp, begin, SEEK_SET); while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) { - char *end = buffer + bytes_read - 1; - char *cp; + char *cp = rfind_str (buffer, bytes_read, "--"); + + if (cp) { + char *end; - if ((cp = rfind_str (buffer, bytes_read, "--"))) { /* Trim off trailing "--" and anything beyond. */ *cp-- = '\0'; if ((end = rfind_str (buffer, cp - buffer, "\n"))) { @@ -626,7 +623,7 @@ get_multipart_boundary (CT ct, char **part_boundary) { /* Open and copy ct->c_file to file, replacing the multipart boundary. */ static int -replace_boundary (CT ct, char *file, const char *boundary) { +replace_boundary (CT ct, char *file, char *boundary) { FILE *fpin, *fpout; int compnum, state; char buf[BUFSIZ], name[NAMESZ]; @@ -672,10 +669,22 @@ replace_boundary (CT ct, char *file, const char *boundary) { if (strcasecmp (TYPE_FIELD, np)) { fprintf (fpout, "%s:%s", np, vp); } else { - char *new_boundary = update_attr (vp, "boundary=", boundary); - - fprintf (fpout, "%s:%s\n", np, new_boundary); - free (new_boundary); + char *new_ctline, *new_params; + + replace_param(&ct->c_ctinfo.ci_first_pm, + &ct->c_ctinfo.ci_last_pm, "boundary", + boundary, 0); + + new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/", + ct->c_ctinfo.ci_subtype, NULL); + new_params = output_params(strlen(TYPE_FIELD) + + strlen(new_ctline) + 1, + ct->c_ctinfo.ci_first_pm, NULL, 0); + fprintf (fpout, "%s:%s%s\n", np, new_ctline, + new_params ? new_params : ""); + free(new_ctline); + if (new_params) + free(new_params); } free (vp); @@ -715,44 +724,6 @@ replace_boundary (CT ct, char *file, const char *boundary) { } -/* Change the value of a name=value pair in a header field body. - If the name isn't there, append them. In any case, a new - string will be allocated and must be free'd by the caller. - Trims any trailing newlines. */ -static char * -update_attr (char *body, const char *name, const char *value) { - char *bp = nmh_strcasestr (body, name); - char *new_body; - - if (bp) { - char *other_attrs = strchr (bp, ';'); - - *(bp + strlen (name)) = '\0'; - new_body = concat (body, "\"", value, "\"", NULL); - - if (other_attrs) { - char *cp; - - /* Trim any trailing newlines. */ - for (cp = &other_attrs[strlen (other_attrs) - 1]; - cp > other_attrs && *cp == '\n'; - *cp-- = '\0') continue; - new_body = add (other_attrs, new_body); - } - } else { - char *cp; - - /* Append name/value pair, after first removing a final newline - and (extraneous) semicolon. */ - if (*(cp = &body[strlen (body) - 1]) == '\n') *cp = '\0'; - if (*(cp = &body[strlen (body) - 1]) == ';') *cp = '\0'; - new_body = concat (body, "; ", name, "\"", value, "\"", NULL); - } - - return new_body; -} - - static int fix_multipart_cte (CT ct, int *message_mods) { int status = OK; @@ -788,7 +759,7 @@ fix_multipart_cte (CT ct, int *message_mods) { ++*message_mods; if (verbosw) { char *encoding = cpytrim (hf->value); - report (ct->c_partno, ct->c_file, + report (NULL, ct->c_partno, ct->c_file, "replace Content-Transfer-Encoding of %s " "with 8 bit", encoding); free (encoding); @@ -871,7 +842,7 @@ set_ce (CT ct, int encoding) { /* Make sure each text part has a corresponding text/plain part. */ static int -ensure_text_plain (CT *ct, CT parent, int *message_mods) { +ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) { int status = OK; switch ((*ct)->c_type) { @@ -884,17 +855,36 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { if (parent && parent->c_type == CT_MULTIPART && parent->c_subtype == MULTI_ALTERNATE) { struct multipart *mp = (struct multipart *) parent->c_ctparams; - struct part *part; + struct part *part, *prev; int new_subpart_number = 1; /* See if there is a sibling text/plain. */ - for (part = mp->mp_parts; part; part = part->mp_next) { + for (prev = part = mp->mp_parts; part; part = part->mp_next) { ++new_subpart_number; if (part->mp_part->c_type == CT_TEXT && part->mp_part->c_subtype == TEXT_PLAIN) { - has_text_plain = 1; + if (replacetextplain) { + struct part *old_part; + if (part == mp->mp_parts) { + old_part = mp->mp_parts; + mp->mp_parts = part->mp_next; + } else { + old_part = prev->mp_next; + prev->mp_next = part->mp_next; + } + if (verbosw) { + report (NULL, parent->c_partno, parent->c_file, + "remove text/plain part %s", + old_part->mp_part->c_partno); + } + free_content (old_part->mp_part); + free (old_part); + } else { + has_text_plain = 1; + } break; } + prev = part; } if (! has_text_plain) { @@ -914,7 +904,7 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { ++*message_mods; if (verbosw) { - report (parent->c_partno, parent->c_file, + report (NULL, parent->c_partno, parent->c_file, "insert text/plain part"); } } else { @@ -941,7 +931,7 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { ++*message_mods; if (verbosw) { - report ((*ct)->c_partno, (*ct)->c_file, + report (NULL, (*ct)->c_partno, (*ct)->c_file, "insert text/plain part"); } } else { @@ -965,7 +955,8 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { for (part = mp->mp_parts; status == OK && part; part = part->mp_next) { if ((*ct)->c_type == CT_MULTIPART) { - status = ensure_text_plain (&part->mp_part, *ct, message_mods); + status = ensure_text_plain (&part->mp_part, *ct, message_mods, + replacetextplain); } } break; @@ -976,7 +967,8 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { struct exbody *e; e = (struct exbody *) (*ct)->c_ctparams; - status = ensure_text_plain (&e->eb_content, *ct, message_mods); + status = ensure_text_plain (&e->eb_content, *ct, message_mods, + replacetextplain); } break; } @@ -995,7 +987,13 @@ build_text_plain_part (CT encoded_part) { contains the decoded contents. And the decoding function, such as openQuoted, will have set ...->ce_unlink to 1 so that it will be unlinked by free_content (). */ - tmp_plain_file = add (m_mktemp2 (tmp, invo_name, NULL, NULL), NULL); + char *tempfile; + + if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) { + advise (NULL, "unable to create temporary file in %s", + get_temp_dir()); + } + tmp_plain_file = add (tempfile, NULL); if (reformat_part (tp_part, tmp_plain_file, tp_part->c_ctinfo.ci_type, tp_part->c_ctinfo.ci_subtype, @@ -1005,7 +1003,7 @@ build_text_plain_part (CT encoded_part) { } free_content (tp_part); - unlink (tmp_plain_file); + (void) m_unlink (tmp_plain_file); free (tmp_plain_file); return NULL; @@ -1043,19 +1041,19 @@ divide_part (CT ct) { static void copy_ctinfo (CI dest, CI src) { - char **s_ap, **d_ap, **s_vp, **d_vp; + PM s_pm, d_pm; dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL; dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL; - for (s_ap = src->ci_attrs, d_ap = dest->ci_attrs, - s_vp = src->ci_values, d_vp = dest->ci_values; - *s_ap; - ++s_ap, ++d_ap, ++s_vp, ++d_vp) { - *d_ap = add (*s_ap, NULL); - *d_vp = *s_vp; + for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) { + d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name, + s_pm->pm_value, 0); + if (s_pm->pm_charset) + d_pm->pm_charset = getcpy(s_pm->pm_charset); + if (s_pm->pm_lang) + d_pm->pm_lang = getcpy(s_pm->pm_lang); } - *d_ap = NULL; dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL; dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL; @@ -1066,13 +1064,17 @@ static int decode_part (CT ct) { char *tmp_decoded; int status; + char *tempfile; - tmp_decoded = add (m_mktemp2 (tmp, invo_name, NULL, NULL), NULL); + if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) { + adios (NULL, "unable to create temporary file in %s", get_temp_dir()); + } + tmp_decoded = add (tempfile, NULL); /* The following call will load ct->c_cefile.ce_file with the tmp filename of the decoded content. tmp_decoded will contain the encoded output, get rid of that. */ status = output_message (ct, tmp_decoded); - unlink (tmp_decoded); + (void) m_unlink (tmp_decoded); free (tmp_decoded); return status; @@ -1126,13 +1128,13 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { free (cp); cp = concat (cf, " >", file, NULL); - status = show_content_aux (ct, 1, 0, cp, NULL); + status = show_content_aux (ct, 0, cp, NULL); free (cp); /* Unlink decoded content tmp file and free its filename to avoid leaks. The file stream should already have been closed. */ if (ct->c_cefile.ce_unlink) { - unlink (ct->c_cefile.ce_file); + (void) m_unlink (ct->c_cefile.ce_file); free (ct->c_cefile.ce_file); ct->c_cefile.ce_file = NULL; ct->c_cefile.ce_unlink = 0; @@ -1158,24 +1160,15 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { } -/* Identifies 7bit or 8bit content based on charset, if specified. */ +/* Identifies 7bit or 8bit content based on charset. */ static int charset_encoding (CT ct) { - int encoding = CE_8BIT; - CI ctinfo = &ct->c_ctinfo; - char **ap, **vp; - - for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) { - if (! strcasecmp (*ap, "charset")) { - /* norm_charmap() is case sensitive. */ - char *ch = upcase (*vp); - - if (! strcmp (norm_charmap (ch), "US-ASCII")) encoding = CE_7BIT; - free (ch); - break; - } - } + /* norm_charmap() is case sensitive. */ + char *charset = upcase (content_charset (ct)); + int encoding = + strcmp (norm_charmap (charset), "US-ASCII") ? CE_8BIT : CE_7BIT; + free (charset); return encoding; } @@ -1189,7 +1182,6 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { CT ct; struct part *p; struct multipart *m; - char *cp; const struct str2init *ctinit; if ((ct = (CT) calloc (1, sizeof *ct)) == NULL) @@ -1204,7 +1196,7 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { c_cefile, c_encoding, c_digested, c_digest[16], c_ctexbody, c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx, - c_umask, c_pid, c_rfc934, + c_umask, c_rfc934, c_showproc, c_termproc, c_storeproc, c_storage, c_folder */ @@ -1231,6 +1223,7 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { boundary_in_content (&new_part->c_cefile.ce_fp, new_part->c_cefile.ce_file, boundary)) == -1) { + free (ct); return NULL; } } @@ -1241,6 +1234,7 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { if ((found_boundary = boundary_in_content (&new_part->c_fp, new_part->c_file, boundary)) == -1) { + free (ct); return NULL; } } @@ -1261,6 +1255,7 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { if (found_boundary) { advise (NULL, "giving up trying to find a unique boundary"); + free (ct); return NULL; } } @@ -1289,14 +1284,8 @@ build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) { ct->c_ctinfo.ci_subtype = add (subtypename, NULL); } - name = concat (" ", typename, "/", subtypename, boundary_indicator, - boundary, NULL); - if ((cp = strstr (name, boundary_indicator))) { - ct->c_ctinfo.ci_attrs[0] = name; - ct->c_ctinfo.ci_attrs[1] = NULL; - /* ci_values don't get free'd, so point into ci_attrs. */ - ct->c_ctinfo.ci_values[0] = cp + strlen (boundary_indicator); - } + add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm, + "boundary", boundary, 0); p = (struct part *) mh_xmalloc (sizeof *p); p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next); @@ -1450,18 +1439,21 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { int ct_encoding; if (decode_part (ct) == OK && ct->c_cefile.ce_file) { - if ((ct_encoding = content_encoding (ct)) == CE_BINARY && - encoding != CE_BINARY) { + const char *reason = NULL; + + if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY + && encoding != CE_BINARY) { /* The decoding isn't acceptable so discard it. Leave status as OK to allow other transformations. */ if (verbosw) { - report (ct->c_partno, ct->c_file, - "will not decode%s because it is binary", + report (NULL, ct->c_partno, ct->c_file, + "will not decode%s because it is binary (%s)", ct->c_partno ? "" : ct->c_ctline ? ct->c_ctline - : ""); + : "", + reason); } - unlink (ct->c_cefile.ce_file); + (void) m_unlink (ct->c_cefile.ce_file); free (ct->c_cefile.ce_file); ct->c_cefile.ce_file = NULL; } else if (ct->c_encoding == CE_QUOTED && @@ -1469,13 +1461,13 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { /* The decoding isn't acceptable so discard it. Leave status as OK to allow other transformations. */ if (verbosw) { - report (ct->c_partno, ct->c_file, + report (NULL, ct->c_partno, ct->c_file, "will not decode%s because it is 8bit", ct->c_partno ? "" : ct->c_ctline ? ct->c_ctline : ""); } - unlink (ct->c_cefile.ce_file); + (void) m_unlink (ct->c_cefile.ce_file); free (ct->c_cefile.ce_file); ct->c_cefile.ce_file = NULL; } else { @@ -1489,9 +1481,10 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { if (set_ce (ct, enc) == OK) { ++*message_mods; if (verbosw) { - report (ct->c_partno, ct->c_file, "decode%s", + report (NULL, ct->c_partno, ct->c_file, "decode%s", ct->c_ctline ? ct->c_ctline : ""); } + strip_crs (ct, message_mods); } else { status = NOTOK; } @@ -1501,9 +1494,14 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { } break; } + case CE_8BIT: + case CE_7BIT: + strip_crs (ct, message_mods); + break; default: break; } + break; case CT_MULTIPART: { @@ -1537,13 +1535,15 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { /* See if the decoded content is 7bit, 8bit, or binary. It's binary if it has any NUL characters, a CR not followed by a LF, or lines - greater than 998 characters in length. */ + greater than 998 characters in length. If binary, reason is set + to a string explaining why. */ static int -content_encoding (CT ct) { +content_encoding (CT ct, const char **reason) { CE ce = &ct->c_cefile; int encoding = CE_7BIT; if (ce->ce_file) { + size_t line_len = 0; char buffer[BUFSIZ]; size_t inbytes; @@ -1557,13 +1557,22 @@ content_encoding (CT ct) { (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) { char *cp; size_t i; - size_t line_len = 0; int last_char_was_cr = 0; for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) { if (*cp == '\0' || ++line_len > 998 || (*cp != '\n' && last_char_was_cr)) { encoding = CE_BINARY; + if (*cp == '\0') { + *reason = "null character"; + } else if (line_len > 998) { + *reason = "line length > 998"; + } else if (*cp != '\n' && last_char_was_cr) { + *reason = "CR not followed by LF"; + } else { + /* Should not reach this. */ + *reason = ""; + } break; } else if (*cp == '\n') { line_len = 0; @@ -1584,98 +1593,37 @@ content_encoding (CT ct) { static int -convert_codesets (CT ct, char *dest_codeset, int *message_mods) { +strip_crs (CT ct, int *message_mods) { + /* norm_charmap() is case sensitive. */ + char *charset = upcase (content_charset (ct)); int status = OK; - switch (ct->c_type) { - case CT_TEXT: - if (ct->c_subtype == TEXT_PLAIN) { - status = convert_codeset (ct, dest_codeset, message_mods); - } - break; - - case CT_MULTIPART: { - struct multipart *m = (struct multipart *) ct->c_ctparams; - struct part *part; - - /* Should check to see if the body for this part is encoded? - For now, it gets passed along as-is by InitMultiPart(). */ - for (part = m->mp_parts; status == OK && part; part = part->mp_next) { - status = - convert_codesets (part->mp_part, dest_codeset, message_mods); - } - break; - } - - case CT_MESSAGE: - if (ct->c_subtype == MESSAGE_EXTERNAL) { - struct exbody *e; - - e = (struct exbody *) ct->c_ctparams; - status = - convert_codesets (e->eb_content, dest_codeset, message_mods); - } - break; - - default: - break; - } - - return status; -} - - -static int -convert_codeset (CT ct, char *dest_codeset, int *message_mods) { - const char *const charset = "charset"; - char **src_codeset = NULL; - char *default_codeset = NULL; - CI ctinfo = &ct->c_ctinfo; - char **ap, **vp; - int status = OK; - - for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) { - if (! strcasecmp (*ap, charset)) { - src_codeset = vp; - break; - } - } - /* RFC 2045, Sec. 5.2: default to us-ascii. */ - if (src_codeset == NULL) src_codeset = &default_codeset; - if (*src_codeset == NULL) *src_codeset = "US-ASCII"; - - if (strcmp (norm_charmap (*src_codeset), norm_charmap (dest_codeset))) { -#ifdef HAVE_ICONV - iconv_t conv_desc = NULL; - char *dest; - int fd = -1; + /* Only strip carriage returns if content is ASCII or another + charset that has the same readily recognizable CR followed by a + LF. We can include UTF-8 here because if the high-order bit of + a UTF-8 byte is 0, then it must be a single-byte ASCII + character. */ + if (! strcmp (norm_charmap (charset), "US-ASCII") || + ! strncmp (norm_charmap (charset), "ISO-8859-", 9) || + ! strncmp (norm_charmap (charset), "UTF-8", 5) || + ! strncmp (norm_charmap (charset), "WINDOWS-12", 10)) { char **file = NULL; FILE **fp = NULL; - long begin; - long end; + size_t begin; + size_t end; + int has_crs = 0; int opened_input_file = 0; - char src_buffer[BUFSIZ]; - HF hf; - - if ((conv_desc = iconv_open (dest_codeset, *src_codeset)) == - (iconv_t) -1) { - advise (NULL, "Can't convert %s to %s", *src_codeset, dest_codeset); - return -1; - } - - dest = add (m_mktemp2 (tmp, invo_name, &fd, NULL), NULL); if (ct->c_cefile.ce_file) { file = &ct->c_cefile.ce_file; fp = &ct->c_cefile.ce_fp; - begin = 0; - end = -1; + begin = end = 0; } else if (ct->c_file) { file = &ct->c_file; fp = &ct->c_fp; - begin = ct->c_begin; - end = ct->c_end; - } /* else no input file: shouldn't happen */ + begin = (size_t) ct->c_begin; + end = (size_t) ct->c_end; + } /* else don't know where the content is */ if (file && *file && fp) { if (! *fp) { @@ -1689,25 +1637,89 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) { } if (fp && *fp) { - size_t inbytes; - size_t max = end > 0 ? (size_t) (end-begin) : sizeof src_buffer; + char buffer[BUFSIZ]; + size_t bytes_read; + size_t bytes_to_read = + end > 0 && end > begin ? end - begin : sizeof buffer; fseeko (*fp, begin, SEEK_SET); - while (status == OK && max > 0 && - (inbytes = fread (src_buffer, 1, max, *fp)) > 0) { - char dest_buffer[BUFSIZ]; - char *ib = src_buffer, *ob = dest_buffer; - size_t outbytes = sizeof dest_buffer; - size_t outbytes_before = outbytes; + while ((bytes_read = fread (buffer, 1, + min (bytes_to_read, sizeof buffer), + *fp)) > 0) { + /* Look for CR followed by a LF. This is supposed to + be text so there should be LF's. If not, don't + modify the content. */ + char *cp; + size_t i; + int last_char_was_cr = 0; + + if (end > 0) bytes_to_read -= bytes_read; + + for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) { + if (*cp == '\n' && last_char_was_cr) { + has_crs = 1; + break; + } - if (end > 0) max -= inbytes; + last_char_was_cr = *cp == '\r' ? 1 : 0; + } + } + + if (has_crs) { + int fd; + char *stripped_content_file; + char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL); + + if (tempfile == NULL) { + adios (NULL, "unable to create temporary file in %s", + get_temp_dir()); + } + stripped_content_file = add (tempfile, NULL); + + /* Strip each CR before a LF from the content. */ + fseeko (*fp, begin, SEEK_SET); + while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > + 0) { + char *cp; + size_t i; + int last_char_was_cr = 0; + + for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) { + if (*cp == '\r') { + last_char_was_cr = 1; + } else if (last_char_was_cr) { + if (*cp != '\n') write (fd, "\r", 1); + write (fd, cp, 1); + last_char_was_cr = 0; + } else { + write (fd, cp, 1); + last_char_was_cr = 0; + } + } + } - if (iconv (conv_desc, &ib, &inbytes, &ob, &outbytes) == - (size_t) -1) { + if (close (fd)) { + admonish (NULL, "unable to write temporary file %s", + stripped_content_file); + (void) m_unlink (stripped_content_file); status = NOTOK; - break; } else { - write (fd, dest_buffer, outbytes_before - outbytes); + /* Replace the decoded file with the converted one. */ + if (ct->c_cefile.ce_file) { + if (ct->c_cefile.ce_unlink) { + (void) m_unlink (ct->c_cefile.ce_file); + } + free (ct->c_cefile.ce_file); + } + ct->c_cefile.ce_file = stripped_content_file; + ct->c_cefile.ce_unlink = 1; + + ++*message_mods; + if (verbosw) { + report (NULL, ct->c_partno, + begin == 0 && end == 0 ? "" : *file, + "stripped CRs"); + } } } @@ -1716,62 +1728,60 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) { *fp = NULL; } } + } - iconv_close (conv_desc); - close (fd); + free (charset); + return status; +} - if (status == OK) { - /* Replace the decoded file with the converted one. */ - if (ct->c_cefile.ce_file) { - if (ct->c_cefile.ce_unlink) { - unlink (ct->c_cefile.ce_file); - } - free (ct->c_cefile.ce_file); - } - ct->c_cefile.ce_file = dest; - ct->c_cefile.ce_unlink = 1; - ++*message_mods; - if (verbosw) { - report (ct->c_partno, ct->c_file, "convert %s to %s", - *src_codeset, dest_codeset); +static int +convert_charsets (CT ct, char *dest_charset, int *message_mods) { + int status = OK; + + switch (ct->c_type) { + case CT_TEXT: + if (ct->c_subtype == TEXT_PLAIN) { + status = convert_charset (ct, dest_charset, message_mods); + if (status == OK) { + if (verbosw) { + report (NULL, ct->c_partno, ct->c_file, + "convert %s to %s", + content_charset(ct), dest_charset); + } + } else { + report ("iconv", ct->c_partno, ct->c_file, + "failed to convert %s to %s", + content_charset(ct), dest_charset); } + } + break; - /* Update ci_attrs. */ - *src_codeset = dest_codeset; + case CT_MULTIPART: { + struct multipart *m = (struct multipart *) ct->c_ctparams; + struct part *part; - /* Update ct->c_ctline. */ - if (ct->c_ctline) { - char *ctline = - update_attr (ct->c_ctline, "charset=", dest_codeset); + /* Should check to see if the body for this part is encoded? + For now, it gets passed along as-is by InitMultiPart(). */ + for (part = m->mp_parts; status == OK && part; part = part->mp_next) { + status = + convert_charsets (part->mp_part, dest_charset, message_mods); + } + break; + } - free (ct->c_ctline); - ct->c_ctline = ctline; - } /* else no CT line, which is odd */ + case CT_MESSAGE: + if (ct->c_subtype == MESSAGE_EXTERNAL) { + struct exbody *e; - /* Update Content-Type header field. */ - for (hf = ct->c_first_hf; hf; hf = hf->next) { - if (! strcasecmp (TYPE_FIELD, hf->name)) { - char *ctline_less_newline = - update_attr (hf->value, "charset=", dest_codeset); - char *ctline = concat (ctline_less_newline, "\n", NULL); - free (ctline_less_newline); - - free (hf->value); - hf->value = ctline; - break; - } - } - } else { - unlink (dest); + e = (struct exbody *) ct->c_ctparams; + status = + convert_charsets (e->eb_content, dest_charset, message_mods); } -#else /* ! HAVE_ICONV */ - NMH_UNUSED (message_mods); + break; - advise (NULL, "Can't convert %s to %s without iconv", *src_codeset, - dest_codeset); - status = NOTOK; -#endif /* ! HAVE_ICONV */ + default: + break; } return status; @@ -1813,7 +1823,7 @@ write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, } if (new != -1) close (new); if (old != -1) close (old); - unlink (outfile); + (void) m_unlink (outfile); if (i < 0) { /* The -file argument processing used path() to @@ -1829,7 +1839,7 @@ write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, } else { admonish (NULL, "unable to remove input file %s, " "not modifying it", infile); - unlink (outfile); + (void) m_unlink (outfile); status = NOTOK; } @@ -1839,7 +1849,7 @@ write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, } } else { /* No modifications and didn't need the tmp outfile. */ - unlink (outfile); + (void) m_unlink (outfile); } } else { /* Output is going to some file. Produce it whether or not @@ -1874,7 +1884,7 @@ remove_file (char *file) { static void -report (char *partno, char *filename, char *message, ...) { +report (char *what, char *partno, char *filename, char *message, ...) { va_list args; char *fmt; @@ -1883,7 +1893,7 @@ report (char *partno, char *filename, char *message, ...) { fmt = concat (filename, partno ? " part " : ", ", partno ? partno : "", partno ? ", " : "", message, NULL); - advertise (NULL, NULL, fmt, args); + advertise (what, NULL, fmt, args); free (fmt); va_end (args); @@ -1891,17 +1901,6 @@ report (char *partno, char *filename, char *message, ...) { } -static char * -upcase (char *str) { - char *up = cpytrim (str); - char *cp; - - for (cp = up; *cp; ++cp) *cp = toupper ((unsigned char) *cp); - - return up; -} - - static void pipeser (int i) {