X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/43242c623638e8b4fc23b46d61dea80765c7a332..4d0e01fe1da830a6e8fdf13bd6bc21603642d5da:/uip/mhfixmsg.c diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c index 8c5268ef..fd777699 100644 --- a/uip/mhfixmsg.c +++ b/uip/mhfixmsg.c @@ -23,6 +23,8 @@ X("notextcodeset", 0, NTEXTCODESETSW) \ X("reformat", 0, REFORMATSW) \ X("noreformat", 0, NREFORMATSW) \ + X("replacetextplain", 0, REPLACETEXTPLAINSW) \ + X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \ X("fixboundary", 0, FIXBOUNDARYSW) \ X("nofixboundary", 0, NFIXBOUNDARYSW) \ X("fixcte", 0, FIXCTESW) \ @@ -78,6 +80,7 @@ typedef struct fix_transformations { int fixboundary; int fixcte; int reformat; + int replacetextplain; int decodetext; char *textcodeset; } fix_transformations; @@ -90,7 +93,7 @@ static int replace_boundary (CT, char *, const char *); static char *update_attr (char *, const char *, const char *e); static int fix_multipart_cte (CT, int *); static int set_ce (CT, int); -static int ensure_text_plain (CT *, CT, int *); +static int ensure_text_plain (CT *, CT, int *, int); static CT build_text_plain_part (CT); static CT divide_part (CT); static void copy_ctinfo (CI, CI); @@ -103,8 +106,10 @@ static void transfer_noncontent_headers (CT, CT); static int set_ct_type (CT, int type, int subtype, int encoding); static int decode_text_parts (CT, int, int *); static int content_encoding (CT); +static int strip_crs (CT, int *); static int convert_codesets (CT, char *, int *); static int convert_codeset (CT, char *, int *); +static char *content_codeset (CT); static int write_content (CT, char *, char *, int, int); static int remove_file (char *); static void report (char *, char *, char *, ...); @@ -126,6 +131,7 @@ main (int argc, char **argv) { int status = OK; fix_transformations fx; fx.reformat = fx.fixcte = fx.fixboundary = 1; + fx.replacetextplain = 0; fx.decodetext = CE_8BIT; fx.textcodeset = NULL; @@ -203,19 +209,22 @@ main (int argc, char **argv) { case 
NREFORMATSW: fx.reformat = 0; continue; - + case REPLACETEXTPLAINSW: + fx.replacetextplain = 1; + continue; + case NREPLACETEXTPLAINSW: + fx.replacetextplain = 0; + continue; case FILESW: if (! (cp = *argp++) || (*cp == '-' && cp[1])) adios (NULL, "missing argument to %s", argp[-2]); file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE); continue; - case OUTFILESW: if (! (cp = *argp++) || (*cp == '-' && cp[1])) adios (NULL, "missing argument to %s", argp[-2]); outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE); continue; - case RPROCSW: if (!(rmmproc = *argp++) || *rmmproc == '-') adios (NULL, "missing argument to %s", argp[-2]); @@ -223,7 +232,6 @@ main (int argc, char **argv) { case NRPRCSW: rmmproc = NULL; continue; - case VERBSW: verbosw = 1; continue; @@ -237,8 +245,14 @@ main (int argc, char **argv) { adios (NULL, "only one folder at a time!"); else folder = pluspath (cp); - } else - app_msgarg(&msgs, cp); + } else { + if (*cp == '/') { + /* Interpret a full path as a filename, not a message. */ + file = add (cp, NULL); + } else { + app_msgarg (&msgs, cp); + } + } } SIGNAL (SIGQUIT, quitser); @@ -408,7 +422,8 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { status = fix_multipart_cte (*ctp, &message_mods); } if (status == OK && fx->reformat) { - status = ensure_text_plain (ctp, NULL, &message_mods); + status = + ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain); } if (status == OK && fx->decodetext) { status = decode_text_parts (*ctp, fx->decodetext, &message_mods); @@ -871,7 +886,7 @@ set_ce (CT ct, int encoding) { /* Make sure each text part has a corresponding text/plain part. 
*/ static int -ensure_text_plain (CT *ct, CT parent, int *message_mods) { +ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) { int status = OK; switch ((*ct)->c_type) { @@ -884,17 +899,36 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { if (parent && parent->c_type == CT_MULTIPART && parent->c_subtype == MULTI_ALTERNATE) { struct multipart *mp = (struct multipart *) parent->c_ctparams; - struct part *part; + struct part *part, *prev; int new_subpart_number = 1; /* See if there is a sibling text/plain. */ - for (part = mp->mp_parts; part; part = part->mp_next) { + for (prev = part = mp->mp_parts; part; part = part->mp_next) { ++new_subpart_number; if (part->mp_part->c_type == CT_TEXT && part->mp_part->c_subtype == TEXT_PLAIN) { - has_text_plain = 1; + if (replacetextplain) { + struct part *old_part; + if (part == mp->mp_parts) { + old_part = mp->mp_parts; + mp->mp_parts = part->mp_next; + } else { + old_part = prev->mp_next; + prev->mp_next = part->mp_next; + } + if (verbosw) { + report (parent->c_partno, parent->c_file, + "remove text/plain part %s", + old_part->mp_part->c_partno); + } + free_content (old_part->mp_part); + free (old_part); + } else { + has_text_plain = 1; + } break; } + prev = part; } if (! 
has_text_plain) { @@ -965,7 +999,8 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { for (part = mp->mp_parts; status == OK && part; part = part->mp_next) { if ((*ct)->c_type == CT_MULTIPART) { - status = ensure_text_plain (&part->mp_part, *ct, message_mods); + status = ensure_text_plain (&part->mp_part, *ct, message_mods, + replacetextplain); } } break; @@ -976,7 +1011,8 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { struct exbody *e; e = (struct exbody *) (*ct)->c_ctparams; - status = ensure_text_plain (&e->eb_content, *ct, message_mods); + status = ensure_text_plain (&e->eb_content, *ct, message_mods, + replacetextplain); } break; } @@ -1158,24 +1194,15 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { } -/* Identifies 7bit or 8bit content based on charset, if specified. */ +/* Identifies 7bit or 8bit content based on charset. */ static int charset_encoding (CT ct) { - int encoding = CE_8BIT; - CI ctinfo = &ct->c_ctinfo; - char **ap, **vp; - - for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) { - if (! strcasecmp (*ap, "charset")) { - /* norm_charmap() is case sensitive. */ - char *ch = upcase (*vp); - - if (! strcmp (norm_charmap (ch), "US-ASCII")) encoding = CE_7BIT; - free (ch); - break; - } - } + /* norm_charmap() is case sensitive. */ + char *codeset = upcase (content_codeset (ct)); + int encoding = + strcmp (norm_charmap (codeset), "US-ASCII") ? CE_8BIT : CE_7BIT; + free (codeset); return encoding; } @@ -1492,6 +1519,7 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { report (ct->c_partno, ct->c_file, "decode%s", ct->c_ctline ? 
ct->c_ctline : ""); } + strip_crs (ct, message_mods); } else { status = NOTOK; } @@ -1501,9 +1529,14 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { } break; } + case CE_8BIT: + case CE_7BIT: + strip_crs (ct, message_mods); + break; default: break; } + break; case CT_MULTIPART: { @@ -1544,6 +1577,7 @@ content_encoding (CT ct) { int encoding = CE_7BIT; if (ce->ce_file) { + size_t line_len = 0; char buffer[BUFSIZ]; size_t inbytes; @@ -1557,7 +1591,6 @@ content_encoding (CT ct) { (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) { char *cp; size_t i; - size_t line_len = 0; int last_char_was_cr = 0; for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) { @@ -1583,6 +1616,166 @@ content_encoding (CT ct) { } + + +static int +strip_crs (CT ct, int *message_mods) { + /* norm_charmap() is case sensitive. */ + char *codeset = upcase (content_codeset (ct)); + int status = OK; + + /* Remove the CR of each CR LF pair from the content, but only if + it is ASCII or another codeset with the same readily recognizable + CR followed by a LF. UTF-8 qualifies because a byte whose + high-order bit is 0 must be a single-byte ASCII character. + Returns NOTOK if the content can't be opened or rewritten. */ + if (! strcmp (norm_charmap (codeset), "US-ASCII") || + ! strncmp (norm_charmap (codeset), "ISO-8859-", 9) || + ! strncmp (norm_charmap (codeset), "UTF-8", 5) || + ! strncmp (norm_charmap (codeset), "WINDOWS-12", 10)) { + char **file = NULL; + FILE **fp = NULL; + size_t begin; + size_t end; + int has_crs = 0; + int opened_input_file = 0; + + if (ct->c_cefile.ce_file) { + file = &ct->c_cefile.ce_file; + fp = &ct->c_cefile.ce_fp; + begin = end = 0; + } else if (ct->c_file) { + file = &ct->c_file; + fp = &ct->c_fp; + begin = (size_t) ct->c_begin; + end = (size_t) ct->c_end; + } /* else don't know where the content is */ + + if (file && *file && fp) { + if (! 
*fp) { + if ((*fp = fopen (*file, "r")) == NULL) { + advise (*file, "unable to open for reading"); + status = NOTOK; + } else { + opened_input_file = 1; + } + } + } + + if (fp && *fp) { + char buffer[BUFSIZ]; + size_t bytes_read; + size_t bytes_to_read = + end > 0 && end > begin ? end - begin : sizeof buffer; + int last_char_was_cr = 0; /* carried from one read to the next */ + + fseeko (*fp, begin, SEEK_SET); + while (! has_crs && (bytes_read = fread (buffer, 1, + min (bytes_to_read, sizeof buffer), + *fp)) > 0) { + /* Look for CR followed by a LF. This is supposed to + be text so there should be LF's. If not, don't + modify the content. */ + char *cp; + size_t i; + + if (end > 0) bytes_to_read -= bytes_read; + + for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) { + if (*cp == '\n' && last_char_was_cr) { + has_crs = 1; + break; + } + + last_char_was_cr = *cp == '\r' ? 1 : 0; + } + } + + if (has_crs) { + int fd; + char *stripped_content_file = + add (m_mktemp2 (tmp, invo_name, &fd, NULL), NULL); + + /* Strip each CR before a LF, from this part's bytes only. */ + last_char_was_cr = 0; + bytes_to_read = end > begin ? end - begin : sizeof buffer; + fseeko (*fp, begin, SEEK_SET); + while ((bytes_read = fread (buffer, 1, + min (bytes_to_read, sizeof buffer), + *fp)) > 0) { + char *cp; + size_t i; + + if (end > 0) bytes_to_read -= bytes_read; + + for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) { + /* A CR that isn't followed by a LF is content: keep it. */ + if (last_char_was_cr && *cp != '\n') write (fd, "\r", 1); + last_char_was_cr = *cp == '\r'; + if (! last_char_was_cr) write (fd, cp, 1); + } + } + /* Likewise for a CR that ends the content. */ + if (last_char_was_cr) write (fd, "\r", 1); + + if (close (fd)) { + admonish (NULL, "unable to write temporary file %s", + stripped_content_file); + unlink (stripped_content_file); + status = NOTOK; + } else { + /* Replace the content file with the stripped one. */ + if (ct->c_cefile.ce_file) { + if (ct->c_cefile.ce_unlink) { + unlink (ct->c_cefile.ce_file); + } + free (ct->c_cefile.ce_file); + } + ct->c_cefile.ce_file = stripped_content_file; + ct->c_cefile.ce_unlink = 1; + + ++*message_mods; + if (verbosw) { + report (ct->c_partno, + begin == 0 && end == 0 ? 
"" : *file, + "stripped CRs"); + } + } + } + + if (opened_input_file) { + fclose (*fp); + *fp = NULL; + } + } + } + + free (codeset); + return status; +} + + +static char * +content_codeset (CT ct) { + const char *const charset = "charset"; + char *default_codeset = NULL; + CI ctinfo = &ct->c_ctinfo; + char **ap, **vp; + char **src_codeset = NULL; + + for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) { + if (! strcasecmp (*ap, charset)) { + src_codeset = vp; + break; + } + } + + /* RFC 2045, Sec. 5.2: default to us-ascii. */ + if (src_codeset == NULL) src_codeset = &default_codeset; + if (*src_codeset == NULL) *src_codeset = "US-ASCII"; + + return *src_codeset; +} + + static int convert_codesets (CT ct, char *dest_codeset, int *message_mods) { int status = OK; @@ -1627,39 +1820,34 @@ convert_codesets (CT ct, char *dest_codeset, int *message_mods) { static int convert_codeset (CT ct, char *dest_codeset, int *message_mods) { - const char *const charset = "charset"; - char **src_codeset = NULL; - char *default_codeset = NULL; - CI ctinfo = &ct->c_ctinfo; - char **ap, **vp; + char *src_codeset = content_codeset (ct); int status = OK; - for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) { - if (! strcasecmp (*ap, charset)) { - src_codeset = vp; - break; - } - } - /* RFC 2045, Sec. 5.2: default to us-ascii. */ - if (src_codeset == NULL) src_codeset = &default_codeset; - if (*src_codeset == NULL) *src_codeset = "US-ASCII"; + /* norm_charmap() is case sensitive. 
*/ + char *src_codeset_u = upcase (src_codeset); + char *dest_codeset_u = upcase (dest_codeset); + int different_codesets = + strcmp (norm_charmap (src_codeset_u), norm_charmap (dest_codeset_u)); + + free (dest_codeset_u); + free (src_codeset_u); - if (strcmp (norm_charmap (*src_codeset), norm_charmap (dest_codeset))) { + if (different_codesets) { #ifdef HAVE_ICONV iconv_t conv_desc = NULL; char *dest; int fd = -1; char **file = NULL; FILE **fp = NULL; - long begin; - long end; + size_t begin; + size_t end; int opened_input_file = 0; char src_buffer[BUFSIZ]; HF hf; - if ((conv_desc = iconv_open (dest_codeset, *src_codeset)) == + if ((conv_desc = iconv_open (dest_codeset, src_codeset)) == (iconv_t) -1) { - advise (NULL, "Can't convert %s to %s", *src_codeset, dest_codeset); + advise (NULL, "Can't convert %s to %s", src_codeset, dest_codeset); return -1; } @@ -1668,13 +1856,12 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) { if (ct->c_cefile.ce_file) { file = &ct->c_cefile.ce_file; fp = &ct->c_cefile.ce_fp; - begin = 0; - end = -1; + begin = end = 0; } else if (ct->c_file) { file = &ct->c_file; fp = &ct->c_fp; - begin = ct->c_begin; - end = ct->c_end; + begin = (size_t) ct->c_begin; + end = (size_t) ct->c_end; } /* else no input file: shouldn't happen */ if (file && *file && fp) { @@ -1690,17 +1877,20 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) { if (fp && *fp) { size_t inbytes; - size_t max = end > 0 ? (size_t) (end-begin) : sizeof src_buffer; + size_t bytes_to_read = + end > 0 && end > begin ? 
end - begin : sizeof src_buffer; fseeko (*fp, begin, SEEK_SET); - while (status == OK && max > 0 && - (inbytes = fread (src_buffer, 1, max, *fp)) > 0) { + while ((inbytes = fread (src_buffer, 1, + min (bytes_to_read, sizeof src_buffer), + *fp)) > 0) { char dest_buffer[BUFSIZ]; - char *ib = src_buffer, *ob = dest_buffer; + ICONV_CONST char *ib = src_buffer; + char *ob = dest_buffer; size_t outbytes = sizeof dest_buffer; size_t outbytes_before = outbytes; - if (end > 0) max -= inbytes; + if (end > 0) bytes_to_read -= inbytes; if (iconv (conv_desc, &ib, &inbytes, &ob, &outbytes) == (size_t) -1) { @@ -1734,11 +1924,11 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) { ++*message_mods; if (verbosw) { report (ct->c_partno, ct->c_file, "convert %s to %s", - *src_codeset, dest_codeset); + src_codeset, dest_codeset); } /* Update ci_attrs. */ - *src_codeset = dest_codeset; + src_codeset = dest_codeset; /* NOTE(review): only the local changes; ci_values keeps the old charset -- confirm */ /* Update ct->c_ctline. */ if (ct->c_ctline) { @@ -1768,7 +1958,7 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) { #else /* ! HAVE_ICONV */ NMH_UNUSED (message_mods); - advise (NULL, "Can't convert %s to %s without iconv", *src_codeset, + advise (NULL, "Can't convert %s to %s without iconv", src_codeset, dest_codeset); status = NOTOK; #endif /* ! HAVE_ICONV */