X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/bd7a5de6be16c5e5466264439ebdc157fcafab74..100803609988d80412fed733a0a754cd32a18ce2:/uip/mhfixmsg.c diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c index 602eb907..fd777699 100644 --- a/uip/mhfixmsg.c +++ b/uip/mhfixmsg.c @@ -23,6 +23,8 @@ X("notextcodeset", 0, NTEXTCODESETSW) \ X("reformat", 0, REFORMATSW) \ X("noreformat", 0, NREFORMATSW) \ + X("replacetextplain", 0, REPLACETEXTPLAINSW) \ + X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \ X("fixboundary", 0, FIXBOUNDARYSW) \ X("nofixboundary", 0, NFIXBOUNDARYSW) \ X("fixcte", 0, FIXCTESW) \ @@ -78,6 +80,7 @@ typedef struct fix_transformations { int fixboundary; int fixcte; int reformat; + int replacetextplain; int decodetext; char *textcodeset; } fix_transformations; @@ -90,7 +93,7 @@ static int replace_boundary (CT, char *, const char *); static char *update_attr (char *, const char *, const char *e); static int fix_multipart_cte (CT, int *); static int set_ce (CT, int); -static int ensure_text_plain (CT *, CT, int *); +static int ensure_text_plain (CT *, CT, int *, int); static CT build_text_plain_part (CT); static CT divide_part (CT); static void copy_ctinfo (CI, CI); @@ -103,7 +106,7 @@ static void transfer_noncontent_headers (CT, CT); static int set_ct_type (CT, int type, int subtype, int encoding); static int decode_text_parts (CT, int, int *); static int content_encoding (CT); -static int strip_crs (CT); +static int strip_crs (CT, int *); static int convert_codesets (CT, char *, int *); static int convert_codeset (CT, char *, int *); static char *content_codeset (CT); @@ -128,6 +131,7 @@ main (int argc, char **argv) { int status = OK; fix_transformations fx; fx.reformat = fx.fixcte = fx.fixboundary = 1; + fx.replacetextplain = 0; fx.decodetext = CE_8BIT; fx.textcodeset = NULL; @@ -205,19 +209,22 @@ main (int argc, char **argv) { case NREFORMATSW: fx.reformat = 0; continue; - + case REPLACETEXTPLAINSW: + fx.replacetextplain = 1; + continue; + case NREPLACETEXTPLAINSW: + fx.replacetextplain = 0; + continue; case FILESW: if (! (cp = *argp++) || (*cp == '-' && cp[1])) adios (NULL, "missing argument to %s", argp[-2]); file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE); continue; - case OUTFILESW: if (! (cp = *argp++) || (*cp == '-' && cp[1])) adios (NULL, "missing argument to %s", argp[-2]); outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE); continue; - case RPROCSW: if (!(rmmproc = *argp++) || *rmmproc == '-') adios (NULL, "missing argument to %s", argp[-2]); @@ -225,7 +232,6 @@ main (int argc, char **argv) { case NRPRCSW: rmmproc = NULL; continue; - case VERBSW: verbosw = 1; continue; @@ -239,8 +245,14 @@ main (int argc, char **argv) { adios (NULL, "only one folder at a time!"); else folder = pluspath (cp); - } else - app_msgarg(&msgs, cp); + } else { + if (*cp == '/') { + /* Interpret a full path as a filename, not a message. */ + file = add (cp, NULL); + } else { + app_msgarg (&msgs, cp); + } + } } SIGNAL (SIGQUIT, quitser); @@ -410,7 +422,8 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { status = fix_multipart_cte (*ctp, &message_mods); } if (status == OK && fx->reformat) { - status = ensure_text_plain (ctp, NULL, &message_mods); + status = + ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain); } if (status == OK && fx->decodetext) { status = decode_text_parts (*ctp, fx->decodetext, &message_mods); @@ -873,7 +886,7 @@ set_ce (CT ct, int encoding) { /* Make sure each text part has a corresponding text/plain part. */ static int -ensure_text_plain (CT *ct, CT parent, int *message_mods) { +ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) { int status = OK; switch ((*ct)->c_type) { @@ -886,17 +899,36 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { if (parent && parent->c_type == CT_MULTIPART && parent->c_subtype == MULTI_ALTERNATE) { struct multipart *mp = (struct multipart *) parent->c_ctparams; - struct part *part; + struct part *part, *prev; int new_subpart_number = 1; /* See if there is a sibling text/plain. */ - for (part = mp->mp_parts; part; part = part->mp_next) { + for (prev = part = mp->mp_parts; part; part = part->mp_next) { ++new_subpart_number; if (part->mp_part->c_type == CT_TEXT && part->mp_part->c_subtype == TEXT_PLAIN) { - has_text_plain = 1; + if (replacetextplain) { + struct part *old_part; + if (part == mp->mp_parts) { + old_part = mp->mp_parts; + mp->mp_parts = part->mp_next; + } else { + old_part = prev->mp_next; + prev->mp_next = part->mp_next; + } + if (verbosw) { + report (parent->c_partno, parent->c_file, + "remove text/plain part %s", + old_part->mp_part->c_partno); + } + free_content (old_part->mp_part); + free (old_part); + } else { + has_text_plain = 1; + } break; } + prev = part; } if (! has_text_plain) { @@ -967,7 +999,8 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { for (part = mp->mp_parts; status == OK && part; part = part->mp_next) { if ((*ct)->c_type == CT_MULTIPART) { - status = ensure_text_plain (&part->mp_part, *ct, message_mods); + status = ensure_text_plain (&part->mp_part, *ct, message_mods, + replacetextplain); } } break; @@ -978,7 +1011,8 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { struct exbody *e; e = (struct exbody *) (*ct)->c_ctparams; - status = ensure_text_plain (&e->eb_content, *ct, message_mods); + status = ensure_text_plain (&e->eb_content, *ct, message_mods, + replacetextplain); } break; } @@ -1485,7 +1519,7 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { report (ct->c_partno, ct->c_file, "decode%s", ct->c_ctline ? ct->c_ctline : ""); } - strip_crs (ct); + strip_crs (ct, message_mods); } else { status = NOTOK; } @@ -1497,7 +1531,7 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { } case CE_8BIT: case CE_7BIT: - strip_crs (ct); + strip_crs (ct, message_mods); break; default: break; @@ -1543,6 +1577,7 @@ content_encoding (CT ct) { int encoding = CE_7BIT; if (ce->ce_file) { + size_t line_len = 0; char buffer[BUFSIZ]; size_t inbytes; @@ -1556,7 +1591,6 @@ content_encoding (CT ct) { (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) { char *cp; size_t i; - size_t line_len = 0; int last_char_was_cr = 0; for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) { @@ -1583,17 +1617,24 @@ content_encoding (CT ct) { static int -strip_crs (CT ct) { +strip_crs (CT ct, int *message_mods) { /* norm_charmap() is case sensitive. */ char *codeset = upcase (content_codeset (ct)); int status = OK; - /* Only strip carriage returns if content is ASCII. */ - if (! strcmp (norm_charmap (codeset), "US-ASCII")) { + /* Only strip carriage returns if content is ASCII or another + codeset that has the same readily recognizable CR followed by a + LF. We can include UTF-8 here because if the high-order bit of + a UTF-8 byte is 0, then it must be a single-byte ASCII + character. */ + if (! strcmp (norm_charmap (codeset), "US-ASCII") || + ! strncmp (norm_charmap (codeset), "ISO-8859-", 9) || + ! strncmp (norm_charmap (codeset), "UTF-8", 5) || + ! strncmp (norm_charmap (codeset), "WINDOWS-12", 10)) { char **file = NULL; FILE **fp = NULL; size_t begin; - ssize_t end; + size_t end; int has_crs = 0; int opened_input_file = 0; @@ -1605,7 +1646,7 @@ strip_crs (CT ct) { file = &ct->c_file; fp = &ct->c_fp; begin = (size_t) ct->c_begin; - end = (ssize_t) ct->c_end; + end = (size_t) ct->c_end; } /* else don't know where the content is */ if (file && *file && fp) { @@ -1622,10 +1663,13 @@ strip_crs (CT ct) { if (fp && *fp) { char buffer[BUFSIZ]; size_t bytes_read; - ssize_t max = end > 0 ? end - begin : sizeof buffer; + size_t bytes_to_read = + end > 0 && end > begin ? end - begin : sizeof buffer; fseeko (*fp, begin, SEEK_SET); - while ((bytes_read = fread (buffer, 1, max, *fp)) > 0) { + while ((bytes_read = fread (buffer, 1, + min (bytes_to_read, sizeof buffer), + *fp)) > 0) { /* Look for CR followed by a LF. This is supposed to be text so there should be LF's. If not, don't modify the content. */ @@ -1633,7 +1677,7 @@ strip_crs (CT ct) { size_t i; int last_char_was_cr = 0; - if (end > 0) max -= bytes_read; + if (end > 0) bytes_to_read -= bytes_read; for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) { if (*cp == '\n' && last_char_was_cr) { @@ -1652,7 +1696,8 @@ strip_crs (CT ct) { /* Strip each CR before a LF from the content. */ fseeko (*fp, begin, SEEK_SET); - while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) { + while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > + 0) { char *cp; size_t i; int last_char_was_cr = 0; @@ -1668,18 +1713,15 @@ strip_crs (CT ct) { write (fd, cp, 1); last_char_was_cr = 0; } - } } if (close (fd)) { - admonish (NULL, "unable to write temporaty file %s", + admonish (NULL, "unable to write temporary file %s", stripped_content_file); unlink (stripped_content_file); status = NOTOK; - } - - if (status == OK) { + } else { /* Replace the decoded file with the converted one. */ if (ct->c_cefile.ce_file) { if (ct->c_cefile.ce_unlink) { @@ -1689,6 +1731,13 @@ strip_crs (CT ct) { } ct->c_cefile.ce_file = stripped_content_file; ct->c_cefile.ce_unlink = 1; + + ++*message_mods; + if (verbosw) { + report (ct->c_partno, + begin == 0 && end == 0 ? "" : *file, + "stripped CRs"); + } } } @@ -1791,7 +1840,7 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) { char **file = NULL; FILE **fp = NULL; size_t begin; - ssize_t end; + size_t end; int opened_input_file = 0; char src_buffer[BUFSIZ]; HF hf; @@ -1807,13 +1856,12 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) { if (ct->c_cefile.ce_file) { file = &ct->c_cefile.ce_file; fp = &ct->c_cefile.ce_fp; - begin = 0; - end = -1; + begin = end = 0; } else if (ct->c_file) { file = &ct->c_file; fp = &ct->c_fp; begin = (size_t) ct->c_begin; - end = (ssize_t) ct->c_end; + end = (size_t) ct->c_end; } /* else no input file: shouldn't happen */ if (file && *file && fp) { @@ -1829,17 +1877,20 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) { if (fp && *fp) { size_t inbytes; - ssize_t max = end > 0 ? end - begin : sizeof src_buffer; + size_t bytes_to_read = + end > 0 && end > begin ? end - begin : sizeof src_buffer; fseeko (*fp, begin, SEEK_SET); - while (status == OK && max > 0 && - (inbytes = fread (src_buffer, 1, max, *fp)) > 0) { + while ((inbytes = fread (src_buffer, 1, + min (bytes_to_read, sizeof src_buffer), + *fp)) > 0) { char dest_buffer[BUFSIZ]; - char *ib = src_buffer, *ob = dest_buffer; + ICONV_CONST char *ib = src_buffer; + char *ob = dest_buffer; size_t outbytes = sizeof dest_buffer; size_t outbytes_before = outbytes; - if (end > 0) max -= inbytes; + if (end > 0) bytes_to_read -= inbytes; if (iconv (conv_desc, &ib, &inbytes, &ob, &outbytes) == (size_t) -1) {