X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/bd7a5de6be16c5e5466264439ebdc157fcafab74..100803609988d80412fed733a0a754cd32a18ce2:/uip/mhfixmsg.c

diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c
index 602eb907..fd777699 100644
--- a/uip/mhfixmsg.c
+++ b/uip/mhfixmsg.c
@@ -23,6 +23,8 @@
     X("notextcodeset", 0, NTEXTCODESETSW) \
     X("reformat", 0, REFORMATSW) \
     X("noreformat", 0, NREFORMATSW) \
+    X("replacetextplain", 0, REPLACETEXTPLAINSW) \
+    X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
     X("fixboundary", 0, FIXBOUNDARYSW) \
     X("nofixboundary", 0, NFIXBOUNDARYSW) \
     X("fixcte", 0, FIXCTESW) \
@@ -78,6 +80,7 @@ typedef struct fix_transformations {
     int fixboundary;
     int fixcte;
     int reformat;
+    int replacetextplain;
     int decodetext;
     char *textcodeset;
 } fix_transformations;
@@ -90,7 +93,7 @@ static int replace_boundary (CT, char *, const char *);
 static char *update_attr (char *, const char *, const char *e);
 static int fix_multipart_cte (CT, int *);
 static int set_ce (CT, int);
-static int ensure_text_plain (CT *, CT, int *);
+static int ensure_text_plain (CT *, CT, int *, int);
 static CT build_text_plain_part (CT);
 static CT divide_part (CT);
 static void copy_ctinfo (CI, CI);
@@ -103,7 +106,7 @@ static void transfer_noncontent_headers (CT, CT);
 static int set_ct_type (CT, int type, int subtype, int encoding);
 static int decode_text_parts (CT, int, int *);
 static int content_encoding (CT);
-static int strip_crs (CT);
+static int strip_crs (CT, int *);
 static int convert_codesets (CT, char *, int *);
 static int convert_codeset (CT, char *, int *);
 static char *content_codeset (CT);
@@ -128,6 +131,7 @@ main (int argc, char **argv) {
     int status = OK;
     fix_transformations fx;
     fx.reformat = fx.fixcte = fx.fixboundary = 1;
+    fx.replacetextplain = 0;
     fx.decodetext = CE_8BIT;
     fx.textcodeset = NULL;
 
@@ -205,19 +209,22 @@ main (int argc, char **argv) {
             case NREFORMATSW:
                 fx.reformat = 0;
                 continue;
-
+            case REPLACETEXTPLAINSW:
+                fx.replacetextplain = 1;
+                continue;
+            case NREPLACETEXTPLAINSW:
+                fx.replacetextplain = 0;
+                continue;
             case FILESW:
                 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
                     adios (NULL, "missing argument to %s", argp[-2]);
                 file = *cp == '-'  ?  add (cp, NULL)  :  path (cp, TFILE);
                 continue;
-
             case OUTFILESW:
                 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
                     adios (NULL, "missing argument to %s", argp[-2]);
                 outfile = *cp == '-'  ?  add (cp, NULL)  :  path (cp, TFILE);
                 continue;
-
             case RPROCSW:
                 if (!(rmmproc = *argp++) || *rmmproc == '-')
                     adios (NULL, "missing argument to %s", argp[-2]);
@@ -225,7 +232,6 @@ main (int argc, char **argv) {
             case NRPRCSW:
                 rmmproc = NULL;
                 continue;
-
             case VERBSW:
                 verbosw = 1;
                 continue;
@@ -239,8 +245,14 @@ main (int argc, char **argv) {
                 adios (NULL, "only one folder at a time!");
             else
                 folder = pluspath (cp);
-        } else
-                app_msgarg(&msgs, cp);
+        } else {
+            if (*cp == '/') {
+                /* Interpret a full path as a filename, not a message. */
+                file = add (cp, NULL);
+            } else {
+                app_msgarg (&msgs, cp);
+            }
+        }
     }
 
     SIGNAL (SIGQUIT, quitser);
@@ -410,7 +422,8 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
         status = fix_multipart_cte (*ctp, &message_mods);
     }
     if (status == OK  &&  fx->reformat) {
-        status = ensure_text_plain (ctp, NULL, &message_mods);
+        status =
+            ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
     }
     if (status == OK  &&  fx->decodetext) {
         status = decode_text_parts (*ctp, fx->decodetext, &message_mods);
@@ -873,7 +886,7 @@ set_ce (CT ct, int encoding) {
 
 /* Make sure each text part has a corresponding text/plain part. */
 static int
-ensure_text_plain (CT *ct, CT parent, int *message_mods) {
+ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
     int status = OK;
 
     switch ((*ct)->c_type) {
@@ -886,17 +899,36 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) {
         if (parent  &&  parent->c_type == CT_MULTIPART  &&
             parent->c_subtype == MULTI_ALTERNATE) {
             struct multipart *mp = (struct multipart *) parent->c_ctparams;
-            struct part *part;
+            struct part *part, *prev;
             int new_subpart_number = 1;
 
             /* See if there is a sibling text/plain. */
-            for (part = mp->mp_parts; part; part = part->mp_next) {
+            for (prev = part = mp->mp_parts; part; part = part->mp_next) {
                 ++new_subpart_number;
                 if (part->mp_part->c_type == CT_TEXT  &&
                     part->mp_part->c_subtype == TEXT_PLAIN) {
-                    has_text_plain = 1;
+                    if (replacetextplain) {
+                        struct part *old_part;
+                        if (part == mp->mp_parts) {
+                            old_part = mp->mp_parts;
+                            mp->mp_parts = part->mp_next;
+                        } else {
+                            old_part = prev->mp_next;
+                            prev->mp_next = part->mp_next;
+                        }
+                        if (verbosw) {
+                            report (parent->c_partno, parent->c_file,
+                                    "remove text/plain part %s",
+                                    old_part->mp_part->c_partno);
+                        }
+                        free_content (old_part->mp_part);
+                        free (old_part);
+                    } else {
+                        has_text_plain = 1;
+                    }
                     break;
                 }
+                prev = part;
             }
 
             if (! has_text_plain) {
@@ -967,7 +999,8 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) {
 
         for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
             if ((*ct)->c_type == CT_MULTIPART) {
-                status = ensure_text_plain (&part->mp_part, *ct, message_mods);
+                status = ensure_text_plain (&part->mp_part, *ct, message_mods,
+                                            replacetextplain);
             }
         }
         break;
@@ -978,7 +1011,8 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) {
             struct exbody *e;
 
             e = (struct exbody *) (*ct)->c_ctparams;
-            status = ensure_text_plain (&e->eb_content, *ct, message_mods);
+            status = ensure_text_plain (&e->eb_content, *ct, message_mods,
+                                        replacetextplain);
         }
         break;
     }
@@ -1485,7 +1519,7 @@ decode_text_parts (CT ct, int encoding, int *message_mods) {
                             report (ct->c_partno, ct->c_file, "decode%s",
                                     ct->c_ctline ? ct->c_ctline : "");
                         }
-                        strip_crs (ct);
+                        strip_crs (ct, message_mods);
                     } else {
                         status = NOTOK;
                     }
@@ -1497,7 +1531,7 @@ decode_text_parts (CT ct, int encoding, int *message_mods) {
         }
         case CE_8BIT:
         case CE_7BIT:
-            strip_crs (ct);
+            strip_crs (ct, message_mods);
             break;
         default:
             break;
@@ -1543,6 +1577,7 @@ content_encoding (CT ct) {
     int encoding = CE_7BIT;
 
     if (ce->ce_file) {
+        size_t line_len = 0;
         char buffer[BUFSIZ];
         size_t inbytes;
 
@@ -1556,7 +1591,6 @@ content_encoding (CT ct) {
                (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
             char *cp;
             size_t i;
-            size_t line_len = 0;
             int last_char_was_cr = 0;
 
             for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
@@ -1583,17 +1617,24 @@ content_encoding (CT ct) {
 
 
 static int
-strip_crs (CT ct) {
+strip_crs (CT ct, int *message_mods) {
     /* norm_charmap() is case sensitive. */
     char *codeset = upcase (content_codeset (ct));
     int status = OK;
 
-    /* Only strip carriage returns if content is ASCII. */
-    if (! strcmp (norm_charmap (codeset), "US-ASCII")) {
+    /* Only strip carriage returns if the content is ASCII or another
+       codeset in which a CR followed by a LF is encoded as the same
+       two bytes as in ASCII.  UTF-8 qualifies because any UTF-8 byte
+       whose high-order bit is 0 must be a single-byte ASCII
+       character. */
+    if (! strcmp (norm_charmap (codeset), "US-ASCII")  ||
+        ! strncmp (norm_charmap (codeset), "ISO-8859-", 9)  ||
+        ! strncmp (norm_charmap (codeset), "UTF-8", 5)  ||
+        ! strncmp (norm_charmap (codeset), "WINDOWS-12", 10)) {
         char **file = NULL;
         FILE **fp = NULL;
         size_t begin;
-        ssize_t end;
+        size_t end;
         int has_crs = 0;
         int opened_input_file = 0;
 
@@ -1605,7 +1646,7 @@ strip_crs (CT ct) {
             file = &ct->c_file;
             fp = &ct->c_fp;
             begin = (size_t) ct->c_begin;
-            end = (ssize_t) ct->c_end;
+            end = (size_t) ct->c_end;
         } /* else don't know where the content is */
 
         if (file  &&  *file  &&  fp) {
@@ -1622,10 +1663,13 @@ strip_crs (CT ct) {
         if (fp  &&  *fp) {
             char buffer[BUFSIZ];
             size_t bytes_read;
-            ssize_t max = end > 0  ?  end - begin  :  sizeof buffer;
+            size_t bytes_to_read =
+                end > 0 && end > begin  ?  end - begin  :  sizeof buffer;
 
             fseeko (*fp, begin, SEEK_SET);
-            while ((bytes_read = fread (buffer, 1, max, *fp)) > 0) {
+            while ((bytes_read = fread (buffer, 1,
+                                        min (bytes_to_read, sizeof buffer),
+                                        *fp)) > 0) {
                 /* Look for CR followed by a LF.  This is supposed to
                    be text so there should be LF's.  If not, don't
                    modify the content. */
@@ -1633,7 +1677,7 @@ strip_crs (CT ct) {
                 size_t i;
                 int last_char_was_cr = 0;
 
-                if (end > 0) max -= bytes_read;
+                if (end > 0) bytes_to_read -= bytes_read;
 
                 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
                     if (*cp == '\n'  &&  last_char_was_cr) {
@@ -1652,7 +1696,8 @@ strip_crs (CT ct) {
 
                 /* Strip each CR before a LF from the content. */
                 fseeko (*fp, begin, SEEK_SET);
-                while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
+                while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
+                       0) {
                     char *cp;
                     size_t i;
                     int last_char_was_cr = 0;
@@ -1668,18 +1713,15 @@ strip_crs (CT ct) {
                             write (fd, cp, 1);
                             last_char_was_cr = 0;
                         }
-
                     }
                 }
 
                 if (close (fd)) {
-                    admonish (NULL, "unable to write temporaty file %s",
+                    admonish (NULL, "unable to write temporary file %s",
                               stripped_content_file);
                     unlink (stripped_content_file);
                     status = NOTOK;
-                }
-
-                if (status == OK) {
+                } else {
                     /* Replace the decoded file with the converted one. */
                     if (ct->c_cefile.ce_file) {
                         if (ct->c_cefile.ce_unlink) {
@@ -1689,6 +1731,13 @@ strip_crs (CT ct) {
                     }
                     ct->c_cefile.ce_file = stripped_content_file;
                     ct->c_cefile.ce_unlink = 1;
+
+                    ++*message_mods;
+                    if (verbosw) {
+                        report (ct->c_partno,
+                                begin == 0 && end == 0  ?  ""  :  *file,
+                                "stripped CRs");
+                    }
                 }
             }
 
@@ -1791,7 +1840,7 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) {
         char **file = NULL;
         FILE **fp = NULL;
         size_t begin;
-        ssize_t end;
+        size_t end;
         int opened_input_file = 0;
         char src_buffer[BUFSIZ];
         HF hf;
@@ -1807,13 +1856,12 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) {
         if (ct->c_cefile.ce_file) {
             file = &ct->c_cefile.ce_file;
             fp = &ct->c_cefile.ce_fp;
-            begin = 0;
-            end = -1;
+            begin = end = 0;
         } else if (ct->c_file) {
             file = &ct->c_file;
             fp = &ct->c_fp;
             begin = (size_t) ct->c_begin;
-            end = (ssize_t) ct->c_end;
+            end = (size_t) ct->c_end;
         } /* else no input file: shouldn't happen */
 
         if (file  &&  *file  &&  fp) {
@@ -1829,17 +1877,20 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) {
 
         if (fp  &&  *fp) {
             size_t inbytes;
-            ssize_t max = end > 0  ?  end - begin  :  sizeof src_buffer;
+            size_t bytes_to_read =
+                end > 0 && end > begin  ?  end - begin  :  sizeof src_buffer;
 
             fseeko (*fp, begin, SEEK_SET);
-            while (status == OK  &&  max > 0  &&
-                   (inbytes = fread (src_buffer, 1, max, *fp)) > 0) {
+            while ((inbytes = fread (src_buffer, 1,
+                                     min (bytes_to_read, sizeof src_buffer),
+                                     *fp)) > 0) {
                 char dest_buffer[BUFSIZ];
-                char *ib = src_buffer, *ob = dest_buffer;
+                ICONV_CONST char *ib = src_buffer;
+                char *ob = dest_buffer;
                 size_t outbytes = sizeof dest_buffer;
                 size_t outbytes_before = outbytes;
 
-                if (end > 0) max -= inbytes;
+                if (end > 0) bytes_to_read -= inbytes;
 
                 if (iconv (conv_desc, &ib, &inbytes, &ob, &outbytes) ==
                     (size_t) -1) {