X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/1034d3efd9b660b5fd8ceaa83afa083b7f3ab792..bd7a5de6be16c5e5466264439ebdc157fcafab74:/uip/mhfixmsg.c

diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c
index 8c5268ef..602eb907 100644
--- a/uip/mhfixmsg.c
+++ b/uip/mhfixmsg.c
@@ -103,8 +103,10 @@ static void transfer_noncontent_headers (CT, CT);
 static int set_ct_type (CT, int type, int subtype, int encoding);
 static int decode_text_parts (CT, int, int *);
 static int content_encoding (CT);
+static int strip_crs (CT);
 static int convert_codesets (CT, char *, int *);
 static int convert_codeset (CT, char *, int *);
+static char *content_codeset (CT);
 static int write_content (CT, char *, char *, int, int);
 static int remove_file (char *);
 static void report (char *, char *, char *, ...);
@@ -1158,24 +1160,15 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
 }
 
 
-/* Identifies 7bit or 8bit content based on charset, if specified. */
+/* Returns CE_7BIT if the content's charset is US-ASCII, else CE_8BIT. */
 static int
 charset_encoding (CT ct) {
-    int encoding = CE_8BIT;
-    CI ctinfo = &ct->c_ctinfo;
-    char **ap, **vp;
-
-    for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) {
-        if (! strcasecmp (*ap, "charset")) {
-            /* norm_charmap() is case sensitive. */
-            char *ch = upcase (*vp);
-
-            if (! strcmp (norm_charmap (ch), "US-ASCII")) encoding = CE_7BIT;
-            free (ch);
-            break;
-        }
-    }
+    /* Upcase the name first because norm_charmap() is case sensitive. */
+    char *name = upcase (content_codeset (ct));
+    int encoding = CE_8BIT;
+    if (! strcmp (norm_charmap (name), "US-ASCII")) encoding = CE_7BIT;
 
+    free (name);
     return encoding;
 }
 
@@ -1492,6 +1485,7 @@ decode_text_parts (CT ct, int encoding, int *message_mods) {
                             report (ct->c_partno, ct->c_file, "decode%s",
                                     ct->c_ctline ? ct->c_ctline : "");
                         }
+                        strip_crs (ct);
                     } else {
                         status = NOTOK;
                     }
@@ -1501,9 +1495,14 @@ decode_text_parts (CT ct, int encoding, int *message_mods) {
             }
             break;
         }
+        case CE_8BIT:
+        case CE_7BIT:
+            strip_crs (ct);
+            break;
         default:
             break;
         }
+
         break;
 
     case CT_MULTIPART: {
@@ -1583,6 +1582,151 @@ content_encoding (CT ct) {
 }
 
 
+/* Removes the CR of each CRLF in the content of ct if the charset is
+   US-ASCII.  Reads the decoded file (c_cefile) if there is one, else
+   bytes c_begin to c_end of c_file; writes a new temporary file that
+   replaces c_cefile.ce_file.  Returns NOTOK on an I/O error, else OK. */
+static int
+strip_crs (CT ct) {
+    /* norm_charmap() is case sensitive. */
+    char *codeset = upcase (content_codeset (ct));
+    int status = OK;
+
+    /* Only strip carriage returns if content is ASCII. */
+    if (! strcmp (norm_charmap (codeset), "US-ASCII")) {
+        char **file = NULL;
+        FILE **fp = NULL;
+        long begin = 0, end = 0;
+        int opened_input_file = 0;
+
+        if (ct->c_cefile.ce_file) {
+            file = &ct->c_cefile.ce_file;
+            fp = &ct->c_cefile.ce_fp;
+        } else if (ct->c_file) {
+            file = &ct->c_file;
+            fp = &ct->c_fp;
+            begin = ct->c_begin;
+            end = ct->c_end;
+        } /* else don't know where the content is */
+
+        if (fp  &&  ! *fp) {
+            if ((*fp = fopen (*file, "r")) == NULL) {
+                advise (*file, "unable to open for reading");
+                status = NOTOK;
+            } else {
+                opened_input_file = 1;
+            }
+        }
+
+        if (fp  &&  *fp) {
+            char buffer[BUFSIZ];
+            char out[BUFSIZ + 1];   /* +1: a CR held over from last read */
+            size_t length = (size_t) -1;   /* content size; -1: up to EOF */
+            size_t remaining, want, bytes_read, i;
+            int last_char_was_cr = 0, has_crs = 0;
+
+            if (end > 0) length = end > begin  ?  (size_t) (end - begin)  :  0;
+
+            /* Look for CR followed by a LF.  This is supposed to be
+               text so there should be LF's.  If not, don't modify the
+               content.  The CR flag is kept across reads so a CRLF
+               split between two reads is still found. */
+            fseeko (*fp, begin, SEEK_SET);
+            for (remaining = length; ! has_crs  &&  remaining > 0; ) {
+                want = sizeof buffer;
+                if (remaining < want) want = remaining;
+                if ((bytes_read = fread (buffer, 1, want, *fp)) == 0) break;
+                remaining -= bytes_read;
+                for (i = 0; i < bytes_read  &&  ! has_crs; ++i) {
+                    has_crs = last_char_was_cr  &&  buffer[i] == '\n';
+                    last_char_was_cr = buffer[i] == '\r';
+                }
+            }
+
+            if (has_crs) {
+                int fd = -1;
+                char *stripped_content_file =
+                    add (m_mktemp2 (tmp, invo_name, &fd, NULL), NULL);
+
+                /* Strip each CR before a LF.  A CR is held back until the next
+                   character shows whether a LF follows; every other CR, even
+                   one that ends the content, is written unchanged. */
+                if (fd == -1) status = NOTOK;
+                last_char_was_cr = 0;
+                fseeko (*fp, begin, SEEK_SET);
+                for (remaining = length; status == OK  &&  remaining > 0; ) {
+                    size_t len = 0;
+
+                    want = sizeof buffer;
+                    if (remaining < want) want = remaining;
+                    if ((bytes_read = fread (buffer, 1, want, *fp)) == 0) break;
+                    remaining -= bytes_read;
+                    for (i = 0; i < bytes_read; ++i) {
+                        if (last_char_was_cr  &&  buffer[i] != '\n') {
+                            out[len++] = '\r';
+                        }
+                        last_char_was_cr = buffer[i] == '\r';
+                        if (! last_char_was_cr) out[len++] = buffer[i];
+                    }
+                    if (write (fd, out, len) != (ssize_t) len) status = NOTOK;
+                }
+                if (ferror (*fp)) status = NOTOK;
+                if (status == OK  &&  last_char_was_cr  &&
+                    write (fd, "\r", 1) != 1) status = NOTOK;
+                if ((fd != -1  &&  close (fd))  ||  status != OK) {
+                    admonish (NULL, "unable to write temporary file %s",
+                              stripped_content_file);
+                    unlink (stripped_content_file);
+                    free (stripped_content_file);
+                    status = NOTOK;
+                } else {
+                    /* Replace the decoded file with the converted one. */
+                    if (ct->c_cefile.ce_file) {
+                        if (ct->c_cefile.ce_unlink) {
+                            unlink (ct->c_cefile.ce_file);
+                        }
+                        free (ct->c_cefile.ce_file);
+                    }
+                    ct->c_cefile.ce_file = stripped_content_file;
+                    ct->c_cefile.ce_unlink = 1;
+                }
+            }
+
+            if (opened_input_file) {
+                fclose (*fp);
+                *fp = NULL;
+            }
+        }
+    }
+
+    free (codeset);
+    return status;
+}
+
+
+/* Returns the value of the charset parameter of the content type of ct,
+   or "US-ASCII" if there is none (RFC 2045, Sec. 5.2).  The result is
+   owned by ct or is a string literal:  callers must not modify or free
+   it.  A charset parameter with a NULL value is set to the default.
+   NOTE(review): that stores a literal in ci_values; confirm those
+   values are never freed or modified. */
+static char *
+content_codeset (CT ct) {
+    CI ctinfo = &ct->c_ctinfo;
+    char **ap, **vp;
+
+    for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) {
+        if (! strcasecmp (*ap, "charset")) {
+            if (*vp == NULL) *vp = "US-ASCII";
+            return *vp;
+        }
+    }
+
+    /* No charset parameter at all. */
+    return "US-ASCII";
+}
+
+
 static int
 convert_codesets (CT ct, char *dest_codeset, int *message_mods) {
     int status = OK;
@@ -1627,39 +1771,34 @@ convert_codesets (CT ct, char *dest_codeset, int *message_mods) {
 
 static int
 convert_codeset (CT ct, char *dest_codeset, int *message_mods) {
-    const char *const charset = "charset";
-    char **src_codeset = NULL;
-    char *default_codeset = NULL;
-    CI ctinfo = &ct->c_ctinfo;
-    char **ap, **vp;
+    char *src_codeset = content_codeset (ct);
     int status = OK;
 
-    for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) {
-        if (! strcasecmp (*ap, charset)) {
-            src_codeset = vp;
-            break;
-        }
-    }
-    /* RFC 2045, Sec. 5.2:  default to us-ascii. */
-    if (src_codeset == NULL) src_codeset = &default_codeset;
-    if (*src_codeset == NULL) *src_codeset = "US-ASCII";
+    /* norm_charmap() is case sensitive. */
+    char *src_codeset_u = upcase (src_codeset);
+    char *dest_codeset_u = upcase (dest_codeset);
+    int different_codesets =
+        strcmp (norm_charmap (src_codeset), norm_charmap (dest_codeset));
+
+    free (dest_codeset_u);
+    free (src_codeset_u);
 
-    if (strcmp (norm_charmap (*src_codeset), norm_charmap (dest_codeset))) {
+    if (different_codesets) {
 #ifdef HAVE_ICONV
         iconv_t conv_desc = NULL;
         char *dest;
         int fd = -1;
         char **file = NULL;
         FILE **fp = NULL;
-        long begin;
-        long end;
+        size_t begin;
+        ssize_t end;
         int opened_input_file = 0;
         char src_buffer[BUFSIZ];
         HF hf;
 
-        if ((conv_desc = iconv_open (dest_codeset, *src_codeset)) ==
+        if ((conv_desc = iconv_open (dest_codeset, src_codeset)) ==
             (iconv_t) -1) {
-            advise (NULL, "Can't convert %s to %s", *src_codeset, dest_codeset);
+            advise (NULL, "Can't convert %s to %s", src_codeset, dest_codeset);
             return -1;
         }
 
@@ -1673,8 +1812,8 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) {
         } else if (ct->c_file) {
             file = &ct->c_file;
             fp = &ct->c_fp;
-            begin = ct->c_begin;
-            end = ct->c_end;
+            begin = (size_t) ct->c_begin;
+            end = (ssize_t) ct->c_end;
         } /* else no input file: shouldn't happen */
 
         if (file  &&  *file  &&  fp) {
@@ -1690,7 +1829,7 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) {
 
         if (fp  &&  *fp) {
             size_t inbytes;
-            size_t max = end > 0  ?  (size_t) (end-begin)  :  sizeof src_buffer;
+            ssize_t max = end > 0  ?  end - begin  :  sizeof src_buffer;
 
             fseeko (*fp, begin, SEEK_SET);
             while (status == OK  &&  max > 0  &&
@@ -1734,11 +1873,11 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) {
             ++*message_mods;
             if (verbosw) {
                 report (ct->c_partno, ct->c_file, "convert %s to %s",
-                        *src_codeset, dest_codeset);
+                        src_codeset, dest_codeset);
             }
 
             /* Update ci_attrs. */
-            *src_codeset = dest_codeset;
+            src_codeset = dest_codeset;
 
             /* Update ct->c_ctline. */
             if (ct->c_ctline) {
@@ -1768,7 +1907,7 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) {
 #else  /* ! HAVE_ICONV */
         NMH_UNUSED (message_mods);
 
-        advise (NULL, "Can't convert %s to %s without iconv", *src_codeset,
+        advise (NULL, "Can't convert %s to %s without iconv", src_codeset,
                 dest_codeset);
         status = NOTOK;
 #endif /* ! HAVE_ICONV */