X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/63764e68d4fce20854dc2ba18a7244cb6a6769ca..fbb024c4a694e990a4f7430503783f03882bec8b:/uip/mhfixmsg.c?ds=sidebyside diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c index 1f240e45..602eb907 100644 --- a/uip/mhfixmsg.c +++ b/uip/mhfixmsg.c @@ -11,8 +11,6 @@ #include #include #include -#include -#include #include #ifdef HAVE_ICONV # include @@ -31,6 +29,8 @@ X("nofixcte", 0, NFIXCTESW) \ X("file file", 0, FILESW) \ X("outfile file", 0, OUTFILESW) \ + X("rmmproc program", 0, RPROCSW) \ + X("normmproc", 0, NRPRCSW) \ X("verbose", 0, VERBSW) \ X("noverbose", 0, NVERBSW) \ X("version", 0, VERSIONSW) \ @@ -103,8 +103,10 @@ static void transfer_noncontent_headers (CT, CT); static int set_ct_type (CT, int type, int subtype, int encoding); static int decode_text_parts (CT, int, int *); static int content_encoding (CT); +static int strip_crs (CT); static int convert_codesets (CT, char *, int *); static int convert_codeset (CT, char *, int *); +static char *content_codeset (CT); static int write_content (CT, char *, char *, int, int); static int remove_file (char *); static void report (char *, char *, char *, ...); @@ -216,6 +218,14 @@ main (int argc, char **argv) { outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE); continue; + case RPROCSW: + if (!(rmmproc = *argp++) || *rmmproc == '-') + adios (NULL, "missing argument to %s", argp[-2]); + continue; + case NRPRCSW: + rmmproc = NULL; + continue; + case VERBSW: verbosw = 1; continue; @@ -297,8 +307,7 @@ main (int argc, char **argv) { adios (NULL, "out of memory"); ctp = cts; - if ((ct = parse_mime (file))) - *ctp++ = ct; + if ((ct = parse_mime (file))) *ctp++ = ct; } else { /* * message(s) are coming from a folder @@ -315,7 +324,7 @@ main (int argc, char **argv) { adios (maildir, "unable to change directory to"); /* read folder and create message structure */ - if (! (mp = folder_read (folder))) + if (! (mp = folder_read (folder, 1))) adios (NULL, "unable to read folder %s", folder); /* check for empty folder */ @@ -337,24 +346,14 @@ main (int argc, char **argv) { char *msgnam; msgnam = m_name (msgnum); - if ((ct = parse_mime (msgnam))) - *ctp++ = ct; + if ((ct = parse_mime (msgnam))) *ctp++ = ct; } } - /* - * This is a hack. If we are using an external rmmproc, - * then save the current folder to the context file, - * so the external rmmproc will remove files from the correct - * directory. This should be moved to folder_delmsgs(). - */ - if (rmmproc) { - context_replace (pfolder, folder);/* update current folder */ - seq_setcur (mp, mp->hghsel); /* update current message */ - seq_save (mp); /* synchronize sequences */ - context_save (); /* save the context file */ - fflush (stdout); - } + seq_setcur (mp, mp->hghsel); /* update current message */ + seq_save (mp); /* synchronize sequences */ + context_replace (pfolder, folder);/* update current folder */ + context_save (); /* save the context file */ } if (*cts) { @@ -458,6 +457,7 @@ mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) { } if (modify_inplace) { + if (status != OK) unlink (outfile); free (outfile); outfile = NULL; } @@ -518,12 +518,14 @@ fix_boundary (CT *ct, int *message_mods) { char *filename = add ((*ct)->c_file, NULL); free_content (*ct); - *ct = parse_mime (fixed); - (*ct)->c_unlink = 1; - - ++*message_mods; - if (verbosw) { - report (NULL, filename, "fix multipart boundary"); + if ((*ct = parse_mime (fixed))) { + (*ct)->c_unlink = 1; + + ++*message_mods; + if (verbosw) { + report (NULL, filename, + "fix multipart boundary"); + } } free (filename); } else { @@ -599,15 +601,18 @@ get_multipart_boundary (CT ct, char **part_boundary) { if (cp && cp - buffer >= 2 && *--cp == '-' && *--cp == '-' && (cp > buffer && *--cp == '\n')) { + status = OK; break; } - /* Else the start and end boundaries didn't match, or - the start boundary doesn't begin with "\n--" (or - "--" if at the beginning of buffer). Keep trying. */ } else { + /* The start and end boundaries didn't match, or the + start boundary doesn't begin with "\n--" (or "--" + if at the beginning of buffer). Keep trying. */ status = NOTOK; } } + } else { + status = NOTOK; } if (status == OK) { @@ -923,27 +928,33 @@ ensure_text_plain (CT *ct, CT parent, int *message_mods) { } else { /* Slip new text/plain part into a new multipart/alternative. */ CT tp_part = build_text_plain_part (*ct); - CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART, - MULTI_ALTERNATE); - struct multipart *mp = (struct multipart *) mp_alt->c_ctparams; - - if (mp && mp->mp_parts && (mp->mp_parts->mp_part = tp_part)) { - /* Make the new multipart/alternative the parent. */ - *ct = mp_alt; - - ++*message_mods; - if (verbosw) { - report ((*ct)->c_partno, (*ct)->c_file, - "insert text/plain part"); + + if (tp_part) { + CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART, + MULTI_ALTERNATE); + if (mp_alt) { + struct multipart *mp = + (struct multipart *) mp_alt->c_ctparams; + + if (mp && mp->mp_parts) { + mp->mp_parts->mp_part = tp_part; + /* Make the new multipart/alternative the parent. */ + *ct = mp_alt; + + ++*message_mods; + if (verbosw) { + report ((*ct)->c_partno, (*ct)->c_file, + "insert text/plain part"); + } + } else { + free_content (tp_part); + free_content (mp_alt); + status = NOTOK; + } + } else { + status = NOTOK; } } else { - free_content (tp_part); - - /* Undo enough of what build_multipart_alt() did so - that free_content() can be called on mp_alt. */ - mp->mp_parts->mp_part = NULL; - mp->mp_parts->mp_next->mp_part = NULL; - free_content (mp_alt); status = NOTOK; } } @@ -1149,24 +1160,15 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { } -/* Identifies 7bit or 8bit content based on charset, if specified. */ +/* Identifies 7bit or 8bit content based on charset. */ static int charset_encoding (CT ct) { - int encoding = CE_8BIT; - CI ctinfo = &ct->c_ctinfo; - char **ap, **vp; - - for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) { - if (! strcasecmp (*ap, "charset")) { - /* norm_charmap() is case sensitive. */ - char *ch = upcase (*vp); - - if (! strcmp (norm_charmap (ch), "US-ASCII")) encoding = CE_7BIT; - free (ch); - break; - } - } + /* norm_charmap() is case sensitive. */ + char *codeset = upcase (content_codeset (ct)); + int encoding = + strcmp (norm_charmap (codeset), "US-ASCII") ? CE_8BIT : CE_7BIT; + free (codeset); return encoding; } @@ -1443,6 +1445,8 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { if (decode_part (ct) == OK && ct->c_cefile.ce_file) { if ((ct_encoding = content_encoding (ct)) == CE_BINARY && encoding != CE_BINARY) { + /* The decoding isn't acceptable so discard it. + Leave status as OK to allow other transformations. */ if (verbosw) { report (ct->c_partno, ct->c_file, "will not decode%s because it is binary", @@ -1453,7 +1457,10 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { unlink (ct->c_cefile.ce_file); free (ct->c_cefile.ce_file); ct->c_cefile.ce_file = NULL; - } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) { + } else if (ct->c_encoding == CE_QUOTED && + ct_encoding == CE_8BIT && encoding == CE_7BIT) { + /* The decoding isn't acceptable so discard it. + Leave status as OK to allow other transformations. */ if (verbosw) { report (ct->c_partno, ct->c_file, "will not decode%s because it is 8bit", @@ -1465,15 +1472,20 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { free (ct->c_cefile.ce_file); ct->c_cefile.ce_file = NULL; } else { - int enc = ct_encoding == CE_BINARY - ? CE_BINARY - : charset_encoding (ct); + int enc; + if (ct_encoding == CE_BINARY) + enc = CE_BINARY; + else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) + enc = CE_QUOTED; + else + enc = charset_encoding (ct); if (set_ce (ct, enc) == OK) { ++*message_mods; if (verbosw) { report (ct->c_partno, ct->c_file, "decode%s", ct->c_ctline ? ct->c_ctline : ""); } + strip_crs (ct); } else { status = NOTOK; } @@ -1483,9 +1495,14 @@ decode_text_parts (CT ct, int encoding, int *message_mods) { } break; } + case CE_8BIT: + case CE_7BIT: + strip_crs (ct); + break; default: break; } + break; case CT_MULTIPART: { @@ -1565,6 +1582,151 @@ content_encoding (CT ct) { } +static int +strip_crs (CT ct) { + /* norm_charmap() is case sensitive. */ + char *codeset = upcase (content_codeset (ct)); + int status = OK; + + /* Only strip carriage returns if content is ASCII. */ + if (! strcmp (norm_charmap (codeset), "US-ASCII")) { + char **file = NULL; + FILE **fp = NULL; + size_t begin; + ssize_t end; + int has_crs = 0; + int opened_input_file = 0; + + if (ct->c_cefile.ce_file) { + file = &ct->c_cefile.ce_file; + fp = &ct->c_cefile.ce_fp; + begin = end = 0; + } else if (ct->c_file) { + file = &ct->c_file; + fp = &ct->c_fp; + begin = (size_t) ct->c_begin; + end = (ssize_t) ct->c_end; + } /* else don't know where the content is */ + + if (file && *file && fp) { + if (! *fp) { + if ((*fp = fopen (*file, "r")) == NULL) { + advise (*file, "unable to open for reading"); + status = NOTOK; + } else { + opened_input_file = 1; + } + } + } + + if (fp && *fp) { + char buffer[BUFSIZ]; + size_t bytes_read; + ssize_t max = end > 0 ? end - begin : sizeof buffer; + + fseeko (*fp, begin, SEEK_SET); + while ((bytes_read = fread (buffer, 1, max, *fp)) > 0) { + /* Look for CR followed by a LF. This is supposed to + be text so there should be LF's. If not, don't + modify the content. */ + char *cp; + size_t i; + int last_char_was_cr = 0; + + if (end > 0) max -= bytes_read; + + for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) { + if (*cp == '\n' && last_char_was_cr) { + has_crs = 1; + break; + } + + last_char_was_cr = *cp == '\r' ? 1 : 0; + } + } + + if (has_crs) { + int fd; + char *stripped_content_file = + add (m_mktemp2 (tmp, invo_name, &fd, NULL), NULL); + + /* Strip each CR before a LF from the content. */ + fseeko (*fp, begin, SEEK_SET); + while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) { + char *cp; + size_t i; + int last_char_was_cr = 0; + + for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) { + if (*cp == '\r') { + last_char_was_cr = 1; + } else if (last_char_was_cr) { + if (*cp != '\n') write (fd, "\r", 1); + write (fd, cp, 1); + last_char_was_cr = 0; + } else { + write (fd, cp, 1); + last_char_was_cr = 0; + } + + } + } + + if (close (fd)) { + admonish (NULL, "unable to write temporaty file %s", + stripped_content_file); + unlink (stripped_content_file); + status = NOTOK; + } + + if (status == OK) { + /* Replace the decoded file with the converted one. */ + if (ct->c_cefile.ce_file) { + if (ct->c_cefile.ce_unlink) { + unlink (ct->c_cefile.ce_file); + } + free (ct->c_cefile.ce_file); + } + ct->c_cefile.ce_file = stripped_content_file; + ct->c_cefile.ce_unlink = 1; + } + } + + if (opened_input_file) { + fclose (*fp); + *fp = NULL; + } + } + } + + free (codeset); + return status; +} + + +char * +content_codeset (CT ct) { + const char *const charset = "charset"; + char *default_codeset = NULL; + CI ctinfo = &ct->c_ctinfo; + char **ap, **vp; + char **src_codeset = NULL; + + for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) { + if (! strcasecmp (*ap, charset)) { + src_codeset = vp; + break; + } + } + + /* RFC 2045, Sec. 5.2: default to us-ascii. */ + if (src_codeset == NULL) src_codeset = &default_codeset; + if (*src_codeset == NULL) *src_codeset = "US-ASCII"; + + return *src_codeset; +} + + static int convert_codesets (CT ct, char *dest_codeset, int *message_mods) { int status = OK; @@ -1609,39 +1771,34 @@ convert_codesets (CT ct, char *dest_codeset, int *message_mods) { static int convert_codeset (CT ct, char *dest_codeset, int *message_mods) { - const char *const charset = "charset"; - char **src_codeset = NULL; - char *default_codeset = NULL; - CI ctinfo = &ct->c_ctinfo; - char **ap, **vp; + char *src_codeset = content_codeset (ct); int status = OK; - for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) { - if (! strcasecmp (*ap, charset)) { - src_codeset = vp; - break; - } - } - /* RFC 2045, Sec. 5.2: default to us-ascii. */ - if (src_codeset == NULL) src_codeset = &default_codeset; - if (*src_codeset == NULL) *src_codeset = "US-ASCII"; + /* norm_charmap() is case sensitive. */ + char *src_codeset_u = upcase (src_codeset); + char *dest_codeset_u = upcase (dest_codeset); + int different_codesets = + strcmp (norm_charmap (src_codeset), norm_charmap (dest_codeset)); - if (strcmp (norm_charmap (*src_codeset), norm_charmap (dest_codeset))) { + free (dest_codeset_u); + free (src_codeset_u); + + if (different_codesets) { #ifdef HAVE_ICONV iconv_t conv_desc = NULL; char *dest; int fd = -1; char **file = NULL; FILE **fp = NULL; - long begin; - long end; + size_t begin; + ssize_t end; int opened_input_file = 0; char src_buffer[BUFSIZ]; HF hf; - if ((conv_desc = iconv_open (dest_codeset, *src_codeset)) == + if ((conv_desc = iconv_open (dest_codeset, src_codeset)) == (iconv_t) -1) { - advise (NULL, "Can't convert %s to %s", *src_codeset, dest_codeset); + advise (NULL, "Can't convert %s to %s", src_codeset, dest_codeset); return -1; } @@ -1655,8 +1812,8 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) { } else if (ct->c_file) { file = &ct->c_file; fp = &ct->c_fp; - begin = ct->c_begin; - end = ct->c_end; + begin = (size_t) ct->c_begin; + end = (ssize_t) ct->c_end; } /* else no input file: shouldn't happen */ if (file && *file && fp) { @@ -1672,7 +1829,7 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) { if (fp && *fp) { size_t inbytes; - size_t max = end > 0 ? (size_t) (end-begin) : sizeof src_buffer; + ssize_t max = end > 0 ? end - begin : sizeof src_buffer; fseeko (*fp, begin, SEEK_SET); while (status == OK && max > 0 && @@ -1716,11 +1873,11 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) { ++*message_mods; if (verbosw) { report (ct->c_partno, ct->c_file, "convert %s to %s", - *src_codeset, dest_codeset); + src_codeset, dest_codeset); } /* Update ci_attrs. */ - *src_codeset = dest_codeset; + src_codeset = dest_codeset; /* Update ct->c_ctline. */ if (ct->c_ctline) { @@ -1750,7 +1907,7 @@ convert_codeset (CT ct, char *dest_codeset, int *message_mods) { #else /* ! HAVE_ICONV */ NMH_UNUSED (message_mods); - advise (NULL, "Can't convert %s to %s without iconv", *src_codeset, + advise (NULL, "Can't convert %s to %s without iconv", src_codeset, dest_codeset); status = NOTOK; #endif /* ! HAVE_ICONV */ @@ -1774,14 +1931,39 @@ write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, if (remove_file (infile) == OK) { if (rename (outfile, infile)) { - /* The -file argument processing used path() to - expand filename to absolute path. */ - int file = ct->c_file && ct->c_file[0] == '/'; - - admonish (NULL, "unable to rename %s %s to %s", - file ? "file" : "message", outfile, infile); + /* Rename didn't work, possibly because of an + attempt to rename across filesystems. Try + brute force copy. */ + int old = open (outfile, O_RDONLY); + int new = + open (infile, O_WRONLY | O_CREAT, m_gmprot ()); + int i = -1; + + if (old != -1 && new != -1) { + char buffer[BUFSIZ]; + + while ((i = read (old, buffer, sizeof buffer)) > + 0) { + if (write (new, buffer, i) != i) { + i = -1; + break; + } + } + } + if (new != -1) close (new); + if (old != -1) close (old); unlink (outfile); - status = NOTOK; + + if (i < 0) { + /* The -file argument processing used path() to + expand filename to absolute path. */ + int file = ct->c_file && ct->c_file[0] == '/'; + + admonish (NULL, "unable to rename %s %s to %s", + file ? "file" : "message", outfile, + infile); + status = NOTOK; + } } } else { admonish (NULL, "unable to remove input file %s, " @@ -1791,6 +1973,8 @@ write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, } free (infile); + } else { + status = NOTOK; } } else { /* No modifications and didn't need the tmp outfile. */ @@ -1815,13 +1999,10 @@ static int remove_file (char *file) { if (rmmproc) { char *rmm_command = concat (rmmproc, " ", file, NULL); - int status = WIFEXITED (status = system (rmm_command)) - ? WEXITSTATUS (status) - : NOTOK; + int status = system (rmm_command); free (rmm_command); - - return status; + return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK; } else { /* This is OK for a non-message file, it still uses the BACKUP_PREFIX form. The backup file will be in the same