/*
- * mhfixmsg.c -- rewrite a message with various tranformations
+ * mhfixmsg.c -- rewrite a message with various transformations
*
* This code is Copyright (c) 2002 and 2013, by the authors of nmh.
* See the COPYRIGHT file in the root directory of the nmh
#include <fcntl.h>
#define MHFIXMSG_SWITCHES \
- X("decodetext 8bit|7bit", 0, DECODETEXTSW) \
+ X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
X("nodecodetext", 0, NDECODETEXTSW) \
X("decodetypes", 0, DECODETYPESW) \
+ X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
+ X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
X("textcharset", 0, TEXTCHARSETSW) \
X("notextcharset", 0, NTEXTCHARSETSW) \
X("reformat", 0, REFORMATSW) \
X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
X("fixboundary", 0, FIXBOUNDARYSW) \
X("nofixboundary", 0, NFIXBOUNDARYSW) \
- X("fixcte", 0, FIXCTESW) \
- X("nofixcte", 0, NFIXCTESW) \
+ X("fixcte", 0, FIXCOMPOSITECTESW) \
+ X("nofixcte", 0, NFIXCOMPOSITECTESW) \
X("fixtype mimetype", 0, FIXTYPESW) \
X("file file", 0, FILESW) \
X("outfile file", 0, OUTFILESW) \
*/
typedef struct fix_transformations {
int fixboundary;
- int fixcte;
+ int fixcompositecte; /* fix C-T-E of composite (message, multipart) parts */
svector_t fixtypes;
int reformat;
int replacetextplain;
int decodetext;
char *decodetypes;
+ /* Nonzero to strip carriage returns from decoded content
+    (-nocrlflinebreaks); zero, the default, keeps the CRLF linebreaks
+    called for by RFC 2046 Sec. 4.1.1, par.1. */
+ int lf_line_endings;
char *textcharset;
} fix_transformations;
int mhfixmsgsbr (CT *, const fix_transformations *, char *);
static int fix_boundary (CT *, int *);
+static int copy_input_to_output (const char *, const char *);
static int get_multipart_boundary (CT, char **);
static int replace_boundary (CT, char *, char *);
static int fix_types (CT, svector_t, int *);
static char *replace_substring (char **, const char *, const char *);
static char *remove_parameter (char *, const char *);
-static int fix_multipart_cte (CT, int *);
+static int fix_composite_cte (CT, int *);
static int set_ce (CT, int);
static int ensure_text_plain (CT *, CT, int *, int);
static int find_textplain_sibling (CT, int, int *);
static void copy_ctinfo (CI, CI);
static int decode_part (CT);
static int reformat_part (CT, char *, char *, char *, int);
-static int charset_encoding (CT);
static CT build_multipart_alt (CT, CT, int, int);
static int boundary_in_content (FILE **, char *, const char *);
static void transfer_noncontent_headers (CT, CT);
static int should_decode(const char *, const char *, const char *);
static int content_encoding (CT, const char **);
static int strip_crs (CT, int *);
+static void update_cte (CT);
+static int least_restrictive_encoding (CT);
+static int less_restrictive (int, int);
static int convert_charsets (CT, char *, int *);
static int fix_always (CT, int *);
-static int write_content (CT, char *, char *, int, int);
-static int remove_file (char *);
+static int fix_filename_param (char *, char *, PM *, PM *);
+static int fix_filename_encoding (CT);
+static int write_content (CT, const char *, char *, int, int);
+static void set_text_ctparams(CT, char *, int);
+static int remove_file (const char *);
static void report (char *, char *, char *, char *, ...);
static void pipeser (int);
int chgflag = 1;
int status = OK;
fix_transformations fx;
- fx.reformat = fx.fixcte = fx.fixboundary = 1;
+ fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
fx.fixtypes = NULL;
fx.replacetextplain = 0;
fx.decodetext = CE_8BIT;
- fx.decodetypes = "text"; /* Default to all text content. */
+ fx.decodetypes = "text,application/ics"; /* Default, per man page. */
+ fx.lf_line_endings = 0;
fx.textcharset = NULL;
- if (nmh_init(argv[0], 1)) { return 1; }
+ if (nmh_init(argv[0], 2)) { return 1; }
done = freects_done;
done (0);
case DECODETEXTSW:
- if (! (cp = *argp++) || *cp == '-')
+ if (! (cp = *argp++) || *cp == '-') {
adios (NULL, "missing argument to %s", argp[-2]);
+ }
if (! strcasecmp (cp, "8bit")) {
fx.decodetext = CE_8BIT;
} else if (! strcasecmp (cp, "7bit")) {
fx.decodetext = CE_7BIT;
+ } else if (! strcasecmp (cp, "binary")) {
+ fx.decodetext = CE_BINARY;
} else {
adios (NULL, "invalid argument to %s", argp[-2]);
}
fx.decodetext = 0;
continue;
case DECODETYPESW:
- if (! (cp = *argp++) || *cp == '-')
+ if (! (cp = *argp++) || *cp == '-') {
adios (NULL, "missing argument to %s", argp[-2]);
+ }
fx.decodetypes = cp;
continue;
+ case CRLFLINEBREAKSSW:
+ fx.lf_line_endings = 0;
+ continue;
+ case NCRLFLINEBREAKSSW:
+ fx.lf_line_endings = 1;
+ continue;
case TEXTCHARSETSW:
- if (! (cp = *argp++) || (*cp == '-' && cp[1]))
+ if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
adios (NULL, "missing argument to %s", argp[-2]);
+ }
fx.textcharset = cp;
continue;
case NTEXTCHARSETSW:
case NFIXBOUNDARYSW:
fx.fixboundary = 0;
continue;
- case FIXCTESW:
- fx.fixcte = 1;
+ case FIXCOMPOSITECTESW:
+ fx.fixcompositecte = 1;
continue;
- case NFIXCTESW:
- fx.fixcte = 0;
+ case NFIXCOMPOSITECTESW:
+ fx.fixcompositecte = 0;
continue;
case FIXTYPESW:
- if (! (cp = *argp++) || (*cp == '-' && cp[1]))
+ if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
adios (NULL, "missing argument to %s", argp[-2]);
+ }
if (! strncasecmp (cp, "multipart/", 10) ||
! strncasecmp (cp, "message/", 8)) {
adios (NULL, "-fixtype %s not allowed", cp);
fx.replacetextplain = 0;
continue;
case FILESW:
- if (! (cp = *argp++) || (*cp == '-' && cp[1]))
+ if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
adios (NULL, "missing argument to %s", argp[-2]);
+ }
file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
continue;
case OUTFILESW:
- if (! (cp = *argp++) || (*cp == '-' && cp[1]))
+ if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
adios (NULL, "missing argument to %s", argp[-2]);
+ }
outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
continue;
case RPROCSW:
- if (!(rmmproc = *argp++) || *rmmproc == '-')
+ if (!(rmmproc = *argp++) || *rmmproc == '-') {
adios (NULL, "missing argument to %s", argp[-2]);
+ }
continue;
case NRPRCSW:
rmmproc = NULL;
}
}
if (*cp == '+' || *cp == '@') {
- if (folder)
+ if (folder) {
adios (NULL, "only one folder at a time!");
- else
+ } else {
folder = pluspath (cp);
+ }
} else {
if (*cp == '/') {
/* Interpret a full path as a filename, not a message. */
suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
suppress_extraneous_trailing_semicolon_warning = 1;
- if (! context_find ("path"))
+ if (! context_find ("path")) {
free (path ("./", TFOLDER));
+ }
- if (file && msgs.size)
+ if (file && msgs.size) {
adios (NULL, "cannot specify msg and file at same time!");
+ }
/*
* check if message is coming from file
}
ctp = cts;
- if ((ct = parse_mime (file))) { *ctp++ = ct; }
+ if ((ct = parse_mime (file))) {
+ set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
+ *ctp++ = ct;
+ } else {
+ advise (NULL, "unable to parse message from file %s", file);
+ status = NOTOK;
+
+ /* If there's an outfile, pass the input message unchanged, so the message won't
+ get dropped from a pipeline. */
+ if (outfile) {
+ /* Something went wrong. Output might be expected, such as if this were run
+ as a filter. Just copy the input to the output. */
+ if (copy_input_to_output (file, outfile) != OK) {
+ advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
+ }
+ }
+ }
} else {
/*
* message(s) are coming from a folder
*/
CT ct;
- if (! msgs.size)
+ if (! msgs.size) {
app_msgarg(&msgs, "cur");
- if (! folder)
+ }
+ if (! folder) {
folder = getfolder (1);
+ }
maildir = m_maildir (folder);
- if (chdir (maildir) == NOTOK)
+ if (chdir (maildir) == NOTOK) {
adios (maildir, "unable to change directory to");
+ }
/* read folder and create message structure */
- if (! (mp = folder_read (folder, 1)))
+ if (! (mp = folder_read (folder, 1))) {
adios (NULL, "unable to read folder %s", folder);
+ }
/* check for empty folder */
- if (mp->nummsg == 0)
+ if (mp->nummsg == 0) {
adios (NULL, "no messages in %s", folder);
+ }
/* parse all the message ranges/sequences and set SELECTED */
for (msgnum = 0; msgnum < msgs.size; msgnum++)
- if (! m_convert (mp, msgs.msgs[msgnum]))
+ if (! m_convert (mp, msgs.msgs[msgnum])) {
done (1);
+ }
seq_setprev (mp); /* set the previous-sequence */
if (! (cts =
char *msgnam;
msgnam = m_name (msgnum);
- if ((ct = parse_mime (msgnam))) { *ctp++ = ct; }
+ if ((ct = parse_mime (msgnam))) {
+ set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
+ *ctp++ = ct;
+ } else {
+ advise (NULL, "unable to parse message %s", msgnam);
+ status = NOTOK;
+
+ /* If there's an outfile, pass the input message unchanged, so the message won't
+ get dropped from a pipeline. */
+ if (outfile) {
+ /* Something went wrong. Output might be expected, such as if this were run
+ as a filter. Just copy the input to the output. */
+ const char *input_filename = path (msgnam, TFILE);
+
+ if (copy_input_to_output (input_filename, outfile) != OK) {
+ advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
+ }
+ }
+ }
}
}
if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
free (outfile);
free (file);
+ free (folder);
+ free (arguments);
/* done is freects_done, which will clean up all of cts. */
done (status);
}
+/*
+ * Apply transformations to one message.
+ */
int
mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
/* Store input filename in case one of the transformations, i.e.,
if (status == OK && fx->fixtypes != NULL) {
status = fix_types (*ctp, fx->fixtypes, &message_mods);
}
- if (status == OK && fx->fixcte) {
- status = fix_multipart_cte (*ctp, &message_mods);
+ if (status == OK && fx->fixcompositecte) {
+ status = fix_composite_cte (*ctp, &message_mods);
}
if (status == OK && fx->reformat) {
status =
ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
}
if (status == OK && fx->decodetext) {
- status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes, &message_mods);
+ status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
+ &message_mods);
+ update_cte (*ctp);
}
if (status == OK && fx->textcharset != NULL) {
status = convert_charsets (*ctp, fx->textcharset, &message_mods);
/* Something went wrong. Output might be expected, such
as if this were run as a filter. Just copy the input
to the output. */
- int in = open (input_filename, O_RDONLY);
- int out = strcmp (outfile, "-")
- ? open (outfile, O_WRONLY | O_CREAT, m_gmprot ())
- : STDOUT_FILENO;
-
- if (in != -1 && out != -1) {
- cpydata (in, out, input_filename, outfile);
- } else {
- status = NOTOK;
+ if (copy_input_to_output (input_filename, outfile) != OK) {
+ advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
}
-
- close (out);
- close (in);
}
if (modify_inplace) {
}
+/*
+ * Duplicate the input message into the output, unmodified.  An output
+ * name of "-" means standard output; any other name is opened for
+ * writing and created if necessary.  Meant for use when the message is
+ * not being modified in place, e.g., when running as a filter in a
+ * pipeline.  Returns OK if both files could be opened, NOTOK otherwise.
+ */
+static int
+copy_input_to_output (const char *input_filename, const char *output_filename) {
+    const int src_fd = open (input_filename, O_RDONLY);
+    int dst_fd;
+    int result;
+
+    if (strcmp (output_filename, "-") == 0) {
+        dst_fd = STDOUT_FILENO;
+    } else {
+        dst_fd = open (output_filename, O_WRONLY | O_CREAT, m_gmprot ());
+    }
+
+    if (src_fd == -1 || dst_fd == -1) {
+        result = NOTOK;
+    } else {
+        cpydata (src_fd, dst_fd, input_filename, output_filename);
+        result = OK;
+    }
+
+    /* Both descriptors are closed regardless of whether the opens
+       succeeded; note that this includes standard output. */
+    close (dst_fd);
+    close (src_fd);
+
+    return result;
+}
+
+
+/*
+ * Fix mismatched outer level boundary.
+ */
static int
fix_boundary (CT *ct, int *message_mods) {
struct multipart *mp;
}
free (part_boundary);
+ } else {
+ /* Couldn't fix the boundary. Report failure so that mhfixmsg
+ doesn't modify the message. */
+ status = NOTOK;
}
+ } else {
+ /* No multipart struct, even though the content type is
+ CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
+ the message. */
+ status = NOTOK;
}
}
}
+/*
+ * Find boundary at end of multipart.
+ */
static int
get_multipart_boundary (CT ct, char **part_boundary) {
char buffer[BUFSIZ];
}
-/* Open and copy ct->c_file to file, replacing the multipart boundary. */
+/*
+ * Open and copy ct->c_file to file, replacing the multipart boundary.
+ */
static int
replace_boundary (CT ct, char *file, char *boundary) {
FILE *fpin, *fpout;
fprintf (fpout, "%s:%s%s\n", np, new_ctline,
new_params ? new_params : "");
free(new_ctline);
- if (new_params)
+ if (new_params) {
free(new_params);
+ }
}
free (vp);
}
+/*
+ * Fix Content-Type header to reflect the content of its part.
+ */
static int
fix_types (CT ct, svector_t fixtypes, int *message_mods) {
int status = OK;
return status;
}
+
+/*
+ * Replace a substring, allocating space to hold the new one.
+ */
char *
replace_substring (char **str, const char *old, const char *new) {
char *cp;
}
}
+
/*
* Remove a name=value parameter, given just its name, from a header value.
*/
return str;
}
+
+/*
+ * Fix Content-Transfer-Encoding of a composite, e.g., message or multipart, part.
+ * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary.  If it is
+ * anything else, set it to 8bit.
+ */
static int
-fix_multipart_cte (CT ct, int *message_mods) {
+fix_composite_cte (CT ct, int *message_mods) {
int status = OK;
- if (ct->c_type == CT_MULTIPART) {
- struct multipart *m;
- struct part *part;
-
+ if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
ct->c_encoding != CE_BINARY) {
HF hf;
set_ce (ct, CE_8BIT);
}
- m = (struct multipart *) ct->c_ctparams;
- for (part = m->mp_parts; part; part = part->mp_next) {
- if (fix_multipart_cte (part->mp_part, message_mods) != OK) {
- status = NOTOK;
- break;
+ if (ct->c_type == CT_MULTIPART) {
+ struct multipart *m;
+ struct part *part;
+
+ m = (struct multipart *) ct->c_ctparams;
+ for (part = m->mp_parts; part; part = part->mp_next) {
+ if (fix_composite_cte (part->mp_part, message_mods) != OK) {
+ status = NOTOK;
+ break;
+ }
}
}
}
}
+/*
+ * Set content encoding.
+ */
static int
set_ce (CT ct, int encoding) {
const char *ce = ce_str (encoding);
ct->c_cefile.ce_file to the name of the file containing
the contents. */
+ if (ct->c_ceclosefnx) {
+ (*ct->c_ceclosefnx) (ct);
+ }
+
/* Restore the cefile. */
ct->c_cefile = decoded_content_info;
}
-/* Make sure each text part has a corresponding text/plain part. */
+/*
+ * Make sure each text part has a corresponding text/plain part.
+ */
static int
ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
int status = OK;
}
-/* See if there is a sibling text/plain. */
+/*
+ * See if there is a sibling text/plain, and return its subpart number.
+ */
static int
find_textplain_sibling (CT parent, int replacetextplain,
int *new_subpart_number) {
}
+/*
+ * Insert a new text/plain part.
+ */
static int
insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
struct multipart *mp = (struct multipart *) parent->c_ctparams;
}
+/*
+ * Create a text/plain part to go along with non-plain sibling part.
+ */
static CT
build_text_plain_part (CT encoded_part) {
CT tp_part = divide_part (encoded_part);
}
-/* Slip new text/plain part into a new multipart/alternative. */
+/*
+ * Slip new text/plain part into a new multipart/alternative.
+ */
static int
insert_into_new_mp_alt (CT *ct, int *message_mods) {
CT tp_part = build_text_plain_part (*ct);
CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
MULTI_ALTERNATE);
if (mp_alt) {
- struct multipart *mp =
- (struct multipart *) mp_alt->c_ctparams;
+ struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
if (mp && mp->mp_parts) {
mp->mp_parts->mp_part = tp_part;
return status;
}
+
+/*
+ * Clone a MIME part.
+ */
static CT
divide_part (CT ct) {
CT new_part;
}
+/*
+ * Copy the content info from one part to another.
+ */
static void
copy_ctinfo (CI dest, CI src) {
PM s_pm, d_pm;
for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
s_pm->pm_value, 0);
- if (s_pm->pm_charset)
+ if (s_pm->pm_charset) {
d_pm->pm_charset = getcpy(s_pm->pm_charset);
- if (s_pm->pm_lang)
+ }
+ if (s_pm->pm_lang) {
d_pm->pm_lang = getcpy(s_pm->pm_lang);
+ }
}
dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL;
}
+/*
+ * Decode content.
+ */
static int
decode_part (CT ct) {
char *tmp_decoded;
}
-/* Some of the arguments aren't really needed now, but maybe will
- be in the future for other than text types. */
+/*
+ * Reformat content as plain text.
+ * Some of the arguments aren't really needed now, but maybe will
+ * be in the future for other than text types.
+ */
static int
reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
int output_subtype, output_encoding;
+ const char *reason = NULL;
char *cp, *cf;
int status;
/* Set subtype to 0, which is always an UNKNOWN subtype. */
output_subtype = 0;
}
- output_encoding = charset_encoding (ct);
+ output_encoding = content_encoding (ct, &reason);
if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
ct->c_cefile.ce_file = file;
ct->c_cefile.ce_unlink = 1;
}
-/* Identifies 7bit or 8bit content based on charset. */
-static int
-charset_encoding (CT ct) {
- char *ct_charset = content_charset (ct);
- int encoding = strcasecmp (ct_charset, "US-ASCII") ? CE_8BIT : CE_7BIT;
-
- free (ct_charset);
-
- return encoding;
-}
-
-
+/*
+ * Fill in a multipart/alternative part.
+ */
static CT
build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
char *boundary_prefix = "----=_nmh-multipart";
}
-/* Check that the boundary does not appear in the content. */
+/*
+ * Check that the boundary does not appear in the content.
+ */
static int
boundary_in_content (FILE **fp, char *file, const char *boundary) {
char buffer[BUFSIZ];
}
-/* Remove all non-Content headers. */
+/*
+ * Remove all non-Content headers.
+ */
static void
transfer_noncontent_headers (CT old, CT new) {
HF hp, hp_prev;
}
+/*
+ * Set content type.
+ */
static int
set_ct_type (CT ct, int type, int subtype, int encoding) {
char *typename = ct_type_str (type);
}
+/*
+ * It's not necessary to update the charset parameter of a Content-Type
+ * header for a text part. According to RFC 2045 Sec. 6.4, the body
+ * (content) was originally in the specified charset, "and will be in
+ * that character set again after decoding."
+ */
static int
-decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mods) {
+decode_text_parts (CT ct, int encoding, const char *decodetypes,
+ int *message_mods) {
int status = OK;
+ int lf_line_endings = 0;
switch (ct->c_type) {
- case CT_TEXT:
+ case CT_MULTIPART: {
+ struct multipart *m = (struct multipart *) ct->c_ctparams;
+ struct part *part;
+
+ /* Should check to see if the body for this part is encoded?
+ For now, it gets passed along as-is by InitMultiPart(). */
+ for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
+ status = decode_text_parts (part->mp_part, encoding, decodetypes,
+ message_mods);
+ }
+ break;
+ }
+
+ case CT_MESSAGE:
+ if (ct->c_subtype == MESSAGE_EXTERNAL) {
+ struct exbody *e = (struct exbody *) ct->c_ctparams;
+
+ status = decode_text_parts (e->eb_content, encoding, decodetypes,
+ message_mods);
+ }
+ break;
+
+ default:
if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
break;
}
+ lf_line_endings =
+ ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
+
switch (ct->c_encoding) {
case CE_BASE64:
case CE_QUOTED: {
ct->c_cefile.ce_file = NULL;
} else {
int enc;
- if (ct_encoding == CE_BINARY)
+
+ if (ct_encoding == CE_BINARY) {
enc = CE_BINARY;
- else if (ct_encoding == CE_8BIT && encoding == CE_7BIT)
+ } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
enc = CE_QUOTED;
- else
- enc = charset_encoding (ct);
+ } else {
+ enc = ct_encoding;
+ }
if (set_ce (ct, enc) == OK) {
++*message_mods;
if (verbosw) {
report (NULL, ct->c_partno, ct->c_file, "decode%s",
ct->c_ctline ? ct->c_ctline : "");
}
- strip_crs (ct, message_mods);
+ if (lf_line_endings) {
+ strip_crs (ct, message_mods);
+ }
} else {
status = NOTOK;
}
}
case CE_8BIT:
case CE_7BIT:
- strip_crs (ct, message_mods);
+ if (lf_line_endings) {
+ strip_crs (ct, message_mods);
+ }
break;
default:
break;
}
break;
-
- case CT_MULTIPART: {
- struct multipart *m = (struct multipart *) ct->c_ctparams;
- struct part *part;
-
- /* Should check to see if the body for this part is encoded?
- For now, it gets passed along as-is by InitMultiPart(). */
- for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
- status = decode_text_parts (part->mp_part, encoding, decodetypes, message_mods);
- }
- break;
- }
-
- case CT_MESSAGE:
- if (ct->c_subtype == MESSAGE_EXTERNAL) {
- struct exbody *e = (struct exbody *) ct->c_ctparams;
-
- status = decode_text_parts (e->eb_content, encoding, decodetypes, message_mods);
- }
- break;
-
- default:
- break;
}
return status;
}
-/* Determine if the part with type[/subtype] should be decoded, according to
- decodetypes (which came from the -decodetypes switch). */
+/*
+ * Determine if the part with type[/subtype] should be decoded, according to
+ * decodetypes (which came from the -decodetypes switch).
+ */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
/* Quick search for matching type[/subtype] in decodetypes: bracket
}
-/* See if the decoded content is 7bit, 8bit, or binary. It's binary
- if it has any NUL characters, a CR not followed by a LF, or lines
- greater than 998 characters in length. If binary, reason is set
- to a string explaining why. */
+/*
+ * See if the decoded content is 7bit, 8bit, or binary. It's binary
+ * if it has any NUL characters, a CR not followed by a LF, or lines
+ * greater than 998 characters in length. If binary, reason is set
+ * to a string explaining why.
+ */
static int
content_encoding (CT ct, const char **reason) {
CE ce = &ct->c_cefile;
}
+/*
+ * Strip carriage returns from content.
+ */
static int
strip_crs (CT ct, int *message_mods) {
char *charset = content_charset (ct);
}
+/*
+ * Add/update, if necessary, the message C-T-E, based on the least restrictive
+ * of the part C-T-E's.  Nothing is changed if that encoding is unknown or
+ * 7bit.  Otherwise every existing Content-Transfer-Encoding header field
+ * of ct is rewritten, or one is added if ct has none.
+ */
+static void
+update_cte (CT ct) {
+    const int least_restrictive_enc = least_restrictive_encoding (ct);
+
+    if (least_restrictive_enc != CE_UNKNOWN &&
+        least_restrictive_enc != CE_7BIT) {
+        char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
+        HF hf;
+        int found_cte = 0;
+
+        /* Update/add Content-Transfer-Encoding header field. */
+        for (hf = ct->c_first_hf; hf; hf = hf->next) {
+            if (! strcasecmp (ENCODING_FIELD, hf->name)) {
+                free (hf->value);
+                /* Each header field owns its value (it is freed
+                   individually just above), so a malformed message with
+                   more than one C-T-E field must not end up with two
+                   fields sharing a single buffer.  The first match takes
+                   cte itself, any later match gets its own copy. */
+                hf->value = found_cte ? add (cte, NULL) : cte;
+                found_cte = 1;
+            }
+        }
+        if (! found_cte) {
+            add_header (ct, add (ENCODING_FIELD, NULL), cte);
+        }
+    }
+}
+
+
+/*
+ * Walk a message and return the least restrictive encoding (7bit, 8bit,
+ * binary) found among its parts.  Multiparts are searched recursively,
+ * as is the content of a message/external-body; other message subtypes
+ * contribute nothing.  Returns CE_UNKNOWN if no part has one of those
+ * three encodings.
+ */
+static int
+least_restrictive_encoding (CT ct) {
+    int loosest = CE_UNKNOWN;
+
+    if (ct->c_type == CT_MULTIPART) {
+        struct multipart *mp = (struct multipart *) ct->c_ctparams;
+        struct part *p = mp->mp_parts;
+
+        while (p) {
+            const int candidate = least_restrictive_encoding (p->mp_part);
+
+            if (less_restrictive (loosest, candidate)) {
+                loosest = candidate;
+            }
+            p = p->mp_next;
+        }
+    } else if (ct->c_type == CT_MESSAGE) {
+        if (ct->c_subtype == MESSAGE_EXTERNAL) {
+            struct exbody *ext = (struct exbody *) ct->c_ctparams;
+            const int candidate = least_restrictive_encoding (ext->eb_content);
+
+            if (less_restrictive (loosest, candidate)) {
+                loosest = candidate;
+            }
+        }
+    } else {
+        /* Leaf part: its own encoding is the only candidate. */
+        if (less_restrictive (loosest, ct->c_encoding)) {
+            loosest = ct->c_encoding;
+        }
+    }
+
+    return loosest;
+}
+
+
+/*
+ * Rank an encoding by how permissive it is: binary ranks highest, then
+ * 8bit, then 7bit.  Any other value ranks lowest.
+ */
+static int
+cte_permissiveness (int encoding) {
+    if (encoding == CE_BINARY) {
+        return 3;
+    }
+    if (encoding == CE_8BIT) {
+        return 2;
+    }
+    if (encoding == CE_7BIT) {
+        return 1;
+    }
+    return 0;
+}
+
+
+/*
+ * Return whether the second encoding is less restrictive than the first, where
+ * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4.  So,
+ * CE_BINARY is less restrictive than CE_8BIT and
+ * CE_8BIT is less restrictive than CE_7BIT.
+ * A second encoding other than those three is never less restrictive.
+ */
+static int
+less_restrictive (int encoding, int second_encoding) {
+    return cte_permissiveness (second_encoding) > cte_permissiveness (encoding);
+}
+
+
+/*
+ * Convert character set of each part.
+ */
static int
convert_charsets (CT ct, char *dest_charset, int *message_mods) {
int status = OK;
/*
* Fix various problems that aren't handled elsewhere. These
* are fixed unconditionally: there are no switches to disable
- * them. (Currently, "problems" is just one: an extraneous
- * semicolon at the end of a header parameter list.)
+ * them. Currently, "problems" are these:
+ * 1) remove extraneous semicolon at the end of a header parameter list
+ * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
+ * filename parameters in Content-Type and Content-Disposition
+ * headers, respectively.
*/
static int
fix_always (CT ct, int *message_mods) {
default: {
HF hf;
+ if (ct->c_first_hf) {
+ fix_filename_encoding (ct);
+ }
+
for (hf = ct->c_first_hf; hf; hf = hf->next) {
size_t len = strlen (hf->value);
}
+/*
+ * Shared by the two parameter loops in fix_filename_encoding().  If value
+ * looks like an RFC 2047 encoded word, i.e., it begins with "=?" and ends
+ * with "?=", decode it and store the result under name in the parameter
+ * list given by first_pm/last_pm.  Returns 1 if the parameter was
+ * replaced, 0 otherwise.
+ */
+static int
+fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
+    const size_t value_len = strlen (value);
+    char decoded[PATH_MAX + 1];
+
+    /* Leave alone anything that doesn't look like an RFC 2047 encoded
+       parameter.  The tail is only examined once the "=?" prefix has
+       matched, so value_len is at least 2 by then. */
+    if (value_len == 0 ||
+        strncmp (value, "=?", 2) != 0 ||
+        strncmp (&value[value_len - 2], "?=", 2) != 0) {
+        return 0;
+    }
+
+    if (! decode_rfc2047 (value, decoded, sizeof decoded)) {
+        advise (NULL, "failed to decode %s parameter %s", name, value);
+        return 0;
+    }
+
+    /* Encode using RFC 2231. */
+    replace_param (first_pm, last_pm, name, decoded, 0);
+
+    return 1;
+}
+
+
+/*
+ * Replace RFC 2047 encoding with RFC 2231 encoding of name and
+ * filename parameters in Content-Type and Content-Disposition
+ * headers, respectively.  If any parameter was replaced, the parameter
+ * portion (from the first semicolon on) of every Content-Type and
+ * Content-Disposition header field value is regenerated from the
+ * parameter lists.  Always returns OK.
+ */
+static int
+fix_filename_encoding (CT ct) {
+    PM pm;
+    HF hf;
+    int fixed = 0;
+
+    /* Accumulate with |= so that a parameter that needs no fixing
+       cannot cancel out an earlier one that was fixed; otherwise the
+       header values below would not be regenerated. */
+    for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
+        if (pm->pm_name && pm->pm_value &&
+            strcasecmp (pm->pm_name, "name") == 0) {
+            fixed |= fix_filename_param (pm->pm_name, pm->pm_value,
+                                         &ct->c_ctinfo.ci_first_pm,
+                                         &ct->c_ctinfo.ci_last_pm);
+        }
+    }
+
+    for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
+        if (pm->pm_name && pm->pm_value &&
+            strcasecmp (pm->pm_name, "filename") == 0) {
+            fixed |= fix_filename_param (pm->pm_name, pm->pm_value,
+                                         &ct->c_dispo_first,
+                                         &ct->c_dispo_last);
+        }
+    }
+
+    /* Fix hf values to correspond. */
+    for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
+        enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
+
+        if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
+            field = TYPE_HEADER;
+        } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
+            field = DISPO_HEADER;
+        }
+
+        if (field != OTHER) {
+            const char *const semicolon_loc = strchr (hf->value, ';');
+
+            if (semicolon_loc) {
+                /* Length of "name:" plus the value up to the semicolon.
+                   The pointer difference is taken first so that no
+                   intermediate pointer past the end of hf->value is
+                   ever formed. */
+                const size_t len =
+                    strlen (hf->name) + 1 +
+                    (size_t) (semicolon_loc - hf->value);
+                const char *const params =
+                    output_params (len,
+                                   field == TYPE_HEADER
+                                   ? ct->c_ctinfo.ci_first_pm
+                                   : ct->c_dispo_first,
+                                   NULL, 0);
+                const char *const new_params = concat (params, "\n", NULL);
+
+                replace_substring (&hf->value, semicolon_loc, new_params);
+                free ((char *) new_params);
+                free ((char *) params);
+            } else {
+                advise (NULL, "did not find semicolon in %s:%s\n",
+                        hf->name, hf->value);
+            }
+        }
+    }
+
+    return OK;
+}
+
+
+/*
+ * Output content in input file to output file.
+ */
static int
-write_content (CT ct, char *input_filename, char *outfile, int modify_inplace,
+write_content (CT ct, const char *input_filename, char *outfile, int modify_inplace,
int message_mods) {
int status = OK;
}
+/*
+ * parse_mime() does not set lf_line_endings in struct text, so use this
+ * function to do it.  It touches the parts that decodetypes identifies,
+ * descending into multiparts and into the content of a
+ * message/external-body.  A struct text is allocated for a selected part
+ * that doesn't have c_ctparams yet.
+ */
+static void
+set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
+    switch (ct->c_type) {
+    case CT_MULTIPART: {
+        struct multipart *m = (struct multipart *) ct->c_ctparams;
+        struct part *part;
+
+        /* This runs on the raw result of parse_mime(), and
+           fix_boundary() allows for a CT_MULTIPART content that has no
+           multipart struct.  There are no parts to mark in that case. */
+        for (part = m ? m->mp_parts : NULL; part; part = part->mp_next) {
+            set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
+        }
+        break;
+    }
+
+    case CT_MESSAGE:
+        if (ct->c_subtype == MESSAGE_EXTERNAL) {
+            struct exbody *e = (struct exbody *) ct->c_ctparams;
+
+            /* Likewise, don't descend if the external body info or its
+               content is missing. */
+            if (e && e->eb_content) {
+                set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
+            }
+        }
+        break;
+
+    default:
+        if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
+            if (ct->c_ctparams == NULL) {
+                if ((ct->c_ctparams = (struct text *) mh_xcalloc (1, sizeof (struct text))) == NULL) {
+                    adios (NULL, "out of memory");
+                }
+            }
+            ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
+        }
+    }
+}
+
+
/*
* If "rmmproc" is defined, call that to remove the file. Otherwise,
* use the standard MH backup file.
*/
static int
-remove_file (char *file) {
+remove_file (const char *file) {
if (rmmproc) {
char *rmm_command = concat (rmmproc, " ", file, NULL);
int status = system (rmm_command);
}
+/*
+ * Output formatted message to user.
+ */
static void
report (char *what, char *partno, char *filename, char *message, ...) {
va_list args;