#include <h/utils.h>
#include <h/signals.h>
#include <fcntl.h>
-#ifdef HAVE_ICONV
-# include <iconv.h>
-#endif
#define MHFIXMSG_SWITCHES \
X("decodetext 8bit|7bit", 0, DECODETEXTSW) \
X("nodecodetext", 0, NDECODETEXTSW) \
- X("textcodeset", 0, TEXTCODESETSW) \
- X("notextcodeset", 0, NTEXTCODESETSW) \
+ X("textcharset", 0, TEXTCHARSETSW) \
+ X("notextcharset", 0, NTEXTCHARSETSW) \
X("reformat", 0, REFORMATSW) \
X("noreformat", 0, NREFORMATSW) \
X("replacetextplain", 0, REPLACETEXTPLAINSW) \
int reformat;
int replacetextplain;
int decodetext;
- char *textcodeset;
+ char *textcharset;
} fix_transformations;
int mhfixmsgsbr (CT *, const fix_transformations *, char *);
static void reverse_alternative_parts (CT);
static int fix_boundary (CT *, int *);
static int get_multipart_boundary (CT, char **);
-static int replace_boundary (CT, char *, const char *);
-static char *update_attr (char *, const char *, const char *e);
+static int replace_boundary (CT, char *, char *);
static int fix_multipart_cte (CT, int *);
static int set_ce (CT, int);
static int ensure_text_plain (CT *, CT, int *, int);
static void transfer_noncontent_headers (CT, CT);
static int set_ct_type (CT, int type, int subtype, int encoding);
static int decode_text_parts (CT, int, int *);
-static int content_encoding (CT);
+static int content_encoding (CT, const char **);
static int strip_crs (CT, int *);
-static int convert_codesets (CT, char *, int *);
-static int convert_codeset (CT, char *, int *);
-static char *content_codeset (CT);
+static int convert_charsets (CT, char *, int *);
static int write_content (CT, char *, char *, int, int);
static int remove_file (char *);
static void report (char *, char *, char *, ...);
-static char *upcase (char *);
static void pipeser (int);
fx.reformat = fx.fixcte = fx.fixboundary = 1;
fx.replacetextplain = 0;
fx.decodetext = CE_8BIT;
- fx.textcodeset = NULL;
+ fx.textcharset = NULL;
if (nmh_init(argv[0], 1)) { return 1; }
case NDECODETEXTSW:
fx.decodetext = 0;
continue;
- case TEXTCODESETSW:
+ case TEXTCHARSETSW:
if (! (cp = *argp++) || (*cp == '-' && cp[1]))
adios (NULL, "missing argument to %s", argp[-2]);
- fx.textcodeset = cp;
+ fx.textcharset = cp;
continue;
- case NTEXTCODESETSW:
- fx.textcodeset = 0;
+ case NTEXTCHARSETSW:
+ fx.textcharset = 0;
continue;
case FIXBOUNDARYSW:
fx.fixboundary = 1;
if (status == OK && fx->decodetext) {
status = decode_text_parts (*ctp, fx->decodetext, &message_mods);
}
- if (status == OK && fx->textcodeset != NULL) {
- status = convert_codesets (*ctp, fx->textcodeset, &message_mods);
+ if (status == OK && fx->textcharset != NULL) {
+ status = convert_charsets (*ctp, fx->textcharset, &message_mods);
}
if (! (*ctp)->c_umask) {
/* Open and copy ct->c_file to file, replacing the multipart boundary. */
static int
-replace_boundary (CT ct, char *file, const char *boundary) {
+replace_boundary (CT ct, char *file, char *boundary) {
FILE *fpin, *fpout;
int compnum, state;
char buf[BUFSIZ], name[NAMESZ];
if (strcasecmp (TYPE_FIELD, np)) {
fprintf (fpout, "%s:%s", np, vp);
} else {
- char *new_boundary = update_attr (vp, "boundary=", boundary);
-
- fprintf (fpout, "%s:%s\n", np, new_boundary);
- free (new_boundary);
+ char *new_ctline, *new_params;
+
+ replace_param(&ct->c_ctinfo.ci_first_pm,
+ &ct->c_ctinfo.ci_last_pm, "boundary",
+ boundary, 0);
+
+ new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
+ ct->c_ctinfo.ci_subtype, NULL);
+ new_params = output_params(strlen(TYPE_FIELD) +
+ strlen(new_ctline) + 1,
+ ct->c_ctinfo.ci_first_pm, NULL, 0);
+ fprintf (fpout, "%s:%s%s\n", np, new_ctline,
+ new_params ? new_params : "");
+ free(new_ctline);
+ if (new_params)
+ free(new_params);
}
free (vp);
}
-/* Change the value of a name=value pair in a header field body.
- If the name isn't there, append them. In any case, a new
- string will be allocated and must be free'd by the caller.
- Trims any trailing newlines. */
-static char *
-update_attr (char *body, const char *name, const char *value) {
- char *bp = nmh_strcasestr (body, name);
- char *new_body;
-
- if (bp) {
- char *other_attrs = strchr (bp, ';');
-
- *(bp + strlen (name)) = '\0';
- new_body = concat (body, "\"", value, "\"", NULL);
-
- if (other_attrs) {
- char *cp;
-
- /* Trim any trailing newlines. */
- for (cp = &other_attrs[strlen (other_attrs) - 1];
- cp > other_attrs && *cp == '\n';
- *cp-- = '\0') continue;
- new_body = add (other_attrs, new_body);
- }
- } else {
- char *cp;
-
- /* Append name/value pair, after first removing a final newline
- and (extraneous) semicolon. */
- if (*(cp = &body[strlen (body) - 1]) == '\n') *cp = '\0';
- if (*(cp = &body[strlen (body) - 1]) == ';') *cp = '\0';
- new_body = concat (body, "; ", name, "\"", value, "\"", NULL);
- }
-
- return new_body;
-}
-
-
static int
fix_multipart_cte (CT ct, int *message_mods) {
int status = OK;
static void
copy_ctinfo (CI dest, CI src) {
- char **s_ap, **d_ap, **s_vp, **d_vp;
+ PM s_pm, d_pm;
dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL;
dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL;
- for (s_ap = src->ci_attrs, d_ap = dest->ci_attrs,
- s_vp = src->ci_values, d_vp = dest->ci_values;
- *s_ap;
- ++s_ap, ++d_ap, ++s_vp, ++d_vp) {
- *d_ap = add (*s_ap, NULL);
- *d_vp = *s_vp;
+ for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
+ d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
+ s_pm->pm_value, 0);
+ if (s_pm->pm_charset)
+ d_pm->pm_charset = getcpy(s_pm->pm_charset);
+ if (s_pm->pm_lang)
+ d_pm->pm_lang = getcpy(s_pm->pm_lang);
}
- *d_ap = NULL;
dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL;
dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL;
/* Pick the content transfer encoding implied by the charset of ct.
   The charset name is run through upcase() and then norm_charmap();
   if the result compares equal to "US-ASCII" the answer is CE_7BIT,
   otherwise CE_8BIT.  The string returned by upcase() is released
   with free() before returning.  (Both the removed and the added
   lines of this hunk follow that same logic; only the identifier
   names differ.) */
static int
charset_encoding (CT ct) {
/* norm_charmap() is case sensitive. */
- char *codeset = upcase (content_codeset (ct));
+ char *charset = upcase (content_charset (ct));
int encoding =
- strcmp (norm_charmap (codeset), "US-ASCII") ? CE_8BIT : CE_7BIT;
+ strcmp (norm_charmap (charset), "US-ASCII") ? CE_8BIT : CE_7BIT;
- free (codeset);
+ free (charset);
return encoding;
}
CT ct;
struct part *p;
struct multipart *m;
- char *cp;
const struct str2init *ctinit;
if ((ct = (CT) calloc (1, sizeof *ct)) == NULL)
ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
}
- name = concat (" ", typename, "/", subtypename, boundary_indicator,
- boundary, NULL);
- if ((cp = strstr (name, boundary_indicator))) {
- ct->c_ctinfo.ci_attrs[0] = name;
- ct->c_ctinfo.ci_attrs[1] = NULL;
- /* ci_values don't get free'd, so point into ci_attrs. */
- ct->c_ctinfo.ci_values[0] = cp + strlen (boundary_indicator);
- }
+ add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
+ "boundary", boundary, 0);
p = (struct part *) mh_xmalloc (sizeof *p);
p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next);
int ct_encoding;
if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
- if ((ct_encoding = content_encoding (ct)) == CE_BINARY &&
- encoding != CE_BINARY) {
+ const char *reason = NULL;
+
+ if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
+ && encoding != CE_BINARY) {
/* The decoding isn't acceptable so discard it.
Leave status as OK to allow other transformations. */
if (verbosw) {
report (ct->c_partno, ct->c_file,
- "will not decode%s because it is binary",
+ "will not decode%s because it is binary (%s)",
ct->c_partno ? ""
: ct->c_ctline ? ct->c_ctline
- : "");
+ : "",
+ reason);
}
(void) m_unlink (ct->c_cefile.ce_file);
free (ct->c_cefile.ce_file);
/* See if the decoded content is 7bit, 8bit, or binary. It's binary
if it has any NUL characters, a CR not followed by a LF, or lines
- greater than 998 characters in length. */
+ greater than 998 characters in length. If binary, reason is set
+ to a string explaining why. */
static int
-content_encoding (CT ct) {
+content_encoding (CT ct, const char **reason) {
CE ce = &ct->c_cefile;
int encoding = CE_7BIT;
if (*cp == '\0' || ++line_len > 998 ||
(*cp != '\n' && last_char_was_cr)) {
encoding = CE_BINARY;
+ if (*cp == '\0') {
+ *reason = "null character";
+ } else if (line_len > 998) {
+ *reason = "line length > 998";
+ } else if (*cp != '\n' && last_char_was_cr) {
+ *reason = "CR not followed by LF";
+ } else {
+ /* Should not reach this. */
+ *reason = "";
+ }
break;
} else if (*cp == '\n') {
line_len = 0;
static int
strip_crs (CT ct, int *message_mods) {
/* norm_charmap() is case sensitive. */
- char *codeset = upcase (content_codeset (ct));
+ char *charset = upcase (content_charset (ct));
int status = OK;
/* Only strip carriage returns if content is ASCII or another
- codeset that has the same readily recognizable CR followed by a
+ charset that has the same readily recognizable CR followed by a
LF. We can include UTF-8 here because if the high-order bit of
a UTF-8 byte is 0, then it must be a single-byte ASCII
character. */
- if (! strcmp (norm_charmap (codeset), "US-ASCII") ||
- ! strncmp (norm_charmap (codeset), "ISO-8859-", 9) ||
- ! strncmp (norm_charmap (codeset), "UTF-8", 5) ||
- ! strncmp (norm_charmap (codeset), "WINDOWS-12", 10)) {
+ if (! strcmp (norm_charmap (charset), "US-ASCII") ||
+ ! strncmp (norm_charmap (charset), "ISO-8859-", 9) ||
+ ! strncmp (norm_charmap (charset), "UTF-8", 5) ||
+ ! strncmp (norm_charmap (charset), "WINDOWS-12", 10)) {
char **file = NULL;
FILE **fp = NULL;
size_t begin;
if (has_crs) {
int fd;
char *stripped_content_file;
- char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
+ char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
if (tempfile == NULL) {
adios (NULL, "unable to create temporary file in %s",
}
}
- free (codeset);
+ free (charset);
return status;
}
-char *
-content_codeset (CT ct) {
- const char *const charset = "charset";
- char *default_codeset = NULL;
- CI ctinfo = &ct->c_ctinfo;
- char **ap, **vp;
- char **src_codeset = NULL;
-
- for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) {
- if (! strcasecmp (*ap, charset)) {
- src_codeset = vp;
- break;
- }
- }
-
- /* RFC 2045, Sec. 5.2: default to us-ascii. */
- if (src_codeset == NULL) src_codeset = &default_codeset;
- if (*src_codeset == NULL) *src_codeset = "US-ASCII";
-
- return *src_codeset;
-}
-
-
static int
-convert_codesets (CT ct, char *dest_codeset, int *message_mods) {
+convert_charsets (CT ct, char *dest_charset, int *message_mods) {
int status = OK;
switch (ct->c_type) {
case CT_TEXT:
if (ct->c_subtype == TEXT_PLAIN) {
- status = convert_codeset (ct, dest_codeset, message_mods);
+ status = convert_charset (ct, dest_charset, message_mods);
+ if (verbosw && status == OK) {
+ report (ct->c_partno, ct->c_file, "convert %s to %s",
+ content_charset(ct), dest_charset);
+ }
}
break;
For now, it gets passed along as-is by InitMultiPart(). */
for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
status =
- convert_codesets (part->mp_part, dest_codeset, message_mods);
+ convert_charsets (part->mp_part, dest_charset, message_mods);
}
break;
}
e = (struct exbody *) ct->c_ctparams;
status =
- convert_codesets (e->eb_content, dest_codeset, message_mods);
+ convert_charsets (e->eb_content, dest_charset, message_mods);
}
break;
}
-static int
-convert_codeset (CT ct, char *dest_codeset, int *message_mods) {
- char *src_codeset = content_codeset (ct);
- int status = OK;
-
- /* norm_charmap() is case sensitive. */
- char *src_codeset_u = upcase (src_codeset);
- char *dest_codeset_u = upcase (dest_codeset);
- int different_codesets =
- strcmp (norm_charmap (src_codeset), norm_charmap (dest_codeset));
-
- free (dest_codeset_u);
- free (src_codeset_u);
-
- if (different_codesets) {
-#ifdef HAVE_ICONV
- iconv_t conv_desc = NULL;
- char *dest;
- int fd = -1;
- char **file = NULL;
- FILE **fp = NULL;
- size_t begin;
- size_t end;
- int opened_input_file = 0;
- char src_buffer[BUFSIZ];
- HF hf;
- char *tempfile;
-
- if ((conv_desc = iconv_open (dest_codeset, src_codeset)) ==
- (iconv_t) -1) {
- advise (NULL, "Can't convert %s to %s", src_codeset, dest_codeset);
- return -1;
- }
-
- if ((tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
- adios (NULL, "unable to create temporary file in %s",
- get_temp_dir());
- }
- dest = add (tempfile, NULL);
-
- if (ct->c_cefile.ce_file) {
- file = &ct->c_cefile.ce_file;
- fp = &ct->c_cefile.ce_fp;
- begin = end = 0;
- } else if (ct->c_file) {
- file = &ct->c_file;
- fp = &ct->c_fp;
- begin = (size_t) ct->c_begin;
- end = (size_t) ct->c_end;
- } /* else no input file: shouldn't happen */
-
- if (file && *file && fp) {
- if (! *fp) {
- if ((*fp = fopen (*file, "r")) == NULL) {
- advise (*file, "unable to open for reading");
- status = NOTOK;
- } else {
- opened_input_file = 1;
- }
- }
- }
-
- if (fp && *fp) {
- size_t inbytes;
- size_t bytes_to_read =
- end > 0 && end > begin ? end - begin : sizeof src_buffer;
-
- fseeko (*fp, begin, SEEK_SET);
- while ((inbytes = fread (src_buffer, 1,
- min (bytes_to_read, sizeof src_buffer),
- *fp)) > 0) {
- char dest_buffer[BUFSIZ];
- ICONV_CONST char *ib = src_buffer;
- char *ob = dest_buffer;
- size_t outbytes = sizeof dest_buffer;
- size_t outbytes_before = outbytes;
-
- if (end > 0) bytes_to_read -= inbytes;
-
- if (iconv (conv_desc, &ib, &inbytes, &ob, &outbytes) ==
- (size_t) -1) {
- status = NOTOK;
- break;
- } else {
- write (fd, dest_buffer, outbytes_before - outbytes);
- }
- }
-
- if (opened_input_file) {
- fclose (*fp);
- *fp = NULL;
- }
- }
-
- iconv_close (conv_desc);
- close (fd);
-
- if (status == OK) {
- /* Replace the decoded file with the converted one. */
- if (ct->c_cefile.ce_file) {
- if (ct->c_cefile.ce_unlink) {
- (void) m_unlink (ct->c_cefile.ce_file);
- }
- free (ct->c_cefile.ce_file);
- }
- ct->c_cefile.ce_file = dest;
- ct->c_cefile.ce_unlink = 1;
-
- ++*message_mods;
- if (verbosw) {
- report (ct->c_partno, ct->c_file, "convert %s to %s",
- src_codeset, dest_codeset);
- }
-
- /* Update ci_attrs. */
- src_codeset = dest_codeset;
-
- /* Update ct->c_ctline. */
- if (ct->c_ctline) {
- char *ctline =
- update_attr (ct->c_ctline, "charset=", dest_codeset);
-
- free (ct->c_ctline);
- ct->c_ctline = ctline;
- } /* else no CT line, which is odd */
-
- /* Update Content-Type header field. */
- for (hf = ct->c_first_hf; hf; hf = hf->next) {
- if (! strcasecmp (TYPE_FIELD, hf->name)) {
- char *ctline_less_newline =
- update_attr (hf->value, "charset=", dest_codeset);
- char *ctline = concat (ctline_less_newline, "\n", NULL);
- free (ctline_less_newline);
-
- free (hf->value);
- hf->value = ctline;
- break;
- }
- }
- } else {
- (void) m_unlink (dest);
- }
-#else /* ! HAVE_ICONV */
- NMH_UNUSED (message_mods);
-
- advise (NULL, "Can't convert %s to %s without iconv", src_codeset,
- dest_codeset);
- status = NOTOK;
-#endif /* ! HAVE_ICONV */
- }
-
- return status;
-}
-
-
static int
write_content (CT ct, char *input_filename, char *outfile, int modify_inplace,
int message_mods) {
}
-static char *
-upcase (char *str) {
- char *up = cpytrim (str);
- char *cp;
-
- for (cp = up; *cp; ++cp) *cp = toupper ((unsigned char) *cp);
-
- return up;
-}
-
-
static void
pipeser (int i)
{