From: David Levine Date: Sat, 8 Feb 2014 17:11:25 +0000 (-0600) Subject: Moved upcase(), update_attr(), content_charset(), and X-Git-Url: https://diplodocus.org/git/nmh/commitdiff_plain/f2e710b193928f3e70f48580276498a2debe809b?hp=e8f4ffff12b6a31e49008d12bf89fc752a8c7e03 Moved upcase(), update_attr(), content_charset(), and convert_charset() out of mhfixmsg.c so that other programs can use them. --- diff --git a/Makefile.am b/Makefile.am index bdcca089..5bc612c8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -334,7 +334,7 @@ uip_mhlist_LDADD = $(LDADD) $(TERMLIB) $(POSTLINK) uip_mhn_SOURCES = uip/mhn.c uip/mhparse.c uip/mhcachesbr.c uip/mhshowsbr.c \ uip/mhlistsbr.c uip/mhstoresbr.c uip/mhmisc.c uip/mhfree.c \ uip/md5.c -uip_mhn_LDADD = $(LDADD) $(TERMLIB) $(POSTLINK) +uip_mhn_LDADD = $(LDADD) $(TERMLIB) $(ICONVLIB) $(POSTLINK) uip_mhparam_SOURCES = uip/mhparam.c uip_mhparam_LDADD = $(LDADD) $(POSTLINK) @@ -345,12 +345,12 @@ uip_mhpath_LDADD = $(LDADD) $(POSTLINK) uip_mhshow_SOURCES = uip/mhshow.c uip/mhparse.c uip/mhcachesbr.c \ uip/mhshowsbr.c uip/mhlistsbr.c uip/mhmisc.c \ uip/mhfree.c uip/md5.c -uip_mhshow_LDADD = $(LDADD) $(TERMLIB) $(POSTLINK) +uip_mhshow_LDADD = $(LDADD) $(TERMLIB) $(ICONVLIB) $(POSTLINK) uip_mhstore_SOURCES = uip/mhstore.c uip/mhparse.c uip/mhcachesbr.c \ uip/mhshowsbr.c uip/mhlistsbr.c uip/mhstoresbr.c \ uip/mhmisc.c uip/mhfree.c uip/md5.c -uip_mhstore_LDADD = $(LDADD) $(TERMLIB) $(POSTLINK) +uip_mhstore_LDADD = $(LDADD) $(TERMLIB) $(ICONVLIB) $(POSTLINK) uip_msgchk_SOURCES = uip/msgchk.c uip/popsbr.c uip_msgchk_LDADD = $(LDADD) $(SASLLIB) $(POSTLINK) diff --git a/h/mhparse.h b/h/mhparse.h index bb55153f..72188c6d 100644 --- a/h/mhparse.h +++ b/h/mhparse.h @@ -327,5 +327,8 @@ const struct str2init *get_ct_init (int); const char *ce_str (int); const struct str2init *get_ce_method (const char *); int parse_header_attrs (const char *, int, char **, CI, int *); +char *update_attr (char *, const char *, const char *e); +char *content_charset (CT); +int convert_charset (CT, char *, int *); extern int checksw; /* Add Content-MD5 field */ diff --git a/h/prototypes.h b/h/prototypes.h index 1ff92f64..621146c3 100644 --- a/h/prototypes.h +++ b/h/prototypes.h @@ -46,6 +46,7 @@ void cpydgst (int, int, char *, char *); char *cpytrim (const char *); int decode_rfc2047 (char *, char *, size_t); void discard (FILE *); +char *upcase (const char *); int default_done (int); /* diff --git a/sbr/utils.c b/sbr/utils.c index d8b060fb..59ab7c72 100644 --- a/sbr/utils.c +++ b/sbr/utils.c @@ -352,3 +352,16 @@ nmh_init(const char *argv0, int read_context) { return status; } } + + +/* Returns copy of argument str with all characters converted to upper + case, and trimmed whitespace (see cpytrim()) . */ +char * +upcase (const char *str) { + char *up = cpytrim (str); + char *cp; + + for (cp = up; *cp; ++cp) { *cp = toupper ((unsigned char) *cp); } + + return up; +} diff --git a/uip/mhfixmsg.c b/uip/mhfixmsg.c index 20162508..4c0a3ef5 100644 --- a/uip/mhfixmsg.c +++ b/uip/mhfixmsg.c @@ -12,9 +12,6 @@ #include #include #include -#ifdef HAVE_ICONV -# include -#endif #define MHFIXMSG_SWITCHES \ X("decodetext 8bit|7bit", 0, DECODETEXTSW) \ @@ -89,7 +86,6 @@ static void reverse_alternative_parts (CT); static int fix_boundary (CT *, int *); static int get_multipart_boundary (CT, char **); static int replace_boundary (CT, char *, const char *); -static char *update_attr (char *, const char *, const char *e); static int fix_multipart_cte (CT, int *); static int set_ce (CT, int); static int ensure_text_plain (CT *, CT, int *, int); @@ -107,12 +103,9 @@ static int decode_text_parts (CT, int, int *); static int content_encoding (CT); static int strip_crs (CT, int *); static int convert_codesets (CT, char *, int *); -static int convert_codeset (CT, char *, int *); -static char *content_codeset (CT); static int write_content (CT, char *, char *, int, int); static int remove_file (char *); static void report (char *, char *, char *, ...); -static char *upcase (char *); static void pipeser (int); @@ -718,44 +711,6 @@ replace_boundary (CT ct, char *file, const char *boundary) { } -/* Change the value of a name=value pair in a header field body. - If the name isn't there, append them. In any case, a new - string will be allocated and must be free'd by the caller. - Trims any trailing newlines. */ -static char * -update_attr (char *body, const char *name, const char *value) { - char *bp = nmh_strcasestr (body, name); - char *new_body; - - if (bp) { - char *other_attrs = strchr (bp, ';'); - - *(bp + strlen (name)) = '\0'; - new_body = concat (body, "\"", value, "\"", NULL); - - if (other_attrs) { - char *cp; - - /* Trim any trailing newlines. */ - for (cp = &other_attrs[strlen (other_attrs) - 1]; - cp > other_attrs && *cp == '\n'; - *cp-- = '\0') continue; - new_body = add (other_attrs, new_body); - } - } else { - char *cp; - - /* Append name/value pair, after first removing a final newline - and (extraneous) semicolon. */ - if (*(cp = &body[strlen (body) - 1]) == '\n') *cp = '\0'; - if (*(cp = &body[strlen (body) - 1]) == ';') *cp = '\0'; - new_body = concat (body, "; ", name, "\"", value, "\"", NULL); - } - - return new_body; -} - - static int fix_multipart_cte (CT ct, int *message_mods) { int status = OK; @@ -1196,7 +1151,7 @@ reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) { static int charset_encoding (CT ct) { /* norm_charmap() is case sensitive. */ - char *codeset = upcase (content_codeset (ct)); + char *codeset = upcase (content_charset (ct)); int encoding = strcmp (norm_charmap (codeset), "US-ASCII") ? CE_8BIT : CE_7BIT; @@ -1617,7 +1572,7 @@ content_encoding (CT ct) { static int strip_crs (CT ct, int *message_mods) { /* norm_charmap() is case sensitive. */ - char *codeset = upcase (content_codeset (ct)); + char *codeset = upcase (content_charset (ct)); int status = OK; /* Only strip carriage returns if content is ASCII or another @@ -1757,29 +1712,6 @@ strip_crs (CT ct, int *message_mods) { } -char * -content_codeset (CT ct) { - const char *const charset = "charset"; - char *default_codeset = NULL; - CI ctinfo = &ct->c_ctinfo; - char **ap, **vp; - char **src_codeset = NULL; - - for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) { - if (! strcasecmp (*ap, charset)) { - src_codeset = vp; - break; - } - } - - /* RFC 2045, Sec. 5.2: default to us-ascii. */ - if (src_codeset == NULL) src_codeset = &default_codeset; - if (*src_codeset == NULL) *src_codeset = "US-ASCII"; - - return *src_codeset; -} - - static int convert_codesets (CT ct, char *dest_codeset, int *message_mods) { int status = OK; @@ -1787,7 +1719,11 @@ convert_codesets (CT ct, char *dest_codeset, int *message_mods) { switch (ct->c_type) { case CT_TEXT: if (ct->c_subtype == TEXT_PLAIN) { - status = convert_codeset (ct, dest_codeset, message_mods); + status = convert_charset (ct, dest_codeset, message_mods); + if (verbosw && status == OK) { + report (ct->c_partno, ct->c_file, "convert %s to %s", + content_charset(ct), dest_codeset); + } } break; @@ -1822,161 +1758,6 @@ convert_codesets (CT ct, char *dest_codeset, int *message_mods) { } -static int -convert_codeset (CT ct, char *dest_codeset, int *message_mods) { - char *src_codeset = content_codeset (ct); - int status = OK; - - /* norm_charmap() is case sensitive. */ - char *src_codeset_u = upcase (src_codeset); - char *dest_codeset_u = upcase (dest_codeset); - int different_codesets = - strcmp (norm_charmap (src_codeset), norm_charmap (dest_codeset)); - - free (dest_codeset_u); - free (src_codeset_u); - - if (different_codesets) { -#ifdef HAVE_ICONV - iconv_t conv_desc = NULL; - char *dest; - int fd = -1; - char **file = NULL; - FILE **fp = NULL; - size_t begin; - size_t end; - int opened_input_file = 0; - char src_buffer[BUFSIZ]; - HF hf; - char *tempfile; - - if ((conv_desc = iconv_open (dest_codeset, src_codeset)) == - (iconv_t) -1) { - advise (NULL, "Can't convert %s to %s", src_codeset, dest_codeset); - return -1; - } - - if ((tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) { - adios (NULL, "unable to create temporary file in %s", - get_temp_dir()); - } - dest = add (tempfile, NULL); - - if (ct->c_cefile.ce_file) { - file = &ct->c_cefile.ce_file; - fp = &ct->c_cefile.ce_fp; - begin = end = 0; - } else if (ct->c_file) { - file = &ct->c_file; - fp = &ct->c_fp; - begin = (size_t) ct->c_begin; - end = (size_t) ct->c_end; - } /* else no input file: shouldn't happen */ - - if (file && *file && fp) { - if (! *fp) { - if ((*fp = fopen (*file, "r")) == NULL) { - advise (*file, "unable to open for reading"); - status = NOTOK; - } else { - opened_input_file = 1; - } - } - } - - if (fp && *fp) { - size_t inbytes; - size_t bytes_to_read = - end > 0 && end > begin ? end - begin : sizeof src_buffer; - - fseeko (*fp, begin, SEEK_SET); - while ((inbytes = fread (src_buffer, 1, - min (bytes_to_read, sizeof src_buffer), - *fp)) > 0) { - char dest_buffer[BUFSIZ]; - ICONV_CONST char *ib = src_buffer; - char *ob = dest_buffer; - size_t outbytes = sizeof dest_buffer; - size_t outbytes_before = outbytes; - - if (end > 0) bytes_to_read -= inbytes; - - if (iconv (conv_desc, &ib, &inbytes, &ob, &outbytes) == - (size_t) -1) { - status = NOTOK; - break; - } else { - write (fd, dest_buffer, outbytes_before - outbytes); - } - } - - if (opened_input_file) { - fclose (*fp); - *fp = NULL; - } - } - - iconv_close (conv_desc); - close (fd); - - if (status == OK) { - /* Replace the decoded file with the converted one. */ - if (ct->c_cefile.ce_file) { - if (ct->c_cefile.ce_unlink) { - (void) m_unlink (ct->c_cefile.ce_file); - } - free (ct->c_cefile.ce_file); - } - ct->c_cefile.ce_file = dest; - ct->c_cefile.ce_unlink = 1; - - ++*message_mods; - if (verbosw) { - report (ct->c_partno, ct->c_file, "convert %s to %s", - src_codeset, dest_codeset); - } - - /* Update ci_attrs. */ - src_codeset = dest_codeset; - - /* Update ct->c_ctline. */ - if (ct->c_ctline) { - char *ctline = - update_attr (ct->c_ctline, "charset=", dest_codeset); - - free (ct->c_ctline); - ct->c_ctline = ctline; - } /* else no CT line, which is odd */ - - /* Update Content-Type header field. */ - for (hf = ct->c_first_hf; hf; hf = hf->next) { - if (! strcasecmp (TYPE_FIELD, hf->name)) { - char *ctline_less_newline = - update_attr (hf->value, "charset=", dest_codeset); - char *ctline = concat (ctline_less_newline, "\n", NULL); - free (ctline_less_newline); - - free (hf->value); - hf->value = ctline; - break; - } - } - } else { - (void) m_unlink (dest); - } -#else /* ! HAVE_ICONV */ - NMH_UNUSED (message_mods); - - advise (NULL, "Can't convert %s to %s without iconv", src_codeset, - dest_codeset); - status = NOTOK; -#endif /* ! HAVE_ICONV */ - } - - return status; -} - - static int write_content (CT ct, char *input_filename, char *outfile, int modify_inplace, int message_mods) { @@ -2090,17 +1871,6 @@ report (char *partno, char *filename, char *message, ...) { } -static char * -upcase (char *str) { - char *up = cpytrim (str); - char *cp; - - for (cp = up; *cp; ++cp) *cp = toupper ((unsigned char) *cp); - - return up; -} - - static void pipeser (int i) { diff --git a/uip/mhparse.c b/uip/mhparse.c index 8af26906..9873e0b1 100644 --- a/uip/mhparse.c +++ b/uip/mhparse.c @@ -3351,3 +3351,64 @@ bad_quote: *header_attrp = cp; return OK; } + + +char * +content_charset (CT ct) { + const char *const charset = "charset"; + char *default_charset = NULL; + CI ctinfo = &ct->c_ctinfo; + char **ap, **vp; + char **src_charset = NULL; + + for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) { + if (! strcasecmp (*ap, charset)) { + src_charset = vp; + break; + } + } + + /* RFC 2045, Sec. 5.2: default to us-ascii. */ + if (src_charset == NULL) src_charset = &default_charset; + if (*src_charset == NULL) *src_charset = "US-ASCII"; + + return *src_charset; +} + + +/* Change the value of a name=value pair in a header field body. + If the name isn't there, append them. In any case, a new + string will be allocated and must be free'd by the caller. + Trims any trailing newlines. */ +char * +update_attr (char *body, const char *name, const char *value) { + char *bp = nmh_strcasestr (body, name); + char *new_body; + + if (bp) { + char *other_attrs = strchr (bp, ';'); + + *(bp + strlen (name)) = '\0'; + new_body = concat (body, "\"", value, "\"", NULL); + + if (other_attrs) { + char *cp; + + /* Trim any trailing newlines. */ + for (cp = &other_attrs[strlen (other_attrs) - 1]; + cp > other_attrs && *cp == '\n'; + *cp-- = '\0') continue; + new_body = add (other_attrs, new_body); + } + } else { + char *cp; + + /* Append name/value pair, after first removing a final newline + and (extraneous) semicolon. */ + if (*(cp = &body[strlen (body) - 1]) == '\n') *cp = '\0'; + if (*(cp = &body[strlen (body) - 1]) == ';') *cp = '\0'; + new_body = concat (body, "; ", name, "\"", value, "\"", NULL); + } + + return new_body; +} diff --git a/uip/mhshowsbr.c b/uip/mhshowsbr.c index fe0e3304..6f7f1ee6 100644 --- a/uip/mhshowsbr.c +++ b/uip/mhshowsbr.c @@ -17,6 +17,9 @@ #include #include #include +#ifdef HAVE_ICONV +# include +#endif extern int debugsw; @@ -1063,6 +1066,157 @@ show_external (CT ct, int serial, int alternate) } +int +convert_charset (CT ct, char *dest_charset, int *message_mods) { + char *src_charset = content_charset (ct); + int status = OK; + + /* norm_charmap() is case sensitive. */ + char *src_charset_u = upcase (src_charset); + char *dest_charset_u = upcase (dest_charset); + int different_charsets = + strcmp (norm_charmap (src_charset), norm_charmap (dest_charset)); + + free (dest_charset_u); + free (src_charset_u); + + if (different_charsets) { +#ifdef HAVE_ICONV + iconv_t conv_desc = NULL; + char *dest; + int fd = -1; + char **file = NULL; + FILE **fp = NULL; + size_t begin; + size_t end; + int opened_input_file = 0; + char src_buffer[BUFSIZ]; + HF hf; + char *tempfile; + + if ((conv_desc = iconv_open (dest_charset, src_charset)) == + (iconv_t) -1) { + advise (NULL, "Can't convert %s to %s", src_charset, dest_charset); + return NOTOK; + } + + if ((tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) { + adios (NULL, "unable to create temporary file in %s", + get_temp_dir()); + } + dest = add (tempfile, NULL); + + if (ct->c_cefile.ce_file) { + file = &ct->c_cefile.ce_file; + fp = &ct->c_cefile.ce_fp; + begin = end = 0; + } else if (ct->c_file) { + file = &ct->c_file; + fp = &ct->c_fp; + begin = (size_t) ct->c_begin; + end = (size_t) ct->c_end; + } /* else no input file: shouldn't happen */ + + if (file && *file && fp) { + if (! *fp) { + if ((*fp = fopen (*file, "r")) == NULL) { + advise (*file, "unable to open for reading"); + status = NOTOK; + } else { + opened_input_file = 1; + } + } + } + + if (fp && *fp) { + size_t inbytes; + size_t bytes_to_read = + end > 0 && end > begin ? end - begin : sizeof src_buffer; + + fseeko (*fp, begin, SEEK_SET); + while ((inbytes = fread (src_buffer, 1, + min (bytes_to_read, sizeof src_buffer), + *fp)) > 0) { + char dest_buffer[BUFSIZ]; + ICONV_CONST char *ib = src_buffer; + char *ob = dest_buffer; + size_t outbytes = sizeof dest_buffer; + size_t outbytes_before = outbytes; + + if (end > 0) bytes_to_read -= inbytes; + + if (iconv (conv_desc, &ib, &inbytes, &ob, &outbytes) == + (size_t) -1) { + status = NOTOK; + break; + } else { + write (fd, dest_buffer, outbytes_before - outbytes); + } + } + + if (opened_input_file) { + fclose (*fp); + *fp = NULL; + } + } + + iconv_close (conv_desc); + close (fd); + + if (status == OK) { + /* Replace the decoded file with the converted one. */ + if (ct->c_cefile.ce_file) { + if (ct->c_cefile.ce_unlink) { + (void) m_unlink (ct->c_cefile.ce_file); + } + free (ct->c_cefile.ce_file); + } + ct->c_cefile.ce_file = dest; + ct->c_cefile.ce_unlink = 1; + + ++*message_mods; + + /* Update ci_attrs. */ + src_charset = dest_charset; + + /* Update ct->c_ctline. */ + if (ct->c_ctline) { + char *ctline = + update_attr (ct->c_ctline, "charset=", dest_charset); + + free (ct->c_ctline); + ct->c_ctline = ctline; + } /* else no CT line, which is odd */ + + /* Update Content-Type header field. */ + for (hf = ct->c_first_hf; hf; hf = hf->next) { + if (! strcasecmp (TYPE_FIELD, hf->name)) { + char *ctline_less_newline = + update_attr (hf->value, "charset=", dest_charset); + char *ctline = concat (ctline_less_newline, "\n", NULL); + free (ctline_less_newline); + + free (hf->value); + hf->value = ctline; + break; + } + } + } else { + (void) m_unlink (dest); + } +#else /* ! HAVE_ICONV */ + NMH_UNUSED (message_mods); + + advise (NULL, "Can't convert %s to %s without iconv", src_charset, + dest_charset); + status = NOTOK; +#endif /* ! HAVE_ICONV */ + } + + return status; +} + + static void intrser (int i) {