+/*
+ * Write all LEN bytes at BUF to FD, retrying after short writes.
+ * Returns 0 on success, or -1 if write() fails or makes no progress.
+ */
+static int
+strip_crs_write_all (int fd, const char *buf, size_t len) {
+    while (len > 0) {
+        ssize_t written = write (fd, buf, len);
+
+        if (written <= 0) return -1;
+        buf += written;
+        len -= (size_t) written;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Scan IN from its current position for a CR immediately followed by
+ * a LF.  If BOUNDED is nonzero, at most LENGTH bytes are examined;
+ * otherwise the scan runs to end of file.  Returns 1 as soon as a
+ * CR LF pair is seen, 0 if there is none.
+ */
+static int
+strip_crs_has_crlf (FILE *in, size_t length, int bounded) {
+    char buffer[BUFSIZ];
+    /* Kept across reads so a pair split between two chunks is seen. */
+    int last_char_was_cr = 0;
+
+    for (;;) {
+        size_t want = sizeof buffer;
+        size_t got;
+        size_t i;
+
+        if (bounded  &&  length < want) want = length;
+        if (want == 0  ||  (got = fread (buffer, 1, want, in)) == 0) {
+            return 0;
+        }
+        if (bounded) length -= got;
+
+        for (i = 0; i < got; ++i) {
+            if (buffer[i] == '\n'  &&  last_char_was_cr) return 1;
+            last_char_was_cr = buffer[i] == '\r';
+        }
+    }
+}
+
+
+/*
+ * Copy IN, from its current position, to FD, dropping every CR that
+ * is immediately followed by a LF.  All other bytes, including CRs
+ * that are not followed by a LF, are copied unchanged.  If BOUNDED is
+ * nonzero, exactly LENGTH bytes (or fewer, at end of file) are
+ * consumed; otherwise the copy runs to end of file.  Returns 0 on
+ * success, -1 on a read or write error.
+ */
+static int
+strip_crs_copy (FILE *in, int fd, size_t length, int bounded) {
+    char inbuf[BUFSIZ];
+    /* One spare byte for a CR held over from the previous chunk. */
+    char outbuf[BUFSIZ + 1];
+    /* A CR has been read but not yet written, because whether it is
+       kept depends on the next byte.  This must survive across
+       chunks, otherwise a CR at the end of a chunk is lost. */
+    int pending_cr = 0;
+
+    for (;;) {
+        size_t want = sizeof inbuf;
+        size_t got;
+        size_t out = 0;
+        size_t i;
+
+        if (bounded  &&  length < want) want = length;
+        if (want == 0  ||  (got = fread (inbuf, 1, want, in)) == 0) break;
+        if (bounded) length -= got;
+
+        for (i = 0; i < got; ++i) {
+            const char c = inbuf[i];
+
+            /* The held CR was not followed by a LF, so keep it. */
+            if (pending_cr  &&  c != '\n') outbuf[out++] = '\r';
+
+            pending_cr = c == '\r';
+            if (! pending_cr) outbuf[out++] = c;
+        }
+
+        if (strip_crs_write_all (fd, outbuf, out) != 0) return -1;
+    }
+
+    /* A CR that ends the content is not followed by a LF:  keep it. */
+    if (pending_cr  &&  strip_crs_write_all (fd, "\r", 1) != 0) return -1;
+
+    return ferror (in) ? -1 : 0;
+}
+
+
+/*
+ * Strip the CR from each CR LF pair in the content of CT.
+ *
+ * Nothing is done unless the content's codeset, as normalized by
+ * norm_charmap(), is US-ASCII or starts with ISO-8859-, UTF-8, or
+ * WINDOWS-12.  The content is read from ct->c_cefile.ce_file if that
+ * is set, otherwise from the c_begin..c_end region of ct->c_file.  If
+ * it contains at least one CR LF pair, a stripped copy is written to
+ * a new temporary file, which then becomes ct->c_cefile.ce_file (with
+ * ce_unlink set), and *message_mods is incremented.
+ *
+ * Returns OK, or NOTOK if the content could not be opened, read, or
+ * rewritten.  On NOTOK the content of CT is left as it was.
+ */
+static int
+strip_crs (CT ct, int *message_mods) {
+    /* norm_charmap() is case sensitive. */
+    char *codeset = upcase (content_codeset (ct));
+    const char *charmap = norm_charmap (codeset);
+    int status = OK;
+
+    /* Only strip carriage returns if content is ASCII or another
+       codeset that has the same readily recognizable CR followed by a
+       LF.  We can include UTF-8 here because if the high-order bit of
+       a UTF-8 byte is 0, then it must be a single-byte ASCII
+       character. */
+    if (! strcmp (charmap, "US-ASCII")  ||
+        ! strncmp (charmap, "ISO-8859-", 9)  ||
+        ! strncmp (charmap, "UTF-8", 5)  ||
+        ! strncmp (charmap, "WINDOWS-12", 10)) {
+        char **file = NULL;
+        FILE **fp = NULL;
+        size_t begin = 0;
+        size_t end = 0;
+        int has_crs = 0;
+        int opened_input_file = 0;
+
+        if (ct->c_cefile.ce_file) {
+            file = &ct->c_cefile.ce_file;
+            fp = &ct->c_cefile.ce_fp;
+        } else if (ct->c_file) {
+            file = &ct->c_file;
+            fp = &ct->c_fp;
+            begin = (size_t) ct->c_begin;
+            end = (size_t) ct->c_end;
+        } /* else don't know where the content is */
+
+        if (file  &&  *file  &&  fp  &&  ! *fp) {
+            if ((*fp = fopen (*file, "r")) == NULL) {
+                advise (*file, "unable to open for reading");
+                status = NOTOK;
+            } else {
+                opened_input_file = 1;
+            }
+        }
+
+        if (fp  &&  *fp) {
+            /* With a usable region, only [begin, end) is content.
+               Otherwise everything from begin to end of file is. */
+            const int bounded = end > begin;
+            const size_t length = bounded  ?  end - begin  :  0;
+
+            /* Look for CR followed by a LF.  This is supposed to be
+               text so there should be LF's.  If not, don't modify
+               the content. */
+            if (fseeko (*fp, (off_t) begin, SEEK_SET) != 0) {
+                advise (*file, "unable to seek in");
+                status = NOTOK;
+            } else {
+                has_crs = strip_crs_has_crlf (*fp, length, bounded);
+            }
+
+            if (has_crs) {
+                /* NOTE(review): a failure of m_mktemp2() is not
+                   detected here; confirm how it reports errors. */
+                int fd;
+                char *stripped_content_file =
+                    add (m_mktemp2 (tmp, invo_name, &fd, NULL), NULL);
+                /* Strip each CR before a LF from the content. */
+                const int copy_failed =
+                    fseeko (*fp, (off_t) begin, SEEK_SET) != 0  ||
+                    strip_crs_copy (*fp, fd, length, bounded) != 0;
+
+                /* close() comes first so the descriptor is always
+                   released, even when the copy already failed. */
+                if (close (fd)  ||  copy_failed) {
+                    admonish (NULL, "unable to write temporary file %s",
+                              stripped_content_file);
+                    unlink (stripped_content_file);
+                    free (stripped_content_file);
+                    status = NOTOK;
+                } else {
+                    /* Replace the decoded file with the converted one. */
+                    if (ct->c_cefile.ce_file) {
+                        if (ct->c_cefile.ce_unlink) {
+                            unlink (ct->c_cefile.ce_file);
+                        }
+                        free (ct->c_cefile.ce_file);
+                    }
+                    ct->c_cefile.ce_file = stripped_content_file;
+                    ct->c_cefile.ce_unlink = 1;
+
+                    ++*message_mods;
+                    if (verbosw) {
+                        report (ct->c_partno,
+                                begin == 0 && end == 0  ?  ""  :  *file,
+                                "stripped CRs");
+                    }
+                }
+            }
+
+            /* Only close a stream that this function opened. */
+            if (opened_input_file) {
+                fclose (*fp);
+                *fp = NULL;
+            }
+        }
+    }
+
+    free (codeset);
+    return status;
+}
+
+
+/*
+ * Return the codeset named by the "charset" parameter of the
+ * Content-Type of CT.  The attribute name is matched without regard
+ * to case, and only the first match is considered.  If there is no
+ * such parameter, or it has no value, "US-ASCII" is returned.
+ *
+ * The result points either into the parameter table of CT or at a
+ * string literal, so callers must not modify or free it.  CT itself
+ * is never modified:  in particular the default is not stored back
+ * into the parameter table.
+ */
+char *
+content_codeset (CT ct) {
+    CI ctinfo = &ct->c_ctinfo;
+    char **ap, **vp;
+
+    for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) {
+        if (! strcasecmp (*ap, "charset")) {
+            if (*vp) return *vp;
+            break;
+        }
+    }
+
+    /* RFC 2045, Sec. 5.2:  default to us-ascii. */
+    return "US-ASCII";
+}
+
+