diplodocus.org Git - nmh/blob - uip/mhfixmsg.c

   1 /*
   2  * mhfixmsg.c -- rewrite a message with various transformations
   3  *
   4  * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
   5  * See the COPYRIGHT file in the root directory of the nmh
   6  * distribution for complete copyright information.
   7  */
   8
   9 #include <h/mh.h>
  10 #include <h/mime.h>
  11 #include <h/mhparse.h>
  12 #include <h/utils.h>
  13 #include <h/signals.h>
  14 #include <fcntl.h>
  15
  /*
   * Command-line switches, kept in a single X-macro list.  Each
   * X(name, minchars, id) entry is expanded twice below: once to build
   * the enum of switch ids, and once to build the `switches' table that
   * main() hands to smatch(), ambigsw() and print_help().
   */
  #define MHFIXMSG_SWITCHES                           \
      X("decodetext 8bit|7bit", 0, DECODETEXTSW)      \
      X("nodecodetext", 0, NDECODETEXTSW)             \
      X("decodetypes", 0, DECODETYPESW)               \
      X("crlflinebreaks", 0, CRLFLINEBREAKSSW)        \
      X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW)     \
      X("textcharset", 0, TEXTCHARSETSW)              \
      X("notextcharset", 0, NTEXTCHARSETSW)           \
      X("reformat", 0, REFORMATSW)                    \
      X("noreformat", 0, NREFORMATSW)                 \
      X("replacetextplain", 0, REPLACETEXTPLAINSW)    \
      X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
      X("fixboundary", 0, FIXBOUNDARYSW)              \
      X("nofixboundary", 0, NFIXBOUNDARYSW)           \
      X("fixcte", 0, FIXCTESW)                        \
      X("nofixcte", 0, NFIXCTESW)                     \
      X("fixtype mimetype", 0, FIXTYPESW)             \
      X("file file", 0, FILESW)                       \
      X("outfile file", 0, OUTFILESW)                 \
      X("rmmproc program", 0, RPROCSW)                \
      X("normmproc", 0, NRPRCSW)                      \
      X("changecur", 0, CHGSW)                        \
      X("nochangecur", 0, NCHGSW)                     \
      X("verbose", 0, VERBSW)                         \
      X("noverbose", 0, NVERBSW)                      \
      X("version", 0, VERSIONSW)                      \
      X("help", 0, HELPSW)

  /* First expansion: one enumerator per switch. */
  #define X(sw, minchars, id) id,
  DEFINE_SWITCH_ENUM(MHFIXMSG);
  #undef X

  /* Second expansion: one table row per switch. */
  #define X(sw, minchars, id) { sw, minchars, id },
  DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
  #undef X
  51
  52
  53 int verbosw;
  54 int debugsw; /* Needed by mhparse.c. */
  55
  56 #define quitser pipeser
  57
  58 /* mhparse.c */
  59 extern int skip_mp_cte_check;                 /* flag to InitMultiPart */
  60 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
  61 extern int bogus_mp_content;                  /* flag from InitMultiPart */
  62 /* flags to/from parse_header_attrs */
  63 extern int suppress_extraneous_trailing_semicolon_warning;
  64 extern int extraneous_trailing_semicolon;
  65
  66 /* mhoutsbr.c */
  67 int output_message (CT, char *);
  68
  69 /* mhmisc.c */
  70 void flush_errors (void);
  71
  72 /* mhfree.c */
  73 extern CT *cts;
  74 void freects_done (int) NORETURN;
  75
  76 /*
  77  * static prototypes
  78  */
  79 typedef struct fix_transformations {
  80     int fixboundary;
  81     int fixcte;
  82     svector_t fixtypes;
  83     int reformat;
  84     int replacetextplain;
  85     int decodetext;
  86     char *decodetypes;
  87     /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */
  88     int lf_line_endings;
  89     char *textcharset;
  90 } fix_transformations;
  91
  92 int mhfixmsgsbr (CT *, const fix_transformations *, char *);
  93 static int fix_boundary (CT *, int *);
  94 static int copy_input_to_output (const char *, const char *);
  95 static int get_multipart_boundary (CT, char **);
  96 static int replace_boundary (CT, char *, char *);
  97 static int fix_types (CT, svector_t, int *);
  98 static char *replace_substring (char **, const char *, const char *);
  99 static char *remove_parameter (char *, const char *);
 100 static int fix_multipart_cte (CT, int *);
 101 static int set_ce (CT, int);
 102 static int ensure_text_plain (CT *, CT, int *, int);
 103 static int find_textplain_sibling (CT, int, int *);
 104 static int insert_new_text_plain_part (CT, int, CT);
 105 static CT build_text_plain_part (CT);
 106 static int insert_into_new_mp_alt (CT *, int *);
 107 static CT divide_part (CT);
 108 static void copy_ctinfo (CI, CI);
 109 static int decode_part (CT);
 110 static int reformat_part (CT, char *, char *, char *, int);
 111 static int charset_encoding (CT);
 112 static CT build_multipart_alt (CT, CT, int, int);
 113 static int boundary_in_content (FILE **, char *, const char *);
 114 static void transfer_noncontent_headers (CT, CT);
 115 static int set_ct_type (CT, int type, int subtype, int encoding);
 116 static int decode_text_parts (CT, int, const char *, int *);
 117 static int should_decode(const char *, const char *, const char *);
 118 static int content_encoding (CT, const char **);
 119 static int strip_crs (CT, int *);
 120 static int convert_charsets (CT, char *, int *);
 121 static int fix_always (CT, int *);
 122 static int write_content (CT, const char *, char *, int, int);
 123 static void set_text_ctparams(CT, char *, int);
 124 static int remove_file (const char *);
 125 static void report (char *, char *, char *, char *, ...);
 126 static void pipeser (int);
 127
 128
 129 int
 130 main (int argc, char **argv) {
 131     int msgnum;
 132     char *cp, *file = NULL, *folder = NULL;
 133     char *maildir, buf[100], *outfile = NULL;
 134     char **argp, **arguments;
 135     struct msgs_array msgs = { 0, 0, NULL };
 136     struct msgs *mp = NULL;
 137     CT *ctp;
 138     FILE *fp;
 139     int using_stdin = 0;
 140     int chgflag = 1;
 141     int status = OK;
 142     fix_transformations fx;
 143     fx.reformat = fx.fixcte = fx.fixboundary = 1;
 144     fx.fixtypes = NULL;
 145     fx.replacetextplain = 0;
 146     fx.decodetext = CE_8BIT;
 147     fx.decodetypes = "text,application/ics";  /* Default, per man page. */
 148     fx.lf_line_endings = 0;
 149     fx.textcharset = NULL;
 150
 151     if (nmh_init(argv[0], 1)) { return 1; }
 152
 153     done = freects_done;
 154
 155     arguments = getarguments (invo_name, argc, argv, 1);
 156     argp = arguments;
 157
 158     /*
 159      * Parse arguments
 160      */
 161     while ((cp = *argp++)) {
 162         if (*cp == '-') {
 163             switch (smatch (++cp, switches)) {
 164             case AMBIGSW:
 165                 ambigsw (cp, switches);
 166                 done (1);
 167             case UNKWNSW:
 168                 adios (NULL, "-%s unknown", cp);
 169
 170             case HELPSW:
 171                 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
 172                         invo_name);
 173                 print_help (buf, switches, 1);
 174                 done (0);
 175             case VERSIONSW:
 176                 print_version(invo_name);
 177                 done (0);
 178
 179             case DECODETEXTSW:
 180                 if (! (cp = *argp++)  ||  *cp == '-')
 181                     adios (NULL, "missing argument to %s", argp[-2]);
 182                 if (! strcasecmp (cp, "8bit")) {
 183                     fx.decodetext = CE_8BIT;
 184                 } else if (! strcasecmp (cp, "7bit")) {
 185                     fx.decodetext = CE_7BIT;
 186                 } else {
 187                     adios (NULL, "invalid argument to %s", argp[-2]);
 188                 }
 189                 continue;
 190             case NDECODETEXTSW:
 191                 fx.decodetext = 0;
 192                 continue;
 193             case DECODETYPESW:
 194                 if (! (cp = *argp++)  ||  *cp == '-')
 195                     adios (NULL, "missing argument to %s", argp[-2]);
 196                 fx.decodetypes = cp;
 197                 continue;
 198             case CRLFLINEBREAKSSW:
 199                 fx.lf_line_endings = 0;
 200                 continue;
 201             case NCRLFLINEBREAKSSW:
 202                 fx.lf_line_endings = 1;
 203                 continue;
 204             case TEXTCHARSETSW:
 205                 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
 206                     adios (NULL, "missing argument to %s", argp[-2]);
 207                 fx.textcharset = cp;
 208                 continue;
 209             case NTEXTCHARSETSW:
 210                 fx.textcharset = 0;
 211                 continue;
 212             case FIXBOUNDARYSW:
 213                 fx.fixboundary = 1;
 214                 continue;
 215             case NFIXBOUNDARYSW:
 216                 fx.fixboundary = 0;
 217                 continue;
 218             case FIXCTESW:
 219                 fx.fixcte = 1;
 220                 continue;
 221             case NFIXCTESW:
 222                 fx.fixcte = 0;
 223                 continue;
 224             case FIXTYPESW:
 225                 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
 226                     adios (NULL, "missing argument to %s", argp[-2]);
 227                 if (! strncasecmp (cp, "multipart/", 10)  ||
 228                     ! strncasecmp (cp, "message/", 8)) {
 229                     adios (NULL, "-fixtype %s not allowed", cp);
 230                 } else if (! strchr (cp, '/')) {
 231                     adios (NULL, "-fixtype requires type/subtype");
 232                 }
 233                 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
 234                 svector_push_back (fx.fixtypes, cp);
 235                 continue;
 236             case REFORMATSW:
 237                 fx.reformat = 1;
 238                 continue;
 239             case NREFORMATSW:
 240                 fx.reformat = 0;
 241                 continue;
 242             case REPLACETEXTPLAINSW:
 243                 fx.replacetextplain = 1;
 244                 continue;
 245             case NREPLACETEXTPLAINSW:
 246                 fx.replacetextplain = 0;
 247                 continue;
 248             case FILESW:
 249                 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
 250                     adios (NULL, "missing argument to %s", argp[-2]);
 251                 file = *cp == '-'  ?  add (cp, NULL)  :  path (cp, TFILE);
 252                 continue;
 253             case OUTFILESW:
 254                 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
 255                     adios (NULL, "missing argument to %s", argp[-2]);
 256                 outfile = *cp == '-'  ?  add (cp, NULL)  :  path (cp, TFILE);
 257                 continue;
 258             case RPROCSW:
 259                 if (!(rmmproc = *argp++) || *rmmproc == '-')
 260                     adios (NULL, "missing argument to %s", argp[-2]);
 261                 continue;
 262             case NRPRCSW:
 263                 rmmproc = NULL;
 264                 continue;
 265             case CHGSW:
 266                 chgflag = 1;
 267                 continue;
 268             case NCHGSW:
 269                 chgflag = 0;
 270                 continue;
 271             case VERBSW:
 272                 verbosw = 1;
 273                 continue;
 274             case NVERBSW:
 275                 verbosw = 0;
 276                 continue;
 277             }
 278         }
 279         if (*cp == '+' || *cp == '@') {
 280             if (folder)
 281                 adios (NULL, "only one folder at a time!");
 282             else
 283                 folder = pluspath (cp);
 284         } else {
 285             if (*cp == '/') {
 286                 /* Interpret a full path as a filename, not a message. */
 287                 file = add (cp, NULL);
 288             } else {
 289                 app_msgarg (&msgs, cp);
 290             }
 291         }
 292     }
 293
 294     SIGNAL (SIGQUIT, quitser);
 295     SIGNAL (SIGPIPE, pipeser);
 296
 297     /*
 298      * Read the standard profile setup
 299      */
 300     if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
 301         readconfig ((struct node **) 0, fp, cp, 0);
 302         fclose (fp);
 303     }
 304
 305     suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
 306     suppress_extraneous_trailing_semicolon_warning = 1;
 307
 308     if (! context_find ("path"))
 309         free (path ("./", TFOLDER));
 310
 311     if (file && msgs.size)
 312         adios (NULL, "cannot specify msg and file at same time!");
 313
 314     /*
 315      * check if message is coming from file
 316      */
 317     if (file) {
 318         /* If file is stdin, create a tmp file name before parse_mime()
 319            has a chance, because it might put in on a different
 320            filesystem than the output file.  Instead, put it in the
 321            user's preferred tmp directory. */
 322         CT ct;
 323
 324         if (! strcmp ("-", file)) {
 325             int fd;
 326             char *cp;
 327
 328             using_stdin = 1;
 329
 330             if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
 331                 adios (NULL, "unable to create temporary file in %s",
 332                        get_temp_dir());
 333             } else {
 334                 free (file);
 335                 file = add (cp, NULL);
 336                 cpydata (STDIN_FILENO, fd, "-", file);
 337             }
 338
 339             if (close (fd)) {
 340                 (void) m_unlink (file);
 341                 adios (NULL, "failed to write temporary file");
 342             }
 343         }
 344
 345         if (! (cts = (CT *) mh_xcalloc ((size_t) 2, sizeof *cts))) {
 346             adios (NULL, "out of memory");
 347         }
 348         ctp = cts;
 349
 350         if ((ct = parse_mime (file))) {
 351             set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
 352             *ctp++ = ct;
 353         } else {
 354             advise (NULL, "unable to parse message from file %s", file);
 355             status = NOTOK;
 356
 357             /* If there's an outfile, pass the input message unchanged, so the message won't
 358                get dropped from a pipeline. */
 359             if (outfile) {
 360                 /* Something went wrong.  Output might be expected, such as if this were run
 361                    as a filter.  Just copy the input to the output. */
 362                 if (copy_input_to_output (file, outfile) != OK) {
 363                     advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
 364                 }
 365             }
 366         }
 367     } else {
 368         /*
 369          * message(s) are coming from a folder
 370          */
 371         CT ct;
 372
 373         if (! msgs.size)
 374             app_msgarg(&msgs, "cur");
 375         if (! folder)
 376             folder = getfolder (1);
 377         maildir = m_maildir (folder);
 378
 379         if (chdir (maildir) == NOTOK)
 380             adios (maildir, "unable to change directory to");
 381
 382         /* read folder and create message structure */
 383         if (! (mp = folder_read (folder, 1)))
 384             adios (NULL, "unable to read folder %s", folder);
 385
 386         /* check for empty folder */
 387         if (mp->nummsg == 0)
 388             adios (NULL, "no messages in %s", folder);
 389
 390         /* parse all the message ranges/sequences and set SELECTED */
 391         for (msgnum = 0; msgnum < msgs.size; msgnum++)
 392             if (! m_convert (mp, msgs.msgs[msgnum]))
 393                 done (1);
 394         seq_setprev (mp);       /* set the previous-sequence */
 395
 396         if (! (cts =
 397                (CT *) mh_xcalloc ((size_t) (mp->numsel + 1), sizeof *cts))) {
 398             adios (NULL, "out of memory");
 399         }
 400         ctp = cts;
 401
 402         for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
 403             if (is_selected(mp, msgnum)) {
 404                 char *msgnam;
 405
 406                 msgnam = m_name (msgnum);
 407                 if ((ct = parse_mime (msgnam))) {
 408                     set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
 409                     *ctp++ = ct;
 410                 } else {
 411                     advise (NULL, "unable to parse message %s", msgnam);
 412                     status = NOTOK;
 413
 414                     /* If there's an outfile, pass the input message unchanged, so the message won't
 415                        get dropped from a pipeline. */
 416                     if (outfile) {
 417                         /* Something went wrong.  Output might be expected, such as if this were run
 418                            as a filter.  Just copy the input to the output. */
 419                         const char *input_filename = path (msgnam, TFILE);
 420
 421                         if (copy_input_to_output (input_filename, outfile) != OK) {
 422                             advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
 423                         }
 424                     }
 425                 }
 426             }
 427         }
 428
 429         if (chgflag) {
 430             seq_setcur (mp, mp->hghsel);  /* update current message */
 431         }
 432         seq_save (mp);                    /* synchronize sequences  */
 433         context_replace (pfolder, folder);/* update current folder  */
 434         context_save ();                  /* save the context file  */
 435     }
 436
 437     if (*cts) {
 438         for (ctp = cts; *ctp; ++ctp) {
 439             status += mhfixmsgsbr (ctp, &fx, outfile);
 440
 441             if (using_stdin) {
 442                 (void) m_unlink (file);
 443
 444                 if (! outfile) {
 445                     /* Just calling m_backup() unlinks the backup file. */
 446                     (void) m_backup (file);
 447                 }
 448             }
 449         }
 450     } else {
 451         status = 1;
 452     }
 453
 454     if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
 455     free (outfile);
 456     free (file);
 457
 458     /* done is freects_done, which will clean up all of cts. */
 459     done (status);
 460     return NOTOK;
 461 }
 462
 463
 464 int
 465 mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
 466     /* Store input filename in case one of the transformations, i.e.,
 467        fix_boundary(), rewrites to a tmp file. */
 468     char *input_filename = add ((*ctp)->c_file, NULL);
 469     int modify_inplace = 0;
 470     int message_mods = 0;
 471     int status = OK;
 472
 473     if (outfile == NULL) {
 474         modify_inplace = 1;
 475
 476         if ((*ctp)->c_file) {
 477             char *tempfile;
 478             if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
 479                 adios (NULL, "unable to create temporary file in %s",
 480                        get_temp_dir());
 481             }
 482             outfile = add (tempfile, NULL);
 483         } else {
 484             adios (NULL, "missing both input and output filenames\n");
 485         }
 486     }
 487
 488     reverse_alternative_parts (*ctp);
 489     status = fix_always (*ctp, &message_mods);
 490     if (status == OK  &&  fx->fixboundary) {
 491         status = fix_boundary (ctp, &message_mods);
 492     }
 493     if (status == OK  && fx->fixtypes != NULL) {
 494         status = fix_types (*ctp, fx->fixtypes, &message_mods);
 495     }
 496     if (status == OK  &&  fx->fixcte) {
 497         status = fix_multipart_cte (*ctp, &message_mods);
 498     }
 499     if (status == OK  &&  fx->reformat) {
 500         status =
 501             ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
 502     }
 503     if (status == OK  &&  fx->decodetext) {
 504         status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes, &message_mods);
 505     }
 506     if (status == OK  &&  fx->textcharset != NULL) {
 507         status = convert_charsets (*ctp, fx->textcharset, &message_mods);
 508     }
 509
 510     if (status == OK  &&  ! (*ctp)->c_umask) {
 511         /* Set the umask for the contents file.  This currently
 512            isn't used but just in case it is in the future. */
 513         struct stat st;
 514
 515         if (stat ((*ctp)->c_file, &st) != NOTOK) {
 516             (*ctp)->c_umask = ~(st.st_mode & 0777);
 517         } else {
 518             (*ctp)->c_umask = ~m_gmprot();
 519         }
 520     }
 521
 522     /*
 523      * Write the content to a file
 524      */
 525     if (status == OK) {
 526         status = write_content (*ctp, input_filename, outfile, modify_inplace,
 527                                 message_mods);
 528     } else if (! modify_inplace) {
 529         /* Something went wrong.  Output might be expected, such
 530            as if this were run as a filter.  Just copy the input
 531            to the output. */
 532         if (copy_input_to_output (input_filename, outfile) != OK) {
 533             advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
 534         }
 535     }
 536
 537     if (modify_inplace) {
 538         if (status != OK) { (void) m_unlink (outfile); }
 539         free (outfile);
 540         outfile = NULL;
 541     }
 542
 543     free (input_filename);
 544
 545     return status;
 546 }
 547
 548
 549 /* Copy input message to output.  Assumes not modifying in place, so this
 550    might be running as part of a pipeline. */
 551 static int
 552 copy_input_to_output (const char *input_filename, const char *output_filename) {
 553     int in = open (input_filename, O_RDONLY);
 554     int out = strcmp (output_filename, "-")
 555         ?  open (output_filename, O_WRONLY | O_CREAT, m_gmprot ())
 556         :  STDOUT_FILENO;
 557     int status = OK;
 558
 559     if (in != -1  &&  out != -1) {
 560         cpydata (in, out, input_filename, output_filename);
 561     } else {
 562         status = NOTOK;
 563     }
 564
 565     close (out);
 566     close (in);
 567
 568     return status;
 569 }
 570
 571
 572 static int
 573 fix_boundary (CT *ct, int *message_mods) {
 574     struct multipart *mp;
 575     int status = OK;
 576
 577     if (ct  &&  (*ct)->c_type == CT_MULTIPART  &&  bogus_mp_content) {
 578         mp = (struct multipart *) (*ct)->c_ctparams;
 579
 580         /*
 581          * 1) Get boundary at end of part.
 582          * 2) Get boundary at beginning of part and compare to the end-of-part
 583          *    boundary.
 584          * 3) Write out contents of ct to tmp file, replacing boundary in
 585          *    header with boundary from part.  Set c_unlink to 1.
 586          * 4) Free ct.
 587          * 5) Call parse_mime() on the tmp file, replacing ct.
 588          */
 589
 590         if (mp  &&  mp->mp_start) {
 591             char *part_boundary;
 592
 593             if (get_multipart_boundary (*ct, &part_boundary) == OK) {
 594                 char *fixed;
 595
 596                 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
 597                     if (replace_boundary (*ct, fixed, part_boundary) == OK) {
 598                         char *filename = add ((*ct)->c_file, NULL);
 599                         CT fixed_ct;
 600
 601                         free_content (*ct);
 602                         if ((fixed_ct = parse_mime (fixed))) {
 603                             *ct = fixed_ct;
 604                             (*ct)->c_unlink = 1;
 605
 606                             ++*message_mods;
 607                             if (verbosw) {
 608                                 report (NULL, NULL, filename,
 609                                         "fix multipart boundary");
 610                             }
 611                         } else {
 612                             *ct = NULL;
 613                             advise (NULL, "unable to parse fixed part");
 614                             status = NOTOK;
 615                         }
 616                         free (filename);
 617                     } else {
 618                         advise (NULL, "unable to replace broken boundary");
 619                         status = NOTOK;
 620                     }
 621                 } else {
 622                     advise (NULL, "unable to create temporary file in %s",
 623                             get_temp_dir());
 624                     status = NOTOK;
 625                 }
 626
 627                 free (part_boundary);
 628             } else {
 629                 /* Couldn't fix the boundary.  Report failure so that mhfixmsg
 630                    doesn't modify the message. */
 631                 status = NOTOK;
 632             }
 633         } else {
 634             /* No multipart struct, even though the content type is
 635                CT_MULTIPART.  Report failure so that mhfixmsg doesn't modify
 636                the message. */
 637             status = NOTOK;
 638         }
 639     }
 640
 641     return status;
 642 }
 643
 644
 645 static int
 646 get_multipart_boundary (CT ct, char **part_boundary) {
 647     char buffer[BUFSIZ];
 648     char *end_boundary = NULL;
 649     off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
 650         ?  (off_t) (ct->c_end - sizeof buffer)
 651         :  (off_t) ct->c_begin;
 652     size_t bytes_read;
 653     int status = OK;
 654
 655     /* This will fail if the boundary spans fread() calls.  BUFSIZ should
 656        be big enough, even if it's just 1024, to make that unlikely. */
 657
 658     /* free_content() will close ct->c_fp. */
 659     if (! ct->c_fp  &&  (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
 660         advise (ct->c_file, "unable to open for reading");
 661         return NOTOK;
 662     }
 663
 664     /* Get boundary at end of multipart. */
 665     while (begin >= (off_t) ct->c_begin) {
 666         fseeko (ct->c_fp, begin, SEEK_SET);
 667         while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
 668             char *cp = rfind_str (buffer, bytes_read, "--");
 669
 670             if (cp) {
 671                 char *end;
 672
 673                 /* Trim off trailing "--" and anything beyond. */
 674                 *cp-- = '\0';
 675                 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
 676                     if (strlen (end) > 3  &&  *end++ == '\n'  &&
 677                         *end++ == '-'  &&  *end++ == '-') {
 678                         end_boundary = add (end, NULL);
 679                         break;
 680                     }
 681                 }
 682             }
 683         }
 684
 685         if (! end_boundary  &&  begin > (off_t) (ct->c_begin + sizeof buffer)) {
 686             begin -= sizeof buffer;
 687         } else {
 688             break;
 689         }
 690     }
 691
 692     /* Get boundary at beginning of multipart. */
 693     if (end_boundary) {
 694         fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
 695         while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
 696             if (bytes_read >= strlen (end_boundary)) {
 697                 char *cp = find_str (buffer, bytes_read, end_boundary);
 698
 699                 if (cp  &&  cp - buffer >= 2  &&  *--cp == '-'  &&
 700                     *--cp == '-'  &&  (cp > buffer  &&  *--cp == '\n')) {
 701                     status = OK;
 702                     break;
 703                 }
 704             } else {
 705                 /* The start and end boundaries didn't match, or the
 706                    start boundary doesn't begin with "\n--" (or "--"
 707                    if at the beginning of buffer).  Keep trying. */
 708                 status = NOTOK;
 709             }
 710         }
 711     } else {
 712         status = NOTOK;
 713     }
 714
 715     if (status == OK) {
 716         *part_boundary = end_boundary;
 717     } else {
 718         *part_boundary = NULL;
 719         free (end_boundary);
 720     }
 721
 722     return status;
 723 }
 724
 725
 726 /* Open and copy ct->c_file to file, replacing the multipart boundary. */
 727 static int
 728 replace_boundary (CT ct, char *file, char *boundary) {
 729     FILE *fpin, *fpout;
 730     int compnum, state;
 731     char buf[BUFSIZ], name[NAMESZ];
 732     char *np, *vp;
 733     m_getfld_state_t gstate = 0;
 734     int status = OK;
 735
 736     if (ct->c_file == NULL) {
 737         advise (NULL, "missing input filename");
 738         return NOTOK;
 739     }
 740
 741     if ((fpin = fopen (ct->c_file, "r")) == NULL) {
 742         advise (ct->c_file, "unable to open for reading");
 743         return NOTOK;
 744     }
 745
 746     if ((fpout = fopen (file, "w")) == NULL) {
 747         fclose (fpin);
 748         advise (file, "unable to open for writing");
 749         return NOTOK;
 750     }
 751
 752     for (compnum = 1;;) {
 753         int bufsz = (int) sizeof buf;
 754
 755         switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
 756         case FLD:
 757         case FLDPLUS:
 758             compnum++;
 759
 760             /* get copies of the buffers */
 761             np = add (name, NULL);
 762             vp = add (buf, NULL);
 763
 764             /* if necessary, get rest of field */
 765             while (state == FLDPLUS) {
 766                 bufsz = sizeof buf;
 767                 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
 768                 vp = add (buf, vp);     /* add to previous value */
 769             }
 770
 771             if (strcasecmp (TYPE_FIELD, np)) {
 772                 fprintf (fpout, "%s:%s", np, vp);
 773             } else {
 774                 char *new_ctline, *new_params;
 775
 776                 replace_param(&ct->c_ctinfo.ci_first_pm,
 777                               &ct->c_ctinfo.ci_last_pm, "boundary",
 778                               boundary, 0);
 779
 780                 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
 781                                     ct->c_ctinfo.ci_subtype, NULL);
 782                 new_params = output_params(strlen(TYPE_FIELD) +
 783                                            strlen(new_ctline) + 1,
 784                                            ct->c_ctinfo.ci_first_pm, NULL, 0);
 785                 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
 786                          new_params ? new_params : "");
 787                 free(new_ctline);
 788                 if (new_params)
 789                     free(new_params);
 790             }
 791
 792             free (vp);
 793             free (np);
 794
 795             continue;
 796
 797         case BODY:
 798             fputs ("\n", fpout);
 799             /* buf will have a terminating NULL, skip it. */
 800             if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
 801                 advise (file, "fwrite");
 802             }
 803             continue;
 804
 805         case FILEEOF:
 806             break;
 807
 808         case LENERR:
 809         case FMTERR:
 810             advise (NULL, "message format error in component #%d", compnum);
 811             status = NOTOK;
 812             break;
 813
 814         default:
 815             advise (NULL, "getfld() returned %d", state);
 816             status = NOTOK;
 817             break;
 818         }
 819
 820         break;
 821     }
 822
 823     m_getfld_state_destroy (&gstate);
 824     fclose (fpout);
 825     fclose (fpin);
 826
 827     return status;
 828 }
 829
 830
 831 static int
 832 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
 833     int status = OK;
 834
 835     switch (ct->c_type) {
 836     case CT_MULTIPART: {
 837         struct multipart *m = (struct multipart *) ct->c_ctparams;
 838         struct part *part;
 839
 840         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
 841             status = fix_types (part->mp_part, fixtypes, message_mods);
 842         }
 843         break;
 844     }
 845
 846     case CT_MESSAGE:
 847         if (ct->c_subtype == MESSAGE_EXTERNAL) {
 848             struct exbody *e = (struct exbody *) ct->c_ctparams;
 849
 850             status = fix_types (e->eb_content, fixtypes, message_mods);
 851         }
 852         break;
 853
 854     default: {
 855         char **typep, *type;
 856
 857         if (ct->c_ctinfo.ci_type  &&  ct->c_ctinfo.ci_subtype) {
 858             for (typep = svector_strs (fixtypes);
 859                  typep && (type = *typep);
 860                  ++typep) {
 861                 char *type_subtype =
 862                     concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
 863                             NULL);
 864
 865                 if (! strcasecmp (type, type_subtype)  &&
 866                     decode_part (ct) == OK  &&
 867                     ct->c_cefile.ce_file != NULL) {
 868                     char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
 869                     char *cp;
 870
 871                     if ((cp = strchr (ct_type_subtype, ';'))) {
 872                         /* Truncate to remove any parameter list from
 873                            mime_type () result. */
 874                         *cp = '\0';
 875                     }
 876
 877                     if (strcasecmp (type, ct_type_subtype)) {
 878                         char *ct_type, *ct_subtype;
 879                         HF hf;
 880
 881                         /* The Content-Type header does not match the
 882                            content, so update these struct Content
 883                            fields to match:
 884                            * c_type, c_subtype
 885                            * c_ctinfo.ci_type, c_ctinfo.ci_subtype
 886                            * c_ctline
 887                            */
 888                         /* Extract type and subtype from type/subtype. */
 889                         ct_type = getcpy (ct_type_subtype);
 890                         if ((cp = strchr (ct_type, '/'))) {
 891                             *cp = '\0';
 892                             ct_subtype = getcpy (++cp);
 893                         } else {
 894                             advise (NULL, "missing / in MIME type of %s %s",
 895                                     ct->c_file, ct->c_partno);
 896                             free (ct_type);
 897                             return NOTOK;
 898                         }
 899
 900                         ct->c_type = ct_str_type (ct_type);
 901                         ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
 902
 903                         free (ct->c_ctinfo.ci_type);
 904                         ct->c_ctinfo.ci_type = ct_type;
 905                         free (ct->c_ctinfo.ci_subtype);
 906                         ct->c_ctinfo.ci_subtype = ct_subtype;
 907                         if (! replace_substring (&ct->c_ctline, type,
 908                                                  ct_type_subtype)) {
 909                             advise (NULL, "did not find %s in %s",
 910                                     type, ct->c_ctline);
 911                         }
 912
 913                         /* Update Content-Type header field. */
 914                         for (hf = ct->c_first_hf; hf; hf = hf->next) {
 915                             if (! strcasecmp (TYPE_FIELD, hf->name)) {
 916                                 if (replace_substring (&hf->value, type,
 917                                                        ct_type_subtype)) {
 918                                     ++*message_mods;
 919                                     if (verbosw) {
 920                                         report (NULL, ct->c_partno, ct->c_file,
 921                                                 "change Content-Type in header "
 922                                                 "from %s to %s",
 923                                                 type, ct_type_subtype);
 924                                     }
 925                                     break;
 926                                 } else {
 927                                     advise (NULL, "did not find %s in %s",
 928                                             type, hf->value);
 929                                 }
 930                             }
 931                         }
 932                     }
 933                     free (ct_type_subtype);
 934                 }
 935                 free (type_subtype);
 936             }
 937         }
 938     }}
 939
 940     return status;
 941 }
 942
 943 char *
 944 replace_substring (char **str, const char *old, const char *new) {
 945     char *cp;
 946
 947     if ((cp = strstr (*str, old))) {
 948         char *remainder = cp + strlen (old);
 949         char *prefix, *new_str;
 950
 951         if (cp - *str) {
 952             prefix = getcpy (*str);
 953             *(prefix + (cp - *str)) = '\0';
 954             new_str = concat (prefix, new, remainder, NULL);
 955             free (prefix);
 956         } else {
 957             new_str = concat (new, remainder, NULL);
 958         }
 959
 960         free (*str);
 961
 962         return *str = new_str;
 963     } else {
 964         return NULL;
 965     }
 966 }
 967
 968 /*
 969  * Remove a name=value parameter, given just its name, from a header value.
 970  */
 971 char *
 972 remove_parameter (char *str, const char *name) {
 973     /* It looks to me, based on the BNF in RFC 2045, than there can't
 974        be whitespace betwwen the parameter name and the "=", or
 975        between the "=" and the parameter value. */
 976     char *param_name = concat (name, "=", NULL);
 977     char *cp;
 978
 979     if ((cp = strstr (str, param_name))) {
 980         char *start, *end;
 981         size_t count = 1;
 982
 983         /* Remove any leading spaces, before the parameter name. */
 984         for (start = cp;
 985              start > str && isspace ((unsigned char) *(start-1));
 986              --start) {
 987             continue;
 988         }
 989         /* Remove a leading semicolon. */
 990         if (start > str  &&  *(start-1) == ';') { --start; }
 991
 992         end = cp + strlen (name) + 1;
 993         if (*end == '"') {
 994             /* Skip past the quoted value, and then the final quote. */
 995             for (++end ; *end  &&  *end != '"'; ++end) { continue; }
 996             ++end;
 997         } else {
 998             /* Skip past the value. */
 999             for (++end ; *end  &&  ! isspace ((unsigned char) *end); ++end) {}
1000         }
1001
1002         /* Count how many characters need to be moved.  Include
1003            trailing null, which is accounted for by the
1004            initialization of count to 1. */
1005         for (cp = end; *cp; ++cp) { ++count; }
1006         (void) memmove (start, end, count);
1007     }
1008
1009     free (param_name);
1010
1011     return str;
1012 }
1013
1014 static int
1015 fix_multipart_cte (CT ct, int *message_mods) {
1016     int status = OK;
1017
1018     if (ct->c_type == CT_MULTIPART) {
1019         struct multipart *m;
1020         struct part *part;
1021
1022         if (ct->c_encoding != CE_7BIT  &&  ct->c_encoding != CE_8BIT  &&
1023             ct->c_encoding != CE_BINARY) {
1024             HF hf;
1025
1026             for (hf = ct->c_first_hf; hf; hf = hf->next) {
1027                 char *name = hf->name;
1028                 for (; *name && isspace ((unsigned char) *name); ++name) {
1029                     continue;
1030                 }
1031
1032                 if (! strncasecmp (name, ENCODING_FIELD,
1033                                    strlen (ENCODING_FIELD))) {
1034                     char *prefix = "Nmh-REPLACED-INVALID-";
1035                     HF h = mh_xmalloc (sizeof *h);
1036
1037                     h->name = add (hf->name, NULL);
1038                     h->hf_encoding = hf->hf_encoding;
1039                     h->next = hf->next;
1040                     hf->next = h;
1041
1042                     /* Retain old header but prefix its name. */
1043                     free (hf->name);
1044                     hf->name = concat (prefix, h->name, NULL);
1045
1046                     ++*message_mods;
1047                     if (verbosw) {
1048                         char *encoding = cpytrim (hf->value);
1049                         report (NULL, ct->c_partno, ct->c_file,
1050                                 "replace Content-Transfer-Encoding of %s "
1051                                 "with 8 bit", encoding);
1052                         free (encoding);
1053                     }
1054
1055                     h->value = add (" 8bit\n", NULL);
1056
1057                     /* Don't need to warn for multiple C-T-E header
1058                        fields, parse_mime() already does that.  But
1059                        if there are any, fix them all as necessary. */
1060                     hf = h;
1061                 }
1062             }
1063
1064             set_ce (ct, CE_8BIT);
1065         }
1066
1067         m = (struct multipart *) ct->c_ctparams;
1068         for (part = m->mp_parts; part; part = part->mp_next) {
1069             if (fix_multipart_cte (part->mp_part, message_mods) != OK) {
1070                 status = NOTOK;
1071                 break;
1072             }
1073         }
1074     }
1075
1076     return status;
1077 }
1078
1079
1080 static int
1081 set_ce (CT ct, int encoding) {
1082     const char *ce = ce_str (encoding);
1083     const struct str2init *ctinit = get_ce_method (ce);
1084
1085     if (ctinit) {
1086         char *cte = concat (" ", ce, "\n", NULL);
1087         int found_cte = 0;
1088         HF hf;
1089         /* Decoded contents might be in ct->c_cefile.ce_file, if the
1090            caller is decode_text_parts ().  Save because we'll
1091            overwrite below. */
1092         struct cefile decoded_content_info = ct->c_cefile;
1093
1094         ct->c_encoding = encoding;
1095
1096         ct->c_ctinitfnx = ctinit->si_init;
1097         /* This will assign ct->c_cefile with an all-0 struct, which
1098            is what we want. */
1099         (*ctinit->si_init) (ct);
1100         /* After returning, the caller should set
1101            ct->c_cefile.ce_file to the name of the file containing
1102            the contents. */
1103
1104         /* Restore the cefile. */
1105         ct->c_cefile = decoded_content_info;
1106
1107         /* Update/add Content-Transfer-Encoding header field. */
1108         for (hf = ct->c_first_hf; hf; hf = hf->next) {
1109             if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1110                 found_cte = 1;
1111                 free (hf->value);
1112                 hf->value = cte;
1113             }
1114         }
1115         if (! found_cte) {
1116             add_header (ct, add (ENCODING_FIELD, NULL), cte);
1117         }
1118
1119         /* Update c_celine.  It's used only by mhlist -debug. */
1120         free (ct->c_celine);
1121         ct->c_celine = add (cte, NULL);
1122
1123         return OK;
1124     } else {
1125         return NOTOK;
1126     }
1127 }
1128
1129
1130 /* Make sure each text part has a corresponding text/plain part. */
1131 static int
1132 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1133     int status = OK;
1134
1135     switch ((*ct)->c_type) {
1136     case CT_TEXT: {
1137         /* Nothing to do for text/plain. */
1138         if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1139
1140         if (parent  &&  parent->c_type == CT_MULTIPART  &&
1141             parent->c_subtype == MULTI_ALTERNATE) {
1142             int new_subpart_number = 1;
1143             int has_text_plain =
1144                 find_textplain_sibling (parent, replacetextplain,
1145                                         &new_subpart_number);
1146
1147             if (! has_text_plain) {
1148                 /* Parent is a multipart/alternative.  Insert a new
1149                    text/plain subpart. */
1150                 const int inserted =
1151                     insert_new_text_plain_part (*ct, new_subpart_number,
1152                                                 parent);
1153                 if (inserted) {
1154                     ++*message_mods;
1155                     if (verbosw) {
1156                         report (NULL, parent->c_partno, parent->c_file,
1157                                 "insert text/plain part");
1158                     }
1159                 } else {
1160                     status = NOTOK;
1161                 }
1162             }
1163         } else if (parent  &&  parent->c_type == CT_MULTIPART  &&
1164             parent->c_subtype == MULTI_RELATED) {
1165             char *type_subtype =
1166                 concat ((*ct)->c_ctinfo.ci_type, "/",
1167                         (*ct)->c_ctinfo.ci_subtype, NULL);
1168             const char *parent_type =
1169                 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1170             int new_subpart_number = 1;
1171             int has_text_plain = 0;
1172
1173             /* Have to do string comparison on the subtype because we
1174                don't enumerate all of them in c_subtype values.
1175                parent_type will be NULL if the multipart/related part
1176                doesn't have a type parameter.  The type parameter must
1177                be specified according to RFC 2387 Sec. 3.1 but not all
1178                messages comply. */
1179             if (parent_type  &&  strcasecmp (type_subtype, parent_type) == 0) {
1180                 /* The type of this part matches the root type of the
1181                    parent multipart/related.  Look to see if there's
1182                    text/plain sibling. */
1183                 has_text_plain =
1184                     find_textplain_sibling (parent, replacetextplain,
1185                                             &new_subpart_number);
1186             }
1187
1188             free (type_subtype);
1189
1190             if (! has_text_plain) {
1191                 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1192                 struct part *part;
1193                 int siblings = 0;
1194
1195                 for (part = mp->mp_parts; part; part = part->mp_next) {
1196                     if (*ct != part->mp_part) {
1197                         ++siblings;
1198                     }
1199                 }
1200
1201                 if (siblings) {
1202                     /* Parent is a multipart/related.  Insert a new
1203                        text/plain subpart in a new multipart/alternative. */
1204                     if (insert_into_new_mp_alt (ct, message_mods)) {
1205                         /* Not an error if text/plain couldn't be added. */
1206                     }
1207                 } else {
1208                     /* There are no siblings, so insert a new text/plain
1209                        subpart, and change the parent type from
1210                        multipart/related to multipart/alternative. */
1211                     const int inserted =
1212                         insert_new_text_plain_part (*ct, new_subpart_number,
1213                                                     parent);
1214
1215                     if (inserted) {
1216                         HF hf;
1217
1218                         parent->c_subtype = MULTI_ALTERNATE;
1219                         parent->c_ctinfo.ci_subtype = getcpy ("alternative");
1220                         if (! replace_substring (&parent->c_ctline, "/related",
1221                                                  "/alternative")) {
1222                             advise (NULL,
1223                                     "did not find multipart/related in %s",
1224                                     parent->c_ctline);
1225                         }
1226
1227                         /* Update Content-Type header field. */
1228                         for (hf = parent->c_first_hf; hf; hf = hf->next) {
1229                             if (! strcasecmp (TYPE_FIELD, hf->name)) {
1230                                 if (replace_substring (&hf->value, "/related",
1231                                                        "/alternative")) {
1232                                     ++*message_mods;
1233                                     if (verbosw) {
1234                                         report (NULL, parent->c_partno,
1235                                                 parent->c_file,
1236                                                 "insert text/plain part");
1237                                     }
1238
1239                                     /* Remove, e.g., type="text/html" from
1240                                        multipart/alternative. */
1241                                     remove_parameter (hf->value, "type");
1242                                     break;
1243                                 } else {
1244                                     advise (NULL, "did not find multipart/"
1245                                                   "related in header %s",
1246                                             hf->value);
1247                                 }
1248                             }
1249                         }
1250                     } else {
1251                         /* Not an error if text/plain couldn't be inserted. */
1252                     }
1253                 }
1254             }
1255         } else {
1256             if (insert_into_new_mp_alt (ct, message_mods)) {
1257                 status = NOTOK;
1258             }
1259         }
1260         break;
1261     }
1262
1263     case CT_MULTIPART: {
1264         struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1265         struct part *part;
1266
1267         for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1268             if ((*ct)->c_type == CT_MULTIPART) {
1269                 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1270                                             replacetextplain);
1271             }
1272         }
1273         break;
1274     }
1275
1276     case CT_MESSAGE:
1277         if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1278             struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1279
1280             status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1281                                         replacetextplain);
1282         }
1283         break;
1284     }
1285
1286     return status;
1287 }
1288
1289
1290 /* See if there is a sibling text/plain. */
1291 static int
1292 find_textplain_sibling (CT parent, int replacetextplain,
1293                         int *new_subpart_number) {
1294     struct multipart *mp = (struct multipart *) parent->c_ctparams;
1295     struct part *part, *prev;
1296     int has_text_plain = 0;
1297
1298     for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1299         ++*new_subpart_number;
1300         if (part->mp_part->c_type == CT_TEXT  &&
1301             part->mp_part->c_subtype == TEXT_PLAIN) {
1302             if (replacetextplain) {
1303                 struct part *old_part;
1304                 if (part == mp->mp_parts) {
1305                     old_part = mp->mp_parts;
1306                     mp->mp_parts = part->mp_next;
1307                 } else {
1308                     old_part = prev->mp_next;
1309                     prev->mp_next = part->mp_next;
1310                 }
1311                 if (verbosw) {
1312                     report (NULL, parent->c_partno, parent->c_file,
1313                             "remove text/plain part %s",
1314                             old_part->mp_part->c_partno);
1315                 }
1316                 free_content (old_part->mp_part);
1317                 free (old_part);
1318             } else {
1319                 has_text_plain = 1;
1320             }
1321             break;
1322         }
1323         prev = part;
1324     }
1325
1326     return has_text_plain;
1327 }
1328
1329
1330 static int
1331 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1332     struct multipart *mp = (struct multipart *) parent->c_ctparams;
1333     struct part *new_part = mh_xmalloc (sizeof *new_part);
1334
1335     if ((new_part->mp_part = build_text_plain_part (ct))) {
1336         char buffer[16];
1337         snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1338
1339         new_part->mp_next = mp->mp_parts;
1340         mp->mp_parts = new_part;
1341         new_part->mp_part->c_partno =
1342             concat (parent->c_partno ? parent->c_partno : "1", ".",
1343                     buffer, NULL);
1344
1345         return 1;
1346     } else {
1347         free_content (new_part->mp_part);
1348         free (new_part);
1349
1350         return 0;
1351     }
1352 }
1353
1354
1355 static CT
1356 build_text_plain_part (CT encoded_part) {
1357     CT tp_part = divide_part (encoded_part);
1358     char *tmp_plain_file = NULL;
1359
1360     if (decode_part (tp_part) == OK) {
1361         /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1362            contains the decoded contents.  And the decoding function, such
1363            as openQuoted, will have set ...->ce_unlink to 1 so that it will
1364            be unlinked by free_content (). */
1365         char *tempfile;
1366
1367         if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1368             advise (NULL, "unable to create temporary file in %s",
1369                     get_temp_dir());
1370         } else {
1371             tmp_plain_file = add (tempfile, NULL);
1372             if (reformat_part (tp_part, tmp_plain_file,
1373                                tp_part->c_ctinfo.ci_type,
1374                                tp_part->c_ctinfo.ci_subtype,
1375                                tp_part->c_type) == OK) {
1376                 return tp_part;
1377             }
1378         }
1379     }
1380
1381     free_content (tp_part);
1382     if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1383     free (tmp_plain_file);
1384
1385     return NULL;
1386 }
1387
1388
1389 /* Slip new text/plain part into a new multipart/alternative. */
1390 static int
1391 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1392     CT tp_part = build_text_plain_part (*ct);
1393     int status = OK;
1394
1395     if (tp_part) {
1396         CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1397                                          MULTI_ALTERNATE);
1398         if (mp_alt) {
1399             struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1400
1401             if (mp  &&  mp->mp_parts) {
1402                 mp->mp_parts->mp_part = tp_part;
1403                 /* Make the new multipart/alternative the parent. */
1404                 *ct = mp_alt;
1405
1406                 ++*message_mods;
1407                 if (verbosw) {
1408                     report (NULL, (*ct)->c_partno, (*ct)->c_file,
1409                             "insert text/plain part");
1410                 }
1411             } else {
1412                 free_content (tp_part);
1413                 free_content (mp_alt);
1414                 status = NOTOK;
1415             }
1416         } else {
1417             status = NOTOK;
1418         }
1419     } else {
1420         /* Not an error if text/plain couldn't be built. */
1421     }
1422
1423     return status;
1424 }
1425
1426 static CT
1427 divide_part (CT ct) {
1428     CT new_part;
1429
1430     if ((new_part = (CT) mh_xcalloc (1, sizeof *new_part)) == NULL)
1431         adios (NULL, "out of memory");
1432
1433     /* Just copy over what is needed for decoding.  c_vrsn and
1434        c_celine aren't necessary. */
1435     new_part->c_file = add (ct->c_file, NULL);
1436     new_part->c_begin = ct->c_begin;
1437     new_part->c_end = ct->c_end;
1438     copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1439     new_part->c_type = ct->c_type;
1440     new_part->c_cefile = ct->c_cefile;
1441     new_part->c_encoding = ct->c_encoding;
1442     new_part->c_ctinitfnx = ct->c_ctinitfnx;
1443     new_part->c_ceopenfnx = ct->c_ceopenfnx;
1444     new_part->c_ceclosefnx = ct->c_ceclosefnx;
1445     new_part->c_cesizefnx = ct->c_cesizefnx;
1446
1447     /* c_ctline is used by reformat__part(), so it can preserve
1448        anything after the type/subtype. */
1449     new_part->c_ctline = add (ct->c_ctline, NULL);
1450
1451     return new_part;
1452 }
1453
1454
1455 static void
1456 copy_ctinfo (CI dest, CI src) {
1457     PM s_pm, d_pm;
1458
1459     dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL;
1460     dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL;
1461
1462     for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1463         d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1464                          s_pm->pm_value, 0);
1465         if (s_pm->pm_charset)
1466             d_pm->pm_charset = getcpy(s_pm->pm_charset);
1467         if (s_pm->pm_lang)
1468             d_pm->pm_lang = getcpy(s_pm->pm_lang);
1469     }
1470
1471     dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL;
1472     dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL;
1473 }
1474
1475
1476 static int
1477 decode_part (CT ct) {
1478     char *tmp_decoded;
1479     int status;
1480     char *tempfile;
1481
1482     if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1483         adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1484     }
1485     tmp_decoded = add (tempfile, NULL);
1486     /* The following call will load ct->c_cefile.ce_file with the tmp
1487        filename of the decoded content.  tmp_decoded will contain the
1488        encoded output, get rid of that. */
1489     status = output_message (ct, tmp_decoded);
1490     (void) m_unlink (tmp_decoded);
1491     free (tmp_decoded);
1492
1493     return status;
1494 }
1495
1496
1497 /* Some of the arguments aren't really needed now, but maybe will
1498    be in the future for other than text types. */
1499 static int
1500 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1501     int output_subtype, output_encoding;
1502     char *cp, *cf;
1503     int status;
1504
1505     /* Hacky:  this redirects the output from whatever command is used
1506        to show the part to a file.  So, the user can't have any output
1507        redirection in that command.
1508        Could show_multi() in mhshowsbr.c avoid this? */
1509
1510     /* Check for invo_name-format-type/subtype. */
1511     if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1512         if (verbosw) {
1513             advise (NULL, "Don't know how to convert %s, there is no "
1514                     "%s-format-%s/%s profile entry",
1515                     ct->c_file, invo_name, type, subtype);
1516         }
1517         return NOTOK;
1518     } else {
1519         if (strchr (cf, '>')) {
1520             advise (NULL, "'>' prohibited in \"%s\",\nplease fix your "
1521                     "%s-format-%s/%s profile entry", cf, invo_name, type,
1522                     subtype ? subtype : "");
1523
1524             return NOTOK;
1525         }
1526     }
1527
1528     cp = concat (cf, " >", file, NULL);
1529     status = show_content_aux (ct, 0, cp, NULL, NULL);
1530     free (cp);
1531
1532     /* Unlink decoded content tmp file and free its filename to avoid
1533        leaks.  The file stream should already have been closed. */
1534     if (ct->c_cefile.ce_unlink) {
1535         (void) m_unlink (ct->c_cefile.ce_file);
1536         free (ct->c_cefile.ce_file);
1537         ct->c_cefile.ce_file = NULL;
1538         ct->c_cefile.ce_unlink = 0;
1539     }
1540
1541     if (c_type == CT_TEXT) {
1542         output_subtype = TEXT_PLAIN;
1543     } else {
1544         /* Set subtype to 0, which is always an UNKNOWN subtype. */
1545         output_subtype = 0;
1546     }
1547     output_encoding = charset_encoding (ct);
1548
1549     if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1550         ct->c_cefile.ce_file = file;
1551         ct->c_cefile.ce_unlink = 1;
1552     } else {
1553         ct->c_cefile.ce_unlink = 0;
1554         status = NOTOK;
1555     }
1556
1557     return status;
1558 }
1559
1560
1561 /* Identifies 7bit or 8bit content based on charset. */
1562 static int
1563 charset_encoding (CT ct) {
1564     char *ct_charset = content_charset (ct);
1565     int encoding = strcasecmp (ct_charset, "US-ASCII")  ?  CE_8BIT  :  CE_7BIT;
1566
1567     free (ct_charset);
1568
1569     return encoding;
1570 }
1571
1572
1573 static CT
1574 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1575     char *boundary_prefix = "----=_nmh-multipart";
1576     char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1577     char *boundary_indicator = "; boundary=";
1578     char *typename, *subtypename, *name;
1579     CT ct;
1580     struct part *p;
1581     struct multipart *m;
1582     const struct str2init *ctinit;
1583
1584     if ((ct = (CT) mh_xcalloc (1, sizeof *ct)) == NULL)
1585         adios (NULL, "out of memory");
1586
1587     /* Set up the multipart/alternative part.  These fields of *ct were
1588        initialized to 0 by mh_xcalloc():
1589        c_fp, c_unlink, c_begin, c_end,
1590        c_vrsn, c_ctline, c_celine,
1591        c_id, c_descr, c_dispo, c_partno,
1592        c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1593        c_cefile, c_encoding,
1594        c_digested, c_digest[16], c_ctexbody,
1595        c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1596        c_umask, c_rfc934,
1597        c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1598     */
1599
1600     ct->c_file = add (first_alt->c_file, NULL);
1601     ct->c_type = type;
1602     ct->c_subtype = subtype;
1603
1604     ctinit = get_ct_init (ct->c_type);
1605
1606     typename = ct_type_str (type);
1607     subtypename = ct_subtype_str (type, subtype);
1608
1609     {
1610         int serial = 0;
1611         int found_boundary = 1;
1612
1613         while (found_boundary  &&  serial < 1000000) {
1614             found_boundary = 0;
1615
1616             /* Ensure that the boundary doesn't appear in the decoded
1617                content. */
1618             if (new_part->c_cefile.ce_file) {
1619                 if ((found_boundary =
1620                      boundary_in_content (&new_part->c_cefile.ce_fp,
1621                                           new_part->c_cefile.ce_file,
1622                                           boundary)) == -1) {
1623                     free (ct);
1624                     return NULL;
1625                 }
1626             }
1627
1628             /* Ensure that the boundary doesn't appear in the encoded
1629                content. */
1630             if (! found_boundary  &&  new_part->c_file) {
1631                 if ((found_boundary = boundary_in_content (&new_part->c_fp,
1632                                                            new_part->c_file,
1633                                                            boundary)) == -1) {
1634                     free (ct);
1635                     return NULL;
1636                 }
1637             }
1638
1639             if (found_boundary) {
1640                 /* Try a slightly different boundary. */
1641                 char buffer2[16];
1642
1643                 free (boundary);
1644                 ++serial;
1645                 snprintf (buffer2, sizeof buffer2, "%d", serial);
1646                 boundary =
1647                     concat (boundary_prefix,
1648                             first_alt->c_partno ? first_alt->c_partno : "",
1649                             "-", buffer2,  NULL);
1650             }
1651         }
1652
1653         if (found_boundary) {
1654             advise (NULL, "giving up trying to find a unique boundary");
1655             free (ct);
1656             return NULL;
1657         }
1658     }
1659
1660     name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1661                    boundary, "\"", NULL);
1662
1663     /* Load c_first_hf and c_last_hf. */
1664     transfer_noncontent_headers (first_alt, ct);
1665     add_header (ct, add (TYPE_FIELD, NULL), concat (name, "\n", NULL));
1666     free (name);
1667
1668     /* Load c_partno. */
1669     if (first_alt->c_partno) {
1670         ct->c_partno = add (first_alt->c_partno, NULL);
1671         free (first_alt->c_partno);
1672         first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1673         new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1674     } else {
1675         first_alt->c_partno = add ("1", NULL);
1676         new_part->c_partno = add ("2", NULL);
1677     }
1678
1679     if (ctinit) {
1680         ct->c_ctinfo.ci_type = add (typename, NULL);
1681         ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1682     }
1683
1684     add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1685               "boundary", boundary, 0);
1686
1687     p = (struct part *) mh_xmalloc (sizeof *p);
1688     p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next);
1689     p->mp_next->mp_next = NULL;
1690     p->mp_next->mp_part = first_alt;
1691
1692     if ((m = (struct multipart *) mh_xcalloc (1, sizeof (struct multipart))) ==
1693         NULL)
1694         adios (NULL, "out of memory");
1695     m->mp_start = concat (boundary, "\n", NULL);
1696     m->mp_stop = concat (boundary, "--\n", NULL);
1697     m->mp_parts = p;
1698     ct->c_ctparams = m;
1699
1700     free (boundary);
1701
1702     return ct;
1703 }
1704
1705
1706 /* Check that the boundary does not appear in the content. */
1707 static int
1708 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1709     char buffer[BUFSIZ];
1710     size_t bytes_read;
1711     int found_boundary = 0;
1712
1713     /* free_content() will close *fp if we fopen it here. */
1714     if (! *fp  &&  (*fp = fopen (file, "r")) == NULL) {
1715         advise (file, "unable to open %s for reading", file);
1716         return NOTOK;
1717     }
1718
1719     fseeko (*fp, 0L, SEEK_SET);
1720     while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1721         if (find_str (buffer, bytes_read, boundary)) {
1722             found_boundary = 1;
1723             break;
1724         }
1725     }
1726
1727     return found_boundary;
1728 }
1729
1730
1731 /* Remove all non-Content headers. */
1732 static void
1733 transfer_noncontent_headers (CT old, CT new) {
1734     HF hp, hp_prev;
1735
1736     hp_prev = hp = old->c_first_hf;
1737     while (hp) {
1738         HF next = hp->next;
1739
1740         if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1741             if (hp == old->c_last_hf) {
1742                 if (hp == old->c_first_hf) {
1743                     old->c_last_hf =  old->c_first_hf = NULL;
1744                 } else {
1745                     hp_prev->next = NULL;
1746                     old->c_last_hf =  hp_prev;
1747                 }
1748             } else {
1749                 if (hp == old->c_first_hf) {
1750                     old->c_first_hf = next;
1751                 } else {
1752                     hp_prev->next = next;
1753                 }
1754             }
1755
1756             /* Put node hp in the new CT. */
1757             if (new->c_first_hf == NULL) {
1758                 new->c_first_hf = hp;
1759             } else {
1760                 new->c_last_hf->next = hp;
1761             }
1762             new->c_last_hf = hp;
1763         } else {
1764             /* A Content- header, leave in old. */
1765             hp_prev = hp;
1766         }
1767
1768         hp = next;
1769     }
1770 }
1771
1772
1773 static int
1774 set_ct_type (CT ct, int type, int subtype, int encoding) {
1775     char *typename = ct_type_str (type);
1776     char *subtypename = ct_subtype_str (type, subtype);
1777     /* E.g, " text/plain" */
1778     char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1779     /* E.g, " text/plain\n" */
1780     char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1781     int found_content_type = 0;
1782     HF hf;
1783     const char *cp = NULL;
1784     char *ctline;
1785     int status;
1786
1787     /* Update/add Content-Type header field. */
1788     for (hf = ct->c_first_hf; hf; hf = hf->next) {
1789         if (! strcasecmp (TYPE_FIELD, hf->name)) {
1790             found_content_type = 1;
1791             free (hf->value);
1792             hf->value = (cp = strchr (ct->c_ctline, ';'))
1793                 ?  concat (type_subtypename, cp, "\n", NULL)
1794                 :  add (name_plus_nl, NULL);
1795         }
1796     }
1797     if (! found_content_type) {
1798         add_header (ct, add (TYPE_FIELD, NULL),
1799                     (cp = strchr (ct->c_ctline, ';'))
1800                     ?  concat (type_subtypename, cp, "\n", NULL)
1801                     :  add (name_plus_nl, NULL));
1802     }
1803
1804     /* Some of these might not be used, but set them anyway. */
1805     ctline = cp
1806         ?  concat (type_subtypename, cp, NULL)
1807         :  concat (type_subtypename, NULL);
1808     free (ct->c_ctline);
1809     ct->c_ctline = ctline;
1810     /* Leave other ctinfo members as they were. */
1811     free (ct->c_ctinfo.ci_type);
1812     ct->c_ctinfo.ci_type = add (typename, NULL);
1813     free (ct->c_ctinfo.ci_subtype);
1814     ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1815     ct->c_type = type;
1816     ct->c_subtype = subtype;
1817
1818     free (name_plus_nl);
1819     free (type_subtypename);
1820
1821     status = set_ce (ct, encoding);
1822
1823     return status;
1824 }
1825
1826
1827 static int
1828 decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mods) {
1829     int status = OK;
1830     int lf_line_endings = 0;
1831
1832     switch (ct->c_type) {
1833     case CT_MULTIPART: {
1834         struct multipart *m = (struct multipart *) ct->c_ctparams;
1835         struct part *part;
1836
1837         /* Should check to see if the body for this part is encoded?
1838            For now, it gets passed along as-is by InitMultiPart(). */
1839         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
1840             status = decode_text_parts (part->mp_part, encoding, decodetypes, message_mods);
1841         }
1842         break;
1843     }
1844
1845     case CT_MESSAGE:
1846         if (ct->c_subtype == MESSAGE_EXTERNAL) {
1847             struct exbody *e = (struct exbody *) ct->c_ctparams;
1848
1849             status = decode_text_parts (e->eb_content, encoding, decodetypes, message_mods);
1850         }
1851         break;
1852
1853     default:
1854         if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
1855             break;
1856         }
1857
1858         lf_line_endings =
1859             ct->c_ctparams  &&  ((struct text *) ct->c_ctparams)->lf_line_endings;
1860
1861         switch (ct->c_encoding) {
1862         case CE_BASE64:
1863         case CE_QUOTED: {
1864             int ct_encoding;
1865
1866             if (decode_part (ct) == OK  &&  ct->c_cefile.ce_file) {
1867                 const char *reason = NULL;
1868
1869                 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
1870                     &&  encoding != CE_BINARY) {
1871                     /* The decoding isn't acceptable so discard it.
1872                        Leave status as OK to allow other transformations. */
1873                     if (verbosw) {
1874                         report (NULL, ct->c_partno, ct->c_file,
1875                                 "will not decode%s because it is binary (%s)",
1876                                 ct->c_partno  ?  ""
1877                                               :  ct->c_ctline  ?  ct->c_ctline
1878                                                                :  "",
1879                                 reason);
1880                     }
1881                     (void) m_unlink (ct->c_cefile.ce_file);
1882                     free (ct->c_cefile.ce_file);
1883                     ct->c_cefile.ce_file = NULL;
1884                 } else if (ct->c_encoding == CE_QUOTED  &&
1885                            ct_encoding == CE_8BIT  &&  encoding == CE_7BIT) {
1886                     /* The decoding isn't acceptable so discard it.
1887                        Leave status as OK to allow other transformations. */
1888                     if (verbosw) {
1889                         report (NULL, ct->c_partno, ct->c_file,
1890                                 "will not decode%s because it is 8bit",
1891                                 ct->c_partno  ?  ""
1892                                               :  ct->c_ctline  ?  ct->c_ctline
1893                                                                :  "");
1894                     }
1895                     (void) m_unlink (ct->c_cefile.ce_file);
1896                     free (ct->c_cefile.ce_file);
1897                     ct->c_cefile.ce_file = NULL;
1898                 } else {
1899                     int enc;
1900                     if (ct_encoding == CE_BINARY)
1901                         enc = CE_BINARY;
1902                     else if (ct_encoding == CE_8BIT  &&  encoding == CE_7BIT)
1903                         enc = CE_QUOTED;
1904                     else
1905                         enc = charset_encoding (ct);
1906                     if (set_ce (ct, enc) == OK) {
1907                         ++*message_mods;
1908                         if (verbosw) {
1909                             report (NULL, ct->c_partno, ct->c_file, "decode%s",
1910                                     ct->c_ctline ? ct->c_ctline : "");
1911                         }
1912                         if (lf_line_endings) {
1913                             strip_crs (ct, message_mods);
1914                         }
1915                     } else {
1916                         status = NOTOK;
1917                     }
1918                 }
1919             } else {
1920                 status = NOTOK;
1921             }
1922             break;
1923         }
1924         case CE_8BIT:
1925         case CE_7BIT:
1926             if (lf_line_endings) {
1927                 strip_crs (ct, message_mods);
1928             }
1929             break;
1930         default:
1931             break;
1932         }
1933
1934         break;
1935     }
1936
1937     return status;
1938 }
1939
1940
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 *   decodetypes  comma-separated list of type and type/subtype names
 *   type         content type of the part
 *   subtype      content subtype of the part, or NULL
 *
 * Returns 1 if type, or type/subtype, is an element of decodetypes
 * (compared without regard to case), otherwise 0.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    /* Quick search for matching type[/subtype] in decodetypes:  bracket
       decodetypes with commas, then search for ,type, and ,type/subtype, in
       it. */

    int found_match = 0;
    char *delimited_decodetypes;
    char *delimited_type;

    /* NULL terminates a concat() argument list, so a NULL type would
       reduce the search string to "," and that matches any non-empty
       decodetypes.  Nothing can match without a list or a type. */
    if (decodetypes == NULL  ||  type == NULL) {
        return 0;
    }

    delimited_decodetypes = concat(",", decodetypes, ",", NULL);
    delimited_type = concat(",", type, ",", NULL);

    if (nmh_strcasestr(delimited_decodetypes, delimited_type)) {
        found_match = 1;
    } else if (subtype != NULL) {
        char *delimited_type_subtype =
            concat(",", type, "/", subtype, ",", NULL);

        if (nmh_strcasestr(delimited_decodetypes, delimited_type_subtype)) {
            found_match = 1;
        }
        free(delimited_type_subtype);
    }

    free(delimited_type);
    free(delimited_decodetypes);

    return found_match;
}
1970
1971
1972 /* See if the decoded content is 7bit, 8bit, or binary.  It's binary
1973    if it has any NUL characters, a CR not followed by a LF, or lines
1974    greater than 998 characters in length.  If binary, reason is set
1975    to a string explaining why. */
1976 static int
1977 content_encoding (CT ct, const char **reason) {
1978     CE ce = &ct->c_cefile;
1979     int encoding = CE_7BIT;
1980
1981     if (ce->ce_file) {
1982         size_t line_len = 0;
1983         char buffer[BUFSIZ];
1984         size_t inbytes;
1985
1986         if (! ce->ce_fp  &&  (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
1987             advise (ce->ce_file, "unable to open for reading");
1988             return CE_UNKNOWN;
1989         }
1990
1991         fseeko (ce->ce_fp, 0L, SEEK_SET);
1992         while (encoding != CE_BINARY  &&
1993                (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
1994             char *cp;
1995             size_t i;
1996             int last_char_was_cr = 0;
1997
1998             for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
1999                 if (*cp == '\0'  ||  ++line_len > 998  ||
2000                     (*cp != '\n'  &&  last_char_was_cr)) {
2001                     encoding = CE_BINARY;
2002                     if (*cp == '\0') {
2003                         *reason = "null character";
2004                     } else if (line_len > 998) {
2005                         *reason = "line length > 998";
2006                     } else if (*cp != '\n'  &&  last_char_was_cr) {
2007                         *reason = "CR not followed by LF";
2008                     } else {
2009                         /* Should not reach this. */
2010                         *reason = "";
2011                     }
2012                     break;
2013                 } else if (*cp == '\n') {
2014                     line_len = 0;
2015                 } else if (! isascii ((unsigned char) *cp)) {
2016                     encoding = CE_8BIT;
2017                 }
2018
2019                 last_char_was_cr = *cp == '\r'  ?  1  :  0;
2020             }
2021         }
2022
2023         fclose (ce->ce_fp);
2024         ce->ce_fp = NULL;
2025     } /* else should never happen */
2026
2027     return encoding;
2028 }
2029
2030
2031 static int
2032 strip_crs (CT ct, int *message_mods) {
2033     char *charset = content_charset (ct);
2034     int status = OK;
2035
2036     /* Only strip carriage returns if content is ASCII or another
2037        charset that has the same readily recognizable CR followed by a
2038        LF.  We can include UTF-8 here because if the high-order bit of
2039        a UTF-8 byte is 0, then it must be a single-byte ASCII
2040        character. */
2041     if (! strcasecmp (charset, "US-ASCII")  ||
2042         ! strcasecmp (charset, "UTF-8")  ||
2043         ! strncasecmp (charset, "ISO-8859-", 9)  ||
2044         ! strncasecmp (charset, "WINDOWS-12", 10)) {
2045         char **file = NULL;
2046         FILE **fp = NULL;
2047         size_t begin;
2048         size_t end;
2049         int has_crs = 0;
2050         int opened_input_file = 0;
2051
2052         if (ct->c_cefile.ce_file) {
2053             file = &ct->c_cefile.ce_file;
2054             fp = &ct->c_cefile.ce_fp;
2055             begin = end = 0;
2056         } else if (ct->c_file) {
2057             file = &ct->c_file;
2058             fp = &ct->c_fp;
2059             begin = (size_t) ct->c_begin;
2060             end = (size_t) ct->c_end;
2061         } /* else don't know where the content is */
2062
2063         if (file  &&  *file  &&  fp) {
2064             if (! *fp) {
2065                 if ((*fp = fopen (*file, "r")) == NULL) {
2066                     advise (*file, "unable to open for reading");
2067                     status = NOTOK;
2068                 } else {
2069                     opened_input_file = 1;
2070                 }
2071             }
2072         }
2073
2074         if (fp  &&  *fp) {
2075             char buffer[BUFSIZ];
2076             size_t bytes_read;
2077             size_t bytes_to_read =
2078                 end > 0 && end > begin  ?  end - begin  :  sizeof buffer;
2079
2080             fseeko (*fp, begin, SEEK_SET);
2081             while ((bytes_read = fread (buffer, 1,
2082                                         min (bytes_to_read, sizeof buffer),
2083                                         *fp)) > 0) {
2084                 /* Look for CR followed by a LF.  This is supposed to
2085                    be text so there should be LF's.  If not, don't
2086                    modify the content. */
2087                 char *cp;
2088                 size_t i;
2089                 int last_char_was_cr = 0;
2090
2091                 if (end > 0) { bytes_to_read -= bytes_read; }
2092
2093                 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2094                     if (*cp == '\n'  &&  last_char_was_cr) {
2095                         has_crs = 1;
2096                         break;
2097                     }
2098
2099                     last_char_was_cr = *cp == '\r'  ?  1  :  0;
2100                 }
2101             }
2102
2103             if (has_crs) {
2104                 int fd;
2105                 char *stripped_content_file;
2106                 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2107
2108                 if (tempfile == NULL) {
2109                     adios (NULL, "unable to create temporary file in %s",
2110                            get_temp_dir());
2111                 }
2112                 stripped_content_file = add (tempfile, NULL);
2113
2114                 /* Strip each CR before a LF from the content. */
2115                 fseeko (*fp, begin, SEEK_SET);
2116                 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2117                        0) {
2118                     char *cp;
2119                     size_t i;
2120                     int last_char_was_cr = 0;
2121
2122                     for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2123                         if (*cp == '\r') {
2124                             last_char_was_cr = 1;
2125                         } else if (last_char_was_cr) {
2126                             if (*cp != '\n') {
2127                                 if (write (fd, "\r", 1) < 0) {
2128                                     advise (tempfile, "CR write");
2129                                 }
2130                             }
2131                             if (write (fd, cp, 1) < 0) {
2132                                 advise (tempfile, "write");
2133                             }
2134                             last_char_was_cr = 0;
2135                         } else {
2136                             if (write (fd, cp, 1) < 0) {
2137                                 advise (tempfile, "write");
2138                             }
2139                             last_char_was_cr = 0;
2140                         }
2141                     }
2142                 }
2143
2144                 if (close (fd)) {
2145                     admonish (NULL, "unable to write temporary file %s",
2146                               stripped_content_file);
2147                     (void) m_unlink (stripped_content_file);
2148                     status = NOTOK;
2149                 } else {
2150                     /* Replace the decoded file with the converted one. */
2151                     if (ct->c_cefile.ce_file) {
2152                         if (ct->c_cefile.ce_unlink) {
2153                             (void) m_unlink (ct->c_cefile.ce_file);
2154                         }
2155                         free (ct->c_cefile.ce_file);
2156                     }
2157                     ct->c_cefile.ce_file = stripped_content_file;
2158                     ct->c_cefile.ce_unlink = 1;
2159
2160                     ++*message_mods;
2161                     if (verbosw) {
2162                         report (NULL, ct->c_partno,
2163                                 begin == 0 && end == 0  ?  ""  :  *file,
2164                                 "stripped CRs");
2165                     }
2166                 }
2167             }
2168
2169             if (opened_input_file) {
2170                 fclose (*fp);
2171                 *fp = NULL;
2172             }
2173         }
2174     }
2175
2176     free (charset);
2177
2178     return status;
2179 }
2180
2181
2182 static int
2183 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2184     int status = OK;
2185
2186     switch (ct->c_type) {
2187     case CT_TEXT:
2188         if (ct->c_subtype == TEXT_PLAIN) {
2189             status = convert_charset (ct, dest_charset, message_mods);
2190             if (status == OK) {
2191                 if (verbosw) {
2192                     char *ct_charset = content_charset (ct);
2193
2194                     report (NULL, ct->c_partno, ct->c_file,
2195                             "convert %s to %s", ct_charset, dest_charset);
2196                     free (ct_charset);
2197                 }
2198             } else {
2199                 char *ct_charset = content_charset (ct);
2200
2201                 report ("iconv", ct->c_partno, ct->c_file,
2202                         "failed to convert %s to %s", ct_charset, dest_charset);
2203                 free (ct_charset);
2204             }
2205         }
2206         break;
2207
2208     case CT_MULTIPART: {
2209         struct multipart *m = (struct multipart *) ct->c_ctparams;
2210         struct part *part;
2211
2212         /* Should check to see if the body for this part is encoded?
2213            For now, it gets passed along as-is by InitMultiPart(). */
2214         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
2215             status =
2216                 convert_charsets (part->mp_part, dest_charset, message_mods);
2217         }
2218         break;
2219     }
2220
2221     case CT_MESSAGE:
2222         if (ct->c_subtype == MESSAGE_EXTERNAL) {
2223             struct exbody *e = (struct exbody *) ct->c_ctparams;
2224
2225             status =
2226                 convert_charsets (e->eb_content, dest_charset, message_mods);
2227         }
2228         break;
2229
2230     default:
2231         break;
2232     }
2233
2234     return status;
2235 }
2236
2237
2238 /*
2239  * Fix various problems that aren't handled elsewhere.  These
2240  * are fixed unconditionally:  there are no switches to disable
2241  * them.  (Currently, "problems" is just one:  an extraneous
2242  * semicolon at the end of a header parameter list.)
2243  */
2244 static int
2245 fix_always (CT ct, int *message_mods) {
2246     int status = OK;
2247
2248     switch (ct->c_type) {
2249     case CT_MULTIPART: {
2250         struct multipart *m = (struct multipart *) ct->c_ctparams;
2251         struct part *part;
2252
2253         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
2254             status = fix_always (part->mp_part, message_mods);
2255         }
2256         break;
2257     }
2258
2259     case CT_MESSAGE:
2260         if (ct->c_subtype == MESSAGE_EXTERNAL) {
2261             struct exbody *e = (struct exbody *) ct->c_ctparams;
2262
2263             status = fix_always (e->eb_content, message_mods);
2264         }
2265         break;
2266
2267     default: {
2268         HF hf;
2269
2270         for (hf = ct->c_first_hf; hf; hf = hf->next) {
2271             size_t len = strlen (hf->value);
2272
2273             if (strcasecmp (hf->name, TYPE_FIELD) != 0  &&
2274                 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2275                 /* Only do this for Content-Type and
2276                    Content-Disposition fields because those are the
2277                    only headers that parse_mime() warns about. */
2278                 continue;
2279             }
2280
2281             /* whitespace following a trailing ';' will be nuked as well */
2282             if (hf->value[len - 1] == '\n') {
2283                 while (isspace((unsigned char)(hf->value[len - 2]))) {
2284                     if (len-- == 0) { break; }
2285                 }
2286             }
2287
2288             if (hf->value[len - 2] == ';') {
2289                 /* Remove trailing ';' from parameter value. */
2290                 hf->value[len - 2] = '\n';
2291                 hf->value[len - 1] = '\0';
2292
2293                 /* Also, if Content-Type parameter, remove trailing ';'
2294                    from ct->c_ctline.  This probably isn't necessary
2295                    but can't hurt. */
2296                 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2297                     size_t l = strlen(ct->c_ctline) - 1;
2298                     while (isspace((unsigned char)(ct->c_ctline[l])) ||
2299                            ct->c_ctline[l] == ';') {
2300                         ct->c_ctline[l--] = '\0';
2301                         if (l == 0) { break; }
2302                     }
2303                 }
2304
2305                 ++*message_mods;
2306                 if (verbosw) {
2307                     report (NULL, ct->c_partno, ct->c_file,
2308                             "remove trailing ; from %s parameter value",
2309                             hf->name);
2310                 }
2311             }
2312         }
2313     }}
2314
2315     return status;
2316 }
2317
2318
2319 static int
2320 write_content (CT ct, const char *input_filename, char *outfile, int modify_inplace,
2321                int message_mods) {
2322     int status = OK;
2323
2324     if (modify_inplace) {
2325         if (message_mods > 0) {
2326             if ((status = output_message (ct, outfile)) == OK) {
2327                 char *infile = input_filename
2328                     ?  add (input_filename, NULL)
2329                     :  add (ct->c_file ? ct->c_file : "-", NULL);
2330
2331                 if (remove_file (infile) == OK) {
2332                     if (rename (outfile, infile)) {
2333                         /* Rename didn't work, possibly because of an
2334                            attempt to rename across filesystems.  Try
2335                            brute force copy. */
2336                         int old = open (outfile, O_RDONLY);
2337                         int new =
2338                             open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2339                         int i = -1;
2340
2341                         if (old != -1  &&  new != -1) {
2342                             char buffer[BUFSIZ];
2343
2344                             while ((i = read (old, buffer, sizeof buffer)) >
2345                                    0) {
2346                                 if (write (new, buffer, i) != i) {
2347                                     i = -1;
2348                                     break;
2349                                 }
2350                             }
2351                         }
2352                         if (new != -1) { close (new); }
2353                         if (old != -1) { close (old); }
2354                         (void) m_unlink (outfile);
2355
2356                         if (i < 0) {
2357                             /* The -file argument processing used path() to
2358                                expand filename to absolute path. */
2359                             int file = ct->c_file  &&  ct->c_file[0] == '/';
2360
2361                             admonish (NULL, "unable to rename %s %s to %s",
2362                                       file ? "file" : "message", outfile,
2363                                       infile);
2364                             status = NOTOK;
2365                         }
2366                     }
2367                 } else {
2368                     admonish (NULL, "unable to remove input file %s, "
2369                               "not modifying it", infile);
2370                     (void) m_unlink (outfile);
2371                     status = NOTOK;
2372                 }
2373
2374                 free (infile);
2375             } else {
2376                 status = NOTOK;
2377             }
2378         } else {
2379             /* No modifications and didn't need the tmp outfile. */
2380             (void) m_unlink (outfile);
2381         }
2382     } else {
2383         /* Output is going to some file.  Produce it whether or not
2384            there were modifications. */
2385         status = output_message (ct, outfile);
2386     }
2387
2388     flush_errors ();
2389     return status;
2390 }
2391
2392
2393 /*
2394  * parse_mime() does not set lf_line_endings in struct text, so use this function to do it.
2395  * It touches the parts the decodetypes identifies.
2396  */
2397 static void
2398 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2399     switch (ct->c_type) {
2400     case CT_MULTIPART: {
2401         struct multipart *m = (struct multipart *) ct->c_ctparams;
2402         struct part *part;
2403
2404         for (part = m->mp_parts; part; part = part->mp_next) {
2405             set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2406         }
2407         break;
2408     }
2409
2410     case CT_MESSAGE:
2411         if (ct->c_subtype == MESSAGE_EXTERNAL) {
2412             struct exbody *e = (struct exbody *) ct->c_ctparams;
2413
2414             set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2415         }
2416         break;
2417
2418     default:
2419         if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2420             if (ct->c_ctparams == NULL) {
2421                 if ((ct->c_ctparams = (struct text *) mh_xcalloc (1, sizeof (struct text))) == NULL) {
2422                     adios (NULL, "out of memory");
2423                 }
2424             }
2425             ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2426         }
2427     }
2428 }
2429
2430
2431 /*
2432  * If "rmmproc" is defined, call that to remove the file.  Otherwise,
2433  * use the standard MH backup file.
2434  */
2435 static int
2436 remove_file (const char *file) {
2437     if (rmmproc) {
2438         char *rmm_command = concat (rmmproc, " ", file, NULL);
2439         int status = system (rmm_command);
2440
2441         free (rmm_command);
2442         return WIFEXITED (status)  ?  WEXITSTATUS (status)  :  NOTOK;
2443     } else {
2444         /* This is OK for a non-message file, it still uses the
2445            BACKUP_PREFIX form.  The backup file will be in the same
2446            directory as file. */
2447         return rename (file, m_backup (file));
2448     }
2449 }
2450
2451
2452 static void
2453 report (char *what, char *partno, char *filename, char *message, ...) {
2454     va_list args;
2455     char *fmt;
2456
2457     if (verbosw) {
2458         va_start (args, message);
2459         fmt = concat (filename, partno ? " part " : ", ",
2460                       partno ? partno : "", partno ? ", " : "", message, NULL);
2461
2462         advertise (what, NULL, fmt, args);
2463
2464         free (fmt);
2465         va_end (args);
2466     }
2467 }
2468
2469
/*
 * Signal handler, also known as quitser.  For SIGQUIT, flush stdout
 * and put a newline on stderr first.  For every signal, finish with
 * done (1).
 */
static void
pipeser (int i)
{
    const int got_quit = i == SIGQUIT;

    if (got_quit) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}