diplodocus.org Git - nmh/blob - uip/mhfixmsg.c

   1 /*
   2  * mhfixmsg.c -- rewrite a message with various transformations
   3  *
   4  * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
   5  * See the COPYRIGHT file in the root directory of the nmh
   6  * distribution for complete copyright information.
   7  */
   8
   9 #include <h/mh.h>
  10 #include <h/mime.h>
  11 #include <h/mhparse.h>
  12 #include <h/utils.h>
  13 #include <h/signals.h>
  14 #include <fcntl.h>
  15
  /* X-macro table of the command-line switches recognized by mhfixmsg.
     Each entry is X(switch text, minchars, enum identifier); the table is
     expanded twice below, each time with a different definition of X. */
  16 #define MHFIXMSG_SWITCHES \
  17     X("decodetext 8bit|7bit", 0, DECODETEXTSW) \
  18     X("nodecodetext", 0, NDECODETEXTSW) \
  19     X("decodetypes", 0, DECODETYPESW) \
  20     X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
  21     X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
  22     X("textcharset", 0, TEXTCHARSETSW) \
  23     X("notextcharset", 0, NTEXTCHARSETSW) \
  24     X("reformat", 0, REFORMATSW) \
  25     X("noreformat", 0, NREFORMATSW) \
  26     X("replacetextplain", 0, REPLACETEXTPLAINSW) \
  27     X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
  28     X("fixboundary", 0, FIXBOUNDARYSW) \
  29     X("nofixboundary", 0, NFIXBOUNDARYSW) \
  30     X("fixcte", 0, FIXCTESW) \
  31     X("nofixcte", 0, NFIXCTESW) \
  32     X("fixtype mimetype", 0, FIXTYPESW) \
  33     X("file file", 0, FILESW) \
  34     X("outfile file", 0, OUTFILESW) \
  35     X("rmmproc program", 0, RPROCSW) \
  36     X("normmproc", 0, NRPRCSW) \
  37     X("changecur", 0, CHGSW) \
  38     X("nochangecur", 0, NCHGSW) \
  39     X("verbose", 0, VERBSW) \
  40     X("noverbose", 0, NVERBSW) \
  41     X("version", 0, VERSIONSW) \
  42     X("help", 0, HELPSW) \
  43
  /* First expansion: each entry contributes only its enum identifier.
     DEFINE_SWITCH_ENUM is defined elsewhere; presumably it wraps the
     expansion in an enum declaration -- TODO confirm. */
  44 #define X(sw, minchars, id) id,
  45 DEFINE_SWITCH_ENUM(MHFIXMSG);
  46 #undef X
  47
  /* Second expansion: each entry becomes a { sw, minchars, id } initializer
     of the array named "switches", which main() passes to smatch(),
     ambigsw() and print_help(). */
  48 #define X(sw, minchars, id) { sw, minchars, id },
  49 DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
  50 #undef X
  51
  52
  53 int verbosw; /* Set by -verbose, cleared by -noverbose; when nonzero, fixes are reported via report(). */
  54 int debugsw; /* Needed by mhparse.c. */
  55
  56 #define quitser pipeser /* main() installs quitser for SIGQUIT, so SIGQUIT and SIGPIPE share one handler. */
  57
  58 /* mhparse.c */
  59 extern int skip_mp_cte_check;                 /* flag to InitMultiPart */
  60 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
  61 extern int bogus_mp_content;                  /* flag from InitMultiPart */
  62 /* flags to/from parse_header_attrs */
  63 extern int suppress_extraneous_trailing_semicolon_warning;
  64 extern int extraneous_trailing_semicolon;
  65
  66 /* mhoutsbr.c */
  67 int output_message (CT, char *);
  68
  69 /* mhmisc.c */
  70 void flush_errors (void);
  71
  72 /* mhfree.c */
  73 extern CT *cts;
  74 void freects_done (int) NORETURN;
  75
  76 /*
  77  * Transformation settings chosen on the command line, then static prototypes
  78  */
  79 typedef struct fix_transformations {
  80     int fixboundary; /* -fixboundary (1, the default) / -nofixboundary (0) */
  81     int fixcte; /* -fixcte (1, the default) / -nofixcte (0) */
  82     svector_t fixtypes; /* type/subtype strings given with -fixtype; NULL if none were given */
  83     int reformat; /* -reformat (1, the default) / -noreformat (0) */
  84     int replacetextplain; /* -replacetextplain (1) / -noreplacetextplain (0, the default) */
  85     int decodetext; /* CE_8BIT (default) or CE_7BIT from -decodetext; 0 for -nodecodetext */
  86     char *decodetypes; /* argument of -decodetypes; main() defaults it to "text,application/ics" */
  87     /* Nonzero to use LF rather than the CRLF linebreaks of RFC 2046 Sec. 4.1.1, par.1: -nocrlflinebreaks sets 1, -crlflinebreaks (default) sets 0. */
  88     int lf_line_endings;
  89     char *textcharset; /* argument of -textcharset; NULL (default, or -notextcharset) skips charset conversion */
  90 } fix_transformations;
  91
  92 int mhfixmsgsbr (CT *, const fix_transformations *, char *);
  93 static int fix_boundary (CT *, int *);
  94 static int copy_input_to_output (const char *, const char *);
  95 static int get_multipart_boundary (CT, char **);
  96 static int replace_boundary (CT, char *, char *);
  97 static int fix_types (CT, svector_t, int *);
  98 static char *replace_substring (char **, const char *, const char *);
  99 static char *remove_parameter (char *, const char *);
 100 static int fix_multipart_cte (CT, int *);
 101 static int set_ce (CT, int);
 102 static int ensure_text_plain (CT *, CT, int *, int);
 103 static int find_textplain_sibling (CT, int, int *);
 104 static int insert_new_text_plain_part (CT, int, CT);
 105 static CT build_text_plain_part (CT);
 106 static int insert_into_new_mp_alt (CT *, int *);
 107 static CT divide_part (CT);
 108 static void copy_ctinfo (CI, CI);
 109 static int decode_part (CT);
 110 static int reformat_part (CT, char *, char *, char *, int);
 111 static int charset_encoding (CT);
 112 static CT build_multipart_alt (CT, CT, int, int);
 113 static int boundary_in_content (FILE **, char *, const char *);
 114 static void transfer_noncontent_headers (CT, CT);
 115 static int set_ct_type (CT, int type, int subtype, int encoding);
 116 static int decode_text_parts (CT, int, const char *, int *);
 117 static int should_decode(const char *, const char *, const char *);
 118 static int content_encoding (CT, const char **);
 119 static int strip_crs (CT, int *);
 120 static int convert_charsets (CT, char *, int *);
 121 static int fix_always (CT, int *);
 122 static int write_content (CT, const char *, char *, int, int);
 123 static void set_text_ctparams(CT, char *, int);
 124 static int remove_file (const char *);
 125 static void report (char *, char *, char *, char *, ...);
 126 static void pipeser (int);
 127
 128
 129 int
 130 main (int argc, char **argv) {
 131     int msgnum;
 132     char *cp, *file = NULL, *folder = NULL;
 133     char *maildir, buf[100], *outfile = NULL;
 134     char **argp, **arguments;
 135     struct msgs_array msgs = { 0, 0, NULL };
 136     struct msgs *mp = NULL;
 137     CT *ctp;
 138     FILE *fp;
 139     int using_stdin = 0;
 140     int chgflag = 1;
 141     int status = OK;
 142     fix_transformations fx;
 143     fx.reformat = fx.fixcte = fx.fixboundary = 1;
 144     fx.fixtypes = NULL;
 145     fx.replacetextplain = 0;
 146     fx.decodetext = CE_8BIT;
 147     fx.decodetypes = "text,application/ics";  /* Default, per man page. */
 148     fx.lf_line_endings = 0;
 149     fx.textcharset = NULL;
 150
 151     if (nmh_init(argv[0], 1)) { return 1; }
 152
 153     done = freects_done;
 154
 155     arguments = getarguments (invo_name, argc, argv, 1);
 156     argp = arguments;
 157
 158     /*
 159      * Parse arguments
 160      */
 161     while ((cp = *argp++)) {
 162         if (*cp == '-') {
 163             switch (smatch (++cp, switches)) {
 164             case AMBIGSW:
 165                 ambigsw (cp, switches);
 166                 done (1);
 167             case UNKWNSW:
 168                 adios (NULL, "-%s unknown", cp);
 169
 170             case HELPSW:
 171                 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
 172                         invo_name);
 173                 print_help (buf, switches, 1);
 174                 done (0);
 175             case VERSIONSW:
 176                 print_version(invo_name);
 177                 done (0);
 178
 179             case DECODETEXTSW:
 180                 if (! (cp = *argp++)  ||  *cp == '-') {
 181                     adios (NULL, "missing argument to %s", argp[-2]);
 182                 }
 183                 if (! strcasecmp (cp, "8bit")) {
 184                     fx.decodetext = CE_8BIT;
 185                 } else if (! strcasecmp (cp, "7bit")) {
 186                     fx.decodetext = CE_7BIT;
 187                 } else {
 188                     adios (NULL, "invalid argument to %s", argp[-2]);
 189                 }
 190                 continue;
 191             case NDECODETEXTSW:
 192                 fx.decodetext = 0;
 193                 continue;
 194             case DECODETYPESW:
 195                 if (! (cp = *argp++)  ||  *cp == '-') {
 196                     adios (NULL, "missing argument to %s", argp[-2]);
 197                 }
 198                 fx.decodetypes = cp;
 199                 continue;
 200             case CRLFLINEBREAKSSW:
 201                 fx.lf_line_endings = 0;
 202                 continue;
 203             case NCRLFLINEBREAKSSW:
 204                 fx.lf_line_endings = 1;
 205                 continue;
 206             case TEXTCHARSETSW:
 207                 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
 208                     adios (NULL, "missing argument to %s", argp[-2]);
 209                 }
 210                 fx.textcharset = cp;
 211                 continue;
 212             case NTEXTCHARSETSW:
 213                 fx.textcharset = 0;
 214                 continue;
 215             case FIXBOUNDARYSW:
 216                 fx.fixboundary = 1;
 217                 continue;
 218             case NFIXBOUNDARYSW:
 219                 fx.fixboundary = 0;
 220                 continue;
 221             case FIXCTESW:
 222                 fx.fixcte = 1;
 223                 continue;
 224             case NFIXCTESW:
 225                 fx.fixcte = 0;
 226                 continue;
 227             case FIXTYPESW:
 228                 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
 229                     adios (NULL, "missing argument to %s", argp[-2]);
 230                 }
 231                 if (! strncasecmp (cp, "multipart/", 10)  ||
 232                     ! strncasecmp (cp, "message/", 8)) {
 233                     adios (NULL, "-fixtype %s not allowed", cp);
 234                 } else if (! strchr (cp, '/')) {
 235                     adios (NULL, "-fixtype requires type/subtype");
 236                 }
 237                 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
 238                 svector_push_back (fx.fixtypes, cp);
 239                 continue;
 240             case REFORMATSW:
 241                 fx.reformat = 1;
 242                 continue;
 243             case NREFORMATSW:
 244                 fx.reformat = 0;
 245                 continue;
 246             case REPLACETEXTPLAINSW:
 247                 fx.replacetextplain = 1;
 248                 continue;
 249             case NREPLACETEXTPLAINSW:
 250                 fx.replacetextplain = 0;
 251                 continue;
 252             case FILESW:
 253                 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
 254                     adios (NULL, "missing argument to %s", argp[-2]);
 255                 }
 256                 file = *cp == '-'  ?  add (cp, NULL)  :  path (cp, TFILE);
 257                 continue;
 258             case OUTFILESW:
 259                 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
 260                     adios (NULL, "missing argument to %s", argp[-2]);
 261                 }
 262                 outfile = *cp == '-'  ?  add (cp, NULL)  :  path (cp, TFILE);
 263                 continue;
 264             case RPROCSW:
 265                 if (!(rmmproc = *argp++) || *rmmproc == '-') {
 266                     adios (NULL, "missing argument to %s", argp[-2]);
 267                 }
 268                 continue;
 269             case NRPRCSW:
 270                 rmmproc = NULL;
 271                 continue;
 272             case CHGSW:
 273                 chgflag = 1;
 274                 continue;
 275             case NCHGSW:
 276                 chgflag = 0;
 277                 continue;
 278             case VERBSW:
 279                 verbosw = 1;
 280                 continue;
 281             case NVERBSW:
 282                 verbosw = 0;
 283                 continue;
 284             }
 285         }
 286         if (*cp == '+' || *cp == '@') {
 287             if (folder) {
 288                 adios (NULL, "only one folder at a time!");
 289             } else {
 290                 folder = pluspath (cp);
 291             }
 292         } else {
 293             if (*cp == '/') {
 294                 /* Interpret a full path as a filename, not a message. */
 295                 file = add (cp, NULL);
 296             } else {
 297                 app_msgarg (&msgs, cp);
 298             }
 299         }
 300     }
 301
 302     SIGNAL (SIGQUIT, quitser);
 303     SIGNAL (SIGPIPE, pipeser);
 304
 305     /*
 306      * Read the standard profile setup
 307      */
 308     if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
 309         readconfig ((struct node **) 0, fp, cp, 0);
 310         fclose (fp);
 311     }
 312
 313     suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
 314     suppress_extraneous_trailing_semicolon_warning = 1;
 315
 316     if (! context_find ("path")) {
 317         free (path ("./", TFOLDER));
 318     }
 319
 320     if (file && msgs.size) {
 321         adios (NULL, "cannot specify msg and file at same time!");
 322     }
 323
 324     /*
 325      * check if message is coming from file
 326      */
 327     if (file) {
 328         /* If file is stdin, create a tmp file name before parse_mime()
 329            has a chance, because it might put in on a different
 330            filesystem than the output file.  Instead, put it in the
 331            user's preferred tmp directory. */
 332         CT ct;
 333
 334         if (! strcmp ("-", file)) {
 335             int fd;
 336             char *cp;
 337
 338             using_stdin = 1;
 339
 340             if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
 341                 adios (NULL, "unable to create temporary file in %s",
 342                        get_temp_dir());
 343             } else {
 344                 free (file);
 345                 file = add (cp, NULL);
 346                 cpydata (STDIN_FILENO, fd, "-", file);
 347             }
 348
 349             if (close (fd)) {
 350                 (void) m_unlink (file);
 351                 adios (NULL, "failed to write temporary file");
 352             }
 353         }
 354
 355         if (! (cts = (CT *) mh_xcalloc ((size_t) 2, sizeof *cts))) {
 356             adios (NULL, "out of memory");
 357         }
 358         ctp = cts;
 359
 360         if ((ct = parse_mime (file))) {
 361             set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
 362             *ctp++ = ct;
 363         } else {
 364             advise (NULL, "unable to parse message from file %s", file);
 365             status = NOTOK;
 366
 367             /* If there's an outfile, pass the input message unchanged, so the message won't
 368                get dropped from a pipeline. */
 369             if (outfile) {
 370                 /* Something went wrong.  Output might be expected, such as if this were run
 371                    as a filter.  Just copy the input to the output. */
 372                 if (copy_input_to_output (file, outfile) != OK) {
 373                     advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
 374                 }
 375             }
 376         }
 377     } else {
 378         /*
 379          * message(s) are coming from a folder
 380          */
 381         CT ct;
 382
 383         if (! msgs.size) {
 384             app_msgarg(&msgs, "cur");
 385         }
 386         if (! folder) {
 387             folder = getfolder (1);
 388         }
 389         maildir = m_maildir (folder);
 390
 391         if (chdir (maildir) == NOTOK) {
 392             adios (maildir, "unable to change directory to");
 393         }
 394
 395         /* read folder and create message structure */
 396         if (! (mp = folder_read (folder, 1))) {
 397             adios (NULL, "unable to read folder %s", folder);
 398         }
 399
 400         /* check for empty folder */
 401         if (mp->nummsg == 0) {
 402             adios (NULL, "no messages in %s", folder);
 403         }
 404
 405         /* parse all the message ranges/sequences and set SELECTED */
 406         for (msgnum = 0; msgnum < msgs.size; msgnum++)
 407             if (! m_convert (mp, msgs.msgs[msgnum])) {
 408                 done (1);
 409             }
 410         seq_setprev (mp);       /* set the previous-sequence */
 411
 412         if (! (cts =
 413                (CT *) mh_xcalloc ((size_t) (mp->numsel + 1), sizeof *cts))) {
 414             adios (NULL, "out of memory");
 415         }
 416         ctp = cts;
 417
 418         for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
 419             if (is_selected(mp, msgnum)) {
 420                 char *msgnam;
 421
 422                 msgnam = m_name (msgnum);
 423                 if ((ct = parse_mime (msgnam))) {
 424                     set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
 425                     *ctp++ = ct;
 426                 } else {
 427                     advise (NULL, "unable to parse message %s", msgnam);
 428                     status = NOTOK;
 429
 430                     /* If there's an outfile, pass the input message unchanged, so the message won't
 431                        get dropped from a pipeline. */
 432                     if (outfile) {
 433                         /* Something went wrong.  Output might be expected, such as if this were run
 434                            as a filter.  Just copy the input to the output. */
 435                         const char *input_filename = path (msgnam, TFILE);
 436
 437                         if (copy_input_to_output (input_filename, outfile) != OK) {
 438                             advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
 439                         }
 440                     }
 441                 }
 442             }
 443         }
 444
 445         if (chgflag) {
 446             seq_setcur (mp, mp->hghsel);  /* update current message */
 447         }
 448         seq_save (mp);                    /* synchronize sequences  */
 449         context_replace (pfolder, folder);/* update current folder  */
 450         context_save ();                  /* save the context file  */
 451     }
 452
 453     if (*cts) {
 454         for (ctp = cts; *ctp; ++ctp) {
 455             status += mhfixmsgsbr (ctp, &fx, outfile);
 456
 457             if (using_stdin) {
 458                 (void) m_unlink (file);
 459
 460                 if (! outfile) {
 461                     /* Just calling m_backup() unlinks the backup file. */
 462                     (void) m_backup (file);
 463                 }
 464             }
 465         }
 466     } else {
 467         status = 1;
 468     }
 469
 470     if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
 471     free (outfile);
 472     free (file);
 473     free (folder);
 474     free (arguments);
 475
 476     /* done is freects_done, which will clean up all of cts. */
 477     done (status);
 478     return NOTOK;
 479 }
 480
 481
 482 int
 483 mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
 484     /* Store input filename in case one of the transformations, i.e.,
 485        fix_boundary(), rewrites to a tmp file. */
 486     char *input_filename = add ((*ctp)->c_file, NULL);
 487     int modify_inplace = 0;
 488     int message_mods = 0;
 489     int status = OK;
 490
 491     if (outfile == NULL) {
 492         modify_inplace = 1;
 493
 494         if ((*ctp)->c_file) {
 495             char *tempfile;
 496             if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
 497                 adios (NULL, "unable to create temporary file in %s",
 498                        get_temp_dir());
 499             }
 500             outfile = add (tempfile, NULL);
 501         } else {
 502             adios (NULL, "missing both input and output filenames\n");
 503         }
 504     }
 505
 506     reverse_alternative_parts (*ctp);
 507     status = fix_always (*ctp, &message_mods);
 508     if (status == OK  &&  fx->fixboundary) {
 509         status = fix_boundary (ctp, &message_mods);
 510     }
 511     if (status == OK  && fx->fixtypes != NULL) {
 512         status = fix_types (*ctp, fx->fixtypes, &message_mods);
 513     }
 514     if (status == OK  &&  fx->fixcte) {
 515         status = fix_multipart_cte (*ctp, &message_mods);
 516     }
 517     if (status == OK  &&  fx->reformat) {
 518         status =
 519             ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
 520     }
 521     if (status == OK  &&  fx->decodetext) {
 522         status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes, &message_mods);
 523     }
 524     if (status == OK  &&  fx->textcharset != NULL) {
 525         status = convert_charsets (*ctp, fx->textcharset, &message_mods);
 526     }
 527
 528     if (status == OK  &&  ! (*ctp)->c_umask) {
 529         /* Set the umask for the contents file.  This currently
 530            isn't used but just in case it is in the future. */
 531         struct stat st;
 532
 533         if (stat ((*ctp)->c_file, &st) != NOTOK) {
 534             (*ctp)->c_umask = ~(st.st_mode & 0777);
 535         } else {
 536             (*ctp)->c_umask = ~m_gmprot();
 537         }
 538     }
 539
 540     /*
 541      * Write the content to a file
 542      */
 543     if (status == OK) {
 544         status = write_content (*ctp, input_filename, outfile, modify_inplace,
 545                                 message_mods);
 546     } else if (! modify_inplace) {
 547         /* Something went wrong.  Output might be expected, such
 548            as if this were run as a filter.  Just copy the input
 549            to the output. */
 550         if (copy_input_to_output (input_filename, outfile) != OK) {
 551             advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
 552         }
 553     }
 554
 555     if (modify_inplace) {
 556         if (status != OK) { (void) m_unlink (outfile); }
 557         free (outfile);
 558         outfile = NULL;
 559     }
 560
 561     free (input_filename);
 562
 563     return status;
 564 }
 565
 566
 567 /* Copy input message to output.  Assumes not modifying in place, so this
 568    might be running as part of a pipeline. */
 569 static int
 570 copy_input_to_output (const char *input_filename, const char *output_filename) {
 571     int in = open (input_filename, O_RDONLY);
 572     int out = strcmp (output_filename, "-")
 573         ?  open (output_filename, O_WRONLY | O_CREAT, m_gmprot ())
 574         :  STDOUT_FILENO;
 575     int status = OK;
 576
 577     if (in != -1  &&  out != -1) {
 578         cpydata (in, out, input_filename, output_filename);
 579     } else {
 580         status = NOTOK;
 581     }
 582
 583     close (out);
 584     close (in);
 585
 586     return status;
 587 }
 588
 589
 590 static int
 591 fix_boundary (CT *ct, int *message_mods) {
 592     struct multipart *mp;
 593     int status = OK;
 594
 595     if (ct  &&  (*ct)->c_type == CT_MULTIPART  &&  bogus_mp_content) {
 596         mp = (struct multipart *) (*ct)->c_ctparams;
 597
 598         /*
 599          * 1) Get boundary at end of part.
 600          * 2) Get boundary at beginning of part and compare to the end-of-part
 601          *    boundary.
 602          * 3) Write out contents of ct to tmp file, replacing boundary in
 603          *    header with boundary from part.  Set c_unlink to 1.
 604          * 4) Free ct.
 605          * 5) Call parse_mime() on the tmp file, replacing ct.
 606          */
 607
 608         if (mp  &&  mp->mp_start) {
 609             char *part_boundary;
 610
 611             if (get_multipart_boundary (*ct, &part_boundary) == OK) {
 612                 char *fixed;
 613
 614                 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
 615                     if (replace_boundary (*ct, fixed, part_boundary) == OK) {
 616                         char *filename = add ((*ct)->c_file, NULL);
 617                         CT fixed_ct;
 618
 619                         free_content (*ct);
 620                         if ((fixed_ct = parse_mime (fixed))) {
 621                             *ct = fixed_ct;
 622                             (*ct)->c_unlink = 1;
 623
 624                             ++*message_mods;
 625                             if (verbosw) {
 626                                 report (NULL, NULL, filename,
 627                                         "fix multipart boundary");
 628                             }
 629                         } else {
 630                             *ct = NULL;
 631                             advise (NULL, "unable to parse fixed part");
 632                             status = NOTOK;
 633                         }
 634                         free (filename);
 635                     } else {
 636                         advise (NULL, "unable to replace broken boundary");
 637                         status = NOTOK;
 638                     }
 639                 } else {
 640                     advise (NULL, "unable to create temporary file in %s",
 641                             get_temp_dir());
 642                     status = NOTOK;
 643                 }
 644
 645                 free (part_boundary);
 646             } else {
 647                 /* Couldn't fix the boundary.  Report failure so that mhfixmsg
 648                    doesn't modify the message. */
 649                 status = NOTOK;
 650             }
 651         } else {
 652             /* No multipart struct, even though the content type is
 653                CT_MULTIPART.  Report failure so that mhfixmsg doesn't modify
 654                the message. */
 655             status = NOTOK;
 656         }
 657     }
 658
 659     return status;
 660 }
 661
 662
 663 static int
 664 get_multipart_boundary (CT ct, char **part_boundary) {
 665     char buffer[BUFSIZ];
 666     char *end_boundary = NULL;
 667     off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
 668         ?  (off_t) (ct->c_end - sizeof buffer)
 669         :  (off_t) ct->c_begin;
 670     size_t bytes_read;
 671     int status = OK;
 672
 673     /* This will fail if the boundary spans fread() calls.  BUFSIZ should
 674        be big enough, even if it's just 1024, to make that unlikely. */
 675
 676     /* free_content() will close ct->c_fp. */
 677     if (! ct->c_fp  &&  (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
 678         advise (ct->c_file, "unable to open for reading");
 679         return NOTOK;
 680     }
 681
 682     /* Get boundary at end of multipart. */
 683     while (begin >= (off_t) ct->c_begin) {
 684         fseeko (ct->c_fp, begin, SEEK_SET);
 685         while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
 686             char *cp = rfind_str (buffer, bytes_read, "--");
 687
 688             if (cp) {
 689                 char *end;
 690
 691                 /* Trim off trailing "--" and anything beyond. */
 692                 *cp-- = '\0';
 693                 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
 694                     if (strlen (end) > 3  &&  *end++ == '\n'  &&
 695                         *end++ == '-'  &&  *end++ == '-') {
 696                         end_boundary = add (end, NULL);
 697                         break;
 698                     }
 699                 }
 700             }
 701         }
 702
 703         if (! end_boundary  &&  begin > (off_t) (ct->c_begin + sizeof buffer)) {
 704             begin -= sizeof buffer;
 705         } else {
 706             break;
 707         }
 708     }
 709
 710     /* Get boundary at beginning of multipart. */
 711     if (end_boundary) {
 712         fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
 713         while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
 714             if (bytes_read >= strlen (end_boundary)) {
 715                 char *cp = find_str (buffer, bytes_read, end_boundary);
 716
 717                 if (cp  &&  cp - buffer >= 2  &&  *--cp == '-'  &&
 718                     *--cp == '-'  &&  (cp > buffer  &&  *--cp == '\n')) {
 719                     status = OK;
 720                     break;
 721                 }
 722             } else {
 723                 /* The start and end boundaries didn't match, or the
 724                    start boundary doesn't begin with "\n--" (or "--"
 725                    if at the beginning of buffer).  Keep trying. */
 726                 status = NOTOK;
 727             }
 728         }
 729     } else {
 730         status = NOTOK;
 731     }
 732
 733     if (status == OK) {
 734         *part_boundary = end_boundary;
 735     } else {
 736         *part_boundary = NULL;
 737         free (end_boundary);
 738     }
 739
 740     return status;
 741 }
 742
 743
 744 /* Open and copy ct->c_file to file, replacing the multipart boundary. */
 745 static int
 746 replace_boundary (CT ct, char *file, char *boundary) {
 747     FILE *fpin, *fpout;
 748     int compnum, state;
 749     char buf[BUFSIZ], name[NAMESZ];
 750     char *np, *vp;
 751     m_getfld_state_t gstate = 0;
 752     int status = OK;
 753
 754     if (ct->c_file == NULL) {
 755         advise (NULL, "missing input filename");
 756         return NOTOK;
 757     }
 758
 759     if ((fpin = fopen (ct->c_file, "r")) == NULL) {
 760         advise (ct->c_file, "unable to open for reading");
 761         return NOTOK;
 762     }
 763
 764     if ((fpout = fopen (file, "w")) == NULL) {
 765         fclose (fpin);
 766         advise (file, "unable to open for writing");
 767         return NOTOK;
 768     }
 769
 770     for (compnum = 1;;) {
 771         int bufsz = (int) sizeof buf;
 772
 773         switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
 774         case FLD:
 775         case FLDPLUS:
 776             compnum++;
 777
 778             /* get copies of the buffers */
 779             np = add (name, NULL);
 780             vp = add (buf, NULL);
 781
 782             /* if necessary, get rest of field */
 783             while (state == FLDPLUS) {
 784                 bufsz = sizeof buf;
 785                 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
 786                 vp = add (buf, vp);     /* add to previous value */
 787             }
 788
 789             if (strcasecmp (TYPE_FIELD, np)) {
 790                 fprintf (fpout, "%s:%s", np, vp);
 791             } else {
 792                 char *new_ctline, *new_params;
 793
 794                 replace_param(&ct->c_ctinfo.ci_first_pm,
 795                               &ct->c_ctinfo.ci_last_pm, "boundary",
 796                               boundary, 0);
 797
 798                 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
 799                                     ct->c_ctinfo.ci_subtype, NULL);
 800                 new_params = output_params(strlen(TYPE_FIELD) +
 801                                            strlen(new_ctline) + 1,
 802                                            ct->c_ctinfo.ci_first_pm, NULL, 0);
 803                 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
 804                          new_params ? new_params : "");
 805                 free(new_ctline);
 806                 if (new_params) {
 807                     free(new_params);
 808                 }
 809             }
 810
 811             free (vp);
 812             free (np);
 813
 814             continue;
 815
 816         case BODY:
 817             fputs ("\n", fpout);
 818             /* buf will have a terminating NULL, skip it. */
 819             if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
 820                 advise (file, "fwrite");
 821             }
 822             continue;
 823
 824         case FILEEOF:
 825             break;
 826
 827         case LENERR:
 828         case FMTERR:
 829             advise (NULL, "message format error in component #%d", compnum);
 830             status = NOTOK;
 831             break;
 832
 833         default:
 834             advise (NULL, "getfld() returned %d", state);
 835             status = NOTOK;
 836             break;
 837         }
 838
 839         break;
 840     }
 841
 842     m_getfld_state_destroy (&gstate);
 843     fclose (fpout);
 844     fclose (fpin);
 845
 846     return status;
 847 }
 848
 849
 850 static int
 851 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
 852     int status = OK;
 853
 854     switch (ct->c_type) {
 855     case CT_MULTIPART: {
 856         struct multipart *m = (struct multipart *) ct->c_ctparams;
 857         struct part *part;
 858
 859         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
 860             status = fix_types (part->mp_part, fixtypes, message_mods);
 861         }
 862         break;
 863     }
 864
 865     case CT_MESSAGE:
 866         if (ct->c_subtype == MESSAGE_EXTERNAL) {
 867             struct exbody *e = (struct exbody *) ct->c_ctparams;
 868
 869             status = fix_types (e->eb_content, fixtypes, message_mods);
 870         }
 871         break;
 872
 873     default: {
 874         char **typep, *type;
 875
 876         if (ct->c_ctinfo.ci_type  &&  ct->c_ctinfo.ci_subtype) {
 877             for (typep = svector_strs (fixtypes);
 878                  typep && (type = *typep);
 879                  ++typep) {
 880                 char *type_subtype =
 881                     concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
 882                             NULL);
 883
 884                 if (! strcasecmp (type, type_subtype)  &&
 885                     decode_part (ct) == OK  &&
 886                     ct->c_cefile.ce_file != NULL) {
 887                     char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
 888                     char *cp;
 889
 890                     if ((cp = strchr (ct_type_subtype, ';'))) {
 891                         /* Truncate to remove any parameter list from
 892                            mime_type () result. */
 893                         *cp = '\0';
 894                     }
 895
 896                     if (strcasecmp (type, ct_type_subtype)) {
 897                         char *ct_type, *ct_subtype;
 898                         HF hf;
 899
 900                         /* The Content-Type header does not match the
 901                            content, so update these struct Content
 902                            fields to match:
 903                            * c_type, c_subtype
 904                            * c_ctinfo.ci_type, c_ctinfo.ci_subtype
 905                            * c_ctline
 906                            */
 907                         /* Extract type and subtype from type/subtype. */
 908                         ct_type = getcpy (ct_type_subtype);
 909                         if ((cp = strchr (ct_type, '/'))) {
 910                             *cp = '\0';
 911                             ct_subtype = getcpy (++cp);
 912                         } else {
 913                             advise (NULL, "missing / in MIME type of %s %s",
 914                                     ct->c_file, ct->c_partno);
 915                             free (ct_type);
 916                             return NOTOK;
 917                         }
 918
 919                         ct->c_type = ct_str_type (ct_type);
 920                         ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
 921
 922                         free (ct->c_ctinfo.ci_type);
 923                         ct->c_ctinfo.ci_type = ct_type;
 924                         free (ct->c_ctinfo.ci_subtype);
 925                         ct->c_ctinfo.ci_subtype = ct_subtype;
 926                         if (! replace_substring (&ct->c_ctline, type,
 927                                                  ct_type_subtype)) {
 928                             advise (NULL, "did not find %s in %s",
 929                                     type, ct->c_ctline);
 930                         }
 931
 932                         /* Update Content-Type header field. */
 933                         for (hf = ct->c_first_hf; hf; hf = hf->next) {
 934                             if (! strcasecmp (TYPE_FIELD, hf->name)) {
 935                                 if (replace_substring (&hf->value, type,
 936                                                        ct_type_subtype)) {
 937                                     ++*message_mods;
 938                                     if (verbosw) {
 939                                         report (NULL, ct->c_partno, ct->c_file,
 940                                                 "change Content-Type in header "
 941                                                 "from %s to %s",
 942                                                 type, ct_type_subtype);
 943                                     }
 944                                     break;
 945                                 } else {
 946                                     advise (NULL, "did not find %s in %s",
 947                                             type, hf->value);
 948                                 }
 949                             }
 950                         }
 951                     }
 952                     free (ct_type_subtype);
 953                 }
 954                 free (type_subtype);
 955             }
 956         }
 957     }}
 958
 959     return status;
 960 }
 961
 962 char *
 963 replace_substring (char **str, const char *old, const char *new) {
 964     char *cp;
 965
 966     if ((cp = strstr (*str, old))) {
 967         char *remainder = cp + strlen (old);
 968         char *prefix, *new_str;
 969
 970         if (cp - *str) {
 971             prefix = getcpy (*str);
 972             *(prefix + (cp - *str)) = '\0';
 973             new_str = concat (prefix, new, remainder, NULL);
 974             free (prefix);
 975         } else {
 976             new_str = concat (new, remainder, NULL);
 977         }
 978
 979         free (*str);
 980
 981         return *str = new_str;
 982     } else {
 983         return NULL;
 984     }
 985 }
 986
 987 /*
 988  * Remove a name=value parameter, given just its name, from a header value.
 989  */
 990 char *
 991 remove_parameter (char *str, const char *name) {
 992     /* It looks to me, based on the BNF in RFC 2045, than there can't
 993        be whitespace betwwen the parameter name and the "=", or
 994        between the "=" and the parameter value. */
 995     char *param_name = concat (name, "=", NULL);
 996     char *cp;
 997
 998     if ((cp = strstr (str, param_name))) {
 999         char *start, *end;
1000         size_t count = 1;
1001
1002         /* Remove any leading spaces, before the parameter name. */
1003         for (start = cp;
1004              start > str && isspace ((unsigned char) *(start-1));
1005              --start) {
1006             continue;
1007         }
1008         /* Remove a leading semicolon. */
1009         if (start > str  &&  *(start-1) == ';') { --start; }
1010
1011         end = cp + strlen (name) + 1;
1012         if (*end == '"') {
1013             /* Skip past the quoted value, and then the final quote. */
1014             for (++end ; *end  &&  *end != '"'; ++end) { continue; }
1015             ++end;
1016         } else {
1017             /* Skip past the value. */
1018             for (++end ; *end  &&  ! isspace ((unsigned char) *end); ++end) {}
1019         }
1020
1021         /* Count how many characters need to be moved.  Include
1022            trailing null, which is accounted for by the
1023            initialization of count to 1. */
1024         for (cp = end; *cp; ++cp) { ++count; }
1025         (void) memmove (start, end, count);
1026     }
1027
1028     free (param_name);
1029
1030     return str;
1031 }
1032
1033 static int
1034 fix_multipart_cte (CT ct, int *message_mods) {
1035     int status = OK;
1036
1037     if (ct->c_type == CT_MULTIPART) {
1038         struct multipart *m;
1039         struct part *part;
1040
1041         if (ct->c_encoding != CE_7BIT  &&  ct->c_encoding != CE_8BIT  &&
1042             ct->c_encoding != CE_BINARY) {
1043             HF hf;
1044
1045             for (hf = ct->c_first_hf; hf; hf = hf->next) {
1046                 char *name = hf->name;
1047                 for (; *name && isspace ((unsigned char) *name); ++name) {
1048                     continue;
1049                 }
1050
1051                 if (! strncasecmp (name, ENCODING_FIELD,
1052                                    strlen (ENCODING_FIELD))) {
1053                     char *prefix = "Nmh-REPLACED-INVALID-";
1054                     HF h = mh_xmalloc (sizeof *h);
1055
1056                     h->name = add (hf->name, NULL);
1057                     h->hf_encoding = hf->hf_encoding;
1058                     h->next = hf->next;
1059                     hf->next = h;
1060
1061                     /* Retain old header but prefix its name. */
1062                     free (hf->name);
1063                     hf->name = concat (prefix, h->name, NULL);
1064
1065                     ++*message_mods;
1066                     if (verbosw) {
1067                         char *encoding = cpytrim (hf->value);
1068                         report (NULL, ct->c_partno, ct->c_file,
1069                                 "replace Content-Transfer-Encoding of %s "
1070                                 "with 8 bit", encoding);
1071                         free (encoding);
1072                     }
1073
1074                     h->value = add (" 8bit\n", NULL);
1075
1076                     /* Don't need to warn for multiple C-T-E header
1077                        fields, parse_mime() already does that.  But
1078                        if there are any, fix them all as necessary. */
1079                     hf = h;
1080                 }
1081             }
1082
1083             set_ce (ct, CE_8BIT);
1084         }
1085
1086         m = (struct multipart *) ct->c_ctparams;
1087         for (part = m->mp_parts; part; part = part->mp_next) {
1088             if (fix_multipart_cte (part->mp_part, message_mods) != OK) {
1089                 status = NOTOK;
1090                 break;
1091             }
1092         }
1093     }
1094
1095     return status;
1096 }
1097
1098
1099 static int
1100 set_ce (CT ct, int encoding) {
1101     const char *ce = ce_str (encoding);
1102     const struct str2init *ctinit = get_ce_method (ce);
1103
1104     if (ctinit) {
1105         char *cte = concat (" ", ce, "\n", NULL);
1106         int found_cte = 0;
1107         HF hf;
1108         /* Decoded contents might be in ct->c_cefile.ce_file, if the
1109            caller is decode_text_parts ().  Save because we'll
1110            overwrite below. */
1111         struct cefile decoded_content_info = ct->c_cefile;
1112
1113         ct->c_encoding = encoding;
1114
1115         ct->c_ctinitfnx = ctinit->si_init;
1116         /* This will assign ct->c_cefile with an all-0 struct, which
1117            is what we want. */
1118         (*ctinit->si_init) (ct);
1119         /* After returning, the caller should set
1120            ct->c_cefile.ce_file to the name of the file containing
1121            the contents. */
1122
1123         /* Restore the cefile. */
1124         ct->c_cefile = decoded_content_info;
1125
1126         /* Update/add Content-Transfer-Encoding header field. */
1127         for (hf = ct->c_first_hf; hf; hf = hf->next) {
1128             if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1129                 found_cte = 1;
1130                 free (hf->value);
1131                 hf->value = cte;
1132             }
1133         }
1134         if (! found_cte) {
1135             add_header (ct, add (ENCODING_FIELD, NULL), cte);
1136         }
1137
1138         /* Update c_celine.  It's used only by mhlist -debug. */
1139         free (ct->c_celine);
1140         ct->c_celine = add (cte, NULL);
1141
1142         return OK;
1143     } else {
1144         return NOTOK;
1145     }
1146 }
1147
1148
1149 /* Make sure each text part has a corresponding text/plain part. */
1150 static int
1151 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1152     int status = OK;
1153
1154     switch ((*ct)->c_type) {
1155     case CT_TEXT: {
1156         /* Nothing to do for text/plain. */
1157         if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1158
1159         if (parent  &&  parent->c_type == CT_MULTIPART  &&
1160             parent->c_subtype == MULTI_ALTERNATE) {
1161             int new_subpart_number = 1;
1162             int has_text_plain =
1163                 find_textplain_sibling (parent, replacetextplain,
1164                                         &new_subpart_number);
1165
1166             if (! has_text_plain) {
1167                 /* Parent is a multipart/alternative.  Insert a new
1168                    text/plain subpart. */
1169                 const int inserted =
1170                     insert_new_text_plain_part (*ct, new_subpart_number,
1171                                                 parent);
1172                 if (inserted) {
1173                     ++*message_mods;
1174                     if (verbosw) {
1175                         report (NULL, parent->c_partno, parent->c_file,
1176                                 "insert text/plain part");
1177                     }
1178                 } else {
1179                     status = NOTOK;
1180                 }
1181             }
1182         } else if (parent  &&  parent->c_type == CT_MULTIPART  &&
1183             parent->c_subtype == MULTI_RELATED) {
1184             char *type_subtype =
1185                 concat ((*ct)->c_ctinfo.ci_type, "/",
1186                         (*ct)->c_ctinfo.ci_subtype, NULL);
1187             const char *parent_type =
1188                 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1189             int new_subpart_number = 1;
1190             int has_text_plain = 0;
1191
1192             /* Have to do string comparison on the subtype because we
1193                don't enumerate all of them in c_subtype values.
1194                parent_type will be NULL if the multipart/related part
1195                doesn't have a type parameter.  The type parameter must
1196                be specified according to RFC 2387 Sec. 3.1 but not all
1197                messages comply. */
1198             if (parent_type  &&  strcasecmp (type_subtype, parent_type) == 0) {
1199                 /* The type of this part matches the root type of the
1200                    parent multipart/related.  Look to see if there's
1201                    text/plain sibling. */
1202                 has_text_plain =
1203                     find_textplain_sibling (parent, replacetextplain,
1204                                             &new_subpart_number);
1205             }
1206
1207             free (type_subtype);
1208
1209             if (! has_text_plain) {
1210                 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1211                 struct part *part;
1212                 int siblings = 0;
1213
1214                 for (part = mp->mp_parts; part; part = part->mp_next) {
1215                     if (*ct != part->mp_part) {
1216                         ++siblings;
1217                     }
1218                 }
1219
1220                 if (siblings) {
1221                     /* Parent is a multipart/related.  Insert a new
1222                        text/plain subpart in a new multipart/alternative. */
1223                     if (insert_into_new_mp_alt (ct, message_mods)) {
1224                         /* Not an error if text/plain couldn't be added. */
1225                     }
1226                 } else {
1227                     /* There are no siblings, so insert a new text/plain
1228                        subpart, and change the parent type from
1229                        multipart/related to multipart/alternative. */
1230                     const int inserted =
1231                         insert_new_text_plain_part (*ct, new_subpart_number,
1232                                                     parent);
1233
1234                     if (inserted) {
1235                         HF hf;
1236
1237                         parent->c_subtype = MULTI_ALTERNATE;
1238                         parent->c_ctinfo.ci_subtype = getcpy ("alternative");
1239                         if (! replace_substring (&parent->c_ctline, "/related",
1240                                                  "/alternative")) {
1241                             advise (NULL,
1242                                     "did not find multipart/related in %s",
1243                                     parent->c_ctline);
1244                         }
1245
1246                         /* Update Content-Type header field. */
1247                         for (hf = parent->c_first_hf; hf; hf = hf->next) {
1248                             if (! strcasecmp (TYPE_FIELD, hf->name)) {
1249                                 if (replace_substring (&hf->value, "/related",
1250                                                        "/alternative")) {
1251                                     ++*message_mods;
1252                                     if (verbosw) {
1253                                         report (NULL, parent->c_partno,
1254                                                 parent->c_file,
1255                                                 "insert text/plain part");
1256                                     }
1257
1258                                     /* Remove, e.g., type="text/html" from
1259                                        multipart/alternative. */
1260                                     remove_parameter (hf->value, "type");
1261                                     break;
1262                                 } else {
1263                                     advise (NULL, "did not find multipart/"
1264                                                   "related in header %s",
1265                                             hf->value);
1266                                 }
1267                             }
1268                         }
1269                     } else {
1270                         /* Not an error if text/plain couldn't be inserted. */
1271                     }
1272                 }
1273             }
1274         } else {
1275             if (insert_into_new_mp_alt (ct, message_mods)) {
1276                 status = NOTOK;
1277             }
1278         }
1279         break;
1280     }
1281
1282     case CT_MULTIPART: {
1283         struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1284         struct part *part;
1285
1286         for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1287             if ((*ct)->c_type == CT_MULTIPART) {
1288                 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1289                                             replacetextplain);
1290             }
1291         }
1292         break;
1293     }
1294
1295     case CT_MESSAGE:
1296         if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1297             struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1298
1299             status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1300                                         replacetextplain);
1301         }
1302         break;
1303     }
1304
1305     return status;
1306 }
1307
1308
1309 /* See if there is a sibling text/plain. */
1310 static int
1311 find_textplain_sibling (CT parent, int replacetextplain,
1312                         int *new_subpart_number) {
1313     struct multipart *mp = (struct multipart *) parent->c_ctparams;
1314     struct part *part, *prev;
1315     int has_text_plain = 0;
1316
1317     for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1318         ++*new_subpart_number;
1319         if (part->mp_part->c_type == CT_TEXT  &&
1320             part->mp_part->c_subtype == TEXT_PLAIN) {
1321             if (replacetextplain) {
1322                 struct part *old_part;
1323                 if (part == mp->mp_parts) {
1324                     old_part = mp->mp_parts;
1325                     mp->mp_parts = part->mp_next;
1326                 } else {
1327                     old_part = prev->mp_next;
1328                     prev->mp_next = part->mp_next;
1329                 }
1330                 if (verbosw) {
1331                     report (NULL, parent->c_partno, parent->c_file,
1332                             "remove text/plain part %s",
1333                             old_part->mp_part->c_partno);
1334                 }
1335                 free_content (old_part->mp_part);
1336                 free (old_part);
1337             } else {
1338                 has_text_plain = 1;
1339             }
1340             break;
1341         }
1342         prev = part;
1343     }
1344
1345     return has_text_plain;
1346 }
1347
1348
1349 static int
1350 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1351     struct multipart *mp = (struct multipart *) parent->c_ctparams;
1352     struct part *new_part = mh_xmalloc (sizeof *new_part);
1353
1354     if ((new_part->mp_part = build_text_plain_part (ct))) {
1355         char buffer[16];
1356         snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1357
1358         new_part->mp_next = mp->mp_parts;
1359         mp->mp_parts = new_part;
1360         new_part->mp_part->c_partno =
1361             concat (parent->c_partno ? parent->c_partno : "1", ".",
1362                     buffer, NULL);
1363
1364         return 1;
1365     } else {
1366         free_content (new_part->mp_part);
1367         free (new_part);
1368
1369         return 0;
1370     }
1371 }
1372
1373
1374 static CT
1375 build_text_plain_part (CT encoded_part) {
1376     CT tp_part = divide_part (encoded_part);
1377     char *tmp_plain_file = NULL;
1378
1379     if (decode_part (tp_part) == OK) {
1380         /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1381            contains the decoded contents.  And the decoding function, such
1382            as openQuoted, will have set ...->ce_unlink to 1 so that it will
1383            be unlinked by free_content (). */
1384         char *tempfile;
1385
1386         if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1387             advise (NULL, "unable to create temporary file in %s",
1388                     get_temp_dir());
1389         } else {
1390             tmp_plain_file = add (tempfile, NULL);
1391             if (reformat_part (tp_part, tmp_plain_file,
1392                                tp_part->c_ctinfo.ci_type,
1393                                tp_part->c_ctinfo.ci_subtype,
1394                                tp_part->c_type) == OK) {
1395                 return tp_part;
1396             }
1397         }
1398     }
1399
1400     free_content (tp_part);
1401     if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1402     free (tmp_plain_file);
1403
1404     return NULL;
1405 }
1406
1407
1408 /* Slip new text/plain part into a new multipart/alternative. */
1409 static int
1410 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1411     CT tp_part = build_text_plain_part (*ct);
1412     int status = OK;
1413
1414     if (tp_part) {
1415         CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1416                                          MULTI_ALTERNATE);
1417         if (mp_alt) {
1418             struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1419
1420             if (mp  &&  mp->mp_parts) {
1421                 mp->mp_parts->mp_part = tp_part;
1422                 /* Make the new multipart/alternative the parent. */
1423                 *ct = mp_alt;
1424
1425                 ++*message_mods;
1426                 if (verbosw) {
1427                     report (NULL, (*ct)->c_partno, (*ct)->c_file,
1428                             "insert text/plain part");
1429                 }
1430             } else {
1431                 free_content (tp_part);
1432                 free_content (mp_alt);
1433                 status = NOTOK;
1434             }
1435         } else {
1436             status = NOTOK;
1437         }
1438     } else {
1439         /* Not an error if text/plain couldn't be built. */
1440     }
1441
1442     return status;
1443 }
1444
1445 static CT
1446 divide_part (CT ct) {
1447     CT new_part;
1448
1449     if ((new_part = (CT) mh_xcalloc (1, sizeof *new_part)) == NULL)
1450         adios (NULL, "out of memory");
1451
1452     /* Just copy over what is needed for decoding.  c_vrsn and
1453        c_celine aren't necessary. */
1454     new_part->c_file = add (ct->c_file, NULL);
1455     new_part->c_begin = ct->c_begin;
1456     new_part->c_end = ct->c_end;
1457     copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1458     new_part->c_type = ct->c_type;
1459     new_part->c_cefile = ct->c_cefile;
1460     new_part->c_encoding = ct->c_encoding;
1461     new_part->c_ctinitfnx = ct->c_ctinitfnx;
1462     new_part->c_ceopenfnx = ct->c_ceopenfnx;
1463     new_part->c_ceclosefnx = ct->c_ceclosefnx;
1464     new_part->c_cesizefnx = ct->c_cesizefnx;
1465
1466     /* c_ctline is used by reformat__part(), so it can preserve
1467        anything after the type/subtype. */
1468     new_part->c_ctline = add (ct->c_ctline, NULL);
1469
1470     return new_part;
1471 }
1472
1473
1474 static void
1475 copy_ctinfo (CI dest, CI src) {
1476     PM s_pm, d_pm;
1477
1478     dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL;
1479     dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL;
1480
1481     for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1482         d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1483                          s_pm->pm_value, 0);
1484         if (s_pm->pm_charset)
1485             d_pm->pm_charset = getcpy(s_pm->pm_charset);
1486         if (s_pm->pm_lang)
1487             d_pm->pm_lang = getcpy(s_pm->pm_lang);
1488     }
1489
1490     dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL;
1491     dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL;
1492 }
1493
1494
1495 static int
1496 decode_part (CT ct) {
1497     char *tmp_decoded;
1498     int status;
1499     char *tempfile;
1500
1501     if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1502         adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1503     }
1504     tmp_decoded = add (tempfile, NULL);
1505     /* The following call will load ct->c_cefile.ce_file with the tmp
1506        filename of the decoded content.  tmp_decoded will contain the
1507        encoded output, get rid of that. */
1508     status = output_message (ct, tmp_decoded);
1509     (void) m_unlink (tmp_decoded);
1510     free (tmp_decoded);
1511
1512     return status;
1513 }
1514
1515
1516 /* Some of the arguments aren't really needed now, but maybe will
1517    be in the future for other than text types. */
1518 static int
1519 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1520     int output_subtype, output_encoding;
1521     char *cp, *cf;
1522     int status;
1523
1524     /* Hacky:  this redirects the output from whatever command is used
1525        to show the part to a file.  So, the user can't have any output
1526        redirection in that command.
1527        Could show_multi() in mhshowsbr.c avoid this? */
1528
1529     /* Check for invo_name-format-type/subtype. */
1530     if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1531         if (verbosw) {
1532             advise (NULL, "Don't know how to convert %s, there is no "
1533                     "%s-format-%s/%s profile entry",
1534                     ct->c_file, invo_name, type, subtype);
1535         }
1536         return NOTOK;
1537     } else {
1538         if (strchr (cf, '>')) {
1539             advise (NULL, "'>' prohibited in \"%s\",\nplease fix your "
1540                     "%s-format-%s/%s profile entry", cf, invo_name, type,
1541                     subtype ? subtype : "");
1542
1543             return NOTOK;
1544         }
1545     }
1546
1547     cp = concat (cf, " >", file, NULL);
1548     status = show_content_aux (ct, 0, cp, NULL, NULL);
1549     free (cp);
1550
1551     /* Unlink decoded content tmp file and free its filename to avoid
1552        leaks.  The file stream should already have been closed. */
1553     if (ct->c_cefile.ce_unlink) {
1554         (void) m_unlink (ct->c_cefile.ce_file);
1555         free (ct->c_cefile.ce_file);
1556         ct->c_cefile.ce_file = NULL;
1557         ct->c_cefile.ce_unlink = 0;
1558     }
1559
1560     if (c_type == CT_TEXT) {
1561         output_subtype = TEXT_PLAIN;
1562     } else {
1563         /* Set subtype to 0, which is always an UNKNOWN subtype. */
1564         output_subtype = 0;
1565     }
1566     output_encoding = charset_encoding (ct);
1567
1568     if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1569         ct->c_cefile.ce_file = file;
1570         ct->c_cefile.ce_unlink = 1;
1571     } else {
1572         ct->c_cefile.ce_unlink = 0;
1573         status = NOTOK;
1574     }
1575
1576     return status;
1577 }
1578
1579
1580 /* Identifies 7bit or 8bit content based on charset. */
1581 static int
1582 charset_encoding (CT ct) {
1583     char *ct_charset = content_charset (ct);
1584     int encoding = strcasecmp (ct_charset, "US-ASCII")  ?  CE_8BIT  :  CE_7BIT;
1585
1586     free (ct_charset);
1587
1588     return encoding;
1589 }
1590
1591
1592 static CT
1593 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1594     char *boundary_prefix = "----=_nmh-multipart";
1595     char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1596     char *boundary_indicator = "; boundary=";
1597     char *typename, *subtypename, *name;
1598     CT ct;
1599     struct part *p;
1600     struct multipart *m;
1601     const struct str2init *ctinit;
1602
1603     if ((ct = (CT) mh_xcalloc (1, sizeof *ct)) == NULL)
1604         adios (NULL, "out of memory");
1605
1606     /* Set up the multipart/alternative part.  These fields of *ct were
1607        initialized to 0 by mh_xcalloc():
1608        c_fp, c_unlink, c_begin, c_end,
1609        c_vrsn, c_ctline, c_celine,
1610        c_id, c_descr, c_dispo, c_partno,
1611        c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1612        c_cefile, c_encoding,
1613        c_digested, c_digest[16], c_ctexbody,
1614        c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1615        c_umask, c_rfc934,
1616        c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1617     */
1618
1619     ct->c_file = add (first_alt->c_file, NULL);
1620     ct->c_type = type;
1621     ct->c_subtype = subtype;
1622
1623     ctinit = get_ct_init (ct->c_type);
1624
1625     typename = ct_type_str (type);
1626     subtypename = ct_subtype_str (type, subtype);
1627
1628     {
1629         int serial = 0;
1630         int found_boundary = 1;
1631
1632         while (found_boundary  &&  serial < 1000000) {
1633             found_boundary = 0;
1634
1635             /* Ensure that the boundary doesn't appear in the decoded
1636                content. */
1637             if (new_part->c_cefile.ce_file) {
1638                 if ((found_boundary =
1639                      boundary_in_content (&new_part->c_cefile.ce_fp,
1640                                           new_part->c_cefile.ce_file,
1641                                           boundary)) == -1) {
1642                     free (ct);
1643                     return NULL;
1644                 }
1645             }
1646
1647             /* Ensure that the boundary doesn't appear in the encoded
1648                content. */
1649             if (! found_boundary  &&  new_part->c_file) {
1650                 if ((found_boundary = boundary_in_content (&new_part->c_fp,
1651                                                            new_part->c_file,
1652                                                            boundary)) == -1) {
1653                     free (ct);
1654                     return NULL;
1655                 }
1656             }
1657
1658             if (found_boundary) {
1659                 /* Try a slightly different boundary. */
1660                 char buffer2[16];
1661
1662                 free (boundary);
1663                 ++serial;
1664                 snprintf (buffer2, sizeof buffer2, "%d", serial);
1665                 boundary =
1666                     concat (boundary_prefix,
1667                             first_alt->c_partno ? first_alt->c_partno : "",
1668                             "-", buffer2,  NULL);
1669             }
1670         }
1671
1672         if (found_boundary) {
1673             advise (NULL, "giving up trying to find a unique boundary");
1674             free (ct);
1675             return NULL;
1676         }
1677     }
1678
1679     name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1680                    boundary, "\"", NULL);
1681
1682     /* Load c_first_hf and c_last_hf. */
1683     transfer_noncontent_headers (first_alt, ct);
1684     add_header (ct, add (TYPE_FIELD, NULL), concat (name, "\n", NULL));
1685     free (name);
1686
1687     /* Load c_partno. */
1688     if (first_alt->c_partno) {
1689         ct->c_partno = add (first_alt->c_partno, NULL);
1690         free (first_alt->c_partno);
1691         first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1692         new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1693     } else {
1694         first_alt->c_partno = add ("1", NULL);
1695         new_part->c_partno = add ("2", NULL);
1696     }
1697
1698     if (ctinit) {
1699         ct->c_ctinfo.ci_type = add (typename, NULL);
1700         ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1701     }
1702
1703     add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1704               "boundary", boundary, 0);
1705
1706     p = (struct part *) mh_xmalloc (sizeof *p);
1707     p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next);
1708     p->mp_next->mp_next = NULL;
1709     p->mp_next->mp_part = first_alt;
1710
1711     if ((m = (struct multipart *) mh_xcalloc (1, sizeof (struct multipart))) ==
1712         NULL)
1713         adios (NULL, "out of memory");
1714     m->mp_start = concat (boundary, "\n", NULL);
1715     m->mp_stop = concat (boundary, "--\n", NULL);
1716     m->mp_parts = p;
1717     ct->c_ctparams = m;
1718
1719     free (boundary);
1720
1721     return ct;
1722 }
1723
1724
1725 /* Check that the boundary does not appear in the content. */
1726 static int
1727 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1728     char buffer[BUFSIZ];
1729     size_t bytes_read;
1730     int found_boundary = 0;
1731
1732     /* free_content() will close *fp if we fopen it here. */
1733     if (! *fp  &&  (*fp = fopen (file, "r")) == NULL) {
1734         advise (file, "unable to open %s for reading", file);
1735         return NOTOK;
1736     }
1737
1738     fseeko (*fp, 0L, SEEK_SET);
1739     while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1740         if (find_str (buffer, bytes_read, boundary)) {
1741             found_boundary = 1;
1742             break;
1743         }
1744     }
1745
1746     return found_boundary;
1747 }
1748
1749
1750 /* Remove all non-Content headers. */
1751 static void
1752 transfer_noncontent_headers (CT old, CT new) {
1753     HF hp, hp_prev;
1754
1755     hp_prev = hp = old->c_first_hf;
1756     while (hp) {
1757         HF next = hp->next;
1758
1759         if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1760             if (hp == old->c_last_hf) {
1761                 if (hp == old->c_first_hf) {
1762                     old->c_last_hf =  old->c_first_hf = NULL;
1763                 } else {
1764                     hp_prev->next = NULL;
1765                     old->c_last_hf =  hp_prev;
1766                 }
1767             } else {
1768                 if (hp == old->c_first_hf) {
1769                     old->c_first_hf = next;
1770                 } else {
1771                     hp_prev->next = next;
1772                 }
1773             }
1774
1775             /* Put node hp in the new CT. */
1776             if (new->c_first_hf == NULL) {
1777                 new->c_first_hf = hp;
1778             } else {
1779                 new->c_last_hf->next = hp;
1780             }
1781             new->c_last_hf = hp;
1782         } else {
1783             /* A Content- header, leave in old. */
1784             hp_prev = hp;
1785         }
1786
1787         hp = next;
1788     }
1789 }
1790
1791
1792 static int
1793 set_ct_type (CT ct, int type, int subtype, int encoding) {
1794     char *typename = ct_type_str (type);
1795     char *subtypename = ct_subtype_str (type, subtype);
1796     /* E.g, " text/plain" */
1797     char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1798     /* E.g, " text/plain\n" */
1799     char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1800     int found_content_type = 0;
1801     HF hf;
1802     const char *cp = NULL;
1803     char *ctline;
1804     int status;
1805
1806     /* Update/add Content-Type header field. */
1807     for (hf = ct->c_first_hf; hf; hf = hf->next) {
1808         if (! strcasecmp (TYPE_FIELD, hf->name)) {
1809             found_content_type = 1;
1810             free (hf->value);
1811             hf->value = (cp = strchr (ct->c_ctline, ';'))
1812                 ?  concat (type_subtypename, cp, "\n", NULL)
1813                 :  add (name_plus_nl, NULL);
1814         }
1815     }
1816     if (! found_content_type) {
1817         add_header (ct, add (TYPE_FIELD, NULL),
1818                     (cp = strchr (ct->c_ctline, ';'))
1819                     ?  concat (type_subtypename, cp, "\n", NULL)
1820                     :  add (name_plus_nl, NULL));
1821     }
1822
1823     /* Some of these might not be used, but set them anyway. */
1824     ctline = cp
1825         ?  concat (type_subtypename, cp, NULL)
1826         :  concat (type_subtypename, NULL);
1827     free (ct->c_ctline);
1828     ct->c_ctline = ctline;
1829     /* Leave other ctinfo members as they were. */
1830     free (ct->c_ctinfo.ci_type);
1831     ct->c_ctinfo.ci_type = add (typename, NULL);
1832     free (ct->c_ctinfo.ci_subtype);
1833     ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1834     ct->c_type = type;
1835     ct->c_subtype = subtype;
1836
1837     free (name_plus_nl);
1838     free (type_subtypename);
1839
1840     status = set_ce (ct, encoding);
1841
1842     return status;
1843 }
1844
1845
1846 static int
1847 decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mods) {
1848     int status = OK;
1849     int lf_line_endings = 0;
1850
1851     switch (ct->c_type) {
1852     case CT_MULTIPART: {
1853         struct multipart *m = (struct multipart *) ct->c_ctparams;
1854         struct part *part;
1855
1856         /* Should check to see if the body for this part is encoded?
1857            For now, it gets passed along as-is by InitMultiPart(). */
1858         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
1859             status = decode_text_parts (part->mp_part, encoding, decodetypes, message_mods);
1860         }
1861         break;
1862     }
1863
1864     case CT_MESSAGE:
1865         if (ct->c_subtype == MESSAGE_EXTERNAL) {
1866             struct exbody *e = (struct exbody *) ct->c_ctparams;
1867
1868             status = decode_text_parts (e->eb_content, encoding, decodetypes, message_mods);
1869         }
1870         break;
1871
1872     default:
1873         if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
1874             break;
1875         }
1876
1877         lf_line_endings =
1878             ct->c_ctparams  &&  ((struct text *) ct->c_ctparams)->lf_line_endings;
1879
1880         switch (ct->c_encoding) {
1881         case CE_BASE64:
1882         case CE_QUOTED: {
1883             int ct_encoding;
1884
1885             if (decode_part (ct) == OK  &&  ct->c_cefile.ce_file) {
1886                 const char *reason = NULL;
1887
1888                 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
1889                     &&  encoding != CE_BINARY) {
1890                     /* The decoding isn't acceptable so discard it.
1891                        Leave status as OK to allow other transformations. */
1892                     if (verbosw) {
1893                         report (NULL, ct->c_partno, ct->c_file,
1894                                 "will not decode%s because it is binary (%s)",
1895                                 ct->c_partno  ?  ""
1896                                               :  ct->c_ctline  ?  ct->c_ctline
1897                                                                :  "",
1898                                 reason);
1899                     }
1900                     (void) m_unlink (ct->c_cefile.ce_file);
1901                     free (ct->c_cefile.ce_file);
1902                     ct->c_cefile.ce_file = NULL;
1903                 } else if (ct->c_encoding == CE_QUOTED  &&
1904                            ct_encoding == CE_8BIT  &&  encoding == CE_7BIT) {
1905                     /* The decoding isn't acceptable so discard it.
1906                        Leave status as OK to allow other transformations. */
1907                     if (verbosw) {
1908                         report (NULL, ct->c_partno, ct->c_file,
1909                                 "will not decode%s because it is 8bit",
1910                                 ct->c_partno  ?  ""
1911                                               :  ct->c_ctline  ?  ct->c_ctline
1912                                                                :  "");
1913                     }
1914                     (void) m_unlink (ct->c_cefile.ce_file);
1915                     free (ct->c_cefile.ce_file);
1916                     ct->c_cefile.ce_file = NULL;
1917                 } else {
1918                     int enc;
1919                     if (ct_encoding == CE_BINARY) {
1920                         enc = CE_BINARY;
1921                     } else if (ct_encoding == CE_8BIT  &&  encoding == CE_7BIT) {
1922                         enc = CE_QUOTED;
1923                     } else {
1924                         enc = charset_encoding (ct);
1925                     }
1926                     if (set_ce (ct, enc) == OK) {
1927                         ++*message_mods;
1928                         if (verbosw) {
1929                             report (NULL, ct->c_partno, ct->c_file, "decode%s",
1930                                     ct->c_ctline ? ct->c_ctline : "");
1931                         }
1932                         if (lf_line_endings) {
1933                             strip_crs (ct, message_mods);
1934                         }
1935                     } else {
1936                         status = NOTOK;
1937                     }
1938                 }
1939             } else {
1940                 status = NOTOK;
1941             }
1942             break;
1943         }
1944         case CE_8BIT:
1945         case CE_7BIT:
1946             if (lf_line_endings) {
1947                 strip_crs (ct, message_mods);
1948             }
1949             break;
1950         default:
1951             break;
1952         }
1953
1954         break;
1955     }
1956
1957     return status;
1958 }
1959
1960
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * decodetypes - comma-separated list of "type" and "type/subtype" entries
 * type        - content type of the part, e.g. "text"
 * subtype     - content subtype of the part, e.g. "plain"; may be NULL
 *
 * Returns 1 if some entry equals type, or equals type/subtype, ignoring
 * case; otherwise 0.  A NULL decodetypes or a NULL type never matches.
 *
 * Entries are compared in place, so nothing is allocated.  An entry
 * must match in full:  "text" does not match the entry "context", and
 * "text/plain" does not match the entry "text/plainer".
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    const char *entry;
    size_t type_len;
    size_t subtype_len;

    if (decodetypes == NULL  ||  type == NULL) {
        return 0;
    }

    type_len = strlen (type);
    subtype_len = subtype  ?  strlen (subtype)  :  0;

    for (entry = decodetypes; ; ) {
        /* Length of the current entry, up to the next ',' or the end. */
        size_t entry_len = strcspn (entry, ",");

        if (entry_len >= type_len  &&
            strncasecmp (entry, type, type_len) == 0) {
            if (entry_len == type_len) {
                /* Entry is just the type. */
                return 1;
            }
            if (subtype != NULL  &&
                entry[type_len] == '/'  &&
                entry_len == type_len + 1 + subtype_len  &&
                strncasecmp (entry + type_len + 1, subtype,
                             subtype_len) == 0) {
                /* Entry is type/subtype. */
                return 1;
            }
        }

        if (entry[entry_len] == '\0') {
            break;
        }
        entry += entry_len + 1;
    }

    return 0;
}
1990
1991
1992 /* See if the decoded content is 7bit, 8bit, or binary.  It's binary
1993    if it has any NUL characters, a CR not followed by a LF, or lines
1994    greater than 998 characters in length.  If binary, reason is set
1995    to a string explaining why. */
1996 static int
1997 content_encoding (CT ct, const char **reason) {
1998     CE ce = &ct->c_cefile;
1999     int encoding = CE_7BIT;
2000
2001     if (ce->ce_file) {
2002         size_t line_len = 0;
2003         char buffer[BUFSIZ];
2004         size_t inbytes;
2005
2006         if (! ce->ce_fp  &&  (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2007             advise (ce->ce_file, "unable to open for reading");
2008             return CE_UNKNOWN;
2009         }
2010
2011         fseeko (ce->ce_fp, 0L, SEEK_SET);
2012         while (encoding != CE_BINARY  &&
2013                (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2014             char *cp;
2015             size_t i;
2016             int last_char_was_cr = 0;
2017
2018             for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2019                 if (*cp == '\0'  ||  ++line_len > 998  ||
2020                     (*cp != '\n'  &&  last_char_was_cr)) {
2021                     encoding = CE_BINARY;
2022                     if (*cp == '\0') {
2023                         *reason = "null character";
2024                     } else if (line_len > 998) {
2025                         *reason = "line length > 998";
2026                     } else if (*cp != '\n'  &&  last_char_was_cr) {
2027                         *reason = "CR not followed by LF";
2028                     } else {
2029                         /* Should not reach this. */
2030                         *reason = "";
2031                     }
2032                     break;
2033                 } else if (*cp == '\n') {
2034                     line_len = 0;
2035                 } else if (! isascii ((unsigned char) *cp)) {
2036                     encoding = CE_8BIT;
2037                 }
2038
2039                 last_char_was_cr = *cp == '\r'  ?  1  :  0;
2040             }
2041         }
2042
2043         fclose (ce->ce_fp);
2044         ce->ce_fp = NULL;
2045     } /* else should never happen */
2046
2047     return encoding;
2048 }
2049
2050
2051 static int
2052 strip_crs (CT ct, int *message_mods) {
2053     char *charset = content_charset (ct);
2054     int status = OK;
2055
2056     /* Only strip carriage returns if content is ASCII or another
2057        charset that has the same readily recognizable CR followed by a
2058        LF.  We can include UTF-8 here because if the high-order bit of
2059        a UTF-8 byte is 0, then it must be a single-byte ASCII
2060        character. */
2061     if (! strcasecmp (charset, "US-ASCII")  ||
2062         ! strcasecmp (charset, "UTF-8")  ||
2063         ! strncasecmp (charset, "ISO-8859-", 9)  ||
2064         ! strncasecmp (charset, "WINDOWS-12", 10)) {
2065         char **file = NULL;
2066         FILE **fp = NULL;
2067         size_t begin;
2068         size_t end;
2069         int has_crs = 0;
2070         int opened_input_file = 0;
2071
2072         if (ct->c_cefile.ce_file) {
2073             file = &ct->c_cefile.ce_file;
2074             fp = &ct->c_cefile.ce_fp;
2075             begin = end = 0;
2076         } else if (ct->c_file) {
2077             file = &ct->c_file;
2078             fp = &ct->c_fp;
2079             begin = (size_t) ct->c_begin;
2080             end = (size_t) ct->c_end;
2081         } /* else don't know where the content is */
2082
2083         if (file  &&  *file  &&  fp) {
2084             if (! *fp) {
2085                 if ((*fp = fopen (*file, "r")) == NULL) {
2086                     advise (*file, "unable to open for reading");
2087                     status = NOTOK;
2088                 } else {
2089                     opened_input_file = 1;
2090                 }
2091             }
2092         }
2093
2094         if (fp  &&  *fp) {
2095             char buffer[BUFSIZ];
2096             size_t bytes_read;
2097             size_t bytes_to_read =
2098                 end > 0 && end > begin  ?  end - begin  :  sizeof buffer;
2099
2100             fseeko (*fp, begin, SEEK_SET);
2101             while ((bytes_read = fread (buffer, 1,
2102                                         min (bytes_to_read, sizeof buffer),
2103                                         *fp)) > 0) {
2104                 /* Look for CR followed by a LF.  This is supposed to
2105                    be text so there should be LF's.  If not, don't
2106                    modify the content. */
2107                 char *cp;
2108                 size_t i;
2109                 int last_char_was_cr = 0;
2110
2111                 if (end > 0) { bytes_to_read -= bytes_read; }
2112
2113                 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2114                     if (*cp == '\n'  &&  last_char_was_cr) {
2115                         has_crs = 1;
2116                         break;
2117                     }
2118
2119                     last_char_was_cr = *cp == '\r'  ?  1  :  0;
2120                 }
2121             }
2122
2123             if (has_crs) {
2124                 int fd;
2125                 char *stripped_content_file;
2126                 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2127
2128                 if (tempfile == NULL) {
2129                     adios (NULL, "unable to create temporary file in %s",
2130                            get_temp_dir());
2131                 }
2132                 stripped_content_file = add (tempfile, NULL);
2133
2134                 /* Strip each CR before a LF from the content. */
2135                 fseeko (*fp, begin, SEEK_SET);
2136                 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2137                        0) {
2138                     char *cp;
2139                     size_t i;
2140                     int last_char_was_cr = 0;
2141
2142                     for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2143                         if (*cp == '\r') {
2144                             last_char_was_cr = 1;
2145                         } else if (last_char_was_cr) {
2146                             if (*cp != '\n') {
2147                                 if (write (fd, "\r", 1) < 0) {
2148                                     advise (tempfile, "CR write");
2149                                 }
2150                             }
2151                             if (write (fd, cp, 1) < 0) {
2152                                 advise (tempfile, "write");
2153                             }
2154                             last_char_was_cr = 0;
2155                         } else {
2156                             if (write (fd, cp, 1) < 0) {
2157                                 advise (tempfile, "write");
2158                             }
2159                             last_char_was_cr = 0;
2160                         }
2161                     }
2162                 }
2163
2164                 if (close (fd)) {
2165                     admonish (NULL, "unable to write temporary file %s",
2166                               stripped_content_file);
2167                     (void) m_unlink (stripped_content_file);
2168                     status = NOTOK;
2169                 } else {
2170                     /* Replace the decoded file with the converted one. */
2171                     if (ct->c_cefile.ce_file) {
2172                         if (ct->c_cefile.ce_unlink) {
2173                             (void) m_unlink (ct->c_cefile.ce_file);
2174                         }
2175                         free (ct->c_cefile.ce_file);
2176                     }
2177                     ct->c_cefile.ce_file = stripped_content_file;
2178                     ct->c_cefile.ce_unlink = 1;
2179
2180                     ++*message_mods;
2181                     if (verbosw) {
2182                         report (NULL, ct->c_partno,
2183                                 begin == 0 && end == 0  ?  ""  :  *file,
2184                                 "stripped CRs");
2185                     }
2186                 }
2187             }
2188
2189             if (opened_input_file) {
2190                 fclose (*fp);
2191                 *fp = NULL;
2192             }
2193         }
2194     }
2195
2196     free (charset);
2197
2198     return status;
2199 }
2200
2201
2202 static int
2203 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2204     int status = OK;
2205
2206     switch (ct->c_type) {
2207     case CT_TEXT:
2208         if (ct->c_subtype == TEXT_PLAIN) {
2209             status = convert_charset (ct, dest_charset, message_mods);
2210             if (status == OK) {
2211                 if (verbosw) {
2212                     char *ct_charset = content_charset (ct);
2213
2214                     report (NULL, ct->c_partno, ct->c_file,
2215                             "convert %s to %s", ct_charset, dest_charset);
2216                     free (ct_charset);
2217                 }
2218             } else {
2219                 char *ct_charset = content_charset (ct);
2220
2221                 report ("iconv", ct->c_partno, ct->c_file,
2222                         "failed to convert %s to %s", ct_charset, dest_charset);
2223                 free (ct_charset);
2224             }
2225         }
2226         break;
2227
2228     case CT_MULTIPART: {
2229         struct multipart *m = (struct multipart *) ct->c_ctparams;
2230         struct part *part;
2231
2232         /* Should check to see if the body for this part is encoded?
2233            For now, it gets passed along as-is by InitMultiPart(). */
2234         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
2235             status =
2236                 convert_charsets (part->mp_part, dest_charset, message_mods);
2237         }
2238         break;
2239     }
2240
2241     case CT_MESSAGE:
2242         if (ct->c_subtype == MESSAGE_EXTERNAL) {
2243             struct exbody *e = (struct exbody *) ct->c_ctparams;
2244
2245             status =
2246                 convert_charsets (e->eb_content, dest_charset, message_mods);
2247         }
2248         break;
2249
2250     default:
2251         break;
2252     }
2253
2254     return status;
2255 }
2256
2257
2258 /*
2259  * Fix various problems that aren't handled elsewhere.  These
2260  * are fixed unconditionally:  there are no switches to disable
2261  * them.  (Currently, "problems" is just one:  an extraneous
2262  * semicolon at the end of a header parameter list.)
2263  */
2264 static int
2265 fix_always (CT ct, int *message_mods) {
2266     int status = OK;
2267
2268     switch (ct->c_type) {
2269     case CT_MULTIPART: {
2270         struct multipart *m = (struct multipart *) ct->c_ctparams;
2271         struct part *part;
2272
2273         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
2274             status = fix_always (part->mp_part, message_mods);
2275         }
2276         break;
2277     }
2278
2279     case CT_MESSAGE:
2280         if (ct->c_subtype == MESSAGE_EXTERNAL) {
2281             struct exbody *e = (struct exbody *) ct->c_ctparams;
2282
2283             status = fix_always (e->eb_content, message_mods);
2284         }
2285         break;
2286
2287     default: {
2288         HF hf;
2289
2290         for (hf = ct->c_first_hf; hf; hf = hf->next) {
2291             size_t len = strlen (hf->value);
2292
2293             if (strcasecmp (hf->name, TYPE_FIELD) != 0  &&
2294                 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2295                 /* Only do this for Content-Type and
2296                    Content-Disposition fields because those are the
2297                    only headers that parse_mime() warns about. */
2298                 continue;
2299             }
2300
2301             /* whitespace following a trailing ';' will be nuked as well */
2302             if (hf->value[len - 1] == '\n') {
2303                 while (isspace((unsigned char)(hf->value[len - 2]))) {
2304                     if (len-- == 0) { break; }
2305                 }
2306             }
2307
2308             if (hf->value[len - 2] == ';') {
2309                 /* Remove trailing ';' from parameter value. */
2310                 hf->value[len - 2] = '\n';
2311                 hf->value[len - 1] = '\0';
2312
2313                 /* Also, if Content-Type parameter, remove trailing ';'
2314                    from ct->c_ctline.  This probably isn't necessary
2315                    but can't hurt. */
2316                 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2317                     size_t l = strlen(ct->c_ctline) - 1;
2318                     while (isspace((unsigned char)(ct->c_ctline[l])) ||
2319                            ct->c_ctline[l] == ';') {
2320                         ct->c_ctline[l--] = '\0';
2321                         if (l == 0) { break; }
2322                     }
2323                 }
2324
2325                 ++*message_mods;
2326                 if (verbosw) {
2327                     report (NULL, ct->c_partno, ct->c_file,
2328                             "remove trailing ; from %s parameter value",
2329                             hf->name);
2330                 }
2331             }
2332         }
2333     }}
2334
2335     return status;
2336 }
2337
2338
2339 static int
2340 write_content (CT ct, const char *input_filename, char *outfile, int modify_inplace,
2341                int message_mods) {
2342     int status = OK;
2343
2344     if (modify_inplace) {
2345         if (message_mods > 0) {
2346             if ((status = output_message (ct, outfile)) == OK) {
2347                 char *infile = input_filename
2348                     ?  add (input_filename, NULL)
2349                     :  add (ct->c_file ? ct->c_file : "-", NULL);
2350
2351                 if (remove_file (infile) == OK) {
2352                     if (rename (outfile, infile)) {
2353                         /* Rename didn't work, possibly because of an
2354                            attempt to rename across filesystems.  Try
2355                            brute force copy. */
2356                         int old = open (outfile, O_RDONLY);
2357                         int new =
2358                             open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2359                         int i = -1;
2360
2361                         if (old != -1  &&  new != -1) {
2362                             char buffer[BUFSIZ];
2363
2364                             while ((i = read (old, buffer, sizeof buffer)) >
2365                                    0) {
2366                                 if (write (new, buffer, i) != i) {
2367                                     i = -1;
2368                                     break;
2369                                 }
2370                             }
2371                         }
2372                         if (new != -1) { close (new); }
2373                         if (old != -1) { close (old); }
2374                         (void) m_unlink (outfile);
2375
2376                         if (i < 0) {
2377                             /* The -file argument processing used path() to
2378                                expand filename to absolute path. */
2379                             int file = ct->c_file  &&  ct->c_file[0] == '/';
2380
2381                             admonish (NULL, "unable to rename %s %s to %s",
2382                                       file ? "file" : "message", outfile,
2383                                       infile);
2384                             status = NOTOK;
2385                         }
2386                     }
2387                 } else {
2388                     admonish (NULL, "unable to remove input file %s, "
2389                               "not modifying it", infile);
2390                     (void) m_unlink (outfile);
2391                     status = NOTOK;
2392                 }
2393
2394                 free (infile);
2395             } else {
2396                 status = NOTOK;
2397             }
2398         } else {
2399             /* No modifications and didn't need the tmp outfile. */
2400             (void) m_unlink (outfile);
2401         }
2402     } else {
2403         /* Output is going to some file.  Produce it whether or not
2404            there were modifications. */
2405         status = output_message (ct, outfile);
2406     }
2407
2408     flush_errors ();
2409     return status;
2410 }
2411
2412
2413 /*
2414  * parse_mime() does not set lf_line_endings in struct text, so use this function to do it.
2415  * It touches the parts the decodetypes identifies.
2416  */
2417 static void
2418 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2419     switch (ct->c_type) {
2420     case CT_MULTIPART: {
2421         struct multipart *m = (struct multipart *) ct->c_ctparams;
2422         struct part *part;
2423
2424         for (part = m->mp_parts; part; part = part->mp_next) {
2425             set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2426         }
2427         break;
2428     }
2429
2430     case CT_MESSAGE:
2431         if (ct->c_subtype == MESSAGE_EXTERNAL) {
2432             struct exbody *e = (struct exbody *) ct->c_ctparams;
2433
2434             set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2435         }
2436         break;
2437
2438     default:
2439         if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2440             if (ct->c_ctparams == NULL) {
2441                 if ((ct->c_ctparams = (struct text *) mh_xcalloc (1, sizeof (struct text))) == NULL) {
2442                     adios (NULL, "out of memory");
2443                 }
2444             }
2445             ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2446         }
2447     }
2448 }
2449
2450
2451 /*
2452  * If "rmmproc" is defined, call that to remove the file.  Otherwise,
2453  * use the standard MH backup file.
2454  */
2455 static int
2456 remove_file (const char *file) {
2457     if (rmmproc) {
2458         char *rmm_command = concat (rmmproc, " ", file, NULL);
2459         int status = system (rmm_command);
2460
2461         free (rmm_command);
2462         return WIFEXITED (status)  ?  WEXITSTATUS (status)  :  NOTOK;
2463     } else {
2464         /* This is OK for a non-message file, it still uses the
2465            BACKUP_PREFIX form.  The backup file will be in the same
2466            directory as file. */
2467         return rename (file, m_backup (file));
2468     }
2469 }
2470
2471
2472 static void
2473 report (char *what, char *partno, char *filename, char *message, ...) {
2474     va_list args;
2475     char *fmt;
2476
2477     if (verbosw) {
2478         va_start (args, message);
2479         fmt = concat (filename, partno ? " part " : ", ",
2480                       partno ? partno : "", partno ? ", " : "", message, NULL);
2481
2482         advertise (what, NULL, fmt, args);
2483
2484         free (fmt);
2485         va_end (args);
2486     }
2487 }
2488
2489
/*
 * Signal handler (also used as quitser):  terminate the program via
 * done (1).  For SIGQUIT, stdout is flushed and a newline is written
 * to stderr first.
 */
static void
pipeser (int i)
{
    if (i != SIGQUIT) {
        done (1);
        /* NOTREACHED */
    }

    fflush (stdout);
    fputs ("\n", stderr);
    fflush (stderr);

    done (1);
    /* NOTREACHED */
}