diplodocus.org Git - nmh/blob - uip/mhfixmsg.c

   1 /*
   2  * mhfixmsg.c -- rewrite a message with various transformations
   3  *
   4  * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
   5  * See the COPYRIGHT file in the root directory of the nmh
   6  * distribution for complete copyright information.
   7  */
   8
   9 #include <h/mh.h>
  10 #include <h/mime.h>
  11 #include <h/mhparse.h>
  12 #include <h/utils.h>
  13 #include <h/signals.h>
  14 #include <fcntl.h>
  15
  /*
   * Command-line switch table, written as an X-macro list.  Each entry is
   * X(switch text, minchars, enumerator).  Most boolean options come in
   * pairs, a positive form and a "no" form.
   *
   * The list is expanded twice below: once with X defined to yield just
   * the enumerator (for the switch enum), and once with X defined to
   * yield a { sw, minchars, id } initializer (for the `switches' array
   * that main() hands to smatch(), ambigsw() and print_help()).
   * NOTE(review): DEFINE_SWITCH_ENUM and DEFINE_SWITCH_ARRAY come from the
   * nmh headers; presumably they wrap the expansion in the enum/array
   * declaration -- confirm there.
   */
  16 #define MHFIXMSG_SWITCHES \
  17     X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
  18     X("nodecodetext", 0, NDECODETEXTSW) \
  19     X("decodetypes", 0, DECODETYPESW) \
  20     X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
  21     X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
  22     X("textcharset", 0, TEXTCHARSETSW) \
  23     X("notextcharset", 0, NTEXTCHARSETSW) \
  24     X("reformat", 0, REFORMATSW) \
  25     X("noreformat", 0, NREFORMATSW) \
  26     X("replacetextplain", 0, REPLACETEXTPLAINSW) \
  27     X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
  28     X("fixboundary", 0, FIXBOUNDARYSW) \
  29     X("nofixboundary", 0, NFIXBOUNDARYSW) \
  30     X("fixcte", 0, FIXCTESW) \
  31     X("nofixcte", 0, NFIXCTESW) \
  32     X("fixtype mimetype", 0, FIXTYPESW) \
  33     X("file file", 0, FILESW) \
  34     X("outfile file", 0, OUTFILESW) \
  35     X("rmmproc program", 0, RPROCSW) \
  36     X("normmproc", 0, NRPRCSW) \
  37     X("changecur", 0, CHGSW) \
  38     X("nochangecur", 0, NCHGSW) \
  39     X("verbose", 0, VERBSW) \
  40     X("noverbose", 0, NVERBSW) \
  41     X("version", 0, VERSIONSW) \
  42     X("help", 0, HELPSW) \
  43
  /* First expansion: each entry contributes its enumerator. */
  44 #define X(sw, minchars, id) id,
  45 DEFINE_SWITCH_ENUM(MHFIXMSG);
  46 #undef X
  47
  /* Second expansion: each entry contributes one `switches' element. */
  48 #define X(sw, minchars, id) { sw, minchars, id },
  49 DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
  50 #undef X
  51
  52
  /* Set to 1 by -verbose and to 0 by -noverbose in main(); when nonzero,
     the fix routines report each change they make. */
  53 int verbosw;
  54 int debugsw; /* Needed by mhparse.c. */
  55
  /* main() installs quitser for SIGQUIT and pipeser for SIGPIPE; this
     alias makes both signals share the pipeser handler. */
  56 #define quitser pipeser
  57
  /*
   * Symbols defined in other mh* source files, declared locally.  The
   * comment ahead of each group names the file that defines them.
   */
  58 /* mhparse.c */
  /* main() sets skip_mp_cte_check and both suppress_* flags to 1 before
     parsing; fix_boundary() reads bogus_mp_content to decide whether a
     multipart needs its boundary repaired. */
  59 extern int skip_mp_cte_check;                 /* flag to InitMultiPart */
  60 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
  61 extern int bogus_mp_content;                  /* flag from InitMultiPart */
  62 /* flags to/from parse_header_attrs */
  63 extern int suppress_extraneous_trailing_semicolon_warning;
  64 extern int extraneous_trailing_semicolon;
  65
  66 /* mhoutsbr.c */
  67 int output_message (CT, char *);
  68
  69 /* mhmisc.c */
  70 void flush_errors (void);
  71
  72 /* mhfree.c */
  /* cts is the NULL-terminated array of parsed messages that main()
     allocates and fills; freects_done is installed as the `done' handler
     in main() so that exiting cleans up everything in cts. */
  73 extern CT *cts;
  74 void freects_done (int) NORETURN;
  75
  76 /*
   * File-local types and prototypes.
   *
   * fix_transformations collects the transformations requested on the
   * command line.  main() fills one in from the switches and passes it,
   * read-only, to mhfixmsgsbr() for every message.
   */
  79 typedef struct fix_transformations {
  80     int fixboundary;        /* -fixboundary (1, the default) / -nofixboundary (0) */
  81     int fixcte;             /* -fixcte (1, the default) / -nofixcte (0) */
  82     svector_t fixtypes;     /* type/subtype strings from -fixtype, or NULL if none given */
  83     int reformat;           /* -reformat (1, the default) / -noreformat (0) */
  84     int replacetextplain;   /* -replacetextplain (1) / -noreplacetextplain (0, the default) */
  85     int decodetext;         /* CE_8BIT (default), CE_7BIT or CE_BINARY; 0 for -nodecodetext */
  86     char *decodetypes;      /* -decodetypes argument; defaults to "text,application/ics" */
  87     /* Line-ending choice for text content; see RFC 2046 Sec. 4.1.1,
            par. 1, on CRLF line breaks.  Note the sense of the flag:
            -crlflinebreaks (the default) stores 0 here and
            -nocrlflinebreaks stores 1. */
  88     int lf_line_endings;
  89     char *textcharset;      /* -textcharset argument, or NULL (the default, and -notextcharset) */
  90 } fix_transformations;
  91
  /*
   * Prototypes for the routines in this file.  mhfixmsgsbr() is the
   * per-message entry point called from main() and is the only one
   * declared without `static'; everything else is file-local.
   */
  92 int mhfixmsgsbr (CT *, const fix_transformations *, char *);
  93 static int fix_boundary (CT *, int *);
  94 static int copy_input_to_output (const char *, const char *);
  95 static int get_multipart_boundary (CT, char **);
  96 static int replace_boundary (CT, char *, char *);
  97 static int fix_types (CT, svector_t, int *);
  98 static char *replace_substring (char **, const char *, const char *);
  99 static char *remove_parameter (char *, const char *);
 100 static int fix_multipart_cte (CT, int *);
 101 static int set_ce (CT, int);
 102 static int ensure_text_plain (CT *, CT, int *, int);
 103 static int find_textplain_sibling (CT, int, int *);
 104 static int insert_new_text_plain_part (CT, int, CT);
 105 static CT build_text_plain_part (CT);
 106 static int insert_into_new_mp_alt (CT *, int *);
 107 static CT divide_part (CT);
 108 static void copy_ctinfo (CI, CI);
 109 static int decode_part (CT);
 110 static int reformat_part (CT, char *, char *, char *, int);
 111 static int charset_encoding (CT);
 112 static CT build_multipart_alt (CT, CT, int, int);
 113 static int boundary_in_content (FILE **, char *, const char *);
 114 static void transfer_noncontent_headers (CT, CT);
 115 static int set_ct_type (CT, int type, int subtype, int encoding);
 116 static int decode_text_parts (CT, int, const char *, int *);
 117 static int should_decode(const char *, const char *, const char *);
 118 static int content_encoding (CT, const char **);
 119 static int strip_crs (CT, int *);
 120 static int convert_charsets (CT, char *, int *);
 121 static int fix_always (CT, int *);
 122 static int write_content (CT, const char *, char *, int, int);
 123 static void set_text_ctparams(CT, char *, int);
 124 static int remove_file (const char *);
 125 static void report (char *, char *, char *, char *, ...);
 126 static void pipeser (int);
 127
 128
 129 int
 130 main (int argc, char **argv) {
 131     int msgnum;
 132     char *cp, *file = NULL, *folder = NULL;
 133     char *maildir, buf[100], *outfile = NULL;
 134     char **argp, **arguments;
 135     struct msgs_array msgs = { 0, 0, NULL };
 136     struct msgs *mp = NULL;
 137     CT *ctp;
 138     FILE *fp;
 139     int using_stdin = 0;
 140     int chgflag = 1;
 141     int status = OK;
 142     fix_transformations fx;
 143     fx.reformat = fx.fixcte = fx.fixboundary = 1;
 144     fx.fixtypes = NULL;
 145     fx.replacetextplain = 0;
 146     fx.decodetext = CE_8BIT;
 147     fx.decodetypes = "text,application/ics";  /* Default, per man page. */
 148     fx.lf_line_endings = 0;
 149     fx.textcharset = NULL;
 150
 151     if (nmh_init(argv[0], 1)) { return 1; }
 152
 153     done = freects_done;
 154
 155     arguments = getarguments (invo_name, argc, argv, 1);
 156     argp = arguments;
 157
 158     /*
 159      * Parse arguments
 160      */
 161     while ((cp = *argp++)) {
 162         if (*cp == '-') {
 163             switch (smatch (++cp, switches)) {
 164             case AMBIGSW:
 165                 ambigsw (cp, switches);
 166                 done (1);
 167             case UNKWNSW:
 168                 adios (NULL, "-%s unknown", cp);
 169
 170             case HELPSW:
 171                 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
 172                         invo_name);
 173                 print_help (buf, switches, 1);
 174                 done (0);
 175             case VERSIONSW:
 176                 print_version(invo_name);
 177                 done (0);
 178
 179             case DECODETEXTSW:
 180                 if (! (cp = *argp++)  ||  *cp == '-') {
 181                     adios (NULL, "missing argument to %s", argp[-2]);
 182                 }
 183                 if (! strcasecmp (cp, "8bit")) {
 184                     fx.decodetext = CE_8BIT;
 185                 } else if (! strcasecmp (cp, "7bit")) {
 186                     fx.decodetext = CE_7BIT;
 187                 } else if (! strcasecmp (cp, "binary")) {
 188                     fx.decodetext = CE_BINARY;
 189                 } else {
 190                     adios (NULL, "invalid argument to %s", argp[-2]);
 191                 }
 192                 continue;
 193             case NDECODETEXTSW:
 194                 fx.decodetext = 0;
 195                 continue;
 196             case DECODETYPESW:
 197                 if (! (cp = *argp++)  ||  *cp == '-') {
 198                     adios (NULL, "missing argument to %s", argp[-2]);
 199                 }
 200                 fx.decodetypes = cp;
 201                 continue;
 202             case CRLFLINEBREAKSSW:
 203                 fx.lf_line_endings = 0;
 204                 continue;
 205             case NCRLFLINEBREAKSSW:
 206                 fx.lf_line_endings = 1;
 207                 continue;
 208             case TEXTCHARSETSW:
 209                 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
 210                     adios (NULL, "missing argument to %s", argp[-2]);
 211                 }
 212                 fx.textcharset = cp;
 213                 continue;
 214             case NTEXTCHARSETSW:
 215                 fx.textcharset = 0;
 216                 continue;
 217             case FIXBOUNDARYSW:
 218                 fx.fixboundary = 1;
 219                 continue;
 220             case NFIXBOUNDARYSW:
 221                 fx.fixboundary = 0;
 222                 continue;
 223             case FIXCTESW:
 224                 fx.fixcte = 1;
 225                 continue;
 226             case NFIXCTESW:
 227                 fx.fixcte = 0;
 228                 continue;
 229             case FIXTYPESW:
 230                 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
 231                     adios (NULL, "missing argument to %s", argp[-2]);
 232                 }
 233                 if (! strncasecmp (cp, "multipart/", 10)  ||
 234                     ! strncasecmp (cp, "message/", 8)) {
 235                     adios (NULL, "-fixtype %s not allowed", cp);
 236                 } else if (! strchr (cp, '/')) {
 237                     adios (NULL, "-fixtype requires type/subtype");
 238                 }
 239                 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
 240                 svector_push_back (fx.fixtypes, cp);
 241                 continue;
 242             case REFORMATSW:
 243                 fx.reformat = 1;
 244                 continue;
 245             case NREFORMATSW:
 246                 fx.reformat = 0;
 247                 continue;
 248             case REPLACETEXTPLAINSW:
 249                 fx.replacetextplain = 1;
 250                 continue;
 251             case NREPLACETEXTPLAINSW:
 252                 fx.replacetextplain = 0;
 253                 continue;
 254             case FILESW:
 255                 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
 256                     adios (NULL, "missing argument to %s", argp[-2]);
 257                 }
 258                 file = *cp == '-'  ?  add (cp, NULL)  :  path (cp, TFILE);
 259                 continue;
 260             case OUTFILESW:
 261                 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
 262                     adios (NULL, "missing argument to %s", argp[-2]);
 263                 }
 264                 outfile = *cp == '-'  ?  add (cp, NULL)  :  path (cp, TFILE);
 265                 continue;
 266             case RPROCSW:
 267                 if (!(rmmproc = *argp++) || *rmmproc == '-') {
 268                     adios (NULL, "missing argument to %s", argp[-2]);
 269                 }
 270                 continue;
 271             case NRPRCSW:
 272                 rmmproc = NULL;
 273                 continue;
 274             case CHGSW:
 275                 chgflag = 1;
 276                 continue;
 277             case NCHGSW:
 278                 chgflag = 0;
 279                 continue;
 280             case VERBSW:
 281                 verbosw = 1;
 282                 continue;
 283             case NVERBSW:
 284                 verbosw = 0;
 285                 continue;
 286             }
 287         }
 288         if (*cp == '+' || *cp == '@') {
 289             if (folder) {
 290                 adios (NULL, "only one folder at a time!");
 291             } else {
 292                 folder = pluspath (cp);
 293             }
 294         } else {
 295             if (*cp == '/') {
 296                 /* Interpret a full path as a filename, not a message. */
 297                 file = add (cp, NULL);
 298             } else {
 299                 app_msgarg (&msgs, cp);
 300             }
 301         }
 302     }
 303
 304     SIGNAL (SIGQUIT, quitser);
 305     SIGNAL (SIGPIPE, pipeser);
 306
 307     /*
 308      * Read the standard profile setup
 309      */
 310     if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
 311         readconfig ((struct node **) 0, fp, cp, 0);
 312         fclose (fp);
 313     }
 314
 315     suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
 316     suppress_extraneous_trailing_semicolon_warning = 1;
 317
 318     if (! context_find ("path")) {
 319         free (path ("./", TFOLDER));
 320     }
 321
 322     if (file && msgs.size) {
 323         adios (NULL, "cannot specify msg and file at same time!");
 324     }
 325
 326     /*
 327      * check if message is coming from file
 328      */
 329     if (file) {
 330         /* If file is stdin, create a tmp file name before parse_mime()
 331            has a chance, because it might put in on a different
 332            filesystem than the output file.  Instead, put it in the
 333            user's preferred tmp directory. */
 334         CT ct;
 335
 336         if (! strcmp ("-", file)) {
 337             int fd;
 338             char *cp;
 339
 340             using_stdin = 1;
 341
 342             if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
 343                 adios (NULL, "unable to create temporary file in %s",
 344                        get_temp_dir());
 345             } else {
 346                 free (file);
 347                 file = add (cp, NULL);
 348                 cpydata (STDIN_FILENO, fd, "-", file);
 349             }
 350
 351             if (close (fd)) {
 352                 (void) m_unlink (file);
 353                 adios (NULL, "failed to write temporary file");
 354             }
 355         }
 356
 357         if (! (cts = (CT *) mh_xcalloc ((size_t) 2, sizeof *cts))) {
 358             adios (NULL, "out of memory");
 359         }
 360         ctp = cts;
 361
 362         if ((ct = parse_mime (file))) {
 363             set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
 364             *ctp++ = ct;
 365         } else {
 366             advise (NULL, "unable to parse message from file %s", file);
 367             status = NOTOK;
 368
 369             /* If there's an outfile, pass the input message unchanged, so the message won't
 370                get dropped from a pipeline. */
 371             if (outfile) {
 372                 /* Something went wrong.  Output might be expected, such as if this were run
 373                    as a filter.  Just copy the input to the output. */
 374                 if (copy_input_to_output (file, outfile) != OK) {
 375                     advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
 376                 }
 377             }
 378         }
 379     } else {
 380         /*
 381          * message(s) are coming from a folder
 382          */
 383         CT ct;
 384
 385         if (! msgs.size) {
 386             app_msgarg(&msgs, "cur");
 387         }
 388         if (! folder) {
 389             folder = getfolder (1);
 390         }
 391         maildir = m_maildir (folder);
 392
 393         if (chdir (maildir) == NOTOK) {
 394             adios (maildir, "unable to change directory to");
 395         }
 396
 397         /* read folder and create message structure */
 398         if (! (mp = folder_read (folder, 1))) {
 399             adios (NULL, "unable to read folder %s", folder);
 400         }
 401
 402         /* check for empty folder */
 403         if (mp->nummsg == 0) {
 404             adios (NULL, "no messages in %s", folder);
 405         }
 406
 407         /* parse all the message ranges/sequences and set SELECTED */
 408         for (msgnum = 0; msgnum < msgs.size; msgnum++)
 409             if (! m_convert (mp, msgs.msgs[msgnum])) {
 410                 done (1);
 411             }
 412         seq_setprev (mp);       /* set the previous-sequence */
 413
 414         if (! (cts =
 415                (CT *) mh_xcalloc ((size_t) (mp->numsel + 1), sizeof *cts))) {
 416             adios (NULL, "out of memory");
 417         }
 418         ctp = cts;
 419
 420         for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
 421             if (is_selected(mp, msgnum)) {
 422                 char *msgnam;
 423
 424                 msgnam = m_name (msgnum);
 425                 if ((ct = parse_mime (msgnam))) {
 426                     set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
 427                     *ctp++ = ct;
 428                 } else {
 429                     advise (NULL, "unable to parse message %s", msgnam);
 430                     status = NOTOK;
 431
 432                     /* If there's an outfile, pass the input message unchanged, so the message won't
 433                        get dropped from a pipeline. */
 434                     if (outfile) {
 435                         /* Something went wrong.  Output might be expected, such as if this were run
 436                            as a filter.  Just copy the input to the output. */
 437                         const char *input_filename = path (msgnam, TFILE);
 438
 439                         if (copy_input_to_output (input_filename, outfile) != OK) {
 440                             advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
 441                         }
 442                     }
 443                 }
 444             }
 445         }
 446
 447         if (chgflag) {
 448             seq_setcur (mp, mp->hghsel);  /* update current message */
 449         }
 450         seq_save (mp);                    /* synchronize sequences  */
 451         context_replace (pfolder, folder);/* update current folder  */
 452         context_save ();                  /* save the context file  */
 453     }
 454
 455     if (*cts) {
 456         for (ctp = cts; *ctp; ++ctp) {
 457             status += mhfixmsgsbr (ctp, &fx, outfile);
 458
 459             if (using_stdin) {
 460                 (void) m_unlink (file);
 461
 462                 if (! outfile) {
 463                     /* Just calling m_backup() unlinks the backup file. */
 464                     (void) m_backup (file);
 465                 }
 466             }
 467         }
 468     } else {
 469         status = 1;
 470     }
 471
 472     if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
 473     free (outfile);
 474     free (file);
 475     free (folder);
 476     free (arguments);
 477
 478     /* done is freects_done, which will clean up all of cts. */
 479     done (status);
 480     return NOTOK;
 481 }
 482
 483
 484 int
 485 mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
 486     /* Store input filename in case one of the transformations, i.e.,
 487        fix_boundary(), rewrites to a tmp file. */
 488     char *input_filename = add ((*ctp)->c_file, NULL);
 489     int modify_inplace = 0;
 490     int message_mods = 0;
 491     int status = OK;
 492
 493     if (outfile == NULL) {
 494         modify_inplace = 1;
 495
 496         if ((*ctp)->c_file) {
 497             char *tempfile;
 498             if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
 499                 adios (NULL, "unable to create temporary file in %s",
 500                        get_temp_dir());
 501             }
 502             outfile = add (tempfile, NULL);
 503         } else {
 504             adios (NULL, "missing both input and output filenames\n");
 505         }
 506     }
 507
 508     reverse_alternative_parts (*ctp);
 509     status = fix_always (*ctp, &message_mods);
 510     if (status == OK  &&  fx->fixboundary) {
 511         status = fix_boundary (ctp, &message_mods);
 512     }
 513     if (status == OK  && fx->fixtypes != NULL) {
 514         status = fix_types (*ctp, fx->fixtypes, &message_mods);
 515     }
 516     if (status == OK  &&  fx->fixcte) {
 517         status = fix_multipart_cte (*ctp, &message_mods);
 518     }
 519     if (status == OK  &&  fx->reformat) {
 520         status =
 521             ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
 522     }
 523     if (status == OK  &&  fx->decodetext) {
 524         status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes, &message_mods);
 525     }
 526     if (status == OK  &&  fx->textcharset != NULL) {
 527         status = convert_charsets (*ctp, fx->textcharset, &message_mods);
 528     }
 529
 530     if (status == OK  &&  ! (*ctp)->c_umask) {
 531         /* Set the umask for the contents file.  This currently
 532            isn't used but just in case it is in the future. */
 533         struct stat st;
 534
 535         if (stat ((*ctp)->c_file, &st) != NOTOK) {
 536             (*ctp)->c_umask = ~(st.st_mode & 0777);
 537         } else {
 538             (*ctp)->c_umask = ~m_gmprot();
 539         }
 540     }
 541
 542     /*
 543      * Write the content to a file
 544      */
 545     if (status == OK) {
 546         status = write_content (*ctp, input_filename, outfile, modify_inplace,
 547                                 message_mods);
 548     } else if (! modify_inplace) {
 549         /* Something went wrong.  Output might be expected, such
 550            as if this were run as a filter.  Just copy the input
 551            to the output. */
 552         if (copy_input_to_output (input_filename, outfile) != OK) {
 553             advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
 554         }
 555     }
 556
 557     if (modify_inplace) {
 558         if (status != OK) { (void) m_unlink (outfile); }
 559         free (outfile);
 560         outfile = NULL;
 561     }
 562
 563     free (input_filename);
 564
 565     return status;
 566 }
 567
 568
 569 /* Copy input message to output.  Assumes not modifying in place, so this
 570    might be running as part of a pipeline. */
 571 static int
 572 copy_input_to_output (const char *input_filename, const char *output_filename) {
 573     int in = open (input_filename, O_RDONLY);
 574     int out = strcmp (output_filename, "-")
 575         ?  open (output_filename, O_WRONLY | O_CREAT, m_gmprot ())
 576         :  STDOUT_FILENO;
 577     int status = OK;
 578
 579     if (in != -1  &&  out != -1) {
 580         cpydata (in, out, input_filename, output_filename);
 581     } else {
 582         status = NOTOK;
 583     }
 584
 585     close (out);
 586     close (in);
 587
 588     return status;
 589 }
 590
 591
 592 static int
 593 fix_boundary (CT *ct, int *message_mods) {
 594     struct multipart *mp;
 595     int status = OK;
 596
 597     if (ct  &&  (*ct)->c_type == CT_MULTIPART  &&  bogus_mp_content) {
 598         mp = (struct multipart *) (*ct)->c_ctparams;
 599
 600         /*
 601          * 1) Get boundary at end of part.
 602          * 2) Get boundary at beginning of part and compare to the end-of-part
 603          *    boundary.
 604          * 3) Write out contents of ct to tmp file, replacing boundary in
 605          *    header with boundary from part.  Set c_unlink to 1.
 606          * 4) Free ct.
 607          * 5) Call parse_mime() on the tmp file, replacing ct.
 608          */
 609
 610         if (mp  &&  mp->mp_start) {
 611             char *part_boundary;
 612
 613             if (get_multipart_boundary (*ct, &part_boundary) == OK) {
 614                 char *fixed;
 615
 616                 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
 617                     if (replace_boundary (*ct, fixed, part_boundary) == OK) {
 618                         char *filename = add ((*ct)->c_file, NULL);
 619                         CT fixed_ct;
 620
 621                         free_content (*ct);
 622                         if ((fixed_ct = parse_mime (fixed))) {
 623                             *ct = fixed_ct;
 624                             (*ct)->c_unlink = 1;
 625
 626                             ++*message_mods;
 627                             if (verbosw) {
 628                                 report (NULL, NULL, filename,
 629                                         "fix multipart boundary");
 630                             }
 631                         } else {
 632                             *ct = NULL;
 633                             advise (NULL, "unable to parse fixed part");
 634                             status = NOTOK;
 635                         }
 636                         free (filename);
 637                     } else {
 638                         advise (NULL, "unable to replace broken boundary");
 639                         status = NOTOK;
 640                     }
 641                 } else {
 642                     advise (NULL, "unable to create temporary file in %s",
 643                             get_temp_dir());
 644                     status = NOTOK;
 645                 }
 646
 647                 free (part_boundary);
 648             } else {
 649                 /* Couldn't fix the boundary.  Report failure so that mhfixmsg
 650                    doesn't modify the message. */
 651                 status = NOTOK;
 652             }
 653         } else {
 654             /* No multipart struct, even though the content type is
 655                CT_MULTIPART.  Report failure so that mhfixmsg doesn't modify
 656                the message. */
 657             status = NOTOK;
 658         }
 659     }
 660
 661     return status;
 662 }
 663
 664
 665 static int
 666 get_multipart_boundary (CT ct, char **part_boundary) {
 667     char buffer[BUFSIZ];
 668     char *end_boundary = NULL;
 669     off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
 670         ?  (off_t) (ct->c_end - sizeof buffer)
 671         :  (off_t) ct->c_begin;
 672     size_t bytes_read;
 673     int status = OK;
 674
 675     /* This will fail if the boundary spans fread() calls.  BUFSIZ should
 676        be big enough, even if it's just 1024, to make that unlikely. */
 677
 678     /* free_content() will close ct->c_fp. */
 679     if (! ct->c_fp  &&  (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
 680         advise (ct->c_file, "unable to open for reading");
 681         return NOTOK;
 682     }
 683
 684     /* Get boundary at end of multipart. */
 685     while (begin >= (off_t) ct->c_begin) {
 686         fseeko (ct->c_fp, begin, SEEK_SET);
 687         while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
 688             char *cp = rfind_str (buffer, bytes_read, "--");
 689
 690             if (cp) {
 691                 char *end;
 692
 693                 /* Trim off trailing "--" and anything beyond. */
 694                 *cp-- = '\0';
 695                 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
 696                     if (strlen (end) > 3  &&  *end++ == '\n'  &&
 697                         *end++ == '-'  &&  *end++ == '-') {
 698                         end_boundary = add (end, NULL);
 699                         break;
 700                     }
 701                 }
 702             }
 703         }
 704
 705         if (! end_boundary  &&  begin > (off_t) (ct->c_begin + sizeof buffer)) {
 706             begin -= sizeof buffer;
 707         } else {
 708             break;
 709         }
 710     }
 711
 712     /* Get boundary at beginning of multipart. */
 713     if (end_boundary) {
 714         fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
 715         while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
 716             if (bytes_read >= strlen (end_boundary)) {
 717                 char *cp = find_str (buffer, bytes_read, end_boundary);
 718
 719                 if (cp  &&  cp - buffer >= 2  &&  *--cp == '-'  &&
 720                     *--cp == '-'  &&  (cp > buffer  &&  *--cp == '\n')) {
 721                     status = OK;
 722                     break;
 723                 }
 724             } else {
 725                 /* The start and end boundaries didn't match, or the
 726                    start boundary doesn't begin with "\n--" (or "--"
 727                    if at the beginning of buffer).  Keep trying. */
 728                 status = NOTOK;
 729             }
 730         }
 731     } else {
 732         status = NOTOK;
 733     }
 734
 735     if (status == OK) {
 736         *part_boundary = end_boundary;
 737     } else {
 738         *part_boundary = NULL;
 739         free (end_boundary);
 740     }
 741
 742     return status;
 743 }
 744
 745
 746 /* Open and copy ct->c_file to file, replacing the multipart boundary. */
 747 static int
 748 replace_boundary (CT ct, char *file, char *boundary) {
 749     FILE *fpin, *fpout;
 750     int compnum, state;
 751     char buf[BUFSIZ], name[NAMESZ];
 752     char *np, *vp;
 753     m_getfld_state_t gstate = 0;
 754     int status = OK;
 755
 756     if (ct->c_file == NULL) {
 757         advise (NULL, "missing input filename");
 758         return NOTOK;
 759     }
 760
 761     if ((fpin = fopen (ct->c_file, "r")) == NULL) {
 762         advise (ct->c_file, "unable to open for reading");
 763         return NOTOK;
 764     }
 765
 766     if ((fpout = fopen (file, "w")) == NULL) {
 767         fclose (fpin);
 768         advise (file, "unable to open for writing");
 769         return NOTOK;
 770     }
 771
 772     for (compnum = 1;;) {
 773         int bufsz = (int) sizeof buf;
 774
 775         switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
 776         case FLD:
 777         case FLDPLUS:
 778             compnum++;
 779
 780             /* get copies of the buffers */
 781             np = add (name, NULL);
 782             vp = add (buf, NULL);
 783
 784             /* if necessary, get rest of field */
 785             while (state == FLDPLUS) {
 786                 bufsz = sizeof buf;
 787                 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
 788                 vp = add (buf, vp);     /* add to previous value */
 789             }
 790
 791             if (strcasecmp (TYPE_FIELD, np)) {
 792                 fprintf (fpout, "%s:%s", np, vp);
 793             } else {
 794                 char *new_ctline, *new_params;
 795
 796                 replace_param(&ct->c_ctinfo.ci_first_pm,
 797                               &ct->c_ctinfo.ci_last_pm, "boundary",
 798                               boundary, 0);
 799
 800                 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
 801                                     ct->c_ctinfo.ci_subtype, NULL);
 802                 new_params = output_params(strlen(TYPE_FIELD) +
 803                                            strlen(new_ctline) + 1,
 804                                            ct->c_ctinfo.ci_first_pm, NULL, 0);
 805                 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
 806                          new_params ? new_params : "");
 807                 free(new_ctline);
 808                 if (new_params) {
 809                     free(new_params);
 810                 }
 811             }
 812
 813             free (vp);
 814             free (np);
 815
 816             continue;
 817
 818         case BODY:
 819             fputs ("\n", fpout);
 820             /* buf will have a terminating NULL, skip it. */
 821             if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
 822                 advise (file, "fwrite");
 823             }
 824             continue;
 825
 826         case FILEEOF:
 827             break;
 828
 829         case LENERR:
 830         case FMTERR:
 831             advise (NULL, "message format error in component #%d", compnum);
 832             status = NOTOK;
 833             break;
 834
 835         default:
 836             advise (NULL, "getfld() returned %d", state);
 837             status = NOTOK;
 838             break;
 839         }
 840
 841         break;
 842     }
 843
 844     m_getfld_state_destroy (&gstate);
 845     fclose (fpout);
 846     fclose (fpin);
 847
 848     return status;
 849 }
 850
 851
 852 static int
 853 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
 854     int status = OK;
 855
 856     switch (ct->c_type) {
 857     case CT_MULTIPART: {
 858         struct multipart *m = (struct multipart *) ct->c_ctparams;
 859         struct part *part;
 860
 861         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
 862             status = fix_types (part->mp_part, fixtypes, message_mods);
 863         }
 864         break;
 865     }
 866
 867     case CT_MESSAGE:
 868         if (ct->c_subtype == MESSAGE_EXTERNAL) {
 869             struct exbody *e = (struct exbody *) ct->c_ctparams;
 870
 871             status = fix_types (e->eb_content, fixtypes, message_mods);
 872         }
 873         break;
 874
 875     default: {
 876         char **typep, *type;
 877
 878         if (ct->c_ctinfo.ci_type  &&  ct->c_ctinfo.ci_subtype) {
 879             for (typep = svector_strs (fixtypes);
 880                  typep && (type = *typep);
 881                  ++typep) {
 882                 char *type_subtype =
 883                     concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
 884                             NULL);
 885
 886                 if (! strcasecmp (type, type_subtype)  &&
 887                     decode_part (ct) == OK  &&
 888                     ct->c_cefile.ce_file != NULL) {
 889                     char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
 890                     char *cp;
 891
 892                     if ((cp = strchr (ct_type_subtype, ';'))) {
 893                         /* Truncate to remove any parameter list from
 894                            mime_type () result. */
 895                         *cp = '\0';
 896                     }
 897
 898                     if (strcasecmp (type, ct_type_subtype)) {
 899                         char *ct_type, *ct_subtype;
 900                         HF hf;
 901
 902                         /* The Content-Type header does not match the
 903                            content, so update these struct Content
 904                            fields to match:
 905                            * c_type, c_subtype
 906                            * c_ctinfo.ci_type, c_ctinfo.ci_subtype
 907                            * c_ctline
 908                            */
 909                         /* Extract type and subtype from type/subtype. */
 910                         ct_type = getcpy (ct_type_subtype);
 911                         if ((cp = strchr (ct_type, '/'))) {
 912                             *cp = '\0';
 913                             ct_subtype = getcpy (++cp);
 914                         } else {
 915                             advise (NULL, "missing / in MIME type of %s %s",
 916                                     ct->c_file, ct->c_partno);
 917                             free (ct_type);
 918                             return NOTOK;
 919                         }
 920
 921                         ct->c_type = ct_str_type (ct_type);
 922                         ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
 923
 924                         free (ct->c_ctinfo.ci_type);
 925                         ct->c_ctinfo.ci_type = ct_type;
 926                         free (ct->c_ctinfo.ci_subtype);
 927                         ct->c_ctinfo.ci_subtype = ct_subtype;
 928                         if (! replace_substring (&ct->c_ctline, type,
 929                                                  ct_type_subtype)) {
 930                             advise (NULL, "did not find %s in %s",
 931                                     type, ct->c_ctline);
 932                         }
 933
 934                         /* Update Content-Type header field. */
 935                         for (hf = ct->c_first_hf; hf; hf = hf->next) {
 936                             if (! strcasecmp (TYPE_FIELD, hf->name)) {
 937                                 if (replace_substring (&hf->value, type,
 938                                                        ct_type_subtype)) {
 939                                     ++*message_mods;
 940                                     if (verbosw) {
 941                                         report (NULL, ct->c_partno, ct->c_file,
 942                                                 "change Content-Type in header "
 943                                                 "from %s to %s",
 944                                                 type, ct_type_subtype);
 945                                     }
 946                                     break;
 947                                 } else {
 948                                     advise (NULL, "did not find %s in %s",
 949                                             type, hf->value);
 950                                 }
 951                             }
 952                         }
 953                     }
 954                     free (ct_type_subtype);
 955                 }
 956                 free (type_subtype);
 957             }
 958         }
 959     }}
 960
 961     return status;
 962 }
 963
 964 char *
 965 replace_substring (char **str, const char *old, const char *new) {
 966     char *cp;
 967
 968     if ((cp = strstr (*str, old))) {
 969         char *remainder = cp + strlen (old);
 970         char *prefix, *new_str;
 971
 972         if (cp - *str) {
 973             prefix = getcpy (*str);
 974             *(prefix + (cp - *str)) = '\0';
 975             new_str = concat (prefix, new, remainder, NULL);
 976             free (prefix);
 977         } else {
 978             new_str = concat (new, remainder, NULL);
 979         }
 980
 981         free (*str);
 982
 983         return *str = new_str;
 984     } else {
 985         return NULL;
 986     }
 987 }
 988
 989 /*
 990  * Remove a name=value parameter, given just its name, from a header value.
 991  */
 992 char *
 993 remove_parameter (char *str, const char *name) {
 994     /* It looks to me, based on the BNF in RFC 2045, than there can't
 995        be whitespace betwwen the parameter name and the "=", or
 996        between the "=" and the parameter value. */
 997     char *param_name = concat (name, "=", NULL);
 998     char *cp;
 999
1000     if ((cp = strstr (str, param_name))) {
1001         char *start, *end;
1002         size_t count = 1;
1003
1004         /* Remove any leading spaces, before the parameter name. */
1005         for (start = cp;
1006              start > str && isspace ((unsigned char) *(start-1));
1007              --start) {
1008             continue;
1009         }
1010         /* Remove a leading semicolon. */
1011         if (start > str  &&  *(start-1) == ';') { --start; }
1012
1013         end = cp + strlen (name) + 1;
1014         if (*end == '"') {
1015             /* Skip past the quoted value, and then the final quote. */
1016             for (++end ; *end  &&  *end != '"'; ++end) { continue; }
1017             ++end;
1018         } else {
1019             /* Skip past the value. */
1020             for (++end ; *end  &&  ! isspace ((unsigned char) *end); ++end) {}
1021         }
1022
1023         /* Count how many characters need to be moved.  Include
1024            trailing null, which is accounted for by the
1025            initialization of count to 1. */
1026         for (cp = end; *cp; ++cp) { ++count; }
1027         (void) memmove (start, end, count);
1028     }
1029
1030     free (param_name);
1031
1032     return str;
1033 }
1034
1035 static int
1036 fix_multipart_cte (CT ct, int *message_mods) {
1037     int status = OK;
1038
1039     if (ct->c_type == CT_MULTIPART) {
1040         struct multipart *m;
1041         struct part *part;
1042
1043         if (ct->c_encoding != CE_7BIT  &&  ct->c_encoding != CE_8BIT  &&
1044             ct->c_encoding != CE_BINARY) {
1045             HF hf;
1046
1047             for (hf = ct->c_first_hf; hf; hf = hf->next) {
1048                 char *name = hf->name;
1049                 for (; *name && isspace ((unsigned char) *name); ++name) {
1050                     continue;
1051                 }
1052
1053                 if (! strncasecmp (name, ENCODING_FIELD,
1054                                    strlen (ENCODING_FIELD))) {
1055                     char *prefix = "Nmh-REPLACED-INVALID-";
1056                     HF h = mh_xmalloc (sizeof *h);
1057
1058                     h->name = add (hf->name, NULL);
1059                     h->hf_encoding = hf->hf_encoding;
1060                     h->next = hf->next;
1061                     hf->next = h;
1062
1063                     /* Retain old header but prefix its name. */
1064                     free (hf->name);
1065                     hf->name = concat (prefix, h->name, NULL);
1066
1067                     ++*message_mods;
1068                     if (verbosw) {
1069                         char *encoding = cpytrim (hf->value);
1070                         report (NULL, ct->c_partno, ct->c_file,
1071                                 "replace Content-Transfer-Encoding of %s "
1072                                 "with 8 bit", encoding);
1073                         free (encoding);
1074                     }
1075
1076                     h->value = add (" 8bit\n", NULL);
1077
1078                     /* Don't need to warn for multiple C-T-E header
1079                        fields, parse_mime() already does that.  But
1080                        if there are any, fix them all as necessary. */
1081                     hf = h;
1082                 }
1083             }
1084
1085             set_ce (ct, CE_8BIT);
1086         }
1087
1088         m = (struct multipart *) ct->c_ctparams;
1089         for (part = m->mp_parts; part; part = part->mp_next) {
1090             if (fix_multipart_cte (part->mp_part, message_mods) != OK) {
1091                 status = NOTOK;
1092                 break;
1093             }
1094         }
1095     }
1096
1097     return status;
1098 }
1099
1100
1101 static int
1102 set_ce (CT ct, int encoding) {
1103     const char *ce = ce_str (encoding);
1104     const struct str2init *ctinit = get_ce_method (ce);
1105
1106     if (ctinit) {
1107         char *cte = concat (" ", ce, "\n", NULL);
1108         int found_cte = 0;
1109         HF hf;
1110         /* Decoded contents might be in ct->c_cefile.ce_file, if the
1111            caller is decode_text_parts ().  Save because we'll
1112            overwrite below. */
1113         struct cefile decoded_content_info = ct->c_cefile;
1114
1115         ct->c_encoding = encoding;
1116
1117         ct->c_ctinitfnx = ctinit->si_init;
1118         /* This will assign ct->c_cefile with an all-0 struct, which
1119            is what we want. */
1120         (*ctinit->si_init) (ct);
1121         /* After returning, the caller should set
1122            ct->c_cefile.ce_file to the name of the file containing
1123            the contents. */
1124
1125         /* Restore the cefile. */
1126         ct->c_cefile = decoded_content_info;
1127
1128         /* Update/add Content-Transfer-Encoding header field. */
1129         for (hf = ct->c_first_hf; hf; hf = hf->next) {
1130             if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1131                 found_cte = 1;
1132                 free (hf->value);
1133                 hf->value = cte;
1134             }
1135         }
1136         if (! found_cte) {
1137             add_header (ct, add (ENCODING_FIELD, NULL), cte);
1138         }
1139
1140         /* Update c_celine.  It's used only by mhlist -debug. */
1141         free (ct->c_celine);
1142         ct->c_celine = add (cte, NULL);
1143
1144         return OK;
1145     } else {
1146         return NOTOK;
1147     }
1148 }
1149
1150
1151 /* Make sure each text part has a corresponding text/plain part. */
1152 static int
1153 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1154     int status = OK;
1155
1156     switch ((*ct)->c_type) {
1157     case CT_TEXT: {
1158         /* Nothing to do for text/plain. */
1159         if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1160
1161         if (parent  &&  parent->c_type == CT_MULTIPART  &&
1162             parent->c_subtype == MULTI_ALTERNATE) {
1163             int new_subpart_number = 1;
1164             int has_text_plain =
1165                 find_textplain_sibling (parent, replacetextplain,
1166                                         &new_subpart_number);
1167
1168             if (! has_text_plain) {
1169                 /* Parent is a multipart/alternative.  Insert a new
1170                    text/plain subpart. */
1171                 const int inserted =
1172                     insert_new_text_plain_part (*ct, new_subpart_number,
1173                                                 parent);
1174                 if (inserted) {
1175                     ++*message_mods;
1176                     if (verbosw) {
1177                         report (NULL, parent->c_partno, parent->c_file,
1178                                 "insert text/plain part");
1179                     }
1180                 } else {
1181                     status = NOTOK;
1182                 }
1183             }
1184         } else if (parent  &&  parent->c_type == CT_MULTIPART  &&
1185             parent->c_subtype == MULTI_RELATED) {
1186             char *type_subtype =
1187                 concat ((*ct)->c_ctinfo.ci_type, "/",
1188                         (*ct)->c_ctinfo.ci_subtype, NULL);
1189             const char *parent_type =
1190                 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1191             int new_subpart_number = 1;
1192             int has_text_plain = 0;
1193
1194             /* Have to do string comparison on the subtype because we
1195                don't enumerate all of them in c_subtype values.
1196                parent_type will be NULL if the multipart/related part
1197                doesn't have a type parameter.  The type parameter must
1198                be specified according to RFC 2387 Sec. 3.1 but not all
1199                messages comply. */
1200             if (parent_type  &&  strcasecmp (type_subtype, parent_type) == 0) {
1201                 /* The type of this part matches the root type of the
1202                    parent multipart/related.  Look to see if there's
1203                    text/plain sibling. */
1204                 has_text_plain =
1205                     find_textplain_sibling (parent, replacetextplain,
1206                                             &new_subpart_number);
1207             }
1208
1209             free (type_subtype);
1210
1211             if (! has_text_plain) {
1212                 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1213                 struct part *part;
1214                 int siblings = 0;
1215
1216                 for (part = mp->mp_parts; part; part = part->mp_next) {
1217                     if (*ct != part->mp_part) {
1218                         ++siblings;
1219                     }
1220                 }
1221
1222                 if (siblings) {
1223                     /* Parent is a multipart/related.  Insert a new
1224                        text/plain subpart in a new multipart/alternative. */
1225                     if (insert_into_new_mp_alt (ct, message_mods)) {
1226                         /* Not an error if text/plain couldn't be added. */
1227                     }
1228                 } else {
1229                     /* There are no siblings, so insert a new text/plain
1230                        subpart, and change the parent type from
1231                        multipart/related to multipart/alternative. */
1232                     const int inserted =
1233                         insert_new_text_plain_part (*ct, new_subpart_number,
1234                                                     parent);
1235
1236                     if (inserted) {
1237                         HF hf;
1238
1239                         parent->c_subtype = MULTI_ALTERNATE;
1240                         parent->c_ctinfo.ci_subtype = getcpy ("alternative");
1241                         if (! replace_substring (&parent->c_ctline, "/related",
1242                                                  "/alternative")) {
1243                             advise (NULL,
1244                                     "did not find multipart/related in %s",
1245                                     parent->c_ctline);
1246                         }
1247
1248                         /* Update Content-Type header field. */
1249                         for (hf = parent->c_first_hf; hf; hf = hf->next) {
1250                             if (! strcasecmp (TYPE_FIELD, hf->name)) {
1251                                 if (replace_substring (&hf->value, "/related",
1252                                                        "/alternative")) {
1253                                     ++*message_mods;
1254                                     if (verbosw) {
1255                                         report (NULL, parent->c_partno,
1256                                                 parent->c_file,
1257                                                 "insert text/plain part");
1258                                     }
1259
1260                                     /* Remove, e.g., type="text/html" from
1261                                        multipart/alternative. */
1262                                     remove_parameter (hf->value, "type");
1263                                     break;
1264                                 } else {
1265                                     advise (NULL, "did not find multipart/"
1266                                                   "related in header %s",
1267                                             hf->value);
1268                                 }
1269                             }
1270                         }
1271                     } else {
1272                         /* Not an error if text/plain couldn't be inserted. */
1273                     }
1274                 }
1275             }
1276         } else {
1277             if (insert_into_new_mp_alt (ct, message_mods)) {
1278                 status = NOTOK;
1279             }
1280         }
1281         break;
1282     }
1283
1284     case CT_MULTIPART: {
1285         struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1286         struct part *part;
1287
1288         for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1289             if ((*ct)->c_type == CT_MULTIPART) {
1290                 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1291                                             replacetextplain);
1292             }
1293         }
1294         break;
1295     }
1296
1297     case CT_MESSAGE:
1298         if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1299             struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1300
1301             status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1302                                         replacetextplain);
1303         }
1304         break;
1305     }
1306
1307     return status;
1308 }
1309
1310
1311 /* See if there is a sibling text/plain. */
1312 static int
1313 find_textplain_sibling (CT parent, int replacetextplain,
1314                         int *new_subpart_number) {
1315     struct multipart *mp = (struct multipart *) parent->c_ctparams;
1316     struct part *part, *prev;
1317     int has_text_plain = 0;
1318
1319     for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1320         ++*new_subpart_number;
1321         if (part->mp_part->c_type == CT_TEXT  &&
1322             part->mp_part->c_subtype == TEXT_PLAIN) {
1323             if (replacetextplain) {
1324                 struct part *old_part;
1325                 if (part == mp->mp_parts) {
1326                     old_part = mp->mp_parts;
1327                     mp->mp_parts = part->mp_next;
1328                 } else {
1329                     old_part = prev->mp_next;
1330                     prev->mp_next = part->mp_next;
1331                 }
1332                 if (verbosw) {
1333                     report (NULL, parent->c_partno, parent->c_file,
1334                             "remove text/plain part %s",
1335                             old_part->mp_part->c_partno);
1336                 }
1337                 free_content (old_part->mp_part);
1338                 free (old_part);
1339             } else {
1340                 has_text_plain = 1;
1341             }
1342             break;
1343         }
1344         prev = part;
1345     }
1346
1347     return has_text_plain;
1348 }
1349
1350
1351 static int
1352 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1353     struct multipart *mp = (struct multipart *) parent->c_ctparams;
1354     struct part *new_part = mh_xmalloc (sizeof *new_part);
1355
1356     if ((new_part->mp_part = build_text_plain_part (ct))) {
1357         char buffer[16];
1358         snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1359
1360         new_part->mp_next = mp->mp_parts;
1361         mp->mp_parts = new_part;
1362         new_part->mp_part->c_partno =
1363             concat (parent->c_partno ? parent->c_partno : "1", ".",
1364                     buffer, NULL);
1365
1366         return 1;
1367     } else {
1368         free_content (new_part->mp_part);
1369         free (new_part);
1370
1371         return 0;
1372     }
1373 }
1374
1375
1376 static CT
1377 build_text_plain_part (CT encoded_part) {
1378     CT tp_part = divide_part (encoded_part);
1379     char *tmp_plain_file = NULL;
1380
1381     if (decode_part (tp_part) == OK) {
1382         /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1383            contains the decoded contents.  And the decoding function, such
1384            as openQuoted, will have set ...->ce_unlink to 1 so that it will
1385            be unlinked by free_content (). */
1386         char *tempfile;
1387
1388         if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1389             advise (NULL, "unable to create temporary file in %s",
1390                     get_temp_dir());
1391         } else {
1392             tmp_plain_file = add (tempfile, NULL);
1393             if (reformat_part (tp_part, tmp_plain_file,
1394                                tp_part->c_ctinfo.ci_type,
1395                                tp_part->c_ctinfo.ci_subtype,
1396                                tp_part->c_type) == OK) {
1397                 return tp_part;
1398             }
1399         }
1400     }
1401
1402     free_content (tp_part);
1403     if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1404     free (tmp_plain_file);
1405
1406     return NULL;
1407 }
1408
1409
1410 /* Slip new text/plain part into a new multipart/alternative. */
1411 static int
1412 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1413     CT tp_part = build_text_plain_part (*ct);
1414     int status = OK;
1415
1416     if (tp_part) {
1417         CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1418                                          MULTI_ALTERNATE);
1419         if (mp_alt) {
1420             struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1421
1422             if (mp  &&  mp->mp_parts) {
1423                 mp->mp_parts->mp_part = tp_part;
1424                 /* Make the new multipart/alternative the parent. */
1425                 *ct = mp_alt;
1426
1427                 ++*message_mods;
1428                 if (verbosw) {
1429                     report (NULL, (*ct)->c_partno, (*ct)->c_file,
1430                             "insert text/plain part");
1431                 }
1432             } else {
1433                 free_content (tp_part);
1434                 free_content (mp_alt);
1435                 status = NOTOK;
1436             }
1437         } else {
1438             status = NOTOK;
1439         }
1440     } else {
1441         /* Not an error if text/plain couldn't be built. */
1442     }
1443
1444     return status;
1445 }
1446
1447 static CT
1448 divide_part (CT ct) {
1449     CT new_part;
1450
1451     if ((new_part = (CT) mh_xcalloc (1, sizeof *new_part)) == NULL)
1452         adios (NULL, "out of memory");
1453
1454     /* Just copy over what is needed for decoding.  c_vrsn and
1455        c_celine aren't necessary. */
1456     new_part->c_file = add (ct->c_file, NULL);
1457     new_part->c_begin = ct->c_begin;
1458     new_part->c_end = ct->c_end;
1459     copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1460     new_part->c_type = ct->c_type;
1461     new_part->c_cefile = ct->c_cefile;
1462     new_part->c_encoding = ct->c_encoding;
1463     new_part->c_ctinitfnx = ct->c_ctinitfnx;
1464     new_part->c_ceopenfnx = ct->c_ceopenfnx;
1465     new_part->c_ceclosefnx = ct->c_ceclosefnx;
1466     new_part->c_cesizefnx = ct->c_cesizefnx;
1467
1468     /* c_ctline is used by reformat__part(), so it can preserve
1469        anything after the type/subtype. */
1470     new_part->c_ctline = add (ct->c_ctline, NULL);
1471
1472     return new_part;
1473 }
1474
1475
1476 static void
1477 copy_ctinfo (CI dest, CI src) {
1478     PM s_pm, d_pm;
1479
1480     dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL;
1481     dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL;
1482
1483     for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1484         d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1485                          s_pm->pm_value, 0);
1486         if (s_pm->pm_charset)
1487             d_pm->pm_charset = getcpy(s_pm->pm_charset);
1488         if (s_pm->pm_lang)
1489             d_pm->pm_lang = getcpy(s_pm->pm_lang);
1490     }
1491
1492     dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL;
1493     dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL;
1494 }
1495
1496
1497 static int
1498 decode_part (CT ct) {
1499     char *tmp_decoded;
1500     int status;
1501     char *tempfile;
1502
1503     if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1504         adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1505     }
1506     tmp_decoded = add (tempfile, NULL);
1507     /* The following call will load ct->c_cefile.ce_file with the tmp
1508        filename of the decoded content.  tmp_decoded will contain the
1509        encoded output, get rid of that. */
1510     status = output_message (ct, tmp_decoded);
1511     (void) m_unlink (tmp_decoded);
1512     free (tmp_decoded);
1513
1514     return status;
1515 }
1516
1517
1518 /* Some of the arguments aren't really needed now, but maybe will
1519    be in the future for other than text types. */
1520 static int
1521 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1522     int output_subtype, output_encoding;
1523     char *cp, *cf;
1524     int status;
1525
1526     /* Hacky:  this redirects the output from whatever command is used
1527        to show the part to a file.  So, the user can't have any output
1528        redirection in that command.
1529        Could show_multi() in mhshowsbr.c avoid this? */
1530
1531     /* Check for invo_name-format-type/subtype. */
1532     if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1533         if (verbosw) {
1534             advise (NULL, "Don't know how to convert %s, there is no "
1535                     "%s-format-%s/%s profile entry",
1536                     ct->c_file, invo_name, type, subtype);
1537         }
1538         return NOTOK;
1539     } else {
1540         if (strchr (cf, '>')) {
1541             advise (NULL, "'>' prohibited in \"%s\",\nplease fix your "
1542                     "%s-format-%s/%s profile entry", cf, invo_name, type,
1543                     subtype ? subtype : "");
1544
1545             return NOTOK;
1546         }
1547     }
1548
1549     cp = concat (cf, " >", file, NULL);
1550     status = show_content_aux (ct, 0, cp, NULL, NULL);
1551     free (cp);
1552
1553     /* Unlink decoded content tmp file and free its filename to avoid
1554        leaks.  The file stream should already have been closed. */
1555     if (ct->c_cefile.ce_unlink) {
1556         (void) m_unlink (ct->c_cefile.ce_file);
1557         free (ct->c_cefile.ce_file);
1558         ct->c_cefile.ce_file = NULL;
1559         ct->c_cefile.ce_unlink = 0;
1560     }
1561
1562     if (c_type == CT_TEXT) {
1563         output_subtype = TEXT_PLAIN;
1564     } else {
1565         /* Set subtype to 0, which is always an UNKNOWN subtype. */
1566         output_subtype = 0;
1567     }
1568     output_encoding = charset_encoding (ct);
1569
1570     if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1571         ct->c_cefile.ce_file = file;
1572         ct->c_cefile.ce_unlink = 1;
1573     } else {
1574         ct->c_cefile.ce_unlink = 0;
1575         status = NOTOK;
1576     }
1577
1578     return status;
1579 }
1580
1581
1582 /* Identifies 7bit or 8bit content based on charset. */
1583 static int
1584 charset_encoding (CT ct) {
1585     char *ct_charset = content_charset (ct);
1586     int encoding = strcasecmp (ct_charset, "US-ASCII")  ?  CE_8BIT  :  CE_7BIT;
1587
1588     free (ct_charset);
1589
1590     return encoding;
1591 }
1592
1593
1594 static CT
1595 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1596     char *boundary_prefix = "----=_nmh-multipart";
1597     char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1598     char *boundary_indicator = "; boundary=";
1599     char *typename, *subtypename, *name;
1600     CT ct;
1601     struct part *p;
1602     struct multipart *m;
1603     const struct str2init *ctinit;
1604
1605     if ((ct = (CT) mh_xcalloc (1, sizeof *ct)) == NULL)
1606         adios (NULL, "out of memory");
1607
1608     /* Set up the multipart/alternative part.  These fields of *ct were
1609        initialized to 0 by mh_xcalloc():
1610        c_fp, c_unlink, c_begin, c_end,
1611        c_vrsn, c_ctline, c_celine,
1612        c_id, c_descr, c_dispo, c_partno,
1613        c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1614        c_cefile, c_encoding,
1615        c_digested, c_digest[16], c_ctexbody,
1616        c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1617        c_umask, c_rfc934,
1618        c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1619     */
1620
1621     ct->c_file = add (first_alt->c_file, NULL);
1622     ct->c_type = type;
1623     ct->c_subtype = subtype;
1624
1625     ctinit = get_ct_init (ct->c_type);
1626
1627     typename = ct_type_str (type);
1628     subtypename = ct_subtype_str (type, subtype);
1629
1630     {
1631         int serial = 0;
1632         int found_boundary = 1;
1633
1634         while (found_boundary  &&  serial < 1000000) {
1635             found_boundary = 0;
1636
1637             /* Ensure that the boundary doesn't appear in the decoded
1638                content. */
1639             if (new_part->c_cefile.ce_file) {
1640                 if ((found_boundary =
1641                      boundary_in_content (&new_part->c_cefile.ce_fp,
1642                                           new_part->c_cefile.ce_file,
1643                                           boundary)) == -1) {
1644                     free (ct);
1645                     return NULL;
1646                 }
1647             }
1648
1649             /* Ensure that the boundary doesn't appear in the encoded
1650                content. */
1651             if (! found_boundary  &&  new_part->c_file) {
1652                 if ((found_boundary = boundary_in_content (&new_part->c_fp,
1653                                                            new_part->c_file,
1654                                                            boundary)) == -1) {
1655                     free (ct);
1656                     return NULL;
1657                 }
1658             }
1659
1660             if (found_boundary) {
1661                 /* Try a slightly different boundary. */
1662                 char buffer2[16];
1663
1664                 free (boundary);
1665                 ++serial;
1666                 snprintf (buffer2, sizeof buffer2, "%d", serial);
1667                 boundary =
1668                     concat (boundary_prefix,
1669                             first_alt->c_partno ? first_alt->c_partno : "",
1670                             "-", buffer2,  NULL);
1671             }
1672         }
1673
1674         if (found_boundary) {
1675             advise (NULL, "giving up trying to find a unique boundary");
1676             free (ct);
1677             return NULL;
1678         }
1679     }
1680
1681     name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1682                    boundary, "\"", NULL);
1683
1684     /* Load c_first_hf and c_last_hf. */
1685     transfer_noncontent_headers (first_alt, ct);
1686     add_header (ct, add (TYPE_FIELD, NULL), concat (name, "\n", NULL));
1687     free (name);
1688
1689     /* Load c_partno. */
1690     if (first_alt->c_partno) {
1691         ct->c_partno = add (first_alt->c_partno, NULL);
1692         free (first_alt->c_partno);
1693         first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1694         new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1695     } else {
1696         first_alt->c_partno = add ("1", NULL);
1697         new_part->c_partno = add ("2", NULL);
1698     }
1699
1700     if (ctinit) {
1701         ct->c_ctinfo.ci_type = add (typename, NULL);
1702         ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1703     }
1704
1705     add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1706               "boundary", boundary, 0);
1707
1708     p = (struct part *) mh_xmalloc (sizeof *p);
1709     p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next);
1710     p->mp_next->mp_next = NULL;
1711     p->mp_next->mp_part = first_alt;
1712
1713     if ((m = (struct multipart *) mh_xcalloc (1, sizeof (struct multipart))) ==
1714         NULL)
1715         adios (NULL, "out of memory");
1716     m->mp_start = concat (boundary, "\n", NULL);
1717     m->mp_stop = concat (boundary, "--\n", NULL);
1718     m->mp_parts = p;
1719     ct->c_ctparams = m;
1720
1721     free (boundary);
1722
1723     return ct;
1724 }
1725
1726
1727 /* Check that the boundary does not appear in the content. */
1728 static int
1729 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1730     char buffer[BUFSIZ];
1731     size_t bytes_read;
1732     int found_boundary = 0;
1733
1734     /* free_content() will close *fp if we fopen it here. */
1735     if (! *fp  &&  (*fp = fopen (file, "r")) == NULL) {
1736         advise (file, "unable to open %s for reading", file);
1737         return NOTOK;
1738     }
1739
1740     fseeko (*fp, 0L, SEEK_SET);
1741     while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1742         if (find_str (buffer, bytes_read, boundary)) {
1743             found_boundary = 1;
1744             break;
1745         }
1746     }
1747
1748     return found_boundary;
1749 }
1750
1751
1752 /* Remove all non-Content headers. */
1753 static void
1754 transfer_noncontent_headers (CT old, CT new) {
1755     HF hp, hp_prev;
1756
1757     hp_prev = hp = old->c_first_hf;
1758     while (hp) {
1759         HF next = hp->next;
1760
1761         if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1762             if (hp == old->c_last_hf) {
1763                 if (hp == old->c_first_hf) {
1764                     old->c_last_hf =  old->c_first_hf = NULL;
1765                 } else {
1766                     hp_prev->next = NULL;
1767                     old->c_last_hf =  hp_prev;
1768                 }
1769             } else {
1770                 if (hp == old->c_first_hf) {
1771                     old->c_first_hf = next;
1772                 } else {
1773                     hp_prev->next = next;
1774                 }
1775             }
1776
1777             /* Put node hp in the new CT. */
1778             if (new->c_first_hf == NULL) {
1779                 new->c_first_hf = hp;
1780             } else {
1781                 new->c_last_hf->next = hp;
1782             }
1783             new->c_last_hf = hp;
1784         } else {
1785             /* A Content- header, leave in old. */
1786             hp_prev = hp;
1787         }
1788
1789         hp = next;
1790     }
1791 }
1792
1793
1794 static int
1795 set_ct_type (CT ct, int type, int subtype, int encoding) {
1796     char *typename = ct_type_str (type);
1797     char *subtypename = ct_subtype_str (type, subtype);
1798     /* E.g, " text/plain" */
1799     char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1800     /* E.g, " text/plain\n" */
1801     char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1802     int found_content_type = 0;
1803     HF hf;
1804     const char *cp = NULL;
1805     char *ctline;
1806     int status;
1807
1808     /* Update/add Content-Type header field. */
1809     for (hf = ct->c_first_hf; hf; hf = hf->next) {
1810         if (! strcasecmp (TYPE_FIELD, hf->name)) {
1811             found_content_type = 1;
1812             free (hf->value);
1813             hf->value = (cp = strchr (ct->c_ctline, ';'))
1814                 ?  concat (type_subtypename, cp, "\n", NULL)
1815                 :  add (name_plus_nl, NULL);
1816         }
1817     }
1818     if (! found_content_type) {
1819         add_header (ct, add (TYPE_FIELD, NULL),
1820                     (cp = strchr (ct->c_ctline, ';'))
1821                     ?  concat (type_subtypename, cp, "\n", NULL)
1822                     :  add (name_plus_nl, NULL));
1823     }
1824
1825     /* Some of these might not be used, but set them anyway. */
1826     ctline = cp
1827         ?  concat (type_subtypename, cp, NULL)
1828         :  concat (type_subtypename, NULL);
1829     free (ct->c_ctline);
1830     ct->c_ctline = ctline;
1831     /* Leave other ctinfo members as they were. */
1832     free (ct->c_ctinfo.ci_type);
1833     ct->c_ctinfo.ci_type = add (typename, NULL);
1834     free (ct->c_ctinfo.ci_subtype);
1835     ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1836     ct->c_type = type;
1837     ct->c_subtype = subtype;
1838
1839     free (name_plus_nl);
1840     free (type_subtypename);
1841
1842     status = set_ce (ct, encoding);
1843
1844     return status;
1845 }
1846
1847
1848 /*
1849  * It's not necessary to update the charset parameter of a Content-Type
1850  * header for a text part.  According to RFC 2045 Sec. 6.4, the body
1851  * (content) was originally in the specified charset, "and will be in
1852  * that character set again after decoding."
1853  */
1854 static int
1855 decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mods) {
1856     int status = OK;
1857     int lf_line_endings = 0;
1858
1859     switch (ct->c_type) {
1860     case CT_MULTIPART: {
1861         struct multipart *m = (struct multipart *) ct->c_ctparams;
1862         struct part *part;
1863
1864         /* Should check to see if the body for this part is encoded?
1865            For now, it gets passed along as-is by InitMultiPart(). */
1866         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
1867             status = decode_text_parts (part->mp_part, encoding, decodetypes, message_mods);
1868         }
1869         break;
1870     }
1871
1872     case CT_MESSAGE:
1873         if (ct->c_subtype == MESSAGE_EXTERNAL) {
1874             struct exbody *e = (struct exbody *) ct->c_ctparams;
1875
1876             status = decode_text_parts (e->eb_content, encoding, decodetypes, message_mods);
1877         }
1878         break;
1879
1880     default:
1881         if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
1882             break;
1883         }
1884
1885         lf_line_endings =
1886             ct->c_ctparams  &&  ((struct text *) ct->c_ctparams)->lf_line_endings;
1887
1888         switch (ct->c_encoding) {
1889         case CE_BASE64:
1890         case CE_QUOTED: {
1891             int ct_encoding;
1892
1893             if (decode_part (ct) == OK  &&  ct->c_cefile.ce_file) {
1894                 const char *reason = NULL;
1895
1896                 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
1897                     &&  encoding != CE_BINARY) {
1898                     /* The decoding isn't acceptable so discard it.
1899                        Leave status as OK to allow other transformations. */
1900                     if (verbosw) {
1901                         report (NULL, ct->c_partno, ct->c_file,
1902                                 "will not decode%s because it is binary (%s)",
1903                                 ct->c_partno  ?  ""
1904                                               :  ct->c_ctline  ?  ct->c_ctline
1905                                                                :  "",
1906                                 reason);
1907                     }
1908                     (void) m_unlink (ct->c_cefile.ce_file);
1909                     free (ct->c_cefile.ce_file);
1910                     ct->c_cefile.ce_file = NULL;
1911                 } else if (ct->c_encoding == CE_QUOTED  &&
1912                            ct_encoding == CE_8BIT  &&  encoding == CE_7BIT) {
1913                     /* The decoding isn't acceptable so discard it.
1914                        Leave status as OK to allow other transformations. */
1915                     if (verbosw) {
1916                         report (NULL, ct->c_partno, ct->c_file,
1917                                 "will not decode%s because it is 8bit",
1918                                 ct->c_partno  ?  ""
1919                                               :  ct->c_ctline  ?  ct->c_ctline
1920                                                                :  "");
1921                     }
1922                     (void) m_unlink (ct->c_cefile.ce_file);
1923                     free (ct->c_cefile.ce_file);
1924                     ct->c_cefile.ce_file = NULL;
1925                 } else {
1926                     int enc;
1927                     if (ct_encoding == CE_BINARY) {
1928                         enc = CE_BINARY;
1929                     } else if (ct_encoding == CE_8BIT  &&  encoding == CE_7BIT) {
1930                         enc = CE_QUOTED;
1931                     } else {
1932                         enc = charset_encoding (ct);
1933                     }
1934                     if (set_ce (ct, enc) == OK) {
1935                         ++*message_mods;
1936                         if (verbosw) {
1937                             report (NULL, ct->c_partno, ct->c_file, "decode%s",
1938                                     ct->c_ctline ? ct->c_ctline : "");
1939                         }
1940                         if (lf_line_endings) {
1941                             strip_crs (ct, message_mods);
1942                         }
1943                     } else {
1944                         status = NOTOK;
1945                     }
1946                 }
1947             } else {
1948                 status = NOTOK;
1949             }
1950             break;
1951         }
1952         case CE_8BIT:
1953         case CE_7BIT:
1954             if (lf_line_endings) {
1955                 strip_crs (ct, message_mods);
1956             }
1957             break;
1958         default:
1959             break;
1960         }
1961
1962         break;
1963     }
1964
1965     return status;
1966 }
1967
1968
/* Compare the len bytes at tok with the whole string s, ignoring case.
   Returns 1 if they are equal, 0 otherwise. */
static int
decodetype_equals (const char *tok, size_t len, const char *s) {
    size_t i;

    for (i = 0; i < len; ++i) {
        if (s[i] == '\0'  ||
            tolower ((unsigned char) tok[i]) !=
            tolower ((unsigned char) s[i])) {
            return 0;
        }
    }

    return s[len] == '\0';
}


/* Determine if the part with type[/subtype] should be decoded, according to
   decodetypes (which came from the -decodetypes switch).

   decodetypes is a comma-separated list.  The part matches if one of
   its elements equals, ignoring case, either type or type/subtype.
   Elements are compared whole and exactly as written (no whitespace is
   trimmed).  type and subtype are expected not to contain commas.

   Returns 1 on a match, 0 otherwise.  A NULL decodetypes or type never
   matches; a NULL subtype restricts the comparison to type alone. */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    const char *tok;
    size_t type_len;

    if (decodetypes == NULL  ||  type == NULL) {
        return 0;
    }

    type_len = strlen (type);

    for (tok = decodetypes; ; ) {
        /* Length of the current element, up to the next comma or the
           end of the list. */
        size_t len = strcspn (tok, ",");

        if (decodetype_equals (tok, len, type)) {
            return 1;
        }

        /* type/subtype:  the element must be type, a '/', and subtype. */
        if (subtype != NULL  &&  len > type_len  &&  tok[type_len] == '/'  &&
            decodetype_equals (tok, type_len, type)  &&
            decodetype_equals (tok + type_len + 1, len - type_len - 1,
                               subtype)) {
            return 1;
        }

        if (tok[len] == '\0') {
            break;
        }
        tok += len + 1;
    }

    return 0;
}
1998
1999
2000 /* See if the decoded content is 7bit, 8bit, or binary.  It's binary
2001    if it has any NUL characters, a CR not followed by a LF, or lines
2002    greater than 998 characters in length.  If binary, reason is set
2003    to a string explaining why. */
2004 static int
2005 content_encoding (CT ct, const char **reason) {
2006     CE ce = &ct->c_cefile;
2007     int encoding = CE_7BIT;
2008
2009     if (ce->ce_file) {
2010         size_t line_len = 0;
2011         char buffer[BUFSIZ];
2012         size_t inbytes;
2013
2014         if (! ce->ce_fp  &&  (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2015             advise (ce->ce_file, "unable to open for reading");
2016             return CE_UNKNOWN;
2017         }
2018
2019         fseeko (ce->ce_fp, 0L, SEEK_SET);
2020         while (encoding != CE_BINARY  &&
2021                (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2022             char *cp;
2023             size_t i;
2024             int last_char_was_cr = 0;
2025
2026             for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2027                 if (*cp == '\0'  ||  ++line_len > 998  ||
2028                     (*cp != '\n'  &&  last_char_was_cr)) {
2029                     encoding = CE_BINARY;
2030                     if (*cp == '\0') {
2031                         *reason = "null character";
2032                     } else if (line_len > 998) {
2033                         *reason = "line length > 998";
2034                     } else if (*cp != '\n'  &&  last_char_was_cr) {
2035                         *reason = "CR not followed by LF";
2036                     } else {
2037                         /* Should not reach this. */
2038                         *reason = "";
2039                     }
2040                     break;
2041                 } else if (*cp == '\n') {
2042                     line_len = 0;
2043                 } else if (! isascii ((unsigned char) *cp)) {
2044                     encoding = CE_8BIT;
2045                 }
2046
2047                 last_char_was_cr = *cp == '\r'  ?  1  :  0;
2048             }
2049         }
2050
2051         fclose (ce->ce_fp);
2052         ce->ce_fp = NULL;
2053     } /* else should never happen */
2054
2055     return encoding;
2056 }
2057
2058
2059 static int
2060 strip_crs (CT ct, int *message_mods) {
2061     char *charset = content_charset (ct);
2062     int status = OK;
2063
2064     /* Only strip carriage returns if content is ASCII or another
2065        charset that has the same readily recognizable CR followed by a
2066        LF.  We can include UTF-8 here because if the high-order bit of
2067        a UTF-8 byte is 0, then it must be a single-byte ASCII
2068        character. */
2069     if (! strcasecmp (charset, "US-ASCII")  ||
2070         ! strcasecmp (charset, "UTF-8")  ||
2071         ! strncasecmp (charset, "ISO-8859-", 9)  ||
2072         ! strncasecmp (charset, "WINDOWS-12", 10)) {
2073         char **file = NULL;
2074         FILE **fp = NULL;
2075         size_t begin;
2076         size_t end;
2077         int has_crs = 0;
2078         int opened_input_file = 0;
2079
2080         if (ct->c_cefile.ce_file) {
2081             file = &ct->c_cefile.ce_file;
2082             fp = &ct->c_cefile.ce_fp;
2083             begin = end = 0;
2084         } else if (ct->c_file) {
2085             file = &ct->c_file;
2086             fp = &ct->c_fp;
2087             begin = (size_t) ct->c_begin;
2088             end = (size_t) ct->c_end;
2089         } /* else don't know where the content is */
2090
2091         if (file  &&  *file  &&  fp) {
2092             if (! *fp) {
2093                 if ((*fp = fopen (*file, "r")) == NULL) {
2094                     advise (*file, "unable to open for reading");
2095                     status = NOTOK;
2096                 } else {
2097                     opened_input_file = 1;
2098                 }
2099             }
2100         }
2101
2102         if (fp  &&  *fp) {
2103             char buffer[BUFSIZ];
2104             size_t bytes_read;
2105             size_t bytes_to_read =
2106                 end > 0 && end > begin  ?  end - begin  :  sizeof buffer;
2107
2108             fseeko (*fp, begin, SEEK_SET);
2109             while ((bytes_read = fread (buffer, 1,
2110                                         min (bytes_to_read, sizeof buffer),
2111                                         *fp)) > 0) {
2112                 /* Look for CR followed by a LF.  This is supposed to
2113                    be text so there should be LF's.  If not, don't
2114                    modify the content. */
2115                 char *cp;
2116                 size_t i;
2117                 int last_char_was_cr = 0;
2118
2119                 if (end > 0) { bytes_to_read -= bytes_read; }
2120
2121                 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2122                     if (*cp == '\n'  &&  last_char_was_cr) {
2123                         has_crs = 1;
2124                         break;
2125                     }
2126
2127                     last_char_was_cr = *cp == '\r'  ?  1  :  0;
2128                 }
2129             }
2130
2131             if (has_crs) {
2132                 int fd;
2133                 char *stripped_content_file;
2134                 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2135
2136                 if (tempfile == NULL) {
2137                     adios (NULL, "unable to create temporary file in %s",
2138                            get_temp_dir());
2139                 }
2140                 stripped_content_file = add (tempfile, NULL);
2141
2142                 /* Strip each CR before a LF from the content. */
2143                 fseeko (*fp, begin, SEEK_SET);
2144                 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2145                        0) {
2146                     char *cp;
2147                     size_t i;
2148                     int last_char_was_cr = 0;
2149
2150                     for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2151                         if (*cp == '\r') {
2152                             last_char_was_cr = 1;
2153                         } else if (last_char_was_cr) {
2154                             if (*cp != '\n') {
2155                                 if (write (fd, "\r", 1) < 0) {
2156                                     advise (tempfile, "CR write");
2157                                 }
2158                             }
2159                             if (write (fd, cp, 1) < 0) {
2160                                 advise (tempfile, "write");
2161                             }
2162                             last_char_was_cr = 0;
2163                         } else {
2164                             if (write (fd, cp, 1) < 0) {
2165                                 advise (tempfile, "write");
2166                             }
2167                             last_char_was_cr = 0;
2168                         }
2169                     }
2170                 }
2171
2172                 if (close (fd)) {
2173                     admonish (NULL, "unable to write temporary file %s",
2174                               stripped_content_file);
2175                     (void) m_unlink (stripped_content_file);
2176                     status = NOTOK;
2177                 } else {
2178                     /* Replace the decoded file with the converted one. */
2179                     if (ct->c_cefile.ce_file) {
2180                         if (ct->c_cefile.ce_unlink) {
2181                             (void) m_unlink (ct->c_cefile.ce_file);
2182                         }
2183                         free (ct->c_cefile.ce_file);
2184                     }
2185                     ct->c_cefile.ce_file = stripped_content_file;
2186                     ct->c_cefile.ce_unlink = 1;
2187
2188                     ++*message_mods;
2189                     if (verbosw) {
2190                         report (NULL, ct->c_partno,
2191                                 begin == 0 && end == 0  ?  ""  :  *file,
2192                                 "stripped CRs");
2193                     }
2194                 }
2195             }
2196
2197             if (opened_input_file) {
2198                 fclose (*fp);
2199                 *fp = NULL;
2200             }
2201         }
2202     }
2203
2204     free (charset);
2205
2206     return status;
2207 }
2208
2209
2210 static int
2211 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2212     int status = OK;
2213
2214     switch (ct->c_type) {
2215     case CT_TEXT:
2216         if (ct->c_subtype == TEXT_PLAIN) {
2217             status = convert_charset (ct, dest_charset, message_mods);
2218             if (status == OK) {
2219                 if (verbosw) {
2220                     char *ct_charset = content_charset (ct);
2221
2222                     report (NULL, ct->c_partno, ct->c_file,
2223                             "convert %s to %s", ct_charset, dest_charset);
2224                     free (ct_charset);
2225                 }
2226             } else {
2227                 char *ct_charset = content_charset (ct);
2228
2229                 report ("iconv", ct->c_partno, ct->c_file,
2230                         "failed to convert %s to %s", ct_charset, dest_charset);
2231                 free (ct_charset);
2232             }
2233         }
2234         break;
2235
2236     case CT_MULTIPART: {
2237         struct multipart *m = (struct multipart *) ct->c_ctparams;
2238         struct part *part;
2239
2240         /* Should check to see if the body for this part is encoded?
2241            For now, it gets passed along as-is by InitMultiPart(). */
2242         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
2243             status =
2244                 convert_charsets (part->mp_part, dest_charset, message_mods);
2245         }
2246         break;
2247     }
2248
2249     case CT_MESSAGE:
2250         if (ct->c_subtype == MESSAGE_EXTERNAL) {
2251             struct exbody *e = (struct exbody *) ct->c_ctparams;
2252
2253             status =
2254                 convert_charsets (e->eb_content, dest_charset, message_mods);
2255         }
2256         break;
2257
2258     default:
2259         break;
2260     }
2261
2262     return status;
2263 }
2264
2265
2266 /*
2267  * Fix various problems that aren't handled elsewhere.  These
2268  * are fixed unconditionally:  there are no switches to disable
2269  * them.  (Currently, "problems" is just one:  an extraneous
2270  * semicolon at the end of a header parameter list.)
2271  */
2272 static int
2273 fix_always (CT ct, int *message_mods) {
2274     int status = OK;
2275
2276     switch (ct->c_type) {
2277     case CT_MULTIPART: {
2278         struct multipart *m = (struct multipart *) ct->c_ctparams;
2279         struct part *part;
2280
2281         for (part = m->mp_parts; status == OK  &&  part; part = part->mp_next) {
2282             status = fix_always (part->mp_part, message_mods);
2283         }
2284         break;
2285     }
2286
2287     case CT_MESSAGE:
2288         if (ct->c_subtype == MESSAGE_EXTERNAL) {
2289             struct exbody *e = (struct exbody *) ct->c_ctparams;
2290
2291             status = fix_always (e->eb_content, message_mods);
2292         }
2293         break;
2294
2295     default: {
2296         HF hf;
2297
2298         for (hf = ct->c_first_hf; hf; hf = hf->next) {
2299             size_t len = strlen (hf->value);
2300
2301             if (strcasecmp (hf->name, TYPE_FIELD) != 0  &&
2302                 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2303                 /* Only do this for Content-Type and
2304                    Content-Disposition fields because those are the
2305                    only headers that parse_mime() warns about. */
2306                 continue;
2307             }
2308
2309             /* whitespace following a trailing ';' will be nuked as well */
2310             if (hf->value[len - 1] == '\n') {
2311                 while (isspace((unsigned char)(hf->value[len - 2]))) {
2312                     if (len-- == 0) { break; }
2313                 }
2314             }
2315
2316             if (hf->value[len - 2] == ';') {
2317                 /* Remove trailing ';' from parameter value. */
2318                 hf->value[len - 2] = '\n';
2319                 hf->value[len - 1] = '\0';
2320
2321                 /* Also, if Content-Type parameter, remove trailing ';'
2322                    from ct->c_ctline.  This probably isn't necessary
2323                    but can't hurt. */
2324                 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2325                     size_t l = strlen(ct->c_ctline) - 1;
2326                     while (isspace((unsigned char)(ct->c_ctline[l])) ||
2327                            ct->c_ctline[l] == ';') {
2328                         ct->c_ctline[l--] = '\0';
2329                         if (l == 0) { break; }
2330                     }
2331                 }
2332
2333                 ++*message_mods;
2334                 if (verbosw) {
2335                     report (NULL, ct->c_partno, ct->c_file,
2336                             "remove trailing ; from %s parameter value",
2337                             hf->name);
2338                 }
2339             }
2340         }
2341     }}
2342
2343     return status;
2344 }
2345
2346
2347 static int
2348 write_content (CT ct, const char *input_filename, char *outfile, int modify_inplace,
2349                int message_mods) {
2350     int status = OK;
2351
2352     if (modify_inplace) {
2353         if (message_mods > 0) {
2354             if ((status = output_message (ct, outfile)) == OK) {
2355                 char *infile = input_filename
2356                     ?  add (input_filename, NULL)
2357                     :  add (ct->c_file ? ct->c_file : "-", NULL);
2358
2359                 if (remove_file (infile) == OK) {
2360                     if (rename (outfile, infile)) {
2361                         /* Rename didn't work, possibly because of an
2362                            attempt to rename across filesystems.  Try
2363                            brute force copy. */
2364                         int old = open (outfile, O_RDONLY);
2365                         int new =
2366                             open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2367                         int i = -1;
2368
2369                         if (old != -1  &&  new != -1) {
2370                             char buffer[BUFSIZ];
2371
2372                             while ((i = read (old, buffer, sizeof buffer)) >
2373                                    0) {
2374                                 if (write (new, buffer, i) != i) {
2375                                     i = -1;
2376                                     break;
2377                                 }
2378                             }
2379                         }
2380                         if (new != -1) { close (new); }
2381                         if (old != -1) { close (old); }
2382                         (void) m_unlink (outfile);
2383
2384                         if (i < 0) {
2385                             /* The -file argument processing used path() to
2386                                expand filename to absolute path. */
2387                             int file = ct->c_file  &&  ct->c_file[0] == '/';
2388
2389                             admonish (NULL, "unable to rename %s %s to %s",
2390                                       file ? "file" : "message", outfile,
2391                                       infile);
2392                             status = NOTOK;
2393                         }
2394                     }
2395                 } else {
2396                     admonish (NULL, "unable to remove input file %s, "
2397                               "not modifying it", infile);
2398                     (void) m_unlink (outfile);
2399                     status = NOTOK;
2400                 }
2401
2402                 free (infile);
2403             } else {
2404                 status = NOTOK;
2405             }
2406         } else {
2407             /* No modifications and didn't need the tmp outfile. */
2408             (void) m_unlink (outfile);
2409         }
2410     } else {
2411         /* Output is going to some file.  Produce it whether or not
2412            there were modifications. */
2413         status = output_message (ct, outfile);
2414     }
2415
2416     flush_errors ();
2417     return status;
2418 }
2419
2420
2421 /*
2422  * parse_mime() does not set lf_line_endings in struct text, so use this function to do it.
2423  * It touches the parts the decodetypes identifies.
2424  */
2425 static void
2426 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2427     switch (ct->c_type) {
2428     case CT_MULTIPART: {
2429         struct multipart *m = (struct multipart *) ct->c_ctparams;
2430         struct part *part;
2431
2432         for (part = m->mp_parts; part; part = part->mp_next) {
2433             set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2434         }
2435         break;
2436     }
2437
2438     case CT_MESSAGE:
2439         if (ct->c_subtype == MESSAGE_EXTERNAL) {
2440             struct exbody *e = (struct exbody *) ct->c_ctparams;
2441
2442             set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2443         }
2444         break;
2445
2446     default:
2447         if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2448             if (ct->c_ctparams == NULL) {
2449                 if ((ct->c_ctparams = (struct text *) mh_xcalloc (1, sizeof (struct text))) == NULL) {
2450                     adios (NULL, "out of memory");
2451                 }
2452             }
2453             ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2454         }
2455     }
2456 }
2457
2458
2459 /*
2460  * If "rmmproc" is defined, call that to remove the file.  Otherwise,
2461  * use the standard MH backup file.
2462  */
2463 static int
2464 remove_file (const char *file) {
2465     if (rmmproc) {
2466         char *rmm_command = concat (rmmproc, " ", file, NULL);
2467         int status = system (rmm_command);
2468
2469         free (rmm_command);
2470         return WIFEXITED (status)  ?  WEXITSTATUS (status)  :  NOTOK;
2471     } else {
2472         /* This is OK for a non-message file, it still uses the
2473            BACKUP_PREFIX form.  The backup file will be in the same
2474            directory as file. */
2475         return rename (file, m_backup (file));
2476     }
2477 }
2478
2479
2480 static void
2481 report (char *what, char *partno, char *filename, char *message, ...) {
2482     va_list args;
2483     char *fmt;
2484
2485     if (verbosw) {
2486         va_start (args, message);
2487         fmt = concat (filename, partno ? " part " : ", ",
2488                       partno ? partno : "", partno ? ", " : "", message, NULL);
2489
2490         advertise (what, NULL, fmt, args);
2491
2492         free (fmt);
2493         va_end (args);
2494     }
2495 }
2496
2497
/*
 * Signal handler (also used as quitser).  For SIGQUIT, flush stdout
 * and end the current line on stderr first.  In every case finish by
 * calling done (1).
 */
static void
pipeser (int i)
{
    switch (i) {
    case SIGQUIT:
        fflush (stdout);
        fputc ('\n', stderr);
        fflush (stderr);
        break;

    default:
        break;
    }

    done (1);
    /* NOTREACHED */
}