diplodocus.org Git - nmh/blob - uip/mhparse.c

   1 /* mhparse.c -- routines to parse the contents of MIME messages
   2  *
   3  * This code is Copyright (c) 2002, by the authors of nmh.  See the
   4  * COPYRIGHT file in the root directory of the nmh distribution for
   5  * complete copyright information.
   6  */
   7
   8 #include <h/mh.h>
   9 #include <fcntl.h>
  10 #include <h/md5.h>
  11 #include <h/mts.h>
  12 #include <h/tws.h>
  13 #include <h/mime.h>
  14 #include <h/mhparse.h>
  15 #include <h/utils.h>
  16 #include "mhmisc.h"
  17 #include <h/mhcachesbr.h>
  18 #include "sbr/m_mktemp.h"
  19 #include "mhfree.h"
  20 #ifdef HAVE_ICONV
  21 # include <iconv.h>
  22 #endif /* HAVE_ICONV */
  23 #include "sbr/base64.h"
  24
  25
  26 extern int debugsw;
  27
  28 int checksw = 0;        /* check Content-MD5 field */
  29
  30 /*
  31  * These are for mhfixmsg to:
  32  * 1) Instruct parser not to detect invalid Content-Transfer-Encoding
  33  *    in a multipart.
  34  * 2) Suppress the warning about bogus multipart content, and report it.
  35  * 3) Suppress the warning about extraneous trailing ';' in header parameter
  36  *    lists.
  37  */
  38 bool skip_mp_cte_check;
  39 bool suppress_bogus_mp_content_warning;
  40 bool bogus_mp_content;
  41 bool suppress_extraneous_trailing_semicolon_warning;
  42
  43 /*
  44  * By default, suppress warning about multiple MIME-Version header fields.
  45  */
  46 bool suppress_multiple_mime_version_warning = true;
  47
  48 /* list of preferred type/subtype pairs, for -prefer */
  49 mime_type_subtype mime_preference[NPREFS];
  50 int npreferred;
  51
  52
  53 /*
  54  * Structures for TEXT messages
  55  */
  56 struct k2v SubText[] = {
  57     { "plain",    TEXT_PLAIN },
  58     { "richtext", TEXT_RICHTEXT },  /* defined in RFC 1341    */
  59     { "enriched", TEXT_ENRICHED },  /* defined in RFC 1896    */
  60     { NULL,       TEXT_UNKNOWN }    /* this one must be last! */
  61 };
  62
  63 /* Charset[] removed -- yozo.  Mon Oct  8 01:03:41 JST 2012 */
  64
  65 /*
  66  * Structures for MULTIPART messages
  67  */
  68 struct k2v SubMultiPart[] = {
  69     { "mixed",       MULTI_MIXED },
  70     { "alternative", MULTI_ALTERNATE },
  71     { "digest",      MULTI_DIGEST },
  72     { "parallel",    MULTI_PARALLEL },
  73     { "related",     MULTI_RELATED },
  74     { NULL,          MULTI_UNKNOWN }    /* this one must be last! */
  75 };
  76
  77 /*
  78  * Structures for MESSAGE messages
  79  */
  80 struct k2v SubMessage[] = {
  81     { "rfc822",        MESSAGE_RFC822 },
  82     { "partial",       MESSAGE_PARTIAL },
  83     { "external-body", MESSAGE_EXTERNAL },
  84     { NULL,            MESSAGE_UNKNOWN }        /* this one must be last! */
  85 };
  86
  87 /*
  88  * Structure for APPLICATION messages
  89  */
  90 struct k2v SubApplication[] = {
  91     { "octet-stream", APPLICATION_OCTETS },
  92     { "postscript",   APPLICATION_POSTSCRIPT },
  93     { NULL,           APPLICATION_UNKNOWN }     /* this one must be last! */
  94 };
  95
  96 /*
  97  * Mapping of names of CTE types in mhbuild directives
  98  */
  99 static struct k2v EncodingType[] = {
 100     { "8bit",                   CE_8BIT },
 101     { "qp",                     CE_QUOTED },
 102     { "q-p",                    CE_QUOTED },
 103     { "quoted-printable",       CE_QUOTED },
 104     { "b64",                    CE_BASE64 },
 105     { "base64",                 CE_BASE64 },
 106     { NULL,                     0 },
 107 };
 108
 109
 110 /*
 111  * static prototypes
 112  */
 113 static CT get_content (FILE *, char *, int);
 114 static int get_comment (const char *, const char *, char **, char **);
 115
 116 static int InitGeneric (CT);
 117 static int InitText (CT);
 118 static int InitMultiPart (CT);
 119 static void reverse_parts (CT);
 120 static void prefer_parts(CT ct);
 121 static int InitMessage (CT);
 122 static int InitApplication (CT);
 123 static int init_encoding (CT, OpenCEFunc);
 124 static unsigned long size_encoding (CT);
 125 static int InitBase64 (CT);
 126 static int openBase64 (CT, char **);
 127 static int InitQuoted (CT);
 128 static int openQuoted (CT, char **);
 129 static int Init7Bit (CT);
 130 static int openExternal (CT, CT, CE, char **, int *);
 131 static int InitFile (CT);
 132 static int openFile (CT, char **);
 133 static int InitFTP (CT);
 134 static int openFTP (CT, char **);
 135 static int InitMail (CT);
 136 static int openMail (CT, char **);
 137 static int readDigest (CT, char *);
 138 static int get_leftover_mp_content (CT, int);
 139 static int InitURL (CT);
 140 static int openURL (CT, char **);
 141 static int parse_header_attrs (const char *, const char *, char **, PM *,
 142                                PM *, char **);
 143 static size_t param_len(PM, int, size_t, int *, int *, size_t *);
 144 static size_t normal_param(PM, char *, size_t, size_t, size_t);
 145 static int get_dispo (char *, CT, int);
 146
 147 struct str2init str2cts[] = {
 148     { "application", CT_APPLICATION, InitApplication },
 149     { "audio",       CT_AUDIO,       InitGeneric },
 150     { "image",       CT_IMAGE,       InitGeneric },
 151     { "message",     CT_MESSAGE,     InitMessage },
 152     { "multipart",   CT_MULTIPART,   InitMultiPart },
 153     { "text",        CT_TEXT,        InitText },
 154     { "video",       CT_VIDEO,       InitGeneric },
 155     { NULL,          CT_EXTENSION,   NULL },  /* these two must be last! */
 156     { NULL,          CT_UNKNOWN,     NULL },
 157 };
 158
 159 struct str2init str2ces[] = {
 160     { "base64",           CE_BASE64,    InitBase64 },
 161     { "quoted-printable", CE_QUOTED,    InitQuoted },
 162     { "8bit",             CE_8BIT,      Init7Bit },
 163     { "7bit",             CE_7BIT,      Init7Bit },
 164     { "binary",           CE_BINARY,    Init7Bit },
 165     { NULL,               CE_EXTENSION, NULL },  /* these two must be last! */
 166     { NULL,               CE_UNKNOWN,   NULL },
 167 };
 168
 169 /*
 170  * NOTE WELL: si_key MUST NOT have value of NOTOK
 171  *
 172  * si_val is 1 if access method is anonymous.
 173  */
 174 struct str2init str2methods[] = {
 175     { "afs",         1, InitFile },
 176     { "anon-ftp",    1, InitFTP },
 177     { "ftp",         0, InitFTP },
 178     { "local-file",  0, InitFile },
 179     { "mail-server", 0, InitMail },
 180     { "url",         0, InitURL },
 181     { NULL,          0, NULL }
 182 };
 183
 184
 185 /*
 186  * Main entry point for parsing a MIME message or file.
 187  * It returns the Content structure for the top level
 188  * entity in the file.
 189  */
 190
 191 CT
 192 parse_mime (char *file)
 193 {
 194     int is_stdin;
 195     char buffer[BUFSIZ];
 196     FILE *fp;
 197     CT ct;
 198     size_t n;
 199     struct stat statbuf;
 200
 201     bogus_mp_content = false;
 202
 203     /*
 204      * Check if file is actually standard input
 205      */
 206     if ((is_stdin = !(strcmp (file, "-")))) {
 207         char *tfile = m_mktemp2(NULL, invo_name, NULL, &fp);
 208         if (tfile == NULL) {
 209             advise("mhparse", "unable to create temporary file in %s",
 210                    get_temp_dir());
 211             return NULL;
 212         }
 213         file = mh_xstrdup(tfile);
 214
 215         while ((n = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
 216             if (fwrite(buffer, 1, n, fp) != n) {
 217                 (void) m_unlink (file);
 218                 advise (file, "error copying to temporary file");
 219                 return NULL;
 220             }
 221         }
 222         fflush (fp);
 223
 224         if (ferror (stdin)) {
 225             (void) m_unlink (file);
 226             advise ("stdin", "error reading");
 227             return NULL;
 228         }
 229         if (ferror (fp)) {
 230             (void) m_unlink (file);
 231             advise (file, "error writing");
 232             return NULL;
 233         }
 234         fseek (fp, 0L, SEEK_SET);
 235     } else if (stat (file, &statbuf) == NOTOK) {
 236         advise (file, "unable to stat");
 237         return NULL;
 238     } else if (S_ISDIR(statbuf.st_mode)) {
 239         /* Don't try to parse a directory. */
 240         inform("%s is a directory", file);
 241         return NULL;
 242     } else if ((fp = fopen (file, "r")) == NULL) {
 243         advise (file, "unable to read");
 244         return NULL;
 245     }
 246
 247     if (!(ct = get_content (fp, file, 1))) {
 248         if (is_stdin)
 249             (void) m_unlink (file);
 250         inform("unable to decode %s", file);
 251         return NULL;
 252     }
 253
 254     if (is_stdin)
 255         ct->c_unlink = 1;       /* temp file to remove */
 256
 257     ct->c_fp = NULL;
 258
 259     if (ct->c_end == 0L) {
 260         fseek (fp, 0L, SEEK_END);
 261         ct->c_end = ftell (fp);
 262     }
 263
 264     if (ct->c_ctinitfnx && (*ct->c_ctinitfnx) (ct) == NOTOK) {
 265         fclose (fp);
 266         free_content (ct);
 267         return NULL;
 268     }
 269
 270     fclose (fp);
 271     return ct;
 272 }
 273
 274
 275 /*
 276  * Main routine for reading/parsing the headers
 277  * of a message content.
 278  *
 279  * toplevel =  1   # we are at the top level of the message
 280  * toplevel =  0   # we are inside message type or multipart type
 281  *                 # other than multipart/digest
 282  * toplevel = -1   # we are inside multipart/digest
 283  * NB: on failure we will fclose(in)!
 284  */
 285
 286 static CT
 287 get_content (FILE *in, char *file, int toplevel)
 288 {
 289     int compnum, state;
 290     char buf[NMH_BUFSIZ], name[NAMESZ];
 291     char *np, *vp;
 292     CT ct;
 293     HF hp;
 294     m_getfld_state_t gstate;
 295
 296     /* allocate the content structure */
 297     NEW0(ct);
 298     ct->c_fp = in;
 299     ct->c_file = mh_xstrdup(FENDNULL(file));
 300     ct->c_begin = ftell (ct->c_fp) + 1;
 301
 302     /*
 303      * Parse the header fields for this
 304      * content into a linked list.
 305      */
 306     gstate = m_getfld_state_init(in);
 307     m_getfld_track_filepos2(&gstate);
 308     for (compnum = 1;;) {
 309         int bufsz = sizeof buf;
 310         switch (state = m_getfld2(&gstate, name, buf, &bufsz)) {
 311         case FLD:
 312         case FLDPLUS:
 313             compnum++;
 314
 315             /* get copies of the buffers */
 316             np = mh_xstrdup(name);
 317             vp = mh_xstrdup(buf);
 318
 319             /* if necessary, get rest of field */
 320             while (state == FLDPLUS) {
 321                 bufsz = sizeof buf;
 322                 state = m_getfld2(&gstate, name, buf, &bufsz);
 323                 vp = add (buf, vp);     /* add to previous value */
 324             }
 325
 326             /* Now add the header data to the list */
 327             add_header (ct, np, vp);
 328
 329             /* continue, to see if this isn't the last header field */
 330             ct->c_begin = ftell (in) + 1;
 331             continue;
 332
 333         case BODY:
 334             /* There are two cases.  The unusual one is when there is no
 335              * blank line between the headers and the body.  This is
 336              * indicated by the name of the header starting with `:'.
 337              *
 338              * For both cases, normal first, `1' is the desired c_begin
 339              * file position for the start of the body, and `2' is the
 340              * file position when buf is returned.
 341              *
 342              *     f o o :   b a r \n \n b o d y \n    bufsz = 6
 343              *                          1          2   move -5
 344              *     f o o :   b a r \n b o d y \n       bufsz = 4
 345              *                       1       2         move -4
 346              *
 347              * For the normal case, bufsz includes the
 348              * header-terminating `\n', even though it is not in buf,
 349              * but bufsz isn't affected when it's missing in the unusual
 350              * case. */
 351             if (name[0] == ':') {
 352                 ct->c_begin = ftell(in) - bufsz;
 353             } else {
 354                 ct->c_begin = ftell (in) - (bufsz - 1);
 355             }
 356             break;
 357
 358         case FILEEOF:
 359             ct->c_begin = ftell (in);
 360             break;
 361
 362         case LENERR:
 363         case FMTERR:
 364             die("message format error in component #%d", compnum);
 365
 366         default:
 367             die("getfld() returned %d", state);
 368         }
 369
 370         /* break out of the loop */
 371         break;
 372     }
 373     m_getfld_state_destroy (&gstate);
 374
 375     /*
 376      * Read the content headers.  We will parse the
 377      * MIME related header fields into their various
 378      * structures and set internal flags related to
 379      * content type/subtype, etc.
 380      */
 381
 382     hp = ct->c_first_hf;        /* start at first header field */
 383     while (hp) {
 384         /* Get MIME-Version field */
 385         if (!strcasecmp (hp->name, VRSN_FIELD)) {
 386             int ucmp;
 387             char c, *cp, *dp;
 388             char *vrsn;
 389
 390             vrsn = mh_xstrdup(FENDNULL(hp->value));
 391
 392             /* Now, cleanup this field */
 393             cp = vrsn;
 394
 395             while (isspace ((unsigned char) *cp))
 396                 cp++;
 397             for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
 398                 *dp++ = ' ';
 399             for (dp = cp + strlen (cp) - 1; dp >= cp; dp--)
 400                 if (!isspace ((unsigned char) *dp))
 401                     break;
 402             *++dp = '\0';
 403             if (debugsw)
 404                 fprintf (stderr, "%s: %s\n", VRSN_FIELD, cp);
 405
 406             if (*cp == '('  &&
 407                 get_comment (ct->c_file, VRSN_FIELD, &cp, NULL) == NOTOK)
 408                 goto out;
 409
 410             for (dp = cp; istoken (*dp); dp++)
 411                 continue;
 412             c = *dp;
 413             *dp = '\0';
 414             ucmp = !strcasecmp (cp, VRSN_VALUE);
 415             *dp = c;
 416             if (!ucmp) {
 417                 inform("message %s has unknown value for %s: field (%s), continuing...",
 418                 ct->c_file, VRSN_FIELD, cp);
 419             }
 420             if (!ct->c_vrsn) {
 421                 ct->c_vrsn = vrsn;
 422             } else {
 423                 if (! suppress_multiple_mime_version_warning)
 424                     inform("message %s has multiple %s: fields",
 425                             ct->c_file, VRSN_FIELD);
 426                 free(vrsn);
 427             }
 428         }
 429         else if (!strcasecmp (hp->name, TYPE_FIELD)) {
 430         /* Get Content-Type field */
 431             struct str2init *s2i;
 432             CI ci = &ct->c_ctinfo;
 433
 434             /* Check if we've already seen a Content-Type header */
 435             if (ct->c_ctline) {
 436                 inform("message %s has multiple %s: fields",
 437                         ct->c_file, TYPE_FIELD);
 438                 goto next_header;
 439             }
 440
 441             /* Parse the Content-Type field */
 442             if (get_ctinfo (hp->value, ct, 0) == NOTOK)
 443                 goto out;
 444
 445             /*
 446              * Set the Init function and the internal
 447              * flag for this content type.
 448              */
 449             for (s2i = str2cts; s2i->si_key; s2i++)
 450                 if (!strcasecmp (ci->ci_type, s2i->si_key))
 451                     break;
 452             if (!s2i->si_key && !uprf (ci->ci_type, "X-"))
 453                 s2i++;
 454             ct->c_type = s2i->si_val;
 455             ct->c_ctinitfnx = s2i->si_init;
 456         }
 457         else if (!strcasecmp (hp->name, ENCODING_FIELD)) {
 458         /* Get Content-Transfer-Encoding field */
 459             char c, *cp, *dp;
 460             struct str2init *s2i;
 461
 462             /*
 463              * Check if we've already seen the
 464              * Content-Transfer-Encoding field
 465              */
 466             if (ct->c_celine) {
 467                 inform("message %s has multiple %s: fields",
 468                         ct->c_file, ENCODING_FIELD);
 469                 goto next_header;
 470             }
 471
 472             /* get copy of this field */
 473             ct->c_celine = cp = mh_xstrdup(FENDNULL(hp->value));
 474
 475             while (isspace ((unsigned char) *cp))
 476                 cp++;
 477             for (dp = cp; istoken (*dp); dp++)
 478                 continue;
 479             c = *dp;
 480             *dp = '\0';
 481
 482             /*
 483              * Find the internal flag and Init function
 484              * for this transfer encoding.
 485              */
 486             for (s2i = str2ces; s2i->si_key; s2i++)
 487                 if (!strcasecmp (cp, s2i->si_key))
 488                     break;
 489             if (!s2i->si_key && !uprf (cp, "X-"))
 490                 s2i++;
 491             *dp = c;
 492             ct->c_encoding = s2i->si_val;
 493
 494             /* Call the Init function for this encoding */
 495             if (s2i->si_init && (*s2i->si_init) (ct) == NOTOK)
 496                 goto out;
 497         }
 498         else if (!strcasecmp (hp->name, MD5_FIELD)) {
 499         /* Get Content-MD5 field */
 500             char *cp, *dp, *ep;
 501
 502             if (!checksw)
 503                 goto next_header;
 504
 505             if (ct->c_digested) {
 506                 inform("message %s has multiple %s: fields",
 507                         ct->c_file, MD5_FIELD);
 508                 goto next_header;
 509             }
 510
 511             ep = cp = mh_xstrdup(FENDNULL(hp->value)); /* get a copy */
 512
 513             while (isspace ((unsigned char) *cp))
 514                 cp++;
 515             for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
 516                 *dp++ = ' ';
 517             for (dp = cp + strlen (cp) - 1; dp >= cp; dp--)
 518                 if (!isspace ((unsigned char) *dp))
 519                     break;
 520             *++dp = '\0';
 521             if (debugsw)
 522                 fprintf (stderr, "%s: %s\n", MD5_FIELD, cp);
 523
 524             if (*cp == '('  &&
 525                 get_comment (ct->c_file, MD5_FIELD, &cp, NULL) == NOTOK) {
 526                 free (ep);
 527                 goto out;
 528             }
 529
 530             for (dp = cp; *dp && !isspace ((unsigned char) *dp); dp++)
 531                 continue;
 532             *dp = '\0';
 533
 534             readDigest (ct, cp);
 535             free (ep);
 536             ct->c_digested++;
 537         }
 538         else if (!strcasecmp (hp->name, ID_FIELD)) {
 539         /* Get Content-ID field */
 540             ct->c_id = add (hp->value, ct->c_id);
 541         }
 542         else if (!strcasecmp (hp->name, DESCR_FIELD)) {
 543         /* Get Content-Description field */
 544             ct->c_descr = add (hp->value, ct->c_descr);
 545         }
 546         else if (!strcasecmp (hp->name, DISPO_FIELD)) {
 547         /* Get Content-Disposition field */
 548             if (get_dispo(hp->value, ct, 0) == NOTOK)
 549                 goto out;
 550         }
 551
 552 next_header:
 553         hp = hp->next;  /* next header field */
 554     }
 555
 556     /*
 557      * Check if we saw a Content-Type field.
 558      * If not, then assign a default value for
 559      * it, and the Init function.
 560      */
 561     if (!ct->c_ctline) {
 562         /*
 563          * If we are inside a multipart/digest message,
 564          * so default type is message/rfc822
 565          */
 566         if (toplevel < 0) {
 567             if (get_ctinfo ("message/rfc822", ct, 0) == NOTOK)
 568                 goto out;
 569             ct->c_type = CT_MESSAGE;
 570             ct->c_ctinitfnx = InitMessage;
 571         } else {
 572             /*
 573              * Else default type is text/plain
 574              */
 575             if (get_ctinfo ("text/plain", ct, 0) == NOTOK)
 576                 goto out;
 577             ct->c_type = CT_TEXT;
 578             ct->c_ctinitfnx = InitText;
 579         }
 580     }
 581
 582     /* Use default Transfer-Encoding, if necessary */
 583     if (!ct->c_celine) {
 584         ct->c_encoding = CE_7BIT;
 585         Init7Bit (ct);
 586     }
 587
 588     return ct;
 589
 590 out:
 591     free_content (ct);
 592     return NULL;
 593 }
 594
 595
 596 /*
 597  * small routine to add header field to list
 598  */
 599
 600 int
 601 add_header (CT ct, char *name, char *value)
 602 {
 603     HF hp;
 604
 605     /* allocate header field structure */
 606     NEW(hp);
 607
 608     /* link data into header structure */
 609     hp->name = name;
 610     hp->value = value;
 611     hp->next = NULL;
 612
 613     /* link header structure into the list */
 614     if (ct->c_first_hf == NULL) {
 615         ct->c_first_hf = hp;            /* this is the first */
 616         ct->c_last_hf = hp;
 617     } else {
 618         ct->c_last_hf->next = hp;       /* add it to the end */
 619         ct->c_last_hf = hp;
 620     }
 621
 622     return 0;
 623 }
 624
 625
 626 /*
 627  * Parse Content-Type line and (if `magic' is non-zero) mhbuild composition
 628  * directives.  Fills in the information of the CTinfo structure.
 629  */
 630 int
 631 get_ctinfo (char *cp, CT ct, int magic)
 632 {
 633     char *dp;
 634     char c;
 635     CI ci;
 636     int status;
 637
 638     ci = &ct->c_ctinfo;
 639
 640     /* store copy of Content-Type line */
 641     cp = ct->c_ctline = mh_xstrdup(FENDNULL(cp));
 642
 643     while (isspace ((unsigned char) *cp))       /* trim leading spaces */
 644         cp++;
 645
 646     /* change newlines to spaces */
 647     for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
 648         *dp++ = ' ';
 649
 650     /* trim trailing spaces */
 651     for (dp = cp + strlen (cp) - 1; dp >= cp; dp--)
 652         if (!isspace ((unsigned char) *dp))
 653             break;
 654     *++dp = '\0';
 655
 656     if (debugsw)
 657         fprintf (stderr, "%s: %s\n", TYPE_FIELD, cp);
 658
 659     if (*cp == '(' && get_comment (ct->c_file, TYPE_FIELD, &cp,
 660                                    &ci->ci_comment) == NOTOK)
 661         return NOTOK;
 662
 663     for (dp = cp; istoken (*dp); dp++)
 664         continue;
 665     c = *dp;
 666     *dp = '\0';
 667     ci->ci_type = mh_xstrdup(cp);       /* store content type */
 668     *dp = c;
 669     cp = dp;
 670
 671     if (!*ci->ci_type) {
 672         inform("invalid %s: field in message %s (empty type)",
 673                 TYPE_FIELD, ct->c_file);
 674         return NOTOK;
 675     }
 676     to_lower(ci->ci_type);
 677
 678     while (isspace ((unsigned char) *cp))
 679         cp++;
 680
 681     if (*cp == '(' && get_comment (ct->c_file, TYPE_FIELD, &cp,
 682                                    &ci->ci_comment) == NOTOK)
 683         return NOTOK;
 684
 685     if (*cp != '/') {
 686         if (!magic)
 687             ci->ci_subtype = mh_xstrdup("");
 688         goto magic_skip;
 689     }
 690
 691     cp++;
 692     while (isspace ((unsigned char) *cp))
 693         cp++;
 694
 695     if (*cp == '(' && get_comment (ct->c_file, TYPE_FIELD, &cp,
 696                                    &ci->ci_comment) == NOTOK)
 697         return NOTOK;
 698
 699     for (dp = cp; istoken (*dp); dp++)
 700         continue;
 701     c = *dp;
 702     *dp = '\0';
 703     ci->ci_subtype = mh_xstrdup(cp);    /* store the content subtype */
 704     *dp = c;
 705     cp = dp;
 706
 707     if (!*ci->ci_subtype) {
 708         inform("invalid %s: field in message %s (empty subtype for \"%s\")",
 709             TYPE_FIELD, ct->c_file, ci->ci_type);
 710         return NOTOK;
 711     }
 712     to_lower(ci->ci_subtype);
 713
 714 magic_skip:
 715     while (isspace ((unsigned char) *cp))
 716         cp++;
 717
 718     if (*cp == '(' && get_comment (ct->c_file, TYPE_FIELD, &cp,
 719                                    &ci->ci_comment) == NOTOK)
 720         return NOTOK;
 721
 722     if ((status = parse_header_attrs (ct->c_file, TYPE_FIELD, &cp,
 723                                       &ci->ci_first_pm, &ci->ci_last_pm,
 724                                       &ci->ci_comment)) != OK) {
 725         return status == NOTOK ? NOTOK : OK;
 726     }
 727
 728     /*
 729      * Get any <Content-Id> given in buffer
 730      */
 731     if (magic && *cp == '<') {
 732         free(ct->c_id);
 733         ct->c_id = NULL;
 734         if (!(dp = strchr(ct->c_id = ++cp, '>'))) {
 735             inform("invalid ID in message %s", ct->c_file);
 736             return NOTOK;
 737         }
 738         c = *dp;
 739         *dp = '\0';
 740         if (*ct->c_id)
 741             ct->c_id = concat ("<", ct->c_id, ">\n", NULL);
 742         else
 743             ct->c_id = NULL;
 744         *dp++ = c;
 745         cp = dp;
 746
 747         while (isspace ((unsigned char) *cp))
 748             cp++;
 749     }
 750
 751     /*
 752      * Get any [Content-Description] given in buffer.
 753      */
 754     if (magic && *cp == '[') {
 755         ct->c_descr = ++cp;
 756         for (dp = cp + strlen (cp) - 1; dp >= cp; dp--)
 757             if (*dp == ']')
 758                 break;
 759         if (dp < cp) {
 760             inform("invalid description in message %s", ct->c_file);
 761             ct->c_descr = NULL;
 762             return NOTOK;
 763         }
 764
 765         c = *dp;
 766         *dp = '\0';
 767         if (*ct->c_descr)
 768             ct->c_descr = concat (ct->c_descr, "\n", NULL);
 769         else
 770             ct->c_descr = NULL;
 771         *dp++ = c;
 772         cp = dp;
 773
 774         while (isspace ((unsigned char) *cp))
 775             cp++;
 776     }
 777
 778     /*
 779      * Get any {Content-Disposition} given in buffer.
 780      */
 781     if (magic && *cp == '{') {
 782         ++cp;
 783         for (dp = cp + strlen (cp) - 1; dp >= cp; dp--)
 784             if (*dp == '}')
 785                 break;
 786         if (dp < cp) {
 787             inform("invalid disposition in message %s", ct->c_file);
 788             ct->c_dispo = NULL;
 789             return NOTOK;
 790         }
 791
 792         c = *dp;
 793         *dp = '\0';
 794
 795         if (get_dispo(cp, ct, 1) != OK)
 796             return NOTOK;
 797
 798         *dp++ = c;
 799         cp = dp;
 800
 801         while (isspace ((unsigned char) *cp))
 802             cp++;
 803     }
 804
 805     /*
 806      * Get any extension directives (right now just the content transfer
 807      * encoding, but maybe others) that we care about.
 808      */
 809
 810     if (magic && *cp == '*') {
 811         /*
 812          * See if it's a CTE we match on
 813          */
 814         struct k2v *kv;
 815
 816         dp = ++cp;
 817         while (*cp != '\0' && ! isspace((unsigned char) *cp))
 818             cp++;
 819
 820         if (dp == cp) {
 821             inform("invalid null transfer encoding specification");
 822             return NOTOK;
 823         }
 824
 825         if (*cp != '\0')
 826             *cp++ = '\0';
 827
 828         ct->c_reqencoding = CE_UNKNOWN;
 829
 830         for (kv = EncodingType; kv->kv_key; kv++) {
 831             if (strcasecmp(kv->kv_key, dp) == 0) {
 832                 ct->c_reqencoding = kv->kv_value;
 833                 break;
 834             }
 835         }
 836
 837         if (ct->c_reqencoding == CE_UNKNOWN) {
 838             inform("invalid CTE specification: \"%s\"", dp);
 839             return NOTOK;
 840         }
 841
 842         while (isspace ((unsigned char) *cp))
 843             cp++;
 844     }
 845
 846     /*
 847      * Check if anything is left over
 848      */
 849     if (*cp) {
 850         if (magic) {
 851             ci->ci_magic = mh_xstrdup(cp);
 852
 853             /* If there is a Content-Disposition header and it doesn't
 854                have a *filename=, extract it from the magic contents.
 855                The r1bindex call skips any leading directory
 856                components. */
 857             if (ct->c_dispo_type &&
 858                 !get_param(ct->c_dispo_first, "filename", '_', 1)) {
 859                 add_param(&ct->c_dispo_first, &ct->c_dispo_last, "filename",
 860                           r1bindex(ci->ci_magic, '/'), 0);
 861             }
 862         }
 863         else
 864             inform("extraneous information in message %s's %s: field\n"
 865                 "    (%s)", ct->c_file, TYPE_FIELD, cp);
 866     }
 867
 868     return OK;
 869 }
 870
 871
 872 /*
 873  * Parse out a Content-Disposition header.  A lot of this is cribbed from
 874  * get_ctinfo().
 875  */
 876 static int
 877 get_dispo (char *cp, CT ct, int buildflag)
 878 {
 879     char *dp, *dispoheader;
 880     char c;
 881     int status;
 882
 883     /*
 884      * Save the whole copy of the Content-Disposition header, unless we're
 885      * processing a mhbuild directive.  A NULL c_dispo will be a flag to
 886      * mhbuild that the disposition header needs to be generated at that
 887      * time.
 888      */
 889
 890     dispoheader = cp = mh_xstrdup(FENDNULL(cp));
 891
 892     while (isspace ((unsigned char) *cp))       /* trim leading spaces */
 893         cp++;
 894
 895     /* change newlines to spaces */
 896     for (dp = strchr(cp, '\n'); dp; dp = strchr(dp, '\n'))
 897         *dp++ = ' ';
 898
 899     /* trim trailing spaces */
 900     for (dp = cp + strlen (cp) - 1; dp >= cp; dp--)
 901         if (!isspace ((unsigned char) *dp))
 902             break;
 903     *++dp = '\0';
 904
 905     if (debugsw)
 906         fprintf (stderr, "%s: %s\n", DISPO_FIELD, cp);
 907
 908     if (*cp == '(' && get_comment (ct->c_file, DISPO_FIELD, &cp, NULL) ==
 909                                                         NOTOK) {
 910         free(dispoheader);
 911         return NOTOK;
 912     }
 913
 914     for (dp = cp; istoken (*dp); dp++)
 915         continue;
 916     c = *dp;
 917     *dp = '\0';
 918     ct->c_dispo_type = mh_xstrdup(cp);  /* store disposition type */
 919     *dp = c;
 920     cp = dp;
 921
 922     if (*cp == '(' && get_comment (ct->c_file, DISPO_FIELD, &cp, NULL) == NOTOK)
 923         return NOTOK;
 924
 925     if ((status = parse_header_attrs (ct->c_file, DISPO_FIELD, &cp,
 926                                       &ct->c_dispo_first, &ct->c_dispo_last,
 927                                       NULL)) != OK) {
 928         if (status == NOTOK) {
 929             free(dispoheader);
 930             return NOTOK;
 931         }
 932     } else if (*cp) {
 933         inform("extraneous information in message %s's %s: field\n    (%s)",
 934             ct->c_file, DISPO_FIELD, cp);
 935     }
 936
 937     if (buildflag)
 938         free(dispoheader);
 939     else
 940         ct->c_dispo = dispoheader;
 941
 942     return OK;
 943 }
 944
 945
 946 static int
 947 get_comment (const char *filename, const char *fieldname, char **ap,
 948              char **commentp)
 949 {
 950     int i;
 951     char *bp, *cp;
 952     char c, buffer[BUFSIZ], *dp;
 953
 954     cp = *ap;
 955     bp = buffer;
 956     cp++;
 957
 958     for (i = 0;;) {
 959         switch (c = *cp++) {
 960         case '\0':
 961 invalid:
 962         inform("invalid comment in message %s's %s: field",
 963                 filename, fieldname);
 964         return NOTOK;
 965
 966         case '\\':
 967             *bp++ = c;
 968             if ((c = *cp++) == '\0')
 969                 goto invalid;
 970             *bp++ = c;
 971             continue;
 972
 973         case '(':
 974             i++;
 975             /* FALLTHRU */
 976         default:
 977             *bp++ = c;
 978             continue;
 979
 980         case ')':
 981             if (--i < 0)
 982                 break;
 983             *bp++ = c;
 984             continue;
 985         }
 986         break;
 987     }
 988     *bp = '\0';
 989
 990     if (commentp) {
 991         if ((dp = *commentp)) {
 992             *commentp = concat (dp, " ", buffer, NULL);
 993             free (dp);
 994         } else {
 995             *commentp = mh_xstrdup(buffer);
 996         }
 997     }
 998
 999     while (isspace ((unsigned char) *cp))
1000         cp++;
1001
1002     *ap = cp;
1003     return OK;
1004 }
1005
1006
1007 /*
1008  * CONTENTS
1009  *
1010  * Handles content types audio, image, and video.
1011  * There's not much to do right here.
1012  */
1013
1014 static int
1015 InitGeneric (CT ct)
1016 {
1017     NMH_UNUSED (ct);
1018
1019     return OK;          /* not much to do here */
1020 }
1021
1022
1023 /*
1024  * TEXT
1025  */
1026
1027 static int
1028 InitText (CT ct)
1029 {
1030     char buffer[BUFSIZ];
1031     char *chset = NULL;
1032     char *cp;
1033     PM pm;
1034     struct text *t;
1035     CI ci = &ct->c_ctinfo;
1036
1037     /* check for missing subtype */
1038     if (!*ci->ci_subtype)
1039         ci->ci_subtype = add ("plain", ci->ci_subtype);
1040
1041     /* match subtype */
1042     ct->c_subtype = ct_str_subtype (CT_TEXT, ci->ci_subtype);
1043
1044     /* allocate text character set structure */
1045     NEW0(t);
1046     ct->c_ctparams = (void *) t;
1047
1048     /* scan for charset parameter */
1049     for (pm = ci->ci_first_pm; pm; pm = pm->pm_next)
1050         if (!strcasecmp (pm->pm_name, "charset"))
1051             break;
1052
1053     /* check if content specified a character set */
1054     if (pm) {
1055         chset = pm->pm_value;
1056         t->tx_charset = CHARSET_SPECIFIED;
1057     } else {
1058         t->tx_charset = CHARSET_UNSPECIFIED;
1059     }
1060
1061     /*
1062      * If we can not handle character set natively,
1063      * then check profile for string to modify the
1064      * terminal or display method.
1065      *
1066      * termproc is for mhshow, though mhlist -debug prints it, too.
1067      */
1068     if (chset != NULL && !check_charset (chset, strlen (chset))) {
1069         snprintf (buffer, sizeof(buffer), "%s-charset-%s", invo_name, chset);
1070         if ((cp = context_find (buffer)))
1071             ct->c_termproc = mh_xstrdup(cp);
1072     }
1073
1074     return OK;
1075 }
1076
1077
1078 /*
1079  * MULTIPART
1080  */
1081
1082 static int
1083 InitMultiPart (CT ct)
1084 {
1085     bool inout;
1086     long last, pos;
1087     char *cp, *dp;
1088     PM pm;
1089     char *bp;
1090     char *bufp = NULL;
1091     size_t buflen;
1092     ssize_t gotlen;
1093     struct multipart *m;
1094     struct part *part, **next;
1095     CI ci = &ct->c_ctinfo;
1096     CT p;
1097     FILE *fp;
1098
1099     /*
1100      * The encoding for multipart messages must be either
1101      * 7bit, 8bit, or binary (per RFC 2045).
1102      */
1103     if (! skip_mp_cte_check  &&  ct->c_encoding != CE_7BIT  &&
1104         ct->c_encoding != CE_8BIT  &&  ct->c_encoding != CE_BINARY) {
1105         /* Copy the Content-Transfer-Encoding header field body so we can
1106            remove any trailing whitespace and leading blanks from it. */
1107         char *cte = mh_xstrdup(ct->c_celine ? ct->c_celine : "(null)");
1108
1109         bp = cte + strlen (cte) - 1;
1110         while (bp >= cte && isspace ((unsigned char) *bp)) *bp-- = '\0';
1111         for (bp = cte; isblank((unsigned char)*bp); ++bp) continue;
1112
1113         inform("\"%s/%s\" type in message %s must be encoded in\n"
1114             "7bit, 8bit, or binary, per RFC 2045 (6.4).  "
1115             "mhfixmsg -fixcte can fix it, or\n"
1116             "manually edit the file and change the \"%s\"\n"
1117             "Content-Transfer-Encoding to one of those.  For now, continuing...",
1118             ci->ci_type, ci->ci_subtype, ct->c_file, bp);
1119         free (cte);
1120
1121         return NOTOK;
1122     }
1123
1124     /* match subtype */
1125     ct->c_subtype = ct_str_subtype (CT_MULTIPART, ci->ci_subtype);
1126
1127     /*
1128      * Check for "boundary" parameter, which is
1129      * required for multipart messages.
1130      */
1131     bp = 0;
1132     for (pm = ci->ci_first_pm; pm; pm = pm->pm_next) {
1133         if (!strcasecmp (pm->pm_name, "boundary")) {
1134             bp = pm->pm_value;
1135             break;
1136         }
1137     }
1138
1139     /* complain if boundary parameter is missing */
1140     if (!pm) {
1141         inform("a \"boundary\" parameter is mandatory for \"%s/%s\" type in message %s's %s: field",
1142             ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1143         return NOTOK;
1144     }
1145
1146     /* allocate primary structure for multipart info */
1147     NEW0(m);
1148     ct->c_ctparams = (void *) m;
1149
1150     /* check if boundary parameter contains only whitespace characters */
1151     for (cp = bp; isspace ((unsigned char) *cp); cp++)
1152         continue;
1153     if (!*cp) {
1154         inform("invalid \"boundary\" parameter for \"%s/%s\" type in message %s's %s: field",
1155                 ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1156         return NOTOK;
1157     }
1158
1159     /* remove trailing whitespace from boundary parameter */
1160     for (cp = bp, dp = cp + strlen (cp) - 1; dp > cp; dp--)
1161         if (!isspace ((unsigned char) *dp))
1162             break;
1163     *++dp = '\0';
1164
1165     /* record boundary separators */
1166     m->mp_start = concat (bp, "\n", NULL);
1167     m->mp_stop = concat (bp, "--\n", NULL);
1168
1169     if (!ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
1170         advise (ct->c_file, "unable to open for reading");
1171         return NOTOK;
1172     }
1173
1174     fseek (fp = ct->c_fp, pos = ct->c_begin, SEEK_SET);
1175     last = ct->c_end;
1176     next = &m->mp_parts;
1177     part = NULL;
1178     inout = true;
1179
1180     while ((gotlen = getline(&bufp, &buflen, fp)) != -1) {
1181         if (pos > last)
1182             break;
1183
1184         pos += gotlen;
1185         if (bufp[0] != '-' || bufp[1] != '-')
1186             continue;
1187         if (inout) {
1188             if (strcmp (bufp + 2, m->mp_start))
1189                 continue;
1190 next_part:
1191             NEW0(part);
1192             *next = part;
1193             next = &part->mp_next;
1194
1195             if (!(p = get_content (fp, ct->c_file,
1196                         ct->c_subtype == MULTI_DIGEST ? -1 : 0))) {
1197                 free(bufp);
1198                 ct->c_fp = NULL;
1199                 return NOTOK;
1200             }
1201             p->c_fp = NULL;
1202             part->mp_part = p;
1203             pos = p->c_begin;
1204             fseek (fp, pos, SEEK_SET);
1205             inout = false;
1206         } else {
1207             if (strcmp (bufp + 2, m->mp_start) == 0) {
1208                 inout = true;
1209 end_part:
1210                 p = part->mp_part;
1211                 p->c_end = ftell(fp) - (gotlen + 1);
1212                 if (p->c_end < p->c_begin)
1213                     p->c_begin = p->c_end;
1214                 if (inout)
1215                     goto next_part;
1216                 goto last_part;
1217             }
1218             if (strcmp (bufp + 2, m->mp_stop) == 0)
1219                 goto end_part;
1220         }
1221     }
1222
1223     if (! suppress_bogus_mp_content_warning) {
1224         inform("bogus multipart content in message %s", ct->c_file);
1225     }
1226     bogus_mp_content = true;
1227
1228     if (!inout && part) {
1229         p = part->mp_part;
1230         p->c_end = ct->c_end;
1231
1232         if (p->c_begin >= p->c_end) {
1233             for (next = &m->mp_parts; *next != part;
1234                      next = &((*next)->mp_next))
1235                 continue;
1236             *next = NULL;
1237             free_content (p);
1238             free(part);
1239         }
1240     }
1241
1242 last_part:
1243     /* reverse the order of the parts for multipart/alternative */
1244     if (ct->c_subtype == MULTI_ALTERNATE) {
1245         reverse_parts (ct);
1246         prefer_parts (ct);
1247     }
1248
1249     /*
1250      * label all subparts with part number, and
1251      * then initialize the content of the subpart.
1252      */
1253     {
1254         int partnum;
1255         char *pp;
1256         char partnam[BUFSIZ];
1257
1258         if (ct->c_partno) {
1259             snprintf (partnam, sizeof(partnam), "%s.", ct->c_partno);
1260             pp = partnam + strlen (partnam);
1261         } else {
1262             pp = partnam;
1263         }
1264
1265         for (part = m->mp_parts, partnum = 1; part;
1266                  part = part->mp_next, partnum++) {
1267             p = part->mp_part;
1268
1269             sprintf (pp, "%d", partnum);
1270             p->c_partno = mh_xstrdup(partnam);
1271
1272             /* initialize the content of the subparts */
1273             if (p->c_ctinitfnx && (*p->c_ctinitfnx) (p) == NOTOK) {
1274                 free(bufp);
1275                 fclose (ct->c_fp);
1276                 ct->c_fp = NULL;
1277                 return NOTOK;
1278             }
1279         }
1280     }
1281
1282     get_leftover_mp_content (ct, 1);
1283     get_leftover_mp_content (ct, 0);
1284
1285     free(bufp);
1286     fclose (ct->c_fp);
1287     ct->c_fp = NULL;
1288     return OK;
1289 }
1290
1291
1292 /*
1293  * reverse the order of the parts of a multipart/alternative,
1294  * presumably to put the "most favored" alternative first, for
1295  * ease of choosing/displaying it later on.  from a mail message on
1296  * nmh-workers, from kenh:
1297  *  "Stock" MH 6.8.5 did not have a reverse_parts() function, but I
1298  *  see code in mhn that did the same thing...  According to the RCS
1299  *  logs, that code was around from the initial checkin of mhn.c by
1300  *  John Romine in 1992, which is as far back as we have."
1301  */
1302 static void
1303 reverse_parts (CT ct)
1304 {
1305     struct multipart *m = (struct multipart *) ct->c_ctparams;
1306     struct part *part;
1307     struct part *next;
1308
1309     /* Reverse the order of its parts by walking the mp_parts list
1310        and pushing each node to the front. */
1311     for (part = m->mp_parts, m->mp_parts = NULL; part; part = next) {
1312         next = part->mp_next;
1313         part->mp_next = m->mp_parts;
1314         m->mp_parts = part;
1315     }
1316 }
1317
1318 static void
1319 move_preferred_part(CT ct, mime_type_subtype *pref)
1320 {
1321     struct multipart *m = (struct multipart *) ct->c_ctparams;
1322     struct part *part, *prev, *head, *nhead, *ntail;
1323     struct part h, n;
1324     CI ci;
1325
1326     /* move the matching part(s) to the head of the list:  walk the
1327      * list of parts, move matching parts to a new list (maintaining
1328      * their order), and finally, concatenate the old list onto the
1329      * new.
1330      */
1331
1332     head = &h;
1333     nhead = &n;
1334
1335     head->mp_next = m->mp_parts;
1336     nhead->mp_next = NULL;
1337     ntail = nhead;
1338
1339     prev = head;
1340     part = head->mp_next;
1341     while (part != NULL) {
1342         ci = &part->mp_part->c_ctinfo;
1343         if (!strcasecmp(ci->ci_type, pref->type) &&
1344             (!pref->subtype ||
1345                 !strcasecmp(ci->ci_subtype, pref->subtype))) {
1346             prev->mp_next = part->mp_next;
1347             part->mp_next = NULL;
1348             ntail->mp_next = part;
1349             ntail = part;
1350             part = prev->mp_next;
1351         } else {
1352             prev = part;
1353             part = prev->mp_next;
1354         }
1355     }
1356     ntail->mp_next = head->mp_next;
1357     m->mp_parts = nhead->mp_next;
1358 }
1359
1360 /*
1361  * move parts that match the user's preferences (-prefer) to the head
1362  * of the line.  process preferences in reverse so first one given
1363  * ends up first in line
1364  */
1365 static void
1366 prefer_parts(CT ct)
1367 {
1368     int i;
1369     for (i = 0; i < npreferred; i++)
1370         move_preferred_part(ct, mime_preference + i);
1371 }
1372
1373
1374
1375 /* parse_mime() arranges alternates in reverse (priority) order.  This
1376    function can be used to reverse them back.  This will put, for
1377    example, a text/plain part before a text/html part in a
1378    multipart/alternative part, for example, where it belongs. */
1379 void
1380 reverse_alternative_parts (CT ct)
1381 {
1382     if (ct->c_type == CT_MULTIPART) {
1383         struct multipart *m = (struct multipart *) ct->c_ctparams;
1384         struct part *part;
1385
1386         if (ct->c_subtype == MULTI_ALTERNATE) {
1387             reverse_parts (ct);
1388         }
1389
1390         /* And call recursively on each part of a multipart. */
1391         for (part = m->mp_parts; part; part = part->mp_next) {
1392             reverse_alternative_parts (part->mp_part);
1393         }
1394     }
1395 }
1396
1397
1398 /*
1399  * MESSAGE
1400  */
1401
1402 static int
1403 InitMessage (CT ct)
1404 {
1405     CI ci = &ct->c_ctinfo;
1406
1407     if ((ct->c_encoding != CE_7BIT) && (ct->c_encoding != CE_8BIT)) {
1408         inform("\"%s/%s\" type in message %s should be encoded in "
1409             "7bit or 8bit, continuing...", ci->ci_type, ci->ci_subtype,
1410             ct->c_file);
1411         return NOTOK;
1412     }
1413
1414     /* check for missing subtype */
1415     if (!*ci->ci_subtype)
1416         ci->ci_subtype = add ("rfc822", ci->ci_subtype);
1417
1418     /* match subtype */
1419     ct->c_subtype = ct_str_subtype (CT_MESSAGE, ci->ci_subtype);
1420
1421     switch (ct->c_subtype) {
1422         case MESSAGE_RFC822:
1423             break;
1424
1425         case MESSAGE_PARTIAL:
1426             {
1427                 PM pm;
1428                 struct partial *p;
1429
1430                 NEW0(p);
1431                 ct->c_ctparams = (void *) p;
1432
1433                 /* scan for parameters "id", "number", and "total" */
1434                 for (pm = ci->ci_first_pm; pm; pm = pm->pm_next) {
1435                     if (!strcasecmp (pm->pm_name, "id")) {
1436                         p->pm_partid = mh_xstrdup(FENDNULL(pm->pm_value));
1437                         continue;
1438                     }
1439                     if (!strcasecmp (pm->pm_name, "number")) {
1440                         if (sscanf (pm->pm_value, "%d", &p->pm_partno) != 1
1441                                 || p->pm_partno < 1) {
1442 invalid_param:
1443                             inform("invalid %s parameter for \"%s/%s\" type in message %s's %s field",
1444                                 pm->pm_name, ci->ci_type, ci->ci_subtype,
1445                                 ct->c_file, TYPE_FIELD);
1446                             return NOTOK;
1447                         }
1448                         continue;
1449                     }
1450                     if (!strcasecmp (pm->pm_name, "total")) {
1451                         if (sscanf (pm->pm_value, "%d", &p->pm_maxno) != 1
1452                                 || p->pm_maxno < 1)
1453                             goto invalid_param;
1454                         continue;
1455                     }
1456                 }
1457
1458                 if (!p->pm_partid
1459                         || !p->pm_partno
1460                         || (p->pm_maxno && p->pm_partno > p->pm_maxno)) {
1461                     inform("invalid parameters for \"%s/%s\" type in message %s's %s field",
1462                         ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1463                     return NOTOK;
1464                 }
1465             }
1466             break;
1467
1468         case MESSAGE_EXTERNAL:
1469             {
1470                 int exresult;
1471                 struct exbody *e;
1472                 CT p;
1473                 FILE *fp;
1474
1475                 NEW0(e);
1476                 ct->c_ctparams = (void *) e;
1477
1478                 if (!ct->c_fp
1479                         && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
1480                     advise (ct->c_file, "unable to open for reading");
1481                     return NOTOK;
1482                 }
1483
1484                 fseek (fp = ct->c_fp, ct->c_begin, SEEK_SET);
1485
1486                 if (!(p = get_content (fp, ct->c_file, 0))) {
1487                     ct->c_fp = NULL;
1488                     return NOTOK;
1489                 }
1490
1491                 e->eb_parent = ct;
1492                 e->eb_content = p;
1493                 p->c_ctexbody = e;
1494                 p->c_ceopenfnx = NULL;
1495                 if ((exresult = params_external (ct, 0)) != NOTOK
1496                         && p->c_ceopenfnx == openMail) {
1497                     int cc, size;
1498                     char *bp;
1499
1500                     if ((size = ct->c_end - p->c_begin) <= 0) {
1501                         if (!e->eb_subject)
1502                             content_error (NULL, ct,
1503                                            "empty body for access-type=mail-server");
1504                         goto no_body;
1505                     }
1506
1507                     e->eb_body = bp = mh_xmalloc ((unsigned) size);
1508                     fseek (p->c_fp, p->c_begin, SEEK_SET);
1509                     while (size > 0)
1510                         switch (cc = fread (bp, sizeof(*bp), size, p->c_fp)) {
1511                             case NOTOK:
1512                                 adios ("failed", "fread");
1513
1514                             case OK:
1515                                 die("unexpected EOF from fread");
1516
1517                             default:
1518                                 bp += cc, size -= cc;
1519                                 break;
1520                         }
1521                     *bp = 0;
1522                 }
1523 no_body:
1524                 p->c_fp = NULL;
1525                 p->c_end = p->c_begin;
1526
1527                 fclose (ct->c_fp);
1528                 ct->c_fp = NULL;
1529
1530                 if (exresult == NOTOK)
1531                     return NOTOK;
1532                 if (e->eb_flags == NOTOK)
1533                     return OK;
1534
1535                 switch (p->c_type) {
1536                     case CT_MULTIPART:
1537                         break;
1538
1539                     case CT_MESSAGE:
1540                         if (p->c_subtype != MESSAGE_RFC822)
1541                             break;
1542                         /* FALLTHRU */
1543                     default:
1544                         e->eb_partno = ct->c_partno;
1545                         if (p->c_ctinitfnx)
1546                             (*p->c_ctinitfnx) (p);
1547                         break;
1548                 }
1549             }
1550             break;
1551
1552         default:
1553             break;
1554     }
1555
1556     return OK;
1557 }
1558
1559
1560 int
1561 params_external (CT ct, int composing)
1562 {
1563     PM pm;
1564     struct exbody *e = (struct exbody *) ct->c_ctparams;
1565     CI ci = &ct->c_ctinfo;
1566
1567     ct->c_ceopenfnx = NULL;
1568     for (pm = ci->ci_first_pm; pm; pm = pm->pm_next) {
1569         if (!strcasecmp (pm->pm_name, "access-type")) {
1570             struct str2init *s2i;
1571             CT p = e->eb_content;
1572
1573             for (s2i = str2methods; s2i->si_key; s2i++)
1574                 if (!strcasecmp (pm->pm_value, s2i->si_key))
1575                     break;
1576             if (!s2i->si_key) {
1577                 e->eb_access = pm->pm_value;
1578                 e->eb_flags = NOTOK;
1579                 p->c_encoding = CE_EXTERNAL;
1580                 continue;
1581             }
1582             e->eb_access = s2i->si_key;
1583             e->eb_flags = s2i->si_val;
1584             p->c_encoding = CE_EXTERNAL;
1585
1586             /* Call the Init function for this external type */
1587             if ((*s2i->si_init)(p) == NOTOK)
1588                 return NOTOK;
1589             continue;
1590         }
1591         if (!strcasecmp (pm->pm_name, "name")) {
1592             e->eb_name = pm->pm_value;
1593             continue;
1594         }
1595         if (!strcasecmp (pm->pm_name, "permission")) {
1596             e->eb_permission = pm->pm_value;
1597             continue;
1598         }
1599         if (!strcasecmp (pm->pm_name, "site")) {
1600             e->eb_site = pm->pm_value;
1601             continue;
1602         }
1603         if (!strcasecmp (pm->pm_name, "directory")) {
1604             e->eb_dir = pm->pm_value;
1605             continue;
1606         }
1607         if (!strcasecmp (pm->pm_name, "mode")) {
1608             e->eb_mode = pm->pm_value;
1609             continue;
1610         }
1611         if (!strcasecmp (pm->pm_name, "size")) {
1612             sscanf (pm->pm_value, "%lu", &e->eb_size);
1613             continue;
1614         }
1615         if (!strcasecmp (pm->pm_name, "server")) {
1616             e->eb_server = pm->pm_value;
1617             continue;
1618         }
1619         if (!strcasecmp (pm->pm_name, "subject")) {
1620             e->eb_subject = pm->pm_value;
1621             continue;
1622         }
1623         if (!strcasecmp (pm->pm_name, "url")) {
1624             /*
1625              * According to RFC 2017, we have to remove all whitespace from
1626              * the URL
1627              */
1628
1629             char *u, *p = pm->pm_value;
1630             e->eb_url = u = mh_xmalloc(strlen(pm->pm_value) + 1);
1631
1632             for (; *p != '\0'; p++) {
1633                 if (! isspace((unsigned char) *p))
1634                     *u++ = *p;
1635             }
1636
1637             *u = '\0';
1638             continue;
1639         }
1640         if (composing && !strcasecmp (pm->pm_name, "body")) {
1641             e->eb_body = getcpy (pm->pm_value);
1642             continue;
1643         }
1644     }
1645
1646     if (!e->eb_access) {
1647         inform("invalid parameters for \"%s/%s\" type in message %s's %s field",
1648             ci->ci_type, ci->ci_subtype, ct->c_file, TYPE_FIELD);
1649         return NOTOK;
1650     }
1651
1652     return OK;
1653 }
1654
1655
1656 /*
1657  * APPLICATION
1658  */
1659
1660 static int
1661 InitApplication (CT ct)
1662 {
1663     CI ci = &ct->c_ctinfo;
1664
1665     /* match subtype */
1666     ct->c_subtype = ct_str_subtype (CT_APPLICATION, ci->ci_subtype);
1667
1668     return OK;
1669 }
1670
1671
1672 /*
1673  * TRANSFER ENCODINGS
1674  */
1675
1676 static int
1677 init_encoding (CT ct, OpenCEFunc openfnx)
1678 {
1679     ct->c_ceopenfnx  = openfnx;
1680     ct->c_ceclosefnx = close_encoding;
1681     ct->c_cesizefnx  = size_encoding;
1682
1683     return OK;
1684 }
1685
1686
1687 void
1688 close_encoding (CT ct)
1689 {
1690     CE ce = &ct->c_cefile;
1691
1692     if (ce->ce_fp) {
1693         fclose (ce->ce_fp);
1694         ce->ce_fp = NULL;
1695     }
1696 }
1697
1698
1699 static unsigned long
1700 size_encoding (CT ct)
1701 {
1702     int fd;
1703     unsigned long size;
1704     char *file;
1705     CE ce = &ct->c_cefile;
1706     struct stat st;
1707
1708     if (ce->ce_fp && fstat (fileno (ce->ce_fp), &st) != NOTOK)
1709         return (long) st.st_size;
1710
1711     if (ce->ce_file) {
1712         if (stat (ce->ce_file, &st) != NOTOK)
1713             return (long) st.st_size;
1714         return 0L;
1715     }
1716
1717     if (ct->c_encoding == CE_EXTERNAL)
1718         return ct->c_end - ct->c_begin;
1719
1720     file = NULL;
1721     if ((fd = (*ct->c_ceopenfnx) (ct, &file)) == NOTOK)
1722         return ct->c_end - ct->c_begin;
1723
1724     if (fstat (fd, &st) != NOTOK)
1725         size = (long) st.st_size;
1726     else
1727         size = 0L;
1728
1729     (*ct->c_ceclosefnx) (ct);
1730     return size;
1731 }
1732
1733
1734 /*
1735  * BASE64
1736  */
1737
1738 static int
1739 InitBase64 (CT ct)
1740 {
1741     return init_encoding (ct, openBase64);
1742 }
1743
1744
1745 static int
1746 openBase64 (CT ct, char **file)
1747 {
1748     ssize_t cc, len;
1749     int fd;
1750     bool own_ct_fp = false;
1751     char *cp, *buffer = NULL;
1752     /* sbeck -- handle suffixes */
1753     CI ci;
1754     CE ce = &ct->c_cefile;
1755     unsigned char *decoded;
1756     size_t decoded_len;
1757     unsigned char digest[16];
1758
1759     if (ce->ce_fp) {
1760         fseek (ce->ce_fp, 0L, SEEK_SET);
1761         goto ready_to_go;
1762     }
1763
1764     if (ce->ce_file) {
1765         if ((ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
1766             content_error (ce->ce_file, ct, "unable to fopen for reading");
1767             return NOTOK;
1768         }
1769         goto ready_to_go;
1770     }
1771
1772     if (*file == NULL) {
1773         ce->ce_unlink = 1;
1774     } else {
1775         ce->ce_file = mh_xstrdup(*file);
1776         ce->ce_unlink = 0;
1777     }
1778
1779     /* sbeck@cise.ufl.edu -- handle suffixes */
1780     ci = &ct->c_ctinfo;
1781     if ((cp = context_find_by_type ("suffix", ci->ci_type, ci->ci_subtype))) {
1782         if (ce->ce_unlink) {
1783             /* Create temporary file with filename extension. */
1784             if ((ce->ce_file = m_mktemps(invo_name, cp, NULL, NULL)) == NULL) {
1785                 die("unable to create temporary file in %s",
1786                       get_temp_dir());
1787             }
1788         } else {
1789             ce->ce_file = add (cp, ce->ce_file);
1790         }
1791     } else if (*file == NULL) {
1792         char *tempfile;
1793         if ((tempfile = m_mktemp2(NULL, invo_name, NULL, NULL)) == NULL) {
1794             die("unable to create temporary file in %s",
1795                   get_temp_dir());
1796         }
1797         ce->ce_file = mh_xstrdup(tempfile);
1798     }
1799
1800     if ((ce->ce_fp = fopen (ce->ce_file, "w+")) == NULL) {
1801         content_error (ce->ce_file, ct, "unable to fopen for reading/writing");
1802         return NOTOK;
1803     }
1804
1805     if ((len = ct->c_end - ct->c_begin) < 0)
1806         die("internal error(1)");
1807
1808     buffer = mh_xmalloc (len + 1);
1809
1810     if (! ct->c_fp) {
1811         if ((ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
1812             content_error (ct->c_file, ct, "unable to open for reading");
1813             return NOTOK;
1814         }
1815         own_ct_fp = true;
1816     }
1817
1818     lseek (fd = fileno (ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
1819     cp = buffer;
1820     while (len > 0) {
1821         switch (cc = read (fd, cp, len)) {
1822         case NOTOK:
1823             content_error (ct->c_file, ct, "error reading from");
1824             goto clean_up;
1825
1826         case OK:
1827             content_error (NULL, ct, "premature eof");
1828             goto clean_up;
1829
1830         default:
1831             if (cc > len)
1832                 cc = len;
1833             len -= cc;
1834             cp += cc;
1835         }
1836     }
1837
1838     /* decodeBase64() requires null-terminated input. */
1839     *cp = '\0';
1840
1841     if (decodeBase64 (buffer, &decoded, &decoded_len, ct->c_type == CT_TEXT,
1842                       ct->c_digested ? digest : NULL) != OK)
1843         goto clean_up;
1844
1845     {
1846         size_t i;
1847         unsigned char *decoded_p = decoded;
1848         for (i = 0; i < decoded_len; ++i) {
1849             putc (*decoded_p++, ce->ce_fp);
1850         }
1851         free(decoded);
1852         if (ferror (ce->ce_fp)) {
1853             content_error (ce->ce_file, ct, "error writing to");
1854             goto clean_up;
1855         }
1856
1857         if (ct->c_digested) {
1858             if (memcmp(digest, ct->c_digest,
1859                        sizeof digest)) {
1860                 content_error (NULL, ct,
1861                                "content integrity suspect (digest mismatch) -- continuing");
1862             } else {
1863                 if (debugsw) {
1864                     fprintf (stderr, "content integrity confirmed\n");
1865                 }
1866             }
1867         }
1868     }
1869
1870     fseek (ct->c_fp, 0L, SEEK_SET);
1871
1872     if (fflush (ce->ce_fp)) {
1873         content_error (ce->ce_file, ct, "error writing to");
1874         goto clean_up;
1875     }
1876
1877     fseek (ce->ce_fp, 0L, SEEK_SET);
1878
1879 ready_to_go:
1880     *file = ce->ce_file;
1881     if (own_ct_fp) {
1882       fclose (ct->c_fp);
1883       ct->c_fp = NULL;
1884     }
1885     free (buffer);
1886     return fileno (ce->ce_fp);
1887
1888 clean_up:
1889     if (own_ct_fp) {
1890       fclose (ct->c_fp);
1891       ct->c_fp = NULL;
1892     }
1893     free_encoding (ct, 0);
1894     free (buffer);
1895     return NOTOK;
1896 }
1897
1898
1899 /*
1900  * QUOTED PRINTABLE
1901  */
1902
/*
 * Value of each hexadecimal digit, indexed by 7-bit character code.
 * Every code that is not listed, i.e. every non-hex character, maps
 * to 0.
 */
static char hex2nib[0x80] = {
    /* '0' .. '9' */
    [0x30] = 0x00, [0x31] = 0x01, [0x32] = 0x02, [0x33] = 0x03,
    [0x34] = 0x04, [0x35] = 0x05, [0x36] = 0x06, [0x37] = 0x07,
    [0x38] = 0x08, [0x39] = 0x09,

    /* 'A' .. 'F' */
    [0x41] = 0x0A, [0x42] = 0x0B, [0x43] = 0x0C,
    [0x44] = 0x0D, [0x45] = 0x0E, [0x46] = 0x0F,

    /* 'a' .. 'f' */
    [0x61] = 0x0a, [0x62] = 0x0b, [0x63] = 0x0c,
    [0x64] = 0x0d, [0x65] = 0x0e, [0x66] = 0x0f
};
1921
1922
1923 static int
1924 InitQuoted (CT ct)
1925 {
1926     return init_encoding (ct, openQuoted);
1927 }
1928
1929
1930 static int
1931 openQuoted (CT ct, char **file)
1932 {
1933     int cc, digested, len, quoted;
1934     bool own_ct_fp = false;
1935     char *cp, *ep;
1936     char *bufp = NULL;
1937     size_t buflen;
1938     ssize_t gotlen;
1939     unsigned char mask;
1940     CE ce = &ct->c_cefile;
1941     /* sbeck -- handle suffixes */
1942     CI ci;
1943     MD5_CTX mdContext;
1944
1945     if (ce->ce_fp) {
1946         fseek (ce->ce_fp, 0L, SEEK_SET);
1947         goto ready_to_go;
1948     }
1949
1950     if (ce->ce_file) {
1951         if ((ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
1952             content_error (ce->ce_file, ct, "unable to fopen for reading");
1953             return NOTOK;
1954         }
1955         goto ready_to_go;
1956     }
1957
1958     if (*file == NULL) {
1959         ce->ce_unlink = 1;
1960     } else {
1961         ce->ce_file = mh_xstrdup(*file);
1962         ce->ce_unlink = 0;
1963     }
1964
1965     /* sbeck@cise.ufl.edu -- handle suffixes */
1966     ci = &ct->c_ctinfo;
1967     if ((cp = context_find_by_type ("suffix", ci->ci_type, ci->ci_subtype))) {
1968         if (ce->ce_unlink) {
1969             /* Create temporary file with filename extension. */
1970             if ((ce->ce_file = m_mktemps(invo_name, cp, NULL, NULL)) == NULL) {
1971                 die("unable to create temporary file in %s",
1972                       get_temp_dir());
1973             }
1974         } else {
1975             ce->ce_file = add (cp, ce->ce_file);
1976         }
1977     } else if (*file == NULL) {
1978         char *tempfile;
1979         if ((tempfile = m_mktemp2(NULL, invo_name, NULL, NULL)) == NULL) {
1980             die("unable to create temporary file in %s",
1981                   get_temp_dir());
1982         }
1983         ce->ce_file = mh_xstrdup(tempfile);
1984     }
1985
1986     if ((ce->ce_fp = fopen (ce->ce_file, "w+")) == NULL) {
1987         content_error (ce->ce_file, ct, "unable to fopen for reading/writing");
1988         return NOTOK;
1989     }
1990
1991     if ((len = ct->c_end - ct->c_begin) < 0)
1992         die("internal error(2)");
1993
1994     if (! ct->c_fp) {
1995         if ((ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
1996             content_error (ct->c_file, ct, "unable to open for reading");
1997             return NOTOK;
1998         }
1999         own_ct_fp = true;
2000     }
2001
2002     if ((digested = ct->c_digested))
2003         MD5Init (&mdContext);
2004
2005     quoted = 0;
2006
2007     fseek (ct->c_fp, ct->c_begin, SEEK_SET);
2008     while (len > 0) {
2009         if ((gotlen = getline(&bufp, &buflen, ct->c_fp)) == -1) {
2010             content_error (NULL, ct, "premature eof");
2011             goto clean_up;
2012         }
2013
2014         if ((cc = gotlen) > len)
2015             cc = len;
2016         len -= cc;
2017
2018         for (ep = (cp = bufp) + cc - 1; cp <= ep; ep--)
2019             if (!isspace ((unsigned char) *ep))
2020                 break;
2021         *++ep = '\n';
2022         ep++;
2023
2024         for (; cp < ep; cp++) {
2025             if (quoted > 0) {
2026                 /* in an escape sequence */
2027                 if (quoted == 1) {
2028                     /* at byte 1 of an escape sequence */
2029                     mask = hex2nib[((unsigned char) *cp) & 0x7f];
2030                     /* next is byte 2 */
2031                     quoted = 2;
2032                 } else {
2033                     /* at byte 2 of an escape sequence */
2034                     mask <<= 4;
2035                     mask |= hex2nib[((unsigned char) *cp) & 0x7f];
2036                     putc (mask, ce->ce_fp);
2037                     if (digested)
2038                         MD5Update (&mdContext, &mask, 1);
2039                     if (ferror (ce->ce_fp)) {
2040                         content_error (ce->ce_file, ct, "error writing to");
2041                         goto clean_up;
2042                     }
2043                     /* finished escape sequence; next may be literal or a new
2044                      * escape sequence */
2045                     quoted = 0;
2046                 }
2047                 /* on to next byte */
2048                 continue;
2049             }
2050
2051             /* not in an escape sequence */
2052             if (*cp == '=') {
2053                 /* starting an escape sequence, or invalid '='? */
2054                 if (cp + 1 < ep && cp[1] == '\n') {
2055                     /* "=\n" soft line break, eat the \n */
2056                     cp++;
2057                     continue;
2058                 }
2059                 if (cp + 1 >= ep || cp + 2 >= ep) {
2060                     /* We don't have 2 bytes left, so this is an invalid
2061                      * escape sequence; just show the raw bytes (below). */
2062                 } else if (isxdigit ((unsigned char) cp[1]) &&
2063                                         isxdigit ((unsigned char) cp[2])) {
2064                     /* Next 2 bytes are hex digits, making this a valid escape
2065                      * sequence; let's decode it (above). */
2066                     quoted = 1;
2067                     continue;
2068                 }
2069                 /* One or both of the next 2 is out of range, making this
2070                  * an invalid escape sequence; just show the raw bytes
2071                  * (below). */
2072             }
2073
2074             /* Just show the raw byte. */
2075             putc (*cp, ce->ce_fp);
2076             if (digested) {
2077                 if (*cp == '\n') {
2078                     MD5Update (&mdContext, (unsigned char *) "\r\n",2);
2079                 } else {
2080                     MD5Update (&mdContext, (unsigned char *) cp, 1);
2081                 }
2082             }
2083             if (ferror (ce->ce_fp)) {
2084                 content_error (ce->ce_file, ct, "error writing to");
2085                 goto clean_up;
2086             }
2087         }
2088     }
2089     if (quoted) {
2090         content_error (NULL, ct,
2091                        "invalid QUOTED-PRINTABLE encoding -- end-of-content while still quoting");
2092         goto clean_up;
2093     }
2094
2095     fseek (ct->c_fp, 0L, SEEK_SET);
2096
2097     if (fflush (ce->ce_fp)) {
2098         content_error (ce->ce_file, ct, "error writing to");
2099         goto clean_up;
2100     }
2101
2102     if (digested) {
2103         unsigned char digest[16];
2104
2105         MD5Final (digest, &mdContext);
2106         if (memcmp(digest, ct->c_digest,
2107                    sizeof digest))
2108             content_error (NULL, ct,
2109                            "content integrity suspect (digest mismatch) -- continuing");
2110         else if (debugsw)
2111             fprintf (stderr, "content integrity confirmed\n");
2112     }
2113
2114     fseek (ce->ce_fp, 0L, SEEK_SET);
2115
2116 ready_to_go:
2117     *file = ce->ce_file;
2118     if (own_ct_fp) {
2119       fclose (ct->c_fp);
2120       ct->c_fp = NULL;
2121     }
2122     free (bufp);
2123     return fileno (ce->ce_fp);
2124
2125 clean_up:
2126     free_encoding (ct, 0);
2127     if (own_ct_fp) {
2128       fclose (ct->c_fp);
2129       ct->c_fp = NULL;
2130     }
2131     free (bufp);
2132     return NOTOK;
2133 }
2134
2135
2136 /*
2137  * 7BIT
2138  */
2139
2140 static int
2141 Init7Bit (CT ct)
2142 {
2143     if (init_encoding (ct, open7Bit) == NOTOK)
2144         return NOTOK;
2145
2146     ct->c_cesizefnx = NULL;     /* no need to decode for real size */
2147     return OK;
2148 }
2149
2150
2151 int
2152 open7Bit (CT ct, char **file)
2153 {
2154     int cc, fd, len;
2155     bool own_ct_fp = false;
2156     char buffer[BUFSIZ];
2157     /* sbeck -- handle suffixes */
2158     char *cp;
2159     CI ci;
2160     CE ce = &ct->c_cefile;
2161
2162     if (ce->ce_fp) {
2163         fseek (ce->ce_fp, 0L, SEEK_SET);
2164         goto ready_to_go;
2165     }
2166
2167     if (ce->ce_file) {
2168         if ((ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2169             content_error (ce->ce_file, ct, "unable to fopen for reading");
2170             return NOTOK;
2171         }
2172         goto ready_to_go;
2173     }
2174
2175     if (*file == NULL) {
2176         ce->ce_unlink = 1;
2177     } else {
2178         ce->ce_file = mh_xstrdup(*file);
2179         ce->ce_unlink = 0;
2180     }
2181
2182     /* sbeck@cise.ufl.edu -- handle suffixes */
2183     ci = &ct->c_ctinfo;
2184     if ((cp = context_find_by_type ("suffix", ci->ci_type, ci->ci_subtype))) {
2185         if (ce->ce_unlink) {
2186             /* Create temporary file with filename extension. */
2187             if ((ce->ce_file = m_mktemps(invo_name, cp, NULL, NULL)) == NULL) {
2188                 die("unable to create temporary file in %s",
2189                       get_temp_dir());
2190             }
2191         } else {
2192             ce->ce_file = add (cp, ce->ce_file);
2193         }
2194     } else if (*file == NULL) {
2195         char *tempfile;
2196         if ((tempfile = m_mktemp2(NULL, invo_name, NULL, NULL)) == NULL) {
2197             die("unable to create temporary file in %s",
2198                   get_temp_dir());
2199         }
2200         ce->ce_file = mh_xstrdup(tempfile);
2201     }
2202
2203     if ((ce->ce_fp = fopen (ce->ce_file, "w+")) == NULL) {
2204         content_error (ce->ce_file, ct, "unable to fopen for reading/writing");
2205         return NOTOK;
2206     }
2207
2208     if (ct->c_type == CT_MULTIPART) {
2209         CI ci = &ct->c_ctinfo;
2210         char *buffer;
2211
2212         len = 0;
2213         fprintf (ce->ce_fp, "%s: %s/%s", TYPE_FIELD, ci->ci_type, ci->ci_subtype);
2214         len += LEN(TYPE_FIELD) + 2 + strlen (ci->ci_type)
2215             + 1 + strlen (ci->ci_subtype);
2216         buffer = output_params(len, ci->ci_first_pm, &len, 0);
2217
2218         if (buffer) {
2219             fputs (buffer, ce->ce_fp);
2220             free(buffer);
2221         }
2222
2223         if (ci->ci_comment) {
2224             if (len + 1 + (cc = 2 + strlen (ci->ci_comment)) >= CPERLIN) {
2225                 fputs ("\n\t", ce->ce_fp);
2226                 len = 8;
2227             }
2228             else {
2229                 putc (' ', ce->ce_fp);
2230                 len++;
2231             }
2232             fprintf (ce->ce_fp, "(%s)", ci->ci_comment);
2233             len += cc;
2234         }
2235         fprintf (ce->ce_fp, "\n");
2236         if (ct->c_id)
2237             fprintf (ce->ce_fp, "%s:%s", ID_FIELD, ct->c_id);
2238         if (ct->c_descr)
2239             fprintf (ce->ce_fp, "%s:%s", DESCR_FIELD, ct->c_descr);
2240         if (ct->c_dispo)
2241             fprintf (ce->ce_fp, "%s:%s", DISPO_FIELD, ct->c_dispo);
2242         fprintf (ce->ce_fp, "\n");
2243     }
2244
2245     if ((len = ct->c_end - ct->c_begin) < 0)
2246         die("internal error(3)");
2247
2248     if (! ct->c_fp) {
2249         if ((ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
2250             content_error (ct->c_file, ct, "unable to open for reading");
2251             return NOTOK;
2252         }
2253         own_ct_fp = true;
2254     }
2255
2256     lseek (fd = fileno (ct->c_fp), (off_t) ct->c_begin, SEEK_SET);
2257     while (len > 0)
2258         switch (cc = read (fd, buffer, sizeof(buffer) - 1)) {
2259         case NOTOK:
2260             content_error (ct->c_file, ct, "error reading from");
2261             goto clean_up;
2262
2263         case OK:
2264             content_error (NULL, ct, "premature eof");
2265             goto clean_up;
2266
2267         default:
2268             if (cc > len)
2269                 cc = len;
2270             len -= cc;
2271
2272             if ((int) fwrite (buffer, sizeof(*buffer), cc, ce->ce_fp) < cc) {
2273                 advise ("open7Bit", "fwrite");
2274             }
2275             if (ferror (ce->ce_fp)) {
2276                 content_error (ce->ce_file, ct, "error writing to");
2277                 goto clean_up;
2278             }
2279         }
2280
2281     fseek (ct->c_fp, 0L, SEEK_SET);
2282
2283     if (fflush (ce->ce_fp)) {
2284         content_error (ce->ce_file, ct, "error writing to");
2285         goto clean_up;
2286     }
2287
2288     fseek (ce->ce_fp, 0L, SEEK_SET);
2289
2290 ready_to_go:
2291     *file = ce->ce_file;
2292     if (own_ct_fp) {
2293       fclose (ct->c_fp);
2294       ct->c_fp = NULL;
2295     }
2296     return fileno (ce->ce_fp);
2297
2298 clean_up:
2299     free_encoding (ct, 0);
2300     if (own_ct_fp) {
2301       fclose (ct->c_fp);
2302       ct->c_fp = NULL;
2303     }
2304     return NOTOK;
2305 }
2306
2307
2308 /*
2309  * External
2310  */
2311
2312 static int
2313 openExternal (CT ct, CT cb, CE ce, char **file, int *fd)
2314 {
2315     char cachefile[BUFSIZ];
2316
2317     if (ce->ce_fp) {
2318         fseek (ce->ce_fp, 0L, SEEK_SET);
2319         goto ready_already;
2320     }
2321
2322     if (ce->ce_file) {
2323         if ((ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2324             content_error (ce->ce_file, ct, "unable to fopen for reading");
2325             return NOTOK;
2326         }
2327         goto ready_already;
2328     }
2329
2330     if (find_cache(ct, rcachesw, NULL, cb->c_id,
2331                 cachefile, sizeof(cachefile)) != NOTOK) {
2332         if ((ce->ce_fp = fopen (cachefile, "r"))) {
2333             ce->ce_file = mh_xstrdup(cachefile);
2334             ce->ce_unlink = 0;
2335             goto ready_already;
2336         }
2337         admonish (cachefile, "unable to fopen for reading");
2338     }
2339
2340     *fd = ce->ce_fp ? fileno (ce->ce_fp) : -1;
2341     return OK;
2342
2343 ready_already:
2344     *file = ce->ce_file;
2345     *fd = fileno (ce->ce_fp);
2346     return DONE;
2347 }
2348
2349 /*
2350  * File
2351  */
2352
2353 static int
2354 InitFile (CT ct)
2355 {
2356     return init_encoding (ct, openFile);
2357 }
2358
2359
2360 static int
2361 openFile (CT ct, char **file)
2362 {
2363     int fd, cachetype;
2364     char cachefile[BUFSIZ];
2365     struct exbody *e = ct->c_ctexbody;
2366     CE ce = &ct->c_cefile;
2367
2368     switch (openExternal (e->eb_parent, e->eb_content, ce, file, &fd)) {
2369         case NOTOK:
2370             return NOTOK;
2371
2372         case OK:
2373             break;
2374
2375         case DONE:
2376             return fd;
2377     }
2378
2379     if (!e->eb_name) {
2380         content_error (NULL, ct, "missing name parameter");
2381         return NOTOK;
2382     }
2383
2384     ce->ce_file = mh_xstrdup(e->eb_name);
2385     ce->ce_unlink = 0;
2386
2387     if ((ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2388         content_error (ce->ce_file, ct, "unable to fopen for reading");
2389         return NOTOK;
2390     }
2391
2392     if ((!e->eb_permission || strcasecmp (e->eb_permission, "read-write"))
2393             && find_cache (NULL, wcachesw, &cachetype, e->eb_content->c_id,
2394                 cachefile, sizeof(cachefile)) != NOTOK) {
2395         int mask;
2396         FILE *fp;
2397
2398         mask = umask (cachetype ? ~m_gmprot () : 0222);
2399         if ((fp = fopen (cachefile, "w"))) {
2400             int cc;
2401             char buffer[BUFSIZ];
2402             FILE *gp = ce->ce_fp;
2403
2404             fseek (gp, 0L, SEEK_SET);
2405
2406             while ((cc = fread (buffer, sizeof(*buffer), sizeof(buffer), gp))
2407                        > 0)
2408                 if ((int) fwrite (buffer, sizeof(*buffer), cc, fp) < cc) {
2409                     advise ("openFile", "fwrite");
2410                 }
2411             fflush (fp);
2412
2413             if (ferror (gp)) {
2414                 admonish (ce->ce_file, "error reading");
2415                 (void) m_unlink (cachefile);
2416             } else if (ferror (fp)) {
2417                 admonish (cachefile, "error writing");
2418                 (void) m_unlink (cachefile);
2419             }
2420             fclose (fp);
2421         }
2422         umask (mask);
2423     }
2424
2425     fseek (ce->ce_fp, 0L, SEEK_SET);
2426     *file = ce->ce_file;
2427     return fileno (ce->ce_fp);
2428 }
2429
2430 /*
2431  * FTP
2432  */
2433
2434 static int
2435 InitFTP (CT ct)
2436 {
2437     return init_encoding (ct, openFTP);
2438 }
2439
2440
2441 static int
2442 openFTP (CT ct, char **file)
2443 {
2444     int cachetype;
2445     bool caching;
2446     int fd;
2447     int len, buflen;
2448     char *bp, *ftp, *user, *pass;
2449     char buffer[BUFSIZ], cachefile[BUFSIZ];
2450     struct exbody *e;
2451     CE ce = &ct->c_cefile;
2452     static char *username = NULL;
2453     static char *password = NULL;
2454
2455     e  = ct->c_ctexbody;
2456
2457     if ((ftp = context_find (nmhaccessftp)) && !*ftp)
2458         ftp = NULL;
2459
2460     if (!ftp)
2461         return NOTOK;
2462
2463     switch (openExternal (e->eb_parent, e->eb_content, ce, file, &fd)) {
2464         case NOTOK:
2465             return NOTOK;
2466
2467         case OK:
2468             break;
2469
2470         case DONE:
2471             return fd;
2472     }
2473
2474     if (!e->eb_name || !e->eb_site) {
2475         content_error (NULL, ct, "missing %s parameter",
2476                        e->eb_name ? "site": "name");
2477         return NOTOK;
2478     }
2479
2480     /* Get the buffer ready to go */
2481     bp = buffer;
2482     buflen = sizeof(buffer);
2483
2484     /*
2485      * Construct the query message for user
2486      */
2487     snprintf (bp, buflen, "Retrieve %s", e->eb_name);
2488     len = strlen (bp);
2489     bp += len;
2490     buflen -= len;
2491
2492     if (e->eb_partno) {
2493         snprintf (bp, buflen, " (content %s)", e->eb_partno);
2494         len = strlen (bp);
2495         bp += len;
2496         buflen -= len;
2497     }
2498
2499     snprintf (bp, buflen, "\n    using %sFTP from site %s",
2500                     e->eb_flags ? "anonymous " : "", e->eb_site);
2501     len = strlen (bp);
2502     bp += len;
2503     buflen -= len;
2504
2505     if (e->eb_size > 0) {
2506         snprintf (bp, buflen, " (%lu octets)", e->eb_size);
2507         len = strlen (bp);
2508         bp += len;
2509         buflen -= len;
2510     }
2511     snprintf (bp, buflen, "? ");
2512
2513     /*
2514      * Now, check the answer
2515      */
2516     if (!read_yes_or_no_if_tty (buffer))
2517         return NOTOK;
2518
2519     if (e->eb_flags) {
2520         user = "anonymous";
2521         snprintf (buffer, sizeof(buffer), "%s@%s", getusername (),
2522                   LocalName (1));
2523         pass = buffer;
2524     } else {
2525         ruserpass (e->eb_site, &username, &password, 0);
2526         user = username;
2527         pass = password;
2528     }
2529
2530     ce->ce_unlink = (*file == NULL);
2531     caching = false;
2532     cachefile[0] = '\0';
2533     if ((!e->eb_permission || strcasecmp (e->eb_permission, "read-write"))
2534             && find_cache (NULL, wcachesw, &cachetype, e->eb_content->c_id,
2535                 cachefile, sizeof(cachefile)) != NOTOK) {
2536         if (*file == NULL) {
2537             ce->ce_unlink = 0;
2538             caching = true;
2539         }
2540     }
2541
2542     if (*file)
2543         ce->ce_file = mh_xstrdup(*file);
2544     else if (caching)
2545         ce->ce_file = mh_xstrdup(cachefile);
2546     else {
2547         char *tempfile;
2548         if ((tempfile = m_mktemp2(NULL, invo_name, NULL, NULL)) == NULL) {
2549             die("unable to create temporary file in %s",
2550                   get_temp_dir());
2551         }
2552         ce->ce_file = mh_xstrdup(tempfile);
2553     }
2554
2555     if ((ce->ce_fp = fopen (ce->ce_file, "w+")) == NULL) {
2556         content_error (ce->ce_file, ct, "unable to fopen for reading/writing");
2557         return NOTOK;
2558     }
2559
2560     {
2561         int child_id, vecp;
2562         char *vec[9];
2563
2564         vecp = 0;
2565         vec[vecp++] = r1bindex (ftp, '/');
2566         vec[vecp++] = e->eb_site;
2567         vec[vecp++] = user;
2568         vec[vecp++] = pass;
2569         vec[vecp++] = e->eb_dir;
2570         vec[vecp++] = e->eb_name;
2571         vec[vecp++] = ce->ce_file,
2572         vec[vecp++] = e->eb_mode && !strcasecmp (e->eb_mode, "ascii")
2573                         ? "ascii" : "binary";
2574         vec[vecp] = NULL;
2575
2576         fflush (stdout);
2577
2578         child_id = fork();
2579         switch (child_id) {
2580             case NOTOK:
2581                 adios ("fork", "unable to");
2582                 /* NOTREACHED */
2583
2584             case OK:
2585                 close (fileno (ce->ce_fp));
2586                 execvp (ftp, vec);
2587                 fprintf (stderr, "unable to exec ");
2588                 perror (ftp);
2589                 _exit(1);
2590                 /* NOTREACHED */
2591
2592             default:
2593                 if (pidXwait (child_id, NULL)) {
2594                     username = password = NULL;
2595                     ce->ce_unlink = 1;
2596                     return NOTOK;
2597                 }
2598                 break;
2599         }
2600     }
2601
2602     if (cachefile[0]) {
2603         if (caching)
2604             chmod (cachefile, cachetype ? m_gmprot () : 0444);
2605         else {
2606             int mask;
2607             FILE *fp;
2608
2609             mask = umask (cachetype ? ~m_gmprot () : 0222);
2610             if ((fp = fopen (cachefile, "w"))) {
2611                 int cc;
2612                 FILE *gp = ce->ce_fp;
2613
2614                 fseek (gp, 0L, SEEK_SET);
2615
2616                 while ((cc= fread (buffer, sizeof(*buffer), sizeof(buffer), gp))
2617                            > 0)
2618                     if ((int) fwrite (buffer, sizeof(*buffer), cc, fp) < cc) {
2619                         advise ("openFTP", "fwrite");
2620                     }
2621                 fflush (fp);
2622
2623                 if (ferror (gp)) {
2624                     admonish (ce->ce_file, "error reading");
2625                     (void) m_unlink (cachefile);
2626                 } else if (ferror (fp)) {
2627                     admonish (cachefile, "error writing");
2628                     (void) m_unlink (cachefile);
2629                 }
2630                 fclose (fp);
2631             }
2632             umask (mask);
2633         }
2634     }
2635
2636     fseek (ce->ce_fp, 0L, SEEK_SET);
2637     *file = ce->ce_file;
2638     return fileno (ce->ce_fp);
2639 }
2640
2641
2642 /*
2643  * Mail
2644  */
2645
2646 static int
2647 InitMail (CT ct)
2648 {
2649     return init_encoding (ct, openMail);
2650 }
2651
2652
2653 static int
2654 openMail (CT ct, char **file)
2655 {
2656     int child_id, fd, vecp;
2657     int len, buflen;
2658     char *bp, buffer[BUFSIZ], *vec[7];
2659     struct exbody *e = ct->c_ctexbody;
2660     CE ce = &ct->c_cefile;
2661
2662     switch (openExternal (e->eb_parent, e->eb_content, ce, file, &fd)) {
2663         case NOTOK:
2664             return NOTOK;
2665
2666         case OK:
2667             break;
2668
2669         case DONE:
2670             return fd;
2671     }
2672
2673     if (!e->eb_server) {
2674         content_error (NULL, ct, "missing server parameter");
2675         return NOTOK;
2676     }
2677
2678     /* Get buffer ready to go */
2679     bp = buffer;
2680     buflen = sizeof(buffer);
2681
2682     /* Now, construct query message */
2683     snprintf (bp, buflen, "Retrieve content");
2684     len = strlen (bp);
2685     bp += len;
2686     buflen -= len;
2687
2688     if (e->eb_partno) {
2689         snprintf (bp, buflen, " %s", e->eb_partno);
2690         len = strlen (bp);
2691         bp += len;
2692         buflen -= len;
2693     }
2694
2695     snprintf (bp, buflen, " by asking %s\n\n%s\n? ",
2696                     e->eb_server,
2697                     e->eb_subject ? e->eb_subject : e->eb_body);
2698
2699     /* Now, check answer */
2700     if (!read_yes_or_no_if_tty (buffer))
2701         return NOTOK;
2702
2703     vecp = 0;
2704     vec[vecp++] = r1bindex (mailproc, '/');
2705     vec[vecp++] = e->eb_server;
2706     vec[vecp++] = "-subject";
2707     vec[vecp++] = e->eb_subject ? e->eb_subject : "mail-server request";
2708     vec[vecp++] = "-body";
2709     vec[vecp++] = e->eb_body;
2710     vec[vecp] = NULL;
2711
2712     child_id = fork();
2713     switch (child_id) {
2714         case NOTOK:
2715             advise ("fork", "unable to");
2716             return NOTOK;
2717
2718         case OK:
2719             execvp (mailproc, vec);
2720             fprintf (stderr, "unable to exec ");
2721             perror (mailproc);
2722             _exit(1);
2723             /* NOTREACHED */
2724
2725         default:
2726             if (pidXwait (child_id, NULL) == OK)
2727                 inform("request sent");
2728             break;
2729     }
2730
2731     if (*file == NULL) {
2732         char *tempfile;
2733         if ((tempfile = m_mktemp2(NULL, invo_name, NULL, NULL)) == NULL) {
2734             die("unable to create temporary file in %s",
2735                   get_temp_dir());
2736         }
2737         ce->ce_file = mh_xstrdup(tempfile);
2738         ce->ce_unlink = 1;
2739     } else {
2740         ce->ce_file = mh_xstrdup(*file);
2741         ce->ce_unlink = 0;
2742     }
2743
2744     if ((ce->ce_fp = fopen (ce->ce_file, "w+")) == NULL) {
2745         content_error (ce->ce_file, ct, "unable to fopen for reading/writing");
2746         return NOTOK;
2747     }
2748
2749     /* showproc is for mhshow and mhstore, though mhlist -debug
2750      * prints it, too. */
2751     free(ct->c_showproc);
2752     ct->c_showproc = mh_xstrdup("true");
2753
2754     fseek (ce->ce_fp, 0L, SEEK_SET);
2755     *file = ce->ce_file;
2756     return fileno (ce->ce_fp);
2757 }
2758
2759
2760 /*
2761  * URL
2762  */
2763
2764 static int
2765 InitURL (CT ct)
2766 {
2767     return init_encoding (ct, openURL);
2768 }
2769
2770
2771 static int
2772 openURL (CT ct, char **file)
2773 {
2774     struct exbody *e = ct->c_ctexbody;
2775     CE ce = &ct->c_cefile;
2776     char *urlprog, *program;
2777     char buffer[BUFSIZ], cachefile[BUFSIZ];
2778     int fd;
2779     bool caching;
2780     int cachetype;
2781     struct msgs_array args = { 0, 0, NULL};
2782     pid_t child_id;
2783
2784     if ((urlprog = context_find(nmhaccessurl)) && *urlprog == '\0')
2785         urlprog = NULL;
2786
2787     if (! urlprog) {
2788         content_error(NULL, ct, "No entry for nmh-access-url in profile");
2789         return NOTOK;
2790     }
2791
2792     switch (openExternal(e->eb_parent, e->eb_content, ce, file, &fd)) {
2793         case NOTOK:
2794             return NOTOK;
2795
2796         case OK:
2797             break;
2798
2799         case DONE:
2800             return fd;
2801     }
2802
2803     if (!e->eb_url) {
2804         content_error(NULL, ct, "missing url parameter");
2805         return NOTOK;
2806     }
2807
2808     ce->ce_unlink = (*file == NULL);
2809     caching = false;
2810     cachefile[0] = '\0';
2811
2812     if (find_cache(NULL, wcachesw, &cachetype, e->eb_content->c_id,
2813                    cachefile, sizeof(cachefile)) != NOTOK) {
2814         if (*file == NULL) {
2815             ce->ce_unlink = 0;
2816             caching = true;
2817         }
2818     }
2819
2820     if (*file)
2821         ce->ce_file = mh_xstrdup(*file);
2822     else if (caching)
2823         ce->ce_file = mh_xstrdup(cachefile);
2824     else {
2825         char *tempfile;
2826         if ((tempfile = m_mktemp2(NULL, invo_name, NULL, NULL)) == NULL) {
2827             die("unable to create temporary file in %s",
2828                   get_temp_dir());
2829         }
2830         ce->ce_file = mh_xstrdup(tempfile);
2831     }
2832
2833     if ((ce->ce_fp = fopen(ce->ce_file, "w+")) == NULL) {
2834         content_error(ce->ce_file, ct, "unable to fopen for read/writing");
2835         return NOTOK;
2836     }
2837
2838     switch (child_id = fork()) {
2839     case NOTOK:
2840         adios ("fork", "unable to");
2841         /* NOTREACHED */
2842
2843     case OK:
2844         argsplit_msgarg(&args, urlprog, &program);
2845         app_msgarg(&args, e->eb_url);
2846         app_msgarg(&args, NULL);
2847         dup2(fileno(ce->ce_fp), 1);
2848         close(fileno(ce->ce_fp));
2849         execvp(program, args.msgs);
2850         fprintf(stderr, "Unable to exec ");
2851         perror(program);
2852         _exit(1);
2853         /* NOTREACHED */
2854
2855     default:
2856         if (pidXwait(child_id, NULL)) {
2857             ce->ce_unlink = 1;
2858             return NOTOK;
2859         }
2860     }
2861
2862     if (cachefile[0]) {
2863         if (caching)
2864             chmod(cachefile, cachetype ? m_gmprot() : 0444);
2865         else {
2866             int mask;
2867             FILE *fp;
2868
2869             mask = umask (cachetype ? ~m_gmprot() : 0222);
2870             if ((fp = fopen(cachefile, "w"))) {
2871                 int cc;
2872                 FILE *gp = ce->ce_fp;
2873
2874                 fseeko(gp, 0, SEEK_SET);
2875
2876                 while ((cc = fread(buffer, sizeof(*buffer),
2877                                    sizeof(buffer), gp)) > 0)
2878                     if ((int) fwrite(buffer, sizeof(*buffer), cc, fp) < cc) {
2879                         advise ("openURL", "fwrite");
2880                     }
2881
2882                 fflush(fp);
2883
2884                 if (ferror(gp)) {
2885                     admonish(ce->ce_file, "error reading");
2886                     (void) m_unlink (cachefile);
2887                 }
2888             }
2889             umask(mask);
2890         }
2891     }
2892
2893     fseeko(ce->ce_fp, 0, SEEK_SET);
2894     *file = ce->ce_file;
2895     return fileno(ce->ce_fp);
2896 }
2897
2898
2899 /*
2900  * Stores MD5 digest (in cp, from Content-MD5 header) in ct->c_digest.  It
2901  * has to be base64 decoded.
2902  */
2903 static int
2904 readDigest (CT ct, char *cp)
2905 {
2906     unsigned char *digest;
2907
2908     size_t len;
2909     if (decodeBase64 (cp, &digest, &len, 0, NULL) == OK) {
2910         const size_t maxlen = sizeof ct->c_digest;
2911
2912         if (strlen ((char *) digest) <= maxlen) {
2913             memcpy (ct->c_digest, digest, maxlen);
2914
2915             if (debugsw) {
2916                 size_t i;
2917
2918                 fprintf (stderr, "MD5 digest=");
2919                 for (i = 0; i < maxlen; ++i) {
2920                     fprintf (stderr, "%02x", ct->c_digest[i] & 0xff);
2921                 }
2922                 fprintf (stderr, "\n");
2923             }
2924
2925             return OK;
2926         }
2927         if (debugsw) {
2928             fprintf (stderr, "invalid MD5 digest (got %d octets)\n",
2929                      (int) strlen ((char *) digest));
2930         }
2931
2932         return NOTOK;
2933     }
2934
2935     return NOTOK;
2936 }
2937
2938
2939 /* Multipart parts might have content before the first subpart and/or
2940    after the last subpart that hasn't been stored anywhere else, so do
2941    that. */
2942 int
2943 get_leftover_mp_content (CT ct, int before /* or after */)
2944 {
2945     struct multipart *m = (struct multipart *) ct->c_ctparams;
2946     char *boundary;
2947     bool found_boundary = false;
2948     int max = BUFSIZ;
2949     char *bufp = NULL;
2950     size_t buflen;
2951     ssize_t gotlen;
2952     int read = 0;
2953     char *content = NULL;
2954
2955     if (! m) return NOTOK;
2956
2957     if (before) {
2958         if (! m->mp_parts  ||  ! m->mp_parts->mp_part) return NOTOK;
2959
2960         /* Isolate the beginning of this part to the beginning of the
2961            first subpart and save any content between them. */
2962         fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
2963         max = m->mp_parts->mp_part->c_begin - ct->c_begin;
2964         boundary = concat ("--", m->mp_start, NULL);
2965     } else {
2966         struct part *last_subpart = NULL;
2967         struct part *subpart;
2968
2969         /* Go to the last subpart to get its end position. */
2970         for (subpart = m->mp_parts; subpart; subpart = subpart->mp_next) {
2971             last_subpart = subpart;
2972         }
2973
2974         if (last_subpart == NULL) return NOTOK;
2975
2976         /* Isolate the end of the last subpart to the end of this part
2977            and save any content between them. */
2978         fseeko (ct->c_fp, last_subpart->mp_part->c_end, SEEK_SET);
2979         max = ct->c_end - last_subpart->mp_part->c_end;
2980         boundary = concat ("--", m->mp_stop, NULL);
2981     }
2982
2983     /* Back up by 1 to pick up the newline. */
2984     while ((gotlen = getline(&bufp, &buflen, ct->c_fp)) != -1) {
2985         read += gotlen;
2986         /* Don't look beyond beginning of first subpart (before) or
2987            next part (after). */
2988         if (read > max) bufp[read-max] = '\0';
2989
2990         if (before) {
2991             if (! strcmp (bufp, boundary)) {
2992                 found_boundary = true;
2993             }
2994         } else {
2995             if (! found_boundary  &&  ! strcmp (bufp, boundary)) {
2996                 found_boundary = true;
2997                 continue;
2998             }
2999         }
3000
3001         if ((before && ! found_boundary)  ||  (! before && found_boundary)) {
3002             if (content) {
3003                 char *old_content = content;
3004                 content = concat (content, bufp, NULL);
3005                 free (old_content);
3006             } else {
3007                 content = before
3008                     ?  concat ("\n", bufp, NULL)
3009                     :  concat (bufp, NULL);
3010             }
3011         }
3012
3013         if (before) {
3014             if (found_boundary  ||  read > max) break;
3015         } else {
3016             if (read > max) break;
3017         }
3018     }
3019
3020     /* Skip the newline if that's all there is. */
3021     if (content) {
3022         char *cp;
3023
3024         /* Remove trailing newline, except at EOF. */
3025         if ((before || ! feof (ct->c_fp)) &&
3026             (cp = content + strlen (content)) > content  &&
3027             *--cp == '\n') {
3028             *cp = '\0';
3029         }
3030
3031         if (strlen (content) > 1) {
3032             if (before) {
3033                 m->mp_content_before = content;
3034             } else {
3035                 m->mp_content_after = content;
3036             }
3037         } else {
3038             free (content);
3039         }
3040     }
3041
3042     free (boundary);
3043     free (bufp);
3044
3045     return OK;
3046 }
3047
3048
3049 char *
3050 ct_type_str (int type)
3051 {
3052     switch (type) {
3053     case CT_APPLICATION:
3054         return "application";
3055     case CT_AUDIO:
3056         return "audio";
3057     case CT_IMAGE:
3058         return "image";
3059     case CT_MESSAGE:
3060         return "message";
3061     case CT_MULTIPART:
3062         return "multipart";
3063     case CT_TEXT:
3064         return "text";
3065     case CT_VIDEO:
3066         return "video";
3067     case CT_EXTENSION:
3068         return "extension";
3069     default:
3070         return "unknown_type";
3071     }
3072 }
3073
3074
3075 char *
3076 ct_subtype_str (int type, int subtype)
3077 {
3078     switch (type) {
3079     case CT_APPLICATION:
3080         switch (subtype) {
3081         case APPLICATION_OCTETS:
3082             return "octets";
3083         case APPLICATION_POSTSCRIPT:
3084             return "postscript";
3085         default:
3086             return "unknown_app_subtype";
3087         }
3088     case CT_MESSAGE:
3089         switch (subtype) {
3090         case MESSAGE_RFC822:
3091             return "rfc822";
3092         case MESSAGE_PARTIAL:
3093             return "partial";
3094         case MESSAGE_EXTERNAL:
3095             return "external";
3096         default:
3097             return "unknown_msg_subtype";
3098         }
3099     case CT_MULTIPART:
3100         switch (subtype) {
3101         case MULTI_MIXED:
3102             return "mixed";
3103         case MULTI_ALTERNATE:
3104             return "alternative";
3105         case MULTI_DIGEST:
3106             return "digest";
3107         case MULTI_PARALLEL:
3108             return "parallel";
3109         case MULTI_RELATED:
3110             return "related";
3111         default:
3112             return "unknown_multipart_subtype";
3113         }
3114     case CT_TEXT:
3115         switch (subtype) {
3116         case TEXT_PLAIN:
3117             return "plain";
3118         case TEXT_RICHTEXT:
3119             return "richtext";
3120         case TEXT_ENRICHED:
3121             return "enriched";
3122         default:
3123             return "unknown_text_subtype";
3124         }
3125     default:
3126         return "unknown_type";
3127     }
3128 }
3129
3130
3131 int
3132 ct_str_type (const char *type)
3133 {
3134     struct str2init *s2i;
3135
3136     for (s2i = str2cts; s2i->si_key; ++s2i) {
3137         if (! strcasecmp (type, s2i->si_key)) {
3138             break;
3139         }
3140     }
3141     if (! s2i->si_key  &&  ! uprf (type, "X-")) {
3142         ++s2i;
3143     }
3144
3145     return s2i->si_val;
3146 }
3147
3148
3149 int
3150 ct_str_subtype (int type, const char *subtype)
3151 {
3152     struct k2v *kv;
3153
3154     switch (type) {
3155     case CT_APPLICATION:
3156         for (kv = SubApplication; kv->kv_key; ++kv) {
3157             if (! strcasecmp (subtype, kv->kv_key)) {
3158                 break;
3159             }
3160         }
3161         return kv->kv_value;
3162     case CT_MESSAGE:
3163         for (kv = SubMessage; kv->kv_key; ++kv) {
3164             if (! strcasecmp (subtype, kv->kv_key)) {
3165                 break;
3166             }
3167         }
3168         return kv->kv_value;
3169     case CT_MULTIPART:
3170         for (kv = SubMultiPart; kv->kv_key; ++kv) {
3171             if (! strcasecmp (subtype, kv->kv_key)) {
3172                 break;
3173             }
3174         }
3175         return kv->kv_value;
3176     case CT_TEXT:
3177         for (kv = SubText; kv->kv_key; ++kv) {
3178             if (! strcasecmp (subtype, kv->kv_key)) {
3179                 break;
3180             }
3181         }
3182         return kv->kv_value;
3183     default:
3184         return 0;
3185     }
3186 }
3187
3188
3189 /* Find the content type and InitFunc for the CT. */
3190 const struct str2init *
3191 get_ct_init (int type)
3192 {
3193     const struct str2init *sp;
3194
3195     for (sp = str2cts; sp->si_key; ++sp) {
3196         if (type == sp->si_val) {
3197             return sp;
3198         }
3199     }
3200
3201     return NULL;
3202 }
3203
3204 const char *
3205 ce_str (int encoding)
3206 {
3207     switch (encoding) {
3208     case CE_BASE64:
3209         return "base64";
3210     case CE_QUOTED:
3211         return "quoted-printable";
3212     case CE_8BIT:
3213         return "8bit";
3214     case CE_7BIT:
3215         return "7bit";
3216     case CE_BINARY:
3217         return "binary";
3218     case CE_EXTENSION:
3219         return "extension";
3220     case CE_EXTERNAL:
3221         return "external";
3222     default:
3223         return "unknown";
3224     }
3225 }
3226
3227 /* Find the content type and InitFunc for the content encoding method. */
3228 const struct str2init *
3229 get_ce_method (const char *method)
3230 {
3231     struct str2init *sp;
3232
3233     for (sp = str2ces; sp->si_key; ++sp) {
3234         if (! strcasecmp (method, sp->si_key)) {
3235             return sp;
3236         }
3237     }
3238
3239     return NULL;
3240 }
3241
3242 /*
3243  * Parse a series of MIME attributes (or parameters) given a header as
3244  * input.
3245  *
3246  * Arguments include:
3247  *
3248  * filename     - Name of input file (for error messages)
3249  * fieldname    - Name of field being processed
3250  * headerp      - Pointer to pointer of the beginning of the MIME attributes.
3251  *                Updated to point to end of attributes when finished.
3252  * param_head   - Pointer to head of parameter list
3253  * param_tail   - Pointer to tail of parameter list
3254  * commentp     - Pointer to header comment pointer (may be NULL)
3255  *
3256  * Returns OK if parsing was successful, NOTOK if parsing failed, and
3257  * DONE to indicate a benign error (minor parsing error, but the program
3258  * should continue).
3259  */
3260
3261 static int
3262 parse_header_attrs (const char *filename, const char *fieldname,
3263                     char **header_attrp, PM *param_head, PM *param_tail,
3264                     char **commentp)
3265 {
3266     char *cp = *header_attrp;
3267     PM pm;
3268     struct sectlist {
3269         char *value;
3270         int index;
3271         int len;
3272         struct sectlist *next;
3273     } *sp, *sp2;
3274     struct parmlist {
3275         char *name;
3276         char *charset;
3277         char *lang;
3278         struct sectlist *sechead;
3279         struct parmlist *next;
3280     } *pp, *pp2, *phead = NULL;
3281
3282     while (*cp == ';') {
3283         char *dp, *vp, *up, *nameptr, *valptr, *charset = NULL, *lang = NULL;
3284         bool encoded = false;
3285         bool partial = false;
3286         int len = 0, index = 0;
3287
3288         cp++;
3289         while (isspace ((unsigned char) *cp))
3290             cp++;
3291
3292         if (*cp == '('  &&
3293             get_comment (filename, fieldname, &cp, commentp) == NOTOK) {
3294             return NOTOK;
3295         }
3296
3297         if (*cp == 0) {
3298             if (! suppress_extraneous_trailing_semicolon_warning) {
3299                 inform("extraneous trailing ';' in message %s's %s: "
3300                     "parameter list", filename, fieldname);
3301             }
3302             return DONE;
3303         }
3304
3305         /* down case the attribute name */
3306         for (dp = cp; istoken ((unsigned char) *dp); dp++)
3307             *dp = tolower ((unsigned char) *dp);
3308
3309         for (up = dp; isspace ((unsigned char) *dp);)
3310             dp++;
3311         if (dp == cp || *dp != '=') {
3312             inform("invalid parameter in message %s's %s: field\n"
3313                 "    parameter %s (error detected at offset %ld)",
3314                 filename, fieldname, cp, (long)(dp - cp));
3315             return NOTOK;
3316         }
3317
3318         /*
3319          * To handle RFC 2231, we have to deal with the following extensions:
3320          *
3321          * name*=encoded-value
3322          * name*<N>=part-N-of-a-parameter-value
3323          * name*<N>*=encoded-part-N-of-a-parameter-value
3324          *
3325          * So the rule is:
3326          * If there's a * right before the equal sign, it's encoded.
3327          * If there's a * and one or more digits, then it's section N.
3328          *
3329          * Remember we can have one or the other, or both.  cp points to
3330          * beginning of name, up points past the last character in the
3331          * parameter name.
3332          */
3333
3334         for (vp = cp; vp < up; vp++) {
3335             if (*vp == '*' && vp < up - 1) {
3336                 partial = true;
3337                 continue;
3338             }
3339             if (*vp == '*' && vp == up - 1) {
3340                 encoded = true;
3341             } else if (partial) {
3342                 if (isdigit((unsigned char) *vp))
3343                     index = *vp - '0' + index * 10;
3344                 else {
3345                     inform("invalid parameter index in message %s's %s: field"
3346                         "\n    (parameter %s)", filename, fieldname, cp);
3347                     return NOTOK;
3348                 }
3349             } else {
3350                 len++;
3351             }
3352         }
3353
3354         /*
3355          * Break out the parameter name and value sections and allocate
3356          * memory for each.
3357          */
3358
3359         nameptr = mh_xmalloc(len + 1);
3360         strncpy(nameptr, cp, len);
3361         nameptr[len] = '\0';
3362
3363         for (dp++; isspace ((unsigned char) *dp);)
3364             dp++;
3365
3366         if (encoded) {
3367             /*
3368              * Single quotes delimit the character set and language tag.
3369              * They are required on the first section (or a complete
3370              * parameter).
3371              */
3372             if (index == 0) {
3373                 vp = dp;
3374                 while (*vp != '\'' && !isspace((unsigned char) *vp) &&
3375                                                         *vp != '\0')
3376                     vp++;
3377                 if (*vp == '\'') {
3378                     if (vp != dp) {
3379                         len = vp - dp;
3380                         charset = mh_xmalloc(len + 1);
3381                         strncpy(charset, dp, len);
3382                         charset[len] = '\0';
3383                     } else {
3384                         charset = NULL;
3385                     }
3386                     vp++;
3387                 } else {
3388                     inform("missing charset in message %s's %s: field\n"
3389                         "    (parameter %s)", filename, fieldname, nameptr);
3390                     free(nameptr);
3391                     return NOTOK;
3392                 }
3393                 dp = vp;
3394
3395                 while (*vp != '\'' && !isspace((unsigned char) *vp) &&
3396                                                         *vp != '\0')
3397                     vp++;
3398
3399                 if (*vp == '\'') {
3400                     if (vp != dp) {
3401                         len = vp - dp;
3402                         lang = mh_xmalloc(len + 1);
3403                         strncpy(lang, dp, len);
3404                         lang[len] = '\0';
3405                     } else {
3406                         lang = NULL;
3407                     }
3408                     vp++;
3409                 } else {
3410                     inform("missing language tag in message %s's %s: field\n"
3411                         "    (parameter %s)", filename, fieldname, nameptr);
3412                     free(nameptr);
3413                     free(charset);
3414                     return NOTOK;
3415                 }
3416
3417                 dp = vp;
3418             }
3419
3420             /*
3421              * At this point vp should be pointing at the beginning
3422              * of the encoded value/section.  Continue until we reach
3423              * the end or get whitespace.  But first, calculate the
3424              * length so we can allocate the correct buffer size.
3425              */
3426
3427             for (vp = dp, len = 0; istoken(*vp); vp++) {
3428                 if (*vp == '%') {
3429                      if (*(vp + 1) == '\0' ||
3430                                 !isxdigit((unsigned char) *(vp + 1)) ||
3431                                 *(vp + 2) == '\0' ||
3432                                 !isxdigit((unsigned char) *(vp + 2))) {
3433                         inform("invalid encoded sequence in message %s's %s: field\n"
3434                             "    (parameter %s)", filename, fieldname, nameptr);
3435                         free(nameptr);
3436                         free(charset);
3437                         free(lang);
3438                         return NOTOK;
3439                     }
3440                     vp += 2;
3441                 }
3442                 len++;
3443             }
3444
3445             up = valptr = mh_xmalloc(len + 1);
3446
3447             for (vp = dp; istoken(*vp); vp++) {
3448                 if (*vp == '%') {
3449                     *up++ = decode_qp(*(vp + 1), *(vp + 2));
3450                     vp += 2;
3451                 } else {
3452                     *up++ = *vp;
3453                 }
3454             }
3455
3456             *up = '\0';
3457             cp = vp;
3458         } else {
3459             /*
3460              * A "normal" string.  If it's got a leading quote, then we
3461              * strip the quotes out.  Otherwise go until we reach the end
3462              * or get whitespace.  Note we scan it twice; once to get the
3463              * length, then the second time copies it into the destination
3464              * buffer.
3465              */
3466
3467             len = 0;
3468
3469             if (*dp == '"') {
3470                 for (cp = dp + 1;;) {
3471                     switch (*cp++) {
3472                     case '\0':
3473 bad_quote:
3474                         inform("invalid quoted-string in message %s's %s: field\n"
3475                             "    (parameter %s)", filename, fieldname, nameptr);
3476                         free(nameptr);
3477                         free(charset);
3478                         free(lang);
3479                         return NOTOK;
3480                     case '"':
3481                         break;
3482
3483                     case '\\':
3484                         if (*++cp == '\0')
3485                             goto bad_quote;
3486                         /* FALLTHRU */
3487                     default:
3488                         len++;
3489                         continue;
3490                     }
3491                     break;
3492                 }
3493
3494             } else {
3495                 for (cp = dp; istoken (*cp); cp++) {
3496                     len++;
3497                 }
3498             }
3499
3500             valptr = mh_xmalloc(len + 1);
3501
3502             if (*dp == '"') {
3503                 int i;
3504                 for (cp = dp + 1, vp = valptr, i = 0; i < len; i++) {
3505                     if (*cp == '\\') {
3506                         cp++;
3507                     }
3508                     *vp++ = *cp++;
3509                 }
3510                 cp++;
3511             } else {
3512                 strncpy(valptr, cp = dp, len);
3513                 cp += len;
3514             }
3515
3516             valptr[len] = '\0';
3517         }
3518
3519         /*
3520          * If 'partial' is set, we don't allocate a parameter now.  We
3521          * put it on the parameter linked list to be reassembled later.
3522          *
3523          * "phead" points to a list of all parameters we need to reassemble.
3524          * Each parameter has a list of sections. We insert the sections in
3525          * order.
3526          */
3527
3528         if (partial) {
3529             for (pp = phead; pp != NULL; pp = pp->next) {
3530                 if (strcasecmp(nameptr, pp->name) == 0) {
3531                     free (nameptr);
3532                     nameptr = pp->name;
3533                     break;
3534                 }
3535             }
3536
3537             if (pp == NULL) {
3538                 NEW0(pp);
3539                 pp->name = nameptr;
3540                 pp->next = phead;
3541                 phead = pp;
3542             }
3543
3544             /*
3545              * Insert this into the section linked list
3546              */
3547
3548             NEW0(sp);
3549             sp->value = valptr;
3550             sp->index = index;
3551             sp->len = len;
3552
3553             if (pp->sechead == NULL || pp->sechead->index > index) {
3554                 sp->next = pp->sechead;
3555                 pp->sechead = sp;
3556             } else {
3557                 for (sp2 = pp->sechead; sp2 != NULL; sp2 = sp2->next) {
3558                     if (sp2->index == sp->index) {
3559                         inform("duplicate index (%d) in message %s's %s: field"
3560                             "\n    (parameter %s)", sp->index, filename,
3561                             fieldname, nameptr);
3562                         return NOTOK;
3563                     }
3564                     if (sp2->index < sp->index &&
3565                         (sp2->next == NULL || sp2->next->index > sp->index)) {
3566                         sp->next = sp2->next;
3567                         sp2->next = sp;
3568                         break;
3569                     }
3570                 }
3571
3572                 if (sp2 == NULL) {
3573                     inform("Internal error: cannot insert partial param "
3574                         "in message %s's %s: field\n    (parameter %s)",
3575                         filename, fieldname, nameptr);
3576                     return NOTOK;
3577                 }
3578             }
3579
3580             /*
3581              * Save our charset and lang tags.
3582              */
3583
3584             if (index == 0 && encoded) {
3585                 free(pp->charset);
3586                 pp->charset = charset;
3587                 free(pp->lang);
3588                 pp->lang = lang;
3589             }
3590         } else {
3591             pm = add_param(param_head, param_tail, nameptr, valptr, 1);
3592             pm->pm_charset = charset;
3593             pm->pm_lang = lang;
3594         }
3595
3596         while (isspace ((unsigned char) *cp))
3597             cp++;
3598
3599         if (*cp == '('  &&
3600             get_comment (filename, fieldname, &cp, commentp) == NOTOK) {
3601             return NOTOK;
3602         }
3603     }
3604
3605     /*
3606      * Now that we're done, reassemble all of the partial parameters.
3607      */
3608
3609     for (pp = phead; pp != NULL; ) {
3610         char *p, *q;
3611         size_t tlen = 0;
3612         int pindex = 0;
3613         for (sp = pp->sechead; sp != NULL; sp = sp->next) {
3614             if (sp->index != pindex++) {
3615                 inform("missing section %d for parameter in message "
3616                     "%s's %s: field\n    (parameter %s)", pindex - 1,
3617                     filename, fieldname, pp->name);
3618                 return NOTOK;
3619             }
3620             tlen += sp->len;
3621         }
3622
3623         p = q = mh_xmalloc(tlen + 1);
3624         for (sp = pp->sechead; sp != NULL; ) {
3625             memcpy(q, sp->value, sp->len);
3626             q += sp->len;
3627             free(sp->value);
3628             sp2 = sp->next;
3629             free(sp);
3630             sp = sp2;
3631         }
3632
3633         p[tlen] = '\0';
3634
3635         pm = add_param(param_head, param_tail, pp->name, p, 1);
3636         pm->pm_charset = pp->charset;
3637         pm->pm_lang = pp->lang;
3638         pp2 = pp->next;
3639         free(pp);
3640         pp = pp2;
3641     }
3642
3643     *header_attrp = cp;
3644     return OK;
3645 }
3646
3647 /*
3648  * Return the charset for a particular content type.
3649  */
3650
3651 char *
3652 content_charset (CT ct)
3653 {
3654     char *ret_charset = NULL;
3655
3656     ret_charset = get_param(ct->c_ctinfo.ci_first_pm, "charset", '?', 0);
3657
3658     return ret_charset ? ret_charset : mh_xstrdup("US-ASCII");
3659 }
3660
3661
3662 /*
3663  * Create a string based on a list of output parameters.  Assume that this
3664  * parameter string will be appended to an existing header, so start out
3665  * with the separator (;).  Perform RFC 2231 encoding when necessary.
3666  */
3667
3668 char *
3669 output_params(size_t initialwidth, PM params, int *offsetout, int external)
3670 {
3671     char *paramout = NULL;
3672     char line[CPERLIN * 2], *q;
3673     int curlen, index, cont, encode, i;
3674     size_t valoff, numchars;
3675
3676     while (params != NULL) {
3677         encode = 0;
3678         index = 0;
3679         valoff = 0;
3680         q = line;
3681
3682         if (external && strcasecmp(params->pm_name, "body") == 0)
3683             continue;
3684
3685         if (strlen(params->pm_name) > CPERLIN) {
3686             inform("Parameter name \"%s\" is too long", params->pm_name);
3687             free(paramout);
3688             return NULL;
3689         }
3690
3691         curlen = param_len(params, index, valoff, &encode, &cont, &numchars);
3692
3693         /*
3694          * Loop until we get a parameter that fits within a line.  We
3695          * assume new lines start with a tab, so check our overflow based
3696          * on that.
3697          */
3698
3699         while (cont) {
3700             *q++ = ';';
3701             *q++ = '\n';
3702             *q++ = '\t';
3703
3704             /*
3705              * At this point we're definitely continuing the line, so
3706              * be sure to include the parameter name and section index.
3707              */
3708
3709             q += snprintf(q, sizeof(line) - (q - line), "%s*%d",
3710                           params->pm_name, index);
3711
3712             /*
3713              * Both of these functions do a NUL termination
3714              */
3715
3716             if (encode)
3717                 i = encode_param(params, q, sizeof(line) - (q - line),
3718                                  numchars, valoff, index);
3719             else
3720                 i = normal_param(params, q, sizeof(line) - (q - line),
3721                                  numchars, valoff);
3722
3723             if (i == 0) {
3724                 free(paramout);
3725                 return NULL;
3726             }
3727
3728             valoff += numchars;
3729             index++;
3730             curlen = param_len(params, index, valoff, &encode, &cont,
3731                                &numchars);
3732             q = line;
3733
3734             /*
3735              * "line" starts with a ;\n\t, so that doesn't count against
3736              * the length.  But add 8 since it starts with a tab; that's
3737              * how we end up with 5.
3738              */
3739
3740             initialwidth = strlen(line) + 5;
3741
3742             /*
3743              * At this point the line should be built, so add it to our
3744              * current output buffer.
3745              */
3746
3747             paramout = add(line, paramout);
3748         }
3749
3750         /*
3751          * If this won't fit on the line, start a new one.  Save room in
3752          * case we need a semicolon on the end
3753          */
3754
3755         if (initialwidth + curlen > CPERLIN - 1) {
3756             *q++ = ';';
3757             *q++ = '\n';
3758             *q++ = '\t';
3759             initialwidth = 8;
3760         } else {
3761             *q++ = ';';
3762             *q++ = ' ';
3763             initialwidth += 2;
3764         }
3765
3766         /*
3767          * At this point, we're either finishing a continued parameter, or
3768          * we're working on a new one.
3769          */
3770
3771         if (index > 0) {
3772             q += snprintf(q, sizeof(line) - (q - line), "%s*%d",
3773                           params->pm_name, index);
3774         } else {
3775             strncpy(q, params->pm_name, sizeof(line) - (q - line));
3776             q += strlen(q);
3777         }
3778
3779         if (encode)
3780             i = encode_param(params, q, sizeof(line) - (q - line),
3781                              strlen(params->pm_value + valoff), valoff, index);
3782         else
3783             i = normal_param(params, q, sizeof(line) - (q - line),
3784                              strlen(params->pm_value + valoff), valoff);
3785
3786         if (i == 0) {
3787             free(paramout);
3788             return NULL;
3789         }
3790
3791         paramout = add(line, paramout);
3792         initialwidth += strlen(line);
3793
3794         params = params->pm_next;
3795     }
3796
3797     if (offsetout)
3798         *offsetout = initialwidth;
3799
3800     return paramout;
3801 }
3802
3803 /*
3804  * Calculate the size of a parameter.
3805  *
3806  * Arguments include
3807  *
3808  * pm           - The parameter being output
3809  * index        - If continuing the parameter, the index of the section
3810  *                we're on.
3811  * valueoff     - The current offset into the parameter value that we're
3812  *                working on (previous sections have consumed valueoff bytes).
3813  * encode       - Set if we should perform encoding on this parameter section
3814  *                (given that we're consuming bytesfit bytes).
3815  * cont         - Set if the remaining data in value will not fit on a single
3816  *                line and will need to be continued.
3817  * bytesfit     - The number of bytes that we can consume from the parameter
3818  *                value and still fit on a completely new line.  The
3819  *                calculation assumes the new line starts with a tab,
3820  *                includes the parameter name and any encoding, and fits
3821  *                within CPERLIN bytes.  Will always be at least 1.
3822  */
3823
3824 static size_t
3825 param_len(PM pm, int index, size_t valueoff, int *encode, int *cont,
3826           size_t *bytesfit)
3827 {
3828     char *start = pm->pm_value + valueoff, *p, indexchar[32];
3829     size_t len = 0, fit = 0;
3830     int fitlimit = 0, eightbit, maxfit;
3831
3832     *encode = 0;
3833
3834     /*
3835      * Add up the length.  First, start with the parameter name.
3836      */
3837
3838     len = strlen(pm->pm_name);
3839
3840     /*
3841      * Scan the parameter value and see if we need to do encoding for this
3842      * section.
3843      */
3844
3845     eightbit = contains8bit(start, NULL);
3846
3847     /*
3848      * Determine if we need to encode this section.  Encoding is necessary if:
3849      *
3850      * - There are any 8-bit characters at all and we're on the first
3851      *   section.
3852      * - There are 8-bit characters within N bytes of our section start.
3853      *   N is calculated based on the number of bytes it would take to
3854      *   reach CPERLIN.  Specifically:
3855      *          8 (starting tab) +
3856      *          strlen(param name) +
3857      *          4 ('* for section marker, '=', opening/closing '"')
3858      *          strlen (index)
3859      *  is the number of bytes used by everything that isn't part of the
3860      *  value.  So that gets subtracted from CPERLIN.
3861      */
3862
3863     snprintf(indexchar, sizeof(indexchar), "%d", index);
3864     maxfit = CPERLIN - (12 + len + strlen(indexchar));
3865     if ((eightbit && index == 0) || contains8bit(start, start + maxfit)) {
3866         *encode = 1;
3867     }
3868
3869     len++;      /* Add in equal sign */
3870
3871     if (*encode) {
3872         /*
3873          * We're using maxfit as a marker for how many characters we can
3874          * fit into the line.  Bump it by two because we're not using quotes
3875          * when encoding.
3876          */
3877
3878         maxfit += 2;
3879
3880         /*
3881          * If we don't have a charset or language tag in this parameter,
3882          * add them now.
3883          */
3884
3885         if (! pm->pm_charset) {
3886             pm->pm_charset = mh_xstrdup(write_charset_8bit());
3887             if (strcasecmp(pm->pm_charset, "US-ASCII") == 0)
3888                 die("8-bit characters in parameter \"%s\", but "
3889                       "local character set is US-ASCII", pm->pm_name);
3890         }
3891         if (! pm->pm_lang)
3892             pm->pm_lang = mh_xstrdup("");       /* Default to a blank lang tag */
3893
3894         len++;          /* For the encoding marker */
3895         maxfit--;
3896         if (index == 0) {
3897             int enclen = strlen(pm->pm_charset) + strlen(pm->pm_lang) + 2;
3898             len += enclen;
3899             maxfit-= enclen;
3900         } else {
3901             /*
3902              * We know we definitely need to include an index.  maxfit already
3903              * includes the section marker.
3904              */
3905             len += strlen(indexchar);
3906         }
3907         for (p = start; *p != '\0'; p++) {
3908             if (isparamencode(*p)) {
3909                 len += 3;
3910                 maxfit -= 3;
3911             } else {
3912                 len++;
3913                 maxfit--;
3914             }
3915             /*
3916              * Just so there's no confusion: maxfit is counting OUTPUT
3917              * characters (post-encoding).  fit is counting INPUT characters.
3918              */
3919             if (! fitlimit && maxfit >= 0)
3920                 fit++;
3921             else if (! fitlimit)
3922                 fitlimit++;
3923         }
3924     } else {
3925         /*
3926          * Calculate the string length, but add room for quoting \
3927          * and " if necessary.  Also account for quotes at beginning
3928          * and end.
3929          */
3930         for (p = start; *p != '\0'; p++) {
3931             switch (*p) {
3932             case '"':
3933             case '\\':
3934                 len++;
3935                 maxfit--;
3936             /* FALLTHRU */
3937             default:
3938                 len++;
3939                 maxfit--;
3940             }
3941             if (! fitlimit && maxfit >= 0)
3942                 fit++;
3943             else if (! fitlimit)
3944                 fitlimit++;
3945         }
3946
3947         len += 2;
3948     }
3949
3950     if (fit < 1)
3951         fit = 1;
3952
3953     *cont = fitlimit;
3954     *bytesfit = fit;
3955
3956     return len;
3957 }
3958
3959 /*
3960  * Output an encoded parameter string.
3961  */
3962
3963 size_t
3964 encode_param(PM pm, char *output, size_t len, size_t valuelen,
3965               size_t valueoff, int index)
3966 {
3967     size_t outlen = 0, n;
3968     char *endptr = output + len, *p;
3969
3970     /*
3971      * First, output the marker for an encoded string.
3972      */
3973
3974     *output++ = '*';
3975     *output++ = '=';
3976     outlen += 2;
3977
3978     /*
3979      * If the index is 0, output the character set and language tag.
3980      * If theses were NULL, they should have already been filled in
3981      * by param_len().
3982      */
3983
3984     if (index == 0) {
3985         n = snprintf(output, len - outlen, "%s'%s'", pm->pm_charset,
3986                      pm->pm_lang);
3987         output += n;
3988         outlen += n;
3989         if (output > endptr) {
3990             inform("Internal error: parameter buffer overflow");
3991             return 0;
3992         }
3993     }
3994
3995     /*
3996      * Copy over the value, encoding if necessary
3997      */
3998
3999     p = pm->pm_value + valueoff;
4000     while (valuelen-- > 0) {
4001         if (isparamencode(*p)) {
4002             n = snprintf(output, len - outlen, "%%%02X", (unsigned char) *p++);
4003             output += n;
4004             outlen += n;
4005         } else {
4006             *output++ = *p++;
4007             outlen++;
4008         }
4009         if (output > endptr) {
4010             inform("Internal error: parameter buffer overflow");
4011             return 0;
4012         }
4013     }
4014
4015     *output = '\0';
4016
4017     return outlen;
4018 }
4019
4020 /*
4021  * Output a "normal" parameter, without encoding.  Be sure to escape
4022  * quotes and backslashes if necessary.
4023  */
4024
4025 static size_t
4026 normal_param(PM pm, char *output, size_t len, size_t valuelen,
4027              size_t valueoff)
4028 {
4029     size_t outlen = 0;
4030     char *endptr = output + len, *p;
4031
4032     *output++ = '=';
4033     *output++ = '"';
4034     outlen += 2;
4035
4036     p = pm->pm_value + valueoff;
4037
4038     while (valuelen-- > 0) {
4039         switch (*p) {
4040         case '\\':
4041         case '"':
4042             *output++ = '\\';
4043             outlen++;
4044             /* FALLTHRU */
4045         default:
4046             *output++ = *p++;
4047             outlen++;
4048         }
4049         if (output > endptr) {
4050             inform("Internal error: parameter buffer overflow");
4051             return 0;
4052         }
4053     }
4054
4055     if (output - 2 > endptr) {
4056         inform("Internal error: parameter buffer overflow");
4057         return 0;
4058     }
4059
4060     *output++ = '"';
4061     *output++ = '\0';
4062
4063     return outlen + 1;
4064 }
4065
4066 /*
4067  * Add a parameter to the parameter linked list
4068  */
4069
4070 PM
4071 add_param(PM *first, PM *last, char *name, char *value, int nocopy)
4072 {
4073     PM pm;
4074
4075     NEW0(pm);
4076     pm->pm_name = nocopy ? name : getcpy(name);
4077     pm->pm_value = nocopy ? value : getcpy(value);
4078
4079     if (*first) {
4080         (*last)->pm_next = pm;
4081         *last = pm;
4082     } else {
4083         *first = pm;
4084         *last = pm;
4085     }
4086
4087     return pm;
4088 }
4089
4090 /*
4091  * Either replace a current parameter with a new value, or add the parameter
4092  * to the parameter linked list.
4093  */
4094
4095 PM
4096 replace_param(PM *first, PM *last, char *name, char *value, int nocopy)
4097 {
4098     PM pm;
4099
4100     for (pm = *first; pm != NULL; pm = pm->pm_next) {
4101         if (strcasecmp(name, pm->pm_name) == 0) {
4102             /*
4103              * If nocopy is set, it's assumed that we own both name
4104              * and value.  We don't need name, so we discard it now.
4105              */
4106             if (nocopy)
4107                 free(name);
4108             free(pm->pm_value);
4109             pm->pm_value = nocopy ? value : getcpy(value);
4110             return pm;
4111         }
4112     }
4113
4114     return add_param(first, last, name, value, nocopy);
4115 }
4116
4117 /*
4118  * Retrieve a parameter value from a parameter linked list.  If the parameter
4119  * value needs converted to the local character set, do that now.
4120  */
4121
4122 char *
4123 get_param(PM first, const char *name, char replace, int fetchonly)
4124 {
4125     while (first != NULL) {
4126         if (strcasecmp(name, first->pm_name) == 0) {
4127             if (fetchonly)
4128                 return first->pm_value;
4129             return getcpy(get_param_value(first, replace));
4130         }
4131         first = first->pm_next;
4132     }
4133
4134     return NULL;
4135 }
4136
4137 /*
4138  * Return a parameter value, converting to the local character set if
4139  * necessary
4140  */
4141
4142 char *
4143 get_param_value(PM pm, char replace)
4144 {
4145     static char buffer[4096];           /* I hope no parameters are larger */
4146     size_t bufsize = sizeof(buffer);
4147 #ifdef HAVE_ICONV
4148     size_t inbytes;
4149     int utf8;
4150     iconv_t cd;
4151     ICONV_CONST char *p;
4152 #else /* HAVE_ICONV */
4153     char *p;
4154 #endif /* HAVE_ICONV */
4155
4156     char *q;
4157
4158     /*
4159      * If we don't have a character set indicated, it's assumed to be
4160      * US-ASCII.  If it matches our character set, we don't need to convert
4161      * anything.
4162      */
4163
4164     if (!pm->pm_charset || check_charset(pm->pm_charset,
4165                                          strlen(pm->pm_charset))) {
4166         return pm->pm_value;
4167     }
4168
4169     /*
4170      * In this case, we need to convert.  If we have iconv support, use
4171      * that.  Otherwise, go through and simply replace every non-ASCII
4172      * character with the substitution character.
4173      */
4174
4175 #ifdef HAVE_ICONV
4176     q = buffer;
4177     bufsize = sizeof(buffer);
4178     utf8 = strcasecmp(pm->pm_charset, "UTF-8") == 0;
4179
4180     cd = iconv_open(get_charset(), pm->pm_charset);
4181     if (cd == (iconv_t) -1) {
4182         goto noiconv;
4183     }
4184
4185     inbytes = strlen(pm->pm_value);
4186     p = pm->pm_value;
4187
4188     while (inbytes) {
4189         if (iconv(cd, &p, &inbytes, &q, &bufsize) == (size_t)-1) {
4190             if (errno != EILSEQ) {
4191                 iconv_close(cd);
4192                 goto noiconv;
4193             }
4194             /*
4195              * Reset shift state, substitute our character,
4196              * try to restart conversion.
4197              */
4198
4199             iconv(cd, NULL, NULL, &q, &bufsize);
4200
4201             if (bufsize == 0) {
4202                 iconv_close(cd);
4203                 goto noiconv;
4204             }
4205             *q++ = replace;
4206             bufsize--;
4207             if (bufsize == 0) {
4208                 iconv_close(cd);
4209                 goto noiconv;
4210             }
4211             if (utf8) {
4212                 for (++p, --inbytes;
4213                      inbytes > 0 && (((unsigned char) *p) & 0xc0) == 0x80;
4214                      ++p, --inbytes)
4215                     continue;
4216             } else {
4217                 p++;
4218                 inbytes--;
4219             }
4220         }
4221     }
4222
4223     iconv_close(cd);
4224
4225     if (bufsize == 0)
4226         q--;
4227     *q = '\0';
4228
4229     return buffer;
4230
4231 noiconv:
4232 #endif /* HAVE_ICONV */
4233
4234     /*
4235      * Take everything non-ASCII and substitute the replacement character
4236      */
4237
4238     q = buffer;
4239     bufsize = sizeof(buffer);
4240     for (p = pm->pm_value; *p != '\0' && bufsize > 1; p++, q++, bufsize--) {
4241         if (isascii((unsigned char) *p) && isprint((unsigned char) *p))
4242             *q = *p;
4243         else
4244             *q = replace;
4245     }
4246
4247     *q = '\0';
4248
4249     return buffer;
4250 }