diplodocus.org Git - nmh/blob - sbr/encode_rfc2047.c

   1 /* encode_rfc2047.c -- encode message headers using RFC 2047 encoding.
   2  *
   3  * This code is Copyright (c) 2002, by the authors of nmh.  See the
   4  * COPYRIGHT file in the root directory of the nmh distribution for
   5  * complete copyright information.
   6  */
   7
   8 #include <h/mh.h>
   9 #include <h/mhparse.h>
  10 #include <h/addrsbr.h>
  11 #include <h/utils.h>
  12
  13 /*
  14  * List of headers that contain addresses and as a result require special
  15  * handling
  16  */
  17
  18 static char *address_headers[] = {
  19     "To",
  20     "From",
  21     "cc",
  22     "Bcc",
  23     "Reply-To",
  24     "Sender",
  25     "Resent-To",
  26     "Resent-From",
  27     "Resent-cc",
  28     "Resent-Bcc",
  29     "Resent-Reply-To",
  30     "Resent-Sender",
  31     NULL,
  32 };
  33
  34 /*
  35  * Macros we use for parsing headers
  36  */
  37
  38 #define is_fws(c) (c == '\t' || c == ' ' || c == '\n')
  39
  40 #define qphrasevalid(c) ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || \
  41                          (c >= 'a' && c <= 'z') || \
  42                          c == '!' || c == '*' || c == '+' || c == '-' || \
  43                          c == '/' || c == '=' || c == '_')
  44 #define qpspecial(c) (c < ' ' || c == '=' || c == '?' || c == '_')
  45
  46 #define base64len(n) ((((n) + 2) / 3 ) * 4)     /* String len to base64 len */
  47 #define strbase64(n) ((n) / 4 * 3)              /* Chars that fit in base64 */
  48
  49 #define ENCODELINELIMIT 76
  50
  51 static void unfold_header(char **, int);
  52 static int field_encode_address(const char *, char **, int, const char *);
  53 static int field_encode_quoted(const char *, char **, const char *, int,
  54                                int, int);
  55 static int field_encode_base64(const char *, char **, const char *);
  56 static int scanstring(const char *, int *, int *, int *);
  57 static int utf8len(const char *);
  58 static int pref_encoding(int, int, int);
  59
  60 /*
  61  * Encode a message header using RFC 2047 encoding.  We make the assumption
  62  * that all characters < 128 are ASCII and as a consequence don't need any
  63  * encoding.
  64  */
  65
  66 int
  67 encode_rfc2047(const char *name, char **value, int encoding,
  68                const char *charset)
  69 {
  70     int i, asciicount = 0, eightbitcount = 0, qpspecialcount = 0;
  71     char *p;
  72
  73     /*
  74      * First, check to see if we even need to encode the header
  75      */
  76
  77     for (p = *value; *p != '\0'; p++) {
  78         if (isascii((unsigned char) *p)) {
  79             asciicount++;
  80             if (qpspecial((unsigned char) *p))
  81                 qpspecialcount++;
  82         } else
  83             eightbitcount++;
  84     }
  85
  86     if (eightbitcount == 0)
  87         return 0;
  88
  89     /*
  90      * Some rules from RFC 2047:
  91      *
  92      * - Encoded words cannot be more than 75 characters long
  93      * - Multiple "long" encoded words must be on new lines.
  94      *
  95      * Also, we're not permitted to encode email addresses, so
  96      * we need to actually _parse_ email addresses and only encode
  97      * the right bits.
  98      */
  99
 100     /*
 101      * If charset was NULL, then get the value from the locale.  But
 102      * we reject it if it returns US-ASCII
 103      */
 104
 105     if (charset == NULL)
 106         charset = write_charset_8bit();
 107
 108     if (strcasecmp(charset, "US-ASCII") == 0) {
 109         inform("Cannot use US-ASCII with 8 bit characters in header");
 110         return 1;
 111     }
 112
 113     /*
 114      * If we have an address header, then we need to parse the addresses
 115      * and only encode the names or comments.  Otherwise, handle it normally.
 116      */
 117
 118     for (i = 0; address_headers[i]; i++) {
 119         if (strcasecmp(name, address_headers[i]) == 0)
 120             return field_encode_address(name, value, encoding, charset);
 121     }
 122
 123     /*
 124      * On the encoding we choose, and the specifics of encoding:
 125      *
 126      * - If a specified encoding is passed in, we use that.
 127      * - Otherwise, pick which encoding is shorter.
 128      *
 129      * We don't quite handle continuation right here, but it should be
 130      * pretty close.
 131      */
 132
 133     if (encoding == CE_UNKNOWN)
 134         encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount);
 135
 136     unfold_header(value, asciicount + eightbitcount);
 137
 138     switch (encoding) {
 139
 140     case CE_BASE64:
 141         return field_encode_base64(name, value, charset);
 142
 143     case CE_QUOTED:
 144         return field_encode_quoted(name, value, charset, asciicount,
 145                                    eightbitcount + qpspecialcount, 0);
 146
 147     default:
 148         inform("Internal error: unknown RFC-2047 encoding type");
 149         return 1;
 150     }
 151 }
 152
 153 /*
 154  * Encode our specified header (or field) using quoted-printable
 155  */
 156
 157 static int
 158 field_encode_quoted(const char *name, char **value, const char *charset,
 159                     int ascii, int encoded, int phraserules)
 160 {
 161     int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column, newline = 1;
 162     int charsetlen = strlen(charset), utf8;
 163     char *output = NULL, *p, *q = NULL;
 164
 165     /*
 166      * Right now we just encode the whole thing.  Maybe later on we'll
 167      * only encode things on a per-atom basis.
 168      */
 169
 170     p = *value;
 171
 172     column = prefixlen + 2;     /* Header name plus ": " */
 173
 174     utf8 = strcasecmp(charset, "UTF-8") == 0;
 175
 176     while (*p != '\0') {
 177         /*
 178          * Start a new line, if it's time
 179          */
 180         if (newline) {
 181             /*
 182              * If it's the start of the header, we don't need to pad it
 183              *
 184              * The length of the output string is ...
 185              * =?charset?Q?...?=  so that's 7+strlen(charset) + 2 for \n NUL
 186              *
 187              * plus 1 for every ASCII character and 3 for every eight bit
 188              * or special character (eight bit characters are written as =XX).
 189              *
 190              */
 191
 192             int tokenlen;
 193
 194             outlen += 9 + charsetlen + ascii + 3 * encoded;
 195
 196             /*
 197              * If output is set, then we're continuing the header.  Otherwise
 198              * do the initial allocation.
 199              */
 200
 201             if (output) {
 202                 int curlen = q - output, i;
 203                 outlen += prefixlen + 1;        /* Header plus \n ": " */
 204                 output = mh_xrealloc(output, outlen);
 205                 q = output + curlen;
 206                 *q++ = '?';
 207                 *q++ = '=';
 208                 *q++ = '\n';
 209                 for (i = 0; i < prefixlen; i++)
 210                     *q++ = ' ';
 211             } else {
 212                 /*
 213                  * A bit of a hack here; the header can contain multiple
 214                  * spaces (probably at least one) until we get to the
 215                  * actual text.  Copy until we get to a non-space.
 216                  */
 217                 output = mh_xmalloc(outlen);
 218                 q = output;
 219                 while (is_fws(*p))
 220                     *q++ = *p++;
 221             }
 222
 223             tokenlen = snprintf(q, outlen - (q - output), "=?%s?Q?", charset);
 224             q += tokenlen;
 225             column = prefixlen + tokenlen;
 226             newline = 0;
 227         }
 228
 229         /*
 230          * Process each character, encoding if necessary
 231          *
 232          * Note that we have a different set of rules if we're processing
 233          * RFC 5322 'phrase' (something you'd see in an address header).
 234          */
 235
 236         column++;
 237
 238         if (*p == ' ') {
 239             *q++ = '_';
 240             ascii--;
 241         } else if (isascii((unsigned char) *p) &&
 242                    (phraserules ? qphrasevalid((unsigned char) *p) :
 243                                         !qpspecial((unsigned char) *p))) {
 244             *q++ = *p;
 245             ascii--;
 246         } else {
 247             snprintf(q, outlen - (q - output), "=%02X", (unsigned char) *p);
 248             q += 3;
 249             column += 2;        /* column already incremented by 1 above */
 250             encoded--;
 251         }
 252
 253         p++;
 254
 255         /*
 256          * We're not allowed more than ENCODELINELIMIT characters per line,
 257          * so reserve some room for the final ?=.
 258          *
 259          * If prefixlen == 0, we haven't been passed in a header name, so
 260          * don't ever wrap the field (we're likely doing an address).
 261          */
 262
 263         if (prefixlen == 0)
 264             continue;
 265
 266         if (column >= ENCODELINELIMIT - 2) {
 267             newline = 1;
 268         } else if (utf8) {
 269             /*
 270              * Okay, this is a bit weird, but to explain a bit more ...
 271              *
 272              * RFC 2047 prohibits the splitting of multibyte characters
 273              * across encoded words.  Right now we only handle the case
 274              * of UTF-8, the most common multibyte encoding.
 275              *
 276              * p is now pointing at the next input character.  If we're
 277              * using UTF-8 _and_ we'd go over ENCODELINELIMIT given the
 278              * length of the complete character, then trigger a newline
 279              * now.  Note that we check the length * 3 since we have to
 280              * allow for the encoded output.
 281              */
 282             if (column + (utf8len(p) * 3) > ENCODELINELIMIT - 2) {
 283                 newline = 1;
 284             }
 285         }
 286     }
 287
 288     if (q == NULL) {
 289         /* This should never happen, but just in case.  Found by
 290            clang static analyzer. */
 291         inform("null output encoding for %s, continuing...", *value);
 292         return 1;
 293     }
 294     *q++ = '?';
 295     *q++ = '=';
 296
 297     if (prefixlen)
 298         *q++ = '\n';
 299
 300     *q = '\0';
 301
 302     free(*value);
 303
 304     *value = output;
 305
 306     return 0;
 307 }
 308
 309 /*
 310  * Encode our specified header (or field) using base64.
 311  *
 312  * This is a little easier since every character gets encoded, we can
 313  * calculate the line wrap up front.
 314  */
 315
 316 static int
 317 field_encode_base64(const char *name, char **value, const char *charset)
 318 {
 319     int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset);
 320     int outlen = 0, numencode, curlen;
 321     char *output = NULL, *p = *value, *q = NULL, *linestart = NULL;
 322
 323     /*
 324      * Skip over any leading white space.
 325      */
 326
 327     while (*p == ' ' || *p == '\t')
 328         p++;
 329
 330     /*
 331      * If we had a zero-length prefix, then just encode the whole field
 332      * as-is, without line wrapping.  Note that in addition to the encoding
 333      *
 334      * The added length we need is =? + charset + ?B? ... ?=
 335      *
 336      * That's 7 + strlen(charset) + 2 (for \n NUL).
 337      */
 338
 339     while (prefixlen && ((base64len(strlen(p)) + 7 + charsetlen +
 340                           prefixlen) > ENCODELINELIMIT)) {
 341
 342         /*
 343          * Our very first time, don't pad the line in the front
 344          *
 345          * Note ENCODELINELIMIT is + 2 because of \n \0
 346          */
 347
 348
 349         if (! output) {
 350             outlen += ENCODELINELIMIT + 2;
 351             output = q = mh_xmalloc(outlen);
 352             linestart = q - prefixlen;  /* Yes, this is intentional */
 353         } else {
 354             int curstart = linestart - output;
 355             curlen = q - output;
 356
 357             outlen += ENCODELINELIMIT + 2;
 358             output = mh_xrealloc(output, outlen);
 359             q = output + curlen;
 360             linestart = output + curstart;
 361         }
 362
 363         /*
 364          * We should have enough space now, so prepend the encoding markers
 365          * and character set information.  The leading space is intentional.
 366          */
 367
 368         q += snprintf(q, outlen - (q - output), " =?%s?B?", charset);
 369
 370         /*
 371          * Find out how much room we have left on the line and see how
 372          * many characters we can stuff in.  The start of our line
 373          * is marked by "linestart", so use that to figure out how
 374          * many characters are left out of ENCODELINELIMIT.  Reserve
 375          * 2 characters for the end markers and calculate how many
 376          * characters we can fit into that space given the base64
 377          * encoding expansion.
 378          */
 379
 380         numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2);
 381
 382         if (numencode <= 0) {
 383             inform("Internal error: tried to encode %d characters "
 384                    "in base64", numencode);
 385             return 1;
 386         }
 387
 388         /*
 389          * RFC 2047 prohibits spanning multibyte characters across tokens.
 390          * Right now we only check for UTF-8.
 391          *
 392          * So note the key here ... we want to make sure the character BEYOND
 393          * our last character is not a continuation byte.  If it's the start
 394          * of a new multibyte character or a single-byte character, that's ok.
 395          */
 396
 397         if (strcasecmp(charset, "UTF-8") == 0) {
 398             /*
 399              * p points to the start of our current buffer, so p + numencode
 400              * is one past the last character to encode
 401              */
 402
 403             while (numencode > 0 && ((*(p + numencode) & 0xc0) == 0x80))
 404                 numencode--;
 405
 406             if (numencode == 0) {
 407                 inform("Internal error: could not find start of "
 408                        "UTF-8 character when base64 encoding header");
 409                 return 1;
 410             }
 411         }
 412
 413         if (writeBase64raw((unsigned char *) p, numencode,
 414                            (unsigned char *) q) != OK) {
 415             inform("Internal error: base64 encoding of header failed");
 416             return 1;
 417         }
 418
 419         p += numencode;
 420         q += base64len(numencode);
 421
 422         /*
 423          * This will point us at the beginning of the new line (trust me).
 424          */
 425
 426         linestart = q + 3;
 427
 428         /*
 429          * What's going on here?  Well, we know we're continuing to the next
 430          * line, so we want to add continuation padding.  We also add the
 431          * trailing marker for the RFC 2047 token at this time as well.
 432          * This uses a trick of snprintf(); we tell it to print a zero-length
 433          * string, but pad it out to prefixlen - 1 characters; that ends
 434          * up always printing out the requested number of spaces.  We use
 435          * prefixlen - 1 because we always add a space on the starting
 436          * token marker; this makes things work out correctly for the first
 437          * line, which should have a space between the ':' and the start
 438          * of the token.
 439          *
 440          * It's okay if you don't follow all of that.
 441          */
 442
 443         q += snprintf(q, outlen - (q - output), "?=\n%*s", prefixlen - 1, "");
 444     }
 445
 446     /*
 447      * We're here if there is either no prefix, or we can fit it in less
 448      * than ENCODELINELIMIT characters.  Encode the whole thing.
 449      */
 450
 451     outlen += prefixlen + 9 + charsetlen + base64len(strlen(p));
 452     curlen = q - output;
 453
 454     output = mh_xrealloc(output, outlen);
 455     q = output + curlen;
 456
 457     q += snprintf(q, outlen - (q - output), "%s=?%s?B?",
 458                   prefixlen ? " " : "", charset);
 459
 460     if (writeBase64raw((unsigned char *) p, strlen(p),
 461                        (unsigned char *) q) != OK) {
 462         inform("Internal error: base64 encoding of header failed");
 463         return 1;
 464     }
 465
 466     strcat(q, "?=");
 467
 468     if (prefixlen)
 469         strcat(q, "\n");
 470
 471     free(*value);
 472
 473     *value = output;
 474
 475     return 0;
 476 }
 477
 478 /*
 479  * Calculate the length of a UTF-8 character.
 480  *
 481  * If it's not a UTF-8 character (or we're in the middle of a multibyte
 482  * character) then simply return 0.
 483  */
 484
 485 static int
 486 utf8len(const char *p)
 487 {
 488     int len = 1;
 489
 490     if (*p == '\0')
 491         return 0;
 492
 493     if (isascii((unsigned char) *p) || (((unsigned char) *p) & 0xc0) == 0x80)
 494         return 0;
 495
 496     p++;
 497     while ((((unsigned char) *p++) & 0xc0) == 0x80)
 498         len++;
 499
 500     return len;
 501 }
 502
 503 /*
 504  * "Unfold" a header, making it a single line (without continuation)
 505  *
 506  * We cheat a bit here; we never make the string longer, so using the
 507  * original length here is fine.
 508  */
 509
 510 static void
 511 unfold_header(char **value, int len)
 512 {
 513     char *str = mh_xmalloc(len + 1);
 514     char *p = str, *q = *value;
 515
 516     while (*q != '\0') {
 517         if (*q == '\n') {
 518             /*
 519              * When we get a newline, skip to the next non-whitespace
 520              * character and add a space to replace all of the whitespace
 521              *
 522              * This has the side effect of stripping off the final newline
 523              * for the header; we put it back in the encoding routine.
 524              */
 525             while (is_fws(*q))
 526                 q++;
 527             if (*q == '\0')
 528                 break;
 529
 530             *p++ = ' ';
 531         } else {
 532             *p++ = *q++;
 533         }
 534     }
 535
 536     *p = '\0';
 537
 538     free(*value);
 539     *value = str;
 540 }
 541
 542 /*
 543  * Decode a header containing addresses.  This means we have to parse
 544  * each address and only encode the display-name or comment field.
 545  */
 546
 547 static int
 548 field_encode_address(const char *name, char **value, int encoding,
 549                      const char *charset)
 550 {
 551     int prefixlen = strlen(name) + 2, column = prefixlen, groupflag;
 552     int asciichars, specialchars, eightbitchars, reformat = 0, errflag = 0;
 553     size_t len;
 554     char *mp, *cp = NULL, *output = NULL;
 555     char *tmpbuf = NULL;
 556     size_t tmpbufsize = 0;
 557     struct mailname *mn;
 558     char errbuf[BUFSIZ];
 559
 560     /*
 561      * Because these are addresses, we need to handle them individually.
 562      *
 563      * Break them down and process them one by one.  This means we have to
 564      * rewrite the whole header, but that's unavoidable.
 565      */
 566
 567     /*
 568      * The output headers always have to start with a space first; this
 569      * is just the way the API works right now.
 570      */
 571
 572     output = add(" ", output);
 573
 574     for (groupflag = 0; (mp = getname(*value)); ) {
 575         if ((mn = getm(mp, NULL, 0, errbuf, sizeof(errbuf))) == NULL) {
 576             inform("%s: %s", errbuf, mp);
 577             errflag++;
 578             continue;
 579         }
 580
 581         reformat = 0;
 582
 583         /*
 584          * We only care if the phrase (m_pers) or any trailing comment
 585          * (m_note) have 8-bit characters.  If doing q-p, we also need
 586          * to encode anything marked as qspecial().  Unquote it first
 587          * so the specialchars count is right.
 588          */
 589
 590         if (! mn->m_pers)
 591             goto check_note;
 592
 593         if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) {
 594             tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
 595         }
 596
 597         unquote_string(mn->m_pers, tmpbuf);
 598
 599         if (scanstring(tmpbuf, &asciichars, &eightbitchars,
 600                        &specialchars)) {
 601             /*
 602              * If we have 8-bit characters, encode it.
 603              */
 604
 605             if (encoding == CE_UNKNOWN)
 606                 encoding = pref_encoding(asciichars, specialchars,
 607                                          eightbitchars);
 608
 609             /*
 610              * This is okay, because the output of unquote_string will be either
 611              * equal or shorter than the original.
 612              */
 613
 614             strcpy(mn->m_pers, tmpbuf);
 615
 616             switch (encoding) {
 617
 618             case CE_BASE64:
 619                 if (field_encode_base64(NULL, &mn->m_pers, charset)) {
 620                     errflag++;
 621                     goto out;
 622                 }
 623                 break;
 624
 625             case CE_QUOTED:
 626                 if (field_encode_quoted(NULL, &mn->m_pers, charset, asciichars,
 627                                         eightbitchars + specialchars, 1)) {
 628                     errflag++;
 629                     goto out;
 630                 }
 631                 break;
 632
 633             default:
 634                 inform("Internal error: unknown RFC-2047 encoding type");
 635                 errflag++;
 636                 goto out;
 637             }
 638
 639             reformat++;
 640         }
 641
 642         check_note:
 643
 644         /*
 645          * The "note" field is generally a comment at the end of the address,
 646          * at least as how it's implemented here.  Notes are always surrounded
 647          * by parenthesis (since they're comments).  Strip them out and
 648          * then put them back when we format the final field, but they do
 649          * not get encoded.
 650          */
 651
 652         if (! mn->m_note)
 653             goto do_reformat;
 654
 655         if ((len = strlen(mn->m_note)) + 1 > tmpbufsize) {
 656             tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
 657         }
 658
 659         if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') {
 660             inform("Internal error: Invalid note field \"%s\"",
 661                    mn->m_note);
 662             errflag++;
 663             goto out;
 664         }
 665
 666         strncpy(tmpbuf, mn->m_note + 1, len - 1);
 667         tmpbuf[len - 2] = '\0';
 668
 669         if (scanstring(tmpbuf, &asciichars, &eightbitchars,
 670                        &specialchars)) {
 671             /*
 672              * If we have 8-bit characters, encode it.
 673              */
 674
 675             if (encoding == CE_UNKNOWN)
 676                 encoding = pref_encoding(asciichars, specialchars,
 677                                          eightbitchars);
 678
 679             switch (encoding) {
 680
 681             case CE_BASE64:
 682                 if (field_encode_base64(NULL, &tmpbuf, charset)) {
 683                     errflag++;
 684                     goto out;
 685                 }
 686                 break;
 687
 688             case CE_QUOTED:
 689                 if (field_encode_quoted(NULL, &tmpbuf, charset, asciichars,
 690                                         eightbitchars + specialchars, 1)) {
 691                     errflag++;
 692                     goto out;
 693                 }
 694                 break;
 695
 696             default:
 697                 inform("Internal error: unknown RFC-2047 encoding type");
 698                 errflag++;
 699                 goto out;
 700             }
 701
 702             reformat++;
 703
 704             /*
 705              * Make sure the size of tmpbuf is correct (it always gets
 706              * reallocated in the above functions).
 707              */
 708
 709             tmpbufsize = strlen(tmpbuf) + 1;
 710
 711             /*
 712              * Put the note field back surrounded by parenthesis.
 713              */
 714
 715             mn->m_note = mh_xrealloc(mn->m_note, tmpbufsize + 2);
 716
 717             snprintf(mn->m_note, tmpbufsize + 2, "(%s)", tmpbuf);
 718         }
 719
 720 do_reformat:
 721
 722         /*
 723          * So, some explanation is in order.
 724          *
 725          * We know we need to rewrite at least one address in the header,
 726          * otherwise we wouldn't be here.  If we had to reformat this
 727          * particular address, then run it through adrformat().  Otherwise
 728          * we can use m_text directly.
 729          */
 730
 731         /*
 732          * If we were in a group but are no longer, make sure we add a
 733          * semicolon (which needs to be FIRST, as it needs to be at the end
 734          * of the last address).
 735          */
 736
 737         if (groupflag && ! mn->m_ingrp) {
 738             output = add(";", output);
 739             column++;
 740         }
 741
 742         groupflag = mn->m_ingrp;
 743
 744         if (mn->m_gname) {
 745             cp = mh_xstrdup(mn->m_gname);
 746         }
 747
 748         if (reformat) {
 749             cp = add(adrformat(mn), cp);
 750         } else {
 751             cp = add(mn->m_text, cp);
 752         }
 753
 754         len = strlen(cp);
 755
 756         /*
 757          * If we're not at the beginning of the line, add a command and
 758          * either a space or a newline.
 759          */
 760
 761         if (column != prefixlen) {
 762             if (len + column + 2 > OUTPUTLINELEN) {
 763
 764                 if ((size_t) (prefixlen + 3) < tmpbufsize)
 765                     tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = prefixlen + 3);
 766
 767                 snprintf(tmpbuf, tmpbufsize, ",\n%*s", column = prefixlen, "");
 768                 output = add(tmpbuf, output);
 769             } else {
 770                 output = add(", ", output);
 771                 column += 2;
 772             }
 773         }
 774
 775         /*
 776          * Finally add the address
 777          */
 778
 779         output = add(cp, output);
 780         column += len;
 781         free(cp);
 782         cp = NULL;
 783     }
 784
 785     /*
 786      * Just in case we're at the end of a list
 787      */
 788
 789     if (groupflag) {
 790         output = add(";", output);
 791     }
 792
 793     output = add("\n", output);
 794
 795     free(*value);
 796     *value = output;
 797     output = NULL;
 798
 799 out:
 800     mh_xfree(tmpbuf);
 801     mh_xfree(output);
 802
 803     return errflag > 0;
 804 }
 805
 806 /*
 807  * Scan a string, check for characters that need to be encoded
 808  */
 809
 810 static int
 811 scanstring(const char *string, int *asciilen, int *eightbitchars,
 812            int *specialchars)
 813 {
 814     *asciilen = 0;
 815     *eightbitchars = 0;
 816     *specialchars = 0;
 817
 818     for (; *string != '\0'; string++) {
 819         if ((isascii((unsigned char) *string))) {
 820             (*asciilen)++;
 821             /*
 822              * So, a space is not a valid phrase character, but we're counting
 823              * an exception here, because in q-p a space can be directly
 824              * encoded as an underscore.
 825              */
 826             if (!qphrasevalid((unsigned char) *string) && *string != ' ')
 827                 (*specialchars)++;
 828         } else {
 829             (*eightbitchars)++;
 830         }
 831     }
 832
 833     return *eightbitchars > 0;
 834 }
 835
 836 /*
 837  * This function is to be used to decide which encoding algorithm we should
 838  * use if one is not given.  Basically, we pick whichever one is the shorter
 839  * of the two.
 840  *
 841  * Arguments are:
 842  *
 843  * ascii        - Number of ASCII characters in to-be-encoded string.
 844  * specials     - Number of ASCII characters in to-be-encoded string that
 845  *                still require encoding under quoted-printable.  Note that
 846  *                these are included in the "ascii" total.
 847  * eightbit     - Eight-bit characters in the to-be-encoded string.
 848  *
 849  * Returns one of CE_BASE64 or CE_QUOTED.
 850  */
 851
 852 static int
 853 pref_encoding(int ascii, int specials, int eightbits)
 854 {
 855     /*
 856      * The length of the q-p encoding is:
 857      *
 858      * ascii - specials + (specials + eightbits) * 3.
 859      *
 860      * The length of the base64 encoding is:
 861      *
 862      * base64len(ascii + eightbits)     (See macro for details)
 863      */
 864
 865     return base64len(ascii + eightbits) < (ascii - specials +
 866                         (specials + eightbits) * 3) ? CE_BASE64 : CE_QUOTED;
 867 }