diplodocus.org Git - nmh/blob - sbr/encode_rfc2047.c

   1 /*
   2  * Routines to encode message headers using RFC 2047-encoding.
   3  *
   4  * This code is Copyright (c) 2002, by the authors of nmh.  See the
   5  * COPYRIGHT file in the root directory of the nmh distribution for
   6  * complete copyright information.
   7  */
   8
   9 #include <h/mh.h>
  10 #include <h/mhparse.h>
  11 #include <h/addrsbr.h>
  12 #include <h/utils.h>
  13
  14 /*
  15  * List of headers that contain addresses and as a result require special
  16  * handling
  17  */
  18
  19 static char *address_headers[] = {
  20     "To",
  21     "From",
  22     "cc",
  23     "Bcc",
  24     "Reply-To",
  25     "Sender",
  26     "Resent-To",
  27     "Resent-From",
  28     "Resent-cc",
  29     "Resent-Bcc",
  30     "Resent-Reply-To",
  31     "Resent-Sender",
  32     NULL,
  33 };
  34
  35 /*
  36  * Macros we use for parsing headers
  37  */
  38
  39 #define is_fws(c) (c == '\t' || c == ' ' || c == '\n')
  40
  41 #define qphrasevalid(c) ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || \
  42                          (c >= 'a' && c <= 'z') || \
  43                          c == '!' || c == '*' || c == '+' || c == '-' || \
  44                          c == '/' || c == '=' || c == '_')
  45 #define qpspecial(c) (c < ' ' || c == '=' || c == '?' || c == '_')
  46
  47 #define base64len(n) ((((n) + 2) / 3 ) * 4)     /* String len to base64 len */
  48 #define strbase64(n) ((n) / 4 * 3)              /* Chars that fit in base64 */
  49
  50 #define ENCODELINELIMIT 76
  51
  52 static void unfold_header(char **, int);
  53 static int field_encode_address(const char *, char **, int, const char *);
  54 static int field_encode_quoted(const char *, char **, const char *, int,
  55                                int, int);
  56 static int field_encode_base64(const char *, char **, const char *);
  57 static int scanstring(const char *, int *, int *, int *);
  58 static int utf8len(const char *);
  59 static int pref_encoding(int, int, int);
  60
  61 /*
  62  * Encode a message header using RFC 2047 encoding.  We make the assumption
  63  * that all characters < 128 are ASCII and as a consequence don't need any
  64  * encoding.
  65  */
  66
  67 int
  68 encode_rfc2047(const char *name, char **value, int encoding,
  69                const char *charset)
  70 {
  71     int i, asciicount = 0, eightbitcount = 0, qpspecialcount = 0;
  72     char *p;
  73
  74     /*
  75      * First, check to see if we even need to encode the header
  76      */
  77
  78     for (p = *value; *p != '\0'; p++) {
  79         if (isascii((unsigned char) *p)) {
  80             asciicount++;
  81             if (qpspecial((unsigned char) *p))
  82                 qpspecialcount++;
  83         } else
  84             eightbitcount++;
  85     }
  86
  87     if (eightbitcount == 0)
  88         return 0;
  89
  90     /*
  91      * Some rules from RFC 2047:
  92      *
  93      * - Encoded words cannot be more than 75 characters long
  94      * - Multiple "long" encoded words must be on new lines.
  95      *
  96      * Also, we're not permitted to encode email addresses, so
  97      * we need to actually _parse_ email addresses and only encode
  98      * the right bits.
  99      */
 100
 101     /*
 102      * If charset was NULL, then get the value from the locale.  But
 103      * we reject it if it returns US-ASCII
 104      */
 105
 106     if (charset == NULL)
 107         charset = write_charset_8bit();
 108
 109     if (strcasecmp(charset, "US-ASCII") == 0) {
 110         advise(NULL, "Cannot use US-ASCII with 8 bit characters in header");
 111         return 1;
 112     }
 113
 114     /*
 115      * If we have an address header, then we need to parse the addresses
 116      * and only encode the names or comments.  Otherwise, handle it normally.
 117      */
 118
 119     for (i = 0; address_headers[i]; i++) {
 120         if (strcasecmp(name, address_headers[i]) == 0)
 121             return field_encode_address(name, value, encoding, charset);
 122     }
 123
 124     /*
 125      * On the encoding we choose, and the specifics of encoding:
 126      *
 127      * - If a specified encoding is passed in, we use that.
 128      * - Otherwise, pick which encoding is shorter.
 129      *
 130      * We don't quite handle continuation right here, but it should be
 131      * pretty close.
 132      */
 133
 134     if (encoding == CE_UNKNOWN)
 135         encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount);
 136
 137     unfold_header(value, asciicount + eightbitcount);
 138
 139     switch (encoding) {
 140
 141     case CE_BASE64:
 142         return field_encode_base64(name, value, charset);
 143
 144     case CE_QUOTED:
 145         return field_encode_quoted(name, value, charset, asciicount,
 146                                    eightbitcount + qpspecialcount, 0);
 147
 148     default:
 149         advise(NULL, "Internal error: unknown RFC-2047 encoding type");
 150         return 1;
 151     }
 152 }
 153
 154 /*
 155  * Encode our specified header (or field) using quoted-printable
 156  */
 157
 158 static int
 159 field_encode_quoted(const char *name, char **value, const char *charset,
 160                     int ascii, int encoded, int phraserules)
 161 {
 162     int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column, newline = 1;
 163     int charsetlen = strlen(charset), utf8;
 164     char *output = NULL, *p, *q = NULL;
 165
 166     /*
 167      * Right now we just encode the whole thing.  Maybe later on we'll
 168      * only encode things on a per-atom basis.
 169      */
 170
 171     p = *value;
 172
 173     column = prefixlen + 2;     /* Header name plus ": " */
 174
 175     utf8 = strcasecmp(charset, "UTF-8") == 0;
 176
 177     while (*p != '\0') {
 178         /*
 179          * Start a new line, if it's time
 180          */
 181         if (newline) {
 182             /*
 183              * If it's the start of the header, we don't need to pad it
 184              *
 185              * The length of the output string is ...
 186              * =?charset?Q?...?=  so that's 7+strlen(charset) + 2 for \n NUL
 187              *
 188              * plus 1 for every ASCII character and 3 for every eight bit
 189              * or special character (eight bit characters are written as =XX).
 190              *
 191              */
 192
 193             int tokenlen;
 194
 195             outlen += 9 + charsetlen + ascii + 3 * encoded;
 196
 197             /*
 198              * If output is set, then we're continuing the header.  Otherwise
 199              * do the initial allocation.
 200              */
 201
 202             if (output) {
 203                 int curlen = q - output, i;
 204                 outlen += prefixlen + 1;        /* Header plus \n ": " */
 205                 output = mh_xrealloc(output, outlen);
 206                 q = output + curlen;
 207                 *q++ = '?';
 208                 *q++ = '=';
 209                 *q++ = '\n';
 210                 for (i = 0; i < prefixlen; i++)
 211                     *q++ = ' ';
 212             } else {
 213                 /*
 214                  * A bit of a hack here; the header can contain multiple
 215                  * spaces (probably at least one) until we get to the
 216                  * actual text.  Copy until we get to a non-space.
 217                  */
 218                 output = mh_xmalloc(outlen);
 219                 q = output;
 220                 while (is_fws(*p))
 221                     *q++ = *p++;
 222             }
 223
 224             tokenlen = snprintf(q, outlen - (q - output), "=?%s?Q?", charset);
 225             q += tokenlen;
 226             column = prefixlen + tokenlen;
 227             newline = 0;
 228         }
 229
 230         /*
 231          * Process each character, encoding if necessary
 232          *
 233          * Note that we have a different set of rules if we're processing
 234          * RFC 5322 'phrase' (something you'd see in an address header).
 235          */
 236
 237         column++;
 238
 239         if (*p == ' ') {
 240             *q++ = '_';
 241             ascii--;
 242         } else if (isascii((unsigned char) *p) &&
 243                    (phraserules ? qphrasevalid((unsigned char) *p) :
 244                                         !qpspecial((unsigned char) *p))) {
 245             *q++ = *p;
 246             ascii--;
 247         } else {
 248             snprintf(q, outlen - (q - output), "=%02X", (unsigned char) *p);
 249             q += 3;
 250             column += 2;        /* column already incremented by 1 above */
 251             encoded--;
 252         }
 253
 254         p++;
 255
 256         /*
 257          * We're not allowed more than ENCODELINELIMIT characters per line,
 258          * so reserve some room for the final ?=.
 259          *
 260          * If prefixlen == 0, we haven't been passed in a header name, so
 261          * don't ever wrap the field (we're likely doing an address).
 262          */
 263
 264         if (prefixlen == 0)
 265             continue;
 266
 267         if (column >= ENCODELINELIMIT - 2) {
 268             newline = 1;
 269         } else if (utf8) {
 270             /*
 271              * Okay, this is a bit weird, but to explain a bit more ...
 272              *
 273              * RFC 2047 prohibits the splitting of multibyte characters
 274              * across encoded words.  Right now we only handle the case
 275              * of UTF-8, the most common multibyte encoding.
 276              *
 277              * p is now pointing at the next input character.  If we're
 278              * using UTF-8 _and_ we'd go over ENCODELINELIMIT given the
 279              * length of the complete character, then trigger a newline
 280              * now.  Note that we check the length * 3 since we have to
 281              * allow for the encoded output.
 282              */
 283             if (column + (utf8len(p) * 3) > ENCODELINELIMIT - 2) {
 284                 newline = 1;
 285             }
 286         }
 287     }
 288
 289     if (q == NULL) {
 290         /* This should never happen, but just in case.  Found by
 291            clang static analyzer. */
 292         admonish (NULL, "null output encoding for %s", *value);
 293         return 1;
 294     }
 295     *q++ = '?';
 296     *q++ = '=';
 297
 298     if (prefixlen)
 299         *q++ = '\n';
 300
 301     *q = '\0';
 302
 303     free(*value);
 304
 305     *value = output;
 306
 307     return 0;
 308 }
 309
 310 /*
 311  * Encode our specified header (or field) using base64.
 312  *
 313  * This is a little easier since every character gets encoded, we can
 314  * calculate the line wrap up front.
 315  */
 316
 317 static int
 318 field_encode_base64(const char *name, char **value, const char *charset)
 319 {
 320     int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset);
 321     int outlen = 0, numencode, curlen;
 322     char *output = NULL, *p = *value, *q = NULL, *linestart = NULL;
 323
 324     /*
 325      * Skip over any leading white space.
 326      */
 327
 328     while (*p == ' ' || *p == '\t')
 329         p++;
 330
 331     /*
 332      * If we had a zero-length prefix, then just encode the whole field
 333      * as-is, without line wrapping.  Note that in addition to the encoding
 334      *
 335      * The added length we need is =? + charset + ?B? ... ?=
 336      *
 337      * That's 7 + strlen(charset) + 2 (for \n NUL).
 338      */
 339
 340     while (prefixlen && ((base64len(strlen(p)) + 7 + charsetlen +
 341                           prefixlen) > ENCODELINELIMIT)) {
 342
 343         /*
 344          * Our very first time, don't pad the line in the front
 345          *
 346          * Note ENCODELINELIMIT is + 2 because of \n \0
 347          */
 348
 349
 350         if (! output) {
 351             outlen += ENCODELINELIMIT + 2;
 352             output = q = mh_xmalloc(outlen);
 353             linestart = q - prefixlen;  /* Yes, this is intentional */
 354         } else {
 355             int curstart = linestart - output;
 356             curlen = q - output;
 357
 358             outlen += ENCODELINELIMIT + 2;
 359             output = mh_xrealloc(output, outlen);
 360             q = output + curlen;
 361             linestart = output + curstart;
 362         }
 363
 364         /*
 365          * We should have enough space now, so prepend the encoding markers
 366          * and character set information.  The leading space is intentional.
 367          */
 368
 369         q += snprintf(q, outlen - (q - output), " =?%s?B?", charset);
 370
 371         /*
 372          * Find out how much room we have left on the line and see how
 373          * many characters we can stuff in.  The start of our line
 374          * is marked by "linestart", so use that to figure out how
 375          * many characters are left out of ENCODELINELIMIT.  Reserve
 376          * 2 characters for the end markers and calculate how many
 377          * characters we can fit into that space given the base64
 378          * encoding expansion.
 379          */
 380
 381         numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2);
 382
 383         if (numencode <= 0) {
 384             advise(NULL, "Internal error: tried to encode %d characters "
 385                    "in base64", numencode);
 386             return 1;
 387         }
 388
 389         /*
 390          * RFC 2047 prohibits spanning multibyte characters across tokens.
 391          * Right now we only check for UTF-8.
 392          *
 393          * So note the key here ... we want to make sure the character BEYOND
 394          * our last character is not a continuation byte.  If it's the start
 395          * of a new multibyte character or a single-byte character, that's ok.
 396          */
 397
 398         if (strcasecmp(charset, "UTF-8") == 0) {
 399             /*
 400              * p points to the start of our current buffer, so p + numencode
 401              * is one past the last character to encode
 402              */
 403
 404             while (numencode > 0 && ((*(p + numencode) & 0xc0) == 0x80))
 405                 numencode--;
 406
 407             if (numencode == 0) {
 408                 advise(NULL, "Internal error: could not find start of "
 409                        "UTF-8 character when base64 encoding header");
 410                 return 1;
 411             }
 412         }
 413
 414         if (writeBase64raw((unsigned char *) p, numencode,
 415                            (unsigned char *) q) != OK) {
 416             advise(NULL, "Internal error: base64 encoding of header failed");
 417             return 1;
 418         }
 419
 420         p += numencode;
 421         q += base64len(numencode);
 422
 423         /*
 424          * This will point us at the beginning of the new line (trust me).
 425          */
 426
 427         linestart = q + 3;
 428
 429         /*
 430          * What's going on here?  Well, we know we're continuing to the next
 431          * line, so we want to add continuation padding.  We also add the
 432          * trailing marker for the RFC 2047 token at this time as well.
 433          * This uses a trick of snprintf(); we tell it to print a zero-length
 434          * string, but pad it out to prefixlen - 1 characters; that ends
 435          * up always printing out the requested number of spaces.  We use
 436          * prefixlen - 1 because we always add a space on the starting
 437          * token marker; this makes things work out correctly for the first
 438          * line, which should have a space between the ':' and the start
 439          * of the token.
 440          *
 441          * It's okay if you don't follow all of that.
 442          */
 443
 444         q += snprintf(q, outlen - (q - output), "?=\n%*s", prefixlen - 1, "");
 445     }
 446
 447     /*
 448      * We're here if there is either no prefix, or we can fit it in less
 449      * than ENCODELINELIMIT characters.  Encode the whole thing.
 450      */
 451
 452     outlen += prefixlen + 9 + charsetlen + base64len(strlen(p));
 453     curlen = q - output;
 454
 455     output = mh_xrealloc(output, outlen);
 456     q = output + curlen;
 457
 458     q += snprintf(q, outlen - (q - output), "%s=?%s?B?",
 459                   prefixlen ? " " : "", charset);
 460
 461     if (writeBase64raw((unsigned char *) p, strlen(p),
 462                        (unsigned char *) q) != OK) {
 463         advise(NULL, "Internal error: base64 encoding of header failed");
 464         return 1;
 465     }
 466
 467     strcat(q, "?=");
 468
 469     if (prefixlen)
 470         strcat(q, "\n");
 471
 472     free(*value);
 473
 474     *value = output;
 475
 476     return 0;
 477 }
 478
 479 /*
 480  * Calculate the length of a UTF-8 character.
 481  *
 482  * If it's not a UTF-8 character (or we're in the middle of a multibyte
 483  * character) then simply return 0.
 484  */
 485
 486 static int
 487 utf8len(const char *p)
 488 {
 489     int len = 1;
 490
 491     if (*p == '\0')
 492         return 0;
 493
 494     if (isascii((unsigned char) *p) || (((unsigned char) *p) & 0xc0) == 0x80)
 495         return 0;
 496
 497     p++;
 498     while ((((unsigned char) *p++) & 0xc0) == 0x80)
 499         len++;
 500
 501     return len;
 502 }
 503
 504 /*
 505  * "Unfold" a header, making it a single line (without continuation)
 506  *
 507  * We cheat a bit here; we never make the string longer, so using the
 508  * original length here is fine.
 509  */
 510
 511 static void
 512 unfold_header(char **value, int len)
 513 {
 514     char *str = mh_xmalloc(len + 1);
 515     char *p = str, *q = *value;
 516
 517     while (*q != '\0') {
 518         if (*q == '\n') {
 519             /*
 520              * When we get a newline, skip to the next non-whitespace
 521              * character and add a space to replace all of the whitespace
 522              *
 523              * This has the side effect of stripping off the final newline
 524              * for the header; we put it back in the encoding routine.
 525              */
 526             while (is_fws(*q))
 527                 q++;
 528             if (*q == '\0')
 529                 break;
 530
 531             *p++ = ' ';
 532         } else {
 533             *p++ = *q++;
 534         }
 535     }
 536
 537     *p = '\0';
 538
 539     free(*value);
 540     *value = str;
 541 }
 542
 543 /*
 544  * Decode a header containing addresses.  This means we have to parse
 545  * each address and only encode the display-name or comment field.
 546  */
 547
 548 static int
 549 field_encode_address(const char *name, char **value, int encoding,
 550                      const char *charset)
 551 {
 552     int prefixlen = strlen(name) + 2, column = prefixlen, groupflag;
 553     int asciichars, specialchars, eightbitchars, reformat = 0, errflag = 0;
 554     size_t len;
 555     char *mp, *cp = NULL, *output = NULL;
 556     char *tmpbuf = NULL;
 557     size_t tmpbufsize = 0;
 558     struct mailname *mn;
 559     char errbuf[BUFSIZ];
 560
 561     /*
 562      * Because these are addresses, we need to handle them individually.
 563      *
 564      * Break them down and process them one by one.  This means we have to
 565      * rewrite the whole header, but that's unavoidable.
 566      */
 567
 568     /*
 569      * The output headers always have to start with a space first; this
 570      * is just the way the API works right now.
 571      */
 572
 573     output = add(" ", output);
 574
 575     for (groupflag = 0; (mp = getname(*value)); ) {
 576         if ((mn = getm(mp, NULL, 0, errbuf, sizeof(errbuf))) == NULL) {
 577             advise(NULL, "%s: %s", errbuf, mp);
 578             errflag++;
 579             continue;
 580         }
 581
 582         reformat = 0;
 583
 584         /*
 585          * We only care if the phrase (m_pers) or any trailing comment
 586          * (m_note) have 8-bit characters.  If doing q-p, we also need
 587          * to encode anything marked as qspecial().  Unquote it first
 588          * so the specialchars count is right.
 589          */
 590
 591         if (! mn->m_pers)
 592             goto check_note;
 593
 594         if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) {
 595             tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
 596         }
 597
 598         unquote_string(mn->m_pers, tmpbuf);
 599
 600         if (scanstring(tmpbuf, &asciichars, &eightbitchars,
 601                        &specialchars)) {
 602             /*
 603              * If we have 8-bit characters, encode it.
 604              */
 605
 606             if (encoding == CE_UNKNOWN)
 607                 encoding = pref_encoding(asciichars, specialchars,
 608                                          eightbitchars);
 609
 610             /*
 611              * This is okay, because the output of unquote_string will be either
 612              * equal or shorter than the original.
 613              */
 614
 615             strcpy(mn->m_pers, tmpbuf);
 616
 617             switch (encoding) {
 618
 619             case CE_BASE64:
 620                 if (field_encode_base64(NULL, &mn->m_pers, charset)) {
 621                     errflag++;
 622                     goto out;
 623                 }
 624                 break;
 625
 626             case CE_QUOTED:
 627                 if (field_encode_quoted(NULL, &mn->m_pers, charset, asciichars,
 628                                         eightbitchars + specialchars, 1)) {
 629                     errflag++;
 630                     goto out;
 631                 }
 632                 break;
 633
 634             default:
 635                 advise(NULL, "Internal error: unknown RFC-2047 encoding type");
 636                 errflag++;
 637                 goto out;
 638             }
 639
 640             reformat++;
 641         }
 642
 643         check_note:
 644
 645         /*
 646          * The "note" field is generally a comment at the end of the address,
 647          * at least as how it's implemented here.  Notes are always surrounded
 648          * by parenthesis (since they're comments).  Strip them out and
 649          * then put them back when we format the final field, but they do
 650          * not get encoded.
 651          */
 652
 653         if (! mn->m_note)
 654             goto do_reformat;
 655
 656         if ((len = strlen(mn->m_note)) + 1 > tmpbufsize) {
 657             tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
 658         }
 659
 660         if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') {
 661             advise(NULL, "Internal error: Invalid note field \"%s\"",
 662                    mn->m_note);
 663             errflag++;
 664             goto out;
 665         }
 666
 667         strncpy(tmpbuf, mn->m_note + 1, len - 1);
 668         tmpbuf[len - 2] = '\0';
 669
 670         if (scanstring(tmpbuf, &asciichars, &eightbitchars,
 671                        &specialchars)) {
 672             /*
 673              * If we have 8-bit characters, encode it.
 674              */
 675
 676             if (encoding == CE_UNKNOWN)
 677                 encoding = pref_encoding(asciichars, specialchars,
 678                                          eightbitchars);
 679
 680             switch (encoding) {
 681
 682             case CE_BASE64:
 683                 if (field_encode_base64(NULL, &tmpbuf, charset)) {
 684                     errflag++;
 685                     goto out;
 686                 }
 687                 break;
 688
 689             case CE_QUOTED:
 690                 if (field_encode_quoted(NULL, &tmpbuf, charset, asciichars,
 691                                         eightbitchars + specialchars, 1)) {
 692                     errflag++;
 693                     goto out;
 694                 }
 695                 break;
 696
 697             default:
 698                 advise(NULL, "Internal error: unknown RFC-2047 encoding type");
 699                 errflag++;
 700                 goto out;
 701             }
 702
 703             reformat++;
 704
 705             /*
 706              * Make sure the size of tmpbuf is correct (it always gets
 707              * reallocated in the above functions).
 708              */
 709
 710             tmpbufsize = strlen(tmpbuf) + 1;
 711
 712             /*
 713              * Put the note field back surrounded by parenthesis.
 714              */
 715
 716             mn->m_note = mh_xrealloc(mn->m_note, tmpbufsize + 2);
 717
 718             snprintf(mn->m_note, tmpbufsize + 2, "(%s)", tmpbuf);
 719         }
 720
 721 do_reformat:
 722
 723         /*
 724          * So, some explanation is in order.
 725          *
 726          * We know we need to rewrite at least one address in the header,
 727          * otherwise we wouldn't be here.  If we had to reformat this
 728          * particular address, then run it through adrformat().  Otherwise
 729          * we can use m_text directly.
 730          */
 731
 732         /*
 733          * If we were in a group but are no longer, make sure we add a
 734          * semicolon (which needs to be FIRST, as it needs to be at the end
 735          * of the last address).
 736          */
 737
 738         if (groupflag && ! mn->m_ingrp) {
 739             output = add(";", output);
 740             column += 1;
 741         }
 742
 743         groupflag = mn->m_ingrp;
 744
 745         if (mn->m_gname) {
 746             cp = mh_xstrdup(mn->m_gname);
 747         }
 748
 749         if (reformat) {
 750             cp = add(adrformat(mn), cp);
 751         } else {
 752             cp = add(mn->m_text, cp);
 753         }
 754
 755         len = strlen(cp);
 756
 757         /*
 758          * If we're not at the beginning of the line, add a command and
 759          * either a space or a newline.
 760          */
 761
 762         if (column != prefixlen) {
 763             if (len + column + 2 > OUTPUTLINELEN) {
 764
 765                 if ((size_t) (prefixlen + 3) < tmpbufsize)
 766                     tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = prefixlen + 3);
 767
 768                 snprintf(tmpbuf, tmpbufsize, ",\n%*s", column = prefixlen, "");
 769                 output = add(tmpbuf, output);
 770             } else {
 771                 output = add(", ", output);
 772                 column += 2;
 773             }
 774         }
 775
 776         /*
 777          * Finally add the address
 778          */
 779
 780         output = add(cp, output);
 781         column += len;
 782         free(cp);
 783         cp = NULL;
 784     }
 785
 786     /*
 787      * Just in case we're at the end of a list
 788      */
 789
 790     if (groupflag) {
 791         output = add(";", output);
 792     }
 793
 794     output = add("\n", output);
 795
 796     free(*value);
 797     *value = output;
 798     output = NULL;
 799
 800 out:
 801     mh_xfree(tmpbuf);
 802     mh_xfree(output);
 803
 804     return errflag > 0;
 805 }
 806
 807 /*
 808  * Scan a string, check for characters that need to be encoded
 809  */
 810
 811 static int
 812 scanstring(const char *string, int *asciilen, int *eightbitchars,
 813            int *specialchars)
 814 {
 815     *asciilen = 0;
 816     *eightbitchars = 0;
 817     *specialchars = 0;
 818
 819     for (; *string != '\0'; string++) {
 820         if ((isascii((unsigned char) *string))) {
 821             (*asciilen)++;
 822             /*
 823              * So, a space is not a valid phrase character, but we're counting
 824              * an exception here, because in q-p a space can be directly
 825              * encoded as an underscore.
 826              */
 827             if (!qphrasevalid((unsigned char) *string) && *string != ' ')
 828                 (*specialchars)++;
 829         } else {
 830             (*eightbitchars)++;
 831         }
 832     }
 833
 834     return *eightbitchars > 0;
 835 }
 836
 837 /*
 838  * This function is to be used to decide which encoding algorithm we should
 839  * use if one is not given.  Basically, we pick whichever one is the shorter
 840  * of the two.
 841  *
 842  * Arguments are:
 843  *
 844  * ascii        - Number of ASCII characters in to-be-encoded string.
 845  * specials     - Number of ASCII characters in to-be-encoded string that
 846  *                still require encoding under quoted-printable.  Note that
 847  *                these are included in the "ascii" total.
 848  * eightbit     - Eight-bit characters in the to-be-encoded string.
 849  *
 850  * Returns one of CE_BASE64 or CE_QUOTED.
 851  */
 852
 853 static int
 854 pref_encoding(int ascii, int specials, int eightbits)
 855 {
 856     /*
 857      * The length of the q-p encoding is:
 858      *
 859      * ascii - specials + (specials + eightbits) * 3.
 860      *
 861      * The length of the base64 encoding is:
 862      *
 863      * base64len(ascii + eightbits)     (See macro for details)
 864      */
 865
 866     return base64len(ascii + eightbits) < (ascii - specials +
 867                         (specials + eightbits) * 3) ? CE_BASE64 : CE_QUOTED;
 868 }