diplodocus.org Git - nmh/blob - sbr/encode_rfc2047.c

   1 /* encode_rfc2047.c -- encode message headers using RFC 2047 encoding.
   2  *
   3  * This code is Copyright (c) 2002, by the authors of nmh.  See the
   4  * COPYRIGHT file in the root directory of the nmh distribution for
   5  * complete copyright information.
   6  */
   7
   8 #include <h/mh.h>
   9 #include <h/mhparse.h>
  10 #include <h/addrsbr.h>
  11 #include <h/utils.h>
  12 #include "base64.h"
  13 #include "unquote.h"
  14
  /*
   * Names of the header fields whose bodies are address lists.  Such
   * fields need special handling: only the display names and comments
   * may be encoded, never the addresses themselves.
   *
   * The list is terminated by a NULL entry and is compared
   * case-insensitively by its user.  Both the strings and the table are
   * read-only.
   */

  static const char *const address_headers[] = {
      "To",
      "From",
      "cc",
      "Bcc",
      "Reply-To",
      "Sender",
      "Resent-To",
      "Resent-From",
      "Resent-cc",
      "Resent-Bcc",
      "Resent-Reply-To",
      "Resent-Sender",
      NULL,
  };
  35
  /*
   * Macros we use for parsing headers.
   *
   * Every argument is parenthesized so that an expression argument (a
   * cast, a conditional, ...) is evaluated as a unit.  The arguments are
   * still evaluated more than once, so never pass an expression with
   * side effects.
   */

  /* True if c is folding white space: tab, space or newline. */
  #define is_fws(c) ((c) == '\t' || (c) == ' ' || (c) == '\n')

  /*
   * True if c may be copied unchanged into a "Q" encoded-word that
   * appears inside an RFC 5322 phrase.  '=' and '_' are deliberately NOT
   * in this set: they are the escape introducer and the representation of
   * a space in the "Q" encoding, so a literal '=' or '_' in the input has
   * to be written as =3D / =5F or it would decode to something else.
   */
  #define qphrasevalid(c) (((c) >= '0' && (c) <= '9') || \
                           ((c) >= 'A' && (c) <= 'Z') || \
                           ((c) >= 'a' && (c) <= 'z') || \
                           (c) == '!' || (c) == '*' || (c) == '+' || \
                           (c) == '-' || (c) == '/')

  /*
   * True if the ASCII character c must be escaped in a "Q" encoded-word
   * outside of a phrase: control characters and the characters that have
   * a special meaning in the encoding.
   */
  #define qpspecial(c) ((c) < ' ' || (c) == '=' || (c) == '?' || (c) == '_')

  #define base64len(n) ((((n) + 2) / 3) * 4)      /* String len to base64 len */
  #define strbase64(n) ((n) / 4 * 3)              /* Chars that fit in base64 */

  /* Maximum length of a header line that contains an encoded-word. */
  #define ENCODELINELIMIT 76
  52
  /*
   * Forward declarations for the file-local helpers defined further down.
   */
  static void unfold_header(char **value, int len);
  static int field_encode_address(const char *name, char **value,
                                  int encoding, const char *charset);
  static int field_encode_quoted(const char *name, char **value,
                                 const char *charset, int ascii,
                                 int encoded, int phraserules);
  static int field_encode_base64(const char *name, char **value,
                                 const char *charset);
  static int scanstring(const char *string, int *asciilen,
                        int *eightbitchars, int *specialchars);
  static int utf8len(const char *p);
  static int pref_encoding(int ascii, int specials, int eightbits);
  61
  62 /*
  63  * Encode a message header using RFC 2047 encoding.  We make the assumption
  64  * that all characters < 128 are ASCII and as a consequence don't need any
  65  * encoding.
  66  */
  67
  68 int
  69 encode_rfc2047(const char *name, char **value, int encoding,
  70                const char *charset)
  71 {
  72     int i, asciicount = 0, eightbitcount = 0, qpspecialcount = 0;
  73     char *p;
  74
  75     /*
  76      * First, check to see if we even need to encode the header
  77      */
  78
  79     for (p = *value; *p != '\0'; p++) {
  80         if (isascii((unsigned char) *p)) {
  81             asciicount++;
  82             if (qpspecial((unsigned char) *p))
  83                 qpspecialcount++;
  84         } else
  85             eightbitcount++;
  86     }
  87
  88     if (eightbitcount == 0)
  89         return 0;
  90
  91     /*
  92      * Some rules from RFC 2047:
  93      *
  94      * - Encoded words cannot be more than 75 characters long
  95      * - Multiple "long" encoded words must be on new lines.
  96      *
  97      * Also, we're not permitted to encode email addresses, so
  98      * we need to actually _parse_ email addresses and only encode
  99      * the right bits.
 100      */
 101
 102     /*
 103      * If charset was NULL, then get the value from the locale.  But
 104      * we reject it if it returns US-ASCII
 105      */
 106
 107     if (charset == NULL)
 108         charset = write_charset_8bit();
 109
 110     if (strcasecmp(charset, "US-ASCII") == 0) {
 111         inform("Cannot use US-ASCII with 8 bit characters in header");
 112         return 1;
 113     }
 114
 115     /*
 116      * If we have an address header, then we need to parse the addresses
 117      * and only encode the names or comments.  Otherwise, handle it normally.
 118      */
 119
 120     for (i = 0; address_headers[i]; i++) {
 121         if (strcasecmp(name, address_headers[i]) == 0)
 122             return field_encode_address(name, value, encoding, charset);
 123     }
 124
 125     /*
 126      * On the encoding we choose, and the specifics of encoding:
 127      *
 128      * - If a specified encoding is passed in, we use that.
 129      * - Otherwise, pick which encoding is shorter.
 130      *
 131      * We don't quite handle continuation right here, but it should be
 132      * pretty close.
 133      */
 134
 135     if (encoding == CE_UNKNOWN)
 136         encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount);
 137
 138     unfold_header(value, asciicount + eightbitcount);
 139
 140     switch (encoding) {
 141
 142     case CE_BASE64:
 143         return field_encode_base64(name, value, charset);
 144
 145     case CE_QUOTED:
 146         return field_encode_quoted(name, value, charset, asciicount,
 147                                    eightbitcount + qpspecialcount, 0);
 148
 149     default:
 150         inform("Internal error: unknown RFC-2047 encoding type");
 151         return 1;
 152     }
 153 }
 154
 155 /*
 156  * Encode our specified header (or field) using quoted-printable
 157  */
 158
 159 static int
 160 field_encode_quoted(const char *name, char **value, const char *charset,
 161                     int ascii, int encoded, int phraserules)
 162 {
 163     int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column, newline = 1;
 164     int charsetlen = strlen(charset), utf8;
 165     char *output = NULL, *p, *q = NULL;
 166
 167     /*
 168      * Right now we just encode the whole thing.  Maybe later on we'll
 169      * only encode things on a per-atom basis.
 170      */
 171
 172     p = *value;
 173
 174     column = prefixlen + 2;     /* Header name plus ": " */
 175
 176     utf8 = strcasecmp(charset, "UTF-8") == 0;
 177
 178     while (*p != '\0') {
 179         /*
 180          * Start a new line, if it's time
 181          */
 182         if (newline) {
 183             /*
 184              * If it's the start of the header, we don't need to pad it
 185              *
 186              * The length of the output string is ...
 187              * =?charset?Q?...?=  so that's 7+strlen(charset) + 2 for \n NUL
 188              *
 189              * plus 1 for every ASCII character and 3 for every eight bit
 190              * or special character (eight bit characters are written as =XX).
 191              *
 192              */
 193
 194             int tokenlen;
 195
 196             outlen += 9 + charsetlen + ascii + 3 * encoded;
 197
 198             /*
 199              * If output is set, then we're continuing the header.  Otherwise
 200              * do the initial allocation.
 201              */
 202
 203             if (output) {
 204                 int curlen = q - output, i;
 205                 outlen += prefixlen + 1;        /* Header plus \n ": " */
 206                 output = mh_xrealloc(output, outlen);
 207                 q = output + curlen;
 208                 *q++ = '?';
 209                 *q++ = '=';
 210                 *q++ = '\n';
 211                 for (i = 0; i < prefixlen; i++)
 212                     *q++ = ' ';
 213             } else {
 214                 /*
 215                  * A bit of a hack here; the header can contain multiple
 216                  * spaces (probably at least one) until we get to the
 217                  * actual text.  Copy until we get to a non-space.
 218                  */
 219                 output = mh_xmalloc(outlen);
 220                 q = output;
 221                 while (is_fws(*p))
 222                     *q++ = *p++;
 223             }
 224
 225             tokenlen = snprintf(q, outlen - (q - output), "=?%s?Q?", charset);
 226             q += tokenlen;
 227             column = prefixlen + tokenlen;
 228             newline = 0;
 229         }
 230
 231         /*
 232          * Process each character, encoding if necessary
 233          *
 234          * Note that we have a different set of rules if we're processing
 235          * RFC 5322 'phrase' (something you'd see in an address header).
 236          */
 237
 238         column++;
 239
 240         if (*p == ' ') {
 241             *q++ = '_';
 242             ascii--;
 243         } else if (isascii((unsigned char) *p) &&
 244                    (phraserules ? qphrasevalid((unsigned char) *p) :
 245                                         !qpspecial((unsigned char) *p))) {
 246             *q++ = *p;
 247             ascii--;
 248         } else {
 249             snprintf(q, outlen - (q - output), "=%02X", (unsigned char) *p);
 250             q += 3;
 251             column += 2;        /* column already incremented by 1 above */
 252             encoded--;
 253         }
 254
 255         p++;
 256
 257         /*
 258          * We're not allowed more than ENCODELINELIMIT characters per line,
 259          * so reserve some room for the final ?=.
 260          *
 261          * If prefixlen == 0, we haven't been passed in a header name, so
 262          * don't ever wrap the field (we're likely doing an address).
 263          */
 264
 265         if (prefixlen == 0)
 266             continue;
 267
 268         if (column >= ENCODELINELIMIT - 2) {
 269             newline = 1;
 270         } else if (utf8) {
 271             /*
 272              * Okay, this is a bit weird, but to explain a bit more ...
 273              *
 274              * RFC 2047 prohibits the splitting of multibyte characters
 275              * across encoded words.  Right now we only handle the case
 276              * of UTF-8, the most common multibyte encoding.
 277              *
 278              * p is now pointing at the next input character.  If we're
 279              * using UTF-8 _and_ we'd go over ENCODELINELIMIT given the
 280              * length of the complete character, then trigger a newline
 281              * now.  Note that we check the length * 3 since we have to
 282              * allow for the encoded output.
 283              */
 284             if (column + (utf8len(p) * 3) > ENCODELINELIMIT - 2) {
 285                 newline = 1;
 286             }
 287         }
 288     }
 289
 290     if (q == NULL) {
 291         /* This should never happen, but just in case.  Found by
 292            clang static analyzer. */
 293         inform("null output encoding for %s, continuing...", *value);
 294         return 1;
 295     }
 296     *q++ = '?';
 297     *q++ = '=';
 298
 299     if (prefixlen)
 300         *q++ = '\n';
 301
 302     *q = '\0';
 303
 304     free(*value);
 305
 306     *value = output;
 307
 308     return 0;
 309 }
 310
 311 /*
 312  * Encode our specified header (or field) using base64.
 313  *
 314  * This is a little easier since every character gets encoded, we can
 315  * calculate the line wrap up front.
 316  */
 317
 318 static int
 319 field_encode_base64(const char *name, char **value, const char *charset)
 320 {
 321     int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset);
 322     int outlen = 0, numencode, curlen;
 323     char *output = NULL, *p = *value, *q = NULL, *linestart = NULL;
 324
 325     /*
 326      * Skip over any leading white space.
 327      */
 328
 329     while (*p == ' ' || *p == '\t')
 330         p++;
 331
 332     /*
 333      * If we had a zero-length prefix, then just encode the whole field
 334      * as-is, without line wrapping.  Note that in addition to the encoding
 335      *
 336      * The added length we need is =? + charset + ?B? ... ?=
 337      *
 338      * That's 7 + strlen(charset) + 2 (for \n NUL).
 339      */
 340
 341     while (prefixlen && ((base64len(strlen(p)) + 7 + charsetlen +
 342                           prefixlen) > ENCODELINELIMIT)) {
 343
 344         /*
 345          * Our very first time, don't pad the line in the front
 346          *
 347          * Note ENCODELINELIMIT is + 2 because of \n \0
 348          */
 349
 350
 351         if (! output) {
 352             outlen += ENCODELINELIMIT + 2;
 353             output = q = mh_xmalloc(outlen);
 354             linestart = q - prefixlen;  /* Yes, this is intentional */
 355         } else {
 356             int curstart = linestart - output;
 357             curlen = q - output;
 358
 359             outlen += ENCODELINELIMIT + 2;
 360             output = mh_xrealloc(output, outlen);
 361             q = output + curlen;
 362             linestart = output + curstart;
 363         }
 364
 365         /*
 366          * We should have enough space now, so prepend the encoding markers
 367          * and character set information.  The leading space is intentional.
 368          */
 369
 370         q += snprintf(q, outlen - (q - output), " =?%s?B?", charset);
 371
 372         /*
 373          * Find out how much room we have left on the line and see how
 374          * many characters we can stuff in.  The start of our line
 375          * is marked by "linestart", so use that to figure out how
 376          * many characters are left out of ENCODELINELIMIT.  Reserve
 377          * 2 characters for the end markers and calculate how many
 378          * characters we can fit into that space given the base64
 379          * encoding expansion.
 380          */
 381
 382         numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2);
 383
 384         if (numencode <= 0) {
 385             inform("Internal error: tried to encode %d characters "
 386                    "in base64", numencode);
 387             return 1;
 388         }
 389
 390         /*
 391          * RFC 2047 prohibits spanning multibyte characters across tokens.
 392          * Right now we only check for UTF-8.
 393          *
 394          * So note the key here ... we want to make sure the character BEYOND
 395          * our last character is not a continuation byte.  If it's the start
 396          * of a new multibyte character or a single-byte character, that's ok.
 397          */
 398
 399         if (strcasecmp(charset, "UTF-8") == 0) {
 400             /*
 401              * p points to the start of our current buffer, so p + numencode
 402              * is one past the last character to encode
 403              */
 404
 405             while (numencode > 0 && ((*(p + numencode) & 0xc0) == 0x80))
 406                 numencode--;
 407
 408             if (numencode == 0) {
 409                 inform("Internal error: could not find start of "
 410                        "UTF-8 character when base64 encoding header");
 411                 return 1;
 412             }
 413         }
 414
 415         if (writeBase64raw((unsigned char *) p, numencode,
 416                            (unsigned char *) q) != OK) {
 417             inform("Internal error: base64 encoding of header failed");
 418             return 1;
 419         }
 420
 421         p += numencode;
 422         q += base64len(numencode);
 423
 424         /*
 425          * This will point us at the beginning of the new line (trust me).
 426          */
 427
 428         linestart = q + 3;
 429
 430         /*
 431          * What's going on here?  Well, we know we're continuing to the next
 432          * line, so we want to add continuation padding.  We also add the
 433          * trailing marker for the RFC 2047 token at this time as well.
 434          * This uses a trick of snprintf(); we tell it to print a zero-length
 435          * string, but pad it out to prefixlen - 1 characters; that ends
 436          * up always printing out the requested number of spaces.  We use
 437          * prefixlen - 1 because we always add a space on the starting
 438          * token marker; this makes things work out correctly for the first
 439          * line, which should have a space between the ':' and the start
 440          * of the token.
 441          *
 442          * It's okay if you don't follow all of that.
 443          */
 444
 445         q += snprintf(q, outlen - (q - output), "?=\n%*s", prefixlen - 1, "");
 446     }
 447
 448     /*
 449      * We're here if there is either no prefix, or we can fit it in less
 450      * than ENCODELINELIMIT characters.  Encode the whole thing.
 451      */
 452
 453     outlen += prefixlen + 9 + charsetlen + base64len(strlen(p));
 454     curlen = q - output;
 455
 456     output = mh_xrealloc(output, outlen);
 457     q = output + curlen;
 458
 459     q += snprintf(q, outlen - (q - output), "%s=?%s?B?",
 460                   prefixlen ? " " : "", charset);
 461
 462     if (writeBase64raw((unsigned char *) p, strlen(p),
 463                        (unsigned char *) q) != OK) {
 464         inform("Internal error: base64 encoding of header failed");
 465         return 1;
 466     }
 467
 468     strcat(q, "?=");
 469
 470     if (prefixlen)
 471         strcat(q, "\n");
 472
 473     free(*value);
 474
 475     *value = output;
 476
 477     return 0;
 478 }
 479
 /*
  * Work out how many bytes the UTF-8 character starting at p occupies.
  *
  * The answer is 0 when p is at the end of the string, at a plain ASCII
  * byte, or at a continuation byte (that is, in the middle of a
  * multibyte character).  Otherwise it is 1 for the leading byte plus
  * the number of continuation bytes that directly follow it.
  */

 static int
 utf8len(const char *p)
 {
     const unsigned char *bytes = (const unsigned char *) p;
     int count;

     /*
      * Bytes below 0x80 are NUL or ASCII; bytes of the form 10xxxxxx are
      * continuation bytes.  Neither can start a multibyte character.
      */
     if (bytes[0] < 0x80 || (bytes[0] & 0xc0) == 0x80)
         return 0;

     /* Count the leading byte, then every continuation byte after it. */
     for (count = 1; (bytes[count] & 0xc0) == 0x80; count++)
         continue;

     return count;
 }
 504
 /*
  * "Unfold" a header, turning it into a single line without continuation.
  *
  * value - Address of the heap-allocated header text; the old buffer is
  *         freed and *value is pointed at the unfolded copy.
  * len   - Length of the original text.  Unfolding never makes the text
  *         longer, so this is enough room for the copy.
  */

 static void
 unfold_header(char **value, int len)
 {
     char *unfolded = mh_xmalloc(len + 1);
     char *dst = unfolded;
     const char *src = *value;

     for (;;) {
         /* Everything up to the next newline is copied verbatim. */
         while (*src != '\0' && *src != '\n')
             *dst++ = *src++;

         if (*src == '\0')
             break;

         /*
          * We are at a newline.  Skip it together with all of the white
          * space that follows and write a single space in its place.
          *
          * If nothing but white space was left, drop it completely; that
          * strips the final newline of the header, which the encoding
          * routine puts back.
          */
         do {
             src++;
         } while (is_fws(*src));

         if (*src == '\0')
             break;

         *dst++ = ' ';
     }

     *dst = '\0';

     free(*value);
     *value = unfolded;
 }
 543
 544 /*
 545  * Decode a header containing addresses.  This means we have to parse
 546  * each address and only encode the display-name or comment field.
 547  */
 548
 549 static int
 550 field_encode_address(const char *name, char **value, int encoding,
 551                      const char *charset)
 552 {
 553     int prefixlen = strlen(name) + 2, column = prefixlen, groupflag;
 554     int asciichars, specialchars, eightbitchars, reformat = 0, errflag = 0;
 555     size_t len;
 556     char *mp, *cp = NULL, *output = NULL;
 557     char *tmpbuf = NULL;
 558     size_t tmpbufsize = 0;
 559     struct mailname *mn;
 560     char errbuf[BUFSIZ];
 561
 562     /*
 563      * Because these are addresses, we need to handle them individually.
 564      *
 565      * Break them down and process them one by one.  This means we have to
 566      * rewrite the whole header, but that's unavoidable.
 567      */
 568
 569     /*
 570      * The output headers always have to start with a space first; this
 571      * is just the way the API works right now.
 572      */
 573
 574     output = add(" ", output);
 575
 576     for (groupflag = 0; (mp = getname(*value)); ) {
 577         if ((mn = getm(mp, NULL, 0, errbuf, sizeof(errbuf))) == NULL) {
 578             inform("%s: %s", errbuf, mp);
 579             errflag++;
 580             continue;
 581         }
 582
 583         reformat = 0;
 584
 585         /*
 586          * We only care if the phrase (m_pers) or any trailing comment
 587          * (m_note) have 8-bit characters.  If doing q-p, we also need
 588          * to encode anything marked as qspecial().  Unquote it first
 589          * so the specialchars count is right.
 590          */
 591
 592         if (! mn->m_pers)
 593             goto check_note;
 594
 595         if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) {
 596             tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
 597         }
 598
 599         unquote_string(mn->m_pers, tmpbuf);
 600
 601         if (scanstring(tmpbuf, &asciichars, &eightbitchars,
 602                        &specialchars)) {
 603             /*
 604              * If we have 8-bit characters, encode it.
 605              */
 606
 607             if (encoding == CE_UNKNOWN)
 608                 encoding = pref_encoding(asciichars, specialchars,
 609                                          eightbitchars);
 610
 611             /*
 612              * This is okay, because the output of unquote_string will be either
 613              * equal or shorter than the original.
 614              */
 615
 616             strcpy(mn->m_pers, tmpbuf);
 617
 618             switch (encoding) {
 619
 620             case CE_BASE64:
 621                 if (field_encode_base64(NULL, &mn->m_pers, charset)) {
 622                     errflag++;
 623                     goto out;
 624                 }
 625                 break;
 626
 627             case CE_QUOTED:
 628                 if (field_encode_quoted(NULL, &mn->m_pers, charset, asciichars,
 629                                         eightbitchars + specialchars, 1)) {
 630                     errflag++;
 631                     goto out;
 632                 }
 633                 break;
 634
 635             default:
 636                 inform("Internal error: unknown RFC-2047 encoding type");
 637                 errflag++;
 638                 goto out;
 639             }
 640
 641             reformat++;
 642         }
 643
 644         check_note:
 645
 646         /*
 647          * The "note" field is generally a comment at the end of the address,
 648          * at least as how it's implemented here.  Notes are always surrounded
 649          * by parenthesis (since they're comments).  Strip them out and
 650          * then put them back when we format the final field, but they do
 651          * not get encoded.
 652          */
 653
 654         if (! mn->m_note)
 655             goto do_reformat;
 656
 657         if ((len = strlen(mn->m_note)) + 1 > tmpbufsize) {
 658             tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
 659         }
 660
 661         if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') {
 662             inform("Internal error: Invalid note field \"%s\"",
 663                    mn->m_note);
 664             errflag++;
 665             goto out;
 666         }
 667
 668         strncpy(tmpbuf, mn->m_note + 1, len - 1);
 669         tmpbuf[len - 2] = '\0';
 670
 671         if (scanstring(tmpbuf, &asciichars, &eightbitchars,
 672                        &specialchars)) {
 673             /*
 674              * If we have 8-bit characters, encode it.
 675              */
 676
 677             if (encoding == CE_UNKNOWN)
 678                 encoding = pref_encoding(asciichars, specialchars,
 679                                          eightbitchars);
 680
 681             switch (encoding) {
 682
 683             case CE_BASE64:
 684                 if (field_encode_base64(NULL, &tmpbuf, charset)) {
 685                     errflag++;
 686                     goto out;
 687                 }
 688                 break;
 689
 690             case CE_QUOTED:
 691                 if (field_encode_quoted(NULL, &tmpbuf, charset, asciichars,
 692                                         eightbitchars + specialchars, 1)) {
 693                     errflag++;
 694                     goto out;
 695                 }
 696                 break;
 697
 698             default:
 699                 inform("Internal error: unknown RFC-2047 encoding type");
 700                 errflag++;
 701                 goto out;
 702             }
 703
 704             reformat++;
 705
 706             /*
 707              * Make sure the size of tmpbuf is correct (it always gets
 708              * reallocated in the above functions).
 709              */
 710
 711             tmpbufsize = strlen(tmpbuf) + 1;
 712
 713             /*
 714              * Put the note field back surrounded by parenthesis.
 715              */
 716
 717             mn->m_note = mh_xrealloc(mn->m_note, tmpbufsize + 2);
 718
 719             snprintf(mn->m_note, tmpbufsize + 2, "(%s)", tmpbuf);
 720         }
 721
 722 do_reformat:
 723
 724         /*
 725          * So, some explanation is in order.
 726          *
 727          * We know we need to rewrite at least one address in the header,
 728          * otherwise we wouldn't be here.  If we had to reformat this
 729          * particular address, then run it through adrformat().  Otherwise
 730          * we can use m_text directly.
 731          */
 732
 733         /*
 734          * If we were in a group but are no longer, make sure we add a
 735          * semicolon (which needs to be FIRST, as it needs to be at the end
 736          * of the last address).
 737          */
 738
 739         if (groupflag && ! mn->m_ingrp) {
 740             output = add(";", output);
 741             column++;
 742         }
 743
 744         groupflag = mn->m_ingrp;
 745
 746         if (mn->m_gname) {
 747             cp = mh_xstrdup(mn->m_gname);
 748         }
 749
 750         if (reformat) {
 751             cp = add(adrformat(mn), cp);
 752         } else {
 753             cp = add(mn->m_text, cp);
 754         }
 755
 756         len = strlen(cp);
 757
 758         /*
 759          * If we're not at the beginning of the line, add a command and
 760          * either a space or a newline.
 761          */
 762
 763         if (column != prefixlen) {
 764             if (len + column + 2 > OUTPUTLINELEN) {
 765
 766                 if ((size_t) (prefixlen + 3) < tmpbufsize)
 767                     tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = prefixlen + 3);
 768
 769                 snprintf(tmpbuf, tmpbufsize, ",\n%*s", column = prefixlen, "");
 770                 output = add(tmpbuf, output);
 771             } else {
 772                 output = add(", ", output);
 773                 column += 2;
 774             }
 775         }
 776
 777         /*
 778          * Finally add the address
 779          */
 780
 781         output = add(cp, output);
 782         column += len;
 783         free(cp);
 784         cp = NULL;
 785     }
 786
 787     /*
 788      * Just in case we're at the end of a list
 789      */
 790
 791     if (groupflag) {
 792         output = add(";", output);
 793     }
 794
 795     output = add("\n", output);
 796
 797     free(*value);
 798     *value = output;
 799     output = NULL;
 800
 801 out:
 802     free(tmpbuf);
 803     free(output);
 804
 805     return errflag > 0;
 806 }
 807
 /*
  * Scan a string and count the characters that matter for encoding it
  * as a phrase.
  *
  * string        - The NUL-terminated text to look at.
  * asciilen      - Receives the number of ASCII characters.
  * eightbitchars - Receives the number of non-ASCII (8-bit) bytes.
  * specialchars  - Receives the number of ASCII characters that are not
  *                 valid in a phrase and so need to be escaped; these
  *                 are also included in the asciilen total.
  *
  * Returns non-zero if the string contains at least one 8-bit byte,
  * i.e. if it needs to be encoded at all.
  */

 static int
 scanstring(const char *string, int *asciilen, int *eightbitchars,
            int *specialchars)
 {
     int nascii = 0, neightbit = 0, nspecial = 0;
     const char *s;

     for (s = string; *s != '\0'; s++) {
         unsigned char ch = (unsigned char) *s;

         if (!isascii(ch)) {
             neightbit++;
             continue;
         }

         nascii++;

         /*
          * So, a space is not a valid phrase character, but we make an
          * exception for it here, because in q-p a space can be directly
          * encoded as an underscore.
          */
         if (ch != ' ' && !qphrasevalid(ch))
             nspecial++;
     }

     *asciilen = nascii;
     *eightbitchars = neightbit;
     *specialchars = nspecial;

     return neightbit > 0;
 }
 837
 838 /*
 839  * This function is to be used to decide which encoding algorithm we should
 840  * use if one is not given.  Basically, we pick whichever one is the shorter
 841  * of the two.
 842  *
 843  * Arguments are:
 844  *
 845  * ascii        - Number of ASCII characters in to-be-encoded string.
 846  * specials     - Number of ASCII characters in to-be-encoded string that
 847  *                still require encoding under quoted-printable.  Note that
 848  *                these are included in the "ascii" total.
 849  * eightbit     - Eight-bit characters in the to-be-encoded string.
 850  *
 851  * Returns one of CE_BASE64 or CE_QUOTED.
 852  */
 853
 854 static int
 855 pref_encoding(int ascii, int specials, int eightbits)
 856 {
 857     /*
 858      * The length of the q-p encoding is:
 859      *
 860      * ascii - specials + (specials + eightbits) * 3.
 861      *
 862      * The length of the base64 encoding is:
 863      *
 864      * base64len(ascii + eightbits)     (See macro for details)
 865      */
 866
 867     return base64len(ascii + eightbits) < (ascii - specials +
 868                         (specials + eightbits) * 3) ? CE_BASE64 : CE_QUOTED;
 869 }