diplodocus.org Git - nmh/blob - sbr/encode_rfc2047.c

   1 /* encode_rfc2047.c -- encode message headers using RFC 2047 encoding.
   2  *
   3  * This code is Copyright (c) 2002, by the authors of nmh.  See the
   4  * COPYRIGHT file in the root directory of the nmh distribution for
   5  * complete copyright information.
   6  */
   7
   8 #include <h/mh.h>
   9 #include <h/mhparse.h>
  10 #include <h/addrsbr.h>
  11 #include <h/utils.h>
  12 #include "base64.h"
  13 #include "unquote.h"
  14
  15 /*
  16  * List of headers that contain addresses and as a result require special
  17  * handling
  18  */
  19
  20 static char *address_headers[] = {
  21     "To",
  22     "From",
  23     "cc",
  24     "Bcc",
  25     "Reply-To",
  26     "Sender",
  27     "Resent-To",
  28     "Resent-From",
  29     "Resent-cc",
  30     "Resent-Bcc",
  31     "Resent-Reply-To",
  32     "Resent-Sender",
  33     NULL,
  34 };
  35
  36 /*
  37  * Macros we use for parsing headers
  38  */
  39
  40 #define is_fws(c) (c == '\t' || c == ' ' || c == '\n')
  41
  42 #define qphrasevalid(c) ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || \
  43                          (c >= 'a' && c <= 'z') || \
  44                          c == '!' || c == '*' || c == '+' || c == '-' || \
  45                          c == '/' || c == '=' || c == '_')
  46 #define qpspecial(c) (c < ' ' || c == '=' || c == '?' || c == '_')
  47
  48 #define base64len(n) ((((n) + 2) / 3 ) * 4)     /* String len to base64 len */
  49 #define strbase64(n) ((n) / 4 * 3)              /* Chars that fit in base64 */
  50
  51 #define ENCODELINELIMIT 76
  52
  53 static void unfold_header(char **, int);
  54 static int field_encode_address(const char *, char **, int, const char *);
  55 static int field_encode_quoted(const char *, char **, const char *, int,
  56                                int, int);
  57 static int field_encode_base64(const char *, char **, const char *);
  58 static int scanstring(const char *, int *, int *, int *);
  59 static int utf8len(const char *);
  60 static int pref_encoding(int, int, int);
  61
  62 /*
  63  * Encode a message header using RFC 2047 encoding.  We make the assumption
  64  * that all characters < 128 are ASCII and as a consequence don't need any
  65  * encoding.
  66  */
  67
  68 int
  69 encode_rfc2047(const char *name, char **value, int encoding,
  70                const char *charset)
  71 {
  72     int i, asciicount = 0, eightbitcount = 0, qpspecialcount = 0;
  73     char *p;
  74
  75     /*
  76      * First, check to see if we even need to encode the header
  77      */
  78
  79     for (p = *value; *p != '\0'; p++) {
  80         if (isascii((unsigned char) *p)) {
  81             asciicount++;
  82             if (qpspecial((unsigned char) *p))
  83                 qpspecialcount++;
  84         } else
  85             eightbitcount++;
  86     }
  87
  88     if (eightbitcount == 0)
  89         return 0;
  90
  91     /*
  92      * Some rules from RFC 2047:
  93      *
  94      * - Encoded words cannot be more than 75 characters long
  95      * - Multiple "long" encoded words must be on new lines.
  96      *
  97      * Also, we're not permitted to encode email addresses, so
  98      * we need to actually _parse_ email addresses and only encode
  99      * the right bits.
 100      */
 101
 102     /*
 103      * If charset was NULL, then get the value from the locale.  But
 104      * we reject it if it returns US-ASCII
 105      */
 106
 107     if (charset == NULL)
 108         charset = write_charset_8bit();
 109
 110     if (strcasecmp(charset, "US-ASCII") == 0) {
 111         inform("Cannot use US-ASCII with 8 bit characters in header");
 112         return 1;
 113     }
 114
 115     /*
 116      * If we have an address header, then we need to parse the addresses
 117      * and only encode the names or comments.  Otherwise, handle it normally.
 118      */
 119
 120     for (i = 0; address_headers[i]; i++) {
 121         if (strcasecmp(name, address_headers[i]) == 0)
 122             return field_encode_address(name, value, encoding, charset);
 123     }
 124
 125     /*
 126      * On the encoding we choose, and the specifics of encoding:
 127      *
 128      * - If a specified encoding is passed in, we use that.
 129      * - Otherwise, pick which encoding is shorter.
 130      *
 131      * We don't quite handle continuation right here, but it should be
 132      * pretty close.
 133      */
 134
 135     if (encoding == CE_UNKNOWN)
 136         encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount);
 137
 138     unfold_header(value, asciicount + eightbitcount);
 139
 140     switch (encoding) {
 141
 142     case CE_BASE64:
 143         return field_encode_base64(name, value, charset);
 144
 145     case CE_QUOTED:
 146         return field_encode_quoted(name, value, charset, asciicount,
 147                                    eightbitcount + qpspecialcount, 0);
 148
 149     default:
 150         inform("Internal error: unknown RFC-2047 encoding type");
 151         return 1;
 152     }
 153 }
 154
 155 /*
 156  * Encode our specified header (or field) using quoted-printable
 157  */
 158
 159 static int
 160 field_encode_quoted(const char *name, char **value, const char *charset,
 161                     int ascii, int encoded, int phraserules)
 162 {
 163     int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column;
 164     int charsetlen = strlen(charset), utf8;
 165     char *output = NULL, *p, *q = NULL;
 166
 167     /*
 168      * Right now we just encode the whole thing.  Maybe later on we'll
 169      * only encode things on a per-atom basis.
 170      */
 171
 172     p = *value;
 173
 174     column = prefixlen + 2;     /* Header name plus ": " */
 175
 176     utf8 = strcasecmp(charset, "UTF-8") == 0;
 177
 178     bool newline = true;
 179     while (*p != '\0') {
 180         /*
 181          * Start a new line, if it's time
 182          */
 183         if (newline) {
 184             /*
 185              * If it's the start of the header, we don't need to pad it
 186              *
 187              * The length of the output string is ...
 188              * =?charset?Q?...?=  so that's 7+strlen(charset) + 2 for \n NUL
 189              *
 190              * plus 1 for every ASCII character and 3 for every eight bit
 191              * or special character (eight bit characters are written as =XX).
 192              *
 193              */
 194
 195             int tokenlen;
 196
 197             outlen += 9 + charsetlen + ascii + 3 * encoded;
 198
 199             /*
 200              * If output is set, then we're continuing the header.  Otherwise
 201              * do the initial allocation.
 202              */
 203
 204             if (output) {
 205                 int curlen = q - output, i;
 206                 outlen += prefixlen + 1;        /* Header plus \n ": " */
 207                 output = mh_xrealloc(output, outlen);
 208                 q = output + curlen;
 209                 *q++ = '?';
 210                 *q++ = '=';
 211                 *q++ = '\n';
 212                 for (i = 0; i < prefixlen; i++)
 213                     *q++ = ' ';
 214             } else {
 215                 /*
 216                  * A bit of a hack here; the header can contain multiple
 217                  * spaces (probably at least one) until we get to the
 218                  * actual text.  Copy until we get to a non-space.
 219                  */
 220                 output = mh_xmalloc(outlen);
 221                 q = output;
 222                 while (is_fws(*p))
 223                     *q++ = *p++;
 224             }
 225
 226             tokenlen = snprintf(q, outlen - (q - output), "=?%s?Q?", charset);
 227             q += tokenlen;
 228             column = prefixlen + tokenlen;
 229             newline = false;
 230         }
 231
 232         /*
 233          * Process each character, encoding if necessary
 234          *
 235          * Note that we have a different set of rules if we're processing
 236          * RFC 5322 'phrase' (something you'd see in an address header).
 237          */
 238
 239         column++;
 240
 241         if (*p == ' ') {
 242             *q++ = '_';
 243             ascii--;
 244         } else if (isascii((unsigned char) *p) &&
 245                    (phraserules ? qphrasevalid((unsigned char) *p) :
 246                                         !qpspecial((unsigned char) *p))) {
 247             *q++ = *p;
 248             ascii--;
 249         } else {
 250             snprintf(q, outlen - (q - output), "=%02X", (unsigned char) *p);
 251             q += 3;
 252             column += 2;        /* column already incremented by 1 above */
 253             encoded--;
 254         }
 255
 256         p++;
 257
 258         /*
 259          * We're not allowed more than ENCODELINELIMIT characters per line,
 260          * so reserve some room for the final ?=.
 261          *
 262          * If prefixlen == 0, we haven't been passed in a header name, so
 263          * don't ever wrap the field (we're likely doing an address).
 264          */
 265
 266         if (prefixlen == 0)
 267             continue;
 268
 269         if (column >= ENCODELINELIMIT - 2) {
 270             newline = true;
 271         } else if (utf8) {
 272             /*
 273              * Okay, this is a bit weird, but to explain a bit more ...
 274              *
 275              * RFC 2047 prohibits the splitting of multibyte characters
 276              * across encoded words.  Right now we only handle the case
 277              * of UTF-8, the most common multibyte encoding.
 278              *
 279              * p is now pointing at the next input character.  If we're
 280              * using UTF-8 _and_ we'd go over ENCODELINELIMIT given the
 281              * length of the complete character, then trigger a newline
 282              * now.  Note that we check the length * 3 since we have to
 283              * allow for the encoded output.
 284              */
 285             if (column + (utf8len(p) * 3) > ENCODELINELIMIT - 2) {
 286                 newline = true;
 287             }
 288         }
 289     }
 290
 291     if (q == NULL) {
 292         /* This should never happen, but just in case.  Found by
 293            clang static analyzer. */
 294         inform("null output encoding for %s, continuing...", *value);
 295         return 1;
 296     }
 297     *q++ = '?';
 298     *q++ = '=';
 299
 300     if (prefixlen)
 301         *q++ = '\n';
 302
 303     *q = '\0';
 304
 305     free(*value);
 306
 307     *value = output;
 308
 309     return 0;
 310 }
 311
 312 /*
 313  * Encode our specified header (or field) using base64.
 314  *
 315  * This is a little easier since every character gets encoded, we can
 316  * calculate the line wrap up front.
 317  */
 318
 319 static int
 320 field_encode_base64(const char *name, char **value, const char *charset)
 321 {
 322     int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset);
 323     int outlen = 0, numencode, curlen;
 324     char *output = NULL, *p = *value, *q = NULL, *linestart = NULL;
 325
 326     /*
 327      * Skip over any leading white space.
 328      */
 329
 330     while (*p == ' ' || *p == '\t')
 331         p++;
 332
 333     /*
 334      * If we had a zero-length prefix, then just encode the whole field
 335      * as-is, without line wrapping.  Note that in addition to the encoding
 336      *
 337      * The added length we need is =? + charset + ?B? ... ?=
 338      *
 339      * That's 7 + strlen(charset) + 2 (for \n NUL).
 340      */
 341
 342     while (prefixlen && ((base64len(strlen(p)) + 7 + charsetlen +
 343                           prefixlen) > ENCODELINELIMIT)) {
 344
 345         /*
 346          * Our very first time, don't pad the line in the front
 347          *
 348          * Note ENCODELINELIMIT is + 2 because of \n \0
 349          */
 350
 351
 352         if (! output) {
 353             outlen += ENCODELINELIMIT + 2;
 354             output = q = mh_xmalloc(outlen);
 355             linestart = q - prefixlen;  /* Yes, this is intentional */
 356         } else {
 357             int curstart = linestart - output;
 358             curlen = q - output;
 359
 360             outlen += ENCODELINELIMIT + 2;
 361             output = mh_xrealloc(output, outlen);
 362             q = output + curlen;
 363             linestart = output + curstart;
 364         }
 365
 366         /*
 367          * We should have enough space now, so prepend the encoding markers
 368          * and character set information.  The leading space is intentional.
 369          */
 370
 371         q += snprintf(q, outlen - (q - output), " =?%s?B?", charset);
 372
 373         /*
 374          * Find out how much room we have left on the line and see how
 375          * many characters we can stuff in.  The start of our line
 376          * is marked by "linestart", so use that to figure out how
 377          * many characters are left out of ENCODELINELIMIT.  Reserve
 378          * 2 characters for the end markers and calculate how many
 379          * characters we can fit into that space given the base64
 380          * encoding expansion.
 381          */
 382
 383         numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2);
 384
 385         if (numencode <= 0) {
 386             inform("Internal error: tried to encode %d characters "
 387                    "in base64", numencode);
 388             return 1;
 389         }
 390
 391         /*
 392          * RFC 2047 prohibits spanning multibyte characters across tokens.
 393          * Right now we only check for UTF-8.
 394          *
 395          * So note the key here ... we want to make sure the character BEYOND
 396          * our last character is not a continuation byte.  If it's the start
 397          * of a new multibyte character or a single-byte character, that's ok.
 398          */
 399
 400         if (strcasecmp(charset, "UTF-8") == 0) {
 401             /*
 402              * p points to the start of our current buffer, so p + numencode
 403              * is one past the last character to encode
 404              */
 405
 406             while (numencode > 0 && ((*(p + numencode) & 0xc0) == 0x80))
 407                 numencode--;
 408
 409             if (numencode == 0) {
 410                 inform("Internal error: could not find start of "
 411                        "UTF-8 character when base64 encoding header");
 412                 return 1;
 413             }
 414         }
 415
 416         if (writeBase64raw((unsigned char *) p, numencode,
 417                            (unsigned char *) q) != OK) {
 418             inform("Internal error: base64 encoding of header failed");
 419             return 1;
 420         }
 421
 422         p += numencode;
 423         q += base64len(numencode);
 424
 425         /*
 426          * This will point us at the beginning of the new line (trust me).
 427          */
 428
 429         linestart = q + 3;
 430
 431         /*
 432          * What's going on here?  Well, we know we're continuing to the next
 433          * line, so we want to add continuation padding.  We also add the
 434          * trailing marker for the RFC 2047 token at this time as well.
 435          * This uses a trick of snprintf(); we tell it to print a zero-length
 436          * string, but pad it out to prefixlen - 1 characters; that ends
 437          * up always printing out the requested number of spaces.  We use
 438          * prefixlen - 1 because we always add a space on the starting
 439          * token marker; this makes things work out correctly for the first
 440          * line, which should have a space between the ':' and the start
 441          * of the token.
 442          *
 443          * It's okay if you don't follow all of that.
 444          */
 445
 446         q += snprintf(q, outlen - (q - output), "?=\n%*s", prefixlen - 1, "");
 447     }
 448
 449     /*
 450      * We're here if there is either no prefix, or we can fit it in less
 451      * than ENCODELINELIMIT characters.  Encode the whole thing.
 452      */
 453
 454     outlen += prefixlen + 9 + charsetlen + base64len(strlen(p));
 455     curlen = q - output;
 456
 457     output = mh_xrealloc(output, outlen);
 458     q = output + curlen;
 459
 460     q += snprintf(q, outlen - (q - output), "%s=?%s?B?",
 461                   prefixlen ? " " : "", charset);
 462
 463     if (writeBase64raw((unsigned char *) p, strlen(p),
 464                        (unsigned char *) q) != OK) {
 465         inform("Internal error: base64 encoding of header failed");
 466         return 1;
 467     }
 468
 469     strcat(q, "?=");
 470
 471     if (prefixlen)
 472         strcat(q, "\n");
 473
 474     free(*value);
 475
 476     *value = output;
 477
 478     return 0;
 479 }
 480
 481 /*
 482  * Calculate the length of a UTF-8 character.
 483  *
 484  * If it's not a UTF-8 character (or we're in the middle of a multibyte
 485  * character) then simply return 0.
 486  */
 487
 488 static int
 489 utf8len(const char *p)
 490 {
 491     int len = 1;
 492
 493     if (*p == '\0')
 494         return 0;
 495
 496     if (isascii((unsigned char) *p) || (((unsigned char) *p) & 0xc0) == 0x80)
 497         return 0;
 498
 499     p++;
 500     while ((((unsigned char) *p++) & 0xc0) == 0x80)
 501         len++;
 502
 503     return len;
 504 }
 505
 506 /*
 507  * "Unfold" a header, making it a single line (without continuation)
 508  *
 509  * We cheat a bit here; we never make the string longer, so using the
 510  * original length here is fine.
 511  */
 512
 513 static void
 514 unfold_header(char **value, int len)
 515 {
 516     char *str = mh_xmalloc(len + 1);
 517     char *p = str, *q = *value;
 518
 519     while (*q != '\0') {
 520         if (*q == '\n') {
 521             /*
 522              * When we get a newline, skip to the next non-whitespace
 523              * character and add a space to replace all of the whitespace
 524              *
 525              * This has the side effect of stripping off the final newline
 526              * for the header; we put it back in the encoding routine.
 527              */
 528             while (is_fws(*q))
 529                 q++;
 530             if (*q == '\0')
 531                 break;
 532
 533             *p++ = ' ';
 534         } else {
 535             *p++ = *q++;
 536         }
 537     }
 538
 539     *p = '\0';
 540
 541     free(*value);
 542     *value = str;
 543 }
 544
 545 /*
 546  * Decode a header containing addresses.  This means we have to parse
 547  * each address and only encode the display-name or comment field.
 548  */
 549
 550 static int
 551 field_encode_address(const char *name, char **value, int encoding,
 552                      const char *charset)
 553 {
 554     int prefixlen = strlen(name) + 2, column = prefixlen, groupflag;
 555     int asciichars;
 556     int specialchars;
 557     int eightbitchars;
 558     bool reformat = false;
 559     bool errflag = false;
 560     size_t len;
 561     char *mp, *cp = NULL, *output = NULL;
 562     char *tmpbuf = NULL;
 563     size_t tmpbufsize = 0;
 564     struct mailname *mn;
 565     char errbuf[BUFSIZ];
 566
 567     /*
 568      * Because these are addresses, we need to handle them individually.
 569      *
 570      * Break them down and process them one by one.  This means we have to
 571      * rewrite the whole header, but that's unavoidable.
 572      */
 573
 574     /*
 575      * The output headers always have to start with a space first; this
 576      * is just the way the API works right now.
 577      */
 578
 579     output = add(" ", output);
 580
 581     for (groupflag = 0; (mp = getname(*value)); ) {
 582         if ((mn = getm(mp, NULL, 0, errbuf, sizeof(errbuf))) == NULL) {
 583             inform("%s: %s", errbuf, mp);
 584             errflag = true;
 585             continue;
 586         }
 587
 588         reformat = false;
 589
 590         /*
 591          * We only care if the phrase (m_pers) or any trailing comment
 592          * (m_note) have 8-bit characters.  If doing q-p, we also need
 593          * to encode anything marked as qspecial().  Unquote it first
 594          * so the specialchars count is right.
 595          */
 596
 597         if (! mn->m_pers)
 598             goto check_note;
 599
 600         if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) {
 601             tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
 602         }
 603
 604         unquote_string(mn->m_pers, tmpbuf);
 605
 606         if (scanstring(tmpbuf, &asciichars, &eightbitchars,
 607                        &specialchars)) {
 608             /*
 609              * If we have 8-bit characters, encode it.
 610              */
 611
 612             if (encoding == CE_UNKNOWN)
 613                 encoding = pref_encoding(asciichars, specialchars,
 614                                          eightbitchars);
 615
 616             /*
 617              * This is okay, because the output of unquote_string will be either
 618              * equal or shorter than the original.
 619              */
 620
 621             strcpy(mn->m_pers, tmpbuf);
 622
 623             switch (encoding) {
 624
 625             case CE_BASE64:
 626                 if (field_encode_base64(NULL, &mn->m_pers, charset)) {
 627                     errflag = true;
 628                     goto out;
 629                 }
 630                 break;
 631
 632             case CE_QUOTED:
 633                 if (field_encode_quoted(NULL, &mn->m_pers, charset, asciichars,
 634                                         eightbitchars + specialchars, 1)) {
 635                     errflag = true;
 636                     goto out;
 637                 }
 638                 break;
 639
 640             default:
 641                 inform("Internal error: unknown RFC-2047 encoding type");
 642                 errflag = true;
 643                 goto out;
 644             }
 645
 646             reformat = true;
 647         }
 648
 649         check_note:
 650
 651         /*
 652          * The "note" field is generally a comment at the end of the address,
 653          * at least as how it's implemented here.  Notes are always surrounded
 654          * by parenthesis (since they're comments).  Strip them out and
 655          * then put them back when we format the final field, but they do
 656          * not get encoded.
 657          */
 658
 659         if (! mn->m_note)
 660             goto do_reformat;
 661
 662         if ((len = strlen(mn->m_note)) + 1 > tmpbufsize) {
 663             tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
 664         }
 665
 666         if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') {
 667             inform("Internal error: Invalid note field \"%s\"",
 668                    mn->m_note);
 669             errflag = true;
 670             goto out;
 671         }
 672
 673         strncpy(tmpbuf, mn->m_note + 1, len - 1);
 674         tmpbuf[len - 2] = '\0';
 675
 676         if (scanstring(tmpbuf, &asciichars, &eightbitchars,
 677                        &specialchars)) {
 678             /*
 679              * If we have 8-bit characters, encode it.
 680              */
 681
 682             if (encoding == CE_UNKNOWN)
 683                 encoding = pref_encoding(asciichars, specialchars,
 684                                          eightbitchars);
 685
 686             switch (encoding) {
 687
 688             case CE_BASE64:
 689                 if (field_encode_base64(NULL, &tmpbuf, charset)) {
 690                     errflag = true;
 691                     goto out;
 692                 }
 693                 break;
 694
 695             case CE_QUOTED:
 696                 if (field_encode_quoted(NULL, &tmpbuf, charset, asciichars,
 697                                         eightbitchars + specialchars, 1)) {
 698                     errflag = true;
 699                     goto out;
 700                 }
 701                 break;
 702
 703             default:
 704                 inform("Internal error: unknown RFC-2047 encoding type");
 705                 errflag = true;
 706                 goto out;
 707             }
 708
 709             reformat = true;
 710
 711             /*
 712              * Make sure the size of tmpbuf is correct (it always gets
 713              * reallocated in the above functions).
 714              */
 715
 716             tmpbufsize = strlen(tmpbuf) + 1;
 717
 718             /*
 719              * Put the note field back surrounded by parenthesis.
 720              */
 721
 722             mn->m_note = mh_xrealloc(mn->m_note, tmpbufsize + 2);
 723
 724             snprintf(mn->m_note, tmpbufsize + 2, "(%s)", tmpbuf);
 725         }
 726
 727 do_reformat:
 728
 729         /*
 730          * So, some explanation is in order.
 731          *
 732          * We know we need to rewrite at least one address in the header,
 733          * otherwise we wouldn't be here.  If we had to reformat this
 734          * particular address, then run it through adrformat().  Otherwise
 735          * we can use m_text directly.
 736          */
 737
 738         /*
 739          * If we were in a group but are no longer, make sure we add a
 740          * semicolon (which needs to be FIRST, as it needs to be at the end
 741          * of the last address).
 742          */
 743
 744         if (groupflag && ! mn->m_ingrp) {
 745             output = add(";", output);
 746             column++;
 747         }
 748
 749         groupflag = mn->m_ingrp;
 750
 751         if (mn->m_gname) {
 752             cp = mh_xstrdup(mn->m_gname);
 753         }
 754
 755         if (reformat) {
 756             cp = add(adrformat(mn), cp);
 757         } else {
 758             cp = add(mn->m_text, cp);
 759         }
 760
 761         len = strlen(cp);
 762
 763         /*
 764          * If we're not at the beginning of the line, add a command and
 765          * either a space or a newline.
 766          */
 767
 768         if (column != prefixlen) {
 769             if (len + column + 2 > OUTPUTLINELEN) {
 770
 771                 if ((size_t) (prefixlen + 3) < tmpbufsize)
 772                     tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = prefixlen + 3);
 773
 774                 snprintf(tmpbuf, tmpbufsize, ",\n%*s", column = prefixlen, "");
 775                 output = add(tmpbuf, output);
 776             } else {
 777                 output = add(", ", output);
 778                 column += 2;
 779             }
 780         }
 781
 782         /*
 783          * Finally add the address
 784          */
 785
 786         output = add(cp, output);
 787         column += len;
 788         free(cp);
 789         cp = NULL;
 790     }
 791
 792     /*
 793      * Just in case we're at the end of a list
 794      */
 795
 796     if (groupflag) {
 797         output = add(";", output);
 798     }
 799
 800     output = add("\n", output);
 801
 802     free(*value);
 803     *value = output;
 804     output = NULL;
 805
 806 out:
 807     free(tmpbuf);
 808     free(output);
 809
 810     return errflag;
 811 }
 812
 813 /*
 814  * Scan a string, check for characters that need to be encoded
 815  */
 816
 817 static int
 818 scanstring(const char *string, int *asciilen, int *eightbitchars,
 819            int *specialchars)
 820 {
 821     *asciilen = 0;
 822     *eightbitchars = 0;
 823     *specialchars = 0;
 824
 825     for (; *string != '\0'; string++) {
 826         if ((isascii((unsigned char) *string))) {
 827             (*asciilen)++;
 828             /*
 829              * So, a space is not a valid phrase character, but we're counting
 830              * an exception here, because in q-p a space can be directly
 831              * encoded as an underscore.
 832              */
 833             if (!qphrasevalid((unsigned char) *string) && *string != ' ')
 834                 (*specialchars)++;
 835         } else {
 836             (*eightbitchars)++;
 837         }
 838     }
 839
 840     return *eightbitchars > 0;
 841 }
 842
 843 /*
 844  * This function is to be used to decide which encoding algorithm we should
 845  * use if one is not given.  Basically, we pick whichever one is the shorter
 846  * of the two.
 847  *
 848  * Arguments are:
 849  *
 850  * ascii        - Number of ASCII characters in to-be-encoded string.
 851  * specials     - Number of ASCII characters in to-be-encoded string that
 852  *                still require encoding under quoted-printable.  Note that
 853  *                these are included in the "ascii" total.
 854  * eightbit     - Eight-bit characters in the to-be-encoded string.
 855  *
 856  * Returns one of CE_BASE64 or CE_QUOTED.
 857  */
 858
 859 static int
 860 pref_encoding(int ascii, int specials, int eightbits)
 861 {
 862     /*
 863      * The length of the q-p encoding is:
 864      *
 865      * ascii - specials + (specials + eightbits) * 3.
 866      *
 867      * The length of the base64 encoding is:
 868      *
 869      * base64len(ascii + eightbits)     (See macro for details)
 870      */
 871
 872     return base64len(ascii + eightbits) < (ascii - specials +
 873                         (specials + eightbits) * 3) ? CE_BASE64 : CE_QUOTED;
 874 }