diplodocus.org Git - nmh/blob - sbr/encode_rfc2047.c

   1 /* encode_rfc2047.c -- encode message headers using RFC 2047 encoding.
   2  *
   3  * This code is Copyright (c) 2002, by the authors of nmh.  See the
   4  * COPYRIGHT file in the root directory of the nmh distribution for
   5  * complete copyright information.
   6  */
   7
   8 #include "h/mh.h"
   9 #include "encode_rfc2047.h"
  10 #include "check_charset.h"
  11 #include "error.h"
  12 #include "h/mhparse.h"
  13 #include "h/addrsbr.h"
  14 #include "h/utils.h"
  15 #include "base64.h"
  16 #include "unquote.h"
  17
  18 /*
  19  * List of headers that contain addresses and as a result require special
  20  * handling
  21  */
  22
  23 static char *address_headers[] = {
  24     "To",
  25     "From",
  26     "cc",
  27     "Bcc",
  28     "Reply-To",
  29     "Sender",
  30     "Resent-To",
  31     "Resent-From",
  32     "Resent-cc",
  33     "Resent-Bcc",
  34     "Resent-Reply-To",
  35     "Resent-Sender",
  36     NULL,
  37 };
  38
  39 /*
  40  * Macros we use for parsing headers
  41  */
  42
  43 #define is_fws(c) (c == '\t' || c == ' ' || c == '\n')
  44
  45 #define qphrasevalid(c) ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || \
  46                          (c >= 'a' && c <= 'z') || \
  47                          c == '!' || c == '*' || c == '+' || c == '-' || \
  48                          c == '/' || c == '=' || c == '_')
  49 #define qpspecial(c) (c < ' ' || c == '=' || c == '?' || c == '_')
  50
  51 #define base64len(n) ((((n) + 2) / 3 ) * 4)     /* String len to base64 len */
  52 #define strbase64(n) ((n) / 4 * 3)              /* Chars that fit in base64 */
  53
  54 #define ENCODELINELIMIT 76
  55
  56 static void unfold_header(char **, int);
  57 static int field_encode_address(const char *, char **, int, const char *);
  58 static int field_encode_quoted(const char *, char **, const char *, int,
  59                                int, int);
  60 static int field_encode_base64(const char *, char **, const char *);
  61 static int scanstring(const char *, int *, int *, int *);
  62 static int utf8len(const char *);
  63 static int pref_encoding(int, int, int);
  64
  65 /*
  66  * Encode a message header using RFC 2047 encoding.  We make the assumption
  67  * that all characters < 128 are ASCII and as a consequence don't need any
  68  * encoding.
  69  */
  70
  71 int
  72 encode_rfc2047(const char *name, char **value, int encoding,
  73                const char *charset)
  74 {
  75     int i, asciicount = 0, eightbitcount = 0, qpspecialcount = 0;
  76     char *p;
  77
  78     /*
  79      * First, check to see if we even need to encode the header
  80      */
  81
  82     for (p = *value; *p != '\0'; p++) {
  83         if (isascii((unsigned char) *p)) {
  84             asciicount++;
  85             if (qpspecial((unsigned char) *p))
  86                 qpspecialcount++;
  87         } else
  88             eightbitcount++;
  89     }
  90
  91     if (eightbitcount == 0)
  92         return 0;
  93
  94     /*
  95      * Some rules from RFC 2047:
  96      *
  97      * - Encoded words cannot be more than 75 characters long
  98      * - Multiple "long" encoded words must be on new lines.
  99      *
 100      * Also, we're not permitted to encode email addresses, so
 101      * we need to actually _parse_ email addresses and only encode
 102      * the right bits.
 103      */
 104
 105     /*
 106      * If charset was NULL, then get the value from the locale.  But
 107      * we reject it if it returns US-ASCII
 108      */
 109
 110     if (charset == NULL)
 111         charset = write_charset_8bit();
 112
 113     if (strcasecmp(charset, "US-ASCII") == 0) {
 114         inform("Cannot use US-ASCII with 8 bit characters in header");
 115         return 1;
 116     }
 117
 118     /*
 119      * If we have an address header, then we need to parse the addresses
 120      * and only encode the names or comments.  Otherwise, handle it normally.
 121      */
 122
 123     for (i = 0; address_headers[i]; i++) {
 124         if (strcasecmp(name, address_headers[i]) == 0)
 125             return field_encode_address(name, value, encoding, charset);
 126     }
 127
 128     /*
 129      * On the encoding we choose, and the specifics of encoding:
 130      *
 131      * - If a specified encoding is passed in, we use that.
 132      * - Otherwise, pick which encoding is shorter.
 133      *
 134      * We don't quite handle continuation right here, but it should be
 135      * pretty close.
 136      */
 137
 138     if (encoding == CE_UNKNOWN)
 139         encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount);
 140
 141     unfold_header(value, asciicount + eightbitcount);
 142
 143     switch (encoding) {
 144
 145     case CE_BASE64:
 146         return field_encode_base64(name, value, charset);
 147
 148     case CE_QUOTED:
 149         return field_encode_quoted(name, value, charset, asciicount,
 150                                    eightbitcount + qpspecialcount, 0);
 151
 152     default:
 153         inform("Internal error: unknown RFC-2047 encoding type");
 154         return 1;
 155     }
 156 }
 157
 158 /*
 159  * Encode our specified header (or field) using quoted-printable
 160  */
 161
 162 static int
 163 field_encode_quoted(const char *name, char **value, const char *charset,
 164                     int ascii, int encoded, int phraserules)
 165 {
 166     int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column;
 167     int charsetlen = strlen(charset), utf8;
 168     char *output = NULL, *p, *q = NULL;
 169
 170     /*
 171      * Right now we just encode the whole thing.  Maybe later on we'll
 172      * only encode things on a per-atom basis.
 173      */
 174
 175     p = *value;
 176
 177     column = prefixlen + 2;     /* Header name plus ": " */
 178
 179     utf8 = strcasecmp(charset, "UTF-8") == 0;
 180
 181     bool newline = true;
 182     while (*p != '\0') {
 183         /*
 184          * Start a new line, if it's time
 185          */
 186         if (newline) {
 187             /*
 188              * If it's the start of the header, we don't need to pad it
 189              *
 190              * The length of the output string is ...
 191              * =?charset?Q?...?=  so that's 7+strlen(charset) + 2 for \n NUL
 192              *
 193              * plus 1 for every ASCII character and 3 for every eight bit
 194              * or special character (eight bit characters are written as =XX).
 195              *
 196              */
 197
 198             int tokenlen;
 199
 200             outlen += 9 + charsetlen + ascii + 3 * encoded;
 201
 202             /*
 203              * If output is set, then we're continuing the header.  Otherwise
 204              * do the initial allocation.
 205              */
 206
 207             if (output) {
 208                 int curlen = q - output, i;
 209                 outlen += prefixlen + 1;        /* Header plus \n ": " */
 210                 output = mh_xrealloc(output, outlen);
 211                 q = output + curlen;
 212                 *q++ = '?';
 213                 *q++ = '=';
 214                 *q++ = '\n';
 215                 for (i = 0; i < prefixlen; i++)
 216                     *q++ = ' ';
 217             } else {
 218                 /*
 219                  * A bit of a hack here; the header can contain multiple
 220                  * spaces (probably at least one) until we get to the
 221                  * actual text.  Copy until we get to a non-space.
 222                  */
 223                 output = mh_xmalloc(outlen);
 224                 q = output;
 225                 while (is_fws(*p))
 226                     *q++ = *p++;
 227             }
 228
 229             tokenlen = snprintf(q, outlen - (q - output), "=?%s?Q?", charset);
 230             q += tokenlen;
 231             column = prefixlen + tokenlen;
 232             newline = false;
 233         }
 234
 235         /*
 236          * Process each character, encoding if necessary
 237          *
 238          * Note that we have a different set of rules if we're processing
 239          * RFC 5322 'phrase' (something you'd see in an address header).
 240          */
 241
 242         column++;
 243
 244         if (*p == ' ') {
 245             *q++ = '_';
 246             ascii--;
 247         } else if (isascii((unsigned char) *p) &&
 248                    (phraserules ? qphrasevalid((unsigned char) *p) :
 249                                         !qpspecial((unsigned char) *p))) {
 250             *q++ = *p;
 251             ascii--;
 252         } else {
 253             snprintf(q, outlen - (q - output), "=%02X", (unsigned char) *p);
 254             q += 3;
 255             column += 2;        /* column already incremented by 1 above */
 256             encoded--;
 257         }
 258
 259         p++;
 260
 261         /*
 262          * We're not allowed more than ENCODELINELIMIT characters per line,
 263          * so reserve some room for the final ?=.
 264          *
 265          * If prefixlen == 0, we haven't been passed in a header name, so
 266          * don't ever wrap the field (we're likely doing an address).
 267          */
 268
 269         if (prefixlen == 0)
 270             continue;
 271
 272         if (column >= ENCODELINELIMIT - 2) {
 273             newline = true;
 274         } else if (utf8) {
 275             /*
 276              * Okay, this is a bit weird, but to explain a bit more ...
 277              *
 278              * RFC 2047 prohibits the splitting of multibyte characters
 279              * across encoded words.  Right now we only handle the case
 280              * of UTF-8, the most common multibyte encoding.
 281              *
 282              * p is now pointing at the next input character.  If we're
 283              * using UTF-8 _and_ we'd go over ENCODELINELIMIT given the
 284              * length of the complete character, then trigger a newline
 285              * now.  Note that we check the length * 3 since we have to
 286              * allow for the encoded output.
 287              */
 288             if (column + (utf8len(p) * 3) > ENCODELINELIMIT - 2) {
 289                 newline = true;
 290             }
 291         }
 292     }
 293
 294     if (q == NULL) {
 295         /* This should never happen, but just in case.  Found by
 296            clang static analyzer. */
 297         inform("null output encoding for %s, continuing...", *value);
 298         return 1;
 299     }
 300     *q++ = '?';
 301     *q++ = '=';
 302
 303     if (prefixlen)
 304         *q++ = '\n';
 305
 306     *q = '\0';
 307
 308     free(*value);
 309
 310     *value = output;
 311
 312     return 0;
 313 }
 314
 315 /*
 316  * Encode our specified header (or field) using base64.
 317  *
 318  * This is a little easier since every character gets encoded, we can
 319  * calculate the line wrap up front.
 320  */
 321
 322 static int
 323 field_encode_base64(const char *name, char **value, const char *charset)
 324 {
 325     int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset);
 326     int outlen = 0, numencode, curlen;
 327     char *output = NULL, *p = *value, *q = NULL, *linestart = NULL;
 328
 329     /*
 330      * Skip over any leading white space.
 331      */
 332
 333     while (*p == ' ' || *p == '\t')
 334         p++;
 335
 336     /*
 337      * If we had a zero-length prefix, then just encode the whole field
 338      * as-is, without line wrapping.  Note that in addition to the encoding
 339      *
 340      * The added length we need is =? + charset + ?B? ... ?=
 341      *
 342      * That's 7 + strlen(charset) + 2 (for \n NUL).
 343      */
 344
 345     while (prefixlen && ((base64len(strlen(p)) + 7 + charsetlen +
 346                           prefixlen) > ENCODELINELIMIT)) {
 347
 348         /*
 349          * Our very first time, don't pad the line in the front
 350          *
 351          * Note ENCODELINELIMIT is + 2 because of \n \0
 352          */
 353
 354
 355         if (! output) {
 356             outlen += ENCODELINELIMIT + 2;
 357             output = q = mh_xmalloc(outlen);
 358             linestart = q - prefixlen;  /* Yes, this is intentional */
 359         } else {
 360             int curstart = linestart - output;
 361             curlen = q - output;
 362
 363             outlen += ENCODELINELIMIT + 2;
 364             output = mh_xrealloc(output, outlen);
 365             q = output + curlen;
 366             linestart = output + curstart;
 367         }
 368
 369         /*
 370          * We should have enough space now, so prepend the encoding markers
 371          * and character set information.  The leading space is intentional.
 372          */
 373
 374         q += snprintf(q, outlen - (q - output), " =?%s?B?", charset);
 375
 376         /*
 377          * Find out how much room we have left on the line and see how
 378          * many characters we can stuff in.  The start of our line
 379          * is marked by "linestart", so use that to figure out how
 380          * many characters are left out of ENCODELINELIMIT.  Reserve
 381          * 2 characters for the end markers and calculate how many
 382          * characters we can fit into that space given the base64
 383          * encoding expansion.
 384          */
 385
 386         numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2);
 387
 388         if (numencode <= 0) {
 389             inform("Internal error: tried to encode %d characters "
 390                    "in base64", numencode);
 391             return 1;
 392         }
 393
 394         /*
 395          * RFC 2047 prohibits spanning multibyte characters across tokens.
 396          * Right now we only check for UTF-8.
 397          *
 398          * So note the key here ... we want to make sure the character BEYOND
 399          * our last character is not a continuation byte.  If it's the start
 400          * of a new multibyte character or a single-byte character, that's ok.
 401          */
 402
 403         if (strcasecmp(charset, "UTF-8") == 0) {
 404             /*
 405              * p points to the start of our current buffer, so p + numencode
 406              * is one past the last character to encode
 407              */
 408
 409             while (numencode > 0 && ((*(p + numencode) & 0xc0) == 0x80))
 410                 numencode--;
 411
 412             if (numencode == 0) {
 413                 inform("Internal error: could not find start of "
 414                        "UTF-8 character when base64 encoding header");
 415                 return 1;
 416             }
 417         }
 418
 419         if (writeBase64raw((unsigned char *) p, numencode,
 420                            (unsigned char *) q) != OK) {
 421             inform("Internal error: base64 encoding of header failed");
 422             return 1;
 423         }
 424
 425         p += numencode;
 426         q += base64len(numencode);
 427
 428         /*
 429          * This will point us at the beginning of the new line (trust me).
 430          */
 431
 432         linestart = q + 3;
 433
 434         /*
 435          * What's going on here?  Well, we know we're continuing to the next
 436          * line, so we want to add continuation padding.  We also add the
 437          * trailing marker for the RFC 2047 token at this time as well.
 438          * This uses a trick of snprintf(); we tell it to print a zero-length
 439          * string, but pad it out to prefixlen - 1 characters; that ends
 440          * up always printing out the requested number of spaces.  We use
 441          * prefixlen - 1 because we always add a space on the starting
 442          * token marker; this makes things work out correctly for the first
 443          * line, which should have a space between the ':' and the start
 444          * of the token.
 445          *
 446          * It's okay if you don't follow all of that.
 447          */
 448
 449         q += snprintf(q, outlen - (q - output), "?=\n%*s", prefixlen - 1, "");
 450     }
 451
 452     /*
 453      * We're here if there is either no prefix, or we can fit it in less
 454      * than ENCODELINELIMIT characters.  Encode the whole thing.
 455      */
 456
 457     outlen += prefixlen + 9 + charsetlen + base64len(strlen(p));
 458     curlen = q - output;
 459
 460     output = mh_xrealloc(output, outlen);
 461     q = output + curlen;
 462
 463     q += snprintf(q, outlen - (q - output), "%s=?%s?B?",
 464                   prefixlen ? " " : "", charset);
 465
 466     if (writeBase64raw((unsigned char *) p, strlen(p),
 467                        (unsigned char *) q) != OK) {
 468         inform("Internal error: base64 encoding of header failed");
 469         return 1;
 470     }
 471
 472     strcat(q, "?=");
 473
 474     if (prefixlen)
 475         strcat(q, "\n");
 476
 477     free(*value);
 478
 479     *value = output;
 480
 481     return 0;
 482 }
 483
 484 /*
 485  * Calculate the length of a UTF-8 character.
 486  *
 487  * If it's not a UTF-8 character (or we're in the middle of a multibyte
 488  * character) then simply return 0.
 489  */
 490
 491 static int
 492 utf8len(const char *p)
 493 {
 494     int len = 1;
 495
 496     if (*p == '\0')
 497         return 0;
 498
 499     if (isascii((unsigned char) *p) || (((unsigned char) *p) & 0xc0) == 0x80)
 500         return 0;
 501
 502     p++;
 503     while ((((unsigned char) *p++) & 0xc0) == 0x80)
 504         len++;
 505
 506     return len;
 507 }
 508
 509 /*
 510  * "Unfold" a header, making it a single line (without continuation)
 511  *
 512  * We cheat a bit here; we never make the string longer, so using the
 513  * original length here is fine.
 514  */
 515
 516 static void
 517 unfold_header(char **value, int len)
 518 {
 519     char *str = mh_xmalloc(len + 1);
 520     char *p = str, *q = *value;
 521
 522     while (*q != '\0') {
 523         if (*q == '\n') {
 524             /*
 525              * When we get a newline, skip to the next non-whitespace
 526              * character and add a space to replace all of the whitespace
 527              *
 528              * This has the side effect of stripping off the final newline
 529              * for the header; we put it back in the encoding routine.
 530              */
 531             while (is_fws(*q))
 532                 q++;
 533             if (*q == '\0')
 534                 break;
 535
 536             *p++ = ' ';
 537         } else {
 538             *p++ = *q++;
 539         }
 540     }
 541
 542     *p = '\0';
 543
 544     free(*value);
 545     *value = str;
 546 }
 547
 548 /*
 549  * Decode a header containing addresses.  This means we have to parse
 550  * each address and only encode the display-name or comment field.
 551  */
 552
 553 static int
 554 field_encode_address(const char *name, char **value, int encoding,
 555                      const char *charset)
 556 {
 557     int prefixlen = strlen(name) + 2, column = prefixlen, groupflag;
 558     int asciichars;
 559     int specialchars;
 560     int eightbitchars;
 561     bool reformat = false;
 562     bool errflag = false;
 563     size_t len;
 564     char *mp, *cp = NULL, *output = NULL;
 565     char *tmpbuf = NULL;
 566     size_t tmpbufsize = 0;
 567     struct mailname *mn;
 568     char errbuf[BUFSIZ];
 569
 570     /*
 571      * Because these are addresses, we need to handle them individually.
 572      *
 573      * Break them down and process them one by one.  This means we have to
 574      * rewrite the whole header, but that's unavoidable.
 575      */
 576
 577     /*
 578      * The output headers always have to start with a space first; this
 579      * is just the way the API works right now.
 580      */
 581
 582     output = add(" ", output);
 583
 584     for (groupflag = 0; (mp = getname(*value)); ) {
 585         if ((mn = getm(mp, NULL, 0, errbuf, sizeof(errbuf))) == NULL) {
 586             inform("%s: %s", errbuf, mp);
 587             errflag = true;
 588             continue;
 589         }
 590
 591         reformat = false;
 592
 593         /*
 594          * We only care if the phrase (m_pers) or any trailing comment
 595          * (m_note) have 8-bit characters.  If doing q-p, we also need
 596          * to encode anything marked as qspecial().  Unquote it first
 597          * so the specialchars count is right.
 598          */
 599
 600         if (! mn->m_pers)
 601             goto check_note;
 602
 603         if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) {
 604             tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
 605         }
 606
 607         unquote_string(mn->m_pers, tmpbuf);
 608
 609         if (scanstring(tmpbuf, &asciichars, &eightbitchars,
 610                        &specialchars)) {
 611             /*
 612              * If we have 8-bit characters, encode it.
 613              */
 614
 615             if (encoding == CE_UNKNOWN)
 616                 encoding = pref_encoding(asciichars, specialchars,
 617                                          eightbitchars);
 618
 619             /*
 620              * This is okay, because the output of unquote_string will be either
 621              * equal or shorter than the original.
 622              */
 623
 624             strcpy(mn->m_pers, tmpbuf);
 625
 626             switch (encoding) {
 627
 628             case CE_BASE64:
 629                 if (field_encode_base64(NULL, &mn->m_pers, charset)) {
 630                     errflag = true;
 631                     goto out;
 632                 }
 633                 break;
 634
 635             case CE_QUOTED:
 636                 if (field_encode_quoted(NULL, &mn->m_pers, charset, asciichars,
 637                                         eightbitchars + specialchars, 1)) {
 638                     errflag = true;
 639                     goto out;
 640                 }
 641                 break;
 642
 643             default:
 644                 inform("Internal error: unknown RFC-2047 encoding type");
 645                 errflag = true;
 646                 goto out;
 647             }
 648
 649             reformat = true;
 650         }
 651
 652         check_note:
 653
 654         /*
 655          * The "note" field is generally a comment at the end of the address,
 656          * at least as how it's implemented here.  Notes are always surrounded
 657          * by parenthesis (since they're comments).  Strip them out and
 658          * then put them back when we format the final field, but they do
 659          * not get encoded.
 660          */
 661
 662         if (! mn->m_note)
 663             goto do_reformat;
 664
 665         if ((len = strlen(mn->m_note)) + 1 > tmpbufsize) {
 666             tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
 667         }
 668
 669         if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') {
 670             inform("Internal error: Invalid note field \"%s\"",
 671                    mn->m_note);
 672             errflag = true;
 673             goto out;
 674         }
 675
 676         strncpy(tmpbuf, mn->m_note + 1, len - 1);
 677         tmpbuf[len - 2] = '\0';
 678
 679         if (scanstring(tmpbuf, &asciichars, &eightbitchars,
 680                        &specialchars)) {
 681             /*
 682              * If we have 8-bit characters, encode it.
 683              */
 684
 685             if (encoding == CE_UNKNOWN)
 686                 encoding = pref_encoding(asciichars, specialchars,
 687                                          eightbitchars);
 688
 689             switch (encoding) {
 690
 691             case CE_BASE64:
 692                 if (field_encode_base64(NULL, &tmpbuf, charset)) {
 693                     errflag = true;
 694                     goto out;
 695                 }
 696                 break;
 697
 698             case CE_QUOTED:
 699                 if (field_encode_quoted(NULL, &tmpbuf, charset, asciichars,
 700                                         eightbitchars + specialchars, 1)) {
 701                     errflag = true;
 702                     goto out;
 703                 }
 704                 break;
 705
 706             default:
 707                 inform("Internal error: unknown RFC-2047 encoding type");
 708                 errflag = true;
 709                 goto out;
 710             }
 711
 712             reformat = true;
 713
 714             /*
 715              * Make sure the size of tmpbuf is correct (it always gets
 716              * reallocated in the above functions).
 717              */
 718
 719             tmpbufsize = strlen(tmpbuf) + 1;
 720
 721             /*
 722              * Put the note field back surrounded by parenthesis.
 723              */
 724
 725             mn->m_note = mh_xrealloc(mn->m_note, tmpbufsize + 2);
 726
 727             snprintf(mn->m_note, tmpbufsize + 2, "(%s)", tmpbuf);
 728         }
 729
 730 do_reformat:
 731
 732         /*
 733          * So, some explanation is in order.
 734          *
 735          * We know we need to rewrite at least one address in the header,
 736          * otherwise we wouldn't be here.  If we had to reformat this
 737          * particular address, then run it through adrformat().  Otherwise
 738          * we can use m_text directly.
 739          */
 740
 741         /*
 742          * If we were in a group but are no longer, make sure we add a
 743          * semicolon (which needs to be FIRST, as it needs to be at the end
 744          * of the last address).
 745          */
 746
 747         if (groupflag && ! mn->m_ingrp) {
 748             output = add(";", output);
 749             column++;
 750         }
 751
 752         groupflag = mn->m_ingrp;
 753
 754         if (mn->m_gname) {
 755             cp = mh_xstrdup(mn->m_gname);
 756         }
 757
 758         if (reformat) {
 759             cp = add(adrformat(mn), cp);
 760         } else {
 761             cp = add(mn->m_text, cp);
 762         }
 763
 764         len = strlen(cp);
 765
 766         /*
 767          * If we're not at the beginning of the line, add a command and
 768          * either a space or a newline.
 769          */
 770
 771         if (column != prefixlen) {
 772             if (len + column + 2 > OUTPUTLINELEN) {
 773
 774                 if ((size_t) (prefixlen + 3) < tmpbufsize)
 775                     tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = prefixlen + 3);
 776
 777                 snprintf(tmpbuf, tmpbufsize, ",\n%*s", column = prefixlen, "");
 778                 output = add(tmpbuf, output);
 779             } else {
 780                 output = add(", ", output);
 781                 column += 2;
 782             }
 783         }
 784
 785         /*
 786          * Finally add the address
 787          */
 788
 789         output = add(cp, output);
 790         column += len;
 791         free(cp);
 792         cp = NULL;
 793     }
 794
 795     /*
 796      * Just in case we're at the end of a list
 797      */
 798
 799     if (groupflag) {
 800         output = add(";", output);
 801     }
 802
 803     output = add("\n", output);
 804
 805     free(*value);
 806     *value = output;
 807     output = NULL;
 808
 809 out:
 810     free(tmpbuf);
 811     free(output);
 812
 813     return errflag;
 814 }
 815
 816 /*
 817  * Scan a string, check for characters that need to be encoded
 818  */
 819
 820 static int
 821 scanstring(const char *string, int *asciilen, int *eightbitchars,
 822            int *specialchars)
 823 {
 824     *asciilen = 0;
 825     *eightbitchars = 0;
 826     *specialchars = 0;
 827
 828     for (; *string != '\0'; string++) {
 829         if ((isascii((unsigned char) *string))) {
 830             (*asciilen)++;
 831             /*
 832              * So, a space is not a valid phrase character, but we're counting
 833              * an exception here, because in q-p a space can be directly
 834              * encoded as an underscore.
 835              */
 836             if (!qphrasevalid((unsigned char) *string) && *string != ' ')
 837                 (*specialchars)++;
 838         } else {
 839             (*eightbitchars)++;
 840         }
 841     }
 842
 843     return *eightbitchars > 0;
 844 }
 845
 846 /*
 847  * This function is to be used to decide which encoding algorithm we should
 848  * use if one is not given.  Basically, we pick whichever one is the shorter
 849  * of the two.
 850  *
 851  * Arguments are:
 852  *
 853  * ascii        - Number of ASCII characters in to-be-encoded string.
 854  * specials     - Number of ASCII characters in to-be-encoded string that
 855  *                still require encoding under quoted-printable.  Note that
 856  *                these are included in the "ascii" total.
 857  * eightbit     - Eight-bit characters in the to-be-encoded string.
 858  *
 859  * Returns one of CE_BASE64 or CE_QUOTED.
 860  */
 861
 862 static int
 863 pref_encoding(int ascii, int specials, int eightbits)
 864 {
 865     /*
 866      * The length of the q-p encoding is:
 867      *
 868      * ascii - specials + (specials + eightbits) * 3.
 869      *
 870      * The length of the base64 encoding is:
 871      *
 872      * base64len(ascii + eightbits)     (See macro for details)
 873      */
 874
 875     return base64len(ascii + eightbits) < (ascii - specials +
 876                         (specials + eightbits) * 3) ? CE_BASE64 : CE_QUOTED;
 877 }