]> diplodocus.org Git - nmh/blob - sbr/encode_rfc2047.c
sendsbr.c: Move interface to own file.
[nmh] / sbr / encode_rfc2047.c
1 /* encode_rfc2047.c -- encode message headers using RFC 2047 encoding.
2 *
3 * This code is Copyright (c) 2002, by the authors of nmh. See the
4 * COPYRIGHT file in the root directory of the nmh distribution for
5 * complete copyright information.
6 */
7
8 #include "h/mh.h"
9 #include "encode_rfc2047.h"
10 #include "check_charset.h"
11 #include "error.h"
12 #include "h/mhparse.h"
13 #include "h/addrsbr.h"
14 #include "h/utils.h"
15 #include "base64.h"
16 #include "unquote.h"
17
/*
 * Names of the header fields whose bodies are address lists and which
 * therefore need special handling (see field_encode_address()).
 *
 * The table is terminated by a NULL entry and is never modified, so both
 * the strings and the pointer array are const.
 */

static const char *const address_headers[] = {
    "To",
    "From",
    "cc",
    "Bcc",
    "Reply-To",
    "Sender",
    "Resent-To",
    "Resent-From",
    "Resent-cc",
    "Resent-Bcc",
    "Resent-Reply-To",
    "Resent-Sender",
    NULL,
};
38
/*
 * Macros we use for parsing headers.
 *
 * Every use of a macro argument is parenthesized so that an expression
 * argument (for example "c & 0x7f") is grouped the way the caller wrote
 * it.  The character-class macros still evaluate their argument more than
 * once, so never pass an expression with side effects.
 */

/* Folding white space: tab, space or newline. */
#define is_fws(c) ((c) == '\t' || (c) == ' ' || (c) == '\n')

/* Characters that may appear literally in a "Q" encoded phrase. */
#define qphrasevalid(c) (((c) >= '0' && (c) <= '9') || \
                         ((c) >= 'A' && (c) <= 'Z') || \
                         ((c) >= 'a' && (c) <= 'z') || \
                         (c) == '!' || (c) == '*' || (c) == '+' || \
                         (c) == '-' || (c) == '/' || (c) == '=' || \
                         (c) == '_')

/* ASCII characters that must still be written as =XX in "Q" encoding. */
#define qpspecial(c) ((c) < ' ' || (c) == '=' || (c) == '?' || (c) == '_')

#define base64len(n) ((((n) + 2) / 3) * 4)  /* String len to base64 len */
#define strbase64(n) ((n) / 4 * 3)          /* Chars that fit in base64 */

/* Maximum length of an output line that contains an encoded-word. */
#define ENCODELINELIMIT 76
55
/* Forward declarations of the helpers private to this file. */
static void unfold_header(char **value, int len);
static int field_encode_address(const char *name, char **value, int encoding,
                                const char *charset);
static int field_encode_quoted(const char *name, char **value,
                               const char *charset, int ascii, int encoded,
                               int phraserules);
static int field_encode_base64(const char *name, char **value,
                               const char *charset);
static int scanstring(const char *string, int *asciilen, int *eightbitchars,
                      int *specialchars);
static int utf8len(const char *p);
static int pref_encoding(int ascii, int specials, int eightbits);
64
65 /*
66 * Encode a message header using RFC 2047 encoding. We make the assumption
67 * that all characters < 128 are ASCII and as a consequence don't need any
68 * encoding.
69 */
70
71 int
72 encode_rfc2047(const char *name, char **value, int encoding,
73 const char *charset)
74 {
75 int i, asciicount = 0, eightbitcount = 0, qpspecialcount = 0;
76 char *p;
77
78 /*
79 * First, check to see if we even need to encode the header
80 */
81
82 for (p = *value; *p != '\0'; p++) {
83 if (isascii((unsigned char) *p)) {
84 asciicount++;
85 if (qpspecial((unsigned char) *p))
86 qpspecialcount++;
87 } else
88 eightbitcount++;
89 }
90
91 if (eightbitcount == 0)
92 return 0;
93
94 /*
95 * Some rules from RFC 2047:
96 *
97 * - Encoded words cannot be more than 75 characters long
98 * - Multiple "long" encoded words must be on new lines.
99 *
100 * Also, we're not permitted to encode email addresses, so
101 * we need to actually _parse_ email addresses and only encode
102 * the right bits.
103 */
104
105 /*
106 * If charset was NULL, then get the value from the locale. But
107 * we reject it if it returns US-ASCII
108 */
109
110 if (charset == NULL)
111 charset = write_charset_8bit();
112
113 if (strcasecmp(charset, "US-ASCII") == 0) {
114 inform("Cannot use US-ASCII with 8 bit characters in header");
115 return 1;
116 }
117
118 /*
119 * If we have an address header, then we need to parse the addresses
120 * and only encode the names or comments. Otherwise, handle it normally.
121 */
122
123 for (i = 0; address_headers[i]; i++) {
124 if (strcasecmp(name, address_headers[i]) == 0)
125 return field_encode_address(name, value, encoding, charset);
126 }
127
128 /*
129 * On the encoding we choose, and the specifics of encoding:
130 *
131 * - If a specified encoding is passed in, we use that.
132 * - Otherwise, pick which encoding is shorter.
133 *
134 * We don't quite handle continuation right here, but it should be
135 * pretty close.
136 */
137
138 if (encoding == CE_UNKNOWN)
139 encoding = pref_encoding(asciicount, qpspecialcount, eightbitcount);
140
141 unfold_header(value, asciicount + eightbitcount);
142
143 switch (encoding) {
144
145 case CE_BASE64:
146 return field_encode_base64(name, value, charset);
147
148 case CE_QUOTED:
149 return field_encode_quoted(name, value, charset, asciicount,
150 eightbitcount + qpspecialcount, 0);
151
152 default:
153 inform("Internal error: unknown RFC-2047 encoding type");
154 return 1;
155 }
156 }
157
158 /*
159 * Encode our specified header (or field) using quoted-printable
160 */
161
162 static int
163 field_encode_quoted(const char *name, char **value, const char *charset,
164 int ascii, int encoded, int phraserules)
165 {
166 int prefixlen = name ? strlen(name) + 2: 0, outlen = 0, column;
167 int charsetlen = strlen(charset), utf8;
168 char *output = NULL, *p, *q = NULL;
169
170 /*
171 * Right now we just encode the whole thing. Maybe later on we'll
172 * only encode things on a per-atom basis.
173 */
174
175 p = *value;
176
177 column = prefixlen + 2; /* Header name plus ": " */
178
179 utf8 = strcasecmp(charset, "UTF-8") == 0;
180
181 bool newline = true;
182 while (*p != '\0') {
183 /*
184 * Start a new line, if it's time
185 */
186 if (newline) {
187 /*
188 * If it's the start of the header, we don't need to pad it
189 *
190 * The length of the output string is ...
191 * =?charset?Q?...?= so that's 7+strlen(charset) + 2 for \n NUL
192 *
193 * plus 1 for every ASCII character and 3 for every eight bit
194 * or special character (eight bit characters are written as =XX).
195 *
196 */
197
198 int tokenlen;
199
200 outlen += 9 + charsetlen + ascii + 3 * encoded;
201
202 /*
203 * If output is set, then we're continuing the header. Otherwise
204 * do the initial allocation.
205 */
206
207 if (output) {
208 int curlen = q - output, i;
209 outlen += prefixlen + 1; /* Header plus \n ": " */
210 output = mh_xrealloc(output, outlen);
211 q = output + curlen;
212 *q++ = '?';
213 *q++ = '=';
214 *q++ = '\n';
215 for (i = 0; i < prefixlen; i++)
216 *q++ = ' ';
217 } else {
218 /*
219 * A bit of a hack here; the header can contain multiple
220 * spaces (probably at least one) until we get to the
221 * actual text. Copy until we get to a non-space.
222 */
223 output = mh_xmalloc(outlen);
224 q = output;
225 while (is_fws(*p))
226 *q++ = *p++;
227 }
228
229 tokenlen = snprintf(q, outlen - (q - output), "=?%s?Q?", charset);
230 q += tokenlen;
231 column = prefixlen + tokenlen;
232 newline = false;
233 }
234
235 /*
236 * Process each character, encoding if necessary
237 *
238 * Note that we have a different set of rules if we're processing
239 * RFC 5322 'phrase' (something you'd see in an address header).
240 */
241
242 column++;
243
244 if (*p == ' ') {
245 *q++ = '_';
246 ascii--;
247 } else if (isascii((unsigned char) *p) &&
248 (phraserules ? qphrasevalid((unsigned char) *p) :
249 !qpspecial((unsigned char) *p))) {
250 *q++ = *p;
251 ascii--;
252 } else {
253 snprintf(q, outlen - (q - output), "=%02X", (unsigned char) *p);
254 q += 3;
255 column += 2; /* column already incremented by 1 above */
256 encoded--;
257 }
258
259 p++;
260
261 /*
262 * We're not allowed more than ENCODELINELIMIT characters per line,
263 * so reserve some room for the final ?=.
264 *
265 * If prefixlen == 0, we haven't been passed in a header name, so
266 * don't ever wrap the field (we're likely doing an address).
267 */
268
269 if (prefixlen == 0)
270 continue;
271
272 if (column >= ENCODELINELIMIT - 2) {
273 newline = true;
274 } else if (utf8) {
275 /*
276 * Okay, this is a bit weird, but to explain a bit more ...
277 *
278 * RFC 2047 prohibits the splitting of multibyte characters
279 * across encoded words. Right now we only handle the case
280 * of UTF-8, the most common multibyte encoding.
281 *
282 * p is now pointing at the next input character. If we're
283 * using UTF-8 _and_ we'd go over ENCODELINELIMIT given the
284 * length of the complete character, then trigger a newline
285 * now. Note that we check the length * 3 since we have to
286 * allow for the encoded output.
287 */
288 if (column + (utf8len(p) * 3) > ENCODELINELIMIT - 2) {
289 newline = true;
290 }
291 }
292 }
293
294 if (q == NULL) {
295 /* This should never happen, but just in case. Found by
296 clang static analyzer. */
297 inform("null output encoding for %s, continuing...", *value);
298 return 1;
299 }
300 *q++ = '?';
301 *q++ = '=';
302
303 if (prefixlen)
304 *q++ = '\n';
305
306 *q = '\0';
307
308 free(*value);
309
310 *value = output;
311
312 return 0;
313 }
314
315 /*
316 * Encode our specified header (or field) using base64.
317 *
318 * This is a little easier since every character gets encoded, we can
319 * calculate the line wrap up front.
320 */
321
322 static int
323 field_encode_base64(const char *name, char **value, const char *charset)
324 {
325 int prefixlen = name ? strlen(name) + 2 : 0, charsetlen = strlen(charset);
326 int outlen = 0, numencode, curlen;
327 char *output = NULL, *p = *value, *q = NULL, *linestart = NULL;
328
329 /*
330 * Skip over any leading white space.
331 */
332
333 while (*p == ' ' || *p == '\t')
334 p++;
335
336 /*
337 * If we had a zero-length prefix, then just encode the whole field
338 * as-is, without line wrapping. Note that in addition to the encoding
339 *
340 * The added length we need is =? + charset + ?B? ... ?=
341 *
342 * That's 7 + strlen(charset) + 2 (for \n NUL).
343 */
344
345 while (prefixlen && ((base64len(strlen(p)) + 7 + charsetlen +
346 prefixlen) > ENCODELINELIMIT)) {
347
348 /*
349 * Our very first time, don't pad the line in the front
350 *
351 * Note ENCODELINELIMIT is + 2 because of \n \0
352 */
353
354
355 if (! output) {
356 outlen += ENCODELINELIMIT + 2;
357 output = q = mh_xmalloc(outlen);
358 linestart = q - prefixlen; /* Yes, this is intentional */
359 } else {
360 int curstart = linestart - output;
361 curlen = q - output;
362
363 outlen += ENCODELINELIMIT + 2;
364 output = mh_xrealloc(output, outlen);
365 q = output + curlen;
366 linestart = output + curstart;
367 }
368
369 /*
370 * We should have enough space now, so prepend the encoding markers
371 * and character set information. The leading space is intentional.
372 */
373
374 q += snprintf(q, outlen - (q - output), " =?%s?B?", charset);
375
376 /*
377 * Find out how much room we have left on the line and see how
378 * many characters we can stuff in. The start of our line
379 * is marked by "linestart", so use that to figure out how
380 * many characters are left out of ENCODELINELIMIT. Reserve
381 * 2 characters for the end markers and calculate how many
382 * characters we can fit into that space given the base64
383 * encoding expansion.
384 */
385
386 numencode = strbase64(ENCODELINELIMIT - (q - linestart) - 2);
387
388 if (numencode <= 0) {
389 inform("Internal error: tried to encode %d characters "
390 "in base64", numencode);
391 return 1;
392 }
393
394 /*
395 * RFC 2047 prohibits spanning multibyte characters across tokens.
396 * Right now we only check for UTF-8.
397 *
398 * So note the key here ... we want to make sure the character BEYOND
399 * our last character is not a continuation byte. If it's the start
400 * of a new multibyte character or a single-byte character, that's ok.
401 */
402
403 if (strcasecmp(charset, "UTF-8") == 0) {
404 /*
405 * p points to the start of our current buffer, so p + numencode
406 * is one past the last character to encode
407 */
408
409 while (numencode > 0 && ((*(p + numencode) & 0xc0) == 0x80))
410 numencode--;
411
412 if (numencode == 0) {
413 inform("Internal error: could not find start of "
414 "UTF-8 character when base64 encoding header");
415 return 1;
416 }
417 }
418
419 if (writeBase64raw((unsigned char *) p, numencode,
420 (unsigned char *) q) != OK) {
421 inform("Internal error: base64 encoding of header failed");
422 return 1;
423 }
424
425 p += numencode;
426 q += base64len(numencode);
427
428 /*
429 * This will point us at the beginning of the new line (trust me).
430 */
431
432 linestart = q + 3;
433
434 /*
435 * What's going on here? Well, we know we're continuing to the next
436 * line, so we want to add continuation padding. We also add the
437 * trailing marker for the RFC 2047 token at this time as well.
438 * This uses a trick of snprintf(); we tell it to print a zero-length
439 * string, but pad it out to prefixlen - 1 characters; that ends
440 * up always printing out the requested number of spaces. We use
441 * prefixlen - 1 because we always add a space on the starting
442 * token marker; this makes things work out correctly for the first
443 * line, which should have a space between the ':' and the start
444 * of the token.
445 *
446 * It's okay if you don't follow all of that.
447 */
448
449 q += snprintf(q, outlen - (q - output), "?=\n%*s", prefixlen - 1, "");
450 }
451
452 /*
453 * We're here if there is either no prefix, or we can fit it in less
454 * than ENCODELINELIMIT characters. Encode the whole thing.
455 */
456
457 outlen += prefixlen + 9 + charsetlen + base64len(strlen(p));
458 curlen = q - output;
459
460 output = mh_xrealloc(output, outlen);
461 q = output + curlen;
462
463 q += snprintf(q, outlen - (q - output), "%s=?%s?B?",
464 prefixlen ? " " : "", charset);
465
466 if (writeBase64raw((unsigned char *) p, strlen(p),
467 (unsigned char *) q) != OK) {
468 inform("Internal error: base64 encoding of header failed");
469 return 1;
470 }
471
472 strcat(q, "?=");
473
474 if (prefixlen)
475 strcat(q, "\n");
476
477 free(*value);
478
479 *value = output;
480
481 return 0;
482 }
483
/*
 * Calculate the length in bytes of the UTF-8 multibyte character that
 * starts at p: the lead byte plus every continuation byte (10xxxxxx) that
 * directly follows it.
 *
 * Returns 0 if p does not point at the lead byte of a multibyte character,
 * i.e. at the end of the string, at an ASCII byte, or in the middle of a
 * multibyte character.
 */

static int
utf8len(const char *p)
{
    const unsigned char *s = (const unsigned char *) p;
    int nbytes;

    /* High bit clear: NUL or plain ASCII.  10xxxxxx: continuation byte. */
    if ((*s & 0x80) == 0 || (*s & 0xc0) == 0x80)
        return 0;

    for (nbytes = 1; (s[nbytes] & 0xc0) == 0x80; nbytes++)
        continue;

    return nbytes;
}
508
/*
 * "Unfold" a header, making it a single line (without continuation).
 *
 * value - In/out; the old string is freed and replaced by the unfolded
 *         copy.
 * len   - Length of the original string.  Unfolding never makes the
 *         string longer, so len + 1 bytes are enough for the copy.
 */

static void
unfold_header(char **value, int len)
{
    char *unfolded = mh_xmalloc(len + 1);
    char *dst = unfolded;
    const char *src = *value;

    while (*src != '\0') {
        if (*src != '\n') {
            *dst++ = *src++;
            continue;
        }

        /*
         * A newline: swallow it together with all white space that
         * follows and put a single blank in its place.
         *
         * If nothing but white space was left, this also drops the final
         * newline of the header; the encoding routines add it back.
         */
        while (is_fws(*src))
            src++;

        if (*src == '\0')
            break;

        *dst++ = ' ';
    }

    *dst = '\0';

    free(*value);
    *value = unfolded;
}
547
548 /*
549 * Decode a header containing addresses. This means we have to parse
550 * each address and only encode the display-name or comment field.
551 */
552
553 static int
554 field_encode_address(const char *name, char **value, int encoding,
555 const char *charset)
556 {
557 int prefixlen = strlen(name) + 2, column = prefixlen, groupflag;
558 int asciichars;
559 int specialchars;
560 int eightbitchars;
561 bool reformat = false;
562 bool errflag = false;
563 size_t len;
564 char *mp, *cp = NULL, *output = NULL;
565 char *tmpbuf = NULL;
566 size_t tmpbufsize = 0;
567 struct mailname *mn;
568 char errbuf[BUFSIZ];
569
570 /*
571 * Because these are addresses, we need to handle them individually.
572 *
573 * Break them down and process them one by one. This means we have to
574 * rewrite the whole header, but that's unavoidable.
575 */
576
577 /*
578 * The output headers always have to start with a space first; this
579 * is just the way the API works right now.
580 */
581
582 output = add(" ", output);
583
584 for (groupflag = 0; (mp = getname(*value)); ) {
585 if ((mn = getm(mp, NULL, 0, errbuf, sizeof(errbuf))) == NULL) {
586 inform("%s: %s", errbuf, mp);
587 errflag = true;
588 continue;
589 }
590
591 reformat = false;
592
593 /*
594 * We only care if the phrase (m_pers) or any trailing comment
595 * (m_note) have 8-bit characters. If doing q-p, we also need
596 * to encode anything marked as qspecial(). Unquote it first
597 * so the specialchars count is right.
598 */
599
600 if (! mn->m_pers)
601 goto check_note;
602
603 if ((len = strlen(mn->m_pers)) + 1 > tmpbufsize) {
604 tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
605 }
606
607 unquote_string(mn->m_pers, tmpbuf);
608
609 if (scanstring(tmpbuf, &asciichars, &eightbitchars,
610 &specialchars)) {
611 /*
612 * If we have 8-bit characters, encode it.
613 */
614
615 if (encoding == CE_UNKNOWN)
616 encoding = pref_encoding(asciichars, specialchars,
617 eightbitchars);
618
619 /*
620 * This is okay, because the output of unquote_string will be either
621 * equal or shorter than the original.
622 */
623
624 strcpy(mn->m_pers, tmpbuf);
625
626 switch (encoding) {
627
628 case CE_BASE64:
629 if (field_encode_base64(NULL, &mn->m_pers, charset)) {
630 errflag = true;
631 goto out;
632 }
633 break;
634
635 case CE_QUOTED:
636 if (field_encode_quoted(NULL, &mn->m_pers, charset, asciichars,
637 eightbitchars + specialchars, 1)) {
638 errflag = true;
639 goto out;
640 }
641 break;
642
643 default:
644 inform("Internal error: unknown RFC-2047 encoding type");
645 errflag = true;
646 goto out;
647 }
648
649 reformat = true;
650 }
651
652 check_note:
653
654 /*
655 * The "note" field is generally a comment at the end of the address,
656 * at least as how it's implemented here. Notes are always surrounded
657 * by parenthesis (since they're comments). Strip them out and
658 * then put them back when we format the final field, but they do
659 * not get encoded.
660 */
661
662 if (! mn->m_note)
663 goto do_reformat;
664
665 if ((len = strlen(mn->m_note)) + 1 > tmpbufsize) {
666 tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = len + 1);
667 }
668
669 if (mn->m_note[0] != '(' || mn->m_note[len - 1] != ')') {
670 inform("Internal error: Invalid note field \"%s\"",
671 mn->m_note);
672 errflag = true;
673 goto out;
674 }
675
676 strncpy(tmpbuf, mn->m_note + 1, len - 1);
677 tmpbuf[len - 2] = '\0';
678
679 if (scanstring(tmpbuf, &asciichars, &eightbitchars,
680 &specialchars)) {
681 /*
682 * If we have 8-bit characters, encode it.
683 */
684
685 if (encoding == CE_UNKNOWN)
686 encoding = pref_encoding(asciichars, specialchars,
687 eightbitchars);
688
689 switch (encoding) {
690
691 case CE_BASE64:
692 if (field_encode_base64(NULL, &tmpbuf, charset)) {
693 errflag = true;
694 goto out;
695 }
696 break;
697
698 case CE_QUOTED:
699 if (field_encode_quoted(NULL, &tmpbuf, charset, asciichars,
700 eightbitchars + specialchars, 1)) {
701 errflag = true;
702 goto out;
703 }
704 break;
705
706 default:
707 inform("Internal error: unknown RFC-2047 encoding type");
708 errflag = true;
709 goto out;
710 }
711
712 reformat = true;
713
714 /*
715 * Make sure the size of tmpbuf is correct (it always gets
716 * reallocated in the above functions).
717 */
718
719 tmpbufsize = strlen(tmpbuf) + 1;
720
721 /*
722 * Put the note field back surrounded by parenthesis.
723 */
724
725 mn->m_note = mh_xrealloc(mn->m_note, tmpbufsize + 2);
726
727 snprintf(mn->m_note, tmpbufsize + 2, "(%s)", tmpbuf);
728 }
729
730 do_reformat:
731
732 /*
733 * So, some explanation is in order.
734 *
735 * We know we need to rewrite at least one address in the header,
736 * otherwise we wouldn't be here. If we had to reformat this
737 * particular address, then run it through adrformat(). Otherwise
738 * we can use m_text directly.
739 */
740
741 /*
742 * If we were in a group but are no longer, make sure we add a
743 * semicolon (which needs to be FIRST, as it needs to be at the end
744 * of the last address).
745 */
746
747 if (groupflag && ! mn->m_ingrp) {
748 output = add(";", output);
749 column++;
750 }
751
752 groupflag = mn->m_ingrp;
753
754 if (mn->m_gname) {
755 cp = mh_xstrdup(mn->m_gname);
756 }
757
758 if (reformat) {
759 cp = add(adrformat(mn), cp);
760 } else {
761 cp = add(mn->m_text, cp);
762 }
763
764 len = strlen(cp);
765
766 /*
767 * If we're not at the beginning of the line, add a command and
768 * either a space or a newline.
769 */
770
771 if (column != prefixlen) {
772 if (len + column + 2 > OUTPUTLINELEN) {
773
774 if ((size_t) (prefixlen + 3) < tmpbufsize)
775 tmpbuf = mh_xrealloc(tmpbuf, tmpbufsize = prefixlen + 3);
776
777 snprintf(tmpbuf, tmpbufsize, ",\n%*s", column = prefixlen, "");
778 output = add(tmpbuf, output);
779 } else {
780 output = add(", ", output);
781 column += 2;
782 }
783 }
784
785 /*
786 * Finally add the address
787 */
788
789 output = add(cp, output);
790 column += len;
791 free(cp);
792 cp = NULL;
793 }
794
795 /*
796 * Just in case we're at the end of a list
797 */
798
799 if (groupflag) {
800 output = add(";", output);
801 }
802
803 output = add("\n", output);
804
805 free(*value);
806 *value = output;
807 output = NULL;
808
809 out:
810 free(tmpbuf);
811 free(output);
812
813 return errflag;
814 }
815
/*
 * Scan a string and count the characters that matter for encoding.
 *
 * On return:
 *
 * *asciilen      - Number of ASCII characters.
 * *eightbitchars - Number of non-ASCII (8-bit) characters.
 * *specialchars  - Number of ASCII characters that are neither
 *                  qphrasevalid() nor a blank; these are included in
 *                  *asciilen as well.
 *
 * Returns nonzero if the string contains at least one 8-bit character.
 */

static int
scanstring(const char *string, int *asciilen, int *eightbitchars,
           int *specialchars)
{
    const unsigned char *s = (const unsigned char *) string;

    *asciilen = 0;
    *eightbitchars = 0;
    *specialchars = 0;

    while (*s != '\0') {
        unsigned char ch = *s++;

        if (!isascii(ch)) {
            (*eightbitchars)++;
            continue;
        }

        (*asciilen)++;

        /*
         * A blank is not a valid phrase character, but it is not counted
         * as special either: q-p can write it directly as an underscore.
         */
        if (ch != ' ' && !qphrasevalid(ch))
            (*specialchars)++;
    }

    return *eightbitchars > 0;
}
845
846 /*
847 * This function is to be used to decide which encoding algorithm we should
848 * use if one is not given. Basically, we pick whichever one is the shorter
849 * of the two.
850 *
851 * Arguments are:
852 *
853 * ascii - Number of ASCII characters in to-be-encoded string.
854 * specials - Number of ASCII characters in to-be-encoded string that
855 * still require encoding under quoted-printable. Note that
856 * these are included in the "ascii" total.
857 * eightbit - Eight-bit characters in the to-be-encoded string.
858 *
859 * Returns one of CE_BASE64 or CE_QUOTED.
860 */
861
862 static int
863 pref_encoding(int ascii, int specials, int eightbits)
864 {
865 /*
866 * The length of the q-p encoding is:
867 *
868 * ascii - specials + (specials + eightbits) * 3.
869 *
870 * The length of the base64 encoding is:
871 *
872 * base64len(ascii + eightbits) (See macro for details)
873 */
874
875 return base64len(ascii + eightbits) < (ascii - specials +
876 (specials + eightbits) * 3) ? CE_BASE64 : CE_QUOTED;
877 }