]> diplodocus.org Git - nmh/blob - sbr/mf.c
Cope with sasl_decode64() returning SASL_CONTINUE as well as SASL_OK.
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * $Id$
6 *
7 * This code is Copyright (c) 2002, by the authors of nmh. See the
8 * COPYRIGHT file in the root directory of the nmh distribution for
9 * complete copyright information.
10 */
11
12 #include <h/mf.h>
13 #include <ctype.h>
14 #include <stdio.h>
15 #include <h/utils.h>
16
17 /*
18 * static prototypes
19 */
static char *getcpy (char *s);
static void compress (char *fp, unsigned char *tp);
static int isat (char *p);
static int parse_address (void);
static int phrase (char *buffer);
static int route_addr (char *buffer);
static int local_part (char *buffer);
static int domain (char *buffer);
static int route (char *buffer);
static int my_lex (char *buffer);
30
31
/*
 * getcpy() -- return a freshly allocated copy of the string s.
 *
 * The allocation is strlen(s) + 2 bytes, i.e. one spare byte beyond the
 * terminating NUL.  A NULL argument is treated as a fatal programming
 * error: the process is aborted.
 */
static char *
getcpy (char *s)
{
    size_t length;
    char *copy;

    if (s == NULL) {
        abort ();
        /* Only reached if abort() somehow returns: never fall through. */
        for (;;)
            pause ();
    }

    length = strlen (s);
    copy = mh_xmalloc (length + 2);
    memcpy (copy, s, length + 1);   /* includes the terminating NUL */
    return copy;
}
49
50
/*
 * isfrom() -- return non-zero if string begins with "From " or with the
 * '>'-prefixed form ">From ", and zero otherwise.
 */
int
isfrom (char *string)
{
    const char *start = string;

    /* Step over a single leading '>' so both spellings share one test. */
    if (*start == '>')
        start++;

    return strncmp (start, "From ", 5) == 0;
}
57
58
59 int
60 lequal (unsigned char *a, unsigned char *b)
61 {
62 for (; *a; a++, b++)
63 if (*b == 0)
64 return FALSE;
65 else {
66 char c1 = islower (*a) ? toupper (*a) : *a;
67 char c2 = islower (*b) ? toupper (*b) : *b;
68 if (c1 != c2)
69 return FALSE;
70 }
71
72 return (*b == 0);
73 }
74
75
76 /*
77 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
78 * addresses, so for each list of addresses we see if we can find some
79 * character to give us a hint.
80 */
81
82
83 #define CHKADR 0 /* undertermined address style */
84 #define UNIXDR 1 /* UNIX-style address */
85 #define ARPADR 2 /* ARPAnet-style address */
86
87
88 static char *punctuators = ";<>.()[]";
89 static char *vp = NULL;
90 static char *tp = NULL;
91
92 static struct adrx adrxs1;
93
94
95 struct adrx *
96 seekadrx (char *addrs)
97 {
98 static int state = CHKADR;
99 register char *cp;
100 register struct adrx *adrxp;
101
102 if (state == CHKADR)
103 for (state = UNIXDR, cp = addrs; *cp; cp++)
104 if (strchr(punctuators, *cp)) {
105 state = ARPADR;
106 break;
107 }
108
109 switch (state) {
110 case UNIXDR:
111 adrxp = uucpadrx (addrs);
112 break;
113
114 case ARPADR:
115 default:
116 adrxp = getadrx (addrs);
117 break;
118 }
119
120 if (adrxp == NULL)
121 state = CHKADR;
122
123 return adrxp;
124 }
125
126
127 /*
128 * uucpadrx() implements a partial UUCP-style address parser. It's based
129 * on the UUCP notion that addresses are separated by spaces or commas.
130 */
131
132
133 struct adrx *
134 uucpadrx (char *addrs)
135 {
136 register unsigned char *cp, *wp, *xp, *yp;
137 register char *zp;
138 register struct adrx *adrxp = &adrxs1;
139
140 if (vp == NULL) {
141 vp = tp = getcpy (addrs);
142 compress (addrs, vp);
143 }
144 else
145 if (tp == NULL) {
146 free (vp);
147 vp = NULL;
148 return NULL;
149 }
150
151 for (cp = tp; isspace (*cp); cp++)
152 continue;
153 if (*cp == 0) {
154 free (vp);
155 vp = tp = NULL;
156 return NULL;
157 }
158
159 if ((wp = strchr(cp, ',')) == NULL) {
160 if ((wp = strchr(cp, ' ')) != NULL) {
161 xp = wp;
162 while (isspace (*xp))
163 xp++;
164 if (*xp != 0 && isat (--xp)) {
165 yp = xp + 4;
166 while (isspace (*yp))
167 yp++;
168 if (*yp != 0) {
169 if ((zp = strchr(yp, ' ')) != NULL)
170 *zp = 0, tp = ++zp;
171 else
172 tp = NULL;
173 }
174 else
175 *wp = 0, tp = ++wp;
176 }
177 else
178 *wp = 0, tp = ++wp;
179 }
180 else
181 tp = NULL;
182 }
183 else
184 *wp = 0, tp = ++wp;
185
186 if (adrxp->text)
187 free (adrxp->text);
188 adrxp->text = getcpy (cp);
189 adrxp->mbox = cp;
190 adrxp->host = adrxp->path = NULL;
191 if ((wp = strrchr(cp, '@')) != NULL) {
192 *wp++ = 0;
193 adrxp->host = *wp ? wp : NULL;
194 }
195 else
196 for (wp = cp + strlen (cp) - 4; wp >= cp; wp--)
197 if (isat (wp)) {
198 *wp++ = 0;
199 adrxp->host = wp + 3;
200 }
201
202 adrxp->pers = adrxp->grp = adrxp->note = adrxp->err = NULL;
203 adrxp->ingrp = 0;
204
205 return adrxp;
206 }
207
208
/*
 * compress() -- copy the string fp into tp, normalising whitespace.
 *
 * Leading whitespace is dropped, every interior run of whitespace becomes
 * a single space, and a trailing space is removed.  tp must have room for
 * strlen(fp) + 1 bytes.
 */
static void
compress (char *fp, unsigned char *tp)
{
    unsigned char *start = tp;
    char previous = ' ';        /* pretend a space precedes the input */

    while ((*tp = *fp++) != 0) {
        if (!isspace (*tp)) {
            previous = *tp++;
            continue;
        }
        /* Whitespace: keep it (as ' ') only if it begins a new run. */
        if (previous != ' ') {
            previous = ' ';
            *tp++ = ' ';
        }
    }

    /* The output ended on a separator space: chop it off. */
    if (previous == ' ' && start < tp)
        *--tp = 0;
}
226
227
228 static int
229 isat (char *p)
230 {
231 return (strncmp (p, " AT ", 4)
232 && strncmp (p, " At ", 4)
233 && strncmp (p, " aT ", 4)
234 && strncmp (p, " at ", 4) ? FALSE : TRUE);
235 }
236
237
238 /*
239 *
240 * getadrx() implements a partial 822-style address parser. The parser
241 * is neither complete nor correct. It does however recognize nearly all
242 * of the 822 address syntax. In addition it handles the majority of the
243 * 733 syntax as well. Most problems arise from trying to accommodate both.
244 *
245 * In terms of 822, the route-specification in
246 *
247 * "<" [route] local-part "@" domain ">"
248 *
249 * is parsed and returned unchanged. Multiple at-signs are compressed
250 * via source-routing. Recursive groups are not allowed as per the
251 * standard.
252 *
253 * In terms of 733, " at " is recognized as equivalent to "@".
254 *
255 * In terms of both the parser will not complain about missing hosts.
256 *
257 * -----
258 *
259 * We should not allow addresses like
260 *
261 * Marshall T. Rose <MRose@UCI>
262 *
263 * but should insist on
264 *
265 * "Marshall T. Rose" <MRose@UCI>
266 *
267 * Unfortunately, a lot of mailers stupidly let people get away with this.
268 *
269 * -----
270 *
271 * We should not allow addresses like
272 *
273 * <MRose@UCI>
274 *
275 * but should insist on
276 *
277 * MRose@UCI
278 *
279 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
280 * this.
281 *
282 * -----
283 *
284 * We should not allow addresses like
285 *
286 * @UCI:MRose@UCI-750a
287 *
288 * but should insist on
289 *
290 * Marshall Rose <@UCI:MRose@UCI-750a>
291 *
292 * Unfortunately, a lot of mailers stupidly do this.
293 *
294 */
295
/* Character that introduces a quoted pair. */
#define QUOTE '\\'

/* Token codes returned by my_lex(). */
enum {
    LX_END  = 0,                /* end of input */
    LX_ERR  = 1,                /* lexical error */
    LX_ATOM = 2,                /* atom */
    LX_QSTR = 3,                /* "quoted string" */
    LX_DLIT = 4,                /* [domain literal] */
    LX_SEMI = 5,                /* ';' */
    LX_COMA = 6,                /* ',' */
    LX_LBRK = 7,                /* '<' */
    LX_RBRK = 8,                /* '>' */
    LX_COLN = 9,                /* ':' */
    LX_DOT  = 10,               /* '.' */
    LX_AT   = 11                /* '@' */
};

/* Maps a special character to the token code it produces. */
struct specials {
    char lx_chr;
    int lx_val;
};

/*
 * The special characters.  The table is terminated by an entry whose
 * lx_chr is 0.  Characters mapped to LX_ERR are errors when they turn up
 * where this table is consulted.
 */
static struct specials special[] = {
    { ';',   LX_SEMI },
    { ',',   LX_COMA },
    { '<',   LX_LBRK },
    { '>',   LX_RBRK },
    { ':',   LX_COLN },
    { '.',   LX_DOT  },
    { '@',   LX_AT   },
    { '(',   LX_ERR  },
    { ')',   LX_ERR  },
    { QUOTE, LX_ERR  },
    { '"',   LX_ERR  },
    { '[',   LX_ERR  },
    { ']',   LX_ERR  },
    { 0,     0       }
};
332
333 static int glevel = 0;
334 static int ingrp = 0;
335 static int last_lex = LX_END;
336
337 static char *dp = NULL;
338 static unsigned char *cp = NULL;
339 static unsigned char *ap = NULL;
340 static char *pers = NULL;
341 static char *mbox = NULL;
342 static char *host = NULL;
343 static char *path = NULL;
344 static char *grp = NULL;
345 static char *note = NULL;
346 static char err[BUFSIZ];
347 static char adr[BUFSIZ];
348
349 static struct adrx adrxs2;
350
351
352 struct adrx *
353 getadrx (char *addrs)
354 {
355 register char *bp;
356 register struct adrx *adrxp = &adrxs2;
357
358 if (pers)
359 free (pers);
360 if (mbox)
361 free (mbox);
362 if (host)
363 free (host);
364 if (path)
365 free (path);
366 if (grp)
367 free (grp);
368 if (note)
369 free (note);
370 pers = mbox = host = path = grp = note = NULL;
371 err[0] = 0;
372
373 if (dp == NULL) {
374 dp = cp = getcpy (addrs ? addrs : "");
375 glevel = 0;
376 }
377 else
378 if (cp == NULL) {
379 free (dp);
380 dp = NULL;
381 return NULL;
382 }
383
384 switch (parse_address ()) {
385 case DONE:
386 free (dp);
387 dp = cp = NULL;
388 return NULL;
389
390 case OK:
391 switch (last_lex) {
392 case LX_COMA:
393 case LX_END:
394 break;
395
396 default: /* catch trailing comments */
397 bp = cp;
398 my_lex (adr);
399 cp = bp;
400 break;
401 }
402 break;
403
404 default:
405 break;
406 }
407
408 if (err[0])
409 for (;;) {
410 switch (last_lex) {
411 case LX_COMA:
412 case LX_END:
413 break;
414
415 default:
416 my_lex (adr);
417 continue;
418 }
419 break;
420 }
421 while (isspace (*ap))
422 ap++;
423 if (cp)
424 sprintf (adr, "%.*s", (int)(cp - ap), ap);
425 else
426 strcpy (adr, ap);
427 bp = adr + strlen (adr) - 1;
428 if (*bp == ',' || *bp == ';' || *bp == '\n')
429 *bp = 0;
430
431 adrxp->text = adr;
432 adrxp->pers = pers;
433 adrxp->mbox = mbox;
434 adrxp->host = host;
435 adrxp->path = path;
436 adrxp->grp = grp;
437 adrxp->ingrp = ingrp;
438 adrxp->note = note;
439 adrxp->err = err[0] ? err : NULL;
440
441 return adrxp;
442 }
443
444
445 static int
446 parse_address (void)
447 {
448 char buffer[BUFSIZ];
449
450 again: ;
451 ap = cp;
452 switch (my_lex (buffer)) {
453 case LX_ATOM:
454 case LX_QSTR:
455 pers = getcpy (buffer);
456 break;
457
458 case LX_SEMI:
459 if (glevel-- <= 0) {
460 strcpy (err, "extraneous semi-colon");
461 return NOTOK;
462 }
463 case LX_COMA:
464 if (note) {
465 free (note);
466 note = NULL;
467 }
468 goto again;
469
470 case LX_END:
471 return DONE;
472
473 case LX_LBRK: /* sigh (2) */
474 goto get_addr;
475
476 case LX_AT: /* sigh (3) */
477 cp = ap;
478 if (route_addr (buffer) == NOTOK)
479 return NOTOK;
480 return OK; /* why be choosy? */
481
482 default:
483 sprintf (err, "illegal address construct (%s)", buffer);
484 return NOTOK;
485 }
486
487 switch (my_lex (buffer)) {
488 case LX_ATOM:
489 case LX_QSTR:
490 pers = add (buffer, add (" ", pers));
491 more_phrase: ; /* sigh (1) */
492 if (phrase (buffer) == NOTOK)
493 return NOTOK;
494
495 switch (last_lex) {
496 case LX_LBRK:
497 get_addr: ;
498 if (route_addr (buffer) == NOTOK)
499 return NOTOK;
500 if (last_lex == LX_RBRK)
501 return OK;
502 sprintf (err, "missing right-bracket (%s)", buffer);
503 return NOTOK;
504
505 case LX_COLN:
506 get_group: ;
507 if (glevel++ > 0) {
508 sprintf (err, "nested groups not allowed (%s)", pers);
509 return NOTOK;
510 }
511 grp = add (": ", pers);
512 pers = NULL;
513 {
514 char *pp = cp;
515
516 for (;;)
517 switch (my_lex (buffer)) {
518 case LX_SEMI:
519 case LX_END: /* tsk, tsk */
520 glevel--;
521 return OK;
522
523 case LX_COMA:
524 continue;
525
526 default:
527 cp = pp;
528 return parse_address ();
529 }
530 }
531
532 case LX_DOT: /* sigh (1) */
533 pers = add (".", pers);
534 goto more_phrase;
535
536 default:
537 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
538 pers, buffer);
539 return NOTOK;
540 }
541
542 case LX_LBRK:
543 goto get_addr;
544
545 case LX_COLN:
546 goto get_group;
547
548 case LX_DOT:
549 mbox = add (buffer, pers);
550 pers = NULL;
551 if (route_addr (buffer) == NOTOK)
552 return NOTOK;
553 goto check_end;
554
555 case LX_AT:
556 ingrp = glevel;
557 mbox = pers;
558 pers = NULL;
559 if (domain (buffer) == NOTOK)
560 return NOTOK;
561 check_end: ;
562 switch (last_lex) {
563 case LX_SEMI:
564 if (glevel-- <= 0) {
565 strcpy (err, "extraneous semi-colon");
566 return NOTOK;
567 }
568 case LX_COMA:
569 case LX_END:
570 return OK;
571
572 default:
573 sprintf (err, "junk after local@domain (%s)", buffer);
574 return NOTOK;
575 }
576
577 case LX_SEMI: /* no host */
578 case LX_COMA:
579 case LX_END:
580 ingrp = glevel;
581 if (last_lex == LX_SEMI && glevel-- <= 0) {
582 strcpy (err, "extraneous semi-colon");
583 return NOTOK;
584 }
585 mbox = pers;
586 pers = NULL;
587 return OK;
588
589 default:
590 sprintf (err, "missing mailbox (%s)", buffer);
591 return NOTOK;
592 }
593 }
594
595
596 static int
597 phrase (char *buffer)
598 {
599 for (;;)
600 switch (my_lex (buffer)) {
601 case LX_ATOM:
602 case LX_QSTR:
603 pers = add (buffer, add (" ", pers));
604 continue;
605
606 default:
607 return OK;
608 }
609 }
610
611
612 static int
613 route_addr (char *buffer)
614 {
615 register char *pp = cp;
616
617 if (my_lex (buffer) == LX_AT) {
618 if (route (buffer) == NOTOK)
619 return NOTOK;
620 }
621 else
622 cp = pp;
623
624 if (local_part (buffer) == NOTOK)
625 return NOTOK;
626
627 switch (last_lex) {
628 case LX_AT:
629 return domain (buffer);
630
631 case LX_SEMI: /* if in group */
632 case LX_RBRK: /* no host */
633 case LX_COMA:
634 case LX_END:
635 return OK;
636
637 default:
638 sprintf (err, "no at-sign after local-part (%s)", buffer);
639 return NOTOK;
640 }
641 }
642
643
644 static int
645 local_part (char *buffer)
646 {
647 ingrp = glevel;
648
649 for (;;) {
650 switch (my_lex (buffer)) {
651 case LX_ATOM:
652 case LX_QSTR:
653 mbox = add (buffer, mbox);
654 break;
655
656 default:
657 sprintf (err, "no mailbox in local-part (%s)", buffer);
658 return NOTOK;
659 }
660
661 switch (my_lex (buffer)) {
662 case LX_DOT:
663 mbox = add (buffer, mbox);
664 continue;
665
666 default:
667 return OK;
668 }
669 }
670 }
671
672
673 static int
674 domain (char *buffer)
675 {
676 for (;;) {
677 switch (my_lex (buffer)) {
678 case LX_ATOM:
679 case LX_DLIT:
680 host = add (buffer, host);
681 break;
682
683 default:
684 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
685 return NOTOK;
686 }
687
688 switch (my_lex (buffer)) {
689 case LX_DOT:
690 host = add (buffer, host);
691 continue;
692
693 case LX_AT: /* sigh (0) */
694 mbox = add (host, add ("%", mbox));
695 free (host);
696 host = NULL;
697 continue;
698
699 default:
700 return OK;
701 }
702 }
703 }
704
705
706 static int
707 route (char *buffer)
708 {
709 path = getcpy ("@");
710
711 for (;;) {
712 switch (my_lex (buffer)) {
713 case LX_ATOM:
714 case LX_DLIT:
715 path = add (buffer, path);
716 break;
717
718 default:
719 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
720 return NOTOK;
721 }
722 switch (my_lex (buffer)) {
723 case LX_COMA:
724 path = add (buffer, path);
725 for (;;) {
726 switch (my_lex (buffer)) {
727 case LX_COMA:
728 continue;
729
730 case LX_AT:
731 path = add (buffer, path);
732 break;
733
734 default:
735 sprintf (err, "no at-sign found for next domain in route (%s)",
736 buffer);
737 }
738 break;
739 }
740 continue;
741
742 case LX_AT: /* XXX */
743 case LX_DOT:
744 path = add (buffer, path);
745 continue;
746
747 case LX_COLN:
748 path = add (buffer, path);
749 return OK;
750
751 default:
752 sprintf (err, "no colon found to terminate route (%s)", buffer);
753 return NOTOK;
754 }
755 }
756 }
757
758
759 static int
760 my_lex (char *buffer)
761 {
762 /* buffer should be at least BUFSIZ bytes long */
763 int i, gotat = 0;
764 register unsigned char c;
765 register char *bp;
766
767 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
768 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
769
770 bp = buffer;
771 *bp = 0;
772 if (!cp)
773 return (last_lex = LX_END);
774
775 gotat = isat (cp);
776 c = *cp++;
777 while (isspace (c))
778 c = *cp++;
779 if (c == 0) {
780 cp = NULL;
781 return (last_lex = LX_END);
782 }
783
784 if (c == '(') {
785 ADDCHR(c);
786 for (i = 0;;)
787 switch (c = *cp++) {
788 case 0:
789 cp = NULL;
790 return (last_lex = LX_ERR);
791 case QUOTE:
792 ADDCHR(c);
793 if ((c = *cp++) == 0) {
794 cp = NULL;
795 return (last_lex = LX_ERR);
796 }
797 ADDCHR(c);
798 continue;
799 case '(':
800 i++;
801 default:
802 ADDCHR(c);
803 continue;
804 case ')':
805 ADDCHR(c);
806 if (--i < 0) {
807 *bp = 0;
808 note = note ? add (buffer, add (" ", note))
809 : getcpy (buffer);
810 return my_lex (buffer);
811 }
812 }
813 }
814
815 if (c == '"') {
816 ADDCHR(c);
817 for (;;)
818 switch (c = *cp++) {
819 case 0:
820 cp = NULL;
821 return (last_lex = LX_ERR);
822 case QUOTE:
823 ADDCHR(c);
824 if ((c = *cp++) == 0) {
825 cp = NULL;
826 return (last_lex = LX_ERR);
827 }
828 default:
829 ADDCHR(c);
830 continue;
831 case '"':
832 ADDCHR(c);
833 *bp = 0;
834 return (last_lex = LX_QSTR);
835 }
836 }
837
838 if (c == '[') {
839 ADDCHR(c);
840 for (;;)
841 switch (c = *cp++) {
842 case 0:
843 cp = NULL;
844 return (last_lex = LX_ERR);
845 case QUOTE:
846 ADDCHR(c);
847 if ((c = *cp++) == 0) {
848 cp = NULL;
849 return (last_lex = LX_ERR);
850 }
851 default:
852 ADDCHR(c);
853 continue;
854 case ']':
855 ADDCHR(c);
856 *bp = 0;
857 return (last_lex = LX_DLIT);
858 }
859 }
860
861 ADDCHR(c);
862 *bp = 0;
863 for (i = 0; special[i].lx_chr != 0; i++)
864 if (c == special[i].lx_chr)
865 return (last_lex = special[i].lx_val);
866
867 if (iscntrl (c))
868 return (last_lex = LX_ERR);
869
870 for (;;) {
871 if ((c = *cp++) == 0)
872 break;
873 for (i = 0; special[i].lx_chr != 0; i++)
874 if (c == special[i].lx_chr)
875 goto got_atom;
876 if (iscntrl (c) || isspace (c))
877 break;
878 ADDCHR(c);
879 }
880 got_atom: ;
881 if (c == 0)
882 cp = NULL;
883 else
884 cp--;
885 *bp = 0;
886 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
887 ? LX_ATOM : LX_AT;
888 return last_lex;
889
890 my_lex_buffull:
891 /* Out of buffer space. *bp is the last byte in the buffer */
892 *bp = 0;
893 return (last_lex = LX_ERR);
894 }
895
896
897 char *
898 legal_person (char *p)
899 {
900 int i;
901 register char *cp;
902 static char buffer[BUFSIZ];
903
904 if (*p == '"')
905 return p;
906 for (cp = p; *cp; cp++)
907 for (i = 0; special[i].lx_chr; i++)
908 if (*cp == special[i].lx_chr) {
909 sprintf (buffer, "\"%s\"", p);
910 return buffer;
911 }
912
913 return p;
914 }
915
916
917 int
918 mfgets (FILE *in, char **bp)
919 {
920 int i;
921 register char *cp, *dp, *ep;
922 static int len = 0;
923 static char *pp = NULL;
924
925 if (pp == NULL)
926 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
927
928 for (ep = (cp = pp) + len - 2;;) {
929 switch (i = getc (in)) {
930 case EOF:
931 eol: ;
932 if (cp != pp) {
933 *cp = 0;
934 *bp = pp;
935 return OK;
936 }
937 eoh: ;
938 *bp = NULL;
939 free (pp);
940 pp = NULL;
941 return DONE;
942
943 case 0:
944 continue;
945
946 case '\n':
947 if (cp == pp) /* end of headers, gobble it */
948 goto eoh;
949 switch (i = getc (in)) {
950 default: /* end of line */
951 case '\n': /* end of headers, save for next call */
952 ungetc (i, in);
953 goto eol;
954
955 case ' ': /* continue headers */
956 case '\t':
957 *cp++ = '\n';
958 break;
959 } /* fall into default case */
960
961 default:
962 *cp++ = i;
963 break;
964 }
965 if (cp >= ep) {
966 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
967 cp += dp - pp, ep = (pp = cp) + len - 2;
968 }
969 }
970 }