]> diplodocus.org Git - nmh/blob - sbr/mf.c
Bring these changes over from the branch.
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * $Id$
6 *
7 * This code is Copyright (c) 2002, by the authors of nmh. See the
8 * COPYRIGHT file in the root directory of the nmh distribution for
9 * complete copyright information.
10 */
11
12 #include <h/mf.h>
13 #include <ctype.h>
14 #include <stdio.h>
15
/*
 * static prototypes
 */
/* string helpers */
static char *getcpy (char *);		/* malloc'ed copy of a string; aborts on NULL */
static char *add (char *, char *);	/* append arg 1 to heap string arg 2, freeing arg 2 */
static void compress (char *, char *);	/* copy arg 1 to arg 2, squeezing whitespace */
static int isat (char *);		/* does the string begin with " at " (any case)? */
/* address parser: these work on the file-level parse state */
static int parse_address (void);
static int phrase (char *);
static int route_addr (char *);
static int local_part (char *);
static int domain (char *);
static int route (char *);
/* lexer: stores the next token's text in the caller's buffer */
static int my_lex (char *);
30
31
/*
 * getcpy -- return a malloc'ed copy of the NUL-terminated string s.
 *
 * A NULL argument is treated as a fatal error and aborts the process.
 * Returns NULL if the allocation fails; otherwise the caller owns the
 * result and releases it with free().
 */
static char *
getcpy (char *s)
{
    char *copy;

    if (s == NULL)
	abort ();		/* never returns */

    /* two bytes more than the string length are requested */
    copy = malloc (strlen (s) + 2);
    if (copy != NULL)
	strcpy (copy, s);
    return copy;
}
49
50
/*
 * add -- build the concatenation "s2 followed by s1" in fresh storage.
 *
 * When s2 is NULL the result is simply a copy of s1.  Otherwise s2 must
 * be a heap string: it is freed here, whether or not the new allocation
 * succeeds.  Returns the new string, or NULL if malloc fails.
 */
static char *
add (char *s1, char *s2)
{
    char *joined;
    size_t head;

    if (s2 == NULL)
	return getcpy (s1);

    head = strlen (s2);
    joined = malloc (strlen (s1) + head + 2);
    if (joined != NULL) {
	strcpy (joined, s2);
	strcpy (joined + head, s1);
    }
    free (s2);
    return joined;
}
64
/*
 * isfrom -- does the line start with "From " or ">From "?
 *
 * Returns 1 if so, 0 otherwise.  The comparison is case-sensitive.
 */
int
isfrom(char *string)
{
    if (strncmp (string, "From ", 5) == 0)
	return 1;

    return strncmp (string, ">From ", 6) == 0;
}
71
72
/*
 * lequal -- case-insensitive string equality.
 *
 * Returns non-zero when a and b have the same length and match
 * character for character once lower-case letters are folded to upper
 * case; returns 0 otherwise.
 *
 * Characters are converted to unsigned char before being handed to
 * toupper(): the <ctype.h> functions are only defined for EOF and
 * values representable as unsigned char, and plain char may be signed.
 */
int
lequal (char *a, char *b)
{
    for (; *a != 0; a++, b++) {
	if (*b == 0)
	    return 0;		/* b is shorter than a */
	if (toupper ((unsigned char) *a) != toupper ((unsigned char) *b))
	    return 0;
    }

    /* a is exhausted; equal only if b is exhausted too */
    return (*b == 0);
}
88
89
90 /*
91 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
92 * addresses, so for each list of addresses we see if we can find some
93 * character to give us a hint.
94 */
95
96
#define CHKADR 0 /* undetermined address style */
#define UNIXDR 1 /* UNIX-style address */
#define ARPADR 2 /* ARPAnet-style address */


/* seekadrx() selects ARPA style if any of these occurs in the address list */
static char *punctuators = ";<>.()[]";
/*
 * uucpadrx() state between calls: vp is the heap copy of the list being
 * split up, tp is where the next address starts (NULL once the list is
 * used up).
 */
static char *vp = NULL;
static char *tp = NULL;

/* the record uucpadrx() fills in and returns */
static struct adrx adrxs1;
107
108
109 struct adrx *
110 seekadrx (char *addrs)
111 {
112 static int state = CHKADR;
113 register char *cp;
114 register struct adrx *adrxp;
115
116 if (state == CHKADR)
117 for (state = UNIXDR, cp = addrs; *cp; cp++)
118 if (strchr(punctuators, *cp)) {
119 state = ARPADR;
120 break;
121 }
122
123 switch (state) {
124 case UNIXDR:
125 adrxp = uucpadrx (addrs);
126 break;
127
128 case ARPADR:
129 default:
130 adrxp = getadrx (addrs);
131 break;
132 }
133
134 if (adrxp == NULL)
135 state = CHKADR;
136
137 return adrxp;
138 }
139
140
141 /*
142 * uucpadrx() implements a partial UUCP-style address parser. It's based
143 * on the UUCP notion that addresses are separated by spaces or commas.
144 */
145
146
147 struct adrx *
148 uucpadrx (char *addrs)
149 {
150 register char *cp, *wp, *xp, *yp, *zp;
151 register struct adrx *adrxp = &adrxs1;
152
153 if (vp == NULL) {
154 vp = tp = getcpy (addrs);
155 compress (addrs, vp);
156 }
157 else
158 if (tp == NULL) {
159 free (vp);
160 vp = NULL;
161 return NULL;
162 }
163
164 for (cp = tp; isspace (*cp); cp++)
165 continue;
166 if (*cp == 0) {
167 free (vp);
168 vp = tp = NULL;
169 return NULL;
170 }
171
172 if ((wp = strchr(cp, ',')) == NULL) {
173 if ((wp = strchr(cp, ' ')) != NULL) {
174 xp = wp;
175 while (isspace (*xp))
176 xp++;
177 if (*xp != 0 && isat (--xp)) {
178 yp = xp + 4;
179 while (isspace (*yp))
180 yp++;
181 if (*yp != 0) {
182 if ((zp = strchr(yp, ' ')) != NULL)
183 *zp = 0, tp = ++zp;
184 else
185 tp = NULL;
186 }
187 else
188 *wp = 0, tp = ++wp;
189 }
190 else
191 *wp = 0, tp = ++wp;
192 }
193 else
194 tp = NULL;
195 }
196 else
197 *wp = 0, tp = ++wp;
198
199 if (adrxp->text)
200 free (adrxp->text);
201 adrxp->text = getcpy (cp);
202 adrxp->mbox = cp;
203 adrxp->host = adrxp->path = NULL;
204 if ((wp = strrchr(cp, '@')) != NULL) {
205 *wp++ = 0;
206 adrxp->host = *wp ? wp : NULL;
207 }
208 else
209 for (wp = cp + strlen (cp) - 4; wp >= cp; wp--)
210 if (isat (wp)) {
211 *wp++ = 0;
212 adrxp->host = wp + 3;
213 }
214
215 adrxp->pers = adrxp->grp = adrxp->note = adrxp->err = NULL;
216 adrxp->ingrp = 0;
217
218 return adrxp;
219 }
220
221
/*
 * compress -- copy fp to tp with whitespace normalised.
 *
 * Leading whitespace is dropped, every run of whitespace characters
 * becomes one blank, and a trailing blank is removed.  The result is
 * never longer than the input, so tp needs room for strlen(fp) + 1
 * bytes.
 *
 * Characters are cast to unsigned char before isspace(), which is not
 * defined for negative values.
 */
static void
compress (char *fp, char *tp)
{
    char *out = tp;
    char prev = ' ';		/* pretend a blank came first: drops leading space */

    for (; *fp != 0; fp++) {
	if (isspace ((unsigned char) *fp)) {
	    if (prev != ' ') {
		*out++ = ' ';
		prev = ' ';
	    }
	}
	else {
	    *out++ = *fp;
	    prev = *fp;
	}
    }

    /* back up over a trailing blank, if one was written */
    if (prev == ' ' && out > tp)
	out--;
    *out = 0;
}
238
239
240 static int
241 isat (char *p)
242 {
243 return (strncmp (p, " AT ", 4)
244 && strncmp (p, " At ", 4)
245 && strncmp (p, " aT ", 4)
246 && strncmp (p, " at ", 4) ? FALSE : TRUE);
247 }
248
249
250 /*
251 *
252 * getadrx() implements a partial 822-style address parser. The parser
253 * is neither complete nor correct. It does however recognize nearly all
254 * of the 822 address syntax. In addition it handles the majority of the
255 * 733 syntax as well. Most problems arise from trying to accommodate both.
256 *
257 * In terms of 822, the route-specification in
258 *
259 * "<" [route] local-part "@" domain ">"
260 *
261 * is parsed and returned unchanged. Multiple at-signs are compressed
262 * via source-routing. Recursive groups are not allowed as per the
263 * standard.
264 *
265 * In terms of 733, " at " is recognized as equivalent to "@".
266 *
267 * In terms of both the parser will not complain about missing hosts.
268 *
269 * -----
270 *
271 * We should not allow addresses like
272 *
273 * Marshall T. Rose <MRose@UCI>
274 *
275 * but should insist on
276 *
277 * "Marshall T. Rose" <MRose@UCI>
278 *
279 * Unfortunately, a lot of mailers stupidly let people get away with this.
280 *
281 * -----
282 *
283 * We should not allow addresses like
284 *
285 * <MRose@UCI>
286 *
287 * but should insist on
288 *
289 * MRose@UCI
290 *
291 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
292 * this.
293 *
294 * -----
295 *
296 * We should not allow addresses like
297 *
298 * @UCI:MRose@UCI-750a
299 *
300 * but should insist on
301 *
302 * Marshall Rose <@UCI:MRose@UCI-750a>
303 *
304 * Unfortunately, a lot of mailers stupidly do this.
305 *
306 */
307
#define QUOTE '\\'

/* token codes returned by my_lex() and remembered in last_lex */
#define LX_END 0
#define LX_ERR 1
#define LX_ATOM 2
#define LX_QSTR 3
#define LX_DLIT 4
#define LX_SEMI 5
#define LX_COMA 6
#define LX_LBRK 7
#define LX_RBRK 8
#define LX_COLN 9
#define LX_DOT 10
#define LX_AT 11

/* one entry of the special-character table: a character and its token code */
struct specials {
    char lx_chr;
    int lx_val;
};

/*
 * Characters that end an atom and form a token by themselves.  The
 * table is terminated by an entry whose lx_chr is 0.  my_lex() handles
 * an opening '(', '"' or '[' before it consults this table.
 */
static struct specials special[] = {
    { ';', LX_SEMI },
    { ',', LX_COMA },
    { '<', LX_LBRK },
    { '>', LX_RBRK },
    { ':', LX_COLN },
    { '.', LX_DOT },
    { '@', LX_AT },
    { '(', LX_ERR },
    { ')', LX_ERR },
    { QUOTE, LX_ERR },
    { '"', LX_ERR },
    { '[', LX_ERR },
    { ']', LX_ERR },
    { 0, 0 }
};

static int glevel = 0;		/* group nesting depth */
static int ingrp = 0;		/* glevel as it was when the mailbox was read */
static int last_lex = LX_END;	/* token code most recently returned by my_lex() */

static char *dp = NULL;		/* heap copy of the list getadrx() is working on */
static char *cp = NULL;		/* read position in dp; NULL once the end is reached */
static char *ap = NULL;		/* where the address being parsed starts */
/* components of the current address; heap strings freed by the next getadrx() call */
static char *pers = NULL;
static char *mbox = NULL;
static char *host = NULL;
static char *path = NULL;
static char *grp = NULL;
static char *note = NULL;
static char err[BUFSIZ];	/* error text for the current address, "" if none */
static char adr[BUFSIZ];	/* text of the current address */

/* the record getadrx() fills in and returns */
static struct adrx adrxs2;
362
363
364 struct adrx *
365 getadrx (char *addrs)
366 {
367 register char *bp;
368 register struct adrx *adrxp = &adrxs2;
369
370 if (pers)
371 free (pers);
372 if (mbox)
373 free (mbox);
374 if (host)
375 free (host);
376 if (path)
377 free (path);
378 if (grp)
379 free (grp);
380 if (note)
381 free (note);
382 pers = mbox = host = path = grp = note = NULL;
383 err[0] = 0;
384
385 if (dp == NULL) {
386 dp = cp = getcpy (addrs ? addrs : "");
387 glevel = 0;
388 }
389 else
390 if (cp == NULL) {
391 free (dp);
392 dp = NULL;
393 return NULL;
394 }
395
396 switch (parse_address ()) {
397 case DONE:
398 free (dp);
399 dp = cp = NULL;
400 return NULL;
401
402 case OK:
403 switch (last_lex) {
404 case LX_COMA:
405 case LX_END:
406 break;
407
408 default: /* catch trailing comments */
409 bp = cp;
410 my_lex (adr);
411 cp = bp;
412 break;
413 }
414 break;
415
416 default:
417 break;
418 }
419
420 if (err[0])
421 for (;;) {
422 switch (last_lex) {
423 case LX_COMA:
424 case LX_END:
425 break;
426
427 default:
428 my_lex (adr);
429 continue;
430 }
431 break;
432 }
433 while (isspace (*ap))
434 ap++;
435 if (cp)
436 sprintf (adr, "%.*s", cp - ap, ap);
437 else
438 strcpy (adr, ap);
439 bp = adr + strlen (adr) - 1;
440 if (*bp == ',' || *bp == ';' || *bp == '\n')
441 *bp = 0;
442
443 adrxp->text = adr;
444 adrxp->pers = pers;
445 adrxp->mbox = mbox;
446 adrxp->host = host;
447 adrxp->path = path;
448 adrxp->grp = grp;
449 adrxp->ingrp = ingrp;
450 adrxp->note = note;
451 adrxp->err = err[0] ? err : NULL;
452
453 return adrxp;
454 }
455
456
457 static int
458 parse_address (void)
459 {
460 char buffer[BUFSIZ];
461
462 again: ;
463 ap = cp;
464 switch (my_lex (buffer)) {
465 case LX_ATOM:
466 case LX_QSTR:
467 pers = getcpy (buffer);
468 break;
469
470 case LX_SEMI:
471 if (glevel-- <= 0) {
472 strcpy (err, "extraneous semi-colon");
473 return NOTOK;
474 }
475 case LX_COMA:
476 if (note) {
477 free (note);
478 note = NULL;
479 }
480 goto again;
481
482 case LX_END:
483 return DONE;
484
485 case LX_LBRK: /* sigh (2) */
486 goto get_addr;
487
488 case LX_AT: /* sigh (3) */
489 cp = ap;
490 if (route_addr (buffer) == NOTOK)
491 return NOTOK;
492 return OK; /* why be choosy? */
493
494 default:
495 sprintf (err, "illegal address construct (%s)", buffer);
496 return NOTOK;
497 }
498
499 switch (my_lex (buffer)) {
500 case LX_ATOM:
501 case LX_QSTR:
502 pers = add (buffer, add (" ", pers));
503 more_phrase: ; /* sigh (1) */
504 if (phrase (buffer) == NOTOK)
505 return NOTOK;
506
507 switch (last_lex) {
508 case LX_LBRK:
509 get_addr: ;
510 if (route_addr (buffer) == NOTOK)
511 return NOTOK;
512 if (last_lex == LX_RBRK)
513 return OK;
514 sprintf (err, "missing right-bracket (%s)", buffer);
515 return NOTOK;
516
517 case LX_COLN:
518 get_group: ;
519 if (glevel++ > 0) {
520 sprintf (err, "nested groups not allowed (%s)", pers);
521 return NOTOK;
522 }
523 grp = add (": ", pers);
524 pers = NULL;
525 {
526 char *pp = cp;
527
528 for (;;)
529 switch (my_lex (buffer)) {
530 case LX_SEMI:
531 case LX_END: /* tsk, tsk */
532 glevel--;
533 return OK;
534
535 case LX_COMA:
536 continue;
537
538 default:
539 cp = pp;
540 return parse_address ();
541 }
542 }
543
544 case LX_DOT: /* sigh (1) */
545 pers = add (".", pers);
546 goto more_phrase;
547
548 default:
549 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
550 pers, buffer);
551 return NOTOK;
552 }
553
554 case LX_LBRK:
555 goto get_addr;
556
557 case LX_COLN:
558 goto get_group;
559
560 case LX_DOT:
561 mbox = add (buffer, pers);
562 pers = NULL;
563 if (route_addr (buffer) == NOTOK)
564 return NOTOK;
565 goto check_end;
566
567 case LX_AT:
568 ingrp = glevel;
569 mbox = pers;
570 pers = NULL;
571 if (domain (buffer) == NOTOK)
572 return NOTOK;
573 check_end: ;
574 switch (last_lex) {
575 case LX_SEMI:
576 if (glevel-- <= 0) {
577 strcpy (err, "extraneous semi-colon");
578 return NOTOK;
579 }
580 case LX_COMA:
581 case LX_END:
582 return OK;
583
584 default:
585 sprintf (err, "junk after local@domain (%s)", buffer);
586 return NOTOK;
587 }
588
589 case LX_SEMI: /* no host */
590 case LX_COMA:
591 case LX_END:
592 ingrp = glevel;
593 if (last_lex == LX_SEMI && glevel-- <= 0) {
594 strcpy (err, "extraneous semi-colon");
595 return NOTOK;
596 }
597 mbox = pers;
598 pers = NULL;
599 return OK;
600
601 default:
602 sprintf (err, "missing mailbox (%s)", buffer);
603 return NOTOK;
604 }
605 }
606
607
608 static int
609 phrase (char *buffer)
610 {
611 for (;;)
612 switch (my_lex (buffer)) {
613 case LX_ATOM:
614 case LX_QSTR:
615 pers = add (buffer, add (" ", pers));
616 continue;
617
618 default:
619 return OK;
620 }
621 }
622
623
624 static int
625 route_addr (char *buffer)
626 {
627 register char *pp = cp;
628
629 if (my_lex (buffer) == LX_AT) {
630 if (route (buffer) == NOTOK)
631 return NOTOK;
632 }
633 else
634 cp = pp;
635
636 if (local_part (buffer) == NOTOK)
637 return NOTOK;
638
639 switch (last_lex) {
640 case LX_AT:
641 return domain (buffer);
642
643 case LX_SEMI: /* if in group */
644 case LX_RBRK: /* no host */
645 case LX_COMA:
646 case LX_END:
647 return OK;
648
649 default:
650 sprintf (err, "no at-sign after local-part (%s)", buffer);
651 return NOTOK;
652 }
653 }
654
655
656 static int
657 local_part (char *buffer)
658 {
659 ingrp = glevel;
660
661 for (;;) {
662 switch (my_lex (buffer)) {
663 case LX_ATOM:
664 case LX_QSTR:
665 mbox = add (buffer, mbox);
666 break;
667
668 default:
669 sprintf (err, "no mailbox in local-part (%s)", buffer);
670 return NOTOK;
671 }
672
673 switch (my_lex (buffer)) {
674 case LX_DOT:
675 mbox = add (buffer, mbox);
676 continue;
677
678 default:
679 return OK;
680 }
681 }
682 }
683
684
685 static int
686 domain (char *buffer)
687 {
688 for (;;) {
689 switch (my_lex (buffer)) {
690 case LX_ATOM:
691 case LX_DLIT:
692 host = add (buffer, host);
693 break;
694
695 default:
696 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
697 return NOTOK;
698 }
699
700 switch (my_lex (buffer)) {
701 case LX_DOT:
702 host = add (buffer, host);
703 continue;
704
705 case LX_AT: /* sigh (0) */
706 mbox = add (host, add ("%", mbox));
707 free (host);
708 host = NULL;
709 continue;
710
711 default:
712 return OK;
713 }
714 }
715 }
716
717
718 static int
719 route (char *buffer)
720 {
721 path = getcpy ("@");
722
723 for (;;) {
724 switch (my_lex (buffer)) {
725 case LX_ATOM:
726 case LX_DLIT:
727 path = add (buffer, path);
728 break;
729
730 default:
731 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
732 return NOTOK;
733 }
734 switch (my_lex (buffer)) {
735 case LX_COMA:
736 path = add (buffer, path);
737 for (;;) {
738 switch (my_lex (buffer)) {
739 case LX_COMA:
740 continue;
741
742 case LX_AT:
743 path = add (buffer, path);
744 break;
745
746 default:
747 sprintf (err, "no at-sign found for next domain in route (%s)",
748 buffer);
749 }
750 break;
751 }
752 continue;
753
754 case LX_AT: /* XXX */
755 case LX_DOT:
756 path = add (buffer, path);
757 continue;
758
759 case LX_COLN:
760 path = add (buffer, path);
761 return OK;
762
763 default:
764 sprintf (err, "no colon found to terminate route (%s)", buffer);
765 return NOTOK;
766 }
767 }
768 }
769
770
771 static int
772 my_lex (char *buffer)
773 {
774 int i, gotat = 0;
775 register char c, *bp;
776
777 bp = buffer;
778 *bp = 0;
779 if (!cp)
780 return (last_lex = LX_END);
781
782 gotat = isat (cp);
783 c = *cp++;
784 while (isspace (c))
785 c = *cp++;
786 if (c == 0) {
787 cp = NULL;
788 return (last_lex = LX_END);
789 }
790
791 if (c == '(')
792 for (*bp++ = c, i = 0;;)
793 switch (c = *cp++) {
794 case 0:
795 cp = NULL;
796 return (last_lex = LX_ERR);
797 case QUOTE:
798 *bp++ = c;
799 if ((c = *cp++) == 0) {
800 cp = NULL;
801 return (last_lex = LX_ERR);
802 }
803 *bp++ = c;
804 continue;
805 case '(':
806 i++;
807 default:
808 *bp++ = c;
809 continue;
810 case ')':
811 *bp++ = c;
812 if (--i < 0) {
813 *bp = 0;
814 note = note ? add (buffer, add (" ", note))
815 : getcpy (buffer);
816 return my_lex (buffer);
817 }
818 }
819
820 if (c == '"')
821 for (*bp++ = c;;)
822 switch (c = *cp++) {
823 case 0:
824 cp = NULL;
825 return (last_lex = LX_ERR);
826 case QUOTE:
827 *bp++ = c;
828 if ((c = *cp++) == 0) {
829 cp = NULL;
830 return (last_lex = LX_ERR);
831 }
832 default:
833 *bp++ = c;
834 continue;
835 case '"':
836 *bp++ = c;
837 *bp = 0;
838 return (last_lex = LX_QSTR);
839 }
840
841 if (c == '[')
842 for (*bp++ = c;;)
843 switch (c = *cp++) {
844 case 0:
845 cp = NULL;
846 return (last_lex = LX_ERR);
847 case QUOTE:
848 *bp++ = c;
849 if ((c = *cp++) == 0) {
850 cp = NULL;
851 return (last_lex = LX_ERR);
852 }
853 default:
854 *bp++ = c;
855 continue;
856 case ']':
857 *bp++ = c;
858 *bp = 0;
859 return (last_lex = LX_DLIT);
860 }
861
862 *bp++ = c;
863 *bp = 0;
864 for (i = 0; special[i].lx_chr != 0; i++)
865 if (c == special[i].lx_chr)
866 return (last_lex = special[i].lx_val);
867
868 if (iscntrl (c))
869 return (last_lex = LX_ERR);
870
871 for (;;) {
872 if ((c = *cp++) == 0)
873 break;
874 for (i = 0; special[i].lx_chr != 0; i++)
875 if (c == special[i].lx_chr)
876 goto got_atom;
877 if (iscntrl (c) || isspace (c))
878 break;
879 *bp++ = c;
880 }
881 got_atom: ;
882 if (c == 0)
883 cp = NULL;
884 else
885 cp--;
886 *bp = 0;
887 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
888 ? LX_ATOM : LX_AT;
889 return last_lex;
890 }
891
892
893 char *
894 legal_person (char *p)
895 {
896 int i;
897 register char *cp;
898 static char buffer[BUFSIZ];
899
900 if (*p == '"')
901 return p;
902 for (cp = p; *cp; cp++)
903 for (i = 0; special[i].lx_chr; i++)
904 if (*cp == special[i].lx_chr) {
905 sprintf (buffer, "\"%s\"", p);
906 return buffer;
907 }
908
909 return p;
910 }
911
912
913 int
914 mfgets (FILE *in, char **bp)
915 {
916 int i;
917 register char *cp, *dp, *ep;
918 static int len = 0;
919 static char *pp = NULL;
920
921 if (pp == NULL)
922 if (!(pp = malloc ((size_t) (len = BUFSIZ))))
923 return NOTOK;
924
925 for (ep = (cp = pp) + len - 2;;) {
926 switch (i = getc (in)) {
927 case EOF:
928 eol: ;
929 if (cp != pp) {
930 *cp = 0;
931 *bp = pp;
932 return OK;
933 }
934 eoh: ;
935 *bp = NULL;
936 free (pp);
937 pp = NULL;
938 return DONE;
939
940 case 0:
941 continue;
942
943 case '\n':
944 if (cp == pp) /* end of headers, gobble it */
945 goto eoh;
946 switch (i = getc (in)) {
947 default: /* end of line */
948 case '\n': /* end of headers, save for next call */
949 ungetc (i, in);
950 goto eol;
951
952 case ' ': /* continue headers */
953 case '\t':
954 *cp++ = '\n';
955 break;
956 } /* fall into default case */
957
958 default:
959 *cp++ = i;
960 break;
961 }
962 if (cp >= ep) {
963 if (!(dp = realloc (pp, (size_t) (len += BUFSIZ)))) {
964 free (pp);
965 pp = NULL;
966 return NOTOK;
967 }
968 else
969 cp += dp - pp, ep = (pp = cp) + len - 2;
970 }
971 }
972 }