]> diplodocus.org Git - nmh/blob - zotnet/mf/mf.c
Added notes about the configure change.
[nmh] / zotnet / mf / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * $Id$
6 */
7
8 #include <mf.h>
9 #include <ctype.h>
10 #include <stdio.h>
11
12 /*
13 * static prototypes
14 */
/* Forward declarations of the file-local helpers; parameter names are documentation only. */
static char *getcpy (char *s);
static char *add (char *s1, char *s2);
static void compress (char *fp, char *tp);
static int isat (char *p);
static int parse_address (void);
static int phrase (char *buffer);
static int route_addr (char *buffer);
static int local_part (char *buffer);
static int domain (char *buffer);
static int route (char *buffer);
static int my_lex (char *buffer);
26
27
/*
 * getcpy -- return a freshly allocated copy of the string s.
 *
 * A NULL argument is treated as a fatal error: all output streams are
 * flushed and the process is aborted.
 *
 * Returns the copy, or NULL when malloc() fails.  The caller owns the
 * result and releases it with free().
 */
static char *
getcpy (char *s)
{
    char *copy;

    if (s == NULL) {
        /*
         * fflush(NULL) is the portable stand-in for the old stdio
         * internal _cleanup().  abort() does not return, so nothing
         * is needed after it.
         */
        fflush (NULL);
        abort ();
    }

    /* room for the text, its terminator and one spare byte */
    copy = malloc (strlen (s) + 2);
    if (copy != NULL)
        strcpy (copy, s);

    return copy;
}
43
44
/*
 * add -- append s1 to the end of the heap string s2.
 *
 * With s2 == NULL the result is simply a copy of s1.  Otherwise a new
 * buffer holding s2 followed by s1 is allocated and s2 is freed (s2 is
 * freed even when the allocation fails).
 *
 * Returns the new string, or NULL when no memory could be obtained.
 */
static char *
add (char *s1, char *s2)
{
    char *joined;
    size_t oldlen;

    if (s2 == NULL)
        return getcpy (s1);

    oldlen = strlen (s2);
    joined = malloc ((size_t) (oldlen + strlen (s1) + 2));
    if (joined != NULL) {
        strcpy (joined, s2);
        strcpy (joined + oldlen, s1);
    }
    free (s2);

    return joined;
}
58
/*
 * isfrom -- test whether string begins with "From " or ">From ".
 *
 * Returns 1 on a match and 0 otherwise.
 */
int
isfrom(char *string)
{
    char *start = string;

    /* a single leading '>' is allowed in front of the keyword */
    if (*start == '>')
        start++;

    return strncmp (start, "From ", 5) == 0;
}
65
66
/*
 * lequal -- compare two strings for equality, ignoring letter case.
 *
 * Returns 1 when a and b have the same length and every pair of
 * characters matches after folding lower-case letters to upper case;
 * returns 0 otherwise.
 */
int
lequal (char *a, char *b)
{
    for (; *a != 0 && *b != 0; a++, b++) {
        /*
         * The <ctype.h> functions are only defined for values
         * representable as unsigned char (or EOF), so convert first.
         */
        int c1 = toupper ((unsigned char) *a);
        int c2 = toupper ((unsigned char) *b);

        if (c1 != c2)
            break;
    }

    /* equal only if both strings ended together */
    return *a == 0 && *b == 0;
}
82
83
84 /*
85 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
86 * addresses, so for each list of addresses we see if we can find some
87 * character to give us a hint.
88 */
89
90
91 #define CHKADR 0 /* undertermined address style */
92 #define UNIXDR 1 /* UNIX-style address */
93 #define ARPADR 2 /* ARPAnet-style address */
94
95
96 static char *punctuators = ";<>.()[]";
97 static char *vp = NULL;
98 static char *tp = NULL;
99
100 static struct adrx adrxs1;
101
102
103 struct adrx *
104 seekadrx (char *addrs)
105 {
106 static int state = CHKADR;
107 register char *cp;
108 register struct adrx *adrxp;
109
110 if (state == CHKADR)
111 for (state = UNIXDR, cp = addrs; *cp; cp++)
112 if (strchr(punctuators, *cp)) {
113 state = ARPADR;
114 break;
115 }
116
117 switch (state) {
118 case UNIXDR:
119 adrxp = uucpadrx (addrs);
120 break;
121
122 case ARPADR:
123 default:
124 adrxp = getadrx (addrs);
125 break;
126 }
127
128 if (adrxp == NULL)
129 state = CHKADR;
130
131 return adrxp;
132 }
133
134
135 /*
136 * uucpadrx() implements a partial UUCP-style address parser. It's based
137 * on the UUCP notion that addresses are separated by spaces or commas.
138 */
139
140
141 struct adrx *
142 uucpadrx (char *addrs)
143 {
144 register char *cp, *wp, *xp, *yp, *zp;
145 register struct adrx *adrxp = &adrxs1;
146
147 if (vp == NULL) {
148 vp = tp = getcpy (addrs);
149 compress (addrs, vp);
150 }
151 else
152 if (tp == NULL) {
153 free (vp);
154 vp = NULL;
155 return NULL;
156 }
157
158 for (cp = tp; isspace (*cp); cp++)
159 continue;
160 if (*cp == 0) {
161 free (vp);
162 vp = tp = NULL;
163 return NULL;
164 }
165
166 if ((wp = strchr(cp, ',')) == NULL)
167 if ((wp = strchr(cp, ' ')) != NULL) {
168 xp = wp;
169 while (isspace (*xp))
170 xp++;
171 if (*xp != 0 && isat (--xp)) {
172 yp = xp + 4;
173 while (isspace (*yp))
174 yp++;
175 if (*yp != 0)
176 if ((zp = strchr(yp, ' ')) != NULL)
177 *zp = 0, tp = ++zp;
178 else
179 tp = NULL;
180 else
181 *wp = 0, tp = ++wp;
182 }
183 else
184 *wp = 0, tp = ++wp;
185 }
186 else
187 tp = NULL;
188 else
189 *wp = 0, tp = ++wp;
190
191 if (adrxp->text)
192 free (adrxp->text);
193 adrxp->text = getcpy (cp);
194 adrxp->mbox = cp;
195 adrxp->host = adrxp->path = NULL;
196 if ((wp = strrchr(cp, '@')) != NULL) {
197 *wp++ = 0;
198 adrxp->host = *wp ? wp : NULL;
199 }
200 else
201 for (wp = cp + strlen (cp) - 4; wp >= cp; wp--)
202 if (isat (wp)) {
203 *wp++ = 0;
204 adrxp->host = wp + 3;
205 }
206
207 adrxp->pers = adrxp->grp = adrxp->note = adrxp->err = NULL;
208 adrxp->ingrp = 0;
209
210 return adrxp;
211 }
212
213
/*
 * compress -- copy fp to tp, squeezing whitespace.
 *
 * Every run of whitespace becomes a single space; leading whitespace
 * and a trailing space are dropped.  tp must have room for at least
 * strlen(fp) + 1 bytes.
 */
static void
compress (char *fp, char *tp)
{
    char last = ' ';        /* pretend a space came first: drops leading blanks */
    char *start = tp;

    while ((*tp = *fp++) != 0) {
        /* <ctype.h> arguments must be representable as unsigned char */
        if (isspace ((unsigned char) *tp)) {
            if (last != ' ')
                *tp++ = last = ' ';
        }
        else
            last = *tp++;
    }

    /* remove the single trailing space, if one was emitted */
    if (last == ' ' && start < tp)
        *--tp = 0;
}
230
231
232 static int
233 isat (char *p)
234 {
235 return (strncmp (p, " AT ", 4)
236 && strncmp (p, " At ", 4)
237 && strncmp (p, " aT ", 4)
238 && strncmp (p, " at ", 4) ? FALSE : TRUE);
239 }
240
241
242 /*
243 *
244 * getadrx() implements a partial 822-style address parser. The parser
245 * is neither complete nor correct. It does however recognize nearly all
246 * of the 822 address syntax. In addition it handles the majority of the
247 * 733 syntax as well. Most problems arise from trying to accommodate both.
248 *
249 * In terms of 822, the route-specification in
250 *
251 * "<" [route] local-part "@" domain ">"
252 *
253 * is parsed and returned unchanged. Multiple at-signs are compressed
254 * via source-routing. Recursive groups are not allowed as per the
255 * standard.
256 *
257 * In terms of 733, " at " is recognized as equivalent to "@".
258 *
259 * In terms of both the parser will not complain about missing hosts.
260 *
261 * -----
262 *
263 * We should not allow addresses like
264 *
265 * Marshall T. Rose <MRose@UCI>
266 *
267 * but should insist on
268 *
269 * "Marshall T. Rose" <MRose@UCI>
270 *
271 * Unfortunately, a lot of mailers stupidly let people get away with this.
272 *
273 * -----
274 *
275 * We should not allow addresses like
276 *
277 * <MRose@UCI>
278 *
279 * but should insist on
280 *
281 * MRose@UCI
282 *
283 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
284 * this.
285 *
286 * -----
287 *
288 * We should not allow addresses like
289 *
290 * @UCI:MRose@UCI-750a
291 *
292 * but should insist on
293 *
294 * Marshall Rose <@UCI:MRose@UCI-750a>
295 *
296 * Unfortunately, a lot of mailers stupidly do this.
297 *
298 */
299
/* escape character recognised inside comments, quoted strings and domain literals */
#define QUOTE '\\'

/* token codes returned by my_lex() */
enum {
    LX_END  = 0,    /* end of input */
    LX_ERR  = 1,    /* lexical error */
    LX_ATOM = 2,    /* atom */
    LX_QSTR = 3,    /* "quoted string" */
    LX_DLIT = 4,    /* [domain literal] */
    LX_SEMI = 5,    /* ; */
    LX_COMA = 6,    /* , */
    LX_LBRK = 7,    /* < */
    LX_RBRK = 8,    /* > */
    LX_COLN = 9,    /* : */
    LX_DOT  = 10,   /* . */
    LX_AT   = 11    /* @ */
};

/* one entry of the special-character table */
struct specials {
    char lx_chr;    /* the character */
    int lx_val;     /* token code reported for it */
};

/*
 * Characters that end an atom, with the token code each one yields
 * when it is lexed as a token of its own.  The list is terminated by
 * an entry whose lx_chr is 0.
 */
static struct specials special[] = {
    { ';',   LX_SEMI },
    { ',',   LX_COMA },
    { '<',   LX_LBRK },
    { '>',   LX_RBRK },
    { ':',   LX_COLN },
    { '.',   LX_DOT },
    { '@',   LX_AT },
    { '(',   LX_ERR },
    { ')',   LX_ERR },
    { QUOTE, LX_ERR },
    { '"',   LX_ERR },
    { '[',   LX_ERR },
    { ']',   LX_ERR },
    { 0,     0 }
};
336
337 static int glevel = 0;
338 static int ingrp = 0;
339 static int last_lex = LX_END;
340
341 static char *dp = NULL;
342 static char *cp = NULL;
343 static char *ap = NULL;
344 static char *pers = NULL;
345 static char *mbox = NULL;
346 static char *host = NULL;
347 static char *path = NULL;
348 static char *grp = NULL;
349 static char *note = NULL;
350 static char err[BUFSIZ];
351 static char adr[BUFSIZ];
352
353 static struct adrx adrxs2;
354
355
356 struct adrx *
357 getadrx (char *addrs)
358 {
359 register char *bp;
360 register struct adrx *adrxp = &adrxs2;
361
362 if (pers)
363 free (pers);
364 if (mbox)
365 free (mbox);
366 if (host)
367 free (host);
368 if (path)
369 free (path);
370 if (grp)
371 free (grp);
372 if (note)
373 free (note);
374 pers = mbox = host = path = grp = note = NULL;
375 err[0] = 0;
376
377 if (dp == NULL) {
378 dp = cp = getcpy (addrs ? addrs : "");
379 glevel = 0;
380 }
381 else
382 if (cp == NULL) {
383 free (dp);
384 dp = NULL;
385 return NULL;
386 }
387
388 switch (parse_address ()) {
389 case DONE:
390 free (dp);
391 dp = cp = NULL;
392 return NULL;
393
394 case OK:
395 switch (last_lex) {
396 case LX_COMA:
397 case LX_END:
398 break;
399
400 default: /* catch trailing comments */
401 bp = cp;
402 my_lex (adr);
403 cp = bp;
404 break;
405 }
406 break;
407
408 default:
409 break;
410 }
411
412 if (err[0])
413 for (;;) {
414 switch (last_lex) {
415 case LX_COMA:
416 case LX_END:
417 break;
418
419 default:
420 my_lex (adr);
421 continue;
422 }
423 break;
424 }
425 while (isspace (*ap))
426 ap++;
427 if (cp)
428 sprintf (adr, "%.*s", cp - ap, ap);
429 else
430 strcpy (adr, ap);
431 bp = adr + strlen (adr) - 1;
432 if (*bp == ',' || *bp == ';' || *bp == '\n')
433 *bp = 0;
434
435 adrxp->text = adr;
436 adrxp->pers = pers;
437 adrxp->mbox = mbox;
438 adrxp->host = host;
439 adrxp->path = path;
440 adrxp->grp = grp;
441 adrxp->ingrp = ingrp;
442 adrxp->note = note;
443 adrxp->err = err[0] ? err : NULL;
444
445 return adrxp;
446 }
447
448
449 static int
450 parse_address (void)
451 {
452 char buffer[BUFSIZ];
453
454 again: ;
455 ap = cp;
456 switch (my_lex (buffer)) {
457 case LX_ATOM:
458 case LX_QSTR:
459 pers = getcpy (buffer);
460 break;
461
462 case LX_SEMI:
463 if (glevel-- <= 0) {
464 strcpy (err, "extraneous semi-colon");
465 return NOTOK;
466 }
467 case LX_COMA:
468 if (note) {
469 free (note);
470 note = NULL;
471 }
472 goto again;
473
474 case LX_END:
475 return DONE;
476
477 case LX_LBRK: /* sigh (2) */
478 goto get_addr;
479
480 case LX_AT: /* sigh (3) */
481 cp = ap;
482 if (route_addr (buffer) == NOTOK)
483 return NOTOK;
484 return OK; /* why be choosy? */
485
486 default:
487 sprintf (err, "illegal address construct (%s)", buffer);
488 return NOTOK;
489 }
490
491 switch (my_lex (buffer)) {
492 case LX_ATOM:
493 case LX_QSTR:
494 pers = add (buffer, add (" ", pers));
495 more_phrase: ; /* sigh (1) */
496 if (phrase (buffer) == NOTOK)
497 return NOTOK;
498
499 switch (last_lex) {
500 case LX_LBRK:
501 get_addr: ;
502 if (route_addr (buffer) == NOTOK)
503 return NOTOK;
504 if (last_lex == LX_RBRK)
505 return OK;
506 sprintf (err, "missing right-bracket (%s)", buffer);
507 return NOTOK;
508
509 case LX_COLN:
510 get_group: ;
511 if (glevel++ > 0) {
512 sprintf (err, "nested groups not allowed (%s)", pers);
513 return NOTOK;
514 }
515 grp = add (": ", pers);
516 pers = NULL;
517 {
518 char *pp = cp;
519
520 for (;;)
521 switch (my_lex (buffer)) {
522 case LX_SEMI:
523 case LX_END: /* tsk, tsk */
524 glevel--;
525 return OK;
526
527 case LX_COMA:
528 continue;
529
530 default:
531 cp = pp;
532 return parse_address ();
533 }
534 }
535
536 case LX_DOT: /* sigh (1) */
537 pers = add (".", pers);
538 goto more_phrase;
539
540 default:
541 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
542 pers, buffer);
543 return NOTOK;
544 }
545
546 case LX_LBRK:
547 goto get_addr;
548
549 case LX_COLN:
550 goto get_group;
551
552 case LX_DOT:
553 mbox = add (buffer, pers);
554 pers = NULL;
555 if (route_addr (buffer) == NOTOK)
556 return NOTOK;
557 goto check_end;
558
559 case LX_AT:
560 ingrp = glevel;
561 mbox = pers;
562 pers = NULL;
563 if (domain (buffer) == NOTOK)
564 return NOTOK;
565 check_end: ;
566 switch (last_lex) {
567 case LX_SEMI:
568 if (glevel-- <= 0) {
569 strcpy (err, "extraneous semi-colon");
570 return NOTOK;
571 }
572 case LX_COMA:
573 case LX_END:
574 return OK;
575
576 default:
577 sprintf (err, "junk after local@domain (%s)", buffer);
578 return NOTOK;
579 }
580
581 case LX_SEMI: /* no host */
582 case LX_COMA:
583 case LX_END:
584 ingrp = glevel;
585 if (last_lex == LX_SEMI && glevel-- <= 0) {
586 strcpy (err, "extraneous semi-colon");
587 return NOTOK;
588 }
589 mbox = pers;
590 pers = NULL;
591 return OK;
592
593 default:
594 sprintf (err, "missing mailbox (%s)", buffer);
595 return NOTOK;
596 }
597 }
598
599
600 static int
601 phrase (char *buffer)
602 {
603 for (;;)
604 switch (my_lex (buffer)) {
605 case LX_ATOM:
606 case LX_QSTR:
607 pers = add (buffer, add (" ", pers));
608 continue;
609
610 default:
611 return OK;
612 }
613 }
614
615
616 static int
617 route_addr (char *buffer)
618 {
619 register char *pp = cp;
620
621 if (my_lex (buffer) == LX_AT) {
622 if (route (buffer) == NOTOK)
623 return NOTOK;
624 }
625 else
626 cp = pp;
627
628 if (local_part (buffer) == NOTOK)
629 return NOTOK;
630
631 switch (last_lex) {
632 case LX_AT:
633 return domain (buffer);
634
635 case LX_SEMI: /* if in group */
636 case LX_RBRK: /* no host */
637 case LX_COMA:
638 case LX_END:
639 return OK;
640
641 default:
642 sprintf (err, "no at-sign after local-part (%s)", buffer);
643 return NOTOK;
644 }
645 }
646
647
648 static int
649 local_part (char *buffer)
650 {
651 ingrp = glevel;
652
653 for (;;) {
654 switch (my_lex (buffer)) {
655 case LX_ATOM:
656 case LX_QSTR:
657 mbox = add (buffer, mbox);
658 break;
659
660 default:
661 sprintf (err, "no mailbox in local-part (%s)", buffer);
662 return NOTOK;
663 }
664
665 switch (my_lex (buffer)) {
666 case LX_DOT:
667 mbox = add (buffer, mbox);
668 continue;
669
670 default:
671 return OK;
672 }
673 }
674 }
675
676
677 static int
678 domain (char *buffer)
679 {
680 for (;;) {
681 switch (my_lex (buffer)) {
682 case LX_ATOM:
683 case LX_DLIT:
684 host = add (buffer, host);
685 break;
686
687 default:
688 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
689 return NOTOK;
690 }
691
692 switch (my_lex (buffer)) {
693 case LX_DOT:
694 host = add (buffer, host);
695 continue;
696
697 case LX_AT: /* sigh (0) */
698 mbox = add (host, add ("%", mbox));
699 free (host);
700 host = NULL;
701 continue;
702
703 default:
704 return OK;
705 }
706 }
707 }
708
709
710 static int
711 route (char *buffer)
712 {
713 path = getcpy ("@");
714
715 for (;;) {
716 switch (my_lex (buffer)) {
717 case LX_ATOM:
718 case LX_DLIT:
719 path = add (buffer, path);
720 break;
721
722 default:
723 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
724 return NOTOK;
725 }
726 switch (my_lex (buffer)) {
727 case LX_COMA:
728 path = add (buffer, path);
729 for (;;) {
730 switch (my_lex (buffer)) {
731 case LX_COMA:
732 continue;
733
734 case LX_AT:
735 path = add (buffer, path);
736 break;
737
738 default:
739 sprintf (err, "no at-sign found for next domain in route (%s)",
740 buffer);
741 }
742 break;
743 }
744 continue;
745
746 case LX_AT: /* XXX */
747 case LX_DOT:
748 path = add (buffer, path);
749 continue;
750
751 case LX_COLN:
752 path = add (buffer, path);
753 return OK;
754
755 default:
756 sprintf (err, "no colon found to terminate route (%s)", buffer);
757 return NOTOK;
758 }
759 }
760 }
761
762
763 static int
764 my_lex (char *buffer)
765 {
766 int i, gotat = 0;
767 register char c, *bp;
768
769 bp = buffer;
770 *bp = 0;
771 if (!cp)
772 return (last_lex = LX_END);
773
774 gotat = isat (cp);
775 c = *cp++;
776 while (isspace (c))
777 c = *cp++;
778 if (c == 0) {
779 cp = NULL;
780 return (last_lex = LX_END);
781 }
782
783 if (c == '(')
784 for (*bp++ = c, i = 0;;)
785 switch (c = *cp++) {
786 case 0:
787 cp = NULL;
788 return (last_lex = LX_ERR);
789 case QUOTE:
790 *bp++ = c;
791 if ((c = *cp++) == 0) {
792 cp = NULL;
793 return (last_lex = LX_ERR);
794 }
795 *bp++ = c;
796 continue;
797 case '(':
798 i++;
799 default:
800 *bp++ = c;
801 continue;
802 case ')':
803 *bp++ = c;
804 if (--i < 0) {
805 *bp = 0;
806 note = note ? add (buffer, add (" ", note))
807 : getcpy (buffer);
808 return my_lex (buffer);
809 }
810 }
811
812 if (c == '"')
813 for (*bp++ = c;;)
814 switch (c = *cp++) {
815 case 0:
816 cp = NULL;
817 return (last_lex = LX_ERR);
818 case QUOTE:
819 *bp++ = c;
820 if ((c = *cp++) == 0) {
821 cp = NULL;
822 return (last_lex = LX_ERR);
823 }
824 default:
825 *bp++ = c;
826 continue;
827 case '"':
828 *bp++ = c;
829 *bp = 0;
830 return (last_lex = LX_QSTR);
831 }
832
833 if (c == '[')
834 for (*bp++ = c;;)
835 switch (c = *cp++) {
836 case 0:
837 cp = NULL;
838 return (last_lex = LX_ERR);
839 case QUOTE:
840 *bp++ = c;
841 if ((c = *cp++) == 0) {
842 cp = NULL;
843 return (last_lex = LX_ERR);
844 }
845 default:
846 *bp++ = c;
847 continue;
848 case ']':
849 *bp++ = c;
850 *bp = 0;
851 return (last_lex = LX_DLIT);
852 }
853
854 *bp++ = c;
855 *bp = 0;
856 for (i = 0; special[i].lx_chr != 0; i++)
857 if (c == special[i].lx_chr)
858 return (last_lex = special[i].lx_val);
859
860 if (iscntrl (c))
861 return (last_lex = LX_ERR);
862
863 for (;;) {
864 if ((c = *cp++) == 0)
865 break;
866 for (i = 0; special[i].lx_chr != 0; i++)
867 if (c == special[i].lx_chr)
868 goto got_atom;
869 if (iscntrl (c) || isspace (c))
870 break;
871 *bp++ = c;
872 }
873 got_atom: ;
874 if (c == 0)
875 cp = NULL;
876 else
877 cp--;
878 *bp = 0;
879 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
880 ? LX_ATOM : LX_AT;
881 return last_lex;
882 }
883
884
885 char *
886 legal_person (char *p)
887 {
888 int i;
889 register char *cp;
890 static char buffer[BUFSIZ];
891
892 if (*p == '"')
893 return p;
894 for (cp = p; *cp; cp++)
895 for (i = 0; special[i].lx_chr; i++)
896 if (*cp == special[i].lx_chr) {
897 sprintf (buffer, "\"%s\"", p);
898 return buffer;
899 }
900
901 return p;
902 }
903
904
905 int
906 mfgets (FILE *in, char **bp)
907 {
908 int i;
909 register char *cp, *dp, *ep;
910 static int len = 0;
911 static char *pp = NULL;
912
913 if (pp == NULL)
914 if (!(pp = malloc ((size_t) (len = BUFSIZ))))
915 return NOTOK;
916
917 for (ep = (cp = pp) + len - 2;;) {
918 switch (i = getc (in)) {
919 case EOF:
920 eol: ;
921 if (cp != pp) {
922 *cp = 0;
923 *bp = pp;
924 return OK;
925 }
926 eoh: ;
927 *bp = NULL;
928 free (pp);
929 pp = NULL;
930 return DONE;
931
932 case 0:
933 continue;
934
935 case '\n':
936 if (cp == pp) /* end of headers, gobble it */
937 goto eoh;
938 switch (i = getc (in)) {
939 default: /* end of line */
940 case '\n': /* end of headers, save for next call */
941 ungetc (i, in);
942 goto eol;
943
944 case ' ': /* continue headers */
945 case '\t':
946 *cp++ = '\n';
947 break;
948 } /* fall into default case */
949
950 default:
951 *cp++ = i;
952 break;
953 }
954 if (cp >= ep)
955 if (!(dp = realloc (pp, (size_t) (len += BUFSIZ)))) {
956 free (pp);
957 pp = NULL;
958 return NOTOK;
959 }
960 else
961 cp += dp - pp, ep = (pp = cp) + len - 2;
962 }
963 }