]> diplodocus.org Git - nmh/blob - zotnet/mf/mf.c
Just reworded the bit about '%s' being safe not to quote (it's only safe not to
[nmh] / zotnet / mf / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * $Id$
6 */
7
8 #include <mf.h>
9 #include <ctype.h>
10 #include <stdio.h>
11
12 /*
13 * static prototypes
14 */
15 static char *getcpy (char *);
16 static char *add (char *, char *);
17 static void compress (char *, char *);
18 static int isat (char *);
19 static int parse_address (void);
20 static int phrase (char *);
21 static int route_addr (char *);
22 static int local_part (char *);
23 static int domain (char *);
24 static int route (char *);
25 static int my_lex (char *);
26
27
/*
 * getcpy -- return a newly malloc'd copy of the string s.
 *
 * A NULL argument is treated as a fatal error: the process is aborted.
 * If malloc() fails, NULL is returned.  A non-NULL result belongs to
 * the caller, who must eventually free() it.
 */
static char *
getcpy (char *s)
{
    char *p;

    /*
     * abort() does not return, so the endless pause() loop that used
     * to follow it could never run and has been dropped.
     */
    if (!s)
	abort ();

    /* One spare byte beyond the terminator is kept, as before. */
    if ((p = malloc (strlen (s) + 2)) != NULL)
	strcpy (p, s);
    return p;
}
45
46
/*
 * add -- append s1 to the heap string s2 and return the result.
 *
 * When s2 is NULL the result is simply a copy of s1.  Otherwise a new
 * buffer holding s2 followed by s1 is allocated and s2 is freed, so
 * the caller must not use s2 afterwards.  NULL is returned if the
 * allocation fails (s2 is still freed in that case).
 */
static char *
add (char *s1, char *s2)
{
    size_t head_len;
    char *joined;

    if (s2 == NULL)
	return getcpy (s1);

    head_len = strlen (s2);
    joined = malloc (strlen (s1) + head_len + 2);
    if (joined != NULL) {
	strcpy (joined, s2);
	strcpy (joined + head_len, s1);
    }
    free (s2);
    return joined;
}
60
/*
 * isfrom -- does the string start with "From " or ">From "?
 *
 * Returns 1 if it does, 0 otherwise.
 */
int
isfrom(char *string)
{
    static const char plain[] = "From ";
    static const char quoted[] = ">From ";

    if (strncmp (string, plain, sizeof plain - 1) == 0)
	return 1;
    return strncmp (string, quoted, sizeof quoted - 1) == 0;
}
67
68
/*
 * lequal -- compare two strings for equality, ignoring letter case.
 *
 * Returns non-zero when a and b have the same length and match
 * character for character after case folding, and 0 otherwise (the
 * original spelled the early-exit value FALSE; the final return was
 * already a plain 0/1 comparison).
 *
 * Characters are converted to unsigned char before being handed to
 * toupper(): passing a negative plain char to the <ctype.h> functions
 * is undefined behaviour.
 */
int
lequal (char *a, char *b)
{
    for (; *a; a++, b++) {
	if (*b == 0)
	    return 0;		/* b is shorter than a */
	if (toupper ((unsigned char) *a) != toupper ((unsigned char) *b))
	    return 0;
    }

    /* a is exhausted; equal only if b is exhausted too */
    return (*b == 0);
}
84
85
86 /*
87 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
88 * addresses, so for each list of addresses we see if we can find some
89 * character to give us a hint.
90 */
91
92
/* Address-style states kept by seekadrx() between calls. */
93 #define CHKADR 0 /* undetermined address style */
94 #define UNIXDR 1 /* UNIX-style address */
95 #define ARPADR 2 /* ARPAnet-style address */
96
97
/* seekadrx() picks ARPADR if the address list contains any of these. */
98 static char *punctuators = ";<>.()[]";
/* uucpadrx() state: vp is its heap copy of the list, tp the unparsed rest. */
99 static char *vp = NULL;
100 static char *tp = NULL;
101
/* Result record filled in and returned by uucpadrx(). */
102 static struct adrx adrxs1;
103
104
105 struct adrx *
106 seekadrx (char *addrs)
107 {
108 static int state = CHKADR;
109 register char *cp;
110 register struct adrx *adrxp;
111
112 if (state == CHKADR)
113 for (state = UNIXDR, cp = addrs; *cp; cp++)
114 if (strchr(punctuators, *cp)) {
115 state = ARPADR;
116 break;
117 }
118
119 switch (state) {
120 case UNIXDR:
121 adrxp = uucpadrx (addrs);
122 break;
123
124 case ARPADR:
125 default:
126 adrxp = getadrx (addrs);
127 break;
128 }
129
130 if (adrxp == NULL)
131 state = CHKADR;
132
133 return adrxp;
134 }
135
136
137 /*
138 * uucpadrx() implements a partial UUCP-style address parser. It's based
139 * on the UUCP notion that addresses are separated by spaces or commas.
140 */
141
142
143 struct adrx *
144 uucpadrx (char *addrs)
145 {
146 register char *cp, *wp, *xp, *yp, *zp;
147 register struct adrx *adrxp = &adrxs1;
148
149 if (vp == NULL) {
150 vp = tp = getcpy (addrs);
151 compress (addrs, vp);
152 }
153 else
154 if (tp == NULL) {
155 free (vp);
156 vp = NULL;
157 return NULL;
158 }
159
160 for (cp = tp; isspace (*cp); cp++)
161 continue;
162 if (*cp == 0) {
163 free (vp);
164 vp = tp = NULL;
165 return NULL;
166 }
167
168 if ((wp = strchr(cp, ',')) == NULL) {
169 if ((wp = strchr(cp, ' ')) != NULL) {
170 xp = wp;
171 while (isspace (*xp))
172 xp++;
173 if (*xp != 0 && isat (--xp)) {
174 yp = xp + 4;
175 while (isspace (*yp))
176 yp++;
177 if (*yp != 0) {
178 if ((zp = strchr(yp, ' ')) != NULL)
179 *zp = 0, tp = ++zp;
180 else
181 tp = NULL;
182 }
183 else
184 *wp = 0, tp = ++wp;
185 }
186 else
187 *wp = 0, tp = ++wp;
188 }
189 else
190 tp = NULL;
191 }
192 else
193 *wp = 0, tp = ++wp;
194
195 if (adrxp->text)
196 free (adrxp->text);
197 adrxp->text = getcpy (cp);
198 adrxp->mbox = cp;
199 adrxp->host = adrxp->path = NULL;
200 if ((wp = strrchr(cp, '@')) != NULL) {
201 *wp++ = 0;
202 adrxp->host = *wp ? wp : NULL;
203 }
204 else
205 for (wp = cp + strlen (cp) - 4; wp >= cp; wp--)
206 if (isat (wp)) {
207 *wp++ = 0;
208 adrxp->host = wp + 3;
209 }
210
211 adrxp->pers = adrxp->grp = adrxp->note = adrxp->err = NULL;
212 adrxp->ingrp = 0;
213
214 return adrxp;
215 }
216
217
/*
 * compress -- copy fp to tp, squeezing whitespace.
 *
 * Leading and trailing whitespace is dropped and every internal run of
 * whitespace characters becomes a single blank.  tp must have room for
 * strlen(fp) + 1 bytes.
 *
 * Characters are cast to unsigned char before isspace(), since passing
 * a negative plain char to the <ctype.h> functions is undefined.
 */
static void
compress (char *fp, char *tp)
{
    char *start = tp;
    int last_was_space = 1;	/* acts as if a blank preceded the text */

    for (; *fp != 0; fp++) {
	if (isspace ((unsigned char) *fp)) {
	    if (!last_was_space) {
		*tp++ = ' ';
		last_was_space = 1;
	    }
	}
	else {
	    *tp++ = *fp;
	    last_was_space = 0;
	}
    }

    /* drop the single trailing blank, if one was emitted */
    if (last_was_space && start < tp)
	tp--;
    *tp = 0;
}
234
235
236 static int
237 isat (char *p)
238 {
239 return (strncmp (p, " AT ", 4)
240 && strncmp (p, " At ", 4)
241 && strncmp (p, " aT ", 4)
242 && strncmp (p, " at ", 4) ? FALSE : TRUE);
243 }
244
245
246 /*
247 *
248 * getadrx() implements a partial 822-style address parser. The parser
249 * is neither complete nor correct. It does however recognize nearly all
250 * of the 822 address syntax. In addition it handles the majority of the
251 * 733 syntax as well. Most problems arise from trying to accommodate both.
252 *
253 * In terms of 822, the route-specification in
254 *
255 * "<" [route] local-part "@" domain ">"
256 *
257 * is parsed and returned unchanged. Multiple at-signs are compressed
258 * via source-routing. Recursive groups are not allowed as per the
259 * standard.
260 *
261 * In terms of 733, " at " is recognized as equivalent to "@".
262 *
263 * In terms of both the parser will not complain about missing hosts.
264 *
265 * -----
266 *
267 * We should not allow addresses like
268 *
269 * Marshall T. Rose <MRose@UCI>
270 *
271 * but should insist on
272 *
273 * "Marshall T. Rose" <MRose@UCI>
274 *
275 * Unfortunately, a lot of mailers stupidly let people get away with this.
276 *
277 * -----
278 *
279 * We should not allow addresses like
280 *
281 * <MRose@UCI>
282 *
283 * but should insist on
284 *
285 * MRose@UCI
286 *
287 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
288 * this.
289 *
290 * -----
291 *
292 * We should not allow addresses like
293 *
294 * @UCI:MRose@UCI-750a
295 *
296 * but should insist on
297 *
298 * Marshall Rose <@UCI:MRose@UCI-750a>
299 *
300 * Unfortunately, a lot of mailers stupidly do this.
301 *
302 */
303
/* Backslash: my_lex() copies the character following it literally. */
304 #define QUOTE '\\'
305
/* Token codes returned by my_lex() and remembered in last_lex. */
306 #define LX_END 0
307 #define LX_ERR 1
308 #define LX_ATOM 2
309 #define LX_QSTR 3
310 #define LX_DLIT 4
311 #define LX_SEMI 5
312 #define LX_COMA 6
313 #define LX_LBRK 7
314 #define LX_RBRK 8
315 #define LX_COLN 9
316 #define LX_DOT 10
317 #define LX_AT 11
318
/* One entry of the special-character table: a character and its token code. */
319 struct specials {
320 char lx_chr;
321 int lx_val;
322 };
323
/*
 * Characters that end an atom, with the token each one yields when it
 * starts a token itself.  The table ends with a { 0, 0 } sentinel;
 * loops over it stop when lx_chr is 0.
 */
324 static struct specials special[] = {
325 { ';', LX_SEMI },
326 { ',', LX_COMA },
327 { '<', LX_LBRK },
328 { '>', LX_RBRK },
329 { ':', LX_COLN },
330 { '.', LX_DOT },
331 { '@', LX_AT },
332 { '(', LX_ERR },
333 { ')', LX_ERR },
334 { QUOTE, LX_ERR },
335 { '"', LX_ERR },
336 { '[', LX_ERR },
337 { ']', LX_ERR },
338 { 0, 0 }
339 };
340
/* Group depth: raised when a "phrase:" group opens, lowered at ';'. */
341 static int glevel = 0;
/* Value of glevel when the current mailbox was parsed; copied to adrx.ingrp. */
342 static int ingrp = 0;
/* Token code most recently returned by my_lex(). */
343 static int last_lex = LX_END;
344
/* dp: getadrx()'s heap copy of the address list being parsed. */
345 static char *dp = NULL;
/* cp: current scan position in dp; my_lex() sets it to NULL at end of input. */
346 static char *cp = NULL;
/* ap: where the address currently being parsed starts. */
347 static char *ap = NULL;
/* Heap strings for the current address; getadrx() frees them on each call. */
348 static char *pers = NULL;
349 static char *mbox = NULL;
350 static char *host = NULL;
351 static char *path = NULL;
352 static char *grp = NULL;
353 static char *note = NULL;
/* err: error message for the current address (empty if none). */
354 static char err[BUFSIZ];
/* adr: text of the current address as handed back in adrx.text. */
355 static char adr[BUFSIZ];
356
/* Result record filled in and returned by getadrx(). */
357 static struct adrx adrxs2;
358
359
360 struct adrx *
361 getadrx (char *addrs)
362 {
363 register char *bp;
364 register struct adrx *adrxp = &adrxs2;
365
366 if (pers)
367 free (pers);
368 if (mbox)
369 free (mbox);
370 if (host)
371 free (host);
372 if (path)
373 free (path);
374 if (grp)
375 free (grp);
376 if (note)
377 free (note);
378 pers = mbox = host = path = grp = note = NULL;
379 err[0] = 0;
380
381 if (dp == NULL) {
382 dp = cp = getcpy (addrs ? addrs : "");
383 glevel = 0;
384 }
385 else
386 if (cp == NULL) {
387 free (dp);
388 dp = NULL;
389 return NULL;
390 }
391
392 switch (parse_address ()) {
393 case DONE:
394 free (dp);
395 dp = cp = NULL;
396 return NULL;
397
398 case OK:
399 switch (last_lex) {
400 case LX_COMA:
401 case LX_END:
402 break;
403
404 default: /* catch trailing comments */
405 bp = cp;
406 my_lex (adr);
407 cp = bp;
408 break;
409 }
410 break;
411
412 default:
413 break;
414 }
415
416 if (err[0])
417 for (;;) {
418 switch (last_lex) {
419 case LX_COMA:
420 case LX_END:
421 break;
422
423 default:
424 my_lex (adr);
425 continue;
426 }
427 break;
428 }
429 while (isspace (*ap))
430 ap++;
431 if (cp)
432 sprintf (adr, "%.*s", cp - ap, ap);
433 else
434 strcpy (adr, ap);
435 bp = adr + strlen (adr) - 1;
436 if (*bp == ',' || *bp == ';' || *bp == '\n')
437 *bp = 0;
438
439 adrxp->text = adr;
440 adrxp->pers = pers;
441 adrxp->mbox = mbox;
442 adrxp->host = host;
443 adrxp->path = path;
444 adrxp->grp = grp;
445 adrxp->ingrp = ingrp;
446 adrxp->note = note;
447 adrxp->err = err[0] ? err : NULL;
448
449 return adrxp;
450 }
451
452
453 static int
454 parse_address (void)
455 {
456 char buffer[BUFSIZ];
457
458 again: ;
459 ap = cp;
460 switch (my_lex (buffer)) {
461 case LX_ATOM:
462 case LX_QSTR:
463 pers = getcpy (buffer);
464 break;
465
466 case LX_SEMI:
467 if (glevel-- <= 0) {
468 strcpy (err, "extraneous semi-colon");
469 return NOTOK;
470 }
471 case LX_COMA:
472 if (note) {
473 free (note);
474 note = NULL;
475 }
476 goto again;
477
478 case LX_END:
479 return DONE;
480
481 case LX_LBRK: /* sigh (2) */
482 goto get_addr;
483
484 case LX_AT: /* sigh (3) */
485 cp = ap;
486 if (route_addr (buffer) == NOTOK)
487 return NOTOK;
488 return OK; /* why be choosy? */
489
490 default:
491 sprintf (err, "illegal address construct (%s)", buffer);
492 return NOTOK;
493 }
494
495 switch (my_lex (buffer)) {
496 case LX_ATOM:
497 case LX_QSTR:
498 pers = add (buffer, add (" ", pers));
499 more_phrase: ; /* sigh (1) */
500 if (phrase (buffer) == NOTOK)
501 return NOTOK;
502
503 switch (last_lex) {
504 case LX_LBRK:
505 get_addr: ;
506 if (route_addr (buffer) == NOTOK)
507 return NOTOK;
508 if (last_lex == LX_RBRK)
509 return OK;
510 sprintf (err, "missing right-bracket (%s)", buffer);
511 return NOTOK;
512
513 case LX_COLN:
514 get_group: ;
515 if (glevel++ > 0) {
516 sprintf (err, "nested groups not allowed (%s)", pers);
517 return NOTOK;
518 }
519 grp = add (": ", pers);
520 pers = NULL;
521 {
522 char *pp = cp;
523
524 for (;;)
525 switch (my_lex (buffer)) {
526 case LX_SEMI:
527 case LX_END: /* tsk, tsk */
528 glevel--;
529 return OK;
530
531 case LX_COMA:
532 continue;
533
534 default:
535 cp = pp;
536 return parse_address ();
537 }
538 }
539
540 case LX_DOT: /* sigh (1) */
541 pers = add (".", pers);
542 goto more_phrase;
543
544 default:
545 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
546 pers, buffer);
547 return NOTOK;
548 }
549
550 case LX_LBRK:
551 goto get_addr;
552
553 case LX_COLN:
554 goto get_group;
555
556 case LX_DOT:
557 mbox = add (buffer, pers);
558 pers = NULL;
559 if (route_addr (buffer) == NOTOK)
560 return NOTOK;
561 goto check_end;
562
563 case LX_AT:
564 ingrp = glevel;
565 mbox = pers;
566 pers = NULL;
567 if (domain (buffer) == NOTOK)
568 return NOTOK;
569 check_end: ;
570 switch (last_lex) {
571 case LX_SEMI:
572 if (glevel-- <= 0) {
573 strcpy (err, "extraneous semi-colon");
574 return NOTOK;
575 }
576 case LX_COMA:
577 case LX_END:
578 return OK;
579
580 default:
581 sprintf (err, "junk after local@domain (%s)", buffer);
582 return NOTOK;
583 }
584
585 case LX_SEMI: /* no host */
586 case LX_COMA:
587 case LX_END:
588 ingrp = glevel;
589 if (last_lex == LX_SEMI && glevel-- <= 0) {
590 strcpy (err, "extraneous semi-colon");
591 return NOTOK;
592 }
593 mbox = pers;
594 pers = NULL;
595 return OK;
596
597 default:
598 sprintf (err, "missing mailbox (%s)", buffer);
599 return NOTOK;
600 }
601 }
602
603
604 static int
605 phrase (char *buffer)
606 {
607 for (;;)
608 switch (my_lex (buffer)) {
609 case LX_ATOM:
610 case LX_QSTR:
611 pers = add (buffer, add (" ", pers));
612 continue;
613
614 default:
615 return OK;
616 }
617 }
618
619
620 static int
621 route_addr (char *buffer)
622 {
623 register char *pp = cp;
624
625 if (my_lex (buffer) == LX_AT) {
626 if (route (buffer) == NOTOK)
627 return NOTOK;
628 }
629 else
630 cp = pp;
631
632 if (local_part (buffer) == NOTOK)
633 return NOTOK;
634
635 switch (last_lex) {
636 case LX_AT:
637 return domain (buffer);
638
639 case LX_SEMI: /* if in group */
640 case LX_RBRK: /* no host */
641 case LX_COMA:
642 case LX_END:
643 return OK;
644
645 default:
646 sprintf (err, "no at-sign after local-part (%s)", buffer);
647 return NOTOK;
648 }
649 }
650
651
652 static int
653 local_part (char *buffer)
654 {
655 ingrp = glevel;
656
657 for (;;) {
658 switch (my_lex (buffer)) {
659 case LX_ATOM:
660 case LX_QSTR:
661 mbox = add (buffer, mbox);
662 break;
663
664 default:
665 sprintf (err, "no mailbox in local-part (%s)", buffer);
666 return NOTOK;
667 }
668
669 switch (my_lex (buffer)) {
670 case LX_DOT:
671 mbox = add (buffer, mbox);
672 continue;
673
674 default:
675 return OK;
676 }
677 }
678 }
679
680
681 static int
682 domain (char *buffer)
683 {
684 for (;;) {
685 switch (my_lex (buffer)) {
686 case LX_ATOM:
687 case LX_DLIT:
688 host = add (buffer, host);
689 break;
690
691 default:
692 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
693 return NOTOK;
694 }
695
696 switch (my_lex (buffer)) {
697 case LX_DOT:
698 host = add (buffer, host);
699 continue;
700
701 case LX_AT: /* sigh (0) */
702 mbox = add (host, add ("%", mbox));
703 free (host);
704 host = NULL;
705 continue;
706
707 default:
708 return OK;
709 }
710 }
711 }
712
713
714 static int
715 route (char *buffer)
716 {
717 path = getcpy ("@");
718
719 for (;;) {
720 switch (my_lex (buffer)) {
721 case LX_ATOM:
722 case LX_DLIT:
723 path = add (buffer, path);
724 break;
725
726 default:
727 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
728 return NOTOK;
729 }
730 switch (my_lex (buffer)) {
731 case LX_COMA:
732 path = add (buffer, path);
733 for (;;) {
734 switch (my_lex (buffer)) {
735 case LX_COMA:
736 continue;
737
738 case LX_AT:
739 path = add (buffer, path);
740 break;
741
742 default:
743 sprintf (err, "no at-sign found for next domain in route (%s)",
744 buffer);
745 }
746 break;
747 }
748 continue;
749
750 case LX_AT: /* XXX */
751 case LX_DOT:
752 path = add (buffer, path);
753 continue;
754
755 case LX_COLN:
756 path = add (buffer, path);
757 return OK;
758
759 default:
760 sprintf (err, "no colon found to terminate route (%s)", buffer);
761 return NOTOK;
762 }
763 }
764 }
765
766
767 static int
768 my_lex (char *buffer)
769 {
770 int i, gotat = 0;
771 register char c, *bp;
772
773 bp = buffer;
774 *bp = 0;
775 if (!cp)
776 return (last_lex = LX_END);
777
778 gotat = isat (cp);
779 c = *cp++;
780 while (isspace (c))
781 c = *cp++;
782 if (c == 0) {
783 cp = NULL;
784 return (last_lex = LX_END);
785 }
786
787 if (c == '(')
788 for (*bp++ = c, i = 0;;)
789 switch (c = *cp++) {
790 case 0:
791 cp = NULL;
792 return (last_lex = LX_ERR);
793 case QUOTE:
794 *bp++ = c;
795 if ((c = *cp++) == 0) {
796 cp = NULL;
797 return (last_lex = LX_ERR);
798 }
799 *bp++ = c;
800 continue;
801 case '(':
802 i++;
803 default:
804 *bp++ = c;
805 continue;
806 case ')':
807 *bp++ = c;
808 if (--i < 0) {
809 *bp = 0;
810 note = note ? add (buffer, add (" ", note))
811 : getcpy (buffer);
812 return my_lex (buffer);
813 }
814 }
815
816 if (c == '"')
817 for (*bp++ = c;;)
818 switch (c = *cp++) {
819 case 0:
820 cp = NULL;
821 return (last_lex = LX_ERR);
822 case QUOTE:
823 *bp++ = c;
824 if ((c = *cp++) == 0) {
825 cp = NULL;
826 return (last_lex = LX_ERR);
827 }
828 default:
829 *bp++ = c;
830 continue;
831 case '"':
832 *bp++ = c;
833 *bp = 0;
834 return (last_lex = LX_QSTR);
835 }
836
837 if (c == '[')
838 for (*bp++ = c;;)
839 switch (c = *cp++) {
840 case 0:
841 cp = NULL;
842 return (last_lex = LX_ERR);
843 case QUOTE:
844 *bp++ = c;
845 if ((c = *cp++) == 0) {
846 cp = NULL;
847 return (last_lex = LX_ERR);
848 }
849 default:
850 *bp++ = c;
851 continue;
852 case ']':
853 *bp++ = c;
854 *bp = 0;
855 return (last_lex = LX_DLIT);
856 }
857
858 *bp++ = c;
859 *bp = 0;
860 for (i = 0; special[i].lx_chr != 0; i++)
861 if (c == special[i].lx_chr)
862 return (last_lex = special[i].lx_val);
863
864 if (iscntrl (c))
865 return (last_lex = LX_ERR);
866
867 for (;;) {
868 if ((c = *cp++) == 0)
869 break;
870 for (i = 0; special[i].lx_chr != 0; i++)
871 if (c == special[i].lx_chr)
872 goto got_atom;
873 if (iscntrl (c) || isspace (c))
874 break;
875 *bp++ = c;
876 }
877 got_atom: ;
878 if (c == 0)
879 cp = NULL;
880 else
881 cp--;
882 *bp = 0;
883 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
884 ? LX_ATOM : LX_AT;
885 return last_lex;
886 }
887
888
889 char *
890 legal_person (char *p)
891 {
892 int i;
893 register char *cp;
894 static char buffer[BUFSIZ];
895
896 if (*p == '"')
897 return p;
898 for (cp = p; *cp; cp++)
899 for (i = 0; special[i].lx_chr; i++)
900 if (*cp == special[i].lx_chr) {
901 sprintf (buffer, "\"%s\"", p);
902 return buffer;
903 }
904
905 return p;
906 }
907
908
909 int
910 mfgets (FILE *in, char **bp)
911 {
912 int i;
913 register char *cp, *dp, *ep;
914 static int len = 0;
915 static char *pp = NULL;
916
917 if (pp == NULL)
918 if (!(pp = malloc ((size_t) (len = BUFSIZ))))
919 return NOTOK;
920
921 for (ep = (cp = pp) + len - 2;;) {
922 switch (i = getc (in)) {
923 case EOF:
924 eol: ;
925 if (cp != pp) {
926 *cp = 0;
927 *bp = pp;
928 return OK;
929 }
930 eoh: ;
931 *bp = NULL;
932 free (pp);
933 pp = NULL;
934 return DONE;
935
936 case 0:
937 continue;
938
939 case '\n':
940 if (cp == pp) /* end of headers, gobble it */
941 goto eoh;
942 switch (i = getc (in)) {
943 default: /* end of line */
944 case '\n': /* end of headers, save for next call */
945 ungetc (i, in);
946 goto eol;
947
948 case ' ': /* continue headers */
949 case '\t':
950 *cp++ = '\n';
951 break;
952 } /* fall into default case */
953
954 default:
955 *cp++ = i;
956 break;
957 }
958 if (cp >= ep) {
959 if (!(dp = realloc (pp, (size_t) (len += BUFSIZ)))) {
960 free (pp);
961 pp = NULL;
962 return NOTOK;
963 }
964 else
965 cp += dp - pp, ep = (pp = cp) + len - 2;
966 }
967 }
968 }