]> diplodocus.org Git - nmh/blob - sbr/mf.c
Fix stupid accidental dependence on a bash quirk in previous
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * $Id$
6 *
7 * This code is Copyright (c) 2002, by the authors of nmh. See the
8 * COPYRIGHT file in the root directory of the nmh distribution for
9 * complete copyright information.
10 */
11
12 #include <h/mf.h>
13 #include <ctype.h>
14 #include <stdio.h>
15
16 /*
17 * static prototypes
18 */
19 static char *getcpy (char *);
20 static char *add (char *, char *);
21 static void compress (char *, char *);
22 static int isat (char *);
23 static int parse_address (void);
24 static int phrase (char *);
25 static int route_addr (char *);
26 static int local_part (char *);
27 static int domain (char *);
28 static int route (char *);
29 static int my_lex (char *);
30
31
/*
 * getcpy -- return a freshly allocated copy of the string s.
 *
 * A NULL argument is treated as a fatal programming error: the process
 * is aborted.  Returns NULL when the allocation fails.  The caller owns
 * the returned storage and releases it with free().
 */
static char *
getcpy (char *s)
{
    char *copy;

    if (s == NULL)
        abort ();               /* does not return */

    /* One spare byte beyond the terminator, as this routine has always allocated. */
    copy = malloc (strlen (s) + 2);
    if (copy != NULL)
        strcpy (copy, s);
    return copy;
}
49
50
/*
 * add -- append s1 to s2 and return the result in new storage.
 *
 * s2 is consumed: it is freed before returning, whether or not the
 * allocation succeeded.  When s2 is NULL the result is simply a copy
 * of s1.  Returns NULL if the new string cannot be allocated.
 */
static char *
add (char *s1, char *s2)
{
    char *joined;
    size_t head;

    if (s2 == NULL)
        return getcpy (s1);

    head = strlen (s2);
    joined = malloc (strlen (s1) + head + 2);
    if (joined != NULL) {
        strcpy (joined, s2);
        strcpy (joined + head, s1);
    }
    free (s2);
    return joined;
}
64
/*
 * isfrom -- report whether string begins with "From " or ">From ".
 * Returns 1 when it does and 0 otherwise.
 */
int
isfrom(char *string)
{
    char *start = string;

    /* A single leading '>' is tolerated in front of the keyword. */
    if (*start == '>')
        start++;

    return strncmp (start, "From ", 5) == 0;
}
71
72
/*
 * lequal -- case-insensitive string equality.
 *
 * Returns non-zero when a and b have the same length and agree
 * character for character once lower-case letters are folded to upper
 * case; returns zero otherwise.
 */
int
lequal (char *a, char *b)
{
    for (; *a != 0; a++, b++) {
        /*
         * The <ctype.h> routines are only defined for unsigned char
         * values (and EOF); plain char may be negative for 8-bit text.
         */
        int ua = toupper ((unsigned char) *a);
        int ub = toupper ((unsigned char) *b);

        if (*b == 0 || ua != ub)
            return 0;
    }

    /* a is exhausted; the strings match only if b is too. */
    return (*b == 0);
}
88
89
90 /*
91 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
92 * addresses, so for each list of addresses we see if we can find some
93 * character to give us a hint.
94 */
95
96
/* Address-style states that seekadrx() keeps between calls. */
97 #define CHKADR 0 /* undetermined address style */
98 #define UNIXDR 1 /* UNIX-style address */
99 #define ARPADR 2 /* ARPAnet-style address */
100
101
/* Finding any of these characters in an address list makes seekadrx() choose getadrx(). */
102 static char *punctuators = ";<>.()[]";
/* uucpadrx() state: vp is its private working copy of the list, tp the part not yet returned. */
103 static char *vp = NULL;
104 static char *tp = NULL;
105
/* Result record that uucpadrx() fills in and returns. */
106 static struct adrx adrxs1;
107
108
109 struct adrx *
110 seekadrx (char *addrs)
111 {
112 static int state = CHKADR;
113 register char *cp;
114 register struct adrx *adrxp;
115
116 if (state == CHKADR)
117 for (state = UNIXDR, cp = addrs; *cp; cp++)
118 if (strchr(punctuators, *cp)) {
119 state = ARPADR;
120 break;
121 }
122
123 switch (state) {
124 case UNIXDR:
125 adrxp = uucpadrx (addrs);
126 break;
127
128 case ARPADR:
129 default:
130 adrxp = getadrx (addrs);
131 break;
132 }
133
134 if (adrxp == NULL)
135 state = CHKADR;
136
137 return adrxp;
138 }
139
140
141 /*
142 * uucpadrx() implements a partial UUCP-style address parser. It's based
143 * on the UUCP notion that addresses are separated by spaces or commas.
144 */
145
146
147 struct adrx *
148 uucpadrx (char *addrs)
149 {
150 register char *cp, *wp, *xp, *yp, *zp;
151 register struct adrx *adrxp = &adrxs1;
152
153 if (vp == NULL) {
154 vp = tp = getcpy (addrs);
155 compress (addrs, vp);
156 }
157 else
158 if (tp == NULL) {
159 free (vp);
160 vp = NULL;
161 return NULL;
162 }
163
164 for (cp = tp; isspace (*cp); cp++)
165 continue;
166 if (*cp == 0) {
167 free (vp);
168 vp = tp = NULL;
169 return NULL;
170 }
171
172 if ((wp = strchr(cp, ',')) == NULL) {
173 if ((wp = strchr(cp, ' ')) != NULL) {
174 xp = wp;
175 while (isspace (*xp))
176 xp++;
177 if (*xp != 0 && isat (--xp)) {
178 yp = xp + 4;
179 while (isspace (*yp))
180 yp++;
181 if (*yp != 0) {
182 if ((zp = strchr(yp, ' ')) != NULL)
183 *zp = 0, tp = ++zp;
184 else
185 tp = NULL;
186 }
187 else
188 *wp = 0, tp = ++wp;
189 }
190 else
191 *wp = 0, tp = ++wp;
192 }
193 else
194 tp = NULL;
195 }
196 else
197 *wp = 0, tp = ++wp;
198
199 if (adrxp->text)
200 free (adrxp->text);
201 adrxp->text = getcpy (cp);
202 adrxp->mbox = cp;
203 adrxp->host = adrxp->path = NULL;
204 if ((wp = strrchr(cp, '@')) != NULL) {
205 *wp++ = 0;
206 adrxp->host = *wp ? wp : NULL;
207 }
208 else
209 for (wp = cp + strlen (cp) - 4; wp >= cp; wp--)
210 if (isat (wp)) {
211 *wp++ = 0;
212 adrxp->host = wp + 3;
213 }
214
215 adrxp->pers = adrxp->grp = adrxp->note = adrxp->err = NULL;
216 adrxp->ingrp = 0;
217
218 return adrxp;
219 }
220
221
/*
 * compress -- copy fp to tp with whitespace normalised.
 *
 * Every run of whitespace becomes a single blank, and leading and
 * trailing whitespace is dropped.  The output is never longer than the
 * input, so tp needs at most strlen(fp) + 1 bytes.
 */
static void
compress (char *fp, char *tp)
{
    /* Starting as if a blank had just been written suppresses leading blanks. */
    char last = ' ';
    char *start = tp;

    while ((*tp = *fp++) != 0) {
        /* <ctype.h> routines require unsigned char values. */
        if (isspace ((unsigned char) *tp)) {
            if (last != ' ')
                *tp++ = last = ' ';
            /* otherwise leave tp alone: the next character overwrites this one */
        } else {
            last = *tp++;
        }
    }

    /* Remove the single trailing blank, if one was written. */
    if (last == ' ' && start < tp)
        *--tp = 0;
}
238
239
240 static int
241 isat (char *p)
242 {
243 return (strncmp (p, " AT ", 4)
244 && strncmp (p, " At ", 4)
245 && strncmp (p, " aT ", 4)
246 && strncmp (p, " at ", 4) ? FALSE : TRUE);
247 }
248
249
250 /*
251 *
252 * getadrx() implements a partial 822-style address parser. The parser
253 * is neither complete nor correct. It does however recognize nearly all
254 * of the 822 address syntax. In addition it handles the majority of the
255 * 733 syntax as well. Most problems arise from trying to accommodate both.
256 *
257 * In terms of 822, the route-specification in
258 *
259 * "<" [route] local-part "@" domain ">"
260 *
261 * is parsed and returned unchanged. Multiple at-signs are compressed
262 * via source-routing. Recursive groups are not allowed as per the
263 * standard.
264 *
265 * In terms of 733, " at " is recognized as equivalent to "@".
266 *
267 * In terms of both the parser will not complain about missing hosts.
268 *
269 * -----
270 *
271 * We should not allow addresses like
272 *
273 * Marshall T. Rose <MRose@UCI>
274 *
275 * but should insist on
276 *
277 * "Marshall T. Rose" <MRose@UCI>
278 *
279 * Unfortunately, a lot of mailers stupidly let people get away with this.
280 *
281 * -----
282 *
283 * We should not allow addresses like
284 *
285 * <MRose@UCI>
286 *
287 * but should insist on
288 *
289 * MRose@UCI
290 *
291 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
292 * this.
293 *
294 * -----
295 *
296 * We should not allow addresses like
297 *
298 * @UCI:MRose@UCI-750a
299 *
300 * but should insist on
301 *
302 * Marshall Rose <@UCI:MRose@UCI-750a>
303 *
304 * Unfortunately, a lot of mailers stupidly do this.
305 *
306 */
307
/* Escape character recognised by my_lex() inside comments, quoted strings and domain literals. */
308 #define QUOTE '\\'
309
/* Token codes returned by my_lex() and remembered in last_lex. */
310 #define LX_END 0 /* input exhausted */
311 #define LX_ERR 1 /* malformed or over-long token */
312 #define LX_ATOM 2 /* run of ordinary characters */
313 #define LX_QSTR 3 /* "quoted string" */
314 #define LX_DLIT 4 /* [domain literal] */
315 #define LX_SEMI 5 /* ';' */
316 #define LX_COMA 6 /* ',' */
317 #define LX_LBRK 7 /* '<' */
318 #define LX_RBRK 8 /* '>' */
319 #define LX_COLN 9 /* ':' */
320 #define LX_DOT 10 /* '.' */
321 #define LX_AT 11 /* '@', or the word "at" set off by blanks */
322
/* One entry of the table mapping a special character to its token code. */
323 struct specials {
324 char lx_chr;
325 int lx_val;
326 };
327
/*
 * Characters that end an atom, each with the token my_lex() returns when the
 * character is met on its own.  The table ends with a zero lx_chr.
 * legal_person() also consults it to decide whether a phrase needs quoting.
 */
328 static struct specials special[] = {
329 { ';', LX_SEMI },
330 { ',', LX_COMA },
331 { '<', LX_LBRK },
332 { '>', LX_RBRK },
333 { ':', LX_COLN },
334 { '.', LX_DOT },
335 { '@', LX_AT },
336 { '(', LX_ERR },
337 { ')', LX_ERR },
338 { QUOTE, LX_ERR },
339 { '"', LX_ERR },
340 { '[', LX_ERR },
341 { ']', LX_ERR },
342 { 0, 0 }
343 };
344
/* Group nesting depth: raised when a group's ':' is parsed, lowered at its ';'. */
345 static int glevel = 0;
/* Value of glevel captured when the current mailbox was parsed; reported in adrx.ingrp. */
346 static int ingrp = 0;
/* Most recent token code returned by my_lex(). */
347 static int last_lex = LX_END;
348
/* dp: getadrx()'s allocated copy of the address list, kept until the list is exhausted. */
349 static char *dp = NULL;
/* cp: current scan position of my_lex(); set to NULL once the end of input is reached. */
350 static char *cp = NULL;
/* ap: where the address currently being parsed starts. */
351 static char *ap = NULL;
/* Components of the current address; getadrx() frees and clears them on each call. */
352 static char *pers = NULL;
353 static char *mbox = NULL;
354 static char *host = NULL;
355 static char *path = NULL;
356 static char *grp = NULL;
357 static char *note = NULL;
/* err: diagnostic for the current address, empty when it parsed cleanly. */
358 static char err[BUFSIZ];
/* adr: text of the current address; also lent to my_lex() as scratch space by getadrx(). */
359 static char adr[BUFSIZ];
360
/* Result record that getadrx() fills in and returns. */
361 static struct adrx adrxs2;
362
363
364 struct adrx *
365 getadrx (char *addrs)
366 {
367 register char *bp;
368 register struct adrx *adrxp = &adrxs2;
369
370 if (pers)
371 free (pers);
372 if (mbox)
373 free (mbox);
374 if (host)
375 free (host);
376 if (path)
377 free (path);
378 if (grp)
379 free (grp);
380 if (note)
381 free (note);
382 pers = mbox = host = path = grp = note = NULL;
383 err[0] = 0;
384
385 if (dp == NULL) {
386 dp = cp = getcpy (addrs ? addrs : "");
387 glevel = 0;
388 }
389 else
390 if (cp == NULL) {
391 free (dp);
392 dp = NULL;
393 return NULL;
394 }
395
396 switch (parse_address ()) {
397 case DONE:
398 free (dp);
399 dp = cp = NULL;
400 return NULL;
401
402 case OK:
403 switch (last_lex) {
404 case LX_COMA:
405 case LX_END:
406 break;
407
408 default: /* catch trailing comments */
409 bp = cp;
410 my_lex (adr);
411 cp = bp;
412 break;
413 }
414 break;
415
416 default:
417 break;
418 }
419
420 if (err[0])
421 for (;;) {
422 switch (last_lex) {
423 case LX_COMA:
424 case LX_END:
425 break;
426
427 default:
428 my_lex (adr);
429 continue;
430 }
431 break;
432 }
433 while (isspace (*ap))
434 ap++;
435 if (cp)
436 sprintf (adr, "%.*s", cp - ap, ap);
437 else
438 strcpy (adr, ap);
439 bp = adr + strlen (adr) - 1;
440 if (*bp == ',' || *bp == ';' || *bp == '\n')
441 *bp = 0;
442
443 adrxp->text = adr;
444 adrxp->pers = pers;
445 adrxp->mbox = mbox;
446 adrxp->host = host;
447 adrxp->path = path;
448 adrxp->grp = grp;
449 adrxp->ingrp = ingrp;
450 adrxp->note = note;
451 adrxp->err = err[0] ? err : NULL;
452
453 return adrxp;
454 }
455
456
457 static int
458 parse_address (void)
459 {
460 char buffer[BUFSIZ];
461
462 again: ;
463 ap = cp;
464 switch (my_lex (buffer)) {
465 case LX_ATOM:
466 case LX_QSTR:
467 pers = getcpy (buffer);
468 break;
469
470 case LX_SEMI:
471 if (glevel-- <= 0) {
472 strcpy (err, "extraneous semi-colon");
473 return NOTOK;
474 }
475 case LX_COMA:
476 if (note) {
477 free (note);
478 note = NULL;
479 }
480 goto again;
481
482 case LX_END:
483 return DONE;
484
485 case LX_LBRK: /* sigh (2) */
486 goto get_addr;
487
488 case LX_AT: /* sigh (3) */
489 cp = ap;
490 if (route_addr (buffer) == NOTOK)
491 return NOTOK;
492 return OK; /* why be choosy? */
493
494 default:
495 sprintf (err, "illegal address construct (%s)", buffer);
496 return NOTOK;
497 }
498
499 switch (my_lex (buffer)) {
500 case LX_ATOM:
501 case LX_QSTR:
502 pers = add (buffer, add (" ", pers));
503 more_phrase: ; /* sigh (1) */
504 if (phrase (buffer) == NOTOK)
505 return NOTOK;
506
507 switch (last_lex) {
508 case LX_LBRK:
509 get_addr: ;
510 if (route_addr (buffer) == NOTOK)
511 return NOTOK;
512 if (last_lex == LX_RBRK)
513 return OK;
514 sprintf (err, "missing right-bracket (%s)", buffer);
515 return NOTOK;
516
517 case LX_COLN:
518 get_group: ;
519 if (glevel++ > 0) {
520 sprintf (err, "nested groups not allowed (%s)", pers);
521 return NOTOK;
522 }
523 grp = add (": ", pers);
524 pers = NULL;
525 {
526 char *pp = cp;
527
528 for (;;)
529 switch (my_lex (buffer)) {
530 case LX_SEMI:
531 case LX_END: /* tsk, tsk */
532 glevel--;
533 return OK;
534
535 case LX_COMA:
536 continue;
537
538 default:
539 cp = pp;
540 return parse_address ();
541 }
542 }
543
544 case LX_DOT: /* sigh (1) */
545 pers = add (".", pers);
546 goto more_phrase;
547
548 default:
549 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
550 pers, buffer);
551 return NOTOK;
552 }
553
554 case LX_LBRK:
555 goto get_addr;
556
557 case LX_COLN:
558 goto get_group;
559
560 case LX_DOT:
561 mbox = add (buffer, pers);
562 pers = NULL;
563 if (route_addr (buffer) == NOTOK)
564 return NOTOK;
565 goto check_end;
566
567 case LX_AT:
568 ingrp = glevel;
569 mbox = pers;
570 pers = NULL;
571 if (domain (buffer) == NOTOK)
572 return NOTOK;
573 check_end: ;
574 switch (last_lex) {
575 case LX_SEMI:
576 if (glevel-- <= 0) {
577 strcpy (err, "extraneous semi-colon");
578 return NOTOK;
579 }
580 case LX_COMA:
581 case LX_END:
582 return OK;
583
584 default:
585 sprintf (err, "junk after local@domain (%s)", buffer);
586 return NOTOK;
587 }
588
589 case LX_SEMI: /* no host */
590 case LX_COMA:
591 case LX_END:
592 ingrp = glevel;
593 if (last_lex == LX_SEMI && glevel-- <= 0) {
594 strcpy (err, "extraneous semi-colon");
595 return NOTOK;
596 }
597 mbox = pers;
598 pers = NULL;
599 return OK;
600
601 default:
602 sprintf (err, "missing mailbox (%s)", buffer);
603 return NOTOK;
604 }
605 }
606
607
608 static int
609 phrase (char *buffer)
610 {
611 for (;;)
612 switch (my_lex (buffer)) {
613 case LX_ATOM:
614 case LX_QSTR:
615 pers = add (buffer, add (" ", pers));
616 continue;
617
618 default:
619 return OK;
620 }
621 }
622
623
624 static int
625 route_addr (char *buffer)
626 {
627 register char *pp = cp;
628
629 if (my_lex (buffer) == LX_AT) {
630 if (route (buffer) == NOTOK)
631 return NOTOK;
632 }
633 else
634 cp = pp;
635
636 if (local_part (buffer) == NOTOK)
637 return NOTOK;
638
639 switch (last_lex) {
640 case LX_AT:
641 return domain (buffer);
642
643 case LX_SEMI: /* if in group */
644 case LX_RBRK: /* no host */
645 case LX_COMA:
646 case LX_END:
647 return OK;
648
649 default:
650 sprintf (err, "no at-sign after local-part (%s)", buffer);
651 return NOTOK;
652 }
653 }
654
655
656 static int
657 local_part (char *buffer)
658 {
659 ingrp = glevel;
660
661 for (;;) {
662 switch (my_lex (buffer)) {
663 case LX_ATOM:
664 case LX_QSTR:
665 mbox = add (buffer, mbox);
666 break;
667
668 default:
669 sprintf (err, "no mailbox in local-part (%s)", buffer);
670 return NOTOK;
671 }
672
673 switch (my_lex (buffer)) {
674 case LX_DOT:
675 mbox = add (buffer, mbox);
676 continue;
677
678 default:
679 return OK;
680 }
681 }
682 }
683
684
685 static int
686 domain (char *buffer)
687 {
688 for (;;) {
689 switch (my_lex (buffer)) {
690 case LX_ATOM:
691 case LX_DLIT:
692 host = add (buffer, host);
693 break;
694
695 default:
696 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
697 return NOTOK;
698 }
699
700 switch (my_lex (buffer)) {
701 case LX_DOT:
702 host = add (buffer, host);
703 continue;
704
705 case LX_AT: /* sigh (0) */
706 mbox = add (host, add ("%", mbox));
707 free (host);
708 host = NULL;
709 continue;
710
711 default:
712 return OK;
713 }
714 }
715 }
716
717
718 static int
719 route (char *buffer)
720 {
721 path = getcpy ("@");
722
723 for (;;) {
724 switch (my_lex (buffer)) {
725 case LX_ATOM:
726 case LX_DLIT:
727 path = add (buffer, path);
728 break;
729
730 default:
731 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
732 return NOTOK;
733 }
734 switch (my_lex (buffer)) {
735 case LX_COMA:
736 path = add (buffer, path);
737 for (;;) {
738 switch (my_lex (buffer)) {
739 case LX_COMA:
740 continue;
741
742 case LX_AT:
743 path = add (buffer, path);
744 break;
745
746 default:
747 sprintf (err, "no at-sign found for next domain in route (%s)",
748 buffer);
749 }
750 break;
751 }
752 continue;
753
754 case LX_AT: /* XXX */
755 case LX_DOT:
756 path = add (buffer, path);
757 continue;
758
759 case LX_COLN:
760 path = add (buffer, path);
761 return OK;
762
763 default:
764 sprintf (err, "no colon found to terminate route (%s)", buffer);
765 return NOTOK;
766 }
767 }
768 }
769
770
771 static int
772 my_lex (char *buffer)
773 {
774 /* buffer should be at least BUFSIZ bytes long */
775 int i, gotat = 0;
776 register char c, *bp;
777
778 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
779 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
780
781 bp = buffer;
782 *bp = 0;
783 if (!cp)
784 return (last_lex = LX_END);
785
786 gotat = isat (cp);
787 c = *cp++;
788 while (isspace (c))
789 c = *cp++;
790 if (c == 0) {
791 cp = NULL;
792 return (last_lex = LX_END);
793 }
794
795 if (c == '(') {
796 ADDCHR(c);
797 for (i = 0;;)
798 switch (c = *cp++) {
799 case 0:
800 cp = NULL;
801 return (last_lex = LX_ERR);
802 case QUOTE:
803 ADDCHR(c);
804 if ((c = *cp++) == 0) {
805 cp = NULL;
806 return (last_lex = LX_ERR);
807 }
808 ADDCHR(c);
809 continue;
810 case '(':
811 i++;
812 default:
813 ADDCHR(c);
814 continue;
815 case ')':
816 ADDCHR(c);
817 if (--i < 0) {
818 *bp = 0;
819 note = note ? add (buffer, add (" ", note))
820 : getcpy (buffer);
821 return my_lex (buffer);
822 }
823 }
824 }
825
826 if (c == '"') {
827 ADDCHR(c);
828 for (;;)
829 switch (c = *cp++) {
830 case 0:
831 cp = NULL;
832 return (last_lex = LX_ERR);
833 case QUOTE:
834 ADDCHR(c);
835 if ((c = *cp++) == 0) {
836 cp = NULL;
837 return (last_lex = LX_ERR);
838 }
839 default:
840 ADDCHR(c);
841 continue;
842 case '"':
843 ADDCHR(c);
844 *bp = 0;
845 return (last_lex = LX_QSTR);
846 }
847 }
848
849 if (c == '[') {
850 ADDCHR(c);
851 for (;;)
852 switch (c = *cp++) {
853 case 0:
854 cp = NULL;
855 return (last_lex = LX_ERR);
856 case QUOTE:
857 ADDCHR(c);
858 if ((c = *cp++) == 0) {
859 cp = NULL;
860 return (last_lex = LX_ERR);
861 }
862 default:
863 ADDCHR(c);
864 continue;
865 case ']':
866 ADDCHR(c);
867 *bp = 0;
868 return (last_lex = LX_DLIT);
869 }
870 }
871
872 ADDCHR(c);
873 *bp = 0;
874 for (i = 0; special[i].lx_chr != 0; i++)
875 if (c == special[i].lx_chr)
876 return (last_lex = special[i].lx_val);
877
878 if (iscntrl (c))
879 return (last_lex = LX_ERR);
880
881 for (;;) {
882 if ((c = *cp++) == 0)
883 break;
884 for (i = 0; special[i].lx_chr != 0; i++)
885 if (c == special[i].lx_chr)
886 goto got_atom;
887 if (iscntrl (c) || isspace (c))
888 break;
889 ADDCHR(c);
890 }
891 got_atom: ;
892 if (c == 0)
893 cp = NULL;
894 else
895 cp--;
896 *bp = 0;
897 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
898 ? LX_ATOM : LX_AT;
899 return last_lex;
900
901 my_lex_buffull:
902 /* Out of buffer space. *bp is the last byte in the buffer */
903 *bp = 0;
904 return (last_lex = LX_ERR);
905 }
906
907
908 char *
909 legal_person (char *p)
910 {
911 int i;
912 register char *cp;
913 static char buffer[BUFSIZ];
914
915 if (*p == '"')
916 return p;
917 for (cp = p; *cp; cp++)
918 for (i = 0; special[i].lx_chr; i++)
919 if (*cp == special[i].lx_chr) {
920 sprintf (buffer, "\"%s\"", p);
921 return buffer;
922 }
923
924 return p;
925 }
926
927
928 int
929 mfgets (FILE *in, char **bp)
930 {
931 int i;
932 register char *cp, *dp, *ep;
933 static int len = 0;
934 static char *pp = NULL;
935
936 if (pp == NULL)
937 if (!(pp = malloc ((size_t) (len = BUFSIZ))))
938 return NOTOK;
939
940 for (ep = (cp = pp) + len - 2;;) {
941 switch (i = getc (in)) {
942 case EOF:
943 eol: ;
944 if (cp != pp) {
945 *cp = 0;
946 *bp = pp;
947 return OK;
948 }
949 eoh: ;
950 *bp = NULL;
951 free (pp);
952 pp = NULL;
953 return DONE;
954
955 case 0:
956 continue;
957
958 case '\n':
959 if (cp == pp) /* end of headers, gobble it */
960 goto eoh;
961 switch (i = getc (in)) {
962 default: /* end of line */
963 case '\n': /* end of headers, save for next call */
964 ungetc (i, in);
965 goto eol;
966
967 case ' ': /* continue headers */
968 case '\t':
969 *cp++ = '\n';
970 break;
971 } /* fall into default case */
972
973 default:
974 *cp++ = i;
975 break;
976 }
977 if (cp >= ep) {
978 if (!(dp = realloc (pp, (size_t) (len += BUFSIZ)))) {
979 free (pp);
980 pp = NULL;
981 return NOTOK;
982 }
983 else
984 cp += dp - pp, ep = (pp = cp) + len - 2;
985 }
986 }
987 }