]> diplodocus.org Git - nmh/blob - sbr/mf.c
Fix a segfault that happens when using the -file option.
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
8 */
9
10 #include <h/mf.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <h/utils.h>
14
15 /*
16 * static prototypes
17 */
static char *getcpy (char *);			/* allocate a copy of a string */
static void compress (char *, unsigned char *);	/* copy a string, collapsing whitespace runs */
static int isat (char *);			/* does the text start with " at " (any case)? */
static int parse_address (void);		/* parse one address from the getadrx() input */
static int phrase (char *);			/* gather atoms/quoted strings into pers */
static int route_addr (char *);			/* [route] local-part ["@" domain] */
static int local_part (char *);			/* dotted local-part, accumulated in mbox */
static int domain (char *);			/* dotted domain, accumulated in host */
static int route (char *);			/* source route, accumulated in path */
static int my_lex (char *);			/* lexer: copy the next token into the buffer */
28
29
/*
 * getcpy -- return a newly allocated copy of the string s.
 *
 * The caller owns the result and releases it with free().  A NULL
 * argument is treated as a programming error and aborts the process.
 *
 * The allocation is one byte larger than the string needs, exactly as
 * in the original code; the reason is not visible in this file.
 * mh_xmalloc() is declared elsewhere (presumably it never returns
 * NULL -- confirm against h/utils.h).
 */
static char *
getcpy (char *s)
{
    char *p;

    /* abort() does not return, so nothing further is needed here */
    if (!s)
	abort ();

    p = mh_xmalloc ((size_t) (strlen (s) + 2));
    strcpy (p, s);
    return p;
}
47
48
/*
 * isfrom -- does string begin with "From " or with ">From "?
 *
 * Returns 1 when it does, 0 otherwise.
 */
int
isfrom(char *string)
{
    /* a single leading '>' is skipped; what follows must be "From " */
    if (*string == '>')
	string++;

    return strncmp (string, "From ", 5) == 0;
}
55
56
57 int
58 lequal (unsigned char *a, unsigned char *b)
59 {
60 for (; *a; a++, b++)
61 if (*b == 0)
62 return FALSE;
63 else {
64 char c1 = islower (*a) ? toupper (*a) : *a;
65 char c2 = islower (*b) ? toupper (*b) : *b;
66 if (c1 != c2)
67 return FALSE;
68 }
69
70 return (*b == 0);
71 }
72
73
74 /*
75 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
76 * addresses, so for each list of addresses we see if we can find some
77 * character to give us a hint.
78 */
79
80
#define CHKADR 0 /* undetermined address style */
#define UNIXDR 1 /* UNIX-style address */
#define ARPADR 2 /* ARPAnet-style address */


/* any of these characters in an address list makes seekadrx() choose getadrx() */
static char *punctuators = ";<>.()[]";
/* uucpadrx() state: vp is its working copy of the list, tp the not-yet-parsed rest */
static char *vp = NULL;
static char *tp = NULL;

/* result record returned by uucpadrx() */
static struct adrx adrxs1;
91
92
93 struct adrx *
94 seekadrx (char *addrs)
95 {
96 static int state = CHKADR;
97 register char *cp;
98 register struct adrx *adrxp;
99
100 if (state == CHKADR)
101 for (state = UNIXDR, cp = addrs; *cp; cp++)
102 if (strchr(punctuators, *cp)) {
103 state = ARPADR;
104 break;
105 }
106
107 switch (state) {
108 case UNIXDR:
109 adrxp = uucpadrx (addrs);
110 break;
111
112 case ARPADR:
113 default:
114 adrxp = getadrx (addrs);
115 break;
116 }
117
118 if (adrxp == NULL)
119 state = CHKADR;
120
121 return adrxp;
122 }
123
124
125 /*
126 * uucpadrx() implements a partial UUCP-style address parser. It's based
127 * on the UUCP notion that addresses are separated by spaces or commas.
128 */
129
130
131 struct adrx *
132 uucpadrx (char *addrs)
133 {
134 register unsigned char *cp, *wp, *xp, *yp;
135 register char *zp;
136 register struct adrx *adrxp = &adrxs1;
137
138 if (vp == NULL) {
139 vp = tp = getcpy (addrs);
140 compress (addrs, vp);
141 }
142 else
143 if (tp == NULL) {
144 free (vp);
145 vp = NULL;
146 return NULL;
147 }
148
149 for (cp = tp; isspace (*cp); cp++)
150 continue;
151 if (*cp == 0) {
152 free (vp);
153 vp = tp = NULL;
154 return NULL;
155 }
156
157 if ((wp = strchr(cp, ',')) == NULL) {
158 if ((wp = strchr(cp, ' ')) != NULL) {
159 xp = wp;
160 while (isspace (*xp))
161 xp++;
162 if (*xp != 0 && isat (--xp)) {
163 yp = xp + 4;
164 while (isspace (*yp))
165 yp++;
166 if (*yp != 0) {
167 if ((zp = strchr(yp, ' ')) != NULL)
168 *zp = 0, tp = ++zp;
169 else
170 tp = NULL;
171 }
172 else
173 *wp = 0, tp = ++wp;
174 }
175 else
176 *wp = 0, tp = ++wp;
177 }
178 else
179 tp = NULL;
180 }
181 else
182 *wp = 0, tp = ++wp;
183
184 if (adrxp->text)
185 free (adrxp->text);
186 adrxp->text = getcpy (cp);
187 adrxp->mbox = cp;
188 adrxp->host = adrxp->path = NULL;
189 if ((wp = strrchr(cp, '@')) != NULL) {
190 *wp++ = 0;
191 adrxp->host = *wp ? wp : NULL;
192 }
193 else
194 for (wp = cp + strlen (cp) - 4; wp >= cp; wp--)
195 if (isat (wp)) {
196 *wp++ = 0;
197 adrxp->host = wp + 3;
198 }
199
200 adrxp->pers = adrxp->grp = adrxp->note = adrxp->err = NULL;
201 adrxp->ingrp = 0;
202
203 return adrxp;
204 }
205
206
/*
 * compress -- copy the string fp to tp, squeezing whitespace.
 *
 * Each run of whitespace characters becomes a single blank, leading
 * whitespace is dropped, and a trailing blank is removed.  tp must have
 * room for a string as long as fp.
 */
static void
compress (char *fp, unsigned char *tp)
{
    unsigned char *start = tp;
    char prev = ' ';		/* pretend a blank precedes the text */

    while ((*tp = *fp++) != 0) {
	if (!isspace (*tp)) {
	    prev = *tp++;
	    continue;
	}

	/* whitespace: keep one blank, but only after real text */
	if (prev != ' ') {
	    prev = ' ';
	    *tp++ = ' ';
	}
    }

    /* the output ended with a blank: overwrite it with the terminator */
    if (prev == ' ' && start < tp)
	*--tp = 0;
}
224
225
226 static int
227 isat (char *p)
228 {
229 return (strncmp (p, " AT ", 4)
230 && strncmp (p, " At ", 4)
231 && strncmp (p, " aT ", 4)
232 && strncmp (p, " at ", 4) ? FALSE : TRUE);
233 }
234
235
236 /*
237 *
238 * getadrx() implements a partial 822-style address parser. The parser
239 * is neither complete nor correct. It does however recognize nearly all
240 * of the 822 address syntax. In addition it handles the majority of the
241 * 733 syntax as well. Most problems arise from trying to accomodate both.
242 *
243 * In terms of 822, the route-specification in
244 *
245 * "<" [route] local-part "@" domain ">"
246 *
247 * is parsed and returned unchanged. Multiple at-signs are compressed
248 * via source-routing. Recursive groups are not allowed as per the
249 * standard.
250 *
251 * In terms of 733, " at " is recognized as equivalent to "@".
252 *
253 * In terms of both the parser will not complain about missing hosts.
254 *
255 * -----
256 *
257 * We should not allow addresses like
258 *
259 * Marshall T. Rose <MRose@UCI>
260 *
261 * but should insist on
262 *
263 * "Marshall T. Rose" <MRose@UCI>
264 *
265 * Unfortunately, a lot of mailers stupidly let people get away with this.
266 *
267 * -----
268 *
269 * We should not allow addresses like
270 *
271 * <MRose@UCI>
272 *
273 * but should insist on
274 *
275 * MRose@UCI
276 *
277 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
278 * this.
279 *
280 * -----
281 *
282 * We should not allow addresses like
283 *
284 * @UCI:MRose@UCI-750a
285 *
286 * but should insist on
287 *
288 * Marshall Rose <@UCI:MRose@UCI-750a>
289 *
290 * Unfortunately, a lot of mailers stupidly do this.
291 *
292 */
293
#define QUOTE '\\'	/* escape character inside comments, quoted strings and domain literals */

/* token codes returned by my_lex() and remembered in last_lex */
#define LX_END 0	/* end of input */
#define LX_ERR 1	/* lexical error */
#define LX_ATOM 2	/* atom */
#define LX_QSTR 3	/* "quoted string" */
#define LX_DLIT 4	/* [domain literal] */
#define LX_SEMI 5	/* ';' */
#define LX_COMA 6	/* ',' */
#define LX_LBRK 7	/* '<' */
#define LX_RBRK 8	/* '>' */
#define LX_COLN 9	/* ':' */
#define LX_DOT 10	/* '.' */
#define LX_AT 11	/* '@', or the word "at" standing for it */

/* maps a single special character to the token code my_lex() returns for it */
struct specials {
    char lx_chr;
    int lx_val;
};

/*
 * Table of special characters, terminated by a zero lx_chr.  my_lex()
 * also ends an atom at any character listed here, and legal_person()
 * quotes a name containing any of them.
 */
static struct specials special[] = {
    { ';', LX_SEMI },
    { ',', LX_COMA },
    { '<', LX_LBRK },
    { '>', LX_RBRK },
    { ':', LX_COLN },
    { '.', LX_DOT },
    { '@', LX_AT },
    { '(', LX_ERR },
    { ')', LX_ERR },
    { QUOTE, LX_ERR },
    { '"', LX_ERR },
    { '[', LX_ERR },
    { ']', LX_ERR },
    { 0, 0 }
};
330
static int glevel = 0;		/* group nesting depth */
static int ingrp = 0;		/* glevel recorded when the mailbox was parsed */
static int last_lex = LX_END;	/* token most recently returned by my_lex() */

static char *dp = NULL;		/* getadrx()'s private copy of the address list */
static unsigned char *cp = NULL; /* lexer position within dp; NULL once input is used up */
static unsigned char *ap = NULL; /* start of the address currently being parsed */
/* pieces of the current address; freed and reset at the start of each getadrx() call */
static char *pers = NULL;
static char *mbox = NULL;
static char *host = NULL;
static char *path = NULL;
static char *grp = NULL;
static char *note = NULL;
static char err[BUFSIZ];	/* error message for the current address, "" if none */
static char adr[BUFSIZ];	/* text of the current address; also scratch for my_lex() */

/* result record returned by getadrx() */
static struct adrx adrxs2;
348
349
350 struct adrx *
351 getadrx (char *addrs)
352 {
353 register char *bp;
354 register struct adrx *adrxp = &adrxs2;
355
356 if (pers)
357 free (pers);
358 if (mbox)
359 free (mbox);
360 if (host)
361 free (host);
362 if (path)
363 free (path);
364 if (grp)
365 free (grp);
366 if (note)
367 free (note);
368 pers = mbox = host = path = grp = note = NULL;
369 err[0] = 0;
370
371 if (dp == NULL) {
372 dp = cp = getcpy (addrs ? addrs : "");
373 glevel = 0;
374 }
375 else
376 if (cp == NULL) {
377 free (dp);
378 dp = NULL;
379 return NULL;
380 }
381
382 switch (parse_address ()) {
383 case DONE:
384 free (dp);
385 dp = cp = NULL;
386 return NULL;
387
388 case OK:
389 switch (last_lex) {
390 case LX_COMA:
391 case LX_END:
392 break;
393
394 default: /* catch trailing comments */
395 bp = cp;
396 my_lex (adr);
397 cp = bp;
398 break;
399 }
400 break;
401
402 default:
403 break;
404 }
405
406 if (err[0])
407 for (;;) {
408 switch (last_lex) {
409 case LX_COMA:
410 case LX_END:
411 break;
412
413 default:
414 my_lex (adr);
415 continue;
416 }
417 break;
418 }
419 while (isspace (*ap))
420 ap++;
421 if (cp)
422 sprintf (adr, "%.*s", (int)(cp - ap), ap);
423 else
424 strcpy (adr, ap);
425 bp = adr + strlen (adr) - 1;
426 if (*bp == ',' || *bp == ';' || *bp == '\n')
427 *bp = 0;
428
429 adrxp->text = adr;
430 adrxp->pers = pers;
431 adrxp->mbox = mbox;
432 adrxp->host = host;
433 adrxp->path = path;
434 adrxp->grp = grp;
435 adrxp->ingrp = ingrp;
436 adrxp->note = note;
437 adrxp->err = err[0] ? err : NULL;
438
439 return adrxp;
440 }
441
442
443 static int
444 parse_address (void)
445 {
446 char buffer[BUFSIZ];
447
448 again: ;
449 ap = cp;
450 switch (my_lex (buffer)) {
451 case LX_ATOM:
452 case LX_QSTR:
453 pers = getcpy (buffer);
454 break;
455
456 case LX_SEMI:
457 if (glevel-- <= 0) {
458 strcpy (err, "extraneous semi-colon");
459 return NOTOK;
460 }
461 case LX_COMA:
462 if (note) {
463 free (note);
464 note = NULL;
465 }
466 goto again;
467
468 case LX_END:
469 return DONE;
470
471 case LX_LBRK: /* sigh (2) */
472 goto get_addr;
473
474 case LX_AT: /* sigh (3) */
475 cp = ap;
476 if (route_addr (buffer) == NOTOK)
477 return NOTOK;
478 return OK; /* why be choosy? */
479
480 default:
481 sprintf (err, "illegal address construct (%s)", buffer);
482 return NOTOK;
483 }
484
485 switch (my_lex (buffer)) {
486 case LX_ATOM:
487 case LX_QSTR:
488 pers = add (buffer, add (" ", pers));
489 more_phrase: ; /* sigh (1) */
490 if (phrase (buffer) == NOTOK)
491 return NOTOK;
492
493 switch (last_lex) {
494 case LX_LBRK:
495 get_addr: ;
496 if (route_addr (buffer) == NOTOK)
497 return NOTOK;
498 if (last_lex == LX_RBRK)
499 return OK;
500 sprintf (err, "missing right-bracket (%s)", buffer);
501 return NOTOK;
502
503 case LX_COLN:
504 get_group: ;
505 if (glevel++ > 0) {
506 sprintf (err, "nested groups not allowed (%s)", pers);
507 return NOTOK;
508 }
509 grp = add (": ", pers);
510 pers = NULL;
511 {
512 char *pp = cp;
513
514 for (;;)
515 switch (my_lex (buffer)) {
516 case LX_SEMI:
517 case LX_END: /* tsk, tsk */
518 glevel--;
519 return OK;
520
521 case LX_COMA:
522 continue;
523
524 default:
525 cp = pp;
526 return parse_address ();
527 }
528 }
529
530 case LX_DOT: /* sigh (1) */
531 pers = add (".", pers);
532 goto more_phrase;
533
534 default:
535 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
536 pers, buffer);
537 return NOTOK;
538 }
539
540 case LX_LBRK:
541 goto get_addr;
542
543 case LX_COLN:
544 goto get_group;
545
546 case LX_DOT:
547 mbox = add (buffer, pers);
548 pers = NULL;
549 if (route_addr (buffer) == NOTOK)
550 return NOTOK;
551 goto check_end;
552
553 case LX_AT:
554 ingrp = glevel;
555 mbox = pers;
556 pers = NULL;
557 if (domain (buffer) == NOTOK)
558 return NOTOK;
559 check_end: ;
560 switch (last_lex) {
561 case LX_SEMI:
562 if (glevel-- <= 0) {
563 strcpy (err, "extraneous semi-colon");
564 return NOTOK;
565 }
566 case LX_COMA:
567 case LX_END:
568 return OK;
569
570 default:
571 sprintf (err, "junk after local@domain (%s)", buffer);
572 return NOTOK;
573 }
574
575 case LX_SEMI: /* no host */
576 case LX_COMA:
577 case LX_END:
578 ingrp = glevel;
579 if (last_lex == LX_SEMI && glevel-- <= 0) {
580 strcpy (err, "extraneous semi-colon");
581 return NOTOK;
582 }
583 mbox = pers;
584 pers = NULL;
585 return OK;
586
587 default:
588 sprintf (err, "missing mailbox (%s)", buffer);
589 return NOTOK;
590 }
591 }
592
593
594 static int
595 phrase (char *buffer)
596 {
597 for (;;)
598 switch (my_lex (buffer)) {
599 case LX_ATOM:
600 case LX_QSTR:
601 pers = add (buffer, add (" ", pers));
602 continue;
603
604 default:
605 return OK;
606 }
607 }
608
609
610 static int
611 route_addr (char *buffer)
612 {
613 register char *pp = cp;
614
615 if (my_lex (buffer) == LX_AT) {
616 if (route (buffer) == NOTOK)
617 return NOTOK;
618 }
619 else
620 cp = pp;
621
622 if (local_part (buffer) == NOTOK)
623 return NOTOK;
624
625 switch (last_lex) {
626 case LX_AT:
627 return domain (buffer);
628
629 case LX_SEMI: /* if in group */
630 case LX_RBRK: /* no host */
631 case LX_COMA:
632 case LX_END:
633 return OK;
634
635 default:
636 sprintf (err, "no at-sign after local-part (%s)", buffer);
637 return NOTOK;
638 }
639 }
640
641
642 static int
643 local_part (char *buffer)
644 {
645 ingrp = glevel;
646
647 for (;;) {
648 switch (my_lex (buffer)) {
649 case LX_ATOM:
650 case LX_QSTR:
651 mbox = add (buffer, mbox);
652 break;
653
654 default:
655 sprintf (err, "no mailbox in local-part (%s)", buffer);
656 return NOTOK;
657 }
658
659 switch (my_lex (buffer)) {
660 case LX_DOT:
661 mbox = add (buffer, mbox);
662 continue;
663
664 default:
665 return OK;
666 }
667 }
668 }
669
670
671 static int
672 domain (char *buffer)
673 {
674 for (;;) {
675 switch (my_lex (buffer)) {
676 case LX_ATOM:
677 case LX_DLIT:
678 host = add (buffer, host);
679 break;
680
681 default:
682 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
683 return NOTOK;
684 }
685
686 switch (my_lex (buffer)) {
687 case LX_DOT:
688 host = add (buffer, host);
689 continue;
690
691 case LX_AT: /* sigh (0) */
692 mbox = add (host, add ("%", mbox));
693 free (host);
694 host = NULL;
695 continue;
696
697 default:
698 return OK;
699 }
700 }
701 }
702
703
704 static int
705 route (char *buffer)
706 {
707 path = getcpy ("@");
708
709 for (;;) {
710 switch (my_lex (buffer)) {
711 case LX_ATOM:
712 case LX_DLIT:
713 path = add (buffer, path);
714 break;
715
716 default:
717 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
718 return NOTOK;
719 }
720 switch (my_lex (buffer)) {
721 case LX_COMA:
722 path = add (buffer, path);
723 for (;;) {
724 switch (my_lex (buffer)) {
725 case LX_COMA:
726 continue;
727
728 case LX_AT:
729 path = add (buffer, path);
730 break;
731
732 default:
733 sprintf (err, "no at-sign found for next domain in route (%s)",
734 buffer);
735 }
736 break;
737 }
738 continue;
739
740 case LX_AT: /* XXX */
741 case LX_DOT:
742 path = add (buffer, path);
743 continue;
744
745 case LX_COLN:
746 path = add (buffer, path);
747 return OK;
748
749 default:
750 sprintf (err, "no colon found to terminate route (%s)", buffer);
751 return NOTOK;
752 }
753 }
754 }
755
756
757 static int
758 my_lex (char *buffer)
759 {
760 /* buffer should be at least BUFSIZ bytes long */
761 int i, gotat = 0;
762 register unsigned char c;
763 register char *bp;
764
765 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
766 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
767
768 bp = buffer;
769 *bp = 0;
770 if (!cp)
771 return (last_lex = LX_END);
772
773 gotat = isat (cp);
774 c = *cp++;
775 while (isspace (c))
776 c = *cp++;
777 if (c == 0) {
778 cp = NULL;
779 return (last_lex = LX_END);
780 }
781
782 if (c == '(') {
783 ADDCHR(c);
784 for (i = 0;;)
785 switch (c = *cp++) {
786 case 0:
787 cp = NULL;
788 return (last_lex = LX_ERR);
789 case QUOTE:
790 ADDCHR(c);
791 if ((c = *cp++) == 0) {
792 cp = NULL;
793 return (last_lex = LX_ERR);
794 }
795 ADDCHR(c);
796 continue;
797 case '(':
798 i++;
799 default:
800 ADDCHR(c);
801 continue;
802 case ')':
803 ADDCHR(c);
804 if (--i < 0) {
805 *bp = 0;
806 note = note ? add (buffer, add (" ", note))
807 : getcpy (buffer);
808 return my_lex (buffer);
809 }
810 }
811 }
812
813 if (c == '"') {
814 ADDCHR(c);
815 for (;;)
816 switch (c = *cp++) {
817 case 0:
818 cp = NULL;
819 return (last_lex = LX_ERR);
820 case QUOTE:
821 ADDCHR(c);
822 if ((c = *cp++) == 0) {
823 cp = NULL;
824 return (last_lex = LX_ERR);
825 }
826 default:
827 ADDCHR(c);
828 continue;
829 case '"':
830 ADDCHR(c);
831 *bp = 0;
832 return (last_lex = LX_QSTR);
833 }
834 }
835
836 if (c == '[') {
837 ADDCHR(c);
838 for (;;)
839 switch (c = *cp++) {
840 case 0:
841 cp = NULL;
842 return (last_lex = LX_ERR);
843 case QUOTE:
844 ADDCHR(c);
845 if ((c = *cp++) == 0) {
846 cp = NULL;
847 return (last_lex = LX_ERR);
848 }
849 default:
850 ADDCHR(c);
851 continue;
852 case ']':
853 ADDCHR(c);
854 *bp = 0;
855 return (last_lex = LX_DLIT);
856 }
857 }
858
859 ADDCHR(c);
860 *bp = 0;
861 for (i = 0; special[i].lx_chr != 0; i++)
862 if (c == special[i].lx_chr)
863 return (last_lex = special[i].lx_val);
864
865 if (iscntrl (c))
866 return (last_lex = LX_ERR);
867
868 for (;;) {
869 if ((c = *cp++) == 0)
870 break;
871 for (i = 0; special[i].lx_chr != 0; i++)
872 if (c == special[i].lx_chr)
873 goto got_atom;
874 if (iscntrl (c) || isspace (c))
875 break;
876 ADDCHR(c);
877 }
878 got_atom: ;
879 if (c == 0)
880 cp = NULL;
881 else
882 cp--;
883 *bp = 0;
884 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
885 ? LX_ATOM : LX_AT;
886 return last_lex;
887
888 my_lex_buffull:
889 /* Out of buffer space. *bp is the last byte in the buffer */
890 *bp = 0;
891 return (last_lex = LX_ERR);
892 }
893
894
895 char *
896 legal_person (char *p)
897 {
898 int i;
899 register char *cp;
900 static char buffer[BUFSIZ];
901
902 if (*p == '"')
903 return p;
904 for (cp = p; *cp; cp++)
905 for (i = 0; special[i].lx_chr; i++)
906 if (*cp == special[i].lx_chr) {
907 sprintf (buffer, "\"%s\"", p);
908 return buffer;
909 }
910
911 return p;
912 }
913
914
915 int
916 mfgets (FILE *in, char **bp)
917 {
918 int i;
919 register char *cp, *dp, *ep;
920 static int len = 0;
921 static char *pp = NULL;
922
923 if (pp == NULL)
924 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
925
926 for (ep = (cp = pp) + len - 2;;) {
927 switch (i = getc (in)) {
928 case EOF:
929 eol: ;
930 if (cp != pp) {
931 *cp = 0;
932 *bp = pp;
933 return OK;
934 }
935 eoh: ;
936 *bp = NULL;
937 free (pp);
938 pp = NULL;
939 return DONE;
940
941 case 0:
942 continue;
943
944 case '\n':
945 if (cp == pp) /* end of headers, gobble it */
946 goto eoh;
947 switch (i = getc (in)) {
948 default: /* end of line */
949 case '\n': /* end of headers, save for next call */
950 ungetc (i, in);
951 goto eol;
952
953 case ' ': /* continue headers */
954 case '\t':
955 *cp++ = '\n';
956 break;
957 } /* fall into default case */
958
959 default:
960 *cp++ = i;
961 break;
962 }
963 if (cp >= ep) {
964 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
965 cp += dp - pp, ep = (pp = cp) + len - 2;
966 }
967 }
968 }