]> diplodocus.org Git - nmh/blob - sbr/mf.c
Document argsplit changes in mh-profile man page.
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
8 */
9
10 #include <h/mf.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <h/utils.h>
14
15 /*
16 * static prototypes
17 */
18 static char *getcpy (char *);
19 static void compress (char *, char *);
20 static int isat (char *);
21 static int parse_address (void);
22 static int phrase (char *);
23 static int route_addr (char *);
24 static int local_part (char *);
25 static int domain (char *);
26 static int route (char *);
27 static int my_lex (char *);
28
29
/*
 * getcpy -- return a freshly allocated copy of the string s.
 *
 * The copy is obtained from mh_xmalloc() and is sized strlen(s) + 2,
 * i.e. one byte more than the copy itself needs; callers own the
 * result and release it with free().
 *
 * A NULL argument is treated as a programming error and aborts the
 * process.  abort() does not return, so nothing follows it.
 */
static char *
getcpy (char *s)
{
    char *copy;

    if (s == NULL)
        abort ();

    copy = mh_xmalloc ((size_t) (strlen (s) + 2));
    strcpy (copy, s);
    return copy;
}
47
48
/*
 * isfrom -- does the line begin with "From " or ">From "?
 *
 * Returns 1 if string starts with either prefix, 0 otherwise.
 */
int
isfrom(char *string)
{
    static const char tag[] = "From ";
    const char *start = string;

    /* a single leading '>' is tolerated in front of the tag */
    if (*start == '>')
        start++;

    return strncmp (start, tag, sizeof tag - 1) == 0;
}
55
56
57 int
58 lequal (char *a, char *b)
59 {
60 for (; *a; a++, b++)
61 if (*b == 0)
62 return FALSE;
63 else {
64 char c1 = islower ((unsigned char) *a) ?
65 toupper ((unsigned char) *a) : *a;
66 char c2 = islower ((unsigned char) *b) ?
67 toupper ((unsigned char) *b) : *b;
68 if (c1 != c2)
69 return FALSE;
70 }
71
72 return (*b == 0);
73 }
74
75
76 /*
77 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
78 * addresses, so for each list of addresses we see if we can find some
79 * character to give us a hint.
80 */
81
82
/* Values of the address-style state kept by seekadrx(). */
#define	CHKADR	0		/* undetermined address style */
#define	UNIXDR	1		/* UNIX-style address         */
#define	ARPADR	2		/* ARPAnet-style address      */


/*
 * Any of these characters in an address list makes seekadrx() pick
 * the ARPA-style parser (getadrx) instead of the UUCP-style one.
 */
static char *punctuators = ";<>.()[]";

/*
 * State of the UUCP-style parser (uucpadrx):
 *   vp -- allocated, whitespace-compressed copy of the address list,
 *         NULL when no list is being parsed
 *   tp -- position in vp of the next address to return, NULL once the
 *         last address has been handed out
 */
static char *vp = NULL;
static char *tp = NULL;

/* Result record returned by uucpadrx(); reused on every call. */
static struct adrx adrxs1;
93
94
95 struct adrx *
96 seekadrx (char *addrs)
97 {
98 static int state = CHKADR;
99 register char *cp;
100 register struct adrx *adrxp;
101
102 if (state == CHKADR)
103 for (state = UNIXDR, cp = addrs; *cp; cp++)
104 if (strchr(punctuators, *cp)) {
105 state = ARPADR;
106 break;
107 }
108
109 switch (state) {
110 case UNIXDR:
111 adrxp = uucpadrx (addrs);
112 break;
113
114 case ARPADR:
115 default:
116 adrxp = getadrx (addrs);
117 break;
118 }
119
120 if (adrxp == NULL)
121 state = CHKADR;
122
123 return adrxp;
124 }
125
126
127 /*
128 * uucpadrx() implements a partial UUCP-style address parser. It's based
129 * on the UUCP notion that addresses are separated by spaces or commas.
130 */
131
132
133 struct adrx *
134 uucpadrx (char *addrs)
135 {
136 char *cp, *wp, *xp, *yp, *zp;
137 struct adrx *adrxp = &adrxs1;
138
139 if (vp == NULL) {
140 vp = tp = getcpy (addrs);
141 compress (addrs, vp);
142 }
143 else
144 if (tp == NULL) {
145 free (vp);
146 vp = NULL;
147 return NULL;
148 }
149
150 for (cp = tp; isspace ((unsigned char) *cp); cp++)
151 continue;
152 if (*cp == 0) {
153 free (vp);
154 vp = tp = NULL;
155 return NULL;
156 }
157
158 if ((wp = strchr(cp, ',')) == NULL) {
159 if ((wp = strchr(cp, ' ')) != NULL) {
160 xp = wp;
161 while (isspace ((unsigned char) *xp))
162 xp++;
163 if (*xp != 0 && isat (--xp)) {
164 yp = xp + 4;
165 while (isspace ((unsigned char) *yp))
166 yp++;
167 if (*yp != 0) {
168 if ((zp = strchr(yp, ' ')) != NULL)
169 *zp = 0, tp = ++zp;
170 else
171 tp = NULL;
172 }
173 else
174 *wp = 0, tp = ++wp;
175 }
176 else
177 *wp = 0, tp = ++wp;
178 }
179 else
180 tp = NULL;
181 }
182 else
183 *wp = 0, tp = ++wp;
184
185 if (adrxp->text)
186 free (adrxp->text);
187 adrxp->text = getcpy (cp);
188 adrxp->mbox = cp;
189 adrxp->host = adrxp->path = NULL;
190 if ((wp = strrchr(cp, '@')) != NULL) {
191 *wp++ = 0;
192 adrxp->host = *wp ? wp : NULL;
193 }
194 else
195 for (wp = cp + strlen (cp) - 4; wp >= cp; wp--)
196 if (isat (wp)) {
197 *wp++ = 0;
198 adrxp->host = wp + 3;
199 }
200
201 adrxp->pers = adrxp->grp = adrxp->note = adrxp->err = NULL;
202 adrxp->ingrp = 0;
203
204 return adrxp;
205 }
206
207
/*
 * compress -- copy fp to tp, squeezing whitespace.
 *
 * Leading whitespace is dropped, every interior run of whitespace
 * becomes a single space, and a trailing space is removed.  The
 * result is NUL-terminated.  tp must have room for strlen(fp) + 1
 * bytes.
 */
static void
compress (char *fp, char *tp)
{
    char *const start = tp;
    const char *src = fp;
    char prev = ' ';            /* pretend a space precedes the input */

    for (;;) {
        char ch = *src++;

        *tp = ch;
        if (ch == 0)
            break;

        if (!isspace ((unsigned char) ch)) {
            prev = ch;
            tp++;
        }
        else if (prev != ' ') {
            /* first whitespace after a word: keep exactly one space */
            prev = ' ';
            *tp++ = ' ';
        }
    }

    /* output ended in a space: overwrite it with the terminator */
    if (prev == ' ' && start < tp)
        *--tp = 0;
}
224
225
226 static int
227 isat (char *p)
228 {
229 return (strncmp (p, " AT ", 4)
230 && strncmp (p, " At ", 4)
231 && strncmp (p, " aT ", 4)
232 && strncmp (p, " at ", 4) ? FALSE : TRUE);
233 }
234
235
236 /*
237 *
238 * getadrx() implements a partial 822-style address parser. The parser
239 * is neither complete nor correct. It does however recognize nearly all
240 * of the 822 address syntax. In addition it handles the majority of the
241 * 733 syntax as well. Most problems arise from trying to accommodate both.
242 *
243 * In terms of 822, the route-specification in
244 *
245 * "<" [route] local-part "@" domain ">"
246 *
247 * is parsed and returned unchanged. Multiple at-signs are compressed
248 * via source-routing. Recursive groups are not allowed as per the
249 * standard.
250 *
251 * In terms of 733, " at " is recognized as equivalent to "@".
252 *
253 * In terms of both the parser will not complain about missing hosts.
254 *
255 * -----
256 *
257 * We should not allow addresses like
258 *
259 * Marshall T. Rose <MRose@UCI>
260 *
261 * but should insist on
262 *
263 * "Marshall T. Rose" <MRose@UCI>
264 *
265 * Unfortunately, a lot of mailers stupidly let people get away with this.
266 *
267 * -----
268 *
269 * We should not allow addresses like
270 *
271 * <MRose@UCI>
272 *
273 * but should insist on
274 *
275 * MRose@UCI
276 *
277 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
278 * this.
279 *
280 * -----
281 *
282 * We should not allow addresses like
283 *
284 * @UCI:MRose@UCI-750a
285 *
286 * but should insist on
287 *
288 * Marshall Rose <@UCI:MRose@UCI-750a>
289 *
290 * Unfortunately, a lot of mailers stupidly do this.
291 *
292 */
293
/* Character that escapes the next one inside comments, quoted strings
   and domain literals (see my_lex). */
#define	QUOTE	'\\'

/* Token codes returned by my_lex() and remembered in last_lex. */
#define	LX_END	 0	/* end of input                          */
#define	LX_ERR	 1	/* lexical error / illegal character     */
#define	LX_ATOM	 2	/* atom                                  */
#define	LX_QSTR	 3	/* "quoted string"                       */
#define	LX_DLIT	 4	/* [domain literal]                      */
#define	LX_SEMI	 5	/* ';'                                   */
#define	LX_COMA	 6	/* ','                                   */
#define	LX_LBRK	 7	/* '<'                                   */
#define	LX_RBRK	 8	/* '>'                                   */
#define	LX_COLN	 9	/* ':'                                   */
#define	LX_DOT	10	/* '.'                                   */
#define	LX_AT	11	/* '@', or " at " as decided by my_lex   */

/* One entry of the special-character table: a character and the token
   code my_lex() returns for it. */
struct specials {
    char lx_chr;
    int  lx_val;
};

/*
 * Characters that terminate an atom.  The entries mapped to LX_ERR are
 * only reached when the character shows up where my_lex() has not
 * already handled it as the start of a comment, quoted string or
 * domain literal.  The table ends with a zero lx_chr.
 */
static struct specials special[] = {
    { ';',   LX_SEMI },
    { ',',   LX_COMA },
    { '<',   LX_LBRK },
    { '>',   LX_RBRK },
    { ':',   LX_COLN },
    { '.',   LX_DOT },
    { '@',   LX_AT },
    { '(',   LX_ERR },
    { ')',   LX_ERR },
    { QUOTE, LX_ERR },
    { '"',   LX_ERR },
    { '[',   LX_ERR },
    { ']',   LX_ERR },
    { 0,     0 }
};

static int glevel = 0;		/* group nesting depth (0 = not in a group) */
static int ingrp = 0;		/* glevel at the time the mailbox was parsed */
static int last_lex = LX_END;	/* token code most recently returned by my_lex */

static char *dp = NULL;		/* allocated copy of the list getadrx is parsing */
static char *cp = NULL;		/* scan position in dp; NULL once input is exhausted */
static char *ap = NULL;		/* start of the address currently being parsed */
/* Components of the current address; allocated, released by getadrx. */
static char *pers = NULL;	/* phrase / personal name */
static char *mbox = NULL;	/* local part */
static char *host = NULL;	/* domain */
static char *path = NULL;	/* source route */
static char *grp = NULL;	/* group name */
static char *note = NULL;	/* text of parenthesised comments */
static char err[BUFSIZ];	/* error message; empty string means no error */
static char adr[BUFSIZ];	/* text of the current address, returned as ->text */

/* Result record returned by getadrx(); reused on every call. */
static struct adrx adrxs2;
348
349
350 struct adrx *
351 getadrx (char *addrs)
352 {
353 register char *bp;
354 register struct adrx *adrxp = &adrxs2;
355
356 if (pers)
357 free (pers);
358 if (mbox)
359 free (mbox);
360 if (host)
361 free (host);
362 if (path)
363 free (path);
364 if (grp)
365 free (grp);
366 if (note)
367 free (note);
368 pers = mbox = host = path = grp = note = NULL;
369 err[0] = 0;
370
371 if (dp == NULL) {
372 dp = cp = getcpy (addrs ? addrs : "");
373 glevel = 0;
374 }
375 else
376 if (cp == NULL) {
377 free (dp);
378 dp = NULL;
379 return NULL;
380 }
381
382 switch (parse_address ()) {
383 case DONE:
384 free (dp);
385 dp = cp = NULL;
386 return NULL;
387
388 case OK:
389 switch (last_lex) {
390 case LX_COMA:
391 case LX_END:
392 break;
393
394 default: /* catch trailing comments */
395 bp = cp;
396 my_lex (adr);
397 cp = bp;
398 break;
399 }
400 break;
401
402 default:
403 break;
404 }
405
406 if (err[0])
407 for (;;) {
408 switch (last_lex) {
409 case LX_COMA:
410 case LX_END:
411 break;
412
413 default:
414 my_lex (adr);
415 continue;
416 }
417 break;
418 }
419 while (isspace ((unsigned char) *ap))
420 ap++;
421 if (cp)
422 sprintf (adr, "%.*s", (int)(cp - ap), ap);
423 else
424 strcpy (adr, ap);
425 bp = adr + strlen (adr) - 1;
426 if (*bp == ',' || *bp == ';' || *bp == '\n')
427 *bp = 0;
428
429 adrxp->text = adr;
430 adrxp->pers = pers;
431 adrxp->mbox = mbox;
432 adrxp->host = host;
433 adrxp->path = path;
434 adrxp->grp = grp;
435 adrxp->ingrp = ingrp;
436 adrxp->note = note;
437 adrxp->err = err[0] ? err : NULL;
438
439 return adrxp;
440 }
441
442
443 static int
444 parse_address (void)
445 {
446 char buffer[BUFSIZ];
447
448 again: ;
449 ap = cp;
450 switch (my_lex (buffer)) {
451 case LX_ATOM:
452 case LX_QSTR:
453 pers = getcpy (buffer);
454 break;
455
456 case LX_SEMI:
457 if (glevel-- <= 0) {
458 strcpy (err, "extraneous semi-colon");
459 return NOTOK;
460 }
461 case LX_COMA:
462 if (note) {
463 free (note);
464 note = NULL;
465 }
466 goto again;
467
468 case LX_END:
469 return DONE;
470
471 case LX_LBRK: /* sigh (2) */
472 goto get_addr;
473
474 case LX_AT: /* sigh (3) */
475 cp = ap;
476 if (route_addr (buffer) == NOTOK)
477 return NOTOK;
478 return OK; /* why be choosy? */
479
480 default:
481 sprintf (err, "illegal address construct (%s)", buffer);
482 return NOTOK;
483 }
484
485 switch (my_lex (buffer)) {
486 case LX_ATOM:
487 case LX_QSTR:
488 pers = add (buffer, add (" ", pers));
489 more_phrase: ; /* sigh (1) */
490 if (phrase (buffer) == NOTOK)
491 return NOTOK;
492
493 switch (last_lex) {
494 case LX_LBRK:
495 get_addr: ;
496 if (route_addr (buffer) == NOTOK)
497 return NOTOK;
498 if (last_lex == LX_RBRK)
499 return OK;
500 sprintf (err, "missing right-bracket (%s)", buffer);
501 return NOTOK;
502
503 case LX_COLN:
504 get_group: ;
505 if (glevel++ > 0) {
506 sprintf (err, "nested groups not allowed (%s)", pers);
507 return NOTOK;
508 }
509 grp = add (": ", pers);
510 pers = NULL;
511 {
512 char *pp = cp;
513
514 for (;;)
515 switch (my_lex (buffer)) {
516 case LX_SEMI:
517 case LX_END: /* tsk, tsk */
518 glevel--;
519 return OK;
520
521 case LX_COMA:
522 continue;
523
524 default:
525 cp = pp;
526 return parse_address ();
527 }
528 }
529
530 case LX_DOT: /* sigh (1) */
531 pers = add (".", pers);
532 goto more_phrase;
533
534 default:
535 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
536 pers, buffer);
537 return NOTOK;
538 }
539
540 case LX_LBRK:
541 goto get_addr;
542
543 case LX_COLN:
544 goto get_group;
545
546 case LX_DOT:
547 mbox = add (buffer, pers);
548 pers = NULL;
549 if (route_addr (buffer) == NOTOK)
550 return NOTOK;
551 goto check_end;
552
553 case LX_AT:
554 ingrp = glevel;
555 mbox = pers;
556 pers = NULL;
557 if (domain (buffer) == NOTOK)
558 return NOTOK;
559 check_end: ;
560 switch (last_lex) {
561 case LX_SEMI:
562 if (glevel-- <= 0) {
563 strcpy (err, "extraneous semi-colon");
564 return NOTOK;
565 }
566 case LX_COMA:
567 case LX_END:
568 return OK;
569
570 default:
571 sprintf (err, "junk after local@domain (%s)", buffer);
572 return NOTOK;
573 }
574
575 case LX_SEMI: /* no host */
576 case LX_COMA:
577 case LX_END:
578 ingrp = glevel;
579 if (last_lex == LX_SEMI && glevel-- <= 0) {
580 strcpy (err, "extraneous semi-colon");
581 return NOTOK;
582 }
583 mbox = pers;
584 pers = NULL;
585 return OK;
586
587 default:
588 sprintf (err, "missing mailbox (%s)", buffer);
589 return NOTOK;
590 }
591 }
592
593
594 static int
595 phrase (char *buffer)
596 {
597 for (;;)
598 switch (my_lex (buffer)) {
599 case LX_ATOM:
600 case LX_QSTR:
601 pers = add (buffer, add (" ", pers));
602 continue;
603
604 default:
605 return OK;
606 }
607 }
608
609
610 static int
611 route_addr (char *buffer)
612 {
613 register char *pp = cp;
614
615 if (my_lex (buffer) == LX_AT) {
616 if (route (buffer) == NOTOK)
617 return NOTOK;
618 }
619 else
620 cp = pp;
621
622 if (local_part (buffer) == NOTOK)
623 return NOTOK;
624
625 switch (last_lex) {
626 case LX_AT:
627 return domain (buffer);
628
629 case LX_SEMI: /* if in group */
630 case LX_RBRK: /* no host */
631 case LX_COMA:
632 case LX_END:
633 return OK;
634
635 default:
636 sprintf (err, "no at-sign after local-part (%s)", buffer);
637 return NOTOK;
638 }
639 }
640
641
642 static int
643 local_part (char *buffer)
644 {
645 ingrp = glevel;
646
647 for (;;) {
648 switch (my_lex (buffer)) {
649 case LX_ATOM:
650 case LX_QSTR:
651 mbox = add (buffer, mbox);
652 break;
653
654 default:
655 sprintf (err, "no mailbox in local-part (%s)", buffer);
656 return NOTOK;
657 }
658
659 switch (my_lex (buffer)) {
660 case LX_DOT:
661 mbox = add (buffer, mbox);
662 continue;
663
664 default:
665 return OK;
666 }
667 }
668 }
669
670
671 static int
672 domain (char *buffer)
673 {
674 for (;;) {
675 switch (my_lex (buffer)) {
676 case LX_ATOM:
677 case LX_DLIT:
678 host = add (buffer, host);
679 break;
680
681 default:
682 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
683 return NOTOK;
684 }
685
686 switch (my_lex (buffer)) {
687 case LX_DOT:
688 host = add (buffer, host);
689 continue;
690
691 case LX_AT: /* sigh (0) */
692 mbox = add (host, add ("%", mbox));
693 free (host);
694 host = NULL;
695 continue;
696
697 default:
698 return OK;
699 }
700 }
701 }
702
703
704 static int
705 route (char *buffer)
706 {
707 path = getcpy ("@");
708
709 for (;;) {
710 switch (my_lex (buffer)) {
711 case LX_ATOM:
712 case LX_DLIT:
713 path = add (buffer, path);
714 break;
715
716 default:
717 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
718 return NOTOK;
719 }
720 switch (my_lex (buffer)) {
721 case LX_COMA:
722 path = add (buffer, path);
723 for (;;) {
724 switch (my_lex (buffer)) {
725 case LX_COMA:
726 continue;
727
728 case LX_AT:
729 path = add (buffer, path);
730 break;
731
732 default:
733 sprintf (err, "no at-sign found for next domain in route (%s)",
734 buffer);
735 }
736 break;
737 }
738 continue;
739
740 case LX_AT: /* XXX */
741 case LX_DOT:
742 path = add (buffer, path);
743 continue;
744
745 case LX_COLN:
746 path = add (buffer, path);
747 return OK;
748
749 default:
750 sprintf (err, "no colon found to terminate route (%s)", buffer);
751 return NOTOK;
752 }
753 }
754 }
755
756
/*
 * my_lex -- return the next token of the address list at cp.
 *
 * The token text is stored, NUL-terminated, in buffer and its code is
 * both returned and saved in last_lex.  cp is advanced past the token
 * and set to NULL when the end of the input is reached.
 *
 * Parenthesised comments (which may nest) are not returned as tokens:
 * their text is collected in note and the token after the comment is
 * returned instead.  An atom that was introduced by " at " is
 * reported as LX_AT unless a '<' appears later in the input.
 *
 * Returns LX_ERR for an unterminated comment, quoted string or domain
 * literal, for a control character, for a stray special character, or
 * when the token does not fit in buffer.
 */
static int
my_lex (char *buffer)
{
    /* buffer should be at least BUFSIZ bytes long */
    int i, gotat = 0;
    char c, *bp;

/* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
#define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)

    bp = buffer;
    *bp = 0;
    if (!cp)
        return (last_lex = LX_END);

    /* must be tested before the leading whitespace is skipped */
    gotat = isat (cp);
    c = *cp++;
    while (isspace ((unsigned char) c))
        c = *cp++;
    if (c == 0) {
        cp = NULL;
        return (last_lex = LX_END);
    }

    /* comment: i counts the nesting depth below the outermost '(' */
    if (c == '(') {
        ADDCHR(c);
        for (i = 0;;)
            switch (c = *cp++) {
                case 0:
                    cp = NULL;
                    return (last_lex = LX_ERR);
                case QUOTE:
                    /* keep the escape and the escaped character */
                    ADDCHR(c);
                    if ((c = *cp++) == 0) {
                        cp = NULL;
                        return (last_lex = LX_ERR);
                    }
                    ADDCHR(c);
                    continue;
                case '(':
                    i++;
                    /* fall through */
                default:
                    ADDCHR(c);
                    continue;
                case ')':
                    ADDCHR(c);
                    if (--i < 0) {
                        /* outermost ')' reached: save the comment and
                           lex the token that follows it */
                        *bp = 0;
                        note = note ? add (buffer, add (" ", note))
                            : getcpy (buffer);
                        return my_lex (buffer);
                    }
            }
    }

    /* quoted string, returned with its quotes */
    if (c == '"') {
        ADDCHR(c);
        for (;;)
            switch (c = *cp++) {
                case 0:
                    cp = NULL;
                    return (last_lex = LX_ERR);
                case QUOTE:
                    ADDCHR(c);
                    if ((c = *cp++) == 0) {
                        cp = NULL;
                        return (last_lex = LX_ERR);
                    }
                    /* fall through: store the escaped character */
                default:
                    ADDCHR(c);
                    continue;
                case '"':
                    ADDCHR(c);
                    *bp = 0;
                    return (last_lex = LX_QSTR);
            }
    }

    /* domain literal, returned with its brackets */
    if (c == '[') {
        ADDCHR(c);
        for (;;)
            switch (c = *cp++) {
                case 0:
                    cp = NULL;
                    return (last_lex = LX_ERR);
                case QUOTE:
                    ADDCHR(c);
                    if ((c = *cp++) == 0) {
                        cp = NULL;
                        return (last_lex = LX_ERR);
                    }
                    /* fall through: store the escaped character */
                default:
                    ADDCHR(c);
                    continue;
                case ']':
                    ADDCHR(c);
                    *bp = 0;
                    return (last_lex = LX_DLIT);
            }
    }

    /* single special character */
    ADDCHR(c);
    *bp = 0;
    for (i = 0; special[i].lx_chr != 0; i++)
        if (c == special[i].lx_chr)
            return (last_lex = special[i].lx_val);

    if (iscntrl ((unsigned char) c))
        return (last_lex = LX_ERR);

    /* atom: runs up to the next special, control or space character */
    for (;;) {
        if ((c = *cp++) == 0)
            break;
        for (i = 0; special[i].lx_chr != 0; i++)
            if (c == special[i].lx_chr)
                goto got_atom;
        if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
            break;
        ADDCHR(c);
    }
got_atom: ;
    if (c == 0)
        cp = NULL;
    else
        cp--;                   /* un-read the terminating character */
    *bp = 0;
    last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
        ? LX_ATOM : LX_AT;
    return last_lex;

 my_lex_buffull:
    /* Out of buffer space. *bp is the last byte in the buffer */
    *bp = 0;
    return (last_lex = LX_ERR);
}
892
893
894 char *
895 legal_person (char *p)
896 {
897 int i;
898 register char *cp;
899 static char buffer[BUFSIZ];
900
901 if (*p == '"')
902 return p;
903 for (cp = p; *cp; cp++)
904 for (i = 0; special[i].lx_chr; i++)
905 if (*cp == special[i].lx_chr) {
906 sprintf (buffer, "\"%s\"", p);
907 return buffer;
908 }
909
910 return p;
911 }
912
913
914 int
915 mfgets (FILE *in, char **bp)
916 {
917 int i;
918 register char *cp, *dp, *ep;
919 static int len = 0;
920 static char *pp = NULL;
921
922 if (pp == NULL)
923 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
924
925 for (ep = (cp = pp) + len - 2;;) {
926 switch (i = getc (in)) {
927 case EOF:
928 eol: ;
929 if (cp != pp) {
930 *cp = 0;
931 *bp = pp;
932 return OK;
933 }
934 eoh: ;
935 *bp = NULL;
936 free (pp);
937 pp = NULL;
938 return DONE;
939
940 case 0:
941 continue;
942
943 case '\n':
944 if (cp == pp) /* end of headers, gobble it */
945 goto eoh;
946 switch (i = getc (in)) {
947 default: /* end of line */
948 case '\n': /* end of headers, save for next call */
949 ungetc (i, in);
950 goto eol;
951
952 case ' ': /* continue headers */
953 case '\t':
954 *cp++ = '\n';
955 break;
956 } /* fall into default case */
957
958 default:
959 *cp++ = i;
960 break;
961 }
962 if (cp >= ep) {
963 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
964 cp += dp - pp, ep = (pp = cp) + len - 2;
965 }
966 }
967 }