]> diplodocus.org Git - nmh/blob - docs/historical/mh-6.8.5/zotnet/mf/mf.c
Beginning of implementation of new argsplit() function to handle arguments
[nmh] / docs / historical / mh-6.8.5 / zotnet / mf / mf.c
1 /* mf.c - mail filter subroutines */
2 #ifndef lint
3 static char ident[] = "@(#)$Id: mf.c,v 1.10 1992/12/15 00:20:22 jromine Exp $";
4 #endif /* lint */
5
6 #include "mf.h"
7 #include <ctype.h>
8 #include <stdio.h>
9
/*
 * Forward declarations for the file-local helpers that are used before
 * their definitions appear (old-style declarations, no prototypes).
 */
static int  isat ();
static int  parse_address ();
static int  phrase ();
static int  route_addr ();
static int  local_part ();
static int  domain ();
static int  route ();
static int  my_lex ();
static void compress ();
14 /* \f */
15
/*
 * getcpy - return a freshly malloc'ed copy of the string s.
 *
 * A NULL argument is treated as a fatal programming error: buffered
 * stdio output is flushed and the process aborts.  If malloc fails the
 * function returns NULL and it is up to the caller to notice.  The
 * allocation is one byte larger than the string plus its terminator,
 * exactly as before.
 */
static char *getcpy (s)
register char *s;
{
    register char *p;

    if (!s) {
        /* fflush(NULL) is the standard way to flush every output stream;
           it replaces the non-standard _cleanup() call.  abort() does not
           return, so no trailing pause() loop is needed. */
        (void) fflush ((FILE *) NULL);
        abort ();
    }

    if ((p = malloc ((unsigned) (strlen (s) + 2))) != NULL)
        (void) strcpy (p, s);
    return p;
}
26
27
/*
 * add - append s1 to the malloc'ed string s2.
 *
 * Returns a new malloc'ed string holding s2 followed by s1 and frees s2.
 * When s2 is NULL the result is simply a copy of s1 (via getcpy).
 * If the allocation fails, s2 is still freed and NULL is returned.
 */
static char *add (s1, s2)
register char *s1,
              *s2;
{
    register char *joined;
    unsigned needed;

    if (s2 == NULL)
        return getcpy (s1);

    needed = (unsigned) (strlen (s1) + strlen (s2) + 2);
    joined = malloc (needed);
    if (joined != NULL)
        (void) sprintf (joined, "%s%s", s2, s1);

    free (s2);
    return joined;
}
42
43 /* \f */
44
/*
 * isfrom - does the line start with "From " or ">From "?
 *
 * Returns 1 when string begins with either prefix, 0 otherwise.
 */
int isfrom (string)
register char *string;
{
    if (strncmp (string, "From ", 5) == 0)
        return 1;
    if (strncmp (string, ">From ", 6) == 0)
        return 1;
    return 0;
}
51
52
/*
 * lequal - compare two strings for equality, ignoring letter case.
 *
 * Returns non-zero when a and b have the same length and every pair of
 * characters matches after upper-casing; returns zero otherwise.
 *
 * Characters are converted to unsigned char before being handed to
 * toupper(): passing a negative plain char to the <ctype.h> routines is
 * undefined.
 */
int lequal (a, b)
register char *a,
              *b;
{
    /* advance while both strings have characters left and they match */
    while (*a != 0 && *b != 0
            && toupper ((unsigned char) *a) == toupper ((unsigned char) *b))
        a++, b++;

    /* equal only if both strings ended together */
    return (*a == 0 && *b == 0);
}
69
70 /* \f */
71
72 /*
73 *
74 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
75 * addresses, so for each list of addresses we see if we can find some
76 * character to give us a hint.
77 *
78 */
79
80
81 #define CHKADR 0 /* undertermined address style */
82 #define UNIXDR 1 /* UNIX-style address */
83 #define ARPADR 2 /* ARPAnet-style address */
84
85
86 static char *punctuators = ";<>.()[]";
87 static char *vp = NULL;
88 static char *tp = NULL;
89
90 static struct adrx adrxs1;
91
92 /* \f */
93
94 struct adrx *seekadrx (addrs)
95 register char *addrs;
96 {
97 static int state = CHKADR;
98 register char *cp;
99 register struct adrx *adrxp;
100
101 if (state == CHKADR)
102 for (state = UNIXDR, cp = addrs; *cp; cp++)
103 if (index (punctuators, *cp)) {
104 state = ARPADR;
105 break;
106 }
107
108 switch (state) {
109 case UNIXDR:
110 adrxp = uucpadrx (addrs);
111 break;
112
113 case ARPADR:
114 default:
115 adrxp = getadrx (addrs);
116 break;
117 }
118
119 if (adrxp == NULL)
120 state = CHKADR;
121
122 return adrxp;
123 }
124
125 /* \f */
126
127 /*
128 *
129 * uucpadrx() implements a partial UUCP-style address parser. It's based
130 * on the UUCP notion that addresses are separated by spaces or commas.
131 *
132 */
133
134
135 struct adrx *uucpadrx (addrs)
136 register char *addrs;
137 {
138 register char *cp,
139 *wp,
140 *xp,
141 *yp,
142 *zp;
143 register struct adrx *adrxp = &adrxs1;
144
145 if (vp == NULL) {
146 vp = tp = getcpy (addrs);
147 compress (addrs, vp);
148 }
149 else
150 if (tp == NULL) {
151 free (vp);
152 vp = NULL;
153 return NULL;
154 }
155
156 for (cp = tp; isspace (*cp); cp++)
157 continue;
158 if (*cp == 0) {
159 free (vp);
160 vp = tp = NULL;
161 return NULL;
162 }
163
164 /* \f */
165
166 if ((wp = index (cp, ',')) == NULL)
167 if ((wp = index (cp, ' ')) != NULL) {
168 xp = wp;
169 while (isspace (*xp))
170 xp++;
171 if (*xp != 0 && isat (--xp)) {
172 yp = xp + 4;
173 while (isspace (*yp))
174 yp++;
175 if (*yp != 0)
176 if ((zp = index (yp, ' ')) != NULL)
177 *zp = 0, tp = ++zp;
178 else
179 tp = NULL;
180 else
181 *wp = 0, tp = ++wp;
182 }
183 else
184 *wp = 0, tp = ++wp;
185 }
186 else
187 tp = NULL;
188 else
189 *wp = 0, tp = ++wp;
190
191 if (adrxp -> text)
192 free (adrxp -> text);
193 adrxp -> text = getcpy (cp);
194 adrxp -> mbox = cp;
195 adrxp -> host = adrxp -> path = NULL;
196 if ((wp = rindex (cp, '@')) != NULL) {
197 *wp++ = 0;
198 adrxp -> host = *wp ? wp : NULL;
199 }
200 else
201 for (wp = cp + strlen (cp) - 4; wp >= cp; wp--)
202 if (isat (wp)) {
203 *wp++ = 0;
204 adrxp -> host = wp + 3;
205 }
206
207 adrxp -> pers = adrxp -> grp = adrxp -> note = adrxp -> err = NULL;
208 adrxp -> ingrp = 0;
209
210 return adrxp;
211 }
212
213 /* \f */
214
/*
 * compress - copy fp to tp, squeezing white space.
 *
 * Every run of white-space characters becomes a single blank, and
 * leading and trailing white space is dropped.  tp must have room for
 * strlen (fp) + 1 bytes.  (The parameter tp is local to this function.)
 *
 * Each character is converted to unsigned char before the isspace()
 * test; handing a negative plain char to isspace() is undefined.
 */
static void compress (fp, tp)
register char *fp,
              *tp;
{
    register char prev = ' ';           /* last character emitted */
    register char *start = tp;
    int ch;

    while ((ch = (unsigned char) *fp++) != 0) {
        if (isspace (ch)) {
            /* emit one blank per run, and none at the very start */
            if (prev != ' ')
                *tp++ = prev = ' ';
        }
        else
            *tp++ = prev = (char) ch;
    }

    /* drop the single trailing blank, if one was emitted */
    if (prev == ' ' && start < tp)
        tp--;
    *tp = 0;
}
233
234
235 static int isat (p)
236 register char *p;
237 {
238 return (strncmp (p, " AT ", 4)
239 && strncmp (p, " At ", 4)
240 && strncmp (p, " aT ", 4)
241 && strncmp (p, " at ", 4) ? FALSE : TRUE);
242 }
243
244 /* \f */
245
246 /*
247 *
248 * getadrx() implements a partial 822-style address parser. The parser
249 * is neither complete nor correct. It does however recognize nearly all
250 * of the 822 address syntax. In addition it handles the majority of the
251 * 733 syntax as well. Most problems arise from trying to accommodate both.
252 *
253 * In terms of 822, the route-specification in
254 *
255 * "<" [route] local-part "@" domain ">"
256 *
257 * is parsed and returned unchanged. Multiple at-signs are compressed
258 * via source-routing. Recursive groups are not allowed as per the
259 * standard.
260 *
261 * In terms of 733, " at " is recognized as equivalent to "@".
262 *
263 * In terms of both the parser will not complain about missing hosts.
264 *
265 * -----
266 *
267 * We should not allow addresses like
268 *
269 * Marshall T. Rose <MRose@UCI>
270 *
271 * but should insist on
272 *
273 * "Marshall T. Rose" <MRose@UCI>
274 *
275 * Unfortunately, a lot of mailers stupidly let people get away with this.
276 *
277 * -----
278 *
279 * We should not allow addresses like
280 *
281 * <MRose@UCI>
282 *
283 * but should insist on
284 *
285 * MRose@UCI
286 *
287 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
288 * this.
289 *
290 * -----
291 *
292 * We should not allow addresses like
293 *
294 * @UCI:MRose@UCI-750a
295 *
296 * but should insist on
297 *
298 * Marshall Rose <@UCI:MRose@UCI-750a>
299 *
300 * Unfortunately, a lot of mailers stupidly do this.
301 *
302 */
303
304 /* \f */
305
#define QUOTE   '\\'            /* escapes the next character */

/* token codes returned by my_lex() */
#define LX_END  0               /* end of input */
#define LX_ERR  1               /* lexical error */
#define LX_ATOM 2               /* atom */
#define LX_QSTR 3               /* "quoted string" */
#define LX_DLIT 4               /* [domain literal] */
#define LX_SEMI 5               /* ; */
#define LX_COMA 6               /* , */
#define LX_LBRK 7               /* < */
#define LX_RBRK 8               /* > */
#define LX_COLN 9               /* : */
#define LX_DOT  10              /* . */
#define LX_AT   11              /* @ */

/*
 * Characters that end an atom, each paired with the token code my_lex()
 * returns when the character is met on its own.  The entry with a zero
 * character terminates the table.
 */
static struct {
    char    lx_chr;
    int     lx_val;
}               special[] = {
    { ';',   LX_SEMI },
    { ',',   LX_COMA },
    { '<',   LX_LBRK },
    { '>',   LX_RBRK },
    { ':',   LX_COLN },
    { '.',   LX_DOT },
    { '@',   LX_AT },
    { '(',   LX_ERR },
    { ')',   LX_ERR },
    { QUOTE, LX_ERR },
    { '"',   LX_ERR },
    { '[',   LX_ERR },
    { ']',   LX_ERR },
    { 0,     0 }
};
340
341 /* \f */
342
343 static int glevel = 0;
344 static int ingrp = 0;
345 static int last_lex = LX_END;
346
347 static char *dp = NULL;
348 static char *cp = NULL;
349 static char *ap = NULL;
350 static char *pers = NULL;
351 static char *mbox = NULL;
352 static char *host = NULL;
353 static char *path = NULL;
354 static char *grp = NULL;
355 static char *note = NULL;
356 static char err[BUFSIZ];
357 static char adr[BUFSIZ];
358
359 static struct adrx adrxs2;
360
361 /* \f */
362
363 struct adrx *getadrx (addrs)
364 register char *addrs;
365 {
366 register char *bp;
367 register struct adrx *adrxp = &adrxs2;
368
369 if (pers)
370 free (pers);
371 if (mbox)
372 free (mbox);
373 if (host)
374 free (host);
375 if (path)
376 free (path);
377 if (grp)
378 free (grp);
379 if (note)
380 free (note);
381 pers = mbox = host = path = grp = note = NULL;
382 err[0] = 0;
383
384 if (dp == NULL) {
385 dp = cp = getcpy (addrs ? addrs : "");
386 glevel = 0;
387 }
388 else
389 if (cp == NULL) {
390 free (dp);
391 dp = NULL;
392 return NULL;
393 }
394
395 switch (parse_address ()) {
396 case DONE:
397 free (dp);
398 dp = cp = NULL;
399 return NULL;
400
401 case OK:
402 switch (last_lex) {
403 case LX_COMA:
404 case LX_END:
405 break;
406
407 default: /* catch trailing comments */
408 bp = cp;
409 (void) my_lex (adr);
410 cp = bp;
411 break;
412 }
413 break;
414
415 default:
416 break;
417 }
418
419 if (err[0])
420 for (;;) {
421 switch (last_lex) {
422 case LX_COMA:
423 case LX_END:
424 break;
425
426 default:
427 (void) my_lex (adr);
428 continue;
429 }
430 break;
431 }
432 while (isspace (*ap))
433 ap++;
434 if (cp)
435 (void) sprintf (adr, "%.*s", cp - ap, ap);
436 else
437 (void) strcpy (adr, ap);
438 bp = adr + strlen (adr) - 1;
439 if (*bp == ',' || *bp == ';' || *bp == '\n')
440 *bp = 0;
441
442 /* \f */
443
444 adrxp -> text = adr;
445 adrxp -> pers = pers;
446 adrxp -> mbox = mbox;
447 adrxp -> host = host;
448 adrxp -> path = path;
449 adrxp -> grp = grp;
450 adrxp -> ingrp = ingrp;
451 adrxp -> note = note;
452 adrxp -> err = err[0] ? err : NULL;
453
454 return adrxp;
455 }
456
457 /* \f */
458
459 static int parse_address () {
460 char buffer[BUFSIZ];
461
462 again: ;
463 ap = cp;
464 switch (my_lex (buffer)) {
465 case LX_ATOM:
466 case LX_QSTR:
467 pers = getcpy (buffer);
468 break;
469
470 case LX_SEMI:
471 if (glevel-- <= 0) {
472 (void) strcpy (err, "extraneous semi-colon");
473 return NOTOK;
474 }
475 case LX_COMA:
476 if (note) {
477 free (note);
478 note = NULL;
479 }
480 goto again;
481
482 case LX_END:
483 return DONE;
484
485 case LX_LBRK: /* sigh (2) */
486 goto get_addr;
487
488 case LX_AT: /* sigh (3) */
489 cp = ap;
490 if (route_addr (buffer) == NOTOK)
491 return NOTOK;
492 return OK; /* why be choosy? */
493
494 default:
495 (void) sprintf (err, "illegal address construct (%s)", buffer);
496 return NOTOK;
497 }
498
499 /* \f */
500
501 switch (my_lex (buffer)) {
502 case LX_ATOM:
503 case LX_QSTR:
504 pers = add (buffer, add (" ", pers));
505 more_phrase: ; /* sigh (1) */
506 if (phrase (buffer) == NOTOK)
507 return NOTOK;
508
509 switch (last_lex) {
510 case LX_LBRK:
511 get_addr: ;
512 if (route_addr (buffer) == NOTOK)
513 return NOTOK;
514 if (last_lex == LX_RBRK)
515 return OK;
516 (void) sprintf (err, "missing right-bracket (%s)", buffer);
517 return NOTOK;
518
519 case LX_COLN:
520 get_group: ;
521 if (glevel++ > 0) {
522 (void) sprintf (err, "nested groups not allowed (%s)",
523 pers);
524 return NOTOK;
525 }
526 grp = add (": ", pers);
527 pers = NULL;
528 {
529 char *pp = cp;
530
531 for (;;)
532 switch (my_lex (buffer)) {
533 case LX_SEMI:
534 case LX_END: /* tsk, tsk */
535 glevel--;
536 return OK;
537
538 case LX_COMA:
539 continue;
540
541 default:
542 cp = pp;
543 return parse_address ();
544 }
545 }
546
547 case LX_DOT: /* sigh (1) */
548 pers = add (".", pers);
549 goto more_phrase;
550
551 default:
552 (void) sprintf (err,
553 "no mailbox in address, only a phrase (%s%s)",
554 pers, buffer);
555 return NOTOK;
556 }
557
558 /* \f */
559
560 case LX_LBRK:
561 goto get_addr;
562
563 case LX_COLN:
564 goto get_group;
565
566 case LX_DOT:
567 mbox = add (buffer, pers);
568 pers = NULL;
569 if (route_addr (buffer) == NOTOK)
570 return NOTOK;
571 goto check_end;
572
573 case LX_AT:
574 ingrp = glevel;
575 mbox = pers;
576 pers = NULL;
577 if (domain (buffer) == NOTOK)
578 return NOTOK;
579 check_end: ;
580 switch (last_lex) {
581 case LX_SEMI:
582 if (glevel-- <= 0) {
583 (void) strcpy (err, "extraneous semi-colon");
584 return NOTOK;
585 }
586 case LX_COMA:
587 case LX_END:
588 return OK;
589
590 default:
591 (void) sprintf (err, "junk after local@domain (%s)",
592 buffer);
593 return NOTOK;
594 }
595
596 case LX_SEMI: /* no host */
597 case LX_COMA:
598 case LX_END:
599 ingrp = glevel;
600 if (last_lex == LX_SEMI && glevel-- <= 0) {
601 (void) strcpy (err, "extraneous semi-colon");
602 return NOTOK;
603 }
604 mbox = pers;
605 pers = NULL;
606 return OK;
607
608 default:
609 (void) sprintf (err, "missing mailbox (%s)", buffer);
610 return NOTOK;
611 }
612 }
613
614 /* \f */
615
616 static int phrase (buffer)
617 register char *buffer;
618 {
619 for (;;)
620 switch (my_lex (buffer)) {
621 case LX_ATOM:
622 case LX_QSTR:
623 pers = add (buffer, add (" ", pers));
624 continue;
625
626 default:
627 return OK;
628 }
629 }
630
631 /* \f */
632
633 static int route_addr (buffer)
634 register char *buffer;
635 {
636 register char *pp = cp;
637
638 if (my_lex (buffer) == LX_AT) {
639 if (route (buffer) == NOTOK)
640 return NOTOK;
641 }
642 else
643 cp = pp;
644
645 if (local_part (buffer) == NOTOK)
646 return NOTOK;
647
648 switch (last_lex) {
649 case LX_AT:
650 return domain (buffer);
651
652 case LX_SEMI: /* if in group */
653 case LX_RBRK: /* no host */
654 case LX_COMA:
655 case LX_END:
656 return OK;
657
658 default:
659 (void) sprintf (err, "no at-sign after local-part (%s)", buffer);
660 return NOTOK;
661 }
662 }
663
664 /* \f */
665
666 static int local_part (buffer)
667 register char *buffer;
668 {
669 ingrp = glevel;
670
671 for (;;) {
672 switch (my_lex (buffer)) {
673 case LX_ATOM:
674 case LX_QSTR:
675 mbox = add (buffer, mbox);
676 break;
677
678 default:
679 (void) sprintf (err, "no mailbox in local-part (%s)", buffer);
680 return NOTOK;
681 }
682
683 switch (my_lex (buffer)) {
684 case LX_DOT:
685 mbox = add (buffer, mbox);
686 continue;
687
688 default:
689 return OK;
690 }
691 }
692 }
693
694 /* \f */
695
696 static int domain (buffer)
697 register char *buffer;
698 {
699 for (;;) {
700 switch (my_lex (buffer)) {
701 case LX_ATOM:
702 case LX_DLIT:
703 host = add (buffer, host);
704 break;
705
706 default:
707 (void) sprintf (err,
708 "no sub-domain in domain-part of address (%s)",
709 buffer);
710 return NOTOK;
711 }
712
713 switch (my_lex (buffer)) {
714 case LX_DOT:
715 host = add (buffer, host);
716 continue;
717
718 case LX_AT: /* sigh (0) */
719 mbox = add (host, add ("%", mbox));
720 free (host);
721 host = NULL;
722 continue;
723
724 default:
725 return OK;
726 }
727 }
728 }
729
730 /* \f */
731
732 static int route (buffer)
733 register char *buffer;
734 {
735 path = getcpy ("@");
736
737 for (;;) {
738 switch (my_lex (buffer)) {
739 case LX_ATOM:
740 case LX_DLIT:
741 path = add (buffer, path);
742 break;
743
744 default:
745 (void) sprintf (err,
746 "no sub-domain in domain-part of address (%s)",
747 buffer);
748 return NOTOK;
749 }
750 switch (my_lex (buffer)) {
751 case LX_COMA:
752 path = add (buffer, path);
753 for (;;) {
754 switch (my_lex (buffer)) {
755 case LX_COMA:
756 continue;
757
758 case LX_AT:
759 path = add (buffer, path);
760 break;
761
762 default:
763 (void) sprintf (err,
764 "no at-sign found for next domain in route (%s)",
765 buffer);
766 }
767 break;
768 }
769 continue;
770
771 case LX_AT: /* XXX */
772 case LX_DOT:
773 path = add (buffer, path);
774 continue;
775
776 case LX_COLN:
777 path = add (buffer, path);
778 return OK;
779
780 default:
781 (void) sprintf (err,
782 "no colon found to terminate route (%s)", buffer);
783 return NOTOK;
784 }
785 }
786 }
787
788 /* \f */
789
790 static int my_lex (buffer)
791 register char *buffer;
792 {
793 int i, gotat = 0;
794 register char c,
795 *bp;
796
797 bp = buffer;
798 *bp = 0;
799 if (!cp)
800 return (last_lex = LX_END);
801
802 gotat = isat (cp);
803 c = *cp++;
804 while (isspace (c))
805 c = *cp++;
806 if (c == 0) {
807 cp = NULL;
808 return (last_lex = LX_END);
809 }
810
811 if (c == '(')
812 for (*bp++ = c, i = 0;;)
813 switch (c = *cp++) {
814 case 0:
815 cp = NULL;
816 return (last_lex = LX_ERR);
817 case QUOTE:
818 *bp++ = c;
819 if ((c = *cp++) == 0) {
820 cp = NULL;
821 return (last_lex = LX_ERR);
822 }
823 *bp++ = c;
824 continue;
825 case '(':
826 i++;
827 default:
828 *bp++ = c;
829 continue;
830 case ')':
831 *bp++ = c;
832 if (--i < 0) {
833 *bp = 0;
834 note = note ? add (buffer, add (" ", note))
835 : getcpy (buffer);
836 return my_lex (buffer);
837 }
838 }
839
840 /* \f */
841
842 if (c == '"')
843 for (*bp++ = c;;)
844 switch (c = *cp++) {
845 case 0:
846 cp = NULL;
847 return (last_lex = LX_ERR);
848 case QUOTE:
849 *bp++ = c;
850 if ((c = *cp++) == 0) {
851 cp = NULL;
852 return (last_lex = LX_ERR);
853 }
854 default:
855 *bp++ = c;
856 continue;
857 case '"':
858 *bp++ = c;
859 *bp = 0;
860 return (last_lex = LX_QSTR);
861 }
862
863 if (c == '[')
864 for (*bp++ = c;;)
865 switch (c = *cp++) {
866 case 0:
867 cp = NULL;
868 return (last_lex = LX_ERR);
869 case QUOTE:
870 *bp++ = c;
871 if ((c = *cp++) == 0) {
872 cp = NULL;
873 return (last_lex = LX_ERR);
874 }
875 default:
876 *bp++ = c;
877 continue;
878 case ']':
879 *bp++ = c;
880 *bp = 0;
881 return (last_lex = LX_DLIT);
882 }
883
884 /* \f */
885
886 *bp++ = c;
887 *bp = 0;
888 for (i = 0; special[i].lx_chr != 0; i++)
889 if (c == special[i].lx_chr)
890 return (last_lex = special[i].lx_val);
891
892 if (iscntrl (c))
893 return (last_lex = LX_ERR);
894
895 for (;;) {
896 if ((c = *cp++) == 0)
897 break;
898 for (i = 0; special[i].lx_chr != 0; i++)
899 if (c == special[i].lx_chr)
900 goto got_atom;
901 if (iscntrl (c) || isspace (c))
902 break;
903 *bp++ = c;
904 }
905 got_atom: ;
906 if (c == 0)
907 cp = NULL;
908 else
909 cp--;
910 *bp = 0;
911 last_lex = !gotat || cp == NULL || index (cp, '<') != NULL
912 ? LX_ATOM : LX_AT;
913 return last_lex;
914 }
915
916 /* \f */
917
918 char *legal_person (p)
919 register char *p;
920 {
921 int i;
922 register char *cp;
923 static char buffer[BUFSIZ];
924
925 if (*p == '"')
926 return p;
927 for (cp = p; *cp; cp++)
928 for (i = 0; special[i].lx_chr; i++)
929 if (*cp == special[i].lx_chr) {
930 (void) sprintf (buffer, "\"%s\"", p);
931 return buffer;
932 }
933
934 return p;
935 }
936
937 /* \f */
938
939 int mfgets (in, bp)
940 register FILE *in;
941 register char **bp;
942 {
943 int i;
944 register char *cp,
945 *dp,
946 *ep;
947 static int len = 0;
948 static char *pp = NULL;
949
950 if (pp == NULL)
951 if ((pp = malloc ((unsigned) (len = BUFSIZ))) == NULL)
952 return NOTOK;
953
954 for (ep = (cp = pp) + len - 2;;) {
955 switch (i = getc (in)) {
956 case EOF:
957 eol: ;
958 if (cp != pp) {
959 *cp = 0;
960 *bp = pp;
961 return OK;
962 }
963 eoh: ;
964 *bp = NULL;
965 free (pp);
966 pp = NULL;
967 return DONE;
968
969 case 0:
970 continue;
971
972 case '\n':
973 if (cp == pp) /* end of headers, gobble it */
974 goto eoh;
975 switch (i = getc (in)) {
976 default: /* end of line */
977 case '\n': /* end of headers, save for next call */
978 (void) ungetc (i, in);
979 goto eol;
980
981 case ' ': /* continue headers */
982 case '\t':
983 *cp++ = '\n';
984 break;
985 } /* fall into default case */
986
987 default:
988 *cp++ = i;
989 break;
990 }
991 if (cp >= ep)
992 if ((dp = realloc (pp, (unsigned) (len += BUFSIZ))) == NULL) {
993 free (pp);
994 pp = NULL;
995 return NOTOK;
996 }
997 else
998 cp += dp - pp, ep = (pp = cp) + len - 2;
999 }
1000 }