]>
diplodocus.org Git - nmh/blob - zotnet/mf/mf.c
3 * mf.c -- mail filter subroutines
15 static char *getcpy (char *);
16 static char *add (char *, char *);
17 static void compress (char *, char *);
18 static int isat (char *);
19 static int parse_address (void);
20 static int phrase (char *);
21 static int route_addr (char *);
22 static int local_part (char *);
23 static int domain (char *);
24 static int route (char *);
25 static int my_lex (char *);
39 if ((p
= malloc ((size_t) (strlen (s
) + 2))))
46 add (char *s1
, char *s2
)
53 if ((p
= malloc ((size_t) (strlen (s1
) + strlen (s2
) + 2))))
54 sprintf (p
, "%s%s", s2
, s1
);
62 return (strncmp (string
, "From ", 5) == 0
63 || strncmp (string
, ">From ", 6) == 0);
68 lequal (char *a
, char *b
)
74 char c1
= islower (*a
) ? toupper (*a
) : *a
;
75 char c2
= islower (*b
) ? toupper (*b
) : *b
;
85 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
86 * addresses, so for each list of addresses we see if we can find some
87 * character to give us a hint.
91 #define CHKADR 0 /* undetermined address style */
92 #define UNIXDR 1 /* UNIX-style address */
93 #define ARPADR 2 /* ARPAnet-style address */
96 static char *punctuators
= ";<>.()[]";
97 static char *vp
= NULL
;
98 static char *tp
= NULL
;
100 static struct adrx adrxs1
;
104 seekadrx (char *addrs
)
106 static int state
= CHKADR
;
108 register struct adrx
*adrxp
;
111 for (state
= UNIXDR
, cp
= addrs
; *cp
; cp
++)
112 if (strchr(punctuators
, *cp
)) {
119 adrxp
= uucpadrx (addrs
);
124 adrxp
= getadrx (addrs
);
136 * uucpadrx() implements a partial UUCP-style address parser. It's based
137 * on the UUCP notion that addresses are separated by spaces or commas.
142 uucpadrx (char *addrs
)
144 register char *cp
, *wp
, *xp
, *yp
, *zp
;
145 register struct adrx
*adrxp
= &adrxs1
;
148 vp
= tp
= getcpy (addrs
);
149 compress (addrs
, vp
);
158 for (cp
= tp
; isspace (*cp
); cp
++)
166 if ((wp
= strchr(cp
, ',')) == NULL
)
167 if ((wp
= strchr(cp
, ' ')) != NULL
) {
169 while (isspace (*xp
))
171 if (*xp
!= 0 && isat (--xp
)) {
173 while (isspace (*yp
))
176 if ((zp
= strchr(yp
, ' ')) != NULL
)
193 adrxp
->text
= getcpy (cp
);
195 adrxp
->host
= adrxp
->path
= NULL
;
196 if ((wp
= strrchr(cp
, '@')) != NULL
) {
198 adrxp
->host
= *wp
? wp
: NULL
;
201 for (wp
= cp
+ strlen (cp
) - 4; wp
>= cp
; wp
--)
204 adrxp
->host
= wp
+ 3;
207 adrxp
->pers
= adrxp
->grp
= adrxp
->note
= adrxp
->err
= NULL
;
215 compress (char *fp
, char *tp
)
217 register char c
, *cp
;
219 for (c
= ' ', cp
= tp
; (*tp
= *fp
++) != 0;)
227 if (c
== ' ' && cp
< tp
)
235 return (strncmp (p
, " AT ", 4)
236 && strncmp (p
, " At ", 4)
237 && strncmp (p
, " aT ", 4)
238 && strncmp (p
, " at ", 4) ? FALSE
: TRUE
);
244 * getadrx() implements a partial 822-style address parser. The parser
245 * is neither complete nor correct. It does however recognize nearly all
246 * of the 822 address syntax. In addition it handles the majority of the
247 * 733 syntax as well. Most problems arise from trying to accommodate both.
249 * In terms of 822, the route-specification in
251 * "<" [route] local-part "@" domain ">"
253 * is parsed and returned unchanged. Multiple at-signs are compressed
254 * via source-routing. Recursive groups are not allowed as per the
257 * In terms of 733, " at " is recognized as equivalent to "@".
259 * In terms of both the parser will not complain about missing hosts.
263 * We should not allow addresses like
265 * Marshall T. Rose <MRose@UCI>
267 * but should insist on
269 * "Marshall T. Rose" <MRose@UCI>
271 * Unfortunately, a lot of mailers stupidly let people get away with this.
275 * We should not allow addresses like
279 * but should insist on
283 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
288 * We should not allow addresses like
290 * @UCI:MRose@UCI-750a
292 * but should insist on
294 * Marshall Rose <@UCI:MRose@UCI-750a>
296 * Unfortunately, a lot of mailers stupidly do this.
320 static struct specials special
[] = {
337 static int glevel
= 0;
338 static int ingrp
= 0;
339 static int last_lex
= LX_END
;
341 static char *dp
= NULL
;
342 static char *cp
= NULL
;
343 static char *ap
= NULL
;
344 static char *pers
= NULL
;
345 static char *mbox
= NULL
;
346 static char *host
= NULL
;
347 static char *path
= NULL
;
348 static char *grp
= NULL
;
349 static char *note
= NULL
;
350 static char err
[BUFSIZ
];
351 static char adr
[BUFSIZ
];
353 static struct adrx adrxs2
;
357 getadrx (char *addrs
)
360 register struct adrx
*adrxp
= &adrxs2
;
374 pers
= mbox
= host
= path
= grp
= note
= NULL
;
378 dp
= cp
= getcpy (addrs
? addrs
: "");
388 switch (parse_address ()) {
400 default: /* catch trailing comments */
425 while (isspace (*ap
))
428 sprintf (adr
, "%.*s", cp
- ap
, ap
);
431 bp
= adr
+ strlen (adr
) - 1;
432 if (*bp
== ',' || *bp
== ';' || *bp
== '\n')
441 adrxp
->ingrp
= ingrp
;
443 adrxp
->err
= err
[0] ? err
: NULL
;
456 switch (my_lex (buffer
)) {
459 pers
= getcpy (buffer
);
464 strcpy (err
, "extraneous semi-colon");
477 case LX_LBRK
: /* sigh (2) */
480 case LX_AT
: /* sigh (3) */
482 if (route_addr (buffer
) == NOTOK
)
484 return OK
; /* why be choosy? */
487 sprintf (err
, "illegal address construct (%s)", buffer
);
491 switch (my_lex (buffer
)) {
494 pers
= add (buffer
, add (" ", pers
));
495 more_phrase
: ; /* sigh (1) */
496 if (phrase (buffer
) == NOTOK
)
502 if (route_addr (buffer
) == NOTOK
)
504 if (last_lex
== LX_RBRK
)
506 sprintf (err
, "missing right-bracket (%s)", buffer
);
512 sprintf (err
, "nested groups not allowed (%s)", pers
);
515 grp
= add (": ", pers
);
521 switch (my_lex (buffer
)) {
523 case LX_END
: /* tsk, tsk */
532 return parse_address ();
536 case LX_DOT
: /* sigh (1) */
537 pers
= add (".", pers
);
541 sprintf (err
, "no mailbox in address, only a phrase (%s%s)",
553 mbox
= add (buffer
, pers
);
555 if (route_addr (buffer
) == NOTOK
)
563 if (domain (buffer
) == NOTOK
)
569 strcpy (err
, "extraneous semi-colon");
577 sprintf (err
, "junk after local@domain (%s)", buffer
);
581 case LX_SEMI
: /* no host */
585 if (last_lex
== LX_SEMI
&& glevel
-- <= 0) {
586 strcpy (err
, "extraneous semi-colon");
594 sprintf (err
, "missing mailbox (%s)", buffer
);
601 phrase (char *buffer
)
604 switch (my_lex (buffer
)) {
607 pers
= add (buffer
, add (" ", pers
));
617 route_addr (char *buffer
)
619 register char *pp
= cp
;
621 if (my_lex (buffer
) == LX_AT
) {
622 if (route (buffer
) == NOTOK
)
628 if (local_part (buffer
) == NOTOK
)
633 return domain (buffer
);
635 case LX_SEMI
: /* if in group */
636 case LX_RBRK
: /* no host */
642 sprintf (err
, "no at-sign after local-part (%s)", buffer
);
649 local_part (char *buffer
)
654 switch (my_lex (buffer
)) {
657 mbox
= add (buffer
, mbox
);
661 sprintf (err
, "no mailbox in local-part (%s)", buffer
);
665 switch (my_lex (buffer
)) {
667 mbox
= add (buffer
, mbox
);
678 domain (char *buffer
)
681 switch (my_lex (buffer
)) {
684 host
= add (buffer
, host
);
688 sprintf (err
, "no sub-domain in domain-part of address (%s)", buffer
);
692 switch (my_lex (buffer
)) {
694 host
= add (buffer
, host
);
697 case LX_AT
: /* sigh (0) */
698 mbox
= add (host
, add ("%", mbox
));
716 switch (my_lex (buffer
)) {
719 path
= add (buffer
, path
);
723 sprintf (err
, "no sub-domain in domain-part of address (%s)", buffer
);
726 switch (my_lex (buffer
)) {
728 path
= add (buffer
, path
);
730 switch (my_lex (buffer
)) {
735 path
= add (buffer
, path
);
739 sprintf (err
, "no at-sign found for next domain in route (%s)",
746 case LX_AT
: /* XXX */
748 path
= add (buffer
, path
);
752 path
= add (buffer
, path
);
756 sprintf (err
, "no colon found to terminate route (%s)", buffer
);
764 my_lex (char *buffer
)
767 register char c
, *bp
;
772 return (last_lex
= LX_END
);
780 return (last_lex
= LX_END
);
784 for (*bp
++ = c
, i
= 0;;)
788 return (last_lex
= LX_ERR
);
791 if ((c
= *cp
++) == 0) {
793 return (last_lex
= LX_ERR
);
806 note
= note
? add (buffer
, add (" ", note
))
808 return my_lex (buffer
);
817 return (last_lex
= LX_ERR
);
820 if ((c
= *cp
++) == 0) {
822 return (last_lex
= LX_ERR
);
830 return (last_lex
= LX_QSTR
);
838 return (last_lex
= LX_ERR
);
841 if ((c
= *cp
++) == 0) {
843 return (last_lex
= LX_ERR
);
851 return (last_lex
= LX_DLIT
);
856 for (i
= 0; special
[i
].lx_chr
!= 0; i
++)
857 if (c
== special
[i
].lx_chr
)
858 return (last_lex
= special
[i
].lx_val
);
861 return (last_lex
= LX_ERR
);
864 if ((c
= *cp
++) == 0)
866 for (i
= 0; special
[i
].lx_chr
!= 0; i
++)
867 if (c
== special
[i
].lx_chr
)
869 if (iscntrl (c
) || isspace (c
))
879 last_lex
= !gotat
|| cp
== NULL
|| strchr(cp
, '<') != NULL
886 legal_person (char *p
)
890 static char buffer
[BUFSIZ
];
894 for (cp
= p
; *cp
; cp
++)
895 for (i
= 0; special
[i
].lx_chr
; i
++)
896 if (*cp
== special
[i
].lx_chr
) {
897 sprintf (buffer
, "\"%s\"", p
);
906 mfgets (FILE *in
, char **bp
)
909 register char *cp
, *dp
, *ep
;
911 static char *pp
= NULL
;
914 if (!(pp
= malloc ((size_t) (len
= BUFSIZ
))))
917 for (ep
= (cp
= pp
) + len
- 2;;) {
918 switch (i
= getc (in
)) {
936 if (cp
== pp
) /* end of headers, gobble it */
938 switch (i
= getc (in
)) {
939 default: /* end of line */
940 case '\n': /* end of headers, save for next call */
944 case ' ': /* continue headers */
948 } /* fall into default case */
955 if (!(dp
= realloc (pp
, (size_t) (len
+= BUFSIZ
)))) {
961 cp
+= dp
- pp
, ep
= (pp
= cp
) + len
- 2;