]>
diplodocus.org Git - nmh/blob - sbr/mf.c
3 * mf.c -- mail filter subroutines
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
18 static char *getcpy (char *);
19 static void compress (char *, unsigned char *);
20 static int isat (char *);
21 static int parse_address (void);
22 static int phrase (char *);
23 static int route_addr (char *);
24 static int local_part (char *);
25 static int domain (char *);
26 static int route (char *);
27 static int my_lex (char *);
36 /* causes compiles to blow up because the symbol _cleanup is undefined
37 where did this ever come from? */
43 p
= mh_xmalloc ((size_t) (strlen (s
) + 2));
52 return (strncmp (string
, "From ", 5) == 0
53 || strncmp (string
, ">From ", 6) == 0);
58 lequal (unsigned char *a
, unsigned char *b
)
64 char c1
= islower (*a
) ? toupper (*a
) : *a
;
65 char c2
= islower (*b
) ? toupper (*b
) : *b
;
75 * seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
76 * addresses, so for each list of addresses we see if we can find some
77 * character to give us a hint.
81 #define CHKADR 0 /* undetermined address style */
82 #define UNIXDR 1 /* UNIX-style address */
83 #define ARPADR 2 /* ARPAnet-style address */
86 static char *punctuators
= ";<>.()[]";
87 static char *vp
= NULL
;
88 static char *tp
= NULL
;
90 static struct adrx adrxs1
;
94 seekadrx (char *addrs
)
96 static int state
= CHKADR
;
98 register struct adrx
*adrxp
;
101 for (state
= UNIXDR
, cp
= addrs
; *cp
; cp
++)
102 if (strchr(punctuators
, *cp
)) {
109 adrxp
= uucpadrx (addrs
);
114 adrxp
= getadrx (addrs
);
126 * uucpadrx() implements a partial UUCP-style address parser. It's based
127 * on the UUCP notion that addresses are separated by spaces or commas.
132 uucpadrx (char *addrs
)
134 register unsigned char *cp
, *wp
, *xp
, *yp
;
136 register struct adrx
*adrxp
= &adrxs1
;
139 vp
= tp
= getcpy (addrs
);
140 compress (addrs
, vp
);
149 for (cp
= tp
; isspace (*cp
); cp
++)
157 if ((wp
= strchr(cp
, ',')) == NULL
) {
158 if ((wp
= strchr(cp
, ' ')) != NULL
) {
160 while (isspace (*xp
))
162 if (*xp
!= 0 && isat (--xp
)) {
164 while (isspace (*yp
))
167 if ((zp
= strchr(yp
, ' ')) != NULL
)
186 adrxp
->text
= getcpy (cp
);
188 adrxp
->host
= adrxp
->path
= NULL
;
189 if ((wp
= strrchr(cp
, '@')) != NULL
) {
191 adrxp
->host
= *wp
? wp
: NULL
;
194 for (wp
= cp
+ strlen (cp
) - 4; wp
>= cp
; wp
--)
197 adrxp
->host
= wp
+ 3;
200 adrxp
->pers
= adrxp
->grp
= adrxp
->note
= adrxp
->err
= NULL
;
208 compress (char *fp
, unsigned char *tp
)
211 register unsigned char *cp
;
213 for (c
= ' ', cp
= tp
; (*tp
= *fp
++) != 0;)
221 if (c
== ' ' && cp
< tp
)
229 return (strncmp (p
, " AT ", 4)
230 && strncmp (p
, " At ", 4)
231 && strncmp (p
, " aT ", 4)
232 && strncmp (p
, " at ", 4) ? FALSE
: TRUE
);
238 * getadrx() implements a partial 822-style address parser. The parser
239 * is neither complete nor correct. It does however recognize nearly all
240 * of the 822 address syntax. In addition it handles the majority of the
241 * 733 syntax as well. Most problems arise from trying to accommodate both.
243 * In terms of 822, the route-specification in
245 * "<" [route] local-part "@" domain ">"
247 * is parsed and returned unchanged. Multiple at-signs are compressed
248 * via source-routing. Recursive groups are not allowed as per the
251 * In terms of 733, " at " is recognized as equivalent to "@".
253 * In terms of both the parser will not complain about missing hosts.
257 * We should not allow addresses like
259 * Marshall T. Rose <MRose@UCI>
261 * but should insist on
263 * "Marshall T. Rose" <MRose@UCI>
265 * Unfortunately, a lot of mailers stupidly let people get away with this.
269 * We should not allow addresses like
273 * but should insist on
277 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
282 * We should not allow addresses like
284 * @UCI:MRose@UCI-750a
286 * but should insist on
288 * Marshall Rose <@UCI:MRose@UCI-750a>
290 * Unfortunately, a lot of mailers stupidly do this.
314 static struct specials special
[] = {
331 static int glevel
= 0;
332 static int ingrp
= 0;
333 static int last_lex
= LX_END
;
335 static char *dp
= NULL
;
336 static unsigned char *cp
= NULL
;
337 static unsigned char *ap
= NULL
;
338 static char *pers
= NULL
;
339 static char *mbox
= NULL
;
340 static char *host
= NULL
;
341 static char *path
= NULL
;
342 static char *grp
= NULL
;
343 static char *note
= NULL
;
344 static char err
[BUFSIZ
];
345 static char adr
[BUFSIZ
];
347 static struct adrx adrxs2
;
351 getadrx (char *addrs
)
354 register struct adrx
*adrxp
= &adrxs2
;
368 pers
= mbox
= host
= path
= grp
= note
= NULL
;
372 dp
= cp
= getcpy (addrs
? addrs
: "");
382 switch (parse_address ()) {
394 default: /* catch trailing comments */
419 while (isspace (*ap
))
422 sprintf (adr
, "%.*s", (int)(cp
- ap
), ap
);
425 bp
= adr
+ strlen (adr
) - 1;
426 if (*bp
== ',' || *bp
== ';' || *bp
== '\n')
435 adrxp
->ingrp
= ingrp
;
437 adrxp
->err
= err
[0] ? err
: NULL
;
450 switch (my_lex (buffer
)) {
453 pers
= getcpy (buffer
);
458 strcpy (err
, "extraneous semi-colon");
471 case LX_LBRK
: /* sigh (2) */
474 case LX_AT
: /* sigh (3) */
476 if (route_addr (buffer
) == NOTOK
)
478 return OK
; /* why be choosy? */
481 sprintf (err
, "illegal address construct (%s)", buffer
);
485 switch (my_lex (buffer
)) {
488 pers
= add (buffer
, add (" ", pers
));
489 more_phrase
: ; /* sigh (1) */
490 if (phrase (buffer
) == NOTOK
)
496 if (route_addr (buffer
) == NOTOK
)
498 if (last_lex
== LX_RBRK
)
500 sprintf (err
, "missing right-bracket (%s)", buffer
);
506 sprintf (err
, "nested groups not allowed (%s)", pers
);
509 grp
= add (": ", pers
);
515 switch (my_lex (buffer
)) {
517 case LX_END
: /* tsk, tsk */
526 return parse_address ();
530 case LX_DOT
: /* sigh (1) */
531 pers
= add (".", pers
);
535 sprintf (err
, "no mailbox in address, only a phrase (%s%s)",
547 mbox
= add (buffer
, pers
);
549 if (route_addr (buffer
) == NOTOK
)
557 if (domain (buffer
) == NOTOK
)
563 strcpy (err
, "extraneous semi-colon");
571 sprintf (err
, "junk after local@domain (%s)", buffer
);
575 case LX_SEMI
: /* no host */
579 if (last_lex
== LX_SEMI
&& glevel
-- <= 0) {
580 strcpy (err
, "extraneous semi-colon");
588 sprintf (err
, "missing mailbox (%s)", buffer
);
595 phrase (char *buffer
)
598 switch (my_lex (buffer
)) {
601 pers
= add (buffer
, add (" ", pers
));
611 route_addr (char *buffer
)
613 register char *pp
= cp
;
615 if (my_lex (buffer
) == LX_AT
) {
616 if (route (buffer
) == NOTOK
)
622 if (local_part (buffer
) == NOTOK
)
627 return domain (buffer
);
629 case LX_SEMI
: /* if in group */
630 case LX_RBRK
: /* no host */
636 sprintf (err
, "no at-sign after local-part (%s)", buffer
);
643 local_part (char *buffer
)
648 switch (my_lex (buffer
)) {
651 mbox
= add (buffer
, mbox
);
655 sprintf (err
, "no mailbox in local-part (%s)", buffer
);
659 switch (my_lex (buffer
)) {
661 mbox
= add (buffer
, mbox
);
672 domain (char *buffer
)
675 switch (my_lex (buffer
)) {
678 host
= add (buffer
, host
);
682 sprintf (err
, "no sub-domain in domain-part of address (%s)", buffer
);
686 switch (my_lex (buffer
)) {
688 host
= add (buffer
, host
);
691 case LX_AT
: /* sigh (0) */
692 mbox
= add (host
, add ("%", mbox
));
710 switch (my_lex (buffer
)) {
713 path
= add (buffer
, path
);
717 sprintf (err
, "no sub-domain in domain-part of address (%s)", buffer
);
720 switch (my_lex (buffer
)) {
722 path
= add (buffer
, path
);
724 switch (my_lex (buffer
)) {
729 path
= add (buffer
, path
);
733 sprintf (err
, "no at-sign found for next domain in route (%s)",
740 case LX_AT
: /* XXX */
742 path
= add (buffer
, path
);
746 path
= add (buffer
, path
);
750 sprintf (err
, "no colon found to terminate route (%s)", buffer
);
758 my_lex (char *buffer
)
760 /* buffer should be at least BUFSIZ bytes long */
762 register unsigned char c
;
765 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
766 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
771 return (last_lex
= LX_END
);
779 return (last_lex
= LX_END
);
788 return (last_lex
= LX_ERR
);
791 if ((c
= *cp
++) == 0) {
793 return (last_lex
= LX_ERR
);
806 note
= note
? add (buffer
, add (" ", note
))
808 return my_lex (buffer
);
819 return (last_lex
= LX_ERR
);
822 if ((c
= *cp
++) == 0) {
824 return (last_lex
= LX_ERR
);
832 return (last_lex
= LX_QSTR
);
842 return (last_lex
= LX_ERR
);
845 if ((c
= *cp
++) == 0) {
847 return (last_lex
= LX_ERR
);
855 return (last_lex
= LX_DLIT
);
861 for (i
= 0; special
[i
].lx_chr
!= 0; i
++)
862 if (c
== special
[i
].lx_chr
)
863 return (last_lex
= special
[i
].lx_val
);
866 return (last_lex
= LX_ERR
);
869 if ((c
= *cp
++) == 0)
871 for (i
= 0; special
[i
].lx_chr
!= 0; i
++)
872 if (c
== special
[i
].lx_chr
)
874 if (iscntrl (c
) || isspace (c
))
884 last_lex
= !gotat
|| cp
== NULL
|| strchr(cp
, '<') != NULL
889 /* Out of buffer space. *bp is the last byte in the buffer */
891 return (last_lex
= LX_ERR
);
896 legal_person (char *p
)
900 static char buffer
[BUFSIZ
];
904 for (cp
= p
; *cp
; cp
++)
905 for (i
= 0; special
[i
].lx_chr
; i
++)
906 if (*cp
== special
[i
].lx_chr
) {
907 sprintf (buffer
, "\"%s\"", p
);
916 mfgets (FILE *in
, char **bp
)
919 register char *cp
, *dp
, *ep
;
921 static char *pp
= NULL
;
924 pp
= mh_xmalloc ((size_t) (len
= BUFSIZ
));
926 for (ep
= (cp
= pp
) + len
- 2;;) {
927 switch (i
= getc (in
)) {
945 if (cp
== pp
) /* end of headers, gobble it */
947 switch (i
= getc (in
)) {
948 default: /* end of line */
949 case '\n': /* end of headers, save for next call */
953 case ' ': /* continue headers */
957 } /* fall into default case */
964 dp
= mh_xrealloc (pp
, (size_t) (len
+= BUFSIZ
));
965 cp
+= dp
- pp
, ep
= (pp
= cp
) + len
- 2;