]> diplodocus.org Git - nmh/blob - sbr/mf.c
Reverted commit 9a4b4a3d3b27fe4a7ff6d0b8724ce1c06b5917eb.
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
8 */
9
10 #include <h/mf.h>
11 #include <h/utils.h>
12
/*
 * static prototypes
 *
 * isat() recognizes the 733-style " at " separator.  The remaining
 * functions form the address parser used by getadrx(): parse_address()
 * is its entry point, my_lex() is the tokenizer, and the others each
 * consume one grammatical piece of an address.  Except for isat() and
 * parse_address(), each takes a scratch buffer that my_lex() fills with
 * the text of the current token.
 */
static int isat (const char *);
static int parse_address (void);
static int phrase (char *);
static int route_addr (char *);
static int local_part (char *);
static int domain (char *);
static int route (char *);
static int my_lex (char *);
24
25
/*
 * isfrom -- test whether a line looks like a "From " separator line.
 *
 * Returns 1 if HasPrefix() reports that string starts with "From " or
 * with ">From ", and 0 otherwise.
 */
int
isfrom(const char *string)
{
    if (HasPrefix(string, "From "))
        return 1;
    if (HasPrefix(string, ">From "))
        return 1;
    return 0;
}
32
33
34 int
35 lequal (const char *a, const char *b)
36 {
37 char c1, c2;
38
39 for (; *a; a++, b++) {
40 if (*b == 0)
41 return FALSE;
42 c1 = toupper((unsigned char)*a);
43 c2 = toupper((unsigned char)*b);
44 if (c1 != c2)
45 return FALSE;
46 }
47
48 return (*b == 0);
49 }
50
51
/*
 * isat -- test whether p starts with the word "at" surrounded by single
 * spaces, in any combination of upper and lower case.
 *
 * Returns 1 on a match and 0 otherwise.
 */
static int
isat (const char *p)
{
    if (HasPrefix(p, " AT "))
        return 1;
    if (HasPrefix(p, " At "))
        return 1;
    if (HasPrefix(p, " aT "))
        return 1;
    if (HasPrefix(p, " at "))
        return 1;
    return 0;
}
58
59
60 /*
61 *
62 * getadrx() implements a partial 822-style address parser. The parser
63 * is neither complete nor correct. It does however recognize nearly all
64 * of the 822 address syntax. In addition it handles the majority of the
65 * 733 syntax as well. Most problems arise from trying to accommodate both.
66 *
67 * In terms of 822, the route-specification in
68 *
69 * "<" [route] local-part "@" domain ">"
70 *
71 * is parsed and returned unchanged. Multiple at-signs are compressed
72 * via source-routing. Recursive groups are not allowed as per the
73 * standard.
74 *
75 * In terms of 733, " at " is recognized as equivalent to "@".
76 *
77 * In terms of both the parser will not complain about missing hosts.
78 *
79 * -----
80 *
81 * We should not allow addresses like
82 *
83 * Marshall T. Rose <MRose@UCI>
84 *
85 * but should insist on
86 *
87 * "Marshall T. Rose" <MRose@UCI>
88 *
89 * Unfortunately, a lot of mailers stupidly let people get away with this.
90 *
91 * -----
92 *
93 * We should not allow addresses like
94 *
95 * <MRose@UCI>
96 *
97 * but should insist on
98 *
99 * MRose@UCI
100 *
101 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
102 * this.
103 *
104 * -----
105 *
106 * We should not allow addresses like
107 *
108 * @UCI:MRose@UCI-750a
109 *
110 * but should insist on
111 *
112 * Marshall Rose <@UCI:MRose@UCI-750a>
113 *
114 * Unfortunately, a lot of mailers stupidly do this.
115 *
116 */
117
/* Escape character recognized inside comments, quoted strings and
 * domain literals by my_lex(). */
#define QUOTE '\\'

/*
 * Token codes returned by my_lex() (and remembered in last_lex).
 */
#define LX_END 0	/* end of input */
#define LX_ERR 1	/* lexical error, or token too long for the buffer */
#define LX_ATOM 2	/* run of ordinary characters */
#define LX_QSTR 3	/* "quoted string" */
#define LX_DLIT 4	/* [domain literal] */
#define LX_SEMI 5	/* ';' */
#define LX_COMA 6	/* ',' */
#define LX_LBRK 7	/* '<' */
#define LX_RBRK 8	/* '>' */
#define LX_COLN 9	/* ':' */
#define LX_DOT 10	/* '.' */
#define LX_AT 11	/* '@', or an atom that was preceded by " at " */

/* Maps one special character to the token code my_lex() returns for it. */
struct specials {
    char lx_chr;
    int lx_val;
};

/*
 * Table of special characters, terminated by an entry whose lx_chr is 0.
 * my_lex() consults it twice: to classify a single-character token, and
 * to decide where an atom ends (any character listed here terminates the
 * atom).  legal_person() uses it to decide whether a name needs quoting.
 *
 * The opening '(', '"' and '[' are handled by my_lex() before the table
 * is consulted, so their LX_ERR values are not returned for a token that
 * starts with one of them.
 */
static struct specials special[] = {
    { ';', LX_SEMI },
    { ',', LX_COMA },
    { '<', LX_LBRK },
    { '>', LX_RBRK },
    { ':', LX_COLN },
    { '.', LX_DOT },
    { '@', LX_AT },
    { '(', LX_ERR },
    { ')', LX_ERR },
    { QUOTE, LX_ERR },
    { '"', LX_ERR },
    { '[', LX_ERR },
    { ']', LX_ERR },
    { 0, 0 }
};
154
/*
 * Parser state shared by getadrx(), parse_address() and its helpers.
 */
static int glevel = 0;		/* group nesting depth: bumped at "phrase:", dropped at ';' */
static int ingrp = 0;		/* value of glevel when the current mailbox was parsed */
static int last_lex = LX_END;	/* token code most recently returned by my_lex() */

static char *dp = NULL;		/* private copy of the address list being parsed */
static char *cp = NULL;		/* current scan position in dp; NULL once input is exhausted */
static char *ap = NULL;		/* start of the text of the address being parsed */

/* Fields of the current address; freed and reset at the start of every
 * getadrx() call. */
static char *pers = NULL;
static char *mbox = NULL;
static char *host = NULL;
static char *path = NULL;
static char *grp = NULL;
static char *note = NULL;	/* text of "(comments)" collected by my_lex() */
static char err[BUFSIZ];	/* error message for the current address; empty if none */
static char adr[BUFSIZ];	/* text of the current address, as returned in adrx.text */

/* The structure getadrx() fills in and returns a pointer to. */
static struct adrx adrxs2;
172
173
174 /* eai = Email Address Internationalization */
175 struct adrx *
176 getadrx (const char *addrs, int eai)
177 {
178 char *bp;
179 struct adrx *adrxp = &adrxs2;
180
181 mh_xfree(pers);
182 mh_xfree(mbox);
183 mh_xfree(host);
184 mh_xfree(path);
185 mh_xfree(grp);
186 mh_xfree(note);
187 pers = mbox = host = path = grp = note = NULL;
188 err[0] = 0;
189
190 if (dp == NULL) {
191 dp = cp = strdup (addrs ? addrs : "");
192 glevel = 0;
193 }
194 else
195 if (cp == NULL) {
196 free (dp);
197 dp = NULL;
198 return NULL;
199 }
200
201 switch (parse_address ()) {
202 case DONE:
203 free (dp);
204 dp = cp = NULL;
205 return NULL;
206
207 case OK:
208 switch (last_lex) {
209 case LX_COMA:
210 case LX_END:
211 break;
212
213 default: /* catch trailing comments */
214 bp = cp;
215 my_lex (adr);
216 cp = bp;
217 break;
218 }
219 break;
220
221 default:
222 break;
223 }
224
225 if (! eai) {
226 /*
227 * Reject the address if key fields contain 8bit characters
228 */
229
230 if (contains8bit(mbox, NULL) || contains8bit(host, NULL) ||
231 contains8bit(path, NULL) || contains8bit(grp, NULL)) {
232 strcpy(err, "Address contains 8-bit characters");
233 }
234 }
235
236 if (err[0])
237 for (;;) {
238 switch (last_lex) {
239 case LX_COMA:
240 case LX_END:
241 break;
242
243 default:
244 my_lex (adr);
245 continue;
246 }
247 break;
248 }
249 while (isspace ((unsigned char) *ap))
250 ap++;
251 if (cp)
252 snprintf(adr, sizeof adr, "%.*s", (int)(cp - ap), ap);
253 else
254 strcpy (adr, ap);
255 bp = adr + strlen (adr) - 1;
256 if (*bp == ',' || *bp == ';' || *bp == '\n')
257 *bp = 0;
258
259 adrxp->text = adr;
260 adrxp->pers = pers;
261 adrxp->mbox = mbox;
262 adrxp->host = host;
263 adrxp->path = path;
264 adrxp->grp = grp;
265 adrxp->ingrp = ingrp;
266 adrxp->note = note;
267 adrxp->err = err[0] ? err : NULL;
268
269 return adrxp;
270 }
271
272
/*
 * parse_address -- parse one address starting at the scan position cp.
 *
 * Records the start of the address in ap, then uses my_lex() and the
 * helper routines to fill in the file-level fields pers, mbox, host,
 * path, grp and note.
 *
 * Returns OK when an address was parsed, DONE when the input is
 * exhausted, and NOTOK on a syntax error, in which case err holds a
 * message.
 *
 * The two switch statements share several labels (get_addr, get_group,
 * more_phrase, check_end) that are entered by goto from other cases, so
 * the order of the statements matters.
 */
static int
parse_address (void)
{
    char buffer[BUFSIZ];

again: ;
    ap = cp;
    /* First token of the address. */
    switch (my_lex (buffer)) {
        case LX_ATOM:
        case LX_QSTR:
            /* Could be a phrase or a local-part; keep it in pers until
             * the next token tells which. */
            pers = strdup (buffer);
            break;

        case LX_SEMI:
            if (glevel-- <= 0) {
                strcpy (err, "extraneous semi-colon");
                return NOTOK;
            }
            /* FALLTHRU: a group-closing ';' is skipped like a comma */
        case LX_COMA:
            /* Empty list element: forget any comment seen and retry. */
            mh_xfree(note);
            note = NULL;
            goto again;

        case LX_END:
            return DONE;

        case LX_LBRK: /* sigh (2) */
            goto get_addr;

        case LX_AT: /* sigh (3) */
            /* Rewind so route_addr() sees the at-sign itself. */
            cp = ap;
            if (route_addr (buffer) == NOTOK)
                return NOTOK;
            return OK; /* why be choosy? */

        default:
            snprintf(err, sizeof err, "illegal address construct (%s)", buffer);
            return NOTOK;
    }

    /* Second token: decides what the first atom/quoted string was. */
    switch (my_lex (buffer)) {
        case LX_ATOM:
        case LX_QSTR:
            /* A second word: the address starts with a phrase. */
            pers = add (buffer, add (" ", pers));
more_phrase: ; /* sigh (1) */
            if (phrase (buffer) == NOTOK)
                return NOTOK;

            /* Every path through this inner switch returns or jumps
             * away, so control never falls into the cases below it. */
            switch (last_lex) {
                case LX_LBRK:
get_addr: ;
                    if (route_addr (buffer) == NOTOK)
                        return NOTOK;
                    if (last_lex == LX_RBRK)
                        return OK;
                    snprintf(err, sizeof err, "missing right-bracket (%s)", buffer);
                    return NOTOK;

                case LX_COLN:
get_group: ;
                    if (glevel++ > 0) {
                        snprintf(err, sizeof err, "nested groups not allowed (%s)", pers);
                        return NOTOK;
                    }
                    /* The phrase collected so far becomes the group name. */
                    grp = add (": ", pers);
                    pers = NULL;
                    {
                        char *pp = cp;

                        for (;;)
                            switch (my_lex (buffer)) {
                                case LX_SEMI:
                                case LX_END: /* tsk, tsk */
                                    /* Group with no members. */
                                    glevel--;
                                    return OK;

                                case LX_COMA:
                                    continue;

                                default:
                                    /* Rewind to just after the colon and
                                     * parse the first group member. */
                                    cp = pp;
                                    return parse_address ();
                            }
                    }

                case LX_DOT: /* sigh (1) */
                    pers = add (".", pers);
                    goto more_phrase;

                default:
                    snprintf(err, sizeof err, "no mailbox in address, only a phrase (%s%s)",
                        pers, buffer);
                    return NOTOK;
            }

        case LX_LBRK:
            goto get_addr;

        case LX_COLN:
            goto get_group;

        case LX_DOT:
            /* "atom." begins a dotted local-part; let route_addr()
             * collect the rest of it. */
            mbox = add (buffer, pers);
            pers = NULL;
            if (route_addr (buffer) == NOTOK)
                return NOTOK;
            goto check_end;

        case LX_AT:
            /* Plain local@domain: the first token was the mailbox. */
            ingrp = glevel;
            mbox = pers;
            pers = NULL;
            if (domain (buffer) == NOTOK)
                return NOTOK;
check_end: ;
            switch (last_lex) {
                case LX_SEMI:
                    if (glevel-- <= 0) {
                        strcpy (err, "extraneous semi-colon");
                        return NOTOK;
                    }
                    /* FALLTHRU */
                case LX_COMA:
                case LX_END:
                    return OK;

                default:
                    snprintf(err, sizeof err, "junk after local@domain (%s)", buffer);
                    return NOTOK;
            }

        case LX_SEMI: /* no host */
        case LX_COMA:
        case LX_END:
            /* A lone word: treat it as a mailbox with no host. */
            ingrp = glevel;
            if (last_lex == LX_SEMI && glevel-- <= 0) {
                strcpy (err, "extraneous semi-colon");
                return NOTOK;
            }
            mbox = pers;
            pers = NULL;
            return OK;

        default:
            snprintf(err, sizeof err, "missing mailbox (%s)", buffer);
            return NOTOK;
    }
}
420
421
422 static int
423 phrase (char *buffer)
424 {
425 for (;;)
426 switch (my_lex (buffer)) {
427 case LX_ATOM:
428 case LX_QSTR:
429 pers = add (buffer, add (" ", pers));
430 continue;
431
432 default:
433 return OK;
434 }
435 }
436
437
438 static int
439 route_addr (char *buffer)
440 {
441 char *pp = cp;
442
443 if (my_lex (buffer) == LX_AT) {
444 if (route (buffer) == NOTOK)
445 return NOTOK;
446 }
447 else
448 cp = pp;
449
450 if (local_part (buffer) == NOTOK)
451 return NOTOK;
452
453 switch (last_lex) {
454 case LX_AT:
455 return domain (buffer);
456
457 case LX_SEMI: /* if in group */
458 case LX_RBRK: /* no host */
459 case LX_COMA:
460 case LX_END:
461 return OK;
462
463 default:
464 snprintf(err, sizeof err, "no at-sign after local-part (%s)", buffer);
465 return NOTOK;
466 }
467 }
468
469
470 static int
471 local_part (char *buffer)
472 {
473 ingrp = glevel;
474
475 for (;;) {
476 switch (my_lex (buffer)) {
477 case LX_ATOM:
478 case LX_QSTR:
479 mbox = add (buffer, mbox);
480 break;
481
482 default:
483 snprintf(err, sizeof err, "no mailbox in local-part (%s)", buffer);
484 return NOTOK;
485 }
486
487 switch (my_lex (buffer)) {
488 case LX_DOT:
489 mbox = add (buffer, mbox);
490 continue;
491
492 default:
493 return OK;
494 }
495 }
496 }
497
498
499 static int
500 domain (char *buffer)
501 {
502 for (;;) {
503 switch (my_lex (buffer)) {
504 case LX_ATOM:
505 case LX_DLIT:
506 host = add (buffer, host);
507 break;
508
509 default:
510 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
511 return NOTOK;
512 }
513
514 switch (my_lex (buffer)) {
515 case LX_DOT:
516 host = add (buffer, host);
517 continue;
518
519 case LX_AT: /* sigh (0) */
520 mbox = add (host, add ("%", mbox));
521 free (host);
522 host = NULL;
523 continue;
524
525 default:
526 return OK;
527 }
528 }
529 }
530
531
532 static int
533 route (char *buffer)
534 {
535 path = strdup ("@");
536
537 for (;;) {
538 switch (my_lex (buffer)) {
539 case LX_ATOM:
540 case LX_DLIT:
541 path = add (buffer, path);
542 break;
543
544 default:
545 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
546 return NOTOK;
547 }
548 switch (my_lex (buffer)) {
549 case LX_COMA:
550 path = add (buffer, path);
551 for (;;) {
552 switch (my_lex (buffer)) {
553 case LX_COMA:
554 continue;
555
556 case LX_AT:
557 path = add (buffer, path);
558 break;
559
560 default:
561 snprintf(err, sizeof err, "no at-sign found for next domain in route (%s)",
562 buffer);
563 }
564 break;
565 }
566 continue;
567
568 case LX_AT: /* XXX */
569 case LX_DOT:
570 path = add (buffer, path);
571 continue;
572
573 case LX_COLN:
574 path = add (buffer, path);
575 return OK;
576
577 default:
578 snprintf(err, sizeof err, "no colon found to terminate route (%s)", buffer);
579 return NOTOK;
580 }
581 }
582 }
583
584
/*
 * my_lex -- read the next token from the scan position cp.
 *
 * The text of the token is stored, NUL-terminated, in buffer; its code
 * (one of the LX_ values) is returned and also saved in last_lex.  cp is
 * advanced past the token and is set to NULL once the terminating NUL of
 * the input has been reached.
 *
 * Parenthesized comments are not returned as tokens: their text is
 * accumulated in note and the token that follows is returned instead.
 *
 * A token that would fill the buffer yields LX_ERR.
 */
static int
my_lex (char *buffer)
{
    /* buffer should be at least BUFSIZ bytes long */
    int i, gotat = 0;
    char c, *bp;

    /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
#define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)

    bp = buffer;
    *bp = 0;
    if (!cp)
        return (last_lex = LX_END);

    /* Note whether the token is introduced by " at "; this is tested
     * before the leading white space is skipped. */
    gotat = isat (cp);
    c = *cp++;
    while (isspace ((unsigned char) c))
        c = *cp++;
    if (c == 0) {
        cp = NULL;
        return (last_lex = LX_END);
    }

    /* Comment: i counts the nesting depth of inner parentheses. */
    if (c == '(') {
        ADDCHR(c);
        for (i = 0;;)
            switch (c = *cp++) {
                case 0:
                    /* Unterminated comment. */
                    cp = NULL;
                    return (last_lex = LX_ERR);
                case QUOTE:
                    /* Copy the escape and the character it protects. */
                    ADDCHR(c);
                    if ((c = *cp++) == 0) {
                        cp = NULL;
                        return (last_lex = LX_ERR);
                    }
                    ADDCHR(c);
                    continue;
                case '(':
                    i++;
                    /* FALLTHRU */
                default:
                    ADDCHR(c);
                    continue;
                case ')':
                    ADDCHR(c);
                    if (--i < 0) {
                        /* Outermost ')' reached: save the comment in
                         * note and return the token that follows. */
                        *bp = 0;
                        note = note ? add (buffer, add (" ", note))
                            : strdup (buffer);
                        return my_lex (buffer);
                    }
            }
    }

    /* Quoted string: returned with its surrounding quotes. */
    if (c == '"') {
        ADDCHR(c);
        for (;;)
            switch (c = *cp++) {
                case 0:
                    cp = NULL;
                    return (last_lex = LX_ERR);
                case QUOTE:
                    ADDCHR(c);
                    if ((c = *cp++) == 0) {
                        cp = NULL;
                        return (last_lex = LX_ERR);
                    }
                    /* FALLTHRU: store the escaped character */
                default:
                    ADDCHR(c);
                    continue;
                case '"':
                    ADDCHR(c);
                    *bp = 0;
                    return (last_lex = LX_QSTR);
            }
    }

    /* Domain literal: returned with its surrounding brackets. */
    if (c == '[') {
        ADDCHR(c);
        for (;;)
            switch (c = *cp++) {
                case 0:
                    cp = NULL;
                    return (last_lex = LX_ERR);
                case QUOTE:
                    ADDCHR(c);
                    if ((c = *cp++) == 0) {
                        cp = NULL;
                        return (last_lex = LX_ERR);
                    }
                    /* FALLTHRU: store the escaped character */
                default:
                    ADDCHR(c);
                    continue;
                case ']':
                    ADDCHR(c);
                    *bp = 0;
                    return (last_lex = LX_DLIT);
            }
    }

    /* Single-character token, if c is listed in special[]. */
    ADDCHR(c);
    *bp = 0;
    for (i = 0; special[i].lx_chr != 0; i++)
        if (c == special[i].lx_chr)
            return (last_lex = special[i].lx_val);

    if (iscntrl ((unsigned char) c))
        return (last_lex = LX_ERR);

    /* Atom: extends up to the next special character, control
     * character, white space or end of input. */
    for (;;) {
        if ((c = *cp++) == 0)
            break;
        for (i = 0; special[i].lx_chr != 0; i++)
            if (c == special[i].lx_chr)
                goto got_atom;
        if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
            break;
        ADDCHR(c);
    }
got_atom: ;
    if (c == 0)
        cp = NULL;
    else
        cp--;	/* put back the character that ended the atom */
    *bp = 0;
    /* The atom is reported as LX_AT only when it was preceded by " at ",
     * more input remains, and that input contains no '<'. */
    last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
        ? LX_ATOM : LX_AT;
    return last_lex;

my_lex_buffull:
    /* Out of buffer space. *bp is the last byte in the buffer */
    *bp = 0;
    return (last_lex = LX_ERR);
}
720
721
722 char *
723 legal_person (const char *p)
724 {
725 int i;
726 const char *cp;
727 static char buffer[BUFSIZ];
728
729 if (*p == '"')
730 return (char *) p;
731 for (cp = p; *cp; cp++)
732 for (i = 0; special[i].lx_chr; i++)
733 if (*cp == special[i].lx_chr) {
734 snprintf(buffer, sizeof buffer, "\"%s\"", p);
735 return buffer;
736 }
737
738 return (char *) p;
739 }
740
741
742 int
743 mfgets (FILE *in, char **bp)
744 {
745 int i;
746 char *cp, *dp, *ep;
747 static int len = 0;
748 static char *pp = NULL;
749
750 if (pp == NULL)
751 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
752
753 for (ep = (cp = pp) + len - 2;;) {
754 switch (i = getc (in)) {
755 case EOF:
756 eol: ;
757 if (cp != pp) {
758 *cp = 0;
759 *bp = pp;
760 return OK;
761 }
762 eoh: ;
763 *bp = NULL;
764 free (pp);
765 pp = NULL;
766 return DONE;
767
768 case 0:
769 continue;
770
771 case '\n':
772 if (cp == pp) /* end of headers, gobble it */
773 goto eoh;
774 switch (i = getc (in)) {
775 default: /* end of line */
776 case '\n': /* end of headers, save for next call */
777 ungetc (i, in);
778 goto eol;
779
780 case ' ': /* continue headers */
781 case '\t':
782 *cp++ = '\n';
783 break;
784 } /* fall into default case */
785
786 default:
787 *cp++ = i;
788 break;
789 }
790 if (cp >= ep) {
791 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
792 cp += dp - pp, ep = (pp = cp) + len - 2;
793 }
794 }
795 }