]> diplodocus.org Git - nmh/blob - sbr/mf.c
Assume POSIX ctype.h; don't vet tolower()'s parameter.
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
8 */
9
10 #include <h/mf.h>
11 #include <h/utils.h>
12
/*
 * Forward declarations for the file-local helpers defined further down.
 */
static int isat (const char *p);
static int parse_address (void);
static int phrase (char *buffer);
static int route_addr (char *buffer);
static int local_part (char *buffer);
static int domain (char *buffer);
static int route (char *buffer);
static int my_lex (char *buffer);
24
25
/*
 * isfrom -- report whether string begins with the literal "From " or
 * with the same text preceded by a single '>'.
 *
 * Returns 1 on a match and 0 otherwise.
 */
int
isfrom(const char *string)
{
    if (strncmp (string, "From ", 5) == 0)
        return 1;

    return strncmp (string, ">From ", 6) == 0;
}
32
33
/*
 * lequal -- compare two strings for equality while ignoring the case of
 * letters.
 *
 * Lower-case characters on both sides are folded to upper case before
 * each comparison.  Returns non-zero when a and b match over their whole
 * length and zero as soon as they differ or one of them ends early.
 */
int
lequal (const char *a, const char *b)
{
    for (; *a != '\0'; a++, b++) {
        char ua = *a;
        char ub = *b;

        /* b ran out before a did */
        if (ub == '\0')
            return 0;

        if (islower ((unsigned char) ua))
            ua = toupper ((unsigned char) ua);
        if (islower ((unsigned char) ub))
            ub = toupper ((unsigned char) ub);

        if (ua != ub)
            return 0;
    }

    /* a is exhausted; equal only if b is exhausted too */
    return *b == '\0';
}
52
53
/*
 * isat -- check whether p starts with the word "at", in any mix of upper
 * and lower case, with one space in front of it and one space after it.
 *
 * Returns non-zero on a match and zero otherwise.
 */
static int
isat (const char *p)
{
    static const char *const spellings[] = { " AT ", " At ", " aT ", " at " };
    size_t k;

    for (k = 0; k < sizeof spellings / sizeof spellings[0]; k++)
        if (strncmp (p, spellings[k], 4) == 0)
            return 1;

    return 0;
}
62
63
64 /*
65 *
66 * getadrx() implements a partial 822-style address parser. The parser
67 * is neither complete nor correct. It does however recognize nearly all
68 * of the 822 address syntax. In addition it handles the majority of the
69 * 733 syntax as well. Most problems arise from trying to accommodate both.
70 *
71 * In terms of 822, the route-specification in
72 *
73 * "<" [route] local-part "@" domain ">"
74 *
75 * is parsed and returned unchanged. Multiple at-signs are compressed
76 * via source-routing. Recursive groups are not allowed as per the
77 * standard.
78 *
79 * In terms of 733, " at " is recognized as equivalent to "@".
80 *
81 * In terms of both the parser will not complain about missing hosts.
82 *
83 * -----
84 *
85 * We should not allow addresses like
86 *
87 * Marshall T. Rose <MRose@UCI>
88 *
89 * but should insist on
90 *
91 * "Marshall T. Rose" <MRose@UCI>
92 *
93 * Unfortunately, a lot of mailers stupidly let people get away with this.
94 *
95 * -----
96 *
97 * We should not allow addresses like
98 *
99 * <MRose@UCI>
100 *
101 * but should insist on
102 *
103 * MRose@UCI
104 *
105 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
106 * this.
107 *
108 * -----
109 *
110 * We should not allow addresses like
111 *
112 * @UCI:MRose@UCI-750a
113 *
114 * but should insist on
115 *
116 * Marshall Rose <@UCI:MRose@UCI-750a>
117 *
118 * Unfortunately, a lot of mailers stupidly do this.
119 *
120 */
121
/* Character that escapes the following character inside comments,
   quoted strings and domain literals. */
#define QUOTE '\\'

/* Token classes reported by my_lex(). */
enum {
    LX_END  = 0,    /* no more input */
    LX_ERR  = 1,    /* malformed or unexpected input */
    LX_ATOM = 2,    /* run of ordinary characters */
    LX_QSTR = 3,    /* "quoted string" */
    LX_DLIT = 4,    /* [domain literal] */
    LX_SEMI = 5,    /* ';' */
    LX_COMA = 6,    /* ',' */
    LX_LBRK = 7,    /* '<' */
    LX_RBRK = 8,    /* '>' */
    LX_COLN = 9,    /* ':' */
    LX_DOT  = 10,   /* '.' */
    LX_AT   = 11    /* '@', or the word " at " standing in for it */
};
136
137 struct specials {
138 char lx_chr;
139 int lx_val;
140 };
141
142 static struct specials special[] = {
143 { ';', LX_SEMI },
144 { ',', LX_COMA },
145 { '<', LX_LBRK },
146 { '>', LX_RBRK },
147 { ':', LX_COLN },
148 { '.', LX_DOT },
149 { '@', LX_AT },
150 { '(', LX_ERR },
151 { ')', LX_ERR },
152 { QUOTE, LX_ERR },
153 { '"', LX_ERR },
154 { '[', LX_ERR },
155 { ']', LX_ERR },
156 { 0, 0 }
157 };
158
159 static int glevel = 0;
160 static int ingrp = 0;
161 static int last_lex = LX_END;
162
163 static char *dp = NULL;
164 static char *cp = NULL;
165 static char *ap = NULL;
166 static char *pers = NULL;
167 static char *mbox = NULL;
168 static char *host = NULL;
169 static char *path = NULL;
170 static char *grp = NULL;
171 static char *note = NULL;
172 static char err[BUFSIZ];
173 static char adr[BUFSIZ];
174
175 static struct adrx adrxs2;
176
177
178 /* eai = Email Address Internationalization */
179 struct adrx *
180 getadrx (const char *addrs, int eai)
181 {
182 char *bp;
183 struct adrx *adrxp = &adrxs2;
184
185 if (pers)
186 free (pers);
187 if (mbox)
188 free (mbox);
189 if (host)
190 free (host);
191 if (path)
192 free (path);
193 if (grp)
194 free (grp);
195 if (note)
196 free (note);
197 pers = mbox = host = path = grp = note = NULL;
198 err[0] = 0;
199
200 if (dp == NULL) {
201 dp = cp = strdup (addrs ? addrs : "");
202 glevel = 0;
203 }
204 else
205 if (cp == NULL) {
206 free (dp);
207 dp = NULL;
208 return NULL;
209 }
210
211 switch (parse_address ()) {
212 case DONE:
213 free (dp);
214 dp = cp = NULL;
215 return NULL;
216
217 case OK:
218 switch (last_lex) {
219 case LX_COMA:
220 case LX_END:
221 break;
222
223 default: /* catch trailing comments */
224 bp = cp;
225 my_lex (adr);
226 cp = bp;
227 break;
228 }
229 break;
230
231 default:
232 break;
233 }
234
235 if (! eai) {
236 /*
237 * Reject the address if key fields contain 8bit characters
238 */
239
240 if (contains8bit(mbox, NULL) || contains8bit(host, NULL) ||
241 contains8bit(path, NULL) || contains8bit(grp, NULL)) {
242 strcpy(err, "Address contains 8-bit characters");
243 }
244 }
245
246 if (err[0])
247 for (;;) {
248 switch (last_lex) {
249 case LX_COMA:
250 case LX_END:
251 break;
252
253 default:
254 my_lex (adr);
255 continue;
256 }
257 break;
258 }
259 while (isspace ((unsigned char) *ap))
260 ap++;
261 if (cp)
262 snprintf(adr, sizeof adr, "%.*s", (int)(cp - ap), ap);
263 else
264 strcpy (adr, ap);
265 bp = adr + strlen (adr) - 1;
266 if (*bp == ',' || *bp == ';' || *bp == '\n')
267 *bp = 0;
268
269 adrxp->text = adr;
270 adrxp->pers = pers;
271 adrxp->mbox = mbox;
272 adrxp->host = host;
273 adrxp->path = path;
274 adrxp->grp = grp;
275 adrxp->ingrp = ingrp;
276 adrxp->note = note;
277 adrxp->err = err[0] ? err : NULL;
278
279 return adrxp;
280 }
281
282
283 static int
284 parse_address (void)
285 {
286 char buffer[BUFSIZ];
287
288 again: ;
289 ap = cp;
290 switch (my_lex (buffer)) {
291 case LX_ATOM:
292 case LX_QSTR:
293 pers = strdup (buffer);
294 break;
295
296 case LX_SEMI:
297 if (glevel-- <= 0) {
298 strcpy (err, "extraneous semi-colon");
299 return NOTOK;
300 }
301 case LX_COMA:
302 if (note) {
303 free (note);
304 note = NULL;
305 }
306 goto again;
307
308 case LX_END:
309 return DONE;
310
311 case LX_LBRK: /* sigh (2) */
312 goto get_addr;
313
314 case LX_AT: /* sigh (3) */
315 cp = ap;
316 if (route_addr (buffer) == NOTOK)
317 return NOTOK;
318 return OK; /* why be choosy? */
319
320 default:
321 snprintf(err, sizeof err, "illegal address construct (%s)", buffer);
322 return NOTOK;
323 }
324
325 switch (my_lex (buffer)) {
326 case LX_ATOM:
327 case LX_QSTR:
328 pers = add (buffer, add (" ", pers));
329 more_phrase: ; /* sigh (1) */
330 if (phrase (buffer) == NOTOK)
331 return NOTOK;
332
333 switch (last_lex) {
334 case LX_LBRK:
335 get_addr: ;
336 if (route_addr (buffer) == NOTOK)
337 return NOTOK;
338 if (last_lex == LX_RBRK)
339 return OK;
340 snprintf(err, sizeof err, "missing right-bracket (%s)", buffer);
341 return NOTOK;
342
343 case LX_COLN:
344 get_group: ;
345 if (glevel++ > 0) {
346 snprintf(err, sizeof err, "nested groups not allowed (%s)", pers);
347 return NOTOK;
348 }
349 grp = add (": ", pers);
350 pers = NULL;
351 {
352 char *pp = cp;
353
354 for (;;)
355 switch (my_lex (buffer)) {
356 case LX_SEMI:
357 case LX_END: /* tsk, tsk */
358 glevel--;
359 return OK;
360
361 case LX_COMA:
362 continue;
363
364 default:
365 cp = pp;
366 return parse_address ();
367 }
368 }
369
370 case LX_DOT: /* sigh (1) */
371 pers = add (".", pers);
372 goto more_phrase;
373
374 default:
375 snprintf(err, sizeof err, "no mailbox in address, only a phrase (%s%s)",
376 pers, buffer);
377 return NOTOK;
378 }
379
380 case LX_LBRK:
381 goto get_addr;
382
383 case LX_COLN:
384 goto get_group;
385
386 case LX_DOT:
387 mbox = add (buffer, pers);
388 pers = NULL;
389 if (route_addr (buffer) == NOTOK)
390 return NOTOK;
391 goto check_end;
392
393 case LX_AT:
394 ingrp = glevel;
395 mbox = pers;
396 pers = NULL;
397 if (domain (buffer) == NOTOK)
398 return NOTOK;
399 check_end: ;
400 switch (last_lex) {
401 case LX_SEMI:
402 if (glevel-- <= 0) {
403 strcpy (err, "extraneous semi-colon");
404 return NOTOK;
405 }
406 case LX_COMA:
407 case LX_END:
408 return OK;
409
410 default:
411 snprintf(err, sizeof err, "junk after local@domain (%s)", buffer);
412 return NOTOK;
413 }
414
415 case LX_SEMI: /* no host */
416 case LX_COMA:
417 case LX_END:
418 ingrp = glevel;
419 if (last_lex == LX_SEMI && glevel-- <= 0) {
420 strcpy (err, "extraneous semi-colon");
421 return NOTOK;
422 }
423 mbox = pers;
424 pers = NULL;
425 return OK;
426
427 default:
428 snprintf(err, sizeof err, "missing mailbox (%s)", buffer);
429 return NOTOK;
430 }
431 }
432
433
434 static int
435 phrase (char *buffer)
436 {
437 for (;;)
438 switch (my_lex (buffer)) {
439 case LX_ATOM:
440 case LX_QSTR:
441 pers = add (buffer, add (" ", pers));
442 continue;
443
444 default:
445 return OK;
446 }
447 }
448
449
450 static int
451 route_addr (char *buffer)
452 {
453 char *pp = cp;
454
455 if (my_lex (buffer) == LX_AT) {
456 if (route (buffer) == NOTOK)
457 return NOTOK;
458 }
459 else
460 cp = pp;
461
462 if (local_part (buffer) == NOTOK)
463 return NOTOK;
464
465 switch (last_lex) {
466 case LX_AT:
467 return domain (buffer);
468
469 case LX_SEMI: /* if in group */
470 case LX_RBRK: /* no host */
471 case LX_COMA:
472 case LX_END:
473 return OK;
474
475 default:
476 snprintf(err, sizeof err, "no at-sign after local-part (%s)", buffer);
477 return NOTOK;
478 }
479 }
480
481
482 static int
483 local_part (char *buffer)
484 {
485 ingrp = glevel;
486
487 for (;;) {
488 switch (my_lex (buffer)) {
489 case LX_ATOM:
490 case LX_QSTR:
491 mbox = add (buffer, mbox);
492 break;
493
494 default:
495 snprintf(err, sizeof err, "no mailbox in local-part (%s)", buffer);
496 return NOTOK;
497 }
498
499 switch (my_lex (buffer)) {
500 case LX_DOT:
501 mbox = add (buffer, mbox);
502 continue;
503
504 default:
505 return OK;
506 }
507 }
508 }
509
510
511 static int
512 domain (char *buffer)
513 {
514 for (;;) {
515 switch (my_lex (buffer)) {
516 case LX_ATOM:
517 case LX_DLIT:
518 host = add (buffer, host);
519 break;
520
521 default:
522 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
523 return NOTOK;
524 }
525
526 switch (my_lex (buffer)) {
527 case LX_DOT:
528 host = add (buffer, host);
529 continue;
530
531 case LX_AT: /* sigh (0) */
532 mbox = add (host, add ("%", mbox));
533 free (host);
534 host = NULL;
535 continue;
536
537 default:
538 return OK;
539 }
540 }
541 }
542
543
544 static int
545 route (char *buffer)
546 {
547 path = strdup ("@");
548
549 for (;;) {
550 switch (my_lex (buffer)) {
551 case LX_ATOM:
552 case LX_DLIT:
553 path = add (buffer, path);
554 break;
555
556 default:
557 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
558 return NOTOK;
559 }
560 switch (my_lex (buffer)) {
561 case LX_COMA:
562 path = add (buffer, path);
563 for (;;) {
564 switch (my_lex (buffer)) {
565 case LX_COMA:
566 continue;
567
568 case LX_AT:
569 path = add (buffer, path);
570 break;
571
572 default:
573 snprintf(err, sizeof err, "no at-sign found for next domain in route (%s)",
574 buffer);
575 }
576 break;
577 }
578 continue;
579
580 case LX_AT: /* XXX */
581 case LX_DOT:
582 path = add (buffer, path);
583 continue;
584
585 case LX_COLN:
586 path = add (buffer, path);
587 return OK;
588
589 default:
590 snprintf(err, sizeof err, "no colon found to terminate route (%s)", buffer);
591 return NOTOK;
592 }
593 }
594 }
595
596
597 static int
598 my_lex (char *buffer)
599 {
600 /* buffer should be at least BUFSIZ bytes long */
601 int i, gotat = 0;
602 char c, *bp;
603
604 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
605 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
606
607 bp = buffer;
608 *bp = 0;
609 if (!cp)
610 return (last_lex = LX_END);
611
612 gotat = isat (cp);
613 c = *cp++;
614 while (isspace ((unsigned char) c))
615 c = *cp++;
616 if (c == 0) {
617 cp = NULL;
618 return (last_lex = LX_END);
619 }
620
621 if (c == '(') {
622 ADDCHR(c);
623 for (i = 0;;)
624 switch (c = *cp++) {
625 case 0:
626 cp = NULL;
627 return (last_lex = LX_ERR);
628 case QUOTE:
629 ADDCHR(c);
630 if ((c = *cp++) == 0) {
631 cp = NULL;
632 return (last_lex = LX_ERR);
633 }
634 ADDCHR(c);
635 continue;
636 case '(':
637 i++;
638 default:
639 ADDCHR(c);
640 continue;
641 case ')':
642 ADDCHR(c);
643 if (--i < 0) {
644 *bp = 0;
645 note = note ? add (buffer, add (" ", note))
646 : strdup (buffer);
647 return my_lex (buffer);
648 }
649 }
650 }
651
652 if (c == '"') {
653 ADDCHR(c);
654 for (;;)
655 switch (c = *cp++) {
656 case 0:
657 cp = NULL;
658 return (last_lex = LX_ERR);
659 case QUOTE:
660 ADDCHR(c);
661 if ((c = *cp++) == 0) {
662 cp = NULL;
663 return (last_lex = LX_ERR);
664 }
665 default:
666 ADDCHR(c);
667 continue;
668 case '"':
669 ADDCHR(c);
670 *bp = 0;
671 return (last_lex = LX_QSTR);
672 }
673 }
674
675 if (c == '[') {
676 ADDCHR(c);
677 for (;;)
678 switch (c = *cp++) {
679 case 0:
680 cp = NULL;
681 return (last_lex = LX_ERR);
682 case QUOTE:
683 ADDCHR(c);
684 if ((c = *cp++) == 0) {
685 cp = NULL;
686 return (last_lex = LX_ERR);
687 }
688 default:
689 ADDCHR(c);
690 continue;
691 case ']':
692 ADDCHR(c);
693 *bp = 0;
694 return (last_lex = LX_DLIT);
695 }
696 }
697
698 ADDCHR(c);
699 *bp = 0;
700 for (i = 0; special[i].lx_chr != 0; i++)
701 if (c == special[i].lx_chr)
702 return (last_lex = special[i].lx_val);
703
704 if (iscntrl ((unsigned char) c))
705 return (last_lex = LX_ERR);
706
707 for (;;) {
708 if ((c = *cp++) == 0)
709 break;
710 for (i = 0; special[i].lx_chr != 0; i++)
711 if (c == special[i].lx_chr)
712 goto got_atom;
713 if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
714 break;
715 ADDCHR(c);
716 }
717 got_atom: ;
718 if (c == 0)
719 cp = NULL;
720 else
721 cp--;
722 *bp = 0;
723 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
724 ? LX_ATOM : LX_AT;
725 return last_lex;
726
727 my_lex_buffull:
728 /* Out of buffer space. *bp is the last byte in the buffer */
729 *bp = 0;
730 return (last_lex = LX_ERR);
731 }
732
733
734 char *
735 legal_person (const char *p)
736 {
737 int i;
738 const char *cp;
739 static char buffer[BUFSIZ];
740
741 if (*p == '"')
742 return (char *) p;
743 for (cp = p; *cp; cp++)
744 for (i = 0; special[i].lx_chr; i++)
745 if (*cp == special[i].lx_chr) {
746 snprintf(buffer, sizeof buffer, "\"%s\"", p);
747 return buffer;
748 }
749
750 return (char *) p;
751 }
752
753
754 int
755 mfgets (FILE *in, char **bp)
756 {
757 int i;
758 char *cp, *dp, *ep;
759 static int len = 0;
760 static char *pp = NULL;
761
762 if (pp == NULL)
763 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
764
765 for (ep = (cp = pp) + len - 2;;) {
766 switch (i = getc (in)) {
767 case EOF:
768 eol: ;
769 if (cp != pp) {
770 *cp = 0;
771 *bp = pp;
772 return OK;
773 }
774 eoh: ;
775 *bp = NULL;
776 free (pp);
777 pp = NULL;
778 return DONE;
779
780 case 0:
781 continue;
782
783 case '\n':
784 if (cp == pp) /* end of headers, gobble it */
785 goto eoh;
786 switch (i = getc (in)) {
787 default: /* end of line */
788 case '\n': /* end of headers, save for next call */
789 ungetc (i, in);
790 goto eol;
791
792 case ' ': /* continue headers */
793 case '\t':
794 *cp++ = '\n';
795 break;
796 } /* fall into default case */
797
798 default:
799 *cp++ = i;
800 break;
801 }
802 if (cp >= ep) {
803 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
804 cp += dp - pp, ep = (pp = cp) + len - 2;
805 }
806 }
807 }