]> diplodocus.org Git - nmh/blob - sbr/mf.c
Assume POSIX ctype.h; don't vet toupper()'s parameter.
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
8 */
9
10 #include <h/mf.h>
11 #include <h/utils.h>
12
/*
 * static prototypes
 *
 * Parameter names are spelled out so each declaration documents what
 * the helper expects; "buffer" is always the caller-supplied token
 * buffer that my_lex() fills in.
 */
static int isat (const char *p);
static int parse_address (void);
static int phrase (char *buffer);
static int route_addr (char *buffer);
static int local_part (char *buffer);
static int domain (char *buffer);
static int route (char *buffer);
static int my_lex (char *buffer);
24
25
/*
 * isfrom -- test whether a line starts with "From " or ">From ".
 *
 * Returns 1 when STRING begins with either prefix (the comparison is
 * case-sensitive and the trailing space is required), 0 otherwise.
 */
int
isfrom(const char *string)
{
    if (strncmp (string, "From ", 5) == 0)
        return 1;

    return strncmp (string, ">From ", 6) == 0;
}
32
33
34 int
35 lequal (const char *a, const char *b)
36 {
37 char c1, c2;
38
39 for (; *a; a++, b++) {
40 if (*b == 0)
41 return FALSE;
42 c1 = toupper((unsigned char)*a);
43 c2 = toupper((unsigned char)*b);
44 if (c1 != c2)
45 return FALSE;
46 }
47
48 return (*b == 0);
49 }
50
51
52 static int
53 isat (const char *p)
54 {
55 return (strncmp (p, " AT ", 4)
56 && strncmp (p, " At ", 4)
57 && strncmp (p, " aT ", 4)
58 && strncmp (p, " at ", 4) ? FALSE : TRUE);
59 }
60
61
62 /*
63 *
64 * getadrx() implements a partial 822-style address parser. The parser
65 * is neither complete nor correct. It does however recognize nearly all
66 * of the 822 address syntax. In addition it handles the majority of the
67 * 733 syntax as well. Most problems arise from trying to accommodate both.
68 *
69 * In terms of 822, the route-specification in
70 *
71 * "<" [route] local-part "@" domain ">"
72 *
73 * is parsed and returned unchanged. Multiple at-signs are compressed
74 * via source-routing. Recursive groups are not allowed as per the
75 * standard.
76 *
77 * In terms of 733, " at " is recognized as equivalent to "@".
78 *
79 * In terms of both the parser will not complain about missing hosts.
80 *
81 * -----
82 *
83 * We should not allow addresses like
84 *
85 * Marshall T. Rose <MRose@UCI>
86 *
87 * but should insist on
88 *
89 * "Marshall T. Rose" <MRose@UCI>
90 *
91 * Unfortunately, a lot of mailers stupidly let people get away with this.
92 *
93 * -----
94 *
95 * We should not allow addresses like
96 *
97 * <MRose@UCI>
98 *
99 * but should insist on
100 *
101 * MRose@UCI
102 *
103 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
104 * this.
105 *
106 * -----
107 *
108 * We should not allow addresses like
109 *
110 * @UCI:MRose@UCI-750a
111 *
112 * but should insist on
113 *
114 * Marshall Rose <@UCI:MRose@UCI-750a>
115 *
116 * Unfortunately, a lot of mailers stupidly do this.
117 *
118 */
119
/* Backslash: my_lex() treats it as an escape inside comments, quoted
 * strings and domain literals. */
#define QUOTE '\\'

/*
 * Token codes returned by my_lex() (and remembered in last_lex).
 */
#define LX_END   0      /* end of input */
#define LX_ERR   1      /* lexical error */
#define LX_ATOM  2      /* plain word */
#define LX_QSTR  3      /* "double-quoted string" */
#define LX_DLIT  4      /* [bracketed literal] */
#define LX_SEMI  5      /* ';' */
#define LX_COMA  6      /* ',' */
#define LX_LBRK  7      /* '<' */
#define LX_RBRK  8      /* '>' */
#define LX_COLN  9      /* ':' */
#define LX_DOT  10      /* '.' */
#define LX_AT   11      /* '@' (or " at ") */

/* One entry of the special-character table: a character and the token
 * code my_lex() reports when that character stands alone. */
struct specials {
    char lx_chr;
    int lx_val;
};

/*
 * Characters that cannot be part of an atom.  The table is terminated
 * by an entry whose lx_chr is 0.
 */
static struct specials special[] = {
    { ';',   LX_SEMI },
    { ',',   LX_COMA },
    { '<',   LX_LBRK },
    { '>',   LX_RBRK },
    { ':',   LX_COLN },
    { '.',   LX_DOT  },
    { '@',   LX_AT   },
    { '(',   LX_ERR  },
    { ')',   LX_ERR  },
    { QUOTE, LX_ERR  },
    { '"',   LX_ERR  },
    { '[',   LX_ERR  },
    { ']',   LX_ERR  },
    { 0,     0       }
};
156
157 static int glevel = 0;
158 static int ingrp = 0;
159 static int last_lex = LX_END;
160
161 static char *dp = NULL;
162 static char *cp = NULL;
163 static char *ap = NULL;
164 static char *pers = NULL;
165 static char *mbox = NULL;
166 static char *host = NULL;
167 static char *path = NULL;
168 static char *grp = NULL;
169 static char *note = NULL;
170 static char err[BUFSIZ];
171 static char adr[BUFSIZ];
172
173 static struct adrx adrxs2;
174
175
176 /* eai = Email Address Internationalization */
177 struct adrx *
178 getadrx (const char *addrs, int eai)
179 {
180 char *bp;
181 struct adrx *adrxp = &adrxs2;
182
183 if (pers)
184 free (pers);
185 if (mbox)
186 free (mbox);
187 if (host)
188 free (host);
189 if (path)
190 free (path);
191 if (grp)
192 free (grp);
193 if (note)
194 free (note);
195 pers = mbox = host = path = grp = note = NULL;
196 err[0] = 0;
197
198 if (dp == NULL) {
199 dp = cp = strdup (addrs ? addrs : "");
200 glevel = 0;
201 }
202 else
203 if (cp == NULL) {
204 free (dp);
205 dp = NULL;
206 return NULL;
207 }
208
209 switch (parse_address ()) {
210 case DONE:
211 free (dp);
212 dp = cp = NULL;
213 return NULL;
214
215 case OK:
216 switch (last_lex) {
217 case LX_COMA:
218 case LX_END:
219 break;
220
221 default: /* catch trailing comments */
222 bp = cp;
223 my_lex (adr);
224 cp = bp;
225 break;
226 }
227 break;
228
229 default:
230 break;
231 }
232
233 if (! eai) {
234 /*
235 * Reject the address if key fields contain 8bit characters
236 */
237
238 if (contains8bit(mbox, NULL) || contains8bit(host, NULL) ||
239 contains8bit(path, NULL) || contains8bit(grp, NULL)) {
240 strcpy(err, "Address contains 8-bit characters");
241 }
242 }
243
244 if (err[0])
245 for (;;) {
246 switch (last_lex) {
247 case LX_COMA:
248 case LX_END:
249 break;
250
251 default:
252 my_lex (adr);
253 continue;
254 }
255 break;
256 }
257 while (isspace ((unsigned char) *ap))
258 ap++;
259 if (cp)
260 snprintf(adr, sizeof adr, "%.*s", (int)(cp - ap), ap);
261 else
262 strcpy (adr, ap);
263 bp = adr + strlen (adr) - 1;
264 if (*bp == ',' || *bp == ';' || *bp == '\n')
265 *bp = 0;
266
267 adrxp->text = adr;
268 adrxp->pers = pers;
269 adrxp->mbox = mbox;
270 adrxp->host = host;
271 adrxp->path = path;
272 adrxp->grp = grp;
273 adrxp->ingrp = ingrp;
274 adrxp->note = note;
275 adrxp->err = err[0] ? err : NULL;
276
277 return adrxp;
278 }
279
280
281 static int
282 parse_address (void)
283 {
284 char buffer[BUFSIZ];
285
286 again: ;
287 ap = cp;
288 switch (my_lex (buffer)) {
289 case LX_ATOM:
290 case LX_QSTR:
291 pers = strdup (buffer);
292 break;
293
294 case LX_SEMI:
295 if (glevel-- <= 0) {
296 strcpy (err, "extraneous semi-colon");
297 return NOTOK;
298 }
299 case LX_COMA:
300 if (note) {
301 free (note);
302 note = NULL;
303 }
304 goto again;
305
306 case LX_END:
307 return DONE;
308
309 case LX_LBRK: /* sigh (2) */
310 goto get_addr;
311
312 case LX_AT: /* sigh (3) */
313 cp = ap;
314 if (route_addr (buffer) == NOTOK)
315 return NOTOK;
316 return OK; /* why be choosy? */
317
318 default:
319 snprintf(err, sizeof err, "illegal address construct (%s)", buffer);
320 return NOTOK;
321 }
322
323 switch (my_lex (buffer)) {
324 case LX_ATOM:
325 case LX_QSTR:
326 pers = add (buffer, add (" ", pers));
327 more_phrase: ; /* sigh (1) */
328 if (phrase (buffer) == NOTOK)
329 return NOTOK;
330
331 switch (last_lex) {
332 case LX_LBRK:
333 get_addr: ;
334 if (route_addr (buffer) == NOTOK)
335 return NOTOK;
336 if (last_lex == LX_RBRK)
337 return OK;
338 snprintf(err, sizeof err, "missing right-bracket (%s)", buffer);
339 return NOTOK;
340
341 case LX_COLN:
342 get_group: ;
343 if (glevel++ > 0) {
344 snprintf(err, sizeof err, "nested groups not allowed (%s)", pers);
345 return NOTOK;
346 }
347 grp = add (": ", pers);
348 pers = NULL;
349 {
350 char *pp = cp;
351
352 for (;;)
353 switch (my_lex (buffer)) {
354 case LX_SEMI:
355 case LX_END: /* tsk, tsk */
356 glevel--;
357 return OK;
358
359 case LX_COMA:
360 continue;
361
362 default:
363 cp = pp;
364 return parse_address ();
365 }
366 }
367
368 case LX_DOT: /* sigh (1) */
369 pers = add (".", pers);
370 goto more_phrase;
371
372 default:
373 snprintf(err, sizeof err, "no mailbox in address, only a phrase (%s%s)",
374 pers, buffer);
375 return NOTOK;
376 }
377
378 case LX_LBRK:
379 goto get_addr;
380
381 case LX_COLN:
382 goto get_group;
383
384 case LX_DOT:
385 mbox = add (buffer, pers);
386 pers = NULL;
387 if (route_addr (buffer) == NOTOK)
388 return NOTOK;
389 goto check_end;
390
391 case LX_AT:
392 ingrp = glevel;
393 mbox = pers;
394 pers = NULL;
395 if (domain (buffer) == NOTOK)
396 return NOTOK;
397 check_end: ;
398 switch (last_lex) {
399 case LX_SEMI:
400 if (glevel-- <= 0) {
401 strcpy (err, "extraneous semi-colon");
402 return NOTOK;
403 }
404 case LX_COMA:
405 case LX_END:
406 return OK;
407
408 default:
409 snprintf(err, sizeof err, "junk after local@domain (%s)", buffer);
410 return NOTOK;
411 }
412
413 case LX_SEMI: /* no host */
414 case LX_COMA:
415 case LX_END:
416 ingrp = glevel;
417 if (last_lex == LX_SEMI && glevel-- <= 0) {
418 strcpy (err, "extraneous semi-colon");
419 return NOTOK;
420 }
421 mbox = pers;
422 pers = NULL;
423 return OK;
424
425 default:
426 snprintf(err, sizeof err, "missing mailbox (%s)", buffer);
427 return NOTOK;
428 }
429 }
430
431
432 static int
433 phrase (char *buffer)
434 {
435 for (;;)
436 switch (my_lex (buffer)) {
437 case LX_ATOM:
438 case LX_QSTR:
439 pers = add (buffer, add (" ", pers));
440 continue;
441
442 default:
443 return OK;
444 }
445 }
446
447
448 static int
449 route_addr (char *buffer)
450 {
451 char *pp = cp;
452
453 if (my_lex (buffer) == LX_AT) {
454 if (route (buffer) == NOTOK)
455 return NOTOK;
456 }
457 else
458 cp = pp;
459
460 if (local_part (buffer) == NOTOK)
461 return NOTOK;
462
463 switch (last_lex) {
464 case LX_AT:
465 return domain (buffer);
466
467 case LX_SEMI: /* if in group */
468 case LX_RBRK: /* no host */
469 case LX_COMA:
470 case LX_END:
471 return OK;
472
473 default:
474 snprintf(err, sizeof err, "no at-sign after local-part (%s)", buffer);
475 return NOTOK;
476 }
477 }
478
479
480 static int
481 local_part (char *buffer)
482 {
483 ingrp = glevel;
484
485 for (;;) {
486 switch (my_lex (buffer)) {
487 case LX_ATOM:
488 case LX_QSTR:
489 mbox = add (buffer, mbox);
490 break;
491
492 default:
493 snprintf(err, sizeof err, "no mailbox in local-part (%s)", buffer);
494 return NOTOK;
495 }
496
497 switch (my_lex (buffer)) {
498 case LX_DOT:
499 mbox = add (buffer, mbox);
500 continue;
501
502 default:
503 return OK;
504 }
505 }
506 }
507
508
509 static int
510 domain (char *buffer)
511 {
512 for (;;) {
513 switch (my_lex (buffer)) {
514 case LX_ATOM:
515 case LX_DLIT:
516 host = add (buffer, host);
517 break;
518
519 default:
520 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
521 return NOTOK;
522 }
523
524 switch (my_lex (buffer)) {
525 case LX_DOT:
526 host = add (buffer, host);
527 continue;
528
529 case LX_AT: /* sigh (0) */
530 mbox = add (host, add ("%", mbox));
531 free (host);
532 host = NULL;
533 continue;
534
535 default:
536 return OK;
537 }
538 }
539 }
540
541
542 static int
543 route (char *buffer)
544 {
545 path = strdup ("@");
546
547 for (;;) {
548 switch (my_lex (buffer)) {
549 case LX_ATOM:
550 case LX_DLIT:
551 path = add (buffer, path);
552 break;
553
554 default:
555 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
556 return NOTOK;
557 }
558 switch (my_lex (buffer)) {
559 case LX_COMA:
560 path = add (buffer, path);
561 for (;;) {
562 switch (my_lex (buffer)) {
563 case LX_COMA:
564 continue;
565
566 case LX_AT:
567 path = add (buffer, path);
568 break;
569
570 default:
571 snprintf(err, sizeof err, "no at-sign found for next domain in route (%s)",
572 buffer);
573 }
574 break;
575 }
576 continue;
577
578 case LX_AT: /* XXX */
579 case LX_DOT:
580 path = add (buffer, path);
581 continue;
582
583 case LX_COLN:
584 path = add (buffer, path);
585 return OK;
586
587 default:
588 snprintf(err, sizeof err, "no colon found to terminate route (%s)", buffer);
589 return NOTOK;
590 }
591 }
592 }
593
594
595 static int
596 my_lex (char *buffer)
597 {
598 /* buffer should be at least BUFSIZ bytes long */
599 int i, gotat = 0;
600 char c, *bp;
601
602 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
603 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
604
605 bp = buffer;
606 *bp = 0;
607 if (!cp)
608 return (last_lex = LX_END);
609
610 gotat = isat (cp);
611 c = *cp++;
612 while (isspace ((unsigned char) c))
613 c = *cp++;
614 if (c == 0) {
615 cp = NULL;
616 return (last_lex = LX_END);
617 }
618
619 if (c == '(') {
620 ADDCHR(c);
621 for (i = 0;;)
622 switch (c = *cp++) {
623 case 0:
624 cp = NULL;
625 return (last_lex = LX_ERR);
626 case QUOTE:
627 ADDCHR(c);
628 if ((c = *cp++) == 0) {
629 cp = NULL;
630 return (last_lex = LX_ERR);
631 }
632 ADDCHR(c);
633 continue;
634 case '(':
635 i++;
636 default:
637 ADDCHR(c);
638 continue;
639 case ')':
640 ADDCHR(c);
641 if (--i < 0) {
642 *bp = 0;
643 note = note ? add (buffer, add (" ", note))
644 : strdup (buffer);
645 return my_lex (buffer);
646 }
647 }
648 }
649
650 if (c == '"') {
651 ADDCHR(c);
652 for (;;)
653 switch (c = *cp++) {
654 case 0:
655 cp = NULL;
656 return (last_lex = LX_ERR);
657 case QUOTE:
658 ADDCHR(c);
659 if ((c = *cp++) == 0) {
660 cp = NULL;
661 return (last_lex = LX_ERR);
662 }
663 default:
664 ADDCHR(c);
665 continue;
666 case '"':
667 ADDCHR(c);
668 *bp = 0;
669 return (last_lex = LX_QSTR);
670 }
671 }
672
673 if (c == '[') {
674 ADDCHR(c);
675 for (;;)
676 switch (c = *cp++) {
677 case 0:
678 cp = NULL;
679 return (last_lex = LX_ERR);
680 case QUOTE:
681 ADDCHR(c);
682 if ((c = *cp++) == 0) {
683 cp = NULL;
684 return (last_lex = LX_ERR);
685 }
686 default:
687 ADDCHR(c);
688 continue;
689 case ']':
690 ADDCHR(c);
691 *bp = 0;
692 return (last_lex = LX_DLIT);
693 }
694 }
695
696 ADDCHR(c);
697 *bp = 0;
698 for (i = 0; special[i].lx_chr != 0; i++)
699 if (c == special[i].lx_chr)
700 return (last_lex = special[i].lx_val);
701
702 if (iscntrl ((unsigned char) c))
703 return (last_lex = LX_ERR);
704
705 for (;;) {
706 if ((c = *cp++) == 0)
707 break;
708 for (i = 0; special[i].lx_chr != 0; i++)
709 if (c == special[i].lx_chr)
710 goto got_atom;
711 if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
712 break;
713 ADDCHR(c);
714 }
715 got_atom: ;
716 if (c == 0)
717 cp = NULL;
718 else
719 cp--;
720 *bp = 0;
721 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
722 ? LX_ATOM : LX_AT;
723 return last_lex;
724
725 my_lex_buffull:
726 /* Out of buffer space. *bp is the last byte in the buffer */
727 *bp = 0;
728 return (last_lex = LX_ERR);
729 }
730
731
732 char *
733 legal_person (const char *p)
734 {
735 int i;
736 const char *cp;
737 static char buffer[BUFSIZ];
738
739 if (*p == '"')
740 return (char *) p;
741 for (cp = p; *cp; cp++)
742 for (i = 0; special[i].lx_chr; i++)
743 if (*cp == special[i].lx_chr) {
744 snprintf(buffer, sizeof buffer, "\"%s\"", p);
745 return buffer;
746 }
747
748 return (char *) p;
749 }
750
751
752 int
753 mfgets (FILE *in, char **bp)
754 {
755 int i;
756 char *cp, *dp, *ep;
757 static int len = 0;
758 static char *pp = NULL;
759
760 if (pp == NULL)
761 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
762
763 for (ep = (cp = pp) + len - 2;;) {
764 switch (i = getc (in)) {
765 case EOF:
766 eol: ;
767 if (cp != pp) {
768 *cp = 0;
769 *bp = pp;
770 return OK;
771 }
772 eoh: ;
773 *bp = NULL;
774 free (pp);
775 pp = NULL;
776 return DONE;
777
778 case 0:
779 continue;
780
781 case '\n':
782 if (cp == pp) /* end of headers, gobble it */
783 goto eoh;
784 switch (i = getc (in)) {
785 default: /* end of line */
786 case '\n': /* end of headers, save for next call */
787 ungetc (i, in);
788 goto eol;
789
790 case ' ': /* continue headers */
791 case '\t':
792 *cp++ = '\n';
793 break;
794 } /* fall into default case */
795
796 default:
797 *cp++ = i;
798 break;
799 }
800 if (cp >= ep) {
801 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
802 cp += dp - pp, ep = (pp = cp) + len - 2;
803 }
804 }
805 }