]> diplodocus.org Git - nmh/blob - sbr/mf.c
Escape literal leading full stop in man/new.man.
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
8 */
9
10 #include <h/mf.h>
11 #include <h/utils.h>
12
13 /*
14 * static prototypes
15 */
16 static int isat (const char *);
17 static int parse_address (void);
18 static int phrase (char *);
19 static int route_addr (char *);
20 static int local_part (char *);
21 static int domain (char *);
22 static int route (char *);
23 static int my_lex (char *);
24
25
/*
 * isfrom -- test whether a line starts with "From " or ">From ".
 *
 * Returns 1 when STRING begins with either spelling, 0 otherwise.
 */
int
isfrom(const char *string)
{
    if (has_prefix(string, "From "))
        return 1;
    if (has_prefix(string, ">From "))
        return 1;
    return 0;
}
32
33
/*
 * lequal -- case-insensitive string equality.
 *
 * Returns non-zero when A and B have the same length and every pair of
 * corresponding characters matches after upper-casing; zero otherwise.
 */
int
lequal (const char *a, const char *b)
{
    /* Advance in lock-step until one string ends or the characters differ. */
    while (*a != '\0' && *b != '\0') {
        if (toupper((unsigned char) *a) != toupper((unsigned char) *b))
            break;
        a++;
        b++;
    }

    /* Equal only if both strings were consumed completely. */
    return *a == '\0' && *b == '\0';
}
50
51
/*
 * isat -- test whether P starts with the word "at" in any mix of upper
 * and lower case, surrounded by single spaces (" at ").
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int
isat (const char *p)
{
    static const char *const spellings[] = { " AT ", " At ", " aT ", " at " };
    unsigned int k;

    for (k = 0; k < sizeof spellings / sizeof spellings[0]; k++)
        if (has_prefix(p, spellings[k]))
            return 1;

    return 0;
}
58
59
60 /*
61 *
62 * getadrx() implements a partial 822-style address parser. The parser
63 * is neither complete nor correct. It does however recognize nearly all
64 * of the 822 address syntax. In addition it handles the majority of the
65 * 733 syntax as well. Most problems arise from trying to accommodate both.
66 *
67 * In terms of 822, the route-specification in
68 *
69 * "<" [route] local-part "@" domain ">"
70 *
71 * is parsed and returned unchanged. Multiple at-signs are compressed
72 * via source-routing. Recursive groups are not allowed as per the
73 * standard.
74 *
75 * In terms of 733, " at " is recognized as equivalent to "@".
76 *
77 * In terms of both the parser will not complain about missing hosts.
78 *
79 * -----
80 *
81 * We should not allow addresses like
82 *
83 * Marshall T. Rose <MRose@UCI>
84 *
85 * but should insist on
86 *
87 * "Marshall T. Rose" <MRose@UCI>
88 *
89 * Unfortunately, a lot of mailers stupidly let people get away with this.
90 *
91 * -----
92 *
93 * We should not allow addresses like
94 *
95 * <MRose@UCI>
96 *
97 * but should insist on
98 *
99 * MRose@UCI
100 *
101 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
102 * this.
103 *
104 * -----
105 *
106 * We should not allow addresses like
107 *
108 * @UCI:MRose@UCI-750a
109 *
110 * but should insist on
111 *
112 * Marshall Rose <@UCI:MRose@UCI-750a>
113 *
114 * Unfortunately, a lot of mailers stupidly do this.
115 *
116 */
117
/* Escape character recognised by my_lex() inside comments, quoted strings
 * and domain literals. */
#define QUOTE '\\'

/*
 * Token codes returned by my_lex() and remembered in last_lex.
 */
#define LX_END   0      /* end of the input string */
#define LX_ERR   1      /* malformed or unexpected input */
#define LX_ATOM  2      /* run of ordinary characters */
#define LX_QSTR  3      /* "quoted string" */
#define LX_DLIT  4      /* [domain literal] */
#define LX_SEMI  5      /* ';' */
#define LX_COMA  6      /* ',' */
#define LX_LBRK  7      /* '<' */
#define LX_RBRK  8      /* '>' */
#define LX_COLN  9      /* ':' */
#define LX_DOT  10      /* '.' */
#define LX_AT   11      /* '@', or an atom introduced by " at " */

/* One entry of the special-character table: a character and the token
 * code my_lex() reports for it. */
struct specials {
    char lx_chr;
    int  lx_val;
};

/*
 * Characters that terminate an atom.  The table is only ever read, so it
 * is const.  Entries mapped to LX_ERR are characters that are an error
 * when they appear on their own as a token.  The list ends with a zero
 * lx_chr sentinel.
 */
static const struct specials special[] = {
    { ';',   LX_SEMI },
    { ',',   LX_COMA },
    { '<',   LX_LBRK },
    { '>',   LX_RBRK },
    { ':',   LX_COLN },
    { '.',   LX_DOT  },
    { '@',   LX_AT   },
    { '(',   LX_ERR  },
    { ')',   LX_ERR  },
    { QUOTE, LX_ERR  },
    { '"',   LX_ERR  },
    { '[',   LX_ERR  },
    { ']',   LX_ERR  },
    { 0,     0       }
};
154
155 static int glevel = 0;
156 static int ingrp = 0;
157 static int last_lex = LX_END;
158
159 static char *dp = NULL;
160 static char *cp = NULL;
161 static char *ap = NULL;
162 static char *pers = NULL;
163 static char *mbox = NULL;
164 static char *host = NULL;
165 static char *path = NULL;
166 static char *grp = NULL;
167 static char *note = NULL;
168 static char err[BUFSIZ];
169 static char adr[BUFSIZ];
170
171 static struct adrx adrxs2;
172
173
174 /* eai = Email Address Internationalization */
175 struct adrx *
176 getadrx (const char *addrs, int eai)
177 {
178 char *bp;
179 struct adrx *adrxp = &adrxs2;
180
181 mh_xfree(pers);
182 mh_xfree(mbox);
183 mh_xfree(host);
184 mh_xfree(path);
185 mh_xfree(grp);
186 mh_xfree(note);
187 pers = mbox = host = path = grp = note = NULL;
188 err[0] = 0;
189
190 if (dp == NULL) {
191 dp = cp = strdup (addrs ? addrs : "");
192 glevel = 0;
193 }
194 else
195 if (cp == NULL) {
196 free (dp);
197 dp = NULL;
198 return NULL;
199 }
200
201 switch (parse_address ()) {
202 case DONE:
203 free (dp);
204 dp = cp = NULL;
205 return NULL;
206
207 case OK:
208 switch (last_lex) {
209 case LX_COMA:
210 case LX_END:
211 break;
212
213 default: /* catch trailing comments */
214 bp = cp;
215 my_lex (adr);
216 cp = bp;
217 break;
218 }
219 break;
220
221 default:
222 break;
223 }
224
225 if (! eai) {
226 /*
227 * Reject the address if key fields contain 8bit characters
228 */
229
230 if (contains8bit(mbox, NULL) || contains8bit(host, NULL) ||
231 contains8bit(path, NULL) || contains8bit(grp, NULL)) {
232 strcpy(err, "Address contains 8-bit characters");
233 }
234 }
235
236 if (err[0])
237 for (;;) {
238 switch (last_lex) {
239 case LX_COMA:
240 case LX_END:
241 break;
242
243 default:
244 my_lex (adr);
245 continue;
246 }
247 break;
248 }
249 while (isspace ((unsigned char) *ap))
250 ap++;
251 if (cp)
252 snprintf(adr, sizeof adr, "%.*s", (int)(cp - ap), ap);
253 else
254 strcpy (adr, ap);
255 bp = adr + strlen (adr) - 1;
256 if (*bp == ',' || *bp == ';' || *bp == '\n')
257 *bp = 0;
258
259 adrxp->text = adr;
260 adrxp->pers = pers;
261 adrxp->mbox = mbox;
262 adrxp->host = host;
263 adrxp->path = path;
264 adrxp->grp = grp;
265 adrxp->ingrp = ingrp;
266 adrxp->note = note;
267 adrxp->err = err[0] ? err : NULL;
268
269 return adrxp;
270 }
271
272
273 static int
274 parse_address (void)
275 {
276 char buffer[BUFSIZ];
277
278 again: ;
279 ap = cp;
280 switch (my_lex (buffer)) {
281 case LX_ATOM:
282 case LX_QSTR:
283 pers = strdup (buffer);
284 break;
285
286 case LX_SEMI:
287 if (glevel-- <= 0) {
288 strcpy (err, "extraneous semi-colon");
289 return NOTOK;
290 }
291 /* FALLTHRU */
292 case LX_COMA:
293 mh_xfree(note);
294 note = NULL;
295 goto again;
296
297 case LX_END:
298 return DONE;
299
300 case LX_LBRK: /* sigh (2) */
301 goto get_addr;
302
303 case LX_AT: /* sigh (3) */
304 cp = ap;
305 if (route_addr (buffer) == NOTOK)
306 return NOTOK;
307 return OK; /* why be choosy? */
308
309 default:
310 snprintf(err, sizeof err, "illegal address construct (%s)", buffer);
311 return NOTOK;
312 }
313
314 switch (my_lex (buffer)) {
315 case LX_ATOM:
316 case LX_QSTR:
317 pers = add (buffer, add (" ", pers));
318 more_phrase: ; /* sigh (1) */
319 if (phrase (buffer) == NOTOK)
320 return NOTOK;
321
322 switch (last_lex) {
323 case LX_LBRK:
324 get_addr: ;
325 if (route_addr (buffer) == NOTOK)
326 return NOTOK;
327 if (last_lex == LX_RBRK)
328 return OK;
329 snprintf(err, sizeof err, "missing right-bracket (%s)", buffer);
330 return NOTOK;
331
332 case LX_COLN:
333 get_group: ;
334 if (glevel++ > 0) {
335 snprintf(err, sizeof err, "nested groups not allowed (%s)", pers);
336 return NOTOK;
337 }
338 grp = add (": ", pers);
339 pers = NULL;
340 {
341 char *pp = cp;
342
343 for (;;)
344 switch (my_lex (buffer)) {
345 case LX_SEMI:
346 case LX_END: /* tsk, tsk */
347 glevel--;
348 return OK;
349
350 case LX_COMA:
351 continue;
352
353 default:
354 cp = pp;
355 return parse_address ();
356 }
357 }
358
359 case LX_DOT: /* sigh (1) */
360 pers = add (".", pers);
361 goto more_phrase;
362
363 default:
364 snprintf(err, sizeof err, "no mailbox in address, only a phrase (%s%s)",
365 pers, buffer);
366 return NOTOK;
367 }
368
369 case LX_LBRK:
370 goto get_addr;
371
372 case LX_COLN:
373 goto get_group;
374
375 case LX_DOT:
376 mbox = add (buffer, pers);
377 pers = NULL;
378 if (route_addr (buffer) == NOTOK)
379 return NOTOK;
380 goto check_end;
381
382 case LX_AT:
383 ingrp = glevel;
384 mbox = pers;
385 pers = NULL;
386 if (domain (buffer) == NOTOK)
387 return NOTOK;
388 check_end: ;
389 switch (last_lex) {
390 case LX_SEMI:
391 if (glevel-- <= 0) {
392 strcpy (err, "extraneous semi-colon");
393 return NOTOK;
394 }
395 /* FALLTHRU */
396 case LX_COMA:
397 case LX_END:
398 return OK;
399
400 default:
401 snprintf(err, sizeof err, "junk after local@domain (%s)", buffer);
402 return NOTOK;
403 }
404
405 case LX_SEMI: /* no host */
406 case LX_COMA:
407 case LX_END:
408 ingrp = glevel;
409 if (last_lex == LX_SEMI && glevel-- <= 0) {
410 strcpy (err, "extraneous semi-colon");
411 return NOTOK;
412 }
413 mbox = pers;
414 pers = NULL;
415 return OK;
416
417 default:
418 snprintf(err, sizeof err, "missing mailbox (%s)", buffer);
419 return NOTOK;
420 }
421 }
422
423
424 static int
425 phrase (char *buffer)
426 {
427 for (;;)
428 switch (my_lex (buffer)) {
429 case LX_ATOM:
430 case LX_QSTR:
431 pers = add (buffer, add (" ", pers));
432 continue;
433
434 default:
435 return OK;
436 }
437 }
438
439
440 static int
441 route_addr (char *buffer)
442 {
443 char *pp = cp;
444
445 if (my_lex (buffer) == LX_AT) {
446 if (route (buffer) == NOTOK)
447 return NOTOK;
448 }
449 else
450 cp = pp;
451
452 if (local_part (buffer) == NOTOK)
453 return NOTOK;
454
455 switch (last_lex) {
456 case LX_AT:
457 return domain (buffer);
458
459 case LX_SEMI: /* if in group */
460 case LX_RBRK: /* no host */
461 case LX_COMA:
462 case LX_END:
463 return OK;
464
465 default:
466 snprintf(err, sizeof err, "no at-sign after local-part (%s)", buffer);
467 return NOTOK;
468 }
469 }
470
471
472 static int
473 local_part (char *buffer)
474 {
475 ingrp = glevel;
476
477 for (;;) {
478 switch (my_lex (buffer)) {
479 case LX_ATOM:
480 case LX_QSTR:
481 mbox = add (buffer, mbox);
482 break;
483
484 default:
485 snprintf(err, sizeof err, "no mailbox in local-part (%s)", buffer);
486 return NOTOK;
487 }
488
489 switch (my_lex (buffer)) {
490 case LX_DOT:
491 mbox = add (buffer, mbox);
492 continue;
493
494 default:
495 return OK;
496 }
497 }
498 }
499
500
501 static int
502 domain (char *buffer)
503 {
504 for (;;) {
505 switch (my_lex (buffer)) {
506 case LX_ATOM:
507 case LX_DLIT:
508 host = add (buffer, host);
509 break;
510
511 default:
512 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
513 return NOTOK;
514 }
515
516 switch (my_lex (buffer)) {
517 case LX_DOT:
518 host = add (buffer, host);
519 continue;
520
521 case LX_AT: /* sigh (0) */
522 mbox = add (host, add ("%", mbox));
523 free (host);
524 host = NULL;
525 continue;
526
527 default:
528 return OK;
529 }
530 }
531 }
532
533
534 static int
535 route (char *buffer)
536 {
537 path = strdup ("@");
538
539 for (;;) {
540 switch (my_lex (buffer)) {
541 case LX_ATOM:
542 case LX_DLIT:
543 path = add (buffer, path);
544 break;
545
546 default:
547 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
548 return NOTOK;
549 }
550 switch (my_lex (buffer)) {
551 case LX_COMA:
552 path = add (buffer, path);
553 for (;;) {
554 switch (my_lex (buffer)) {
555 case LX_COMA:
556 continue;
557
558 case LX_AT:
559 path = add (buffer, path);
560 break;
561
562 default:
563 snprintf(err, sizeof err, "no at-sign found for next domain in route (%s)",
564 buffer);
565 }
566 break;
567 }
568 continue;
569
570 case LX_AT: /* XXX */
571 case LX_DOT:
572 path = add (buffer, path);
573 continue;
574
575 case LX_COLN:
576 path = add (buffer, path);
577 return OK;
578
579 default:
580 snprintf(err, sizeof err, "no colon found to terminate route (%s)", buffer);
581 return NOTOK;
582 }
583 }
584 }
585
586
587 static int
588 my_lex (char *buffer)
589 {
590 /* buffer should be at least BUFSIZ bytes long */
591 int i, gotat = 0;
592 char c, *bp;
593
594 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
595 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
596
597 bp = buffer;
598 *bp = 0;
599 if (!cp)
600 return (last_lex = LX_END);
601
602 gotat = isat (cp);
603 c = *cp++;
604 while (isspace ((unsigned char) c))
605 c = *cp++;
606 if (c == 0) {
607 cp = NULL;
608 return (last_lex = LX_END);
609 }
610
611 if (c == '(') {
612 ADDCHR(c);
613 for (i = 0;;)
614 switch (c = *cp++) {
615 case 0:
616 cp = NULL;
617 return (last_lex = LX_ERR);
618 case QUOTE:
619 ADDCHR(c);
620 if ((c = *cp++) == 0) {
621 cp = NULL;
622 return (last_lex = LX_ERR);
623 }
624 ADDCHR(c);
625 continue;
626 case '(':
627 i++;
628 /* FALLTHRU */
629 default:
630 ADDCHR(c);
631 continue;
632 case ')':
633 ADDCHR(c);
634 if (--i < 0) {
635 *bp = 0;
636 note = note ? add (buffer, add (" ", note))
637 : strdup (buffer);
638 return my_lex (buffer);
639 }
640 }
641 }
642
643 if (c == '"') {
644 ADDCHR(c);
645 for (;;)
646 switch (c = *cp++) {
647 case 0:
648 cp = NULL;
649 return (last_lex = LX_ERR);
650 case QUOTE:
651 ADDCHR(c);
652 if ((c = *cp++) == 0) {
653 cp = NULL;
654 return (last_lex = LX_ERR);
655 }
656 /* FALLTHRU */
657 default:
658 ADDCHR(c);
659 continue;
660 case '"':
661 ADDCHR(c);
662 *bp = 0;
663 return (last_lex = LX_QSTR);
664 }
665 }
666
667 if (c == '[') {
668 ADDCHR(c);
669 for (;;)
670 switch (c = *cp++) {
671 case 0:
672 cp = NULL;
673 return (last_lex = LX_ERR);
674 case QUOTE:
675 ADDCHR(c);
676 if ((c = *cp++) == 0) {
677 cp = NULL;
678 return (last_lex = LX_ERR);
679 }
680 /* FALLTHRU */
681 default:
682 ADDCHR(c);
683 continue;
684 case ']':
685 ADDCHR(c);
686 *bp = 0;
687 return (last_lex = LX_DLIT);
688 }
689 }
690
691 ADDCHR(c);
692 *bp = 0;
693 for (i = 0; special[i].lx_chr != 0; i++)
694 if (c == special[i].lx_chr)
695 return (last_lex = special[i].lx_val);
696
697 if (iscntrl ((unsigned char) c))
698 return (last_lex = LX_ERR);
699
700 for (;;) {
701 if ((c = *cp++) == 0)
702 break;
703 for (i = 0; special[i].lx_chr != 0; i++)
704 if (c == special[i].lx_chr)
705 goto got_atom;
706 if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
707 break;
708 ADDCHR(c);
709 }
710 got_atom: ;
711 if (c == 0)
712 cp = NULL;
713 else
714 cp--;
715 *bp = 0;
716 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
717 ? LX_ATOM : LX_AT;
718 return last_lex;
719
720 my_lex_buffull:
721 /* Out of buffer space. *bp is the last byte in the buffer */
722 *bp = 0;
723 return (last_lex = LX_ERR);
724 }
725
726
727 char *
728 legal_person (const char *p)
729 {
730 int i;
731 const char *cp;
732 static char buffer[BUFSIZ];
733
734 if (*p == '"')
735 return (char *) p;
736 for (cp = p; *cp; cp++)
737 for (i = 0; special[i].lx_chr; i++)
738 if (*cp == special[i].lx_chr) {
739 snprintf(buffer, sizeof buffer, "\"%s\"", p);
740 return buffer;
741 }
742
743 return (char *) p;
744 }
745
746
747 int
748 mfgets (FILE *in, char **bp)
749 {
750 int i;
751 char *cp, *dp, *ep;
752 static int len = 0;
753 static char *pp = NULL;
754
755 if (pp == NULL)
756 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
757
758 for (ep = (cp = pp) + len - 2;;) {
759 switch (i = getc (in)) {
760 case EOF:
761 eol: ;
762 if (cp != pp) {
763 *cp = 0;
764 *bp = pp;
765 return OK;
766 }
767 eoh: ;
768 *bp = NULL;
769 free (pp);
770 pp = NULL;
771 return DONE;
772
773 case 0:
774 continue;
775
776 case '\n':
777 if (cp == pp) /* end of headers, gobble it */
778 goto eoh;
779 switch (i = getc (in)) {
780 default: /* end of line */
781 case '\n': /* end of headers, save for next call */
782 ungetc (i, in);
783 goto eol;
784
785 case ' ': /* continue headers */
786 case '\t':
787 *cp++ = '\n';
788 break;
789 }
790 /* FALLTHRU */
791
792 default:
793 *cp++ = i;
794 break;
795 }
796 if (cp >= ep) {
797 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
798 cp += dp - pp, ep = (pp = cp) + len - 2;
799 }
800 }
801 }