]> diplodocus.org Git - nmh/blob - sbr/mf.c
Makefile.am: Add test/inc/test-eom-align to XFAIL_TESTS.
[nmh] / sbr / mf.c
1 /* mf.c -- mail filter subroutines
2 *
3 * This code is Copyright (c) 2002, by the authors of nmh. See the
4 * COPYRIGHT file in the root directory of the nmh distribution for
5 * complete copyright information.
6 */
7
8 #include <h/mh.h>
9 #include <h/mf.h>
10 #include <h/utils.h>
11
12 /*
13 * static prototypes
14 */
/* Nonzero when the four characters at the pointer spell " at " (either case). */
static int isat (const char *);
/* Parse one address starting at cp; yields OK, NOTOK or DONE. */
static int parse_address (void);
/* Append any following atoms / quoted strings to pers. */
static int phrase (char *);
/* Parse an optional route, a local part and, after an at-sign, a domain. */
static int route_addr (char *);
/* Accumulate dot-separated atoms / quoted strings into mbox. */
static int local_part (char *);
/* Accumulate dot-separated atoms / domain literals into host. */
static int domain (char *);
/* Accumulate a source route, up to its terminating colon, into routepath. */
static int route (char *);
/* Read the next token from cp into the given buffer; returns an LX_* code. */
static int my_lex (char *);
23
24
/*
 * isat -- recognise the 733-style spelling of an at-sign.
 *
 * Returns 1 when the four characters starting at p are a space, the
 * letters "at" in either case, and another space; returns 0 otherwise.
 * The checks run left to right and stop at the first mismatch, so a
 * terminating NUL within those four positions ends the comparison
 * before any later byte is read.
 */
static int
isat (const char *p)
{
    if (p[0] != ' ')
        return 0;
    if (p[1] != 'a' && p[1] != 'A')
        return 0;
    if (p[2] != 't' && p[2] != 'T')
        return 0;

    return p[3] == ' ';
}
33
34
35 /*
36 *
37 * getadrx() implements a partial 822-style address parser. The parser
38 * is neither complete nor correct. It does however recognize nearly all
39 * of the 822 address syntax. In addition it handles the majority of the
40 * 733 syntax as well. Most problems arise from trying to accommodate both.
41 *
42 * In terms of 822, the route-specification in
43 *
44 * "<" [route] local-part "@" domain ">"
45 *
46 * is parsed and returned unchanged. Multiple at-signs are compressed
47 * via source-routing. Recursive groups are not allowed as per the
48 * standard.
49 *
50 * In terms of 733, " at " is recognized as equivalent to "@".
51 *
52 * In terms of both the parser will not complain about missing hosts.
53 *
54 * -----
55 *
56 * We should not allow addresses like
57 *
58 * Marshall T. Rose <MRose@UCI>
59 *
60 * but should insist on
61 *
62 * "Marshall T. Rose" <MRose@UCI>
63 *
64 * Unfortunately, a lot of mailers stupidly let people get away with this.
65 *
66 * -----
67 *
68 * We should not allow addresses like
69 *
70 * <MRose@UCI>
71 *
72 * but should insist on
73 *
74 * MRose@UCI
75 *
76 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
77 * this.
78 *
79 * -----
80 *
81 * We should not allow addresses like
82 *
83 * @UCI:MRose@UCI-750a
84 *
85 * but should insist on
86 *
87 * Marshall Rose <@UCI:MRose@UCI-750a>
88 *
89 * Unfortunately, a lot of mailers stupidly do this.
90 *
91 */
92
/* Escape character recognised inside comments, quoted strings and
 * domain literals. */
#define QUOTE	'\\'

/* Token codes returned by my_lex() and remembered in last_lex. */
#define LX_END	 0	/* end of input */
#define LX_ERR	 1	/* lexical error (also: a special with no token of its own) */
#define LX_ATOM	 2	/* run of ordinary characters */
#define LX_QSTR	 3	/* "quoted string" */
#define LX_DLIT	 4	/* [domain literal] */
#define LX_SEMI	 5	/* ';' */
#define LX_COMA	 6	/* ',' */
#define LX_LBRK	 7	/* '<' */
#define LX_RBRK	 8	/* '>' */
#define LX_COLN	 9	/* ':' */
#define LX_DOT	10	/* '.' */
#define LX_AT	11	/* '@' */

/* One special character and the token code it lexes to. */
struct specials {
    char lx_chr;
    int  lx_val;
};

/*
 * Characters that terminate an atom, each paired with the token code
 * reported when the character starts a token.  The list ends at the
 * entry whose lx_chr is 0.  The table is only ever read, so it is const.
 */
static const struct specials special[] = {
    { ';',   LX_SEMI },
    { ',',   LX_COMA },
    { '<',   LX_LBRK },
    { '>',   LX_RBRK },
    { ':',   LX_COLN },
    { '.',   LX_DOT },
    { '@',   LX_AT },
    { '(',   LX_ERR },
    { ')',   LX_ERR },
    { QUOTE, LX_ERR },
    { '"',   LX_ERR },
    { '[',   LX_ERR },
    { ']',   LX_ERR },
    { 0,     0 }
};
129
/* Group nesting depth; getadrx() resets it when it starts a new string. */
static int glevel = 0;
/* Value of glevel captured when the current mailbox was parsed. */
static int ingrp = 0;
/* Code of the token most recently returned by my_lex(). */
static int last_lex = LX_END;

/* Private copy of the address string currently being parsed. */
static char *dp = NULL;
/* Scan position inside dp; my_lex() sets it to NULL at end of input. */
static char *cp = NULL;
/* Start of the text of the address currently being parsed. */
static char *ap = NULL;
/*
 * Pieces of the current address.  getadrx() frees all six and resets
 * them to NULL at the start of every call.
 */
static char *pers = NULL;
static char *mbox = NULL;
static char *host = NULL;
static char *routepath = NULL;
static char *grp = NULL;
static char *note = NULL;
/* Error text for the current address; empty when there is no error. */
static char err[BUFSIZ];
/* Text of the current address; also used as a scratch token buffer. */
static char adr[BUFSIZ];

/* The single result structure handed back by getadrx(). */
static struct adrx adrxs2;
147
148
149 /* eai = Email Address Internationalization */
150 struct adrx *
151 getadrx (const char *addrs, int eai)
152 {
153 char *bp;
154 struct adrx *adrxp = &adrxs2;
155
156 mh_xfree(pers);
157 mh_xfree(mbox);
158 mh_xfree(host);
159 mh_xfree(routepath);
160 mh_xfree(grp);
161 mh_xfree(note);
162 pers = mbox = host = routepath = grp = note = NULL;
163 err[0] = 0;
164
165 if (dp == NULL) {
166 dp = cp = strdup (FENDNULL(addrs));
167 glevel = 0;
168 }
169 else
170 if (cp == NULL) {
171 free (dp);
172 dp = NULL;
173 return NULL;
174 }
175
176 switch (parse_address ()) {
177 case DONE:
178 free (dp);
179 dp = cp = NULL;
180 return NULL;
181
182 case OK:
183 switch (last_lex) {
184 case LX_COMA:
185 case LX_END:
186 break;
187
188 default: /* catch trailing comments */
189 bp = cp;
190 my_lex (adr);
191 cp = bp;
192 break;
193 }
194 break;
195
196 default:
197 break;
198 }
199
200 if (! eai) {
201 /*
202 * Reject the address if key fields contain 8bit characters
203 */
204
205 if (contains8bit(mbox, NULL) || contains8bit(host, NULL) ||
206 contains8bit(routepath, NULL) || contains8bit(grp, NULL)) {
207 strcpy(err, "Address contains 8-bit characters");
208 }
209 }
210
211 if (err[0])
212 for (;;) {
213 switch (last_lex) {
214 case LX_COMA:
215 case LX_END:
216 break;
217
218 default:
219 my_lex (adr);
220 continue;
221 }
222 break;
223 }
224 while (isspace ((unsigned char) *ap))
225 ap++;
226 if (cp)
227 snprintf(adr, sizeof adr, "%.*s", (int)(cp - ap), ap);
228 else
229 strcpy (adr, ap);
230 bp = adr + strlen (adr) - 1;
231 if (*bp == ',' || *bp == ';' || *bp == '\n')
232 *bp = 0;
233
234 adrxp->text = adr;
235 adrxp->pers = pers;
236 adrxp->mbox = mbox;
237 adrxp->host = host;
238 adrxp->path = routepath;
239 adrxp->grp = grp;
240 adrxp->ingrp = ingrp;
241 adrxp->note = note;
242 adrxp->err = err[0] ? err : NULL;
243
244 return adrxp;
245 }
246
247
248 static int
249 parse_address (void)
250 {
251 char buffer[BUFSIZ];
252
253 again: ;
254 ap = cp;
255 switch (my_lex (buffer)) {
256 case LX_ATOM:
257 case LX_QSTR:
258 pers = strdup (buffer);
259 break;
260
261 case LX_SEMI:
262 if (glevel-- <= 0) {
263 strcpy (err, "extraneous semi-colon");
264 return NOTOK;
265 }
266 /* FALLTHRU */
267 case LX_COMA:
268 mh_xfree(note);
269 note = NULL;
270 goto again;
271
272 case LX_END:
273 return DONE;
274
275 case LX_LBRK: /* sigh (2) */
276 goto get_addr;
277
278 case LX_AT: /* sigh (3) */
279 cp = ap;
280 if (route_addr (buffer) == NOTOK)
281 return NOTOK;
282 return OK; /* why be choosy? */
283
284 default:
285 snprintf(err, sizeof err, "illegal address construct (%s)", buffer);
286 return NOTOK;
287 }
288
289 switch (my_lex (buffer)) {
290 case LX_ATOM:
291 case LX_QSTR:
292 pers = add (buffer, add (" ", pers));
293 more_phrase: ; /* sigh (1) */
294 if (phrase (buffer) == NOTOK)
295 return NOTOK;
296
297 switch (last_lex) {
298 case LX_LBRK:
299 get_addr: ;
300 if (route_addr (buffer) == NOTOK)
301 return NOTOK;
302 if (last_lex == LX_RBRK)
303 return OK;
304 snprintf(err, sizeof err, "missing right-bracket (%s)", buffer);
305 return NOTOK;
306
307 case LX_COLN:
308 get_group: ;
309 if (glevel++ > 0) {
310 snprintf(err, sizeof err, "nested groups not allowed (%s)", pers);
311 return NOTOK;
312 }
313 grp = add (": ", pers);
314 pers = NULL;
315 {
316 char *pp = cp;
317
318 for (;;)
319 switch (my_lex (buffer)) {
320 case LX_SEMI:
321 case LX_END: /* tsk, tsk */
322 glevel--;
323 return OK;
324
325 case LX_COMA:
326 continue;
327
328 default:
329 cp = pp;
330 return parse_address ();
331 }
332 }
333
334 case LX_DOT: /* sigh (1) */
335 pers = add (".", pers);
336 goto more_phrase;
337
338 default:
339 snprintf(err, sizeof err, "no mailbox in address, only a phrase (%s%s)",
340 pers, buffer);
341 return NOTOK;
342 }
343
344 case LX_LBRK:
345 goto get_addr;
346
347 case LX_COLN:
348 goto get_group;
349
350 case LX_DOT:
351 mbox = add (buffer, pers);
352 pers = NULL;
353 if (route_addr (buffer) == NOTOK)
354 return NOTOK;
355 goto check_end;
356
357 case LX_AT:
358 ingrp = glevel;
359 mbox = pers;
360 pers = NULL;
361 if (domain (buffer) == NOTOK)
362 return NOTOK;
363 check_end: ;
364 switch (last_lex) {
365 case LX_SEMI:
366 if (glevel-- <= 0) {
367 strcpy (err, "extraneous semi-colon");
368 return NOTOK;
369 }
370 return OK;
371 case LX_COMA:
372 case LX_END:
373 return OK;
374
375 default:
376 snprintf(err, sizeof err, "junk after local@domain (%s)", buffer);
377 return NOTOK;
378 }
379
380 case LX_SEMI: /* no host */
381 case LX_COMA:
382 case LX_END:
383 ingrp = glevel;
384 if (last_lex == LX_SEMI && glevel-- <= 0) {
385 strcpy (err, "extraneous semi-colon");
386 return NOTOK;
387 }
388 mbox = pers;
389 pers = NULL;
390 return OK;
391
392 default:
393 snprintf(err, sizeof err, "missing mailbox (%s)", buffer);
394 return NOTOK;
395 }
396 }
397
398
399 static int
400 phrase (char *buffer)
401 {
402 for (;;)
403 switch (my_lex (buffer)) {
404 case LX_ATOM:
405 case LX_QSTR:
406 pers = add (buffer, add (" ", pers));
407 continue;
408
409 default:
410 return OK;
411 }
412 }
413
414
415 static int
416 route_addr (char *buffer)
417 {
418 char *pp = cp;
419
420 if (my_lex (buffer) == LX_AT) {
421 if (route (buffer) == NOTOK)
422 return NOTOK;
423 }
424 else
425 cp = pp;
426
427 if (local_part (buffer) == NOTOK)
428 return NOTOK;
429
430 switch (last_lex) {
431 case LX_AT:
432 return domain (buffer);
433
434 case LX_SEMI: /* if in group */
435 case LX_RBRK: /* no host */
436 case LX_COMA:
437 case LX_END:
438 return OK;
439
440 default:
441 snprintf(err, sizeof err, "no at-sign after local-part (%s)", buffer);
442 return NOTOK;
443 }
444 }
445
446
447 static int
448 local_part (char *buffer)
449 {
450 ingrp = glevel;
451
452 for (;;) {
453 switch (my_lex (buffer)) {
454 case LX_ATOM:
455 case LX_QSTR:
456 mbox = add (buffer, mbox);
457 break;
458
459 default:
460 snprintf(err, sizeof err, "no mailbox in local-part (%s)", buffer);
461 return NOTOK;
462 }
463
464 switch (my_lex (buffer)) {
465 case LX_DOT:
466 mbox = add (buffer, mbox);
467 continue;
468
469 default:
470 return OK;
471 }
472 }
473 }
474
475
476 static int
477 domain (char *buffer)
478 {
479 for (;;) {
480 switch (my_lex (buffer)) {
481 case LX_ATOM:
482 case LX_DLIT:
483 host = add (buffer, host);
484 break;
485
486 default:
487 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
488 return NOTOK;
489 }
490
491 switch (my_lex (buffer)) {
492 case LX_DOT:
493 host = add (buffer, host);
494 continue;
495
496 case LX_AT: /* sigh (0) */
497 mbox = add (host, add ("%", mbox));
498 free (host);
499 host = NULL;
500 continue;
501
502 default:
503 return OK;
504 }
505 }
506 }
507
508
509 static int
510 route (char *buffer)
511 {
512 routepath = mh_xstrdup ("@");
513
514 for (;;) {
515 switch (my_lex (buffer)) {
516 case LX_ATOM:
517 case LX_DLIT:
518 routepath = add (buffer, routepath);
519 break;
520
521 default:
522 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
523 return NOTOK;
524 }
525 switch (my_lex (buffer)) {
526 case LX_COMA:
527 routepath = add (buffer, routepath);
528 for (;;) {
529 switch (my_lex (buffer)) {
530 case LX_COMA:
531 continue;
532
533 case LX_AT:
534 routepath = add (buffer, routepath);
535 break;
536
537 default:
538 snprintf(err, sizeof err, "no at-sign found for next domain in route (%s)",
539 buffer);
540 }
541 break;
542 }
543 continue;
544
545 case LX_AT: /* XXX */
546 case LX_DOT:
547 routepath = add (buffer, routepath);
548 continue;
549
550 case LX_COLN:
551 routepath = add (buffer, routepath);
552 return OK;
553
554 default:
555 snprintf(err, sizeof err, "no colon found to terminate route (%s)", buffer);
556 return NOTOK;
557 }
558 }
559 }
560
561
562 static int
563 my_lex (char *buffer)
564 {
565 /* buffer should be at least BUFSIZ bytes long */
566 int i, gotat = 0;
567 char c, *bp;
568
569 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
570 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
571
572 bp = buffer;
573 *bp = 0;
574 if (!cp)
575 return (last_lex = LX_END);
576
577 gotat = isat (cp);
578 c = *cp++;
579 while (isspace ((unsigned char) c))
580 c = *cp++;
581 if (c == 0) {
582 cp = NULL;
583 return (last_lex = LX_END);
584 }
585
586 if (c == '(') {
587 ADDCHR(c);
588 for (i = 0;;)
589 switch (c = *cp++) {
590 case 0:
591 cp = NULL;
592 return (last_lex = LX_ERR);
593 case QUOTE:
594 ADDCHR(c);
595 if ((c = *cp++) == 0) {
596 cp = NULL;
597 return (last_lex = LX_ERR);
598 }
599 ADDCHR(c);
600 continue;
601 case '(':
602 i++;
603 /* FALLTHRU */
604 default:
605 ADDCHR(c);
606 continue;
607 case ')':
608 ADDCHR(c);
609 if (--i < 0) {
610 *bp = 0;
611 note = note ? add (buffer, add (" ", note))
612 : strdup (buffer);
613 return my_lex (buffer);
614 }
615 }
616 }
617
618 if (c == '"') {
619 ADDCHR(c);
620 for (;;)
621 switch (c = *cp++) {
622 case 0:
623 cp = NULL;
624 return (last_lex = LX_ERR);
625 case QUOTE:
626 ADDCHR(c);
627 if ((c = *cp++) == 0) {
628 cp = NULL;
629 return (last_lex = LX_ERR);
630 }
631 /* FALLTHRU */
632 default:
633 ADDCHR(c);
634 continue;
635 case '"':
636 ADDCHR(c);
637 *bp = 0;
638 return (last_lex = LX_QSTR);
639 }
640 }
641
642 if (c == '[') {
643 ADDCHR(c);
644 for (;;)
645 switch (c = *cp++) {
646 case 0:
647 cp = NULL;
648 return (last_lex = LX_ERR);
649 case QUOTE:
650 ADDCHR(c);
651 if ((c = *cp++) == 0) {
652 cp = NULL;
653 return (last_lex = LX_ERR);
654 }
655 /* FALLTHRU */
656 default:
657 ADDCHR(c);
658 continue;
659 case ']':
660 ADDCHR(c);
661 *bp = 0;
662 return (last_lex = LX_DLIT);
663 }
664 }
665
666 ADDCHR(c);
667 *bp = 0;
668 for (i = 0; special[i].lx_chr != 0; i++)
669 if (c == special[i].lx_chr)
670 return (last_lex = special[i].lx_val);
671
672 if (iscntrl ((unsigned char) c))
673 return (last_lex = LX_ERR);
674
675 for (;;) {
676 if ((c = *cp++) == 0)
677 break;
678 for (i = 0; special[i].lx_chr != 0; i++)
679 if (c == special[i].lx_chr)
680 goto got_atom;
681 if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
682 break;
683 ADDCHR(c);
684 }
685 got_atom: ;
686 if (c == 0)
687 cp = NULL;
688 else
689 cp--;
690 *bp = 0;
691 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
692 ? LX_ATOM : LX_AT;
693 return last_lex;
694
695 my_lex_buffull:
696 /* Out of buffer space. *bp is the last byte in the buffer */
697 *bp = 0;
698 return (last_lex = LX_ERR);
699 }
700
701
702 char *
703 legal_person (const char *p)
704 {
705 int i;
706 const char *cp;
707 static char buffer[BUFSIZ];
708
709 if (*p == '"')
710 return (char *) p;
711 for (cp = p; *cp; cp++)
712 for (i = 0; special[i].lx_chr; i++)
713 if (*cp == special[i].lx_chr) {
714 snprintf(buffer, sizeof buffer, "\"%s\"", p);
715 return buffer;
716 }
717
718 return (char *) p;
719 }