]> diplodocus.org Git - nmh/blob - sbr/mf.c
Fix invalid pointer arithmetic.
[nmh] / sbr / mf.c
1 /* mf.c -- mail filter subroutines
2 *
3 * This code is Copyright (c) 2002, by the authors of nmh. See the
4 * COPYRIGHT file in the root directory of the nmh distribution for
5 * complete copyright information.
6 */
7
8 #include <h/mh.h>
9 #include <h/mf.h>
10 #include <h/utils.h>
11
/*
 * Forward declarations for the file-local lexer and parser routines.
 * Every parser routine that takes an argument receives the caller's
 * token buffer, which my_lex() fills in.
 */
static int isat (const char *p);
static int parse_address (void);
static int phrase (char *buffer);
static int route_addr (char *buffer);
static int local_part (char *buffer);
static int domain (char *buffer);
static int route (char *buffer);
static int my_lex (char *buffer);
23
24
/*
 * isat -- does the text at P begin with the RFC 733 spelling " at "?
 *
 * The word is matched case-insensitively and must have a single space
 * on each side.  Characters are examined strictly left to right and the
 * scan stops at the first mismatch, so nothing past a terminating NUL
 * is ever read.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int
isat (const char *p)
{
    if (p[0] != ' ')
        return 0;
    if (p[1] != 'a' && p[1] != 'A')
        return 0;
    if (p[2] != 't' && p[2] != 'T')
        return 0;

    return p[3] == ' ';
}
33
34
35 /*
36 *
37 * getadrx() implements a partial 822-style address parser. The parser
38 * is neither complete nor correct. It does however recognize nearly all
39 * of the 822 address syntax. In addition it handles the majority of the
40 * 733 syntax as well. Most problems arise from trying to accommodate both.
41 *
42 * In terms of 822, the route-specification in
43 *
44 * "<" [route] local-part "@" domain ">"
45 *
46 * is parsed and returned unchanged. Multiple at-signs are compressed
47 * via source-routing. Recursive groups are not allowed as per the
48 * standard.
49 *
50 * In terms of 733, " at " is recognized as equivalent to "@".
51 *
52 * In terms of both the parser will not complain about missing hosts.
53 *
54 * -----
55 *
56 * We should not allow addresses like
57 *
58 * Marshall T. Rose <MRose@UCI>
59 *
60 * but should insist on
61 *
62 * "Marshall T. Rose" <MRose@UCI>
63 *
64 * Unfortunately, a lot of mailers stupidly let people get away with this.
65 *
66 * -----
67 *
68 * We should not allow addresses like
69 *
70 * <MRose@UCI>
71 *
72 * but should insist on
73 *
74 * MRose@UCI
75 *
76 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
77 * this.
78 *
79 * -----
80 *
81 * We should not allow addresses like
82 *
83 * @UCI:MRose@UCI-750a
84 *
85 * but should insist on
86 *
87 * Marshall Rose <@UCI:MRose@UCI-750a>
88 *
89 * Unfortunately, a lot of mailers stupidly do this.
90 *
91 */
92
/*
 * Token codes returned by my_lex() and remembered in last_lex.
 */
enum {
    LX_END  = 0,    /* end of the input string */
    LX_ERR  = 1,    /* lexical error */
    LX_ATOM = 2,    /* atom */
    LX_QSTR = 3,    /* "quoted string" */
    LX_DLIT = 4,    /* [domain literal] */
    LX_SEMI = 5,    /* ';' */
    LX_COMA = 6,    /* ',' */
    LX_LBRK = 7,    /* '<' */
    LX_RBRK = 8,    /* '>' */
    LX_COLN = 9,    /* ':' */
    LX_DOT  = 10,   /* '.' */
    LX_AT   = 11    /* '@', or the word " at " */
};
105
106 struct specials {
107 char lx_chr;
108 int lx_val;
109 };
110
111 static struct specials special[] = {
112 { ';', LX_SEMI },
113 { ',', LX_COMA },
114 { '<', LX_LBRK },
115 { '>', LX_RBRK },
116 { ':', LX_COLN },
117 { '.', LX_DOT },
118 { '@', LX_AT },
119 { '(', LX_ERR },
120 { ')', LX_ERR },
121 { '\\', LX_ERR },
122 { '"', LX_ERR },
123 { '[', LX_ERR },
124 { ']', LX_ERR },
125 { 0, 0 }
126 };
127
128 static int glevel = 0;
129 static int ingrp = 0;
130 static int last_lex = LX_END;
131
132 static char *dp = NULL;
133 static char *cp = NULL;
134 static char *ap = NULL;
135 static char *pers = NULL;
136 static char *mbox = NULL;
137 static char *host = NULL;
138 static char *routepath = NULL;
139 static char *grp = NULL;
140 static char *note = NULL;
141 static char err[BUFSIZ];
142 static char adr[BUFSIZ];
143
144 static struct adrx adrxs2;
145
146
147 /* eai = Email Address Internationalization */
148 struct adrx *
149 getadrx (const char *addrs, int eai)
150 {
151 int parse;
152 char *bp;
153 struct adrx *adrxp = &adrxs2;
154
155 free(pers);
156 free(mbox);
157 free(host);
158 free(routepath);
159 free(grp);
160 free(note);
161 pers = mbox = host = routepath = grp = note = NULL;
162 err[0] = 0;
163
164 if (dp == NULL) {
165 dp = cp = strdup (FENDNULL(addrs));
166 glevel = 0;
167 } else if (cp == NULL) {
168 free (dp);
169 dp = NULL;
170 return NULL;
171 }
172
173 parse = parse_address();
174 if (parse == DONE) {
175 free(dp);
176 dp = cp = NULL;
177 return NULL;
178 }
179 if (parse == OK && last_lex != LX_COMA && last_lex != LX_END) {
180 /* catch trailing comments */
181 bp = cp;
182 my_lex(adr);
183 cp = bp;
184 }
185
186 /* Reject the address if key fields contain 8bit characters. */
187 if (!eai &&
188 (contains8bit(mbox, NULL) || contains8bit(host, NULL) ||
189 contains8bit(routepath, NULL) || contains8bit(grp, NULL)))
190 strcpy(err, "Address contains 8-bit characters");
191
192 if (err[0])
193 while (last_lex != LX_COMA && last_lex != LX_END)
194 my_lex(adr);
195
196 while (isspace ((unsigned char) *ap))
197 ap++;
198 if (cp)
199 snprintf(adr, sizeof adr, "%.*s", (int)(cp - ap), ap);
200 else
201 strcpy (adr, ap);
202 bp = adr + strlen (adr) - 1;
203 if (*bp == ',' || *bp == ';' || *bp == '\n')
204 *bp = 0;
205
206 adrxp->text = adr;
207 adrxp->pers = pers;
208 adrxp->mbox = mbox;
209 adrxp->host = host;
210 adrxp->path = routepath;
211 adrxp->grp = grp;
212 adrxp->ingrp = ingrp;
213 adrxp->note = note;
214 adrxp->err = err[0] ? err : NULL;
215
216 return adrxp;
217 }
218
219
220 static int
221 parse_address (void)
222 {
223 char buffer[BUFSIZ];
224
225 again: ;
226 ap = cp;
227 switch (my_lex (buffer)) {
228 case LX_ATOM:
229 case LX_QSTR:
230 pers = strdup (buffer);
231 break;
232
233 case LX_SEMI:
234 if (glevel-- <= 0) {
235 strcpy (err, "extraneous semi-colon");
236 return NOTOK;
237 }
238 /* FALLTHRU */
239 case LX_COMA:
240 free(note);
241 note = NULL;
242 goto again;
243
244 case LX_END:
245 return DONE;
246
247 case LX_LBRK: /* sigh (2) */
248 goto get_addr;
249
250 case LX_AT: /* sigh (3) */
251 cp = ap;
252 if (route_addr (buffer) == NOTOK)
253 return NOTOK;
254 return OK; /* why be choosy? */
255
256 default:
257 snprintf(err, sizeof err, "illegal address construct (%s)", buffer);
258 return NOTOK;
259 }
260
261 switch (my_lex (buffer)) {
262 case LX_ATOM:
263 case LX_QSTR:
264 pers = add (buffer, add (" ", pers));
265 more_phrase: ; /* sigh (1) */
266 if (phrase (buffer) == NOTOK)
267 return NOTOK;
268
269 switch (last_lex) {
270 case LX_LBRK:
271 get_addr: ;
272 if (route_addr (buffer) == NOTOK)
273 return NOTOK;
274 if (last_lex == LX_RBRK)
275 return OK;
276 snprintf(err, sizeof err, "missing right-bracket (%s)", buffer);
277 return NOTOK;
278
279 case LX_COLN:
280 get_group: ;
281 if (glevel++ > 0) {
282 snprintf(err, sizeof err, "nested groups not allowed (%s)", pers);
283 return NOTOK;
284 }
285 grp = add (": ", pers);
286 pers = NULL;
287 {
288 char *pp = cp;
289
290 for (;;)
291 switch (my_lex (buffer)) {
292 case LX_SEMI:
293 case LX_END: /* tsk, tsk */
294 glevel--;
295 return OK;
296
297 case LX_COMA:
298 continue;
299
300 default:
301 cp = pp;
302 return parse_address ();
303 }
304 }
305
306 case LX_DOT: /* sigh (1) */
307 pers = add (".", pers);
308 goto more_phrase;
309
310 default:
311 snprintf(err, sizeof err, "no mailbox in address, only a phrase (%s%s)",
312 pers, buffer);
313 return NOTOK;
314 }
315
316 case LX_LBRK:
317 goto get_addr;
318
319 case LX_COLN:
320 goto get_group;
321
322 case LX_DOT:
323 mbox = add (buffer, pers);
324 pers = NULL;
325 if (route_addr (buffer) == NOTOK)
326 return NOTOK;
327 goto check_end;
328
329 case LX_AT:
330 ingrp = glevel;
331 mbox = pers;
332 pers = NULL;
333 if (domain (buffer) == NOTOK)
334 return NOTOK;
335 check_end: ;
336 switch (last_lex) {
337 case LX_SEMI:
338 if (glevel-- <= 0) {
339 strcpy (err, "extraneous semi-colon");
340 return NOTOK;
341 }
342 return OK;
343 case LX_COMA:
344 case LX_END:
345 return OK;
346
347 default:
348 snprintf(err, sizeof err, "junk after local@domain (%s)", buffer);
349 return NOTOK;
350 }
351
352 case LX_SEMI: /* no host */
353 case LX_COMA:
354 case LX_END:
355 ingrp = glevel;
356 if (last_lex == LX_SEMI && glevel-- <= 0) {
357 strcpy (err, "extraneous semi-colon");
358 return NOTOK;
359 }
360 mbox = pers;
361 pers = NULL;
362 return OK;
363
364 default:
365 snprintf(err, sizeof err, "missing mailbox (%s)", buffer);
366 return NOTOK;
367 }
368 }
369
370
371 static int
372 phrase (char *buffer)
373 {
374 int lex;
375
376 while ((lex = my_lex(buffer)) == LX_ATOM || lex == LX_QSTR)
377 pers = add(buffer, add(" ", pers));
378
379 return OK;
380 }
381
382
383 static int
384 route_addr (char *buffer)
385 {
386 char *pp = cp;
387
388 if (my_lex (buffer) == LX_AT) {
389 if (route (buffer) == NOTOK)
390 return NOTOK;
391 }
392 else
393 cp = pp;
394
395 if (local_part (buffer) == NOTOK)
396 return NOTOK;
397
398 switch (last_lex) {
399 case LX_AT:
400 return domain (buffer);
401
402 case LX_SEMI: /* if in group */
403 case LX_RBRK: /* no host */
404 case LX_COMA:
405 case LX_END:
406 return OK;
407
408 default:
409 snprintf(err, sizeof err, "no at-sign after local-part (%s)", buffer);
410 return NOTOK;
411 }
412 }
413
414
415 static int
416 local_part (char *buffer)
417 {
418 ingrp = glevel;
419
420 for (;;) {
421 switch (my_lex (buffer)) {
422 case LX_ATOM:
423 case LX_QSTR:
424 mbox = add (buffer, mbox);
425 break;
426
427 default:
428 snprintf(err, sizeof err, "no mailbox in local-part (%s)", buffer);
429 return NOTOK;
430 }
431
432 switch (my_lex (buffer)) {
433 case LX_DOT:
434 mbox = add (buffer, mbox);
435 continue;
436
437 default:
438 return OK;
439 }
440 }
441 }
442
443
444 static int
445 domain (char *buffer)
446 {
447 for (;;) {
448 switch (my_lex (buffer)) {
449 case LX_ATOM:
450 case LX_DLIT:
451 host = add (buffer, host);
452 break;
453
454 default:
455 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
456 return NOTOK;
457 }
458
459 switch (my_lex (buffer)) {
460 case LX_DOT:
461 host = add (buffer, host);
462 continue;
463
464 case LX_AT: /* sigh (0) */
465 mbox = add (host, add ("%", mbox));
466 free (host);
467 host = NULL;
468 continue;
469
470 default:
471 return OK;
472 }
473 }
474 }
475
476
477 static int
478 route (char *buffer)
479 {
480 routepath = mh_xstrdup ("@");
481
482 for (;;) {
483 switch (my_lex (buffer)) {
484 case LX_ATOM:
485 case LX_DLIT:
486 routepath = add (buffer, routepath);
487 break;
488
489 default:
490 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
491 return NOTOK;
492 }
493 switch (my_lex (buffer)) {
494 case LX_COMA:
495 routepath = add (buffer, routepath);
496 for (;;) {
497 switch (my_lex (buffer)) {
498 case LX_COMA:
499 continue;
500
501 case LX_AT:
502 routepath = add (buffer, routepath);
503 break;
504
505 default:
506 snprintf(err, sizeof err, "no at-sign found for next domain in route (%s)",
507 buffer);
508 }
509 break;
510 }
511 continue;
512
513 case LX_AT: /* XXX */
514 case LX_DOT:
515 routepath = add (buffer, routepath);
516 continue;
517
518 case LX_COLN:
519 routepath = add (buffer, routepath);
520 return OK;
521
522 default:
523 snprintf(err, sizeof err, "no colon found to terminate route (%s)", buffer);
524 return NOTOK;
525 }
526 }
527 }
528
529
530 static int
531 my_lex (char *buffer)
532 {
533 /* buffer should be at least BUFSIZ bytes long */
534 int i, gotat = 0;
535 char c, *bp;
536
537 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
538 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
539
540 bp = buffer;
541 *bp = 0;
542 if (!cp)
543 return last_lex = LX_END;
544
545 gotat = isat (cp);
546 c = *cp++;
547 while (isspace ((unsigned char) c))
548 c = *cp++;
549 if (c == 0) {
550 cp = NULL;
551 return last_lex = LX_END;
552 }
553
554 if (c == '(') {
555 ADDCHR(c);
556 for (i = 0;;)
557 switch (c = *cp++) {
558 case 0:
559 cp = NULL;
560 return last_lex = LX_ERR;
561 case '\\':
562 ADDCHR(c);
563 if ((c = *cp++) == 0) {
564 cp = NULL;
565 return last_lex = LX_ERR;
566 }
567 ADDCHR(c);
568 continue;
569 case '(':
570 i++;
571 /* FALLTHRU */
572 default:
573 ADDCHR(c);
574 continue;
575 case ')':
576 ADDCHR(c);
577 if (--i < 0) {
578 *bp = 0;
579 note = note ? add (buffer, add (" ", note))
580 : strdup (buffer);
581 return my_lex (buffer);
582 }
583 }
584 }
585
586 if (c == '"') {
587 ADDCHR(c);
588 for (;;)
589 switch (c = *cp++) {
590 case 0:
591 cp = NULL;
592 return last_lex = LX_ERR;
593 case '\\':
594 ADDCHR(c);
595 if ((c = *cp++) == 0) {
596 cp = NULL;
597 return last_lex = LX_ERR;
598 }
599 /* FALLTHRU */
600 default:
601 ADDCHR(c);
602 continue;
603 case '"':
604 ADDCHR(c);
605 *bp = 0;
606 return last_lex = LX_QSTR;
607 }
608 }
609
610 if (c == '[') {
611 ADDCHR(c);
612 for (;;)
613 switch (c = *cp++) {
614 case 0:
615 cp = NULL;
616 return last_lex = LX_ERR;
617 case '\\':
618 ADDCHR(c);
619 if ((c = *cp++) == 0) {
620 cp = NULL;
621 return last_lex = LX_ERR;
622 }
623 /* FALLTHRU */
624 default:
625 ADDCHR(c);
626 continue;
627 case ']':
628 ADDCHR(c);
629 *bp = 0;
630 return last_lex = LX_DLIT;
631 }
632 }
633
634 ADDCHR(c);
635 *bp = 0;
636 for (i = 0; special[i].lx_chr != 0; i++)
637 if (c == special[i].lx_chr)
638 return last_lex = special[i].lx_val;
639
640 if (iscntrl ((unsigned char) c))
641 return last_lex = LX_ERR;
642
643 for (;;) {
644 if ((c = *cp++) == 0)
645 break;
646 for (i = 0; special[i].lx_chr != 0; i++)
647 if (c == special[i].lx_chr)
648 goto got_atom;
649 if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
650 break;
651 ADDCHR(c);
652 }
653 got_atom: ;
654 if (c == 0)
655 cp = NULL;
656 else
657 cp--;
658 *bp = 0;
659 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
660 ? LX_ATOM : LX_AT;
661 return last_lex;
662
663 my_lex_buffull:
664 /* Out of buffer space. *bp is the last byte in the buffer */
665 *bp = 0;
666 return last_lex = LX_ERR;
667 }
668
669
670 char *
671 legal_person (const char *p)
672 {
673 int i;
674 const char *cp;
675 static char buffer[BUFSIZ];
676
677 if (*p == '"')
678 return (char *) p;
679 for (cp = p; *cp; cp++)
680 for (i = 0; special[i].lx_chr; i++)
681 if (*cp == special[i].lx_chr) {
682 snprintf(buffer, sizeof buffer, "\"%s\"", p);
683 return buffer;
684 }
685
686 return (char *) p;
687 }