]> diplodocus.org Git - nmh/blob - sbr/mf.c
Don't need to cast to `char *' for free(3) these days.
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
8 */
9
10 #include <h/mf.h>
11 #include <h/utils.h>
12
13 /*
14 * static prototypes
15 */
16 static int isat (const char *);
17 static int parse_address (void);
18 static int phrase (char *);
19 static int route_addr (char *);
20 static int local_part (char *);
21 static int domain (char *);
22 static int route (char *);
23 static int my_lex (char *);
24
25
/*
 * isfrom -- report whether string begins with "From " or ">From ".
 *
 * Returns 1 when either prefix matches, 0 otherwise.
 */
int
isfrom(const char *string)
{
    if (strncmp (string, "From ", 5) == 0)
        return 1;

    return strncmp (string, ">From ", 6) == 0;
}
32
33
/*
 * lequal -- compare two strings for equality, ignoring case.
 *
 * Characters are compared after toupper(); the strings match only when
 * they have the same length and every position compares equal.
 * Returns non-zero on a match, zero otherwise.
 */
int
lequal (const char *a, const char *b)
{
    /* Walk both strings in step while they still agree. */
    while (*a != 0 && *b != 0
            && toupper((unsigned char)*a) == toupper((unsigned char)*b)) {
        a++;
        b++;
    }

    /* Equal only if both strings ran out at the same position. */
    return (*a == 0 && *b == 0);
}
50
51
52 static int
53 isat (const char *p)
54 {
55 return (strncmp (p, " AT ", 4)
56 && strncmp (p, " At ", 4)
57 && strncmp (p, " aT ", 4)
58 && strncmp (p, " at ", 4) ? FALSE : TRUE);
59 }
60
61
62 /*
63 *
64 * getadrx() implements a partial 822-style address parser. The parser
65 * is neither complete nor correct. It does however recognize nearly all
66 * of the 822 address syntax. In addition it handles the majority of the
67 * 733 syntax as well. Most problems arise from trying to accommodate both.
68 *
69 * In terms of 822, the route-specification in
70 *
71 * "<" [route] local-part "@" domain ">"
72 *
73 * is parsed and returned unchanged. Multiple at-signs are compressed
74 * via source-routing. Recursive groups are not allowed as per the
75 * standard.
76 *
77 * In terms of 733, " at " is recognized as equivalent to "@".
78 *
79 * In terms of both the parser will not complain about missing hosts.
80 *
81 * -----
82 *
83 * We should not allow addresses like
84 *
85 * Marshall T. Rose <MRose@UCI>
86 *
87 * but should insist on
88 *
89 * "Marshall T. Rose" <MRose@UCI>
90 *
91 * Unfortunately, a lot of mailers stupidly let people get away with this.
92 *
93 * -----
94 *
95 * We should not allow addresses like
96 *
97 * <MRose@UCI>
98 *
99 * but should insist on
100 *
101 * MRose@UCI
102 *
103 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
104 * this.
105 *
106 * -----
107 *
108 * We should not allow addresses like
109 *
110 * @UCI:MRose@UCI-750a
111 *
112 * but should insist on
113 *
114 * Marshall Rose <@UCI:MRose@UCI-750a>
115 *
116 * Unfortunately, a lot of mailers stupidly do this.
117 *
118 */
119
/* Escape character honoured inside comments, quoted strings and domain literals. */
#define QUOTE	'\\'

/* Token codes returned by my_lex() and remembered in last_lex. */
#define LX_END	 0	/* input exhausted */
#define LX_ERR	 1	/* lexical error */
#define LX_ATOM	 2	/* atom */
#define LX_QSTR	 3	/* "quoted string" */
#define LX_DLIT	 4	/* [domain literal] */
#define LX_SEMI	 5	/* ';' */
#define LX_COMA	 6	/* ',' */
#define LX_LBRK	 7	/* '<' */
#define LX_RBRK	 8	/* '>' */
#define LX_COLN	 9	/* ':' */
#define LX_DOT	10	/* '.' */
#define LX_AT	11	/* '@' */

/* Pairs one special character with the token code reported for it. */
struct specials {
    char lx_chr;
    int  lx_val;
};

/*
 * Characters that are never part of an atom.  The list ends with an
 * entry whose lx_chr is 0.  The characters mapped to LX_ERR still
 * terminate an atom; they yield an error token only when the table is
 * consulted for the first character of a token.
 */
static struct specials special[] = {
    { ';',   LX_SEMI },
    { ',',   LX_COMA },
    { '<',   LX_LBRK },
    { '>',   LX_RBRK },
    { ':',   LX_COLN },
    { '.',   LX_DOT  },
    { '@',   LX_AT   },
    { '(',   LX_ERR  },
    { ')',   LX_ERR  },
    { QUOTE, LX_ERR  },
    { '"',   LX_ERR  },
    { '[',   LX_ERR  },
    { ']',   LX_ERR  },
    { 0,     0       }
};
156
157 static int glevel = 0;
158 static int ingrp = 0;
159 static int last_lex = LX_END;
160
161 static char *dp = NULL;
162 static char *cp = NULL;
163 static char *ap = NULL;
164 static char *pers = NULL;
165 static char *mbox = NULL;
166 static char *host = NULL;
167 static char *path = NULL;
168 static char *grp = NULL;
169 static char *note = NULL;
170 static char err[BUFSIZ];
171 static char adr[BUFSIZ];
172
173 static struct adrx adrxs2;
174
175
176 /* eai = Email Address Internationalization */
177 struct adrx *
178 getadrx (const char *addrs, int eai)
179 {
180 char *bp;
181 struct adrx *adrxp = &adrxs2;
182
183 mh_xfree(pers);
184 mh_xfree(mbox);
185 mh_xfree(host);
186 mh_xfree(path);
187 mh_xfree(grp);
188 mh_xfree(note);
189 pers = mbox = host = path = grp = note = NULL;
190 err[0] = 0;
191
192 if (dp == NULL) {
193 dp = cp = strdup (addrs ? addrs : "");
194 glevel = 0;
195 }
196 else
197 if (cp == NULL) {
198 free (dp);
199 dp = NULL;
200 return NULL;
201 }
202
203 switch (parse_address ()) {
204 case DONE:
205 free (dp);
206 dp = cp = NULL;
207 return NULL;
208
209 case OK:
210 switch (last_lex) {
211 case LX_COMA:
212 case LX_END:
213 break;
214
215 default: /* catch trailing comments */
216 bp = cp;
217 my_lex (adr);
218 cp = bp;
219 break;
220 }
221 break;
222
223 default:
224 break;
225 }
226
227 if (! eai) {
228 /*
229 * Reject the address if key fields contain 8bit characters
230 */
231
232 if (contains8bit(mbox, NULL) || contains8bit(host, NULL) ||
233 contains8bit(path, NULL) || contains8bit(grp, NULL)) {
234 strcpy(err, "Address contains 8-bit characters");
235 }
236 }
237
238 if (err[0])
239 for (;;) {
240 switch (last_lex) {
241 case LX_COMA:
242 case LX_END:
243 break;
244
245 default:
246 my_lex (adr);
247 continue;
248 }
249 break;
250 }
251 while (isspace ((unsigned char) *ap))
252 ap++;
253 if (cp)
254 snprintf(adr, sizeof adr, "%.*s", (int)(cp - ap), ap);
255 else
256 strcpy (adr, ap);
257 bp = adr + strlen (adr) - 1;
258 if (*bp == ',' || *bp == ';' || *bp == '\n')
259 *bp = 0;
260
261 adrxp->text = adr;
262 adrxp->pers = pers;
263 adrxp->mbox = mbox;
264 adrxp->host = host;
265 adrxp->path = path;
266 adrxp->grp = grp;
267 adrxp->ingrp = ingrp;
268 adrxp->note = note;
269 adrxp->err = err[0] ? err : NULL;
270
271 return adrxp;
272 }
273
274
275 static int
276 parse_address (void)
277 {
278 char buffer[BUFSIZ];
279
280 again: ;
281 ap = cp;
282 switch (my_lex (buffer)) {
283 case LX_ATOM:
284 case LX_QSTR:
285 pers = strdup (buffer);
286 break;
287
288 case LX_SEMI:
289 if (glevel-- <= 0) {
290 strcpy (err, "extraneous semi-colon");
291 return NOTOK;
292 }
293 case LX_COMA:
294 mh_xfree(note);
295 note = NULL;
296 goto again;
297
298 case LX_END:
299 return DONE;
300
301 case LX_LBRK: /* sigh (2) */
302 goto get_addr;
303
304 case LX_AT: /* sigh (3) */
305 cp = ap;
306 if (route_addr (buffer) == NOTOK)
307 return NOTOK;
308 return OK; /* why be choosy? */
309
310 default:
311 snprintf(err, sizeof err, "illegal address construct (%s)", buffer);
312 return NOTOK;
313 }
314
315 switch (my_lex (buffer)) {
316 case LX_ATOM:
317 case LX_QSTR:
318 pers = add (buffer, add (" ", pers));
319 more_phrase: ; /* sigh (1) */
320 if (phrase (buffer) == NOTOK)
321 return NOTOK;
322
323 switch (last_lex) {
324 case LX_LBRK:
325 get_addr: ;
326 if (route_addr (buffer) == NOTOK)
327 return NOTOK;
328 if (last_lex == LX_RBRK)
329 return OK;
330 snprintf(err, sizeof err, "missing right-bracket (%s)", buffer);
331 return NOTOK;
332
333 case LX_COLN:
334 get_group: ;
335 if (glevel++ > 0) {
336 snprintf(err, sizeof err, "nested groups not allowed (%s)", pers);
337 return NOTOK;
338 }
339 grp = add (": ", pers);
340 pers = NULL;
341 {
342 char *pp = cp;
343
344 for (;;)
345 switch (my_lex (buffer)) {
346 case LX_SEMI:
347 case LX_END: /* tsk, tsk */
348 glevel--;
349 return OK;
350
351 case LX_COMA:
352 continue;
353
354 default:
355 cp = pp;
356 return parse_address ();
357 }
358 }
359
360 case LX_DOT: /* sigh (1) */
361 pers = add (".", pers);
362 goto more_phrase;
363
364 default:
365 snprintf(err, sizeof err, "no mailbox in address, only a phrase (%s%s)",
366 pers, buffer);
367 return NOTOK;
368 }
369
370 case LX_LBRK:
371 goto get_addr;
372
373 case LX_COLN:
374 goto get_group;
375
376 case LX_DOT:
377 mbox = add (buffer, pers);
378 pers = NULL;
379 if (route_addr (buffer) == NOTOK)
380 return NOTOK;
381 goto check_end;
382
383 case LX_AT:
384 ingrp = glevel;
385 mbox = pers;
386 pers = NULL;
387 if (domain (buffer) == NOTOK)
388 return NOTOK;
389 check_end: ;
390 switch (last_lex) {
391 case LX_SEMI:
392 if (glevel-- <= 0) {
393 strcpy (err, "extraneous semi-colon");
394 return NOTOK;
395 }
396 case LX_COMA:
397 case LX_END:
398 return OK;
399
400 default:
401 snprintf(err, sizeof err, "junk after local@domain (%s)", buffer);
402 return NOTOK;
403 }
404
405 case LX_SEMI: /* no host */
406 case LX_COMA:
407 case LX_END:
408 ingrp = glevel;
409 if (last_lex == LX_SEMI && glevel-- <= 0) {
410 strcpy (err, "extraneous semi-colon");
411 return NOTOK;
412 }
413 mbox = pers;
414 pers = NULL;
415 return OK;
416
417 default:
418 snprintf(err, sizeof err, "missing mailbox (%s)", buffer);
419 return NOTOK;
420 }
421 }
422
423
424 static int
425 phrase (char *buffer)
426 {
427 for (;;)
428 switch (my_lex (buffer)) {
429 case LX_ATOM:
430 case LX_QSTR:
431 pers = add (buffer, add (" ", pers));
432 continue;
433
434 default:
435 return OK;
436 }
437 }
438
439
440 static int
441 route_addr (char *buffer)
442 {
443 char *pp = cp;
444
445 if (my_lex (buffer) == LX_AT) {
446 if (route (buffer) == NOTOK)
447 return NOTOK;
448 }
449 else
450 cp = pp;
451
452 if (local_part (buffer) == NOTOK)
453 return NOTOK;
454
455 switch (last_lex) {
456 case LX_AT:
457 return domain (buffer);
458
459 case LX_SEMI: /* if in group */
460 case LX_RBRK: /* no host */
461 case LX_COMA:
462 case LX_END:
463 return OK;
464
465 default:
466 snprintf(err, sizeof err, "no at-sign after local-part (%s)", buffer);
467 return NOTOK;
468 }
469 }
470
471
472 static int
473 local_part (char *buffer)
474 {
475 ingrp = glevel;
476
477 for (;;) {
478 switch (my_lex (buffer)) {
479 case LX_ATOM:
480 case LX_QSTR:
481 mbox = add (buffer, mbox);
482 break;
483
484 default:
485 snprintf(err, sizeof err, "no mailbox in local-part (%s)", buffer);
486 return NOTOK;
487 }
488
489 switch (my_lex (buffer)) {
490 case LX_DOT:
491 mbox = add (buffer, mbox);
492 continue;
493
494 default:
495 return OK;
496 }
497 }
498 }
499
500
501 static int
502 domain (char *buffer)
503 {
504 for (;;) {
505 switch (my_lex (buffer)) {
506 case LX_ATOM:
507 case LX_DLIT:
508 host = add (buffer, host);
509 break;
510
511 default:
512 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
513 return NOTOK;
514 }
515
516 switch (my_lex (buffer)) {
517 case LX_DOT:
518 host = add (buffer, host);
519 continue;
520
521 case LX_AT: /* sigh (0) */
522 mbox = add (host, add ("%", mbox));
523 free (host);
524 host = NULL;
525 continue;
526
527 default:
528 return OK;
529 }
530 }
531 }
532
533
534 static int
535 route (char *buffer)
536 {
537 path = strdup ("@");
538
539 for (;;) {
540 switch (my_lex (buffer)) {
541 case LX_ATOM:
542 case LX_DLIT:
543 path = add (buffer, path);
544 break;
545
546 default:
547 snprintf(err, sizeof err, "no sub-domain in domain-part of address (%s)", buffer);
548 return NOTOK;
549 }
550 switch (my_lex (buffer)) {
551 case LX_COMA:
552 path = add (buffer, path);
553 for (;;) {
554 switch (my_lex (buffer)) {
555 case LX_COMA:
556 continue;
557
558 case LX_AT:
559 path = add (buffer, path);
560 break;
561
562 default:
563 snprintf(err, sizeof err, "no at-sign found for next domain in route (%s)",
564 buffer);
565 }
566 break;
567 }
568 continue;
569
570 case LX_AT: /* XXX */
571 case LX_DOT:
572 path = add (buffer, path);
573 continue;
574
575 case LX_COLN:
576 path = add (buffer, path);
577 return OK;
578
579 default:
580 snprintf(err, sizeof err, "no colon found to terminate route (%s)", buffer);
581 return NOTOK;
582 }
583 }
584 }
585
586
587 static int
588 my_lex (char *buffer)
589 {
590 /* buffer should be at least BUFSIZ bytes long */
591 int i, gotat = 0;
592 char c, *bp;
593
594 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
595 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
596
597 bp = buffer;
598 *bp = 0;
599 if (!cp)
600 return (last_lex = LX_END);
601
602 gotat = isat (cp);
603 c = *cp++;
604 while (isspace ((unsigned char) c))
605 c = *cp++;
606 if (c == 0) {
607 cp = NULL;
608 return (last_lex = LX_END);
609 }
610
611 if (c == '(') {
612 ADDCHR(c);
613 for (i = 0;;)
614 switch (c = *cp++) {
615 case 0:
616 cp = NULL;
617 return (last_lex = LX_ERR);
618 case QUOTE:
619 ADDCHR(c);
620 if ((c = *cp++) == 0) {
621 cp = NULL;
622 return (last_lex = LX_ERR);
623 }
624 ADDCHR(c);
625 continue;
626 case '(':
627 i++;
628 default:
629 ADDCHR(c);
630 continue;
631 case ')':
632 ADDCHR(c);
633 if (--i < 0) {
634 *bp = 0;
635 note = note ? add (buffer, add (" ", note))
636 : strdup (buffer);
637 return my_lex (buffer);
638 }
639 }
640 }
641
642 if (c == '"') {
643 ADDCHR(c);
644 for (;;)
645 switch (c = *cp++) {
646 case 0:
647 cp = NULL;
648 return (last_lex = LX_ERR);
649 case QUOTE:
650 ADDCHR(c);
651 if ((c = *cp++) == 0) {
652 cp = NULL;
653 return (last_lex = LX_ERR);
654 }
655 default:
656 ADDCHR(c);
657 continue;
658 case '"':
659 ADDCHR(c);
660 *bp = 0;
661 return (last_lex = LX_QSTR);
662 }
663 }
664
665 if (c == '[') {
666 ADDCHR(c);
667 for (;;)
668 switch (c = *cp++) {
669 case 0:
670 cp = NULL;
671 return (last_lex = LX_ERR);
672 case QUOTE:
673 ADDCHR(c);
674 if ((c = *cp++) == 0) {
675 cp = NULL;
676 return (last_lex = LX_ERR);
677 }
678 default:
679 ADDCHR(c);
680 continue;
681 case ']':
682 ADDCHR(c);
683 *bp = 0;
684 return (last_lex = LX_DLIT);
685 }
686 }
687
688 ADDCHR(c);
689 *bp = 0;
690 for (i = 0; special[i].lx_chr != 0; i++)
691 if (c == special[i].lx_chr)
692 return (last_lex = special[i].lx_val);
693
694 if (iscntrl ((unsigned char) c))
695 return (last_lex = LX_ERR);
696
697 for (;;) {
698 if ((c = *cp++) == 0)
699 break;
700 for (i = 0; special[i].lx_chr != 0; i++)
701 if (c == special[i].lx_chr)
702 goto got_atom;
703 if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
704 break;
705 ADDCHR(c);
706 }
707 got_atom: ;
708 if (c == 0)
709 cp = NULL;
710 else
711 cp--;
712 *bp = 0;
713 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
714 ? LX_ATOM : LX_AT;
715 return last_lex;
716
717 my_lex_buffull:
718 /* Out of buffer space. *bp is the last byte in the buffer */
719 *bp = 0;
720 return (last_lex = LX_ERR);
721 }
722
723
724 char *
725 legal_person (const char *p)
726 {
727 int i;
728 const char *cp;
729 static char buffer[BUFSIZ];
730
731 if (*p == '"')
732 return (char *) p;
733 for (cp = p; *cp; cp++)
734 for (i = 0; special[i].lx_chr; i++)
735 if (*cp == special[i].lx_chr) {
736 snprintf(buffer, sizeof buffer, "\"%s\"", p);
737 return buffer;
738 }
739
740 return (char *) p;
741 }
742
743
744 int
745 mfgets (FILE *in, char **bp)
746 {
747 int i;
748 char *cp, *dp, *ep;
749 static int len = 0;
750 static char *pp = NULL;
751
752 if (pp == NULL)
753 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
754
755 for (ep = (cp = pp) + len - 2;;) {
756 switch (i = getc (in)) {
757 case EOF:
758 eol: ;
759 if (cp != pp) {
760 *cp = 0;
761 *bp = pp;
762 return OK;
763 }
764 eoh: ;
765 *bp = NULL;
766 free (pp);
767 pp = NULL;
768 return DONE;
769
770 case 0:
771 continue;
772
773 case '\n':
774 if (cp == pp) /* end of headers, gobble it */
775 goto eoh;
776 switch (i = getc (in)) {
777 default: /* end of line */
778 case '\n': /* end of headers, save for next call */
779 ungetc (i, in);
780 goto eol;
781
782 case ' ': /* continue headers */
783 case '\t':
784 *cp++ = '\n';
785 break;
786 } /* fall into default case */
787
788 default:
789 *cp++ = i;
790 break;
791 }
792 if (cp >= ep) {
793 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
794 cp += dp - pp, ep = (pp = cp) + len - 2;
795 }
796 }
797 }