]> diplodocus.org Git - nmh/blob - sbr/mf.c
Removed a bunch of unreachable break statements found by
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
8 */
9
10 #include <h/mf.h>
11 #include <h/utils.h>
12
13 /*
14 * static prototypes
15 */
16 static int isat (const char *);
17 static int parse_address (void);
18 static int phrase (char *);
19 static int route_addr (char *);
20 static int local_part (char *);
21 static int domain (char *);
22 static int route (char *);
23 static int my_lex (char *);
24 static int contains8bit (const char *);
25
26
/*
 * isfrom -- report whether STRING begins with the literal "From " or
 * the literal ">From ".  Returns 1 if so, 0 otherwise.
 */
int
isfrom(const char *string)
{
    if (strncmp (string, "From ", 5) == 0)
        return 1;

    return strncmp (string, ">From ", 6) == 0;
}
33
34
35 int
36 lequal (const char *a, const char *b)
37 {
38 for (; *a; a++, b++)
39 if (*b == 0)
40 return FALSE;
41 else {
42 char c1 = islower ((unsigned char) *a) ?
43 toupper ((unsigned char) *a) : *a;
44 char c2 = islower ((unsigned char) *b) ?
45 toupper ((unsigned char) *b) : *b;
46 if (c1 != c2)
47 return FALSE;
48 }
49
50 return (*b == 0);
51 }
52
53
54 static int
55 isat (const char *p)
56 {
57 return (strncmp (p, " AT ", 4)
58 && strncmp (p, " At ", 4)
59 && strncmp (p, " aT ", 4)
60 && strncmp (p, " at ", 4) ? FALSE : TRUE);
61 }
62
63
64 /*
65 *
66 * getadrx() implements a partial 822-style address parser. The parser
67 * is neither complete nor correct. It does however recognize nearly all
68 * of the 822 address syntax. In addition it handles the majority of the
69 * 733 syntax as well. Most problems arise from trying to accomodate both.
70 *
71 * In terms of 822, the route-specification in
72 *
73 * "<" [route] local-part "@" domain ">"
74 *
75 * is parsed and returned unchanged. Multiple at-signs are compressed
76 * via source-routing. Recursive groups are not allowed as per the
77 * standard.
78 *
79 * In terms of 733, " at " is recognized as equivalent to "@".
80 *
81 * In terms of both the parser will not complain about missing hosts.
82 *
83 * -----
84 *
85 * We should not allow addresses like
86 *
87 * Marshall T. Rose <MRose@UCI>
88 *
89 * but should insist on
90 *
91 * "Marshall T. Rose" <MRose@UCI>
92 *
93 * Unfortunately, a lot of mailers stupidly let people get away with this.
94 *
95 * -----
96 *
97 * We should not allow addresses like
98 *
99 * <MRose@UCI>
100 *
101 * but should insist on
102 *
103 * MRose@UCI
104 *
105 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
106 * this.
107 *
108 * -----
109 *
110 * We should not allow addresses like
111 *
112 * @UCI:MRose@UCI-750a
113 *
114 * but should insist on
115 *
116 * Marshall Rose <@UCI:MRose@UCI-750a>
117 *
118 * Unfortunately, a lot of mailers stupidly do this.
119 *
120 */
121
/* Character that escapes the next character inside comments,
 * quoted strings and domain literals. */
#define QUOTE '\\'

/*
 * Token codes returned by my_lex() and remembered in last_lex.
 */
enum {
    LX_END  = 0,    /* end of input */
    LX_ERR  = 1,    /* lexical error */
    LX_ATOM = 2,    /* atom */
    LX_QSTR = 3,    /* "quoted string" */
    LX_DLIT = 4,    /* [domain literal] */
    LX_SEMI = 5,    /* ';' */
    LX_COMA = 6,    /* ',' */
    LX_LBRK = 7,    /* '<' */
    LX_RBRK = 8,    /* '>' */
    LX_COLN = 9,    /* ':' */
    LX_DOT  = 10,   /* '.' */
    LX_AT   = 11    /* '@' (or the word "at") */
};
136
137 struct specials {
138 char lx_chr;
139 int lx_val;
140 };
141
142 static struct specials special[] = {
143 { ';', LX_SEMI },
144 { ',', LX_COMA },
145 { '<', LX_LBRK },
146 { '>', LX_RBRK },
147 { ':', LX_COLN },
148 { '.', LX_DOT },
149 { '@', LX_AT },
150 { '(', LX_ERR },
151 { ')', LX_ERR },
152 { QUOTE, LX_ERR },
153 { '"', LX_ERR },
154 { '[', LX_ERR },
155 { ']', LX_ERR },
156 { 0, 0 }
157 };
158
159 static int glevel = 0;
160 static int ingrp = 0;
161 static int last_lex = LX_END;
162
163 static char *dp = NULL;
164 static char *cp = NULL;
165 static char *ap = NULL;
166 static char *pers = NULL;
167 static char *mbox = NULL;
168 static char *host = NULL;
169 static char *path = NULL;
170 static char *grp = NULL;
171 static char *note = NULL;
172 static char err[BUFSIZ];
173 static char adr[BUFSIZ];
174
175 static struct adrx adrxs2;
176
177
178 struct adrx *
179 getadrx (const char *addrs)
180 {
181 register char *bp;
182 register struct adrx *adrxp = &adrxs2;
183
184 if (pers)
185 free (pers);
186 if (mbox)
187 free (mbox);
188 if (host)
189 free (host);
190 if (path)
191 free (path);
192 if (grp)
193 free (grp);
194 if (note)
195 free (note);
196 pers = mbox = host = path = grp = note = NULL;
197 err[0] = 0;
198
199 if (dp == NULL) {
200 dp = cp = strdup (addrs ? addrs : "");
201 glevel = 0;
202 }
203 else
204 if (cp == NULL) {
205 free (dp);
206 dp = NULL;
207 return NULL;
208 }
209
210 switch (parse_address ()) {
211 case DONE:
212 free (dp);
213 dp = cp = NULL;
214 return NULL;
215
216 case OK:
217 switch (last_lex) {
218 case LX_COMA:
219 case LX_END:
220 break;
221
222 default: /* catch trailing comments */
223 bp = cp;
224 my_lex (adr);
225 cp = bp;
226 break;
227 }
228 break;
229
230 default:
231 break;
232 }
233
234 /*
235 * Reject the address if key fields contain 8bit characters
236 */
237
238 if (contains8bit(mbox) || contains8bit(host) || contains8bit(path) ||
239 contains8bit(grp)) {
240 strcpy(err, "Address contains 8-bit characters");
241 }
242
243 if (err[0])
244 for (;;) {
245 switch (last_lex) {
246 case LX_COMA:
247 case LX_END:
248 break;
249
250 default:
251 my_lex (adr);
252 continue;
253 }
254 break;
255 }
256 while (isspace ((unsigned char) *ap))
257 ap++;
258 if (cp)
259 sprintf (adr, "%.*s", (int)(cp - ap), ap);
260 else
261 strcpy (adr, ap);
262 bp = adr + strlen (adr) - 1;
263 if (*bp == ',' || *bp == ';' || *bp == '\n')
264 *bp = 0;
265
266 adrxp->text = adr;
267 adrxp->pers = pers;
268 adrxp->mbox = mbox;
269 adrxp->host = host;
270 adrxp->path = path;
271 adrxp->grp = grp;
272 adrxp->ingrp = ingrp;
273 adrxp->note = note;
274 adrxp->err = err[0] ? err : NULL;
275
276 return adrxp;
277 }
278
279
280 static int
281 parse_address (void)
282 {
283 char buffer[BUFSIZ];
284
285 again: ;
286 ap = cp;
287 switch (my_lex (buffer)) {
288 case LX_ATOM:
289 case LX_QSTR:
290 pers = strdup (buffer);
291 break;
292
293 case LX_SEMI:
294 if (glevel-- <= 0) {
295 strcpy (err, "extraneous semi-colon");
296 return NOTOK;
297 }
298 case LX_COMA:
299 if (note) {
300 free (note);
301 note = NULL;
302 }
303 goto again;
304
305 case LX_END:
306 return DONE;
307
308 case LX_LBRK: /* sigh (2) */
309 goto get_addr;
310
311 case LX_AT: /* sigh (3) */
312 cp = ap;
313 if (route_addr (buffer) == NOTOK)
314 return NOTOK;
315 return OK; /* why be choosy? */
316
317 default:
318 sprintf (err, "illegal address construct (%s)", buffer);
319 return NOTOK;
320 }
321
322 switch (my_lex (buffer)) {
323 case LX_ATOM:
324 case LX_QSTR:
325 pers = add (buffer, add (" ", pers));
326 more_phrase: ; /* sigh (1) */
327 if (phrase (buffer) == NOTOK)
328 return NOTOK;
329
330 switch (last_lex) {
331 case LX_LBRK:
332 get_addr: ;
333 if (route_addr (buffer) == NOTOK)
334 return NOTOK;
335 if (last_lex == LX_RBRK)
336 return OK;
337 sprintf (err, "missing right-bracket (%s)", buffer);
338 return NOTOK;
339
340 case LX_COLN:
341 get_group: ;
342 if (glevel++ > 0) {
343 sprintf (err, "nested groups not allowed (%s)", pers);
344 return NOTOK;
345 }
346 grp = add (": ", pers);
347 pers = NULL;
348 {
349 char *pp = cp;
350
351 for (;;)
352 switch (my_lex (buffer)) {
353 case LX_SEMI:
354 case LX_END: /* tsk, tsk */
355 glevel--;
356 return OK;
357
358 case LX_COMA:
359 continue;
360
361 default:
362 cp = pp;
363 return parse_address ();
364 }
365 }
366
367 case LX_DOT: /* sigh (1) */
368 pers = add (".", pers);
369 goto more_phrase;
370
371 default:
372 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
373 pers, buffer);
374 return NOTOK;
375 }
376
377 case LX_LBRK:
378 goto get_addr;
379
380 case LX_COLN:
381 goto get_group;
382
383 case LX_DOT:
384 mbox = add (buffer, pers);
385 pers = NULL;
386 if (route_addr (buffer) == NOTOK)
387 return NOTOK;
388 goto check_end;
389
390 case LX_AT:
391 ingrp = glevel;
392 mbox = pers;
393 pers = NULL;
394 if (domain (buffer) == NOTOK)
395 return NOTOK;
396 check_end: ;
397 switch (last_lex) {
398 case LX_SEMI:
399 if (glevel-- <= 0) {
400 strcpy (err, "extraneous semi-colon");
401 return NOTOK;
402 }
403 case LX_COMA:
404 case LX_END:
405 return OK;
406
407 default:
408 sprintf (err, "junk after local@domain (%s)", buffer);
409 return NOTOK;
410 }
411
412 case LX_SEMI: /* no host */
413 case LX_COMA:
414 case LX_END:
415 ingrp = glevel;
416 if (last_lex == LX_SEMI && glevel-- <= 0) {
417 strcpy (err, "extraneous semi-colon");
418 return NOTOK;
419 }
420 mbox = pers;
421 pers = NULL;
422 return OK;
423
424 default:
425 sprintf (err, "missing mailbox (%s)", buffer);
426 return NOTOK;
427 }
428 }
429
430
431 static int
432 phrase (char *buffer)
433 {
434 for (;;)
435 switch (my_lex (buffer)) {
436 case LX_ATOM:
437 case LX_QSTR:
438 pers = add (buffer, add (" ", pers));
439 continue;
440
441 default:
442 return OK;
443 }
444 }
445
446
447 static int
448 route_addr (char *buffer)
449 {
450 register char *pp = cp;
451
452 if (my_lex (buffer) == LX_AT) {
453 if (route (buffer) == NOTOK)
454 return NOTOK;
455 }
456 else
457 cp = pp;
458
459 if (local_part (buffer) == NOTOK)
460 return NOTOK;
461
462 switch (last_lex) {
463 case LX_AT:
464 return domain (buffer);
465
466 case LX_SEMI: /* if in group */
467 case LX_RBRK: /* no host */
468 case LX_COMA:
469 case LX_END:
470 return OK;
471
472 default:
473 sprintf (err, "no at-sign after local-part (%s)", buffer);
474 return NOTOK;
475 }
476 }
477
478
479 static int
480 local_part (char *buffer)
481 {
482 ingrp = glevel;
483
484 for (;;) {
485 switch (my_lex (buffer)) {
486 case LX_ATOM:
487 case LX_QSTR:
488 mbox = add (buffer, mbox);
489 break;
490
491 default:
492 sprintf (err, "no mailbox in local-part (%s)", buffer);
493 return NOTOK;
494 }
495
496 switch (my_lex (buffer)) {
497 case LX_DOT:
498 mbox = add (buffer, mbox);
499 continue;
500
501 default:
502 return OK;
503 }
504 }
505 }
506
507
508 static int
509 domain (char *buffer)
510 {
511 for (;;) {
512 switch (my_lex (buffer)) {
513 case LX_ATOM:
514 case LX_DLIT:
515 host = add (buffer, host);
516 break;
517
518 default:
519 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
520 return NOTOK;
521 }
522
523 switch (my_lex (buffer)) {
524 case LX_DOT:
525 host = add (buffer, host);
526 continue;
527
528 case LX_AT: /* sigh (0) */
529 mbox = add (host, add ("%", mbox));
530 free (host);
531 host = NULL;
532 continue;
533
534 default:
535 return OK;
536 }
537 }
538 }
539
540
541 static int
542 route (char *buffer)
543 {
544 path = strdup ("@");
545
546 for (;;) {
547 switch (my_lex (buffer)) {
548 case LX_ATOM:
549 case LX_DLIT:
550 path = add (buffer, path);
551 break;
552
553 default:
554 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
555 return NOTOK;
556 }
557 switch (my_lex (buffer)) {
558 case LX_COMA:
559 path = add (buffer, path);
560 for (;;) {
561 switch (my_lex (buffer)) {
562 case LX_COMA:
563 continue;
564
565 case LX_AT:
566 path = add (buffer, path);
567 break;
568
569 default:
570 sprintf (err, "no at-sign found for next domain in route (%s)",
571 buffer);
572 }
573 break;
574 }
575 continue;
576
577 case LX_AT: /* XXX */
578 case LX_DOT:
579 path = add (buffer, path);
580 continue;
581
582 case LX_COLN:
583 path = add (buffer, path);
584 return OK;
585
586 default:
587 sprintf (err, "no colon found to terminate route (%s)", buffer);
588 return NOTOK;
589 }
590 }
591 }
592
593
594 static int
595 my_lex (char *buffer)
596 {
597 /* buffer should be at least BUFSIZ bytes long */
598 int i, gotat = 0;
599 char c, *bp;
600
601 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
602 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
603
604 bp = buffer;
605 *bp = 0;
606 if (!cp)
607 return (last_lex = LX_END);
608
609 gotat = isat (cp);
610 c = *cp++;
611 while (isspace ((unsigned char) c))
612 c = *cp++;
613 if (c == 0) {
614 cp = NULL;
615 return (last_lex = LX_END);
616 }
617
618 if (c == '(') {
619 ADDCHR(c);
620 for (i = 0;;)
621 switch (c = *cp++) {
622 case 0:
623 cp = NULL;
624 return (last_lex = LX_ERR);
625 case QUOTE:
626 ADDCHR(c);
627 if ((c = *cp++) == 0) {
628 cp = NULL;
629 return (last_lex = LX_ERR);
630 }
631 ADDCHR(c);
632 continue;
633 case '(':
634 i++;
635 default:
636 ADDCHR(c);
637 continue;
638 case ')':
639 ADDCHR(c);
640 if (--i < 0) {
641 *bp = 0;
642 note = note ? add (buffer, add (" ", note))
643 : strdup (buffer);
644 return my_lex (buffer);
645 }
646 }
647 }
648
649 if (c == '"') {
650 ADDCHR(c);
651 for (;;)
652 switch (c = *cp++) {
653 case 0:
654 cp = NULL;
655 return (last_lex = LX_ERR);
656 case QUOTE:
657 ADDCHR(c);
658 if ((c = *cp++) == 0) {
659 cp = NULL;
660 return (last_lex = LX_ERR);
661 }
662 default:
663 ADDCHR(c);
664 continue;
665 case '"':
666 ADDCHR(c);
667 *bp = 0;
668 return (last_lex = LX_QSTR);
669 }
670 }
671
672 if (c == '[') {
673 ADDCHR(c);
674 for (;;)
675 switch (c = *cp++) {
676 case 0:
677 cp = NULL;
678 return (last_lex = LX_ERR);
679 case QUOTE:
680 ADDCHR(c);
681 if ((c = *cp++) == 0) {
682 cp = NULL;
683 return (last_lex = LX_ERR);
684 }
685 default:
686 ADDCHR(c);
687 continue;
688 case ']':
689 ADDCHR(c);
690 *bp = 0;
691 return (last_lex = LX_DLIT);
692 }
693 }
694
695 ADDCHR(c);
696 *bp = 0;
697 for (i = 0; special[i].lx_chr != 0; i++)
698 if (c == special[i].lx_chr)
699 return (last_lex = special[i].lx_val);
700
701 if (iscntrl ((unsigned char) c))
702 return (last_lex = LX_ERR);
703
704 for (;;) {
705 if ((c = *cp++) == 0)
706 break;
707 for (i = 0; special[i].lx_chr != 0; i++)
708 if (c == special[i].lx_chr)
709 goto got_atom;
710 if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
711 break;
712 ADDCHR(c);
713 }
714 got_atom: ;
715 if (c == 0)
716 cp = NULL;
717 else
718 cp--;
719 *bp = 0;
720 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
721 ? LX_ATOM : LX_AT;
722 return last_lex;
723
724 my_lex_buffull:
725 /* Out of buffer space. *bp is the last byte in the buffer */
726 *bp = 0;
727 return (last_lex = LX_ERR);
728 }
729
730
731 /*
732 * Return true if the string contains an 8-bit character
733 */
734
735 static int
736 contains8bit(const char *p)
737 {
738 if (! p)
739 return 0;
740
741 for (; *p; p++) {
742 if (! isascii((unsigned char) *p))
743 return 1;
744 }
745
746 return 0;
747 }
748
749
750 char *
751 legal_person (const char *p)
752 {
753 int i;
754 register const char *cp;
755 static char buffer[BUFSIZ];
756
757 if (*p == '"')
758 return (char *) p;
759 for (cp = p; *cp; cp++)
760 for (i = 0; special[i].lx_chr; i++)
761 if (*cp == special[i].lx_chr) {
762 sprintf (buffer, "\"%s\"", p);
763 return buffer;
764 }
765
766 return (char *) p;
767 }
768
769
770 int
771 mfgets (FILE *in, char **bp)
772 {
773 int i;
774 register char *cp, *dp, *ep;
775 static int len = 0;
776 static char *pp = NULL;
777
778 if (pp == NULL)
779 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
780
781 for (ep = (cp = pp) + len - 2;;) {
782 switch (i = getc (in)) {
783 case EOF:
784 eol: ;
785 if (cp != pp) {
786 *cp = 0;
787 *bp = pp;
788 return OK;
789 }
790 eoh: ;
791 *bp = NULL;
792 free (pp);
793 pp = NULL;
794 return DONE;
795
796 case 0:
797 continue;
798
799 case '\n':
800 if (cp == pp) /* end of headers, gobble it */
801 goto eoh;
802 switch (i = getc (in)) {
803 default: /* end of line */
804 case '\n': /* end of headers, save for next call */
805 ungetc (i, in);
806 goto eol;
807
808 case ' ': /* continue headers */
809 case '\t':
810 *cp++ = '\n';
811 break;
812 } /* fall into default case */
813
814 default:
815 *cp++ = i;
816 break;
817 }
818 if (cp >= ep) {
819 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
820 cp += dp - pp, ep = (pp = cp) + len - 2;
821 }
822 }
823 }