]> diplodocus.org Git - nmh/blob - sbr/mf.c
Another pass at cleaning up (some of) the manpages.
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
8 */
9
10 #include <h/mf.h>
11 #include <h/utils.h>
12
/*
 * Forward declarations of the file-local helpers used by the
 * address parser below.
 */
static int isat (const char *p);
static int parse_address (void);
static int phrase (char *buffer);
static int route_addr (char *buffer);
static int local_part (char *buffer);
static int domain (char *buffer);
static int route (char *buffer);
static int my_lex (char *buffer);
24
25
/*
 * Report whether a line starts with the "From " separator, either
 * bare or escaped with a leading '>'.
 *
 * Returns 1 when `string' begins with "From " or ">From ", else 0.
 */
int
isfrom(const char *string)
{
    static const char plain[] = "From ";
    static const char escaped[] = ">From ";

    if (strncmp (string, plain, sizeof plain - 1) == 0)
        return 1;
    if (strncmp (string, escaped, sizeof escaped - 1) == 0)
        return 1;
    return 0;
}
32
33
34 int
35 lequal (const char *a, const char *b)
36 {
37 for (; *a; a++, b++)
38 if (*b == 0)
39 return FALSE;
40 else {
41 char c1 = islower ((unsigned char) *a) ?
42 toupper ((unsigned char) *a) : *a;
43 char c2 = islower ((unsigned char) *b) ?
44 toupper ((unsigned char) *b) : *b;
45 if (c1 != c2)
46 return FALSE;
47 }
48
49 return (*b == 0);
50 }
51
52
53 static int
54 isat (const char *p)
55 {
56 return (strncmp (p, " AT ", 4)
57 && strncmp (p, " At ", 4)
58 && strncmp (p, " aT ", 4)
59 && strncmp (p, " at ", 4) ? FALSE : TRUE);
60 }
61
62
63 /*
64 *
65 * getadrx() implements a partial 822-style address parser. The parser
66 * is neither complete nor correct. It does however recognize nearly all
67 * of the 822 address syntax. In addition it handles the majority of the
68 * 733 syntax as well. Most problems arise from trying to accommodate both.
69 *
70 * In terms of 822, the route-specification in
71 *
72 * "<" [route] local-part "@" domain ">"
73 *
74 * is parsed and returned unchanged. Multiple at-signs are compressed
75 * via source-routing. Recursive groups are not allowed as per the
76 * standard.
77 *
78 * In terms of 733, " at " is recognized as equivalent to "@".
79 *
80 * In terms of both the parser will not complain about missing hosts.
81 *
82 * -----
83 *
84 * We should not allow addresses like
85 *
86 * Marshall T. Rose <MRose@UCI>
87 *
88 * but should insist on
89 *
90 * "Marshall T. Rose" <MRose@UCI>
91 *
92 * Unfortunately, a lot of mailers stupidly let people get away with this.
93 *
94 * -----
95 *
96 * We should not allow addresses like
97 *
98 * <MRose@UCI>
99 *
100 * but should insist on
101 *
102 * MRose@UCI
103 *
104 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
105 * this.
106 *
107 * -----
108 *
109 * We should not allow addresses like
110 *
111 * @UCI:MRose@UCI-750a
112 *
113 * but should insist on
114 *
115 * Marshall Rose <@UCI:MRose@UCI-750a>
116 *
117 * Unfortunately, a lot of mailers stupidly do this.
118 *
119 */
120
/* Character that escapes the next character inside comments and quoted text. */
#define QUOTE   '\\'

/*
 * Token codes returned by my_lex() and remembered in last_lex.
 */
#define LX_END   0      /* end of input */
#define LX_ERR   1      /* lexical error */
#define LX_ATOM  2      /* atom (plain word) */
#define LX_QSTR  3      /* quoted string: "..." */
#define LX_DLIT  4      /* domain literal: [...] */
#define LX_SEMI  5      /* ';' */
#define LX_COMA  6      /* ',' */
#define LX_LBRK  7      /* '<' */
#define LX_RBRK  8      /* '>' */
#define LX_COLN  9      /* ':' */
#define LX_DOT  10      /* '.' */
#define LX_AT   11      /* '@', or the word introduced by " at " */

/* Maps one special character to the token code my_lex() reports for it. */
struct specials {
    char lx_chr;
    int lx_val;
};

/*
 * Special characters, terminated by an entry whose lx_chr is 0.
 * The table is only ever read, so it is const.  Characters mapped to
 * LX_ERR are errors when met on their own; my_lex() handles '(', '"'
 * and '[' itself before it consults this table.
 */
static const struct specials special[] = {
    { ';',   LX_SEMI },
    { ',',   LX_COMA },
    { '<',   LX_LBRK },
    { '>',   LX_RBRK },
    { ':',   LX_COLN },
    { '.',   LX_DOT },
    { '@',   LX_AT },
    { '(',   LX_ERR },
    { ')',   LX_ERR },
    { QUOTE, LX_ERR },
    { '"',   LX_ERR },
    { '[',   LX_ERR },
    { ']',   LX_ERR },
    { 0,     0 }
};
157
158 static int glevel = 0;
159 static int ingrp = 0;
160 static int last_lex = LX_END;
161
162 static char *dp = NULL;
163 static char *cp = NULL;
164 static char *ap = NULL;
165 static char *pers = NULL;
166 static char *mbox = NULL;
167 static char *host = NULL;
168 static char *path = NULL;
169 static char *grp = NULL;
170 static char *note = NULL;
171 static char err[BUFSIZ];
172 static char adr[BUFSIZ];
173
174 static struct adrx adrxs2;
175
176
177 struct adrx *
178 getadrx (const char *addrs)
179 {
180 register char *bp;
181 register struct adrx *adrxp = &adrxs2;
182
183 if (pers)
184 free (pers);
185 if (mbox)
186 free (mbox);
187 if (host)
188 free (host);
189 if (path)
190 free (path);
191 if (grp)
192 free (grp);
193 if (note)
194 free (note);
195 pers = mbox = host = path = grp = note = NULL;
196 err[0] = 0;
197
198 if (dp == NULL) {
199 dp = cp = strdup (addrs ? addrs : "");
200 glevel = 0;
201 }
202 else
203 if (cp == NULL) {
204 free (dp);
205 dp = NULL;
206 return NULL;
207 }
208
209 switch (parse_address ()) {
210 case DONE:
211 free (dp);
212 dp = cp = NULL;
213 return NULL;
214
215 case OK:
216 switch (last_lex) {
217 case LX_COMA:
218 case LX_END:
219 break;
220
221 default: /* catch trailing comments */
222 bp = cp;
223 my_lex (adr);
224 cp = bp;
225 break;
226 }
227 break;
228
229 default:
230 break;
231 }
232
233 /*
234 * Reject the address if key fields contain 8bit characters
235 */
236
237 if (contains8bit(mbox, NULL) || contains8bit(host, NULL) ||
238 contains8bit(path, NULL) || contains8bit(grp, NULL)) {
239 strcpy(err, "Address contains 8-bit characters");
240 }
241
242 if (err[0])
243 for (;;) {
244 switch (last_lex) {
245 case LX_COMA:
246 case LX_END:
247 break;
248
249 default:
250 my_lex (adr);
251 continue;
252 }
253 break;
254 }
255 while (isspace ((unsigned char) *ap))
256 ap++;
257 if (cp)
258 sprintf (adr, "%.*s", (int)(cp - ap), ap);
259 else
260 strcpy (adr, ap);
261 bp = adr + strlen (adr) - 1;
262 if (*bp == ',' || *bp == ';' || *bp == '\n')
263 *bp = 0;
264
265 adrxp->text = adr;
266 adrxp->pers = pers;
267 adrxp->mbox = mbox;
268 adrxp->host = host;
269 adrxp->path = path;
270 adrxp->grp = grp;
271 adrxp->ingrp = ingrp;
272 adrxp->note = note;
273 adrxp->err = err[0] ? err : NULL;
274
275 return adrxp;
276 }
277
278
279 static int
280 parse_address (void)
281 {
282 char buffer[BUFSIZ];
283
284 again: ;
285 ap = cp;
286 switch (my_lex (buffer)) {
287 case LX_ATOM:
288 case LX_QSTR:
289 pers = strdup (buffer);
290 break;
291
292 case LX_SEMI:
293 if (glevel-- <= 0) {
294 strcpy (err, "extraneous semi-colon");
295 return NOTOK;
296 }
297 case LX_COMA:
298 if (note) {
299 free (note);
300 note = NULL;
301 }
302 goto again;
303
304 case LX_END:
305 return DONE;
306
307 case LX_LBRK: /* sigh (2) */
308 goto get_addr;
309
310 case LX_AT: /* sigh (3) */
311 cp = ap;
312 if (route_addr (buffer) == NOTOK)
313 return NOTOK;
314 return OK; /* why be choosy? */
315
316 default:
317 sprintf (err, "illegal address construct (%s)", buffer);
318 return NOTOK;
319 }
320
321 switch (my_lex (buffer)) {
322 case LX_ATOM:
323 case LX_QSTR:
324 pers = add (buffer, add (" ", pers));
325 more_phrase: ; /* sigh (1) */
326 if (phrase (buffer) == NOTOK)
327 return NOTOK;
328
329 switch (last_lex) {
330 case LX_LBRK:
331 get_addr: ;
332 if (route_addr (buffer) == NOTOK)
333 return NOTOK;
334 if (last_lex == LX_RBRK)
335 return OK;
336 sprintf (err, "missing right-bracket (%s)", buffer);
337 return NOTOK;
338
339 case LX_COLN:
340 get_group: ;
341 if (glevel++ > 0) {
342 sprintf (err, "nested groups not allowed (%s)", pers);
343 return NOTOK;
344 }
345 grp = add (": ", pers);
346 pers = NULL;
347 {
348 char *pp = cp;
349
350 for (;;)
351 switch (my_lex (buffer)) {
352 case LX_SEMI:
353 case LX_END: /* tsk, tsk */
354 glevel--;
355 return OK;
356
357 case LX_COMA:
358 continue;
359
360 default:
361 cp = pp;
362 return parse_address ();
363 }
364 }
365
366 case LX_DOT: /* sigh (1) */
367 pers = add (".", pers);
368 goto more_phrase;
369
370 default:
371 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
372 pers, buffer);
373 return NOTOK;
374 }
375
376 case LX_LBRK:
377 goto get_addr;
378
379 case LX_COLN:
380 goto get_group;
381
382 case LX_DOT:
383 mbox = add (buffer, pers);
384 pers = NULL;
385 if (route_addr (buffer) == NOTOK)
386 return NOTOK;
387 goto check_end;
388
389 case LX_AT:
390 ingrp = glevel;
391 mbox = pers;
392 pers = NULL;
393 if (domain (buffer) == NOTOK)
394 return NOTOK;
395 check_end: ;
396 switch (last_lex) {
397 case LX_SEMI:
398 if (glevel-- <= 0) {
399 strcpy (err, "extraneous semi-colon");
400 return NOTOK;
401 }
402 case LX_COMA:
403 case LX_END:
404 return OK;
405
406 default:
407 sprintf (err, "junk after local@domain (%s)", buffer);
408 return NOTOK;
409 }
410
411 case LX_SEMI: /* no host */
412 case LX_COMA:
413 case LX_END:
414 ingrp = glevel;
415 if (last_lex == LX_SEMI && glevel-- <= 0) {
416 strcpy (err, "extraneous semi-colon");
417 return NOTOK;
418 }
419 mbox = pers;
420 pers = NULL;
421 return OK;
422
423 default:
424 sprintf (err, "missing mailbox (%s)", buffer);
425 return NOTOK;
426 }
427 }
428
429
430 static int
431 phrase (char *buffer)
432 {
433 for (;;)
434 switch (my_lex (buffer)) {
435 case LX_ATOM:
436 case LX_QSTR:
437 pers = add (buffer, add (" ", pers));
438 continue;
439
440 default:
441 return OK;
442 }
443 }
444
445
446 static int
447 route_addr (char *buffer)
448 {
449 register char *pp = cp;
450
451 if (my_lex (buffer) == LX_AT) {
452 if (route (buffer) == NOTOK)
453 return NOTOK;
454 }
455 else
456 cp = pp;
457
458 if (local_part (buffer) == NOTOK)
459 return NOTOK;
460
461 switch (last_lex) {
462 case LX_AT:
463 return domain (buffer);
464
465 case LX_SEMI: /* if in group */
466 case LX_RBRK: /* no host */
467 case LX_COMA:
468 case LX_END:
469 return OK;
470
471 default:
472 sprintf (err, "no at-sign after local-part (%s)", buffer);
473 return NOTOK;
474 }
475 }
476
477
478 static int
479 local_part (char *buffer)
480 {
481 ingrp = glevel;
482
483 for (;;) {
484 switch (my_lex (buffer)) {
485 case LX_ATOM:
486 case LX_QSTR:
487 mbox = add (buffer, mbox);
488 break;
489
490 default:
491 sprintf (err, "no mailbox in local-part (%s)", buffer);
492 return NOTOK;
493 }
494
495 switch (my_lex (buffer)) {
496 case LX_DOT:
497 mbox = add (buffer, mbox);
498 continue;
499
500 default:
501 return OK;
502 }
503 }
504 }
505
506
507 static int
508 domain (char *buffer)
509 {
510 for (;;) {
511 switch (my_lex (buffer)) {
512 case LX_ATOM:
513 case LX_DLIT:
514 host = add (buffer, host);
515 break;
516
517 default:
518 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
519 return NOTOK;
520 }
521
522 switch (my_lex (buffer)) {
523 case LX_DOT:
524 host = add (buffer, host);
525 continue;
526
527 case LX_AT: /* sigh (0) */
528 mbox = add (host, add ("%", mbox));
529 free (host);
530 host = NULL;
531 continue;
532
533 default:
534 return OK;
535 }
536 }
537 }
538
539
540 static int
541 route (char *buffer)
542 {
543 path = strdup ("@");
544
545 for (;;) {
546 switch (my_lex (buffer)) {
547 case LX_ATOM:
548 case LX_DLIT:
549 path = add (buffer, path);
550 break;
551
552 default:
553 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
554 return NOTOK;
555 }
556 switch (my_lex (buffer)) {
557 case LX_COMA:
558 path = add (buffer, path);
559 for (;;) {
560 switch (my_lex (buffer)) {
561 case LX_COMA:
562 continue;
563
564 case LX_AT:
565 path = add (buffer, path);
566 break;
567
568 default:
569 sprintf (err, "no at-sign found for next domain in route (%s)",
570 buffer);
571 }
572 break;
573 }
574 continue;
575
576 case LX_AT: /* XXX */
577 case LX_DOT:
578 path = add (buffer, path);
579 continue;
580
581 case LX_COLN:
582 path = add (buffer, path);
583 return OK;
584
585 default:
586 sprintf (err, "no colon found to terminate route (%s)", buffer);
587 return NOTOK;
588 }
589 }
590 }
591
592
593 static int
594 my_lex (char *buffer)
595 {
596 /* buffer should be at least BUFSIZ bytes long */
597 int i, gotat = 0;
598 char c, *bp;
599
600 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
601 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
602
603 bp = buffer;
604 *bp = 0;
605 if (!cp)
606 return (last_lex = LX_END);
607
608 gotat = isat (cp);
609 c = *cp++;
610 while (isspace ((unsigned char) c))
611 c = *cp++;
612 if (c == 0) {
613 cp = NULL;
614 return (last_lex = LX_END);
615 }
616
617 if (c == '(') {
618 ADDCHR(c);
619 for (i = 0;;)
620 switch (c = *cp++) {
621 case 0:
622 cp = NULL;
623 return (last_lex = LX_ERR);
624 case QUOTE:
625 ADDCHR(c);
626 if ((c = *cp++) == 0) {
627 cp = NULL;
628 return (last_lex = LX_ERR);
629 }
630 ADDCHR(c);
631 continue;
632 case '(':
633 i++;
634 default:
635 ADDCHR(c);
636 continue;
637 case ')':
638 ADDCHR(c);
639 if (--i < 0) {
640 *bp = 0;
641 note = note ? add (buffer, add (" ", note))
642 : strdup (buffer);
643 return my_lex (buffer);
644 }
645 }
646 }
647
648 if (c == '"') {
649 ADDCHR(c);
650 for (;;)
651 switch (c = *cp++) {
652 case 0:
653 cp = NULL;
654 return (last_lex = LX_ERR);
655 case QUOTE:
656 ADDCHR(c);
657 if ((c = *cp++) == 0) {
658 cp = NULL;
659 return (last_lex = LX_ERR);
660 }
661 default:
662 ADDCHR(c);
663 continue;
664 case '"':
665 ADDCHR(c);
666 *bp = 0;
667 return (last_lex = LX_QSTR);
668 }
669 }
670
671 if (c == '[') {
672 ADDCHR(c);
673 for (;;)
674 switch (c = *cp++) {
675 case 0:
676 cp = NULL;
677 return (last_lex = LX_ERR);
678 case QUOTE:
679 ADDCHR(c);
680 if ((c = *cp++) == 0) {
681 cp = NULL;
682 return (last_lex = LX_ERR);
683 }
684 default:
685 ADDCHR(c);
686 continue;
687 case ']':
688 ADDCHR(c);
689 *bp = 0;
690 return (last_lex = LX_DLIT);
691 }
692 }
693
694 ADDCHR(c);
695 *bp = 0;
696 for (i = 0; special[i].lx_chr != 0; i++)
697 if (c == special[i].lx_chr)
698 return (last_lex = special[i].lx_val);
699
700 if (iscntrl ((unsigned char) c))
701 return (last_lex = LX_ERR);
702
703 for (;;) {
704 if ((c = *cp++) == 0)
705 break;
706 for (i = 0; special[i].lx_chr != 0; i++)
707 if (c == special[i].lx_chr)
708 goto got_atom;
709 if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
710 break;
711 ADDCHR(c);
712 }
713 got_atom: ;
714 if (c == 0)
715 cp = NULL;
716 else
717 cp--;
718 *bp = 0;
719 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
720 ? LX_ATOM : LX_AT;
721 return last_lex;
722
723 my_lex_buffull:
724 /* Out of buffer space. *bp is the last byte in the buffer */
725 *bp = 0;
726 return (last_lex = LX_ERR);
727 }
728
729
730 char *
731 legal_person (const char *p)
732 {
733 int i;
734 register const char *cp;
735 static char buffer[BUFSIZ];
736
737 if (*p == '"')
738 return (char *) p;
739 for (cp = p; *cp; cp++)
740 for (i = 0; special[i].lx_chr; i++)
741 if (*cp == special[i].lx_chr) {
742 sprintf (buffer, "\"%s\"", p);
743 return buffer;
744 }
745
746 return (char *) p;
747 }
748
749
750 int
751 mfgets (FILE *in, char **bp)
752 {
753 int i;
754 register char *cp, *dp, *ep;
755 static int len = 0;
756 static char *pp = NULL;
757
758 if (pp == NULL)
759 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
760
761 for (ep = (cp = pp) + len - 2;;) {
762 switch (i = getc (in)) {
763 case EOF:
764 eol: ;
765 if (cp != pp) {
766 *cp = 0;
767 *bp = pp;
768 return OK;
769 }
770 eoh: ;
771 *bp = NULL;
772 free (pp);
773 pp = NULL;
774 return DONE;
775
776 case 0:
777 continue;
778
779 case '\n':
780 if (cp == pp) /* end of headers, gobble it */
781 goto eoh;
782 switch (i = getc (in)) {
783 default: /* end of line */
784 case '\n': /* end of headers, save for next call */
785 ungetc (i, in);
786 goto eol;
787
788 case ' ': /* continue headers */
789 case '\t':
790 *cp++ = '\n';
791 break;
792 } /* fall into default case */
793
794 default:
795 *cp++ = i;
796 break;
797 }
798 if (cp >= ep) {
799 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
800 cp += dp - pp, ep = (pp = cp) + len - 2;
801 }
802 }
803 }