]> diplodocus.org Git - nmh/blob - sbr/mf.c
mh-sequence.man: document new '=+' and '=-' for selecting relative msgs
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
8 */
9
10 #include <h/mf.h>
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <h/utils.h>
14
/*
 * Forward declarations of the file-local helpers: two small string
 * utilities, the routines that make up the address parser, and the
 * lexer that feeds them.
 */
static char *getcpy (char *s);
static int isat (char *p);

static int parse_address (void);
static int phrase (char *buffer);
static int route_addr (char *buffer);
static int local_part (char *buffer);
static int domain (char *buffer);
static int route (char *buffer);

static int my_lex (char *buffer);
27
28
/*
 * getcpy -- return a freshly allocated copy of the NUL-terminated
 * string s.
 *
 * The copy is obtained from mh_xmalloc() and is owned by the caller.
 * A NULL argument is treated as a programming error and aborts the
 * process.
 */
static char *
getcpy (char *s)
{
    char *copy;

    if (!s) {
        /*
         * abort() never returns, so the endless pause() loop that used
         * to follow it was unreachable (and pause() had no visible
         * prototype in this file).
         */
        abort();
    }

    /* One byte beyond the terminator is reserved, as the code always did. */
    copy = mh_xmalloc ((size_t) (strlen (s) + 2));
    strcpy (copy, s);
    return copy;
}
46
47
/*
 * isfrom -- report whether string starts with "From " or with the
 * '>'-prefixed form ">From ".
 *
 * Returns 1 when it does, 0 otherwise.
 */
int
isfrom(char *string)
{
    if (strncmp (string, "From ", 5) == 0)
        return 1;
    if (strncmp (string, ">From ", 6) == 0)
        return 1;
    return 0;
}
54
55
56 int
57 lequal (char *a, char *b)
58 {
59 for (; *a; a++, b++)
60 if (*b == 0)
61 return FALSE;
62 else {
63 char c1 = islower ((unsigned char) *a) ?
64 toupper ((unsigned char) *a) : *a;
65 char c2 = islower ((unsigned char) *b) ?
66 toupper ((unsigned char) *b) : *b;
67 if (c1 != c2)
68 return FALSE;
69 }
70
71 return (*b == 0);
72 }
73
74
75 static int
76 isat (char *p)
77 {
78 return (strncmp (p, " AT ", 4)
79 && strncmp (p, " At ", 4)
80 && strncmp (p, " aT ", 4)
81 && strncmp (p, " at ", 4) ? FALSE : TRUE);
82 }
83
84
85 /*
86 *
87 * getadrx() implements a partial 822-style address parser. The parser
88 * is neither complete nor correct. It does however recognize nearly all
89 * of the 822 address syntax. In addition it handles the majority of the
90 * 733 syntax as well. Most problems arise from trying to accommodate both.
91 *
92 * In terms of 822, the route-specification in
93 *
94 * "<" [route] local-part "@" domain ">"
95 *
96 * is parsed and returned unchanged. Multiple at-signs are compressed
97 * via source-routing. Recursive groups are not allowed as per the
98 * standard.
99 *
100 * In terms of 733, " at " is recognized as equivalent to "@".
101 *
102 * In terms of both the parser will not complain about missing hosts.
103 *
104 * -----
105 *
106 * We should not allow addresses like
107 *
108 * Marshall T. Rose <MRose@UCI>
109 *
110 * but should insist on
111 *
112 * "Marshall T. Rose" <MRose@UCI>
113 *
114 * Unfortunately, a lot of mailers stupidly let people get away with this.
115 *
116 * -----
117 *
118 * We should not allow addresses like
119 *
120 * <MRose@UCI>
121 *
122 * but should insist on
123 *
124 * MRose@UCI
125 *
126 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
127 * this.
128 *
129 * -----
130 *
131 * We should not allow addresses like
132 *
133 * @UCI:MRose@UCI-750a
134 *
135 * but should insist on
136 *
137 * Marshall Rose <@UCI:MRose@UCI-750a>
138 *
139 * Unfortunately, a lot of mailers stupidly do this.
140 *
141 */
142
/* Escape character recognised inside comments, quoted strings and
 * domain literals by my_lex(). */
#define QUOTE   '\\'

/*
 * Token classes returned by my_lex() and remembered in last_lex.
 */
#define LX_END   0      /* end of input */
#define LX_ERR   1      /* malformed or unexpected input */
#define LX_ATOM  2      /* plain word */
#define LX_QSTR  3      /* "quoted string" */
#define LX_DLIT  4      /* [domain literal] */
#define LX_SEMI  5      /* ; */
#define LX_COMA  6      /* , */
#define LX_LBRK  7      /* < */
#define LX_RBRK  8      /* > */
#define LX_COLN  9      /* : */
#define LX_DOT  10      /* . */
#define LX_AT   11      /* @ */

/* One entry of the special-character table: a character and the token
 * class my_lex() reports when that character stands alone. */
struct specials {
    char lx_chr;
    int lx_val;
};

/*
 * Characters that terminate an atom.  The list ends with an entry whose
 * lx_chr is 0.  The characters mapped to LX_ERR here that open a
 * comment, quoted string or domain literal are handled by my_lex()
 * before it consults this table.
 */
static struct specials special[] = {
    { ';',   LX_SEMI },
    { ',',   LX_COMA },
    { '<',   LX_LBRK },
    { '>',   LX_RBRK },
    { ':',   LX_COLN },
    { '.',   LX_DOT  },
    { '@',   LX_AT   },
    { '(',   LX_ERR  },
    { ')',   LX_ERR  },
    { QUOTE, LX_ERR  },
    { '"',   LX_ERR  },
    { '[',   LX_ERR  },
    { ']',   LX_ERR  },
    { 0,     0       }
};
179
180 static int glevel = 0;
181 static int ingrp = 0;
182 static int last_lex = LX_END;
183
184 static char *dp = NULL;
185 static char *cp = NULL;
186 static char *ap = NULL;
187 static char *pers = NULL;
188 static char *mbox = NULL;
189 static char *host = NULL;
190 static char *path = NULL;
191 static char *grp = NULL;
192 static char *note = NULL;
193 static char err[BUFSIZ];
194 static char adr[BUFSIZ];
195
196 static struct adrx adrxs2;
197
198
199 struct adrx *
200 getadrx (char *addrs)
201 {
202 register char *bp;
203 register struct adrx *adrxp = &adrxs2;
204
205 if (pers)
206 free (pers);
207 if (mbox)
208 free (mbox);
209 if (host)
210 free (host);
211 if (path)
212 free (path);
213 if (grp)
214 free (grp);
215 if (note)
216 free (note);
217 pers = mbox = host = path = grp = note = NULL;
218 err[0] = 0;
219
220 if (dp == NULL) {
221 dp = cp = getcpy (addrs ? addrs : "");
222 glevel = 0;
223 }
224 else
225 if (cp == NULL) {
226 free (dp);
227 dp = NULL;
228 return NULL;
229 }
230
231 switch (parse_address ()) {
232 case DONE:
233 free (dp);
234 dp = cp = NULL;
235 return NULL;
236
237 case OK:
238 switch (last_lex) {
239 case LX_COMA:
240 case LX_END:
241 break;
242
243 default: /* catch trailing comments */
244 bp = cp;
245 my_lex (adr);
246 cp = bp;
247 break;
248 }
249 break;
250
251 default:
252 break;
253 }
254
255 if (err[0])
256 for (;;) {
257 switch (last_lex) {
258 case LX_COMA:
259 case LX_END:
260 break;
261
262 default:
263 my_lex (adr);
264 continue;
265 }
266 break;
267 }
268 while (isspace ((unsigned char) *ap))
269 ap++;
270 if (cp)
271 sprintf (adr, "%.*s", (int)(cp - ap), ap);
272 else
273 strcpy (adr, ap);
274 bp = adr + strlen (adr) - 1;
275 if (*bp == ',' || *bp == ';' || *bp == '\n')
276 *bp = 0;
277
278 adrxp->text = adr;
279 adrxp->pers = pers;
280 adrxp->mbox = mbox;
281 adrxp->host = host;
282 adrxp->path = path;
283 adrxp->grp = grp;
284 adrxp->ingrp = ingrp;
285 adrxp->note = note;
286 adrxp->err = err[0] ? err : NULL;
287
288 return adrxp;
289 }
290
291
292 static int
293 parse_address (void)
294 {
295 char buffer[BUFSIZ];
296
297 again: ;
298 ap = cp;
299 switch (my_lex (buffer)) {
300 case LX_ATOM:
301 case LX_QSTR:
302 pers = getcpy (buffer);
303 break;
304
305 case LX_SEMI:
306 if (glevel-- <= 0) {
307 strcpy (err, "extraneous semi-colon");
308 return NOTOK;
309 }
310 case LX_COMA:
311 if (note) {
312 free (note);
313 note = NULL;
314 }
315 goto again;
316
317 case LX_END:
318 return DONE;
319
320 case LX_LBRK: /* sigh (2) */
321 goto get_addr;
322
323 case LX_AT: /* sigh (3) */
324 cp = ap;
325 if (route_addr (buffer) == NOTOK)
326 return NOTOK;
327 return OK; /* why be choosy? */
328
329 default:
330 sprintf (err, "illegal address construct (%s)", buffer);
331 return NOTOK;
332 }
333
334 switch (my_lex (buffer)) {
335 case LX_ATOM:
336 case LX_QSTR:
337 pers = add (buffer, add (" ", pers));
338 more_phrase: ; /* sigh (1) */
339 if (phrase (buffer) == NOTOK)
340 return NOTOK;
341
342 switch (last_lex) {
343 case LX_LBRK:
344 get_addr: ;
345 if (route_addr (buffer) == NOTOK)
346 return NOTOK;
347 if (last_lex == LX_RBRK)
348 return OK;
349 sprintf (err, "missing right-bracket (%s)", buffer);
350 return NOTOK;
351
352 case LX_COLN:
353 get_group: ;
354 if (glevel++ > 0) {
355 sprintf (err, "nested groups not allowed (%s)", pers);
356 return NOTOK;
357 }
358 grp = add (": ", pers);
359 pers = NULL;
360 {
361 char *pp = cp;
362
363 for (;;)
364 switch (my_lex (buffer)) {
365 case LX_SEMI:
366 case LX_END: /* tsk, tsk */
367 glevel--;
368 return OK;
369
370 case LX_COMA:
371 continue;
372
373 default:
374 cp = pp;
375 return parse_address ();
376 }
377 }
378
379 case LX_DOT: /* sigh (1) */
380 pers = add (".", pers);
381 goto more_phrase;
382
383 default:
384 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
385 pers, buffer);
386 return NOTOK;
387 }
388
389 case LX_LBRK:
390 goto get_addr;
391
392 case LX_COLN:
393 goto get_group;
394
395 case LX_DOT:
396 mbox = add (buffer, pers);
397 pers = NULL;
398 if (route_addr (buffer) == NOTOK)
399 return NOTOK;
400 goto check_end;
401
402 case LX_AT:
403 ingrp = glevel;
404 mbox = pers;
405 pers = NULL;
406 if (domain (buffer) == NOTOK)
407 return NOTOK;
408 check_end: ;
409 switch (last_lex) {
410 case LX_SEMI:
411 if (glevel-- <= 0) {
412 strcpy (err, "extraneous semi-colon");
413 return NOTOK;
414 }
415 case LX_COMA:
416 case LX_END:
417 return OK;
418
419 default:
420 sprintf (err, "junk after local@domain (%s)", buffer);
421 return NOTOK;
422 }
423
424 case LX_SEMI: /* no host */
425 case LX_COMA:
426 case LX_END:
427 ingrp = glevel;
428 if (last_lex == LX_SEMI && glevel-- <= 0) {
429 strcpy (err, "extraneous semi-colon");
430 return NOTOK;
431 }
432 mbox = pers;
433 pers = NULL;
434 return OK;
435
436 default:
437 sprintf (err, "missing mailbox (%s)", buffer);
438 return NOTOK;
439 }
440 }
441
442
443 static int
444 phrase (char *buffer)
445 {
446 for (;;)
447 switch (my_lex (buffer)) {
448 case LX_ATOM:
449 case LX_QSTR:
450 pers = add (buffer, add (" ", pers));
451 continue;
452
453 default:
454 return OK;
455 }
456 }
457
458
459 static int
460 route_addr (char *buffer)
461 {
462 register char *pp = cp;
463
464 if (my_lex (buffer) == LX_AT) {
465 if (route (buffer) == NOTOK)
466 return NOTOK;
467 }
468 else
469 cp = pp;
470
471 if (local_part (buffer) == NOTOK)
472 return NOTOK;
473
474 switch (last_lex) {
475 case LX_AT:
476 return domain (buffer);
477
478 case LX_SEMI: /* if in group */
479 case LX_RBRK: /* no host */
480 case LX_COMA:
481 case LX_END:
482 return OK;
483
484 default:
485 sprintf (err, "no at-sign after local-part (%s)", buffer);
486 return NOTOK;
487 }
488 }
489
490
491 static int
492 local_part (char *buffer)
493 {
494 ingrp = glevel;
495
496 for (;;) {
497 switch (my_lex (buffer)) {
498 case LX_ATOM:
499 case LX_QSTR:
500 mbox = add (buffer, mbox);
501 break;
502
503 default:
504 sprintf (err, "no mailbox in local-part (%s)", buffer);
505 return NOTOK;
506 }
507
508 switch (my_lex (buffer)) {
509 case LX_DOT:
510 mbox = add (buffer, mbox);
511 continue;
512
513 default:
514 return OK;
515 }
516 }
517 }
518
519
520 static int
521 domain (char *buffer)
522 {
523 for (;;) {
524 switch (my_lex (buffer)) {
525 case LX_ATOM:
526 case LX_DLIT:
527 host = add (buffer, host);
528 break;
529
530 default:
531 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
532 return NOTOK;
533 }
534
535 switch (my_lex (buffer)) {
536 case LX_DOT:
537 host = add (buffer, host);
538 continue;
539
540 case LX_AT: /* sigh (0) */
541 mbox = add (host, add ("%", mbox));
542 free (host);
543 host = NULL;
544 continue;
545
546 default:
547 return OK;
548 }
549 }
550 }
551
552
553 static int
554 route (char *buffer)
555 {
556 path = getcpy ("@");
557
558 for (;;) {
559 switch (my_lex (buffer)) {
560 case LX_ATOM:
561 case LX_DLIT:
562 path = add (buffer, path);
563 break;
564
565 default:
566 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
567 return NOTOK;
568 }
569 switch (my_lex (buffer)) {
570 case LX_COMA:
571 path = add (buffer, path);
572 for (;;) {
573 switch (my_lex (buffer)) {
574 case LX_COMA:
575 continue;
576
577 case LX_AT:
578 path = add (buffer, path);
579 break;
580
581 default:
582 sprintf (err, "no at-sign found for next domain in route (%s)",
583 buffer);
584 }
585 break;
586 }
587 continue;
588
589 case LX_AT: /* XXX */
590 case LX_DOT:
591 path = add (buffer, path);
592 continue;
593
594 case LX_COLN:
595 path = add (buffer, path);
596 return OK;
597
598 default:
599 sprintf (err, "no colon found to terminate route (%s)", buffer);
600 return NOTOK;
601 }
602 }
603 }
604
605
606 static int
607 my_lex (char *buffer)
608 {
609 /* buffer should be at least BUFSIZ bytes long */
610 int i, gotat = 0;
611 char c, *bp;
612
613 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
614 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
615
616 bp = buffer;
617 *bp = 0;
618 if (!cp)
619 return (last_lex = LX_END);
620
621 gotat = isat (cp);
622 c = *cp++;
623 while (isspace ((unsigned char) c))
624 c = *cp++;
625 if (c == 0) {
626 cp = NULL;
627 return (last_lex = LX_END);
628 }
629
630 if (c == '(') {
631 ADDCHR(c);
632 for (i = 0;;)
633 switch (c = *cp++) {
634 case 0:
635 cp = NULL;
636 return (last_lex = LX_ERR);
637 case QUOTE:
638 ADDCHR(c);
639 if ((c = *cp++) == 0) {
640 cp = NULL;
641 return (last_lex = LX_ERR);
642 }
643 ADDCHR(c);
644 continue;
645 case '(':
646 i++;
647 default:
648 ADDCHR(c);
649 continue;
650 case ')':
651 ADDCHR(c);
652 if (--i < 0) {
653 *bp = 0;
654 note = note ? add (buffer, add (" ", note))
655 : getcpy (buffer);
656 return my_lex (buffer);
657 }
658 }
659 }
660
661 if (c == '"') {
662 ADDCHR(c);
663 for (;;)
664 switch (c = *cp++) {
665 case 0:
666 cp = NULL;
667 return (last_lex = LX_ERR);
668 case QUOTE:
669 ADDCHR(c);
670 if ((c = *cp++) == 0) {
671 cp = NULL;
672 return (last_lex = LX_ERR);
673 }
674 default:
675 ADDCHR(c);
676 continue;
677 case '"':
678 ADDCHR(c);
679 *bp = 0;
680 return (last_lex = LX_QSTR);
681 }
682 }
683
684 if (c == '[') {
685 ADDCHR(c);
686 for (;;)
687 switch (c = *cp++) {
688 case 0:
689 cp = NULL;
690 return (last_lex = LX_ERR);
691 case QUOTE:
692 ADDCHR(c);
693 if ((c = *cp++) == 0) {
694 cp = NULL;
695 return (last_lex = LX_ERR);
696 }
697 default:
698 ADDCHR(c);
699 continue;
700 case ']':
701 ADDCHR(c);
702 *bp = 0;
703 return (last_lex = LX_DLIT);
704 }
705 }
706
707 ADDCHR(c);
708 *bp = 0;
709 for (i = 0; special[i].lx_chr != 0; i++)
710 if (c == special[i].lx_chr)
711 return (last_lex = special[i].lx_val);
712
713 if (iscntrl ((unsigned char) c))
714 return (last_lex = LX_ERR);
715
716 for (;;) {
717 if ((c = *cp++) == 0)
718 break;
719 for (i = 0; special[i].lx_chr != 0; i++)
720 if (c == special[i].lx_chr)
721 goto got_atom;
722 if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
723 break;
724 ADDCHR(c);
725 }
726 got_atom: ;
727 if (c == 0)
728 cp = NULL;
729 else
730 cp--;
731 *bp = 0;
732 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
733 ? LX_ATOM : LX_AT;
734 return last_lex;
735
736 my_lex_buffull:
737 /* Out of buffer space. *bp is the last byte in the buffer */
738 *bp = 0;
739 return (last_lex = LX_ERR);
740 }
741
742
743 char *
744 legal_person (char *p)
745 {
746 int i;
747 register char *cp;
748 static char buffer[BUFSIZ];
749
750 if (*p == '"')
751 return p;
752 for (cp = p; *cp; cp++)
753 for (i = 0; special[i].lx_chr; i++)
754 if (*cp == special[i].lx_chr) {
755 sprintf (buffer, "\"%s\"", p);
756 return buffer;
757 }
758
759 return p;
760 }
761
762
763 int
764 mfgets (FILE *in, char **bp)
765 {
766 int i;
767 register char *cp, *dp, *ep;
768 static int len = 0;
769 static char *pp = NULL;
770
771 if (pp == NULL)
772 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
773
774 for (ep = (cp = pp) + len - 2;;) {
775 switch (i = getc (in)) {
776 case EOF:
777 eol: ;
778 if (cp != pp) {
779 *cp = 0;
780 *bp = pp;
781 return OK;
782 }
783 eoh: ;
784 *bp = NULL;
785 free (pp);
786 pp = NULL;
787 return DONE;
788
789 case 0:
790 continue;
791
792 case '\n':
793 if (cp == pp) /* end of headers, gobble it */
794 goto eoh;
795 switch (i = getc (in)) {
796 default: /* end of line */
797 case '\n': /* end of headers, save for next call */
798 ungetc (i, in);
799 goto eol;
800
801 case ' ': /* continue headers */
802 case '\t':
803 *cp++ = '\n';
804 break;
805 } /* fall into default case */
806
807 default:
808 *cp++ = i;
809 break;
810 }
811 if (cp >= ep) {
812 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
813 cp += dp - pp, ep = (pp = cp) + len - 2;
814 }
815 }
816 }