]> diplodocus.org Git - nmh/blob - sbr/mf.c
Remove SYNOPSIS from nmh(7).
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
8 */
9
10 #include <h/mf.h>
11 #include <h/utils.h>
12
/*
 * Forward declarations of the file-local helpers: the " at " probe,
 * the recursive-descent address parser routines and the lexer.
 */
static int isat (const char *p);
static int parse_address (void);
static int phrase (char *buffer);
static int route_addr (char *buffer);
static int local_part (char *buffer);
static int domain (char *buffer);
static int route (char *buffer);
static int my_lex (char *buffer);
24
25
/*
 * isfrom -- report whether STRING starts with "From " or ">From ".
 *
 * Returns 1 when either prefix matches, 0 otherwise.  The comparison
 * is case-sensitive.
 */
int
isfrom(const char *string)
{
    if (strncmp (string, "From ", 5) == 0)
        return 1;

    return strncmp (string, ">From ", 6) == 0;
}
32
33
/*
 * lequal -- compare two strings for equality, ignoring the case of
 * letters.
 *
 * Lower-case characters of both strings are mapped through toupper()
 * before being compared.  Returns non-zero when A and B have the same
 * length and every character pair matches, zero otherwise.
 */
int
lequal (const char *a, const char *b)
{
    while (*a != 0 && *b != 0) {
        char ua = islower ((unsigned char) *a)
            ? toupper ((unsigned char) *a) : *a;
        char ub = islower ((unsigned char) *b)
            ? toupper ((unsigned char) *b) : *b;

        if (ua != ub)
            break;
        a++;
        b++;
    }

    /* Equal only if both strings were consumed completely. */
    return (*a == 0 && *b == 0);
}
51
52
/*
 * isat -- report whether P begins with the four characters " at ",
 * with the two letters in any mix of upper and lower case.
 *
 * Returns non-zero on a match, zero otherwise.
 */
static int
isat (const char *p)
{
    static const char *const spellings[] = {
        " AT ", " At ", " aT ", " at "
    };
    unsigned int k;

    for (k = 0; k < sizeof spellings / sizeof spellings[0]; k++)
        if (strncmp (p, spellings[k], 4) == 0)
            return 1;

    return 0;
}
61
62
63 /*
64 *
65 * getadrx() implements a partial 822-style address parser. The parser
66 * is neither complete nor correct. It does however recognize nearly all
67 * of the 822 address syntax. In addition it handles the majority of the
68 * 733 syntax as well. Most problems arise from trying to accommodate both.
69 *
70 * In terms of 822, the route-specification in
71 *
72 * "<" [route] local-part "@" domain ">"
73 *
74 * is parsed and returned unchanged. Multiple at-signs are compressed
75 * via source-routing. Recursive groups are not allowed as per the
76 * standard.
77 *
78 * In terms of 733, " at " is recognized as equivalent to "@".
79 *
80 * In terms of both the parser will not complain about missing hosts.
81 *
82 * -----
83 *
84 * We should not allow addresses like
85 *
86 * Marshall T. Rose <MRose@UCI>
87 *
88 * but should insist on
89 *
90 * "Marshall T. Rose" <MRose@UCI>
91 *
92 * Unfortunately, a lot of mailers stupidly let people get away with this.
93 *
94 * -----
95 *
96 * We should not allow addresses like
97 *
98 * <MRose@UCI>
99 *
100 * but should insist on
101 *
102 * MRose@UCI
103 *
104 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
105 * this.
106 *
107 * -----
108 *
109 * We should not allow addresses like
110 *
111 * @UCI:MRose@UCI-750a
112 *
113 * but should insist on
114 *
115 * Marshall Rose <@UCI:MRose@UCI-750a>
116 *
117 * Unfortunately, a lot of mailers stupidly do this.
118 *
119 */
120
/* Escape character recognized inside comments, quoted strings and
 * domain literals. */
#define QUOTE	'\\'

/*
 * Token codes produced by my_lex().
 */
enum {
    LX_END  = 0,	/* end of input */
    LX_ERR  = 1,	/* lexical error */
    LX_ATOM = 2,	/* atom */
    LX_QSTR = 3,	/* "quoted string" */
    LX_DLIT = 4,	/* [domain literal] */
    LX_SEMI = 5,	/* ';' */
    LX_COMA = 6,	/* ',' */
    LX_LBRK = 7,	/* '<' */
    LX_RBRK = 8,	/* '>' */
    LX_COLN = 9,	/* ':' */
    LX_DOT  = 10,	/* '.' */
    LX_AT   = 11	/* '@' (or " at ") */
};

/*
 * Table of the characters that terminate an atom, each paired with the
 * token code returned when the character is lexed on its own.  The
 * table ends with an entry whose lx_chr is 0.
 */
static struct specials {
    char lx_chr;	/* the special character */
    int lx_val;		/* token code for that character */
} special[] = {
    { ';',   LX_SEMI },
    { ',',   LX_COMA },
    { '<',   LX_LBRK },
    { '>',   LX_RBRK },
    { ':',   LX_COLN },
    { '.',   LX_DOT  },
    { '@',   LX_AT   },
    { '(',   LX_ERR  },
    { ')',   LX_ERR  },
    { QUOTE, LX_ERR  },
    { '"',   LX_ERR  },
    { '[',   LX_ERR  },
    { ']',   LX_ERR  },
    { 0,     0       }
};
157
158 static int glevel = 0;
159 static int ingrp = 0;
160 static int last_lex = LX_END;
161
162 static char *dp = NULL;
163 static char *cp = NULL;
164 static char *ap = NULL;
165 static char *pers = NULL;
166 static char *mbox = NULL;
167 static char *host = NULL;
168 static char *path = NULL;
169 static char *grp = NULL;
170 static char *note = NULL;
171 static char err[BUFSIZ];
172 static char adr[BUFSIZ];
173
174 static struct adrx adrxs2;
175
176
177 struct adrx *
178 getadrx (const char *addrs)
179 {
180 register char *bp;
181 register struct adrx *adrxp = &adrxs2;
182
183 if (pers)
184 free (pers);
185 if (mbox)
186 free (mbox);
187 if (host)
188 free (host);
189 if (path)
190 free (path);
191 if (grp)
192 free (grp);
193 if (note)
194 free (note);
195 pers = mbox = host = path = grp = note = NULL;
196 err[0] = 0;
197
198 if (dp == NULL) {
199 dp = cp = strdup (addrs ? addrs : "");
200 glevel = 0;
201 }
202 else
203 if (cp == NULL) {
204 free (dp);
205 dp = NULL;
206 return NULL;
207 }
208
209 switch (parse_address ()) {
210 case DONE:
211 free (dp);
212 dp = cp = NULL;
213 return NULL;
214
215 case OK:
216 switch (last_lex) {
217 case LX_COMA:
218 case LX_END:
219 break;
220
221 default: /* catch trailing comments */
222 bp = cp;
223 my_lex (adr);
224 cp = bp;
225 break;
226 }
227 break;
228
229 default:
230 break;
231 }
232
233 if (err[0])
234 for (;;) {
235 switch (last_lex) {
236 case LX_COMA:
237 case LX_END:
238 break;
239
240 default:
241 my_lex (adr);
242 continue;
243 }
244 break;
245 }
246 while (isspace ((unsigned char) *ap))
247 ap++;
248 if (cp)
249 sprintf (adr, "%.*s", (int)(cp - ap), ap);
250 else
251 strcpy (adr, ap);
252 bp = adr + strlen (adr) - 1;
253 if (*bp == ',' || *bp == ';' || *bp == '\n')
254 *bp = 0;
255
256 adrxp->text = adr;
257 adrxp->pers = pers;
258 adrxp->mbox = mbox;
259 adrxp->host = host;
260 adrxp->path = path;
261 adrxp->grp = grp;
262 adrxp->ingrp = ingrp;
263 adrxp->note = note;
264 adrxp->err = err[0] ? err : NULL;
265
266 return adrxp;
267 }
268
269
270 static int
271 parse_address (void)
272 {
273 char buffer[BUFSIZ];
274
275 again: ;
276 ap = cp;
277 switch (my_lex (buffer)) {
278 case LX_ATOM:
279 case LX_QSTR:
280 pers = strdup (buffer);
281 break;
282
283 case LX_SEMI:
284 if (glevel-- <= 0) {
285 strcpy (err, "extraneous semi-colon");
286 return NOTOK;
287 }
288 case LX_COMA:
289 if (note) {
290 free (note);
291 note = NULL;
292 }
293 goto again;
294
295 case LX_END:
296 return DONE;
297
298 case LX_LBRK: /* sigh (2) */
299 goto get_addr;
300
301 case LX_AT: /* sigh (3) */
302 cp = ap;
303 if (route_addr (buffer) == NOTOK)
304 return NOTOK;
305 return OK; /* why be choosy? */
306
307 default:
308 sprintf (err, "illegal address construct (%s)", buffer);
309 return NOTOK;
310 }
311
312 switch (my_lex (buffer)) {
313 case LX_ATOM:
314 case LX_QSTR:
315 pers = add (buffer, add (" ", pers));
316 more_phrase: ; /* sigh (1) */
317 if (phrase (buffer) == NOTOK)
318 return NOTOK;
319
320 switch (last_lex) {
321 case LX_LBRK:
322 get_addr: ;
323 if (route_addr (buffer) == NOTOK)
324 return NOTOK;
325 if (last_lex == LX_RBRK)
326 return OK;
327 sprintf (err, "missing right-bracket (%s)", buffer);
328 return NOTOK;
329
330 case LX_COLN:
331 get_group: ;
332 if (glevel++ > 0) {
333 sprintf (err, "nested groups not allowed (%s)", pers);
334 return NOTOK;
335 }
336 grp = add (": ", pers);
337 pers = NULL;
338 {
339 char *pp = cp;
340
341 for (;;)
342 switch (my_lex (buffer)) {
343 case LX_SEMI:
344 case LX_END: /* tsk, tsk */
345 glevel--;
346 return OK;
347
348 case LX_COMA:
349 continue;
350
351 default:
352 cp = pp;
353 return parse_address ();
354 }
355 }
356
357 case LX_DOT: /* sigh (1) */
358 pers = add (".", pers);
359 goto more_phrase;
360
361 default:
362 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
363 pers, buffer);
364 return NOTOK;
365 }
366
367 case LX_LBRK:
368 goto get_addr;
369
370 case LX_COLN:
371 goto get_group;
372
373 case LX_DOT:
374 mbox = add (buffer, pers);
375 pers = NULL;
376 if (route_addr (buffer) == NOTOK)
377 return NOTOK;
378 goto check_end;
379
380 case LX_AT:
381 ingrp = glevel;
382 mbox = pers;
383 pers = NULL;
384 if (domain (buffer) == NOTOK)
385 return NOTOK;
386 check_end: ;
387 switch (last_lex) {
388 case LX_SEMI:
389 if (glevel-- <= 0) {
390 strcpy (err, "extraneous semi-colon");
391 return NOTOK;
392 }
393 case LX_COMA:
394 case LX_END:
395 return OK;
396
397 default:
398 sprintf (err, "junk after local@domain (%s)", buffer);
399 return NOTOK;
400 }
401
402 case LX_SEMI: /* no host */
403 case LX_COMA:
404 case LX_END:
405 ingrp = glevel;
406 if (last_lex == LX_SEMI && glevel-- <= 0) {
407 strcpy (err, "extraneous semi-colon");
408 return NOTOK;
409 }
410 mbox = pers;
411 pers = NULL;
412 return OK;
413
414 default:
415 sprintf (err, "missing mailbox (%s)", buffer);
416 return NOTOK;
417 }
418 }
419
420
421 static int
422 phrase (char *buffer)
423 {
424 for (;;)
425 switch (my_lex (buffer)) {
426 case LX_ATOM:
427 case LX_QSTR:
428 pers = add (buffer, add (" ", pers));
429 continue;
430
431 default:
432 return OK;
433 }
434 }
435
436
437 static int
438 route_addr (char *buffer)
439 {
440 register char *pp = cp;
441
442 if (my_lex (buffer) == LX_AT) {
443 if (route (buffer) == NOTOK)
444 return NOTOK;
445 }
446 else
447 cp = pp;
448
449 if (local_part (buffer) == NOTOK)
450 return NOTOK;
451
452 switch (last_lex) {
453 case LX_AT:
454 return domain (buffer);
455
456 case LX_SEMI: /* if in group */
457 case LX_RBRK: /* no host */
458 case LX_COMA:
459 case LX_END:
460 return OK;
461
462 default:
463 sprintf (err, "no at-sign after local-part (%s)", buffer);
464 return NOTOK;
465 }
466 }
467
468
469 static int
470 local_part (char *buffer)
471 {
472 ingrp = glevel;
473
474 for (;;) {
475 switch (my_lex (buffer)) {
476 case LX_ATOM:
477 case LX_QSTR:
478 mbox = add (buffer, mbox);
479 break;
480
481 default:
482 sprintf (err, "no mailbox in local-part (%s)", buffer);
483 return NOTOK;
484 }
485
486 switch (my_lex (buffer)) {
487 case LX_DOT:
488 mbox = add (buffer, mbox);
489 continue;
490
491 default:
492 return OK;
493 }
494 }
495 }
496
497
498 static int
499 domain (char *buffer)
500 {
501 for (;;) {
502 switch (my_lex (buffer)) {
503 case LX_ATOM:
504 case LX_DLIT:
505 host = add (buffer, host);
506 break;
507
508 default:
509 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
510 return NOTOK;
511 }
512
513 switch (my_lex (buffer)) {
514 case LX_DOT:
515 host = add (buffer, host);
516 continue;
517
518 case LX_AT: /* sigh (0) */
519 mbox = add (host, add ("%", mbox));
520 free (host);
521 host = NULL;
522 continue;
523
524 default:
525 return OK;
526 }
527 }
528 }
529
530
531 static int
532 route (char *buffer)
533 {
534 path = strdup ("@");
535
536 for (;;) {
537 switch (my_lex (buffer)) {
538 case LX_ATOM:
539 case LX_DLIT:
540 path = add (buffer, path);
541 break;
542
543 default:
544 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
545 return NOTOK;
546 }
547 switch (my_lex (buffer)) {
548 case LX_COMA:
549 path = add (buffer, path);
550 for (;;) {
551 switch (my_lex (buffer)) {
552 case LX_COMA:
553 continue;
554
555 case LX_AT:
556 path = add (buffer, path);
557 break;
558
559 default:
560 sprintf (err, "no at-sign found for next domain in route (%s)",
561 buffer);
562 }
563 break;
564 }
565 continue;
566
567 case LX_AT: /* XXX */
568 case LX_DOT:
569 path = add (buffer, path);
570 continue;
571
572 case LX_COLN:
573 path = add (buffer, path);
574 return OK;
575
576 default:
577 sprintf (err, "no colon found to terminate route (%s)", buffer);
578 return NOTOK;
579 }
580 }
581 }
582
583
584 static int
585 my_lex (char *buffer)
586 {
587 /* buffer should be at least BUFSIZ bytes long */
588 int i, gotat = 0;
589 char c, *bp;
590
591 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
592 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
593
594 bp = buffer;
595 *bp = 0;
596 if (!cp)
597 return (last_lex = LX_END);
598
599 gotat = isat (cp);
600 c = *cp++;
601 while (isspace ((unsigned char) c))
602 c = *cp++;
603 if (c == 0) {
604 cp = NULL;
605 return (last_lex = LX_END);
606 }
607
608 if (c == '(') {
609 ADDCHR(c);
610 for (i = 0;;)
611 switch (c = *cp++) {
612 case 0:
613 cp = NULL;
614 return (last_lex = LX_ERR);
615 case QUOTE:
616 ADDCHR(c);
617 if ((c = *cp++) == 0) {
618 cp = NULL;
619 return (last_lex = LX_ERR);
620 }
621 ADDCHR(c);
622 continue;
623 case '(':
624 i++;
625 default:
626 ADDCHR(c);
627 continue;
628 case ')':
629 ADDCHR(c);
630 if (--i < 0) {
631 *bp = 0;
632 note = note ? add (buffer, add (" ", note))
633 : strdup (buffer);
634 return my_lex (buffer);
635 }
636 }
637 }
638
639 if (c == '"') {
640 ADDCHR(c);
641 for (;;)
642 switch (c = *cp++) {
643 case 0:
644 cp = NULL;
645 return (last_lex = LX_ERR);
646 case QUOTE:
647 ADDCHR(c);
648 if ((c = *cp++) == 0) {
649 cp = NULL;
650 return (last_lex = LX_ERR);
651 }
652 default:
653 ADDCHR(c);
654 continue;
655 case '"':
656 ADDCHR(c);
657 *bp = 0;
658 return (last_lex = LX_QSTR);
659 }
660 }
661
662 if (c == '[') {
663 ADDCHR(c);
664 for (;;)
665 switch (c = *cp++) {
666 case 0:
667 cp = NULL;
668 return (last_lex = LX_ERR);
669 case QUOTE:
670 ADDCHR(c);
671 if ((c = *cp++) == 0) {
672 cp = NULL;
673 return (last_lex = LX_ERR);
674 }
675 default:
676 ADDCHR(c);
677 continue;
678 case ']':
679 ADDCHR(c);
680 *bp = 0;
681 return (last_lex = LX_DLIT);
682 }
683 }
684
685 ADDCHR(c);
686 *bp = 0;
687 for (i = 0; special[i].lx_chr != 0; i++)
688 if (c == special[i].lx_chr)
689 return (last_lex = special[i].lx_val);
690
691 if (iscntrl ((unsigned char) c))
692 return (last_lex = LX_ERR);
693
694 for (;;) {
695 if ((c = *cp++) == 0)
696 break;
697 for (i = 0; special[i].lx_chr != 0; i++)
698 if (c == special[i].lx_chr)
699 goto got_atom;
700 if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
701 break;
702 ADDCHR(c);
703 }
704 got_atom: ;
705 if (c == 0)
706 cp = NULL;
707 else
708 cp--;
709 *bp = 0;
710 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
711 ? LX_ATOM : LX_AT;
712 return last_lex;
713
714 my_lex_buffull:
715 /* Out of buffer space. *bp is the last byte in the buffer */
716 *bp = 0;
717 return (last_lex = LX_ERR);
718 }
719
720
721 char *
722 legal_person (const char *p)
723 {
724 int i;
725 register const char *cp;
726 static char buffer[BUFSIZ];
727
728 if (*p == '"')
729 return (char *) p;
730 for (cp = p; *cp; cp++)
731 for (i = 0; special[i].lx_chr; i++)
732 if (*cp == special[i].lx_chr) {
733 sprintf (buffer, "\"%s\"", p);
734 return buffer;
735 }
736
737 return (char *) p;
738 }
739
740
741 int
742 mfgets (FILE *in, char **bp)
743 {
744 int i;
745 register char *cp, *dp, *ep;
746 static int len = 0;
747 static char *pp = NULL;
748
749 if (pp == NULL)
750 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
751
752 for (ep = (cp = pp) + len - 2;;) {
753 switch (i = getc (in)) {
754 case EOF:
755 eol: ;
756 if (cp != pp) {
757 *cp = 0;
758 *bp = pp;
759 return OK;
760 }
761 eoh: ;
762 *bp = NULL;
763 free (pp);
764 pp = NULL;
765 return DONE;
766
767 case 0:
768 continue;
769
770 case '\n':
771 if (cp == pp) /* end of headers, gobble it */
772 goto eoh;
773 switch (i = getc (in)) {
774 default: /* end of line */
775 case '\n': /* end of headers, save for next call */
776 ungetc (i, in);
777 goto eol;
778
779 case ' ': /* continue headers */
780 case '\t':
781 *cp++ = '\n';
782 break;
783 } /* fall into default case */
784
785 default:
786 *cp++ = i;
787 break;
788 }
789 if (cp >= ep) {
790 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
791 cp += dp - pp, ep = (pp = cp) + len - 2;
792 }
793 }
794 }