]> diplodocus.org Git - nmh/blob - sbr/mf.c
Replace getcpy() with mh_xstrdup() where the string isn't NULL.
[nmh] / sbr / mf.c
1
2 /*
3 * mf.c -- mail filter subroutines
4 *
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
8 */
9
10 #include <h/mf.h>
11 #include <h/utils.h>
12
13 /*
14 * static prototypes
15 */
16 static int isat (const char *);
17 static int parse_address (void);
18 static int phrase (char *);
19 static int route_addr (char *);
20 static int local_part (char *);
21 static int domain (char *);
22 static int route (char *);
23 static int my_lex (char *);
24
25
/*
 * isfrom -- test whether a line begins with "From " or ">From ".
 *
 * Returns 1 when STRING starts with either prefix (the trailing space
 * is part of the match) and 0 otherwise.
 */
int
isfrom(const char *string)
{
    if (strncmp (string, "From ", 5) == 0)
        return 1;

    return strncmp (string, ">From ", 6) == 0;
}
32
33
/*
 * lequal -- compare two strings for equality, ignoring letter case.
 *
 * Each character of A and B is folded to upper case (only when islower()
 * says it is a lower-case letter) before being compared.  Returns
 * non-zero when both strings have the same length and every folded
 * character matches; returns FALSE otherwise.
 */
int
lequal (const char *a, const char *b)
{
    while (*a != '\0') {
        char ca, cb;

        /* B ran out first: A is longer, so they differ. */
        if (*b == '\0')
            return FALSE;

        ca = *a;
        if (islower ((unsigned char) ca))
            ca = toupper ((unsigned char) ca);

        cb = *b;
        if (islower ((unsigned char) cb))
            cb = toupper ((unsigned char) cb);

        if (ca != cb)
            return FALSE;

        a++;
        b++;
    }

    /* A is exhausted; equal only if B is exhausted too. */
    return (*b == '\0');
}
51
52
/*
 * isat -- test whether P starts with the word "at" surrounded by single
 * spaces, in any mix of upper and lower case (" AT ", " At ", " aT ",
 * " at ").
 *
 * Returns TRUE on a match and FALSE otherwise.
 */
static int
isat (const char *p)
{
    static const char *const spellings[] = { " AT ", " At ", " aT ", " at " };
    size_t k;

    for (k = 0; k < sizeof spellings / sizeof spellings[0]; k++)
        if (strncmp (p, spellings[k], 4) == 0)
            return TRUE;

    return FALSE;
}
61
62
63 /*
64 *
65 * getadrx() implements a partial 822-style address parser. The parser
66 * is neither complete nor correct. It does however recognize nearly all
67 * of the 822 address syntax. In addition it handles the majority of the
68 * 733 syntax as well. Most problems arise from trying to accommodate both.
69 *
70 * In terms of 822, the route-specification in
71 *
72 * "<" [route] local-part "@" domain ">"
73 *
74 * is parsed and returned unchanged. Multiple at-signs are compressed
75 * via source-routing. Recursive groups are not allowed as per the
76 * standard.
77 *
78 * In terms of 733, " at " is recognized as equivalent to "@".
79 *
80 * In terms of both the parser will not complain about missing hosts.
81 *
82 * -----
83 *
84 * We should not allow addresses like
85 *
86 * Marshall T. Rose <MRose@UCI>
87 *
88 * but should insist on
89 *
90 * "Marshall T. Rose" <MRose@UCI>
91 *
92 * Unfortunately, a lot of mailers stupidly let people get away with this.
93 *
94 * -----
95 *
96 * We should not allow addresses like
97 *
98 * <MRose@UCI>
99 *
100 * but should insist on
101 *
102 * MRose@UCI
103 *
104 * Unfortunately, a lot of mailers stupidly let people's UAs get away with
105 * this.
106 *
107 * -----
108 *
109 * We should not allow addresses like
110 *
111 * @UCI:MRose@UCI-750a
112 *
113 * but should insist on
114 *
115 * Marshall Rose <@UCI:MRose@UCI-750a>
116 *
117 * Unfortunately, a lot of mailers stupidly do this.
118 *
119 */
120
/* Character that escapes the next character inside comments, quoted
 * strings and domain literals. */
#define QUOTE '\\'

/*
 * Token codes returned by my_lex().
 */
#define LX_END   0      /* end of the address string */
#define LX_ERR   1      /* malformed or unexpected input */
#define LX_ATOM  2      /* run of ordinary characters */
#define LX_QSTR  3      /* "quoted string" */
#define LX_DLIT  4      /* [domain literal] */
#define LX_SEMI  5      /* ';' */
#define LX_COMA  6      /* ',' */
#define LX_LBRK  7      /* '<' */
#define LX_RBRK  8      /* '>' */
#define LX_COLN  9      /* ':' */
#define LX_DOT  10      /* '.' */
#define LX_AT   11      /* '@' (my_lex also returns this for " at ") */

/* Maps a single special character to the token code it produces. */
struct specials {
    char lx_chr;
    int  lx_val;
};

/*
 * Table of special characters, terminated by an entry whose lx_chr is 0.
 * Characters that only make sense as the start or end of a bracketed
 * construct map to LX_ERR because my_lex() handles the opening ones
 * before consulting this table.  The table is never modified, hence
 * const.
 */
static const struct specials special[] = {
    { ';',   LX_SEMI },
    { ',',   LX_COMA },
    { '<',   LX_LBRK },
    { '>',   LX_RBRK },
    { ':',   LX_COLN },
    { '.',   LX_DOT  },
    { '@',   LX_AT   },
    { '(',   LX_ERR  },
    { ')',   LX_ERR  },
    { QUOTE, LX_ERR  },
    { '"',   LX_ERR  },
    { '[',   LX_ERR  },
    { ']',   LX_ERR  },
    { 0,     0       }
};
157
158 static int glevel = 0;
159 static int ingrp = 0;
160 static int last_lex = LX_END;
161
162 static char *dp = NULL;
163 static char *cp = NULL;
164 static char *ap = NULL;
165 static char *pers = NULL;
166 static char *mbox = NULL;
167 static char *host = NULL;
168 static char *path = NULL;
169 static char *grp = NULL;
170 static char *note = NULL;
171 static char err[BUFSIZ];
172 static char adr[BUFSIZ];
173
174 static struct adrx adrxs2;
175
176
177 /* eai = Email Address Internationalization */
178 struct adrx *
179 getadrx (const char *addrs, int eai)
180 {
181 char *bp;
182 struct adrx *adrxp = &adrxs2;
183
184 if (pers)
185 free (pers);
186 if (mbox)
187 free (mbox);
188 if (host)
189 free (host);
190 if (path)
191 free (path);
192 if (grp)
193 free (grp);
194 if (note)
195 free (note);
196 pers = mbox = host = path = grp = note = NULL;
197 err[0] = 0;
198
199 if (dp == NULL) {
200 dp = cp = strdup (addrs ? addrs : "");
201 glevel = 0;
202 }
203 else
204 if (cp == NULL) {
205 free (dp);
206 dp = NULL;
207 return NULL;
208 }
209
210 switch (parse_address ()) {
211 case DONE:
212 free (dp);
213 dp = cp = NULL;
214 return NULL;
215
216 case OK:
217 switch (last_lex) {
218 case LX_COMA:
219 case LX_END:
220 break;
221
222 default: /* catch trailing comments */
223 bp = cp;
224 my_lex (adr);
225 cp = bp;
226 break;
227 }
228 break;
229
230 default:
231 break;
232 }
233
234 if (! eai) {
235 /*
236 * Reject the address if key fields contain 8bit characters
237 */
238
239 if (contains8bit(mbox, NULL) || contains8bit(host, NULL) ||
240 contains8bit(path, NULL) || contains8bit(grp, NULL)) {
241 strcpy(err, "Address contains 8-bit characters");
242 }
243 }
244
245 if (err[0])
246 for (;;) {
247 switch (last_lex) {
248 case LX_COMA:
249 case LX_END:
250 break;
251
252 default:
253 my_lex (adr);
254 continue;
255 }
256 break;
257 }
258 while (isspace ((unsigned char) *ap))
259 ap++;
260 if (cp)
261 sprintf (adr, "%.*s", (int)(cp - ap), ap);
262 else
263 strcpy (adr, ap);
264 bp = adr + strlen (adr) - 1;
265 if (*bp == ',' || *bp == ';' || *bp == '\n')
266 *bp = 0;
267
268 adrxp->text = adr;
269 adrxp->pers = pers;
270 adrxp->mbox = mbox;
271 adrxp->host = host;
272 adrxp->path = path;
273 adrxp->grp = grp;
274 adrxp->ingrp = ingrp;
275 adrxp->note = note;
276 adrxp->err = err[0] ? err : NULL;
277
278 return adrxp;
279 }
280
281
282 static int
283 parse_address (void)
284 {
285 char buffer[BUFSIZ];
286
287 again: ;
288 ap = cp;
289 switch (my_lex (buffer)) {
290 case LX_ATOM:
291 case LX_QSTR:
292 pers = strdup (buffer);
293 break;
294
295 case LX_SEMI:
296 if (glevel-- <= 0) {
297 strcpy (err, "extraneous semi-colon");
298 return NOTOK;
299 }
300 case LX_COMA:
301 if (note) {
302 free (note);
303 note = NULL;
304 }
305 goto again;
306
307 case LX_END:
308 return DONE;
309
310 case LX_LBRK: /* sigh (2) */
311 goto get_addr;
312
313 case LX_AT: /* sigh (3) */
314 cp = ap;
315 if (route_addr (buffer) == NOTOK)
316 return NOTOK;
317 return OK; /* why be choosy? */
318
319 default:
320 sprintf (err, "illegal address construct (%s)", buffer);
321 return NOTOK;
322 }
323
324 switch (my_lex (buffer)) {
325 case LX_ATOM:
326 case LX_QSTR:
327 pers = add (buffer, add (" ", pers));
328 more_phrase: ; /* sigh (1) */
329 if (phrase (buffer) == NOTOK)
330 return NOTOK;
331
332 switch (last_lex) {
333 case LX_LBRK:
334 get_addr: ;
335 if (route_addr (buffer) == NOTOK)
336 return NOTOK;
337 if (last_lex == LX_RBRK)
338 return OK;
339 sprintf (err, "missing right-bracket (%s)", buffer);
340 return NOTOK;
341
342 case LX_COLN:
343 get_group: ;
344 if (glevel++ > 0) {
345 sprintf (err, "nested groups not allowed (%s)", pers);
346 return NOTOK;
347 }
348 grp = add (": ", pers);
349 pers = NULL;
350 {
351 char *pp = cp;
352
353 for (;;)
354 switch (my_lex (buffer)) {
355 case LX_SEMI:
356 case LX_END: /* tsk, tsk */
357 glevel--;
358 return OK;
359
360 case LX_COMA:
361 continue;
362
363 default:
364 cp = pp;
365 return parse_address ();
366 }
367 }
368
369 case LX_DOT: /* sigh (1) */
370 pers = add (".", pers);
371 goto more_phrase;
372
373 default:
374 sprintf (err, "no mailbox in address, only a phrase (%s%s)",
375 pers, buffer);
376 return NOTOK;
377 }
378
379 case LX_LBRK:
380 goto get_addr;
381
382 case LX_COLN:
383 goto get_group;
384
385 case LX_DOT:
386 mbox = add (buffer, pers);
387 pers = NULL;
388 if (route_addr (buffer) == NOTOK)
389 return NOTOK;
390 goto check_end;
391
392 case LX_AT:
393 ingrp = glevel;
394 mbox = pers;
395 pers = NULL;
396 if (domain (buffer) == NOTOK)
397 return NOTOK;
398 check_end: ;
399 switch (last_lex) {
400 case LX_SEMI:
401 if (glevel-- <= 0) {
402 strcpy (err, "extraneous semi-colon");
403 return NOTOK;
404 }
405 case LX_COMA:
406 case LX_END:
407 return OK;
408
409 default:
410 sprintf (err, "junk after local@domain (%s)", buffer);
411 return NOTOK;
412 }
413
414 case LX_SEMI: /* no host */
415 case LX_COMA:
416 case LX_END:
417 ingrp = glevel;
418 if (last_lex == LX_SEMI && glevel-- <= 0) {
419 strcpy (err, "extraneous semi-colon");
420 return NOTOK;
421 }
422 mbox = pers;
423 pers = NULL;
424 return OK;
425
426 default:
427 sprintf (err, "missing mailbox (%s)", buffer);
428 return NOTOK;
429 }
430 }
431
432
433 static int
434 phrase (char *buffer)
435 {
436 for (;;)
437 switch (my_lex (buffer)) {
438 case LX_ATOM:
439 case LX_QSTR:
440 pers = add (buffer, add (" ", pers));
441 continue;
442
443 default:
444 return OK;
445 }
446 }
447
448
449 static int
450 route_addr (char *buffer)
451 {
452 char *pp = cp;
453
454 if (my_lex (buffer) == LX_AT) {
455 if (route (buffer) == NOTOK)
456 return NOTOK;
457 }
458 else
459 cp = pp;
460
461 if (local_part (buffer) == NOTOK)
462 return NOTOK;
463
464 switch (last_lex) {
465 case LX_AT:
466 return domain (buffer);
467
468 case LX_SEMI: /* if in group */
469 case LX_RBRK: /* no host */
470 case LX_COMA:
471 case LX_END:
472 return OK;
473
474 default:
475 sprintf (err, "no at-sign after local-part (%s)", buffer);
476 return NOTOK;
477 }
478 }
479
480
481 static int
482 local_part (char *buffer)
483 {
484 ingrp = glevel;
485
486 for (;;) {
487 switch (my_lex (buffer)) {
488 case LX_ATOM:
489 case LX_QSTR:
490 mbox = add (buffer, mbox);
491 break;
492
493 default:
494 sprintf (err, "no mailbox in local-part (%s)", buffer);
495 return NOTOK;
496 }
497
498 switch (my_lex (buffer)) {
499 case LX_DOT:
500 mbox = add (buffer, mbox);
501 continue;
502
503 default:
504 return OK;
505 }
506 }
507 }
508
509
510 static int
511 domain (char *buffer)
512 {
513 for (;;) {
514 switch (my_lex (buffer)) {
515 case LX_ATOM:
516 case LX_DLIT:
517 host = add (buffer, host);
518 break;
519
520 default:
521 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
522 return NOTOK;
523 }
524
525 switch (my_lex (buffer)) {
526 case LX_DOT:
527 host = add (buffer, host);
528 continue;
529
530 case LX_AT: /* sigh (0) */
531 mbox = add (host, add ("%", mbox));
532 free (host);
533 host = NULL;
534 continue;
535
536 default:
537 return OK;
538 }
539 }
540 }
541
542
543 static int
544 route (char *buffer)
545 {
546 path = strdup ("@");
547
548 for (;;) {
549 switch (my_lex (buffer)) {
550 case LX_ATOM:
551 case LX_DLIT:
552 path = add (buffer, path);
553 break;
554
555 default:
556 sprintf (err, "no sub-domain in domain-part of address (%s)", buffer);
557 return NOTOK;
558 }
559 switch (my_lex (buffer)) {
560 case LX_COMA:
561 path = add (buffer, path);
562 for (;;) {
563 switch (my_lex (buffer)) {
564 case LX_COMA:
565 continue;
566
567 case LX_AT:
568 path = add (buffer, path);
569 break;
570
571 default:
572 sprintf (err, "no at-sign found for next domain in route (%s)",
573 buffer);
574 }
575 break;
576 }
577 continue;
578
579 case LX_AT: /* XXX */
580 case LX_DOT:
581 path = add (buffer, path);
582 continue;
583
584 case LX_COLN:
585 path = add (buffer, path);
586 return OK;
587
588 default:
589 sprintf (err, "no colon found to terminate route (%s)", buffer);
590 return NOTOK;
591 }
592 }
593 }
594
595
596 static int
597 my_lex (char *buffer)
598 {
599 /* buffer should be at least BUFSIZ bytes long */
600 int i, gotat = 0;
601 char c, *bp;
602
603 /* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
604 #define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)
605
606 bp = buffer;
607 *bp = 0;
608 if (!cp)
609 return (last_lex = LX_END);
610
611 gotat = isat (cp);
612 c = *cp++;
613 while (isspace ((unsigned char) c))
614 c = *cp++;
615 if (c == 0) {
616 cp = NULL;
617 return (last_lex = LX_END);
618 }
619
620 if (c == '(') {
621 ADDCHR(c);
622 for (i = 0;;)
623 switch (c = *cp++) {
624 case 0:
625 cp = NULL;
626 return (last_lex = LX_ERR);
627 case QUOTE:
628 ADDCHR(c);
629 if ((c = *cp++) == 0) {
630 cp = NULL;
631 return (last_lex = LX_ERR);
632 }
633 ADDCHR(c);
634 continue;
635 case '(':
636 i++;
637 default:
638 ADDCHR(c);
639 continue;
640 case ')':
641 ADDCHR(c);
642 if (--i < 0) {
643 *bp = 0;
644 note = note ? add (buffer, add (" ", note))
645 : strdup (buffer);
646 return my_lex (buffer);
647 }
648 }
649 }
650
651 if (c == '"') {
652 ADDCHR(c);
653 for (;;)
654 switch (c = *cp++) {
655 case 0:
656 cp = NULL;
657 return (last_lex = LX_ERR);
658 case QUOTE:
659 ADDCHR(c);
660 if ((c = *cp++) == 0) {
661 cp = NULL;
662 return (last_lex = LX_ERR);
663 }
664 default:
665 ADDCHR(c);
666 continue;
667 case '"':
668 ADDCHR(c);
669 *bp = 0;
670 return (last_lex = LX_QSTR);
671 }
672 }
673
674 if (c == '[') {
675 ADDCHR(c);
676 for (;;)
677 switch (c = *cp++) {
678 case 0:
679 cp = NULL;
680 return (last_lex = LX_ERR);
681 case QUOTE:
682 ADDCHR(c);
683 if ((c = *cp++) == 0) {
684 cp = NULL;
685 return (last_lex = LX_ERR);
686 }
687 default:
688 ADDCHR(c);
689 continue;
690 case ']':
691 ADDCHR(c);
692 *bp = 0;
693 return (last_lex = LX_DLIT);
694 }
695 }
696
697 ADDCHR(c);
698 *bp = 0;
699 for (i = 0; special[i].lx_chr != 0; i++)
700 if (c == special[i].lx_chr)
701 return (last_lex = special[i].lx_val);
702
703 if (iscntrl ((unsigned char) c))
704 return (last_lex = LX_ERR);
705
706 for (;;) {
707 if ((c = *cp++) == 0)
708 break;
709 for (i = 0; special[i].lx_chr != 0; i++)
710 if (c == special[i].lx_chr)
711 goto got_atom;
712 if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
713 break;
714 ADDCHR(c);
715 }
716 got_atom: ;
717 if (c == 0)
718 cp = NULL;
719 else
720 cp--;
721 *bp = 0;
722 last_lex = !gotat || cp == NULL || strchr(cp, '<') != NULL
723 ? LX_ATOM : LX_AT;
724 return last_lex;
725
726 my_lex_buffull:
727 /* Out of buffer space. *bp is the last byte in the buffer */
728 *bp = 0;
729 return (last_lex = LX_ERR);
730 }
731
732
733 char *
734 legal_person (const char *p)
735 {
736 int i;
737 const char *cp;
738 static char buffer[BUFSIZ];
739
740 if (*p == '"')
741 return (char *) p;
742 for (cp = p; *cp; cp++)
743 for (i = 0; special[i].lx_chr; i++)
744 if (*cp == special[i].lx_chr) {
745 sprintf (buffer, "\"%s\"", p);
746 return buffer;
747 }
748
749 return (char *) p;
750 }
751
752
753 int
754 mfgets (FILE *in, char **bp)
755 {
756 int i;
757 char *cp, *dp, *ep;
758 static int len = 0;
759 static char *pp = NULL;
760
761 if (pp == NULL)
762 pp = mh_xmalloc ((size_t) (len = BUFSIZ));
763
764 for (ep = (cp = pp) + len - 2;;) {
765 switch (i = getc (in)) {
766 case EOF:
767 eol: ;
768 if (cp != pp) {
769 *cp = 0;
770 *bp = pp;
771 return OK;
772 }
773 eoh: ;
774 *bp = NULL;
775 free (pp);
776 pp = NULL;
777 return DONE;
778
779 case 0:
780 continue;
781
782 case '\n':
783 if (cp == pp) /* end of headers, gobble it */
784 goto eoh;
785 switch (i = getc (in)) {
786 default: /* end of line */
787 case '\n': /* end of headers, save for next call */
788 ungetc (i, in);
789 goto eol;
790
791 case ' ': /* continue headers */
792 case '\t':
793 *cp++ = '\n';
794 break;
795 } /* fall into default case */
796
797 default:
798 *cp++ = i;
799 break;
800 }
801 if (cp >= ep) {
802 dp = mh_xrealloc (pp, (size_t) (len += BUFSIZ));
803 cp += dp - pp, ep = (pp = cp) + len - 2;
804 }
805 }
806 }