]> diplodocus.org Git - nmh/blob - sbr/m_getfld.c
* uip/new.c: cast folder_len to int to avoid warning on
[nmh] / sbr / m_getfld.c
1
2 /*
3 * m_getfld.c -- read/parse a message
4 *
5 * $Id$
6 *
7 * This code is Copyright (c) 2002, by the authors of nmh. See the
8 * COPYRIGHT file in the root directory of the nmh distribution for
9 * complete copyright information.
10 */
11
12 #include <h/mh.h>
13 #include <h/mts.h>
14 #include <h/utils.h>
15
16 /* This module has a long and checkered history. First, it didn't burst
17 maildrops correctly because it considered two CTRL-A:s in a row to be
18 an inter-message delimiter. It really is four CTRL-A:s followed by a
19 newline. Unfortunately, MMDF will convert this delimiter *inside* a
20 message to a CTRL-B followed by three CTRL-A:s and a newline. This
21 caused the old version of m_getfld() to declare eom prematurely. The
22 fix was a lot slower than
23
24 c == '\001' && peekc (iob) == '\001'
25
26 but it worked, and to increase generality, MBOX style maildrops could
27 be parsed as well. Unfortunately the speed issue finally caught up with
28 us since this routine is at the very heart of MH.
29
30 To speed things up considerably, the routine Eom() was made an auxiliary
31 function called by the macro eom(). Unless we are bursting a maildrop,
32 the eom() macro returns FALSE saying we aren't at the end of the
33 message.
34
35 The next thing to do is to read the mts.conf file and initialize
36 delimiter[] and delimlen accordingly...
37
38 After mhl was made a built-in in msh, m_getfld() worked just fine
39 (using m_unknown() at startup). Until one day: a message which was
40 the result of a bursting was shown. Then, since the burst boundaries
41 aren't CTRL-A:s, m_getfld() would blindly plunge on past the boundary.
42 Very sad. The solution: introduce m_eomsbr(). This hook gets called
43 after the end of each line (since testing for eom involves an fseek()).
44 This worked fine, until one day: a message with no body portion arrived.
45 Then the
46
47 while (eom (c = Getc (iob), iob))
48 continue;
49
50 loop caused m_getfld() to return FMTERR. So, that logic was changed to
51 check for (*eom_action) and act accordingly.
52
53 This worked fine, until one day: someone didn't use four CTRL:A's as
54 their delimiters. So, the bullet got bit and we read mts.h and
55 continue to struggle on. It's not that bad though, since the only time
56 the code gets executed is when inc (or msh) calls it, and both of these
57 have already called mts_init().
58
59 ------------------------
60 (Written by Van Jacobson for the mh6 m_getfld, January, 1986):
61
62 This routine was accounting for 60% of the cpu time used by most mh
63 programs. I spent a bit of time tuning and it now accounts for <10%
64 of the time used. Like any heavily tuned routine, it's a bit
65 complex and you want to be sure you understand everything that it's
66 doing before you start hacking on it. Let me try to emphasize
67 that: every line in this atrocity depends on every other line,
68 sometimes in subtle ways. You should understand it all, in detail,
69 before trying to change any part. If you do change it, test the
70 result thoroughly (I use a hand-constructed test file that exercises
71 all the ways a header name, header body, header continuation,
72 header-body separator, body line and body eom can align themselves
73 with respect to a buffer boundary). "Minor" bugs in this routine
74 result in garbaged or lost mail.
75
76 If you hack on this and slow it down, I, my children and my
77 children's children will curse you.
78
79 This routine gets used on three different types of files: normal,
80 single msg files, "packed" unix or mmdf mailboxes (when used by inc)
81 and packed, directoried bulletin board files (when used by msh).
82 The biggest impact of different file types is in "eom" testing. The
83 code has been carefully organized to test for eom at appropriate
84 times and at no other times (since the check is quite expensive).
85 I have tried to arrange things so that the eom check need only be
86 done on entry to this routine. Since an eom can only occur after a
87 newline, this is easy to manage for header fields. For the msg
88 body, we try to efficiently search the input buffer to see if
89 it contains the eom delimiter. If it does, we take up to the
90 delimiter, otherwise we take everything in the buffer. (The change
91 to the body eom/copy processing produced the most noticeable
92 performance difference, particularly for "inc" and "show".)
93
94 There are three qualitatively different things this routine busts
95 out of a message: field names, field text and msg bodies. Field
96 names are typically short (~8 char) and the loop that extracts them
97 might terminate on a colon, newline or max width. I considered
98 using a Vax "scanc" to locate the end of the field followed by a
99 "bcopy" but the routine call overhead on a Vax is too large for this
100 to work on short names. If Berkeley ever makes "inline" part of the
101 C optimiser (so things like "scanc" turn into inline instructions) a
102 change here would be worthwhile.
103
104 Field text is typically 60 - 100 characters so there's (barely)
105 a win in doing a routine call to something that does a "locc"
106 followed by a "bmove". About 30% of the fields have continuations
107 (usually the 822 "received:" lines) and each continuation generates
108 another routine call. "Inline" would be a big win here, as well.
109
110 Messages, as of this writing, seem to come in two flavors: small
111 (~1K) and long (>2K). Most messages have 400 - 600 bytes of headers
112 so message bodies average at least a few hundred characters.
113 Assuming your system uses reasonably sized stdio buffers (1K or
114 more), this routine should be able to remove the body in large
115 (>500 byte) chunks. This makes the cost of a call to "bcopy"
116 small but there is a premium on checking for the eom in packed
117 maildrops. The eom pattern is always a simple string so we can
118 construct an efficient pattern matcher for it (e.g., a Vax "matchc"
119 instruction). Some thought went into recognizing the start of
120 an eom that has been split across two buffers.
121
122 This routine wants to deal with large chunks of data so, rather
123 than "getc" into a local buffer, it uses stdio's buffer. If
124 you try to use it on a non-buffered file, you'll get what you
125 deserve. This routine "knows" that struct FILEs have a _ptr
126 and a _cnt to describe the current state of the buffer and
127 it knows that _filbuf ignores the _ptr & _cnt and simply fills
128 the buffer. If stdio on your system doesn't work this way, you
129 may have to make small changes in this routine.
130
131 This routine also "knows" that an EOF indication on a stream is
132 "sticky" (i.e., you will keep getting EOF until you reposition the
133 stream). If your system doesn't work this way it is broken and you
134 should complain to the vendor. As a consequence of the sticky
135 EOF, this routine will never return any kind of EOF status when
136 there is data in "name" or "buf".
137 */
138
139
/*
 * Forward declarations of the helpers that are private to this file.
 */
static int m_Eom (int, FILE *);
static unsigned char *matchc(int, char *, int, char *);
static unsigned char *locc(int, unsigned char *, unsigned char);

/* Read the next character from a stdio stream. */
#define Getc(iob)   getc(iob)

/*
 * End-of-message test.  Always false for MS_DEFAULT (single message
 * files).  Otherwise true when "c" matches the first delimiter
 * character and m_Eom() confirms the rest, or when the installed
 * eom_action hook says so.
 */
#define eom(c,iob)  (msg_style != MS_DEFAULT && \
                     (((c) == *msg_delim && m_Eom(c,iob)) || \
                      (eom_action && (*eom_action)(c))))

/*
 * Boyer-Moore end-position map, indexed by character value.  It is
 * built by m_unknown() and consulted by the BODY case of m_getfld().
 */
static unsigned char **pat_map;

/*
 * defined in sbr/m_msgdef.c = 0
 * A disgusting hack for "inc": it records how many characters were
 * stuffed in the buffer on the last call (see comments in
 * uip/scansbr.c).
 */
extern int msg_count;

/*
 * defined in sbr/m_msgdef.c = MS_DEFAULT
 */
extern int msg_style;

/*
 * Delimiter bookkeeping for packed maildrops.
 *
 * The "full" delimiter string is a newline followed by the actual
 * delimiter, e.g. "\n\nFrom " for a Unix maildrop.
 *
 *   fdelim     start of the full string; used by the BODY case of
 *              m_getfld() to search the buffer for a possible eom
 *   msg_delim  first character of the actual delimiter (fdelim+1)
 *   edelim     second character of the actual delimiter; m_Eom()
 *              starts here because the first character has already
 *              been read and matched by the time it is called
 */
extern char *msg_delim;         /* defined in sbr/m_msgdef.c = "" */
static unsigned char *fdelim;
static unsigned char *delimend;
static int fdelimlen;
static unsigned char *edelim;
static int edelimlen;

/* Optional caller-supplied end-of-message hook (see m_eomsbr()). */
static int (*eom_action)(int) = NULL;

/*
 * Map the classic stdio member names onto whatever this platform's
 * FILE structure calls them.
 */
#if defined(_FSTDIO)
# define _ptr    _p             /* Gag   */
# define _cnt    _r             /* Retch */
# define _filbuf __srget        /* Puke  */
# define DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
#endif

#if defined(SCO_5_STDIO)
# define _ptr  __ptr
# define _cnt  __cnt
# define _base __base
# define _filbuf(fp)  ((fp)->__cnt = 0, __filbuf(fp))
# define DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
#endif

#if !defined(DEFINED__FILBUF_TO_SOMETHING_SPECIFIC)
extern int _filbuf(FILE*);
#endif
206
207
208 int
209 m_getfld (int state, unsigned char *name, unsigned char *buf,
210 int bufsz, FILE *iob)
211 {
212 register unsigned char *bp, *cp, *ep, *sp;
213 register int cnt, c, i, j;
214
215 if ((c = Getc(iob)) < 0) {
216 msg_count = 0;
217 *buf = 0;
218 return FILEEOF;
219 }
220 if (eom (c, iob)) {
221 if (! eom_action) {
222 /* flush null messages */
223 while ((c = Getc(iob)) >= 0 && eom (c, iob))
224 ;
225 if (c >= 0)
226 ungetc(c, iob);
227 }
228 msg_count = 0;
229 *buf = 0;
230 return FILEEOF;
231 }
232
233 switch (state) {
234 case FLDEOF:
235 case BODYEOF:
236 case FLD:
237 if (c == '\n' || c == '-') {
238 /* we hit the header/body separator */
239 while (c != '\n' && (c = Getc(iob)) >= 0)
240 ;
241
242 if (c < 0 || (c = Getc(iob)) < 0 || eom (c, iob)) {
243 if (! eom_action) {
244 /* flush null messages */
245 while ((c = Getc(iob)) >= 0 && eom (c, iob))
246 ;
247 if (c >= 0)
248 ungetc(c, iob);
249 }
250 msg_count = 0;
251 *buf = 0;
252 return FILEEOF;
253 }
254 state = BODY;
255 goto body;
256 }
257 /*
258 * get the name of this component. take characters up
259 * to a ':', a newline or NAMESZ-1 characters, whichever
260 * comes first.
261 */
262 cp = name;
263 i = NAMESZ - 1;
264 for (;;) {
265 #ifdef LINUX_STDIO
266 bp = sp = (unsigned char *) iob->_IO_read_ptr - 1;
267 j = (cnt = ((long) iob->_IO_read_end -
268 (long) iob->_IO_read_ptr) + 1) < i ? cnt : i;
269 #elif defined(__DragonFly__)
270 bp = sp = (unsigned char *) ((struct __FILE_public *)iob)->_p - 1;
271 j = (cnt = ((struct __FILE_public *)iob)->_r+1) < i ? cnt : i;
272 #else
273 bp = sp = (unsigned char *) iob->_ptr - 1;
274 j = (cnt = iob->_cnt+1) < i ? cnt : i;
275 #endif
276 while (--j >= 0 && (c = *bp++) != ':' && c != '\n')
277 *cp++ = c;
278
279 j = bp - sp;
280 if ((cnt -= j) <= 0) {
281 #ifdef LINUX_STDIO
282 iob->_IO_read_ptr = iob->_IO_read_end;
283 if (__underflow(iob) == EOF) {
284 #elif defined(__DragonFly__)
285 if (__srget(iob) == EOF) {
286 #else
287 if (_filbuf(iob) == EOF) {
288 #endif
289 *cp = *buf = 0;
290 advise (NULL, "eof encountered in field \"%s\"", name);
291 return FMTERR;
292 }
293 #ifdef LINUX_STDIO
294 iob->_IO_read_ptr++; /* NOT automatic in __underflow()! */
295 #endif
296 } else {
297 #ifdef LINUX_STDIO
298 iob->_IO_read_ptr = bp + 1;
299 #elif defined(__DragonFly__)
300 ((struct __FILE_public *)iob)->_p = bp + 1;
301 ((struct __FILE_public *)iob)->_r = cnt - 1;
302 #else
303 iob->_ptr = bp + 1;
304 iob->_cnt = cnt - 1;
305 #endif
306 }
307 if (c == ':')
308 break;
309
310 /*
311 * something went wrong. possibilities are:
312 * . hit a newline (error)
313 * . got more than namesz chars. (error)
314 * . hit the end of the buffer. (loop)
315 */
316 if (c == '\n') {
317 /* We hit the end of the line without seeing ':' to
318 * terminate the field name. This is usually (always?)
319 * spam. But, blowing up is lame, especially when
320 * scan(1)ing a folder with such messages. Pretend such
321 * lines are the first of the body (at least mutt also
322 * handles it this way). */
323
324 /* See if buf can hold this line, since we were assuming
325 * we had a buffer of NAMESZ, not bufsz. */
326 /* + 1 for the newline */
327 if (bufsz < j + 1) {
328 /* No, it can't. Oh well, guess we'll blow up. */
329 *cp = *buf = 0;
330 advise (NULL, "eol encountered in field \"%s\"", name);
331 state = FMTERR;
332 goto finish;
333 }
334 memcpy (buf, name, j - 1);
335 buf[j - 1] = '\n';
336 buf[j] = '\0';
337 /* mhparse.c:get_content wants to find the position of the
338 * body start, but it thinks there's a blank line between
339 * the header and the body (naturally!), so seek back so
340 * that things line up even though we don't have that
341 * blank line in this case. Simpler parsers (e.g. mhl)
342 * get extra newlines, but that should be harmless enough,
343 * right? This is a corrupt message anyway. */
344 fseek (iob, ftell (iob) - 2, SEEK_SET);
345 return BODY;
346 }
347 if ((i -= j) <= 0) {
348 *cp = *buf = 0;
349 advise (NULL, "field name \"%s\" exceeds %d bytes", name, NAMESZ - 2);
350 state = LENERR;
351 goto finish;
352 }
353 }
354
355 while (isspace (*--cp) && cp >= name)
356 ;
357 *++cp = 0;
358 /* fall through */
359
360 case FLDPLUS:
361 /*
362 * get (more of) the text of a field. take
363 * characters up to the end of this field (newline
364 * followed by non-blank) or bufsz-1 characters.
365 */
366 cp = buf; i = bufsz-1;
367 for (;;) {
368 #ifdef LINUX_STDIO
369 cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
370 bp = (unsigned char *) --iob->_IO_read_ptr;
371 #elif defined(__DragonFly__)
372 cnt = ((struct __FILE_public *)iob)->_r++;
373 bp = (unsigned char *) --((struct __FILE_public *)iob)->_p;
374 #else
375 cnt = iob->_cnt++;
376 bp = (unsigned char *) --iob->_ptr;
377 #endif
378 c = cnt < i ? cnt : i;
379 while ((ep = locc( c, bp, '\n' ))) {
380 /*
381 * if we hit the end of this field, return.
382 */
383 if ((j = *++ep) != ' ' && j != '\t') {
384 #ifdef LINUX_STDIO
385 j = ep - (unsigned char *) iob->_IO_read_ptr;
386 memcpy (cp, iob->_IO_read_ptr, j);
387 iob->_IO_read_ptr = ep;
388 #elif defined(__DragonFly__)
389 j = ep - (unsigned char *) ((struct __FILE_public *)iob)->_p;
390 memcpy (cp, ((struct __FILE_public *)iob)->_p, j);
391 ((struct __FILE_public *)iob)->_p = ep;
392 ((struct __FILE_public *)iob)->_r -= j;
393 #else
394 j = ep - (unsigned char *) iob->_ptr;
395 memcpy (cp, iob->_ptr, j);
396 iob->_ptr = ep;
397 iob->_cnt -= j;
398 #endif
399 cp += j;
400 state = FLD;
401 goto finish;
402 }
403 c -= ep - bp;
404 bp = ep;
405 }
406 /*
407 * end of input or dest buffer - copy what we've found.
408 */
409 #ifdef LINUX_STDIO
410 c += bp - (unsigned char *) iob->_IO_read_ptr;
411 memcpy( cp, iob->_IO_read_ptr, c);
412 #elif defined(__DragonFly__)
413 c += bp - (unsigned char *) ((struct __FILE_public *)iob)->_p;
414 memcpy( cp, ((struct __FILE_public *)iob)->_p, c);
415 #else
416 c += bp - (unsigned char *) iob->_ptr;
417 memcpy( cp, iob->_ptr, c);
418 #endif
419 i -= c;
420 cp += c;
421 if (i <= 0) {
422 /* the dest buffer is full */
423 #ifdef LINUX_STDIO
424 iob->_IO_read_ptr += c;
425 #elif defined(__DragonFly__)
426 ((struct __FILE_public *)iob)->_r -= c;
427 ((struct __FILE_public *)iob)->_p += c;
428 #else
429 iob->_cnt -= c;
430 iob->_ptr += c;
431 #endif
432 state = FLDPLUS;
433 break;
434 }
435 /*
436 * There's one character left in the input buffer.
437 * Copy it & fill the buffer. If the last char
438 * was a newline and the next char is not whitespace,
439 * this is the end of the field. Otherwise loop.
440 */
441 --i;
442 #ifdef LINUX_STDIO
443 *cp++ = j = *(iob->_IO_read_ptr + c);
444 iob->_IO_read_ptr = iob->_IO_read_end;
445 c = __underflow(iob);
446 iob->_IO_read_ptr++; /* NOT automatic! */
447 #elif defined(__DragonFly__)
448 *cp++ =j = *(((struct __FILE_public *)iob)->_p + c);
449 c = __srget(iob);
450 #else
451 *cp++ = j = *(iob->_ptr + c);
452 c = _filbuf(iob);
453 #endif
454 if (c == EOF ||
455 ((j == '\0' || j == '\n') && c != ' ' && c != '\t')) {
456 if (c != EOF) {
457 #ifdef LINUX_STDIO
458 --iob->_IO_read_ptr;
459 #elif defined(__DragonFly__)
460 --((struct __FILE_public *)iob)->_p;
461 ++((struct __FILE_public *)iob)->_r;
462 #else
463 --iob->_ptr;
464 ++iob->_cnt;
465 #endif
466 }
467 state = FLD;
468 break;
469 }
470 }
471 break;
472
473 case BODY:
474 body:
475 /*
476 * get the message body up to bufsz characters or the
477 * end of the message. Sleazy hack: if bufsz is negative
478 * we assume that we were called to copy directly into
479 * the output buffer and we don't add an eos.
480 */
481 i = (bufsz < 0) ? -bufsz : bufsz-1;
482 #ifdef LINUX_STDIO
483 bp = (unsigned char *) --iob->_IO_read_ptr;
484 cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
485 #elif defined(__DragonFly__)
486 bp = (unsigned char *) --((struct __FILE_public *)iob)->_p;
487 cnt = ++((struct __FILE_public *)iob)->_r;
488 #else
489 bp = (unsigned char *) --iob->_ptr;
490 cnt = ++iob->_cnt;
491 #endif
492 c = (cnt < i ? cnt : i);
493 if (msg_style != MS_DEFAULT && c > 1) {
494 /*
495 * packed maildrop - only take up to the (possible)
496 * start of the next message. This "matchc" should
497 * probably be a Boyer-Moore matcher for non-vaxen,
498 * particularly since we have the alignment table
499 * all built for the end-of-buffer test (next).
500 * But our vax timings indicate that the "matchc"
501 * instruction is 50% faster than a carefully coded
502 * B.M. matcher for most strings. (So much for elegant
503 * algorithms vs. brute force.) Since I (currently)
504 * run MH on a vax, we use the matchc instruction. --vj
505 */
506 if ((ep = matchc( fdelimlen, fdelim, c, bp )))
507 c = ep - bp + 1;
508 else {
509 /*
510 * There's no delim in the buffer but there may be
511 * a partial one at the end. If so, we want to leave
512 * it so the "eom" check on the next call picks it up.
513 * Use a modified Boyer-Moore matcher to make this
514 * check relatively cheap. The first "if" figures
515 * out what position in the pattern matches the last
516 * character in the buffer. The inner "while" matches
517 * the pattern against the buffer, backwards starting
518 * at that position. Note that unless the buffer
519 * ends with one of the characters in the pattern
520 * (excluding the first and last), we do only one test.
521 */
522 ep = bp + c - 1;
523 if ((sp = pat_map[*ep])) {
524 do {
525 /* This if() is true unless (a) the buffer is too
526 * small to contain this delimiter prefix, or
527 * (b) it contains exactly enough chars for the
528 * delimiter prefix.
529 * For case (a) obviously we aren't going to match.
530 * For case (b), if the buffer really contained exactly
531 * a delim prefix, then the m_eom call at entry
532 * should have found it. Thus it's not a delim
533 * and we know we won't get a match.
534 */
535 if (((sp - fdelim) + 2) <= c) {
536 cp = sp;
537 /* Unfortunately although fdelim has a preceding NUL
538 * we can't use this as a sentinel in case the buffer
539 * contains a NUL in exactly the wrong place (this
540 * would cause us to run off the front of fdelim).
541 */
542 while (*--ep == *--cp)
543 if (cp < fdelim)
544 break;
545 if (cp < fdelim) {
546 /* we matched the entire delim prefix,
547 * so only take the buffer up to there.
548 * we know ep >= bp -- check above prevents underrun
549 */
550 c = (ep - bp) + 2;
551 break;
552 }
553 }
554 /* try matching one less char of delim string */
555 ep = bp + c - 1;
556 } while (--sp > fdelim);
557 }
558 }
559 }
560 memcpy( buf, bp, c );
561 #ifdef LINUX_STDIO
562 iob->_IO_read_ptr += c;
563 #elif defined(__DragonFly__)
564 ((struct __FILE_public *)iob)->_r -= c;
565 ((struct __FILE_public *)iob)->_p += c;
566 #else
567 iob->_cnt -= c;
568 iob->_ptr += c;
569 #endif
570 if (bufsz < 0) {
571 msg_count = c;
572 return (state);
573 }
574 cp = buf + c;
575 break;
576
577 default:
578 adios (NULL, "m_getfld() called with bogus state of %d", state);
579 }
580 finish:
581 *cp = 0;
582 msg_count = cp - buf;
583 return (state);
584 }
585
586
587 #ifdef RPATHS
588 static char unixbuf[BUFSIZ] = "";
589 #endif /* RPATHS */
590
591 void
592 m_unknown(FILE *iob)
593 {
594 register int c;
595 register long pos;
596 char text[10];
597 register char *cp;
598 register char *delimstr;
599
600 /*
601 * Figure out what the message delimitter string is for this
602 * maildrop. (This used to be part of m_Eom but I didn't like
603 * the idea of an "if" statement that could only succeed on the
604 * first call to m_Eom getting executed on each call, i.e., at
605 * every newline in the message).
606 *
607 * If the first line of the maildrop is a Unix "From " line, we
608 * say the style is MBOX and eat the rest of the line. Otherwise
609 * we say the style is MMDF and look for the delimiter string
610 * specified when nmh was built (or from the mts.conf file).
611 */
612
613 msg_style = MS_UNKNOWN;
614
615 pos = ftell (iob);
616 if (fread (text, sizeof(*text), 5, iob) == 5
617 && strncmp (text, "From ", 5) == 0) {
618 msg_style = MS_MBOX;
619 delimstr = "\nFrom ";
620 #ifndef RPATHS
621 while ((c = getc (iob)) != '\n' && c >= 0)
622 ;
623 #else /* RPATHS */
624 cp = unixbuf;
625 while ((c = getc (iob)) != '\n' && cp - unixbuf < BUFSIZ - 1)
626 *cp++ = c;
627 *cp = 0;
628 #endif /* RPATHS */
629 } else {
630 /* not a Unix style maildrop */
631 fseek (iob, pos, SEEK_SET);
632 if (mmdlm2 == NULL || *mmdlm2 == 0)
633 mmdlm2 = "\001\001\001\001\n";
634 delimstr = mmdlm2;
635 msg_style = MS_MMDF;
636 }
637 c = strlen (delimstr);
638 fdelim = (unsigned char *) mh_xmalloc((size_t) (c + 3));
639 *fdelim++ = '\0';
640 *fdelim = '\n';
641 msg_delim = (char *)fdelim+1;
642 edelim = (unsigned char *)msg_delim+1;
643 fdelimlen = c + 1;
644 edelimlen = c - 1;
645 strcpy (msg_delim, delimstr);
646 delimend = (unsigned char *)msg_delim + edelimlen;
647 if (edelimlen <= 1)
648 adios (NULL, "maildrop delimiter must be at least 2 bytes");
649 /*
650 * build a Boyer-Moore end-position map for the matcher in m_getfld.
651 * N.B. - we don't match just the first char (since it's the newline
652 * separator) or the last char (since the matchc would have found it
653 * if it was a real delim).
654 */
655 pat_map = (unsigned char **) calloc (256, sizeof(unsigned char *));
656
657 for (cp = (char *) fdelim + 1; cp < (char *) delimend; cp++ )
658 pat_map[(unsigned char)*cp] = (unsigned char *) cp;
659
660 if (msg_style == MS_MMDF) {
661 /* flush extra msg hdrs */
662 while ((c = Getc(iob)) >= 0 && eom (c, iob))
663 ;
664 if (c >= 0)
665 ungetc(c, iob);
666 }
667 }
668
669
670 void
671 m_eomsbr (int (*action)(int))
672 {
673 if ((eom_action = action)) {
674 msg_style = MS_MSH;
675 *msg_delim = 0;
676 fdelimlen = 1;
677 delimend = fdelim;
678 } else {
679 msg_style = MS_MMDF;
680 msg_delim = (char *)fdelim + 1;
681 fdelimlen = strlen((char *)fdelim);
682 delimend = (unsigned char *)(msg_delim + edelimlen);
683 }
684 }
685
686
687 /*
688 * test for msg delimiter string
689 */
690
691 static int
692 m_Eom (int c, FILE *iob)
693 {
694 register long pos = 0L;
695 register int i;
696 char text[10];
697 #ifdef RPATHS
698 register char *cp;
699 #endif /* RPATHS */
700
701 pos = ftell (iob);
702 if ((i = fread (text, sizeof *text, edelimlen, iob)) != edelimlen
703 || strncmp (text, (char *)edelim, edelimlen)) {
704 if (i == 0 && msg_style == MS_MBOX)
705 /* the final newline in the (brain damaged) unix-format
706 * maildrop is part of the delimitter - delete it.
707 */
708 return 1;
709
710 #if 0
711 fseek (iob, pos, SEEK_SET);
712 #endif
713
714 fseek (iob, (long)(pos-1), SEEK_SET);
715 getc (iob); /* should be OK */
716 return 0;
717 }
718
719 if (msg_style == MS_MBOX) {
720 #ifndef RPATHS
721 while ((c = getc (iob)) != '\n')
722 if (c < 0)
723 break;
724 #else /* RPATHS */
725 cp = unixbuf;
726 while ((c = getc (iob)) != '\n' && c >= 0 && cp - unixbuf < BUFSIZ - 1)
727 *cp++ = c;
728 *cp = 0;
729 #endif /* RPATHS */
730 }
731
732 return 1;
733 }
734
735
#ifdef RPATHS
/*
 * Return the Return-Path and Delivery-Date
 * header information.
 *
 * Currently, I'm assuming that the "From " line
 * takes one of the following forms.
 *
 * From sender date remote from host   (for UUCP delivery)
 * From sender@host  date              (for sendmail delivery)
 *
 * rp/rplen  destination and size for the return path line
 * dd/ddlen  destination and size for the delivery date line
 *
 * Both results are newline-terminated.  Returns 0 if the saved
 * envelope text (unixbuf) contains no space, otherwise 1; on success
 * unixbuf is emptied.
 */

int
get_returnpath (char *rp, int rplen, char *dd, int ddlen)
{
    char *ap, *bp, *cp, *dp;

    ap = unixbuf;
    if (!(bp = cp = strchr(ap, ' ')))
        return 0;

    /*
     * Check for "remote from" in envelope to see
     * if this message uses UUCP style addressing
     */
    while ((cp = strchr(++cp, 'r'))) {
        if (strncmp (cp, "remote from", 11) == 0) {
            cp = strrchr (cp, ' ');
            break;
        }
    }

    /*
     * Get the Return-Path information from
     * the "From " envelope.
     */
    if (cp) {
        /* return path for UUCP style addressing */
        ++cp;
        /* The host name runs to the newline if there is one, else to
         * the end of the saved text.  Never subtract from a NULL
         * strchr() result. */
        dp = strchr (cp, '\n');
        if (dp == NULL)
            dp = cp + strlen (cp);
        snprintf (rp, rplen, "%.*s!%.*s\n", (int)(dp - cp), cp, (int)(bp - ap), ap);
    } else {
        /* return path for standard domain addressing */
        snprintf (rp, rplen, "%.*s\n", (int)(bp - ap), ap);
    }

    /*
     * advance over the spaces to get to
     * delivery date on envelope
     */
    while (*bp == ' ')
        bp++;

    /* Now get delivery date from envelope */
    snprintf (dd, ddlen, "%.*s\n", 24, bp);

    unixbuf[0] = 0;
    return 1;
}
#endif /* RPATHS */
795
796
/*
 * Find the first occurrence of the "patln"-byte pattern "pat" within
 * the first "strln" bytes of "str".  Returns the address of the match,
 * or NULL if there is none.
 */
static unsigned char *
matchc(int patln, char *pat, int strln, char *str)
{
    char *last_start = str + strln - patln;    /* last place a match can begin */
    char *pat_end = pat + patln;
    char *pat_rest = pat + 1;                  /* pattern after its first byte */
    char first = *pat;
    char *scan = str;

    for (;;) {
        char *tail;
        char *want;

        /* hunt for the first pattern byte */
        for (;;) {
            char here = *scan++;

            if (here == first)
                break;
            if (scan > last_start)
                return NULL;
        }
        /* found it, but too late for the whole pattern to fit */
        if (scan > last_start + 1)
            return NULL;

        /* compare the remainder of the pattern */
        tail = scan;
        want = pat_rest;
        while (want < pat_end && *tail == *want) {
            tail++;
            want++;
        }
        if (want >= pat_end)
            return (unsigned char *) (scan - 1);
    }
}
819
820
/*
 * Locate character "term" in the next "cnt" characters of "src".
 * If found, return its address, otherwise return 0.
 *
 * A "cnt" of zero or less searches nothing and returns 0 without
 * touching "src".
 */

static unsigned char *
locc(int cnt, unsigned char *src, unsigned char term)
{
    if (cnt <= 0)
        return NULL;

    return memchr (src, term, (size_t) cnt);
}
833