]> diplodocus.org Git - nmh/blob - sbr/m_getfld.c
Fix a segfault that happens when using the -file option.
[nmh] / sbr / m_getfld.c
1
2 /*
3 * m_getfld.c -- read/parse a message
4 *
5 * This code is Copyright (c) 2002, by the authors of nmh. See the
6 * COPYRIGHT file in the root directory of the nmh distribution for
7 * complete copyright information.
8 */
9
10 #include <h/mh.h>
11 #include <h/mts.h>
12 #include <h/utils.h>
13
14 /* This module has a long and checkered history. First, it didn't burst
15 maildrops correctly because it considered two CTRL-A:s in a row to be
16 an inter-message delimiter. It really is four CTRL-A:s followed by a
17 newline. Unfortunately, MMDF will convert this delimiter *inside* a
18 message to a CTRL-B followed by three CTRL-A:s and a newline. This
19 caused the old version of m_getfld() to declare eom prematurely. The
20 fix was a lot slower than
21
22 c == '\001' && peekc (iob) == '\001'
23
24 but it worked, and to increase generality, MBOX style maildrops could
25 be parsed as well. Unfortunately the speed issue finally caught up with
26 us since this routine is at the very heart of MH.
27
28 To speed things up considerably, the routine Eom() was made an auxiliary
29 function called by the macro eom(). Unless we are bursting a maildrop,
30 the eom() macro returns FALSE saying we aren't at the end of the
31 message.
32
33 The next thing to do is to read the mts.conf file and initialize
34 delimiter[] and delimlen accordingly...
35
36 After mhl was made a built-in in msh, m_getfld() worked just fine
37 (using m_unknown() at startup). Until one day: a message which was
38 the result of a bursting was shown. Then, since the burst boundaries
39 aren't CTRL-A:s, m_getfld() would blindly plunge on past the boundary.
40 Very sad. The solution: introduce m_eomsbr(). This hook gets called
41 after the end of each line (since testing for eom involves an fseek()).
42 This worked fine, until one day: a message with no body portion arrived.
43 Then the
44
45 while (eom (c = Getc (iob), iob))
46 continue;
47
48 loop caused m_getfld() to return FMTERR. So, that logic was changed to
49 check for (*eom_action) and act accordingly.
50
51 This worked fine, until one day: someone didn't use four CTRL:A's as
52 their delimiters. So, the bullet got bit and we read mts.h and
53 continue to struggle on. It's not that bad though, since the only time
54 the code gets executed is when inc (or msh) calls it, and both of these
55 have already called mts_init().
56
57 ------------------------
58 (Written by Van Jacobson for the mh6 m_getfld, January, 1986):
59
60 This routine was accounting for 60% of the cpu time used by most mh
61 programs. I spent a bit of time tuning and it now accounts for <10%
62 of the time used. Like any heavily tuned routine, it's a bit
63 complex and you want to be sure you understand everything that it's
64 doing before you start hacking on it. Let me try to emphasize
65 that: every line in this atrocity depends on every other line,
66 sometimes in subtle ways. You should understand it all, in detail,
67 before trying to change any part. If you do change it, test the
68 result thoroughly (I use a hand-constructed test file that exercises
69 all the ways a header name, header body, header continuation,
70 header-body separator, body line and body eom can align themselves
71 with respect to a buffer boundary). "Minor" bugs in this routine
72 result in garbaged or lost mail.
73
74 If you hack on this and slow it down, I, my children and my
75 children's children will curse you.
76
77 This routine gets used on three different types of files: normal,
78 single msg files, "packed" unix or mmdf mailboxes (when used by inc)
79 and packed, directoried bulletin board files (when used by msh).
80 The biggest impact of different file types is in "eom" testing. The
81 code has been carefully organized to test for eom at appropriate
82 times and at no other times (since the check is quite expensive).
83 I have tried to arrange things so that the eom check need only be
84 done on entry to this routine. Since an eom can only occur after a
85 newline, this is easy to manage for header fields. For the msg
86 body, we try to efficiently search the input buffer to see if it
87 contains the eom delimiter. If it does, we take up to the
88 delimiter, otherwise we take everything in the buffer. (The change
89 to the body eom/copy processing produced the most noticeable
90 performance difference, particularly for "inc" and "show".)
91
92 There are three qualitatively different things this routine busts
93 out of a message: field names, field text and msg bodies. Field
94 names are typically short (~8 char) and the loop that extracts them
95 might terminate on a colon, newline or max width. I considered
96 using a Vax "scanc" to locate the end of the field followed by a
97 "bcopy" but the routine call overhead on a Vax is too large for this
98 to work on short names. If Berkeley ever makes "inline" part of the
99 C optimiser (so things like "scanc" turn into inline instructions) a
100 change here would be worthwhile.
101
102 Field text is typically 60 - 100 characters so there's (barely)
103 a win in doing a routine call to something that does a "locc"
104 followed by a "bmove". About 30% of the fields have continuations
105 (usually the 822 "received:" lines) and each continuation generates
106 another routine call. "Inline" would be a big win here, as well.
107
108 Messages, as of this writing, seem to come in two flavors: small
109 (~1K) and long (>2K). Most messages have 400 - 600 bytes of headers
110 so message bodies average at least a few hundred characters.
111 Assuming your system uses reasonably sized stdio buffers (1K or
112 more), this routine should be able to remove the body in large
113 (>500 byte) chunks. This makes the cost of a call to "bcopy"
114 small but there is a premium on checking for the eom in packed
115 maildrops. The eom pattern is always a simple string so we can
116 construct an efficient pattern matcher for it (e.g., a Vax "matchc"
117 instruction). Some thought went into recognizing the start of
118 an eom that has been split across two buffers.
119
120 This routine wants to deal with large chunks of data so, rather
121 than "getc" into a local buffer, it uses stdio's buffer. If
122 you try to use it on a non-buffered file, you'll get what you
123 deserve. This routine "knows" that struct FILEs have a _ptr
124 and a _cnt to describe the current state of the buffer and
125 it knows that _filbuf ignores the _ptr & _cnt and simply fills
126 the buffer. If stdio on your system doesn't work this way, you
127 may have to make small changes in this routine.
128
129 This routine also "knows" that an EOF indication on a stream is
130 "sticky" (i.e., you will keep getting EOF until you reposition the
131 stream). If your system doesn't work this way it is broken and you
132 should complain to the vendor. As a consequence of the sticky
133 EOF, this routine will never return any kind of EOF status when
134 there is data in "name" or "buf").
135 */
136
137
/*
 * static prototypes
 *
 * m_Eom()  - confirms that the bytes following an already-matched first
 *            delimiter byte complete the message delimiter.
 * matchc() - finds a pattern inside a counted buffer.
 * locc()   - finds a single byte inside a counted buffer.
 */
static int m_Eom (int, FILE *);
static unsigned char *matchc(int, char *, int, char *);
static unsigned char *locc(int, unsigned char *, unsigned char);

/* Single point through which m_getfld()/m_unknown() read one character. */
#define Getc(iob)	getc(iob)

/*
 * End-of-message test for the character "c" just read from "iob".
 * Always false while msg_style is MS_DEFAULT.  Otherwise true when
 * either "c" equals the first delimiter byte and m_Eom() confirms the
 * rest of the delimiter, or an eom_action hook is installed and
 * returns non-zero for "c".  Note that "c" is evaluated more than once.
 */
#define eom(c,iob)	(msg_style != MS_DEFAULT && \
			 (((c) == *msg_delim && m_Eom(c,iob)) ||\
			  (eom_action && (*eom_action)(c))))

/*
 * Table indexed by byte value, allocated and filled in by m_unknown().
 * A non-NULL entry points at a position inside the delimiter string
 * where that byte occurs; m_getfld() uses it to detect a partial
 * delimiter at the end of the input buffer.
 */
static unsigned char **pat_map;

/*
 * defined in sbr/m_msgdef.c = 0
 * This is a disgusting hack for "inc" so it can know how many
 * characters were stuffed in the buffer on the last call
 * (see comments in uip/scansbr.c).
 */
extern int msg_count;

/*
 * defined in sbr/m_msgdef.c = MS_DEFAULT
 */
extern int msg_style;

/*
 * The "full" delimiter string for a packed maildrop consists
 * of a newline followed by the actual delimiter.  E.g., the
 * full string for a Unix maildrop would be: "\n\nFrom ".
 * "Fdelim" points to the start of the full string and is used
 * in the BODY case of the main routine to search the buffer for
 * a possible eom.  Msg_delim points to the first character of
 * the actual delim. string (i.e., fdelim+1).  Edelim
 * points to the 2nd character of actual delimiter string.  It
 * is used in m_Eom because the first character of the string
 * has been read and matched before m_Eom is called.
 *
 * Delimend is set to msg_delim + edelimlen; fdelimlen and edelimlen
 * are the lengths of the strings starting at fdelim and edelim.
 */
extern char *msg_delim;         /* defined in sbr/m_msgdef.c = "" */
static unsigned char *fdelim;
static unsigned char *delimend;
static int fdelimlen;
static unsigned char *edelim;
static int edelimlen;

/* Optional end-of-message hook installed by m_eomsbr(); NULL when unset. */
static int (*eom_action)(int) = NULL;

/*
 * Map the stdio-internal names used by m_getfld() onto the names this
 * stdio implementation provides when _FSTDIO is defined.
 */
#ifdef _FSTDIO
# define _ptr    _p		/* Gag   */
# define _cnt    _r		/* Retch */
# define _filbuf __srget	/* Puke  */
# define DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
#endif

/* Declare _filbuf() ourselves unless it was remapped above. */
#ifndef DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
extern int  _filbuf(FILE*);
#endif
196
197
198 int
199 m_getfld (int state, unsigned char *name, unsigned char *buf,
200 int bufsz, FILE *iob)
201 {
202 register unsigned char *bp, *cp, *ep, *sp;
203 register int cnt, c, i, j;
204
205 if ((c = Getc(iob)) < 0) {
206 msg_count = 0;
207 *buf = 0;
208 return FILEEOF;
209 }
210 if (eom (c, iob)) {
211 if (! eom_action) {
212 /* flush null messages */
213 while ((c = Getc(iob)) >= 0 && eom (c, iob))
214 ;
215 if (c >= 0)
216 ungetc(c, iob);
217 }
218 msg_count = 0;
219 *buf = 0;
220 return FILEEOF;
221 }
222
223 switch (state) {
224 case FLDEOF:
225 case BODYEOF:
226 case FLD:
227 if (c == '\n' || c == '-') {
228 /* we hit the header/body separator */
229 while (c != '\n' && (c = Getc(iob)) >= 0)
230 ;
231
232 if (c < 0 || (c = Getc(iob)) < 0 || eom (c, iob)) {
233 if (! eom_action) {
234 /* flush null messages */
235 while ((c = Getc(iob)) >= 0 && eom (c, iob))
236 ;
237 if (c >= 0)
238 ungetc(c, iob);
239 }
240 msg_count = 0;
241 *buf = 0;
242 return FILEEOF;
243 }
244 state = BODY;
245 goto body;
246 }
247 /*
248 * get the name of this component. take characters up
249 * to a ':', a newline or NAMESZ-1 characters, whichever
250 * comes first.
251 */
252 cp = name;
253 i = NAMESZ - 1;
254 for (;;) {
255 #ifdef LINUX_STDIO
256 bp = sp = (unsigned char *) iob->_IO_read_ptr - 1;
257 j = (cnt = ((long) iob->_IO_read_end -
258 (long) iob->_IO_read_ptr) + 1) < i ? cnt : i;
259 #elif defined(__DragonFly__)
260 bp = sp = (unsigned char *) ((struct __FILE_public *)iob)->_p - 1;
261 j = (cnt = ((struct __FILE_public *)iob)->_r+1) < i ? cnt : i;
262 #else
263 bp = sp = (unsigned char *) iob->_ptr - 1;
264 j = (cnt = iob->_cnt+1) < i ? cnt : i;
265 #endif
266 while (--j >= 0 && (c = *bp++) != ':' && c != '\n')
267 *cp++ = c;
268
269 j = bp - sp;
270 if ((cnt -= j) <= 0) {
271 #ifdef LINUX_STDIO
272 iob->_IO_read_ptr = iob->_IO_read_end;
273 if (__underflow(iob) == EOF) {
274 #elif defined(__DragonFly__)
275 if (__srget(iob) == EOF) {
276 #else
277 if (_filbuf(iob) == EOF) {
278 #endif
279 *cp = *buf = 0;
280 advise (NULL, "eof encountered in field \"%s\"", name);
281 return FMTERR;
282 }
283 #ifdef LINUX_STDIO
284 iob->_IO_read_ptr++; /* NOT automatic in __underflow()! */
285 #endif
286 } else {
287 #ifdef LINUX_STDIO
288 iob->_IO_read_ptr = bp + 1;
289 #elif defined(__DragonFly__)
290 ((struct __FILE_public *)iob)->_p = bp + 1;
291 ((struct __FILE_public *)iob)->_r = cnt - 1;
292 #else
293 iob->_ptr = bp + 1;
294 iob->_cnt = cnt - 1;
295 #endif
296 }
297 if (c == ':')
298 break;
299
300 /*
301 * something went wrong. possibilities are:
302 * . hit a newline (error)
303 * . got more than namesz chars. (error)
304 * . hit the end of the buffer. (loop)
305 */
306 if (c == '\n') {
307 /* We hit the end of the line without seeing ':' to
308 * terminate the field name. This is usually (always?)
309 * spam. But, blowing up is lame, especially when
310 * scan(1)ing a folder with such messages. Pretend such
311 * lines are the first of the body (at least mutt also
312 * handles it this way). */
313
314 /* See if buf can hold this line, since we were assuming
315 * we had a buffer of NAMESZ, not bufsz. */
316 /* + 1 for the newline */
317 if (bufsz < j + 1) {
318 /* No, it can't. Oh well, guess we'll blow up. */
319 *cp = *buf = 0;
320 advise (NULL, "eol encountered in field \"%s\"", name);
321 state = FMTERR;
322 goto finish;
323 }
324 memcpy (buf, name, j - 1);
325 buf[j - 1] = '\n';
326 buf[j] = '\0';
327 /* mhparse.c:get_content wants to find the position of the
328 * body start, but it thinks there's a blank line between
329 * the header and the body (naturally!), so seek back so
330 * that things line up even though we don't have that
331 * blank line in this case. Simpler parsers (e.g. mhl)
332 * get extra newlines, but that should be harmless enough,
333 * right? This is a corrupt message anyway. */
334 fseek (iob, ftell (iob) - 2, SEEK_SET);
335 return BODY;
336 }
337 if ((i -= j) <= 0) {
338 *cp = *buf = 0;
339 advise (NULL, "field name \"%s\" exceeds %d bytes", name, NAMESZ - 2);
340 state = LENERR;
341 goto finish;
342 }
343 }
344
345 while (isspace (*--cp) && cp >= name)
346 ;
347 *++cp = 0;
348 /* fall through */
349
350 case FLDPLUS:
351 /*
352 * get (more of) the text of a field. take
353 * characters up to the end of this field (newline
354 * followed by non-blank) or bufsz-1 characters.
355 */
356 cp = buf; i = bufsz-1;
357 for (;;) {
358 #ifdef LINUX_STDIO
359 cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
360 bp = (unsigned char *) --iob->_IO_read_ptr;
361 #elif defined(__DragonFly__)
362 cnt = ((struct __FILE_public *)iob)->_r++;
363 bp = (unsigned char *) --((struct __FILE_public *)iob)->_p;
364 #else
365 cnt = iob->_cnt++;
366 bp = (unsigned char *) --iob->_ptr;
367 #endif
368 c = cnt < i ? cnt : i;
369 while ((ep = locc( c, bp, '\n' ))) {
370 /*
371 * if we hit the end of this field, return.
372 */
373 if ((j = *++ep) != ' ' && j != '\t') {
374 #ifdef LINUX_STDIO
375 j = ep - (unsigned char *) iob->_IO_read_ptr;
376 memcpy (cp, iob->_IO_read_ptr, j);
377 iob->_IO_read_ptr = ep;
378 #elif defined(__DragonFly__)
379 j = ep - (unsigned char *) ((struct __FILE_public *)iob)->_p;
380 memcpy (cp, ((struct __FILE_public *)iob)->_p, j);
381 ((struct __FILE_public *)iob)->_p = ep;
382 ((struct __FILE_public *)iob)->_r -= j;
383 #else
384 j = ep - (unsigned char *) iob->_ptr;
385 memcpy (cp, iob->_ptr, j);
386 iob->_ptr = ep;
387 iob->_cnt -= j;
388 #endif
389 cp += j;
390 state = FLD;
391 goto finish;
392 }
393 c -= ep - bp;
394 bp = ep;
395 }
396 /*
397 * end of input or dest buffer - copy what we've found.
398 */
399 #ifdef LINUX_STDIO
400 c += bp - (unsigned char *) iob->_IO_read_ptr;
401 memcpy( cp, iob->_IO_read_ptr, c);
402 #elif defined(__DragonFly__)
403 c += bp - (unsigned char *) ((struct __FILE_public *)iob)->_p;
404 memcpy( cp, ((struct __FILE_public *)iob)->_p, c);
405 #else
406 c += bp - (unsigned char *) iob->_ptr;
407 memcpy( cp, iob->_ptr, c);
408 #endif
409 i -= c;
410 cp += c;
411 if (i <= 0) {
412 /* the dest buffer is full */
413 #ifdef LINUX_STDIO
414 iob->_IO_read_ptr += c;
415 #elif defined(__DragonFly__)
416 ((struct __FILE_public *)iob)->_r -= c;
417 ((struct __FILE_public *)iob)->_p += c;
418 #else
419 iob->_cnt -= c;
420 iob->_ptr += c;
421 #endif
422 state = FLDPLUS;
423 break;
424 }
425 /*
426 * There's one character left in the input buffer.
427 * Copy it & fill the buffer. If the last char
428 * was a newline and the next char is not whitespace,
429 * this is the end of the field. Otherwise loop.
430 */
431 --i;
432 #ifdef LINUX_STDIO
433 *cp++ = j = *(iob->_IO_read_ptr + c);
434 iob->_IO_read_ptr = iob->_IO_read_end;
435 c = __underflow(iob);
436 iob->_IO_read_ptr++; /* NOT automatic! */
437 #elif defined(__DragonFly__)
438 *cp++ =j = *(((struct __FILE_public *)iob)->_p + c);
439 c = __srget(iob);
440 #else
441 *cp++ = j = *(iob->_ptr + c);
442 c = _filbuf(iob);
443 #endif
444 if (c == EOF ||
445 ((j == '\0' || j == '\n') && c != ' ' && c != '\t')) {
446 if (c != EOF) {
447 #ifdef LINUX_STDIO
448 --iob->_IO_read_ptr;
449 #elif defined(__DragonFly__)
450 --((struct __FILE_public *)iob)->_p;
451 ++((struct __FILE_public *)iob)->_r;
452 #else
453 --iob->_ptr;
454 ++iob->_cnt;
455 #endif
456 }
457 state = FLD;
458 break;
459 }
460 }
461 break;
462
463 case BODY:
464 body:
465 /*
466 * get the message body up to bufsz characters or the
467 * end of the message. Sleazy hack: if bufsz is negative
468 * we assume that we were called to copy directly into
469 * the output buffer and we don't add an eos.
470 */
471 i = (bufsz < 0) ? -bufsz : bufsz-1;
472 #ifdef LINUX_STDIO
473 bp = (unsigned char *) --iob->_IO_read_ptr;
474 cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
475 #elif defined(__DragonFly__)
476 bp = (unsigned char *) --((struct __FILE_public *)iob)->_p;
477 cnt = ++((struct __FILE_public *)iob)->_r;
478 #else
479 bp = (unsigned char *) --iob->_ptr;
480 cnt = ++iob->_cnt;
481 #endif
482 c = (cnt < i ? cnt : i);
483 if (msg_style != MS_DEFAULT && c > 1) {
484 /*
485 * packed maildrop - only take up to the (possible)
486 * start of the next message. This "matchc" should
487 * probably be a Boyer-Moore matcher for non-vaxen,
488 * particularly since we have the alignment table
489 * all built for the end-of-buffer test (next).
490 * But our vax timings indicate that the "matchc"
491 * instruction is 50% faster than a carefully coded
492 * B.M. matcher for most strings. (So much for elegant
493 * algorithms vs. brute force.) Since I (currently)
494 * run MH on a vax, we use the matchc instruction. --vj
495 */
496 if ((ep = matchc( fdelimlen, fdelim, c, bp )))
497 c = ep - bp + 1;
498 else {
499 /*
500 * There's no delim in the buffer but there may be
501 * a partial one at the end. If so, we want to leave
502 * it so the "eom" check on the next call picks it up.
503 * Use a modified Boyer-Moore matcher to make this
504 * check relatively cheap. The first "if" figures
505 * out what position in the pattern matches the last
506 * character in the buffer. The inner "while" matches
507 * the pattern against the buffer, backwards starting
508 * at that position. Note that unless the buffer
509 * ends with one of the characters in the pattern
510 * (excluding the first and last), we do only one test.
511 */
512 ep = bp + c - 1;
513 if ((sp = pat_map[*ep])) {
514 do {
515 /* This if() is true unless (a) the buffer is too
516 * small to contain this delimiter prefix, or
517 * (b) it contains exactly enough chars for the
518 * delimiter prefix.
519 * For case (a) obviously we aren't going to match.
520 * For case (b), if the buffer really contained exactly
521 * a delim prefix, then the m_eom call at entry
522 * should have found it. Thus it's not a delim
523 * and we know we won't get a match.
524 */
525 if (((sp - fdelim) + 2) <= c) {
526 cp = sp;
527 /* Unfortunately although fdelim has a preceding NUL
528 * we can't use this as a sentinel in case the buffer
529 * contains a NUL in exactly the wrong place (this
530 * would cause us to run off the front of fdelim).
531 */
532 while (*--ep == *--cp)
533 if (cp < fdelim)
534 break;
535 if (cp < fdelim) {
536 /* we matched the entire delim prefix,
537 * so only take the buffer up to there.
538 * we know ep >= bp -- check above prevents underrun
539 */
540 c = (ep - bp) + 2;
541 break;
542 }
543 }
544 /* try matching one less char of delim string */
545 ep = bp + c - 1;
546 } while (--sp > fdelim);
547 }
548 }
549 }
550 memcpy( buf, bp, c );
551 #ifdef LINUX_STDIO
552 iob->_IO_read_ptr += c;
553 #elif defined(__DragonFly__)
554 ((struct __FILE_public *)iob)->_r -= c;
555 ((struct __FILE_public *)iob)->_p += c;
556 #else
557 iob->_cnt -= c;
558 iob->_ptr += c;
559 #endif
560 if (bufsz < 0) {
561 msg_count = c;
562 return (state);
563 }
564 cp = buf + c;
565 break;
566
567 default:
568 adios (NULL, "m_getfld() called with bogus state of %d", state);
569 }
570 finish:
571 *cp = 0;
572 msg_count = cp - buf;
573 return (state);
574 }
575
576
577 void
578 m_unknown(FILE *iob)
579 {
580 register int c;
581 register long pos;
582 char text[10];
583 register char *cp;
584 register char *delimstr;
585
586 /*
587 * Figure out what the message delimitter string is for this
588 * maildrop. (This used to be part of m_Eom but I didn't like
589 * the idea of an "if" statement that could only succeed on the
590 * first call to m_Eom getting executed on each call, i.e., at
591 * every newline in the message).
592 *
593 * If the first line of the maildrop is a Unix "From " line, we
594 * say the style is MBOX and eat the rest of the line. Otherwise
595 * we say the style is MMDF and look for the delimiter string
596 * specified when nmh was built (or from the mts.conf file).
597 */
598
599 msg_style = MS_UNKNOWN;
600
601 pos = ftell (iob);
602 if (fread (text, sizeof(*text), 5, iob) == 5
603 && strncmp (text, "From ", 5) == 0) {
604 msg_style = MS_MBOX;
605 delimstr = "\nFrom ";
606 while ((c = getc (iob)) != '\n' && c >= 0)
607 ;
608 } else {
609 /* not a Unix style maildrop */
610 fseek (iob, pos, SEEK_SET);
611 if (mmdlm2 == NULL || *mmdlm2 == 0)
612 mmdlm2 = "\001\001\001\001\n";
613 delimstr = mmdlm2;
614 msg_style = MS_MMDF;
615 }
616 c = strlen (delimstr);
617 fdelim = (unsigned char *) mh_xmalloc((size_t) (c + 3));
618 *fdelim++ = '\0';
619 *fdelim = '\n';
620 msg_delim = (char *)fdelim+1;
621 edelim = (unsigned char *)msg_delim+1;
622 fdelimlen = c + 1;
623 edelimlen = c - 1;
624 strcpy (msg_delim, delimstr);
625 delimend = (unsigned char *)msg_delim + edelimlen;
626 if (edelimlen <= 1)
627 adios (NULL, "maildrop delimiter must be at least 2 bytes");
628 /*
629 * build a Boyer-Moore end-position map for the matcher in m_getfld.
630 * N.B. - we don't match just the first char (since it's the newline
631 * separator) or the last char (since the matchc would have found it
632 * if it was a real delim).
633 */
634 pat_map = (unsigned char **) calloc (256, sizeof(unsigned char *));
635
636 for (cp = (char *) fdelim + 1; cp < (char *) delimend; cp++ )
637 pat_map[(unsigned char)*cp] = (unsigned char *) cp;
638
639 if (msg_style == MS_MMDF) {
640 /* flush extra msg hdrs */
641 while ((c = Getc(iob)) >= 0 && eom (c, iob))
642 ;
643 if (c >= 0)
644 ungetc(c, iob);
645 }
646 }
647
648
649 void
650 m_eomsbr (int (*action)(int))
651 {
652 if ((eom_action = action)) {
653 msg_style = MS_MSH;
654 *msg_delim = 0;
655 fdelimlen = 1;
656 delimend = fdelim;
657 } else {
658 msg_style = MS_MMDF;
659 msg_delim = (char *)fdelim + 1;
660 fdelimlen = strlen((char *)fdelim);
661 delimend = (unsigned char *)(msg_delim + edelimlen);
662 }
663 }
664
665
666 /*
667 * test for msg delimiter string
668 */
669
670 static int
671 m_Eom (int c, FILE *iob)
672 {
673 register long pos = 0L;
674 register int i;
675 char text[10];
676
677 pos = ftell (iob);
678 if ((i = fread (text, sizeof *text, edelimlen, iob)) != edelimlen
679 || strncmp (text, (char *)edelim, edelimlen)) {
680 if (i == 0 && msg_style == MS_MBOX)
681 /* the final newline in the (brain damaged) unix-format
682 * maildrop is part of the delimitter - delete it.
683 */
684 return 1;
685
686 #if 0
687 fseek (iob, pos, SEEK_SET);
688 #endif
689
690 fseek (iob, (long)(pos-1), SEEK_SET);
691 getc (iob); /* should be OK */
692 return 0;
693 }
694
695 if (msg_style == MS_MBOX) {
696 while ((c = getc (iob)) != '\n')
697 if (c < 0)
698 break;
699 }
700
701 return 1;
702 }
703
704
/*
 * Look for the "patln"-byte pattern "pat" within the first "strln"
 * bytes of "str".  Returns the address of the first occurrence, or 0
 * when the pattern does not occur.
 */
static unsigned char *
matchc(int patln, char *pat, int strln, char *str)
{
    char *last_start = str + strln - patln;  /* last place a match can begin */
    char *pat_end = pat + patln;
    char *pat_rest = pat + 1;
    char lead = *pat;
    char *cursor = str;
    char *s;
    char *p;

    for (;;) {
        /* move just past the next occurrence of the leading byte */
        for (;;) {
            if (*cursor++ == lead)
                break;
            if (cursor > last_start)
                return 0;
        }
        if (cursor > last_start + 1)
            return 0;

        /* compare the remainder of the pattern */
        for (s = cursor, p = pat_rest; p < pat_end; s++, p++) {
            if (*s != *p)
                break;
        }
        if (p >= pat_end)
            return (unsigned char *) (cursor - 1);
    }
}
727
728
/*
 * Locate character "term" in the next "cnt" characters of "src".
 * If found, return its address, otherwise return 0.
 *
 * A "cnt" of zero or less finds nothing and does not touch "src".
 */

static unsigned char *
locc(int cnt, unsigned char *src, unsigned char term)
{
    if (cnt <= 0)
        return NULL;

    return memchr (src, term, (size_t) cnt);
}
741