-
-/*
- * m_getfld.c -- read/parse a message
+/* m_getfld.c -- read/parse a message
*
* This code is Copyright (c) 2002, by the authors of nmh. See the
* COPYRIGHT file in the root directory of the nmh distribution for
names are typically short (~8 char) and the loop that extracts them
might terminate on a colon, newline or max width. I considered
using a Vax "scanc" to locate the end of the field followed by a
- "bcopy" but the routine call overhead on a Vax is too large for this
+ "memmove" but the routine call overhead on a Vax is too large for this
to work on short names. If Berkeley ever makes "inline" part of the
C optimiser (so things like "scanc" turn into inline instructions) a
change here would be worthwhile.
so message bodies average at least a few hundred characters.
Assuming your system uses reasonably sized stdio buffers (1K or
more), this routine should be able to remove the body in large
- (>500 byte) chunks. The makes the cost of a call to "bcopy"
+ (>500 byte) chunks. This makes the cost of a call to "memmove"
small but there is a premium on checking for the eom in packed
maildrops. The eom pattern is always a simple string so we can
construct an efficient pattern matcher for it (e.g., a Vax "matchc"
/*
* static prototypes
*/
-struct m_getfld_state;
static int m_Eom (m_getfld_state_t);
-static char *matchc(int, char *, int, char *);
#define eom(c,s) (s->msg_style != MS_DEFAULT && \
((c) == *s->msg_delim && m_Eom(s)))
m_getfld_state_init (m_getfld_state_t *gstate, FILE *iob) {
m_getfld_state_t s;
- s = *gstate = (m_getfld_state_t) mh_xmalloc(sizeof (struct m_getfld_state));
+ NEW(s);
+ *gstate = s;
s->readpos = s->end = s->msg_buf;
s->bytes_read = s->total_bytes_read = 0;
s->last_caller_pos = s->last_internal_pos = 0;
ssize_t retain = s->edelimlen;
size_t num_read;
- if (retain < s->end - s->readpos) retain = s->end - s->readpos;
+ if (retain < s->end - s->readpos)
+ retain = s->end - s->readpos;
assert (retain <= s->readpos - s->msg_buf);
/* Move what we want to retain at end of the buffer to the beginning. */
Peek (m_getfld_state_t s) {
if (s->end - s->readpos < 1 && read_more (s) == 0) {
return EOF;
- } else {
- return s->readpos < s->end ? (unsigned char) *s->readpos : EOF;
}
+ return s->readpos < s->end ? (unsigned char) *s->readpos : EOF;
}
static int
Ungetc (int c, m_getfld_state_t s) {
if (s->readpos == s->msg_buf) {
return EOF;
- } else {
- --s->bytes_read;
- return *--s->readpos = (unsigned char) c;
}
+ --s->bytes_read;
+ return *--s->readpos = (unsigned char) c;
}
FILE *iob)
{
m_getfld_state_t s;
- register char *cp;
- register int max, n, c;
+ char *cp;
+ int max, n, c;
enter_getfld (gstate, iob);
s = *gstate;
int next_char;
if (c == EOF || (next_char = Peek (s)) == EOF) {
*bufsz = *cp = *buf = 0;
- advise (NULL, "eof encountered in field \"%s\"", name);
+ inform("eof encountered in field \"%s\"", name);
leave_getfld (s);
return s->state = FMTERR;
}
if (*bufsz < n + 1) {
/* No, it can't. Oh well, guess we'll blow up. */
*bufsz = *cp = *buf = 0;
- advise (NULL, "eol encountered in field \"%s\"", name);
+ inform("eol encountered in field \"%s\"", name);
s->state = FMTERR;
break;
}
memcpy (buf, name, n - 1);
buf[n - 1] = '\n';
buf[n] = '\0';
+ /* Indicate this wasn't a header field using a character
+ that can't appear in a header field. */
+ name[0] = ':';
/* The last character read was '\n'. s->bytes_read
(and n) include that, but it was not put into the
name array in the for loop above. So subtract 1. */
*bufsz = --s->bytes_read; /* == n - 1 */
leave_getfld (s);
return s->state = BODY;
- } else if (max <= n) {
+ }
+ if (max <= n) {
/* By design, the loop above discards the last character
it had read. It's in c, use it. */
*cp++ = c;
*bufsz = *cp = *buf = 0;
- advise (NULL, "field name \"%s\" exceeds %d bytes", name,
+ inform("field name \"%s\" exceeds %d bytes", name,
NAMESZ - 2);
s->state = LENERR;
break;
while (isspace ((unsigned char) *--cp) && cp >= name) continue;
*++cp = 0;
/* readpos points to the first character of the field body. */
- /* fall through */
+ /* FALLTHRU */
case FLDPLUS: {
/*
n = 0;
for (finished = 0; ! finished; ) {
while (c != '\n' && c != EOF && n++ < max) {
- if ((c = Getc (s)) != EOF) { *cp++ = c; }
+ if ((c = Getc (s)) != EOF)
+ *cp++ = c;
}
- if (c != EOF) c = Peek (s);
+ if (c != EOF)
+ c = Peek (s);
if (max < n) {
/* The dest buffer is full. Need to back the read
pointer up by one because when m_getfld() is
*/
char *bp;
+ name[0] = '\0';
max = *bufsz-1;
/* Back up and store the current position. */
bp = --s->readpos;
*/
char *ep;
- if ((ep = matchc( s->fdelimlen, s->fdelim, c, bp )))
+ if ((ep = memmem(bp, c, s->fdelim, s->fdelimlen)))
c = ep - bp + 1;
else {
/*
m_unknown(m_getfld_state_t *gstate, FILE *iob)
{
m_getfld_state_t s;
- register int c;
+ int c;
char text[MAX_DELIMITER_SIZE];
char from[] = "From ";
- register char *cp;
- register char *delimstr;
+ char *cp;
+ char *delimstr;
unsigned int i;
enter_getfld (gstate, iob);
s = *gstate;
/*
- * Figure out what the message delimitter string is for this
+ * Figure out what the message delimiter string is for this
* maildrop. (This used to be part of m_Eom but I didn't like
* the idea of an "if" statement that could only succeed on the
* first call to m_Eom getting executed on each call, i.e., at
if ((c = Getc (s)) == EOF) {
*cp = '\0';
break;
- } else {
- *cp = c;
}
+ *cp = c;
}
if (i == sizeof from-1 && strncmp (text, "From ", sizeof from-1) == 0) {
delimstr = mmdlm2;
s->msg_style = MS_MMDF;
}
+
c = strlen (delimstr);
- s->fdelim = mh_xmalloc (c + 3);
+ s->fdelim = mh_xmalloc (c + 3); /* \0, \n, delimstr, \0 */
*s->fdelim++ = '\0';
*s->fdelim = '\n';
- s->msg_delim = s->fdelim+1;
- s->edelim = s->msg_delim+1;
s->fdelimlen = c + 1;
- s->edelimlen = c - 1; /* == strlen (delimstr) */
+ s->msg_delim = s->fdelim+1;
strcpy (s->msg_delim, delimstr);
+ s->edelim = s->msg_delim+1;
+ s->edelimlen = c - 1;
s->delimend = s->msg_delim + s->edelimlen;
if (s->edelimlen <= 1)
adios (NULL, "maildrop delimiter must be at least 2 bytes");
+
+ /* The malloc'd memory at s->fdelim-1 is now referenced several times:
+ *
+ * delimstr "\nFrom " "\001\001\001\001\n"
+ * c 6 5
+ * s->fdelim \0"\n\nFrom " \0"\n\001\001\001\001\n"
+ * s->fdelimlen 7 6
+ * s->msg_delim "\nFrom " "\001\001\001\001\n"
+ * s->edelim "From " "\001\001\001\n"
+ * s->edelimlen 5 4
+ * s->delimend " " "\n"
+ */
+
/*
* build a Boyer-Moore end-position map for the matcher in m_getfld.
* N.B. - we don't match just the first char (since it's the newline
static int
m_Eom (m_getfld_state_t s)
{
- register int i;
+ int i;
char text[MAX_DELIMITER_SIZE];
char *cp;
if ((c2 = Getc (s)) == EOF) {
*cp = '\0';
break;
- } else {
- *cp = c2;
}
+ *cp = c2;
}
if (i != s->edelimlen ||
strncmp (text, (char *)s->edelim, s->edelimlen)) {
if (i == 0 && s->msg_style == MS_MBOX)
/* the final newline in the (brain damaged) unix-format
- * maildrop is part of the delimitter - delete it.
+ * maildrop is part of the delimiter - delete it.
*/
return 1;
return 1;
}
-
-
-static char *
-matchc(int patln, char *pat, int strln, char *str)
-{
- register char *es = str + strln - patln;
- register char *sp;
- register char *pp;
- register char *ep = pat + patln;
- register char pc = *pat++;
-
- for(;;) {
- while (pc != *str++)
- if (str > es)
- return 0;
- if (str > es+1)
- return 0;
- sp = str; pp = pat;
- while (pp < ep && *sp++ == *pp)
- pp++;
- if (pp >= ep)
- return --str;
- }
-}