#include <h/tws.h>
#include <h/utils.h>
-#ifdef _FSTDIO
-# define _ptr _p /* Gag */
-# define _cnt _w /* Wretch */
-#endif
-
-#define MAXSCANL 256 /* longest possible scan line */
-
/*
* Buffer size for content part of header fields. We want this
* to be large enough so that we don't do a lot of extra FLDPLUS
#define SBUFSIZ 512
static struct format *fmt;
-#ifdef JLR
-static struct format *fmt_top;
-#endif /* JLR */
-
static struct comp *datecomp; /* pntr to "date" comp */
static struct comp *bodycomp; /* pntr to "body" pseudo-comp *
* (if referenced) */
static int dat[5]; /* aux. data for format routine */
-char *scanl = 0; /* text of most recent scanline */
+static m_getfld_state_t gstate; /* for accessor functions below */
#define DIEWRERR() adios (scnmsg, "write error on")
*/
static int mh_fputs(char *, FILE *);
-#ifdef MULTIBYTE_SUPPORT
-#define SCAN_CHARWIDTH MB_CUR_MAX
-#else
-#define SCAN_CHARWIDTH 1
-#endif
-
int
scan (FILE *inb, int innum, int outnum, char *nfs, int width, int curflg,
- int unseen, char *folder, long size, int noisy)
+ int unseen, char *folder, long size, int noisy, charstring_t *scanl)
{
int i, compnum, encrypted, state;
- unsigned char *cp, *tmpbuf;
- char **nxtbuf;
+ char *cp, *tmpbuf, *startbody, **nxtbuf;
char *saved_c_text = NULL;
struct comp *cptr;
struct comp **savecomp;
char *scnmsg = NULL;
FILE *scnout = NULL;
char name[NAMESZ];
+ int bufsz;
static int rlwidth, slwidth;
- /* first-time only initialization */
- if (!scanl) {
- if (width == 0) {
+ /* first-time only initialization, which will always happen the
+ way the code is now, with callers initializing *scanl to NULL.
+ scanl used to be a global. */
+ if (! *scanl) {
+ if (width == -1) {
+ /* Default: width of the terminal, but at least WIDTH/2. */
if ((width = sc_width ()) < WIDTH/2)
width = WIDTH/2;
- else if (width > MAXSCANL)
- width = MAXSCANL;
+ } else if (width == 0) {
+ /* Unlimited width. */
+ width = INT_MAX;
}
dat[3] = slwidth = width;
- scanl = (char *) mh_xmalloc((size_t) SCAN_CHARWIDTH * (slwidth + 2) );
+ *scanl = charstring_create (width < NMH_BUFSIZ ? width : NMH_BUFSIZ);
if (outnum)
umask(~m_gmprot());
/* Compile format string */
- ncomps = fmt_compile (nfs, &fmt) + 1;
-
-#ifdef JLR
- fmt_top = fmt;
-#endif /* JLR */
- FINDCOMP(bodycomp, "body");
- FINDCOMP(datecomp, "date");
- FINDCOMP(cptr, "folder");
+ ncomps = fmt_compile (nfs, &fmt, 1) + 2;
+
+ bodycomp = fmt_findcomp("body");
+ datecomp = fmt_findcomp("date");
+ cptr = fmt_findcomp("folder");
if (cptr && folder)
- cptr->c_text = folder;
- FINDCOMP(cptr, "encrypted");
- if (!cptr)
- if ((cptr = (struct comp *) calloc (1, sizeof(*cptr)))) {
- cptr->c_name = "encrypted";
- cptr->c_next = wantcomp[i = CHASH (cptr->c_name)];
- wantcomp[i] = cptr;
+ cptr->c_text = mh_xstrdup(folder);
+ if (fmt_addcompentry("encrypted")) {
ncomps++;
}
- FINDCOMP (cptr, "dtimenow");
+ cptr = fmt_findcomp("dtimenow");
if (cptr)
cptr->c_text = getcpy(dtimenow (0));
- nxtbuf = compbuffers = (char **) calloc((size_t) ncomps, sizeof(char *));
- if (nxtbuf == NULL)
- adios (NULL, "unable to allocate component buffers");
- used_buf = (struct comp **) calloc((size_t) (ncomps+1),
- sizeof(struct comp *));
- if (used_buf == NULL)
- adios (NULL, "unable to allocate component buffer stack");
+
+ /*
+ * In other programs I got rid of this complicated buffer switching,
+ * but since scan reads lots of messages at once, I decided to keep
+ * this complicated memory management here; otherwise there was
+ * the potential for a lot of malloc()s and free()s, and I could
+ * see the malloc() pool really getting fragmented. Maybe it
+ * wouldn't be an issue in practice; perhaps this will get
+ * revisited someday.
+ *
+ * So, some notes for what's going on:
+ *
+ * nxtbuf is an array of pointers that contains malloc()'d buffers
+ * to hold our component text. used_buf is an array of struct comp
+ * pointers that holds pointers to component structures we found while
+ * processing a message.
+ *
+ * We read in the message with m_getfld(), using "tmpbuf" as our
+ * input buffer. tmpbuf is set at the start of message processing
+ * to the first buffer in our buffer pool (nxtbuf).
+ *
+ * Every time we find a component we care about, we set that component's
+ * text buffer to the current value of tmpbuf, and then switch tmpbuf
+ * to the next buffer in our pool. We also add that component to
+ * our used_buf pool.
+ *
+ * When we're done, we go back and zero out all of the component
+ * text buffer pointers that we saved in used_buf.
+ *
+ * Note that this means c_text memory is NOT owned by the fmt_module
+ * and it's our responsibility to free it.
+ */
+
+ nxtbuf = compbuffers = mh_xcalloc(ncomps, sizeof *nxtbuf);
+ used_buf = mh_xcalloc(ncomps + 1, sizeof *used_buf);
used_buf += ncomps+1; *--used_buf = 0;
- rlwidth = bodycomp && (width > SBUFSIZ) ? width : SBUFSIZ;
+ rlwidth = bodycomp && (width > SBUFSIZ)
+ ? min (width, NMH_BUFSIZ)
+ : SBUFSIZ;
for (i = ncomps; i--; )
*nxtbuf++ = mh_xmalloc(rlwidth);
}
nxtbuf = compbuffers;
savecomp = used_buf;
tmpbuf = *nxtbuf++;
+ startbody = NULL;
dat[0] = innum ? innum : outnum;
dat[1] = curflg;
dat[4] = unseen;
* Get the first field. If the message is non-empty
* and we're doing an "inc", open the output file.
*/
- if ((state = m_getfld (FLD, name, tmpbuf, rlwidth, inb)) == FILEEOF) {
+ bufsz = rlwidth;
+ m_getfld_state_reset (&gstate);
+ if ((state = m_getfld (&gstate, name, tmpbuf, &bufsz, inb)) == FILEEOF) {
if (ferror(inb)) {
advise("read", "unable to"); /* "read error" */
return SCNFAT;
- } else {
- return SCNEOF;
}
+ return SCNEOF;
}
if (outnum) {
}
/* scan - main loop */
- for (compnum = 1; ; state = m_getfld (state, name, tmpbuf, rlwidth, inb)) {
+ for (compnum = 1; ;
+ bufsz = rlwidth, state = m_getfld (&gstate, name, tmpbuf, &bufsz, inb)) {
switch (state) {
case FLD:
case FLDPLUS:
* buffer as the component temp buffer (buffer switching
* saves an extra copy of the component text).
*/
- if ((cptr = wantcomp[CHASH(name)])) {
- do {
- if (!mh_strcasecmp(name, cptr->c_name)) {
- if (! cptr->c_text) {
- cptr->c_text = tmpbuf;
- for (cp = tmpbuf + strlen (tmpbuf) - 1;
+ if ((cptr = fmt_findcasecomp(name))) {
+ if (! cptr->c_text) {
+ cptr->c_text = tmpbuf;
+ for (cp = tmpbuf + strlen (tmpbuf) - 1;
cp >= tmpbuf; cp--)
- if (isspace (*cp))
- *cp = 0;
- else
- break;
- *--savecomp = cptr;
- tmpbuf = *nxtbuf++;
- }
- break;
- }
- } while ((cptr = cptr->c_next));
+ if (isspace ((unsigned char) *cp))
+ *cp = 0;
+ else
+ break;
+ *--savecomp = cptr;
+ tmpbuf = *nxtbuf++;
+ }
}
while (state == FLDPLUS) {
- state = m_getfld (state, name, tmpbuf, rlwidth, inb);
+ bufsz = rlwidth;
+ state = m_getfld (&gstate, name, tmpbuf, &bufsz, inb);
if (outnum)
FPUTS (tmpbuf);
}
break;
case BODY:
- compnum = -1;
/*
* A slight hack ... if we have less than rlwidth characters
* in the buffer, call m_getfld again.
*/
if ((i = strlen(tmpbuf)) < rlwidth) {
- state = m_getfld (state, name, tmpbuf + i,
- rlwidth - i, inb);
+ bufsz = rlwidth - i;
+ state = m_getfld (&gstate, name, tmpbuf + i, &bufsz, inb);
}
+
if (! outnum) {
state = FILEEOF; /* stop now if scan cmd */
+ if (bodycomp && startbody == NULL)
+ startbody = tmpbuf;
goto finished;
}
if (putc ('\n', scnout) == EOF) DIEWRERR();
FPUTS (tmpbuf);
/*
- * performance hack: some people like to run "inc" on
- * things like net.sources or large digests. We do a
- * copy directly into the output buffer rather than
- * going through an intermediate buffer.
+ * The previous code here used to call m_getfld() using
+ * pointers to the underlying output stdio buffers to
+ * avoid the extra copy. Tests by Markus Schnalke show
+ * no noticeable performance loss on larger mailboxes
+ * if we incur an extra copy, and messing around with
+ * internal stdio buffers is becoming more and more
+ * unportable as time goes on. So from now on just deal
+ * with the overhead of an extra copy.
*
- * We need the amount of data m_getfld found & don't
- * want to do a strlen on the long buffer so there's
- * a hack in m_getfld to save the amount of data it
- * returned in the global "msg_count".
+ * Subtle change - with the previous code tmpbuf wasn't
+ * used, so we could reuse it for the {body} component.
+ * Now since we're using tmpbuf as our read buffer we
+ * need to save the beginning of the body for later.
+ * See the above (and below) use of startbody.
*/
body:;
+ if (bodycomp && startbody == NULL) {
+ startbody = tmpbuf;
+ tmpbuf = *nxtbuf++;
+ }
+
while (state == BODY) {
-#ifdef LINUX_STDIO
- if (scnout->_IO_write_ptr == scnout->_IO_write_end) {
-#elif defined(__DragonFly__)
- if (((struct __FILE_public *)scnout)->_w <= 0) {
-#else
- if (scnout->_cnt <= 0) {
-#endif
- if (fflush(scnout) == EOF)
- DIEWRERR ();
- }
-#ifdef LINUX_STDIO
- state = m_getfld(state, name, scnout->_IO_write_ptr,
- (long)scnout->_IO_write_ptr-(long)scnout->_IO_write_end , inb);
- scnout->_IO_write_ptr += msg_count;
-#elif defined(__DragonFly__)
- state = m_getfld( state, name, ((struct __FILE_public *)scnout)->_p, -(((struct __FILE_public *)scnout)->_w), inb );
- ((struct __FILE_public *)scnout)->_w -= msg_count;
- ((struct __FILE_public *)scnout)->_p += msg_count;
-#else
- state = m_getfld( state, name, scnout->_ptr, -(scnout->_cnt), inb );
- scnout->_cnt -= msg_count;
- scnout->_ptr += msg_count;
-#endif
+ bufsz = rlwidth;
+ state = m_getfld (&gstate, name, tmpbuf, &bufsz, inb);
+ FPUTS(tmpbuf);
}
goto finished;
case LENERR:
case FMTERR:
- fprintf (stderr,
- innum ? "??Format error (message %d) in "
- : "??Format error in ",
- outnum ? outnum : innum);
+ if (innum)
+ fprintf (stderr, "??Format error (message %d) in ",
+ outnum ? outnum : innum);
+ else
+ fprintf (stderr, "??Format error in ");
+
fprintf (stderr, "component %d\n", compnum);
if (outnum) {
/* Save and restore buffer so we don't trash our dynamic pool! */
if (bodycomp) {
saved_c_text = bodycomp->c_text;
- bodycomp->c_text = tmpbuf;
+ bodycomp->c_text = startbody;
}
if (size)
if (datecomp) {
if (! datecomp->c_text) {
if (datecomp->c_tws == NULL)
- datecomp->c_tws = (struct tws *)
- calloc((size_t) 1, sizeof(*datecomp->c_tws));
- if (datecomp->c_tws == NULL)
- adios (NULL, "unable to allocate tws buffer");
+ NEW0(datecomp->c_tws);
*datecomp->c_tws = *dlocaltime ((time_t *) &st.st_mtime);
datecomp->c_flags |= CF_DATEFAB|CF_TRUE;
} else {
}
}
- fmt_scan (fmt, scanl, slwidth, dat);
-
-#if 0
- fmt = fmt_scan (fmt, scanl, slwidth, dat);
- if (!fmt)
- fmt = fmt_top; /* reset for old format files */
-#endif
+ fmt_scan (fmt, *scanl, slwidth, dat, NULL);
if (bodycomp)
bodycomp->c_text = saved_c_text;
if (noisy)
- fputs (scanl, stdout);
+ fputs (charstring_buffer (*scanl), stdout);
- FINDCOMP (cptr, "encrypted");
+ cptr = fmt_findcomp ("encrypted");
encrypted = cptr && cptr->c_text;
/* return dynamically allocated buffers to pool */
while ((cptr = *savecomp++)) {
- *--nxtbuf = cptr->c_text;
cptr->c_text = NULL;
}
- *--nxtbuf = tmpbuf;
if (outnum && (ferror(scnout) || fclose (scnout) == EOF))
DIEWRERR();
}
-/*
- * Cheat: we are loaded with adrparse, which wants a routine called
- * OfficialName(). We call adrparse:getm() with the correct arguments
- * to prevent OfficialName() from being called. Hence, the following
- * is to keep the loader happy.
- */
-char *
-OfficialName (char *name)
-{
- return name;
-}
-
-
static int
mh_fputs(char *s, FILE *stream)
{
return (0);
}
+/* The following two functions allow access to the global gstate above. */
+/*
+ * scan_finished - accessor for the file-scope m_getfld state.
+ *
+ * Passes the address of the static `gstate` object to
+ * m_getfld_state_destroy().  Takes no arguments and returns nothing.
+ * NOTE(review): presumably meant to be called once the caller is done
+ * invoking scan(); confirm against callers.
+ *
+ * The parameter list is spelled (void) so that this definition is a
+ * real prototype and calls with stray arguments are diagnosed.
+ */
+void
+scan_finished (void) {
+    m_getfld_state_destroy (&gstate);
+}
+
+/*
+ * scan_detect_mbox_style - accessor for the file-scope m_getfld state.
+ *
+ * Forwards the stream `f` together with the address of the static
+ * `gstate` object to m_unknown().  Returns nothing.
+ */
+void
+scan_detect_mbox_style (FILE *f)
+{
+    m_getfld_state_t *state_ref = &gstate;
+
+    m_unknown (state_ref, f);
+}