X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/a1e2db74e04c31153801efabcc31b6f68587eeb4..63621a81d16ab743de6b57d47578a9a2c670ad22:/uip/scansbr.c diff --git a/uip/scansbr.c b/uip/scansbr.c index 3d66bcf5..457379cd 100644 --- a/uip/scansbr.c +++ b/uip/scansbr.c @@ -1,6 +1,4 @@ - -/* - * scansbr.c -- routines to help scan along... +/* scansbr.c -- routines to help scan along... * * This code is Copyright (c) 2002, by the authors of nmh. See the * COPYRIGHT file in the root directory of the nmh distribution for @@ -13,28 +11,9 @@ #include #include #include - -#ifdef _FSTDIO -# define _ptr _p /* Gag */ -# define _cnt _w /* Wretch */ -#endif - -#define MAXSCANL 256 /* longest possible scan line */ - -/* - * Buffer size for content part of header fields. We want this - * to be large enough so that we don't do a lot of extra FLDPLUS - * calls on m_getfld but small enough so that we don't snarf - * the entire message body when we're only going to display 30 - * characters of it. - */ -#define SBUFSIZ 512 +#include "sbr/terminal.h" static struct format *fmt; -#ifdef JLR -static struct format *fmt_top; -#endif /* JLR */ - static struct comp *datecomp; /* pntr to "date" comp */ static struct comp *bodycomp; /* pntr to "body" pseudo-comp * * (if referenced) */ @@ -44,85 +23,116 @@ static struct comp **used_buf = 0; /* stack for comp that use buffers */ static int dat[5]; /* aux. data for format routine */ -char *scanl = 0; /* text of most recent scanline */ +static m_getfld_state_t gstate; /* for accessor functions below */ #define DIEWRERR() adios (scnmsg, "write error on") +#define PUTC(c) \ + if (putc((c), scnout) == EOF) \ + DIEWRERR(); + #define FPUTS(buf) {\ - if (mh_fputs(buf,scnout) == EOF)\ + if (fputs(buf,scnout) == EOF)\ DIEWRERR();\ } -/* - * prototypes - */ -static int mh_fputs(char *, FILE *); - -#ifdef MULTIBYTE_SUPPORT -#define SCAN_CHARWIDTH MB_CUR_MAX -#else -#define SCAN_CHARWIDTH 1 -#endif +/* outnum determines how the input from inb is copied. If positive then + * it is the number of the message to create, e.g. for inc(1), and all + * of the email is copied into that message, with some tweaks. If 0, + * e.g. `scan 42', then reading inb can dubiously stop after a whole + * buffer of body, even though this might not be enough to fulfill the + * scan format and width. Or if -1 then no copy is being created, but + * all of inb must be read because the next message must be found, e.g. + * `scan -file foo.mbox'. */ int scan (FILE *inb, int innum, int outnum, char *nfs, int width, int curflg, - int unseen, char *folder, long size, int noisy) + int unseen, char *folder, long size, int noisy, charstring_t *scanl) { + static bool deja_vu; + static int tty_width; int i, compnum, encrypted, state; - unsigned char *cp, *tmpbuf; - char **nxtbuf; + char *cp, *tmpbuf, *startbody, **nxtbuf; char *saved_c_text = NULL; struct comp *cptr; struct comp **savecomp; char *scnmsg = NULL; FILE *scnout = NULL; char name[NAMESZ]; + int bufsz; static int rlwidth, slwidth; - /* first-time only initialization */ - if (!scanl) { - if (width == 0) { - if ((width = sc_width ()) < WIDTH/2) - width = WIDTH/2; - else if (width > MAXSCANL) - width = MAXSCANL; + /* first-time only initialization, which will always happen the + way the code is now, with callers initializing *scanl to NULL. + scanl used to be a global. */ + if (! *scanl) { + if (width == -1) { + if (!deja_vu) { + deja_vu = true; + tty_width = sc_width(); + } + + width = max(tty_width, WIDTH / 2); + } else if (width == 0) { + /* Unlimited width. */ + width = INT_MAX; } dat[3] = slwidth = width; - scanl = (char *) mh_xmalloc((size_t) SCAN_CHARWIDTH * (slwidth + 2) ); + *scanl = charstring_create (min(width, NMH_BUFSIZ)); if (outnum) umask(~m_gmprot()); /* Compile format string */ - ncomps = fmt_compile (nfs, &fmt) + 1; - -#ifdef JLR - fmt_top = fmt; -#endif /* JLR */ - FINDCOMP(bodycomp, "body"); - FINDCOMP(datecomp, "date"); - FINDCOMP(cptr, "folder"); + ncomps = fmt_compile (nfs, &fmt, 1) + 2; + + bodycomp = fmt_findcomp("body"); + datecomp = fmt_findcomp("date"); + cptr = fmt_findcomp("folder"); if (cptr && folder) - cptr->c_text = folder; - FINDCOMP(cptr, "encrypted"); - if (!cptr) - if ((cptr = (struct comp *) calloc (1, sizeof(*cptr)))) { - cptr->c_name = "encrypted"; - cptr->c_next = wantcomp[i = CHASH (cptr->c_name)]; - wantcomp[i] = cptr; + cptr->c_text = mh_xstrdup(folder); + if (fmt_addcompentry("encrypted")) { ncomps++; } - FINDCOMP (cptr, "dtimenow"); + cptr = fmt_findcomp("dtimenow"); if (cptr) cptr->c_text = getcpy(dtimenow (0)); - nxtbuf = compbuffers = (char **) calloc((size_t) ncomps, sizeof(char *)); - if (nxtbuf == NULL) - adios (NULL, "unable to allocate component buffers"); - used_buf = (struct comp **) calloc((size_t) (ncomps+1), - sizeof(struct comp *)); - if (used_buf == NULL) - adios (NULL, "unable to allocate component buffer stack"); + + /* + * In other programs I got rid of this complicated buffer switching, + * but since scan reads lots of messages at once and this complicated + * memory management, I decided to keep it; otherwise there was + * the potential for a lot of malloc() and free()s, and I could + * see the malloc() pool really getting fragmented. Maybe it + * wouldn't be an issue in practice; perhaps this will get + * revisited someday. + * + * So, some notes for what's going on: + * + * nxtbuf is an array of pointers that contains malloc()'d buffers + * to hold our component text. used_buf is an array of struct comp + * pointers that holds pointers to component structures we found while + * processing a message. + * + * We read in the message with m_getfld(), using "tmpbuf" as our + * input buffer. tmpbuf is set at the start of message processing + * to the first buffer in our buffer pool (nxtbuf). + * + * Every time we find a component we care about, we set that component's + * text buffer to the current value of tmpbuf, and then switch tmpbuf + * to the next buffer in our pool. We also add that component to + * our used_buf pool. + * + * When we're done, we go back and zero out all of the component + * text buffer pointers that we saved in used_buf. + * + * Note that this means c_text memory is NOT owned by the fmt_module + * and it's our responsibility to free it. + */ + + nxtbuf = compbuffers = mh_xcalloc(ncomps, sizeof *nxtbuf); + used_buf = mh_xcalloc(ncomps + 1, sizeof *used_buf); used_buf += ncomps+1; *--used_buf = 0; - rlwidth = bodycomp && (width > SBUFSIZ) ? width : SBUFSIZ; + rlwidth = NMH_BUFSIZ; for (i = ncomps; i--; ) *nxtbuf++ = mh_xmalloc(rlwidth); } @@ -133,6 +143,7 @@ scan (FILE *inb, int innum, int outnum, char *nfs, int width, int curflg, nxtbuf = compbuffers; savecomp = used_buf; tmpbuf = *nxtbuf++; + startbody = NULL; dat[0] = innum ? innum : outnum; dat[1] = curflg; dat[4] = unseen; @@ -141,36 +152,34 @@ scan (FILE *inb, int innum, int outnum, char *nfs, int width, int curflg, * Get the first field. If the message is non-empty * and we're doing an "inc", open the output file. */ - if ((state = m_getfld (FLD, name, tmpbuf, rlwidth, inb)) == FILEEOF) { + bufsz = rlwidth; + m_getfld_state_reset (&gstate); + if ((state = m_getfld (&gstate, name, tmpbuf, &bufsz, inb)) == FILEEOF) { if (ferror(inb)) { advise("read", "unable to"); /* "read error" */ return SCNFAT; - } else { - return SCNEOF; } + return SCNEOF; } - if (outnum) { - if (outnum > 0) { - scnmsg = m_name (outnum); - if (*scnmsg == '?') /* msg num out of range */ - return SCNNUM; - } else { - scnmsg = "/dev/null"; - } - if ((scnout = fopen (scnmsg, "w")) == NULL) - adios (scnmsg, "unable to write"); + if (outnum > 0) { + scnmsg = m_name (outnum); + if (*scnmsg == '?') /* msg num out of range */ + return SCNNUM; + if ((scnout = fopen (scnmsg, "w")) == NULL) + adios (scnmsg, "unable to write"); } /* scan - main loop */ - for (compnum = 1; ; state = m_getfld (state, name, tmpbuf, rlwidth, inb)) { + for (compnum = 1; ; + bufsz = rlwidth, state = m_getfld (&gstate, name, tmpbuf, &bufsz, inb)) { switch (state) { case FLD: case FLDPLUS: compnum++; - if (outnum) { + if (scnout) { FPUTS (name); - if ( putc (':', scnout) == EOF) DIEWRERR(); + PUTC(':'); FPUTS (tmpbuf); } /* @@ -179,104 +188,97 @@ scan (FILE *inb, int innum, int outnum, char *nfs, int width, int curflg, * buffer as the component temp buffer (buffer switching * saves an extra copy of the component text). */ - if ((cptr = wantcomp[CHASH(name)])) { - do { - if (!mh_strcasecmp(name, cptr->c_name)) { - if (! cptr->c_text) { - cptr->c_text = tmpbuf; - for (cp = tmpbuf + strlen (tmpbuf) - 1; + if ((cptr = fmt_findcasecomp(name))) { + if (! cptr->c_text) { + cptr->c_text = tmpbuf; + for (cp = tmpbuf + strlen (tmpbuf) - 1; cp >= tmpbuf; cp--) - if (isspace (*cp)) - *cp = 0; - else - break; - *--savecomp = cptr; - tmpbuf = *nxtbuf++; - } - break; - } - } while ((cptr = cptr->c_next)); + if (isspace ((unsigned char) *cp)) + *cp = 0; + else + break; + *--savecomp = cptr; + tmpbuf = *nxtbuf++; + } } while (state == FLDPLUS) { - state = m_getfld (state, name, tmpbuf, rlwidth, inb); - if (outnum) + bufsz = rlwidth; + state = m_getfld (&gstate, name, tmpbuf, &bufsz, inb); + if (scnout) FPUTS (tmpbuf); } break; case BODY: - compnum = -1; /* * A slight hack ... if we have less than rlwidth characters * in the buffer, call m_getfld again. */ if ((i = strlen(tmpbuf)) < rlwidth) { - state = m_getfld (state, name, tmpbuf + i, - rlwidth - i, inb); + bufsz = rlwidth - i; + state = m_getfld (&gstate, name, tmpbuf + i, &bufsz, inb); } - if (! outnum) { + + if (outnum == 0) { state = FILEEOF; /* stop now if scan cmd */ + if (bodycomp && startbody == NULL) + startbody = tmpbuf; goto finished; } - if (putc ('\n', scnout) == EOF) DIEWRERR(); - FPUTS (tmpbuf); + if (scnout) { + PUTC('\n'); + FPUTS (tmpbuf); + } /* - * performance hack: some people like to run "inc" on - * things like net.sources or large digests. We do a - * copy directly into the output buffer rather than - * going through an intermediate buffer. + * The previous code here used to call m_getfld() using + * pointers to the underlying output stdio buffers to + * avoid the extra copy. Tests by Markus Schnalke show + * no noticeable performance loss on larger mailboxes + * if we incur an extra copy, and messing around with + * internal stdio buffers is becoming more and more + * unportable as times go on. So from now on just deal + * with the overhead of an extra copy. * - * We need the amount of data m_getfld found & don't - * want to do a strlen on the long buffer so there's - * a hack in m_getfld to save the amount of data it - * returned in the global "msg_count". + * Subtle change - with the previous code tmpbuf wasn't + * used, so we could reuse it for the {body} component. + * Now since we're using tmpbuf as our read buffer we + * need to save the beginning of the body for later. + * See the above (and below) use of startbody. */ body:; + if (bodycomp && startbody == NULL) { + startbody = tmpbuf; + tmpbuf = *nxtbuf++; + } + while (state == BODY) { -#ifdef LINUX_STDIO - if (scnout->_IO_write_ptr == scnout->_IO_write_end) { -#elif defined(__DragonFly__) - if (((struct __FILE_public *)scnout)->_w <= 0) { -#else - if (scnout->_cnt <= 0) { -#endif - if (fflush(scnout) == EOF) - DIEWRERR (); - } -#ifdef LINUX_STDIO - state = m_getfld(state, name, scnout->_IO_write_ptr, - (long)scnout->_IO_write_ptr-(long)scnout->_IO_write_end , inb); - scnout->_IO_write_ptr += msg_count; -#elif defined(__DragonFly__) - state = m_getfld( state, name, ((struct __FILE_public *)scnout)->_p, -(((struct __FILE_public *)scnout)->_w), inb ); - ((struct __FILE_public *)scnout)->_w -= msg_count; - ((struct __FILE_public *)scnout)->_p += msg_count; -#else - state = m_getfld( state, name, scnout->_ptr, -(scnout->_cnt), inb ); - scnout->_cnt -= msg_count; - scnout->_ptr += msg_count; -#endif + bufsz = rlwidth; + state = m_getfld (&gstate, name, tmpbuf, &bufsz, inb); + if (scnout) + FPUTS(tmpbuf); } goto finished; case LENERR: case FMTERR: - fprintf (stderr, - innum ? "??Format error (message %d) in " - : "??Format error in ", - outnum ? outnum : innum); + if (innum) + fprintf (stderr, "??Format error (message %d) in ", + outnum ? outnum : innum); + else + fprintf (stderr, "??Format error in "); + fprintf (stderr, "component %d\n", compnum); - if (outnum) { + if (scnout) { FPUTS ("\n\nBAD MSG:\n"); FPUTS (name); - if (putc ('\n', scnout) == EOF) DIEWRERR(); + PUTC('\n'); state = BODY; goto body; } - /* fall through */ + goto finished; case FILEEOF: goto finished; @@ -298,12 +300,12 @@ finished: /* Save and restore buffer so we don't trash our dynamic pool! */ if (bodycomp) { saved_c_text = bodycomp->c_text; - bodycomp->c_text = tmpbuf; + bodycomp->c_text = startbody; } if (size) dat[2] = size; - else if (outnum > 0) + else if (scnout) { dat[2] = ftell(scnout); if (dat[2] == EOF) DIEWRERR(); @@ -318,10 +320,7 @@ finished: if (datecomp) { if (! datecomp->c_text) { if (datecomp->c_tws == NULL) - datecomp->c_tws = (struct tws *) - calloc((size_t) 1, sizeof(*datecomp->c_tws)); - if (datecomp->c_tws == NULL) - adios (NULL, "unable to allocate tws buffer"); + NEW0(datecomp->c_tws); *datecomp->c_tws = *dlocaltime ((time_t *) &st.st_mtime); datecomp->c_flags |= CF_DATEFAB|CF_TRUE; } else { @@ -330,45 +329,36 @@ finished: } } - fmt_scan (fmt, scanl, slwidth, dat); - -#if 0 - fmt = fmt_scan (fmt, scanl, slwidth, dat); - if (!fmt) - fmt = fmt_top; /* reset for old format files */ -#endif + fmt_scan (fmt, *scanl, slwidth, dat, NULL); if (bodycomp) bodycomp->c_text = saved_c_text; if (noisy) - fputs (scanl, stdout); + fputs (charstring_buffer (*scanl), stdout); - FINDCOMP (cptr, "encrypted"); + cptr = fmt_findcomp ("encrypted"); encrypted = cptr && cptr->c_text; /* return dynamically allocated buffers to pool */ while ((cptr = *savecomp++)) { - *--nxtbuf = cptr->c_text; cptr->c_text = NULL; } - *--nxtbuf = tmpbuf; - if (outnum && (ferror(scnout) || fclose (scnout) == EOF)) + if (scnout && (ferror(scnout) || fclose (scnout) == EOF)) DIEWRERR(); return (state != FILEEOF ? SCNERR : encrypted ? SCNENC : SCNMSG); } -static int -mh_fputs(char *s, FILE *stream) -{ - char c; - - while ((c = *s++)) - if (putc (c,stream) == EOF ) - return(EOF); - return (0); +/* The following two functions allow access to the global gstate above. */ +void +scan_finished(void) { + m_getfld_state_destroy (&gstate); } +void +scan_detect_mbox_style (FILE *f) { + m_unknown (&gstate, f); +}