X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/5dd6771b28c257af405d7248639ed0e3bcdce38b..ef1ba39e8dae81091b6c3e73e72825ef6edea3c6:/uip/scansbr.c diff --git a/uip/scansbr.c b/uip/scansbr.c index 04dc2152..8d2a46d2 100644 --- a/uip/scansbr.c +++ b/uip/scansbr.c @@ -14,20 +14,6 @@ #include #include -#ifdef _FSTDIO -# define _ptr _p /* Gag */ -# define _cnt _w /* Wretch */ -#endif - -#ifdef SCO_5_STDIO -# define _ptr __ptr -# define _cnt __cnt -# define _base __base -# define _filbuf(fp) ((fp)->__cnt = 0, __filbuf(fp)) -#endif - -#define MAXSCANL 256 /* longest possible scan line */ - /* * Buffer size for content part of header fields. We want this * to be large enough so that we don't do a lot of extra FLDPLUS @@ -38,10 +24,6 @@ #define SBUFSIZ 512 static struct format *fmt; -#ifdef JLR -static struct format *fmt_top; -#endif /* JLR */ - static struct comp *datecomp; /* pntr to "date" comp */ static struct comp *bodycomp; /* pntr to "body" pseudo-comp * * (if referenced) */ @@ -51,7 +33,7 @@ static struct comp **used_buf = 0; /* stack for comp that use buffers */ static int dat[5]; /* aux. data for format routine */ -char *scanl = 0; /* text of most recent scanline */ +static m_getfld_state_t gstate; /* for accessor functions below */ #define DIEWRERR() adios (scnmsg, "write error on") @@ -63,79 +45,93 @@ char *scanl = 0; /* text of most recent scanline */ /* * prototypes */ -int sc_width (void); /* from termsbr.c */ static int mh_fputs(char *, FILE *); -#ifdef MULTIBYTE_SUPPORT -#define SCAN_CHARWIDTH MB_CUR_MAX -#else -#define SCAN_CHARWIDTH 1 -#endif - int scan (FILE *inb, int innum, int outnum, char *nfs, int width, int curflg, - int unseen, char *folder, long size, int noisy) + int unseen, char *folder, long size, int noisy, charstring_t *scanl) { int i, compnum, encrypted, state; - unsigned char *cp, *tmpbuf; - char **nxtbuf; + char *cp, *tmpbuf, *startbody, **nxtbuf; char *saved_c_text = NULL; struct comp *cptr; struct comp **savecomp; char *scnmsg = NULL; FILE *scnout = NULL; char name[NAMESZ]; + int bufsz; static int rlwidth, slwidth; -#ifdef RPATHS - char returnpath[BUFSIZ]; - char deliverydate[BUFSIZ]; -#endif - - /* first-time only initialization */ - if (!scanl) { - if (width == 0) { + /* first-time only initialization, which will always happen the + way the code is now, with callers initializing *scanl to NULL. + scanl used to be a global. */ + if (! *scanl) { + if (width == -1) { + /* Default: width of the terminal, but at least WIDTH/2. */ if ((width = sc_width ()) < WIDTH/2) width = WIDTH/2; - else if (width > MAXSCANL) - width = MAXSCANL; + } else if (width == 0) { + /* Unlimited width. */ + width = INT_MAX; } dat[3] = slwidth = width; - scanl = (char *) mh_xmalloc((size_t) SCAN_CHARWIDTH * (slwidth + 2) ); + *scanl = charstring_create (width < NMH_BUFSIZ ? width : NMH_BUFSIZ); if (outnum) umask(~m_gmprot()); /* Compile format string */ - ncomps = fmt_compile (nfs, &fmt) + 1; - -#ifdef JLR - fmt_top = fmt; -#endif /* JLR */ - FINDCOMP(bodycomp, "body"); - FINDCOMP(datecomp, "date"); - FINDCOMP(cptr, "folder"); + ncomps = fmt_compile (nfs, &fmt, 1) + 2; + + bodycomp = fmt_findcomp("body"); + datecomp = fmt_findcomp("date"); + cptr = fmt_findcomp("folder"); if (cptr && folder) - cptr->c_text = folder; - FINDCOMP(cptr, "encrypted"); - if (!cptr) - if ((cptr = (struct comp *) calloc (1, sizeof(*cptr)))) { - cptr->c_name = "encrypted"; - cptr->c_next = wantcomp[i = CHASH (cptr->c_name)]; - wantcomp[i] = cptr; + cptr->c_text = mh_xstrdup(folder); + if (fmt_addcompentry("encrypted")) { ncomps++; } - FINDCOMP (cptr, "dtimenow"); + cptr = fmt_findcomp("dtimenow"); if (cptr) cptr->c_text = getcpy(dtimenow (0)); - nxtbuf = compbuffers = (char **) calloc((size_t) ncomps, sizeof(char *)); - if (nxtbuf == NULL) - adios (NULL, "unable to allocate component buffers"); - used_buf = (struct comp **) calloc((size_t) (ncomps+1), - sizeof(struct comp *)); - if (used_buf == NULL) - adios (NULL, "unable to allocate component buffer stack"); + + /* + * In other programs I got rid of this complicated buffer switching, + * but since scan reads lots of messages at once and this complicated + * memory management, I decided to keep it; otherwise there was + * the potential for a lot of malloc() and free()s, and I could + * see the malloc() pool really getting fragmented. Maybe it + * wouldn't be an issue in practice; perhaps this will get + * revisited someday. + * + * So, some notes for what's going on: + * + * nxtbuf is an array of pointers that contains malloc()'d buffers + * to hold our component text. used_buf is an array of struct comp + * pointers that holds pointers to component structures we found while + * processing a message. + * + * We read in the message with m_getfld(), using "tmpbuf" as our + * input buffer. tmpbuf is set at the start of message processing + * to the first buffer in our buffer pool (nxtbuf). + * + * Every time we find a component we care about, we set that component's + * text buffer to the current value of tmpbuf, and then switch tmpbuf + * to the next buffer in our pool. We also add that component to + * our used_buf pool. + * + * When we're done, we go back and zero out all of the component + * text buffer pointers that we saved in used_buf. + * + * Note that this means c_text memory is NOT owned by the fmt_module + * and it's our responsibility to free it. + */ + + nxtbuf = compbuffers = mh_xcalloc(ncomps, sizeof *nxtbuf); + used_buf = mh_xcalloc(ncomps + 1, sizeof *used_buf); used_buf += ncomps+1; *--used_buf = 0; - rlwidth = bodycomp && (width > SBUFSIZ) ? width : SBUFSIZ; + rlwidth = bodycomp && (width > SBUFSIZ) + ? min (width, NMH_BUFSIZ) + : SBUFSIZ; for (i = ncomps; i--; ) *nxtbuf++ = mh_xmalloc(rlwidth); } @@ -146,6 +142,7 @@ scan (FILE *inb, int innum, int outnum, char *nfs, int width, int curflg, nxtbuf = compbuffers; savecomp = used_buf; tmpbuf = *nxtbuf++; + startbody = NULL; dat[0] = innum ? innum : outnum; dat[1] = curflg; dat[4] = unseen; @@ -154,13 +151,14 @@ scan (FILE *inb, int innum, int outnum, char *nfs, int width, int curflg, * Get the first field. If the message is non-empty * and we're doing an "inc", open the output file. */ - if ((state = m_getfld (FLD, name, tmpbuf, rlwidth, inb)) == FILEEOF) { + bufsz = rlwidth; + m_getfld_state_reset (&gstate); + if ((state = m_getfld (&gstate, name, tmpbuf, &bufsz, inb)) == FILEEOF) { if (ferror(inb)) { advise("read", "unable to"); /* "read error" */ return SCNFAT; - } else { - return SCNEOF; } + return SCNEOF; } if (outnum) { @@ -173,23 +171,11 @@ scan (FILE *inb, int innum, int outnum, char *nfs, int width, int curflg, } if ((scnout = fopen (scnmsg, "w")) == NULL) adios (scnmsg, "unable to write"); -#ifdef RPATHS - /* - * Add the Return-Path and Delivery-Date - * header fields to message. - */ - if (get_returnpath (returnpath, sizeof(returnpath), - deliverydate, sizeof(deliverydate))) { - FPUTS ("Return-Path: "); - FPUTS (returnpath); - FPUTS ("Delivery-Date: "); - FPUTS (deliverydate); - } -#endif /* RPATHS */ } /* scan - main loop */ - for (compnum = 1; ; state = m_getfld (state, name, tmpbuf, rlwidth, inb)) { + for (compnum = 1; ; + bufsz = rlwidth, state = m_getfld (&gstate, name, tmpbuf, &bufsz, inb)) { switch (state) { case FLD: case FLDPLUS: @@ -205,85 +191,84 @@ scan (FILE *inb, int innum, int outnum, char *nfs, int width, int curflg, * buffer as the component temp buffer (buffer switching * saves an extra copy of the component text). */ - if ((cptr = wantcomp[CHASH(name)])) { - do { - if (!mh_strcasecmp(name, cptr->c_name)) { - if (! cptr->c_text) { - cptr->c_text = tmpbuf; - for (cp = tmpbuf + strlen (tmpbuf) - 1; + if ((cptr = fmt_findcasecomp(name))) { + if (! cptr->c_text) { + cptr->c_text = tmpbuf; + for (cp = tmpbuf + strlen (tmpbuf) - 1; cp >= tmpbuf; cp--) - if (isspace (*cp)) - *cp = 0; - else - break; - *--savecomp = cptr; - tmpbuf = *nxtbuf++; - } - break; - } - } while ((cptr = cptr->c_next)); + if (isspace ((unsigned char) *cp)) + *cp = 0; + else + break; + *--savecomp = cptr; + tmpbuf = *nxtbuf++; + } } while (state == FLDPLUS) { - state = m_getfld (state, name, tmpbuf, rlwidth, inb); + bufsz = rlwidth; + state = m_getfld (&gstate, name, tmpbuf, &bufsz, inb); if (outnum) FPUTS (tmpbuf); } break; case BODY: - compnum = -1; + /* + * A slight hack ... if we have less than rlwidth characters + * in the buffer, call m_getfld again. + */ + + if ((i = strlen(tmpbuf)) < rlwidth) { + bufsz = rlwidth - i; + state = m_getfld (&gstate, name, tmpbuf + i, &bufsz, inb); + } + if (! outnum) { state = FILEEOF; /* stop now if scan cmd */ + if (bodycomp && startbody == NULL) + startbody = tmpbuf; goto finished; } if (putc ('\n', scnout) == EOF) DIEWRERR(); FPUTS (tmpbuf); /* - * performance hack: some people like to run "inc" on - * things like net.sources or large digests. We do a - * copy directly into the output buffer rather than - * going through an intermediate buffer. + * The previous code here used to call m_getfld() using + * pointers to the underlying output stdio buffers to + * avoid the extra copy. Tests by Markus Schnalke show + * no noticeable performance loss on larger mailboxes + * if we incur an extra copy, and messing around with + * internal stdio buffers is becoming more and more + * unportable as times go on. So from now on just deal + * with the overhead of an extra copy. * - * We need the amount of data m_getfld found & don't - * want to do a strlen on the long buffer so there's - * a hack in m_getfld to save the amount of data it - * returned in the global "msg_count". + * Subtle change - with the previous code tmpbuf wasn't + * used, so we could reuse it for the {body} component. + * Now since we're using tmpbuf as our read buffer we + * need to save the beginning of the body for later. + * See the above (and below) use of startbody. */ body:; + if (bodycomp && startbody == NULL) { + startbody = tmpbuf; + tmpbuf = *nxtbuf++; + } + while (state == BODY) { -#ifdef LINUX_STDIO - if (scnout->_IO_write_ptr == scnout->_IO_write_end) { -#elif defined(__DragonFly__) - if (((struct __FILE_public *)scnout)->_w <= 0) { -#else - if (scnout->_cnt <= 0) { -#endif - if (fflush(scnout) == EOF) - DIEWRERR (); - } -#ifdef LINUX_STDIO - state = m_getfld(state, name, scnout->_IO_write_ptr, - (long)scnout->_IO_write_ptr-(long)scnout->_IO_write_end , inb); - scnout->_IO_write_ptr += msg_count; -#elif defined(__DragonFly__) - state = m_getfld( state, name, ((struct __FILE_public *)scnout)->_p, -(((struct __FILE_public *)scnout)->_w), inb ); - ((struct __FILE_public *)scnout)->_w -= msg_count; - ((struct __FILE_public *)scnout)->_p += msg_count; -#else - state = m_getfld( state, name, scnout->_ptr, -(scnout->_cnt), inb ); - scnout->_cnt -= msg_count; - scnout->_ptr += msg_count; -#endif + bufsz = rlwidth; + state = m_getfld (&gstate, name, tmpbuf, &bufsz, inb); + FPUTS(tmpbuf); } goto finished; case LENERR: case FMTERR: - fprintf (stderr, - innum ? "??Format error (message %d) in " - : "??Format error in ", - outnum ? outnum : innum); + if (innum) + fprintf (stderr, "??Format error (message %d) in ", + outnum ? outnum : innum); + else + fprintf (stderr, "??Format error in "); + fprintf (stderr, "component %d\n", compnum); if (outnum) { @@ -315,7 +300,7 @@ finished: /* Save and restore buffer so we don't trash our dynamic pool! */ if (bodycomp) { saved_c_text = bodycomp->c_text; - bodycomp->c_text = tmpbuf; + bodycomp->c_text = startbody; } if (size) @@ -335,10 +320,7 @@ finished: if (datecomp) { if (! datecomp->c_text) { if (datecomp->c_tws == NULL) - datecomp->c_tws = (struct tws *) - calloc((size_t) 1, sizeof(*datecomp->c_tws)); - if (datecomp->c_tws == NULL) - adios (NULL, "unable to allocate tws buffer"); + NEW0(datecomp->c_tws); *datecomp->c_tws = *dlocaltime ((time_t *) &st.st_mtime); datecomp->c_flags |= CF_DATEFAB|CF_TRUE; } else { @@ -347,29 +329,21 @@ finished: } } - fmt_scan (fmt, scanl, slwidth, dat); - -#if 0 - fmt = fmt_scan (fmt, scanl, slwidth, dat); - if (!fmt) - fmt = fmt_top; /* reset for old format files */ -#endif + fmt_scan (fmt, *scanl, slwidth, dat, NULL); if (bodycomp) bodycomp->c_text = saved_c_text; if (noisy) - fputs (scanl, stdout); + fputs (charstring_buffer (*scanl), stdout); - FINDCOMP (cptr, "encrypted"); + cptr = fmt_findcomp ("encrypted"); encrypted = cptr && cptr->c_text; /* return dynamically allocated buffers to pool */ while ((cptr = *savecomp++)) { - *--nxtbuf = cptr->c_text; cptr->c_text = NULL; } - *--nxtbuf = tmpbuf; if (outnum && (ferror(scnout) || fclose (scnout) == EOF)) DIEWRERR(); @@ -378,19 +352,6 @@ finished: } -/* - * Cheat: we are loaded with adrparse, which wants a routine called - * OfficialName(). We call adrparse:getm() with the correct arguments - * to prevent OfficialName() from being called. Hence, the following - * is to keep the loader happy. - */ -char * -OfficialName (char *name) -{ - return name; -} - - static int mh_fputs(char *s, FILE *stream) { @@ -402,3 +363,13 @@ mh_fputs(char *s, FILE *stream) return (0); } +/* The following two functions allow access to the global gstate above. */ +void +scan_finished () { + m_getfld_state_destroy (&gstate); +} + +void +scan_detect_mbox_style (FILE *f) { + m_unknown (&gstate, f); +}