X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/f087de9a3279289fb273c099bac54b67ca4c5c99..ccafa1944:/sbr/m_getfld.c?ds=inline diff --git a/sbr/m_getfld.c b/sbr/m_getfld.c index 7268aff3..af44f09a 100644 --- a/sbr/m_getfld.c +++ b/sbr/m_getfld.c @@ -11,6 +11,75 @@ #include #include +/* + Purpose + ======= + Reads an Internet message (RFC 5322), or one or more messages + stored in a maildrop in mbox (RFC 4155) or MMDF format, from a file + stream. Each call to m_getfld() reads one header field, or a + portion of the body, in sequence. + + Inputs + ====== + gstate: opaque parse state + bufsz: maximum number of characters to load into buf + iob: input file stream + + Outputs + ======= + name: header field name (array of size NAMESZ=999) + buf: either a header field body or message body + bufsz: number of characters loaded into buf + (return value): message parse state on return from function + + Functions + ========= + void m_getfld_state_destroy (m_getfld_state_t *gstate): destroys + the parse state pointed to by the gstate argument. + + m_getfld_state_reset (m_getfld_state_t *gstate): resets the parse + state to FLD. + + void m_unknown(FILE *iob): Determines the message delimiter string + for the maildrop. Called by inc, scan, and msh when reading from a + maildrop file. + + void m_eomsbr (int (*action)(int)): Sets the hook to check for end + of message in a maildrop. Called only by msh. + + State variables + =============== + m_getfld() retains state internally between calls in the + m_getfld_state_t variable. These are used for detecting the end of + each message when reading maildrops: + + unsigned char **pat_map + unsigned char *fdelim + unsigned char *delimend + int fdelimlen + unsigned char *edelim + int edelimlen + char *msg_delim + int msg_style + int (*eom_action)(int) + + Usage + ===== + m_getfld_state_t gstate = 0; + ... + int state = m_getfld (&gstate, ...); + ... + m_getfld_state_destroy (&gstate); + + The state is retained internally by gstate. To reset its state to FLD: + m_getfld_state_reset (&gstate); +*/ + +/* The following described the old implementation. The high-level + structure hasn't changed, but some of the details have. I'm + leaving this as-is, though, for posterity. + */ + /* This module has a long and checkered history. First, it didn't burst maildrops correctly because it considered two CTRL-A:s in a row to be an inter-message delimiter. It really is four CTRL-A:s followed by a @@ -134,63 +203,6 @@ there is data in "name" or "buf"). */ -/* -Purpose -======= -Reads an Internet message (RFC 5322), or one or more messages stored in a -maildrop in mbox (RFC 4155) or MMDF format, from a file stream. Each call -to m_getfld() reads one header field, or a portion of the body, in sequence. - -Inputs -====== -gstate: opaque parse state -bufsz: maximum number of characters to load into buf -iob: input file stream - -Outputs -======= -name: header field name (array of size NAMESZ=999) -buf: either a header field body or message body -bufsz: number of characters loaded into buf -(return value): message parse state on return from function - -Functions -========= -void m_unknown(FILE *iob): Determines the message delimiter string for the - maildrop. Called by inc, scan, and msh when reading from a maildrop file. - -void m_eomsbr (int (*action)(int)): Sets the hook to check for end of - message in a maildrop. Called only by msh. - -State variables -=============== -m_getfld() retains state internally between calls in the m_getfld_state_t -variable. These are used for detecting the end of each message when reading -maildrops: - - unsigned char **pat_map - unsigned char *fdelim - unsigned char *delimend - int fdelimlen - unsigned char *edelim - int edelimlen - char *msg_delim - int msg_style - int (*eom_action)(int) - -Usage -===== - m_getfld_state_t gstate; - m_getfld_state_init (&gstate); - int state = m_getfld (gstate, ...); - ... - m_getfld_state_destroy (&gstate); - -The state is retained internally by gstate. To reset its state to FLD: - - m_getfld_state_reset (&gstate); -*/ - /* * static prototypes */ @@ -210,8 +222,9 @@ static unsigned char *matchc(int, char *, int, char *); * separate messages in a maildrop, such as mbox "From ". * * Some of the tests in the test suite assume a MSG_INPUT_SIZE - * of 8192. */ -#define MSG_INPUT_SIZE (BUFSIZ >= 1024 ? BUFSIZ : 1024) + * of 4096. + */ +#define MSG_INPUT_SIZE 4096 #define MAX_DELIMITER_SIZE 5 struct m_getfld_state { @@ -253,35 +266,56 @@ struct m_getfld_state { int edelimlen; int (*eom_action)(int); int state; + int track_filepos; }; +static void -m_getfld_state_init (m_getfld_state_t *s) { - *s = (m_getfld_state_t) mh_xmalloc(sizeof (struct m_getfld_state)); - (*s)->readpos = (*s)->end = (*s)->msg_buf; - (*s)->bytes_read = (*s)->total_bytes_read = 0; - (*s)->last_caller_pos = (*s)->last_internal_pos = 0; - /* (*s)->iob gets loaded on every call to m_getfld()/m_unknown(). */ - (*s)->pat_map = NULL; - (*s)->msg_style = MS_DEFAULT; - (*s)->msg_delim = ""; - (*s)->fdelim = (*s)->delimend = (*s)->edelim = NULL; - (*s)->fdelimlen = (*s)->edelimlen = 0; - (*s)->eom_action = NULL; - (*s)->state = FLD; +m_getfld_state_init (m_getfld_state_t *gstate, FILE *iob) { + m_getfld_state_t s; + + s = *gstate = (m_getfld_state_t) mh_xmalloc(sizeof (struct m_getfld_state)); + s->readpos = s->end = s->msg_buf; + s->bytes_read = s->total_bytes_read = 0; + s->last_caller_pos = s->last_internal_pos = 0; + s->iob = iob; + s->pat_map = NULL; + s->msg_style = MS_DEFAULT; + s->msg_delim = ""; + s->fdelim = s->delimend = s->edelim = NULL; + s->fdelimlen = s->edelimlen = 0; + s->eom_action = NULL; + s->state = FLD; + s->track_filepos = 0; } /* scan() needs to force a state an initial state of FLD for each message. */ void -m_getfld_state_reset (m_getfld_state_t *s) { - (*s)->state = FLD; +m_getfld_state_reset (m_getfld_state_t *gstate) { + if (*gstate) { + (*gstate)->state = FLD; + } } -void m_getfld_state_destroy (m_getfld_state_t *s) { - if (*s) { - if ((*s)->fdelim) free ((*s)->fdelim-1); - free (*s); - *s = 0; +/* If the caller interleaves ftell*()/fseek*() calls with m_getfld() + calls, m_getfld() must keep track of the file position. The caller + must use this function to inform m_getfld(). */ +void +m_getfld_track_filepos (m_getfld_state_t *gstate, FILE *iob) { + if (! *gstate) { + m_getfld_state_init (gstate, iob); + } + + (*gstate)->track_filepos = 1; +} + +void m_getfld_state_destroy (m_getfld_state_t *gstate) { + m_getfld_state_t s = *gstate; + + if (s) { + if (s->fdelim) free (s->fdelim-1); + free (s); + *gstate = 0; } } @@ -315,17 +349,25 @@ void m_getfld_state_destroy (m_getfld_state_t *s) { static void -enter_getfld (m_getfld_state_t s, FILE *iob) { +enter_getfld (m_getfld_state_t *gstate, FILE *iob) { + m_getfld_state_t s; off_t pos = ftello (iob); - /* Ugly. The parser opens the input file multiple times, so we - have to always use the FILE * that's passed to m_getfld(). - Though this might not be necessary any more, as long as the - parser inits a new m_getfld_state for each file. See comment - below about the readpos shift code being currently unused. */ + if (! *gstate) { + m_getfld_state_init (gstate, iob); + } + s = *gstate; + s->bytes_read = 0; + + /* This is ugly and no longer necessary, but is retained just in + case it's needed again. The parser used to open the input file + multiple times, so we had to always use the FILE * that's + passed to m_getfld(). Now the parser inits a new + m_getfld_state for each file. See comment below about the + readpos shift code being currently unused. */ s->iob = iob; - if (pos != 0 || s->last_internal_pos != 0) { + if (s->track_filepos && (pos != 0 || s->last_internal_pos != 0)) { if (s->last_internal_pos == 0) { s->total_bytes_read = pos; } else { @@ -365,19 +407,20 @@ enter_getfld (m_getfld_state_t s, FILE *iob) { fseeko (iob, pos, SEEK_SET); } } - - s->bytes_read = 0; } static void leave_getfld (m_getfld_state_t s) { - /* Save the internal file position that we use for the input buffer. */ - s->last_internal_pos = ftello (s->iob); - - /* Set file stream position so that callers can use ftell(). */ s->total_bytes_read += s->bytes_read; - fseeko (s->iob, s->total_bytes_read, SEEK_SET); - s->last_caller_pos = ftello (s->iob); + + if (s->track_filepos) { + /* Save the internal file position that we use for the input buffer. */ + s->last_internal_pos = ftello (s->iob); + + /* Set file stream position so that callers can use ftell(). */ + fseeko (s->iob, s->total_bytes_read, SEEK_SET); + s->last_caller_pos = ftello (s->iob); + } } static size_t @@ -439,13 +482,15 @@ Ungetc (int c, m_getfld_state_t s) { int -m_getfld (m_getfld_state_t s, unsigned char name[NAMESZ], unsigned char *buf, - int *bufsz, FILE *iob) +m_getfld (m_getfld_state_t *gstate, unsigned char name[NAMESZ], + unsigned char *buf, int *bufsz, FILE *iob) { + m_getfld_state_t s; register unsigned char *cp; register int max, n, c; - enter_getfld (s, iob); + enter_getfld (gstate, iob); + s = *gstate; if ((c = Getc(s)) < 0) { *bufsz = *buf = 0; @@ -710,8 +755,9 @@ m_getfld (m_getfld_state_t s, unsigned char name[NAMESZ], unsigned char *buf, void -m_unknown(m_getfld_state_t s, FILE *iob) +m_unknown(m_getfld_state_t *gstate, FILE *iob) { + m_getfld_state_t s; register int c; char text[MAX_DELIMITER_SIZE]; char from[] = "From "; @@ -719,7 +765,8 @@ m_unknown(m_getfld_state_t s, FILE *iob) register char *delimstr; unsigned int i; - enter_getfld (s, iob); + enter_getfld (gstate, iob); + s = *gstate; /* * Figure out what the message delimitter string is for this