X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/8bb0f8000b86df9270f2410de3fb6f41fa63e88b..f4bef2e06b4284481b3f900e853d1667f52eb37b:/sbr/m_getfld.c diff --git a/sbr/m_getfld.c b/sbr/m_getfld.c index 8303b54a..af44f09a 100644 --- a/sbr/m_getfld.c +++ b/sbr/m_getfld.c @@ -11,6 +11,75 @@ #include #include +/* + Purpose + ======= + Reads an Internet message (RFC 5322), or one or more messages + stored in a maildrop in mbox (RFC 4155) or MMDF format, from a file + stream. Each call to m_getfld() reads one header field, or a + portion of the body, in sequence. + + Inputs + ====== + gstate: opaque parse state + bufsz: maximum number of characters to load into buf + iob: input file stream + + Outputs + ======= + name: header field name (array of size NAMESZ=999) + buf: either a header field body or message body + bufsz: number of characters loaded into buf + (return value): message parse state on return from function + + Functions + ========= + void m_getfld_state_destroy (m_getfld_state_t *gstate): destroys + the parse state pointed to by the gstate argument. + + void m_getfld_state_reset (m_getfld_state_t *gstate): resets the parse + state to FLD. + + void m_unknown(FILE *iob): Determines the message delimiter string + for the maildrop. Called by inc, scan, and msh when reading from a + maildrop file. + + void m_eomsbr (int (*action)(int)): Sets the hook to check for end + of message in a maildrop. Called only by msh. + + State variables + =============== + m_getfld() retains state internally between calls in the + m_getfld_state_t variable. These are used for detecting the end of + each message when reading maildrops: + + unsigned char **pat_map + unsigned char *fdelim + unsigned char *delimend + int fdelimlen + unsigned char *edelim + int edelimlen + char *msg_delim + int msg_style + int (*eom_action)(int) + + Usage + ===== + m_getfld_state_t gstate = 0; + ... + int state = m_getfld (&gstate, ...); + ... 
+ m_getfld_state_destroy (&gstate); + + The state is retained internally by gstate. To reset its state to FLD: + m_getfld_state_reset (&gstate); +*/ + +/* The following describes the old implementation. The high-level + structure hasn't changed, but some of the details have. I'm + leaving this as-is, though, for posterity. + */ + /* This module has a long and checkered history. First, it didn't burst maildrops correctly because it considered two CTRL-A:s in a row to be an inter-message delimiter. It really is four CTRL-A:s followed by a @@ -134,63 +203,6 @@ there is data in "name" or "buf"). */ -/* -Purpose -======= -Reads an Internet message (RFC 5322), or one or more messages stored in a -maildrop in mbox (RFC 4155) or MMDF format, from a file stream. Each call -to m_getfld() reads one header field, or a portion of the body, in sequence. - -Inputs -====== -gstate: opaque parse state -bufsz: maximum number of characters to load into buf -iob: input file stream - -Outputs -======= -name: header field name (array of size NAMESZ=999) -buf: either a header field body or message body -bufsz: number of characters loaded into buf -(return value): message parse state on return from function - -Functions -========= -void m_unknown(FILE *iob): Determines the message delimiter string for the - maildrop. Called by inc, scan, and msh when reading from a maildrop file. - -void m_eomsbr (int (*action)(int)): Sets the hook to check for end of - message in a maildrop. Called only by msh. - -State variables -=============== -m_getfld() retains state internally between calls in the m_getfld_state_t -variable. These are used for detecting the end of each message when reading -maildrops: - - unsigned char **pat_map - unsigned char *fdelim - unsigned char *delimend - int fdelimlen - unsigned char *edelim - int edelimlen - char *msg_delim - int msg_style - int (*eom_action)(int) - -Usage -===== - m_getfld_state_t gstate = 0; - ... - int state = m_getfld (&gstate, ...); - ... 
- m_getfld_state_destroy (&gstate); - -The state is retained internally by gstate. To reset its state to FLD: - - m_getfld_state_reset (&gstate); -*/ - /* * static prototypes */ @@ -210,8 +222,9 @@ static unsigned char *matchc(int, char *, int, char *); * separate messages in a maildrop, such as mbox "From ". * * Some of the tests in the test suite assume a MSG_INPUT_SIZE - * of 8192. */ -#define MSG_INPUT_SIZE (BUFSIZ >= 1024 ? BUFSIZ : 1024) + * of 4096. + */ +#define MSG_INPUT_SIZE 4096 #define MAX_DELIMITER_SIZE 5 struct m_getfld_state { @@ -253,18 +266,19 @@ struct m_getfld_state { int edelimlen; int (*eom_action)(int); int state; + int track_filepos; }; static void -m_getfld_state_init (m_getfld_state_t *gstate) { +m_getfld_state_init (m_getfld_state_t *gstate, FILE *iob) { m_getfld_state_t s; s = *gstate = (m_getfld_state_t) mh_xmalloc(sizeof (struct m_getfld_state)); s->readpos = s->end = s->msg_buf; s->bytes_read = s->total_bytes_read = 0; s->last_caller_pos = s->last_internal_pos = 0; - /* s->iob gets loaded on every call to m_getfld()/m_unknown(). */ + s->iob = iob; s->pat_map = NULL; s->msg_style = MS_DEFAULT; s->msg_delim = ""; @@ -272,16 +286,27 @@ m_getfld_state_init (m_getfld_state_t *gstate) { s->fdelimlen = s->edelimlen = 0; s->eom_action = NULL; s->state = FLD; + s->track_filepos = 0; } /* scan() needs to force an initial state of FLD for each message. */ void m_getfld_state_reset (m_getfld_state_t *gstate) { + if (*gstate) { + (*gstate)->state = FLD; + } +} + +/* If the caller interleaves ftell*()/fseek*() calls with m_getfld() + calls, m_getfld() must keep track of the file position. The caller + must use this function to inform m_getfld(). */ +void +m_getfld_track_filepos (m_getfld_state_t *gstate, FILE *iob) { if (! 
*gstate) { - m_getfld_state_init (gstate); + m_getfld_state_init (gstate, iob); } - (*gstate)->state = FLD; + (*gstate)->track_filepos = 1; } void m_getfld_state_destroy (m_getfld_state_t *gstate) { @@ -329,20 +354,20 @@ enter_getfld (m_getfld_state_t *gstate, FILE *iob) { off_t pos = ftello (iob); if (! *gstate) { - m_getfld_state_init (gstate); + m_getfld_state_init (gstate, iob); } s = *gstate; s->bytes_read = 0; - /* Ugly. The parser (used to) open the input file multiple times, - so we have to always use the FILE * that's passed to - m_getfld(). Though this might not be necessary any more, as - long as the parser inits a new m_getfld_state for each file. - See comment below about the readpos shift code being currently - unused. */ + /* This is ugly and no longer necessary, but is retained just in + case it's needed again. The parser used to open the input file + multiple times, so we had to always use the FILE * that's + passed to m_getfld(). Now the parser inits a new + m_getfld_state for each file. See comment below about the + readpos shift code being currently unused. */ s->iob = iob; - if (pos != 0 || s->last_internal_pos != 0) { + if (s->track_filepos && (pos != 0 || s->last_internal_pos != 0)) { if (s->last_internal_pos == 0) { s->total_bytes_read = pos; } else { @@ -386,13 +411,16 @@ enter_getfld (m_getfld_state_t *gstate, FILE *iob) { static void leave_getfld (m_getfld_state_t s) { - /* Save the internal file position that we use for the input buffer. */ - s->last_internal_pos = ftello (s->iob); - - /* Set file stream position so that callers can use ftell(). */ s->total_bytes_read += s->bytes_read; - fseeko (s->iob, s->total_bytes_read, SEEK_SET); - s->last_caller_pos = ftello (s->iob); + + if (s->track_filepos) { + /* Save the internal file position that we use for the input buffer. */ + s->last_internal_pos = ftello (s->iob); + + /* Set file stream position so that callers can use ftell(). 
*/ + fseeko (s->iob, s->total_bytes_read, SEEK_SET); + s->last_caller_pos = ftello (s->iob); + } } static size_t