#include <h/mh.h>
#include <h/mts.h>
#include <h/utils.h>
+#include <inttypes.h>
/*
Purpose
Usage
=====
- m_getfld_state_t gstate = 0;
- ...
- int state = m_getfld (&gstate, ...);
- ...
+ m_getfld_state_t gstate;
+
+ gstate = m_getfld_state_init(mailfp);
+ Perhaps m_getfld_track_filepos2(&gstate);
+ ...
+ state = m_getfld2(&gstate, ...);
+ ...Repeat until finished with mailfp.
m_getfld_state_destroy (&gstate);
The state is retained internally by gstate. To reset its state to FLD:
To speed things up considerably, the routine Eom() was made an auxiliary
function called by the macro eom(). Unless we are bursting a maildrop,
- the eom() macro returns FALSE saying we aren't at the end of the
+ the eom() macro returns false saying we aren't at the end of the
message.
The next thing to do is to read the mts.conf file and initialize
/*
* static prototypes
*/
+static void Ungetc(m_getfld_state_t s);
static int m_Eom (m_getfld_state_t);
#define eom(c,s) (s->msg_style != MS_DEFAULT && \
#define MAX_DELIMITER_SIZE 5
struct m_getfld_state {
+ /* The file to read from; I/O block. Caller keeps passing it after
+ * initialisation due to historic interface so it keeps getting
+ * updated, presumably to the same value. */
+ FILE *iob;
+
/* Holds content of iob. */
char msg_buf[2 * MSG_INPUT_SIZE + MAX_DELIMITER_SIZE];
/* Points to the next byte to read from msg_buf. */
* equals end then msg_buf is empty. */
char *end;
- /* Bytes of iob consumed during this call. */
- off_t bytes_read;
+ /* Whether the caller intends to ftell(3)/fseek(3) iob's position,
+ * and thus whether m_getfld() needs to detect that and compensate. */
+ int track_filepos;
/* Position in iob given what's been consumed ready for returning to
* the caller. Further than this may have been read into msg_buf. */
off_t total_bytes_read;
+ /* Bytes of iob consumed during this call. */
+ off_t bytes_read;
/* What fseeko(3) tells us iob's position is having just explicitly
* set it to total_bytes_read. Surely always the same? */
off_t last_caller_pos;
/* Saved position in iob from filling msg_buf, prior to returning. */
off_t last_internal_pos;
- /* The file to read from; I/O block. Caller keeps passing it after
- * initialisation due to historic interface so it keeps getting
- * updated, presumably to the same value. */
- FILE *iob;
- /* Maps all the bytes of msg_delim, apart from the last two,
- * including the NUL, onto the last position in msg_delim where they
- * occur. Bytes not present are NULL. */
- char **pat_map;
/* One of the MS_* macros tracking the type of iob's content and
* thus if it's a single email, or several with delimiters. Default
* is MS_DEFAULT. */
int msg_style;
- /*
- * The "full" delimiter string for a packed maildrop consists
- * of a newline followed by the actual delimiter. E.g., the
- * full string for a Unix maildrop would be: "\n\nFrom ".
- * "fdelim" points to the start of the full string and is used
- * in the BODY case of the main routine to search the buffer for
- * a possible eom. Msg_delim points to the first character of
- * the actual delim. string (i.e., fdelim+1). edelim
- * points to the 2nd character of actual delimiter string. It
- * is used in m_Eom because the first character of the string
- * has been read and matched before m_Eom is called.
- */
/* The message delimiter if iob has multiple emails, else NULL. For
* MS_MBOX it's the string that separates two emails, "\nFrom ",
* starting From_ line of the next, but for MS_MMDF it's
* "\001\001\001\001\n" that may start or terminate an email. */
char *msg_delim;
+ /* The last non-NUL char of msg_delim. */
+ char *delimend;
/* When searching for msg_delim after an email, it's only of
* interest at the start of the line, i.e. when preceded by a
* linefeed. fdelim points to msg_delim[-1] that contains '\n' so
* it can be used as the needle. */
char *fdelim;
- /* The last non-NUL char of msg_delim. */
- char *delimend;
/* strlen(fdelim). */
int fdelimlen;
/* The second char of msg_delim. Used when the first char has
* s->fdelim s->fdelimlen=7 s->fdelim s->fdelimlen=6
*/
+ /* Maps all the bytes of msg_delim, apart from the last two,
+ * including the NUL, onto the last position in msg_delim where they
+ * occur. Bytes not present are NULL. */
+ char **pat_map;
+
/* The parser's current state. Also returned to the caller, amongst
* other possible values, to indicate the token consumed. One of
* FLD, FLDPLUS, BODY, or FILEEOF. */
int state;
- /* Whether the caller intends to ftell(3)/fseek(3) iob's position,
- * and thus whether m_getfld() needs to detect that and compensate. */
- int track_filepos;
};
-static
-void
-m_getfld_state_init (m_getfld_state_t *gstate, FILE *iob) {
+m_getfld_state_t m_getfld_state_init(FILE *iob)
+{
m_getfld_state_t s;
NEW(s);
- *gstate = s;
s->readpos = s->end = s->msg_buf;
s->bytes_read = s->total_bytes_read = 0;
s->last_caller_pos = s->last_internal_pos = 0;
s->fdelimlen = s->edelimlen = 0;
s->state = FLD;
s->track_filepos = 0;
+
+ return s;
}
/* scan() needs to force an initial state of FLD for each message. */
void
m_getfld_track_filepos (m_getfld_state_t *gstate, FILE *iob) {
if (! *gstate) {
- m_getfld_state_init (gstate, iob);
+ *gstate = m_getfld_state_init(iob);
}
(*gstate)->track_filepos = 1;
}
+/* m_getfld_track_filepos() with the existing iob.
+ *
+ * For callers that already hold a state: the FILE handed on to
+ * m_getfld_track_filepos() is the one recorded in (*gstate)->iob, so
+ * it need not be supplied again.  *gstate must be non-null; if it is
+ * null, adios() is called with a diagnostic instead of dereferencing
+ * it. */
+void m_getfld_track_filepos2(m_getfld_state_t *gstate)
+{
+ if (!*gstate)
+ adios(NULL, "m_getfld_track_filepos2 without gstate");
+
+ m_getfld_track_filepos(gstate, (*gstate)->iob);
+}
+
void m_getfld_state_destroy (m_getfld_state_t *gstate) {
m_getfld_state_t s = *gstate;
off_t pos_movement;
if (! *gstate) {
- m_getfld_state_init (gstate, iob);
+ *gstate = m_getfld_state_init(iob);
}
s = *gstate;
s->bytes_read = 0;
if (!s->track_filepos)
return;
- pos = ftello(iob);
+ if ((pos = ftello(iob)) == -1)
+ adios("getfld's iob", "failed to get offset on entry");
if (pos == 0 && s->last_internal_pos == 0)
return;
s->total_bytes_read += pos_movement;
pos = s->last_internal_pos;
} else {
+ off_t off;
size_t num_read;
/* This seek skips past an integral number of
chunks of size MSG_INPUT_SIZE. */
- fseeko (iob, pos/MSG_INPUT_SIZE * MSG_INPUT_SIZE, SEEK_SET);
+ off = pos / MSG_INPUT_SIZE * MSG_INPUT_SIZE;
+ if (fseeko(iob, off, SEEK_SET) == -1)
+ adios("getfld's iob", "failed to set offset to skip: "
+ "%" PRIdMAX, (intmax_t)off);
num_read = fread (s->msg_buf, 1, MSG_INPUT_SIZE, iob);
s->readpos = s->msg_buf + pos % MSG_INPUT_SIZE;
s->end = s->msg_buf + num_read;
}
}
- fseeko (iob, pos, SEEK_SET);
+ if (fseeko(iob, pos, SEEK_SET) == -1)
+ adios("getfld's iob", "failed to set offset on entry: %" PRIdMAX,
+ (intmax_t)pos);
}
static void
if (s->track_filepos) {
/* Save the internal file position that we use for the input buffer. */
- s->last_internal_pos = ftello (s->iob);
+ if ((s->last_internal_pos = ftello(s->iob)) == -1)
+ adios("getfld's iob", "failed to get offset before seek");
/* Set file stream position so that callers can use ftell(). */
- fseeko (s->iob, s->total_bytes_read, SEEK_SET);
- s->last_caller_pos = ftello (s->iob);
+ if (fseeko(s->iob, s->total_bytes_read, SEEK_SET) == -1)
+ adios("getfld's iob", "failed to set offset: %" PRIdMAX,
+ (intmax_t)s->total_bytes_read);
+
+ s->last_caller_pos = s->total_bytes_read;
}
}
return (unsigned char)*s->readpos++;
}
-/* Return the next character that would be read by Getc() without
- * consuming it, fetching more of the input for the buffer if required,
- * or EOF on end of file. */
+/* Return the next character that Getc() would return, which may be EOF.
+ *
+ * Done by reading a character with Getc() and, unless that was EOF,
+ * stepping back over it with Ungetc(), so the following Getc() sees
+ * the same character. */
static int
-Peek (m_getfld_state_t s) {
- if (s->end - s->readpos < 1 && read_more (s) == 0) {
- return EOF;
- }
- return s->readpos < s->end ? (unsigned char) *s->readpos : EOF;
+Peek (m_getfld_state_t s)
+{
+ int c;
+
+ c = Getc(s);
+ if (c != EOF)
+ Ungetc(s);
+
+ return c;
}
-/* If there's room, put non-EOF c back into msg_buf and rewind so it's
- * read next. c need not be the value already in the buffer. If there
- * isn't room then return EOF, else return c. */
-static int
-Ungetc (int c, m_getfld_state_t s) {
- if (s->readpos == s->msg_buf) {
- return EOF;
- }
- --s->bytes_read;
- return *--s->readpos = (unsigned char) c;
+/* If there's room, undo the consumption of one character from msg_buf,
+ * rewinding so it's read next, else die.
+ *
+ * "No room" means readpos is already at the start of msg_buf; that is
+ * reported through adios().  Otherwise both readpos and bytes_read
+ * are stepped back by one.  The buffer contents are not modified. */
+static void
+Ungetc(m_getfld_state_t s)
+{
+ if (s->readpos == s->msg_buf)
+ adios(NULL, "Ungetc() at start of message buffer.");
+
+ s->readpos--;
+ s->bytes_read--;
}
;
if (c != EOF)
- Ungetc(c, s);
+ Ungetc(s);
*bufsz = *buf = 0;
leave_getfld (s);
return s->state = FILEEOF;
while ((c = Getc(s)) != EOF && eom (c, s))
;
if (c != EOF)
- Ungetc(c, s);
+ Ungetc(s);
*bufsz = *buf = 0;
leave_getfld (s);
return s->state = FILEEOF;
}
+/* m_getfld() with the existing iob.
+ *
+ * name, buf and bufsz are passed to m_getfld() unchanged, together
+ * with the FILE recorded in (*gstate)->iob, and m_getfld()'s result is
+ * returned.  *gstate must be non-null; if it is null, adios() is
+ * called with a diagnostic instead of dereferencing it. */
+int m_getfld2(m_getfld_state_t *gstate, char name[NAMESZ], char *buf, int *bufsz)
+{
+ if (!*gstate)
+ adios(NULL, "m_getfld2 without gstate");
+
+ return m_getfld(gstate, name, buf, bufsz, (*gstate)->iob);
+}
+
+
void
m_unknown(m_getfld_state_t *gstate, FILE *iob)
{
while ((c = Getc(s)) != EOF && eom (c, s))
;
if (c != EOF)
- Ungetc(c, s);
+ Ungetc(s);
}
leave_getfld (s);
}
+/* m_unknown() with the existing iob.
+ *
+ * Calls m_unknown() with the FILE recorded in (*gstate)->iob.
+ * *gstate must be non-null; if it is null, adios() is called with a
+ * diagnostic instead of dereferencing it. */
+void m_unknown2(m_getfld_state_t *gstate)
+{
+ if (!*gstate)
+ adios(NULL, "m_unknown2 without gstate");
+
+ m_unknown(gstate, (*gstate)->iob);
+}
+
+
/*
* test for msg delimiter string
*/