/* icalendar.l -- icalendar (RFC 5545) scanner
 *
 * This code is Copyright (c) 2014, by the authors of nmh.  See the
 * COPYRIGHT file in the root directory of the nmh distribution for
 * complete copyright information.
 */

/* See porting notes at end of this file. */

%{
#include "h/mh.h"
#include "h/icalendar.h"
#include "sbr/icalparse.h"
#include "sbr/base64.h"

static char *unfold (char *, size_t *);
static void destroy_icallex(void);
%}

/*
 * These flex options aren't used:
 *   8bit not needed
 *   case-insensitive not needed
 *   align not used because this isn't performance critical
 */
%option outfile="lex.yy.c" prefix="ical"
%option perf-report warn
%option never-interactive noinput noyywrap

/*
 * From RFC 5545 § 3.1.
 */
name                 {iana-token}|{x-name}
iana-token           ({ALPHA}|{DIGIT}|-)+
x-name               X-({vendorid}-)?({ALPHA}|{DIGIT}|-)+
vendorid             ({ALPHA}|{DIGIT}){3,}
param-name           {iana-token}|{x-name}
param-value          {paramtext}|{quoted-string}
paramtext            {SAFE-CHAR}*
value                {VALUE-CHAR}*
quoted-string        {DQUOTE}{QSAFE-CHAR}*{DQUOTE}
QSAFE-CHAR           {WSP}|[\x21\x23-\x7E]|{NON-US-ASCII}
SAFE-CHAR            {WSP}|[\x21\x23-\x2B\x2D-\x39\x3C-\x7E]|{NON-US-ASCII}
VALUE-CHAR           {WSP}|[\x21-\x7E]|{NON-US-ASCII}
/* The following is a short-cut definition that admits more than the
   UNICODE characters permitted by RFC 5545. */
NON-US-ASCII         [\x80-\xF8]{2,4}
/* The following excludes HTAB, unlike {CTL}. */
CONTROL              [\x00-\x08\x0A-\x1F\x7F]
EQUAL                =
/* Solaris lex requires that the , be escaped. */
COMMA                \,

/*
 * From RFC 5545 § 2.1.
 */
COLON                :
SEMICOLON            ;

/*
 * From RFC 5545 § 3.3.11.
 */
text                 ({TSAFE-CHAR}|:|{DQUOTE}|{ESCAPED-CHAR})*
ESCAPED-CHAR         \\\\|\\;|\\,|\\N|\\n
TSAFE-CHAR           {WSP}|[\x21\x23-\x2B\x2D-\x39\x3C-\x5B\x5D-\x7E]|{NON-US-ASCII}

/*
 * Core rules (definitions) from RFC 5234 Appendix B.1.
 */
ALPHA                [\x41-\x5A\x61-\x7A]
BIT                  [01]
CHAR                 [\x01-\x7F]
CR                   \x0D
/* Variance from RFC 5234:  the {CR} is required in CRLF, but it is
   optional below to support Unix filesystem convention. */
CRLF                 ({CR}?{LF})+
CTL                  [\x00-\x1F\x7F]
DIGIT                [\x30-\x39]
DQUOTE               \x22
HEXDIG               {DIGIT}|[A-F]
HTAB                 \x09
LF                   \x0A
LWSP                 ({WSP}|({CRLF}{WSP}))*
OCTET                [\x00-\xFF]
SP                   \x20
VCHAR                [\x21-\x7E]
WSP                  {SP}|{HTAB}

/*
 * Our definitions.
 */
fold                 {CRLF}{WSP}
folded-name          {name}({fold}+{iana-token})+
folded-param-name    {param-name}({fold}+{iana-token})+
folded-quoted-string {DQUOTE}{QSAFE-CHAR}*{fold}+{QSAFE-CHAR}*{DQUOTE}
folded-param-value   {paramtext}({fold}{paramtext}*)+|{folded-quoted-string}
folded-value         {VALUE-CHAR}*({fold}{VALUE-CHAR}*)+

/*
 * Start conditions.  Each one names the token that was just returned,
 * and therefore selects which tokens may come next:
 *   INITIAL       start of a contentline: leading newlines, then a name
 *   s_name        after a name or a param-value: COLON, SEMICOLON, COMMA
 *   s_colon       after COLON: a value, or a newline (null value)
 *   s_value       after a value: the terminating newline
 *   s_semicolon   after SEMICOLON: a param-name
 *   s_param_name  after a param-name: EQUAL
 *   s_equal       after EQUAL: a param-value
 *   s_comma       after COMMA: a param-value
 * The conditions are inclusive (%s), so the rules without a start
 * condition (the catch-all and the final end-of-file rule) are active in
 * all of them.
 */
%s s_name s_colon s_value s_semicolon s_param_name s_equal s_comma

%%

<INITIAL>{CRLF} {
    /* Eat any leading newlines. */
}

<INITIAL>{folded-name} {
    /* flex 2.5.4 defines icalleng as an int instead of a size_t,
       so copy it. */
    size_t len = icalleng;

    unfold (icaltext, &len);
    icalleng = len;

    icallval = strdup (icaltext);
    /* yy_push_state (s_name);           * s_name */
    BEGIN (s_name);                     /* s_name */
    return ICAL_NAME;
}

<INITIAL>{name} {
    icallval = strdup (icaltext);
    /* yy_push_state (s_name);           * s_name */
    BEGIN (s_name);                     /* s_name */
    return ICAL_NAME;
}

<s_name>{COLON} {
    /* Don't need to strdup a single character. */
    icallval = icaltext;
    /* yy_pop_state ();                  * INITIAL */
    /* yy_push_state (s_colon);          * s_colon */
    BEGIN (s_colon);                    /* s_colon */
    return ICAL_COLON;
}

<s_colon>{folded-value} {
    /* flex 2.5.4 defines icalleng as an int instead of a size_t,
       so copy it. */
    size_t len = icalleng;

    unfold (icaltext, &len);
    icalleng = len;

    icallval = strdup (icaltext);
    /* yy_pop_state ();                  * INITIAL */
    /* yy_push_state (s_value);          * s_value */
    BEGIN (s_value);                    /* s_value */
    return ICAL_VALUE;
}

<s_colon>{value} {
    icallval = strdup (icaltext);
    /* yy_pop_state ();                  * INITIAL */
    /* yy_push_state (s_value);          * s_value */
    BEGIN (s_value);                    /* s_value */
    return ICAL_VALUE;
}

<s_name>{SEMICOLON} {
    /* Don't need to strdup a single character. */
    icallval = icaltext;
    /* yy_push_state (s_semicolon);      * s_name, s_semicolon */
    BEGIN (s_semicolon);                /* s_name, s_semicolon */
    return ICAL_SEMICOLON;
}

<s_semicolon>{folded-param-name} {
    /* flex 2.5.4 defines icalleng as an int instead of a size_t,
       so copy it. */
    size_t len = icalleng;

    unfold (icaltext, &len);
    icalleng = len;

    icallval = strdup (icaltext);
    /* yy_pop_state ();                  * s_name */
    /* yy_push_state (s_param_name);     * s_name, s_param_name */
    BEGIN (s_param_name);               /* s_name, s_param_name */
    return ICAL_PARAM_NAME;
}

<s_semicolon>{param-name} {
    icallval = strdup (icaltext);
    /* yy_pop_state ();                  * s_name */
    /* yy_push_state (s_param_name);     * s_name, s_param_name */
    BEGIN (s_param_name);               /* s_name, s_param_name */
    return ICAL_PARAM_NAME;
}

<s_param_name>{EQUAL} {
    /* Don't need to strdup a single character. */
    icallval = icaltext;
    /* yy_pop_state ();                  * s_name */
    /* yy_push_state (s_equal);          * s_name, s_equal */
    BEGIN (s_equal);                    /* s_name, s_equal */
    return ICAL_EQUAL;
}

<s_equal,s_comma>{folded-param-value} {
    /* flex 2.5.4 defines icalleng as an int instead of a size_t,
       so copy it. */
    size_t len = icalleng;

    unfold (icaltext, &len);
    icalleng = len;

    icallval = strdup (icaltext);
    /* yy_pop_state ();                  * s_name */
    BEGIN (s_name);                     /* s_name */
    return ICAL_PARAM_VALUE;
}

<s_equal,s_comma>{param-value} {
    icallval = strdup (icaltext);
    /* yy_pop_state ();                  * s_name */
    BEGIN (s_name);                     /* s_name */
    return ICAL_PARAM_VALUE;
}

<s_name>{COMMA} {
    /* Don't need to strdup a single character. */
    icallval = icaltext;
    /* yy_push_state (s_comma);          * s_name, s_comma */
    BEGIN (s_comma);                    /* s_name, s_comma */
    return ICAL_COMMA;
}

<s_value>{CRLF} {
    /* Use start condition to ensure that all newlines are where
       expected. */
    icallval = icaltext;
    /* yy_pop_state ();                  * INITIAL */
    BEGIN (INITIAL);                    /* INITIAL */
    return ICAL_CRLF;
}

<s_colon>{CRLF} {
    /* Null value. */
    icallval = strdup ("");
    /* yy_pop_state ();                  * INITIAL */
    /* yy_push_state (s_value);          * s_value */
    BEGIN (s_value);                    /* s_value */
    /* Push the newline back so it can be handled in the proper state. */
    unput ('\n');
    return ICAL_VALUE;
}

. {
    /* By default, flex will just pass unmatched text.  Catch it instead,
       by appending it to the "unexpected" string of the last contentline
       of the last vevent.  If there is no such contentline, the text is
       dropped. */
    /* Don't dereference vevents.last if it is NULL.  (The list is
       maintained outside of this file; the check is defensive.) */
    contentline *clines =
        vevents.last ? vevents.last->contentlines : NULL;
    contentline *cline;

    if (clines  &&  (cline = clines->last)) {
        if (cline->unexpected == NULL) {
            cline->unexpected = charstring_create (0);
        }
        charstring_append_cstring (cline->unexpected, icaltext);
    }
}

<INITIAL><<EOF>> {
    /* See next rule for when start state is not INITIAL. */
    destroy_icallex ();
    yyterminate ();
}

<<EOF>> {
    /* Missing a final newline after a token.  The input does not conform
       to RFC 5545 § 3.1, which requires that each contentline end with a
       CRLF.  (Assume that the token is at the end of a contentline.)  Be
       liberal in what we accept by faking a newline here, and setting the
       start state to terminate on the next call. */
    BEGIN (INITIAL);
    return ICAL_CRLF;
}

%%

/*
 * Remove fold sequences from text, in place.
 *
 * text is the null-terminated token, and *leng is its length, not
 * counting the terminating null.  Every CR-LF-WSP sequence is removed
 * first, and then every LF-WSP sequence, where WSP is a space or a tab.
 * *leng is decremented by the number of characters removed.  The text
 * is rescanned from its beginning after each removal.
 *
 * Returns text.
 */
static char *
unfold (char *text, size_t *leng)
{
    /* It's legal to shorten text and modify leng (because we don't
       use yymore()). */
    char *cp;

    /* First squash any CR-LF-WSP sequences. */
    while ((cp = strstr (text, "\r\n "))  ||
           (cp = strstr (text, "\r\n\t"))) {
        /* Subtract any characters prior to fold sequence and 3 for
           the fold sequence, and add 1 for the terminating null. */
        (void) memmove (cp, cp + 3, *leng - (cp - text) - 3 + 1);
        *leng -= 3;
    }

    /* Then squash any LF-WSP sequences. */
    while ((cp = strstr (text, "\n "))  ||
           (cp = strstr (text, "\n\t"))) {
        /* Subtract any characters prior to fold sequence and 2 for
           the fold sequence, and add 1 for the terminating null. */
        (void) memmove (cp, cp + 2, *leng - (cp - text) - 2 + 1);
        *leng -= 2;
    }

    return text;
}

/*
 * To clean up memory, call the function provided by modern
 * versions of flex.  Older versions don't have it, and of
 * course this won't do anything if the scanner was built
 * with something other than flex.
 */
static void
destroy_icallex(void)
{
#if defined FLEX_SCANNER  &&  defined YY_FLEX_SUBMINOR_VERSION
    /* Hack:  rely on fact that the YY_FLEX_SUBMINOR_VERSION
       #define was added to flex (flex.skl v. 2.163) after
       yylex_destroy() was added. */
    icallex_destroy ();
#endif /* FLEX_SCANNER  &&  YY_FLEX_SUBMINOR_VERSION */
}

/*
 * See comment in h/icalendar.h about having to provide these
 * because flex 2.5.4 doesn't.
 */

/* Set the stream that the scanner reads from. */
void
icalset_inputfile (FILE *file)
{
    yyin = file;
}

/* Set the scanner's output stream. */
void
icalset_outputfile (FILE *file)
{
    yyout = file;
}

/*
 * Porting notes
 * -------------
 * POSIX lex only supports an entry point name of yylex().  nmh
 * programs can contain multiple scanners (see sbr/dtimep.l), so
 * nmh requires the use of flex to build them.
 * In addition, if there is a need to port this to Solaris lex:
 * - Use the lex -e or -w option.
 * - Comment out all of the %options.
 * - Comment out the <<EOF>> rule.
 * - The start condition and pattern must be on the same line.
 * - Comments must be inside rules, not just before them.
 * - Don't use start condition stack.  In the code above, BEGIN's are
 *   used instead, and the contents of an imaginary start condition
 *   stack are shown after each.  The stack operations are also shown
 *   in comments.
 */