2 * icalendar.l -- icalendar (RFC 5545) scanner
4 * This code is Copyright (c) 2014, by the authors of nmh. See the
5 * COPYRIGHT file in the root directory of the nmh distribution for
6 * complete copyright information.
9 /* See porting notes at end of this file. */
13 #include "h/icalendar.h"
14 #include "sbr/icalparse.h"
16 static char *unfold (char *, size_t *);
/* Explicit (void): an empty parameter list would declare no prototype. */
17 static void destroy_icallex (void);
21 * These flex options aren't used:
23 * case-insensitive not needed
24 * align not used because this isn't performance critical
26 %option outfile="lex.yy.c" prefix="ical"
27 %option perf-report warn
28 %option never-interactive noinput noyywrap
31 * From RFC 5545 § 3.1.
33 name {iana-token}|{x-name}
34 iana-token ({ALPHA}|{DIGIT}|-)+
35 x-name X-({vendorid}-)?({ALPHA}|{DIGIT}|-)+
36 vendorid ({ALPHA}|{DIGIT}){3,}
37 param-name {iana-token}|{x-name}
38 param-value {paramtext}|{quoted-string}
39 paramtext {SAFE-CHAR}*
41 quoted-string {DQUOTE}{QSAFE-CHAR}*{DQUOTE}
42 QSAFE-CHAR {WSP}|[\x21\x23-\x7E]|{NON-US-ASCII}
43 SAFE-CHAR {WSP}|[\x21\x23-\x2B\x2D-\x39\x3C-\x7E]|{NON-US-ASCII}
44 VALUE-CHAR {WSP}|[\x21-\x7E]|{NON-US-ASCII}
45 /* The following is a short-cut definition that admits more
46 than the UNICODE characters permitted by RFC 5545. */
47 NON-US-ASCII [\x80-\xF8]{2,4}
48 /* The following excludes HTAB, unlike {CTL}. */
49 CONTROL [\x00-\x08\x0A-\x1F\x7F]
51 /* Solaris lex requires that the , be escaped. */
54 * From RFC 5545 § 2.1.
60 * From RFC 5545 § 3.3.11.
62 text ({TSAFE-CHAR}|:|{DQUOTE}|{ESCAPED-CHAR})*
63 ESCAPED-CHAR \\\\|\\;|\\,|\\N|\\n
/* Printable characters except DQUOTE, comma, colon, semicolon and backslash. */
64 TSAFE-CHAR {WSP}|[\x21\x23-\x2B\x2D-\x39\x3C-\x5B\x5D-\x7E]|{NON-US-ASCII}
67 * Core rules (definitions) from RFC 5234 Appendix B.1.
69 ALPHA [\x41-\x5A\x61-\x7A]
73 /* Variance from RFC 5234: the {CR} is required in
74 CRLF, but it is optional below to support Unix
75 filesystem convention. */
83 LWSP ({WSP}|({CRLF}{WSP}))*
93 folded-name {name}({fold}+{iana-token})+
94 folded-param-name {param-name}({fold}+{iana-token})+
95 folded-quoted-string {DQUOTE}{QSAFE-CHAR}*{fold}+{QSAFE-CHAR}*{DQUOTE}
96 folded-param-value {paramtext}({fold}{paramtext}*)+|{folded-quoted-string}
97 folded-value {VALUE-CHAR}*({fold}{VALUE-CHAR}*)+
99 %s s_name s_colon s_value s_semicolon s_param_name s_equal s_comma
105 /* Eat any leading newlines. */
110 /* flex 2.5.4 defines icalleng as an int instead of a size_t,
112 size_t len = icalleng;
113 unfold (icaltext, &len);
116 icallval = strdup (icaltext);
117 /* yy_push_state (s_name); * s_name */
118 BEGIN (s_name); /* s_name */
124 icallval = strdup (icaltext);
125 /* yy_push_state (s_name); * s_name */
126 BEGIN (s_name); /* s_name */
132 /* Don't need to strdup a single character. */
134 /* yy_pop_state (); * INITIAL */
135 /* yy_push_state (s_colon); * s_colon */
136 BEGIN (s_colon); /* s_colon */
142 /* flex 2.5.4 defines icalleng as an int instead of a size_t,
144 size_t len = icalleng;
145 unfold (icaltext, &len);
148 icallval = strdup (icaltext);
149 /* yy_pop_state (); * INITIAL */
150 /* yy_push_state (s_value); * s_value */
151 BEGIN (s_value); /* s_value */
157 icallval = strdup (icaltext);
158 /* yy_pop_state (); * INITIAL */
159 /* yy_push_state (s_value); * s_value */
160 BEGIN (s_value); /* s_value */
166 /* Don't need to strdup a single character. */
168 /* yy_push_state (s_semicolon); * s_name, s_semicolon */
169 BEGIN (s_semicolon); /* s_name, s_semicolon */
170 return ICAL_SEMICOLON;
174 {folded-param-name} {
175 /* flex 2.5.4 defines icalleng as an int instead of a size_t,
177 size_t len = icalleng;
178 unfold (icaltext, &len);
181 icallval = strdup (icaltext);
182 /* yy_pop_state (); * s_name */
183 /* yy_push_state (s_param_name); * s_name, s_param_name */
184 BEGIN (s_param_name); /* s_name, s_param_name */
185 return ICAL_PARAM_NAME;
190 icallval = strdup (icaltext);
191 /* yy_pop_state (); * s_name */
192 /* yy_push_state (s_param_name); * s_name, s_param_name */
193 BEGIN (s_param_name); /* s_name, s_param_name */
194 return ICAL_PARAM_NAME;
199 /* Don't need to strdup a single character. */
201 /* yy_pop_state (); * s_name */
202 /* yy_push_state (s_equal); * s_name, s_equal */
203 BEGIN (s_equal); /* s_name, s_equal */
208 {folded-param-value} {
209 /* flex 2.5.4 defines icalleng as an int instead of a size_t,
211 size_t len = icalleng;
212 unfold (icaltext, &len);
215 icallval = strdup (icaltext);
216 /* yy_pop_state (); * s_name */
217 BEGIN (s_name); /* s_name */
218 return ICAL_PARAM_VALUE;
223 icallval = strdup (icaltext);
224 /* yy_pop_state (); * s_name */
225 BEGIN (s_name); /* s_name */
226 return ICAL_PARAM_VALUE;
231 /* Don't need to strdup a single character. */
233 /* yy_push_state (s_comma); * s_name, s_comma */
234 BEGIN (s_comma); /* s_name, s_comma */
240 /* Use start condition to ensure that all newlines are where expected. */
242 /* yy_pop_state (); * INITIAL */
243 BEGIN (INITIAL); /* INITIAL */
250 icallval = strdup ("");
251 /* yy_pop_state (); * INITIAL */
252 /* yy_push_state (s_value); * s_value */
253 BEGIN (s_value); /* s_value */
254 /* Push the newline back so it can be handled in the proper state. */
260 /* By default, flex will just pass unmatched text. Catch it instead. */
261 advise (NULL, "unexpected input: |%s|\n", icaltext);
272 unfold (char *text, size_t *leng) {
273 /* It's legal to shorten text and modify leng (because we don't
277 /* First squash any CR-LF-WSP sequences. */
278 while ((cp = strstr (text, "\r\n ")) || (cp = strstr (text, "\r\n\t"))) {
279 /* Subtract any characters prior to fold sequence and 3 for
280 the fold sequence, and add 1 for the terminating null. */
281 (void) memmove (cp, cp + 3, *leng - (cp - text) - 3 + 1);
285 /* Then squash any LF-WSP sequences. */
286 while ((cp = strstr (text, "\n ")) || (cp = strstr (text, "\n\t"))) {
287 /* Subtract any characters prior to fold sequence and 2 for
288 the fold sequence, and add 1 for the terminating null. */
289 (void) memmove (cp, cp + 2, *leng - (cp - text) - 2 + 1);
298 * To clean up memory, call the function provided by modern
299 * versions of flex. Older versions don't have it, and of
300 * course this won't do anything if the scanner was built
301 * with something other than flex.
305 #if defined FLEX_SCANNER && defined YY_FLEX_SUBMINOR_VERSION
306 /* Hack: rely on the fact that the YY_FLEX_SUBMINOR_VERSION
307 #define was added to flex (flex.skl v. 2.163) after
308 yylex_destroy() was added. */
310 #endif /* FLEX_SCANNER && YY_FLEX_SUBMINOR_VERSION */
314 * See comment in h/icalendar.h about having to provide these
315 * because flex 2.5.4 doesn't.
318 icalset_inputfile (FILE *file) {
323 icalset_outputfile (FILE *file) {
330 * POSIX lex only supports an entry point name of yylex(). nmh
331 * programs can contain multiple scanners (see sbr/dtimep.l), so
332 * nmh requires the use of flex to build them.
333 * In addition, if there is a need to port this to Solaris lex:
334 * - Use the lex -e or -w option.
335 * - Comment out all of the %options.
336 * - Comment out the <<EOF>> rule.
337 * - The start condition and pattern must be on the same line.
338 * - Comments must be inside rules, not just before them.
339 * - Don't use a start condition stack. In the code above, BEGINs are
340 * used instead, and the contents of an imaginary start condition
341 * stack are shown after each. The stack operations are also shown