]> diplodocus.org Git - nmh/blob - sbr/addrparse.y
mhlsbr.c: Don't strchr(3) non-string NUL-less buffer.
[nmh] / sbr / addrparse.y
1 %name-prefix "addr" /* externally visible parser symbols use the "addr" prefix instead of the default "yy" */
2
3 /*
4 * Comments on these tokens:
5 *
6 * ATEXT is defined in RFC 5322 as:
7 * ALPHA / DIGIT /
8 * '!' / '#' / '$' / '%' / '&' / ''' / '*' / '+' / '-' / '/' /
9 * '=' / '?' / '^' / '_' / '`' / '{' / '|' / '}' / '~'
10 *
11 * All printable ASCII characters except for spaces and specials
12 *
13 * QSTRING is a quoted string, which is printable ASCII characters except
14 * for \ or the quote character surrounded by quotes. Use \ for quoting
15 * \ and the quote character.
16 *
17 * FWS is folding white space, which is defined as SP (\040), HTAB (\011),
18 * and NL (\012). Technically CR (\015) is part of that, but traditionally
19 * Unix format files don't have that character.
20 *
21 * COMMENT is a comment string, which is printable ASCII characters except
22 * for '(', ')', and '\'. Uses same quoting rules as QSTRING. To make
23 * the grammar slightly less conflict-happy, COMMENT must include any FWS
24 * in front of or behind it (the lexer simply eats that FWS).
25 *
26 * Everything else is a SPECIAL, which is returned directly. These are
27 * defined in RFC 5322 as:
28 *
29 * '(' / ')' / '<' / '>' / '[' / ']' / ':' / ';' / '@' / '\' / ',' / '.' /
30 * '"'
31 *
32 * Technically we don't return all of these; we handle () in comments, " in
33 * quoted string handling, and \ in those handlers.
34 */
35
36 %token ATEXT QSTRING FWS COMMENT /* multi-character tokens described above; every other special is returned as its own character */
37
38 %%
39
40 /*
41 * A list of addresses; the main entry point to the parser. The list may be empty, a single address, or several addresses separated by ','. The first address does not need a leading ','; a leading ',' is still accepted via the empty alternative.
42 */
43 address_list: /* nothing */
44 | address /* first address of the list; without this every list had to begin with ',' */
45 | address_list ',' address ;
46
47 /*
48 * A single address; can be a single mailbox, or a group address
49 */
50
51 address: /* one element of address_list */
52 mailbox /* a single mailbox */
53 | group /* a group: phrase ':' ... ';' */
54 ;
55
56 /*
57 * A traditional single mailbox. Either in Name <user@name> or just a bare
58 * email address with no angle brackets.
59 */
60
61 mailbox: /* used both directly as an address and as a member of mailbox_list */
62 name_addr /* address in angle brackets, optionally preceded by a display name */
63 | addr_spec /* bare local_part '@' domain, no angle brackets */
64 ;
65
66 /*
67 * An email address, with the angle brackets. Optionally contains a display
68 * name in the front. The RFC says "display-name", but display-name is
69 * defined as a phrase, so we just use that.
70 */
71
72 name_addr:
73 phrase angle_addr /* display name (a phrase) followed by the bracketed address */
74 | angle_addr /* bracketed address with no display name */
75 ;
76
77 angle_addr: /* '<' addr_spec '>' with optional cfws on either side; the four alternatives spell out every combination */
78 cfws '<' addr_spec '>' cfws /* cfws before and after */
79 | cfws '<' addr_spec '>' /* cfws before only */
80 | '<' addr_spec '>' cfws /* cfws after only */
81 | '<' addr_spec '>' /* no surrounding cfws */
82 ;
83
84 /*
85 * The group list syntax. The group list is allowed to be empty or be
86 * spaces, so we define group_list as either being a mailbox list or
87 * just being CFWS. mailbox_list can be empty, so that can handle the
88 * case of nothing being between the ':' and the ';'
89 */
90 group: /* display-name phrase, ':', the group contents, then ';' */
91 phrase ':' group_list ';' cfws /* with trailing cfws after the ';' */
92 | phrase ':' group_list ';' /* no trailing cfws */
93 | phrase ':' ';' /* NOTE(review): group_list can already match nothing via mailbox_list, so this overlaps the alternative above -- check the generated parser for a conflict here */
94 ;
95
96 group_list: /* what may appear between the ':' and ';' of a group */
97 mailbox_list /* member mailboxes (may match nothing) */
98 | cfws /* only white space or a comment between ':' and ';' */
99 ;
100
101 mailbox_list: /* nothing */ /* group members: empty, one mailbox, or mailboxes separated by ',' */
102 | mailbox /* first member; without this every member list had to begin with ',' */
103 | mailbox_list ',' mailbox ;
104
105 addr_spec: /* the actual email address */
106 local_part '@' domain /* the '@' is returned by the lexer as a literal special */
107 ;
108
109 local_part: /* the part of addr_spec before the '@' */
110 dot_atom /* unquoted form: atom or dotted atoms */
111 | quoted_string /* quoted form */
112 ;
113
114 domain: /* the part of addr_spec after the '@' */
115 dot_atom /* atom or dotted atoms */
116 | domain_literal /* bracketed literal: '[' ... ']' */
117 ;
118
119 domain_literal: /* '[' dtext_fws ']' with optional cfws on either side; all four combinations are listed */
120 cfws '[' dtext_fws ']' cfws /* cfws before and after */
121 | cfws '[' dtext_fws ']' /* cfws before only */
122 | '[' dtext_fws ']' cfws /* cfws after only */
123 | '[' dtext_fws ']' /* no surrounding cfws */
124 ;
125
126 /*
127 * It was hard to make a definition of dtext and domain-literal that
128 * exactly matched the RFC. This was the best I could come up with.
129 */
130
131 dtext_fws: /* nothing */ /* contents of a domain literal: possibly empty run of ATEXT tokens, each with optional FWS around it */
132 | FWS ATEXT FWS /* first ATEXT, FWS on both sides */
133 | FWS ATEXT /* first ATEXT, leading FWS only */
134 | ATEXT FWS /* first ATEXT, trailing FWS only */
135 | dtext_fws FWS ATEXT FWS /* further ATEXT, FWS on both sides */
136 | dtext_fws FWS ATEXT /* further ATEXT, leading FWS only */
137 | dtext_fws ATEXT FWS /* further ATEXT, trailing FWS only */
138 | dtext_fws ATEXT /* further ATEXT, no FWS (also covers a lone first ATEXT via the empty alternative) */
139 ;
140
141 phrase: /* one or more words; used as a display name and as a group name */
142 word /* a single word */
143 | phrase word /* left-recursive: append another word */
144 | obs_phrase /* obsolete form that also permits '.'; NOTE(review): a plain run of words matches both this and "phrase word" -- check the generated parser for conflicts */
145 ;
146
147 /*
148 * obs-phrase is basically the same as "phrase", but after the first word
149 * you're allowed to have a '.'. I believe this is correct.
150 */
151
152 obs_phrase: /* the first element must be a word; what may follow is defined by obs_phrase_list */
153 word obs_phrase_list
154 ;
155
156 obs_phrase_list: /* what follows the first word of an obs_phrase: any non-empty mix of words and '.' */
157 word /* list starts with a word */
158 | '.' /* list starts with a '.' */
159 | obs_phrase_list word | obs_phrase_list '.' /* '.' may now appear at any position, e.g. "John Q. Public"; it was previously accepted only as the first element */
160 ;
161
162 word: /* building block of phrase and obs_phrase */
163 atom /* unquoted word */
164 | quoted_string /* quoted word */
165 ;
166
167 /*
168 * This makes sure any comments and white space before/after the quoted string
169 * get eaten.
170 */
171 quoted_string: /* a QSTRING token with optional cfws on either side; all four combinations are listed */
172 cfws QSTRING cfws /* cfws before and after */
173 | QSTRING cfws /* cfws after only */
174 | cfws QSTRING /* cfws before only */
175 | QSTRING /* no surrounding cfws */
176 ;
177
178 atom: /* a single ATEXT token with optional cfws on either side; all four combinations are listed */
179 cfws ATEXT cfws /* cfws before and after */
180 | cfws ATEXT /* cfws before only */
181 | ATEXT cfws /* cfws after only */
182 | ATEXT /* no surrounding cfws */
183 ;
184
185 /*
186 * Making dot-atom work was a little confusing; I finally handled it by
187 * defining "dot_atom_text" as having two or more ATEXTs separated by
188 * '.', and defining dot_atom as allowing a single atom.
189 */
190 dot_atom: /* used for local_part and domain */
191 atom /* single ATEXT with no '.'; atom already handles its own surrounding cfws */
192 | cfws dot_atom_text cfws /* dotted form, cfws before and after */
193 | cfws dot_atom_text /* dotted form, cfws before only */
194 | dot_atom_text cfws /* dotted form, cfws after only */
195 | dot_atom_text /* dotted form, no surrounding cfws */
196 ;
197
198 dot_atom_text: /* two or more ATEXT tokens joined by '.', with no cfws in between */
199 ATEXT '.' ATEXT /* minimum form: exactly two ATEXTs */
200 | dot_atom_text '.' ATEXT /* left-recursive: append another '.' ATEXT */
201 ;
202
203 /*
204 * As mentioned above, technically the CFWS definition in the RFC allows
205 * FWS before and after the comment. The lexer is responsible for eating
206 * the FWS before/after comments.
207 */
208 cfws: /* exactly one token: a comment or folding white space */
209 COMMENT /* comment token; per the notes in this file the lexer folds adjacent FWS into it */
210 | FWS /* folding white space on its own */
211 ;