+%name-prefix "addr" /* generated parser symbols get an "addr" prefix in place of the default "yy" */
+
+/*
+ * Comments on these tokens:
+ *
+ * ATEXT is defined in RFC 5322 as:
+ * ALPHA / DIGIT /
+ * '!' / '#' / '$' / '%' / '&' / ''' / '*' / '+' / '-' / '/' /
+ * '=' / '?' / '^' / '_' / '`' / '{' / '|' / '}' / '~'
+ *
+ * All printable ASCII characters except for spaces and specials
+ *
+ * QSTRING is a quoted string, which is printable ASCII characters except
+ * for \ or the quote character surrounded by quotes. Use \ for quoting
+ * \ and the quote character.
+ *
+ * FWS is folding white space, which is defined as SP (\040), HTAB (\011),
+ * and NL (\012). Technically CR (\015) is part of that, but traditionally
+ * Unix format files don't have that character.
+ *
+ * COMMENT is a comment string, which is printable ASCII characters except
+ * for '(', ')', and '\'. Uses same quoting rules as QSTRING. To make
+ * the grammar slightly less conflict-happy, COMMENT must include any FWS
+ * in front of or behind it (simply have it eaten in the lexer).
+ *
+ * Everything else is a SPECIAL, which is returned directly. These are
+ * defined in RFC 5322 as:
+ *
+ * '(' / ')' / '<' / '>' / '[' / ']' / ':' / ';' / '@' / '\' / ',' / '.' /
+ * '"'
+ *
+ * Technically we don't return all of these; we handle () in comments, " in
+ * quoted string handling, and \ in those handlers.
+ */
+
+%token ATEXT QSTRING FWS COMMENT /* named terminals; the specials appear in the rules below as character literals */
+
+%%
+
+/*
+ * A list of addresses; the main entry point to the parser
+ */
+/*
+ * Zero or more addresses separated by ','.
+ *
+ * The bare "address" alternative lets the list start with an address.
+ * Without it the only way to add an address was "address_list ',' address"
+ * on top of the empty list, so every address -- including the first --
+ * had to be preceded by a ',' and a lone "user@host" could not be parsed.
+ *
+ * The empty alternative is kept so that empty input, and a list with an
+ * empty leading element (",user@host"), are still accepted.
+ */
+address_list: /* nothing */
+ | address
+ | address_list ',' address
+ ;
+
+/*
+ * A single address; can be a single mailbox, or a group address
+ */
+
+address: /* one element of an address_list */
+ mailbox /* a single recipient */
+ | group /* "phrase ':' ... ';'" group syntax */
+ ;
+
+/*
+ * A traditional single mailbox. Either in Name <user@name> or just a bare
+ * email address with no angle brackets.
+ */
+
+mailbox: /* a single mailbox, in either of its two spellings */
+ name_addr /* angle-bracketed form, optionally preceded by a phrase */
+ | addr_spec /* bare local_part '@' domain */
+ ;
+
+/*
+ * An email address, with the angle brackets. Optionally contains a display
+ * name in the front. The RFC says "display-name", but display-name is
+ * defined as a phrase, so we just use that.
+ */
+
+name_addr: /* angle-bracketed address, with or without a display name */
+ phrase angle_addr /* display name (a phrase), then the bracketed address */
+ | angle_addr /* bracketed address alone */
+ ;
+
+angle_addr: /* '<' addr_spec '>' with optional cfws on either side, spelled out as four cases */
+ cfws '<' addr_spec '>' cfws /* cfws on both sides */
+ | cfws '<' addr_spec '>' /* leading cfws only */
+ | '<' addr_spec '>' cfws /* trailing cfws only */
+ | '<' addr_spec '>' /* no surrounding cfws */
+ ;
+
+/*
+ * The group list syntax. The group list is allowed to be empty or be
+ * spaces, so we define group_list as either being a mailbox list or
+ * just being CFWS. mailbox_list can be empty, so that can handle the
+ * case of nothing being between the ':' and the ';'
+ */
+/*
+ * A group: display phrase, ':', the member list, ';', and optional
+ * trailing cfws.
+ *
+ * There is deliberately no separate "phrase ':' ';'" alternative.  The
+ * empty group is already derived through group_list -> mailbox_list ->
+ * nothing, and listing it again made the parser choose between shifting
+ * ';' and reducing the empty list.  yacc resolves that by shifting, which
+ * committed to the short alternative and made an empty group followed by
+ * cfws ("name:; ") a syntax error.
+ */
+group:
+ phrase ':' group_list ';' cfws
+ | phrase ':' group_list ';'
+ ;
+
+group_list: /* what may appear between the ':' and ';' of a group */
+ mailbox_list /* a mailbox list, which may itself be empty */
+ | cfws /* or nothing but a comment / folding white space */
+ ;
+
+/*
+ * Zero or more mailboxes separated by ','; used for the members of a group.
+ *
+ * The bare "mailbox" alternative lets the list start with a mailbox.
+ * Without it the only way to add a mailbox was "mailbox_list ',' mailbox"
+ * on top of the empty list, so every member -- including the first --
+ * had to be preceded by a ','.
+ *
+ * The empty alternative is kept: it is what allows an empty group, and
+ * it keeps a list with an empty leading element accepted.
+ */
+mailbox_list: /* nothing */
+ | mailbox
+ | mailbox_list ',' mailbox
+ ;
+
+addr_spec: /* the address proper: local part, '@', domain */
+ local_part '@' domain
+ ;
+
+local_part: /* left-hand side of the '@' */
+ dot_atom /* unquoted, possibly dotted */
+ | quoted_string /* or a single quoted string */
+ ;
+
+domain: /* right-hand side of the '@' */
+ dot_atom /* unquoted, possibly dotted */
+ | domain_literal /* or a '[' ... ']' literal */
+ ;
+
+domain_literal: /* '[' dtext_fws ']' with optional cfws on either side, spelled out as four cases */
+ cfws '[' dtext_fws ']' cfws /* cfws on both sides */
+ | cfws '[' dtext_fws ']' /* leading cfws only */
+ | '[' dtext_fws ']' cfws /* trailing cfws only */
+ | '[' dtext_fws ']' /* no surrounding cfws */
+ ;
+
+/*
+ * It was hard to make a definition of dtext and domain-literal that
+ * exactly matched the RFC. This was the best I could come up with.
+ */
+
+/*
+ * Contents of a domain literal: any run, possibly empty, of ATEXT and FWS
+ * tokens.
+ *
+ * This is written as a plain left-recursive list of single tokens.  The
+ * earlier form enumerated the "FWS ATEXT FWS" / "FWS ATEXT" / "ATEXT FWS"
+ * combinations both as starting alternatives and as recursive ones on top
+ * of an empty start, so the same input had several derivations (parser
+ * conflicts), and a literal holding only white space ("[ ]") was rejected.
+ * Every token sequence the earlier form accepted is still accepted.
+ *
+ * NOTE(review): only ATEXT and FWS are allowed here, so a literal such as
+ * [127.0.0.1] parses only if the lexer does not return '.' as a special
+ * inside brackets -- confirm against the lexer.
+ */
+dtext_fws: /* nothing */
+ | dtext_fws ATEXT
+ | dtext_fws FWS
+ ;
+
+phrase: /* one or more words, or the obsolete form that also permits '.' */
+ word /* a single word */
+ | phrase word /* each further word extends the phrase */
+ | obs_phrase /* NOTE(review): a plain run of two or more words also matches obs_phrase, so such input has two derivations */
+ ;
+
+/*
+ * obs-phrase is basically the same as "phrase", but after the first word
+ * you're allowed to have a '.'. I believe this is correct.
+ */
+
+obs_phrase: /* a leading word, then the tail in which '.' is permitted */
+ word obs_phrase_list
+ ;
+
+/*
+ * The part of an obs_phrase after its first word: one or more items, each
+ * either a word or a '.'.
+ *
+ * "obs_phrase_list '.'" is needed so that a '.' can appear anywhere in the
+ * tail.  Without it a '.' was accepted only as the very first item, so a
+ * display name such as "John Q. Public" (word word '.' word) was rejected.
+ */
+obs_phrase_list:
+ word
+ | '.'
+ | obs_phrase_list word
+ | obs_phrase_list '.'
+ ;
+
+word: /* the unit a phrase is built from */
+ atom /* an unquoted ATEXT token */
+ | quoted_string /* or a quoted string */
+ ;
+
+/*
+ * This makes sure any comments and white space before/after the quoted string
+ * get eaten.
+ */
+quoted_string: /* a QSTRING token with optional cfws on either side, spelled out as four cases */
+ cfws QSTRING cfws /* cfws on both sides */
+ | QSTRING cfws /* trailing cfws only */
+ | cfws QSTRING /* leading cfws only */
+ | QSTRING /* no surrounding cfws */
+ ;
+
+atom: /* an ATEXT token with optional cfws on either side, spelled out as four cases */
+ cfws ATEXT cfws /* cfws on both sides */
+ | cfws ATEXT /* leading cfws only */
+ | ATEXT cfws /* trailing cfws only */
+ | ATEXT /* no surrounding cfws */
+ ;
+
+/*
+ * Making dot-atom work was a little confusing; I finally handled it by
+ * defining "dot_atom_text" as having two or more ATEXTs separated by
+ * '.', and defining dot_atom as allowing a single atom.
+ */
+dot_atom: /* a single atom, or a dotted run of ATEXTs with optional cfws on either side */
+ atom /* the single-ATEXT case; atom carries its own optional cfws */
+ | cfws dot_atom_text cfws /* dotted run, cfws on both sides */
+ | cfws dot_atom_text /* dotted run, leading cfws only */
+ | dot_atom_text cfws /* dotted run, trailing cfws only */
+ | dot_atom_text /* dotted run, no surrounding cfws */
+ ;
+
+dot_atom_text: /* two or more ATEXT tokens joined by '.' */
+ ATEXT '.' ATEXT /* the shortest form */
+ | dot_atom_text '.' ATEXT /* each further '.' ATEXT extends it */
+ ;
+
+/*
+ * As mentioned above, the CFWS definition in the RFC technically allows
+ * FWS before and after the comment. The lexer is responsible for eating
+ * the FWS before/after comments.
+ */
+cfws: /* exactly one token: a comment or folding white space */
+ COMMENT /* NOTE(review): no alternative repeats, so two such tokens in a row do not form one cfws -- confirm the lexer never returns COMMENT COMMENT */
+ | FWS
+ ;