%name-prefix "addr"

/*
 * Grammar for RFC 5322 address lists.  There are no semantic actions
 * here; the rules only describe which token sequences are accepted.
 *
 * Comments on these tokens:
 *
 * ATEXT is defined in RFC 5322 as:
 *	ALPHA / DIGIT /
 *	'!' / '#' / '$' / '%' / '&' / ''' / '*' / '+' / '-' / '/' /
 *	'=' / '?' / '^' / '_' / '`' / '{' / '|' / '}' / '~'
 *
 * All printable ASCII characters except for spaces and specials
 *
 * QSTRING is a quoted string, which is printable ASCII characters except
 * for \ or the quote character surrounded by quotes.  Use \ for quoting
 * \ and the quote character.
 *
 * FWS is folding white space, which is defined as SP (\040), HTAB (\011),
 * and NL (\012).  Technically CR (\015) is part of that, but traditionally
 * Unix format files don't have that character.
 *
 * COMMENT is a comment string, which is printable ASCII characters except
 * for '(', ')', and '\'.  Uses same quoting rules as QSTRING.  To make
 * the grammar slightly less conflict-happy, COMMENT must include any FWS
 * in front or behind of it (simply have it eaten in the lexer).
 *
 * Everything else is a SPECIAL, which is returned directly.  These are
 * defined in RFC 5322 as:
 *
 *	'(' / ')' / '<' / '>' / '[' / ']' / ':' / ';' / '@' / '\' / ',' / '.' /
 *	'"'
 *
 * Technically we don't return all of these; we handle () in comments, " in
 * quoted string handling, and \ in those handlers.
 */

%token ATEXT QSTRING FWS COMMENT

%%

/*
 * A list of addresses; the main entry point to the parser.
 *
 * The list may be empty, a single address, or several addresses separated
 * by ','.  The single-address alternative is what lets the first element
 * of a list be matched; the empty alternative combined with the recursive
 * one additionally admits a list that starts with a ','.
 */

address_list:	/* nothing */
		| address
		| address_list ',' address
		;

/*
 * A single address; can be a single mailbox, or a group address
 */

address:	mailbox
		| group
		;

/*
 * A traditional single mailbox.  Either in "Name <addr-spec>" form or just
 * a bare email address with no angle brackets.
 */

mailbox:	name_addr
		| addr_spec
		;

/*
 * An email address, with the angle brackets.  Optionally contains a display
 * name in the front.  The RFC says "display-name", but display-name is
 * defined as a phrase, so we just use that.
 */

name_addr:	phrase angle_addr
		| angle_addr
		;

angle_addr:	cfws '<' addr_spec '>' cfws
		| cfws '<' addr_spec '>'
		| '<' addr_spec '>' cfws
		| '<' addr_spec '>'
		;

/*
 * The group list syntax.  The group list is allowed to be empty or be
 * spaces, so we define group_list as either being a mailbox list or
 * just being CFWS.  mailbox_list can be empty, so that handles the
 * case of nothing being between the ':' and the ';'.  There is
 * deliberately no separate "phrase ':' ';'" alternative: it would
 * duplicate the empty mailbox_list case, and the resulting shift/reduce
 * conflict would prevent CFWS from being accepted after an empty group.
 */

group:		phrase ':' group_list ';' cfws
		| phrase ':' group_list ';'
		;

group_list:	mailbox_list
		| cfws
		;

/*
 * Zero or more mailboxes separated by ','.  As with address_list, the
 * single-mailbox alternative is required so the first mailbox of the
 * list can be matched without a preceding ','.
 */

mailbox_list:	/* nothing */
		| mailbox
		| mailbox_list ',' mailbox
		;

addr_spec:	local_part '@' domain
		;

local_part:	dot_atom
		| quoted_string
		;

domain:		dot_atom
		| domain_literal
		;

domain_literal:	cfws '[' dtext_fws ']' cfws
		| cfws '[' dtext_fws ']'
		| '[' dtext_fws ']' cfws
		| '[' dtext_fws ']'
		;

/*
 * It was hard to make a definition of dtext and domain-literal that
 * exactly matched the RFC.  This was the best I could come up with.
 */

dtext_fws:	/* nothing */
		| FWS ATEXT FWS
		| FWS ATEXT
		| ATEXT FWS
		| dtext_fws FWS ATEXT FWS
		| dtext_fws FWS ATEXT
		| dtext_fws ATEXT FWS
		| dtext_fws ATEXT
		;

phrase:		word
		| phrase word
		| obs_phrase
		;

/*
 * obs-phrase is basically the same as "phrase", but after the first word
 * you're allowed to have a '.'.  The '.' may appear at any position after
 * the first word (e.g. "John Q. Public"), so obs_phrase_list accepts it
 * both as its first element and as a continuation.
 */

obs_phrase:	word obs_phrase_list
		;

obs_phrase_list: word
		| '.'
		| obs_phrase_list word
		| obs_phrase_list '.'
		;

word:		atom
		| quoted_string
		;

/*
 * This makes sure any comments and white space before/after the quoted string
 * get eaten.
 */

quoted_string:	cfws QSTRING cfws
		| QSTRING cfws
		| cfws QSTRING
		| QSTRING
		;

atom:		cfws ATEXT cfws
		| cfws ATEXT
		| ATEXT cfws
		| ATEXT
		;

/*
 * Making dot-atom work was a little confusing; I finally handled it by
 * defining "dot_atom_text" as having two or more ATEXTs separated by
 * '.', and defining dot_atom as allowing a single atom.
 */

dot_atom:	atom
		| cfws dot_atom_text cfws
		| cfws dot_atom_text
		| dot_atom_text cfws
		| dot_atom_text
		;

dot_atom_text:	ATEXT '.' ATEXT
		| dot_atom_text '.' ATEXT
		;

/*
 * As mentioned above, technically the CFWS definition in the RFC allows
 * FWS before and after the comment.  The lexer is responsible for eating
 * the FWS before/after comments.
 */

cfws:		COMMENT
		| FWS
		;