# Grammar for Ragel-style regular-expression operators.
#
# The lexer region below defines the tokens; the productions that follow
# layer the operators from loosest (set operators in `expression`) down to
# the tightest (atoms in `factor`).

lex start
{
    # Whitespace separates tokens and is otherwise discarded.
    ignore /[\t\n ]+/

    # Single-character operators and punctuation.
    # NOTE(review): ':' is declared here but is not referenced by any
    # production shown in this file.
    literal '^', '|', '-', ',', ':', '!', '?', '.'
    literal '(', ')', '{', '}', '*', '&', '+'

    # Multi-character operators.
    # NOTE(review): '->' is declared here but is not referenced by any
    # production shown in this file.
    literal '--', ':>', ':>>', '<:', '->', '**'

    token word /[a-zA-Z_][a-zA-Z0-9_]*/
    token uint /[0-9]+/
}

# Entry point: parse one expression and dump the resulting tree as XML.
def start
    [expression]
    {
        print_xml( lhs )
    }

# Left-recursive binary operators over terms: '|', '&', '-', '--'.
def expression
    [expression '|' term]
|   [expression '&' term]
|   [expression '-' term]
|   [expression '--' term]
|   [term]

# A term is one repeated factor followed by zero or more continuations.
def term
    [factor_with_rep more_term]

# Continuation of a term: empty, a directly adjacent factor, or a factor
# introduced by one of '.', ':>', ':>>', '<:'.
#
# This is where the grammar is ambiguous. '-' is both a binary operator in
# `expression` and the sign prefix in `alphabet_num`, so input such as
# `a - 1` can be derived either as [expression '-' term] or as a single term
# whose more_term continues with the factor `-1`.
#
# Can resolve the ambiguity by making more_term shortest match.
# (Presumably that makes the empty alternative win so '-' is read as the
# binary operator -- TODO confirm against the parser's behaviour.)
def more_term
    []
|   [factor_with_rep more_term]
|   ['.' factor_with_rep more_term]
|   [':>' factor_with_rep more_term]
|   [':>>' factor_with_rep more_term]
|   ['<:' factor_with_rep more_term]

# Postfix repetition operators, left-recursive so they can be stacked:
# '*', '**', '?', '+', and the braced forms {n}, {,n}, {n,}, {n,m}.
def factor_with_rep
    [factor_with_rep '*']
|   [factor_with_rep '**']
|   [factor_with_rep '?']
|   [factor_with_rep '+']
|   [factor_with_rep '{' factor_rep_num '}']
|   [factor_with_rep '{' ',' factor_rep_num '}']
|   [factor_with_rep '{' factor_rep_num ',' '}']
|   [factor_with_rep '{' factor_rep_num ',' factor_rep_num '}']
|   [factor_with_neg]

# Repetition count used inside the braced repetition forms.
def factor_rep_num
    [uint]

# Prefix operators '!' and '^'; right-recursive so they can be stacked.
def factor_with_neg
    ['!' factor_with_neg]
|   ['^' factor_with_neg]
|   [factor]

# Atoms: a numeric alphabet value, a name, or a parenthesised expression.
def factor
    [alphabet_num]
|   [word]
|   ['(' expression ')']

# An alphabet value: an unsigned integer, optionally preceded by '-'.
def alphabet_num
    [uint]
|   ['-' uint]