# Grammar for a file of mail messages. Each message opens with a "From "
# separator line, continues with header fields, then one blank line, then
# any number of body lines. The parsed tree is printed as XML.

# Line-oriented scanner region: separator ("From ") lines and plain lines.
lex lines
{
	# Pieces of the timestamp that terminates a separator line.
	rl twoDigits /[0-9][0-9]/
	rl day /[A-Z][a-z][a-z]/
	rl month /[A-Z][a-z][a-z]/
	rl year / twoDigits twoDigits /
	rl time / twoDigits ':' twoDigits ( ':' twoDigits )? /
	rl letterZone /[A-Z][A-Z][A-Z]/
	rl numZone / [+\-] twoDigits twoDigits /
	rl zone / letterZone | numZone /

	# Day of month: two columns, the first may be a space instead of a digit.
	rl dayNum /[0-9 ][0-9]/

	# Accepted date layouts, modelled on c-client from the imap-2000
	# distribution. The rare variant that carries a trailing
	# 'remote from xxx' string is deliberately not covered. After the time
	# comes either a year with an optional zone, or a zone then a year.
	rl date / day ' ' month ' ' dayNum ' ' time ' ' ( year ( ' ' zone )? | zone ' ' year ) /

	# Separator lines mark message boundaries. Only a line that is a fully
	# well-formed separator (ending in a valid date) becomes a from_line;
	# anything else is scanned as an ordinary line, so the parser stays
	# inside the current message body until a correct separator appears.
	# NOTE(review): from_line is declared ahead of simple_line on purpose;
	# presumably the earlier token wins when both match the same line --
	# confirm against the Colm scanner rules before reordering.
	token from_line / 'From ' [^\n]* ' ' date '\n' /
	token simple_line / (any - '\n')* '\n' /
}

# Characters allowed in a header field name: printable, excluding space
# and colon.
rl hchar /print - [ :]/
token header_name /hchar+/

# The separating colon, together with any spaces that follow it.
token colon /':' ' '*/

# Field value up to the terminating newline. A newline followed by a space
# or tab continues the value on the next line.
token header_content / ([^\n] | '\n' [ \t])* '\n'/

# The empty line that ends the header section.
token blank_line / '\n' /

# One header field: name, colon, value.
def header
	[header_name colon header_content]

# One message: separator line, headers, blank line, body lines.
def message
	[from_line header* blank_line simple_line*]

# The whole input: zero or more messages.
def start
	[message*]

# Parse standard input and print the resulting tree as XML.
Mbox: start = parse start( stdin )
print_xml( Mbox )