# Mailbox parser test.
#
# Reads an mbox-style mailbox from stdin, splits it into messages (a From
# line, zero or more headers, a blank line, then body lines) and prints the
# resulting parse tree as XML.

# Lexical region shared by From lines and ordinary message lines.
lex
	# Building blocks of the date that ends a From line.
	rl day /[A-Z][a-z][a-z]/
	rl month /[A-Z][a-z][a-z]/
	rl year /[0-9][0-9][0-9][0-9]/

	# HH:MM with optional :SS.
	rl time /[0-9][0-9] ':' [0-9][0-9] ( ':' [0-9][0-9] )? /

	# A zone is either three capital letters or a signed four-digit offset.
	rl letterZone /[A-Z][A-Z][A-Z]/
	rl numZone /[+\-][0-9][0-9][0-9][0-9]/
	rl zone / letterZone | numZone/

	# Always two characters wide: a single-digit day is padded with a
	# leading space.
	rl dayNum /[0-9 ][0-9]/

	# These are the different formats of the date minus an obscure
	# type that has a funny string 'remote from xxx' on the end. Taken
	# from c-client in the imap-2000 distribution.
	rl date / day ' ' month ' ' dayNum ' ' time ' '
		( year | year ' ' zone | zone ' ' year ) /

	# From lines separate messages. We will exclude from_line from a message
	# body line. This will cause us to stay in message line up until an
	# entirely correct from line is matched.
	token from_line / 'From ' (any-'\n')* ' ' date '\n' /

	# Any other line, including its terminating newline.
	token simple_line / [^\n]* '\n' /
end

# Header names are runs of printable characters other than space and colon.
rl hchar /print - [ :]/
token header_name /hchar+/

# The colon separator together with any spaces that follow it.
token colon /':' ' '*/

# The rest of the header line. A newline followed by a space or tab is a
# continuation and stays part of the same header; the final newline ends it.
token header_content / ([^\n] | '\n' [ \t])* '\n'/

# The empty line that separates the headers from the body.
token blank_line / '\n' /

# One "Name: value" header, including continuation lines.
def header
	[header_name colon header_content]

# One message: From line, headers, separating blank line, body lines.
def message
	[from_line header* blank_line simple_line*]

# A mailbox is a sequence of messages.
def start
	[message*]

# Parse all of stdin as a mailbox and dump the tree as XML.
parse S: start[ stdin ]
print( xml( S ) )
print( '\n' )
##### IN #####
From thurston Tue Jan  2 21:16:50 2007
Return-Path: <unknown>
X-Spam-Level: *
Received: from [109.111.71.111] (helo=twfmtr) by zifreax with smtp (Exim 4.43) id 1H1vfs-0005LN-HW; Tue, 2 Jan 2007 21:16:16 -0500
Message-ID: <459B113F.8050903@immoarthabitatge.com>
X-Keywords:
X-UID: 1

Content-Type: text/html; charset=ISO-8859-1
</body>
</html>
From thurston Wed Jan  3 02:35:48 2007
Return-Path: <unknown>
X-Spam-Checker-Version: SpamAssassin 3.1.1 (2006-03-10) on mambo.cs.queensu.ca
X-Spam-Level: **
X-Spam-Status: No, score=2.9 required=5.0 tests=BAYES_20,EXTRA_MPART_TYPE, HTML_40_50,HTML_IMAGE_ONLY_16,HTML_MESSAGE,RCVD_IN_BL_SPAMCOP_NET autolearn=no version=3.1.1
X-Bogosity: Unsure, tests=bogofilter, spamicity=0.971708, version=1.0.2
Status: RO
X-UID: 2

------=_NextPart_000_0010_01C72F11.F137BD60
charset="windows-1252"
Content-Transfer-Encoding: quoted-printable
##### EXP #####
<_repeat_message>From thurston Tue Jan 2 21:16:50 2007 <_repeat_header>
Return-Path: <unknown>
X-Spam-Level: *
Received: from [109.111.71.111] (helo=twfmtr) by zifreax with smtp (Exim 4.43) id 1H1vfs-0005LN-HW; Tue, 2 Jan 2007 21:16:16 -0500
Message-ID: <459B113F.8050903@immoarthabitatge.com>
X-Keywords:
X-UID: 1
<_repeat_simple_line>Content-Type: text/html; charset=ISO-8859-1 </body> </html>
From thurston Wed Jan 3 02:35:48 2007 <_repeat_header>
Return-Path: <unknown>
X-Spam-Checker-Version: SpamAssassin 3.1.1 (2006-03-10) on mambo.cs.queensu.ca
X-Spam-Level: **
X-Spam-Status: No, score=2.9 required=5.0 tests=BAYES_20,EXTRA_MPART_TYPE, HTML_40_50,HTML_IMAGE_ONLY_16,HTML_MESSAGE,RCVD_IN_BL_SPAMCOP_NET autolearn=no version=3.1.1
X-Bogosity: Unsure, tests=bogofilter, spamicity=0.971708, version=1.0.2
Status: RO
X-UID: 2
<_repeat_simple_line>------=_NextPart_000_0010_01C72F11.F137BD60 charset="windows-1252" Content-Transfer-Encoding: quoted-printable