1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
##### LM #####
# Lexical region for whole-line tokens: the mbox "From " separator line
# (from_line) and any other newline-terminated line (simple_line).
# The rl definitions below are named sub-patterns that the later patterns
# reference by name; only the two token definitions produce tokens.
lex
# Day-of-week abbreviation: one capital followed by two lowercase letters.
rl day /[A-Z][a-z][a-z]/
# Month abbreviation: same shape as day (no specific names are checked).
rl month /[A-Z][a-z][a-z]/
# Four-digit year.
rl year /[0-9][0-9][0-9][0-9]/
# Two-digit hour, ':', two-digit minute, with an optional ':' seconds part.
rl time /[0-9][0-9] ':' [0-9][0-9] ( ':' [0-9][0-9] )? /
# Time zone written as three capital letters.
rl letterZone /[A-Z][A-Z][A-Z]/
# Time zone written as a sign followed by four digits.
rl numZone /[+\-][0-9][0-9][0-9][0-9]/
# Either zone form.
rl zone / letterZone | numZone/
# Two-character day of month; the first character may be a space instead
# of a digit.
rl dayNum /[0-9 ][0-9]/
# These are the different formats of the date, minus an obscure
# type that has a funny string 'remote from xxx' on the end. Taken
# from c-client in the imap-2000 distribution.
# Shape: day, month, dayNum and time separated by single spaces, followed by
# one of: year | year zone | zone year.
rl date / day ' ' month ' ' dayNum ' ' time ' '
( year | year ' ' zone | zone ' ' year ) /
# From lines separate messages. A line is only a from_line when it starts
# with 'From ' and ends with a space, a complete date and a newline; anything
# else is an ordinary simple_line. This keeps us in the message body up until
# an entirely correct from line is matched.
# NOTE(review): a valid From line also matches simple_line with the same
# length; from_line is declared first, which presumably is what lets it win.
# Confirm before reordering these two tokens.
token from_line / 'From ' (any-'\n')* ' ' date '\n' /
# Any run of non-newline characters (possibly empty) followed by a newline.
token simple_line / [^\n]* '\n' /
end
# Tokens for the header section of a message. These are declared at the top
# level, outside the lex ... end region above.
# Header-name character: any printable character except space and colon.
rl hchar /print - [ :]/
# Header field name: one or more hchar characters.
token header_name /hchar+/
# Name/value separator: a colon followed by zero or more spaces.
token colon /':' ' '*/
# Header value, up to and including its terminating newline. A newline that
# is immediately followed by a space or tab does not end the value, so
# continuation lines stay part of the same header_content.
token header_content / ([^\n] | '\n' [ \t])* '\n'/
# A lone newline; the message production uses it between headers and body.
token blank_line / '\n' /
# One header field: its name, the colon separator, then the value
# (header_content, which includes the trailing newline).
def header
[header_name colon header_content]
# One mailbox message: the From separator line, zero or more headers, the
# blank line that ends the header section, then zero or more body lines.
def message
[from_line header* blank_line simple_line*]
# Root of the grammar: a mailbox is zero or more messages.
def start
[message*]
# Driver: parse standard input as a 'start' tree bound to S, print that tree
# as XML, then print a trailing newline.
parse S: start[ stdin ]
print_xml( S )
print( '\n' )
##### IN #####
From thurston Tue Jan 2 21:16:50 2007
Return-Path: <unknown>
X-Spam-Level: *
Received: from [109.111.71.111] (helo=twfmtr)
by zifreax with smtp (Exim 4.43)
id 1H1vfs-0005LN-HW; Tue, 2 Jan 2007 21:16:16 -0500
Message-ID: <459B113F.8050903@immoarthabitatge.com>
X-Keywords:
X-UID: 1
Content-Type: text/html; charset=ISO-8859-1
</body>
</html>
From thurston Wed Jan 3 02:35:48 2007
Return-Path: <unknown>
X-Spam-Checker-Version: SpamAssassin 3.1.1 (2006-03-10) on mambo.cs.queensu.ca
X-Spam-Level: **
X-Spam-Status: No, score=2.9 required=5.0 tests=BAYES_20,EXTRA_MPART_TYPE,
HTML_40_50,HTML_IMAGE_ONLY_16,HTML_MESSAGE,RCVD_IN_BL_SPAMCOP_NET
autolearn=no version=3.1.1
X-Bogosity: Unsure, tests=bogofilter, spamicity=0.971708, version=1.0.2
Status: RO
X-UID: 2
------=_NextPart_000_0010_01C72F11.F137BD60
charset="windows-1252"
Content-Transfer-Encoding: quoted-printable
##### EXP #####
<start><_repeat_message><message><from_line>From thurston Tue Jan 2 21:16:50 2007
</from_line><_repeat_header><header><header_name>Return-Path</header_name><colon>: </colon><header_content><unknown>
</header_content></header><header><header_name>X-Spam-Level</header_name><colon>: </colon><header_content>*
</header_content></header><header><header_name>Received</header_name><colon>: </colon><header_content>from [109.111.71.111] (helo=twfmtr)
by zifreax with smtp (Exim 4.43)
id 1H1vfs-0005LN-HW; Tue, 2 Jan 2007 21:16:16 -0500
</header_content></header><header><header_name>Message-ID</header_name><colon>: </colon><header_content><459B113F.8050903@immoarthabitatge.com>
</header_content></header><header><header_name>X-Keywords</header_name><colon>: </colon><header_content>
</header_content></header><header><header_name>X-UID</header_name><colon>: </colon><header_content>1
</header_content></header></_repeat_header><blank_line>
</blank_line><_repeat_simple_line><simple_line>Content-Type: text/html; charset=ISO-8859-1
</simple_line><simple_line></body>
</simple_line><simple_line></html>
</simple_line><simple_line>
</simple_line></_repeat_simple_line></message><message><from_line>From thurston Wed Jan 3 02:35:48 2007
</from_line><_repeat_header><header><header_name>Return-Path</header_name><colon>: </colon><header_content><unknown>
</header_content></header><header><header_name>X-Spam-Checker-Version</header_name><colon>: </colon><header_content>SpamAssassin 3.1.1 (2006-03-10) on mambo.cs.queensu.ca
</header_content></header><header><header_name>X-Spam-Level</header_name><colon>: </colon><header_content>**
</header_content></header><header><header_name>X-Spam-Status</header_name><colon>: </colon><header_content>No, score=2.9 required=5.0 tests=BAYES_20,EXTRA_MPART_TYPE,
HTML_40_50,HTML_IMAGE_ONLY_16,HTML_MESSAGE,RCVD_IN_BL_SPAMCOP_NET
autolearn=no version=3.1.1
</header_content></header><header><header_name>X-Bogosity</header_name><colon>: </colon><header_content>Unsure, tests=bogofilter, spamicity=0.971708, version=1.0.2
</header_content></header><header><header_name>Status</header_name><colon>: </colon><header_content>RO
</header_content></header><header><header_name>X-UID</header_name><colon>: </colon><header_content>2
</header_content></header></_repeat_header><blank_line>
</blank_line><_repeat_simple_line><simple_line>------=_NextPart_000_0010_01C72F11.F137BD60
</simple_line><simple_line> charset="windows-1252"
</simple_line><simple_line>Content-Transfer-Encoding: quoted-printable
</simple_line><simple_line>
</simple_line></_repeat_simple_line></message></_repeat_message></start>
|