summaryrefslogtreecommitdiff
path: root/test/mailbox.lm
blob: ca3b9a9ce3273b83125d26b7dac07a13b9f52f70 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
##### LM #####

# Scanner region for the body of a mailbox: it recognises mbox "From "
# separator lines (from_line) and ordinary text lines (simple_line).
# The 'rl' definitions below are named regular-expression fragments used
# to build the date that must terminate a valid From line.
lex
	# Day-of-week and month abbreviations share one shape: an upper-case
	# letter followed by two lower-case letters (e.g. "Tue", "Jan"). The
	# patterns do not check that the text is an actual day or month name.
	rl day /[A-Z][a-z][a-z]/
	rl month /[A-Z][a-z][a-z]/
	# Four-digit year.
	rl year /[0-9][0-9][0-9][0-9]/
	# HH:MM with an optional :SS part.
	rl time /[0-9][0-9] ':' [0-9][0-9]  ( ':' [0-9][0-9] )? /
	# Time zone: either three upper-case letters or a sign followed by
	# four digits (e.g. "-0500").
	rl letterZone /[A-Z][A-Z][A-Z]/
	rl numZone /[+\-][0-9][0-9][0-9][0-9]/
	rl zone / letterZone | numZone/
	# Day of month, always two characters wide: the first may be a digit
	# or a padding space, the second is a digit (e.g. " 2" or "12").
	rl dayNum /[0-9 ][0-9]/

	# These are the different formats of the date minus an obscure
	# type that has a funny string 'remote from xxx' on the end. Taken
	# from c-client in the imap-2000 distribution.
	# Fields are separated by exactly one space; the tail is one of
	# "year", "year zone" or "zone year".
	rl date / day ' ' month ' ' dayNum ' ' time ' '
		( year | year ' ' zone | zone ' ' year ) /

	# From lines separate messages. We will exclude from_line from a message
	# body line.  This will cause us to stay in message line up until an
	# entirely correct from line is matched.
	# A from_line is the literal "From ", any run of non-newline
	# characters, a space, a date as defined above, and a newline.
	# NOTE(review): a well-formed From line also matches simple_line with
	# the same length; presumably declaration order gives from_line
	# priority -- confirm against the Colm scanner rules before reordering.
	token from_line / 'From ' (any-'\n')* ' ' date '\n' /
	# Any other line: zero or more non-newline characters plus the newline.
	token simple_line / [^\n]* '\n' /
end

# Tokens for the header section of a message. They are declared outside
# the lex ... end region above.

# A header-name character: any printable character except space and colon.
rl hchar /print - [ :]/
# Header field name: one or more header-name characters.
token header_name /hchar+/

# The separator after a header name: a colon plus any spaces that follow,
# so the padding is kept in this token rather than in the header content.
token colon /':' ' '*/
# Header value up to and including its terminating newline. A newline that
# is immediately followed by a space or tab is consumed as part of the
# value, which lets a folded (continued) header span several lines.
token header_content / ([^\n] | '\n' [ \t])* '\n'/
# The empty line (a lone newline) used between the headers and the body.
token blank_line / '\n' /

# One header field: name, colon separator, then the (possibly folded) value.
def header 
	[header_name colon header_content]

# One mailbox message: the "From " separator line, zero or more headers,
# the blank line that ends the header section, then zero or more body lines.
def message 
	[from_line header* blank_line simple_line*]

# Root of the grammar: a mailbox is a sequence of zero or more messages.
def start 
	[message*]

# Driver: parse standard input with the 'start' production, then write
# the resulting tree as XML followed by a single trailing newline.
parse Mailbox: start[stdin]
print_xml(Mailbox)
print('\n')
##### IN #####
From thurston  Tue Jan  2 21:16:50 2007
Return-Path: <unknown>
X-Spam-Level: *
Received: from [109.111.71.111] (helo=twfmtr)
	by zifreax with smtp (Exim 4.43)
	id 1H1vfs-0005LN-HW; Tue, 2 Jan 2007 21:16:16 -0500
Message-ID: <459B113F.8050903@immoarthabitatge.com>
X-Keywords:                   
X-UID: 1

Content-Type: text/html; charset=ISO-8859-1
</body>
</html>

From thurston  Wed Jan  3 02:35:48 2007
Return-Path: <unknown>
X-Spam-Checker-Version: SpamAssassin 3.1.1 (2006-03-10) on mambo.cs.queensu.ca
X-Spam-Level: **
X-Spam-Status: No, score=2.9 required=5.0 tests=BAYES_20,EXTRA_MPART_TYPE,
	HTML_40_50,HTML_IMAGE_ONLY_16,HTML_MESSAGE,RCVD_IN_BL_SPAMCOP_NET 
	autolearn=no version=3.1.1
X-Bogosity: Unsure, tests=bogofilter, spamicity=0.971708, version=1.0.2
Status: RO
X-UID: 2

------=_NextPart_000_0010_01C72F11.F137BD60
	charset="windows-1252"
Content-Transfer-Encoding: quoted-printable

##### EXP #####
<start><_repeat_message><message><from_line>From thurston  Tue Jan  2 21:16:50 2007
</from_line><_repeat_header><header><header_name>Return-Path</header_name><colon>: </colon><header_content>&lt;unknown&gt;
</header_content></header><header><header_name>X-Spam-Level</header_name><colon>: </colon><header_content>*
</header_content></header><header><header_name>Received</header_name><colon>: </colon><header_content>from [109.111.71.111] (helo=twfmtr)
	by zifreax with smtp (Exim 4.43)
	id 1H1vfs-0005LN-HW; Tue, 2 Jan 2007 21:16:16 -0500
</header_content></header><header><header_name>Message-ID</header_name><colon>: </colon><header_content>&lt;459B113F.8050903@immoarthabitatge.com&gt;
</header_content></header><header><header_name>X-Keywords</header_name><colon>:                   </colon><header_content>
</header_content></header><header><header_name>X-UID</header_name><colon>: </colon><header_content>1
</header_content></header></_repeat_header><blank_line>
</blank_line><_repeat_simple_line><simple_line>Content-Type: text/html; charset=ISO-8859-1
</simple_line><simple_line>&lt;/body&gt;
</simple_line><simple_line>&lt;/html&gt;
</simple_line><simple_line>
</simple_line></_repeat_simple_line></message><message><from_line>From thurston  Wed Jan  3 02:35:48 2007
</from_line><_repeat_header><header><header_name>Return-Path</header_name><colon>: </colon><header_content>&lt;unknown&gt;
</header_content></header><header><header_name>X-Spam-Checker-Version</header_name><colon>: </colon><header_content>SpamAssassin 3.1.1 (2006-03-10) on mambo.cs.queensu.ca
</header_content></header><header><header_name>X-Spam-Level</header_name><colon>: </colon><header_content>**
</header_content></header><header><header_name>X-Spam-Status</header_name><colon>: </colon><header_content>No, score=2.9 required=5.0 tests=BAYES_20,EXTRA_MPART_TYPE,
	HTML_40_50,HTML_IMAGE_ONLY_16,HTML_MESSAGE,RCVD_IN_BL_SPAMCOP_NET 
	autolearn=no version=3.1.1
</header_content></header><header><header_name>X-Bogosity</header_name><colon>: </colon><header_content>Unsure, tests=bogofilter, spamicity=0.971708, version=1.0.2
</header_content></header><header><header_name>Status</header_name><colon>: </colon><header_content>RO
</header_content></header><header><header_name>X-UID</header_name><colon>: </colon><header_content>2
</header_content></header></_repeat_header><blank_line>
</blank_line><_repeat_simple_line><simple_line>------=_NextPart_000_0010_01C72F11.F137BD60
</simple_line><simple_line>	charset="windows-1252"
</simple_line><simple_line>Content-Transfer-Encoding: quoted-printable
</simple_line><simple_line>
</simple_line></_repeat_simple_line></message></_repeat_message></start>