1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
use Test::More;
require HTML::Parser;
# Shared event log. Every handler in package P appends formatted strings to
# it. Declared at file scope, outside the package block below, so code later
# in the file can reset and inspect it.
my @result;

# P: an HTML::Parser subclass whose callback methods record each event they
# receive as a string in @result and bump a per-event-type counter.
{
    package P;

    # Strictures are lexically scoped to this block, so they do not affect
    # the rest of the file.
    use strict;
    use warnings;

    # Assigned at run time; the file-level "require HTML::Parser" has already
    # executed by the time this statement runs.
    our @ISA = qw(HTML::Parser);

    # Per-event tallies, kept as package variables ($P::start, $P::end, ...).
    our ($start, $end, $text, $comment, $declaration);

    # Start-tag handler: logs "START[tag]" followed by one "\tname: value"
    # entry per attribute, in sorted attribute-name order so the log is
    # deterministic.
    sub start {
        my ($self, $tag, $attr) = @_;
        push @result, "START[$tag]";
        for my $name (sort keys %$attr) {
            push @result, "\t${name}: " . $attr->{$name};
        }
        $start++;
    }

    # End-tag handler: logs "END[tag]".
    sub end {
        my ($self, $tag) = @_;
        push @result, "END[$tag]";
        $end++;
    }

    # Text handler: logs "TEXT[...]" with the first argument after $self.
    sub text {
        my ($self, $chunk) = @_;
        push @result, "TEXT[$chunk]";
        $text++;
    }

    # Comment handler: logs "COMMENT[...]" with the first argument after $self.
    sub comment {
        my ($self, $body) = @_;
        push @result, "COMMENT[$body]";
        $comment++;
    }

    # Declaration handler: logs "DECLARATION[...]" with the first argument
    # after $self.
    sub declaration {
        my ($self, $body) = @_;
        push @result, "DECLARATION[$body]";
        $declaration++;
    }
}
package main;
# Flat list of (HTML input => expected event log) pairs. Each expected value
# is an array ref of strings in the format the P handlers push onto @result:
# "START[tag]", "\tattr: value", "END[tag]", "TEXT[...]", "COMMENT[...]",
# "DECLARATION[...]".
@tests =
(
    # Odd-looking start tags and attribute syntax
    '<a ">' => ['START[a]', "\t\": \""],
    '<a/>' => ['START[a/]',],
    '<a />' => ['START[a]', "\t/: /"],
    '<a a/>' => ['START[a]', "\ta/: a/"],
    '<a a/=/>' => ['START[a]', "\ta/: /"],

    # The expected value contains \xA0, so the input must carry the entity
    # that yields it.
    '<a x="foo&nbsp;bar">' => ['START[a]', "\tx: foo\xA0bar"],
    # NOTE(review): this input may originally have contained an entity form
    # that is passed through undecoded; as written it is a plain round trip.
    # Confirm against the upstream test data.
    '<a x="foo bar">' => ['START[a]', "\tx: foo bar"],

    # Things that look like tags but are expected to come back as text
    '<å >' => ['TEXT[<å]', 'TEXT[ >]'],
    '2 < 5' => ['TEXT[2 ]', 'TEXT[<]', 'TEXT[ 5]'],
    '2 <5> 2' => ['TEXT[2 ]', 'TEXT[<5>]', 'TEXT[ 2]'],
    '2 <a' => ['TEXT[2 ]', 'TEXT[<a]'],
    '2 <a> 2' => ['TEXT[2 ]', 'START[a]', 'TEXT[ 2]'],
    '2 <a href=foo' => ['TEXT[2 ]', 'TEXT[<a href=foo]'],

    # Attribute value quoting
    "2 <a href='foo bar'> 2" =>
        ['TEXT[2 ]', 'START[a]', "\thref: foo bar", 'TEXT[ 2]'],
    '2 <a href=foo bar> 2' =>
        ['TEXT[2 ]', 'START[a]', "\tbar: bar", "\thref: foo", 'TEXT[ 2]'],
    '2 <a href="foo bar"> 2' =>
        ['TEXT[2 ]', 'START[a]', "\thref: foo bar", 'TEXT[ 2]'],
    '2 <a href="foo\'bar"> 2' =>
        ['TEXT[2 ]', 'START[a]', "\thref: foo'bar", 'TEXT[ 2]'],
    "2 <a href='foo\"bar'> 2" =>
        ['TEXT[2 ]', 'START[a]', "\thref: foo\"bar", 'TEXT[ 2]'],
    # Same expectation as the case above, but the double quote arrives as an
    # entity instead of a literal character.
    "2 <a href='foo&quot;bar'> 2" =>
        ['TEXT[2 ]', 'START[a]', "\thref: foo\"bar", 'TEXT[ 2]'],

    # Punctuation inside tag and attribute names
    '2 <a.b> 2' => ['TEXT[2 ]', 'START[a.b]', 'TEXT[ 2]'],
    '2 <a.b-12 a.b = 2 a> 2' =>
        ['TEXT[2 ]', 'START[a.b-12]', "\ta: a", "\ta.b: 2", 'TEXT[ 2]'],
    '2 <a_b> 2' => ['TEXT[2 ]', 'START[a_b]', 'TEXT[ 2]'],

    # Declarations and comments
    # NOTE(review): the quoted CDATA value below may have been an entity
    # reference originally; input and expectation agree, so it is left as is.
    '<!ENTITY nbsp CDATA " " -- no-break space -->' =>
        ['DECLARATION[ENTITY nbsp CDATA " " -- no-break space --]'],
    '<!-- comment -->' => ['COMMENT[ comment ]'],
    '<!-- comment -- --- comment -->' =>
        ['COMMENT[ comment ]', 'COMMENT[- comment ]'],
    '<!-- comment <!-- not comment --> comment -->' =>
        ['COMMENT[ comment <!]', 'COMMENT[> comment ]'],
    '<!-- <a href="foo"> -->' => ['COMMENT[ <a href="foo"> ]'],
);
# @tests is a flat list of (input, expected) pairs, so the number of
# assertions is half its length.
plan tests => scalar(@tests) / 2;

# Consume @tests two elements at a time: an HTML snippet and the event log
# the P handlers are expected to record for it.
while (@tests) {
    my ($html, $expected) = splice @tests, 0, 2;

    # Empty the shared log and build a fresh parser for every case so that
    # cases cannot influence each other.
    @result = ();
    my $parser = P->new;

    # Several comment cases in @tests expect one "<!-- ... -->" to produce
    # more than one COMMENT event; see strict_comment in the HTML::Parser
    # documentation.
    $parser->strict_comment(1);
    $parser->parse($html)->eof;

    # is_deeply pinpoints the first differing element on failure; the diag
    # additionally dumps both complete logs.
    is_deeply(\@result, $expected, "events for: $html")
        or diag("Expected: @$expected\n", "Got: @result\n");
}
|