# File: t/pullparser.t
# Blob: 80a186b399527d545424fe7901a22de6629121f9
use Test::More tests => 3;

use HTML::PullParser;

# HTML fixture: a title, a <style> element with a stray tag inside it,
# an upper-case heading with an attribute, a comment, and a line of text
# containing one link.
my $doc = <<'END_OF_HTML';
<title>Title</title>
<style> h1 { background: white }
<foo>
</style>
<H1 ID="3">Heading</H1>
<!-- ignore this -->

This is a text with a <A HREF="http://www.sol.no" name="l1">link</a>.
END_OF_HTML

# Argspecs: which events are requested and what each token carries.
# Only start, end and text events are given a spec here.
my @event_specs = (
    start => 'event,tagname,@attr',
    end   => 'event,tagname',
    text  => 'event,dtext',
);

# Remaining parser options, passed after the event specs.
my @parser_options = (
    ignore_elements         => [qw(script style)],
    unbroken_text           => 1,
    boolean_attribute_value => 1,
);

my $p = HTML::PullParser->new(doc => $doc, @event_specs, @parser_options);

# Peek at the first token, check its event and tag name, then push it back
# so the full-stream comparison further down still starts at that token.
my $first_token = $p->get_token;
is($first_token->[0], "start", "first token is a start event");
is($first_token->[1], "title", "first token is for the title tag");
$p->unget_token($first_token);

# Drain the parser, rendering each token as its fields joined with "|".
# Every whitespace character inside a field is replaced by "." so that it
# is visible in the expected listing below.
my @lines;
while (my $token = $p->get_token) {
    # Work on a copy so the token array itself is left untouched.
    my @fields = @$token;
    s/\s/./g for @fields;
    push @lines, join("|", @fields);
}

# The trailing empty element gives the joined string a final newline,
# matching the newline-terminated heredoc.
my $res = join("\n", @lines, "");
# diag $res;    # uncomment to dump the actual token stream when debugging
is($res, <<'EOT', "token stream matches the expected event sequence");
start|title
text|Title
end|title
text|..
start|h1|id|3
text|Heading
end|h1
text|...This.is.a.text.with.a.
start|a|href|http://www.sol.no|name|l1
text|link
end|a
text|..
EOT