1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
use Test::More tests => 4;
use strict;
use HTML::Parser;
# Scenario 1: restrict tag reporting to <a> via report_tags(), so every other
# piece of markup has to surface through the skipped_text argspec instead.
my $p = HTML::Parser->new(api_version => 3);
$p->report_tags('a');

# Output fragments, in document order.  Both a_handler() and the
# end_document handler below append to this array.
my @doc;

$p->handler(
    start => \&a_handler,
    "skipped_text, text",
);
$p->handler(
    end_document => \@doc,
    '@{skipped_text}',
);

my $input_html = <<'EOT';
<title>hi</title>
<h1><a href="foo">link</a></h1>
and <a foo="">some</a> text.
EOT

$p->parse($input_html);
$p->eof;
# Start-tag handler, registered with the "skipped_text, text" argspec.
# Appends to @doc the text skipped since the previous reported event,
# followed by the upper-cased source text of the start tag itself.
sub a_handler {
    my ($skipped_text, $tag_text) = @_;
    push @doc, $skipped_text, uc($tag_text);
}
# Reassembling @doc must give back the input verbatim, except for the <a>
# start tags, which a_handler() upper-cased.  Everything else (including the
# </a> end tags) arrived unchanged via skipped_text.
is(join("", @doc), <<'EOT', "skipped_text preserves markup around reported <a> start tags");
<title>hi</title>
<h1><A HREF="FOO">link</a></h1>
and <A FOO="">some</a> text.
EOT
#
# Comment stripper. Interaction with "" handlers.
#
my $doc = <<EOT;
<html>text</html>
<!-- comment -->
and some more <b>text</b>.
EOT

# Expected result: the input with the comment markup removed (the newline
# that followed the comment stays).
(my $expected = $doc) =~ s/<!--.*?-->//;

$p = HTML::Parser->new(api_version => 3);

# Per the HTML::Parser docs, the text of an event with a "" handler is not
# included in skipped_text, which is what strips the comment here.
$p->handler(comment => "");

# No other handlers are registered, so at end of document skipped_text holds
# everything else.  The assertion runs inside the callback; the fixed test
# plan at the top of the file catches the case where it never fires.
$p->handler(
    end_document => sub {
        my ($stripped) = @_;
        #diag $stripped;
        is($stripped, $expected, 'text of ""-handled comment is left out of skipped_text');
    },
    "skipped_text",
);

# Feed the document one character at a time so that every token is split
# across parse() calls.
for my $char (split //, $doc) {
    $p->parse($char);
}
$p->eof;
#
# Interaction with unbroken text
#
my @x;
$p = HTML::Parser->new(api_version => 3, unbroken_text => 1);

# Each text event appends ("X", skipped_text, text) to @x; end of document
# appends ("Y", skipped_text).  End tags get a "" handler, start tags get no
# handler at all.
$p->handler(text         => \@x, '@{"X", skipped_text, text}');
$p->handler(end          => "");
$p->handler(end_document => \@x, '@{"Y", skipped_text}');

# Feed the document one character at a time so that text runs are split
# across parse() calls.
$doc = "a a<a>b b</a>c c<x>d d</x>e";
for my $char (split //, $doc) {
    $p->parse($char);
}
$p->eof;

#diag join(":", @x);

# In the expected string the unhandled start tags show up as skipped_text
# ("<a>", "<x>"), while the ""-handled end tags vanish and the text on either
# side of them is reported as one run ("b bc c", "d de").
is(join(":", @x), "X::a a:X:<a>:b bc c:X:<x>:d de:Y:",
    "skipped_text and unbroken_text cooperate when input arrives char by char");
#
# The crash that Chip found
#
# Regression test: skipped text that starts with a wide character
# ("\x{100}") and keeps accumulating over several parse() calls must be
# handed to the start handler intact.
my $skipped;
$p = HTML::Parser->new(
    ignore_tags => ["foo"],
    start_h     => [sub { $skipped = shift }, "skipped_text"],
);

# <foo> is ignored and no text handler is registered, so nothing is reported
# until <bar>.  The parse() calls are kept as three separate literal calls on
# purpose: this is the sequence the regression test pins down.
$p->parse("\x{100}<foo>");
$p->parse("plain");
$p->parse("<bar>");
$p->eof;

is($skipped, "\x{100}<foo>plain",
    "skipped_text spanning several parse() calls keeps wide characters intact");
|