blob: 883c61f0456d980bb15a8fb3e49af9d2ce2d64dc (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
|
#!perl -w
use strict;
use Test::More tests => 2;
use HTML::Parser ();
# Accumulators filled in by append(): $dtext collects the first value of
# every callback invocation, $text collects the second.
my ($dtext, $text) = ("", "");

# Parser callback.  Takes two strings and appends the first to $dtext and
# the second to $text.
sub append {
    my ($first, $second) = @_;
    $dtext .= $first;
    $text  .= $second;
}
# Both handlers funnel into append(); only the argspec string differs.
# text_h passes "dtext, text", while default_h passes "text, text", so for
# default_h both accumulators receive the same value.
my $p = HTML::Parser->new(
    text_h    => [ \&append, "dtext, text" ],
    default_h => [ \&append, "text, text" ],
);
# Input document fed to the parser.  The heredoc is single-quoted, so its
# body is taken literally (no interpolation, no backslash escapes).
# NOTE(review): the body below contains literal non-ASCII characters and
# U+FFFD replacement characters; presumably the upstream file spells these
# as entity references and a viewer decoded them -- verify against the
# original before trusting this copy.
my $doc = <<'EOT';
<title>å</title>
<a href="fooå">ååAA<A>AA</a>
<?å>
foo bar
foo bar
&xyzzy
&xyzzy;
<!-- � -->

ÿ
ÿ
ÿG
<!-- Ā -->
�
�
&
&#
&#x
<xmp>å</xmp>
<script>å</script>
<ScRIPT>å</scRIPT>
<skript>å</script>
EOT
# Feed the whole document to the parser in one chunk, then signal end of
# input so any buffered text is delivered to the handlers.
$p->parse($doc)->eof;

# Each assertion carries a description so a failure names what broke
# instead of reporting only a test number.

# $text accumulates the second value passed to append(); it must equal the
# input document exactly.
is($text, $doc, 'collected text equals the source document');

# $dtext accumulates the first value passed to append(); compare it with the
# expected output given in the interpolating heredoc that follows.
is($dtext, <<"EOT", 'collected dtext equals the expected output');
<title>ċ</title>
<a href="fooå">ċċAA<A>AA</a>
<?å>
foo\240bar
foo\240bar
&xyzzy
&xyzzy;
<!-- � -->
\1
\377
\377
\377G
<!-- Ā -->
�
�
&
&#
&#x
<xmp>å</xmp>
<script>å</script>
<ScRIPT>å</scRIPT>
<skript>ċ</script>
EOT
|