diff options
Diffstat (limited to 't/unicode-bom.t')
-rw-r--r-- | t/unicode-bom.t | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/t/unicode-bom.t b/t/unicode-bom.t new file mode 100644 index 0000000..b7398cf --- /dev/null +++ b/t/unicode-bom.t @@ -0,0 +1,63 @@ +#!perl -w + +use strict; +use Test::More tests => 2; +use HTML::Parser; + +SKIP: { +skip "This perl does not support Unicode", 2 if $] < 5.008; + +my @parsed; +my $p = HTML::Parser->new( + api_version => 3, + start_h => [\@parsed, 'tag, attr'], +); + +my @warn; +$SIG{__WARN__} = sub { + push(@warn, $_[0]); +}; + +$p->parse("\xEF\xBB\xBF<head>Hi there</head>"); +$p->eof; + +#use Encode; +$p->parse("\xEF\xBB\xBF<head>Hi there</head>" . chr(0x263A)); +$p->eof; + +$p->parse("\xFF\xFE<head>Hi there</head>"); +$p->eof; + +$p->parse("\xFE\xFF<head>Hi there</head>"); +$p->eof; + +$p->parse("\0\0\xFF\xFE<head>Hi there</head>"); +$p->eof; + +$p->parse("\xFE\xFF\0\0<head>Hi there</head>"); +$p->eof; + +for (@warn) { + s/line (\d+)/line ##/g; +} + +is(join("", @warn), <<EOT); +Parsing of undecoded UTF-8 will give garbage when decoding entities at $0 line ##. +Parsing of undecoded UTF-8 will give garbage when decoding entities at $0 line ##. +Parsing of undecoded UTF-16 at $0 line ##. +Parsing of undecoded UTF-16 at $0 line ##. +Parsing of undecoded UTF-32 at $0 line ##. +Parsing of undecoded UTF-32 at $0 line ##. +EOT + +@warn = (); + +$p = HTML::Parser->new( + api_version => 3, + start_h => [\@parsed, 'tag'], +); + +$p->parse("\xEF\xBB\xBF<head>Hi there</head>"); +$p->eof; +ok(!@warn); +} |