# Test Unicode entities
#
# Checks HTML::Entities::decode_entities() on named and numeric character
# references, including references the assertions expect to be left
# undecoded or replaced by U+FFFD, and checks that encode_entities()
# followed by decode_entities() round-trips several code point ranges.
#
# Every entity reference and every non-ASCII character in this file is
# written with ASCII-only text/escapes, so the tests do not depend on the
# encoding the file itself is stored in.

use strict;
use warnings;

use HTML::Entities;

# The plan below and the count passed to skip() must always be equal;
# otherwise a skipped run reports a different number of tests than planned.
use Test::More tests => 26;

SKIP: {
    skip "This perl does not support Unicode or Unicode entities not selected", 26
        if $] < 5.008 || !HTML::Entities::UNICODE_SUPPORT();

    # Named references: "euro" is expected to need its terminating ";",
    # while "aring" is expected to decode with or without it.
    is(decode_entities("&euro"),   "&euro");
    is(decode_entities("&euro;"),  "\x{20AC}");

    is(decode_entities("&aring"),  "\xE5");
    is(decode_entities("&aring;"), "\xE5");

    # Numeric references above the BMP.
    is(decode_entities("&#500000"),  chr(500000));
    is(decode_entities("&#x10FFFD"), "\x{10FFFD}");

    # U+FFFC decodes to itself; the following code points are expected to
    # come back as U+FFFD (REPLACEMENT CHARACTER).
    is(decode_entities("&#xFFFC"), "\x{FFFC}");
    is(decode_entities("&#xFDD0"), "\x{FFFD}");
    is(decode_entities("&#xFDD1"), "\x{FFFD}");
    is(decode_entities("&#xFDE0"), "\x{FFFD}");
    is(decode_entities("&#xFDEF"), "\x{FFFD}");

    # Expected to be left undecoded (input and expected value are the same).
    # NOTE(review): spelling of the reference (hex, no ";") is reconstructed
    # -- confirm against the upstream test file.
    is(decode_entities("&#xFFFF"), "&#xFFFF");

    is(decode_entities("&#x10FFFF"), "\x{FFFD}");

    # NOTE(review): the next six inputs were unreadable in the copy under
    # review (each appeared only as U+FFFD, with an identical expected
    # value).  They are reconstructed as references to values above
    # U+10FFFF and to code point 0, all expected to be left undecoded --
    # confirm against the upstream test file.
    is(decode_entities("&#x110000"),   "&#x110000");
    is(decode_entities("&#XFFFFFFFF"), "&#XFFFFFFFF");

    is(decode_entities("&#0"),   "&#0");
    is(decode_entities("&#0;"),  "&#0;");
    is(decode_entities("&#x0"),  "&#x0");
    is(decode_entities("&#X0;"), "&#X0;");

    # A bare "&#" stays as is; the three following references each yield
    # U+00E5 and the last one U+0FFF, none of them ";"-terminated.
    # NOTE(review): which three spellings of U+00E5 were used is
    # reconstructed -- confirm against the upstream test file.
    is(decode_entities("&#&aring&#xE5&#229&#xFFF"), "&#\xE5\xE5\xE5\x{FFF}");

    # This might fail when we get more than 64 bit UVs
    # NOTE(review): exact digit strings are reconstructed; the intent shown
    # by the comment above is a value too large for a 64-bit integer, left
    # undecoded -- confirm against the upstream test file.
    is(decode_entities("&#0009999999999999999999999999999;"),
       "&#0009999999999999999999999999999;");
    is(decode_entities("&#xFFFF00000000000000000000000000000;"),
       "&#xFFFF00000000000000000000000000000;");

    # Round trip: encoding then decoding each range must give back the
    # original string.  All ranges are checked before the single ok().
    my $err;
    for my $range ([32, 48], [120, 169], [240, 250], [250, 260], [965, 975], [3000, 3005]) {
        my ($first, $last) = @$range;
        my $plain   = join("", map { chr $_ } $first .. $last);
        my $encoded = encode_entities($plain);
        my $decoded = decode_entities($encoded);

        next if $decoded eq $plain;

        diag "Wrong decoding in range $first .. $last";
        # use Devel::Peek; Dump($plain); Dump($decoded);
        $err++;
    }
    ok(!$err);

    # A surrogate pair given as two numeric references (0xDBC0, 0xDC85)
    # combines into the single code point U+100085 ...
    is(decode_entities("&#56256;&#56453;"), chr(0x100085));

    # ... while a lone surrogate becomes U+FFFD.
    # NOTE(review): which lone surrogate was used is reconstructed.
    is(decode_entities("&#56256"), chr(0xFFFD));

    # A named reference to a character above U+00FF between two Latin-1
    # bytes (octal 260 == 0xB0).
    is(decode_entities("\260&rsquo;\260"), "\x{b0}\x{2019}\x{b0}");
}