use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric);
use Test::More tests => 20;
# Round-trip a Latin-1 sentence through decode and encode.  The decoded text
# is written with \x escapes so the comparison does not depend on the
# encoding this file is saved in.
$a = "V&aring;re norske tegn b&oslash;r &#230;res";
decode_entities($a);
is($a, "V\xE5re norske tegn b\xF8r \xE6res", "decode named and decimal entities");

# Called in void context, so $a itself is rewritten.
encode_entities($a);
is($a, "V&aring;re norske tegn b&oslash;r &aelig;res", "encode to named entities");

decode_entities($a);
encode_entities_numeric($a);
is($a, "V&#xE5;re norske tegn b&#xF8;r &#xE6;res", "encode to hex character references");

# Markup-significant characters.  The calls below are used for their return
# value, so $a is reused unchanged between them.
$a = "<&>\"'";
is(encode_entities($a), "&lt;&amp;&gt;&quot;&#39;", "default unsafe set, named form");
is(encode_entities_numeric($a), "&#x3C;&#x26;&#x3E;&#x22;&#x27;", "default unsafe set, numeric form");

# An explicit unsafe-character range: only a, b and c are encoded.
$a = "abcdef";
is(encode_entities($a, 'a-c'), "&#97;&#98;&#99;def", "explicit range 'a-c'");

# Unsafe sets containing characters that are special inside a character
# class: slash, backslash and closing bracket.
$a = "[24/7]\\";
is(encode_entities($a, '/'), "[24&#47;7]\\", "unsafe set: slash");
is(encode_entities($a, '\\/'), "[24&#47;7]\\", "unsafe set: escaped slash");
is(encode_entities($a, '\\'), "[24/7]&#92;", "unsafe set: lone backslash");
is(encode_entities($a, ']\\'), "[24/7&#93;&#92;", "unsafe set: bracket and backslash");
# See how well it does against rfc1866...
#
# NOTE(review): this span was syntactically incomplete (a `while ()` with no
# filehandle and an unterminated pattern that ran into the hash initialiser
# further down).  The loop, the three round-trip checks and the opening of the
# %hash block were rebuilt so that the file parses and the declared plan of 20
# tests is met -- confirm against the upstream HTML-Parser t/entities.t.
$ent = $plain = "";
while (<DATA>) {
    # Pick out lines of the form: <!ENTITY name CDATA "&#NNN;" ...>
    next unless /^\s*<!ENTITY\s+(\w+)\s*CDATA\s*"&\#(\d+)/;
    $ent   .= "&$1;";
    $plain .= chr($2);
}

# Every collected entity name decodes to its character...
$a = $ent;
decode_entities($a);
is($a, $plain);

# ...also when the terminating ";" is left out...
$a = $ent;
$a =~ s/;//g;
decode_entities($a);
is($a, $plain);

# ...and encoding the characters gives the names back.
$a = $plain;
encode_entities($a);
is($a, $ent);

# A hash key used directly as the argument: the call is wrapped in eval so a
# die is reported as a failed test rather than aborting the script, and the
# returned value must equal the decoded text stored as the hash value.
{
    my %hash = (
        "V&aring;re norske tegn b&oslash;r &#230;res" => "V\xE5re norske tegn b\xF8r \xE6res"
    );
    my ($got, $eval_ok);
    $eval_ok= eval { $got= decode_entities((keys %hash)[0]); 1 };
    is( $eval_ok, 1, "decode_entitites() when processing a key as input");
    is( $got, (values %hash)[0], "decode_entities() decodes a key properly");
}
# From: Bill Simpson-Young
# Subject: HTML entities problem with 5.11
# To: libwww-perl@ics.uci.edu
# Date: Fri, 05 Sep 1997 16:56:55 +1000
# Message-Id: <199709050657.QAA10089@snowy.nsw.cmis.CSIRO.AU>
#
# Hi. I've got a problem that has surfaced with the changes to
# HTML::Entities.pm for 5.11 (it doesn't happen with 5.08). It's happening
# in the process of encoding then decoding special entities. Eg, what goes
# in as "abc&def&ghi" comes out as "abc&def;&ghi;".
is(decode_entities("abc&def&ghi&abc;&def;"), "abc&def&ghi&abc;&def;",
   "unrecognised entity names are left untouched");

# Decoding of &apos;
is(decode_entities("&apos;"), "'", "&apos; decodes to an apostrophe");
is(encode_entities("'", "'"), "&#39;", "apostrophe encodes to &#39;");

# Decimal character references above Latin-1, written without the
# terminating semicolon and directly followed by letters or by another
# reference.
is(decode_entities("Attention Home&#959&#969n&#1257rs...1&#1109t T&#1110&#1084e E&#957&#1257&#1075"),
   "Attention Home\x{3BF}\x{3C9}n\x{4E9}rs...1\x{455}t T\x{456}\x{43C}e E\x{3BD}\x{4E9}\x{433}");

# Decoding is a single pass: an "&amp;" produced by decoding "&#38;amp;" or
# "&#x26;amp;" must not be decoded a second time.
is(decode_entities("{&#38;amp;&#x26;amp;&amp; also &#x42f;&#339;}"),
   "{&amp;&amp;& also \x{42F}\x{153}}");
__END__
# Quoted from rfc1866.txt
14. Proposed Entities
The HTML DTD references the "Added Latin 1" entity set, which only
supplies named entities for a subset of the non-ASCII characters in
[ISO-8859-1], namely the accented characters. The following entities
should be supported so that all ISO 8859-1 characters may only be
referenced symbolically. The names for these entities are taken from
the appendixes of [SGML].
Berners-Lee & Connolly Standards Track [Page 75]
RFC 1866 Hypertext Markup Language - 2.0 November 1995
Berners-Lee & Connolly Standards Track [Page 76]
RFC 1866 Hypertext Markup Language - 2.0 November 1995