# Tests for HTML::Entities: decoding and encoding of named and numeric
# character references, custom "unsafe character" sets, and a few
# regression cases taken from bug reports.

use strict;
use warnings;

use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric);

use Test::More tests => 20;

# Non-ASCII expectations are written with \x escapes so that the comparisons
# do not depend on the encoding this file happens to be saved in.
my $latin1_plain = "V\xE5re norske tegn b\xF8r \xE6res";

# Mixes named references with a decimal one (&#230;) on purpose.
my $latin1_encoded = "V&aring;re norske tegn b&oslash;r &#230;res";

# In void context these functions modify their argument in place, which is
# what the first three checks rely on.
my $text = $latin1_encoded;
decode_entities($text);
is($text, $latin1_plain, "decode named and decimal references in place");

encode_entities($text);
is($text, "V&aring;re norske tegn b&oslash;r &aelig;res",
    "encode high-bit characters as named entities");

decode_entities($text);
encode_entities_numeric($text);
is($text, "V&#xE5;re norske tegn b&#xF8;r &#xE6;res",
    "encode high-bit characters as hexadecimal references");

# Default unsafe set: the markup-significant ASCII characters.
$text = "<&>\"'";
is(encode_entities($text), "&lt;&amp;&gt;&quot;&#39;",
    "encode_entities() on markup characters");
is(encode_entities_numeric($text), "&#x3C;&#x26;&#x3E;&#x22;&#x27;",
    "encode_entities_numeric() on markup characters");

# An explicit unsafe set is treated as the inside of a character class.
$text = "abcdef";
is(encode_entities($text, 'a-c'), "&#97;&#98;&#99;def",
    "range in the unsafe character set");

# '/', ']' and a trailing '\' need special care inside that character class.
$text = "[24/7]\\";
is(encode_entities($text, '/'), "[24&#47;7]\\", "unsafe set: slash");
is(encode_entities($text, '\\/'), "[24&#47;7]\\", "unsafe set: escaped slash");
is(encode_entities($text, '\\'), "[24/7]&#92;", "unsafe set: lone backslash");
is(encode_entities($text, ']\\'), "[24/7&#93;&#92;",
    "unsafe set: bracket and backslash");

# See how well it does against rfc1866...
# Every <!ENTITY name CDATA "&#NNN;" ...> declaration found after __END__
# contributes "&name;" to $ent and the character chr(NNN) to $plain.
my ($ent, $plain) = ("", "");
while (my $line = <DATA>) {
    next unless $line =~ /^\s*<!ENTITY\s+(\w+)\s*CDATA\s*"&\#(\d+)/;
    $ent   .= "&$1;";
    $plain .= chr($2);
}

# Without any declarations the next three checks only compare empty strings.
diag("no <!ENTITY ...> declarations found after __END__; "
    . "the rfc1866 checks below are vacuous")
    unless length $ent;

$text = $ent;
decode_entities($text);
is($text, $plain, "decode every entity declared in the rfc1866 excerpt");

# Try decoding when the ";" are left out
$text = $ent;
$text =~ s/;//g;
decode_entities($text);
is($text, $plain, "decode the same entities without the trailing ';'");

$text = $plain;
encode_entities($text);
is($text, $ent, "encode back to the declared entity names");

# Try a hash with an entity key
{
    my %hash = (
        $latin1_encoded => $latin1_plain,
    );
    my ($got, $eval_ok);
    $eval_ok = eval { $got = decode_entities((keys %hash)[0]); 1 };
    is($eval_ok, 1, "decode_entitites() when processing a key as input");
    is($got, (values %hash)[0], "decode_entities() decodes a key properly");
}

# From: Bill Simpson-Young
# Subject: HTML entities problem with 5.11
# To: libwww-perl@ics.uci.edu
# Date: Fri, 05 Sep 1997 16:56:55 +1000
# Message-Id: <199709050657.QAA10089@snowy.nsw.cmis.CSIRO.AU>
#
# Hi. I've got a problem that has surfaced with the changes to
# HTML::Entities.pm for 5.11 (it doesn't happen with 5.08). It's happening
# in the process of encoding then decoding special entities. Eg, what goes
# in as "abc&def&ghi" comes out as "abc&def;&ghi;".
is(decode_entities("abc&def&ghi&abc;&def;"), "abc&def&ghi&abc;&def;",
    "unknown entity names are left untouched");

# Decoding of &apos;
is(decode_entities("&apos;"), "'", "decode &apos;");
is(encode_entities("'", "'"), "&#39;", "apostrophe is encoded numerically");

# Decimal references above Latin-1, written without the terminating ';'.
is(decode_entities("Attention Home&#959&#969n&#1257rs...1&#1109t T&#1110&#1084e E&#957&#1257&#1075"),
    "Attention Home\x{3BF}\x{3C9}n\x{4E9}rs...1\x{455}t T\x{456}\x{43C}e E\x{3BD}\x{4E9}\x{433}",
    "decode unterminated numeric references above Latin-1");

# Decoding must be a single pass: "&#38;amp;" becomes "&amp;", not "&".
is(decode_entities("{&#38;amp;&#x26;amp;&amp; also &#x42f;&#339;}"),
    "{&amp;&amp;& also \x{42F}\x{153}}",
    "decoded text is not decoded a second time");

__END__
# Quoted from rfc1866.txt

14.
Proposed Entities The HTML DTD references the "Added Latin 1" entity set, which only supplies named entities for a subset of the non-ASCII characters in [ISO-8859-1], namely the accented characters. The following entities should be supported so that all ISO 8859-1 characters may only be referenced symbolically. The names for these entities are taken from the appendixes of [SGML]. Berners-Lee & Connolly Standards Track [Page 75] RFC 1866 Hypertext Markup Language - 2.0 November 1995 Berners-Lee & Connolly Standards Track [Page 76] RFC 1866 Hypertext Markup Language - 2.0 November 1995