diff options
author | Karl Williamson <khw@khw-desktop.(none)> | 2010-07-03 10:12:33 -0600 |
---|---|---|
committer | Jesse Vincent <jesse@bestpractical.com> | 2010-07-04 21:43:44 +0100 |
commit | 5a7fb30a54f192f9dc958d7a74add600705b96bb (patch) | |
tree | 95745f9a17dae9afbea0305e9b6ad7097dcf8d6e | |
parent | da9dec57e250ecec9d2000bc94f516e6b3ee468c (diff) | |
download | perl-5a7fb30a54f192f9dc958d7a74add600705b96bb.tar.gz |
charnames: check for use bytes in vianame; efficiency
When vianame returns a chr, it now verifies that it is legal under 'use
bytes'. Update .t
An instance of taking of a substr of a huge string is needed only in an
error leg. Move it to that leg for performance.
And make the message a subroutine so will be identical whenever raised.
-rw-r--r-- | lib/charnames.pm | 19 | ||||
-rw-r--r-- | lib/charnames.t | 9 |
2 files changed, 21 insertions, 7 deletions
diff --git a/lib/charnames.pm b/lib/charnames.pm index da52abc7ce..25a63d88d1 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -450,6 +450,11 @@ sub alias (@) } } # alias +sub not_legal_use_bytes_msg { + my ($name, $ord) = @_; + return sprintf("Character 0x%04x with name '$name' is above 0xFF with 'use bytes' in effect", $ord); +} + sub alias_file ($) { my ($arg, $file) = @_; @@ -549,9 +554,6 @@ sub lookup_name { return "\x{FFFD}"; } - # Get the official name in case need to output a message - $name = substr($txt, $off[0], $off[1] - $off[0]); - ## ## Now know where in the string the name starts. ## The code, in hex, is before that. @@ -577,7 +579,11 @@ sub lookup_name { # Here is compile time, "use bytes" is in effect, and the character # won't fit in a byte - croak sprintf("Character 0x%04x with name '$name' is above 0xFF", $ord); + + # Get the official name if have one for the message + $name = substr($txt, $off[0], $off[1] - $off[0]) if @off; + + croak not_legal_use_bytes_msg($name, $ord); } # lookup_name sub charnames { @@ -730,7 +736,10 @@ sub vianame # khw claims that this is bad. The function should return either a # an ord or a chr for all inputs; not be bipolar. Also, under 'use # bytes', can create a chr above 255. - return chr CORE::hex $1; + my $ord = CORE::hex $1; + return chr $ord if $ord <= 255 || ! ((caller 0)[8] & $bytes::hint_bits); + carp not_legal_use_bytes_msg($arg, $ord); + return; } if (! exists $vianame{$arg}) { diff --git a/lib/charnames.t b/lib/charnames.t index fa132e853a..e8ce58eda0 100644 --- a/lib/charnames.t +++ b/lib/charnames.t @@ -159,7 +159,8 @@ sub to_bytes { } { - is(sprintf("%04X", charnames::vianame("GOTHIC LETTER AHSA")), "10330"); + cmp_ok(charnames::vianame("GOTHIC LETTER AHSA"), "==", 0x10330, "Verify vianame \\N{name} returns an ord"); + is(charnames::vianame("U+10330"), "\x{10330}", "Verify vianame \\N{U+hex} returns a chr"); use warnings; my $warning_count = @WARN; ok (! defined charnames::vianame("NONE SUCH")); @@ -167,6 +168,10 @@ sub to_bytes { use bytes; is(charnames::vianame("GOTHIC LETTER AHSA"), 0x10330, "Verify vianame \\N{name} is unaffected by 'use bytes'"); + is(charnames::vianame("U+FF"), chr(0xFF), "Verify vianame \\N{U+FF} is unaffected by 'use bytes'"); + cmp_ok($warning_count, '==', @WARN, "Verify vianame doesn't warn on legal inputs"); + is(charnames::vianame("U+100"), undef, "Verify vianame \\N{U+100} is undef under 'use bytes'"); + ok($warning_count == @WARN - 1 && $WARN[-1] =~ /above 0xFF/, "Verify vianame gives appropriate warning for previous test"); } { @@ -670,7 +675,7 @@ is($_, 'foobar'); my $names = do "unicore/Name.pl"; ok(defined $names); my $non_ascii = native_to_latin1($names) =~ tr/\0-\177//c; -ok(! $non_ascii, "Make sure all names are ASCII-only"); +ok(! $non_ascii, "Verify all official names are ASCII-only"); # Verify that charnames propagate to eval("") my $evaltry = eval q[ "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}" ]; |