summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@khw-desktop.(none)>2010-07-03 10:12:33 -0600
committerJesse Vincent <jesse@bestpractical.com>2010-07-04 21:43:44 +0100
commit5a7fb30a54f192f9dc958d7a74add600705b96bb (patch)
tree95745f9a17dae9afbea0305e9b6ad7097dcf8d6e
parentda9dec57e250ecec9d2000bc94f516e6b3ee468c (diff)
downloadperl-5a7fb30a54f192f9dc958d7a74add600705b96bb.tar.gz
charnames: check for use bytes in vianame; efficiency
When vianame returns a chr, it now verifies that it is legal under 'use bytes'. Update .t An instance of taking of a substr of a huge string is needed only in an error leg. Move it to that leg for performance. And make the message a subroutine so will be identical whenever raised.
-rw-r--r--lib/charnames.pm19
-rw-r--r--lib/charnames.t9
2 files changed, 21 insertions, 7 deletions
diff --git a/lib/charnames.pm b/lib/charnames.pm
index da52abc7ce..25a63d88d1 100644
--- a/lib/charnames.pm
+++ b/lib/charnames.pm
@@ -450,6 +450,11 @@ sub alias (@)
}
} # alias
+sub not_legal_use_bytes_msg {
+ my ($name, $ord) = @_;
+ return sprintf("Character 0x%04x with name '$name' is above 0xFF with 'use bytes' in effect", $ord);
+}
+
sub alias_file ($)
{
my ($arg, $file) = @_;
@@ -549,9 +554,6 @@ sub lookup_name {
return "\x{FFFD}";
}
- # Get the official name in case need to output a message
- $name = substr($txt, $off[0], $off[1] - $off[0]);
-
##
## Now know where in the string the name starts.
## The code, in hex, is before that.
@@ -577,7 +579,11 @@ sub lookup_name {
# Here is compile time, "use bytes" is in effect, and the character
# won't fit in a byte
- croak sprintf("Character 0x%04x with name '$name' is above 0xFF", $ord);
+
+ # Get the official name if have one for the message
+ $name = substr($txt, $off[0], $off[1] - $off[0]) if @off;
+
+ croak not_legal_use_bytes_msg($name, $ord);
} # lookup_name
sub charnames {
@@ -730,7 +736,10 @@ sub vianame
# khw claims that this is bad. The function should return either a
# an ord or a chr for all inputs; not be bipolar. Also, under 'use
# bytes', can create a chr above 255.
- return chr CORE::hex $1;
+ my $ord = CORE::hex $1;
+ return chr $ord if $ord <= 255 || ! ((caller 0)[8] & $bytes::hint_bits);
+ carp not_legal_use_bytes_msg($arg, $ord);
+ return;
}
if (! exists $vianame{$arg}) {
diff --git a/lib/charnames.t b/lib/charnames.t
index fa132e853a..e8ce58eda0 100644
--- a/lib/charnames.t
+++ b/lib/charnames.t
@@ -159,7 +159,8 @@ sub to_bytes {
}
{
- is(sprintf("%04X", charnames::vianame("GOTHIC LETTER AHSA")), "10330");
+ cmp_ok(charnames::vianame("GOTHIC LETTER AHSA"), "==", 0x10330, "Verify vianame \\N{name} returns an ord");
+ is(charnames::vianame("U+10330"), "\x{10330}", "Verify vianame \\N{U+hex} returns a chr");
use warnings;
my $warning_count = @WARN;
ok (! defined charnames::vianame("NONE SUCH"));
@@ -167,6 +168,10 @@ sub to_bytes {
use bytes;
is(charnames::vianame("GOTHIC LETTER AHSA"), 0x10330, "Verify vianame \\N{name} is unaffected by 'use bytes'");
+ is(charnames::vianame("U+FF"), chr(0xFF), "Verify vianame \\N{U+FF} is unaffected by 'use bytes'");
+ cmp_ok($warning_count, '==', @WARN, "Verify vianame doesn't warn on legal inputs");
+ is(charnames::vianame("U+100"), undef, "Verify vianame \\N{U+100} is undef under 'use bytes'");
+ ok($warning_count == @WARN - 1 && $WARN[-1] =~ /above 0xFF/, "Verify vianame gives appropriate warning for previous test");
}
{
@@ -670,7 +675,7 @@ is($_, 'foobar');
my $names = do "unicore/Name.pl";
ok(defined $names);
my $non_ascii = native_to_latin1($names) =~ tr/\0-\177//c;
-ok(! $non_ascii, "Make sure all names are ASCII-only");
+ok(! $non_ascii, "Verify all official names are ASCII-only");
# Verify that charnames propagate to eval("")
my $evaltry = eval q[ "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}" ];