summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2015-03-09 12:37:24 -0600
committer Karl Williamson <khw@cpan.org> 2015-09-14 22:38:35 -0600
commit bc37b130604215b78ec3e03d73b81cb08cfa741e (patch)
tree 6539f673889e8f59392f01cfe98e527ccf48dde0
parent c79631a1c8b4ecf3ec1fc557c875d412058b3e47 (diff)
download perl-bc37b130604215b78ec3e03d73b81cb08cfa741e.tar.gz
PATCH [perl #120790] Unicode::UCD failure to warn on bad input
This ticket was originally filed because the requester did not realize that the function Unicode::UCD::charscript takes a code point argument rather than a character (chr) one. It was rejected on that basis. But the discussion in the ticket suggested it would be better to warn on bad input instead of just returning <undef>. It turns out that all other routines in Unicode::UCD except charscript and charblock already do warn. This commit extends that warning to those two outlier routines.
-rw-r--r-- charclass_invlists.h 2
-rw-r--r-- lib/Unicode/UCD.pm 6
-rw-r--r-- lib/Unicode/UCD.t 21
-rw-r--r-- regcharclass.h 2
4 files changed, 27 insertions, 4 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index adc4dd288c..a9ff0ca7b0 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -99495,7 +99495,7 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */
#endif /* EBCDIC POSIX-BC */
/* Generated from:
- * 0bca60a25eb4ccf2e04f50446db5f882322f50a9c61dc57bb806ccfc9b2e26a4 lib/Unicode/UCD.pm
+ * 40444e5cd670b57be97020416a80439abe6d08dc75b4fd29745894749183e9a0 lib/Unicode/UCD.pm
* ae98bec7e4f0564758eed81eca5015481ba32581f8a735a825b71b3bba714450 lib/unicore/ArabicShaping.txt
* 1687fe5994eb7e5c0dab8503fc2a1b3b479d91af9d3b8055941c9bd791f7d0b5 lib/unicore/BidiBrackets.txt
* 350d1302116194b0b21def287434b55c5088098fbc726e879f7420a391965643 lib/unicore/BidiMirroring.txt
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index 1854982491..56033a7591 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -5,7 +5,7 @@ use warnings;
no warnings 'surrogate'; # surrogates can be inputs to this
use charnames ();
-our $VERSION = '0.62';
+our $VERSION = '0.63';
require Exporter;
@@ -937,6 +937,9 @@ sub charblock {
elsif (exists $BLOCKS{$arg}) {
return _dclone $BLOCKS{$arg};
}
+
+ carp __PACKAGE__, "::charblock: unknown code '$arg'";
+ return;
}
=head2 B<charscript()>
@@ -1004,6 +1007,7 @@ sub charscript {
return _dclone $SCRIPTS{$arg};
}
+ carp __PACKAGE__, "::charscript: unknown code '$arg'";
return;
}
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index 22b2edbc93..83320d34a0 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -381,6 +381,15 @@ is(charblock(0x590), "Hebrew", "0x0590 - Hebrew unused charblock");
is(charscript(0x590), $unknown_script, "0x0590 - Hebrew unused charscript") if $v_unicode_version gt v3.0.1;
is(charblock(0x1FFFF), "No_Block", "0x1FFFF - unused charblock");
+{
+ my @warnings;
+ local $SIG{__WARN__} = sub { push @warnings, @_ };
+ is(charblock(chr(0x6237)), undef,
+ "Verify charblock of non-code point returns <undef>");
+ cmp_ok(scalar @warnings, '==', 1, " ... and generates 1 warning");
+ like($warnings[0], qr/unknown code/, " ... with the right text");
+}
+
my $fraction_3_4_code = sprintf("%04X", utf8::unicode_to_native(0xbe));
$cp = $fraction_3_4_code;
$charinfo = charinfo($fraction_3_4_code);
@@ -762,10 +771,20 @@ is(Unicode::UCD::_getcode('U+123x'), undef, "_getcode(x123)");
SKIP:
{
skip("Script property not in this release", 3) if $v_unicode_version lt v3.1.0;
+
+ {
+ my @warnings;
+ local $SIG{__WARN__} = sub { push @warnings, @_ };
+ is(charscript(chr(0x6237)), undef,
+ "Verify charscript of non-code point returns <undef>");
+ cmp_ok(scalar @warnings, '==', 1, " ... and generates 1 warning");
+ like($warnings[0], qr/unknown code/, " ... with the right text");
+ }
+
my $r1 = charscript('Latin');
if (ok(defined $r1, "Found Latin script")) {
skip("Latin range count will be wrong when using older Unicode release",
- 2) if $v_unicode_version lt $expected_version;
+ 2) if $current_version lt $expected_version;
my $n1 = @$r1;
is($n1, 31, "number of ranges in Latin script (Unicode $expected_version)") if $::IS_ASCII;
shift @$r1 while @$r1;
diff --git a/regcharclass.h b/regcharclass.h
index 5de7097942..ab653aebea 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -2472,7 +2472,7 @@
#endif /* H_REGCHARCLASS */
/* Generated from:
- * 0bca60a25eb4ccf2e04f50446db5f882322f50a9c61dc57bb806ccfc9b2e26a4 lib/Unicode/UCD.pm
+ * 40444e5cd670b57be97020416a80439abe6d08dc75b4fd29745894749183e9a0 lib/Unicode/UCD.pm
* ae98bec7e4f0564758eed81eca5015481ba32581f8a735a825b71b3bba714450 lib/unicore/ArabicShaping.txt
* 1687fe5994eb7e5c0dab8503fc2a1b3b479d91af9d3b8055941c9bd791f7d0b5 lib/unicore/BidiBrackets.txt
* 350d1302116194b0b21def287434b55c5088098fbc726e879f7420a391965643 lib/unicore/BidiMirroring.txt