PATCH [perl #120790] Unicode::UCD failure to warn on bad input

This ticket was originally because the requester did not realize the function Unicode::UCD::charscript took a code point argument instead of a chr one. It was rejected on that basis. But discussion here suggested it would be better to warn on bad input instead of just returning <undef>. It turns out that all other routines in Unicode::UCD but charscript and charblock already do warn. This commit extends that to the two outlier returns.
author: Karl Williamson <khw@cpan.org> 2015-03-09 12:37:24 -0600
committer: Karl Williamson <khw@cpan.org> 2015-09-14 22:38:35 -0600
commit: bc37b130604215b78ec3e03d73b81cb08cfa741e (patch)
tree: 6539f673889e8f59392f01cfe98e527ccf48dde0
parent: c79631a1c8b4ecf3ec1fc557c875d412058b3e47 (diff)
download: perl-bc37b130604215b78ec3e03d73b81cb08cfa741e.tar.gz
4 files changed, 27 insertions, 4 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index adc4dd288c..a9ff0ca7b0 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -99495,7 +99495,7 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */
 #endif	/* EBCDIC POSIX-BC */
 
 /* Generated from:
- * 0bca60a25eb4ccf2e04f50446db5f882322f50a9c61dc57bb806ccfc9b2e26a4 lib/Unicode/UCD.pm
+ * 40444e5cd670b57be97020416a80439abe6d08dc75b4fd29745894749183e9a0 lib/Unicode/UCD.pm
  * ae98bec7e4f0564758eed81eca5015481ba32581f8a735a825b71b3bba714450 lib/unicore/ArabicShaping.txt
  * 1687fe5994eb7e5c0dab8503fc2a1b3b479d91af9d3b8055941c9bd791f7d0b5 lib/unicore/BidiBrackets.txt
  * 350d1302116194b0b21def287434b55c5088098fbc726e879f7420a391965643 lib/unicore/BidiMirroring.txt
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index 1854982491..56033a7591 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -5,7 +5,7 @@ use warnings;
 no warnings 'surrogate';    # surrogates can be inputs to this
 use charnames ();
 
-our $VERSION = '0.62';
+our $VERSION = '0.63';
 
 require Exporter;
 
@@ -937,6 +937,9 @@ sub charblock {
     elsif (exists $BLOCKS{$arg}) {
         return _dclone $BLOCKS{$arg};
     }
+
+    carp __PACKAGE__, "::charblock: unknown code '$arg'";
+    return;
 }
 
 =head2 B<charscript()>
@@ -1004,6 +1007,7 @@ sub charscript {
         return _dclone $SCRIPTS{$arg};
     }
 
+    carp __PACKAGE__, "::charscript: unknown code '$arg'";
     return;
 }
 
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index 22b2edbc93..83320d34a0 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -381,6 +381,15 @@ is(charblock(0x590),          "Hebrew", "0x0590 - Hebrew unused charblock");
 is(charscript(0x590),         $unknown_script, "0x0590 - Hebrew unused charscript") if $v_unicode_version gt v3.0.1;
 is(charblock(0x1FFFF),        "No_Block", "0x1FFFF - unused charblock");
 
+{
+    my @warnings;
+    local $SIG{__WARN__} = sub { push @warnings, @_  };
+    is(charblock(chr(0x6237)), undef,
+        "Verify charblock of non-code point returns <undef>");
+    cmp_ok(scalar @warnings, '==', 1, "  ... and generates 1 warning");
+    like($warnings[0], qr/unknown code/, "  ... with the right text");
+}
+
 my $fraction_3_4_code = sprintf("%04X", utf8::unicode_to_native(0xbe));
 $cp = $fraction_3_4_code;
 $charinfo = charinfo($fraction_3_4_code);
@@ -762,10 +771,20 @@ is(Unicode::UCD::_getcode('U+123x'),  undef, "_getcode(x123)");
 SKIP:
 {
     skip("Script property not in this release", 3) if $v_unicode_version lt v3.1.0;
+
+    {
+        my @warnings;
+        local $SIG{__WARN__} = sub { push @warnings, @_  };
+        is(charscript(chr(0x6237)), undef,
+           "Verify charscript of non-code point returns <undef>");
+        cmp_ok(scalar @warnings, '==', 1, "  ... and generates 1 warning");
+        like($warnings[0], qr/unknown code/, "  ... with the right text");
+    }
+
     my $r1 = charscript('Latin');
     if (ok(defined $r1, "Found Latin script")) {
         skip("Latin range count will be wrong when using older Unicode release",
-             2) if $v_unicode_version lt $expected_version;
+             2) if $current_version lt $expected_version;
         my $n1 = @$r1;
         is($n1, 31, "number of ranges in Latin script (Unicode $expected_version)") if $::IS_ASCII;
         shift @$r1 while @$r1;
diff --git a/regcharclass.h b/regcharclass.h
index 5de7097942..ab653aebea 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -2472,7 +2472,7 @@
 #endif /* H_REGCHARCLASS */
 
 /* Generated from:
- * 0bca60a25eb4ccf2e04f50446db5f882322f50a9c61dc57bb806ccfc9b2e26a4 lib/Unicode/UCD.pm
+ * 40444e5cd670b57be97020416a80439abe6d08dc75b4fd29745894749183e9a0 lib/Unicode/UCD.pm
  * ae98bec7e4f0564758eed81eca5015481ba32581f8a735a825b71b3bba714450 lib/unicore/ArabicShaping.txt
  * 1687fe5994eb7e5c0dab8503fc2a1b3b479d91af9d3b8055941c9bd791f7d0b5 lib/unicore/BidiBrackets.txt
  * 350d1302116194b0b21def287434b55c5088098fbc726e879f7420a391965643 lib/unicore/BidiMirroring.txt
author	Karl Williamson <khw@cpan.org>	2015-03-09 12:37:24 -0600
committer	Karl Williamson <khw@cpan.org>	2015-09-14 22:38:35 -0600
commit	bc37b130604215b78ec3e03d73b81cb08cfa741e (patch)
tree	6539f673889e8f59392f01cfe98e527ccf48dde0
parent	c79631a1c8b4ecf3ec1fc557c875d412058b3e47 (diff)
download	perl-bc37b130604215b78ec3e03d73b81cb08cfa741e.tar.gz