summaryrefslogtreecommitdiff
path: root/lib/charnames.t
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2020-02-15 14:28:32 -0700
committerKarl Williamson <khw@cpan.org>2020-03-11 09:00:04 -0600
commitb1a91f306fb94c677de276c489b16fb5c5664544 (patch)
treef4ab6c438f8ff72189ada40a41432c44376efc05 /lib/charnames.t
parentffd8e5150761a7856dacc0c140fbd618377d71eb (diff)
downloadperl-b1a91f306fb94c677de276c489b16fb5c5664544.tar.gz
Implement \p{Name=/.../} wildcards
This commit adds wildcard subpatterns for the Name and Name Aliases properties.
Diffstat (limited to 'lib/charnames.t')
-rw-r--r--lib/charnames.t40
1 files changed, 38 insertions, 2 deletions
diff --git a/lib/charnames.t b/lib/charnames.t
index 2bcf13d444..330192991c 100644
--- a/lib/charnames.t
+++ b/lib/charnames.t
@@ -10,8 +10,8 @@ my $run_slow_tests = $ENV{PERL_RUN_SLOW_TESTS} || 0;
my $RUN_SLOW_TESTS_EVERY_CODE_POINT = 100;
# If $ENV{PERL_RUN_SLOW_TESTS} is at least 1 and less than the number above,
-# all code points with names are tested. If it is at least that number, all
-# 1,114,112 Unicode code points are tested.
+# all code points with names are tested, including wildcard search names. If
+# it is at least that number, all 1,114,112 Unicode code points are tested.
# Because \N{} is compile time, any warnings will get generated before
# execution, so have to have an array, and arrange things so no warning
@@ -114,6 +114,7 @@ sub get_loose_name ($) { # Modify name to stress the loose tests.
}
sub test_vianame ($$$) {
+ CORE::state $wildcard_count = 0;
# Run the vianame tests on a code point, both loose and full
@@ -126,23 +127,54 @@ sub test_vianame ($$$) {
# Get a copy of the name modified to stress the loose tests.
my $loose_name = get_loose_name($name);
+ my $right_anchor;
+
# Switch loose and full in vianame vs string_vianame half the time
if (rand() < .5) {
use charnames ":full";
$all_pass &= is(charnames::vianame($name), $i, "Verify vianame(\"$name\") is 0x$hex");
use charnames ":loose";
$all_pass &= is(charnames::string_vianame($loose_name), chr($i), "Verify string_vianame(\"$loose_name\") is chr(0x$hex)");
+ $right_anchor = '\\Z';
}
else {
use charnames ":loose";
$all_pass &= is(charnames::vianame($loose_name), $i, "Verify vianame(\"$loose_name\") is 0x$hex");
use charnames ":full";
$all_pass &= is(charnames::string_vianame($name), chr($i), "Verify string_vianame(\"$name\") is chr(0x$hex)");
+ $right_anchor = '\\z';
}
+ my $left_anchor = (rand() < .5) ? '^' : '\\A';
+
# \p{name=} is always loose matching
$all_pass &= like(chr($i), qr/^\p{name=$loose_name}$/, "Verify /\p{name=$loose_name}/ matches chr(0x$hex)");
+ $wildcard_count++;
+
+ # Because wildcard name matching is so real-time intensive, do it less
+ # frequently than the others
+ if ($wildcard_count >= 10) {
+ $wildcard_count = 0;
+
+ # A few control characters have anomalous names containing
+ # parentheses, which need to be escaped.
+ my $name_ref = \$name;
+ my $mod_name;
+ if ($i <= 0x85) { # NEL in ASCII; affected controls are lower than
+ # this in EBCDIC
+ $mod_name = $name =~ s/([()])/\\$1/gr;
+ $name_ref = \$mod_name;
+ }
+
+ # We anchor the name, randomly with the possible anchors.
+ my $assembled = $left_anchor. $$name_ref . $right_anchor;
+
+ # \p{name=/.../} is always full matching
+ $all_pass &= like(chr($i), qr!^\p{name=/$assembled/}!,
+ "Verify /\p{name=/$assembled/} matches chr(0x$hex)");
+ }
+
return $all_pass;
}
@@ -352,6 +384,10 @@ is(charnames::viacode("U+00000000000FEED"), "ARABIC LETTER WAW ISOLATED FORM", '
test_vianame(0x116C, "116C", "HANGUL JUNGSEONG OE");
test_vianame(0x1180, "1180", "HANGUL JUNGSEONG O-E");
+like(chr(0x59C3), qr/\p{name=\/\ACJK UNIFIED IDEOGRAPH-59C3\z\/}/,
+ 'Verify name wildcards delimitters can be escaped');
+like(chr(0xD800), qr!\p{name=/\A\z/}!,
+ 'Verify works on matching an empty name');
{
no warnings 'deprecated';