summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2011-01-16 14:16:20 +0000
committerDavid Mitchell <davem@iabyn.com>2011-01-16 14:16:20 +0000
commitd658a8a81c4f311bef688fd51df924a424429f14 (patch)
treec90aa43119f05a7732d7f1fac94f8c0eae753002
parent95a517db36c4d56cf2a5e6103e3235de4c1c38f8 (diff)
downloadperl-d658a8a81c4f311bef688fd51df924a424429f14.tar.gz
restrict \p{IsUserDefined} to In\w+ and In\w+
In L<perlunicode/"User-Defined Character Properties">, it says you can create custom properties by defining subroutines whose names begin with "In" or "Is". However, perl doesn't actually enforce that naming restriction, so \p{foo::bar} will call foo::Bar() if it exists. This commit finally enforces this convention. Note that this broke a number of existing tests for properties, since they didn't always use an Is/In prefix.
-rw-r--r--lib/utf8_heavy.pl2
-rw-r--r--t/re/regexp_unicode_prop.t24
-rw-r--r--t/uni/class.t12
3 files changed, 22 insertions, 16 deletions
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl
index 0a98732cbd..e271ba3a73 100644
--- a/lib/utf8_heavy.pl
+++ b/lib/utf8_heavy.pl
@@ -100,7 +100,7 @@ sub croak { require Carp; Carp::croak(@_) }
my $caller1 = $type =~ s/(.+)::// ? $1 : caller(1);
- if (defined $caller1 && $type =~ /^(?:\w+)$/) {
+ if (defined $caller1 && $type =~ /^I[ns]\w+$/) {
my $prop = "${caller1}::$type";
if (exists &{$prop}) {
no strict 'refs';
diff --git a/t/re/regexp_unicode_prop.t b/t/re/regexp_unicode_prop.t
index ba55b96115..e2c0c51d6f 100644
--- a/t/re/regexp_unicode_prop.t
+++ b/t/re/regexp_unicode_prop.t
@@ -88,14 +88,13 @@ my @USER_DEFINED_PROPERTIES = (
InNotKana => ['\x{3040}', '!\x{3041}'],
InConsonant => ['d', '!e'],
IsSyriac1 => ['\x{0712}', '!\x{072F}'],
- Syriac1 => ['\x{0712}', '!\x{072F}'],
'# User-defined character properties my lack \n at the end',
InGreekSmall => ['\N{GREEK SMALL LETTER PI}',
'\N{GREEK SMALL LETTER FINAL SIGMA}'],
InGreekCapital => ['\N{GREEK CAPITAL LETTER PI}', '!\x{03A2}'],
Dash => ['-'],
ASCII_Hex_Digit => ['!-', 'A'],
- AsciiHexAndDash => ['-', 'A'],
+ IsAsciiHexAndDash => ['-', 'A'],
);
@@ -118,7 +117,8 @@ my %SHORT_PROPERTIES = (
#
# Illegal properties
#
-my @ILLEGAL_PROPERTIES = qw [q qrst];
+my @ILLEGAL_PROPERTIES =
+ qw[q qrst f foo isfoo infoo ISfoo INfoo Is::foo In::foo];
my %d;
@@ -288,17 +288,23 @@ sub IsSyriac1 {<<'--'}
0730 074A
--
-sub Syriac1 {<<'--'}
-0712 072C
-0730 074A
---
-
sub InGreekSmall {return "03B1\t03C9"}
sub InGreekCapital {return "0391\t03A9\n-03A2"}
-sub AsciiHexAndDash {<<'--'}
+sub IsAsciiHexAndDash {<<'--'}
+utf8::ASCII_Hex_Digit
+utf8::Dash
--
+# fake user-defined properties; these subs shouldn't be called, because
+# their names don't start with In or Is
+
+sub f { die }
+sub foo { die }
+sub isfoo { die }
+sub infoo { die }
+sub ISfoo { die }
+sub INfoo { die }
+sub Is::foo { die }
+sub In::foo { die }
__END__
diff --git a/t/uni/class.t b/t/uni/class.t
index fedec4cdfe..40dbd9f299 100644
--- a/t/uni/class.t
+++ b/t/uni/class.t
@@ -6,13 +6,13 @@ BEGIN {
plan tests => 10;
-sub MyUniClass {
+sub IsMyUniClass {
<<END;
0030 004F
END
}
-sub Other::Class {
+sub Other::IsClass {
<<END;
0040 005F
END
@@ -20,8 +20,8 @@ END
sub A::B::Intersection {
<<END;
-+main::MyUniClass
-&Other::Class
++main::IsMyUniClass
+&Other::IsClass
END
}
@@ -57,10 +57,10 @@ is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
# make sure it finds user-defined class
-is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
+is(($str =~ /(\p{IsMyUniClass}+)/)[0], '0123456789:;<=>?@ABCDEFGHIJKLMNO');
# make sure it finds class in other package
-is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
+is(($str =~ /(\p{Other::IsClass}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');
# make sure it finds class in other OTHER package
is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');