summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-02-11 10:49:06 -0700
committerKarl Williamson <public@khwilliamson.com>2012-02-11 14:01:29 -0700
commitdab0c3e7e357a28081ca24560f20e83c1d47ce1f (patch)
treed9908bba3f41f503a3d7c9bd3bfb5b4c3ab15fd4
parent24ad4a07e88519ae8e63d0b67d519e62a935b577 (diff)
downloadperl-dab0c3e7e357a28081ca24560f20e83c1d47ce1f.tar.gz
regcomp.c: /[[:lower:]]/i should match the same as /\p{Lower}/i
Same for [[:upper:]] and \p{Upper}. Under /i, these were instead matching all of [[:alpha:]] or \p{Alpha}. What /\p{Lower}/i and /\p{Upper}/i actually match is \p{Cased}, and so that is what these should match.
-rw-r--r--charclass_invlists.h24
-rw-r--r--embedvar.h1
-rw-r--r--intrpvar.h2
-rw-r--r--regcomp.c13
-rw-r--r--regen/mk_invlists.pl4
-rw-r--r--sv.c2
-rw-r--r--t/re/re_tests5
7 files changed, 45 insertions, 6 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index 5d78219daa..152793a2e6 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -36,6 +36,30 @@ UV ASCII_invlist[] = {
0
};
+UV L1Cased_invlist[] = {
+ 16, /* Number of elements */
+ 0, /* Current iteration position */
+ 1064334010, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
+ 65,
+ 91,
+ 97,
+ 123,
+ 170,
+ 171,
+ 181,
+ 182,
+ 186,
+ 187,
+ 192,
+ 215,
+ 216,
+ 247,
+ 248,
+ 443
+};
+
UV VertSpace_invlist[] = {
6, /* Number of elements */
0, /* Current iteration position */
diff --git a/embedvar.h b/embedvar.h
index 0321963f35..1d187feb92 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -54,6 +54,7 @@
#define PL_Dir (vTHX->IDir)
#define PL_Env (vTHX->IEnv)
#define PL_HorizSpace (vTHX->IHorizSpace)
+#define PL_L1Cased (vTHX->IL1Cased)
#define PL_L1PosixAlnum (vTHX->IL1PosixAlnum)
#define PL_L1PosixAlpha (vTHX->IL1PosixAlpha)
#define PL_L1PosixGraph (vTHX->IL1PosixGraph)
diff --git a/intrpvar.h b/intrpvar.h
index 43e4e051c2..4025a7c5a1 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -582,6 +582,8 @@ PERLVAR(I, PosixAlpha, SV *)
PERLVAR(I, PosixBlank, SV *)
PERLVAR(I, XPosixBlank, SV *)
+PERLVAR(I, L1Cased, SV *)
+
PERLVAR(I, PosixCntrl, SV *)
PERLVAR(I, XPosixCntrl, SV *)
diff --git a/regcomp.c b/regcomp.c
index 3123a15e6e..b33eddac88 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4830,6 +4830,8 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
PL_PosixBlank = _new_invlist_C_array(PosixBlank_invlist);
PL_XPosixBlank = _new_invlist_C_array(XPosixBlank_invlist);
+ PL_L1Cased = _new_invlist_C_array(L1Cased_invlist);
+
PL_PosixCntrl = _new_invlist_C_array(PosixCntrl_invlist);
PL_XPosixCntrl = _new_invlist_C_array(XPosixCntrl_invlist);
@@ -10872,7 +10874,8 @@ parseit:
case ANYOF_LOWER:
case ANYOF_NLOWER:
{ /* These require special handling, as they differ under
- folding, matching the corresponding Alpha property */
+ folding, matching Cased there (which in the ASCII range
+ is the same as Alpha) */
SV* ascii_source;
SV* l1_source;
@@ -10880,8 +10883,8 @@ parseit:
if (FOLD && ! LOC) {
ascii_source = PL_PosixAlpha;
- l1_source = PL_L1PosixAlpha;
- Xname = "__XposixAlpha_i";
+ l1_source = PL_L1Cased;
+ Xname = "Cased";
}
else {
ascii_source = PL_PosixLower;
@@ -10939,8 +10942,8 @@ parseit:
if (FOLD && ! LOC) {
ascii_source = PL_PosixAlpha;
- l1_source = PL_L1PosixAlpha;
- Xname = "__XposixAlpha_i";
+ l1_source = PL_L1Cased;
+ Xname = "Cased";
}
else {
ascii_source = PL_PosixUpper;
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index 31c77dee7a..8102c29cd3 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -93,6 +93,7 @@ output_invlist("AboveLatin1", [ 256 ]);
for my $prop (qw(
ASCII
+ L1Cased
VertSpace
PerlSpace
XPerlSpace
@@ -132,7 +133,8 @@ for my $prop (qw(
# artificially cutting that off at 256 because 256 is the first code point
# above Latin1, we let the range go to its natural ending. That gives us
# extra information with no added space taken.
- my $lookup_prop = $prop =~ s/^L1/X/r;
+ my $lookup_prop = $prop;
+ $lookup_prop =~ s/^L1Posix/XPosix/ or $lookup_prop =~ s/^L1//;
my @invlist = prop_invlist($lookup_prop);
if ($lookup_prop ne $prop) {
diff --git a/sv.c b/sv.c
index f02627bf6e..214a17d44b 100644
--- a/sv.c
+++ b/sv.c
@@ -13398,6 +13398,8 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
PL_PosixBlank = sv_dup_inc(proto_perl->IPosixBlank, param);
PL_XPosixBlank = sv_dup_inc(proto_perl->IXPosixBlank, param);
+ PL_L1Cased = sv_dup_inc(proto_perl->IL1Cased, param);
+
PL_PosixCntrl = sv_dup_inc(proto_perl->IPosixCntrl, param);
PL_XPosixCntrl = sv_dup_inc(proto_perl->IXPosixCntrl, param);
diff --git a/t/re/re_tests b/t/re/re_tests
index 2bdb24cdb8..4eafaf79bc 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1567,4 +1567,9 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer
# /i properties shouldn't match more than the property itself
/[[:ascii:]]/i \N{KELVIN SIGN} n - -
+# /[[:lower:]]/i and /[[:upper:]]/i should match what \p{Lower} and \p{Upper} do
+# under /i, which is \p{Cased}, not \p{Alpha}.
+/[[:lower:]]/i \N{U+3400} n - -
+/[[:upper:]]/i \N{U+01BB} n - -
+
# vim: softtabstop=0 noexpandtab