diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-02-11 10:49:06 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-02-11 14:01:29 -0700 |
commit | dab0c3e7e357a28081ca24560f20e83c1d47ce1f (patch) | |
tree | d9908bba3f41f503a3d7c9bd3bfb5b4c3ab15fd4 | |
parent | 24ad4a07e88519ae8e63d0b67d519e62a935b577 (diff) | |
download | perl-dab0c3e7e357a28081ca24560f20e83c1d47ce1f.tar.gz |
regcomp.c: /[[:lower:]]/i should match the same as /\p{Lower}/i
The same applies to [[:upper:]] and \p{Upper}. Under /i, these bracketed
classes were instead matching all of [[:alpha:]] or \p{Alpha}. What
/\p{Lower}/i and /\p{Upper}/i actually match is \p{Cased}, so that is what
the bracketed classes should match as well.
-rw-r--r-- | charclass_invlists.h | 24 | ||||
-rw-r--r-- | embedvar.h | 1 | ||||
-rw-r--r-- | intrpvar.h | 2 | ||||
-rw-r--r-- | regcomp.c | 13 | ||||
-rw-r--r-- | regen/mk_invlists.pl | 4 | ||||
-rw-r--r-- | sv.c | 2 | ||||
-rw-r--r-- | t/re/re_tests | 5 |
7 files changed, 45 insertions, 6 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h index 5d78219daa..152793a2e6 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -36,6 +36,30 @@ UV ASCII_invlist[] = { 0 }; +UV L1Cased_invlist[] = { + 16, /* Number of elements */ + 0, /* Current iteration position */ + 1064334010, /* Version and data structure type */ + 1, /* 0 if this is the first element of the list proper; + 1 if the next element is the first */ + 65, + 91, + 97, + 123, + 170, + 171, + 181, + 182, + 186, + 187, + 192, + 215, + 216, + 247, + 248, + 443 +}; + UV VertSpace_invlist[] = { 6, /* Number of elements */ 0, /* Current iteration position */ diff --git a/embedvar.h b/embedvar.h index 0321963f35..1d187feb92 100644 --- a/embedvar.h +++ b/embedvar.h @@ -54,6 +54,7 @@ #define PL_Dir (vTHX->IDir) #define PL_Env (vTHX->IEnv) #define PL_HorizSpace (vTHX->IHorizSpace) +#define PL_L1Cased (vTHX->IL1Cased) #define PL_L1PosixAlnum (vTHX->IL1PosixAlnum) #define PL_L1PosixAlpha (vTHX->IL1PosixAlpha) #define PL_L1PosixGraph (vTHX->IL1PosixGraph) diff --git a/intrpvar.h b/intrpvar.h index 43e4e051c2..4025a7c5a1 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -582,6 +582,8 @@ PERLVAR(I, PosixAlpha, SV *) PERLVAR(I, PosixBlank, SV *) PERLVAR(I, XPosixBlank, SV *) +PERLVAR(I, L1Cased, SV *) + PERLVAR(I, PosixCntrl, SV *) PERLVAR(I, XPosixCntrl, SV *) @@ -4830,6 +4830,8 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags) PL_PosixBlank = _new_invlist_C_array(PosixBlank_invlist); PL_XPosixBlank = _new_invlist_C_array(XPosixBlank_invlist); + PL_L1Cased = _new_invlist_C_array(L1Cased_invlist); + PL_PosixCntrl = _new_invlist_C_array(PosixCntrl_invlist); PL_XPosixCntrl = _new_invlist_C_array(XPosixCntrl_invlist); @@ -10872,7 +10874,8 @@ parseit: case ANYOF_LOWER: case ANYOF_NLOWER: { /* These require special handling, as they differ under - folding, matching the corresponding Alpha property */ + folding, matching Cased there (which in the ASCII range + is the same as Alpha */ SV* 
ascii_source; SV* l1_source; @@ -10880,8 +10883,8 @@ parseit: if (FOLD && ! LOC) { ascii_source = PL_PosixAlpha; - l1_source = PL_L1PosixAlpha; - Xname = "__XposixAlpha_i"; + l1_source = PL_L1Cased; + Xname = "Cased"; } else { ascii_source = PL_PosixLower; @@ -10939,8 +10942,8 @@ parseit: if (FOLD && ! LOC) { ascii_source = PL_PosixAlpha; - l1_source = PL_L1PosixAlpha; - Xname = "__XposixAlpha_i"; + l1_source = PL_L1Cased; + Xname = "Cased"; } else { ascii_source = PL_PosixUpper; diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl index 31c77dee7a..8102c29cd3 100644 --- a/regen/mk_invlists.pl +++ b/regen/mk_invlists.pl @@ -93,6 +93,7 @@ output_invlist("AboveLatin1", [ 256 ]); for my $prop (qw( ASCII + L1Cased VertSpace PerlSpace XPerlSpace @@ -132,7 +133,8 @@ for my $prop (qw( # artifically cutting that off at 256 because 256 is the first code point # above Latin1, we let the range go to its natural ending. That gives us # extra information with no added space taken. - my $lookup_prop = $prop =~ s/^L1/X/r; + my $lookup_prop = $prop; + $lookup_prop =~ s/^L1Posix/XPosix/ or $lookup_prop =~ s/^L1//; my @invlist = prop_invlist($lookup_prop); if ($lookup_prop ne $prop) { @@ -13398,6 +13398,8 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_PosixBlank = sv_dup_inc(proto_perl->IPosixBlank, param); PL_XPosixBlank = sv_dup_inc(proto_perl->IXPosixBlank, param); + PL_L1Cased = sv_dup_inc(proto_perl->IL1Cased, param); + PL_PosixCntrl = sv_dup_inc(proto_perl->IPosixCntrl, param); PL_XPosixCntrl = sv_dup_inc(proto_perl->IXPosixCntrl, param); diff --git a/t/re/re_tests b/t/re/re_tests index 2bdb24cdb8..4eafaf79bc 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1567,4 +1567,9 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer # /i properties shouldn't match more than the property itself /[[:ascii:]]/i \N{KELVIN SIGN} n - - +# [[:lower:]]/i and [[:upper:]]/i should match what \p{Lower} and \p{Upper} do. 
+# which is \p{Cased}, not \p{Alpha}, +/[[:lower:]]/i \N{U+3400} n - - +/[[:upper:]]/i \N{U+01BB} n - - + # vim: softtabstop=0 noexpandtab |