summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embedvar.h2
-rw-r--r--intrpvar.h4
-rw-r--r--perl.c2
-rw-r--r--regexec.c45
-rw-r--r--sv.c1
-rw-r--r--t/re/reg_fold.t6
6 files changed, 60 insertions, 0 deletions
diff --git a/embedvar.h b/embedvar.h
index 87099c13aa..36f75759be 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -341,6 +341,7 @@
#define PL_utf8_ascii (vTHX->Iutf8_ascii)
#define PL_utf8_cntrl (vTHX->Iutf8_cntrl)
#define PL_utf8_digit (vTHX->Iutf8_digit)
+#define PL_utf8_foldclosures (vTHX->Iutf8_foldclosures)
#define PL_utf8_graph (vTHX->Iutf8_graph)
#define PL_utf8_idcont (vTHX->Iutf8_idcont)
#define PL_utf8_idstart (vTHX->Iutf8_idstart)
@@ -670,6 +671,7 @@
#define PL_Iutf8_ascii PL_utf8_ascii
#define PL_Iutf8_cntrl PL_utf8_cntrl
#define PL_Iutf8_digit PL_utf8_digit
+#define PL_Iutf8_foldclosures PL_utf8_foldclosures
#define PL_Iutf8_graph PL_utf8_graph
#define PL_Iutf8_idcont PL_utf8_idcont
#define PL_Iutf8_idstart PL_utf8_idstart
diff --git a/intrpvar.h b/intrpvar.h
index d919e1d357..1ab1495e8e 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -765,6 +765,10 @@ PERLVAR(Iregistered_mros, HV *)
/* Compile-time block start/end hooks */
PERLVAR(Iblockhooks, AV *)
+
+/* Everything that folds to a character, for case insensitivity regex matching */
+PERLVARI(Iutf8_foldclosures, HV *, NULL)
+
/* If you are adding a U8 or U16, check to see if there are 'Space' comments
* above on where there are gaps which currently will be structure padding. */
diff --git a/perl.c b/perl.c
index 157cd6b603..ed99612b7b 100644
--- a/perl.c
+++ b/perl.c
@@ -1003,6 +1003,7 @@ perl_destruct(pTHXx)
SvREFCNT_dec(PL_utf8_tofold);
SvREFCNT_dec(PL_utf8_idstart);
SvREFCNT_dec(PL_utf8_idcont);
+ SvREFCNT_dec(PL_utf8_foldclosures);
PL_utf8_alnum = NULL;
PL_utf8_ascii = NULL;
PL_utf8_alpha = NULL;
@@ -1022,6 +1023,7 @@ perl_destruct(pTHXx)
PL_utf8_tofold = NULL;
PL_utf8_idstart = NULL;
PL_utf8_idcont = NULL;
+ PL_utf8_foldclosures = NULL;
if (!specialWARN(PL_compiling.cop_warnings))
PerlMemShared_free(PL_compiling.cop_warnings);
diff --git a/regexec.c b/regexec.c
index 433bbeb8e5..a6da6ce0dc 100644
--- a/regexec.c
+++ b/regexec.c
@@ -6343,6 +6343,51 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
if (swash_fetch(sw, folded, 1)) { /* 1 => is utf8 */
match = TRUE;
}
+ else {
+ SV** listp;
+
+ /* Consider "k" =~ /[K]/i. The line above would
+ * have just folded the 'k' to itself, and that
+ * isn't going to match 'K'. So we look through
+ * the closure of everything that folds to 'k'.
+ * That will find the 'K'. Initialize the list, if
+ * necessary */
+ if (! PL_utf8_foldclosures) {
+
+ /* If the folds haven't been read in, call a fold
+ * function to force that */
+ if (! PL_utf8_tofold) {
+ U8 dummy[UTF8_MAXBYTES+1];
+ STRLEN dummy_len;
+ to_utf8_fold((U8*) "A", dummy, &dummy_len);
+ }
+ PL_utf8_foldclosures =
+ _swash_inversion_hash(PL_utf8_tofold);
+ }
+
+ /* The data structure is a hash with the keys every
+ * character that is folded to, like 'k', and the
+ * values each an array of everything that folds to
+ * its key. e.g. [ 'k', 'K', KELVIN_SIGN ] */
+ if ((listp = hv_fetch(PL_utf8_foldclosures,
+ (char *) folded, foldlen, FALSE)))
+ {
+ AV* list = (AV*) *listp;
+ IV i;
+ for (i = 0; i <= av_len(list); i++) {
+ SV** try_p = av_fetch(list, i, FALSE);
+ if (try_p == NULL) {
+ Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure");
+ }
+ /* Don't have to worry about embeded nulls
+ * since NULL isn't folded or foldable */
+ if (swash_fetch(sw, (U8*) SvPVX(*try_p),1)) {
+ match = TRUE;
+ break;
+ }
+ }
+ }
+ }
}
}
diff --git a/sv.c b/sv.c
index f3010af387..e2d498d8d8 100644
--- a/sv.c
+++ b/sv.c
@@ -13157,6 +13157,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
PL_registered_mros = hv_dup_inc(proto_perl->Iregistered_mros, param);
PL_blockhooks = av_dup_inc(proto_perl->Iblockhooks, param);
+ PL_utf8_foldclosures = hv_dup_inc(proto_perl->Iutf8_foldclosures, param);
/* Call the ->CLONE method, if it exists, for each of the stashes
identified by sv_dup() above.
diff --git a/t/re/reg_fold.t b/t/re/reg_fold.t
index 1c7dfe8fe9..af5ba282bb 100644
--- a/t/re/reg_fold.t
+++ b/t/re/reg_fold.t
@@ -72,6 +72,12 @@ while (<$fh>) {
}
}
}
+
+push @tests, qq[like chr(0x0430), qr/[=\x{0410}-\x{0411}]/i, 'Bug #71752 Unicode /i char in a range'];
+$count++;
+push @tests, qq[like 'a', qr/\\p{Upper}/i, "'a' =~ /\\\\p{Upper}/i"];
+$count++;
+
eval join ";\n","plan tests=>".($count-1),@tests,"1"
or die $@;
__DATA__