diff options
-rw-r--r-- | embedvar.h | 2 | ||||
-rw-r--r-- | intrpvar.h | 4 | ||||
-rw-r--r-- | perl.c | 2 | ||||
-rw-r--r-- | regexec.c | 45 | ||||
-rw-r--r-- | sv.c | 1 | ||||
-rw-r--r-- | t/re/reg_fold.t | 6 |
6 files changed, 60 insertions, 0 deletions
diff --git a/embedvar.h b/embedvar.h index 87099c13aa..36f75759be 100644 --- a/embedvar.h +++ b/embedvar.h @@ -341,6 +341,7 @@ #define PL_utf8_ascii (vTHX->Iutf8_ascii) #define PL_utf8_cntrl (vTHX->Iutf8_cntrl) #define PL_utf8_digit (vTHX->Iutf8_digit) +#define PL_utf8_foldclosures (vTHX->Iutf8_foldclosures) #define PL_utf8_graph (vTHX->Iutf8_graph) #define PL_utf8_idcont (vTHX->Iutf8_idcont) #define PL_utf8_idstart (vTHX->Iutf8_idstart) @@ -670,6 +671,7 @@ #define PL_Iutf8_ascii PL_utf8_ascii #define PL_Iutf8_cntrl PL_utf8_cntrl #define PL_Iutf8_digit PL_utf8_digit +#define PL_Iutf8_foldclosures PL_utf8_foldclosures #define PL_Iutf8_graph PL_utf8_graph #define PL_Iutf8_idcont PL_utf8_idcont #define PL_Iutf8_idstart PL_utf8_idstart diff --git a/intrpvar.h b/intrpvar.h index d919e1d357..1ab1495e8e 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -765,6 +765,10 @@ PERLVAR(Iregistered_mros, HV *) /* Compile-time block start/end hooks */ PERLVAR(Iblockhooks, AV *) + +/* Everything that folds to a character, for case insensitivity regex matching */ +PERLVARI(Iutf8_foldclosures, HV *, NULL) + /* If you are adding a U8 or U16, check to see if there are 'Space' comments * above on where there are gaps which currently will be structure padding. */ @@ -1003,6 +1003,7 @@ perl_destruct(pTHXx) SvREFCNT_dec(PL_utf8_tofold); SvREFCNT_dec(PL_utf8_idstart); SvREFCNT_dec(PL_utf8_idcont); + SvREFCNT_dec(PL_utf8_foldclosures); PL_utf8_alnum = NULL; PL_utf8_ascii = NULL; PL_utf8_alpha = NULL; @@ -1022,6 +1023,7 @@ perl_destruct(pTHXx) PL_utf8_tofold = NULL; PL_utf8_idstart = NULL; PL_utf8_idcont = NULL; + PL_utf8_foldclosures = NULL; if (!specialWARN(PL_compiling.cop_warnings)) PerlMemShared_free(PL_compiling.cop_warnings); @@ -6343,6 +6343,51 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, if (swash_fetch(sw, folded, 1)) { /* 1 => is utf8 */ match = TRUE; } + else { + SV** listp; + + /* Consider "k" =~ /[K]/i. The line above would + * have just folded the 'k' to itself, and that + * isn't going to match 'K'. So we look through + * the closure of everything that folds to 'k'. + * That will find the 'K'. Initialize the list, if + * necessary */ + if (! PL_utf8_foldclosures) { + + /* If the folds haven't been read in, call a fold + * function to force that */ + if (! PL_utf8_tofold) { + U8 dummy[UTF8_MAXBYTES+1]; + STRLEN dummy_len; + to_utf8_fold((U8*) "A", dummy, &dummy_len); + } + PL_utf8_foldclosures = + _swash_inversion_hash(PL_utf8_tofold); + } + + /* The data structure is a hash with the keys every + * character that is folded to, like 'k', and the + * values each an array of everything that folds to + * its key. e.g. [ 'k', 'K', KELVIN_SIGN ] */ + if ((listp = hv_fetch(PL_utf8_foldclosures, + (char *) folded, foldlen, FALSE))) + { + AV* list = (AV*) *listp; + IV i; + for (i = 0; i <= av_len(list); i++) { + SV** try_p = av_fetch(list, i, FALSE); + if (try_p == NULL) { + Perl_croak(aTHX_ "panic: invalid PL_utf8_foldclosures structure"); + } + /* Don't have to worry about embeded nulls + * since NULL isn't folded or foldable */ + if (swash_fetch(sw, (U8*) SvPVX(*try_p),1)) { + match = TRUE; + break; + } + } + } + } } } @@ -13157,6 +13157,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_registered_mros = hv_dup_inc(proto_perl->Iregistered_mros, param); PL_blockhooks = av_dup_inc(proto_perl->Iblockhooks, param); + PL_utf8_foldclosures = hv_dup_inc(proto_perl->Iutf8_foldclosures, param); /* Call the ->CLONE method, if it exists, for each of the stashes identified by sv_dup() above. diff --git a/t/re/reg_fold.t b/t/re/reg_fold.t index 1c7dfe8fe9..af5ba282bb 100644 --- a/t/re/reg_fold.t +++ b/t/re/reg_fold.t @@ -72,6 +72,12 @@ while (<$fh>) { } } } + +push @tests, qq[like chr(0x0430), qr/[=\x{0410}-\x{0411}]/i, 'Bug #71752 Unicode /i char in a range']; +$count++; +push @tests, qq[like 'a', qr/\\p{Upper}/i, "'a' =~ /\\\\p{Upper}/i"]; +$count++; + eval join ";\n","plan tests=>".($count-1),@tests,"1" or die $@; __DATA__ |