summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2010-11-27 10:26:01 -0700
committer: Father Chrysostomos <sprout@cpan.org> 2010-11-28 04:49:14 -0800
commit: e2e755386e52b4bdb22a5c9618390859ed5f7323 (patch)
tree: d610bd004bf05cbe53f0f33d9df2b0b5691c7eec /regexec.c
parent: 35bae5983226710c4c6eb1565b5e105b0599a820 (diff)
download: perl-e2e755386e52b4bdb22a5c9618390859ed5f7323.tar.gz
regexec.c: Latin1 chars can fold match UTF8_ALL
Some ANYOF regnodes have the ANYOF_UNICODE_ALL flag set, which means they match any non-Latin1 character. Under /i (in a utf8 target string), these should also match any ASCII or Latin1 character that folds outside the Latin1 range. As part of this patch, an internal-only macro is renamed to account for its new use in regexec.c. The cumbersome name is to ward off others from using it until the final semantics have been settled on.
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 14
1 files changed, 10 insertions, 4 deletions
diff --git a/regexec.c b/regexec.c
index 375d4fd4b1..874dce3a2c 100644
--- a/regexec.c
+++ b/regexec.c
@@ -6300,11 +6300,17 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
/* If the bitmap didn't (or couldn't) match, and something outside the
* bitmap could match, try that */
if (!match) {
- if (utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) {
- match = TRUE;
+ if (utf8_target && (flags & ANYOF_UNICODE_ALL)) {
+ if (c >= 256
+ || ((flags & ANYOF_FOLD) /* Latin1 1 that has a non-Latin1 fold
+ should match */
+ && _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c)))
+ {
+ match = TRUE;
+ }
}
- else if ((flags & ANYOF_NONBITMAP_NON_UTF8)
- || (utf8_target && flags & ANYOF_UTF8))
+ if (!match && ((flags & ANYOF_NONBITMAP_NON_UTF8)
+ || (utf8_target && flags & ANYOF_UTF8)))
{
AV *av;
SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);