summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2011-01-15 13:42:58 -0700
committerKarl Williamson <public@khwilliamson.com>2011-01-16 08:18:54 -0700
commit11454c594f22abc5945e69a46fc965363dbf326e (patch)
tree8e51baaf062d5e28410294b9cac63f791c63ced2 /regexec.c
parentf424400810b6af341e96230836690da51c37b812 (diff)
downloadperl-11454c594f22abc5945e69a46fc965363dbf326e.tar.gz
Fix \xa0 matching both [\s] and [\S], et al.
This bug stemmed from Latin1 characters not matching any (non-complemented) character class in /d semantics when the target string is not utf8, but having Unicode semantics when it is. The solution here is to add a special flag. There were several tests that relied on the broken behavior; specifically, they tested that \xff isn't a printable word character even in utf8. I changed the deparse test to instead use a non-printable code point, and I changed the ones in re_tests to be TODOs, and will change them back using /a when that is shortly added.
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c6
1 files changed, 6 insertions, 0 deletions
diff --git a/regexec.c b/regexec.c
index ca88d9f5ab..be0feeb80c 100644
--- a/regexec.c
+++ b/regexec.c
@@ -6336,6 +6336,12 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
if (c < 256) {
if (ANYOF_BITMAP_TEST(n, c))
match = TRUE;
+ else if (flags & ANYOF_NON_UTF8_LATIN1_ALL
+ && ! utf8_target
+ && ! isASCII(c))
+ {
+ match = TRUE;
+ }
else if (flags & ANYOF_LOCALE) {
PL_reg_flags |= RF_tainted;