summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2013-12-16 10:59:36 -0700
committer Ricardo Signes <rjbs@cpan.org> 2013-12-19 09:01:02 -0500
commit 947ba0f1461d3d1d73664c622b45d185f8dcd13c (patch)
tree df5791ff9b5b346b7c98d41c008c461ae174db3a
parent 9108791a984e5903c1261aee26cb1f616e61d9aa (diff)
download perl-947ba0f1461d3d1d73664c622b45d185f8dcd13c.tar.gz
PATCH: [perl #120799] 5.18 regression with [:^ascii:] and \x80-\xFF
Posix classes generally match different sets of characters under /d rules than otherwise. This isn't true for [:ascii:], but the handling for it is shared with the others, so it needs to use the same mechanism to deal with that. I forgot this in commit bb9ee97444732c84b33c2f2432aa28e52e4651dc which created this regression. Our tests for this only use regexes with a single element, and an optimization added in 5.18 causes this bug to be bypassed. These tests should be enhanced to force both code paths, but not for this commit, which should be suitable for a maintenance release. (cherry picked from commit 46c10357a881cd92500e4ade81cbc8813e49e2cb)
-rw-r--r-- regcomp.c 9
-rw-r--r-- t/re/re_tests 1
2 files changed, 10 insertions, 0 deletions
diff --git a/regcomp.c b/regcomp.c
index 2a8ce8e867..0841f172e5 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -12687,12 +12687,21 @@ parseit:
}
else
#endif /* Not isascii(); just use the hard-coded definition for it */
+ {
_invlist_union_maybe_complement_2nd(
posixes,
PL_ASCII,
cBOOL(namedclass % 2), /* Complement if odd
(NASCII) */
&posixes);
+
+ /* The code points 128-255 added above will be
+ * subtracted out below under /d, so the flag needs to
+ * be set */
+ if (namedclass == ANYOF_NASCII && DEPENDS_SEMANTICS) {
+ ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL;
+ }
+ }
}
else { /* Garden variety class */
diff --git a/t/re/re_tests b/t/re/re_tests
index 4d89e69e70..0af345ae3e 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -638,6 +638,7 @@ $(?<=^(a)) a y $1 a
([[:^alpha:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 01
((?a)[[:^alnum:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 __-- ${nulnul}${ffff}
([[:^ascii:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${ffff}
+([:[:^ascii:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${ffff} RT #120799
([[:^cntrl:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__--
([[:^digit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd
([[:^lower:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 AB