diff options
author | Karl Williamson <public@khwilliamson.com> | 2013-12-16 10:59:36 -0700 |
---|---|---|
committer | Ricardo Signes <rjbs@cpan.org> | 2013-12-19 09:01:02 -0500 |
commit | 947ba0f1461d3d1d73664c622b45d185f8dcd13c (patch) | |
tree | df5791ff9b5b346b7c98d41c008c461ae174db3a | |
parent | 9108791a984e5903c1261aee26cb1f616e61d9aa (diff) | |
download | perl-947ba0f1461d3d1d73664c622b45d185f8dcd13c.tar.gz |
PATCH: [perl #120799] 5.18 regression with [:^ascii] and \x80-\xFF
Posix classes generally match different sets of characters under /d
rules than otherwise. This isn't true for [:ascii:], but the handling
for it is shared with the others, so it needs to use the same mechanism
to deal with that. I forgot this in commit
bb9ee97444732c84b33c2f2432aa28e52e4651dc which created this regression.
Our tests for this only use regexes with a single element, and an
optimization added in 5.18 causes this bug to be bypassed. These tests
should be enhanced to force both code paths, but not for this commit,
which should be suitable for a maintenance release.
(cherry picked from commit 46c10357a881cd92500e4ade81cbc8813e49e2cb)
-rw-r--r-- | regcomp.c | 9 | ||||
-rw-r--r-- | t/re/re_tests | 1 |
2 files changed, 10 insertions, 0 deletions
@@ -12687,12 +12687,21 @@ parseit:
                     }
                     else
 #endif  /* Not isascii(); just use the hard-coded definition for it */
+                    {
                         _invlist_union_maybe_complement_2nd(
                                 posixes,
                                 PL_ASCII,
                                 cBOOL(namedclass % 2), /* Complement if odd
                                                           (NASCII) */
                                 &posixes);
+
+                        /* The code points 128-255 added above will be
+                         * subtracted out below under /d, so the flag needs to
+                         * be set */
+                        if (namedclass == ANYOF_NASCII && DEPENDS_SEMANTICS) {
+                            ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL;
+                        }
+                    }
                 }
                 else {  /* Garden variety class */
 
diff --git a/t/re/re_tests b/t/re/re_tests
index 4d89e69e70..0af345ae3e 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -638,6 +638,7 @@ $(?<=^(a)) a y $1 a
 ([[:^alpha:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 01
 ((?a)[[:^alnum:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 __-- ${nulnul}${ffff}
 ([[:^ascii:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${ffff}
+([:[:^ascii:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${ffff} RT #120799
 ([[:^cntrl:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__--
 ([[:^digit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd
 ([[:^lower:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 AB