From 947ba0f1461d3d1d73664c622b45d185f8dcd13c Mon Sep 17 00:00:00 2001
From: Karl Williamson
Date: Mon, 16 Dec 2013 10:59:36 -0700
Subject: PATCH: [perl #120799] 5.18 regression with [:^ascii] and \x80-\xFF

Posix classes generally match different sets of characters under /d rules
than otherwise. This isn't true for [:ascii:], but the handling for it is
shared with the others, so it needs to use the same mechanism to deal with
that. I forgot this in commit bb9ee97444732c84b33c2f2432aa28e52e4651dc
which created this regression.

Our tests for this only use regexes with a single element, and an
optimization added in 5.18 causes this bug to be bypassed. These tests
should be enhanced to force both code paths, but not for this commit, which
should be suitable for a maintenance release.

(cherry picked from commit 46c10357a881cd92500e4ade81cbc8813e49e2cb)
---
 regcomp.c     | 9 +++++++++
 t/re/re_tests | 1 +
 2 files changed, 10 insertions(+)

diff --git a/regcomp.c b/regcomp.c
index 2a8ce8e867..0841f172e5 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -12687,12 +12687,21 @@ parseit:
                     }
                     else
 #endif /* Not isascii(); just use the hard-coded definition for it */
+                    {
                         _invlist_union_maybe_complement_2nd(
                                 posixes,
                                 PL_ASCII,
                                 cBOOL(namedclass % 2), /* Complement if odd
                                                           (NASCII) */
                                 &posixes);
+
+                        /* The code points 128-255 added above will be
+                         * subtracted out below under /d, so the flag needs to
+                         * be set */
+                        if (namedclass == ANYOF_NASCII && DEPENDS_SEMANTICS) {
+                            ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL;
+                        }
+                    }
                 }
                 else { /* Garden variety class */
 
diff --git a/t/re/re_tests b/t/re/re_tests
index 4d89e69e70..0af345ae3e 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -638,6 +638,7 @@ $(?<=^(a)) a y $1 a
 ([[:^alpha:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 01
 ((?a)[[:^alnum:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 __-- ${nulnul}${ffff}
 ([[:^ascii:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${ffff}
+([:[:^ascii:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${ffff} RT #120799
 ([[:^cntrl:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__--
 ([[:^digit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd
 ([[:^lower:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 AB
--
cgit v1.2.1