diff options
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | pcre_compile.c | 28 | ||||
-rw-r--r-- | testdata/testinput16 | 26 | ||||
-rw-r--r-- | testdata/testinput19 | 17 | ||||
-rw-r--r-- | testdata/testinput7 | 9 | ||||
-rw-r--r-- | testdata/testoutput16 | 52 | ||||
-rw-r--r-- | testdata/testoutput19 | 26 | ||||
-rw-r--r-- | testdata/testoutput7 | 26 |
8 files changed, 157 insertions, 37 deletions
@@ -4,7 +4,7 @@ ChangeLog for PCRE Note that the PCRE 8.xx series (PCRE1) is now in a bugfix-only state. All development is happening in the PCRE2 10.xx series. -Version 8.40 17-June-2016 +Version 8.40 xx-xxxx-2016 ------------------------- 1. Using -o with -M in pcregrep could cause unnecessary repeated output when @@ -17,7 +17,13 @@ Version 8.40 17-June-2016 4. Ignore "show all captures" (/=) for DFA matching. -5. Fix unaligned accesses on x86. Patch by Marc Mutz. +5. Fix JIT unaligned accesses on x86. Patch by Marc Mutz. + +6. In any wide-character mode (8-bit UTF or any 16-bit or 32-bit mode), without + PCRE_UCP set, a negative character type such as \D in a positive class + should cause all characters greater than 255 to match, whatever else is in + the class. There was a bug that caused this not to happen if a Unicode + property item was added to such a class, for example [\D\P{Nd}] or [\W\pL]. Version 8.39 14-June-2016 diff --git a/pcre_compile.c b/pcre_compile.c index 7cd3950..fb80ed1 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -5579,6 +5579,34 @@ for (;; ptr++) #endif #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 { + /* For non-UCP wide characters, in a non-negative class containing \S or + similar (should_flip_negation is set), all characters greater than 255 + must be in the class. */ + + if ( +#if defined COMPILE_PCRE8 + utf && +#endif + should_flip_negation && !negate_class && (options & PCRE_UCP) == 0) + { + *class_uchardata++ = XCL_RANGE; + if (utf) /* Will always be utf in the 8-bit library */ + { + class_uchardata += PRIV(ord2utf)(0x100, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata); + } + else /* Can only happen for the 16-bit & 32-bit libraries */ + { +#if defined COMPILE_PCRE16 + *class_uchardata++ = 0x100; + *class_uchardata++ = 0xffffu; +#elif defined COMPILE_PCRE32 + *class_uchardata++ = 0x100; + *class_uchardata++ = 0xffffffffu; +#endif + } + } + *class_uchardata++ = XCL_END; /* Marks the end of extra data */ *code++ = OP_XCLASS; code += LINK_SIZE; diff --git a/testdata/testinput16 b/testdata/testinput16 index 15419e6..7ccde0a 100644 --- a/testdata/testinput16 +++ b/testdata/testinput16 @@ -38,4 +38,30 @@ /s+/i8SI SSss\x{17f} +/[\W\p{Any}]/BZ + abc + 123 + +/[\W\pL]/BZ + abc + ** Failers + 123 + +/[\D]/8 + \x{1d7cf} + +/[\D\P{Nd}]/8 + \x{1d7cf} + +/[^\D]/8 + a9b + ** Failers + \x{1d7cf} + +/[^\D\P{Nd}]/8 + a9b + \x{1d7cf} + ** Failers + \x{10000} + /-- End of testinput16 --/ diff --git a/testdata/testinput19 b/testdata/testinput19 index ce45afc..dfe8c7b 100644 --- a/testdata/testinput19 +++ b/testdata/testinput19 @@ -25,4 +25,21 @@ /s+/i8SI SSss\x{17f} +/[\D]/8 + \x{1d7cf} + +/[\D\P{Nd}]/8 + \x{1d7cf} + +/[^\D]/8 + a9b + ** Failers + \x{1d7cf} + +/[^\D\P{Nd}]/8 + a9b + \x{1d7cf} + ** Failers + \x{10000} + /-- End of testinput19 --/ diff --git a/testdata/testinput7 b/testdata/testinput7 index 00b9738..f44a810 100644 --- a/testdata/testinput7 +++ b/testdata/testinput7 @@ -838,15 +838,6 @@ of case for anything other than the ASCII letters. --/ /^s?c/mi8I scat -/[\W\p{Any}]/BZ - abc - 123 - -/[\W\pL]/BZ - abc - ** Failers - 123 - /a[[:punct:]b]/WBZ /a[[:punct:]b]/8WBZ diff --git a/testdata/testoutput16 b/testdata/testoutput16 index fd184cd..e6ba26a 100644 --- a/testdata/testoutput16 +++ b/testdata/testoutput16 @@ -138,4 +138,56 @@ Starting chars: S s \xc5 SSss\x{17f} 0: SSss\x{17f} +/[\W\p{Any}]/BZ +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{Any}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + 123 + 0: 1 + +/[\W\pL]/BZ +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{L}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + ** Failers + 0: * + 123 +No match + +/[\D]/8 + \x{1d7cf} + 0: \x{1d7cf} + +/[\D\P{Nd}]/8 + \x{1d7cf} + 0: \x{1d7cf} + +/[^\D]/8 + a9b + 0: 9 + ** Failers +No match + \x{1d7cf} +No match + +/[^\D\P{Nd}]/8 + a9b + 0: 9 + \x{1d7cf} + 0: \x{1d7cf} + ** Failers +No match + \x{10000} +No match + /-- End of testinput16 --/ diff --git a/testdata/testoutput19 b/testdata/testoutput19 index eb8a8f6..982bea4 100644 --- a/testdata/testoutput19 +++ b/testdata/testoutput19 @@ -105,4 +105,30 @@ Starting chars: S s \xff SSss\x{17f} 0: SSss\x{17f} +/[\D]/8 + \x{1d7cf} + 0: \x{1d7cf} + +/[\D\P{Nd}]/8 + \x{1d7cf} + 0: \x{1d7cf} + +/[^\D]/8 + a9b + 0: 9 + ** Failers +No match + \x{1d7cf} +No match + +/[^\D\P{Nd}]/8 + a9b + 0: 9 + \x{1d7cf} + 0: \x{1d7cf} + ** Failers +No match + \x{10000} +No match + /-- End of testinput19 --/ diff --git a/testdata/testoutput7 b/testdata/testoutput7 index fdfff64..2b167b2 100644 --- a/testdata/testoutput7 +++ b/testdata/testoutput7 @@ -2295,32 +2295,6 @@ Need char = 'c' (caseless) scat 0: sc -/[\W\p{Any}]/BZ ------------------------------------------------------------------- - Bra - [\x00-/:-@[-^`{-\xff\p{Any}] - Ket - End ------------------------------------------------------------------- - abc - 0: a - 123 - 0: 1 - -/[\W\pL]/BZ ------------------------------------------------------------------- - Bra - [\x00-/:-@[-^`{-\xff\p{L}] - Ket - End ------------------------------------------------------------------- - abc - 0: a - ** Failers - 0: * - 123 -No match - /a[[:punct:]b]/WBZ ------------------------------------------------------------------ Bra |