summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2016-08-04 17:15:38 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2016-08-04 17:15:38 +0000
commit: 1767e8be04f300ad10f2b358c281a23f0759e3d3 (patch)
tree: cc0b5659156d6e688c6ef62816759d87451fc098
parent: f6b31ef64cbe2b68733ebb049e8a8339f3918d3a (diff)
download: pcre-1767e8be04f300ad10f2b358c281a23f0759e3d3.tar.gz
Fix character class bug when a Unicode property was present with \D etc. in a
non-UCP character class in a wide character mode.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1664 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 10
-rw-r--r-- pcre_compile.c 28
-rw-r--r-- testdata/testinput16 26
-rw-r--r-- testdata/testinput19 17
-rw-r--r-- testdata/testinput7 9
-rw-r--r-- testdata/testoutput16 52
-rw-r--r-- testdata/testoutput19 26
-rw-r--r-- testdata/testoutput7 26
8 files changed, 157 insertions, 37 deletions
diff --git a/ChangeLog b/ChangeLog
index c8f3642..d6f8a56 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,7 +4,7 @@ ChangeLog for PCRE
Note that the PCRE 8.xx series (PCRE1) is now in a bugfix-only state. All
development is happening in the PCRE2 10.xx series.
-Version 8.40 17-June-2016
+Version 8.40 xx-xxxx-2016
-------------------------
1. Using -o with -M in pcregrep could cause unnecessary repeated output when
@@ -17,7 +17,13 @@ Version 8.40 17-June-2016
4. Ignore "show all captures" (/=) for DFA matching.
-5. Fix unaligned accesses on x86. Patch by Marc Mutz.
+5. Fix JIT unaligned accesses on x86. Patch by Marc Mutz.
+
+6. In any wide-character mode (8-bit UTF or any 16-bit or 32-bit mode), without
+ PCRE_UCP set, a negative character type such as \D in a positive class
+ should cause all characters greater than 255 to match, whatever else is in
+ the class. There was a bug that caused this not to happen if a Unicode
+ property item was added to such a class, for example [\D\P{Nd}] or [\W\pL].
Version 8.39 14-June-2016
diff --git a/pcre_compile.c b/pcre_compile.c
index 7cd3950..fb80ed1 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -5579,6 +5579,34 @@ for (;; ptr++)
#endif
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
{
+ /* For non-UCP wide characters, in a non-negative class containing \S or
+ similar (should_flip_negation is set), all characters greater than 255
+ must be in the class. */
+
+ if (
+#if defined COMPILE_PCRE8
+ utf &&
+#endif
+ should_flip_negation && !negate_class && (options & PCRE_UCP) == 0)
+ {
+ *class_uchardata++ = XCL_RANGE;
+ if (utf) /* Will always be utf in the 8-bit library */
+ {
+ class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
+ class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+ }
+ else /* Can only happen for the 16-bit & 32-bit libraries */
+ {
+#if defined COMPILE_PCRE16
+ *class_uchardata++ = 0x100;
+ *class_uchardata++ = 0xffffu;
+#elif defined COMPILE_PCRE32
+ *class_uchardata++ = 0x100;
+ *class_uchardata++ = 0xffffffffu;
+#endif
+ }
+ }
+
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
*code++ = OP_XCLASS;
code += LINK_SIZE;
diff --git a/testdata/testinput16 b/testdata/testinput16
index 15419e6..7ccde0a 100644
--- a/testdata/testinput16
+++ b/testdata/testinput16
@@ -38,4 +38,30 @@
/s+/i8SI
SSss\x{17f}
+/[\W\p{Any}]/BZ
+ abc
+ 123
+
+/[\W\pL]/BZ
+ abc
+ ** Failers
+ 123
+
+/[\D]/8
+ \x{1d7cf}
+
+/[\D\P{Nd}]/8
+ \x{1d7cf}
+
+/[^\D]/8
+ a9b
+ ** Failers
+ \x{1d7cf}
+
+/[^\D\P{Nd}]/8
+ a9b
+ \x{1d7cf}
+ ** Failers
+ \x{10000}
+
/-- End of testinput16 --/
diff --git a/testdata/testinput19 b/testdata/testinput19
index ce45afc..dfe8c7b 100644
--- a/testdata/testinput19
+++ b/testdata/testinput19
@@ -25,4 +25,21 @@
/s+/i8SI
SSss\x{17f}
+/[\D]/8
+ \x{1d7cf}
+
+/[\D\P{Nd}]/8
+ \x{1d7cf}
+
+/[^\D]/8
+ a9b
+ ** Failers
+ \x{1d7cf}
+
+/[^\D\P{Nd}]/8
+ a9b
+ \x{1d7cf}
+ ** Failers
+ \x{10000}
+
/-- End of testinput19 --/
diff --git a/testdata/testinput7 b/testdata/testinput7
index 00b9738..f44a810 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -838,15 +838,6 @@ of case for anything other than the ASCII letters. --/
/^s?c/mi8I
scat
-/[\W\p{Any}]/BZ
- abc
- 123
-
-/[\W\pL]/BZ
- abc
- ** Failers
- 123
-
/a[[:punct:]b]/WBZ
/a[[:punct:]b]/8WBZ
diff --git a/testdata/testoutput16 b/testdata/testoutput16
index fd184cd..e6ba26a 100644
--- a/testdata/testoutput16
+++ b/testdata/testoutput16
@@ -138,4 +138,56 @@ Starting chars: S s \xc5
SSss\x{17f}
0: SSss\x{17f}
+/[\W\p{Any}]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-/:-@[-^`{-\xff\p{Any}]
+ Ket
+ End
+------------------------------------------------------------------
+ abc
+ 0: a
+ 123
+ 0: 1
+
+/[\W\pL]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-/:-@[-^`{-\xff\p{L}]
+ Ket
+ End
+------------------------------------------------------------------
+ abc
+ 0: a
+ ** Failers
+ 0: *
+ 123
+No match
+
+/[\D]/8
+ \x{1d7cf}
+ 0: \x{1d7cf}
+
+/[\D\P{Nd}]/8
+ \x{1d7cf}
+ 0: \x{1d7cf}
+
+/[^\D]/8
+ a9b
+ 0: 9
+ ** Failers
+No match
+ \x{1d7cf}
+No match
+
+/[^\D\P{Nd}]/8
+ a9b
+ 0: 9
+ \x{1d7cf}
+ 0: \x{1d7cf}
+ ** Failers
+No match
+ \x{10000}
+No match
+
/-- End of testinput16 --/
diff --git a/testdata/testoutput19 b/testdata/testoutput19
index eb8a8f6..982bea4 100644
--- a/testdata/testoutput19
+++ b/testdata/testoutput19
@@ -105,4 +105,30 @@ Starting chars: S s \xff
SSss\x{17f}
0: SSss\x{17f}
+/[\D]/8
+ \x{1d7cf}
+ 0: \x{1d7cf}
+
+/[\D\P{Nd}]/8
+ \x{1d7cf}
+ 0: \x{1d7cf}
+
+/[^\D]/8
+ a9b
+ 0: 9
+ ** Failers
+No match
+ \x{1d7cf}
+No match
+
+/[^\D\P{Nd}]/8
+ a9b
+ 0: 9
+ \x{1d7cf}
+ 0: \x{1d7cf}
+ ** Failers
+No match
+ \x{10000}
+No match
+
/-- End of testinput19 --/
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index fdfff64..2b167b2 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -2295,32 +2295,6 @@ Need char = 'c' (caseless)
scat
0: sc
-/[\W\p{Any}]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-/:-@[-^`{-\xff\p{Any}]
- Ket
- End
-------------------------------------------------------------------
- abc
- 0: a
- 123
- 0: 1
-
-/[\W\pL]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-/:-@[-^`{-\xff\p{L}]
- Ket
- End
-------------------------------------------------------------------
- abc
- 0: a
- ** Failers
- 0: *
- 123
-No match
-
/a[[:punct:]b]/WBZ
------------------------------------------------------------------
Bra