summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2015-11-17 17:13:43 +0000
committer ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2015-11-17 17:13:43 +0000
commit dc6b02fe91bff56af218f2c68b70633eedd5d21d (patch)
tree 1a8e6a9ea95ef40ec1bc027e6dcef42bedf70232
parent 7bc692271121c6057cc862fab6f7d5e84295408e (diff)
download pcre2-dc6b02fe91bff56af218f2c68b70633eedd5d21d.tar.gz
Fix single-character POSIX class bug in UCP mode.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@440 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- ChangeLog 4
-rw-r--r-- src/pcre2_compile.c 26
-rw-r--r-- testdata/testinput2 4
-rw-r--r-- testdata/testinput5 6
-rw-r--r-- testdata/testoutput2 18
-rw-r--r-- testdata/testoutput5 27
6 files changed, 74 insertions, 11 deletions
diff --git a/ChangeLog b/ChangeLog
index 2472fa0..4e3164f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -314,6 +314,10 @@ with JIT (possibly caused by SSE2?).
94. Support offset_limit in JIT.
+95. A sequence such as [[:punct:]b], that is, a POSIX character class followed
+by a single ASCII character in a class item, was incorrectly compiled in UCP
+mode. The POSIX class got lost, but only if the single character followed it.
+
Version 10.20 30-June-2015
--------------------------
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index c6e84ce..ff7bebd 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -1352,7 +1352,7 @@ if ((cb->external_flags & PCRE2_DUPCAPUSED) == 0 &&
/* A large and/or complex regex can take too long to process. We have to assume
it can match an empty string. This can happen more often when (?| groups are
-present in the pattern and the caching is disabled. Setting the cap at 1100
+present in the pattern and the caching is disabled. Setting the cap at 1100
allows the test for more than 1023 capturing patterns to work. */
if ((*countptr)++ > 1100) return CBE_TOOCOMPLICATED;
@@ -4729,16 +4729,20 @@ for (;; ptr++)
CLASS_SINGLE_CHARACTER:
if (class_one_char < 2) class_one_char++;
- /* If class_one_char is 1, we have the first single character in the
- class, and there have been no prior ranges, or XCLASS items generated by
- escapes. If this is the final character in the class, we can optimize by
- turning the item into a 1-character OP_CHAR[I] if it's positive, or
- OP_NOT[I] if it's negative. In the positive case, it can cause firstcu
- to be set. Otherwise, there can be no first char if this item is first,
- whatever repeat count may follow. In the case of reqcu, save the
- previous value for reinstating. */
+ /* If class_one_char is 1 and xclass_has_prop is false, we have the first
+ single character in the class, and there have been no prior ranges, or
+ XCLASS items generated by escapes. If this is the final character in the
+ class, we can optimize by turning the item into a 1-character OP_CHAR[I]
+ if it's positive, or OP_NOT[I] if it's negative. In the positive case, it
+ can cause firstcu to be set. Otherwise, there can be no first char if
+ this item is first, whatever repeat count may follow. In the case of
+ reqcu, save the previous value for reinstating. */
- if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
+ if (!inescq &&
+#ifdef SUPPORT_UNICODE
+ !xclass_has_prop &&
+#endif
+ class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
{
ptr++;
zeroreqcu = reqcu;
@@ -7287,7 +7291,7 @@ for (;; ptr++)
else
{
- if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */
+ if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */
if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
cb->max_lookbehind == 0)
cb->max_lookbehind = 1;
diff --git a/testdata/testinput2 b/testdata/testinput2
index a6cfd85..dfd1aa8 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4685,4 +4685,8 @@ a)"xI
"(*ANYCRLF)(?m)^(.*[^0-9\r\n].*|)$"g,replace=NaN
15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20
+/a[[:punct:]b]/bincode
+
+/a[b[:punct:]]/bincode
+
# End of testinput2
diff --git a/testdata/testinput5 b/testdata/testinput5
index a288459..2432d19 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -1691,4 +1691,10 @@
/abc\Cdef/info,utf
+/a[[:punct:]b]/ucp,bincode
+
+/a[[:punct:]b]/utf,ucp,bincode
+
+/a[b[:punct:]]/utf,ucp,bincode
+
# End of testinput5
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 9408756..e5350d5 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14888,4 +14888,22 @@ Subject length lower bound = 0
15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20
4: 15\x0d\x0aNaN\x0d\x0a20\x0d\x0aNaN\x0d\x0aNaN\x0d\x0aNaN\x0d\x0a20
+/a[[:punct:]b]/bincode
+------------------------------------------------------------------
+ Bra
+ a
+ [!-/:-@[-`b{-~]
+ Ket
+ End
+------------------------------------------------------------------
+
+/a[b[:punct:]]/bincode
+------------------------------------------------------------------
+ Bra
+ a
+ [!-/:-@[-`b{-~]
+ Ket
+ End
+------------------------------------------------------------------
+
# End of testinput2
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index ef93100..7eb9df3 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -4070,4 +4070,31 @@ First code unit = 'a'
Last code unit = 'f'
Subject length lower bound = 0
+/a[[:punct:]b]/ucp,bincode
+------------------------------------------------------------------
+ Bra
+ a
+ [b[:punct:]]
+ Ket
+ End
+------------------------------------------------------------------
+
+/a[[:punct:]b]/utf,ucp,bincode
+------------------------------------------------------------------
+ Bra
+ a
+ [b[:punct:]]
+ Ket
+ End
+------------------------------------------------------------------
+
+/a[b[:punct:]]/utf,ucp,bincode
+------------------------------------------------------------------
+ Bra
+ a
+ [b[:punct:]]
+ Ket
+ End
+------------------------------------------------------------------
+
# End of testinput5