diff options
author | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2015-11-17 17:13:43 +0000 |
---|---|---|
committer | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2015-11-17 17:13:43 +0000 |
commit | dc6b02fe91bff56af218f2c68b70633eedd5d21d (patch) | |
tree | 1a8e6a9ea95ef40ec1bc027e6dcef42bedf70232 | |
parent | 7bc692271121c6057cc862fab6f7d5e84295408e (diff) | |
download | pcre2-dc6b02fe91bff56af218f2c68b70633eedd5d21d.tar.gz |
Fix single-character POSIX class bug in UCP mode.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@440 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | src/pcre2_compile.c | 26 | ||||
-rw-r--r-- | testdata/testinput2 | 4 | ||||
-rw-r--r-- | testdata/testinput5 | 6 | ||||
-rw-r--r-- | testdata/testoutput2 | 18 | ||||
-rw-r--r-- | testdata/testoutput5 | 27 |
6 files changed, 74 insertions, 11 deletions
@@ -314,6 +314,10 @@ with JIT (possibly caused by SSE2?). 94. Support offset_limit in JIT. +95. A sequence such as [[:punct:]b] that is, a POSIX character class followed +by a single ASCII character in a class item, was incorrectly compiled in UCP +mode. The POSIX class got lost, but only if the single character followed it. + Version 10.20 30-June-2015 -------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index c6e84ce..ff7bebd 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -1352,7 +1352,7 @@ if ((cb->external_flags & PCRE2_DUPCAPUSED) == 0 && /* A large and/or complex regex can take too long to process. We have to assume it can match an empty string. This can happen more often when (?| groups are -present in the pattern and the caching is disabled. Setting the cap at 1100 +present in the pattern and the caching is disabled. Setting the cap at 1100 allows the test for more than 1023 capturing patterns to work. */ if ((*countptr)++ > 1100) return CBE_TOOCOMPLICATED; @@ -4729,16 +4729,20 @@ for (;; ptr++) CLASS_SINGLE_CHARACTER: if (class_one_char < 2) class_one_char++; - /* If class_one_char is 1, we have the first single character in the - class, and there have been no prior ranges, or XCLASS items generated by - escapes. If this is the final character in the class, we can optimize by - turning the item into a 1-character OP_CHAR[I] if it's positive, or - OP_NOT[I] if it's negative. In the positive case, it can cause firstcu - to be set. Otherwise, there can be no first char if this item is first, - whatever repeat count may follow. In the case of reqcu, save the - previous value for reinstating. */ + /* If class_one_char is 1 and xclass_has_prop is false, we have the first + single character in the class, and there have been no prior ranges, or + XCLASS items generated by escapes. If this is the final character in the + class, we can optimize by turning the item into a 1-character OP_CHAR[I] + if it's positive, or OP_NOT[I] if it's negative. In the positive case, it + can cause firstcu to be set. Otherwise, there can be no first char if + this item is first, whatever repeat count may follow. In the case of + reqcu, save the previous value for reinstating. */ - if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) + if (!inescq && +#ifdef SUPPORT_UNICODE + !xclass_has_prop && +#endif + class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) { ptr++; zeroreqcu = reqcu; @@ -7287,7 +7291,7 @@ for (;; ptr++) else { - if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */ + if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */ if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && cb->max_lookbehind == 0) cb->max_lookbehind = 1; diff --git a/testdata/testinput2 b/testdata/testinput2 index a6cfd85..dfd1aa8 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4685,4 +4685,8 @@ a)"xI "(*ANYCRLF)(?m)^(.*[^0-9\r\n].*|)$"g,replace=NaN 15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20 +/a[[:punct:]b]/bincode + +/a[b[:punct:]]/bincode + # End of testinput2 diff --git a/testdata/testinput5 b/testdata/testinput5 index a288459..2432d19 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -1691,4 +1691,10 @@ /abc\Cdef/info,utf +/a[[:punct:]b]/ucp,bincode + +/a[[:punct:]b]/utf,ucp,bincode + +/a[b[:punct:]]/utf,ucp,bincode + # End of testinput5 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 9408756..e5350d5 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14888,4 +14888,22 @@ Subject length lower bound = 0 15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20 4: 15\x0d\x0aNaN\x0d\x0a20\x0d\x0aNaN\x0d\x0aNaN\x0d\x0aNaN\x0d\x0a20 +/a[[:punct:]b]/bincode +------------------------------------------------------------------ + Bra + a + [!-/:-@[-`b{-~] + Ket + End +------------------------------------------------------------------ + +/a[b[:punct:]]/bincode +------------------------------------------------------------------ + Bra + a + [!-/:-@[-`b{-~] + Ket + End +------------------------------------------------------------------ + # End of testinput2 diff --git a/testdata/testoutput5 b/testdata/testoutput5 index ef93100..7eb9df3 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -4070,4 +4070,31 @@ First code unit = 'a' Last code unit = 'f' Subject length lower bound = 0 +/a[[:punct:]b]/ucp,bincode +------------------------------------------------------------------ + Bra + a + [b[:punct:]] + Ket + End +------------------------------------------------------------------ + +/a[[:punct:]b]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + a + [b[:punct:]] + Ket + End +------------------------------------------------------------------ + +/a[b[:punct:]]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + a + [b[:punct:]] + Ket + End +------------------------------------------------------------------ + # End of testinput5 |