diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-08-20 11:07:53 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-08-20 11:07:53 +0000 |
commit | 86d4fce0384fc9825e0e2cf81d9a4ebef117d7ca (patch) | |
tree | 9edab2c802f3684009c8ffec976bc8e5ed49ac00 | |
parent | 2577186e689baff05e3842bb78f1fc9f88c3b498 (diff) | |
download | pcre-86d4fce0384fc9825e0e2cf81d9a4ebef117d7ca.tar.gz |
Fix loop for classes containing \p or \P and just one ASCII character.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@223 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | configure.ac | 4 | ||||
-rw-r--r-- | pcre_compile.c | 19 | ||||
-rw-r--r-- | testdata/testinput10 | 16 | ||||
-rw-r--r-- | testdata/testinput6 | 8 | ||||
-rw-r--r-- | testdata/testoutput10 | 72 | ||||
-rw-r--r-- | testdata/testoutput6 | 8 |
7 files changed, 120 insertions, 12 deletions
@@ -1,7 +1,7 @@ ChangeLog for PCRE ------------------ -Version 7.3 17-Aug-07 +Version 7.3 20-Aug-07 --------------------- 1. In the rejigging of the build system that eventually resulted in 7.1, the @@ -145,6 +145,9 @@ Version 7.3 17-Aug-07 25. Using pcregrep in multiline, inverted mode (-Mv) caused it to loop. +26. Patterns such as [\P{Yi}A] which include \p or \P and just one other + character were causing crashes (broken optimization). + Version 7.2 19-Jun-07 --------------------- diff --git a/configure.ac b/configure.ac index cb534b4..2379045 100644 --- a/configure.ac +++ b/configure.ac @@ -8,8 +8,8 @@ dnl empty. m4_define(pcre_major, [7]) m4_define(pcre_minor, [3]) -m4_define(pcre_prerelease, [-RC7]) -m4_define(pcre_date, [2007-08-17]) +m4_define(pcre_prerelease, [-RC8]) +m4_define(pcre_date, [2007-08-20]) # Libtool shared library interface versions (current:revision:age) m4_define(libpcre_version, [0:1:0]) diff --git a/pcre_compile.c b/pcre_compile.c index 05f370a..a1370d0 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -3176,11 +3176,14 @@ for (;; ptr++) } /* If class_charcount is 1, we saw precisely one character whose value is - less than 256. In non-UTF-8 mode we can always optimize. In UTF-8 mode, we - can optimize the negative case only if there were no characters >= 128 - because OP_NOT and the related opcodes like OP_NOTSTAR operate on - single-bytes only. This is an historical hangover. Maybe one day we can - tidy these opcodes to handle multi-byte characters. + less than 256. As long as there were no characters >= 128 and there was no + use of \p or \P, in other words, no use of any XCLASS features, we can + optimize. + + In UTF-8 mode, we can optimize the negative case only if there were no + characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR + operate on single-bytes only. This is an historical hangover. Maybe one day + we can tidy these opcodes to handle multi-byte characters. The optimization throws away the bit map. 
We turn the item into a 1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note @@ -3190,10 +3193,8 @@ for (;; ptr++) reqbyte, save the previous value for reinstating. */ #ifdef SUPPORT_UTF8 - if (class_charcount == 1 && - (!utf8 || - (!class_utf8 && (!negate_class || class_lastchar < 128)))) - + if (class_charcount == 1 && !class_utf8 && + (!utf8 || !negate_class || class_lastchar < 128)) #else if (class_charcount == 1) #endif diff --git a/testdata/testinput10 b/testdata/testinput10 index 28fef9d..726a389 100644 --- a/testdata/testinput10 +++ b/testdata/testinput10 @@ -105,4 +105,20 @@ are all themselves checked in other tests. --/ /( (?(1)0|)* )/xBM +/[a]/BM + +/[a]/8BM + +/[\xaa]/BM + +/[\xaa]/8BM + +/[^a]/BM + +/[^a]/8BM + +/[^\xaa]/BM + +/[^\xaa]/8BM + / End of testinput10 / diff --git a/testdata/testinput6 b/testdata/testinput6 index 400b14f..14b0645 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -806,4 +806,12 @@ was broken in all cases./ /(\P{Yi}{2}\277)?/ +/[\P{Yi}A]/ + +/[\P{Yi}\P{Yi}\P{Yi}A]/ + +/[^\P{Yi}A]/ + +/[^\P{Yi}\P{Yi}\P{Yi}A]/ + / End of testinput6 / diff --git a/testdata/testoutput10 b/testdata/testoutput10 index ffc84fa..dbd5924 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -594,4 +594,76 @@ Memory allocation (code space): 30 29 End ------------------------------------------------------------------ +/[a]/BM +Memory allocation (code space): 9 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[a]/8BM +Memory allocation (code space): 9 +------------------------------------------------------------------ + 0 5 Bra + 3 a + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[\xaa]/BM +Memory allocation (code space): 9 +------------------------------------------------------------------ + 0 5 Bra + 3 \xaa + 5 5 Ket + 8 End 
+------------------------------------------------------------------ + +/[\xaa]/8BM +Memory allocation (code space): 10 +------------------------------------------------------------------ + 0 6 Bra + 3 \x{aa} + 6 6 Ket + 9 End +------------------------------------------------------------------ + +/[^a]/BM +Memory allocation (code space): 9 +------------------------------------------------------------------ + 0 5 Bra + 3 [^a] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^a]/8BM +Memory allocation (code space): 9 +------------------------------------------------------------------ + 0 5 Bra + 3 [^a] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^\xaa]/BM +Memory allocation (code space): 9 +------------------------------------------------------------------ + 0 5 Bra + 3 [^\xaa] + 5 5 Ket + 8 End +------------------------------------------------------------------ + +/[^\xaa]/8BM +Memory allocation (code space): 40 +------------------------------------------------------------------ + 0 36 Bra + 3 [\x00-\xa9\xab-\xff] (neg) + 36 36 Ket + 39 End +------------------------------------------------------------------ + / End of testinput10 / diff --git a/testdata/testoutput6 b/testdata/testoutput6 index 2fc2db0..a39cf0e 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -1496,4 +1496,12 @@ No match /(\P{Yi}{2}\277)?/ +/[\P{Yi}A]/ + +/[\P{Yi}\P{Yi}\P{Yi}A]/ + +/[^\P{Yi}A]/ + +/[^\P{Yi}\P{Yi}\P{Yi}A]/ + / End of testinput6 / |