summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-08-20 11:07:53 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-08-20 11:07:53 +0000
commit 86d4fce0384fc9825e0e2cf81d9a4ebef117d7ca (patch)
tree 9edab2c802f3684009c8ffec976bc8e5ed49ac00
parent 2577186e689baff05e3842bb78f1fc9f88c3b498 (diff)
download pcre-86d4fce0384fc9825e0e2cf81d9a4ebef117d7ca.tar.gz
Fix loop for classes containing \p or \P and just one ascii character.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@223 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 5
-rw-r--r-- configure.ac 4
-rw-r--r-- pcre_compile.c 19
-rw-r--r-- testdata/testinput10 16
-rw-r--r-- testdata/testinput6 8
-rw-r--r-- testdata/testoutput10 72
-rw-r--r-- testdata/testoutput6 8
7 files changed, 120 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 031ec34..94b478f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,7 @@
ChangeLog for PCRE
------------------
-Version 7.3 17-Aug-07
+Version 7.3 20-Aug-07
---------------------
1. In the rejigging of the build system that eventually resulted in 7.1, the
@@ -145,6 +145,9 @@ Version 7.3 17-Aug-07
25. Using pcregrep in multiline, inverted mode (-Mv) caused it to loop.
+26. Patterns such as [\P{Yi}A] which include \p or \P and just one other
+ character were causing crashes (broken optimization).
+
Version 7.2 19-Jun-07
---------------------
diff --git a/configure.ac b/configure.ac
index cb534b4..2379045 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8,8 +8,8 @@ dnl empty.
m4_define(pcre_major, [7])
m4_define(pcre_minor, [3])
-m4_define(pcre_prerelease, [-RC7])
-m4_define(pcre_date, [2007-08-17])
+m4_define(pcre_prerelease, [-RC8])
+m4_define(pcre_date, [2007-08-20])
# Libtool shared library interface versions (current:revision:age)
m4_define(libpcre_version, [0:1:0])
diff --git a/pcre_compile.c b/pcre_compile.c
index 05f370a..a1370d0 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -3176,11 +3176,14 @@ for (;; ptr++)
}
/* If class_charcount is 1, we saw precisely one character whose value is
- less than 256. In non-UTF-8 mode we can always optimize. In UTF-8 mode, we
- can optimize the negative case only if there were no characters >= 128
- because OP_NOT and the related opcodes like OP_NOTSTAR operate on
- single-bytes only. This is an historical hangover. Maybe one day we can
- tidy these opcodes to handle multi-byte characters.
+ less than 256. As long as there were no characters >= 128 and there was no
+ use of \p or \P, in other words, no use of any XCLASS features, we can
+ optimize.
+
+ In UTF-8 mode, we can optimize the negative case only if there were no
+ characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
+ operate on single-bytes only. This is an historical hangover. Maybe one day
+ we can tidy these opcodes to handle multi-byte characters.
The optimization throws away the bit map. We turn the item into a
1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note
@@ -3190,10 +3193,8 @@ for (;; ptr++)
reqbyte, save the previous value for reinstating. */
#ifdef SUPPORT_UTF8
- if (class_charcount == 1 &&
- (!utf8 ||
- (!class_utf8 && (!negate_class || class_lastchar < 128))))
-
+ if (class_charcount == 1 && !class_utf8 &&
+ (!utf8 || !negate_class || class_lastchar < 128))
#else
if (class_charcount == 1)
#endif
diff --git a/testdata/testinput10 b/testdata/testinput10
index 28fef9d..726a389 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -105,4 +105,20 @@ are all themselves checked in other tests. --/
/( (?(1)0|)* )/xBM
+/[a]/BM
+
+/[a]/8BM
+
+/[\xaa]/BM
+
+/[\xaa]/8BM
+
+/[^a]/BM
+
+/[^a]/8BM
+
+/[^\xaa]/BM
+
+/[^\xaa]/8BM
+
/ End of testinput10 /
diff --git a/testdata/testinput6 b/testdata/testinput6
index 400b14f..14b0645 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -806,4 +806,12 @@ was broken in all cases./
/(\P{Yi}{2}\277)?/
+/[\P{Yi}A]/
+
+/[\P{Yi}\P{Yi}\P{Yi}A]/
+
+/[^\P{Yi}A]/
+
+/[^\P{Yi}\P{Yi}\P{Yi}A]/
+
/ End of testinput6 /
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index ffc84fa..dbd5924 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -594,4 +594,76 @@ Memory allocation (code space): 30
29 End
------------------------------------------------------------------
+/[a]/BM
+Memory allocation (code space): 9
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 a
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[a]/8BM
+Memory allocation (code space): 9
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 a
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[\xaa]/BM
+Memory allocation (code space): 9
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 \xaa
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[\xaa]/8BM
+Memory allocation (code space): 10
+------------------------------------------------------------------
+ 0 6 Bra
+ 3 \x{aa}
+ 6 6 Ket
+ 9 End
+------------------------------------------------------------------
+
+/[^a]/BM
+Memory allocation (code space): 9
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 [^a]
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[^a]/8BM
+Memory allocation (code space): 9
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 [^a]
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[^\xaa]/BM
+Memory allocation (code space): 9
+------------------------------------------------------------------
+ 0 5 Bra
+ 3 [^\xaa]
+ 5 5 Ket
+ 8 End
+------------------------------------------------------------------
+
+/[^\xaa]/8BM
+Memory allocation (code space): 40
+------------------------------------------------------------------
+ 0 36 Bra
+ 3 [\x00-\xa9\xab-\xff] (neg)
+ 36 36 Ket
+ 39 End
+------------------------------------------------------------------
+
/ End of testinput10 /
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index 2fc2db0..a39cf0e 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -1496,4 +1496,12 @@ No match
/(\P{Yi}{2}\277)?/
+/[\P{Yi}A]/
+
+/[\P{Yi}\P{Yi}\P{Yi}A]/
+
+/[^\P{Yi}A]/
+
+/[^\P{Yi}\P{Yi}\P{Yi}A]/
+
/ End of testinput6 /