diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-08-20 11:07:53 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-08-20 11:07:53 +0000 |
commit | 86d4fce0384fc9825e0e2cf81d9a4ebef117d7ca (patch) | |
tree | 9edab2c802f3684009c8ffec976bc8e5ed49ac00 /pcre_compile.c | |
parent | 2577186e689baff05e3842bb78f1fc9f88c3b498 (diff) | |
download | pcre-86d4fce0384fc9825e0e2cf81d9a4ebef117d7ca.tar.gz |
Fix loop for classes containing \p or \P and just one ASCII character.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@223 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_compile.c')
-rw-r--r-- | pcre_compile.c | 19 |
1 files changed, 10 insertions, 9 deletions
```diff
diff --git a/pcre_compile.c b/pcre_compile.c
index 05f370a..a1370d0 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -3176,11 +3176,14 @@ for (;; ptr++)
 }
 
 /* If class_charcount is 1, we saw precisely one character whose value is
- less than 256. In non-UTF-8 mode we can always optimize. In UTF-8 mode, we
- can optimize the negative case only if there were no characters >= 128
- because OP_NOT and the related opcodes like OP_NOTSTAR operate on
- single-bytes only. This is an historical hangover. Maybe one day we can
- tidy these opcodes to handle multi-byte characters.
+ less than 256. As long as there were no characters >= 128 and there was no
+ use of \p or \P, in other words, no use of any XCLASS features, we can
+ optimize.
+
+ In UTF-8 mode, we can optimize the negative case only if there were no
+ characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
+ operate on single-bytes only. This is an historical hangover. Maybe one day
+ we can tidy these opcodes to handle multi-byte characters.
 
 The optimization throws away the bit map. We turn the item into a
 1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note
@@ -3190,10 +3193,8 @@ for (;; ptr++)
 reqbyte, save the previous value for reinstating. */
 
 #ifdef SUPPORT_UTF8
- if (class_charcount == 1 &&
- (!utf8 ||
- (!class_utf8 && (!negate_class || class_lastchar < 128))))
-
+ if (class_charcount == 1 && !class_utf8 &&
+ (!utf8 || !negate_class || class_lastchar < 128))
 #else
 if (class_charcount == 1)
 #endif
```