diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2013-12-22 20:47:08 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2013-12-22 20:47:08 +0000 |
commit | c07887b22a83fba842e88889c9e57a622b9ee439 (patch) | |
tree | 47153d0468ee797e5840dccdffdc3b934a51676c /pcre_compile.c | |
parent | f928c7adccd8daa61e76c22130d79689ec41f21c (diff) | |
download | pcre-c07887b22a83fba842e88889c9e57a622b9ee439.tar.gz |
The auto-possessification of character sets was improved. The JIT compiler also optimizes more character set checks.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1415 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_compile.c')
-rw-r--r-- | pcre_compile.c | 57 |
1 files changed, 36 insertions, 21 deletions
diff --git a/pcre_compile.c b/pcre_compile.c index 88b9636..d7e21d9 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -3070,8 +3070,11 @@ const pcre_uint32 *chr_ptr; const pcre_uint32 *ochr_ptr; const pcre_uint32 *list_ptr; const pcre_uchar *next_code; +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 +const pcre_uchar *xclass_flags; +#endif const pcre_uint8 *class_bitset; -const pcre_uint32 *set1, *set2, *set_end; +const pcre_uint8 *set1, *set2, *set_end; pcre_uint32 chr; BOOL accepted, invert_bits; @@ -3202,12 +3205,12 @@ for(;;) if (base_list[0] == OP_CLASS) #endif { - set1 = (pcre_uint32 *)(base_end - base_list[2]); + set1 = (pcre_uint8 *)(base_end - base_list[2]); list_ptr = list; } else { - set1 = (pcre_uint32 *)(code - list[2]); + set1 = (pcre_uint8 *)(code - list[2]); list_ptr = base_list; } @@ -3216,41 +3219,53 @@ for(;;) { case OP_CLASS: case OP_NCLASS: - set2 = (pcre_uint32 *) + set2 = (pcre_uint8 *) ((list_ptr == list ? code : base_end) - list_ptr[2]); break; - /* OP_XCLASS cannot be supported here, because its bitset - is not necessarily complete. E.g: [a-\0x{200}] is stored - as a character range, and the appropriate bits are not set. */ +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 + case OP_XCLASS: + xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE; + if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE; + if ((*xclass_flags & XCL_MAP) == 0) + { + /* No bits are set for characters < 256. */ + if (list[1] == 0) return TRUE; + /* Might be an empty repeat. 
*/ + continue; + } + set2 = (pcre_uint8 *)(xclass_flags + 1); + break; +#endif case OP_NOT_DIGIT: - invert_bits = TRUE; - /* Fall through */ + invert_bits = TRUE; + /* Fall through */ case OP_DIGIT: - set2 = (pcre_uint32 *)(cd->cbits + cbit_digit); - break; + set2 = (pcre_uint8 *)(cd->cbits + cbit_digit); + break; case OP_NOT_WHITESPACE: - invert_bits = TRUE; - /* Fall through */ + invert_bits = TRUE; + /* Fall through */ case OP_WHITESPACE: - set2 = (pcre_uint32 *)(cd->cbits + cbit_space); - break; + set2 = (pcre_uint8 *)(cd->cbits + cbit_space); + break; case OP_NOT_WORDCHAR: - invert_bits = TRUE; - /* Fall through */ + invert_bits = TRUE; + /* Fall through */ case OP_WORDCHAR: - set2 = (pcre_uint32 *)(cd->cbits + cbit_word); - break; + set2 = (pcre_uint8 *)(cd->cbits + cbit_word); + break; default: return FALSE; } - /* Compare 4 bytes to improve speed. */ - set_end = set1 + (32 / 4); + /* Because the sets are unaligned, we need + to perform byte comparison here. */ + set_end = set1 + 32; if (invert_bits) { do |