diff options
author | zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069> | 2020-02-27 08:35:14 +0000 |
---|---|---|
committer | zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069> | 2020-02-27 08:35:14 +0000 |
commit | 571ad09506eb7a13338056a44139073e7f29ad1a (patch) | |
tree | 845bfba6ccfca1cc9e751180d3a7cd969e3ce888 | |
parent | 4ea71d47b6608e27759ebc39359c980d788db68a (diff) | |
download | pcre2-571ad09506eb7a13338056a44139073e7f29ad1a.tar.gz |
Support more accelerated repeat cases in JIT.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1232 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- | src/pcre2_jit_compile.c | 158 |
1 file changed, 116 insertions, 42 deletions
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 2de5538..838b7d8 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -1270,6 +1270,7 @@ switch(*cc) cc += (1 + (32 / sizeof(PCRE2_UCHAR))); #endif + /* Only these types are supported. */ switch(*cc) { case OP_CRSTAR: @@ -1315,8 +1316,10 @@ while (TRUE) break; end = cc + GET(cc, 1); + /* Iterated brackets are skipped. */ if (*end != OP_KET || PRIVATE_DATA(end) != 0) return FALSE; + if (*cc == OP_CBRA) { if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) @@ -1336,67 +1339,138 @@ if (is_accelerated_repeat(cc)) return FALSE; } -static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth) +static void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth) { - PCRE2_SPTR next_alt; +PCRE2_SPTR next_alt; +PCRE2_SPTR end; - SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA); +SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA); +SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0); - if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) - return; +do + { + next_alt = cc + GET(cc, 1); - next_alt = bracketend(cc) - (1 + LINK_SIZE); - if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0) - return; + cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? 
IMM2_SIZE : 0); - do + while (TRUE) { - next_alt = cc + GET(cc, 1); + switch(*cc) + { + case OP_SOD: + case OP_SOM: + case OP_SET_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ALLANY: + case OP_ANYBYTE: + case OP_ANYNL: + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + cc++; + continue; + + case OP_NOTPROP: + case OP_PROP: + cc += 1 + 2; + continue; + + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + continue; - cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0); + case OP_CLASS: + case OP_NCLASS: +#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 + case OP_XCLASS: + end = cc + ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR)))); +#else + end = cc + (1 + (32 / sizeof(PCRE2_UCHAR))); +#endif - while (TRUE) - { - switch(*cc) + if (*end >= OP_CRSTAR && *end <= OP_CRPOSRANGE) + break; + + cc = end; + continue; + + case OP_BRA: + case OP_CBRA: + end = cc + GET(cc, 1); + + if (*end == OP_KET && PRIVATE_DATA(end) == 0) { - case OP_SOD: - case OP_SOM: - case OP_SET_SOM: - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - case OP_EODN: - case OP_EOD: - case OP_CIRC: - case OP_CIRCM: - case OP_DOLL: - case OP_DOLLM: - /* Zero width assertions. 
*/ - cc++; + if (*cc == OP_CBRA) + { + if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + break; + cc += IMM2_SIZE; + } + + cc += 1 + LINK_SIZE; continue; } - break; - } - if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA)) - detect_fast_fail(common, cc, private_data_start, depth - 1); + if (depth == 0) + break; - if (is_accelerated_repeat(cc)) - { - common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start; + end = bracketend(cc) - (1 + LINK_SIZE); + if (*end != OP_KET || PRIVATE_DATA(end) != 0) + break; - if (common->fast_fail_start_ptr == 0) - common->fast_fail_start_ptr = *private_data_start; + if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + break; - *private_data_start += sizeof(sljit_sw); - common->fast_fail_end_ptr = *private_data_start; + detect_fast_fail(common, cc, private_data_start, depth - 1); + break; - if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) - return; + case OP_KET: + SLJIT_ASSERT(PRIVATE_DATA(cc) == 0); + if (cc >= next_alt) + break; + cc += 1 + LINK_SIZE; + continue; } + break; + } + + if (is_accelerated_repeat(cc)) + { + common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start; - cc = next_alt; + if (common->fast_fail_start_ptr == 0) + common->fast_fail_start_ptr = *private_data_start; + + *private_data_start += sizeof(sljit_sw); + common->fast_fail_end_ptr = *private_data_start; + + if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) + return; } - while (*cc == OP_ALT); + cc = next_alt; + } +while (*cc == OP_ALT); } static int get_class_iterator_size(PCRE2_SPTR cc) |