From aeca4ee3da4ae1629b91651a30e3cb1f1a1fdad1 Mon Sep 17 00:00:00 2001 From: zherczeg Date: Thu, 4 Feb 2016 17:13:10 +0000 Subject: JIT code generator for assertion matching is refactored to a separat function. git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1623 2f5784b3-3f2a-0410-8824-cb99058d5e15 --- ChangeLog | 3 + pcre_jit_compile.c | 507 ++++++++++++++++++++++++++++------------------------- 2 files changed, 273 insertions(+), 237 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1b997a7..2c6315c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -44,6 +44,9 @@ Version 8.39 xx-xxxxxx-201x 10. pcre_get_substring_list() crashed if the use of \K in a match caused the start of the match to be earlier than the end. +11. JIT code generator for assertion matching is refactored to a separate + function. + Version 8.38 23-November-2015 ----------------------------- diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c index 445de0c..163705d 100644 --- a/pcre_jit_compile.c +++ b/pcre_jit_compile.c @@ -4895,7 +4895,6 @@ while (*cc != XCL_END) /* We are not necessary in utf mode even in 8 bit mode. */ cc = ccbegin; -detect_partial_match(common, backtracks); read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0); if ((cc[-1] & XCL_HASPROP) == 0) @@ -4903,7 +4902,7 @@ if ((cc[-1] & XCL_HASPROP) == 0) if ((cc[-1] & XCL_MAP) != 0) { jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); - if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found)) + if (!check_class_ranges(common, (const sljit_ub *)cc, (((const sljit_ub *)cc)[31] & 0x80) != 0, TRUE, &found)) { OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); @@ -5282,19 +5281,13 @@ if (found != NULL) #endif -static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks) +static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks) { DEFINE_COMPILER; int length; -unsigned int c, oc, bit; -compare_context context; struct sljit_jump *jump[4]; -jump_list *end_list; #ifdef SUPPORT_UTF struct sljit_label *label; -#ifdef SUPPORT_UCP -pcre_uchar propdata[5]; -#endif #endif /* SUPPORT_UTF */ switch(type) @@ -5317,10 +5310,218 @@ switch(type) add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO)); return cc; + case OP_EODN: + /* Requires rather complex checks. */ + jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (common->mode == JIT_COMPILE) + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); + else + { + jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); + } + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else if (common->nltype == NLTYPE_FIXED) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); + } + else + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + jump[2] = JUMP(SLJIT_GREATER); + add_jump(compiler, backtracks, JUMP(SLJIT_LESS)); + /* Equal. */ + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + + JUMPHERE(jump[1]); + if (common->nltype == NLTYPE_ANYCRLF) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); + } + else + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0); + read_char_range(common, common->nlmin, common->nlmax, TRUE); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); + } + JUMPHERE(jump[2]); + JUMPHERE(jump[3]); + } + JUMPHERE(jump[0]); + check_partial(common, FALSE); + return cc; + + case OP_EOD: + add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + check_partial(common, FALSE); + return cc; + + case OP_DOLL: + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + + if (!common->endonly) + compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks); + else + { + add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + check_partial(common, FALSE); + } + return cc; + + case OP_DOLLM: + jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + check_partial(common, FALSE); + jump[0] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[1]); + + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (common->mode == JIT_COMPILE) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0)); + else + { + jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0); + /* STR_PTR = STR_END - IN_UCHARS(1) */ + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); + } + + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else + { + peek_char(common, common->nlmax); + check_newlinechar(common, common->nltype, backtracks, FALSE); + } + JUMPHERE(jump[0]); + return cc; + + case OP_CIRC: + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); + OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + return cc; + + case OP_CIRCM: + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0); + OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + jump[0] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[1]); + + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else + { + skip_char_back(common); + read_char_range(common, common->nlmin, common->nlmax, TRUE); + check_newlinechar(common, common->nltype, backtracks, FALSE); + } + JUMPHERE(jump[0]); + return cc; + + case OP_REVERSE: + length = GET(cc, 0); + if (length == 0) + return cc + LINK_SIZE; + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +#ifdef SUPPORT_UTF + if (common->utf) + { + OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length); + label = LABEL(); + add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0)); + skip_char_back(common); + OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + else +#endif + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0)); + } + check_start_used_ptr(common); + return cc + LINK_SIZE; + } +SLJIT_ASSERT_STOP(); +return cc; +} + +static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr) +{ +DEFINE_COMPILER; +int length; +unsigned int c, oc, bit; +compare_context context; +struct sljit_jump *jump[3]; +jump_list *end_list; +#ifdef SUPPORT_UTF +struct sljit_label *label; +#ifdef SUPPORT_UCP +pcre_uchar propdata[5]; +#endif +#endif /* SUPPORT_UTF */ + +switch(type) + { case OP_NOT_DIGIT: case OP_DIGIT: /* Digits are usually 0-9, so it is worth to optimize them. */ - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); #if defined SUPPORT_UTF && defined COMPILE_PCRE8 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE)) read_char7_type(common, type == OP_NOT_DIGIT); @@ -5334,7 +5535,8 @@ switch(type) case OP_NOT_WHITESPACE: case OP_WHITESPACE: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); #if defined SUPPORT_UTF && defined COMPILE_PCRE8 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE)) read_char7_type(common, type == OP_NOT_WHITESPACE); @@ -5347,7 +5549,8 @@ switch(type) case OP_NOT_WORDCHAR: case OP_WORDCHAR: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); #if defined SUPPORT_UTF && defined COMPILE_PCRE8 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE)) read_char7_type(common, type == OP_NOT_WORDCHAR); @@ -5359,7 +5562,8 @@ switch(type) return cc; case OP_ANY: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); read_char_range(common, common->nlmin, common->nlmax, TRUE); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { @@ -5380,7 +5584,8 @@ switch(type) return cc; case OP_ALLANY: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); #ifdef SUPPORT_UTF if (common->utf) { @@ -5408,7 +5613,8 @@ switch(type) return cc; case OP_ANYBYTE: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); return cc; @@ -5421,13 +5627,16 @@ switch(type) propdata[2] = cc[0]; propdata[3] = cc[1]; propdata[4] = XCL_END; + if (check_str_ptr) + detect_partial_match(common, backtracks); compile_xclass_matchingpath(common, propdata, backtracks); return cc + 2; #endif #endif case OP_ANYNL: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE); jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); /* We don't need to handle soft partial matching case. */ @@ -5449,7 +5658,8 @@ switch(type) case OP_NOT_HSPACE: case OP_HSPACE: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE); add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL)); add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); @@ -5457,7 +5667,8 @@ switch(type) case OP_NOT_VSPACE: case OP_VSPACE: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE); add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL)); add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); @@ -5465,7 +5676,8 @@ switch(type) #ifdef SUPPORT_UCP case OP_EXTUNI: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); read_char(common); add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop)); @@ -5502,166 +5714,6 @@ switch(type) return cc; #endif - case OP_EODN: - /* Requires rather complex checks. */ - jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (common->mode == JIT_COMPILE) - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); - else - { - jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL)); - check_partial(common, TRUE); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - JUMPHERE(jump[1]); - } - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); - } - else if (common->nltype == NLTYPE_FIXED) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); - } - else - { - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); - jump[2] = JUMP(SLJIT_GREATER); - add_jump(compiler, backtracks, JUMP(SLJIT_LESS)); - /* Equal. */ - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - - JUMPHERE(jump[1]); - if (common->nltype == NLTYPE_ANYCRLF) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); - } - else - { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0); - read_char_range(common, common->nlmin, common->nlmax, TRUE); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); - add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); - } - JUMPHERE(jump[2]); - JUMPHERE(jump[3]); - } - JUMPHERE(jump[0]); - check_partial(common, FALSE); - return cc; - - case OP_EOD: - add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); - check_partial(common, FALSE); - return cc; - - case OP_CIRC: - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - return cc; - - case OP_CIRCM: - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); - jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - jump[0] = JUMP(SLJIT_JUMP); - JUMPHERE(jump[1]); - - add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); - } - else - { - skip_char_back(common); - read_char_range(common, common->nlmin, common->nlmax, TRUE); - check_newlinechar(common, common->nltype, backtracks, FALSE); - } - JUMPHERE(jump[0]); - return cc; - - case OP_DOLL: - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - - if (!common->endonly) - compile_char1_matchingpath(common, OP_EODN, cc, backtracks); - else - { - add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); - check_partial(common, FALSE); - } - return cc; - - case OP_DOLLM: - jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - check_partial(common, FALSE); - jump[0] = JUMP(SLJIT_JUMP); - JUMPHERE(jump[1]); - - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (common->mode == JIT_COMPILE) - add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0)); - else - { - jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0); - /* STR_PTR = STR_END - IN_UCHARS(1) */ - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - check_partial(common, TRUE); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - JUMPHERE(jump[1]); - } - - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); - } - else - { - peek_char(common, common->nlmax); - check_newlinechar(common, common->nltype, backtracks, FALSE); - } - JUMPHERE(jump[0]); - return cc; - case OP_CHAR: case OP_CHARI: length = 1; @@ -5681,7 +5733,8 @@ switch(type) return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks); } - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); #ifdef SUPPORT_UTF if (common->utf) { @@ -5713,7 +5766,8 @@ switch(type) case OP_NOT: case OP_NOTI: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); length = 1; #ifdef SUPPORT_UTF if (common->utf) @@ -5774,16 +5828,17 @@ switch(type) case OP_CLASS: case OP_NCLASS: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); #if defined SUPPORT_UTF && defined COMPILE_PCRE8 - bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255; + bit = (common->utf && is_char7_bitset((const sljit_ub *)cc, type == OP_NCLASS)) ? 127 : 255; read_char_range(common, 0, bit, type == OP_NCLASS); #else read_char_range(common, 0, 255, type == OP_NCLASS); #endif - if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks)) + if (check_class_ranges(common, (const sljit_ub *)cc, type == OP_NCLASS, FALSE, backtracks)) return cc + 32 / sizeof(pcre_uchar); #if defined SUPPORT_UTF && defined COMPILE_PCRE8 @@ -5817,40 +5872,15 @@ switch(type) if (jump[0] != NULL) JUMPHERE(jump[0]); #endif - return cc + 32 / sizeof(pcre_uchar); #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 case OP_XCLASS: + if (check_str_ptr) + detect_partial_match(common, backtracks); compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks); return cc + GET(cc, 0) - 1; #endif - - case OP_REVERSE: - length = GET(cc, 0); - if (length == 0) - return cc + LINK_SIZE; - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -#ifdef SUPPORT_UTF - if (common->utf) - { - OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length); - label = LABEL(); - add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0)); - skip_char_back(common); - OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); - JUMPTO(SLJIT_NOT_ZERO, label); - } - else -#endif - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); - add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0)); - } - check_start_used_ptr(common); - return cc + LINK_SIZE; } SLJIT_ASSERT_STOP(); return cc; @@ -5919,7 +5949,7 @@ if (context.length > 0) } /* A non-fixed length character will be checked if length == 0. */ -return compile_char1_matchingpath(common, *cc, cc + 1, backtracks); +return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE); } /* Forward definitions. */ @@ -7985,7 +8015,7 @@ switch(opcode) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); if (opcode == OP_UPTO || opcode == OP_CRRANGE) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); @@ -8007,7 +8037,7 @@ switch(opcode) else { if (opcode == OP_PLUS) - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); if (private_data_ptr == 0) allocate_stack(common, 2); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); @@ -8016,7 +8046,7 @@ switch(opcode) else OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &nomatch); + compile_char1_matchingpath(common, type, cc, &nomatch, TRUE); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); if (opcode <= OP_PLUS) JUMPTO(SLJIT_JUMP, label); @@ -8043,7 +8073,7 @@ switch(opcode) case OP_MINSTAR: case OP_MINPLUS: if (opcode == OP_MINPLUS) - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); if (private_data_ptr == 0) allocate_stack(common, 1); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); @@ -8067,14 +8097,14 @@ switch(opcode) allocate_stack(common, 1); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); if (opcode == OP_QUERY) - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); break; case OP_EXACT: OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, label); break; @@ -8083,12 +8113,12 @@ switch(opcode) case OP_POSPLUS: case OP_POSUPTO: if (opcode == OP_POSPLUS) - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); if (opcode == OP_POSUPTO) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &nomatch); + compile_char1_matchingpath(common, type, cc, &nomatch, TRUE); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); if (opcode != OP_POSUPTO) JUMPTO(SLJIT_JUMP, label); @@ -8103,7 +8133,7 @@ switch(opcode) case OP_POSQUERY: OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); - compile_char1_matchingpath(common, type, cc, &nomatch); + compile_char1_matchingpath(common, type, cc, &nomatch, TRUE); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); set_jumps(nomatch, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); @@ -8113,7 +8143,7 @@ switch(opcode) /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */ OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, label); @@ -8124,7 +8154,7 @@ switch(opcode) } OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &nomatch); + compile_char1_matchingpath(common, type, cc, &nomatch, TRUE); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); if (max == 0) JUMPTO(SLJIT_JUMP, label); @@ -8298,6 +8328,16 @@ while (cc < ccend) case OP_SOM: case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_DOLL: + case OP_DOLLM: + case OP_CIRC: + case OP_CIRCM: + case OP_REVERSE: + cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + break; + case OP_NOT_DIGIT: case OP_DIGIT: case OP_NOT_WHITESPACE: @@ -8315,16 +8355,9 @@ while (cc < ccend) case OP_NOT_VSPACE: case OP_VSPACE: case OP_EXTUNI: - case OP_EODN: - case OP_EOD: - case OP_CIRC: - case OP_CIRCM: - case OP_DOLL: - case OP_DOLLM: case OP_NOT: case OP_NOTI: - case OP_REVERSE: - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); break; case OP_SET_SOM: @@ -8341,7 +8374,7 @@ while (cc < ccend) if (common->mode == JIT_COMPILE) cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); break; case OP_STAR: @@ -8417,7 +8450,7 @@ while (cc < ccend) if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE) cc = compile_iterator_matchingpath(common, cc, parent); else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); break; #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 @@ -8425,7 +8458,7 @@ while (cc < ccend) if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) cc = compile_iterator_matchingpath(common, cc, parent); else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); break; #endif @@ -8658,7 +8691,7 @@ switch(opcode) case OP_MINSTAR: case OP_MINPLUS: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - compile_char1_matchingpath(common, type, cc, &jumplist); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); set_jumps(jumplist, LABEL()); @@ -8676,7 +8709,7 @@ switch(opcode) set_jumps(current->topbacktracks, label); } OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - compile_char1_matchingpath(common, type, cc, &jumplist); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); OP1(SLJIT_MOV, TMP1, 0, base, offset1); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); @@ -8714,7 +8747,7 @@ switch(opcode) OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); - compile_char1_matchingpath(common, type, cc, &jumplist); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); set_jumps(jumplist, LABEL()); JUMPHERE(jump); -- cgit v1.2.1