diff options
author | zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069> | 2019-08-26 12:02:03 +0000 |
---|---|---|
committer | zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069> | 2019-08-26 12:02:03 +0000 |
commit | 65a57a788c17f1126b8c152e43248f3d02fb595b (patch) | |
tree | 71e7797152ff754e58873b089dbb37ced8a93bbe /src/pcre2_jit_compile.c | |
parent | ec098f6b898334be0674dbadc9fd67a0532fa0eb (diff) | |
download | pcre2-65a57a788c17f1126b8c152e43248f3d02fb595b.tar.gz |
Move JIT simd into a separate header file.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1158 6239d852-aaf2-0410-a92c-79f79f948069
Diffstat (limited to 'src/pcre2_jit_compile.c')
-rw-r--r-- | src/pcre2_jit_compile.c | 605 |
1 files changed, 10 insertions, 595 deletions
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index ebc1779..8cbd8f9 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -5519,590 +5519,11 @@ CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label); } #endif -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) +#include "pcre2_jit_simd_inc.h" -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg) -{ -#if PCRE2_CODE_UNIT_WIDTH == 8 -OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0); -return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80); -#elif PCRE2_CODE_UNIT_WIDTH == 16 -OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00); -return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00); -#else -#error "Unknown code width" -#endif -} -#endif - -static sljit_s32 character_to_int32(PCRE2_UCHAR chr) -{ -sljit_u32 value = chr; -#if PCRE2_CODE_UNIT_WIDTH == 8 -#define SSE2_COMPARE_TYPE_INDEX 0 -return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value); -#elif PCRE2_CODE_UNIT_WIDTH == 16 -#define SSE2_COMPARE_TYPE_INDEX 1 -return (sljit_s32)((value << 16) | value); -#elif PCRE2_CODE_UNIT_WIDTH == 32 -#define SSE2_COMPARE_TYPE_INDEX 2 -return (sljit_s32)(value); -#else -#error "Unsupported unit width" -#endif -} - -static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg, sljit_s8 offset) -{ -sljit_u8 instruction[5]; - -SLJIT_ASSERT(dst_xmm_reg < 8); -SLJIT_ASSERT(src_general_reg < 8); - -/* MOVDQA xmm1, xmm2/m128 */ -instruction[0] = ((sljit_u8)offset & 0xf) == 0 ? 0x66 : 0xf3; -instruction[1] = 0x0f; -instruction[2] = 0x6f; - -if (offset == 0) - { - instruction[3] = (dst_xmm_reg << 3) | src_general_reg; - sljit_emit_op_custom(compiler, instruction, 4); - return; - } - -instruction[3] = 0x40 | (dst_xmm_reg << 3) | src_general_reg; -instruction[4] = (sljit_u8)offset; -sljit_emit_op_custom(compiler, instruction, 5); -} - -typedef enum { - sse2_compare_match1, - sse2_compare_match1i, - sse2_compare_match2, -} sse2_compare_type; - -static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type, - int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) -{ -sljit_u8 instruction[4]; -instruction[0] = 0x66; -instruction[1] = 0x0f; - -SLJIT_ASSERT(step >= 0 && step <= 3); +#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD -if (compare_type != sse2_compare_match2) - { - if (step == 0) - { - if (compare_type == sse2_compare_match1i) - { - /* POR xmm1, xmm2/m128 */ - /* instruction[0] = 0x66; */ - /* instruction[1] = 0x0f; */ - instruction[2] = 0xeb; - instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind; - sljit_emit_op_custom(compiler, instruction, 4); - } - return; - } - - if (step != 2) - return; - - /* PCMPEQB/W/D xmm1, xmm2/m128 */ - /* instruction[0] = 0x66; */ - /* instruction[1] = 0x0f; */ - instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; - instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; - sljit_emit_op_custom(compiler, instruction, 4); - return; - } - -switch (step) - { - case 0: - /* MOVDQA xmm1, xmm2/m128 */ - /* instruction[0] = 0x66; */ - /* instruction[1] = 0x0f; */ - instruction[2] = 0x6f; - instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind; - sljit_emit_op_custom(compiler, instruction, 4); - return; - - case 1: - /* PCMPEQB/W/D xmm1, xmm2/m128 */ - /* instruction[0] = 0x66; */ - /* instruction[1] = 0x0f; */ - instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; - instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; - sljit_emit_op_custom(compiler, instruction, 4); - return; - - case 2: - /* PCMPEQB/W/D xmm1, xmm2/m128 */ - /* instruction[0] = 0x66; */ - /* instruction[1] = 0x0f; */ - instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; - instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind; - sljit_emit_op_custom(compiler, instruction, 4); - return; - - case 3: - /* POR xmm1, xmm2/m128 */ - /* instruction[0] = 0x66; */ - /* instruction[1] = 0x0f; */ - instruction[2] = 0xeb; - instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind; - sljit_emit_op_custom(compiler, instruction, 4); - return; - } -} - -static void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) -{ -DEFINE_COMPILER; -struct sljit_label *start; -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -struct sljit_label *restart; -#endif -struct sljit_jump *quit; -struct sljit_jump *partial_quit[2]; -sse2_compare_type compare_type = sse2_compare_match1; -sljit_u8 instruction[8]; -sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); -sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); -sljit_s32 data_ind = 0; -sljit_s32 tmp_ind = 1; -sljit_s32 cmp1_ind = 2; -sljit_s32 cmp2_ind = 3; -sljit_u32 bit = 0; -int i; - -SLJIT_UNUSED_ARG(offset); - -if (char1 != char2) - { - bit = char1 ^ char2; - compare_type = sse2_compare_match1i; - - if (!is_powerof2(bit)) - { - bit = 0; - compare_type = sse2_compare_match2; - } - } - -partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -if (common->mode == PCRE2_JIT_COMPLETE) - add_jump(compiler, &common->failed_match, partial_quit[0]); - -/* First part (unaligned start) */ - -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); - -SLJIT_ASSERT(tmp1_reg_ind < 8); - -/* MOVD xmm, r/m32 */ -instruction[0] = 0x66; -instruction[1] = 0x0f; -instruction[2] = 0x6e; -instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -if (char1 != char2) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); - - /* MOVD xmm, r/m32 */ - instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind; - sljit_emit_op_custom(compiler, instruction, 4); - } - -OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); - -/* PSHUFD xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x70; -instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind; -instruction[4] = 0; -sljit_emit_op_custom(compiler, instruction, 5); - -if (char1 != char2) - { - /* PSHUFD xmm1, xmm2/m128, imm8 */ - instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind; - sljit_emit_op_custom(compiler, instruction, 5); - } - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -restart = LABEL(); -#endif -OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); - -load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); -for (i = 0; i < 4; i++) - fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); - -/* PMOVMSKB reg, xmm */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xd7; -instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); - -quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); - -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); - -/* Second part (aligned) */ -start = LABEL(); - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); - -partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -if (common->mode == PCRE2_JIT_COMPLETE) - add_jump(compiler, &common->failed_match, partial_quit[1]); - -load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); -for (i = 0; i < 4; i++) - fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); - -/* PMOVMSKB reg, xmm */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xd7; -instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); - -JUMPHERE(quit); - -/* BSF r32, r/m32 */ -instruction[0] = 0x0f; -instruction[1] = 0xbc; -instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; -sljit_emit_op_custom(compiler, instruction, 3); - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - -if (common->mode != PCRE2_JIT_COMPLETE) - { - JUMPHERE(partial_quit[0]); - JUMPHERE(partial_quit[1]); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); - CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); - } -else - add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -if (common->utf && offset > 0) - { - SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); - - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); - - quit = jump_if_utf_char_start(compiler, TMP1); - - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); - JUMPTO(SLJIT_JUMP, restart); - - JUMPHERE(quit); - } -#endif -} - -#ifndef _WIN64 - -static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_sse2_offset(void) -{ -#if PCRE2_CODE_UNIT_WIDTH == 8 -return 15; -#elif PCRE2_CODE_UNIT_WIDTH == 16 -return 7; -#elif PCRE2_CODE_UNIT_WIDTH == 32 -return 3; -#else -#error "Unsupported unit width" -#endif -} - -static void fast_forward_char_pair_sse2(compiler_common *common, sljit_s32 offs1, - PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) -{ -DEFINE_COMPILER; -sse2_compare_type compare1_type = sse2_compare_match1; -sse2_compare_type compare2_type = sse2_compare_match1; -sljit_u32 bit1 = 0; -sljit_u32 bit2 = 0; -sljit_u32 diff = IN_UCHARS(offs1 - offs2); -sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); -sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2); -sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); -sljit_s32 data1_ind = 0; -sljit_s32 data2_ind = 1; -sljit_s32 tmp1_ind = 2; -sljit_s32 tmp2_ind = 3; -sljit_s32 cmp1a_ind = 4; -sljit_s32 cmp1b_ind = 5; -sljit_s32 cmp2a_ind = 6; -sljit_s32 cmp2b_ind = 7; -struct sljit_label *start; -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -struct sljit_label *restart; -#endif -struct sljit_jump *jump[2]; -sljit_u8 instruction[8]; -int i; - -SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); -SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_sse2_offset())); -SLJIT_ASSERT(tmp1_reg_ind < 8 && tmp2_reg_ind == 1); - -/* Initialize. */ -if (common->match_end_ptr != 0) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); - OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); - - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); - CMOV(SLJIT_LESS, STR_END, TMP1, 0); - } - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); -add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - -/* MOVD xmm, r/m32 */ -instruction[0] = 0x66; -instruction[1] = 0x0f; -instruction[2] = 0x6e; - -if (char1a == char1b) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); -else - { - bit1 = char1a ^ char1b; - if (is_powerof2(bit1)) - { - compare1_type = sse2_compare_match1i; - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1)); - } - else - { - compare1_type = sse2_compare_match2; - bit1 = 0; - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b)); - } - } - -instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_reg_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -if (char1a != char1b) - { - instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_reg_ind; - sljit_emit_op_custom(compiler, instruction, 4); - } - -if (char2a == char2b) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); -else - { - bit2 = char2a ^ char2b; - if (is_powerof2(bit2)) - { - compare2_type = sse2_compare_match1i; - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2)); - } - else - { - compare2_type = sse2_compare_match2; - bit2 = 0; - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b)); - } - } - -instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_reg_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -if (char2a != char2b) - { - instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_reg_ind; - sljit_emit_op_custom(compiler, instruction, 4); - } - -/* PSHUFD xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x70; -instruction[4] = 0; - -instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind; -sljit_emit_op_custom(compiler, instruction, 5); - -if (char1a != char1b) - { - instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind; - sljit_emit_op_custom(compiler, instruction, 5); - } - -instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind; -sljit_emit_op_custom(compiler, instruction, 5); - -if (char2a != char2b) - { - instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind; - sljit_emit_op_custom(compiler, instruction, 5); - } - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -restart = LABEL(); -#endif - -OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff); -OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); -OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); - -load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0); - -jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0); - -load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff); -jump[1] = JUMP(SLJIT_JUMP); - -JUMPHERE(jump[0]); - -/* MOVDQA xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x6f; -instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -/* PSLLDQ xmm1, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x73; -instruction[3] = 0xc0 | (7 << 3) | data2_ind; -instruction[4] = diff; -sljit_emit_op_custom(compiler, instruction, 5); - -JUMPHERE(jump[1]); - -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); - -for (i = 0; i < 4; i++) - { - fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); - fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); - } - -/* PAND xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xdb; -instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -/* PMOVMSKB reg, xmm */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xd7; -instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0; -sljit_emit_op_custom(compiler, instruction, 4); - -/* Ignore matches before the first STR_PTR. */ -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); - -jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); - -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); - -/* Main loop. */ -start = LABEL(); - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); -add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - -load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0); -load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff); - -for (i = 0; i < 4; i++) - { - fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind); - fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind); - } - -/* PAND xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xdb; -instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -/* PMOVMSKB reg, xmm */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xd7; -instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0; -sljit_emit_op_custom(compiler, instruction, 4); - -CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); - -JUMPHERE(jump[0]); - -/* BSF r32, r/m32 */ -instruction[0] = 0x0f; -instruction[1] = 0xbc; -instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; -sljit_emit_op_custom(compiler, instruction, 3); - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - -add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - -if (common->match_end_ptr != 0) - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -if (common->utf) - { - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); - - jump[0] = jump_if_utf_char_start(compiler, TMP1); - - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart); - - add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP)); - - JUMPHERE(jump[0]); - } -#endif - -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); - -if (common->match_end_ptr != 0) - OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); -} - -static BOOL check_fast_forward_char_pair_sse2(compiler_common *common, fast_forward_char_data *chars, int max) +static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max) { sljit_s32 i, j, max_i = 0, max_j = 0; sljit_u32 max_pri = 0; @@ -6143,15 +5564,11 @@ static BOOL check_fast_forward_char_pair_sse2(compiler_common *common, fast_forw if (max_pri == 0) return FALSE; -fast_forward_char_pair_sse2(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]); +fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]); return TRUE; } -#endif - -#undef SSE2_COMPARE_TYPE_INDEX - -#endif +#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */ static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) { @@ -6179,13 +5596,11 @@ if (has_match_end) CMOV(SLJIT_GREATER, STR_END, TMP1, 0); } -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) - -/* SSE2 accelerated first character search. */ +#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD -if (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) +if (JIT_HAS_FAST_FORWARD_CHAR_SIMD) { - fast_forward_first_char2_sse2(common, char1, char2, offset); + fast_forward_char_simd(common, char1, char2, offset); if (offset > 0) OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); @@ -6292,8 +5707,8 @@ for (i = 0; i < max; i++) chars[i].last_count = (chars[i].count == 255) ? 0 : 1; } -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) && !(defined _WIN64) -if (sljit_has_cpu_feature(SLJIT_HAS_SSE2) && check_fast_forward_char_pair_sse2(common, chars, max)) +#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD +if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max)) return TRUE; #endif |