summaryrefslogtreecommitdiff
path: root/src/pcre2_jit_compile.c
diff options
context:
space:
mode:
authorzherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069>2019-06-18 08:29:43 +0000
committerzherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069>2019-06-18 08:29:43 +0000
commitf46681e3f1c36417fb9fe4a6b4b474aabaf8c2ac (patch)
tree5adc481c75f77435ab352624b47e463549851da7 /src/pcre2_jit_compile.c
parent42a19af33b46e32374f99578a2e4eb454aece59c (diff)
downloadpcre2-f46681e3f1c36417fb9fe4a6b4b474aabaf8c2ac.tar.gz
Improve first character search in JIT (BSF instruction is slow).
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1109 6239d852-aaf2-0410-a92c-79f79f948069
Diffstat (limited to 'src/pcre2_jit_compile.c')
-rw-r--r--src/pcre2_jit_compile.c110
1 files changed, 46 insertions, 64 deletions
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index fd3b198..1a28dea 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -5627,8 +5627,8 @@ struct sljit_label *restart;
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
sljit_u8 instruction[8];
-sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
-sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
sljit_s32 data_ind = 0;
sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2;
@@ -5652,13 +5652,13 @@ if (common->mode == PCRE2_JIT_COMPLETE)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
-SLJIT_ASSERT(tmp1_ind < 8);
+SLJIT_ASSERT(tmp1_reg_ind < 8);
/* MOVD xmm, r/m32 */
instruction[0] = 0x66;
instruction[1] = 0x0f;
instruction[2] = 0x6e;
-instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_ind;
+instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 4);
if (char1 != char2)
@@ -5666,7 +5666,7 @@ if (char1 != char2)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
/* MOVD xmm, r/m32 */
- instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_ind;
+ instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 4);
}
@@ -5676,14 +5676,14 @@ OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x70;
-instruction[3] = 0xc0 | (cmp1_ind << 3) | 2;
+instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind;
instruction[4] = 0;
sljit_emit_op_custom(compiler, instruction, 5);
if (char1 != char2)
{
/* PSHUFD xmm1, xmm2/m128, imm8 */
- instruction[3] = 0xc0 | (cmp2_ind << 3) | 3;
+ instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind;
sljit_emit_op_custom(compiler, instruction, 5);
}
@@ -5693,59 +5693,52 @@ restart = LABEL();
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
-load_from_mem_sse2(compiler, data_ind, str_ptr_ind);
+load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind);
fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
/* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
-instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
+instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
sljit_emit_op_custom(compiler, instruction, 4);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
-/* BSF r32, r/m32 */
-instruction[0] = 0x0f;
-instruction[1] = 0xbc;
-instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
-sljit_emit_op_custom(compiler, instruction, 3);
-sljit_set_current_flags(compiler, SLJIT_SET_Z);
-
-quit = JUMP(SLJIT_NOT_ZERO);
+quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+/* Second part (aligned) */
start = LABEL();
+
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
add_jump(compiler, &common->failed_match, partial_quit[1]);
-/* Second part (aligned) */
-
-load_from_mem_sse2(compiler, 0, str_ptr_ind);
+load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind);
fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
/* PMOVMSKB reg, xmm */
-instruction[0] = 0x66;
-instruction[1] = 0x0f;
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
-instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
+instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
sljit_emit_op_custom(compiler, instruction, 4);
+CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
+
+JUMPHERE(quit);
+
/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
-instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
+instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 3);
-sljit_set_current_flags(compiler, SLJIT_SET_Z);
-JUMPTO(SLJIT_ZERO, start);
-
-JUMPHERE(quit);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
if (common->mode != PCRE2_JIT_COMPLETE)
@@ -5799,9 +5792,9 @@ DEFINE_COMPILER;
sljit_u32 bit1 = 0;
sljit_u32 bit2 = 0;
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
-sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
-sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
-sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
+sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1;
sljit_s32 tmp_ind = 2;
@@ -5819,7 +5812,7 @@ sljit_u8 instruction[8];
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_sse2_offset()));
-SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
+SLJIT_ASSERT(tmp1_reg_ind < 8 && tmp2_reg_ind == 1);
/* Initialize. */
if (common->match_end_ptr != 0)
@@ -5858,12 +5851,12 @@ else
}
}
-instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_ind;
+instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 4);
if (char1a != char1b)
{
- instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_ind;
+ instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_reg_ind;
sljit_emit_op_custom(compiler, instruction, 4);
}
@@ -5885,12 +5878,12 @@ else
}
}
-instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_ind;
+instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 4);
if (char2a != char2b)
{
- instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_ind;
+ instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_reg_ind;
sljit_emit_op_custom(compiler, instruction, 4);
}
@@ -5927,11 +5920,11 @@ OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, ~0xf);
-load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);
+load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind);
jump[0] = CMP(SLJIT_EQUAL, STR_PTR, 0, TMP1, 0);
-load_from_mem_sse2(compiler, data2_ind, tmp1_ind);
+load_from_mem_sse2(compiler, data2_ind, tmp1_reg_ind);
/* MOVDQA xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
@@ -5940,7 +5933,7 @@ instruction[2] = 0x6f;
instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, instruction, 4);
-/* PSLLDQ xmm1, xmm2/m128, imm8 */
+/* PSLLDQ xmm1, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x73;
@@ -5948,7 +5941,7 @@ instruction[3] = 0xc0 | (7 << 3) | tmp_ind;
instruction[4] = diff;
sljit_emit_op_custom(compiler, instruction, 5);
-/* PSRLDQ xmm1, xmm2/m128, imm8 */
+/* PSRLDQ xmm1, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
/* instruction[2] = 0x73; */
@@ -5974,7 +5967,7 @@ instruction[2] = 0x6f;
instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, instruction, 4);
-/* PSLLDQ xmm1, xmm2/m128, imm8 */
+/* PSLLDQ xmm1, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x73;
@@ -6000,38 +5993,28 @@ sljit_emit_op_custom(compiler, instruction, 4);
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
-instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
+instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);
/* Ignore matches before the first STR_PTR. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
-/* BSF r32, r/m32 */
-instruction[0] = 0x0f;
-instruction[1] = 0xbc;
-instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
-sljit_emit_op_custom(compiler, instruction, 3);
-sljit_set_current_flags(compiler, SLJIT_SET_Z);
-
-jump[0] = JUMP(SLJIT_NOT_ZERO);
+jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Main loop. */
-instruction[0] = 0x66;
-instruction[1] = 0x0f;
-
start = LABEL();
-load_from_mem_sse2(compiler, data2_ind, str_ptr_ind);
+load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
-load_from_mem_sse2(compiler, data1_ind, str_ptr_ind);
+load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind);
-/* PSRLDQ xmm1, xmm2/m128, imm8 */
+/* PSRLDQ xmm1, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x73;
@@ -6046,7 +6029,7 @@ instruction[2] = 0x6f;
instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind;
sljit_emit_op_custom(compiler, instruction, 4);
-/* PSLLDQ xmm1, xmm2/m128, imm8 */
+/* PSLLDQ xmm1, imm8 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x73;
@@ -6075,19 +6058,18 @@ sljit_emit_op_custom(compiler, instruction, 4);
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xd7;
-instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
+instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0;
sljit_emit_op_custom(compiler, instruction, 4);
+CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
+
+JUMPHERE(jump[0]);
+
/* BSF r32, r/m32 */
instruction[0] = 0x0f;
instruction[1] = 0xbc;
-instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
+instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
sljit_emit_op_custom(compiler, instruction, 3);
-sljit_set_current_flags(compiler, SLJIT_SET_Z);
-
-JUMPTO(SLJIT_ZERO, start);
-
-JUMPHERE(jump[0]);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);