diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-11-04 06:11:18 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-11-04 06:11:18 +0000 |
commit | 1d5761adcea0c5fb8c71f9c98b8d60c42ce2c9b1 (patch) | |
tree | 8e9803cd51b3c0d211e174d1dddb87d3f8aa1bc5 /sljit | |
parent | fc3be1f1b44baf703b36aed18cadfc3833908ef3 (diff) | |
download | pcre-1d5761adcea0c5fb8c71f9c98b8d60c42ce2c9b1.tar.gz |
JIT compiler update.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1201 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'sljit')
-rw-r--r-- | sljit/sljitLir.c | 11 | ||||
-rw-r--r-- | sljit/sljitNativeX86_32.c | 162 | ||||
-rw-r--r-- | sljit/sljitNativeX86_64.c | 354 | ||||
-rw-r--r-- | sljit/sljitNativeX86_common.c | 1396 |
4 files changed, 1064 insertions, 859 deletions
diff --git a/sljit/sljitLir.c b/sljit/sljitLir.c index e01f0a1..e71e49f 100644 --- a/sljit/sljitLir.c +++ b/sljit/sljitLir.c @@ -198,7 +198,7 @@ #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 #ifdef _WIN64 -#define FIXED_LOCALS_OFFSET (4 * sizeof(sljit_sw)) +#define FIXED_LOCALS_OFFSET ((4 + 2) * sizeof(sljit_sw)) #else #define FIXED_LOCALS_OFFSET (sizeof(sljit_sw)) #endif @@ -1115,7 +1115,7 @@ static SLJIT_INLINE void check_sljit_emit_cmp(struct sljit_compiler *compiler, s SLJIT_UNUSED_ARG(src2); SLJIT_UNUSED_ARG(src2w); - SLJIT_ASSERT(!(type & ~(0xff | SLJIT_INT_OP | SLJIT_REWRITABLE_JUMP))); + SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_INT_OP))); SLJIT_ASSERT((type & 0xff) >= SLJIT_C_EQUAL && (type & 0xff) <= SLJIT_C_SIG_LESS_EQUAL); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) FUNCTION_CHECK_SRC(src1, src1w); @@ -1170,6 +1170,13 @@ static SLJIT_INLINE void check_sljit_emit_ijump(struct sljit_compiler *compiler, SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + return; + } +#endif + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) FUNCTION_CHECK_SRC(src, srcw); diff --git a/sljit/sljitNativeX86_32.c b/sljit/sljitNativeX86_32.c index 55184ff..2c8a8b0 100644 --- a/sljit/sljitNativeX86_32.c +++ b/sljit/sljitNativeX86_32.c @@ -28,28 +28,28 @@ static sljit_si emit_do_imm(struct sljit_compiler *compiler, sljit_ub opcode, sljit_sw imm) { - sljit_ub *buf; + sljit_ub *inst; - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw)); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw)); + FAIL_IF(!inst); INC_SIZE(1 + sizeof(sljit_sw)); - *buf++ = opcode; - *(sljit_sw*)buf = imm; + *inst++ = opcode; + *(sljit_sw*)inst = imm; return SLJIT_SUCCESS; } static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type) { if (type == SLJIT_JUMP) { - *code_ptr++ = 0xe9; + *code_ptr++ = JMP_i32; jump->addr++; } else if (type >= SLJIT_FAST_CALL) { - *code_ptr++ = 0xe8; + *code_ptr++ = CALL_i32; jump->addr++; } else { - *code_ptr++ = 0x0f; + *code_ptr++ = GROUP_0F; *code_ptr++ = get_jump_code(type); jump->addr += 2; } @@ -67,7 +67,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil { sljit_si size; sljit_si locals_offset; - sljit_ub *buf; + sljit_ub *inst; CHECK_ERROR(); check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size); @@ -85,15 +85,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil #else size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (2 + args * 3) : 0); #endif - buf = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); INC_SIZE(size); PUSH_REG(reg_map[TMP_REGISTER]); #if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (args > 0) { - *buf++ = 0x8b; - *buf++ = 0xc4 | (reg_map[TMP_REGISTER] << 3); + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[TMP_REGISTER] << 3) | 0x4 /* esp */; } #endif if (saveds > 2) @@ -105,34 +105,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (args > 0) { - *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[SLJIT_TEMPORARY_REG3]; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[SLJIT_TEMPORARY_REG3]; } if (args > 1) { - *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[SLJIT_TEMPORARY_REG2]; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[SLJIT_TEMPORARY_REG2]; } if (args > 2) { - *buf++ = 0x8b; - *buf++ = 0x44 | (reg_map[SLJIT_SAVED_REG3] << 3); - *buf++ = 0x24; - *buf++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */ + *inst++ = MOV_r_rm; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x4 /* esp */; + *inst++ = 0x24; + *inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */ } #else if (args > 0) { - *buf++ = 0x8b; - *buf++ = 0x40 | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[TMP_REGISTER]; - *buf++ = sizeof(sljit_sw) * 2; + *inst++ = MOV_r_rm; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[TMP_REGISTER]; + *inst++ = sizeof(sljit_sw) * 2; } if (args > 1) { - *buf++ = 0x8b; - *buf++ = 0x40 | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[TMP_REGISTER]; - *buf++ = sizeof(sljit_sw) * 3; + *inst++ = MOV_r_rm; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[TMP_REGISTER]; + *inst++ = sizeof(sljit_sw) * 3; } if (args > 2) { - *buf++ = 0x8b; - *buf++ = 0x40 | (reg_map[SLJIT_SAVED_REG3] << 3) | reg_map[TMP_REGISTER]; - *buf++ = sizeof(sljit_sw) * 4; + *inst++ = MOV_r_rm; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | reg_map[TMP_REGISTER]; + *inst++ = sizeof(sljit_sw) * 4; } #endif @@ -148,14 +148,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil #ifdef _WIN32 if (local_size > 1024) { - FAIL_IF(emit_do_imm(compiler, 0xb8 + reg_map[SLJIT_TEMPORARY_REG1], local_size)); + FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_TEMPORARY_REG1], local_size)); FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); } #endif compiler->local_size = local_size; SLJIT_ASSERT(local_size > 0); - return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d, + return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size); return SLJIT_SUCCESS; @@ -189,7 +189,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { sljit_si size; - sljit_ub *buf; + sljit_ub *inst; CHECK_ERROR(); check_sljit_emit_return(compiler, op, src, srcw); @@ -199,7 +199,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); SLJIT_ASSERT(compiler->local_size > 0); - FAIL_IF(emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05, + FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); size = 2 + (compiler->saveds <= 3 ? compiler->saveds : 3); @@ -210,8 +210,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi if (compiler->args > 0) size += 2; #endif - buf = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); INC_SIZE(size); @@ -224,12 +224,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi POP_REG(reg_map[TMP_REGISTER]); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (compiler->args > 2) - RETN(sizeof(sljit_sw)); + RET_I16(sizeof(sljit_sw)); else RET(); #else if (compiler->args > 0) - RETN(compiler->args * sizeof(sljit_sw)); + RET_I16(compiler->args * sizeof(sljit_sw)); else RET(); #endif @@ -248,7 +248,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si /* The general operand (not immediate). */ sljit_si b, sljit_sw immb) { - sljit_ub *buf; + sljit_ub *inst; sljit_ub *buf_ptr; sljit_si flags = size & ~0xf; sljit_si inst_size; @@ -322,26 +322,26 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si else SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); - buf = (sljit_ub*)ensure_buf(compiler, 1 + inst_size); - PTR_FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!inst); /* Encoding the byte. */ INC_SIZE(inst_size); #if (defined SLJIT_SSE2 && SLJIT_SSE2) if (flags & EX86_PREF_F2) - *buf++ = 0xf2; + *inst++ = 0xf2; if (flags & EX86_PREF_F3) - *buf++ = 0xf3; + *inst++ = 0xf3; #endif if (flags & EX86_PREF_66) - *buf++ = 0x66; + *inst++ = 0x66; - buf_ptr = buf + size; + buf_ptr = inst + size; /* Encode mod/rm byte. */ if (!(flags & EX86_SHIFT_INS)) { if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) - *buf = (flags & EX86_BYTE_ARG) ? 0x83 : 0x81; + *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; if ((a & SLJIT_IMM) || (a == 0)) *buf_ptr = 0; @@ -358,19 +358,19 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si else { if (a & SLJIT_IMM) { if (imma == 1) - *buf = 0xd1; + *inst = GROUP_SHIFT_1; else - *buf = 0xc1; + *inst = GROUP_SHIFT_N; } else - *buf = 0xd3; + *inst = GROUP_SHIFT_CL; *buf_ptr = 0; } if (!(b & SLJIT_MEM)) #if (defined SLJIT_SSE2 && SLJIT_SSE2) - *buf_ptr++ |= 0xc0 + ((!(flags & EX86_SSE2)) ? reg_map[b] : b); + *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_map[b] : b); #else - *buf_ptr++ |= 0xc0 + reg_map[b]; + *buf_ptr++ |= MOD_REG + reg_map[b]; #endif else if ((b & 0x0f) != SLJIT_UNUSED) { if ((b & 0xf0) == SLJIT_UNUSED || (b & 0xf0) == (SLJIT_LOCALS_REG << 4)) { @@ -417,7 +417,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si *(sljit_sw*)buf_ptr = imma; } - return !(flags & EX86_SHIFT_INS) ? buf : (buf + 1); + return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); } /* --------------------------------------------------------------------- */ @@ -426,20 +426,20 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type) { - sljit_ub *buf; + sljit_ub *inst; #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - buf = (sljit_ub*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2); + FAIL_IF(!inst); INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2); if (type >= SLJIT_CALL3) PUSH_REG(reg_map[SLJIT_TEMPORARY_REG3]); - *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_map[SLJIT_TEMPORARY_REG3] << 3) | reg_map[SLJIT_TEMPORARY_REG1]; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_TEMPORARY_REG3] << 3) | reg_map[SLJIT_TEMPORARY_REG1]; #else - buf = (sljit_ub*)ensure_buf(compiler, type - SLJIT_CALL0 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, type - SLJIT_CALL0 + 1); + FAIL_IF(!inst); INC_SIZE(type - SLJIT_CALL0); if (type >= SLJIT_CALL3) PUSH_REG(reg_map[SLJIT_TEMPORARY_REG3]); @@ -452,7 +452,7 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) { - sljit_ub *buf; + sljit_ub *inst; CHECK_ERROR(); check_sljit_emit_fast_enter(compiler, dst, dstw); @@ -461,23 +461,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c CHECK_EXTRA_REGS(dst, dstw, (void)0); if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); INC_SIZE(1); POP_REG(reg_map[dst]); return SLJIT_SUCCESS; } else if (dst & SLJIT_MEM) { - buf = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); - FAIL_IF(!buf); - *buf++ = 0x8f; + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = POP_rm; return SLJIT_SUCCESS; } /* For UNUSED dst. Uncommon, but possible. */ - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); INC_SIZE(1); POP_REG(reg_map[TMP_REGISTER]); @@ -486,7 +486,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) { - sljit_ub *buf; + sljit_ub *inst; CHECK_ERROR(); check_sljit_emit_fast_return(compiler, src, srcw); @@ -495,31 +495,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * CHECK_EXTRA_REGS(src, srcw, (void)0); if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1); + FAIL_IF(!inst); INC_SIZE(1 + 1); PUSH_REG(reg_map[src]); } else if (src & SLJIT_MEM) { - buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); - FAIL_IF(!buf); - *buf++ = 0xff; - *buf |= 6 << 3; + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_FF; + *inst |= PUSH_rm; - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); INC_SIZE(1); } else { /* SLJIT_IMM. */ - buf = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1); + FAIL_IF(!inst); INC_SIZE(5 + 1); - *buf++ = 0x68; - *(sljit_sw*)buf = srcw; - buf += sizeof(sljit_sw); + *inst++ = PUSH_i32; + *(sljit_sw*)inst = srcw; + inst += sizeof(sljit_sw); } RET(); diff --git a/sljit/sljitNativeX86_64.c b/sljit/sljitNativeX86_64.c index 456a1e3..658a361 100644 --- a/sljit/sljitNativeX86_64.c +++ b/sljit/sljitNativeX86_64.c @@ -28,27 +28,28 @@ static sljit_si emit_load_imm64(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm) { - sljit_ub *buf; + sljit_ub *inst; - buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw)); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw)); + FAIL_IF(!inst); INC_SIZE(2 + sizeof(sljit_sw)); - *buf++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); - *buf++ = 0xb8 + (reg_map[reg] & 0x7); - *(sljit_sw*)buf = imm; + *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); + *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7); + *(sljit_sw*)inst = imm; return SLJIT_SUCCESS; } static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type) { if (type < SLJIT_JUMP) { + /* Invert type. */ *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10; *code_ptr++ = 10 + 3; } SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first); *code_ptr++ = REX_W | REX_B; - *code_ptr++ = 0xb8 + 1; + *code_ptr++ = MOV_r_i32 + 1; jump->addr = (sljit_uw)code_ptr; if (jump->flags & JUMP_LABEL) @@ -58,8 +59,8 @@ static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ code_ptr += sizeof(sljit_sw); *code_ptr++ = REX_B; - *code_ptr++ = 0xff; - *code_ptr++ = (type >= SLJIT_FAST_CALL) ? 0xd1 /* call */ : 0xe1 /* jmp */; + *code_ptr++ = GROUP_FF; + *code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1); return code_ptr; } @@ -69,18 +70,18 @@ static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si sljit_sw delta = addr - ((sljit_sw)code_ptr + 1 + sizeof(sljit_si)); if (delta <= SLJIT_W(0x7fffffff) && delta >= SLJIT_W(-0x80000000)) { - *code_ptr++ = (type == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */; + *code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32; *(sljit_sw*)code_ptr = delta; } else { SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second); *code_ptr++ = REX_W | REX_B; - *code_ptr++ = 0xb8 + 1; + *code_ptr++ = MOV_r_i32 + 1; *(sljit_sw*)code_ptr = addr; code_ptr += sizeof(sljit_sw); *code_ptr++ = REX_B; - *code_ptr++ = 0xff; - *code_ptr++ = (type == 2) ? 0xd1 /* call */ : 0xe1 /* jmp */; + *code_ptr++ = GROUP_FF; + *code_ptr++ = (type == 2) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1); } return code_ptr; @@ -89,7 +90,7 @@ static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si temporaries, sljit_si saveds, sljit_si local_size) { sljit_si size, pushed_size; - sljit_ub *buf; + sljit_ub *inst; CHECK_ERROR(); check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size); @@ -117,24 +118,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil #endif size += args * 3; if (size > 0) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); INC_SIZE(size); if (saveds >= 5) { SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg); - *buf++ = REX_B; + *inst++ = REX_B; PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]); } if (saveds >= 4) { SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg); - *buf++ = REX_B; + *inst++ = REX_B; PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]); } if (saveds >= 3) { #ifndef _WIN64 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg); - *buf++ = REX_B; + *inst++ = REX_B; #else SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg); #endif @@ -143,7 +144,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil if (saveds >= 2) { #ifndef _WIN64 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg); - *buf++ = REX_B; + *inst++ = REX_B; #else SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg); #endif @@ -156,42 +157,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil #ifdef _WIN64 if (temporaries >= 5) { SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8, temporary_ereg2_is_hireg); - *buf++ = REX_B; + *inst++ = REX_B; PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]); } #endif #ifndef _WIN64 if (args > 0) { - *buf++ = REX_W; - *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7; + *inst++ = REX_W; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7 /* rdi */; } if (args > 1) { - *buf++ = REX_W | REX_R; - *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6; + *inst++ = REX_W | REX_R; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6 /* rsi */; } if (args > 2) { - *buf++ = REX_W | REX_R; - *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2; + *inst++ = REX_W | REX_R; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2 /* rdx */; } #else if (args > 0) { - *buf++ = REX_W; - *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1; + *inst++ = REX_W; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1 /* rcx */; } if (args > 1) { - *buf++ = REX_W; - *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2; + *inst++ = REX_W; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2 /* rdx */; } if (args > 2) { - *buf++ = REX_W | REX_B; - *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0; + *inst++ = REX_W | REX_B; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0 /* r8 */; } #endif } @@ -201,45 +202,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil #ifdef _WIN64 if (local_size > 1024) { /* Allocate stack for the callback, which grows the stack. */ - buf = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!buf); - INC_SIZE(4); - *buf++ = REX_W; - *buf++ = 0x83; - *buf++ = 0xc0 | (5 << 3) | 4; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_si))); + FAIL_IF(!inst); + INC_SIZE(4 + (3 + sizeof(sljit_si))); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_83; + *inst++ = MOD_REG | SUB | 4; /* Pushed size must be divisible by 8. */ SLJIT_ASSERT(!(pushed_size & 0x7)); if (pushed_size & 0x8) { - *buf++ = 5 * sizeof(sljit_sw); + *inst++ = 5 * sizeof(sljit_sw); local_size -= 5 * sizeof(sljit_sw); } else { - *buf++ = 4 * sizeof(sljit_sw); + *inst++ = 4 * sizeof(sljit_sw); local_size -= 4 * sizeof(sljit_sw); } - FAIL_IF(emit_load_imm64(compiler, SLJIT_TEMPORARY_REG1, local_size)); + /* Second instruction */ + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_REG1] < 8, temporary_reg1_is_loreg); + *inst++ = REX_W; + *inst++ = MOV_rm_i32; + *inst++ = MOD_REG | reg_lmap[SLJIT_TEMPORARY_REG1]; + *(sljit_si*)inst = local_size; +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->skip_checks = 1; +#endif FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); } #endif SLJIT_ASSERT(local_size > 0); if (local_size <= 127) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); INC_SIZE(4); - *buf++ = REX_W; - *buf++ = 0x83; - *buf++ = 0xc0 | (5 << 3) | 4; - *buf++ = local_size; + *inst++ = REX_W; + *inst++ = GROUP_BINARY_83; + *inst++ = MOD_REG | SUB | 4; + *inst++ = local_size; } else { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 7); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 7); + FAIL_IF(!inst); INC_SIZE(7); - *buf++ = REX_W; - *buf++ = 0x81; - *buf++ = 0xc0 | (5 << 3) | 4; - *(sljit_si*)buf = local_size; - buf += sizeof(sljit_si); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_81; + *inst++ = MOD_REG | SUB | 4; + *(sljit_si*)inst = local_size; + inst += sizeof(sljit_si); } +#ifdef _WIN64 + /* Save xmm6 with MOVAPS instruction. */ + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + *inst++ = GROUP_0F; + *(sljit_si*)inst = 0x20247429; +#endif return SLJIT_SUCCESS; } @@ -269,7 +286,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { sljit_si size; - sljit_ub *buf; + sljit_ub *inst; CHECK_ERROR(); check_sljit_emit_return(compiler, op, src, srcw); @@ -277,24 +294,32 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi compiler->flags_saved = 0; FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); +#ifdef _WIN64 + /* Restore xmm6 with MOVAPS instruction. */ + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + *inst++ = GROUP_0F; + *(sljit_si*)inst = 0x20247428; +#endif SLJIT_ASSERT(compiler->local_size > 0); if (compiler->local_size <= 127) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); INC_SIZE(4); - *buf++ = REX_W; - *buf++ = 0x83; - *buf++ = 0xc0 | (0 << 3) | 4; - *buf = compiler->local_size; + *inst++ = REX_W; + *inst++ = GROUP_BINARY_83; + *inst++ = MOD_REG | ADD | 4; + *inst = compiler->local_size; } else { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 7); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 7); + FAIL_IF(!inst); INC_SIZE(7); - *buf++ = REX_W; - *buf++ = 0x81; - *buf++ = 0xc0 | (0 << 3) | 4; - *(sljit_si*)buf = compiler->local_size; + *inst++ = REX_W; + *inst++ = GROUP_BINARY_81; + *inst++ = MOD_REG | ADD | 4; + *(sljit_si*)inst = compiler->local_size; } size = 1 + compiler->saveds; @@ -307,14 +332,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi if (compiler->temporaries >= 5) size += (5 - 4) * 2; #endif - buf = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); INC_SIZE(size); #ifdef _WIN64 if (compiler->temporaries >= 5) { - *buf++ = REX_B; + *inst++ = REX_B; POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]); } #endif @@ -322,22 +347,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi POP_REG(reg_map[SLJIT_SAVED_REG1]); if (compiler->saveds >= 2) { #ifndef _WIN64 - *buf++ = REX_B; + *inst++ = REX_B; #endif POP_REG(reg_lmap[SLJIT_SAVED_REG2]); } if (compiler->saveds >= 3) { #ifndef _WIN64 - *buf++ = REX_B; + *inst++ = REX_B; #endif POP_REG(reg_lmap[SLJIT_SAVED_REG3]); } if (compiler->saveds >= 4) { - *buf++ = REX_B; + *inst++ = REX_B; POP_REG(reg_lmap[SLJIT_SAVED_EREG1]); } if (compiler->saveds >= 5) { - *buf++ = REX_B; + *inst++ = REX_B; POP_REG(reg_lmap[SLJIT_SAVED_EREG2]); } @@ -351,23 +376,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi static sljit_si emit_do_imm32(struct sljit_compiler *compiler, sljit_ub rex, sljit_ub opcode, sljit_sw imm) { - sljit_ub *buf; + sljit_ub *inst; + sljit_si length = 1 + (rex ? 1 : 0) + sizeof(sljit_si); - if (rex != 0) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_si)); - FAIL_IF(!buf); - INC_SIZE(2 + sizeof(sljit_si)); - *buf++ = rex; - *buf++ = opcode; - *(sljit_si*)buf = imm; - } - else { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_si)); - FAIL_IF(!buf); - INC_SIZE(1 + sizeof(sljit_si)); - *buf++ = opcode; - *(sljit_si*)buf = imm; - } + inst = (sljit_ub*)ensure_buf(compiler, 1 + length); + FAIL_IF(!inst); + INC_SIZE(length); + if (rex) + *inst++ = rex; + *inst++ = opcode; + *(sljit_si*)inst = imm; return SLJIT_SUCCESS; } @@ -377,7 +395,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si /* The general operand (not immediate). */ sljit_si b, sljit_sw immb) { - sljit_ub *buf; + sljit_ub *inst; sljit_ub *buf_ptr; sljit_ub rex = 0; sljit_si flags = size & ~0xf; @@ -494,27 +512,27 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si if (rex) inst_size++; - buf = (sljit_ub*)ensure_buf(compiler, 1 + inst_size); - PTR_FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!inst); /* Encoding the byte. */ INC_SIZE(inst_size); #if (defined SLJIT_SSE2 && SLJIT_SSE2) if (flags & EX86_PREF_F2) - *buf++ = 0xf2; + *inst++ = 0xf2; if (flags & EX86_PREF_F3) - *buf++ = 0xf3; + *inst++ = 0xf3; #endif if (flags & EX86_PREF_66) - *buf++ = 0x66; + *inst++ = 0x66; if (rex) - *buf++ = rex; - buf_ptr = buf + size; + *inst++ = rex; + buf_ptr = inst + size; /* Encode mod/rm byte. */ if (!(flags & EX86_SHIFT_INS)) { if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) - *buf = (flags & EX86_BYTE_ARG) ? 0x83 : 0x81; + *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; if ((a & SLJIT_IMM) || (a == 0)) *buf_ptr = 0; @@ -531,19 +549,19 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si else { if (a & SLJIT_IMM) { if (imma == 1) - *buf = 0xd1; + *inst = GROUP_SHIFT_1; else - *buf = 0xc1; + *inst = GROUP_SHIFT_N; } else - *buf = 0xd3; + *inst = GROUP_SHIFT_CL; *buf_ptr = 0; } if (!(b & SLJIT_MEM)) #if (defined SLJIT_SSE2 && SLJIT_SSE2) - *buf_ptr++ |= 0xc0 + ((!(flags & EX86_SSE2)) ? reg_lmap[b] : b); + *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_lmap[b] : b); #else - *buf_ptr++ |= 0xc0 + reg_lmap[b]; + *buf_ptr++ |= MOD_REG + reg_lmap[b]; #endif else if ((b & 0x0f) != SLJIT_UNUSED) { if ((b & 0xf0) == SLJIT_UNUSED || (b & 0xf0) == (SLJIT_LOCALS_REG << 4)) { @@ -591,7 +609,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si *(sljit_si*)buf_ptr = imma; } - return !(flags & EX86_SHIFT_INS) ? buf : (buf + 1); + return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); } /* --------------------------------------------------------------------- */ @@ -600,43 +618,43 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type) { - sljit_ub *buf; + sljit_ub *inst; #ifndef _WIN64 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_REG2] == 6 && reg_map[SLJIT_TEMPORARY_REG1] < 8 && reg_map[SLJIT_TEMPORARY_REG3] < 8, args_registers); - buf = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); + FAIL_IF(!inst); INC_SIZE((type < SLJIT_CALL3) ? 3 : 6); if (type >= SLJIT_CALL3) { - *buf++ = REX_W; - *buf++ = 0x8b; - *buf++ = 0xc0 | (0x2 << 3) | reg_lmap[SLJIT_TEMPORARY_REG3]; + *inst++ = REX_W; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_TEMPORARY_REG3]; } - *buf++ = REX_W; - *buf++ = 0x8b; - *buf++ = 0xc0 | (0x7 << 3) | reg_lmap[SLJIT_TEMPORARY_REG1]; + *inst++ = REX_W; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_TEMPORARY_REG1]; #else SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_REG2] == 2 && reg_map[SLJIT_TEMPORARY_REG1] < 8 && reg_map[SLJIT_TEMPORARY_REG3] < 8, args_registers); - buf = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); + FAIL_IF(!inst); INC_SIZE((type < SLJIT_CALL3) ? 3 : 6); if (type >= SLJIT_CALL3) { - *buf++ = REX_W | REX_R; - *buf++ = 0x8b; - *buf++ = 0xc0 | (0x0 << 3) | reg_lmap[SLJIT_TEMPORARY_REG3]; + *inst++ = REX_W | REX_R; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_TEMPORARY_REG3]; } - *buf++ = REX_W; - *buf++ = 0x8b; - *buf++ = 0xc0 | (0x1 << 3) | reg_lmap[SLJIT_TEMPORARY_REG1]; + *inst++ = REX_W; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_TEMPORARY_REG1]; #endif return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) { - sljit_ub *buf; + sljit_ub *inst; CHECK_ERROR(); check_sljit_emit_fast_enter(compiler, dst, dstw); @@ -648,34 +666,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { if (reg_map[dst] < 8) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); INC_SIZE(1); POP_REG(reg_lmap[dst]); } else { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 2); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); INC_SIZE(2); - *buf++ = REX_B; + *inst++ = REX_B; POP_REG(reg_lmap[dst]); } } else if (dst & SLJIT_MEM) { /* REX_W is not necessary (src is not immediate). */ compiler->mode32 = 1; - buf = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); - FAIL_IF(!buf); - *buf++ = 0x8f; + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = POP_rm; } return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) { - sljit_ub *buf; + sljit_ub *inst; CHECK_ERROR(); check_sljit_emit_fast_return(compiler, src, srcw); @@ -688,43 +706,43 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) { if (reg_map[src] < 8) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1); + FAIL_IF(!inst); INC_SIZE(1 + 1); PUSH_REG(reg_lmap[src]); } else { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1); + FAIL_IF(!inst); INC_SIZE(2 + 1); - *buf++ = REX_B; + *inst++ = REX_B; PUSH_REG(reg_lmap[src]); } } else if (src & SLJIT_MEM) { /* REX_W is not necessary (src is not immediate). */ compiler->mode32 = 1; - buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); - FAIL_IF(!buf); - *buf++ = 0xff; - *buf |= 6 << 3; + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_FF; + *inst |= PUSH_rm; - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); INC_SIZE(1); } else { SLJIT_ASSERT(IS_HALFWORD(srcw)); /* SLJIT_IMM. */ - buf = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1); + FAIL_IF(!inst); INC_SIZE(5 + 1); - *buf++ = 0x68; - *(sljit_si*)buf = srcw; - buf += sizeof(sljit_si); + *inst++ = PUSH_i32; + *(sljit_si*)inst = srcw; + inst += sizeof(sljit_si); } RET(); @@ -740,7 +758,7 @@ static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_ub* code; + sljit_ub* inst; sljit_si dst_r; compiler->mode32 = 0; @@ -751,17 +769,17 @@ static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign, if (src & SLJIT_IMM) { if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { if (sign || ((sljit_uw)srcw <= 0x7fffffff)) { - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw); - FAIL_IF(!code); - *code = 0xc7; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; return SLJIT_SUCCESS; } return emit_load_imm64(compiler, dst, srcw); } compiler->mode32 = 1; - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw); - FAIL_IF(!code); - *code = 0xc7; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; compiler->mode32 = 0; return SLJIT_SUCCESS; } @@ -772,9 +790,9 @@ static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign, dst_r = src; else { if (sign) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); - FAIL_IF(!code); - *code++ = 0x63; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = MOVSXD_r_rm; } else { compiler->mode32 = 1; FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw)); @@ -784,9 +802,9 @@ static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign, if (dst & SLJIT_MEM) { compiler->mode32 = 1; - code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x89; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; compiler->mode32 = 0; } diff --git a/sljit/sljitNativeX86_common.c b/sljit/sljitNativeX86_common.c index ce120f9..6cade37 100644 --- a/sljit/sljitNativeX86_common.c +++ b/sljit/sljitNativeX86_common.c @@ -144,68 +144,247 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { #define EX86_PREF_F3 0x2000 #endif -#define INC_SIZE(s) (*buf++ = (s), compiler->size += (s)) -#define INC_CSIZE(s) (*code++ = (s), compiler->size += (s)) +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ -#define PUSH_REG(r) (*buf++ = (0x50 + (r))) -#define POP_REG(r) (*buf++ = (0x58 + (r))) -#define RET() (*buf++ = (0xc3)) -#define RETN(n) (*buf++ = (0xc2), *buf++ = n, *buf++ = 0) +#define ADD (/* BINARY */ 0 << 3) +#define ADD_EAX_i32 0x05 +#define ADD_r_rm 0x03 +#define ADD_rm_r 0x01 +#define ADDSD_x_xm 0x58 +#define ADC (/* BINARY */ 2 << 3) +#define ADC_EAX_i32 0x15 +#define ADC_r_rm 0x13 +#define ADC_rm_r 0x11 +#define AND (/* BINARY */ 4 << 3) +#define AND_EAX_i32 0x25 +#define AND_r_rm 0x23 +#define AND_rm_r 0x21 +#define ANDPD_x_xm 0x54 +#define BSR_r_rm (/* GROUP_0F */ 0xbd) +#define CALL_i32 0xe8 +#define CALL_rm (/* GROUP_FF */ 2 << 3) +#define CDQ 0x99 +#define CMOVNE_r_rm (/* GROUP_0F */ 0x45) +#define CMP (/* BINARY */ 7 << 3) +#define CMP_EAX_i32 0x3d +#define CMP_r_rm 0x3b +#define CMP_rm_r 0x39 +#define DIV (/* GROUP_F7 */ 6 << 3) +#define DIVSD_x_xm 0x5e +#define INT3 0xcc +#define IDIV (/* GROUP_F7 */ 7 << 3) +#define IMUL (/* GROUP_F7 */ 5 << 3) +#define IMUL_r_rm (/* GROUP_0F */ 0xaf) +#define IMUL_r_rm_i8 0x6b +#define IMUL_r_rm_i32 0x69 +#define JE_i8 0x74 +#define JMP_i8 0xeb +#define JMP_i32 0xe9 +#define JMP_rm (/* GROUP_FF */ 4 << 3) +#define LEA_r_m 0x8d +#define MOV_r_rm 0x8b +#define MOV_r_i32 0xb8 +#define MOV_rm_r 0x89 +#define MOV_rm_i32 0xc7 +#define MOV_rm8_i8 0xc6 +#define MOV_rm8_r8 0x88 +#define MOVSD_x_xm 0x10 +#define MOVSD_xm_x 0x11 +#define MOVSXD_r_rm 0x63 +#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe) +#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf) +#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6) +#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7) +#define MUL (/* GROUP_F7 */ 4 << 3) +#define MULSD_x_xm 0x59 +#define NEG_rm (/* GROUP_F7 */ 3 << 3) +#define NOP 0x90 +#define NOT_rm (/* GROUP_F7 */ 2 << 3) +#define OR (/* BINARY */ 1 << 3) +#define OR_r_rm 0x0b +#define OR_EAX_i32 0x0d +#define OR_rm_r 0x09 +#define POP_r 0x58 +#define POP_rm 0x8f +#define POPF 0x9d +#define PUSH_i32 0x68 +#define PUSH_r 0x50 +#define PUSH_rm (/* GROUP_FF */ 6 << 3) +#define PUSHF 0x9c +#define RET_near 0xc3 +#define RET_i16 0xc2 +#define SBB (/* BINARY */ 3 << 3) +#define SBB_EAX_i32 0x1d +#define SBB_r_rm 0x1b +#define SBB_rm_r 0x19 +#define SAR (/* SHIFT */ 7 << 3) +#define SHL (/* SHIFT */ 4 << 3) +#define SHR (/* SHIFT */ 5 << 3) +#define SUB (/* BINARY */ 5 << 3) +#define SUB_EAX_i32 0x2d +#define SUB_r_rm 0x2b +#define SUB_rm_r 0x29 +#define SUBSD_x_xm 0x5c +#define TEST_EAX_i32 0xa9 +#define TEST_rm_r 0x85 +#define UCOMISD_x_xm 0x2e +#define XCHG_EAX_r 0x90 +#define XCHG_r_rm 0x87 +#define XOR (/* BINARY */ 6 << 3) +#define XOR_EAX_i32 0x35 +#define XOR_r_rm 0x33 +#define XOR_rm_r 0x31 +#define XORPD_x_xm 0x57 + +#define GROUP_0F 0x0f +#define GROUP_F7 0xf7 +#define GROUP_FF 0xff +#define GROUP_BINARY_81 0x81 +#define GROUP_BINARY_83 0x83 +#define GROUP_SHIFT_1 0xd1 +#define GROUP_SHIFT_N 0xc1 +#define GROUP_SHIFT_CL 0xd3 + +#define MOD_REG 0xc0 +#define MOD_DISP8 0x40 + +#define INC_SIZE(s) (*inst++ = (s), compiler->size += (s)) + +#define PUSH_REG(r) (*inst++ = (PUSH_r + (r))) +#define POP_REG(r) (*inst++ = (POP_r + (r))) +#define RET() (*inst++ = (RET_near)) +#define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0) /* r32, r/m32 */ -#define MOV_RM(mod, reg, rm) (*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm)) +#define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm)) + +/* Multithreading does not affect these static variables, since they store + built-in CPU features. Therefore they can be overwritten by different threads + if they detect the CPU features in the same time. */ +#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +static sljit_ui cpu_has_sse2 = -1; +#endif +static sljit_ui cpu_has_cmov = -1; + +#if defined(_MSC_VER) && (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#include <intrin.h> +#endif + +static void get_cpu_features() +{ + sljit_ui features; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + +#ifdef __GNUC__ + /* AT&T syntax. */ + asm ( + "pushl %%ebx\n" + "movl $0x1, %%eax\n" + "cpuid\n" + "popl %%ebx\n" + "movl %%edx, %0\n" + : "=g" (features) + : + : "%eax", "%ecx", "%edx" + ); +#elif defined(_MSC_VER) || defined(__BORLANDC__) + /* Intel syntax. */ + __asm { + mov eax, 1 + push ebx + cpuid + pop ebx + mov features, edx + } +#else + #error "SLJIT_DETECT_SSE2 is not implemented for this C compiler" +#endif + +#else /* SLJIT_CONFIG_X86_32 */ + +#ifdef __GNUC__ + /* AT&T syntax. */ + asm ( + "pushq %%rbx\n" + "movl $0x1, %%eax\n" + "cpuid\n" + "popq %%rbx\n" + "movl %%edx, %0\n" + : "=g" (features) + : + : "%rax", "%rcx", "%rdx" + ); +#elif defined(_MSC_VER) + int CPUInfo[4]; + + __cpuid(CPUInfo, 1); + features = (sljit_ui)CPUInfo[3]; +#else + #error "SLJIT_DETECT_SSE2 is not implemented for this C compiler" +#endif + +#endif /* SLJIT_CONFIG_X86_32 */ + +#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + cpu_has_sse2 = (features >> 26) & 0x1; +#endif + cpu_has_cmov = (features >> 15) & 0x1; +} static sljit_ub get_jump_code(sljit_si type) { switch (type) { case SLJIT_C_EQUAL: case SLJIT_C_FLOAT_EQUAL: - return 0x84; + return 0x84 /* je */; case SLJIT_C_NOT_EQUAL: case SLJIT_C_FLOAT_NOT_EQUAL: - return 0x85; + return 0x85 /* jne */; case SLJIT_C_LESS: case SLJIT_C_FLOAT_LESS: - return 0x82; + return 0x82 /* jc */; case SLJIT_C_GREATER_EQUAL: case SLJIT_C_FLOAT_GREATER_EQUAL: - return 0x83; + return 0x83 /* jae */; case SLJIT_C_GREATER: case SLJIT_C_FLOAT_GREATER: - return 0x87; + return 0x87 /* jnbe */; case SLJIT_C_LESS_EQUAL: case SLJIT_C_FLOAT_LESS_EQUAL: - return 0x86; + return 0x86 /* jbe */; case SLJIT_C_SIG_LESS: - return 0x8c; + return 0x8c /* jl */; case SLJIT_C_SIG_GREATER_EQUAL: - return 0x8d; + return 0x8d /* jnl */; case SLJIT_C_SIG_GREATER: - return 0x8f; + return 0x8f /* jnle */; case SLJIT_C_SIG_LESS_EQUAL: - return 0x8e; + return 0x8e /* jle */; case SLJIT_C_OVERFLOW: case SLJIT_C_MUL_OVERFLOW: - return 0x80; + return 0x80 /* jo */; case SLJIT_C_NOT_OVERFLOW: case SLJIT_C_MUL_NOT_OVERFLOW: - return 0x81; + return 0x81 /* jno */; case SLJIT_C_FLOAT_UNORDERED: - return 0x8a; + return 0x8a /* jp */; case SLJIT_C_FLOAT_ORDERED: - return 0x8b; + return 0x8b /* jpo */; } return 0; } @@ -234,14 +413,14 @@ static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code if (type == SLJIT_JUMP) { if (short_jump) - *code_ptr++ = 0xeb; + *code_ptr++ = JMP_i8; else - *code_ptr++ = 0xe9; + *code_ptr++ = JMP_i32; jump->addr++; } else if (type >= SLJIT_FAST_CALL) { short_jump = 0; - *code_ptr++ = 0xe8; + *code_ptr++ = CALL_i32; jump->addr++; } else if (short_jump) { @@ -249,7 +428,7 @@ static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code jump->addr++; } else { - *code_ptr++ = 0x0f; + *code_ptr++ = GROUP_0F; *code_ptr++ = get_jump_code(type); jump->addr += 2; } @@ -326,7 +505,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } else { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */; + *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32; buf_ptr++; *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)); code_ptr += sizeof(sljit_sw); @@ -407,47 +586,47 @@ static sljit_si emit_mov(struct sljit_compiler *compiler, static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler) { - sljit_ub *buf; + sljit_ub *inst; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - buf = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); INC_SIZE(5); #else - buf = (sljit_ub*)ensure_buf(compiler, 1 + 6); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 6); + FAIL_IF(!inst); INC_SIZE(6); - *buf++ = REX_W; + *inst++ = REX_W; #endif - *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */ - *buf++ = 0x64; - *buf++ = 0x24; - *buf++ = (sljit_ub)sizeof(sljit_sw); - *buf++ = 0x9c; /* pushfd / pushfq */ + *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */ + *inst++ = 0x64; + *inst++ = 0x24; + *inst++ = (sljit_ub)sizeof(sljit_sw); + *inst++ = PUSHF; compiler->flags_saved = 1; return SLJIT_SUCCESS; } static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags) { - sljit_ub *buf; + sljit_ub *inst; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - buf = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); INC_SIZE(5); - *buf++ = 0x9d; /* popfd */ + *inst++ = POPF; #else - buf = (sljit_ub*)ensure_buf(compiler, 1 + 6); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 6); + FAIL_IF(!inst); INC_SIZE(6); - *buf++ = 0x9d; /* popfq */ - *buf++ = REX_W; + *inst++ = POPF; + *inst++ = REX_W; #endif - *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */ - *buf++ = 0x64; - *buf++ = 0x24; - *buf++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw); + *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */ + *inst++ = 0x64; + *inst++ = 0x24; + *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw); compiler->flags_saved = keep_flags; return SLJIT_SUCCESS; } @@ -478,64 +657,64 @@ static sljit_si emit_mov(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_ub* code; + sljit_ub* inst; if (dst == SLJIT_UNUSED) { /* No destination, doesn't need to setup flags. */ if (src & SLJIT_MEM) { - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw); - FAIL_IF(!code); - *code = 0x8b; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; } return SLJIT_SUCCESS; } if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) { - code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x89; + inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; return SLJIT_SUCCESS; } if (src & SLJIT_IMM) { if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); + return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); #else if (!compiler->mode32) { if (NOT_HALFWORD(srcw)) return emit_load_imm64(compiler, dst, srcw); } else - return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw); + return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw); #endif } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (!compiler->mode32 && NOT_HALFWORD(srcw)) { FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw)); - code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x89; + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; return SLJIT_SUCCESS; } #endif - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); - FAIL_IF(!code); - *code = 0xc7; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; return SLJIT_SUCCESS; } if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { - code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); - FAIL_IF(!code); - *code = 0x8b; + inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; return SLJIT_SUCCESS; } /* Memory to memory move. Requires two instruction. */ - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw); - FAIL_IF(!code); - *code = 0x8b; - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x89; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; return SLJIT_SUCCESS; } @@ -544,7 +723,7 @@ static sljit_si emit_mov(struct sljit_compiler *compiler, SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) { - sljit_ub *buf; + sljit_ub *inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) sljit_si size; #endif @@ -554,16 +733,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler switch (GET_OPCODE(op)) { case SLJIT_BREAKPOINT: - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); INC_SIZE(1); - *buf = 0xcc; + *inst = INT3; break; case SLJIT_NOP: - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); INC_SIZE(1); - *buf = 0x90; + *inst = NOP; break; case SLJIT_UMUL: case SLJIT_SMUL: @@ -591,12 +770,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler if (op == SLJIT_UDIV) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0); - buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0); #else - buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); #endif - FAIL_IF(!buf); - *buf = 0x33; + FAIL_IF(!inst); + *inst = XOR_r_rm; } if (op == SLJIT_SDIV) { @@ -604,69 +783,68 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0); #endif - /* CDQ instruction */ #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); INC_SIZE(1); - *buf = 0x99; + *inst = CDQ; #else if (compiler->mode32) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); INC_SIZE(1); - *buf = 0x99; + *inst = CDQ; } else { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 2); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); INC_SIZE(2); - *buf++ = REX_W; - *buf = 0x99; + *inst++ = REX_W; + *inst = CDQ; } #endif } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - buf = (sljit_ub*)ensure_buf(compiler, 1 + 2); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); INC_SIZE(2); - *buf++ = 0xf7; - *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]); + *inst++ = GROUP_F7; + *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]); #else #ifdef _WIN64 size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2; #else size = (!compiler->mode32) ? 3 : 2; #endif - buf = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); INC_SIZE(size); #ifdef _WIN64 if (!compiler->mode32) - *buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0); + *inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0); else if (op >= SLJIT_UDIV) - *buf++ = REX_B; - *buf++ = 0xf7; - *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]); + *inst++ = REX_B; + *inst++ = GROUP_F7; + *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]); #else if (!compiler->mode32) - *buf++ = REX_W; - *buf++ = 0xf7; - *buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2]; + *inst++ = REX_W; + *inst++ = GROUP_F7; + *inst = MOD_REG | reg_map[SLJIT_TEMPORARY_REG2]; #endif #endif switch (op) { case SLJIT_UMUL: - *buf |= 4 << 3; + *inst |= MUL; break; case SLJIT_SMUL: - *buf |= 5 << 3; + *inst |= IMUL; break; case SLJIT_UDIV: - *buf |= 6 << 3; + *inst |= DIV; break; case SLJIT_SDIV: - *buf |= 7 << 3; + *inst |= IDIV; break; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) @@ -680,17 +858,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler #define ENCODE_PREFIX(prefix) \ do { \ - code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \ - FAIL_IF(!code); \ - INC_CSIZE(1); \ - *code = (prefix); \ + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \ + FAIL_IF(!inst); \ + INC_SIZE(1); \ + *inst = (prefix); \ } while (0) static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_ub* code; + sljit_ub* inst; sljit_si dst_r; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) sljit_si work_r; @@ -706,14 +884,17 @@ static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, if (src & SLJIT_IMM) { if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); + return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); #else - return emit_load_imm64(compiler, dst, srcw); + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; #endif } - code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); - FAIL_IF(!code); - *code = 0xc6; + inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_i8; return SLJIT_SUCCESS; } @@ -737,29 +918,28 @@ static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, if (reg_map[dst] < 4) { if (dst != src) EMIT_MOV(compiler, dst, 0, src, 0); - code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0); - FAIL_IF(!code); - *code++ = 0x0f; - *code = sign ? 0xbe : 0xb6; + inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; } else { if (dst != src) EMIT_MOV(compiler, dst, 0, src, 0); if (sign) { /* shl reg, 24 */ - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); - FAIL_IF(!code); - *code |= 0x4 << 3; - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); - FAIL_IF(!code); - /* shr/sar reg, 24 */ - *code |= 0x7 << 3; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); + FAIL_IF(!inst); + *inst |= SHL; + /* sar reg, 24 */ + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); + FAIL_IF(!inst); + *inst |= SAR; } else { - /* and dst, 0xff */ - code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0); - FAIL_IF(!code); - *(code + 1) |= 0x4 << 3; + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0); + FAIL_IF(!inst); + *(inst + 1) |= AND; } } return SLJIT_SUCCESS; @@ -767,10 +947,10 @@ static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, #endif else { /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */ - code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); - FAIL_IF(!code); - *code++ = 0x0f; - *code = sign ? 0xbe : 0xb6; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; } if (dst & SLJIT_MEM) { @@ -793,36 +973,36 @@ static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, } if (work_r == SLJIT_TEMPORARY_REG1) { - ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]); + ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REGISTER]); } else { - code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); - FAIL_IF(!code); - *code = 0x87; + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); + FAIL_IF(!inst); + *inst = XCHG_r_rm; } - code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x88; + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; if (work_r == SLJIT_TEMPORARY_REG1) { - ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]); + ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REGISTER]); } else { - code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); - FAIL_IF(!code); - *code = 0x87; + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); + FAIL_IF(!inst); + *inst = XCHG_r_rm; } } else { - code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x88; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; } #else - code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x88; + inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; #endif } @@ -833,7 +1013,7 @@ static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_ub* code; + sljit_ub* inst; sljit_si dst_r; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -846,14 +1026,17 @@ static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign, if (src & SLJIT_IMM) { if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); + return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); #else - return emit_load_imm64(compiler, dst, srcw); + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; #endif } - code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); - FAIL_IF(!code); - *code = 0xc7; + inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; return SLJIT_SUCCESS; } @@ -862,56 +1045,56 @@ static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign, if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)) dst_r = src; else { - code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); - FAIL_IF(!code); - *code++ = 0x0f; - *code = sign ? 0xbf : 0xb7; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16; } if (dst & SLJIT_MEM) { - code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x89; + inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; } return SLJIT_SUCCESS; } -static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_si un_index, +static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_ub* code; + sljit_ub* inst; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= (un_index) << 3; + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; return SLJIT_SUCCESS; } if (dst == src && dstw == srcw) { /* Same input and output */ - code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= (un_index) << 3; + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; return SLJIT_SUCCESS; } if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { EMIT_MOV(compiler, dst, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= (un_index) << 3; + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; return SLJIT_SUCCESS; } EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= (un_index) << 3; + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); return SLJIT_SUCCESS; } @@ -920,38 +1103,38 @@ static sljit_si emit_not_with_flags(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_ub* code; + sljit_ub* inst; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= 0x2 << 3; - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code = 0x0b; + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst = OR_r_rm; return SLJIT_SUCCESS; } if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { EMIT_MOV(compiler, dst, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= 0x2 << 3; - code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); - FAIL_IF(!code); - *code = 0x0b; + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; + inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); + FAIL_IF(!inst); + *inst = OR_r_rm; return SLJIT_SUCCESS; } EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= 0x2 << 3; - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code = 0x0b; + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst = OR_r_rm; EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); return SLJIT_SUCCESS; } @@ -960,37 +1143,37 @@ static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_ub* code; + sljit_ub* inst; sljit_si dst_r; SLJIT_UNUSED_ARG(op_flags); if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { /* Just set the zero flag. */ EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); - code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0xf7; - *code |= 0x2 << 3; + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0); #else - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0); #endif - FAIL_IF(!code); - *code |= 0x5 << 3; + FAIL_IF(!inst); + *inst |= SHR; return SLJIT_SUCCESS; } if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { - EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); + EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_IMM, srcw); src = TMP_REGISTER; srcw = 0; } - code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0xbd; + inst = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = BSR_r_rm; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) @@ -1013,24 +1196,50 @@ static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, compiler->mode32 = op_flags & SLJIT_INT_OP; #endif - code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0x45; + if (cpu_has_cmov == -1) + get_cpu_features(); + if (cpu_has_cmov) { + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CMOVNE_r_rm; + } else { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + + *inst++ = JE_i8; + *inst++ = 2; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REGISTER]; #else - code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + + *inst++ = JE_i8; + *inst++ = 3; + *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REGISTER] >= 8 ? REX_B : 0); + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REGISTER]; #endif - FAIL_IF(!code); - *(code + 1) |= 0x6 << 3; + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); +#else + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0); +#endif + FAIL_IF(!inst); + *(inst + 1) |= XOR; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (dst & SLJIT_MEM) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x87; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = XCHG_r_rm; } #else if (dst & SLJIT_MEM) @@ -1043,7 +1252,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_ub* code; + sljit_ub* inst; sljit_si update = 0; sljit_si op_flags = GET_ALL_FLAGS(op); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -1123,9 +1332,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler } if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) { - code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw); - FAIL_IF(!code); - *code = 0x8d; + inst = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw); + FAIL_IF(!inst); + *inst = LEA_r_m; src &= SLJIT_MEM | 0xf; srcw = 0; } @@ -1174,9 +1383,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #endif if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) { - code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw); - FAIL_IF(!code); - *code = 0x8d; + inst = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw); + FAIL_IF(!inst); + *inst = LEA_r_m; } return SLJIT_SUCCESS; } @@ -1188,12 +1397,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler case SLJIT_NOT: if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E)) return emit_not_with_flags(compiler, dst, dstw, src, srcw); - return emit_unary(compiler, 0x2, dst, dstw, src, srcw); + return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); case SLJIT_NEG: if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) FAIL_IF(emit_save_flags(compiler)); - return emit_unary(compiler, 0x3, dst, dstw, src, srcw); + return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw); case SLJIT_CLZ: if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) @@ -1210,31 +1419,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \ +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ if (IS_HALFWORD(immw) || compiler->mode32) { \ - code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ - FAIL_IF(!code); \ - *(code + 1) |= (_op_imm_); \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); \ } \ else { \ FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \ - code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \ - FAIL_IF(!code); \ - *code = (_op_mr_); \ + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \ + FAIL_IF(!inst); \ + *inst = (op_mr); \ } -#define BINARY_EAX_IMM(_op_eax_imm_, immw) \ - FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw)) +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw)) #else -#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \ - code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ - FAIL_IF(!code); \ - *(code + 1) |= (_op_imm_); +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); -#define BINARY_EAX_IMM(_op_eax_imm_, immw) \ - FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw)) +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) #endif @@ -1244,7 +1453,7 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_ub* code; + sljit_ub* inst; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); @@ -1252,9 +1461,9 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler, BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); } else { - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } return SLJIT_SUCCESS; } @@ -1273,21 +1482,21 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler, } } else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { - code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) { /* Special exception for sljit_emit_cond_value. */ - code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); - FAIL_IF(!code); - *code = op_mr; + inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; } else { EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w); - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); - FAIL_IF(!code); - *code = op_mr; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; } return SLJIT_SUCCESS; } @@ -1307,20 +1516,20 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler, } } else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { - code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w); + FAIL_IF(!inst); + *inst = op_rm; } else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) { - code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw); - FAIL_IF(!code); - *code = op_mr; + inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; } else { EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); - FAIL_IF(!code); - *code = op_mr; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; } return SLJIT_SUCCESS; } @@ -1332,9 +1541,9 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler, BINARY_IMM(op_imm, op_mr, src2w, dst, 0); } else { - code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } } else { @@ -1344,9 +1553,9 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler, BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); } else { - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); } @@ -1360,7 +1569,7 @@ static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_ub* code; + sljit_ub* inst; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); @@ -1368,9 +1577,9 @@ static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); } else { - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } return SLJIT_SUCCESS; } @@ -1389,20 +1598,20 @@ static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, } } else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { - code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) { - code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); - FAIL_IF(!code); - *code = op_mr; + inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; } else { EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w); - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); - FAIL_IF(!code); - *code = op_mr; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; } return SLJIT_SUCCESS; } @@ -1414,9 +1623,9 @@ static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, BINARY_IMM(op_imm, op_mr, src2w, dst, 0); } else { - code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } } else { @@ -1426,9 +1635,9 @@ static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); } else { - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!code); - *code = op_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; } EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); } @@ -1441,23 +1650,23 @@ static sljit_si emit_mul(struct sljit_compiler *compiler, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_ub* code; + sljit_ub* inst; sljit_si dst_r; dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER; /* Register destination. */ if (dst_r == src1 && !(src2 & SLJIT_IMM)) { - code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0xaf; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; } else if (dst_r == src2 && !(src1 & SLJIT_IMM)) { - code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0xaf; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; } else if (src1 & SLJIT_IMM) { if (src2 & SLJIT_IMM) { @@ -1467,42 +1676,42 @@ static sljit_si emit_mul(struct sljit_compiler *compiler, } if (src1w <= 127 && src1w >= -128) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x6b; - code = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!code); - INC_CSIZE(1); - *code = (sljit_sb)src1w; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i8; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = (sljit_sb)src1w; } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else { - code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x69; - code = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!code); - INC_CSIZE(4); - *(sljit_sw*)code = src1w; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *(sljit_sw*)inst = src1w; } #else else if (IS_HALFWORD(src1w)) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x69; - code = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!code); - INC_CSIZE(4); - *(sljit_si*)code = (sljit_si)src1w; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *(sljit_si*)inst = (sljit_si)src1w; } else { EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w); if (dst_r != src2) EMIT_MOV(compiler, dst_r, 0, src2, src2w); - code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0xaf; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; } #endif } @@ -1510,42 +1719,42 @@ static sljit_si emit_mul(struct sljit_compiler *compiler, /* Note: src1 is NOT immediate. */ if (src2w <= 127 && src2w >= -128) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); - FAIL_IF(!code); - *code = 0x6b; - code = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!code); - INC_CSIZE(1); - *code = (sljit_sb)src2w; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i8; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = (sljit_sb)src2w; } #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) else { - code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); - FAIL_IF(!code); - *code = 0x69; - code = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!code); - INC_CSIZE(4); - *(sljit_sw*)code = src2w; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *(sljit_sw*)inst = src2w; } #else else if (IS_HALFWORD(src2w)) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); - FAIL_IF(!code); - *code = 0x69; - code = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!code); - INC_CSIZE(4); - *(sljit_si*)code = (sljit_si)src2w; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *(sljit_si*)inst = (sljit_si)src2w; } else { EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w); if (dst_r != src1) EMIT_MOV(compiler, dst_r, 0, src1, src1w); - code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0xaf; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; } #endif } @@ -1554,10 +1763,10 @@ static sljit_si emit_mul(struct sljit_compiler *compiler, if (ADDRESSING_DEPENDS_ON(src2, dst_r)) dst_r = TMP_REGISTER; EMIT_MOV(compiler, dst_r, 0, src1, src1w); - code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); - FAIL_IF(!code); - *code++ = 0x0f; - *code = 0xaf; + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; } if (dst_r == TMP_REGISTER) @@ -1571,7 +1780,7 @@ static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_ub* code; + sljit_ub* inst; sljit_si dst_r, done = 0; /* These cases better be left to handled by normal way. */ @@ -1584,33 +1793,33 @@ static sljit_si emit_lea_binary(struct sljit_compiler *compiler, if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) { if ((src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) || src2 == TMP_REGISTER) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0); - FAIL_IF(!code); - *code = 0x8d; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0); + FAIL_IF(!inst); + *inst = LEA_r_m; done = 1; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w); + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w); #else if (src2 & SLJIT_IMM) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); #endif - FAIL_IF(!code); - *code = 0x8d; + FAIL_IF(!inst); + *inst = LEA_r_m; done = 1; } } else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w); + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w); #else if (src1 & SLJIT_IMM) { - code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); #endif - FAIL_IF(!code); - *code = 0x8d; + FAIL_IF(!inst); + *inst = LEA_r_m; done = 1; } } @@ -1627,33 +1836,33 @@ static sljit_si emit_cmp_binary(struct sljit_compiler *compiler, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_ub* code; + sljit_ub* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { #endif - BINARY_EAX_IMM(0x3d, src2w); + BINARY_EAX_IMM(CMP_EAX_i32, src2w); return SLJIT_SUCCESS; } if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) { if (src2 & SLJIT_IMM) { - BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0); + BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0); } else { - code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x3b; + inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = CMP_r_rm; } return SLJIT_SUCCESS; } if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) { - code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); - FAIL_IF(!code); - *code = 0x39; + inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); + FAIL_IF(!inst); + *inst = CMP_rm_r; return SLJIT_SUCCESS; } @@ -1663,13 +1872,13 @@ static sljit_si emit_cmp_binary(struct sljit_compiler *compiler, src1 = TMP_REGISTER; src1w = 0; } - BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w); + BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w); } else { EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x3b; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!inst); + *inst = CMP_r_rm; } return SLJIT_SUCCESS; } @@ -1678,14 +1887,14 @@ static sljit_si emit_test_binary(struct sljit_compiler *compiler, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_ub* code; + sljit_ub* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { #endif - BINARY_EAX_IMM(0xa9, src2w); + BINARY_EAX_IMM(TEST_EAX_i32, src2w); return SLJIT_SUCCESS; } @@ -1694,7 +1903,7 @@ static sljit_si emit_test_binary(struct sljit_compiler *compiler, #else if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { #endif - BINARY_EAX_IMM(0xa9, src1w); + BINARY_EAX_IMM(TEST_EAX_i32, src1w); return SLJIT_SUCCESS; } @@ -1702,26 +1911,26 @@ static sljit_si emit_test_binary(struct sljit_compiler *compiler, if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (IS_HALFWORD(src2w) || compiler->mode32) { - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); - FAIL_IF(!code); - *code = 0xf7; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; } else { FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); - code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0); - FAIL_IF(!code); - *code = 0x85; + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0); + FAIL_IF(!inst); + *inst = TEST_rm_r; } #else - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); - FAIL_IF(!code); - *code = 0xf7; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; #endif } else { - code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x85; + inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; } return SLJIT_SUCCESS; } @@ -1730,26 +1939,26 @@ static sljit_si emit_test_binary(struct sljit_compiler *compiler, if (src1 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (IS_HALFWORD(src1w) || compiler->mode32) { - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0); - FAIL_IF(!code); - *code = 0xf7; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; } else { FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); - code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0); - FAIL_IF(!code); - *code = 0x85; + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0); + FAIL_IF(!inst); + *inst = TEST_rm_r; } #else - code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0); - FAIL_IF(!code); - *code = 0xf7; + inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; #endif } else { - code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); - FAIL_IF(!code); - *code = 0x85; + inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); + FAIL_IF(!inst); + *inst = TEST_rm_r; } return SLJIT_SUCCESS; } @@ -1758,26 +1967,26 @@ static sljit_si emit_test_binary(struct sljit_compiler *compiler, if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (IS_HALFWORD(src2w) || compiler->mode32) { - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0); - FAIL_IF(!code); - *code = 0xf7; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; } else { FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); - code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code = 0x85; + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst = TEST_rm_r; } #else - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0); - FAIL_IF(!code); - *code = 0xf7; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; #endif } else { - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); - FAIL_IF(!code); - *code = 0x85; + inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; } return SLJIT_SUCCESS; } @@ -1788,42 +1997,42 @@ static sljit_si emit_shift(struct sljit_compiler *compiler, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_ub* code; + sljit_ub* inst; if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) { if (dst == src1 && dstw == src1w) { - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst |= mode; return SLJIT_SUCCESS; } if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst |= mode; return SLJIT_SUCCESS; } if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst |= mode; EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); return SLJIT_SUCCESS; } if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { EMIT_MOV(compiler, dst, 0, src1, src1w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); + FAIL_IF(!inst); + *inst |= mode; return SLJIT_SUCCESS; } EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst |= mode; EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); return SLJIT_SUCCESS; } @@ -1831,9 +2040,9 @@ static sljit_si emit_shift(struct sljit_compiler *compiler, if (dst == SLJIT_PREF_SHIFT_REG) { EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst |= mode; EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); } else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { @@ -1841,9 +2050,9 @@ static sljit_si emit_shift(struct sljit_compiler *compiler, EMIT_MOV(compiler, dst, 0, src1, src1w); EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0); EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); + FAIL_IF(!inst); + *inst |= mode; EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); } else { @@ -1857,9 +2066,9 @@ static sljit_si emit_shift(struct sljit_compiler *compiler, EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0); #endif EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); - code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code |= mode; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); + FAIL_IF(!inst); + *inst |= mode; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); #else @@ -1889,7 +2098,7 @@ static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler, if (!set_flags) return emit_mov(compiler, dst, dstw, src1, src1w); /* OR dst, src, 0 */ - return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d, + return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, dst, dstw, src1, src1w, SLJIT_IMM, 0); } @@ -1941,7 +2150,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler compiler->flags_saved = 0; if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) FAIL_IF(emit_save_flags(compiler)); - return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05, + return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */ @@ -1950,7 +2159,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler FAIL_IF(emit_save_flags(compiler)); if (SLJIT_UNLIKELY(GET_FLAGS(op))) compiler->flags_saved = 0; - return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15, + return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: if (!GET_FLAGS(op)) { @@ -1963,7 +2172,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler FAIL_IF(emit_save_flags(compiler)); if (dst == SLJIT_UNUSED) return emit_cmp_binary(compiler, src1, src1w, src2, src2w); - return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d, + return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */ @@ -1972,29 +2181,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler FAIL_IF(emit_save_flags(compiler)); if (SLJIT_UNLIKELY(GET_FLAGS(op))) compiler->flags_saved = 0; - return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d, + return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); case SLJIT_AND: if (dst == SLJIT_UNUSED) return emit_test_binary(compiler, src1, src1w, src2, src2w); - return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25, + return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_OR: - return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d, + return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_XOR: - return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35, + return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SHL: - return emit_shift_with_flags(compiler, 0x4 << 3, GET_FLAGS(op), + return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op), dst, dstw, src1, src1w, src2, src2w); case SLJIT_LSHR: - return emit_shift_with_flags(compiler, 0x5 << 3, GET_FLAGS(op), + return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op), dst, dstw, src1, src1w, src2, src2w); case SLJIT_ASHR: - return emit_shift_with_flags(compiler, 0x7 << 3, GET_FLAGS(op), + return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op), dst, dstw, src1, src1w, src2, src2w); } @@ -2015,16 +2224,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, void *instruction, sljit_si size) { - sljit_ub *buf; + sljit_ub *inst; CHECK_ERROR(); check_sljit_emit_op_custom(compiler, instruction, size); SLJIT_ASSERT(size > 0 && size < 16); - buf = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); INC_SIZE(size); - SLJIT_MEMMOVE(buf, instruction, size); + SLJIT_MEMMOVE(inst, instruction, size); return SLJIT_SUCCESS; } @@ -2057,42 +2266,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) { #if (defined SLJIT_SSE2 && SLJIT_SSE2) #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) - static sljit_si sse2_available = -1; - sljit_si features; - - if (sse2_available != -1) - return sse2_available; - -#ifdef __GNUC__ - /* AT&T syntax. */ - asm ( - "pushl %%ebx\n" - "movl $0x1, %%eax\n" - "cpuid\n" - "popl %%ebx\n" - "movl %%edx, %0\n" - : "=g" (features) - : - : "%eax", "%ecx", "%edx" - ); -#elif defined(_MSC_VER) || defined(__BORLANDC__) - /* Intel syntax. */ - __asm { - mov eax, 1 - push ebx - cpuid - pop ebx - mov features, edx - } -#else - #error "SLJIT_DETECT_SSE2 is not implemented for this C compiler" -#endif - sse2_available = (features >> 26) & 0x1; - return sse2_available; -#else + if (cpu_has_sse2 == -1) + get_cpu_features(); + return cpu_has_sse2; +#else /* SLJIT_DETECT_SSE2 */ return 1; -#endif -#else +#endif /* SLJIT_DETECT_SSE2 */ +#else /* SLJIT_SSE2 */ return 0; #endif } @@ -2102,37 +2282,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode, sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) { - sljit_ub *buf; + sljit_ub *inst; - buf = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); - FAIL_IF(!buf); - *buf++ = 0x0f; - *buf = opcode; + inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = opcode; return SLJIT_SUCCESS; } static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode, sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) { - sljit_ub *buf; + sljit_ub *inst; - buf = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); - FAIL_IF(!buf); - *buf++ = 0x0f; - *buf = opcode; + inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = opcode; return SLJIT_SUCCESS; } static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler, sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw) { - return emit_sse2(compiler, 0x10, single, dst, src, srcw); + return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw); } static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler, sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src) { - return emit_sse2(compiler, 0x11, single, src, dst, dstw); + return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, @@ -2156,7 +2336,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile dst_r = TMP_FREG; FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw)); } - return emit_sse2_logic(compiler, 0x2e, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw); + return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw); } if (op == SLJIT_MOVD) { @@ -2180,11 +2360,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_NEGD: - FAIL_IF(emit_sse2_logic(compiler, 0x57, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8))); + FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8))); break; case SLJIT_ABSD: - FAIL_IF(emit_sse2_logic(compiler, 0x54, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12))); + FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12))); break; } @@ -2230,19 +2410,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADDD: - FAIL_IF(emit_sse2(compiler, 0x58, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); + FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); break; case SLJIT_SUBD: - FAIL_IF(emit_sse2(compiler, 0x5c, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); + FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); break; case SLJIT_MULD: - FAIL_IF(emit_sse2(compiler, 0x59, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); + FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); break; case SLJIT_DIVD: - FAIL_IF(emit_sse2(compiler, 0x5e, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); + FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); break; } @@ -2284,7 +2464,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) { - sljit_ub *buf; + sljit_ub *inst; struct sljit_label *label; CHECK_ERROR_PTR(); @@ -2302,18 +2482,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi PTR_FAIL_IF(!label); set_label(label, compiler); - buf = (sljit_ub*)ensure_buf(compiler, 2); - PTR_FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 2); + PTR_FAIL_IF(!inst); - *buf++ = 0; - *buf++ = 0; + *inst++ = 0; + *inst++ = 0; return label; } SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) { - sljit_ub *buf; + sljit_ub *inst; struct sljit_jump *jump; CHECK_ERROR_PTR(); @@ -2340,17 +2520,17 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3); #endif - buf = (sljit_ub*)ensure_buf(compiler, 2); - PTR_FAIL_IF_NULL(buf); + inst = (sljit_ub*)ensure_buf(compiler, 2); + PTR_FAIL_IF_NULL(inst); - *buf++ = 0; - *buf++ = type + 4; + *inst++ = 0; + *inst++ = type + 4; return jump; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) { - sljit_ub *code; + sljit_ub *inst; struct sljit_jump *jump; CHECK_ERROR(); @@ -2401,28 +2581,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil compiler->size += 10 + 3; #endif - code = (sljit_ub*)ensure_buf(compiler, 2); - FAIL_IF_NULL(code); + inst = (sljit_ub*)ensure_buf(compiler, 2); + FAIL_IF_NULL(inst); - *code++ = 0; - *code++ = type + 4; + *inst++ = 0; + *inst++ = type + 4; } else { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) /* REX_W is not necessary (src is not immediate). */ compiler->mode32 = 1; #endif - code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); - FAIL_IF(!code); - *code++ = 0xff; - *code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3); + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_FF; + *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm; } return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_cond_value(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si type) { - sljit_ub *buf; + sljit_ub *inst; sljit_ub cond_set = 0; sljit_si dst_save = dst; sljit_sw dstw_save = dstw; @@ -2444,84 +2624,84 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_cond_value(struct sljit_compiler *c switch (type) { case SLJIT_C_EQUAL: case SLJIT_C_FLOAT_EQUAL: - cond_set = 0x94; + cond_set = 0x94 /* sete */; break; case SLJIT_C_NOT_EQUAL: case SLJIT_C_FLOAT_NOT_EQUAL: - cond_set = 0x95; + cond_set = 0x95 /* setne */; break; case SLJIT_C_LESS: case SLJIT_C_FLOAT_LESS: - cond_set = 0x92; + cond_set = 0x92 /* setnae */; break; case SLJIT_C_GREATER_EQUAL: case SLJIT_C_FLOAT_GREATER_EQUAL: - cond_set = 0x93; + cond_set = 0x93 /* setnb */; break; case SLJIT_C_GREATER: case SLJIT_C_FLOAT_GREATER: - cond_set = 0x97; + cond_set = 0x97 /* seta */; break; case SLJIT_C_LESS_EQUAL: case SLJIT_C_FLOAT_LESS_EQUAL: - cond_set = 0x96; + cond_set = 0x96 /* setbe */; break; case SLJIT_C_SIG_LESS: - cond_set = 0x9c; + cond_set = 0x9c /* setnge */; break; case SLJIT_C_SIG_GREATER_EQUAL: - cond_set = 0x9d; + cond_set = 0x9d /* setnl */; break; case SLJIT_C_SIG_GREATER: - cond_set = 0x9f; + cond_set = 0x9f /* setg */; break; case SLJIT_C_SIG_LESS_EQUAL: - cond_set = 0x9e; + cond_set = 0x9e /* setle */; break; case SLJIT_C_OVERFLOW: case SLJIT_C_MUL_OVERFLOW: - cond_set = 0x90; + cond_set = 0x90 /* seto */; break; case SLJIT_C_NOT_OVERFLOW: case SLJIT_C_MUL_NOT_OVERFLOW: - cond_set = 0x91; + cond_set = 0x91 /* setno */; break; case SLJIT_C_FLOAT_UNORDERED: - cond_set = 0x9a; + cond_set = 0x9a /* setp */; break; case SLJIT_C_FLOAT_ORDERED: - cond_set = 0x9b; + cond_set = 0x9b /* setpo */; break; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER; - buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4); + FAIL_IF(!inst); INC_SIZE(4 + 4); /* Set low register to conditional flag. */ - *buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B; - *buf++ = 0x0f; - *buf++ = cond_set; - *buf++ = 0xC0 | reg_lmap[reg]; - *buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); - *buf++ = 0x0f; - *buf++ = 0xb6; - *buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg]; + *inst++ = (reg_map[reg] <= 7) ? REX : REX_B; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_lmap[reg]; + *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]; if (reg == TMP_REGISTER) { if (op == SLJIT_MOV) { @@ -2538,35 +2718,35 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_cond_value(struct sljit_compiler *c #else if (op == SLJIT_MOV) { if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3); + FAIL_IF(!inst); INC_SIZE(3 + 3); /* Set low byte to conditional flag. */ - *buf++ = 0x0f; - *buf++ = cond_set; - *buf++ = 0xC0 | reg_map[dst]; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_map[dst]; - *buf++ = 0x0f; - *buf++ = 0xb6; - *buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst]; + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst]; } else { EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0); - buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3); + FAIL_IF(!inst); INC_SIZE(3 + 3); /* Set al to conditional flag. */ - *buf++ = 0x0f; - *buf++ = cond_set; - *buf++ = 0xC0; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG; - *buf++ = 0x0f; - *buf++ = 0xb6; + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; if (dst >= SLJIT_SAVED_REG1 && dst <= SLJIT_NO_REGISTERS) - *buf = 0xC0 | (reg_map[dst] << 3); + *inst = MOD_REG | (reg_map[dst] << 3); else { - *buf = 0xC0; + *inst = MOD_REG; EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0); } @@ -2576,30 +2756,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_cond_value(struct sljit_compiler *c else { if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) { EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0); - buf = (sljit_ub*)ensure_buf(compiler, 1 + 3); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); INC_SIZE(3); - *buf++ = 0x0f; - *buf++ = cond_set; - *buf++ = 0xC0 | reg_map[dst]; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_map[dst]; } else { EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0); - buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1); - FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1); + FAIL_IF(!inst); INC_SIZE(3 + 3 + 1); /* Set al to conditional flag. */ - *buf++ = 0x0f; - *buf++ = cond_set; - *buf++ = 0xC0; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG; - *buf++ = 0x0f; - *buf++ = 0xb6; - *buf++ = 0xC0; + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst++ = MOD_REG; - *buf++ = 0x90 + reg_map[TMP_REGISTER]; + *inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER]; } #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->skip_checks = 1; @@ -2644,7 +2824,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *co SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) { - sljit_ub *buf; + sljit_ub *inst; struct sljit_const *const_; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) sljit_si reg; @@ -2674,11 +2854,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return NULL; #endif - buf = (sljit_ub*)ensure_buf(compiler, 2); - PTR_FAIL_IF(!buf); + inst = (sljit_ub*)ensure_buf(compiler, 2); + PTR_FAIL_IF(!inst); - *buf++ = 0; - *buf++ = 1; + *inst++ = 0; + *inst++ = 1; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (reg == TMP_REGISTER && dst != SLJIT_UNUSED) |