diff options
-rw-r--r-- | pcre_jit_compile.c | 18 | ||||
-rw-r--r-- | pcretest.c | 2 | ||||
-rw-r--r-- | sljit/sljitLir.c | 6 | ||||
-rw-r--r-- | sljit/sljitLir.h | 107 | ||||
-rw-r--r-- | sljit/sljitNativeARM_Thumb2.c | 37 | ||||
-rw-r--r-- | sljit/sljitNativeARM_v5.c | 48 | ||||
-rw-r--r-- | sljit/sljitNativeMIPS_common.c | 17 | ||||
-rw-r--r-- | sljit/sljitNativePPC_32.c | 3 | ||||
-rw-r--r-- | sljit/sljitNativePPC_64.c | 3 | ||||
-rw-r--r-- | sljit/sljitNativePPC_common.c | 38 | ||||
-rw-r--r-- | sljit/sljitNativeX86_64.c | 37 | ||||
-rw-r--r-- | sljit/sljitNativeX86_common.c | 170 |
12 files changed, 376 insertions, 110 deletions
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c index db3485e..fbf2368 100644 --- a/pcre_jit_compile.c +++ b/pcre_jit_compile.c @@ -322,7 +322,7 @@ typedef struct compare_context { int ucharptr; union { sljit_i asint; - sljit_h asshort; + sljit_uh asushort; #ifdef COMPILE_PCRE8 sljit_ub asbyte; sljit_ub asuchars[4]; @@ -334,7 +334,7 @@ typedef struct compare_context { } c; union { sljit_i asint; - sljit_h asshort; + sljit_uh asushort; #ifdef COMPILE_PCRE8 sljit_ub asbyte; sljit_ub asuchars[4]; @@ -2493,7 +2493,7 @@ if (context->sourcereg == -1) if (context->length >= 4) OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); else if (context->length >= 2) - OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); else #endif OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); @@ -2504,7 +2504,7 @@ if (context->sourcereg == -1) OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); else #endif - OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); #endif #endif /* COMPILE_PCRE8 */ context->sourcereg = TMP2; @@ -2545,12 +2545,12 @@ do OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); #ifdef COMPILE_PCRE8 else if (context->length >= 2) - OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); else if (context->length >= 1) OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); #else else if (context->length >= 2) - OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); #endif context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; @@ -2563,9 +2563,9 @@ do break; case 2 / sizeof(pcre_uchar): - if (context->oc.asshort != 0) - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asshort); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asshort | context->oc.asshort)); + if (context->oc.asushort != 0) + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); + add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); break; #ifdef COMPILE_PCRE8 @@ -2491,7 +2491,7 @@ while (!done) pcre_uint8 *p, *pp, *ppp; pcre_uint8 *to_file = NULL; const pcre_uint8 *tables = NULL; - pcre_uint32 get_options; + unsigned long int get_options; unsigned long int true_size, true_study_size = 0; size_t size, regex_gotten_store; int do_allcaps = 0; diff --git a/sljit/sljitLir.c b/sljit/sljitLir.c index f42e26c..1c44f3b 100644 --- a/sljit/sljitLir.c +++ b/sljit/sljitLir.c @@ -616,6 +616,7 @@ static char* freg_names[] = { static SLJIT_CONST char* op_names[] = { /* op0 */ (char*)"breakpoint", (char*)"nop", + (char*)"umul", (char*)"smul", (char*)"udiv", (char*)"sdiv", /* op1 */ (char*)"mov", (char*)"mov.ub", (char*)"mov.sb", (char*)"mov.uh", (char*)"mov.sh", (char*)"mov.ui", (char*)"mov.si", (char*)"movu", @@ -793,10 +794,11 @@ static SLJIT_INLINE void check_sljit_emit_op0(struct sljit_compiler *compiler, i SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(op); - SLJIT_ASSERT(op >= SLJIT_BREAKPOINT && op <= SLJIT_NOP); + SLJIT_ASSERT((op >= SLJIT_BREAKPOINT && op <= SLJIT_SMUL) + || ((op & ~SLJIT_INT_OP) >= SLJIT_UDIV && (op & ~SLJIT_INT_OP) <= SLJIT_SDIV)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " %s\n", op_names[op]); + fprintf(compiler->verbose, " %s%s\n", op_names[GET_OPCODE(op)], !(op & SLJIT_INT_OP) ? "" : "i"); #endif } diff --git a/sljit/sljitLir.h b/sljit/sljitLir.h index 6b9832c..8012d71 100644 --- a/sljit/sljitLir.h +++ b/sljit/sljitLir.h @@ -127,9 +127,11 @@ of sljitConfigInternal.h */ #define SLJIT_RETURN_REG SLJIT_TEMPORARY_REG1 -/* x86 prefers temporary registers for special purposes. If other - registers are used such purpose, it costs a little performance - drawback. It doesn't matter for other archs. */ +/* x86 prefers specific registers for special purposes. In case of shift + by register it supports only SLJIT_TEMPORARY_REG3 for shift argument + (which is the src2 argument of sljit_emit_op2). If another register is + used, sljit must exchange data between registers which cause a minor + slowdown. Other architectures has no such limitation. */ #define SLJIT_PREF_SHIFT_REG SLJIT_TEMPORARY_REG3 @@ -370,15 +372,16 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compi */ /* - IMPORATNT NOTE: memory access MUST be naturally aligned. + IMPORATNT NOTE: memory access MUST be naturally aligned except + SLJIT_UNALIGNED macro is defined and its value is 1. + length | alignment ---------+----------- byte | 1 byte (not aligned) half | 2 byte (real_address & 0x1 == 0) int | 4 byte (real_address & 0x3 == 0) - sljit_w | 4 byte if SLJIT_32BIT_ARCHITECTURE defined - | 8 byte if SLJIT_64BIT_ARCHITECTURE defined - (This is a strict requirement for embedded systems.) + sljit_w | 4 byte if SLJIT_32BIT_ARCHITECTURE is defined and its value is 1 + | 8 byte if SLJIT_64BIT_ARCHITECTURE is defined and its value is 1 Note: different architectures have different addressing limitations Thus sljit may generate several instructions for other addressing modes @@ -450,6 +453,24 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compi Note: may or may not cause an extra cycle wait it can even decrease the runtime in a few cases. */ #define SLJIT_NOP 1 +/* Flags: may destroy flags + Unsigned multiplication of SLJIT_TEMPORARY_REG1 and SLJIT_TEMPORARY_REG2. + Result goes to SLJIT_TEMPORARY_REG2:SLJIT_TEMPORARY_REG1 (high:low) word */ +#define SLJIT_UMUL 2 +/* Flags: may destroy flags + Signed multiplication of SLJIT_TEMPORARY_REG1 and SLJIT_TEMPORARY_REG2. + Result goes to SLJIT_TEMPORARY_REG2:SLJIT_TEMPORARY_REG1 (high:low) word */ +#define SLJIT_SMUL 3 +/* Flags: I | may destroy flags + Unsigned divide the value in SLJIT_TEMPORARY_REG1 by SLJIT_TEMPORARY_REG2. + Result goes to SLJIT_TEMPORARY_REG1 and remainder goes to SLJIT_TEMPORARY_REG2. + Note: if SLJIT_TEMPORARY_REG2 is equal to 0, the behaviour is undefined. */ +#define SLJIT_UDIV 4 +/* Flags: I | may destroy flags + Signed divide the value in SLJIT_TEMPORARY_REG1 by SLJIT_TEMPORARY_REG2. + Result goes to SLJIT_TEMPORARY_REG1 and remainder goes to SLJIT_TEMPORARY_REG2. + Note: if SLJIT_TEMPORARY_REG2 is equal to 0, the behaviour is undefined. */ +#define SLJIT_SDIV 5 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op); @@ -462,68 +483,68 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int SH = unsgined half (16 bit) */ /* Flags: - (never set any flags) */ -#define SLJIT_MOV 2 +#define SLJIT_MOV 6 /* Flags: - (never set any flags) */ -#define SLJIT_MOV_UB 3 +#define SLJIT_MOV_UB 7 /* Flags: - (never set any flags) */ -#define SLJIT_MOV_SB 4 +#define SLJIT_MOV_SB 8 /* Flags: - (never set any flags) */ -#define SLJIT_MOV_UH 5 +#define SLJIT_MOV_UH 9 /* Flags: - (never set any flags) */ -#define SLJIT_MOV_SH 6 +#define SLJIT_MOV_SH 10 /* Flags: - (never set any flags) */ -#define SLJIT_MOV_UI 7 +#define SLJIT_MOV_UI 11 /* Flags: - (never set any flags) */ -#define SLJIT_MOV_SI 8 +#define SLJIT_MOV_SI 12 /* Flags: - (never set any flags) */ -#define SLJIT_MOVU 9 +#define SLJIT_MOVU 13 /* Flags: - (never set any flags) */ -#define SLJIT_MOVU_UB 10 +#define SLJIT_MOVU_UB 14 /* Flags: - (never set any flags) */ -#define SLJIT_MOVU_SB 11 +#define SLJIT_MOVU_SB 15 /* Flags: - (never set any flags) */ -#define SLJIT_MOVU_UH 12 +#define SLJIT_MOVU_UH 16 /* Flags: - (never set any flags) */ -#define SLJIT_MOVU_SH 13 +#define SLJIT_MOVU_SH 17 /* Flags: - (never set any flags) */ -#define SLJIT_MOVU_UI 14 +#define SLJIT_MOVU_UI 18 /* Flags: - (never set any flags) */ -#define SLJIT_MOVU_SI 15 +#define SLJIT_MOVU_SI 19 /* Flags: I | E | K */ -#define SLJIT_NOT 16 +#define SLJIT_NOT 20 /* Flags: I | E | O | K */ -#define SLJIT_NEG 17 +#define SLJIT_NEG 21 /* Count leading zeroes Flags: I | E | K */ -#define SLJIT_CLZ 18 +#define SLJIT_CLZ 22 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int src, sljit_w srcw); /* Flags: I | E | O | C | K */ -#define SLJIT_ADD 19 +#define SLJIT_ADD 23 /* Flags: I | C | K */ -#define SLJIT_ADDC 20 +#define SLJIT_ADDC 24 /* Flags: I | E | S | U | O | C | K */ -#define SLJIT_SUB 21 +#define SLJIT_SUB 25 /* Flags: I | C | K */ -#define SLJIT_SUBC 22 +#define SLJIT_SUBC 26 /* Note: integer mul */ /* Flags: I | O (see SLJIT_C_MUL_*) | K */ -#define SLJIT_MUL 23 +#define SLJIT_MUL 27 /* Flags: I | E | K */ -#define SLJIT_AND 24 +#define SLJIT_AND 28 /* Flags: I | E | K */ -#define SLJIT_OR 25 +#define SLJIT_OR 29 /* Flags: I | E | K */ -#define SLJIT_XOR 26 +#define SLJIT_XOR 30 /* Flags: I | E | K */ -#define SLJIT_SHL 27 +#define SLJIT_SHL 31 /* Flags: I | E | K */ -#define SLJIT_LSHR 28 +#define SLJIT_LSHR 32 /* Flags: I | E | K */ -#define SLJIT_ASHR 29 +#define SLJIT_ASHR 33 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, @@ -560,26 +581,26 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void); Note: NaN check is always performed. If SLJIT_C_FLOAT_NAN is set, the comparison result is unpredictable. Flags: E | S (see SLJIT_C_FLOAT_*) */ -#define SLJIT_FCMP 30 +#define SLJIT_FCMP 34 /* Flags: - (never set any flags) */ -#define SLJIT_FMOV 31 +#define SLJIT_FMOV 35 /* Flags: - (never set any flags) */ -#define SLJIT_FNEG 32 +#define SLJIT_FNEG 36 /* Flags: - (never set any flags) */ -#define SLJIT_FABS 33 +#define SLJIT_FABS 37 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int src, sljit_w srcw); /* Flags: - (never set any flags) */ -#define SLJIT_FADD 34 +#define SLJIT_FADD 38 /* Flags: - (never set any flags) */ -#define SLJIT_FSUB 35 +#define SLJIT_FSUB 39 /* Flags: - (never set any flags) */ -#define SLJIT_FMUL 36 +#define SLJIT_FMUL 40 /* Flags: - (never set any flags) */ -#define SLJIT_FDIV 37 +#define SLJIT_FDIV 41 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, diff --git a/sljit/sljitNativeARM_Thumb2.c b/sljit/sljitNativeARM_Thumb2.c index 8e5d596..f12c62a 100644 --- a/sljit/sljitNativeARM_Thumb2.c +++ b/sljit/sljitNativeARM_Thumb2.c @@ -38,7 +38,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() #define TMP_FREG1 (SLJIT_FLOAT_REG4 + 1) #define TMP_FREG2 (SLJIT_FLOAT_REG4 + 2) -/* See sljit_emit_enter if you want to change them. */ +/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { 0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15 }; @@ -158,6 +158,7 @@ typedef sljit_ui sljit_ins; #define SXTH 0xb200 #define SXTH_W 0xfa0ff080 #define TST 0x4200 +#define UMULL 0xfba00000 #define UXTB 0xb2c0 #define UXTB_W 0xfa5ff080 #define UXTH 0xb280 @@ -1188,6 +1189,21 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, /* Operators */ /* --------------------------------------------------------------------- */ +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__GNUC__) +extern unsigned int __aeabi_uidivmod(unsigned numerator, unsigned denominator); +extern unsigned int __aeabi_idivmod(unsigned numerator, unsigned denominator); +#else +#error "Software divmod functions are needed" +#endif + +#ifdef __cplusplus +} +#endif + SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op) { CHECK_ERROR(); @@ -1201,6 +1217,25 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int case SLJIT_NOP: push_inst16(compiler, NOP); break; + case SLJIT_UMUL: + case SLJIT_SMUL: + return push_inst32(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) + | (reg_map[SLJIT_TEMPORARY_REG2] << 8) + | (reg_map[SLJIT_TEMPORARY_REG1] << 12) + | (reg_map[SLJIT_TEMPORARY_REG1] << 16) + | reg_map[SLJIT_TEMPORARY_REG2]); + case SLJIT_UDIV: + case SLJIT_SDIV: + FAIL_IF(push_inst32(compiler, 0xf84d2d04 /* str r2, [sp, #-4]! */)); + FAIL_IF(push_inst32(compiler, 0xf84dcd04 /* str ip, [sp, #-4]! */)); +#if defined(__GNUC__) + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + (op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); +#else +#error "Software divmod functions are needed" +#endif + FAIL_IF(push_inst32(compiler, 0xf85dcb04 /* ldr ip, [sp], #4 */)); + return push_inst32(compiler, 0xf85d2b04 /* ldr r2, [sp], #4 */); } return SLJIT_SUCCESS; diff --git a/sljit/sljitNativeARM_v5.c b/sljit/sljitNativeARM_v5.c index 73ed98e..27a2fbd 100644 --- a/sljit/sljitNativeARM_v5.c +++ b/sljit/sljitNativeARM_v5.c @@ -54,7 +54,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() #define MAX_DIFFERENCE(max_diff) \ (((max_diff) / (int)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1)) -/* See sljit_emit_enter if you want to change them. */ +/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { 0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15 }; @@ -84,7 +84,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define BX 0xe12fff10 #define CLZ 0xe16f0f10 #define CMP_DP 0xa -#define DEBUGGER 0xe1200070 +#define BKPT 0xe1200070 #define EOR_DP 0x1 #define MOV_DP 0xd #define MUL 0xe0000090 @@ -98,6 +98,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define SBC_DP 0x6 #define SMULL 0xe0c00090 #define SUB_DP 0x2 +#define UMULL 0xe0800090 #define VABS_F64 0xeeb00bc0 #define VADD_F64 0xee300b00 #define VCMP_F64 0xeeb40b40 @@ -1755,6 +1756,21 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, return SLJIT_SUCCESS; } +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__GNUC__) +extern unsigned int __aeabi_uidivmod(unsigned numerator, unsigned denominator); +extern unsigned int __aeabi_idivmod(unsigned numerator, unsigned denominator); +#else +#error "Software divmod functions are needed" +#endif + +#ifdef __cplusplus +} +#endif + SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op) { CHECK_ERROR(); @@ -1763,11 +1779,37 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op = GET_OPCODE(op); switch (op) { case SLJIT_BREAKPOINT: - EMIT_INSTRUCTION(DEBUGGER); + EMIT_INSTRUCTION(BKPT); break; case SLJIT_NOP: EMIT_INSTRUCTION(NOP); break; + case SLJIT_UMUL: + case SLJIT_SMUL: +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) + | (reg_map[SLJIT_TEMPORARY_REG2] << 16) + | (reg_map[SLJIT_TEMPORARY_REG1] << 12) + | (reg_map[SLJIT_TEMPORARY_REG1] << 8) + | reg_map[SLJIT_TEMPORARY_REG2]); +#else + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG2))); + return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) + | (reg_map[SLJIT_TEMPORARY_REG2] << 16) + | (reg_map[SLJIT_TEMPORARY_REG1] << 12) + | (reg_map[SLJIT_TEMPORARY_REG1] << 8) + | reg_map[TMP_REG1]); +#endif + case SLJIT_UDIV: + case SLJIT_SDIV: + EMIT_INSTRUCTION(0xe52d2008 /* str r2, [sp, #-8]! */); +#if defined(__GNUC__) + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + (op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); +#else +#error "Software divmod functions are needed" +#endif + return push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */); } return SLJIT_SUCCESS; diff --git a/sljit/sljitNativeMIPS_common.c b/sljit/sljitNativeMIPS_common.c index 9617ba4..8b5bc1b 100644 --- a/sljit/sljitNativeMIPS_common.c +++ b/sljit/sljitNativeMIPS_common.c @@ -110,6 +110,8 @@ typedef sljit_ui sljit_ins; #define C_UN_D (HI(17) | FMT_D | LO(49)) #define C_UEQ_D (HI(17) | FMT_D | LO(51)) #define C_ULT_D (HI(17) | FMT_D | LO(53)) +#define DIV (HI(0) | LO(26)) +#define DIVU (HI(0) | LO(27)) #define DIV_D (HI(17) | FMT_D | LO(3)) #define J (HI(2)) #define JAL (HI(3)) @@ -128,6 +130,7 @@ typedef sljit_ui sljit_ins; #define MOVZ (HI(0) | LO(10)) #define MUL_D (HI(17) | FMT_D | LO(2)) #define MULT (HI(0) | LO(24)) +#define MULTU (HI(0) | LO(25)) #define NOP (HI(0) | LO(0)) #define NOR (HI(0) | LO(39)) #define OR (HI(0) | LO(37)) @@ -929,6 +932,20 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int return push_inst(compiler, BREAK, UNMOVABLE_INS); case SLJIT_NOP: return push_inst(compiler, NOP, UNMOVABLE_INS); + case SLJIT_UMUL: + case SLJIT_SMUL: + FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_TEMPORARY_REG1) | T(SLJIT_TEMPORARY_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_TEMPORARY_REG1), DR(SLJIT_TEMPORARY_REG1))); + return push_inst(compiler, MFHI | D(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2)); + case SLJIT_UDIV: + case SLJIT_SDIV: +#if !(defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_TEMPORARY_REG1) | T(SLJIT_TEMPORARY_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_TEMPORARY_REG1), DR(SLJIT_TEMPORARY_REG1))); + return push_inst(compiler, MFHI | D(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2)); } return SLJIT_SUCCESS; diff --git a/sljit/sljitNativePPC_32.c b/sljit/sljitNativePPC_32.c index b990611..82be28c 100644 --- a/sljit/sljitNativePPC_32.c +++ b/sljit/sljitNativePPC_32.c @@ -31,6 +31,9 @@ static int load_immediate(struct sljit_compiler *compiler, int reg, sljit_w imm) if (imm <= SIMM_MAX && imm >= SIMM_MIN) return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | S(ZERO_REG) | A(reg) | IMM(imm)); + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; } diff --git a/sljit/sljitNativePPC_64.c b/sljit/sljitNativePPC_64.c index ffb3ae0..e35e862 100644 --- a/sljit/sljitNativePPC_64.c +++ b/sljit/sljitNativePPC_64.c @@ -49,6 +49,9 @@ static int load_immediate(struct sljit_compiler *compiler, int reg, sljit_w imm) if (imm <= SIMM_MAX && imm >= SIMM_MIN) return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | S(ZERO_REG) | A(reg) | IMM(imm)); + if (imm <= SLJIT_W(0x7fffffff) && imm >= SLJIT_W(-0x80000000)) { FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; diff --git a/sljit/sljitNativePPC_common.c b/sljit/sljitNativePPC_common.c index 03c28ed..ee8e173 100644 --- a/sljit/sljitNativePPC_common.c +++ b/sljit/sljitNativePPC_common.c @@ -101,6 +101,10 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #define CMPL (HI(31) | LO(32)) #define CMPLI (HI(10)) #define CROR (HI(19) | LO(449)) +#define DIVD (HI(31) | LO(489)) +#define DIVDU (HI(31) | LO(457)) +#define DIVW (HI(31) | LO(491)) +#define DIVWU (HI(31) | LO(459)) #define EXTSB (HI(31) | LO(954)) #define EXTSH (HI(31) | LO(922)) #define EXTSW (HI(31) | LO(986)) @@ -123,6 +127,10 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #define MTCTR (HI(31) | LO(467) | 0x90000) #define MTLR (HI(31) | LO(467) | 0x80000) #define MTXER (HI(31) | LO(467) | 0x10000) +#define MULHD (HI(31) | LO(73)) +#define MULHDU (HI(31) | LO(9)) +#define MULHW (HI(31) | LO(75)) +#define MULHWU (HI(31) | LO(11)) #define MULLD (HI(31) | LO(233)) #define MULLI (HI(7)) #define MULLW (HI(31) | LO(235)) @@ -1028,12 +1036,38 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int CHECK_ERROR(); check_sljit_emit_op0(compiler, op); - op = GET_OPCODE(op); - switch (op) { + switch (GET_OPCODE(op)) { case SLJIT_BREAKPOINT: case SLJIT_NOP: return push_inst(compiler, NOP); break; + case SLJIT_UMUL: + case SLJIT_SMUL: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG1))); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2))); + return push_inst(compiler, (GET_OPCODE(op) == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_TEMPORARY_REG2) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)); +#else + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2))); + return push_inst(compiler, (GET_OPCODE(op) == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_TEMPORARY_REG2) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)); +#endif + case SLJIT_UDIV: + case SLJIT_SDIV: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG1))); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_INT_OP) { + FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG1) | B(SLJIT_TEMPORARY_REG2))); + return push_inst(compiler, SUBF | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG2) | B(TMP_REG1)); + } + FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2))); + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG1) | B(SLJIT_TEMPORARY_REG2))); + return push_inst(compiler, SUBF | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG2) | B(TMP_REG1)); +#else + FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG1) | B(SLJIT_TEMPORARY_REG2))); + return push_inst(compiler, SUBF | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG2) | B(TMP_REG1)); +#endif } return SLJIT_SUCCESS; diff --git a/sljit/sljitNativeX86_64.c b/sljit/sljitNativeX86_64.c index 1a35d82..7a1e28e 100644 --- a/sljit/sljitNativeX86_64.c +++ b/sljit/sljitNativeX86_64.c @@ -821,33 +821,24 @@ static int emit_mov_int(struct sljit_compiler *compiler, int sign, code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); FAIL_IF(!code); *code++ = 0x63; - } - else { - if (dst_r == src) { - compiler->mode32 = 1; - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, 0); - FAIL_IF(!code); - *code++ = 0x8b; - compiler->mode32 = 0; - } + } else if (dst_r == src) { + compiler->mode32 = 0; + code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 32, dst, 0); + FAIL_IF(!code); + *code |= 0x4 << 3; + code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 32, dst, 0); + FAIL_IF(!code); + *code |= 0x7 << 3; + } else { /* xor reg, reg. */ code = emit_x86_instruction(compiler, 1, dst_r, 0, dst_r, 0); FAIL_IF(!code); *code++ = 0x33; - if (dst_r != src) { - compiler->mode32 = 1; - code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); - FAIL_IF(!code); - *code++ = 0x8b; - compiler->mode32 = 0; - } - else { - compiler->mode32 = 1; - code = emit_x86_instruction(compiler, 1, src, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0x8b; - compiler->mode32 = 0; - } + compiler->mode32 = 1; + code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); + FAIL_IF(!code); + *code++ = 0x8b; + compiler->mode32 = 0; } } diff --git a/sljit/sljitNativeX86_common.c b/sljit/sljitNativeX86_common.c index e44b815..7b1efe0 100644 --- a/sljit/sljitNativeX86_common.c +++ b/sljit/sljitNativeX86_common.c @@ -474,32 +474,6 @@ static void SLJIT_CALL sljit_touch_stack(sljit_w local_size) #include "sljitNativeX86_64.c" #endif -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op) -{ - sljit_ub *buf; - - CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); - - op = GET_OPCODE(op); - switch (op) { - case SLJIT_BREAKPOINT: - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); - INC_SIZE(1); - *buf = 0xcc; - break; - case SLJIT_NOP: - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); - INC_SIZE(1); - *buf = 0x90; - break; - } - - return SLJIT_SUCCESS; -} - static int emit_mov(struct sljit_compiler *compiler, int dst, sljit_w dstw, int src, sljit_w srcw) @@ -568,6 +542,150 @@ static int emit_mov(struct sljit_compiler *compiler, #define EMIT_MOV(compiler, dst, dstw, src, srcw) \ FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op) +{ + sljit_ub *buf; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + int size; +#endif + + CHECK_ERROR(); + check_sljit_emit_op0(compiler, op); + + switch (GET_OPCODE(op)) { + case SLJIT_BREAKPOINT: + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!buf); + INC_SIZE(1); + *buf = 0xcc; + break; + case SLJIT_NOP: + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!buf); + INC_SIZE(1); + *buf = 0x90; + break; + case SLJIT_UMUL: + case SLJIT_SMUL: + case SLJIT_UDIV: + case SLJIT_SDIV: + compiler->flags_saved = 0; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#ifdef _WIN64 + SLJIT_COMPILE_ASSERT( + reg_map[SLJIT_TEMPORARY_REG1] == 0 + && reg_map[SLJIT_TEMPORARY_REG2] == 2 + && reg_map[TMP_REGISTER] > 7, + invalid_register_assignment_for_div_mul); +#else + SLJIT_COMPILE_ASSERT( + reg_map[SLJIT_TEMPORARY_REG1] == 0 + && reg_map[SLJIT_TEMPORARY_REG2] < 7 + && reg_map[TMP_REGISTER] == 2, + invalid_register_assignment_for_div_mul); +#endif + compiler->mode32 = op & SLJIT_INT_OP; +#endif + + op = GET_OPCODE(op); + if (op == SLJIT_UDIV) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) + EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0); + buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0); +#else + buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); +#endif + FAIL_IF(!buf); + *buf = 0x33; + } + + if (op == SLJIT_SDIV) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) + EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0); + EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG1, 0); +#else + EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0); +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + buf = (sljit_ub*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!buf); + INC_SIZE(3); + *buf++ = 0xc1; + *buf++ = 0xfa; + *buf = 0x1f; +#else + if (compiler->mode32) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!buf); + INC_SIZE(3); + *buf++ = 0xc1; + *buf++ = 0xfa; + *buf = 0x1f; + } else { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!buf); + INC_SIZE(4); + *buf++ = REX_W; + *buf++ = 0xc1; + *buf++ = 0xfa; + *buf = 0x3f; + } +#endif + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + buf = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!buf); + INC_SIZE(2); + *buf++ = 0xf7; + *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]); +#else +#ifdef _WIN64 + size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2; +#else + size = (!compiler->mode32) ? 3 : 2; +#endif + buf = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!buf); + INC_SIZE(size); +#ifdef _WIN64 + if (!compiler->mode32) + *buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0); + else if (op >= SLJIT_UDIV) + *buf++ = REX_B; + *buf++ = 0xf7; + *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]); +#else + if (!compiler->mode32) + *buf++ = REX_W; + *buf++ = 0xf7; + *buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2]; +#endif +#endif + switch (op) { + case SLJIT_UMUL: + *buf |= 4 << 3; + break; + case SLJIT_SMUL: + *buf |= 5 << 3; + break; + case SLJIT_UDIV: + *buf |= 6 << 3; + break; + case SLJIT_SDIV: + *buf |= 7 << 3; + break; + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) + EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0); +#endif + break; + } + + return SLJIT_SUCCESS; +} + #define ENCODE_PREFIX(prefix) \ do { \ code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \ |