summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pcre_jit_compile.c18
-rw-r--r--pcretest.c2
-rw-r--r--sljit/sljitLir.c6
-rw-r--r--sljit/sljitLir.h107
-rw-r--r--sljit/sljitNativeARM_Thumb2.c37
-rw-r--r--sljit/sljitNativeARM_v5.c48
-rw-r--r--sljit/sljitNativeMIPS_common.c17
-rw-r--r--sljit/sljitNativePPC_32.c3
-rw-r--r--sljit/sljitNativePPC_64.c3
-rw-r--r--sljit/sljitNativePPC_common.c38
-rw-r--r--sljit/sljitNativeX86_64.c37
-rw-r--r--sljit/sljitNativeX86_common.c170
12 files changed, 376 insertions, 110 deletions
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index db3485e..fbf2368 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -322,7 +322,7 @@ typedef struct compare_context {
int ucharptr;
union {
sljit_i asint;
- sljit_h asshort;
+ sljit_uh asushort;
#ifdef COMPILE_PCRE8
sljit_ub asbyte;
sljit_ub asuchars[4];
@@ -334,7 +334,7 @@ typedef struct compare_context {
} c;
union {
sljit_i asint;
- sljit_h asshort;
+ sljit_uh asushort;
#ifdef COMPILE_PCRE8
sljit_ub asbyte;
sljit_ub asuchars[4];
@@ -2493,7 +2493,7 @@ if (context->sourcereg == -1)
if (context->length >= 4)
OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
else if (context->length >= 2)
- OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+ OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
else
#endif
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
@@ -2504,7 +2504,7 @@ if (context->sourcereg == -1)
OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
else
#endif
- OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+ OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif
#endif /* COMPILE_PCRE8 */
context->sourcereg = TMP2;
@@ -2545,12 +2545,12 @@ do
OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#ifdef COMPILE_PCRE8
else if (context->length >= 2)
- OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+ OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
else if (context->length >= 1)
OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#else
else if (context->length >= 2)
- OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+ OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
@@ -2563,9 +2563,9 @@ do
break;
case 2 / sizeof(pcre_uchar):
- if (context->oc.asshort != 0)
- OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asshort);
- add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asshort | context->oc.asshort));
+ if (context->oc.asushort != 0)
+ OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
+ add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
break;
#ifdef COMPILE_PCRE8
diff --git a/pcretest.c b/pcretest.c
index 0b5f4b2..a59ab0c 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -2491,7 +2491,7 @@ while (!done)
pcre_uint8 *p, *pp, *ppp;
pcre_uint8 *to_file = NULL;
const pcre_uint8 *tables = NULL;
- pcre_uint32 get_options;
+ unsigned long int get_options;
unsigned long int true_size, true_study_size = 0;
size_t size, regex_gotten_store;
int do_allcaps = 0;
diff --git a/sljit/sljitLir.c b/sljit/sljitLir.c
index f42e26c..1c44f3b 100644
--- a/sljit/sljitLir.c
+++ b/sljit/sljitLir.c
@@ -616,6 +616,7 @@ static char* freg_names[] = {
static SLJIT_CONST char* op_names[] = {
/* op0 */
(char*)"breakpoint", (char*)"nop",
+ (char*)"umul", (char*)"smul", (char*)"udiv", (char*)"sdiv",
/* op1 */
(char*)"mov", (char*)"mov.ub", (char*)"mov.sb", (char*)"mov.uh",
(char*)"mov.sh", (char*)"mov.ui", (char*)"mov.si", (char*)"movu",
@@ -793,10 +794,11 @@ static SLJIT_INLINE void check_sljit_emit_op0(struct sljit_compiler *compiler, i
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(op);
- SLJIT_ASSERT(op >= SLJIT_BREAKPOINT && op <= SLJIT_NOP);
+ SLJIT_ASSERT((op >= SLJIT_BREAKPOINT && op <= SLJIT_SMUL)
+ || ((op & ~SLJIT_INT_OP) >= SLJIT_UDIV && (op & ~SLJIT_INT_OP) <= SLJIT_SDIV));
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose))
- fprintf(compiler->verbose, " %s\n", op_names[op]);
+ fprintf(compiler->verbose, " %s%s\n", op_names[GET_OPCODE(op)], !(op & SLJIT_INT_OP) ? "" : "i");
#endif
}
diff --git a/sljit/sljitLir.h b/sljit/sljitLir.h
index 6b9832c..8012d71 100644
--- a/sljit/sljitLir.h
+++ b/sljit/sljitLir.h
@@ -127,9 +127,11 @@ of sljitConfigInternal.h */
#define SLJIT_RETURN_REG SLJIT_TEMPORARY_REG1
-/* x86 prefers temporary registers for special purposes. If other
- registers are used such purpose, it costs a little performance
- drawback. It doesn't matter for other archs. */
+/* x86 prefers specific registers for special purposes. In case of shift
+ by register it supports only SLJIT_TEMPORARY_REG3 for shift argument
+ (which is the src2 argument of sljit_emit_op2). If another register is
+ used, sljit must exchange data between registers, which causes a minor
+ slowdown. Other architectures have no such limitation. */
#define SLJIT_PREF_SHIFT_REG SLJIT_TEMPORARY_REG3
@@ -370,15 +372,16 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compi
*/
/*
- IMPORATNT NOTE: memory access MUST be naturally aligned.
+ IMPORTANT NOTE: memory access MUST be naturally aligned unless the
+ SLJIT_UNALIGNED macro is defined and its value is 1.
+
length | alignment
---------+-----------
byte | 1 byte (not aligned)
half | 2 byte (real_address & 0x1 == 0)
int | 4 byte (real_address & 0x3 == 0)
- sljit_w | 4 byte if SLJIT_32BIT_ARCHITECTURE defined
- | 8 byte if SLJIT_64BIT_ARCHITECTURE defined
- (This is a strict requirement for embedded systems.)
+ sljit_w | 4 byte if SLJIT_32BIT_ARCHITECTURE is defined and its value is 1
+ | 8 byte if SLJIT_64BIT_ARCHITECTURE is defined and its value is 1
Note: different architectures have different addressing limitations
Thus sljit may generate several instructions for other addressing modes
@@ -450,6 +453,24 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compi
Note: may or may not cause an extra cycle wait
it can even decrease the runtime in a few cases. */
#define SLJIT_NOP 1
+/* Flags: may destroy flags
+ Unsigned multiplication of SLJIT_TEMPORARY_REG1 and SLJIT_TEMPORARY_REG2.
+ Result goes to SLJIT_TEMPORARY_REG2:SLJIT_TEMPORARY_REG1 (high:low) word */
+#define SLJIT_UMUL 2
+/* Flags: may destroy flags
+ Signed multiplication of SLJIT_TEMPORARY_REG1 and SLJIT_TEMPORARY_REG2.
+ Result goes to SLJIT_TEMPORARY_REG2:SLJIT_TEMPORARY_REG1 (high:low) word */
+#define SLJIT_SMUL 3
+/* Flags: I | may destroy flags
+ Unsigned divide the value in SLJIT_TEMPORARY_REG1 by SLJIT_TEMPORARY_REG2.
+ Result goes to SLJIT_TEMPORARY_REG1 and remainder goes to SLJIT_TEMPORARY_REG2.
+ Note: if SLJIT_TEMPORARY_REG2 is equal to 0, the behaviour is undefined. */
+#define SLJIT_UDIV 4
+/* Flags: I | may destroy flags
+ Signed divide the value in SLJIT_TEMPORARY_REG1 by SLJIT_TEMPORARY_REG2.
+ Result goes to SLJIT_TEMPORARY_REG1 and remainder goes to SLJIT_TEMPORARY_REG2.
+ Note: if SLJIT_TEMPORARY_REG2 is equal to 0, the behaviour is undefined. */
+#define SLJIT_SDIV 5
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op);
@@ -462,68 +483,68 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int
SH = unsgined half (16 bit) */
/* Flags: - (never set any flags) */
-#define SLJIT_MOV 2
+#define SLJIT_MOV 6
/* Flags: - (never set any flags) */
-#define SLJIT_MOV_UB 3
+#define SLJIT_MOV_UB 7
/* Flags: - (never set any flags) */
-#define SLJIT_MOV_SB 4
+#define SLJIT_MOV_SB 8
/* Flags: - (never set any flags) */
-#define SLJIT_MOV_UH 5
+#define SLJIT_MOV_UH 9
/* Flags: - (never set any flags) */
-#define SLJIT_MOV_SH 6
+#define SLJIT_MOV_SH 10
/* Flags: - (never set any flags) */
-#define SLJIT_MOV_UI 7
+#define SLJIT_MOV_UI 11
/* Flags: - (never set any flags) */
-#define SLJIT_MOV_SI 8
+#define SLJIT_MOV_SI 12
/* Flags: - (never set any flags) */
-#define SLJIT_MOVU 9
+#define SLJIT_MOVU 13
/* Flags: - (never set any flags) */
-#define SLJIT_MOVU_UB 10
+#define SLJIT_MOVU_UB 14
/* Flags: - (never set any flags) */
-#define SLJIT_MOVU_SB 11
+#define SLJIT_MOVU_SB 15
/* Flags: - (never set any flags) */
-#define SLJIT_MOVU_UH 12
+#define SLJIT_MOVU_UH 16
/* Flags: - (never set any flags) */
-#define SLJIT_MOVU_SH 13
+#define SLJIT_MOVU_SH 17
/* Flags: - (never set any flags) */
-#define SLJIT_MOVU_UI 14
+#define SLJIT_MOVU_UI 18
/* Flags: - (never set any flags) */
-#define SLJIT_MOVU_SI 15
+#define SLJIT_MOVU_SI 19
/* Flags: I | E | K */
-#define SLJIT_NOT 16
+#define SLJIT_NOT 20
/* Flags: I | E | O | K */
-#define SLJIT_NEG 17
+#define SLJIT_NEG 21
/* Count leading zeroes
Flags: I | E | K */
-#define SLJIT_CLZ 18
+#define SLJIT_CLZ 22
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
int dst, sljit_w dstw,
int src, sljit_w srcw);
/* Flags: I | E | O | C | K */
-#define SLJIT_ADD 19
+#define SLJIT_ADD 23
/* Flags: I | C | K */
-#define SLJIT_ADDC 20
+#define SLJIT_ADDC 24
/* Flags: I | E | S | U | O | C | K */
-#define SLJIT_SUB 21
+#define SLJIT_SUB 25
/* Flags: I | C | K */
-#define SLJIT_SUBC 22
+#define SLJIT_SUBC 26
/* Note: integer mul */
/* Flags: I | O (see SLJIT_C_MUL_*) | K */
-#define SLJIT_MUL 23
+#define SLJIT_MUL 27
/* Flags: I | E | K */
-#define SLJIT_AND 24
+#define SLJIT_AND 28
/* Flags: I | E | K */
-#define SLJIT_OR 25
+#define SLJIT_OR 29
/* Flags: I | E | K */
-#define SLJIT_XOR 26
+#define SLJIT_XOR 30
/* Flags: I | E | K */
-#define SLJIT_SHL 27
+#define SLJIT_SHL 31
/* Flags: I | E | K */
-#define SLJIT_LSHR 28
+#define SLJIT_LSHR 32
/* Flags: I | E | K */
-#define SLJIT_ASHR 29
+#define SLJIT_ASHR 33
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
int dst, sljit_w dstw,
@@ -560,26 +581,26 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void);
Note: NaN check is always performed. If SLJIT_C_FLOAT_NAN is set,
the comparison result is unpredictable.
Flags: E | S (see SLJIT_C_FLOAT_*) */
-#define SLJIT_FCMP 30
+#define SLJIT_FCMP 34
/* Flags: - (never set any flags) */
-#define SLJIT_FMOV 31
+#define SLJIT_FMOV 35
/* Flags: - (never set any flags) */
-#define SLJIT_FNEG 32
+#define SLJIT_FNEG 36
/* Flags: - (never set any flags) */
-#define SLJIT_FABS 33
+#define SLJIT_FABS 37
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
int dst, sljit_w dstw,
int src, sljit_w srcw);
/* Flags: - (never set any flags) */
-#define SLJIT_FADD 34
+#define SLJIT_FADD 38
/* Flags: - (never set any flags) */
-#define SLJIT_FSUB 35
+#define SLJIT_FSUB 39
/* Flags: - (never set any flags) */
-#define SLJIT_FMUL 36
+#define SLJIT_FMUL 40
/* Flags: - (never set any flags) */
-#define SLJIT_FDIV 37
+#define SLJIT_FDIV 41
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
int dst, sljit_w dstw,
diff --git a/sljit/sljitNativeARM_Thumb2.c b/sljit/sljitNativeARM_Thumb2.c
index 8e5d596..f12c62a 100644
--- a/sljit/sljitNativeARM_Thumb2.c
+++ b/sljit/sljitNativeARM_Thumb2.c
@@ -38,7 +38,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
#define TMP_FREG1 (SLJIT_FLOAT_REG4 + 1)
#define TMP_FREG2 (SLJIT_FLOAT_REG4 + 2)
-/* See sljit_emit_enter if you want to change them. */
+/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15
};
@@ -158,6 +158,7 @@ typedef sljit_ui sljit_ins;
#define SXTH 0xb200
#define SXTH_W 0xfa0ff080
#define TST 0x4200
+#define UMULL 0xfba00000
#define UXTB 0xb2c0
#define UXTB_W 0xfa5ff080
#define UXTH 0xb280
@@ -1188,6 +1189,21 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler,
/* Operators */
/* --------------------------------------------------------------------- */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__GNUC__)
+extern unsigned int __aeabi_uidivmod(unsigned numerator, unsigned denominator);
+extern unsigned int __aeabi_idivmod(unsigned numerator, unsigned denominator);
+#else
+#error "Software divmod functions are needed"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
{
CHECK_ERROR();
@@ -1201,6 +1217,25 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int
case SLJIT_NOP:
push_inst16(compiler, NOP);
break;
+ case SLJIT_UMUL:
+ case SLJIT_SMUL:
+ return push_inst32(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
+ | (reg_map[SLJIT_TEMPORARY_REG2] << 8)
+ | (reg_map[SLJIT_TEMPORARY_REG1] << 12)
+ | (reg_map[SLJIT_TEMPORARY_REG1] << 16)
+ | reg_map[SLJIT_TEMPORARY_REG2]);
+ case SLJIT_UDIV:
+ case SLJIT_SDIV:
+ FAIL_IF(push_inst32(compiler, 0xf84d2d04 /* str r2, [sp, #-4]! */));
+ FAIL_IF(push_inst32(compiler, 0xf84dcd04 /* str ip, [sp, #-4]! */));
+#if defined(__GNUC__)
+ FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
+ (op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
+#else
+#error "Software divmod functions are needed"
+#endif
+ FAIL_IF(push_inst32(compiler, 0xf85dcb04 /* ldr ip, [sp], #4 */));
+ return push_inst32(compiler, 0xf85d2b04 /* ldr r2, [sp], #4 */);
}
return SLJIT_SUCCESS;
diff --git a/sljit/sljitNativeARM_v5.c b/sljit/sljitNativeARM_v5.c
index 73ed98e..27a2fbd 100644
--- a/sljit/sljitNativeARM_v5.c
+++ b/sljit/sljitNativeARM_v5.c
@@ -54,7 +54,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
#define MAX_DIFFERENCE(max_diff) \
(((max_diff) / (int)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))
-/* See sljit_emit_enter if you want to change them. */
+/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15
};
@@ -84,7 +84,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
#define BX 0xe12fff10
#define CLZ 0xe16f0f10
#define CMP_DP 0xa
-#define DEBUGGER 0xe1200070
+#define BKPT 0xe1200070
#define EOR_DP 0x1
#define MOV_DP 0xd
#define MUL 0xe0000090
@@ -98,6 +98,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
#define SBC_DP 0x6
#define SMULL 0xe0c00090
#define SUB_DP 0x2
+#define UMULL 0xe0800090
#define VABS_F64 0xeeb00bc0
#define VADD_F64 0xee300b00
#define VCMP_F64 0xeeb40b40
@@ -1755,6 +1756,21 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags,
return SLJIT_SUCCESS;
}
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__GNUC__)
+extern unsigned int __aeabi_uidivmod(unsigned numerator, unsigned denominator);
+extern unsigned int __aeabi_idivmod(unsigned numerator, unsigned denominator);
+#else
+#error "Software divmod functions are needed"
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
{
CHECK_ERROR();
@@ -1763,11 +1779,37 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int
op = GET_OPCODE(op);
switch (op) {
case SLJIT_BREAKPOINT:
- EMIT_INSTRUCTION(DEBUGGER);
+ EMIT_INSTRUCTION(BKPT);
break;
case SLJIT_NOP:
EMIT_INSTRUCTION(NOP);
break;
+ case SLJIT_UMUL:
+ case SLJIT_SMUL:
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+ return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
+ | (reg_map[SLJIT_TEMPORARY_REG2] << 16)
+ | (reg_map[SLJIT_TEMPORARY_REG1] << 12)
+ | (reg_map[SLJIT_TEMPORARY_REG1] << 8)
+ | reg_map[SLJIT_TEMPORARY_REG2]);
+#else
+ EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG2)));
+ return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
+ | (reg_map[SLJIT_TEMPORARY_REG2] << 16)
+ | (reg_map[SLJIT_TEMPORARY_REG1] << 12)
+ | (reg_map[SLJIT_TEMPORARY_REG1] << 8)
+ | reg_map[TMP_REG1]);
+#endif
+ case SLJIT_UDIV:
+ case SLJIT_SDIV:
+ EMIT_INSTRUCTION(0xe52d2008 /* str r2, [sp, #-8]! */);
+#if defined(__GNUC__)
+ FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
+ (op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
+#else
+#error "Software divmod functions are needed"
+#endif
+ return push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */);
}
return SLJIT_SUCCESS;
diff --git a/sljit/sljitNativeMIPS_common.c b/sljit/sljitNativeMIPS_common.c
index 9617ba4..8b5bc1b 100644
--- a/sljit/sljitNativeMIPS_common.c
+++ b/sljit/sljitNativeMIPS_common.c
@@ -110,6 +110,8 @@ typedef sljit_ui sljit_ins;
#define C_UN_D (HI(17) | FMT_D | LO(49))
#define C_UEQ_D (HI(17) | FMT_D | LO(51))
#define C_ULT_D (HI(17) | FMT_D | LO(53))
+#define DIV (HI(0) | LO(26))
+#define DIVU (HI(0) | LO(27))
#define DIV_D (HI(17) | FMT_D | LO(3))
#define J (HI(2))
#define JAL (HI(3))
@@ -128,6 +130,7 @@ typedef sljit_ui sljit_ins;
#define MOVZ (HI(0) | LO(10))
#define MUL_D (HI(17) | FMT_D | LO(2))
#define MULT (HI(0) | LO(24))
+#define MULTU (HI(0) | LO(25))
#define NOP (HI(0) | LO(0))
#define NOR (HI(0) | LO(39))
#define OR (HI(0) | LO(37))
@@ -929,6 +932,20 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int
return push_inst(compiler, BREAK, UNMOVABLE_INS);
case SLJIT_NOP:
return push_inst(compiler, NOP, UNMOVABLE_INS);
+ case SLJIT_UMUL:
+ case SLJIT_SMUL:
+ FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_TEMPORARY_REG1) | T(SLJIT_TEMPORARY_REG2), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_TEMPORARY_REG1), DR(SLJIT_TEMPORARY_REG1)));
+ return push_inst(compiler, MFHI | D(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2));
+ case SLJIT_UDIV:
+ case SLJIT_SDIV:
+#if !(defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64)
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+ FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+#endif
+ FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_TEMPORARY_REG1) | T(SLJIT_TEMPORARY_REG2), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_TEMPORARY_REG1), DR(SLJIT_TEMPORARY_REG1)));
+ return push_inst(compiler, MFHI | D(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2));
}
return SLJIT_SUCCESS;
diff --git a/sljit/sljitNativePPC_32.c b/sljit/sljitNativePPC_32.c
index b990611..82be28c 100644
--- a/sljit/sljitNativePPC_32.c
+++ b/sljit/sljitNativePPC_32.c
@@ -31,6 +31,9 @@ static int load_immediate(struct sljit_compiler *compiler, int reg, sljit_w imm)
if (imm <= SIMM_MAX && imm >= SIMM_MIN)
return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm));
+ if (!(imm & ~0xffff))
+ return push_inst(compiler, ORI | S(ZERO_REG) | A(reg) | IMM(imm));
+
FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16)));
return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;
}
diff --git a/sljit/sljitNativePPC_64.c b/sljit/sljitNativePPC_64.c
index ffb3ae0..e35e862 100644
--- a/sljit/sljitNativePPC_64.c
+++ b/sljit/sljitNativePPC_64.c
@@ -49,6 +49,9 @@ static int load_immediate(struct sljit_compiler *compiler, int reg, sljit_w imm)
if (imm <= SIMM_MAX && imm >= SIMM_MIN)
return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm));
+ if (!(imm & ~0xffff))
+ return push_inst(compiler, ORI | S(ZERO_REG) | A(reg) | IMM(imm));
+
if (imm <= SLJIT_W(0x7fffffff) && imm >= SLJIT_W(-0x80000000)) {
FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16)));
return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;
diff --git a/sljit/sljitNativePPC_common.c b/sljit/sljitNativePPC_common.c
index 03c28ed..ee8e173 100644
--- a/sljit/sljitNativePPC_common.c
+++ b/sljit/sljitNativePPC_common.c
@@ -101,6 +101,10 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
#define CMPL (HI(31) | LO(32))
#define CMPLI (HI(10))
#define CROR (HI(19) | LO(449))
+#define DIVD (HI(31) | LO(489))
+#define DIVDU (HI(31) | LO(457))
+#define DIVW (HI(31) | LO(491))
+#define DIVWU (HI(31) | LO(459))
#define EXTSB (HI(31) | LO(954))
#define EXTSH (HI(31) | LO(922))
#define EXTSW (HI(31) | LO(986))
@@ -123,6 +127,10 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
#define MTCTR (HI(31) | LO(467) | 0x90000)
#define MTLR (HI(31) | LO(467) | 0x80000)
#define MTXER (HI(31) | LO(467) | 0x10000)
+#define MULHD (HI(31) | LO(73))
+#define MULHDU (HI(31) | LO(9))
+#define MULHW (HI(31) | LO(75))
+#define MULHWU (HI(31) | LO(11))
#define MULLD (HI(31) | LO(233))
#define MULLI (HI(7))
#define MULLW (HI(31) | LO(235))
@@ -1028,12 +1036,38 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int
CHECK_ERROR();
check_sljit_emit_op0(compiler, op);
- op = GET_OPCODE(op);
- switch (op) {
+ switch (GET_OPCODE(op)) {
case SLJIT_BREAKPOINT:
case SLJIT_NOP:
return push_inst(compiler, NOP);
break;
+ case SLJIT_UMUL:
+ case SLJIT_SMUL:
+ FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG1)));
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)));
+ return push_inst(compiler, (GET_OPCODE(op) == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_TEMPORARY_REG2) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2));
+#else
+ FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)));
+ return push_inst(compiler, (GET_OPCODE(op) == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_TEMPORARY_REG2) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2));
+#endif
+ case SLJIT_UDIV:
+ case SLJIT_SDIV:
+ FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG1)));
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ if (op & SLJIT_INT_OP) {
+ FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)));
+ FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG1) | B(SLJIT_TEMPORARY_REG2)));
+ return push_inst(compiler, SUBF | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG2) | B(TMP_REG1));
+ }
+ FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)));
+ FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG1) | B(SLJIT_TEMPORARY_REG2)));
+ return push_inst(compiler, SUBF | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG2) | B(TMP_REG1));
+#else
+ FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)));
+ FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG1) | B(SLJIT_TEMPORARY_REG2)));
+ return push_inst(compiler, SUBF | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG2) | B(TMP_REG1));
+#endif
}
return SLJIT_SUCCESS;
diff --git a/sljit/sljitNativeX86_64.c b/sljit/sljitNativeX86_64.c
index 1a35d82..7a1e28e 100644
--- a/sljit/sljitNativeX86_64.c
+++ b/sljit/sljitNativeX86_64.c
@@ -821,33 +821,24 @@ static int emit_mov_int(struct sljit_compiler *compiler, int sign,
code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
FAIL_IF(!code);
*code++ = 0x63;
- }
- else {
- if (dst_r == src) {
- compiler->mode32 = 1;
- code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, 0);
- FAIL_IF(!code);
- *code++ = 0x8b;
- compiler->mode32 = 0;
- }
+ } else if (dst_r == src) {
+ compiler->mode32 = 0;
+ code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 32, dst, 0);
+ FAIL_IF(!code);
+ *code |= 0x4 << 3;
+ code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 32, dst, 0);
+ FAIL_IF(!code);
+ *code |= 0x7 << 3;
+ } else {
/* xor reg, reg. */
code = emit_x86_instruction(compiler, 1, dst_r, 0, dst_r, 0);
FAIL_IF(!code);
*code++ = 0x33;
- if (dst_r != src) {
- compiler->mode32 = 1;
- code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
- FAIL_IF(!code);
- *code++ = 0x8b;
- compiler->mode32 = 0;
- }
- else {
- compiler->mode32 = 1;
- code = emit_x86_instruction(compiler, 1, src, 0, TMP_REGISTER, 0);
- FAIL_IF(!code);
- *code++ = 0x8b;
- compiler->mode32 = 0;
- }
+ compiler->mode32 = 1;
+ code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
+ FAIL_IF(!code);
+ *code++ = 0x8b;
+ compiler->mode32 = 0;
}
}
diff --git a/sljit/sljitNativeX86_common.c b/sljit/sljitNativeX86_common.c
index e44b815..7b1efe0 100644
--- a/sljit/sljitNativeX86_common.c
+++ b/sljit/sljitNativeX86_common.c
@@ -474,32 +474,6 @@ static void SLJIT_CALL sljit_touch_stack(sljit_w local_size)
#include "sljitNativeX86_64.c"
#endif
-SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
-{
- sljit_ub *buf;
-
- CHECK_ERROR();
- check_sljit_emit_op0(compiler, op);
-
- op = GET_OPCODE(op);
- switch (op) {
- case SLJIT_BREAKPOINT:
- buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!buf);
- INC_SIZE(1);
- *buf = 0xcc;
- break;
- case SLJIT_NOP:
- buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
- FAIL_IF(!buf);
- INC_SIZE(1);
- *buf = 0x90;
- break;
- }
-
- return SLJIT_SUCCESS;
-}
-
static int emit_mov(struct sljit_compiler *compiler,
int dst, sljit_w dstw,
int src, sljit_w srcw)
@@ -568,6 +542,150 @@ static int emit_mov(struct sljit_compiler *compiler,
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
+SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
+{
+ sljit_ub *buf;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ int size;
+#endif
+
+ CHECK_ERROR();
+ check_sljit_emit_op0(compiler, op);
+
+ switch (GET_OPCODE(op)) {
+ case SLJIT_BREAKPOINT:
+ buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+ FAIL_IF(!buf);
+ INC_SIZE(1);
+ *buf = 0xcc;
+ break;
+ case SLJIT_NOP:
+ buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+ FAIL_IF(!buf);
+ INC_SIZE(1);
+ *buf = 0x90;
+ break;
+ case SLJIT_UMUL:
+ case SLJIT_SMUL:
+ case SLJIT_UDIV:
+ case SLJIT_SDIV:
+ compiler->flags_saved = 0;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#ifdef _WIN64
+ SLJIT_COMPILE_ASSERT(
+ reg_map[SLJIT_TEMPORARY_REG1] == 0
+ && reg_map[SLJIT_TEMPORARY_REG2] == 2
+ && reg_map[TMP_REGISTER] > 7,
+ invalid_register_assignment_for_div_mul);
+#else
+ SLJIT_COMPILE_ASSERT(
+ reg_map[SLJIT_TEMPORARY_REG1] == 0
+ && reg_map[SLJIT_TEMPORARY_REG2] < 7
+ && reg_map[TMP_REGISTER] == 2,
+ invalid_register_assignment_for_div_mul);
+#endif
+ compiler->mode32 = op & SLJIT_INT_OP;
+#endif
+
+ op = GET_OPCODE(op);
+ if (op == SLJIT_UDIV) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
+ EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
+ buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
+#else
+ buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
+#endif
+ FAIL_IF(!buf);
+ *buf = 0x33;
+ }
+
+ if (op == SLJIT_SDIV) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
+ EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
+ EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG1, 0);
+#else
+ EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
+#endif
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
+ FAIL_IF(!buf);
+ INC_SIZE(3);
+ *buf++ = 0xc1;
+ *buf++ = 0xfa;
+ *buf = 0x1f;
+#else
+ if (compiler->mode32) {
+ buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
+ FAIL_IF(!buf);
+ INC_SIZE(3);
+ *buf++ = 0xc1;
+ *buf++ = 0xfa;
+ *buf = 0x1f;
+ } else {
+ buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+ FAIL_IF(!buf);
+ INC_SIZE(4);
+ *buf++ = REX_W;
+ *buf++ = 0xc1;
+ *buf++ = 0xfa;
+ *buf = 0x3f;
+ }
+#endif
+ }
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!buf);
+ INC_SIZE(2);
+ *buf++ = 0xf7;
+ *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
+#else
+#ifdef _WIN64
+ size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
+#else
+ size = (!compiler->mode32) ? 3 : 2;
+#endif
+ buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
+ FAIL_IF(!buf);
+ INC_SIZE(size);
+#ifdef _WIN64
+ if (!compiler->mode32)
+ *buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
+ else if (op >= SLJIT_UDIV)
+ *buf++ = REX_B;
+ *buf++ = 0xf7;
+ *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
+#else
+ if (!compiler->mode32)
+ *buf++ = REX_W;
+ *buf++ = 0xf7;
+ *buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
+#endif
+#endif
+ switch (op) {
+ case SLJIT_UMUL:
+ *buf |= 4 << 3;
+ break;
+ case SLJIT_SMUL:
+ *buf |= 5 << 3;
+ break;
+ case SLJIT_UDIV:
+ *buf |= 6 << 3;
+ break;
+ case SLJIT_SDIV:
+ *buf |= 7 << 3;
+ break;
+ }
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
+ EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
+#endif
+ break;
+ }
+
+ return SLJIT_SUCCESS;
+}
+
#define ENCODE_PREFIX(prefix) \
do { \
code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \