author    zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>  2017-05-07 07:28:03 +0000
committer zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>  2017-05-07 07:28:03 +0000
commit    ab5f63e3ff84c89aecfc8e8141b8ee732bfa8600 (patch)
tree      7103644c0810f253da379a5b8eb0cbdf1e3fc48d
parent    4bbd722243d1c17600869dbbfb47cf7864ab3f18 (diff)
download  pcre-ab5f63e3ff84c89aecfc8e8141b8ee732bfa8600.tar.gz
JIT compiler update: sljit_emit_op_flags drops its src/srcw arguments (the destination now doubles as the first operand of the AND/OR/XOR forms), the SLJIT_SET_NOT_OVERFLOW and SLJIT_SET_MUL_NOT_OVERFLOW setters are removed in favour of testing the inverted condition at the jump, SLJIT_UNUSED destinations gain defined semantics (including data prefetch for memory-source MOVs), and x86-64 gains a thirteenth register.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1700 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--  pcre_jit_compile.c               136
-rw-r--r--  sljit/sljitConfigInternal.h        4
-rw-r--r--  sljit/sljitLir.c                  64
-rw-r--r--  sljit/sljitLir.h                  96
-rw-r--r--  sljit/sljitNativeARM_32.c         83
-rw-r--r--  sljit/sljitNativeARM_64.c         77
-rw-r--r--  sljit/sljitNativeARM_T2_32.c      91
-rw-r--r--  sljit/sljitNativeMIPS_32.c         6
-rw-r--r--  sljit/sljitNativeMIPS_64.c         6
-rw-r--r--  sljit/sljitNativeMIPS_common.c   124
-rw-r--r--  sljit/sljitNativePPC_common.c    199
-rw-r--r--  sljit/sljitNativeSPARC_common.c   63
-rw-r--r--  sljit/sljitNativeTILEGX_64.c       3
-rw-r--r--  sljit/sljitNativeX86_64.c         25
-rw-r--r--  sljit/sljitNativeX86_common.c    307
15 files changed, 655 insertions, 629 deletions
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index ee0835a..249edbe 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -552,8 +552,8 @@ the start pointers when the end of the capturing group has not yet reached. */
sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
-#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
- sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
+#define OP_FLAGS(op, dst, dstw, type) \
+ sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
sljit_get_local_base(compiler, (dst), (dstw), (offset))
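
For reference, OP_FLAGS now wraps the narrowed emitter whose prototype changes in the sljitLir.h hunk further down; a sketch of the new declaration:

    sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
        sljit_s32 dst, sljit_sw dstw,
        sljit_s32 type);

The dropped src/srcw pair is not lost functionality: for the AND/OR/XOR forms the destination now doubles as the first operand.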
@@ -3098,7 +3098,7 @@ if (common->utf)
/* Skip low surrogate if necessary. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
return;
@@ -3215,7 +3215,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
JUMPHERE(jump);
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
-OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
@@ -3378,7 +3378,7 @@ if (newlinecheck)
end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
@@ -3416,7 +3416,7 @@ if (common->utf)
singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
JUMPHERE(singlechar);
@@ -4254,9 +4254,9 @@ else
else
{
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
found = JUMP(SLJIT_NOT_ZERO);
}
}
@@ -4576,7 +4576,7 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
@@ -4621,7 +4621,7 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
@@ -4697,7 +4697,7 @@ if (common->utf)
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
}
@@ -4842,10 +4842,10 @@ if (common->use_ucp)
add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
JUMPHERE(jump);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
}
@@ -4886,10 +4886,10 @@ if (common->use_ucp)
add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
JUMPHERE(jump);
}
else
@@ -5082,21 +5082,21 @@ sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
-OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
{
#endif
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
}
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
-OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
@@ -5108,33 +5108,33 @@ DEFINE_COMPILER;
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
-OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
-OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
{
#endif
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#ifdef COMPILE_PCRE8
}
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
-OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
@@ -5148,21 +5148,21 @@ sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
-OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
{
#endif
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
}
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
-OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
@@ -5749,13 +5749,13 @@ while (*cc != XCL_END)
if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
{
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
numberofcmps++;
}
else if (numberofcmps > 0)
{
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
numberofcmps = 0;
}
@@ -5775,13 +5775,13 @@ while (*cc != XCL_END)
if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
{
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
numberofcmps++;
}
else if (numberofcmps > 0)
{
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
numberofcmps = 0;
}
@@ -5807,11 +5807,11 @@ while (*cc != XCL_END)
case PT_LAMP:
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
@@ -5834,32 +5834,32 @@ while (*cc != XCL_END)
case PT_PXSPACE:
SET_CHAR_OFFSET(9);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
SET_TYPE_OFFSET(ucp_Zl);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
case PT_WORD:
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
/* Fall through. */
case PT_ALNUM:
SET_TYPE_OFFSET(ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
SET_TYPE_OFFSET(ucp_Nd);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
@@ -5882,7 +5882,7 @@ while (*cc != XCL_END)
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
}
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
other_cases += 2;
}
else if (is_powerof2(other_cases[2] ^ other_cases[1]))
@@ -5895,41 +5895,41 @@ while (*cc != XCL_END)
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
}
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
- OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
other_cases += 3;
}
else
{
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
}
while (*other_cases != NOTACHAR)
{
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
- OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
}
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
case PT_UCNC:
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
SET_CHAR_OFFSET(0xa0);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
SET_CHAR_OFFSET(0);
OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
@@ -5937,20 +5937,20 @@ while (*cc != XCL_END)
/* C and Z groups are the farthest two groups. */
SET_TYPE_OFFSET(ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
/* In case of ucp_Cf, we overwrite the result. */
SET_CHAR_OFFSET(0x2066);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
JUMPHERE(jump);
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
@@ -5960,20 +5960,20 @@ while (*cc != XCL_END)
/* C and Z groups are the farthest two groups. */
SET_TYPE_OFFSET(ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
- OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
+ OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
/* In case of ucp_Cf, we overwrite the result. */
SET_CHAR_OFFSET(0x2066);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
- OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
JUMPHERE(jump);
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
@@ -5982,15 +5982,15 @@ while (*cc != XCL_END)
case PT_PXPUNCT:
SET_TYPE_OFFSET(ucp_Sc);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
SET_CHAR_OFFSET(0);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
- OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
SET_TYPE_OFFSET(ucp_Pc);
OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
@@ -6058,9 +6058,9 @@ switch(type)
{
jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
check_partial(common, TRUE);
add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
@@ -6336,7 +6336,7 @@ switch(type)
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif
diff --git a/sljit/sljitConfigInternal.h b/sljit/sljitConfigInternal.h
index 0931579..cc0810f 100644
--- a/sljit/sljitConfigInternal.h
+++ b/sljit/sljitConfigInternal.h
@@ -567,11 +567,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifndef _WIN64
-#define SLJIT_NUMBER_OF_REGISTERS 12
+#define SLJIT_NUMBER_OF_REGISTERS 13
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6
#define SLJIT_LOCALS_OFFSET_BASE 0
#else
-#define SLJIT_NUMBER_OF_REGISTERS 12
+#define SLJIT_NUMBER_OF_REGISTERS 13
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset)
#endif /* _WIN64 */
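
With SLJIT_NUMBER_OF_REGISTERS raised to 13 on x86-64, one more register can be mapped at function entry. A minimal sketch, assuming the sljit_emit_enter signature of this sljit revision (options, args, scratches, saveds, fscratches, fsaveds, local_size) and hypothetical counts:

    /* 7 scratch + 6 saved registers now fit into the 13 available ones. */
    sljit_emit_enter(compiler, 0, 1, 7, 6, 0, 0, 0);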
diff --git a/sljit/sljitLir.c b/sljit/sljitLir.c
index fe855cd..66cdda3 100644
--- a/sljit/sljitLir.c
+++ b/sljit/sljitLir.c
@@ -697,12 +697,12 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \
CHECK_ARGUMENT(!((i) & ~0x3)); \
} \
- CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
+ CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK))); \
}
-#define FUNCTION_CHECK_DST(p, i) \
+#define FUNCTION_CHECK_DST(p, i, unused) \
CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); \
- if (FUNCTION_CHECK_IS_REG_OR_UNUSED(p)) \
+ if (FUNCTION_CHECK_IS_REG(p) || ((unused) && (p) == SLJIT_UNUSED)) \
CHECK_ARGUMENT((i) == 0); \
else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \
CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \
@@ -716,7 +716,7 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \
CHECK_ARGUMENT(!((i) & ~0x3)); \
} \
- CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
+ CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK))); \
}
#define FUNCTION_FCHECK(p, i) \
@@ -736,7 +736,7 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \
CHECK_ARGUMENT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SP) && !(i & ~0x3)); \
} \
- CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \
+ CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK))); \
}
#endif /* SLJIT_ARGUMENT_CHECKS */
@@ -977,7 +977,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- FUNCTION_CHECK_DST(dst, dstw);
+ FUNCTION_CHECK_DST(dst, dstw, 0);
compiler->last_flags = 0;
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
@@ -1047,8 +1047,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
break;
case SLJIT_NEG:
CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
- || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW
- || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW);
+ || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW);
break;
case SLJIT_MOV:
case SLJIT_MOV_U32:
@@ -1065,8 +1064,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
break;
}
+ FUNCTION_CHECK_DST(dst, dstw, 1);
FUNCTION_CHECK_SRC(src, srcw);
- FUNCTION_CHECK_DST(dst, dstw);
if (GET_OPCODE(op) >= SLJIT_NOT)
compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z));
@@ -1132,18 +1131,16 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler
case SLJIT_MUL:
CHECK_ARGUMENT(!(op & SLJIT_SET_Z));
CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
- || GET_FLAG_TYPE(op) == SLJIT_MUL_OVERFLOW
- || GET_FLAG_TYPE(op) == SLJIT_MUL_NOT_OVERFLOW);
+ || GET_FLAG_TYPE(op) == SLJIT_MUL_OVERFLOW);
break;
case SLJIT_ADD:
CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
|| GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)
- || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW
- || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW);
+ || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW);
break;
case SLJIT_SUB:
CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
- || (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_NOT_OVERFLOW)
+ || (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_OVERFLOW)
|| GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY));
break;
case SLJIT_ADDC:
@@ -1158,9 +1155,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler
break;
}
+ FUNCTION_CHECK_DST(dst, dstw, 1);
FUNCTION_CHECK_SRC(src1, src1w);
FUNCTION_CHECK_SRC(src2, src2w);
- FUNCTION_CHECK_DST(dst, dstw);
compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
@@ -1317,7 +1314,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(str
CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CONV_S32_FROM_F64);
CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)));
FUNCTION_FCHECK(src, srcw);
- FUNCTION_CHECK_DST(dst, dstw);
+ FUNCTION_CHECK_DST(dst, dstw, 0);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1428,7 +1425,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile
if ((type & 0xff) <= SLJIT_NOT_ZERO)
CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
else
- CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff));
+ CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff)
+ || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)
+ || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW));
CHECK_ARGUMENT((type & SLJIT_I32_OP) == (compiler->last_flags & SLJIT_I32_OP));
}
#endif
@@ -1517,29 +1516,27 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compil
static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1));
- CHECK_ARGUMENT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_MOV_U32 || GET_OPCODE(op) == SLJIT_MOV_S32
+ CHECK_ARGUMENT(op == SLJIT_MOV || op == SLJIT_MOV32
|| (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR));
CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK));
if ((type & 0xff) <= SLJIT_NOT_ZERO)
CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
else
- CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff));
+ CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff)
+ || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)
+ || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW));
- if (GET_OPCODE(op) < SLJIT_ADD) {
- CHECK_ARGUMENT(src == SLJIT_UNUSED && srcw == 0);
- } else {
- CHECK_ARGUMENT(src == dst && srcw == dstw);
+ FUNCTION_CHECK_DST(dst, dstw, 0);
+
+ if (GET_OPCODE(op) >= SLJIT_ADD)
compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z));
- }
- FUNCTION_CHECK_DST(dst, dstw);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1548,10 +1545,6 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com
GET_OPCODE(op) < SLJIT_OP2_BASE ? "mov" : op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE],
GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_I32_OP) ? "32" : ""));
sljit_verbose_param(compiler, dst, dstw);
- if (src != SLJIT_UNUSED) {
- fprintf(compiler->verbose, ", ");
- sljit_verbose_param(compiler, src, srcw);
- }
fprintf(compiler->verbose, ", %s%s\n", jump_names[type & 0xff], JUMP_POSTFIX(type));
}
#endif
@@ -1573,7 +1566,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile
if ((type & 0xff) <= SLJIT_NOT_ZERO)
CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
else
- CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff));
+ CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff)
+ || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)
+ || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW));
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1594,7 +1589,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_co
SLJIT_UNUSED_ARG(offset);
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- FUNCTION_CHECK_DST(dst, dstw);
+ FUNCTION_CHECK_DST(dst, dstw, 0);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -1611,7 +1606,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil
SLJIT_UNUSED_ARG(init_value);
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- FUNCTION_CHECK_DST(dst, dstw);
+ FUNCTION_CHECK_DST(dst, dstw, 0);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
@@ -2166,15 +2161,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(op);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
- SLJIT_UNUSED_ARG(src);
- SLJIT_UNUSED_ARG(srcw);
SLJIT_UNUSED_ARG(type);
SLJIT_UNREACHABLE();
return SLJIT_ERR_UNSUPPORTED;
diff --git a/sljit/sljitLir.h b/sljit/sljitLir.h
index f64ddcb..a58ad6e 100644
--- a/sljit/sljitLir.h
+++ b/sljit/sljitLir.h
@@ -120,8 +120,8 @@ of sljitConfigInternal.h */
If an architecture provides two scratch and three saved registers,
its scratch and saved register sets are the following:
- R0 | [S4] | R0 and S4 represent the same physical register
- R1 | [S3] | R1 and S3 represent the same physical register
+ R0 | | R0 is always a scratch register
+ R1 | | R1 is always a scratch register
[R2] | S2 | R2 and S2 represent the same physical register
[R3] | S1 | R3 and S1 represent the same physical register
[R4] | S0 | R4 and S0 represent the same physical register
@@ -129,38 +129,35 @@ of sljitConfigInternal.h */
Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and
SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture.
- Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 10
- and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 5. However, 4 registers
+ Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 12
+ and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 6. However, 6 registers
are virtual on x86-32. See below.
- The purpose of this definition is convenience. Although a register
- is either scratch register or saved register, SLJIT allows accessing
- them from the other set. For example, four registers can be used as
- scratch registers and the fifth one as saved register on the architecture
- above. Of course the last two scratch registers (R2 and R3) from this
- four will be saved on the stack, because they are defined as saved
- registers in the application binary interface. Still R2 and R3 can be
- used for referencing to these registers instead of S2 and S1, which
- makes easier to write platform independent code. Scratch registers
- can be saved registers in a similar way, but these extra saved
- registers will not be preserved across function calls! Hence the
- application must save them on those platforms, where the number of
- saved registers is too low. This can be done by copy them onto
- the stack and restore them after a function call.
+ The purpose of this definition is convenience: saved registers can
+ be used as extra scratch registers. For example four registers can
+ be specified as scratch registers and the fifth one as saved register
+ on the CPU above and any user code which requires four scratch
+ registers can run unmodified. The SLJIT compiler automatically saves
+ the content of the two extra scrath register on the stack. Scratch
+ registers can also be preserved by saving their value on the stack
+ but this needs to be done manually.
Note: To emphasize that registers assigned to R2-R4 are saved
- registers, they are enclosed by square brackets. S3-S4
- are marked in a similar way.
+ registers, they are enclosed by square brackets.
Note: sljit_emit_enter and sljit_set_context defines whether a register
is S or R register. E.g: when 3 scratches and 1 saved is mapped
by sljit_emit_enter, the allowed register set will be: R0-R2 and
S0. Although S2 is mapped to the same position as R2, it does not
- available in the current configuration. Furthermore the R3 (S1)
- register does not available as well.
+ available in the current configuration. Furthermore the S1 register
+ is not available at all.
*/
-/* When SLJIT_UNUSED is specified as destination, the result is discarded. */
+/* When SLJIT_UNUSED is specified as the destination of sljit_emit_op1 and
+ and sljit_emit_op2 operations the result is discarded. If no status
+ flags are set, no instructions are emitted for these operations. Data
+ prefetch is a special exception, see SLJIT_MOV operation. Other SLJIT
+ operations do not support SLJIT_UNUSED as a destination operand. */
#define SLJIT_UNUSED 0
/* Scratch registers. */
@@ -489,21 +486,27 @@ static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *
*/
static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; }
-/* Returns with non-zero if the passed SLJIT_HAS_* feature is available.
+/* Returns with non-zero if the feature or limitation type passed as its
+ argument is present on the current CPU.
- Some features (e.g. floating point operations) require CPU support
- while other (e.g. move with update) is emulated if not available.
- However it might be worth to generate a special code path even in
- the latter case in certain cases. */
+ Some features (e.g. floating point operations) require hardware (CPU)
+ support while others (e.g. move with update) are emulated if not available.
+ However even if a feature is emulated, specialized code paths can be faster
+ than the emulation. Some limitations are emulated as well so their general
+ case is supported but it has extra performance costs. */
/* [Not emulated] Floating-point support is available. */
#define SLJIT_HAS_FPU 0
+/* [Limitation] Some registers are virtual registers. */
+#define SLJIT_HAS_VIRTUAL_REGISTERS 1
/* [Emulated] Some forms of move with pre update is supported. */
-#define SLJIT_HAS_PRE_UPDATE 1
+#define SLJIT_HAS_PRE_UPDATE 2
/* [Emulated] Count leading zero is supported. */
-#define SLJIT_HAS_CLZ 2
+#define SLJIT_HAS_CLZ 3
/* [Emulated] Conditional move is supported. */
-#define SLJIT_HAS_CMOV 3
+#define SLJIT_HAS_CMOV 4
+/* [Limitation] [Emulated] Shifting with register is limited to SLJIT_PREF_SHIFT_REG. */
+#define SLJIT_HAS_PREF_SHIFT_REG 5
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
/* [Not emulated] SSE2 support is available on x86. */
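
A minimal sketch of probing the renumbered feature codes with the existing sljit_has_cpu_feature query; the fallback branch is an assumption, not part of this patch, and the cmov call assumes the (type, dst_reg, src, srcw) argument order:

    /* Use a conditional move when available, otherwise branch around the move. */
    sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R0, 0, SLJIT_IMM, 0);
    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
        sljit_emit_cmov(compiler, SLJIT_EQUAL, SLJIT_R0, SLJIT_R1, 0);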
@@ -723,8 +726,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
Example: SLJIT_ADD can set the Z, OVERFLOW, CARRY flags hence
sljit_op2(..., SLJIT_ADD, ...)
- Both the zero and variable flags are undefined so their
- they hold a random value after the operation is completed.
+ Both the zero and variable flags are undefined so they can
+ have any value after the operation is completed.
sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...)
Sets the zero flag if the result is zero, clears it otherwise.
@@ -734,10 +737,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
Sets the variable flag if an integer overflow occurs, clears
it otherwise. The zero flag is undefined.
- sljit_op2(..., SLJIT_ADD | SLJIT_SET_NOT_OVERFLOW, ...)
- Sets the variable flag if an integer overflow does NOT occur,
- clears it otherwise. The zero flag is undefined.
-
sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z | SLJIT_SET_CARRY, ...)
Sets the zero flag if the result is zero, clears it otherwise.
Sets the variable flag if unsigned overflow (carry) occurs,
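
With SLJIT_SET_NOT_OVERFLOW removed, the inverted condition moves to the jump itself, which the relaxed checks in the sljitLir.c hunks above now accept. A minimal sketch:

    /* Set only the overflow flag, then branch on its inverse. */
    struct sljit_jump *jump;
    sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_OVERFLOW, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R1, 0);
    jump = sljit_emit_jump(compiler, SLJIT_NOT_OVERFLOW);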
@@ -862,6 +861,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
sljit_emit_op1(..., SLJIT_MOVU_U8,
SLJIT_MEM2(SLJIT_R0, SLJIT_R2), 0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0);
+
+ If the destination of a MOV without update instruction is SLJIT_UNUSED
+ and the source operand is a memory address the compiler emits a prefetch
+ instruction if this instruction is supported by the current CPU.
+ Higher data sizes bring the data closer to the core: a MOV with word
+ size loads the data into a higher level cache than a byte size. Otherwise
+ the type does not affect the prefetch instruction. Furthermore a prefetch
+ instruction never fails, so it can be used to prefetch a data from an
+ address and check whether that address is NULL afterwards.
*/
/* Flags: - (does not modify flags) */
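
The prefetch rule above is reachable with an ordinary MOV; a minimal sketch (the base register choice is arbitrary):

    /* SLJIT_UNUSED dst + memory src, no flags: emits a prefetch where the CPU
       supports one and nothing elsewhere; it never faults, even on a bad address. */
    sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_UNUSED, 0, SLJIT_MEM1(SLJIT_R0), 0);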
@@ -1090,14 +1098,12 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
#define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW)
#define SLJIT_NOT_OVERFLOW 11
#define SLJIT_NOT_OVERFLOW32 (SLJIT_NOT_OVERFLOW | SLJIT_I32_OP)
-#define SLJIT_SET_NOT_OVERFLOW SLJIT_SET(SLJIT_NOT_OVERFLOW)
#define SLJIT_MUL_OVERFLOW 12
#define SLJIT_MUL_OVERFLOW32 (SLJIT_MUL_OVERFLOW | SLJIT_I32_OP)
#define SLJIT_SET_MUL_OVERFLOW SLJIT_SET(SLJIT_MUL_OVERFLOW)
#define SLJIT_MUL_NOT_OVERFLOW 13
#define SLJIT_MUL_NOT_OVERFLOW32 (SLJIT_MUL_NOT_OVERFLOW | SLJIT_I32_OP)
-#define SLJIT_SET_MUL_NOT_OVERFLOW SLJIT_SET(SLJIT_MUL_NOT_OVERFLOW)
/* There is no SLJIT_CARRY or SLJIT_NOT_CARRY. */
#define SLJIT_SET_CARRY SLJIT_SET(14)
@@ -1194,19 +1200,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
represented by the type is 1, if the condition represented by the type
is fulfilled, and 0 otherwise.
- If op == SLJIT_MOV, SLJIT_MOV_S32, SLJIT_MOV_U32:
+ If op == SLJIT_MOV, SLJIT_MOV32:
Set dst to the value represented by the type (0 or 1).
- Src must be SLJIT_UNUSED, and srcw must be 0
Flags: - (does not modify flags)
If op == SLJIT_OR, op == SLJIT_AND, op == SLJIT_XOR
- Performs the binary operation using src as the first, and the value
- represented by type as the second argument.
- Important note: only dst=src and dstw=srcw is supported at the moment!
- Flags: Z (may destroy flags)
- Note: sljit_emit_op_flags does nothing, if dst is SLJIT_UNUSED (regardless of op). */
+ Performs the binary operation using dst as the first, and the value
+ represented by type as the second argument. Result is written into dst.
+ Flags: Z (may destroy flags) */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type);
/* Emit a conditional mov instruction which moves source to destination,
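
The dst-as-first-operand form is what the pcre_jit_compile.c hunks above rely on; a standalone sketch of OR-accumulating two equality tests into one register:

    /* R0 = (R1 == 'a'); then R0 |= (R1 == 'b'), refreshing Z for a later jump. */
    sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_IMM, 'a');
    sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_EQUAL);
    sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_R1, 0, SLJIT_IMM, 'b');
    sljit_emit_op_flags(compiler, SLJIT_OR | SLJIT_SET_Z, SLJIT_R0, 0, SLJIT_EQUAL);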
diff --git a/sljit/sljitNativeARM_32.c b/sljit/sljitNativeARM_32.c
index ac5a50e..baa816d 100644
--- a/sljit/sljitNativeARM_32.c
+++ b/sljit/sljitNativeARM_32.c
@@ -843,6 +843,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
#define WORD_DATA 0x00
#define BYTE_DATA 0x01
#define HALF_DATA 0x02
+#define PRELOAD_DATA 0x03
#define SIGNED_DATA 0x04
#define LOAD_DATA 0x08
@@ -871,7 +872,7 @@ static const sljit_uw data_transfer_insts[16] = {
/* l u w */ 0xe5100000 /* ldr */,
/* l u b */ 0xe5500000 /* ldrb */,
/* l u h */ 0xe11000b0 /* ldrh */,
-/* l u N */ 0x00000000 /* not allowed */,
+/* l u p */ 0xf5500000 /* preload data */,
/* l s w */ 0xe5100000 /* ldr */,
/* l s b */ 0xe11000d0 /* ldrsb */,
/* l s h */ 0xe11000f0 /* ldrsh */,
@@ -879,7 +880,7 @@ static const sljit_uw data_transfer_insts[16] = {
};
#define EMIT_DATA_TRANSFER(type, add, wb, target_reg, base_reg, arg) \
- (data_transfer_insts[(type) & 0xf] | ((add) << 23) | ((wb) << (21 - 4)) | (reg_map[target_reg] << 12) | (reg_map[base_reg] << 16) | (arg))
+ (data_transfer_insts[(type) & 0xf] | ((add) << 23) | ((wb) << (21 - 4)) | RD(target_reg) | RN(base_reg) | (arg))
/* Normal ldr/str instruction.
Type2: ldrsb, ldrh, ldrsh */
@@ -1344,8 +1345,16 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit
if ((arg & REG_MASK) == SLJIT_UNUSED) {
/* Write back is not used. */
- FAIL_IF(load_immediate(compiler, tmp_reg, argw));
- return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, is_type1_transfer ? 0 : TYPE2_TRANSFER_IMM(0)));
+ if (is_type1_transfer) {
+ FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xfff));
+ argw &= 0xfff;
+ }
+ else {
+ FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xff));
+ argw &= 0xff;
+ }
+
+ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, is_type1_transfer ? argw : TYPE2_TRANSFER_IMM(argw)));
}
if (arg & OFFS_REG_MASK) {
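
Worked example of the new absolute-address split (hypothetical argw): a type1 transfer keeps a 12-bit offset in the instruction, so argw == 0x12345 loads 0x12000 into tmp_reg and encodes 0x345; a type2 transfer (8-bit field) would load 0x12300 and encode 0x45.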
@@ -1660,6 +1669,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+ if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
+ return emit_op_mem(compiler, PRELOAD_DATA | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
+#endif
+ return SLJIT_SUCCESS;
+ }
+
switch (GET_OPCODE(op)) {
case SLJIT_MOV:
case SLJIT_MOV_U32:
@@ -1725,6 +1742,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
switch (GET_OPCODE(op)) {
case SLJIT_ADD:
case SLJIT_ADDC:
@@ -1845,9 +1865,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0)));
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16));
@@ -2015,10 +2032,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
SLJIT_ASSERT(reg_map[TMP_REG1] == 14);
- /* For UNUSED dst. Uncommon, but possible. */
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG1)));
@@ -2199,51 +2212,43 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
- sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
+ sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
sljit_uw cc, ins;
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- ADJUST_LOCAL_OFFSET(src, srcw);
-
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
op = GET_OPCODE(op);
cc = get_cc(type & 0xff);
- dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (op < SLJIT_ADD) {
- FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, SRC2_IMM | 0)));
- FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc));
- return (dst_reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1) : SLJIT_SUCCESS;
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0)));
+ FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc));
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2);
+ return SLJIT_SUCCESS;
}
ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP));
- if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
- FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc));
- /* The condition must always be set, even if the ORR/EOR is not executed above. */
- return (flags & SLJIT_SET_Z) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS;
- }
- if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
- src = TMP_REG1;
- } else if (src & SLJIT_IMM) {
- FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
- src = TMP_REG1;
- }
+ if (dst & SLJIT_MEM)
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));
+
+ FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, dst_r, SRC2_IMM | 1) & ~COND_MASK) | cc));
- FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_reg, src, SRC2_IMM | 1) & ~COND_MASK) | cc));
- FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_reg, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
- if (dst_reg == TMP_REG2)
- FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1));
+ if (op == SLJIT_AND)
+ FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, dst_r, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
- return (flags & SLJIT_SET_Z) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG2, SLJIT_UNUSED, RM(dst_reg))) : SLJIT_SUCCESS;
+ if (dst & SLJIT_MEM)
+ FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2));
+
+ if (flags & SLJIT_SET_Z)
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG2, SLJIT_UNUSED, RM(dst_r)));
+ return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
diff --git a/sljit/sljitNativeARM_64.c b/sljit/sljitNativeARM_64.c
index d41b18a..cea8b33 100644
--- a/sljit/sljitNativeARM_64.c
+++ b/sljit/sljitNativeARM_64.c
@@ -890,6 +890,10 @@ static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flag
}
arg &= REG_MASK;
+
+ if (arg == SLJIT_UNUSED)
+ return 0;
+
if (argw >= 0 && (argw >> shift) <= 0xfff && (argw & ((1 << shift) - 1)) == 0) {
if (SLJIT_UNLIKELY(flags & ARG_TEST))
return 1;
@@ -950,7 +954,7 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
next_argw = 0;
}
- tmp_r = (flags & STORE) ? TMP_REG3 : reg;
+ tmp_r = ((flags & STORE) || (flags == (WORD_SIZE | SIGNED))) ? TMP_REG3 : reg;
if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
/* Update only applies if a base register exists. */
@@ -1021,16 +1025,16 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
}
}
- if (argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) {
- FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg & REG_MASK) | ((argw >> 12) << 10)));
- return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
- | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift)));
- }
-
diff = argw - next_argw;
next_arg = (arg & REG_MASK) && (arg == next_arg) && diff <= 0xfff && diff >= -0xfff && diff != 0;
arg &= REG_MASK;
+ if (arg != SLJIT_UNUSED && argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) {
+ FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg) | ((argw >> 12) << 10)));
+ return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
+ | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift)));
+ }
+
if (arg && compiler->cache_arg == SLJIT_MEM) {
if (compiler->cache_argw == argw)
return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
@@ -1313,6 +1317,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
compiler->cache_arg = 0;
compiler->cache_argw = 0;
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
+ if (op <= SLJIT_MOV_P && (src & SLJIT_MEM)) {
+ SLJIT_ASSERT(reg_map[1] == 0 && reg_map[3] == 2 && reg_map[5] == 4);
+
+ if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
+ dst = 5;
+ else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
+ dst = 3;
+ else
+ dst = 1;
+
+ /* Signed word sized load is the prefetch instruction. */
+ return emit_op_mem(compiler, WORD_SIZE | SIGNED, dst, src, srcw);
+ }
+ return SLJIT_SUCCESS;
+ }
+
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
op = GET_OPCODE(op);
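
The fixed dst values here land in the instruction's Rt field; assuming the standard AArch64 PRFM encoding where Rt selects the prefetch operation, reg_map gives 1 -> PLDL1KEEP, 3 -> PLDL2KEEP and 5 -> PLDL3KEEP, which matches the "higher data sizes bring the data closer to the core" rule documented in sljitLir.h.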
@@ -1466,6 +1487,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
compiler->cache_arg = 0;
compiler->cache_argw = 0;
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
mem_flags = WORD_SIZE;
@@ -1617,7 +1641,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
- sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64)
@@ -1630,7 +1654,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src)));
- if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
+ if (dst & SLJIT_MEM)
return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw);
return SLJIT_SUCCESS;
}
@@ -1788,10 +1812,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
- /* For UNUSED dst. Uncommon, but possible. */
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));
@@ -1979,19 +1999,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
- sljit_s32 dst_r, flags, mem_flags;
+ sljit_s32 dst_r, src_r, flags, mem_flags;
sljit_ins cc;
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- ADJUST_LOCAL_OFFSET(src, srcw);
-
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
cc = get_cc(type & 0xff);
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
@@ -2012,19 +2027,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
mem_flags = INT_SIZE;
}
- if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, src, srcw, dst, dstw));
- src = TMP_REG1;
- srcw = 0;
- } else if (src & SLJIT_IMM)
- flags |= ARG1_IMM;
+ src_r = dst;
+
+ if (dst & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, dst, dstw, dst, dstw));
+ src_r = TMP_REG1;
+ }
FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO)));
- emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src, TMP_REG2);
+ emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src_r, TMP_REG2);
- if (dst_r != TMP_REG1)
- return SLJIT_SUCCESS;
- return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
+ if (dst & SLJIT_MEM)
+ return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
+ return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
@@ -2064,7 +2079,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
- dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value));
if (dst & SLJIT_MEM)
diff --git a/sljit/sljitNativeARM_T2_32.c b/sljit/sljitNativeARM_T2_32.c
index 5fff69b..29e5566 100644
--- a/sljit/sljitNativeARM_T2_32.c
+++ b/sljit/sljitNativeARM_T2_32.c
@@ -838,6 +838,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
#define WORD_SIZE 0x00
#define BYTE_SIZE 0x04
#define HALF_SIZE 0x08
+#define PRELOAD 0x0c
#define UPDATE 0x10
@@ -895,7 +896,7 @@ static const sljit_ins sljit_mem16_imm5[12] = {
#define MEM_IMM8 0xc00
#define MEM_IMM12 0x800000
-static const sljit_ins sljit_mem32[12] = {
+static const sljit_ins sljit_mem32[13] = {
/* w u l */ 0xf8500000 /* ldr.w */,
/* w u s */ 0xf8400000 /* str.w */,
/* w s l */ 0xf8500000 /* ldr.w */,
@@ -910,6 +911,8 @@ static const sljit_ins sljit_mem32[12] = {
/* h u s */ 0xf8200000 /* strsh.w */,
/* h s l */ 0xf9300000 /* ldrsh.w */,
/* h s s */ 0xf8200000 /* strsh.w */,
+
+/* p u l */ 0xf8100000 /* pld */,
};
/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
@@ -946,6 +949,12 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit
arg &= ~SLJIT_MEM;
if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
+ tmp = get_imm(argw & ~0xfff);
+ if (tmp != INVALID_IMM) {
+ FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | tmp));
+ return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff));
+ }
+
FAIL_IF(load_immediate(compiler, tmp_reg, argw));
if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags])
return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg));
@@ -1270,6 +1279,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
+ /* Since TMP_PC has index 15, IS_2_LO_REGS and IS_3_LO_REGS checks always fail. */
+ if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
+ return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1);
+ return SLJIT_SUCCESS;
+ }
+
dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
op = GET_OPCODE(op);
@@ -1388,6 +1404,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
@@ -1507,9 +1526,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_F32_OP) | DD4(TMP_FREG1) | DM4(src)));
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1));
@@ -1669,10 +1685,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
- /* For UNUSED dst. Uncommon, but possible. */
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));
@@ -1836,19 +1848,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
- sljit_ins cc, ins;
+ sljit_ins cc;
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- ADJUST_LOCAL_OFFSET(src, srcw);
-
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
op = GET_OPCODE(op);
cc = get_cc(type & 0xff);
@@ -1864,56 +1871,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
}
- if (dst_r != TMP_REG1)
+ if (!(dst & SLJIT_MEM))
return SLJIT_SUCCESS;
return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2);
}
- ins = (op == SLJIT_AND ? ANDI : (op == SLJIT_OR ? ORRI : EORI));
-
- if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
- /* Does not change the other bits. */
- FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
- FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst) | 1));
- if (flags & SLJIT_SET_Z) {
- /* The condition must always be set, even if the ORRI/EORI is not executed above. */
- if (reg_map[dst] <= 7)
- return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst));
- return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst));
- }
- return SLJIT_SUCCESS;
- }
-
- if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));
- src = TMP_REG2;
- srcw = 0;
- } else if (src & SLJIT_IMM) {
- FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
- src = TMP_REG2;
- srcw = 0;
- }
+ if (dst & SLJIT_MEM)
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));
- if (op == SLJIT_AND || src != dst_r) {
+ if (op == SLJIT_AND) {
FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
- FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1));
- FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 0));
+ FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 1));
+ FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 0));
}
else {
FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
- FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1));
+ FAIL_IF(push_inst32(compiler, ((op == SLJIT_OR) ? ORRI : EORI) | RN4(dst_r) | RD4(dst_r) | 1));
}
- if (dst_r == TMP_REG1)
+ if (dst & SLJIT_MEM)
FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2));
- if (flags & SLJIT_SET_Z) {
- /* The condition must always be set, even if the ORR/EORI is not executed above. */
- if (reg_map[dst_r] <= 7)
- return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r));
- return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
- }
- return SLJIT_SUCCESS;
+ if (!(flags & SLJIT_SET_Z))
+ return SLJIT_SUCCESS;
+
+ /* The condition must always be set, even if the ORRI/EORI is not executed above. */
+ if (reg_map[dst_r] <= 7)
+ return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r));
+ return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
}
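
The IT-block constants above decode as follows (a reading of this hunk
against the Thumb-2 IT encoding, where bits 7:4 hold the condition and
bits 3:0 the then/else mask):

    sljit_ins it  = IT | (cc << 4) | 0x8;                             /* IT  cc */
    sljit_ins ite = IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4; /* ITE cc */
    /* The AND case needs both slots (ANDI #1 when cc holds, ANDI #0
       otherwise); OR/XOR need only the taken slot, since skipping the
       ORRI/EORI leaves dst_r unchanged, which is exactly the 0 case. */
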
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
@@ -1977,7 +1962,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
- dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value));
if (dst & SLJIT_MEM)
diff --git a/sljit/sljitNativeMIPS_32.c b/sljit/sljitNativeMIPS_32.c
index 4249c2c..62e1610 100644
--- a/sljit/sljitNativeMIPS_32.c
+++ b/sljit/sljitNativeMIPS_32.c
@@ -153,7 +153,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return SLJIT_SUCCESS;
case SLJIT_ADD:
- is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW;
+ is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
@@ -295,7 +295,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return SLJIT_SUCCESS;
}
- is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW;
+ is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
@@ -367,7 +367,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_MUL:
SLJIT_ASSERT(!(flags & SRC2_IMM));
- if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW && GET_FLAG_TYPE(op) != SLJIT_MUL_NOT_OVERFLOW) {
+ if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) {
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
#else
diff --git a/sljit/sljitNativeMIPS_64.c b/sljit/sljitNativeMIPS_64.c
index e96d512..dd114bb 100644
--- a/sljit/sljitNativeMIPS_64.c
+++ b/sljit/sljitNativeMIPS_64.c
@@ -244,7 +244,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return SLJIT_SUCCESS;
case SLJIT_ADD:
- is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW;
+ is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
@@ -386,7 +386,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return SLJIT_SUCCESS;
}
- is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW;
+ is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
@@ -458,7 +458,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_MUL:
SLJIT_ASSERT(!(flags & SRC2_IMM));
- if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW && GET_FLAG_TYPE(op) != SLJIT_MUL_NOT_OVERFLOW) {
+ if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) {
#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
if (op & SLJIT_I32_OP)
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
diff --git a/sljit/sljitNativeMIPS_common.c b/sljit/sljitNativeMIPS_common.c
index 339bdbb..ee207fe 100644
--- a/sljit/sljitNativeMIPS_common.c
+++ b/sljit/sljitNativeMIPS_common.c
@@ -178,6 +178,8 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
#define MOVT (HI(0) | (1 << 16) | LO(1))
#define MOVZ (HI(0) | LO(10))
#define MUL (HI(28) | LO(2))
+#define PREF (HI(51))
+#define PREFX (HI(19) | LO(15))
#define SEB (HI(31) | (16 << 6) | LO(32))
#define SEH (HI(31) | (24 << 6) | LO(32))
#endif
@@ -920,10 +922,8 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
}
if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
- return SLJIT_SUCCESS;
- if (HAS_FLAGS(op))
- flags |= UNUSED_DEST;
+ SLJIT_ASSERT(HAS_FLAGS(op));
+ flags |= UNUSED_DEST;
}
else if (FAST_IS_REG(dst)) {
dst_r = dst;
@@ -1085,6 +1085,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
+ sljit_s32 src, sljit_sw srcw)
+{
+ if (!(src & OFFS_REG_MASK)) {
+ if (srcw <= SIMM_MAX && srcw >= SIMM_MIN)
+ return push_inst(compiler, PREF | S(src & REG_MASK) | IMM(srcw), MOVABLE_INS);
+
+ FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw));
+ return push_inst(compiler, PREFX | S(src & REG_MASK) | T(TMP_REG1), MOVABLE_INS);
+ }
+
+ srcw &= 0x3;
+
+ if (SLJIT_UNLIKELY(srcw != 0)) {
+ FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(src)) | D(TMP_REG1) | SH_IMM(srcw), DR(TMP_REG1)));
+ return push_inst(compiler, PREFX | S(src & REG_MASK) | T(TMP_REG1), MOVABLE_INS);
+ }
+
+ return push_inst(compiler, PREFX | S(src & REG_MASK) | T(OFFS_REG(src)), MOVABLE_INS);
+}
+#endif
+
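
The helper pairs the two sljit addressing forms with the R1 prefetch
instructions: PREF takes base + signed 16-bit offset, PREFX takes base +
index register. A sketch of the emitted sequences per branch (register
names illustrative):

    /* SLJIT_MEM1(rB) + imm, imm in SIMM range ->  pref  hint, imm(rB)
       SLJIT_MEM1(rB) + imm, imm out of range  ->  li    rT, imm
                                                   prefx hint, rT(rB)
       SLJIT_MEM2(rB, rI) << sh, sh != 0       ->  sll   rT, rI, sh
                                                   prefx hint, rT(rB)
       SLJIT_MEM2(rB, rI), sh == 0             ->  prefx hint, rI(rB)   */
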
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
@@ -1100,6 +1123,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+ if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
+ return emit_prefetch(compiler, src, srcw);
+#endif
+ return SLJIT_SUCCESS;
+ }
+
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if ((op & SLJIT_I32_OP) && GET_OPCODE(op) >= SLJIT_NOT) {
flags |= INT_DATA | SIGNED_DATA;
@@ -1203,6 +1234,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
if (op & SLJIT_I32_OP) {
flags |= INT_DATA | SIGNED_DATA;
@@ -1301,9 +1335,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS));
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS);
@@ -1538,10 +1569,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
- /* For UNUSED dst. Uncommon, but possible. */
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), DR(dst));
@@ -1903,50 +1930,43 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
- sljit_s32 sugg_dst_ar, dst_ar;
- sljit_s32 flags = GET_ALL_FLAGS(op);
+ sljit_s32 src_ar, dst_ar;
+ sljit_s32 saved_op = op;
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-# define mem_type WORD_DATA
+ sljit_s32 mem_type = WORD_DATA;
#else
sljit_s32 mem_type = (op & SLJIT_I32_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
#endif
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
op = GET_OPCODE(op);
#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
- if (op == SLJIT_MOV_S32 || op == SLJIT_MOV_U32)
+ if (op == SLJIT_MOV_S32)
mem_type = INT_DATA | SIGNED_DATA;
#endif
- sugg_dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2);
+ dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2);
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
- ADJUST_LOCAL_OFFSET(src, srcw);
- FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, DR(TMP_REG1), src, srcw, dst, dstw));
- src = TMP_REG1;
- srcw = 0;
- }
+
+ if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
+ FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, DR(TMP_REG1), dst, dstw, dst, dstw));
switch (type & 0xff) {
case SLJIT_EQUAL:
case SLJIT_NOT_EQUAL:
- FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
- dst_ar = sugg_dst_ar;
+ FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(dst_ar) | IMM(1), dst_ar));
+ src_ar = dst_ar;
break;
case SLJIT_MUL_OVERFLOW:
case SLJIT_MUL_NOT_OVERFLOW:
- FAIL_IF(push_inst(compiler, SLTIU | SA(OTHER_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
- dst_ar = sugg_dst_ar;
+ FAIL_IF(push_inst(compiler, SLTIU | SA(OTHER_FLAG) | TA(dst_ar) | IMM(1), dst_ar));
+ src_ar = dst_ar;
type ^= 0x1; /* Flip type bit for the XORI below. */
break;
case SLJIT_GREATER_F64:
@@ -1958,38 +1978,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
case SLJIT_GREATER_EQUAL_F64:
case SLJIT_UNORDERED_F64:
case SLJIT_ORDERED_F64:
- FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar));
- FAIL_IF(push_inst(compiler, SRL | TA(sugg_dst_ar) | DA(sugg_dst_ar) | SH_IMM(23), sugg_dst_ar));
- FAIL_IF(push_inst(compiler, ANDI | SA(sugg_dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
- dst_ar = sugg_dst_ar;
+ FAIL_IF(push_inst(compiler, CFC1 | TA(dst_ar) | DA(FCSR_REG), dst_ar));
+ FAIL_IF(push_inst(compiler, SRL | TA(dst_ar) | DA(dst_ar) | SH_IMM(23), dst_ar));
+ FAIL_IF(push_inst(compiler, ANDI | SA(dst_ar) | TA(dst_ar) | IMM(1), dst_ar));
+ src_ar = dst_ar;
break;
default:
- dst_ar = OTHER_FLAG;
+ src_ar = OTHER_FLAG;
break;
}
if (type & 0x1) {
- FAIL_IF(push_inst(compiler, XORI | SA(dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar));
- dst_ar = sugg_dst_ar;
+ FAIL_IF(push_inst(compiler, XORI | SA(src_ar) | TA(dst_ar) | IMM(1), dst_ar));
+ src_ar = dst_ar;
}
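
MIPS has no flags register, so EQUAL_FLAG/OTHER_FLAG are ordinary registers
holding comparison residue, and the boolean is produced arithmetically. A
self-contained C model of the two-instruction pattern above (`equal_flag`
is a hypothetical stand-in for the register contents):

    static unsigned flag_to_bool(unsigned long equal_flag, int negate)
    {
        unsigned b = (equal_flag == 0); /* sltiu dst, flag, 1: flag < 1      */
        return negate ? b ^ 1 : b;      /* xori  dst, dst, 1: if type & 0x1 */
    }
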
- if (op >= SLJIT_ADD) {
- if (DR(TMP_REG2) != dst_ar)
- FAIL_IF(push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
- return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
+ if (op < SLJIT_ADD) {
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, mem_type, src_ar, dst, dstw);
+
+ if (src_ar != dst_ar)
+ return push_inst(compiler, ADDU_W | SA(src_ar) | TA(0) | DA(dst_ar), dst_ar);
+ return SLJIT_SUCCESS;
}
- if (dst & SLJIT_MEM)
- return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw);
+ /* OTHER_FLAG cannot be specified as a src2 argument at the moment. */
+ if (DR(TMP_REG2) != src_ar)
+ FAIL_IF(push_inst(compiler, ADDU_W | SA(src_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
- if (sugg_dst_ar != dst_ar)
- return push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | DA(sugg_dst_ar), sugg_dst_ar);
- return SLJIT_SUCCESS;
+ mem_type |= CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE;
-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-# undef mem_type
-#endif
+ if (dst & SLJIT_MEM)
+ return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
+ return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, TMP_REG2, 0);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
@@ -2078,7 +2100,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
- reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+ reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
PTR_FAIL_IF(emit_const(compiler, reg, init_value));
diff --git a/sljit/sljitNativePPC_common.c b/sljit/sljitNativePPC_common.c
index 205c72c..775c708 100644
--- a/sljit/sljitNativePPC_common.c
+++ b/sljit/sljitNativePPC_common.c
@@ -154,6 +154,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
#define CMPL (HI(31) | LO(32))
#define CMPLI (HI(10))
#define CROR (HI(19) | LO(449))
+#define DCBT (HI(31) | LO(278))
#define DIVD (HI(31) | LO(489))
#define DIVDU (HI(31) | LO(457))
#define DIVW (HI(31) | LO(491))
@@ -1169,8 +1170,6 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
/* Destination check. */
if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
- return SLJIT_SUCCESS;
dst_r = TMP_REG2;
}
else if (FAST_IS_REG(dst)) {
@@ -1323,6 +1322,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
+static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
+ sljit_s32 src, sljit_sw srcw)
+{
+ if (!(src & OFFS_REG_MASK)) {
+ if (srcw == 0 && (src & REG_MASK) != SLJIT_UNUSED)
+ return push_inst(compiler, DCBT | A(0) | B(src & REG_MASK));
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ /* Works with SLJIT_MEM0() case as well. */
+ return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
+ }
+
+ srcw &= 0x3;
+
+ if (srcw == 0)
+ return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src)));
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+ FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(src)) | A(TMP_REG1) | (srcw << 11) | ((31 - srcw) << 1)));
+#else
+ FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(src), srcw, 63 - srcw, 1)));
+#endif
+ return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
+}
+
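
dcbt RA,RB touches the cache block at (RA|0) + RB, so the helper only has to
place the offset or the scaled index into a register; the RLWINM/RLDI lines
are the 32-/64-bit spellings of a left shift by the stored scale. Sketch of
the emitted forms (register names illustrative):

    /* SLJIT_MEM1(rB), srcw == 0   ->  dcbt 0, rB
       SLJIT_MEM1(rB) + srcw       ->  li        rT, srcw
                                       dcbt      rB, rT    (rB may be 0)
       SLJIT_MEM2(rB, rI) << sh    ->  slwi/sldi rT, rI, sh  (sh != 0)
                                       dcbt      rB, rT                  */
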
#define EMIT_MOV(type, type_flags, type_cast) \
emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
@@ -1338,11 +1362,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
+ if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
+ return emit_prefetch(compiler, src, srcw);
+
+ return SLJIT_SUCCESS;
+ }
+
op = GET_OPCODE(op);
if ((src & SLJIT_IMM) && srcw == 0)
src = TMP_ZERO;
- if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op_flags) == SLJIT_NOT_OVERFLOW)
+ if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW)
FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
if (op_flags & SLJIT_I32_OP) {
@@ -1496,6 +1527,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
if ((src1 & SLJIT_IMM) && src1w == 0)
src1 = TMP_ZERO;
if ((src2 & SLJIT_IMM) && src2w == 0)
@@ -1513,14 +1547,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
flags |= ALT_SIGN_EXT;
}
#endif
- if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW)
+ if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
if (src2 == TMP_REG2)
flags |= ALT_KEEP_CACHE;
switch (GET_OPCODE(op)) {
case SLJIT_ADD:
- if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW)
+ if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
@@ -1582,7 +1616,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w);
}
- if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW)
+ if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w);
if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
@@ -1751,9 +1785,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
op = GET_OPCODE(op);
FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (op == SLJIT_CONV_SW_FROM_F64) {
if (FAST_IS_REG(dst)) {
FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
@@ -1761,12 +1792,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
}
return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
}
-
#else
FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
-
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
#endif
if (FAST_IS_REG(dst)) {
@@ -2043,10 +2070,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
- /* For UNUSED dst. Uncommon, but possible. */
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst(compiler, MFLR | D(dst));
@@ -2231,125 +2254,112 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
}
-/* Get a bit from CR, all other bits are zeroed. */
-#define GET_CR_BIT(bit, dst) \
- FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1)));
-
-#define INVERT_BIT(dst) \
- FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1));
-
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
- sljit_s32 reg, input_flags;
- sljit_s32 flags = GET_ALL_FLAGS(op);
- sljit_sw original_dstw = dstw;
+ sljit_s32 reg, input_flags, cr_bit, invert;
+ sljit_s32 saved_op = op;
+ sljit_sw saved_dstw = dstw;
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ input_flags = (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA;
+#else
+ input_flags = WORD_DATA;
+#endif
op = GET_OPCODE(op);
reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
- ADJUST_LOCAL_OFFSET(src, srcw);
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
- input_flags = (flags & SLJIT_I32_OP) ? INT_DATA : WORD_DATA;
-#else
- input_flags = WORD_DATA;
-#endif
- FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
- src = TMP_REG1;
- srcw = 0;
- }
-
- FAIL_IF(push_inst(compiler, MFCR | D(reg)));
- switch (type & 0xff) {
- case SLJIT_EQUAL:
- GET_CR_BIT(2, reg);
- break;
+ if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
+ FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
- case SLJIT_NOT_EQUAL:
- GET_CR_BIT(2, reg);
- INVERT_BIT(reg);
- break;
+ invert = 0;
+ switch (type & 0xff) {
case SLJIT_LESS:
case SLJIT_SIG_LESS:
- GET_CR_BIT(0, reg);
+ cr_bit = 0;
break;
case SLJIT_GREATER_EQUAL:
case SLJIT_SIG_GREATER_EQUAL:
- GET_CR_BIT(0, reg);
- INVERT_BIT(reg);
+ cr_bit = 0;
+ invert = 1;
break;
case SLJIT_GREATER:
case SLJIT_SIG_GREATER:
- GET_CR_BIT(1, reg);
+ cr_bit = 1;
break;
case SLJIT_LESS_EQUAL:
case SLJIT_SIG_LESS_EQUAL:
- GET_CR_BIT(1, reg);
- INVERT_BIT(reg);
- break;
-
- case SLJIT_LESS_F64:
- GET_CR_BIT(4 + 0, reg);
+ cr_bit = 1;
+ invert = 1;
break;
- case SLJIT_GREATER_EQUAL_F64:
- GET_CR_BIT(4 + 0, reg);
- INVERT_BIT(reg);
- break;
-
- case SLJIT_GREATER_F64:
- GET_CR_BIT(4 + 1, reg);
+ case SLJIT_EQUAL:
+ cr_bit = 2;
break;
- case SLJIT_LESS_EQUAL_F64:
- GET_CR_BIT(4 + 1, reg);
- INVERT_BIT(reg);
+ case SLJIT_NOT_EQUAL:
+ cr_bit = 2;
+ invert = 1;
break;
case SLJIT_OVERFLOW:
case SLJIT_MUL_OVERFLOW:
- GET_CR_BIT(3, reg);
+ cr_bit = 3;
break;
case SLJIT_NOT_OVERFLOW:
case SLJIT_MUL_NOT_OVERFLOW:
- GET_CR_BIT(3, reg);
- INVERT_BIT(reg);
+ cr_bit = 3;
+ invert = 1;
+ break;
+
+ case SLJIT_LESS_F64:
+ cr_bit = 4 + 0;
+ break;
+
+ case SLJIT_GREATER_EQUAL_F64:
+ cr_bit = 4 + 0;
+ invert = 1;
+ break;
+
+ case SLJIT_GREATER_F64:
+ cr_bit = 4 + 1;
+ break;
+
+ case SLJIT_LESS_EQUAL_F64:
+ cr_bit = 4 + 1;
+ invert = 1;
break;
case SLJIT_EQUAL_F64:
- GET_CR_BIT(4 + 2, reg);
+ cr_bit = 4 + 2;
break;
case SLJIT_NOT_EQUAL_F64:
- GET_CR_BIT(4 + 2, reg);
- INVERT_BIT(reg);
+ cr_bit = 4 + 2;
+ invert = 1;
break;
case SLJIT_UNORDERED_F64:
- GET_CR_BIT(4 + 3, reg);
+ cr_bit = 4 + 3;
break;
case SLJIT_ORDERED_F64:
- GET_CR_BIT(4 + 3, reg);
- INVERT_BIT(reg);
+ cr_bit = 4 + 3;
+ invert = 1;
break;
default:
@@ -2357,28 +2367,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
break;
}
+ FAIL_IF(push_inst(compiler, MFCR | D(reg)));
+ FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + (cr_bit)) << 11) | (31 << 6) | (31 << 1)));
+
+ if (invert)
+ FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1));
+
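
The rewrite collapses the per-condition GET_CR_BIT/INVERT_BIT macros into one
(cr_bit, invert) table and a single extraction sequence. A self-contained C
model of the MFCR + RLWINM + XORI triple (PowerPC numbers CR bits from the
most significant end, so CR bit n sits at conventional bit 31 - n):

    static unsigned extract_cr_bit(unsigned cr, unsigned cr_bit, int invert)
    {
        /* rlwinm reg, reg, cr_bit + 1, 31, 31: rotate the wanted bit
           into bit 0, then mask all other bits off. */
        unsigned rot = (cr << (cr_bit + 1)) | (cr >> (31 - cr_bit));
        return (rot & 1) ^ (invert ? 1u : 0u);   /* xori reg, reg, 1 */
    }
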
if (op < SLJIT_ADD) {
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
- if (op == SLJIT_MOV)
- input_flags = WORD_DATA;
- else {
- op = SLJIT_MOV_U32;
- input_flags = INT_DATA;
- }
-#else
- op = SLJIT_MOV;
- input_flags = WORD_DATA;
-#endif
- if (reg != TMP_REG2)
+ if (!(dst & SLJIT_MEM))
return SLJIT_SUCCESS;
- return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
+ return emit_op_mem2(compiler, input_flags, reg, dst, dstw, reg, 0);
}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
- return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
+ if (dst & SLJIT_MEM)
+ return sljit_emit_op2(compiler, saved_op, dst, saved_dstw, TMP_REG1, 0, TMP_REG2, 0);
+ return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
@@ -2404,7 +2411,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
- reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+ reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
PTR_FAIL_IF(emit_const(compiler, reg, init_value));
diff --git a/sljit/sljitNativeSPARC_common.c b/sljit/sljitNativeSPARC_common.c
index 8f3fb0f..9831bd8 100644
--- a/sljit/sljitNativeSPARC_common.c
+++ b/sljit/sljitNativeSPARC_common.c
@@ -683,18 +683,16 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
compiler->cache_argw = 0;
}
- if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
- return SLJIT_SUCCESS;
- }
- else if (FAST_IS_REG(dst)) {
- dst_r = dst;
- flags |= REG_DEST;
- if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
- sugg_src2_r = dst_r;
+ if (dst != SLJIT_UNUSED) {
+ if (FAST_IS_REG(dst)) {
+ dst_r = dst;
+ flags |= REG_DEST;
+ if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
+ sugg_src2_r = dst_r;
+ }
+ else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
+ flags |= SLOW_DEST;
}
- else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
- flags |= SLOW_DEST;
if (flags & IMM_OP) {
if ((src2 & SLJIT_IMM) && src2w) {
@@ -850,6 +848,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(dst, dstw);
ADJUST_LOCAL_OFFSET(src, srcw);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
op = GET_OPCODE(op);
switch (op) {
case SLJIT_MOV:
@@ -920,6 +921,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w);
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
op = GET_OPCODE(op);
switch (op) {
case SLJIT_ADD:
@@ -991,9 +995,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | DA(TMP_FREG1) | S2A(src), MOVABLE_INS));
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst)) {
FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET);
@@ -1207,10 +1208,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
- /* For UNUSED dst. Uncommon, but possible. */
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
if (FAST_IS_REG(dst))
return push_inst(compiler, OR | D(dst) | S1(0) | S2(TMP_LINK), DR(dst));
@@ -1394,30 +1391,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
sljit_s32 reg, flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
op = GET_OPCODE(op);
reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
- ADJUST_LOCAL_OFFSET(src, srcw);
- FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
- src = TMP_REG1;
- srcw = 0;
- }
+
+ if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
+ FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
type &= 0xff;
if (type < SLJIT_EQUAL_F64)
@@ -1428,10 +1418,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS));
- if (op >= SLJIT_ADD)
- return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
+ if (op >= SLJIT_ADD) {
+ flags |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE;
+ if (dst & SLJIT_MEM)
+ return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
+ return emit_op(compiler, op, flags, dst, 0, dst, 0, TMP_REG2, 0);
+ }
+
+ if (!(dst & SLJIT_MEM))
+ return SLJIT_SUCCESS;
- return (reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
+ return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw);
#else
#error "Implementation required"
#endif
@@ -1464,7 +1461,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
PTR_FAIL_IF(!const_);
set_const(const_, compiler);
- reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+ reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
PTR_FAIL_IF(emit_const(compiler, reg, init_value));
diff --git a/sljit/sljitNativeTILEGX_64.c b/sljit/sljitNativeTILEGX_64.c
index ad74b82..003f43a 100644
--- a/sljit/sljitNativeTILEGX_64.c
+++ b/sljit/sljitNativeTILEGX_64.c
@@ -2092,9 +2092,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
-
op = GET_OPCODE(op);
if (op == SLJIT_MOV_S32 || op == SLJIT_MOV_U32)
mem_type = INT_DATA | SIGNED_DATA;
diff --git a/sljit/sljitNativeX86_64.c b/sljit/sljitNativeX86_64.c
index 5171fbb..039b68c 100644
--- a/sljit/sljitNativeX86_64.c
+++ b/sljit/sljitNativeX86_64.c
@@ -47,9 +47,8 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_
*code_ptr++ = 10 + 3;
}
- SLJIT_ASSERT(reg_map[TMP_REG3] == 9);
- *code_ptr++ = REX_W | REX_B;
- *code_ptr++ = MOV_r_i32 + 1;
+ *code_ptr++ = REX_W | ((reg_map[TMP_REG2] <= 7) ? 0 : REX_B);
+ *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
jump->addr = (sljit_uw)code_ptr;
if (jump->flags & JUMP_LABEL)
@@ -58,9 +57,10 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_
sljit_unaligned_store_sw(code_ptr, jump->u.target);
code_ptr += sizeof(sljit_sw);
- *code_ptr++ = REX_B;
+ if (reg_map[TMP_REG2] >= 8)
+ *code_ptr++ = REX_B;
*code_ptr++ = GROUP_FF;
- *code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1);
+ *code_ptr++ = MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2];
return code_ptr;
}
@@ -380,12 +380,12 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32
if (b & SLJIT_MEM) {
if (!(b & OFFS_REG_MASK)) {
if (NOT_HALFWORD(immb)) {
- PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG3, immb));
+ PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
immb = 0;
if (b & REG_MASK)
- b |= TO_OFFS_REG(TMP_REG3);
+ b |= TO_OFFS_REG(TMP_REG2);
else
- b |= TMP_REG3;
+ b |= TMP_REG2;
}
else if (reg_lmap[b & REG_MASK] == 4)
b |= TO_OFFS_REG(SLJIT_SP);
@@ -545,17 +545,19 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32
/* Call / return instructions */
/* --------------------------------------------------------------------- */
-static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
+static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
sljit_u8 *inst;
+ /* After any change, update IS_REG_CHANGED_BY_CALL as well. */
#ifndef _WIN64
- SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8);
+ SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8 && reg_map[TMP_REG1] == 2);
inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
FAIL_IF(!inst);
INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
if (type >= SLJIT_CALL3) {
+ /* Move third argument to TMP_REG1. */
*inst++ = REX_W;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2];
@@ -564,12 +566,13 @@ static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sl
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
#else
- SLJIT_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8);
+ SLJIT_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8 && reg_map[TMP_REG1] == 8);
inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
FAIL_IF(!inst);
INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
if (type >= SLJIT_CALL3) {
+ /* Move third argument to TMP_REG1. */
*inst++ = REX_W | REX_R;
*inst++ = MOV_r_rm;
*inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2];
diff --git a/sljit/sljitNativeX86_common.c b/sljit/sljitNativeX86_common.c
index 3cb0c9d..eb0886d 100644
--- a/sljit/sljitNativeX86_common.c
+++ b/sljit/sljitNativeX86_common.c
@@ -85,28 +85,27 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
-#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
/* Note: r12 & 0x7 == 0b100, which is decoded as SIB byte present
 Note: avoid using r12 and r13 for memory addressing
- therefore r12 is better for SAVED_EREG than SAVED_REG. */
+ therefore r12 is better used as a higher saved register. */
#ifndef _WIN64
-/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
+/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+ 0, 0, 6, 1, 7, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
};
/* low-map. reg_map & 0x7. */
-static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
+static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+ 0, 0, 6, 1, 7, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
};
#else
-/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
+/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+ 0, 0, 2, 1, 10, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 8, 9
};
/* low-map. reg_map & 0x7. */
-static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
+static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
+ 0, 0, 2, 1, 2, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 0, 1
};
#endif
@@ -169,7 +168,7 @@ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
#define CALL_i32 0xe8
#define CALL_rm (/* GROUP_FF */ 2 << 3)
#define CDQ 0x99
-#define CMOVNE_r_rm (/* GROUP_0F */ 0x45)
+#define CMOVE_r_rm (/* GROUP_0F */ 0x44)
#define CMP (/* BINARY */ 7 << 3)
#define CMP_EAX_i32 0x3d
#define CMP_r_rm 0x3b
@@ -217,6 +216,7 @@ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
#define POP_r 0x58
#define POP_rm 0x8f
#define POPF 0x9d
+#define PREFETCH 0x18
#define PUSH_i32 0x68
#define PUSH_r 0x50
#define PUSH_rm (/* GROUP_FF */ 6 << 3)
@@ -602,14 +602,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
return 1;
#endif /* SLJIT_DETECT_SSE2 */
- case SLJIT_HAS_CLZ:
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ case SLJIT_HAS_VIRTUAL_REGISTERS:
return 1;
+#endif
+ case SLJIT_HAS_CLZ:
case SLJIT_HAS_CMOV:
if (cpu_has_cmov == -1)
get_cpu_features();
return cpu_has_cmov;
+ case SLJIT_HAS_PREF_SHIFT_REG:
+ return 1;
+
case SLJIT_HAS_SSE2:
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
if (cpu_has_sse2 == -1)
@@ -676,15 +682,8 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler,
{
sljit_u8* inst;
- if (dst == SLJIT_UNUSED) {
- /* No destination, doesn't need to setup flags. */
- if (src & SLJIT_MEM) {
- inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
- FAIL_IF(!inst);
- *inst = MOV_r_rm;
- }
- return SLJIT_SUCCESS;
- }
+ SLJIT_ASSERT(dst != SLJIT_UNUSED);
+
if (FAST_IS_REG(src)) {
inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
FAIL_IF(!inst);
@@ -706,8 +705,10 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler,
}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
- FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
- inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
+ /* Immediate to memory move. Only the SLJIT_MOV operation copies
+ an immediate directly into memory, so TMP_REG1 can be used. */
+ FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
+ inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
FAIL_IF(!inst);
*inst = MOV_rm_r;
return SLJIT_SUCCESS;
@@ -725,7 +726,8 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler,
return SLJIT_SUCCESS;
}
- /* Memory to memory move. Requires two instruction. */
+ /* Memory to memory move. Only the SLJIT_MOV operation copies
+ data from memory to memory, so TMP_REG1 can be used. */
inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
FAIL_IF(!inst);
*inst = MOV_r_rm;
@@ -898,9 +900,6 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
compiler->mode32 = 0;
#endif
- if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
- return SLJIT_SUCCESS; /* Empty instruction. */
-
if (src & SLJIT_IMM) {
if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -1029,6 +1028,30 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
return SLJIT_SUCCESS;
}
+static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_u8* inst;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif
+
+ inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
+ FAIL_IF(!inst);
+ *inst++ = GROUP_0F;
+ *inst++ = PREFETCH;
+
+ if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
+ *inst |= (3 << 3);
+ else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
+ *inst |= (2 << 3);
+ else
+ *inst |= (1 << 3);
+
+ return SLJIT_SUCCESS;
+}
+
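
The 0F 18 opcode selects the prefetch flavour through the reg field of its
ModRM byte (/0 = prefetchnta, /1 = prefetcht0, /2 = prefetcht1,
/3 = prefetcht2), so the three `*inst |=` branches grade the locality hint
by the width the discarded load would have had:

    /* SLJIT_MOV..SLJIT_MOV_P       ->  0F 18 /1   prefetcht0 [mem]
       SLJIT_MOV_U16/SLJIT_MOV_S16  ->  0F 18 /2   prefetcht1 [mem]
       SLJIT_MOV_U8/SLJIT_MOV_S8    ->  0F 18 /3   prefetcht2 [mem]  */
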
static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
@@ -1040,9 +1063,6 @@ static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
compiler->mode32 = 0;
#endif
- if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
- return SLJIT_SUCCESS; /* Empty instruction. */
-
if (src & SLJIT_IMM) {
if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -1086,14 +1106,6 @@ static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
{
sljit_u8* inst;
- if (dst == SLJIT_UNUSED) {
- EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
- inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- *inst++ = GROUP_F7;
- *inst |= opcode;
- return SLJIT_SUCCESS;
- }
if (dst == src && dstw == srcw) {
/* Same input and output */
inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
@@ -1102,14 +1114,19 @@ static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
*inst |= opcode;
return SLJIT_SUCCESS;
}
+
+ if (dst == SLJIT_UNUSED)
+ dst = TMP_REG1;
+
if (FAST_IS_REG(dst)) {
EMIT_MOV(compiler, dst, 0, src, srcw);
- inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+ inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
FAIL_IF(!inst);
*inst++ = GROUP_F7;
*inst |= opcode;
return SLJIT_SUCCESS;
}
+
EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
FAIL_IF(!inst);
@@ -1125,20 +1142,12 @@ static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
{
sljit_u8* inst;
- if (dst == SLJIT_UNUSED) {
- EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
- inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- *inst++ = GROUP_F7;
- *inst |= NOT_rm;
- inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
- FAIL_IF(!inst);
- *inst = OR_r_rm;
- return SLJIT_SUCCESS;
- }
+ if (dst == SLJIT_UNUSED)
+ dst = TMP_REG1;
+
if (FAST_IS_REG(dst)) {
EMIT_MOV(compiler, dst, 0, src, srcw);
- inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
+ inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
FAIL_IF(!inst);
*inst++ = GROUP_F7;
*inst |= NOT_rm;
@@ -1147,6 +1156,7 @@ static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
*inst = OR_r_rm;
return SLJIT_SUCCESS;
}
+
EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
FAIL_IF(!inst);
@@ -1159,6 +1169,10 @@ static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
return SLJIT_SUCCESS;
}
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+static const sljit_sw emit_clz_arg = 32 + 31;
+#endif
+
static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
@@ -1167,8 +1181,6 @@ static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
sljit_s32 dst_r;
SLJIT_UNUSED_ARG(op_flags);
- if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
- return SLJIT_SUCCESS;
if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
@@ -1176,81 +1188,53 @@ static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
srcw = 0;
}
- inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
+ if (cpu_has_cmov == -1)
+ get_cpu_features();
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+ inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
FAIL_IF(!inst);
*inst++ = GROUP_0F;
*inst = BSR_r_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- if (FAST_IS_REG(dst))
- dst_r = dst;
- else {
- /* Find an unused temporary register. */
- if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
- dst_r = SLJIT_R0;
- else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
- dst_r = SLJIT_R1;
+ if (cpu_has_cmov) {
+ if (dst_r != TMP_REG1) {
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
+ inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
+ }
else
- dst_r = SLJIT_R2;
- EMIT_MOV(compiler, dst, dstw, dst_r, 0);
- }
- EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
-#else
- dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
- compiler->mode32 = 0;
- EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31);
- compiler->mode32 = op_flags & SLJIT_I32_OP;
-#endif
+ inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
- if (cpu_has_cmov == -1)
- get_cpu_features();
-
- if (cpu_has_cmov) {
- inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
FAIL_IF(!inst);
*inst++ = GROUP_0F;
- *inst = CMOVNE_r_rm;
- } else {
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
- FAIL_IF(!inst);
- INC_SIZE(4);
+ *inst = CMOVE_r_rm;
+ }
+ else
+ FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
- *inst++ = JE_i8;
- *inst++ = 2;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
+ inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
- FAIL_IF(!inst);
- INC_SIZE(5);
+ if (cpu_has_cmov) {
+ EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
- *inst++ = JE_i8;
- *inst++ = 3;
- *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
-#endif
+ inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
+ FAIL_IF(!inst);
+ *inst++ = GROUP_0F;
+ *inst = CMOVE_r_rm;
}
+ else
+ FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
-#else
inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
#endif
+
FAIL_IF(!inst);
*(inst + 1) |= XOR;
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- if (dst & SLJIT_MEM) {
- inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
- FAIL_IF(!inst);
- *inst = XCHG_r_rm;
- }
-#else
if (dst & SLJIT_MEM)
- EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
-#endif
+ EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
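
BSR yields the index of the highest set bit and sets ZF when the source is
zero (the destination is then undefined, which is why CMOVE patches in
32 + 31 first); XOR with 31 equals 31 - n for n in 0..31, and 63 ^ 31 == 32,
so a zero input comes out as 32. A self-contained C model of the 32-bit
sequence:

    static unsigned clz32_model(unsigned x)
    {
        unsigned i, r = 32 + 31;      /* value CMOVE substitutes on ZF    */
        for (i = 0; i < 32; i++)      /* bsr: index of highest set bit    */
            if (x & (1u << i))
                r = i;
        return r ^ 31;                /* 31 - r for x != 0, 32 for x == 0 */
    }
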
@@ -1278,7 +1262,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
+ if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
+ return emit_prefetch(compiler, op, src, srcw);
+ return SLJIT_SUCCESS;
+ }
+
op = GET_OPCODE(op);
+
if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
@@ -1432,8 +1423,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
*(inst + 1) |= (op_imm); \
} \
else { \
- FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
- inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
+ FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
+ inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
FAIL_IF(!inst); \
*inst = (op_mr); \
}
@@ -1659,7 +1650,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
sljit_u8* inst;
sljit_s32 dst_r;
- dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
/* Register destination. */
if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
@@ -1711,9 +1702,9 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
}
else {
- EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
if (dst_r != src2)
EMIT_MOV(compiler, dst_r, 0, src2, src2w);
+ FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
FAIL_IF(!inst);
*inst++ = GROUP_0F;
@@ -1754,9 +1745,9 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
}
else {
- EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
if (dst_r != src1)
EMIT_MOV(compiler, dst_r, 0, src1, src1w);
+ FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
FAIL_IF(!inst);
*inst++ = GROUP_0F;
@@ -1775,7 +1766,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
*inst = IMUL_r_rm;
}
- if (dst_r == TMP_REG1)
+ if (dst & SLJIT_MEM)
EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
@@ -1922,8 +1913,8 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
*inst = GROUP_F7;
}
else {
- FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
- inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
+ FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
+ inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
FAIL_IF(!inst);
*inst = TEST_rm_r;
}
@@ -1951,8 +1942,8 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
*inst = GROUP_F7;
}
else {
- FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
- inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
+ FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
+ inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
FAIL_IF(!inst);
*inst = TEST_rm_r;
}
@@ -2066,22 +2057,27 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler,
else {
/* This case is complex since ecx itself may be used for
addressing, and this case must be supported as well. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
-#else
EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
-#endif
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
FAIL_IF(!inst);
*inst |= mode;
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
-#else
EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
-#endif
EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+#else
+ EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+ EMIT_MOV(compiler, TMP_REG2, 0, src2, src2w);
+ inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
+ FAIL_IF(!inst);
+ *inst = XCHG_r_rm;
+ inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+ FAIL_IF(!inst);
+ *inst |= mode;
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
+ EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
+#endif
}
return SLJIT_SUCCESS;
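
On x86-64 the fallback now reads src2 before ecx is disturbed and uses XCHG
to install the shift count while parking the old ecx in the same
instruction, which matters because ecx may itself appear in the src1/dst
addressing. Shape of the emitted sequence (sketch):

    /* mov  tmp1, src1
       mov  tmp2, src2
       xchg tmp2, rcx    ; count into cl, previous rcx parked in tmp2
       shXX tmp1, cl     ; the requested shift mode
       mov  rcx, tmp2    ; restore rcx
       mov  dst, tmp1                                                 */
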
@@ -2140,6 +2136,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
compiler->mode32 = op & SLJIT_I32_OP;
#endif
+ if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
+ return SLJIT_SUCCESS;
+
switch (GET_OPCODE(op)) {
case SLJIT_ADD:
if (!HAS_FLAGS(op)) {
@@ -2226,17 +2225,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
/* Floating point operators */
/* --------------------------------------------------------------------- */
-/* Alignment + 2 * 16 bytes. */
-static sljit_s32 sse2_data[3 + (4 + 4) * 2];
+/* Alignment(3) + 4 * 16 bytes. */
+static sljit_s32 sse2_data[3 + (4 * 4)];
static sljit_s32 *sse2_buffer;
static void init_compiler(void)
{
+ /* Align to 16 bytes. */
sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
- /* Single precision constants. */
+
+ /* Single precision constants (each constant is 16 bytes long). */
sse2_buffer[0] = 0x80000000;
sse2_buffer[4] = 0x7fffffff;
- /* Double precision constants. */
+ /* Double precision constants (each constant is 16 bytes long). */
sse2_buffer[8] = 0;
sse2_buffer[9] = 0x80000000;
sse2_buffer[12] = 0xffffffff;
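
Each mask occupies a full 16-byte slot so it can serve as an aligned 128-bit
memory operand for xorps/andps-style sign manipulation, and the buffer start
is rounded up with (addr + 15) & ~0xf. A self-contained C model of the two
single-precision masks:

    #include <stdint.h>
    #include <string.h>

    static float fneg_model(float x)  /* bits ^ 0x80000000: flip sign  */
    {
        uint32_t b; memcpy(&b, &x, 4); b ^= 0x80000000u;
        memcpy(&x, &b, 4); return x;
    }
    static float fabs_model(float x)  /* bits & 0x7fffffff: clear sign */
    {
        uint32_t b; memcpy(&b, &x, 4); b &= 0x7fffffffu;
        memcpy(&x, &b, 4); return x;
    }
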
@@ -2283,7 +2284,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
- sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -2296,7 +2297,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
*inst++ = GROUP_0F;
*inst = CVTTSD2SI_r_xm;
- if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
+ if (dst & SLJIT_MEM)
return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
return SLJIT_SUCCESS;
}
@@ -2388,7 +2389,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
return SLJIT_SUCCESS;
}
- if (SLOW_IS_REG(dst)) {
+ if (FAST_IS_REG(dst)) {
dst_r = dst;
if (dst != src)
FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
@@ -2533,6 +2534,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
return jump;
}
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#ifndef _WIN64
+#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R3)
+#else
+#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R2)
+#endif
+#endif
+
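
Reading the macro against the new reg_map tables above (an inference worth
double-checking): on SysV, SLJIT_R3 lives in rdi and call_with_args() moves
SLJIT_R0 into rdi; on Win64, SLJIT_R2 lives in rcx, which receives SLJIT_R0.
A call target held in such a register is parked in TMP_REG2 by
sljit_emit_ijump() below:

    /* SysV : R0=rax R1=rsi R2=rcx R3=rdi; CALL3 emits mov rdx,R2 then
              mov rdi,R0, so a target in SLJIT_R3 (rdi) would be lost.
       Win64: R0=rax R1=rdx R2=rcx;       CALL3 emits mov r8,R2  then
              mov rcx,R0, so a target in SLJIT_R2 (rcx) would be lost. */
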
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
sljit_u8 *inst;
@@ -2554,11 +2563,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
srcw += sizeof(sljit_sw);
#endif
-#endif
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
- if (src == SLJIT_R2) {
- EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
- src = TMP_REG1;
+#else
+ if ((src & SLJIT_MEM) || IS_REG_CHANGED_BY_CALL(src, type)) {
+ EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
+ src = TMP_REG2;
}
#endif
FAIL_IF(call_with_args(compiler, type));
@@ -2598,7 +2606,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw,
sljit_s32 type)
{
sljit_u8 *inst;
@@ -2611,11 +2618,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
sljit_sw dstw_save = dstw;
CHECK_ERROR();
- CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
- SLJIT_UNUSED_ARG(srcw);
-
- if (dst == SLJIT_UNUSED)
- return SLJIT_SUCCESS;
+ CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
CHECK_EXTRA_REGS(dst, dstw, (void)0);
@@ -2625,7 +2628,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
cond_set = get_jump_code(type) + 0x10;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
+ if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
FAIL_IF(!inst);
INC_SIZE(4 + 3);
@@ -2640,7 +2643,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return SLJIT_SUCCESS;
}
- reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
+ reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
FAIL_IF(!inst);
@@ -2663,6 +2666,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
}
+
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
@@ -2724,7 +2728,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return SLJIT_SUCCESS;
}
- if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
+ if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
if (dst != SLJIT_R0) {
@@ -2876,14 +2880,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 0;
- reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (emit_load_imm64(compiler, reg, init_value))
return NULL;
#else
- if (dst == SLJIT_UNUSED)
- dst = TMP_REG1;
-
if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
return NULL;
#endif