author     zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>   2014-06-17 15:48:37 +0000
committer  zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>   2014-06-17 15:48:37 +0000
commit     753803008382e4b743716f52d8d1397ce31e9c3a (patch)
tree       e9720ac4adb4b011e531458f4e6af06349bd6322
parent     e809f069a7e3e16d19d2df8bc87cae203635736f (diff)
download   pcre-753803008382e4b743716f52d8d1397ce31e9c3a.tar.gz
Major JIT compiler update.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1483 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--   pcre_jit_compile.c                  4
-rw-r--r--   sljit/sljitConfigInternal.h        31
-rw-r--r--   sljit/sljitLir.c                  207
-rw-r--r--   sljit/sljitLir.h                   56
-rw-r--r--   sljit/sljitNativeARM_32.c         250
-rw-r--r--   sljit/sljitNativeARM_64.c         125
-rw-r--r--   sljit/sljitNativeARM_T2_32.c      117
-rw-r--r--   sljit/sljitNativeMIPS_common.c    261
-rw-r--r--   sljit/sljitNativePPC_common.c     298
-rw-r--r--   sljit/sljitNativeSPARC_common.c   184
-rw-r--r--   sljit/sljitNativeX86_32.c          19
-rw-r--r--   sljit/sljitNativeX86_64.c          78
-rw-r--r--   sljit/sljitNativeX86_common.c     174
13 files changed, 1253 insertions, 551 deletions
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index d374629..b382411 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -477,14 +477,14 @@ typedef struct compare_context {
#define TMP1 SLJIT_SCRATCH_REG1
#define TMP2 SLJIT_SCRATCH_REG3
-#define TMP3 SLJIT_TEMPORARY_EREG2
+#define TMP3 SLJIT_SCRATCH_EREG2
#define STR_PTR SLJIT_SAVED_REG1
#define STR_END SLJIT_SAVED_REG2
#define STACK_TOP SLJIT_SCRATCH_REG2
#define STACK_LIMIT SLJIT_SAVED_REG3
#define ARGUMENTS SLJIT_SAVED_EREG1
#define COUNT_MATCH SLJIT_SAVED_EREG2
-#define RETURN_ADDR SLJIT_TEMPORARY_EREG1
+#define RETURN_ADDR SLJIT_SCRATCH_EREG1
/* Local space layout. */
/* These two locals can be used by the current opcode. */
diff --git a/sljit/sljitConfigInternal.h b/sljit/sljitConfigInternal.h
index 89be38b..dc9963f 100644
--- a/sljit/sljitConfigInternal.h
+++ b/sljit/sljitConfigInternal.h
@@ -34,7 +34,8 @@
SLJIT_32BIT_ARCHITECTURE : 32 bit architecture
SLJIT_64BIT_ARCHITECTURE : 64 bit architecture
SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index
- SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing a double array by index
+ SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing a double precision floating point array by index
+ SLJIT_SINGLE_SHIFT : the shift required to apply when accessing a single precision floating point array by index
SLJIT_LITTLE_ENDIAN : little endian architecture
SLJIT_BIG_ENDIAN : big endian architecture
SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
@@ -142,6 +143,19 @@
#undef SLJIT_EXECUTABLE_ALLOCATOR
#endif
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#define SLJIT_CONFIG_X86 1
+#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
+ || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
+#define SLJIT_CONFIG_ARM 1
+#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define SLJIT_CONFIG_PPC 1
+#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#define SLJIT_CONFIG_MIPS 1
+#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) || (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64)
+#define SLJIT_CONFIG_SPARC 1
+#endif
+
#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
/* These libraries are needed for the macros below. */
@@ -219,7 +233,7 @@
#ifndef SLJIT_CACHE_FLUSH
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
/* Not required to implement on archs with unified caches. */
#define SLJIT_CACHE_FLUSH(from, to)
@@ -240,7 +254,7 @@
#define SLJIT_CACHE_FLUSH(from, to) \
cacheflush((long)(from), (long)(to), 0)
-#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */
#define SLJIT_CACHE_FLUSH(from, to) \
@@ -314,6 +328,7 @@ typedef double sljit_d;
/* Shift for double precision sized data. */
#define SLJIT_DOUBLE_SHIFT 3
+#define SLJIT_SINGLE_SHIFT 2
#ifndef SLJIT_W
@@ -418,22 +433,12 @@ typedef double sljit_d;
#endif
#endif /* SLJIT_RETURN_ADDRESS_OFFSET */
-#ifndef SLJIT_SSE2
-
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-/* Turn on SSE2 support on x86. */
-#define SLJIT_SSE2 1
-
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* Auto detect SSE2 support using CPUID.
On 64 bit x86 cpus, sse2 must be present. */
#define SLJIT_DETECT_SSE2 1
#endif
-#endif /* (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) */
-
-#endif /* !SLJIT_SSE2 */
-
#ifndef SLJIT_UNALIGNED
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
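The grouped architecture macros introduced in this header (SLJIT_CONFIG_X86, SLJIT_CONFIG_ARM, SLJIT_CONFIG_PPC, SLJIT_CONFIG_MIPS, SLJIT_CONFIG_SPARC) are what let the per-word-size #if chains in the rest of the patch collapse. A minimal sketch of the rewrite pattern, using the x86 guard as it appears in the hunks below (illustration only, not part of the patch):

    /* Old guard, enumerating both word sizes: */
    #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
        || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    /* x86-only code */
    #endif

    /* New guard after this patch, using the family macro: */
    #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
    /* x86-only code */
    #endif
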
diff --git a/sljit/sljitLir.c b/sljit/sljitLir.c
index 1acecba..9acaf10 100644
--- a/sljit/sljitLir.c
+++ b/sljit/sljitLir.c
@@ -117,7 +117,7 @@
#define JUMP_ADDR 0x2
/* SLJIT_REWRITABLE_JUMP is 0x1000. */
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
# define PATCH_MB 0x4
# define PATCH_MW 0x8
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -162,7 +162,7 @@
# define PATCH_ABS64 0x100
#endif
-#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
# define IS_COND 0x004
# define IS_CALL 0x008
# define PATCH_B 0x010
@@ -174,7 +174,7 @@
# define REMOVE_COND 0x100
#endif
-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
# define IS_MOVABLE 0x004
# define IS_JAL 0x008
# define IS_CALL 0x010
@@ -254,7 +254,7 @@
#ifdef _AIX
#define FIXED_LOCALS_OFFSET ((6 + 8) * sizeof(sljit_sw))
#else
-#define FIXED_LOCALS_OFFSET (2 * sizeof(sljit_sw))
+#define FIXED_LOCALS_OFFSET (3 * sizeof(sljit_sw))
#endif
#endif
@@ -307,15 +307,11 @@
#include "sljitExecAllocator.c"
#endif
-#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) && !(defined SLJIT_SSE2 && SLJIT_SSE2)
-#error SLJIT_SSE2_AUTO cannot be enabled without SLJIT_SSE2
-#endif
-
/* --------------------------------------------------------------------- */
/* Public functions */
/* --------------------------------------------------------------------- */
-#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || ((defined SLJIT_SSE2 && SLJIT_SSE2) && ((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)))
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
#define SLJIT_NEEDS_COMPILER_INIT 1
static sljit_si compiler_initialized = 0;
/* A thread safe initialization. */
@@ -382,7 +378,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void)
compiler->cpool_diff = 0xffffffff;
#endif
-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
compiler->delay_slot = UNMOVABLE_INS;
#endif
@@ -593,10 +589,6 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
case SLJIT_MUL: \
SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \
break; \
- case SLJIT_CMPD: \
- SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
- SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_SET_S))); \
- break; \
case SLJIT_ADD: \
SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_S))); \
break; \
@@ -625,6 +617,19 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
break; \
}
+#define FUNCTION_CHECK_FOP() \
+ SLJIT_ASSERT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \
+ switch (GET_OPCODE(op)) { \
+ case SLJIT_CMPD: \
+ SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
+ SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_SET_S))); \
+ break; \
+ default: \
+ /* Only SLJIT_INT_OP or SLJIT_SINGLE_OP is allowed. */ \
+ SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \
+ break; \
+ }
+
#define FUNCTION_CHECK_IS_REG(r) \
((r) == SLJIT_UNUSED || \
((r) >= SLJIT_SCRATCH_REG1 && (r) <= SLJIT_SCRATCH_REG1 - 1 + compiler->scratches) || \
@@ -669,6 +674,8 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
#define FUNCTION_FCHECK(p, i) \
if ((p) >= SLJIT_FLOAT_REG1 && (p) <= SLJIT_FLOAT_REG6) \
SLJIT_ASSERT(i == 0); \
+ else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \
+ SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \
else if ((p) & SLJIT_MEM) { \
SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \
if ((p) & OFFS_REG_MASK) { \
@@ -777,7 +784,9 @@ static SLJIT_CONST char* op_names[] = {
(char*)"mul", (char*)"and", (char*)"or", (char*)"xor",
(char*)"shl", (char*)"lshr", (char*)"ashr",
/* fop1 */
- (char*)"cmp", (char*)"mov", (char*)"neg", (char*)"abs",
+ (char*)"mov", (char*)"neg", (char*)"abs", (char*)"conv",
+ (char*)"conv", (char*)"conv", (char*)"conv", (char*)"conv",
+ (char*)"cmp",
/* fop2 */
(char*)"add", (char*)"sub", (char*)"mul", (char*)"div"
};
@@ -1055,6 +1064,32 @@ static SLJIT_INLINE void check_sljit_emit_op_custom(struct sljit_compiler *compi
SLJIT_ASSERT(instruction);
}
+#define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \
+ SLJIT_ASSERT(sljit_is_fpu_available()); \
+ SLJIT_COMPILE_ASSERT(!(SLJIT_CONVW_FROMD & 0x1) && !(SLJIT_CONVD_FROMW & 0x1) && (SLJIT_MOVD < SLJIT_CONVW_FROMD), \
+ invalid_float_opcodes); \
+ if (GET_OPCODE(op) >= SLJIT_CONVW_FROMD) { \
+ if (GET_OPCODE(op) == SLJIT_CMPD) { \
+ check_sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw); \
+ ADJUST_LOCAL_OFFSET(dst, dstw); \
+ ADJUST_LOCAL_OFFSET(src, srcw); \
+ return sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw); \
+ } \
+ if ((GET_OPCODE(op) | 0x1) == SLJIT_CONVI_FROMD) { \
+ check_sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw); \
+ ADJUST_LOCAL_OFFSET(dst, dstw); \
+ ADJUST_LOCAL_OFFSET(src, srcw); \
+ return sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw); \
+ } \
+ check_sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw); \
+ ADJUST_LOCAL_OFFSET(dst, dstw); \
+ ADJUST_LOCAL_OFFSET(src, srcw); \
+ return sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw); \
+ } \
+ check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); \
+ ADJUST_LOCAL_OFFSET(dst, dstw); \
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
static SLJIT_INLINE void check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
sljit_si dst, sljit_sw dstw,
sljit_si src, sljit_sw srcw)
@@ -1074,18 +1109,94 @@ static SLJIT_INLINE void check_sljit_emit_fop1(struct sljit_compiler *compiler,
}
#endif
- SLJIT_ASSERT(sljit_is_fpu_available());
- SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_CMPD && GET_OPCODE(op) <= SLJIT_ABSD);
+ SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_MOVD && GET_OPCODE(op) <= SLJIT_CONVD_FROMS);
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
- FUNCTION_CHECK_OP();
+ FUNCTION_CHECK_FOP();
FUNCTION_FCHECK(src, srcw);
FUNCTION_FCHECK(dst, dstw);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " %s%s ", op_names[GET_OPCODE(op)],
+ (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
+ ? ((op & SLJIT_SINGLE_OP) ? "s.fromd" : "d.froms")
+ : ((op & SLJIT_SINGLE_OP) ? "s" : "d"));
+ sljit_verbose_fparam(dst, dstw);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_fparam(src, srcw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+}
+
+static SLJIT_INLINE void check_sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si src1, sljit_sw src1w,
+ sljit_si src2, sljit_sw src2w)
+{
+ /* If debug and verbose are disabled, all arguments are unused. */
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(op);
+ SLJIT_UNUSED_ARG(src1);
+ SLJIT_UNUSED_ARG(src1w);
+ SLJIT_UNUSED_ARG(src2);
+ SLJIT_UNUSED_ARG(src2w);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+ if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+ compiler->skip_checks = 0;
+ return;
+ }
+#endif
+
+ SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_CMPD);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+ FUNCTION_CHECK_FOP();
+ FUNCTION_FCHECK(src1, src1w);
+ FUNCTION_FCHECK(src2, src2w);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " %s%s%s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d",
!(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_S) ? "" : ".s");
- sljit_verbose_fparam(dst, dstw);
+ sljit_verbose_fparam(src1, src1w);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_fparam(src2, src2w);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+}
+
+static SLJIT_INLINE void check_sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ /* If debug and verbose are disabled, all arguments are unused. */
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(op);
+ SLJIT_UNUSED_ARG(dst);
+ SLJIT_UNUSED_ARG(dstw);
+ SLJIT_UNUSED_ARG(src);
+ SLJIT_UNUSED_ARG(srcw);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+ if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+ compiler->skip_checks = 0;
+ return;
+ }
+#endif
+
+ SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_CONVW_FROMD && GET_OPCODE(op) <= SLJIT_CONVI_FROMD);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+ FUNCTION_CHECK_FOP();
+ FUNCTION_FCHECK(src, srcw);
+ FUNCTION_CHECK_DST(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " %s%s.from%s ", op_names[GET_OPCODE(op)],
+ (GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? "i" : "w",
+ (op & SLJIT_SINGLE_OP) ? "s" : "d");
+ sljit_verbose_param(dst, dstw);
fprintf(compiler->verbose, ", ");
sljit_verbose_fparam(src, srcw);
fprintf(compiler->verbose, "\n");
@@ -1093,6 +1204,44 @@ static SLJIT_INLINE void check_sljit_emit_fop1(struct sljit_compiler *compiler,
#endif
}
+static SLJIT_INLINE void check_sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ /* If debug and verbose are disabled, all arguments are unused. */
+ SLJIT_UNUSED_ARG(compiler);
+ SLJIT_UNUSED_ARG(op);
+ SLJIT_UNUSED_ARG(dst);
+ SLJIT_UNUSED_ARG(dstw);
+ SLJIT_UNUSED_ARG(src);
+ SLJIT_UNUSED_ARG(srcw);
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
+ if (SLJIT_UNLIKELY(compiler->skip_checks)) {
+ compiler->skip_checks = 0;
+ return;
+ }
+#endif
+
+ SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_CONVD_FROMW && GET_OPCODE(op) <= SLJIT_CONVD_FROMI);
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+ FUNCTION_CHECK_FOP();
+ FUNCTION_CHECK_SRC(src, srcw);
+ FUNCTION_FCHECK(dst, dstw);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+ if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+ fprintf(compiler->verbose, " %s%s.from%s ", op_names[GET_OPCODE(op)],
+ (op & SLJIT_SINGLE_OP) ? "s" : "d",
+ (GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? "i" : "w");
+ sljit_verbose_fparam(dst, dstw);
+ fprintf(compiler->verbose, ", ");
+ sljit_verbose_param(src, srcw);
+ fprintf(compiler->verbose, "\n");
+ }
+#endif
+}
+
static SLJIT_INLINE void check_sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
sljit_si dst, sljit_sw dstw,
sljit_si src1, sljit_sw src1w,
@@ -1111,7 +1260,7 @@ static SLJIT_INLINE void check_sljit_emit_fop2(struct sljit_compiler *compiler,
SLJIT_ASSERT(sljit_is_fpu_available());
SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_ADDD && GET_OPCODE(op) <= SLJIT_DIVD);
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
- FUNCTION_CHECK_OP();
+ FUNCTION_CHECK_FOP();
FUNCTION_FCHECK(src1, src1w);
FUNCTION_FCHECK(src2, src2w);
FUNCTION_FCHECK(dst, dstw);
@@ -1374,9 +1523,7 @@ static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compi
#define SLJIT_CPUINFO SLJIT_CPUINFO_PART1 SLJIT_CPUINFO_PART2 SLJIT_CPUINFO_PART3
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-# include "sljitNativeX86_common.c"
-#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
# include "sljitNativeX86_common.c"
#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
# include "sljitNativeARM_32.c"
@@ -1386,21 +1533,17 @@ static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compi
# include "sljitNativeARM_T2_32.c"
#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
# include "sljitNativeARM_64.c"
-#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
# include "sljitNativePPC_common.c"
-#elif (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-# include "sljitNativePPC_common.c"
-#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-# include "sljitNativeMIPS_common.c"
-#elif (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
# include "sljitNativeMIPS_common.c"
-#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC)
# include "sljitNativeSPARC_common.c"
#elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX)
# include "sljitNativeTILEGX_64.c"
#endif
-#if !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type,
sljit_si src1, sljit_sw src1w,
@@ -1508,7 +1651,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile
#endif
-#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
{
diff --git a/sljit/sljitLir.h b/sljit/sljitLir.h
index e2cd218..8cc6c30 100644
--- a/sljit/sljitLir.h
+++ b/sljit/sljitLir.h
@@ -114,8 +114,8 @@ of sljitConfigInternal.h */
/* Note: extra registers cannot be used for memory addressing. */
/* Note: on x86-32, these registers are emulated (using stack
loads & stores). */
-#define SLJIT_TEMPORARY_EREG1 4
-#define SLJIT_TEMPORARY_EREG2 5
+#define SLJIT_SCRATCH_EREG1 4
+#define SLJIT_SCRATCH_EREG2 5
/* Saved registers whose preserve their values across function calls. */
#define SLJIT_SAVED_REG1 6
@@ -238,7 +238,7 @@ struct sljit_compiler {
sljit_si mode32;
#endif
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
sljit_si flags_saved;
#endif
@@ -271,13 +271,13 @@ struct sljit_compiler {
sljit_sw cache_argw;
#endif
-#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
sljit_sw imm;
sljit_si cache_arg;
sljit_sw cache_argw;
#endif
-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
sljit_si delay_slot;
sljit_si cache_arg;
sljit_sw cache_argw;
@@ -738,37 +738,55 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void);
-/* Note: dst is the left and src is the right operand for SLJIT_FCMP.
- Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED is set,
- the comparison result is unpredictable.
- Flags: SP | E | S (see SLJIT_C_FLOAT_*) */
-#define SLJIT_CMPD 36
-#define SLJIT_CMPS (SLJIT_CMPD | SLJIT_SINGLE_OP)
/* Flags: SP - (never set any flags) */
-#define SLJIT_MOVD 37
+#define SLJIT_MOVD 36
#define SLJIT_MOVS (SLJIT_MOVD | SLJIT_SINGLE_OP)
/* Flags: SP - (never set any flags) */
-#define SLJIT_NEGD 38
+#define SLJIT_NEGD 37
#define SLJIT_NEGS (SLJIT_NEGD | SLJIT_SINGLE_OP)
/* Flags: SP - (never set any flags) */
-#define SLJIT_ABSD 39
+#define SLJIT_ABSD 38
#define SLJIT_ABSS (SLJIT_ABSD | SLJIT_SINGLE_OP)
+/* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE]
+ SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int
+ Rounding mode when the destination is W or I: round towards zero. */
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVD_FROMS 39
+#define SLJIT_CONVS_FROMD (SLJIT_CONVD_FROMS | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVW_FROMD 40
+#define SLJIT_CONVW_FROMS (SLJIT_CONVW_FROMD | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVI_FROMD 41
+#define SLJIT_CONVI_FROMS (SLJIT_CONVI_FROMD | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVD_FROMW 42
+#define SLJIT_CONVS_FROMW (SLJIT_CONVD_FROMW | SLJIT_SINGLE_OP)
+/* Flags: SP - (never set any flags) */
+#define SLJIT_CONVD_FROMI 43
+#define SLJIT_CONVS_FROMI (SLJIT_CONVD_FROMI | SLJIT_SINGLE_OP)
+/* Note: dst is the left and src is the right operand for SLJIT_CMPD.
+ Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED flag
+ is set, the comparison result is unpredictable.
+ Flags: SP | E | S (see SLJIT_C_FLOAT_*) */
+#define SLJIT_CMPD 44
+#define SLJIT_CMPS (SLJIT_CMPD | SLJIT_SINGLE_OP)
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
sljit_si dst, sljit_sw dstw,
sljit_si src, sljit_sw srcw);
/* Flags: SP - (never set any flags) */
-#define SLJIT_ADDD 40
+#define SLJIT_ADDD 45
#define SLJIT_ADDS (SLJIT_ADDD | SLJIT_SINGLE_OP)
/* Flags: SP - (never set any flags) */
-#define SLJIT_SUBD 41
+#define SLJIT_SUBD 46
#define SLJIT_SUBS (SLJIT_SUBD | SLJIT_SINGLE_OP)
/* Flags: SP - (never set any flags) */
-#define SLJIT_MULD 42
+#define SLJIT_MULD 47
#define SLJIT_MULS (SLJIT_MULD | SLJIT_SINGLE_OP)
/* Flags: SP - (never set any flags) */
-#define SLJIT_DIVD 43
+#define SLJIT_DIVD 48
#define SLJIT_DIVS (SLJIT_DIVD | SLJIT_SINGLE_OP)
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
@@ -912,7 +930,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
/* --------------------------------------------------------------------- */
#define SLJIT_MAJOR_VERSION 0
-#define SLJIT_MINOR_VERSION 91
+#define SLJIT_MINOR_VERSION 92
/* Get the human readable name of the platform. Can be useful on platforms
like ARM, where ARM and Thumb2 functions can be mixed, and
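The sljitLir.h hunks above add the SLJIT_CONV*_FROM* opcode family and renumber the floating point opcodes accordingly. A minimal usage sketch of the new opcodes through the unchanged sljit_emit_fop1() entry point (not part of the patch; it assumes a compiler object already created with sljit_create_compiler()):

    /* Truncate the double in SLJIT_FLOAT_REG1 to a signed word in
       SLJIT_SCRATCH_REG1, rounding towards zero as documented above. */
    sljit_emit_fop1(compiler, SLJIT_CONVW_FROMD,
        SLJIT_SCRATCH_REG1, 0,   /* integer destination */
        SLJIT_FLOAT_REG1, 0);    /* floating point source */

    /* The opposite direction: load a word from a local stack slot and
       produce a double in SLJIT_FLOAT_REG1. */
    sljit_emit_fop1(compiler, SLJIT_CONVD_FROMW,
        SLJIT_FLOAT_REG1, 0,
        SLJIT_MEM1(SLJIT_LOCALS_REG), 0);
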
diff --git a/sljit/sljitNativeARM_32.c b/sljit/sljitNativeARM_32.c
index 6747c4f..99601f9 100644
--- a/sljit/sljitNativeARM_32.c
+++ b/sljit/sljitNativeARM_32.c
@@ -102,8 +102,12 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
#define VABS_F32 0xeeb00ac0
#define VADD_F32 0xee300a00
#define VCMP_F32 0xeeb40a40
+#define VCVT_F32_S32 0xeeb80ac0
+#define VCVT_F64_F32 0xeeb70ac0
+#define VCVT_S32_F32 0xeebd0ac0
#define VDIV_F32 0xee800a00
#define VMOV_F32 0xeeb00a40
+#define VMOV 0xee000a10
#define VMRS 0xeef1fa10
#define VMUL_F32 0xee200a00
#define VNEG_F32 0xeeb10a40
@@ -810,9 +814,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
#define SIGNED_DATA 0x40
#define LOAD_DATA 0x80
-#define EMIT_INSTRUCTION(inst) \
- FAIL_IF(push_inst(compiler, (inst)))
-
/* Condition: AL. */
#define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \
(0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2))
@@ -853,7 +854,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
push |= 1 << 5;
if (saveds >= 1)
push |= 1 << 4;
- EMIT_INSTRUCTION(push);
+ FAIL_IF(push_inst(compiler, push));
/* Stack must be aligned to 8 bytes: */
size = (1 + saveds) * sizeof(sljit_uw);
@@ -867,11 +868,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size));
if (args >= 1)
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG1)));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG1))));
if (args >= 2)
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG2, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2)));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG2, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2))));
if (args >= 3)
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG3, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG3)));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG3, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG3))));
return SLJIT_SUCCESS;
}
@@ -1031,7 +1032,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
if (op == SLJIT_MOV_UB)
return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2]));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])));
return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst]));
#else
return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2));
@@ -1050,7 +1051,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2]));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])));
return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst]));
#else
return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2));
@@ -1303,8 +1304,8 @@ static sljit_si generate_int(struct sljit_compiler *compiler, sljit_si reg, slji
return 0;
}
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1));
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1)));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2)));
return 1;
}
#endif
@@ -1320,16 +1321,12 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sl
/* Create imm by 1 inst. */
tmp = get_imm(imm);
- if (tmp) {
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp));
- return SLJIT_SUCCESS;
- }
+ if (tmp)
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp));
tmp = get_imm(~imm);
- if (tmp) {
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp));
- return SLJIT_SUCCESS;
- }
+ if (tmp)
+ return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp));
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
/* Create imm by 2 inst. */
@@ -1369,14 +1366,14 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl
if (imm) {
if (inp_flags & ARG_TEST)
return 1;
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm)));
return -1;
}
imm = get_imm(~argw);
if (imm) {
if (inp_flags & ARG_TEST)
return 1;
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm)));
return -1;
}
return 0;
@@ -1394,8 +1391,8 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl
if (inp_flags & ARG_TEST)
return 1;
- EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK,
- RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7)));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK,
+ RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7))));
return -1;
}
@@ -1403,13 +1400,13 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl
if (argw >= 0 && argw <= 0xfff) {
if (inp_flags & ARG_TEST)
return 1;
- EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw)));
return -1;
}
if (argw < 0 && argw >= -0xfff) {
if (inp_flags & ARG_TEST)
return 1;
- EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw)));
return -1;
}
}
@@ -1417,14 +1414,14 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl
if (argw >= 0 && argw <= 0xff) {
if (inp_flags & ARG_TEST)
return 1;
- EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
return -1;
}
if (argw < 0 && argw >= -0xff) {
if (inp_flags & ARG_TEST)
return 1;
argw = -argw;
- EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))));
return -1;
}
}
@@ -1477,7 +1474,7 @@ static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_
/* This can only happen for stores */ \
/* since ldr reg, [reg, ...]! has no meaning */ \
SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg))); \
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg)))); \
reg = TMP_REG3; \
} \
}
@@ -1537,9 +1534,8 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags,
SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00));
if (inp_flags & WRITE_BACK)
tmp_r = arg & REG_MASK;
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)));
- EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0)));
- return SLJIT_SUCCESS;
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
+ return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0)));
}
imm = (sljit_uw)(argw - compiler->cache_argw);
@@ -1558,7 +1554,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags,
imm = get_imm(argw & ~max_delta);
if (imm) {
TEST_WRITE_BACK();
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm)));
GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
return SLJIT_SUCCESS;
}
@@ -1567,15 +1563,14 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags,
if (imm) {
argw = -argw;
TEST_WRITE_BACK();
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm)));
GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta);
return SLJIT_SUCCESS;
}
if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) {
TEST_WRITE_BACK();
- EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
- return SLJIT_SUCCESS;
+ return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
}
if (argw == next_argw && (next_arg & SLJIT_MEM)) {
@@ -1586,15 +1581,14 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags,
compiler->cache_argw = argw;
TEST_WRITE_BACK();
- EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
- return SLJIT_SUCCESS;
+ return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0)));
}
imm = (sljit_uw)(argw - next_argw);
if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) {
SLJIT_ASSERT(inp_flags & LOAD_DATA);
FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK]));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK])));
compiler->cache_arg = arg;
compiler->cache_argw = argw;
@@ -1610,8 +1604,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags,
}
FAIL_IF(load_immediate(compiler, tmp_r, argw));
- EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0)));
- return SLJIT_SUCCESS;
+ return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0)));
}
static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
@@ -1843,10 +1836,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
op = GET_OPCODE(op);
switch (op) {
case SLJIT_BREAKPOINT:
- EMIT_INSTRUCTION(BKPT);
+ FAIL_IF(push_inst(compiler, BKPT));
break;
case SLJIT_NOP:
- EMIT_INSTRUCTION(NOP);
+ FAIL_IF(push_inst(compiler, NOP));
break;
case SLJIT_UMUL:
case SLJIT_SMUL:
@@ -1857,7 +1850,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
| (reg_map[SLJIT_SCRATCH_REG1] << 8)
| reg_map[SLJIT_SCRATCH_REG2]);
#else
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2)));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2))));
return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL)
| (reg_map[SLJIT_SCRATCH_REG2] << 16)
| (reg_map[SLJIT_SCRATCH_REG1] << 12)
@@ -1867,7 +1860,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler
case SLJIT_UDIV:
case SLJIT_SDIV:
if (compiler->scratches >= 3)
- EMIT_INSTRUCTION(0xe52d2008 /* str r2, [sp, #-8]! */);
+ FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */));
#if defined(__GNUC__)
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
(op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
@@ -2064,7 +2057,7 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl
SLJIT_ASSERT(arg & SLJIT_MEM);
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
arg = SLJIT_MEM | TMP_REG1;
argw = 0;
}
@@ -2097,13 +2090,13 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl
}
imm = get_imm(argw & ~0x3fc);
if (imm) {
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2));
}
imm = get_imm(-argw & ~0x3fc);
if (imm) {
argw = -argw;
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm)));
return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2));
}
}
@@ -2112,7 +2105,7 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl
compiler->cache_argw = argw;
if (arg & REG_MASK) {
FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1]));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1])));
}
else
FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
@@ -2120,60 +2113,114 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl
return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0));
}
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ if (src & SLJIT_MEM) {
+ FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
+ src = TMP_FREG1;
+ }
+
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_SINGLE_OP, TMP_FREG1, src, 0)));
+
+ if (dst == SLJIT_UNUSED)
+ return SLJIT_SUCCESS;
+
+ if (FAST_IS_REG(dst))
+ return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16));
+
+ /* Store the integer value from a VFP register. */
+ return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+ if (FAST_IS_REG(src))
+ FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16)));
+ else if (src & SLJIT_MEM) {
+ /* Load the integer value into a VFP register. */
+ FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
+ }
+ else {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16)));
+ }
+
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_SINGLE_OP, dst_r, TMP_FREG1, 0)));
+
+ if (dst & SLJIT_MEM)
+ return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si src1, sljit_sw src1w,
+ sljit_si src2, sljit_sw src2w)
+{
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
+ src1 = TMP_FREG1;
+ }
+
+ if (src2 & SLJIT_MEM) {
+ FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
+ src2 = TMP_FREG2;
+ }
+
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, src1, src2, 0)));
+ return push_inst(compiler, VMRS);
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
sljit_si dst, sljit_sw dstw,
sljit_si src, sljit_sw srcw)
{
- sljit_si dst_fr;
+ sljit_si dst_r;
CHECK_ERROR();
- check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
- SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
-
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- op ^= SLJIT_SINGLE_OP;
+ if (GET_OPCODE(op) != SLJIT_CONVD_FROMS)
+ op ^= SLJIT_SINGLE_OP;
- if (GET_OPCODE(op) == SLJIT_CMPD) {
- if (dst & SLJIT_MEM) {
- FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw));
- dst = TMP_FREG1;
- }
- if (src & SLJIT_MEM) {
- FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw));
- src = TMP_FREG2;
- }
- EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, dst, src, 0));
- EMIT_INSTRUCTION(VMRS);
- return SLJIT_SUCCESS;
- }
+ SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
+ SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
- dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src & SLJIT_MEM) {
- FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_fr, src, srcw));
- src = dst_fr;
+ FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw));
+ src = dst_r;
}
switch (GET_OPCODE(op)) {
- case SLJIT_MOVD:
- if (src != dst_fr && dst_fr != TMP_FREG1)
- EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0));
- break;
- case SLJIT_NEGD:
- EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0));
- break;
- case SLJIT_ABSD:
- EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0));
- break;
- }
-
- if (dst_fr == TMP_FREG1) {
- if (GET_OPCODE(op) == SLJIT_MOVD)
- dst_fr = src;
- FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_fr, dst, dstw));
+ case SLJIT_MOVD:
+ if (src != dst_r) {
+ if (dst_r != TMP_FREG1)
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+ else
+ dst_r = src;
+ }
+ break;
+ case SLJIT_NEGD:
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+ break;
+ case SLJIT_ABSD:
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+ break;
+ case SLJIT_CONVD_FROMS:
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0)));
+ op ^= SLJIT_SINGLE_OP;
+ break;
}
+ if (dst & SLJIT_MEM)
+ return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw);
return SLJIT_SUCCESS;
}
@@ -2182,16 +2229,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
sljit_si src1, sljit_sw src1w,
sljit_si src2, sljit_sw src2w)
{
- sljit_si dst_fr;
+ sljit_si dst_r;
CHECK_ERROR();
check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+ ADJUST_LOCAL_OFFSET(src2, src2w);
compiler->cache_arg = 0;
compiler->cache_argw = 0;
op ^= SLJIT_SINGLE_OP;
- dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src2 & SLJIT_MEM) {
FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
@@ -2205,23 +2255,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
switch (GET_OPCODE(op)) {
case SLJIT_ADDD:
- EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1));
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
break;
case SLJIT_SUBD:
- EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1));
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
break;
case SLJIT_MULD:
- EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1));
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
break;
case SLJIT_DIVD:
- EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1));
+ FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1)));
break;
}
- if (dst_fr == TMP_FREG1)
+ if (dst_r == TMP_FREG1)
FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw));
return SLJIT_SUCCESS;
@@ -2252,7 +2302,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c
if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw))
return compiler->error;
/* TMP_REG3 is used for caching. */
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3)));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3))));
compiler->cache_arg = 0;
compiler->cache_argw = 0;
return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0);
@@ -2265,7 +2315,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *
ADJUST_LOCAL_OFFSET(src, srcw);
if (FAST_IS_REG(src))
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src)));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src))));
else if (src & SLJIT_MEM) {
if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw))
FAIL_IF(compiler->error);
@@ -2273,7 +2323,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *
compiler->cache_arg = 0;
compiler->cache_argw = 0;
FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0));
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2)));
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2))));
}
}
else if (src & SLJIT_IMM)
@@ -2454,14 +2504,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
if (op < SLJIT_ADD) {
- EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0));
- EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc);
+ FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0)));
+ FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc));
return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS;
}
ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP));
if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
- EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc);
+ FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc));
/* The condition must always be set, even if the ORR/EOR is not executed above. */
return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS;
}
@@ -2478,8 +2528,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
srcw = 0;
}
- EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc);
- EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000));
+ FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc));
+ FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
if (dst_r == TMP_REG2)
FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0));
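Most of the mechanical churn in sljitNativeARM_32.c above comes from dropping the EMIT_INSTRUCTION wrapper. Since the deleted macro expanded to exactly FAIL_IF(push_inst(compiler, (inst))), the rewrite pattern is (illustration only):

    /* Before: */
    EMIT_INSTRUCTION(BKPT);

    /* After: the expansion is written out ... */
    FAIL_IF(push_inst(compiler, BKPT));

    /* ... and where it was the final statement, the FAIL_IF wrapper is
       dropped in favour of returning push_inst() directly. */
    return push_inst(compiler, NOP);
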
diff --git a/sljit/sljitNativeARM_64.c b/sljit/sljitNativeARM_64.c
index cfd1a38..31c2876 100644
--- a/sljit/sljitNativeARM_64.c
+++ b/sljit/sljitNativeARM_64.c
@@ -83,6 +83,8 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = {
#define FABS 0x1e60c000
#define FADD 0x1e602800
#define FCMP 0x1e602000
+#define FCVT 0x1e224000
+#define FCVTZS 0x9e780000
#define FDIV 0x1e601800
#define FMOV 0x1e604000
#define FMUL 0x1e600800
@@ -104,6 +106,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = {
#define RET 0xd65f0000
#define SBC 0xda000000
#define SBFM 0x93000000
+#define SCVTF 0x9e620000
#define SDIV 0x9ac00c00
#define SMADDL 0x9b200000
#define SMULH 0x9b403c00
@@ -1524,41 +1527,107 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl
return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3));
}
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+
+ if (GET_OPCODE(op) == SLJIT_CONVI_FROMD)
+ inv_bits |= (1 << 31);
+
+ if (src & SLJIT_MEM) {
+ emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw);
+ src = TMP_FREG1;
+ }
+
+ FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src)));
+
+ if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
+ return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw);
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+ sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+ inv_bits |= (1 << 31);
+
+ if (src & SLJIT_MEM) {
+ emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw);
+ src = TMP_REG1;
+ } else if (src & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+ srcw = (sljit_si)srcw;
+#endif
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ src = TMP_REG1;
+ }
+
+ FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src)));
+
+ if (dst & SLJIT_MEM)
+ return emit_fop_mem(compiler, ((op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw);
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si src1, sljit_sw src1w,
+ sljit_si src2, sljit_sw src2w)
+{
+ sljit_si mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE;
+ sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+
+ if (src1 & SLJIT_MEM) {
+ emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
+ src1 = TMP_FREG1;
+ }
+
+ if (src2 & SLJIT_MEM) {
+ emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
+ src2 = TMP_FREG2;
+ }
+
+ return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2));
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
sljit_si dst, sljit_sw dstw,
sljit_si src, sljit_sw srcw)
{
sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE;
- sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+ sljit_ins inv_bits;
CHECK_ERROR();
- check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
-
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- if (GET_OPCODE(op) == SLJIT_CMPD) {
- if (dst & SLJIT_MEM) {
- emit_fop_mem(compiler, mem_flags, TMP_FREG1, dst, dstw);
- dst = TMP_FREG1;
- }
- if (src & SLJIT_MEM) {
- emit_fop_mem(compiler, mem_flags, TMP_FREG2, src, srcw);
- src = TMP_FREG2;
- }
- return push_inst(compiler, (FCMP ^ inv_bits) | VN(dst) | VM(src));
- }
+ SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference);
+ SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+ inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
if (src & SLJIT_MEM) {
- emit_fop_mem(compiler, mem_flags, dst_r, src, srcw);
+ emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONVD_FROMS) ? (mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw);
src = dst_r;
}
switch (GET_OPCODE(op)) {
case SLJIT_MOVD:
- if (src != dst_r)
- FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src)));
+ if (src != dst_r) {
+ if (dst_r != TMP_FREG1)
+ FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src)));
+ else
+ dst_r = src;
+ }
break;
case SLJIT_NEGD:
FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src)));
@@ -1566,11 +1635,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile
case SLJIT_ABSD:
FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src)));
break;
+ case SLJIT_CONVD_FROMS:
+ FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_SINGLE_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src)));
+ break;
}
- if (!(dst & SLJIT_MEM))
- return SLJIT_SUCCESS;
- return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
+ if (dst & SLJIT_MEM)
+ return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw);
+ return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
@@ -1583,11 +1655,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
CHECK_ERROR();
check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+ ADJUST_LOCAL_OFFSET(src2, src2w);
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src1 & SLJIT_MEM) {
emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
src1 = TMP_FREG1;
@@ -1631,7 +1706,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c
if (dst == SLJIT_UNUSED)
return SLJIT_SUCCESS;
- if (dst <= REG_MASK)
+ if (FAST_IS_REG(dst))
return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));
/* Memory. */
@@ -1644,7 +1719,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *
check_sljit_emit_fast_return(compiler, src, srcw);
ADJUST_LOCAL_OFFSET(src, srcw);
- if (src <= REG_MASK)
+ if (FAST_IS_REG(src))
FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
else if (src & SLJIT_MEM)
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw));
@@ -1833,7 +1908,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com
return SLJIT_SUCCESS;
cc = get_cc(type);
- dst_r = (dst <= REG_MASK) ? dst : TMP_REG1;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (GET_OPCODE(op) < SLJIT_ADD) {
FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO)));
diff --git a/sljit/sljitNativeARM_T2_32.c b/sljit/sljitNativeARM_T2_32.c
index 682f964..5d2b3a8 100644
--- a/sljit/sljitNativeARM_T2_32.c
+++ b/sljit/sljitNativeARM_T2_32.c
@@ -169,8 +169,12 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
#define VABS_F32 0xeeb00ac0
#define VADD_F32 0xee300a00
#define VCMP_F32 0xeeb40a40
+#define VCVT_F32_S32 0xeeb80ac0
+#define VCVT_F64_F32 0xeeb70ac0
+#define VCVT_S32_F32 0xeebd0ac0
#define VDIV_F32 0xee800a00
#define VMOV_F32 0xeeb00a40
+#define VMOV 0xee000a10
#define VMRS 0xeef1fa10
#define VMUL_F32 0xee200a00
#define VNEG_F32 0xeeb10a40
@@ -1607,6 +1611,69 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl
return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
}
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ if (src & SLJIT_MEM) {
+ FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
+ src = TMP_FREG1;
+ }
+
+ FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_SINGLE_OP) | DD4(TMP_FREG1) | DM4(src)));
+
+ if (dst == SLJIT_UNUSED)
+ return SLJIT_SUCCESS;
+
+ if (FAST_IS_REG(dst))
+ return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1));
+
+ /* Store the integer value from a VFP register. */
+ return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+ if (FAST_IS_REG(src))
+ FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1)));
+ else if (src & SLJIT_MEM) {
+ /* Load the integer value into a VFP register. */
+ FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
+ }
+ else {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1)));
+ }
+
+ FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(TMP_FREG1)));
+
+ if (dst & SLJIT_MEM)
+ return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
+ return SLJIT_SUCCESS;
+}
+
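A minimal standalone sketch of the VMOV transfer direction assumed by the two helpers above (bit 20 of the 0xee000a10 encoding selects VFP-to-core versus core-to-VFP; the macro name VMOV_TO_CORE below is hypothetical):

#include <stdio.h>

#define VMOV 0xee000a10u           /* value taken from the define added above */
#define VMOV_TO_CORE (1u << 20)    /* assumption: bit 20 = transfer direction */

int main(void)
{
    unsigned vfp_to_core = VMOV | VMOV_TO_CORE;  /* used after VCVT_S32_F32 above  */
    unsigned core_to_vfp = VMOV;                 /* used before VCVT_F32_S32 above */
    printf("VFP->core: %08x, core->VFP: %08x\n", vfp_to_core, core_to_vfp);
    return 0;
}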
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si src1, sljit_sw src1w,
+ sljit_si src2, sljit_sw src2w)
+{
+ if (src1 & SLJIT_MEM) {
+ emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
+ src1 = TMP_FREG1;
+ }
+
+ if (src2 & SLJIT_MEM) {
+ emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w);
+ src2 = TMP_FREG2;
+ }
+
+ FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(src1) | DM4(src2)));
+ return push_inst32(compiler, VMRS);
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
sljit_si dst, sljit_sw dstw,
sljit_si src, sljit_sw srcw)
@@ -1614,27 +1681,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile
sljit_si dst_r;
CHECK_ERROR();
- check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
- SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
-
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- op ^= SLJIT_SINGLE_OP;
+ if (GET_OPCODE(op) != SLJIT_CONVD_FROMS)
+ op ^= SLJIT_SINGLE_OP;
- if (GET_OPCODE(op) == SLJIT_CMPD) {
- if (dst & SLJIT_MEM) {
- emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw);
- dst = TMP_FREG1;
- }
- if (src & SLJIT_MEM) {
- emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw);
- src = TMP_FREG2;
- }
- FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst) | DM4(src)));
- return push_inst32(compiler, VMRS);
- }
+ SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error);
+ SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
if (src & SLJIT_MEM) {
emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw);
src = dst_r;
@@ -1642,8 +1698,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile
switch (GET_OPCODE(op)) {
case SLJIT_MOVD:
- if (src != dst_r)
- FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
+ if (src != dst_r) {
+ if (dst_r != TMP_FREG1)
+ FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
+ else
+ dst_r = src;
+ }
break;
case SLJIT_NEGD:
FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
@@ -1651,11 +1711,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile
case SLJIT_ABSD:
FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
break;
+ case SLJIT_CONVD_FROMS:
+ FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src)));
+ op ^= SLJIT_SINGLE_OP;
+ break;
}
- if (!(dst & SLJIT_MEM))
- return SLJIT_SUCCESS;
- return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw);
+ if (dst & SLJIT_MEM)
+ return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw);
+ return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
@@ -1667,12 +1731,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
CHECK_ERROR();
check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+ ADJUST_LOCAL_OFFSET(src2, src2w);
compiler->cache_arg = 0;
compiler->cache_argw = 0;
op ^= SLJIT_SINGLE_OP;
- dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src1 & SLJIT_MEM) {
emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
src1 = TMP_FREG1;
@@ -1718,7 +1785,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c
if (dst == SLJIT_UNUSED)
return SLJIT_SUCCESS;
- if (dst <= REG_MASK)
+ if (FAST_IS_REG(dst))
return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3));
/* Memory. */
@@ -1737,7 +1804,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *
check_sljit_emit_fast_return(compiler, src, srcw);
ADJUST_LOCAL_OFFSET(src, srcw);
- if (src <= REG_MASK)
+ if (FAST_IS_REG(src))
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src)));
else if (src & SLJIT_MEM) {
if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw))
diff --git a/sljit/sljitNativeMIPS_common.c b/sljit/sljitNativeMIPS_common.c
index 011d887..77be7f9 100644
--- a/sljit/sljitNativeMIPS_common.c
+++ b/sljit/sljitNativeMIPS_common.c
@@ -92,10 +92,10 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
#define HI(opcode) ((opcode) << 26)
#define LO(opcode) (opcode)
/* S = (16 << 21) D = (17 << 21) */
-#define FMT_SD (16 << 21)
+#define FMT_S (16 << 21)
-#define ABS_fmt (HI(17) | FMT_SD | LO(5))
-#define ADD_fmt (HI(17) | FMT_SD | LO(0))
+#define ABS_S (HI(17) | FMT_S | LO(5))
+#define ADD_S (HI(17) | FMT_S | LO(0))
#define ADDIU (HI(9))
#define ADDU (HI(0) | LO(33))
#define AND (HI(0) | LO(36))
@@ -112,17 +112,18 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
#define BNE (HI(5))
#define BREAK (HI(0) | LO(13))
#define CFC1 (HI(17) | (2 << 21))
-#define C_UN_fmt (HI(17) | FMT_SD | LO(49))
-#define C_UEQ_fmt (HI(17) | FMT_SD | LO(51))
-#define C_ULE_fmt (HI(17) | FMT_SD | LO(55))
-#define C_ULT_fmt (HI(17) | FMT_SD | LO(53))
+#define C_UN_S (HI(17) | FMT_S | LO(49))
+#define C_UEQ_S (HI(17) | FMT_S | LO(51))
+#define C_ULE_S (HI(17) | FMT_S | LO(55))
+#define C_ULT_S (HI(17) | FMT_S | LO(53))
+#define CVT_S_S (HI(17) | FMT_S | LO(32))
#define DADDIU (HI(25))
#define DADDU (HI(0) | LO(45))
#define DDIV (HI(0) | LO(30))
#define DDIVU (HI(0) | LO(31))
#define DIV (HI(0) | LO(26))
#define DIVU (HI(0) | LO(27))
-#define DIV_fmt (HI(17) | FMT_SD | LO(3))
+#define DIV_S (HI(17) | FMT_S | LO(3))
#define DMULT (HI(0) | LO(28))
#define DMULTU (HI(0) | LO(29))
#define DSLL (HI(0) | LO(56))
@@ -142,13 +143,15 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
#define LD (HI(55))
#define LUI (HI(15))
#define LW (HI(35))
+#define MFC1 (HI(17))
#define MFHI (HI(0) | LO(16))
#define MFLO (HI(0) | LO(18))
-#define MOV_fmt (HI(17) | FMT_SD | LO(6))
-#define MUL_fmt (HI(17) | FMT_SD | LO(2))
+#define MOV_S (HI(17) | FMT_S | LO(6))
+#define MTC1 (HI(17) | (4 << 21))
+#define MUL_S (HI(17) | FMT_S | LO(2))
#define MULT (HI(0) | LO(24))
#define MULTU (HI(0) | LO(25))
-#define NEG_fmt (HI(17) | FMT_SD | LO(7))
+#define NEG_S (HI(17) | FMT_S | LO(7))
#define NOP (HI(0) | LO(0))
#define NOR (HI(0) | LO(39))
#define OR (HI(0) | LO(37))
@@ -164,9 +167,10 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
#define SRLV (HI(0) | LO(6))
#define SRA (HI(0) | LO(3))
#define SRAV (HI(0) | LO(7))
-#define SUB_fmt (HI(17) | FMT_SD | LO(1))
+#define SUB_S (HI(17) | FMT_S | LO(1))
#define SUBU (HI(0) | LO(35))
#define SW (HI(43))
+#define TRUNC_W_S (HI(17) | FMT_S | LO(13))
#define XOR (HI(0) | LO(38))
#define XORI (HI(14))
@@ -495,6 +499,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
/* Separates integer and floating point registers */
#define GPR_REG 0x0f
#define DOUBLE_DATA 0x10
+#define SINGLE_DATA 0x12
#define MEM_MASK 0x1f
@@ -545,7 +550,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
compiler->logical_local_size = local_size;
#endif
- local_size += (saveds + 1 + 4) * sizeof(sljit_sw);
+ local_size += ((saveds + 1) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET;
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
local_size = (local_size + 15) & ~0xf;
#else
@@ -599,7 +604,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
compiler->logical_local_size = local_size;
#endif
- local_size += (saveds + 1 + 4) * sizeof(sljit_sw);
+ local_size += ((saveds + 1) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET;
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
compiler->local_size = (local_size + 15) & ~0xf;
#else
@@ -1278,83 +1283,164 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7))
#define FMT(op) (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) << (21 - 8))
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+# define flags 0
+#else
+ sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVW_FROMD) << 21;
+#endif
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+ src = TMP_FREG1;
+ }
+ else
+ src <<= 1;
+
+ FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS));
+
+ if (dst == SLJIT_UNUSED)
+ return SLJIT_SUCCESS;
+
+ if (FAST_IS_REG(dst))
+ return push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS);
+
+ /* Store the integer value from an FPU register. */
+ return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+# undef flags
+#endif
+}
+
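A minimal standalone sketch of the flags trick used above: on the 64-bit path the value (1 << 21) upgrades MFC1 to DMFC1 and, shifted right by 19, flips TRUNC.W (function 13) into TRUNC.L (function 9). The opcode macros are copied from the defines added earlier in this file:

#include <stdio.h>

#define HI(opcode) ((unsigned)(opcode) << 26)
#define LO(opcode) ((unsigned)(opcode))
#define FMT_S (16u << 21)
#define TRUNC_W_S (HI(17) | FMT_S | LO(13))
#define MFC1 (HI(17))

int main(void)
{
    unsigned flags = 1u << 21;  /* set only for the 64 bit (SLJIT_CONVW_FROMD) case */
    printf("trunc.w: %08x  trunc.l: %08x\n", TRUNC_W_S, TRUNC_W_S ^ (flags >> 19));
    printf("mfc1:    %08x  dmfc1:   %08x\n", MFC1, MFC1 | flags);
    return 0;
}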
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+# define flags 0
+#else
+ sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVD_FROMW) << 21;
+#endif
+
+ sljit_si dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+
+ if (FAST_IS_REG(src))
+ FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS));
+ else if (src & SLJIT_MEM) {
+ /* Load the integer value into an FPU register. */
+ FAIL_IF(emit_op_mem2(compiler, ((flags) ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+ }
+ else {
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+ srcw = (sljit_si)srcw;
+#endif
+ FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw));
+ FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS));
+ }
+
+ FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS));
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+ return SLJIT_SUCCESS;
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+# undef flags
+#endif
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si src1, sljit_sw src1w,
+ sljit_si src2, sljit_sw src2w)
+{
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+ src1 = TMP_FREG1;
+ }
+ else
+ src1 <<= 1;
+
+ if (src2 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+ src2 = TMP_FREG2;
+ }
+ else
+ src2 <<= 1;
+
+ /* src2 and src1 are swapped. */
+ if (op & SLJIT_SET_E) {
+ FAIL_IF(push_inst(compiler, C_UEQ_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS));
+ FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG));
+ FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG));
+ FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG));
+ }
+ if (op & SLJIT_SET_S) {
+ /* Mixing the instructions for the two checks. */
+ FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS));
+ FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG));
+ FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src1) | FS(src2), UNMOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG));
+ FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG));
+ FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG));
+ FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG));
+ FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG));
+ }
+ return push_inst(compiler, C_UN_S | FMT(op) | FT(src2) | FS(src1), FCSR_FCC);
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
sljit_si dst, sljit_sw dstw,
sljit_si src, sljit_sw srcw)
{
- sljit_si dst_fr;
+ sljit_si dst_r;
CHECK_ERROR();
- check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
- SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
-
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- if (GET_OPCODE(op) == SLJIT_CMPD) {
- if (dst & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
- dst = TMP_FREG1;
- }
- else
- dst <<= 1;
+ SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
+ SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
- if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
- src = TMP_FREG2;
- }
- else
- src <<= 1;
-
- /* src and dst are swapped. */
- if (op & SLJIT_SET_E) {
- FAIL_IF(push_inst(compiler, C_UEQ_fmt | FMT(op) | FT(src) | FS(dst), UNMOVABLE_INS));
- FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG));
- FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG));
- FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG));
- }
- if (op & SLJIT_SET_S) {
- /* Mixing the instructions for the two checks. */
- FAIL_IF(push_inst(compiler, C_ULT_fmt | FMT(op) | FT(src) | FS(dst), UNMOVABLE_INS));
- FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG));
- FAIL_IF(push_inst(compiler, C_ULT_fmt | FMT(op) | FT(dst) | FS(src), UNMOVABLE_INS));
- FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG));
- FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG));
- FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG));
- FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG));
- FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG));
- }
- return push_inst(compiler, C_UN_fmt | FMT(op) | FT(src) | FS(dst), FCSR_FCC);
- }
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
+ op ^= SLJIT_SINGLE_OP;
- dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+ dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw));
- src = dst_fr;
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
+ src = dst_r;
}
else
src <<= 1;
switch (GET_OPCODE(op)) {
- case SLJIT_MOVD:
- if (src != dst_fr && dst_fr != TMP_FREG1)
- FAIL_IF(push_inst(compiler, MOV_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS));
- break;
- case SLJIT_NEGD:
- FAIL_IF(push_inst(compiler, NEG_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS));
- break;
- case SLJIT_ABSD:
- FAIL_IF(push_inst(compiler, ABS_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS));
- break;
- }
-
- if (dst_fr == TMP_FREG1) {
- if (GET_OPCODE(op) == SLJIT_MOVD)
- dst_fr = src;
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0));
+ case SLJIT_MOVD:
+ if (src != dst_r) {
+ if (dst_r != TMP_FREG1)
+ FAIL_IF(push_inst(compiler, MOV_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS));
+ else
+ dst_r = src;
+ }
+ break;
+ case SLJIT_NEGD:
+ FAIL_IF(push_inst(compiler, NEG_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS));
+ break;
+ case SLJIT_ABSD:
+ FAIL_IF(push_inst(compiler, ABS_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS));
+ break;
+ case SLJIT_CONVD_FROMS:
+ FAIL_IF(push_inst(compiler, CVT_S_S | ((op & SLJIT_SINGLE_OP) ? 1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS));
+ op ^= SLJIT_SINGLE_OP;
+ break;
}
+ if (dst & SLJIT_MEM)
+ return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
return SLJIT_SUCCESS;
}
@@ -1363,15 +1449,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
sljit_si src1, sljit_sw src1w,
sljit_si src2, sljit_sw src2w)
{
- sljit_si dst_fr, flags = 0;
+ sljit_si dst_r, flags = 0;
CHECK_ERROR();
check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+ ADJUST_LOCAL_OFFSET(src2, src2w);
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;
+ dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;
if (src1 & SLJIT_MEM) {
if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
@@ -1415,23 +1504,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
switch (GET_OPCODE(op)) {
case SLJIT_ADDD:
- FAIL_IF(push_inst(compiler, ADD_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
break;
case SLJIT_SUBD:
- FAIL_IF(push_inst(compiler, SUB_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SUB_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
break;
case SLJIT_MULD:
- FAIL_IF(push_inst(compiler, MUL_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, MUL_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
break;
case SLJIT_DIVD:
- FAIL_IF(push_inst(compiler, DIV_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, DIV_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS));
break;
}
- if (dst_fr == TMP_FREG2)
+ if (dst_r == TMP_FREG2)
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
return SLJIT_SUCCESS;
@@ -1794,36 +1883,36 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile
switch (type & 0xff) {
case SLJIT_C_FLOAT_EQUAL:
- inst = C_UEQ_fmt;
+ inst = C_UEQ_S;
if_true = 1;
break;
case SLJIT_C_FLOAT_NOT_EQUAL:
- inst = C_UEQ_fmt;
+ inst = C_UEQ_S;
if_true = 0;
break;
case SLJIT_C_FLOAT_LESS:
- inst = C_ULT_fmt;
+ inst = C_ULT_S;
if_true = 1;
break;
case SLJIT_C_FLOAT_GREATER_EQUAL:
- inst = C_ULT_fmt;
+ inst = C_ULT_S;
if_true = 0;
break;
case SLJIT_C_FLOAT_GREATER:
- inst = C_ULE_fmt;
+ inst = C_ULE_S;
if_true = 0;
break;
case SLJIT_C_FLOAT_LESS_EQUAL:
- inst = C_ULE_fmt;
+ inst = C_ULE_S;
if_true = 1;
break;
case SLJIT_C_FLOAT_UNORDERED:
- inst = C_UN_fmt;
+ inst = C_UN_S;
if_true = 1;
break;
case SLJIT_C_FLOAT_ORDERED:
default: /* Make compilers happy. */
- inst = C_UN_fmt;
+ inst = C_UN_S;
if_true = 0;
break;
}
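The compare helper added above materialises the result of c.ueq/c.ult by reading FCSR and isolating the condition bit FCC0 (bit 23) with the SRL/ANDI pair. A minimal sketch of that extraction in plain C, with a hypothetical FCSR value:

#include <stdio.h>

/* Mirror of the CFC1 / SRL 23 / ANDI 1 sequence emitted for SLJIT_SET_E / SLJIT_SET_S. */
static unsigned fcc0(unsigned fcsr)
{
    return (fcsr >> 23) & 1u;
}

int main(void)
{
    unsigned fcsr_set = 0x00800000u;    /* hypothetical FCSR value with FCC0 set */
    unsigned fcsr_clear = 0x00000000u;
    printf("set: %u  clear: %u\n", fcc0(fcsr_set), fcc0(fcsr_clear));
    return 0;
}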
diff --git a/sljit/sljitNativePPC_common.c b/sljit/sljitNativePPC_common.c
index 5e06f2f..037681e 100644
--- a/sljit/sljitNativePPC_common.c
+++ b/sljit/sljitNativePPC_common.c
@@ -114,6 +114,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = {
#define B(b) (reg_map[b] << 11)
#define C(c) (reg_map[c] << 6)
#define FD(fd) ((fd) << 21)
+#define FS(fs) ((fs) << 21)
#define FA(fa) ((fa) << 16)
#define FB(fb) ((fb) << 11)
#define FC(fc) ((fc) << 6)
@@ -159,13 +160,17 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = {
#define FABS (HI(63) | LO(264))
#define FADD (HI(63) | LO(21))
#define FADDS (HI(59) | LO(21))
+#define FCFID (HI(63) | LO(846))
#define FCMPU (HI(63) | LO(0))
+#define FCTIDZ (HI(63) | LO(815))
+#define FCTIWZ (HI(63) | LO(15))
#define FDIV (HI(63) | LO(18))
#define FDIVS (HI(59) | LO(18))
#define FMR (HI(63) | LO(72))
#define FMUL (HI(63) | LO(25))
#define FMULS (HI(59) | LO(25))
#define FNEG (HI(63) | LO(40))
+#define FRSP (HI(63) | LO(12))
#define FSUB (HI(63) | LO(20))
#define FSUBS (HI(59) | LO(20))
#define LD (HI(58) | 0)
@@ -202,6 +207,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = {
#define STD (HI(62) | 0)
#define STDU (HI(62) | 1)
#define STDUX (HI(31) | LO(181))
+#define STFIWX (HI(31) | LO(983))
#define STW (HI(36))
#define STWU (HI(37))
#define STWUX (HI(31) | LO(183))
@@ -602,25 +608,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
if (args >= 3)
FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG3) | A(SLJIT_SAVED_REG3) | B(SLJIT_SCRATCH_REG3)));
-#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
- compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size;
-#else
- compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size;
-#endif
- compiler->local_size = (compiler->local_size + 15) & ~0xf;
+ local_size += ((1 + saveds) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET;
+ local_size = (local_size + 15) & ~0xf;
+ compiler->local_size = local_size;
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
- if (compiler->local_size <= SIMM_MAX)
- FAIL_IF(push_inst(compiler, STWU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size)));
+ if (local_size <= SIMM_MAX)
+ FAIL_IF(push_inst(compiler, STWU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-local_size)));
else {
- FAIL_IF(load_immediate(compiler, 0, -compiler->local_size));
+ FAIL_IF(load_immediate(compiler, 0, -local_size));
FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
}
#else
- if (compiler->local_size <= SIMM_MAX)
- FAIL_IF(push_inst(compiler, STDU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size)));
+ if (local_size <= SIMM_MAX)
+ FAIL_IF(push_inst(compiler, STDU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-local_size)));
else {
- FAIL_IF(load_immediate(compiler, 0, -compiler->local_size));
+ FAIL_IF(load_immediate(compiler, 0, -local_size));
FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0)));
}
#endif
@@ -639,12 +642,8 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
compiler->logical_local_size = local_size;
#endif
-#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
- compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size;
-#else
- compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size;
-#endif
- compiler->local_size = (compiler->local_size + 15) & ~0xf;
+ local_size += ((1 + saveds) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET;
+ compiler->local_size = (local_size + 15) & ~0xf;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
@@ -999,12 +998,12 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags,
tmp_r = arg;
FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
}
- else if (compiler->cache_arg != arg || high_short != compiler->cache_argw) {
+ else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
if (high_short == next_high_short) {
- compiler->cache_arg = SLJIT_IMM | arg;
- compiler->cache_argw = next_high_short;
+ compiler->cache_arg = SLJIT_MEM | arg;
+ compiler->cache_argw = high_short;
tmp_r = TMP_REG3;
}
}
@@ -1685,59 +1684,233 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 6))
#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double)
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
+#else
+#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))
+
+#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
+#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
+#else
+#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
+#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
+#endif
+
+#endif /* SLJIT_CONFIG_PPC_64 */
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
sljit_si dst, sljit_sw dstw,
sljit_si src, sljit_sw srcw)
{
- sljit_si dst_fr;
+ if (src & SLJIT_MEM) {
+ /* We can ignore the temporary data store on the stack from a caching point of view. */
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+ src = TMP_FREG1;
+ }
- CHECK_ERROR();
- check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
- SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+ op = GET_OPCODE(op);
+ FAIL_IF(push_inst(compiler, (op == SLJIT_CONVI_FROMD ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
+ if (dst == SLJIT_UNUSED)
+ return SLJIT_SUCCESS;
- if (GET_OPCODE(op) == SLJIT_CMPD) {
- if (dst & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
- dst = TMP_FREG1;
+ if (op == SLJIT_CONVW_FROMD) {
+ if (FAST_IS_REG(dst)) {
+ FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, 0, 0));
+ return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, 0, 0);
}
+ return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
+ }
- if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
- src = TMP_FREG2;
+#else
+ FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
+
+ if (dst == SLJIT_UNUSED)
+ return SLJIT_SUCCESS;
+#endif
+
+ if (FAST_IS_REG(dst)) {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
+ FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_LOCALS_REG) | B(TMP_REG1)));
+ return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, 0, 0);
+ }
+
+ SLJIT_ASSERT(dst & SLJIT_MEM);
+
+ if (dst & OFFS_REG_MASK) {
+ dstw &= 0x3;
+ if (dstw) {
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+ FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
+#else
+ FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
+#endif
+ dstw = TMP_REG1;
+ }
+ else
+ dstw = OFFS_REG(dst);
+ }
+ else {
+ if ((dst & REG_MASK) && !dstw) {
+ dstw = dst & REG_MASK;
+ dst = 0;
+ }
+ else {
+ /* This works regardless of whether we have SLJIT_MEM1 or SLJIT_MEM0. */
+ FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
+ dstw = TMP_REG1;
}
+ }
+
+ return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+
+ sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+ if (src & SLJIT_IMM) {
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+ srcw = (sljit_si)srcw;
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ src = TMP_REG1;
+ }
+ else if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) {
+ if (FAST_IS_REG(src))
+ FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
+ else
+ FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
+ src = TMP_REG1;
+ }
- return push_inst(compiler, FCMPU | CRD(4) | FA(dst) | FB(src));
+ if (FAST_IS_REG(src)) {
+ FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
+ FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, dst, dstw));
+ }
+ else
+ FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+
+ FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+ if (op & SLJIT_SINGLE_OP)
+ return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
+ return SLJIT_SUCCESS;
+
+#else
+
+ sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+ sljit_si invert_sign = 1;
+
+ if (src & SLJIT_IMM) {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
+ src = TMP_REG1;
+ invert_sign = 0;
+ }
+ else if (!FAST_IS_REG(src)) {
+ FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW));
+ src = TMP_REG1;
}
- dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+ /* First, a special double precision floating point value is constructed:
+ (2^52 + (input xor 2^31)). The double precision format has exactly 53 bits of
+ precision, so the lower 32 bits of the constructed value hold the lower 32 bits
+ of the input. The xor with 2^31 is the same as adding 0x80000000 to the input,
+ which shifts it into the 0 - 0xffffffff range. To get the converted floating
+ point value, we need to subtract 2^52 + 2^31 from the constructed value. */
+ FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
+ if (invert_sign)
+ FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
+ FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
+ FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_HI));
+ FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
+ FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW));
+ FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
+ FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW));
+
+ FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+ if (op & SLJIT_SINGLE_OP)
+ return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
+ return SLJIT_SUCCESS;
+
+#endif
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si src1, sljit_sw src1w,
+ sljit_si src2, sljit_sw src2w)
+{
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+ src1 = TMP_FREG1;
+ }
+
+ if (src2 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+ src2 = TMP_FREG2;
+ }
+
+ return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ sljit_si dst_r;
+
+ CHECK_ERROR();
+ compiler->cache_arg = 0;
+ compiler->cache_argw = 0;
+
+ SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
+ SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
+ op ^= SLJIT_SINGLE_OP;
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw));
- src = dst_fr;
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
+ src = dst_r;
}
switch (GET_OPCODE(op)) {
- case SLJIT_MOVD:
- if (src != dst_fr && dst_fr != TMP_FREG1)
- FAIL_IF(push_inst(compiler, FMR | FD(dst_fr) | FB(src)));
- break;
- case SLJIT_NEGD:
- FAIL_IF(push_inst(compiler, FNEG | FD(dst_fr) | FB(src)));
+ case SLJIT_CONVD_FROMS:
+ op ^= SLJIT_SINGLE_OP;
+ if (op & SLJIT_SINGLE_OP) {
+ FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
break;
- case SLJIT_ABSD:
- FAIL_IF(push_inst(compiler, FABS | FD(dst_fr) | FB(src)));
- break;
- }
-
- if (dst_fr == TMP_FREG1) {
- if (GET_OPCODE(op) == SLJIT_MOVD)
- dst_fr = src;
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0));
+ }
+ /* Fall through. */
+ case SLJIT_MOVD:
+ if (src != dst_r) {
+ if (dst_r != TMP_FREG1)
+ FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
+ else
+ dst_r = src;
+ }
+ break;
+ case SLJIT_NEGD:
+ FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
+ break;
+ case SLJIT_ABSD:
+ FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
+ break;
}
+ if (dst & SLJIT_MEM)
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
return SLJIT_SUCCESS;
}
@@ -1746,15 +1919,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
sljit_si src1, sljit_sw src1w,
sljit_si src2, sljit_sw src2w)
{
- sljit_si dst_fr, flags = 0;
+ sljit_si dst_r, flags = 0;
CHECK_ERROR();
check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+ ADJUST_LOCAL_OFFSET(src2, src2w);
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG2;
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
if (src1 & SLJIT_MEM) {
if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
@@ -1794,23 +1970,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
switch (GET_OPCODE(op)) {
case SLJIT_ADDD:
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_fr) | FA(src1) | FB(src2)));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
break;
case SLJIT_SUBD:
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_fr) | FA(src1) | FB(src2)));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
break;
case SLJIT_MULD:
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_fr) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL uses FC as src2 */));
break;
case SLJIT_DIVD:
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_fr) | FA(src1) | FB(src2)));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
break;
}
- if (dst_fr == TMP_FREG2)
+ if (dst_r == TMP_FREG2)
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
return SLJIT_SUCCESS;
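A minimal standalone sketch of the 32-bit integer-to-double path above, assuming the usual IEEE-754 double layout: build the bit pattern 0x43300000:(x xor 0x80000000), which reads back as 2^52 + (x + 2^31), then subtract the constant 0x43300000:0x80000000, i.e. 2^52 + 2^31. A single 64-bit integer stands in for the two word stores, so the FLOAT_TMP_MEM_OFFSET_LOW/HI endianness handling is not needed here:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static double int_to_double_via_bits(int32_t x)
{
    uint64_t hi = 0x43300000ull << 32;                /* exponent part, value 2^52 */
    uint64_t built = hi | ((uint32_t)x ^ 0x80000000u); /* 2^52 + (x xor 2^31)      */
    uint64_t bias = hi | 0x80000000u;                  /* 2^52 + 2^31              */
    double a, b;

    memcpy(&a, &built, sizeof(a));
    memcpy(&b, &bias, sizeof(b));
    return a - b;                                      /* exact: result is (double)x */
}

int main(void)
{
    printf("%.1f %.1f\n", int_to_double_via_bits(-5), int_to_double_via_bits(123456789));
    return 0;
}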
diff --git a/sljit/sljitNativeSPARC_common.c b/sljit/sljitNativeSPARC_common.c
index d6a1e12..e01a5f6 100644
--- a/sljit/sljitNativeSPARC_common.c
+++ b/sljit/sljitNativeSPARC_common.c
@@ -128,10 +128,16 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = {
#define FCMPS (OPC1(0x2) | OPC3(0x35) | DOP(0x51))
#define FDIVD (OPC1(0x2) | OPC3(0x34) | DOP(0x4e))
#define FDIVS (OPC1(0x2) | OPC3(0x34) | DOP(0x4d))
+#define FDTOI (OPC1(0x2) | OPC3(0x34) | DOP(0xd2))
+#define FDTOS (OPC1(0x2) | OPC3(0x34) | DOP(0xc6))
+#define FITOD (OPC1(0x2) | OPC3(0x34) | DOP(0xc8))
+#define FITOS (OPC1(0x2) | OPC3(0x34) | DOP(0xc4))
#define FMOVS (OPC1(0x2) | OPC3(0x34) | DOP(0x01))
#define FMULD (OPC1(0x2) | OPC3(0x34) | DOP(0x4a))
#define FMULS (OPC1(0x2) | OPC3(0x34) | DOP(0x49))
#define FNEGS (OPC1(0x2) | OPC3(0x34) | DOP(0x05))
+#define FSTOD (OPC1(0x2) | OPC3(0x34) | DOP(0xc9))
+#define FSTOI (OPC1(0x2) | OPC3(0x34) | DOP(0xd1))
#define FSUBD (OPC1(0x2) | OPC3(0x34) | DOP(0x46))
#define FSUBS (OPC1(0x2) | OPC3(0x34) | DOP(0x45))
#define JMPL (OPC1(0x2) | OPC3(0x38))
@@ -388,6 +394,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
/* Separates integer and floating point registers */
#define GPR_REG 0x0f
#define DOUBLE_DATA 0x10
+#define SINGLE_DATA 0x12
#define MEM_MASK 0x1f
@@ -423,8 +430,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
compiler->logical_local_size = local_size;
#endif
- local_size += 23 * sizeof(sljit_sw);
- local_size = (local_size + 7) & ~0x7;
+ local_size = (local_size + FIXED_LOCALS_OFFSET + 7) & ~0x7;
compiler->local_size = local_size;
if (local_size <= SIMM_MAX) {
@@ -456,8 +462,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
compiler->logical_local_size = local_size;
#endif
- local_size += 23 * sizeof(sljit_sw);
- compiler->local_size = (local_size + 7) & ~0x7;
+ compiler->local_size = (local_size + FIXED_LOCALS_OFFSET + 7) & ~0x7;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
@@ -953,73 +958,139 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7))
#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double)
+#define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw))
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ if (src & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+ src = TMP_FREG1;
+ }
+ else
+ src <<= 1;
+
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | DA(TMP_FREG1) | S2A(src), MOVABLE_INS));
+
+ if (dst == SLJIT_UNUSED)
+ return SLJIT_SUCCESS;
+
+ if (FAST_IS_REG(dst)) {
+ FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
+ return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET);
+ }
+
+ /* Store the integer value from a floating point register. */
+ return emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ sljit_si dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+
+ if (src & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64)
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+ srcw = (sljit_si)srcw;
+#endif
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ src = TMP_REG1;
+ srcw = 0;
+ }
+
+ if (FAST_IS_REG(src)) {
+ FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET));
+ src = SLJIT_MEM1(SLJIT_LOCALS_REG);
+ srcw = FLOAT_TMP_MEM_OFFSET;
+ }
+
+ FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | DA(dst_r) | S2A(TMP_FREG1), MOVABLE_INS));
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si src1, sljit_sw src1w,
+ sljit_si src2, sljit_sw src2w)
+{
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+ src1 = TMP_FREG1;
+ }
+ else
+ src1 <<= 1;
+
+ if (src2 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+ src2 = TMP_FREG2;
+ }
+ else
+ src2 <<= 1;
+
+ return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(src1) | S2A(src2), FCC_IS_SET | MOVABLE_INS);
+}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
sljit_si dst, sljit_sw dstw,
sljit_si src, sljit_sw srcw)
{
- sljit_si dst_fr;
+ sljit_si dst_r;
CHECK_ERROR();
- check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
- SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
-
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- if (GET_OPCODE(op) == SLJIT_CMPD) {
- if (dst & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw));
- dst = TMP_FREG1;
- }
- else
- dst <<= 1;
-
- if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0));
- src = TMP_FREG2;
- }
- else
- src <<= 1;
+ SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
+ SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
- return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(dst) | S2A(src), FCC_IS_SET | MOVABLE_INS);
- }
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
+ op ^= SLJIT_SINGLE_OP;
- dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
+ dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1;
if (src & SLJIT_MEM) {
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw));
- src = dst_fr;
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
+ src = dst_r;
}
else
src <<= 1;
switch (GET_OPCODE(op)) {
- case SLJIT_MOVD:
- if (src != dst_fr && dst_fr != TMP_FREG1) {
- FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr) | S2A(src), MOVABLE_INS));
+ case SLJIT_MOVD:
+ if (src != dst_r) {
+ if (dst_r != TMP_FREG1) {
+ FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r) | S2A(src), MOVABLE_INS));
if (!(op & SLJIT_SINGLE_OP))
- FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
}
- break;
- case SLJIT_NEGD:
- FAIL_IF(push_inst(compiler, FNEGS | DA(dst_fr) | S2A(src), MOVABLE_INS));
- if (dst_fr != src && !(op & SLJIT_SINGLE_OP))
- FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS));
- break;
- case SLJIT_ABSD:
- FAIL_IF(push_inst(compiler, FABSS | DA(dst_fr) | S2A(src), MOVABLE_INS));
- if (dst_fr != src && !(op & SLJIT_SINGLE_OP))
- FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS));
- break;
- }
-
- if (dst_fr == TMP_FREG1) {
- if (GET_OPCODE(op) == SLJIT_MOVD)
- dst_fr = src;
- FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0));
+ else
+ dst_r = src;
+ }
+ break;
+ case SLJIT_NEGD:
+ FAIL_IF(push_inst(compiler, FNEGS | DA(dst_r) | S2A(src), MOVABLE_INS));
+ if (dst_r != src && !(op & SLJIT_SINGLE_OP))
+ FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
+ break;
+ case SLJIT_ABSD:
+ FAIL_IF(push_inst(compiler, FABSS | DA(dst_r) | S2A(src), MOVABLE_INS));
+ if (dst_r != src && !(op & SLJIT_SINGLE_OP))
+ FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS));
+ break;
+ case SLJIT_CONVD_FROMS:
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | DA(dst_r) | S2A(src), MOVABLE_INS));
+ op ^= SLJIT_SINGLE_OP;
+ break;
}
+ if (dst & SLJIT_MEM)
+ FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
return SLJIT_SUCCESS;
}
@@ -1028,15 +1099,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
sljit_si src1, sljit_sw src1w,
sljit_si src2, sljit_sw src2w)
{
- sljit_si dst_fr, flags = 0;
+ sljit_si dst_r, flags = 0;
CHECK_ERROR();
check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+ ADJUST_LOCAL_OFFSET(src2, src2w);
compiler->cache_arg = 0;
compiler->cache_argw = 0;
- dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;
+ dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2;
if (src1 & SLJIT_MEM) {
if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
@@ -1080,23 +1154,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
switch (GET_OPCODE(op)) {
case SLJIT_ADDD:
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
break;
case SLJIT_SUBD:
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
break;
case SLJIT_MULD:
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
break;
case SLJIT_DIVD:
- FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS));
+ FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS));
break;
}
- if (dst_fr == TMP_FREG2)
+ if (dst_r == TMP_FREG2)
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
return SLJIT_SUCCESS;
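A minimal sketch of the register indexing used in the SPARC code above, assuming an sljit floating point register n maps to the single-precision pair %f(2n) / %f(2n+1): the << 1 selects the even half, and the extra FMOVS with | 1 copies the odd half when a full double is moved.

#include <stdio.h>

/* Hypothetical mapping helper matching the << 1 and | 1 arithmetic above. */
static void sparc_pair(int sljit_freg, int *even, int *odd)
{
    *even = sljit_freg << 1;        /* value passed to DA()/S1A()/S2A()  */
    *odd  = (sljit_freg << 1) | 1;  /* second FMOVS for the double case  */
}

int main(void)
{
    int e, o;
    sparc_pair(3, &e, &o);
    printf("%%f%d / %%f%d\n", e, o);
    return 0;
}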
diff --git a/sljit/sljitNativeX86_32.c b/sljit/sljitNativeX86_32.c
index dd03f26..5a94c3d 100644
--- a/sljit/sljitNativeX86_32.c
+++ b/sljit/sljitNativeX86_32.c
@@ -280,21 +280,17 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si
SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
/* Both size flags cannot be switched on. */
SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
/* SSE2 and immediate is not possible. */
SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
-#endif
size &= 0xf;
inst_size = size;
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
inst_size++;
-#endif
if (flags & EX86_PREF_66)
inst_size++;
@@ -348,12 +344,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si
/* Encoding the byte. */
INC_SIZE(inst_size);
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
if (flags & EX86_PREF_F2)
*inst++ = 0xf2;
if (flags & EX86_PREF_F3)
*inst++ = 0xf3;
-#endif
if (flags & EX86_PREF_66)
*inst++ = 0x66;
@@ -366,15 +360,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si
if ((a & SLJIT_IMM) || (a == 0))
*buf_ptr = 0;
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
- else if (!(flags & EX86_SSE2))
+ else if (!(flags & EX86_SSE2_OP1))
*buf_ptr = reg_map[a] << 3;
else
*buf_ptr = a << 3;
-#else
- else
- *buf_ptr = reg_map[a] << 3;
-#endif
}
else {
if (a & SLJIT_IMM) {
@@ -388,11 +377,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si
}
if (!(b & SLJIT_MEM))
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
- *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_map[b] : b);
-#else
- *buf_ptr++ |= MOD_REG + reg_map[b];
-#endif
+ *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
else if ((b & REG_MASK) != SLJIT_UNUSED) {
if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) {
if (immb != 0) {
diff --git a/sljit/sljitNativeX86_64.c b/sljit/sljitNativeX86_64.c
index 967f3c3..ac0fca2 100644
--- a/sljit/sljitNativeX86_64.c
+++ b/sljit/sljitNativeX86_64.c
@@ -156,9 +156,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil
}
#ifdef _WIN64
if (scratches >= 5) {
- SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8, temporary_ereg2_is_hireg);
+ SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_EREG2] >= 8, temporary_ereg2_is_hireg);
*inst++ = REX_B;
- PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
+ PUSH_REG(reg_lmap[SLJIT_SCRATCH_EREG2]);
}
#endif
@@ -340,7 +340,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi
#ifdef _WIN64
if (compiler->scratches >= 5) {
*inst++ = REX_B;
- POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]);
+ POP_REG(reg_lmap[SLJIT_SCRATCH_EREG2]);
}
#endif
if (compiler->saveds >= 1)
@@ -409,72 +409,64 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si
SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
/* Both size flags cannot be switched on. */
SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
/* SSE2 and immediate is not possible. */
SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
-#endif
size &= 0xf;
inst_size = size;
- if ((b & SLJIT_MEM) && !(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
- if (emit_load_imm64(compiler, TMP_REG3, immb))
- return NULL;
- immb = 0;
- if (b & REG_MASK)
- b |= TO_OFFS_REG(TMP_REG3);
- else
- b |= TMP_REG3;
- }
-
if (!compiler->mode32 && !(flags & EX86_NO_REXW))
rex |= REX_W;
else if (flags & EX86_REX)
rex |= REX;
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
inst_size++;
-#endif
if (flags & EX86_PREF_66)
inst_size++;
/* Calculate size of b. */
inst_size += 1; /* mod r/m byte. */
if (b & SLJIT_MEM) {
+ if (!(b & OFFS_REG_MASK)) {
+ if (NOT_HALFWORD(immb)) {
+ if (emit_load_imm64(compiler, TMP_REG3, immb))
+ return NULL;
+ immb = 0;
+ if (b & REG_MASK)
+ b |= TO_OFFS_REG(TMP_REG3);
+ else
+ b |= TMP_REG3;
+ }
+ else if (reg_lmap[b & REG_MASK] == 4)
+ b |= TO_OFFS_REG(SLJIT_LOCALS_REG);
+ }
+
if ((b & REG_MASK) == SLJIT_UNUSED)
inst_size += 1 + sizeof(sljit_si); /* SIB byte required to avoid RIP based addressing. */
else {
if (reg_map[b & REG_MASK] >= 8)
rex |= REX_B;
- if (immb != 0 && !(b & OFFS_REG_MASK)) {
+ if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG))) {
/* Immediate operand. */
if (immb <= 127 && immb >= -128)
inst_size += sizeof(sljit_sb);
else
inst_size += sizeof(sljit_si);
}
- }
- if ((b & REG_MASK) == SLJIT_LOCALS_REG && !(b & OFFS_REG_MASK))
- b |= TO_OFFS_REG(SLJIT_LOCALS_REG);
-
- if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
- inst_size += 1; /* SIB byte. */
- if (reg_map[OFFS_REG(b)] >= 8)
- rex |= REX_X;
+ if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
+ inst_size += 1; /* SIB byte. */
+ if (reg_map[OFFS_REG(b)] >= 8)
+ rex |= REX_X;
+ }
}
}
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
- else if (!(flags & EX86_SSE2) && reg_map[b] >= 8)
+ else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8)
rex |= REX_B;
-#else
- else if (reg_map[b] >= 8)
- rex |= REX_B;
-#endif
if (a & SLJIT_IMM) {
if (flags & EX86_BIN_INS) {
@@ -500,13 +492,8 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si
else {
SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
- if (!(flags & EX86_SSE2) && reg_map[a] >= 8)
- rex |= REX_R;
-#else
- if (reg_map[a] >= 8)
+ if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8)
rex |= REX_R;
-#endif
}
if (rex)
@@ -517,12 +504,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si
/* Encoding the byte. */
INC_SIZE(inst_size);
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
if (flags & EX86_PREF_F2)
*inst++ = 0xf2;
if (flags & EX86_PREF_F3)
*inst++ = 0xf3;
-#endif
if (flags & EX86_PREF_66)
*inst++ = 0x66;
if (rex)
@@ -536,15 +521,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si
if ((a & SLJIT_IMM) || (a == 0))
*buf_ptr = 0;
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
- else if (!(flags & EX86_SSE2))
+ else if (!(flags & EX86_SSE2_OP1))
*buf_ptr = reg_lmap[a] << 3;
else
*buf_ptr = a << 3;
-#else
- else
- *buf_ptr = reg_lmap[a] << 3;
-#endif
}
else {
if (a & SLJIT_IMM) {
@@ -558,11 +538,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si
}
if (!(b & SLJIT_MEM))
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
- *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_lmap[b] : b);
-#else
- *buf_ptr++ |= MOD_REG + reg_lmap[b];
-#endif
+ *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b);
else if ((b & REG_MASK) != SLJIT_UNUSED) {
if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) {
if (immb != 0) {
diff --git a/sljit/sljitNativeX86_common.c b/sljit/sljitNativeX86_common.c
index 653705f..815a46a 100644
--- a/sljit/sljitNativeX86_common.c
+++ b/sljit/sljitNativeX86_common.c
@@ -71,8 +71,8 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
};
#define CHECK_EXTRA_REGS(p, w, do) \
- if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
- w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \
+ if (p >= SLJIT_SCRATCH_EREG1 && p <= SLJIT_SCRATCH_EREG2) { \
+ w = compiler->scratches_start + (p - SLJIT_SCRATCH_EREG1) * sizeof(sljit_sw); \
p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
do; \
} \
@@ -133,9 +133,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
#endif /* SLJIT_CONFIG_X86_32 */
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#define TMP_FREG (0)
-#endif
/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS 0x0010
@@ -145,12 +143,11 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
#define EX86_BYTE_ARG 0x0100
#define EX86_HALF_ARG 0x0200
#define EX86_PREF_66 0x0400
-
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-#define EX86_SSE2 0x0800
-#define EX86_PREF_F2 0x1000
-#define EX86_PREF_F3 0x2000
-#endif
+#define EX86_PREF_F2 0x0800
+#define EX86_PREF_F3 0x1000
+#define EX86_SSE2_OP1 0x2000
+#define EX86_SSE2_OP2 0x4000
+#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
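As a reading of this split (not stated in the patch itself): the old single EX86_SSE2 flag treated both operands as xmm registers, while the per-operand bits let mixed GPR/xmm instructions such as the CVTTSD2SI and CVTSI2SD emissions added later in this file tag only the xmm side, with EX86_SSE2 kept as the union of the two. A minimal sketch of the per-operand selection, using the values defined above:

#include <stdio.h>

#define EX86_SSE2_OP1 0x2000   /* operand a is an xmm register */
#define EX86_SSE2_OP2 0x4000   /* operand b is an xmm register */
#define EX86_SSE2     (EX86_SSE2_OP1 | EX86_SSE2_OP2)

static void describe(int flags)
{
    printf("a: %s, b: %s\n",
        (flags & EX86_SSE2_OP1) ? "xmm" : "gpr",
        (flags & EX86_SSE2_OP2) ? "xmm" : "gpr");
}

int main(void)
{
    describe(EX86_SSE2_OP2);  /* e.g. cvttsd2si: GPR destination, xmm source */
    describe(EX86_SSE2);      /* e.g. ucomisd: both operands are xmm         */
    return 0;
}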
/* --------------------------------------------------------------------- */
/* Instruction forms */
@@ -179,6 +176,9 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
#define CMP_EAX_i32 0x3d
#define CMP_r_rm 0x3b
#define CMP_rm_r 0x39
+#define CVTPD2PS_x_xm 0x5a
+#define CVTSI2SD_x_rm 0x2a
+#define CVTTSD2SI_r_xm 0x2c
#define DIV (/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm 0x5e
#define INT3 0xcc
@@ -239,6 +239,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
#define TEST_EAX_i32 0xa9
#define TEST_rm_r 0x85
#define UCOMISD_x_xm 0x2e
+#define UNPCKLPD_x_xm 0x14
#define XCHG_EAX_r 0x90
#define XCHG_r_rm 0x87
#define XOR (/* BINARY */ 6 << 3)
@@ -271,7 +272,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
/* Multithreading does not affect these static variables, since they store
built-in CPU features. Therefore they can be overwritten by different threads
if they detect the CPU features at the same time. */
-#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;
@@ -325,7 +326,7 @@ static void get_cpu_features(void)
#endif /* _MSC_VER && _MSC_VER >= 1400 */
-#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
cpu_has_sse2 = (features >> 26) & 0x1;
#endif
cpu_has_cmov = (features >> 15) & 0x1;
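The bit positions used here come straight from CPUID leaf 1: SSE2 is EDX bit 26 and CMOV is EDX bit 15. An equivalent stand-alone check, assuming a GCC/Clang x86 toolchain that provides <cpuid.h> (not the inline assembly the file itself uses):

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
            return 1;
        printf("sse2: %u, cmov: %u\n", (edx >> 26) & 0x1, (edx >> 15) & 0x1);
        return 0;
    }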
@@ -2215,7 +2216,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
{
check_sljit_get_register_index(reg);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
- if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
+ if (reg == SLJIT_SCRATCH_EREG1 || reg == SLJIT_SCRATCH_EREG2
|| reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
return -1;
#endif
@@ -2248,8 +2249,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co
/* Floating point operators */
/* --------------------------------------------------------------------- */
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-
/* Alignment + 2 * 16 bytes. */
static sljit_si sse2_data[3 + (4 + 4) * 2];
static sljit_si *sse2_buffer;
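The 0x7fffffff word stored into sse2_buffer in the next hunk suggests these constants back the sign-bit masks used by the SSE2 float negate/abs paths (an assumption, since the full initializer is not shown here). The underlying bit trick, sketched in portable C with memcpy to stay clear of aliasing problems:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static double demo_abs(double x)
    {
        uint64_t bits;
        memcpy(&bits, &x, sizeof bits);
        bits &= UINT64_C(0x7fffffffffffffff);   /* clear the sign bit (ANDPD mask) */
        memcpy(&x, &bits, sizeof x);
        return x;
    }

    static double demo_neg(double x)
    {
        uint64_t bits;
        memcpy(&bits, &x, sizeof bits);
        bits ^= UINT64_C(0x8000000000000000);   /* flip the sign bit (XORPD mask)  */
        memcpy(&x, &bits, sizeof x);
        return x;
    }

    int main(void)
    {
        printf("%f %f\n", demo_abs(-3.5), demo_neg(3.5));   /* 3.500000 -3.500000 */
        return 0;
    }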
@@ -2267,27 +2266,19 @@ static void init_compiler(void)
sse2_buffer[13] = 0x7fffffff;
}
-#endif
-
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
return SLJIT_IS_FPU_AVAILABLE;
-#elif (defined SLJIT_SSE2 && SLJIT_SSE2)
-#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
+#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
if (cpu_has_sse2 == -1)
get_cpu_features();
return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
return 1;
#endif /* SLJIT_DETECT_SSE2 */
-#else /* SLJIT_SSE2 */
- return 0;
-#endif
}
-#if (defined SLJIT_SSE2 && SLJIT_SSE2)
-
static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
@@ -2324,31 +2315,89 @@ static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
sljit_si dst, sljit_sw dstw,
sljit_si src, sljit_sw srcw)
{
- sljit_si dst_r;
+ sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
+ sljit_ub *inst;
- CHECK_ERROR();
- check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
+ compiler->mode32 = 0;
+#endif
+
+ inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
+ FAIL_IF(!inst);
+ *inst++ = GROUP_0F;
+ *inst = CVTTSD2SI_r_xm;
+
+ if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
+ return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
+ sljit_ub *inst;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
+ compiler->mode32 = 0;
+#endif
+
+ if (src & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
+ srcw = (sljit_si)srcw;
+#endif
+ EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
+ src = TMP_REG1;
+ srcw = 0;
+ }
+
+ inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
+ FAIL_IF(!inst);
+ *inst++ = GROUP_0F;
+ *inst = CVTSI2SD_x_rm;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 1;
#endif
+ if (dst_r == TMP_FREG)
+ return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+ return SLJIT_SUCCESS;
+}
- if (GET_OPCODE(op) == SLJIT_CMPD) {
- compiler->flags_saved = 0;
- if (FAST_IS_REG(dst))
- dst_r = dst;
- else {
- dst_r = TMP_FREG;
- FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw));
- }
- return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw);
+static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si src1, sljit_sw src1w,
+ sljit_si src2, sljit_sw src2w)
+{
+ compiler->flags_saved = 0;
+ if (!FAST_IS_REG(src1)) {
+ FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
+ src1 = TMP_FREG;
}
+ return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
+ sljit_si dst, sljit_sw dstw,
+ sljit_si src, sljit_sw srcw)
+{
+ sljit_si dst_r;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = 1;
+#endif
+
+ CHECK_ERROR();
+ SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
- if (op == SLJIT_MOVD) {
+ if (GET_OPCODE(op) == SLJIT_MOVD) {
if (FAST_IS_REG(dst))
return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
if (FAST_IS_REG(src))
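At the C level, the two new conversion helpers in the hunk above compute nothing more exotic than the standard casts: CVTTSD2SI truncates toward zero like a double-to-integer cast, and CVTSI2SD is the usual integer-to-double conversion. A semantic sketch only, not generated code:

    #include <stdio.h>

    int main(void)
    {
        double d = -2.75;
        long w = (long)d;              /* CVTTSD2SI: truncation toward zero -> -2 */
        double back = (double)w;       /* CVTSI2SD: exact for values this small   */

        printf("%ld %f\n", w, back);   /* -2 -2.000000                            */
        return 0;
    }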
@@ -2357,6 +2406,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile
return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
}
+ if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
+ if (FAST_IS_REG(src)) {
+ /* We overwrite the high bits of the source register. From the SLJIT point of view,
+ this is not an issue.
+ Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
+ FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
+ }
+ else {
+ FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
+ src = TMP_FREG;
+ }
+
+ FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
+ if (dst_r == TMP_FREG)
+ return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
+ return SLJIT_SUCCESS;
+ }
+
if (SLOW_IS_REG(dst)) {
dst_r = dst;
if (dst != src)
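The SLJIT_CONVD_FROMS path in the hunk above first duplicates the low double with UNPCKLPD so that CVTPD2PS never converts stale data sitting in the upper lane. The same idea at the intrinsics level, shown for the double-to-single direction only (a sketch of the assumed intent, requiring SSE2 and <emmintrin.h>):

    #include <emmintrin.h>
    #include <stdio.h>

    static float demo_double_to_single(double d)
    {
        __m128d x = _mm_set_sd(d);    /* low lane = d, upper lane = 0.0    */
        x = _mm_unpacklo_pd(x, x);    /* UNPCKLPD: both lanes now hold d   */
        __m128 s = _mm_cvtpd_ps(x);   /* CVTPD2PS: packed double -> single */
        return _mm_cvtss_f32(s);      /* take the low single               */
    }

    int main(void)
    {
        printf("%f\n", (double)demo_double_to_single(1.5));
        return 0;
    }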
@@ -2391,6 +2459,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
CHECK_ERROR();
check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+ ADJUST_LOCAL_OFFSET(src2, src2w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = 1;
@@ -2440,33 +2511,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
-#else
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
- sljit_si dst, sljit_sw dstw,
- sljit_si src, sljit_sw srcw)
-{
- CHECK_ERROR();
- /* Should cause an assertion fail. */
- check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
- compiler->error = SLJIT_ERR_UNSUPPORTED;
- return SLJIT_ERR_UNSUPPORTED;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
- sljit_si dst, sljit_sw dstw,
- sljit_si src1, sljit_sw src1w,
- sljit_si src2, sljit_sw src2w)
-{
- CHECK_ERROR();
- /* Should cause an assertion fail. */
- check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
- compiler->error = SLJIT_ERR_UNSUPPORTED;
- return SLJIT_ERR_UNSUPPORTED;
-}
-
-#endif
-
/* --------------------------------------------------------------------- */
/* Conditional instructions */
/* --------------------------------------------------------------------- */