diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-10-20 21:33:38 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-10-20 21:33:38 +0000 |
commit | 877d804864b242a7205f588a9b2ca0b271bfe0d6 (patch) | |
tree | bc9e112e6e75c6a8fb829f914a64c41fa94ac70e | |
parent | cb2d2c4744966facc4c43cceee213eb33020884d (diff) | |
download | pcre-877d804864b242a7205f588a9b2ca0b271bfe0d6.tar.gz |
Major JIT compiler update with experimental Sparc 32 support.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1149 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | sljit/sljitConfig.h | 1 | ||||
-rw-r--r-- | sljit/sljitConfigInternal.h | 19 | ||||
-rw-r--r-- | sljit/sljitLir.c | 55 | ||||
-rw-r--r-- | sljit/sljitLir.h | 20 | ||||
-rw-r--r-- | sljit/sljitNativeARM_Thumb2.c | 32 | ||||
-rw-r--r-- | sljit/sljitNativeARM_v5.c | 188 | ||||
-rw-r--r-- | sljit/sljitNativeMIPS_32.c | 166 | ||||
-rw-r--r-- | sljit/sljitNativeMIPS_common.c | 405 | ||||
-rw-r--r-- | sljit/sljitNativePPC_32.c | 94 | ||||
-rw-r--r-- | sljit/sljitNativePPC_64.c | 123 | ||||
-rw-r--r-- | sljit/sljitNativePPC_common.c | 582 | ||||
-rw-r--r-- | sljit/sljitNativeSPARC_32.c | 163 | ||||
-rw-r--r-- | sljit/sljitNativeSPARC_common.c | 1286 | ||||
-rw-r--r-- | sljit/sljitNativeX86_32.c | 1 | ||||
-rw-r--r-- | sljit/sljitNativeX86_64.c | 1 | ||||
-rw-r--r-- | sljit/sljitNativeX86_common.c | 22 |
17 files changed, 2357 insertions, 804 deletions
@@ -122,6 +122,9 @@ Version 8.32 24. Add support for 32-bit character strings, and UTF-32 +25. Major JIT compiler update (code refactoring and bugfixing). + Experimental Sparc 32 support is added. + Version 8.31 06-July-2012 ------------------------- diff --git a/sljit/sljitConfig.h b/sljit/sljitConfig.h index 32c3b10..68bc59d 100644 --- a/sljit/sljitConfig.h +++ b/sljit/sljitConfig.h @@ -47,6 +47,7 @@ /* #define SLJIT_CONFIG_PPC_32 1 */ /* #define SLJIT_CONFIG_PPC_64 1 */ /* #define SLJIT_CONFIG_MIPS_32 1 */ +/* #define SLJIT_CONFIG_SPARC_32 1 */ /* #define SLJIT_CONFIG_AUTO 1 */ /* #define SLJIT_CONFIG_UNSUPPORTED 1 */ diff --git a/sljit/sljitConfigInternal.h b/sljit/sljitConfigInternal.h index 0154d3f..1dfff6a 100644 --- a/sljit/sljitConfigInternal.h +++ b/sljit/sljitConfigInternal.h @@ -39,6 +39,7 @@ SLJIT_BIG_ENDIAN : big endian architecture SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information + SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address Types and useful macros: sljit_b, sljit_ub : signed and unsigned 8 bit byte @@ -57,6 +58,7 @@ || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ || (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ || (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)) #error "An architecture must be selected" @@ -71,6 +73,7 @@ + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + + (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2 #error "Multiple architectures are selected" @@ -99,6 +102,8 @@ #define SLJIT_CONFIG_PPC_32 1 #elif defined(__mips__) #define SLJIT_CONFIG_MIPS_32 1 +#elif defined(__sparc__) || defined(__sparc) +#define SLJIT_CONFIG_SPARC_32 1 #else /* Unsupported architecture */ #define SLJIT_CONFIG_UNSUPPORTED 1 @@ -309,7 +314,9 @@ typedef long int sljit_w; #if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) /* These macros are useful for the application. */ -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) #define SLJIT_BIG_ENDIAN 1 #elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -335,11 +342,21 @@ typedef long int sljit_w; #error "Exactly one endianness must be selected" #endif +#ifndef SLJIT_INDIRECT_CALL #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32 && defined _AIX) /* It seems certain ppc compilers use an indirect addressing for functions which makes things complicated. */ #define SLJIT_INDIRECT_CALL 1 #endif +#endif /* SLJIT_INDIRECT_CALL */ + +#ifndef SLJIT_RETURN_ADDRESS_OFFSET +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#define SLJIT_RETURN_ADDRESS_OFFSET 8 +#else +#define SLJIT_RETURN_ADDRESS_OFFSET 0 +#endif +#endif /* SLJIT_RETURN_ADDRESS_OFFSET */ #ifndef SLJIT_SSE2 diff --git a/sljit/sljitLir.c b/sljit/sljitLir.c index d48b878..17904d8 100644 --- a/sljit/sljitLir.c +++ b/sljit/sljitLir.c @@ -122,7 +122,7 @@ #endif #if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) - #define IS_CONDITIONAL 0x04 + #define IS_COND 0x04 #define IS_BL 0x08 /* cannot be encoded as branch */ #define B_TYPE0 0x00 @@ -159,11 +159,33 @@ #define PATCH_J 0x80 /* instruction types */ - #define UNMOVABLE_INS 0 + #define MOVABLE_INS 0 /* 1 - 31 last destination register */ - #define FCSR_FCC 32 /* no destination (i.e: store) */ - #define MOVABLE_INS 33 + #define UNMOVABLE_INS 32 + /* FPU status register */ + #define FCSR_FCC 33 +#endif + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + #define IS_MOVABLE 0x04 + #define IS_COND 0x08 + #define IS_CALL 0x10 + + #define PATCH_B 0x20 + #define PATCH_CALL 0x40 + + /* instruction types */ + #define MOVABLE_INS 0 + /* 1 - 31 last destination register */ + /* no destination (i.e: store) */ + #define UNMOVABLE_INS 32 + + #define DST_INS_MASK 0xff + + /* ICC_SET is the same as SET_FLAGS. */ + #define ICC_IS_SET (1 << 23) + #define FCC_IS_SET (1 << 24) #endif #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -179,11 +201,6 @@ #endif #endif -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 -#define FIXED_LOCALS_OFFSET (4 * sizeof(sljit_w)) -#endif - #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) #define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) @@ -198,6 +215,16 @@ #define FIXED_LOCALS_OFFSET ((6 + 8) * sizeof(sljit_w)) #endif +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 +#define FIXED_LOCALS_OFFSET (4 * sizeof(sljit_w)) +#endif + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 +#define FIXED_LOCALS_OFFSET (23 * sizeof(sljit_w)) +#endif + #if (defined SLJIT_HAS_VARIABLE_LOCALS_OFFSET && SLJIT_HAS_VARIABLE_LOCALS_OFFSET) #define ADJUST_LOCAL_OFFSET(p, i) \ @@ -299,6 +326,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void) compiler->delay_slot = UNMOVABLE_INS; #endif +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + compiler->delay_slot = UNMOVABLE_INS; +#endif + #if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT) if (!compiler_initialized) { init_compiler(); @@ -1092,7 +1123,7 @@ static SLJIT_INLINE void check_sljit_emit_fcmp(struct sljit_compiler *compiler, SLJIT_ASSERT(sljit_is_fpu_available()); SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP))); - SLJIT_ASSERT((type & 0xff) >= SLJIT_C_FLOAT_EQUAL && (type & 0xff) <= SLJIT_C_FLOAT_NOT_NAN); + SLJIT_ASSERT((type & 0xff) >= SLJIT_C_FLOAT_EQUAL && (type & 0xff) <= SLJIT_C_FLOAT_ORDERED); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); @@ -1255,6 +1286,8 @@ static SLJIT_INLINE int emit_mov_before_return(struct sljit_compiler *compiler, #include "sljitNativePPC_common.c" #elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #include "sljitNativeMIPS_common.c" +#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + #include "sljitNativeSPARC_common.c" #endif #if !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -1375,7 +1408,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_get_local_base(struct sljit_compiler *compile /* Empty function bodies for those machines, which are not (yet) supported. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) { return "unsupported"; } diff --git a/sljit/sljitLir.h b/sljit/sljitLir.h index 23fea0f..b26baeb 100644 --- a/sljit/sljitLir.h +++ b/sljit/sljitLir.h @@ -260,6 +260,12 @@ struct sljit_compiler { sljit_w cache_argw; #endif +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + int delay_slot; + int cache_arg; + sljit_w cache_argw; +#endif + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) FILE* verbose; #endif @@ -542,7 +548,9 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int /* Flags: I | E | O | K */ #define SLJIT_NEG 21 /* Count leading zeroes - Flags: I | E | K */ + Flags: I | E | K + Important note! Sparc 32 does not support K flag, since + the required popc instruction is introduced only in sparc 64. */ #define SLJIT_CLZ 22 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op, @@ -617,7 +625,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compile SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void); /* Note: dst is the left and src is the right operand for SLJIT_FCMP. - Note: NaN check is always performed. If SLJIT_C_FLOAT_NAN is set, + Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED is set, the comparison result is unpredictable. Flags: E | S (see SLJIT_C_FLOAT_*) */ #define SLJIT_FCMP 34 @@ -677,8 +685,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi #define SLJIT_C_FLOAT_GREATER_EQUAL 17 #define SLJIT_C_FLOAT_GREATER 18 #define SLJIT_C_FLOAT_LESS_EQUAL 19 -#define SLJIT_C_FLOAT_NAN 20 -#define SLJIT_C_FLOAT_NOT_NAN 21 +#define SLJIT_C_FLOAT_UNORDERED 20 +#define SLJIT_C_FLOAT_ORDERED 21 #define SLJIT_JUMP 22 #define SLJIT_FAST_CALL 23 @@ -716,7 +724,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler sljit_emit_jump. However some architectures (i.e: MIPS) may employ special optimizations here. It is suggested to use this comparison form when appropriate. - type must be between SLJIT_C_FLOAT_EQUAL and SLJIT_C_FLOAT_NOT_NAN + type must be between SLJIT_C_FLOAT_EQUAL and SLJIT_C_FLOAT_ORDERED type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP Flags: destroy flags. Note: if either operand is NaN, the behaviour is undefined for @@ -741,7 +749,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, i /* If op == SLJIT_MOV: Set dst to 1 if condition is fulfilled, 0 otherwise - type must be between SLJIT_C_EQUAL and SLJIT_C_FLOAT_NOT_NAN + type must be between SLJIT_C_EQUAL and SLJIT_C_FLOAT_ORDERED Flags: - (never set any flags) If op == SLJIT_OR Dst is used as src as well, and set its lowest bit to 1 if diff --git a/sljit/sljitNativeARM_Thumb2.c b/sljit/sljitNativeARM_Thumb2.c index 465c30d..30f52a3 100644 --- a/sljit/sljitNativeARM_Thumb2.c +++ b/sljit/sljitNativeARM_Thumb2.c @@ -24,11 +24,14 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) { return "ARM-Thumb2" SLJIT_CPUINFO; } +/* Length of an instruction word. */ +typedef sljit_ui sljit_ins; + /* Last register + 1. */ #define TMP_REG1 (SLJIT_NO_REGISTERS + 1) #define TMP_REG2 (SLJIT_NO_REGISTERS + 2) @@ -40,7 +43,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { - 0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15 + 0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15 }; #define COPY_BITS(src, from, to, bits) \ @@ -75,8 +78,6 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define IMM12(imm) \ (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)) -typedef sljit_ui sljit_ins; - /* --------------------------------------------------------------------- */ /* Instrucion forms */ /* --------------------------------------------------------------------- */ @@ -234,7 +235,7 @@ static SLJIT_INLINE int detect_jump_type(struct sljit_jump *jump, sljit_uh *code diff = ((sljit_w)(code + jump->u.label->size) - (sljit_w)(code_ptr + 2)) >> 1; } - if (jump->flags & IS_CONDITIONAL) { + if (jump->flags & IS_COND) { SLJIT_ASSERT(!(jump->flags & IS_BL)); if (diff <= 127 && diff >= -128) { jump->flags |= B_TYPE1; @@ -303,24 +304,24 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump) switch (type) { case 1: /* Encoding T1 of 'B' instruction */ - SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_CONDITIONAL)); + SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND)); jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff); return; case 2: /* Encoding T3 of 'B' instruction */ - SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_CONDITIONAL)); + SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND)); jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1); jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff); return; case 3: - SLJIT_ASSERT(jump->flags & IS_CONDITIONAL); + SLJIT_ASSERT(jump->flags & IS_COND); *jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8; diff--; type = 5; break; case 4: /* Encoding T2 of 'B' instruction */ - SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_CONDITIONAL)); + SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND)); jump_inst[0] = 0xe000 | (diff & 0x7ff); return; } @@ -385,7 +386,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil label = label->next; } if (jump && jump->addr == half_count) { - jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_CONDITIONAL) ? 10 : 8); + jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8); code_ptr -= detect_jump_type(jump, code_ptr, code); jump = jump->next; } @@ -1121,6 +1122,10 @@ static SLJIT_INLINE int emit_op_mem(struct sljit_compiler *compiler, int flags, return getput_arg(compiler, flags, reg, arg, argw, 0, 0); } +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { int size; @@ -1201,7 +1206,6 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, CHECK_ERROR(); check_sljit_emit_return(compiler, op, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); @@ -1766,11 +1770,11 @@ static sljit_uw get_cc(int type) return 0xd; case SLJIT_C_OVERFLOW: - case SLJIT_C_FLOAT_NAN: + case SLJIT_C_FLOAT_UNORDERED: return 0x6; case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_FLOAT_NOT_NAN: + case SLJIT_C_FLOAT_ORDERED: return 0x7; default: /* SLJIT_JUMP */ @@ -1810,7 +1814,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile /* In ARM, we don't need to touch the arguments. */ PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); if (type < SLJIT_JUMP) { - jump->flags |= IS_CONDITIONAL; + jump->flags |= IS_COND; cc = get_cc(type); jump->flags |= cc << 8; PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); diff --git a/sljit/sljitNativeARM_v5.c b/sljit/sljitNativeARM_v5.c index 5dc555c..3bf005b 100644 --- a/sljit/sljitNativeARM_v5.c +++ b/sljit/sljitNativeARM_v5.c @@ -24,7 +24,7 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) { #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) return "ARMv7" SLJIT_CPUINFO; @@ -56,7 +56,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { - 0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15 + 0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15 }; #define RM(rm) (reg_map[rm]) @@ -793,6 +793,10 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil return code; } +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + /* emit_op inp_flags. WRITE_BACK must be the first, since it is a flag. */ #define WRITE_BACK 0x01 @@ -902,7 +906,6 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, CHECK_ERROR(); check_sljit_emit_return(compiler, op, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); @@ -1011,6 +1014,74 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, sljit_w mul_inst; switch (GET_OPCODE(op)) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if (dst != src2) { + if (src2 & SRC2_IMM) { + if (flags & INV_IMM) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + } + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (op == SLJIT_MOV_UB) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff)); + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst])); +#else + return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2)); +#endif + } + else if (dst != src2) { + SLJIT_ASSERT(src2 & SRC2_IMM); + if (flags & INV_IMM) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst])); +#else + return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2)); +#endif + } + else if (dst != src2) { + SLJIT_ASSERT(src2 & SRC2_IMM); + if (flags & INV_IMM) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + if (src2 & SRC2_IMM) { + if (flags & INV_IMM) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); + } + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(!(flags & INV_IMM)); + SLJIT_ASSERT(!(src2 & SRC2_IMM)); + FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2))); + if (flags & SET_FLAGS) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM); + return SLJIT_SUCCESS; + case SLJIT_ADD: SLJIT_ASSERT(!(flags & INV_IMM)); EMIT_DATA_PROCESS_INS_AND_RETURN(ADD_DP); @@ -1080,74 +1151,6 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, case SLJIT_ASHR: EMIT_SHIFT_INS_AND_RETURN(2); - - case SLJIT_MOV: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if (dst != src2) { - if (src2 & SRC2_IMM) { - if (flags & INV_IMM) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); - } - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]); - } - return SLJIT_SUCCESS; - - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (op == SLJIT_MOV_UB) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst])); -#else - return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2)); -#endif - } - else if (dst != src2) { - SLJIT_ASSERT(src2 & SRC2_IMM); - if (flags & INV_IMM) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); - } - return SLJIT_SUCCESS; - - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst])); -#else - return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2)); -#endif - } - else if (dst != src2) { - SLJIT_ASSERT(src2 & SRC2_IMM); - if (flags & INV_IMM) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); - } - return SLJIT_SUCCESS; - - case SLJIT_NOT: - if (src2 & SRC2_IMM) { - if (flags & INV_IMM) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); - } - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2)); - - case SLJIT_CLZ: - SLJIT_ASSERT(!(flags & INV_IMM)); - SLJIT_ASSERT(!(src2 & SRC2_IMM)); - FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2))); - if (flags & SET_FLAGS) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM); - return SLJIT_SUCCESS; } SLJIT_ASSERT_STOP(); return SLJIT_SUCCESS; @@ -1982,7 +1985,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compile 1 - vfp */ static int arm_fpu_type = -1; -static void init_compiler() +static void init_compiler(void) { if (arm_fpu_type != -1) return; @@ -2080,7 +2083,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, in int dst, sljit_w dstw, int src, sljit_w srcw) { - int dst_freg; + int dst_fr; CHECK_ERROR(); check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); @@ -2102,28 +2105,31 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, in return SLJIT_SUCCESS; } - dst_freg = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst; + dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst; if (src > SLJIT_FLOAT_REG4) { - FAIL_IF(emit_fpu_data_transfer(compiler, dst_freg, 1, src, srcw)); - src = dst_freg; + FAIL_IF(emit_fpu_data_transfer(compiler, dst_fr, 1, src, srcw)); + src = dst_fr; } switch (op) { case SLJIT_FMOV: - if (src != dst_freg && dst_freg != TMP_FREG1) - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMOV_F64, dst_freg, src, 0)); + if (src != dst_fr && dst_fr != TMP_FREG1) + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMOV_F64, dst_fr, src, 0)); break; case SLJIT_FNEG: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VNEG_F64, dst_freg, src, 0)); + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VNEG_F64, dst_fr, src, 0)); break; case SLJIT_FABS: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VABS_F64, dst_freg, src, 0)); + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VABS_F64, dst_fr, src, 0)); break; } - if (dst_freg == TMP_FREG1) - FAIL_IF(emit_fpu_data_transfer(compiler, src, 0, dst, dstw)); + if (dst_fr == TMP_FREG1) { + if (op == SLJIT_FMOV) + dst_fr = src; + FAIL_IF(emit_fpu_data_transfer(compiler, dst_fr, 0, dst, dstw)); + } return SLJIT_SUCCESS; } @@ -2133,7 +2139,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, in int src1, sljit_w src1w, int src2, sljit_w src2w) { - int dst_freg; + int dst_fr; CHECK_ERROR(); check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); @@ -2141,7 +2147,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, in compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_freg = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst; + dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst; if (src2 > SLJIT_FLOAT_REG4) { FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w)); @@ -2155,23 +2161,23 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, in switch (op) { case SLJIT_FADD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VADD_F64, dst_freg, src2, src1)); + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VADD_F64, dst_fr, src2, src1)); break; case SLJIT_FSUB: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VSUB_F64, dst_freg, src2, src1)); + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VSUB_F64, dst_fr, src2, src1)); break; case SLJIT_FMUL: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMUL_F64, dst_freg, src2, src1)); + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMUL_F64, dst_fr, src2, src1)); break; case SLJIT_FDIV: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VDIV_F64, dst_freg, src2, src1)); + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VDIV_F64, dst_fr, src2, src1)); break; } - if (dst_freg == TMP_FREG1) + if (dst_fr == TMP_FREG1) FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 0, dst, dstw)); return SLJIT_SUCCESS; @@ -2270,11 +2276,11 @@ static sljit_uw get_cc(int type) return 0xd0000000; case SLJIT_C_OVERFLOW: - case SLJIT_C_FLOAT_NAN: + case SLJIT_C_FLOAT_UNORDERED: return 0x60000000; case SLJIT_C_NOT_OVERFLOW: - case SLJIT_C_FLOAT_NOT_NAN: + case SLJIT_C_FLOAT_ORDERED: return 0x70000000; default: /* SLJIT_JUMP */ diff --git a/sljit/sljitNativeMIPS_32.c b/sljit/sljitNativeMIPS_32.c index c0cc8b5..82cb28b 100644 --- a/sljit/sljitNativeMIPS_32.c +++ b/sljit/sljitNativeMIPS_32.c @@ -72,6 +72,85 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, int overflow_ra = 0; switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SB) { +#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); +#else + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); +#endif + } + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SH) { +#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); +#else + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); +#endif + } + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); + return SLJIT_SUCCESS; + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); +#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst))); +#else + if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) { + FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG)); + return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG); + } + /* Nearly all instructions are unmovable in the following sequence. */ + FAIL_IF(push_inst(compiler, ADDU_W | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + /* Check zero. */ + FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ADDIU_W | SA(0) | T(dst) | IMM(-1), DR(dst))); + /* Loop for searching the highest bit. */ + FAIL_IF(push_inst(compiler, ADDIU_W | S(dst) | T(dst) | IMM(1), DR(dst))); + FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS)); + if (op & SLJIT_SET_E) + return push_inst(compiler, ADDU_W | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG); +#endif + return SLJIT_SUCCESS; + case SLJIT_ADD: if (flags & SRC2_IMM) { if (op & SLJIT_SET_O) { @@ -293,97 +372,16 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, case SLJIT_ASHR: EMIT_SHIFT(SRA, SRAV); return SLJIT_SUCCESS; - - case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: - SLJIT_ASSERT(src1 == TMP_REG1); - if (dst != src2) - return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst)); - return SLJIT_SUCCESS; - - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: - SLJIT_ASSERT(src1 == TMP_REG1); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SB) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) - return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); -#else - FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); - return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); -#endif - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); - } - else if (dst != src2) - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; - - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: - SLJIT_ASSERT(src1 == TMP_REG1); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SH) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) - return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); -#else - FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); - return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); -#endif - } - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); - } - else if (dst != src2) - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; - - case SLJIT_NOT: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if (op & SLJIT_SET_E) - FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (CHECK_FLAGS(SLJIT_SET_E)) - FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); - return SLJIT_SUCCESS; - - case SLJIT_CLZ: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) - if (op & SLJIT_SET_E) - FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (CHECK_FLAGS(SLJIT_SET_E)) - FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst))); -#else - if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) { - FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG)); - return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG); - } - /* Nearly all instructions are unmovable in the following sequence. */ - FAIL_IF(push_inst(compiler, ADDU_W | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); - /* Check zero. */ - FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(6), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS)); - /* Check sign bit. */ - FAIL_IF(push_inst(compiler, BLTZ | S(TMP_REG1) | IMM(4), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(0), UNMOVABLE_INS)); - /* Loop for searching the highest bit. */ - FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, ADDIU_W | S(dst) | T(dst) | IMM(1), UNMOVABLE_INS)); - if (op & SLJIT_SET_E) - return push_inst(compiler, ADDU_W | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG); -#endif - return SLJIT_SUCCESS; } SLJIT_ASSERT_STOP(); return SLJIT_SUCCESS; } -static SLJIT_INLINE int emit_const(struct sljit_compiler *compiler, int reg, sljit_w init_value) +static SLJIT_INLINE int emit_const(struct sljit_compiler *compiler, int dst, sljit_w init_value) { - FAIL_IF(push_inst(compiler, LUI | T(reg) | IMM(init_value >> 16), DR(reg))); - return push_inst(compiler, ORI | S(reg) | T(reg) | IMM(init_value), DR(reg)); + FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst))); + return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) diff --git a/sljit/sljitNativeMIPS_common.c b/sljit/sljitNativeMIPS_common.c index cf748c2..15f9e17 100644 --- a/sljit/sljitNativeMIPS_common.c +++ b/sljit/sljitNativeMIPS_common.c @@ -24,7 +24,7 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) { return "MIPS" SLJIT_CPUINFO; } @@ -41,15 +41,15 @@ typedef sljit_ui sljit_ins; #define TMP_REG3 (SLJIT_NO_REGISTERS + 3) /* For position independent code, t9 must contain the function address. */ -#define PIC_ADDR_REG TMP_REG2 +#define PIC_ADDR_REG TMP_REG2 /* TMP_EREG1 is used mainly for literal encoding on 64 bit. */ -#define TMP_EREG1 15 -#define TMP_EREG2 24 +#define TMP_EREG1 15 +#define TMP_EREG2 24 /* Floating point status register. */ -#define FCSR_REG 31 +#define FCSR_REG 31 /* Return address register. */ -#define RETURN_ADDR_REG 31 +#define RETURN_ADDR_REG 31 /* Flags are keept in volatile registers. */ #define EQUAL_FLAG 7 @@ -60,8 +60,12 @@ typedef sljit_ui sljit_ins; #define GREATER_FLAG 13 #define OVERFLOW_FLAG 14 -#define TMP_FREG1 (SLJIT_FLOAT_REG4 + 1) -#define TMP_FREG2 (SLJIT_FLOAT_REG4 + 2) +#define TMP_FREG1 ((SLJIT_FLOAT_REG4 + 1) << 1) +#define TMP_FREG2 ((SLJIT_FLOAT_REG4 + 2) << 1) + +static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { + 0, 2, 5, 6, 3, 8, 16, 17, 18, 19, 20, 29, 4, 25, 9 +}; /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -74,9 +78,9 @@ typedef sljit_ui sljit_ins; #define SA(s) ((s) << 21) #define TA(t) ((t) << 16) #define DA(d) ((d) << 11) -#define FT(t) ((t) << (16 + 1)) -#define FS(s) ((s) << (11 + 1)) -#define FD(d) ((d) << (6 + 1)) +#define FT(t) ((t) << 16) +#define FS(s) ((s) << 11) +#define FD(d) ((d) << 6) #define IMM(imm) ((imm) & 0xffff) #define SH_IMM(imm) ((imm & 0x1f) << 6) @@ -114,7 +118,6 @@ typedef sljit_ui sljit_ins; #define JALR (HI(0) | LO(9)) #define JR (HI(0) | LO(8)) #define LD (HI(55)) -#define LDC1 (HI(53)) #define LUI (HI(15)) #define LW (HI(35)) #define NEG_D (HI(17) | FMT_D | LO(7)) @@ -132,7 +135,6 @@ typedef sljit_ui sljit_ins; #define OR (HI(0) | LO(37)) #define ORI (HI(13)) #define SD (HI(63)) -#define SDC1 (HI(61)) #define SLT (HI(0) | LO(42)) #define SLTI (HI(10)) #define SLTIU (HI(11)) @@ -172,14 +174,12 @@ typedef sljit_ui sljit_ins; #define SIMM_MIN (-0x8000) #define UIMM_MAX (0xffff) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { - 0, 2, 5, 6, 3, 8, 16, 17, 18, 19, 20, 29, 4, 25, 9 -}; - /* dest_reg is the absolute name of the register Useful for reordering instructions in the delay slot. */ static int push_inst(struct sljit_compiler *compiler, sljit_ins ins, int delay_slot) { + SLJIT_ASSERT(delay_slot == MOVABLE_INS || delay_slot >= UNMOVABLE_INS + || delay_slot == ((ins >> 11) & 0x1f) || delay_slot == ((ins >> 16) & 0x1f)); sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); FAIL_IF(!ptr); *ptr = ins; @@ -335,7 +335,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) jump->addr = (sljit_uw)(code_ptr - 3); #else - jump->addr = (sljit_uw)(code_ptr - 6); +#error "Implementation required" #endif code_ptr = optimize_jump(jump, code_ptr, code); jump = jump->next; @@ -386,10 +386,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); #else - buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff); - buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff); - buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff); - buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff); +#error "Implementation required" #endif } while (0); jump = jump->next; @@ -406,42 +403,43 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil return code; } +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + /* Creates an index in data_transfer_insts array. */ +#define LOAD_DATA 0x01 #define WORD_DATA 0x00 -#define BYTE_DATA 0x01 -#define HALF_DATA 0x02 -#define INT_DATA 0x03 -#define SIGNED_DATA 0x04 -#define LOAD_DATA 0x08 - -#define MEM_MASK 0x0f - -#define WRITE_BACK 0x00010 -#define ARG_TEST 0x00020 -#define CUMULATIVE_OP 0x00040 -#define LOGICAL_OP 0x00080 -#define IMM_OP 0x00100 -#define SRC2_IMM 0x00200 - -#define UNUSED_DEST 0x00400 -#define REG_DEST 0x00800 -#define REG1_SOURCE 0x01000 -#define REG2_SOURCE 0x02000 -#define SLOW_SRC1 0x04000 -#define SLOW_SRC2 0x08000 -#define SLOW_DEST 0x10000 +#define BYTE_DATA 0x02 +#define HALF_DATA 0x04 +#define INT_DATA 0x06 +#define SIGNED_DATA 0x08 +/* Separates integer and floating point registers */ +#define GPR_REG 0x0f +#define DOUBLE_DATA 0x10 + +#define MEM_MASK 0x1f + +#define WRITE_BACK 0x00020 +#define ARG_TEST 0x00040 +#define CUMULATIVE_OP 0x00080 +#define LOGICAL_OP 0x00100 +#define IMM_OP 0x00200 +#define SRC2_IMM 0x00400 + +#define UNUSED_DEST 0x00800 +#define REG_DEST 0x01000 +#define REG1_SOURCE 0x02000 +#define REG2_SOURCE 0x04000 +#define SLOW_SRC1 0x08000 +#define SLOW_SRC2 0x10000 +#define SLOW_DEST 0x20000 /* Only these flags are set. UNUSED_DEST is not set when no flags should be set. */ #define CHECK_FLAGS(list) \ (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list)))) #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#include "sljitNativeMIPS_32.c" -#else -#include "sljitNativeMIPS_64.c" -#endif - -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #define STACK_STORE SW #define STACK_LOAD LW #else @@ -449,10 +447,11 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #define STACK_LOAD LD #endif -static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, - int dst, sljit_w dstw, - int src1, sljit_w src1w, - int src2, sljit_w src2w); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#include "sljitNativeMIPS_32.c" +#else +#include "sljitNativeMIPS_64.c" +#endif SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { @@ -528,7 +527,6 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, CHECK_ERROR(); check_sljit_emit_return(compiler, op, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); @@ -569,33 +567,36 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, /* --------------------------------------------------------------------- */ #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define ARCH_DEPEND(a, b) a +#define ARCH_32_64(a, b) a #else -#define ARCH_DEPEND(a, b) b +#define ARCH_32_64(a, b) b #endif -static SLJIT_CONST sljit_ins data_transfer_insts[16] = { -/* s u w */ ARCH_DEPEND(HI(43) /* sw */, HI(63) /* sd */), -/* s u b */ HI(40) /* sb */, -/* s u h */ HI(41) /* sh*/, -/* s u i */ HI(43) /* sw */, - -/* s s w */ ARCH_DEPEND(HI(43) /* sw */, HI(63) /* sd */), -/* s s b */ HI(40) /* sb */, -/* s s h */ HI(41) /* sh*/, -/* s s i */ HI(43) /* sw */, - -/* l u w */ ARCH_DEPEND(HI(35) /* lw */, HI(55) /* ld */), -/* l u b */ HI(36) /* lbu */, -/* l u h */ HI(37) /* lhu */, -/* l u i */ ARCH_DEPEND(HI(35) /* lw */, HI(39) /* lwu */), - -/* l s w */ ARCH_DEPEND(HI(35) /* lw */, HI(55) /* ld */), -/* l s b */ HI(32) /* lb */, -/* l s h */ HI(33) /* lh */, -/* l s i */ HI(35) /* lw */, +static SLJIT_CONST sljit_ins data_transfer_insts[16 + 2] = { +/* u w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */), +/* u w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */), +/* u b s */ HI(40) /* sb */, +/* u b l */ HI(36) /* lbu */, +/* u h s */ HI(41) /* sh */, +/* u h l */ HI(37) /* lhu */, +/* u i s */ HI(43) /* sw */, +/* u i l */ ARCH_32_64(HI(35) /* lw */, HI(39) /* lwu */), + +/* s w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */), +/* s w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */), +/* s b s */ HI(40) /* sb */, +/* s b l */ HI(32) /* lb */, +/* s h s */ HI(41) /* sh */, +/* s h l */ HI(33) /* lh */, +/* s i s */ HI(43) /* sw */, +/* s i l */ HI(35) /* lw */, + +/* d s */ HI(61) /* sdc1 */, +/* d l */ HI(53) /* ldc1 */, }; +#undef ARCH_32_64 + /* reg_ar is an absoulute register! */ /* Can perform an operation using at most 1 instruction. */ @@ -607,10 +608,11 @@ static int getput_arg_fast(struct sljit_compiler *compiler, int flags, int reg_a /* Works for both absoulte and relative addresses. */ if (SLJIT_UNLIKELY(flags & ARG_TEST)) return 1; - FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(arg & 0xf) | TA(reg_ar) | IMM(argw), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS)); + FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(arg & 0xf) + | TA(reg_ar) | IMM(argw), ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? reg_ar : MOVABLE_INS)); return -1; } - return (flags & ARG_TEST) ? SLJIT_SUCCESS : 0; + return 0; } /* See getput_arg below. @@ -618,8 +620,7 @@ static int getput_arg_fast(struct sljit_compiler *compiler, int flags, int reg_a operators always uses word arguments without write back. */ static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw) { - if (!(next_arg & SLJIT_MEM)) - return 0; + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); /* Simple operation except for updates. */ if (arg & 0xf0) { @@ -631,7 +632,7 @@ static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw) } if (arg == next_arg) { - if (((sljit_uw)(next_argw - argw) <= SIMM_MAX && (sljit_uw)(next_argw - argw) >= SIMM_MIN)) + if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN)) return 1; return 0; } @@ -642,8 +643,7 @@ static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw) /* Emit the necessary instructions. See can_cache above. */ static int getput_arg(struct sljit_compiler *compiler, int flags, int reg_ar, int arg, sljit_w argw, int next_arg, sljit_w next_argw) { - int tmp_ar; - int base; + int tmp_ar, base, delay_slot; SLJIT_ASSERT(arg & SLJIT_MEM); if (!(next_arg & SLJIT_MEM)) { @@ -651,7 +651,13 @@ static int getput_arg(struct sljit_compiler *compiler, int flags, int reg_ar, in next_argw = 0; } - tmp_ar = (flags & LOAD_DATA) ? reg_ar : DR(TMP_REG3); + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) { + tmp_ar = reg_ar; + delay_slot = reg_ar; + } else { + tmp_ar = DR(TMP_REG1); + delay_slot = MOVABLE_INS; + } base = arg & 0xf; if (SLJIT_UNLIKELY(arg & 0xf0)) { @@ -666,22 +672,22 @@ static int getput_arg(struct sljit_compiler *compiler, int flags, int reg_ar, in if (argw == compiler->cache_argw) { if (!(flags & WRITE_BACK)) { if (arg == compiler->cache_arg) - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg) { if (arg == next_arg && argw == (next_argw & 0x3)) { compiler->cache_arg = arg; compiler->cache_argw = argw; FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); } FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar)); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); } } else { if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg) { FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base))); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot); } } } @@ -701,10 +707,10 @@ static int getput_arg(struct sljit_compiler *compiler, int flags, int reg_ar, in } else FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? ((arg >> 4) & 0xf) : TMP_REG3) | DA(tmp_ar), tmp_ar)); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); } FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? ((arg >> 4) & 0xf) : TMP_REG3) | D(base), DR(base))); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot); } if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) { @@ -740,7 +746,7 @@ static int getput_arg(struct sljit_compiler *compiler, int flags, int reg_ar, in FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base))); } } - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot); } if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { @@ -748,7 +754,7 @@ static int getput_arg(struct sljit_compiler *compiler, int flags, int reg_ar, in FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); compiler->cache_argw = argw; } - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); } if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { @@ -762,16 +768,16 @@ static int getput_arg(struct sljit_compiler *compiler, int flags, int reg_ar, in compiler->cache_argw = argw; if (!base) - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) { compiler->cache_arg = arg; FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | D(TMP_REG3), DR(TMP_REG3))); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); } FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | DA(tmp_ar), tmp_ar)); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); } static SLJIT_INLINE int emit_op_mem(struct sljit_compiler *compiler, int flags, int reg_ar, int arg, sljit_w argw) @@ -783,6 +789,13 @@ static SLJIT_INLINE int emit_op_mem(struct sljit_compiler *compiler, int flags, return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0); } +static SLJIT_INLINE int emit_op_mem2(struct sljit_compiler *compiler, int flags, int reg, int arg1, sljit_w arg1w, int arg2, sljit_w arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + static int emit_op(struct sljit_compiler *compiler, int op, int flags, int dst, sljit_w dstw, int src1, sljit_w src1w, @@ -823,7 +836,7 @@ static int emit_op(struct sljit_compiler *compiler, int op, int flags, src2_r = src2w; } } - if ((src1 & SLJIT_IMM) && src1w && (flags & CUMULATIVE_OP) && !(flags & SRC2_IMM)) { + if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { if ((!(flags & LOGICAL_OP) && (src1w <= SIMM_MAX && src1w >= SIMM_MIN)) || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_MAX))) { flags |= SRC2_IMM; @@ -948,7 +961,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int int src, sljit_w srcw) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - #define inp_flags 0 + #define flags 0 #endif CHECK_ERROR(); @@ -960,60 +973,60 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int switch (GET_OPCODE(op)) { case SLJIT_MOV: - return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOV_UI: - return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOV_SI: - return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOV_UB: - return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw); case SLJIT_MOV_SB: - return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); case SLJIT_MOV_UH: - return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw); case SLJIT_MOV_SH: - return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); case SLJIT_MOVU: - return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOVU_UI: - return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOVU_SI: - return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOVU_UB: - return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw); case SLJIT_MOVU_SB: - return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); case SLJIT_MOVU_UH: - return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw); case SLJIT_MOVU_SH: - return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); + return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); case SLJIT_NOT: - return emit_op(compiler, op, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_NEG: - return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), inp_flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); + return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); case SLJIT_CLZ: - return emit_op(compiler, op, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); } return SLJIT_SUCCESS; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - #undef inp_flags + #undef flags #endif } @@ -1023,7 +1036,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int int src2, sljit_w src2w) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - #define inp_flags 0 + #define flags 0 #endif CHECK_ERROR(); @@ -1035,19 +1048,19 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int switch (GET_OPCODE(op)) { case SLJIT_ADD: case SLJIT_ADDC: - return emit_op(compiler, op, inp_flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: case SLJIT_SUBC: - return emit_op(compiler, op, inp_flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: - return emit_op(compiler, op, inp_flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_AND: case SLJIT_OR: case SLJIT_XOR: - return emit_op(compiler, op, inp_flags | CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, op, flags | CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SHL: case SLJIT_LSHR: @@ -1056,15 +1069,14 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int if (src2 & SLJIT_IMM) src2w &= 0x1f; #else - if (src2 & SLJIT_IMM) - src2w &= 0x3f; + SLJIT_ASSERT_STOP(); #endif - return emit_op(compiler, op, inp_flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); } return SLJIT_SUCCESS; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - #undef inp_flags + #undef flags #endif } @@ -1102,44 +1114,6 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void) #endif } -static int emit_fpu_data_transfer(struct sljit_compiler *compiler, int fpu_reg, int load, int arg, sljit_w argw) -{ - int hi_reg; - - SLJIT_ASSERT(arg & SLJIT_MEM); - - /* Fast loads and stores. */ - if (!(arg & 0xf0)) { - /* Both for (arg & 0xf) == SLJIT_UNUSED and (arg & 0xf) != SLJIT_UNUSED. */ - if (argw <= SIMM_MAX && argw >= SIMM_MIN) - return push_inst(compiler, (load ? LDC1 : SDC1) | S(arg & 0xf) | FT(fpu_reg) | IMM(argw), MOVABLE_INS); - } - - if (arg & 0xf0) { - argw &= 0x3; - hi_reg = (arg >> 4) & 0xf; - if (argw) { - FAIL_IF(push_inst(compiler, SLL_W | T(hi_reg) | D(TMP_REG1) | SH_IMM(argw), DR(TMP_REG1))); - hi_reg = TMP_REG1; - } - FAIL_IF(push_inst(compiler, ADDU_W | S(hi_reg) | T(arg & 0xf) | D(TMP_REG1), DR(TMP_REG1))); - return push_inst(compiler, (load ? LDC1 : SDC1) | S(TMP_REG1) | FT(fpu_reg) | IMM(0), MOVABLE_INS); - } - - /* Use cache. */ - if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) - return push_inst(compiler, (load ? LDC1 : SDC1) | S(TMP_REG3) | FT(fpu_reg) | IMM(argw - compiler->cache_argw), MOVABLE_INS); - - /* Put value to cache. */ - compiler->cache_arg = arg; - compiler->cache_argw = argw; - - FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw)); - if (arg & 0xf) - FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(arg & 0xf) | D(TMP_REG3), DR(TMP_REG3))); - return push_inst(compiler, (load ? LDC1 : SDC1) | S(TMP_REG3) | FT(fpu_reg) | IMM(0), MOVABLE_INS); -} - SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int src, sljit_w srcw) @@ -1154,13 +1128,18 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, in if (GET_OPCODE(op) == SLJIT_FCMP) { if (dst > SLJIT_FLOAT_REG4) { - FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, dst, dstw)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw)); dst = TMP_FREG1; } + else + dst <<= 1; + if (src > SLJIT_FLOAT_REG4) { - FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src, srcw)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0)); src = TMP_FREG2; } + else + src <<= 1; /* src and dst are swapped. */ if (op & SLJIT_SET_E) { @@ -1183,12 +1162,14 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, in return push_inst(compiler, C_UN_D | FT(src) | FS(dst), FCSR_FCC); } - dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst; + dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : (dst << 1); if (src > SLJIT_FLOAT_REG4) { - FAIL_IF(emit_fpu_data_transfer(compiler, dst_fr, 1, src, srcw)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); src = dst_fr; } + else + src <<= 1; switch (op) { case SLJIT_FMOV: @@ -1203,8 +1184,11 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, in break; } - if (dst_fr == TMP_FREG1) - FAIL_IF(emit_fpu_data_transfer(compiler, src, 0, dst, dstw)); + if (dst_fr == TMP_FREG1) { + if (op == SLJIT_FMOV) + dst_fr = src; + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, dst_fr, dst, dstw, 0, 0)); + } return SLJIT_SUCCESS; } @@ -1214,7 +1198,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, in int src1, sljit_w src1w, int src2, sljit_w src2w) { - int dst_fr; + int dst_fr, flags = 0; CHECK_ERROR(); check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); @@ -1222,17 +1206,47 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, in compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst; + dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG2 : (dst << 1); + + if (src1 > SLJIT_FLOAT_REG4) { + if (getput_arg_fast(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= SLOW_SRC1; + } + else + src1 <<= 1; if (src2 > SLJIT_FLOAT_REG4) { - FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w)); - src2 = TMP_FREG2; + if (getput_arg_fast(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= SLOW_SRC2; } + else + src2 <<= 1; - if (src1 > SLJIT_FLOAT_REG4) { - FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, src1, src1w)); - src1 = TMP_FREG1; + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + } } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + + if (flags & SLOW_SRC1) + src1 = TMP_FREG1; + if (flags & SLOW_SRC2) + src2 = TMP_FREG2; switch (op) { case SLJIT_FADD: @@ -1252,8 +1266,8 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, in break; } - if (dst_fr == TMP_FREG1) - FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 0, dst, dstw)); + if (dst_fr == TMP_FREG2) + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG2, dst, dstw, 0, 0)); return SLJIT_SUCCESS; } @@ -1272,6 +1286,8 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compil return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), DR(dst)); else if (dst & SLJIT_MEM) return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw); + + /* SLJIT_UNUSED is also possible, although highly unlikely. */ return SLJIT_SUCCESS; } @@ -1316,7 +1332,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #define JUMP_LENGTH 4 #else -#define JUMP_LENGTH 7 +#error "Implementation required" #endif #define BR_Z(src) \ @@ -1399,10 +1415,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_C_MUL_NOT_OVERFLOW: BR_NZ(OVERFLOW_FLAG); break; - case SLJIT_C_FLOAT_NAN: + case SLJIT_C_FLOAT_UNORDERED: BR_F(); break; - case SLJIT_C_FLOAT_NOT_NAN: + case SLJIT_C_FLOAT_ORDERED: BR_T(); break; default: @@ -1472,17 +1488,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler compiler->cache_argw = 0; flags = ((type & SLJIT_INT_OP) ? INT_DATA : WORD_DATA) | LOAD_DATA; if (src1 & SLJIT_MEM) { - if (getput_arg_fast(compiler, flags, DR(TMP_REG1), src1, src1w)) - PTR_FAIL_IF(compiler->error); - else - PTR_FAIL_IF(getput_arg(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w)); + PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w)); src1 = TMP_REG1; } if (src2 & SLJIT_MEM) { - if (getput_arg_fast(compiler, flags, DR(TMP_REG2), src2, src2w)) - PTR_FAIL_IF(compiler->error); - else - PTR_FAIL_IF(getput_arg(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0)); + PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0)); src2 = TMP_REG2; } @@ -1597,13 +1607,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile compiler->cache_argw = 0; if (src1 > SLJIT_FLOAT_REG4) { - PTR_FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, src1, src1w)); + PTR_FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); src1 = TMP_FREG1; } + else + src1 <<= 1; + if (src2 > SLJIT_FLOAT_REG4) { - PTR_FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w)); + PTR_FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); src2 = TMP_FREG2; } + else + src2 <<= 1; jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF(!jump); @@ -1636,11 +1651,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile inst = C_ULE_D; if_true = 1; break; - case SLJIT_C_FLOAT_NAN: + case SLJIT_C_FLOAT_UNORDERED: inst = C_UN_D; if_true = 1; break; - case SLJIT_C_FLOAT_NOT_NAN: + case SLJIT_C_FLOAT_ORDERED: default: /* Make compilers happy. */ inst = C_UN_D; if_true = 0; @@ -1775,8 +1790,8 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compil dst_ar = EQUAL_FLAG; break; - case SLJIT_C_FLOAT_NAN: - case SLJIT_C_FLOAT_NOT_NAN: + case SLJIT_C_FLOAT_UNORDERED: + case SLJIT_C_FLOAT_ORDERED: FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar)); FAIL_IF(push_inst(compiler, SRL | TA(sugg_dst_ar) | DA(sugg_dst_ar) | SH_IMM(23), sugg_dst_ar)); FAIL_IF(push_inst(compiler, ANDI | SA(sugg_dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); diff --git a/sljit/sljitNativePPC_32.c b/sljit/sljitNativePPC_32.c index 82d0508..8c8e74f 100644 --- a/sljit/sljitNativePPC_32.c +++ b/sljit/sljitNativePPC_32.c @@ -45,6 +45,54 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, int dst, int src1, int src2) { switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + SLJIT_ASSERT(src1 == TMP_REG1); + if (dst != src2) + return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SB) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); + } + else if ((flags & REG_DEST) && op == SLJIT_MOV_SB) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SH) + return push_inst(compiler, EXTSH | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); + + case SLJIT_NEG: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst)); + case SLJIT_ADD: if (flags & ALT_FORM1) { /* Flags does not set: BIN_IMM_EXTS unnecessary. */ @@ -185,52 +233,6 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)); } return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2)); - - case SLJIT_MOV: - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: - SLJIT_ASSERT(src1 == TMP_REG1); - if (dst != src2) - return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); - return SLJIT_SUCCESS; - - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: - SLJIT_ASSERT(src1 == TMP_REG1); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SB) - return push_inst(compiler, EXTSB | S(src2) | A(dst)); - return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); - } - else if ((flags & REG_DEST) && op == SLJIT_MOV_SB) - return push_inst(compiler, EXTSB | S(src2) | A(dst)); - else if (dst != src2) - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; - - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: - SLJIT_ASSERT(src1 == TMP_REG1); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SH) - return push_inst(compiler, EXTSH | S(src2) | A(dst)); - return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); - } - else if (dst != src2) - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; - - case SLJIT_NOT: - SLJIT_ASSERT(src1 == TMP_REG1); - return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); - - case SLJIT_NEG: - SLJIT_ASSERT(src1 == TMP_REG1); - return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2)); - - case SLJIT_CLZ: - SLJIT_ASSERT(src1 == TMP_REG1); - return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst)); } SLJIT_ASSERT_STOP(); diff --git a/sljit/sljitNativePPC_64.c b/sljit/sljitNativePPC_64.c index d13fa53..0483d62 100644 --- a/sljit/sljitNativePPC_64.c +++ b/sljit/sljitNativePPC_64.c @@ -149,6 +149,69 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, int dst, int src1, int src2) { switch (op) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1); + if (dst != src2) + return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SI) + return push_inst(compiler, EXTSW | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SB) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); + } + else if ((flags & REG_DEST) && op == SLJIT_MOV_SB) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SH) + return push_inst(compiler, EXTSH | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1); + UN_EXTS(); + return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); + + case SLJIT_NEG: + SLJIT_ASSERT(src1 == TMP_REG1); + UN_EXTS(); + return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1); + if (flags & ALT_FORM1) + return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst)); + return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst)); + case SLJIT_ADD: if (flags & ALT_FORM1) { /* Flags does not set: BIN_IMM_EXTS unnecessary. */ @@ -321,66 +384,6 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, if (flags & ALT_FORM2) return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2)); return push_inst(compiler, SRAD | RC(flags) | S(src1) | A(dst) | B(src2)); - - case SLJIT_MOV: - SLJIT_ASSERT(src1 == TMP_REG1); - if (dst != src2) - return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); - return SLJIT_SUCCESS; - - case SLJIT_MOV_UI: - case SLJIT_MOV_SI: - SLJIT_ASSERT(src1 == TMP_REG1); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SI) - return push_inst(compiler, EXTSW | S(src2) | A(dst)); - return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0)); - } - else if (dst != src2) - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; - - case SLJIT_MOV_UB: - case SLJIT_MOV_SB: - SLJIT_ASSERT(src1 == TMP_REG1); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SB) - return push_inst(compiler, EXTSB | S(src2) | A(dst)); - return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); - } - else if ((flags & REG_DEST) && op == SLJIT_MOV_SB) - return push_inst(compiler, EXTSB | S(src2) | A(dst)); - else if (dst != src2) - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; - - case SLJIT_MOV_UH: - case SLJIT_MOV_SH: - SLJIT_ASSERT(src1 == TMP_REG1); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_SH) - return push_inst(compiler, EXTSH | S(src2) | A(dst)); - return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); - } - else if (dst != src2) - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; - - case SLJIT_NOT: - SLJIT_ASSERT(src1 == TMP_REG1); - UN_EXTS(); - return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); - - case SLJIT_NEG: - SLJIT_ASSERT(src1 == TMP_REG1); - UN_EXTS(); - return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2)); - - case SLJIT_CLZ: - SLJIT_ASSERT(src1 == TMP_REG1); - if (flags & ALT_FORM1) - return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst)); - return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst)); } SLJIT_ASSERT_STOP(); diff --git a/sljit/sljitNativePPC_common.c b/sljit/sljitNativePPC_common.c index 744f41e..f5738e5 100644 --- a/sljit/sljitNativePPC_common.c +++ b/sljit/sljitNativePPC_common.c @@ -24,7 +24,7 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) { return "PowerPC" SLJIT_CPUINFO; } @@ -86,6 +86,10 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #define TMP_FREG1 (SLJIT_FLOAT_REG4 + 1) #define TMP_FREG2 (SLJIT_FLOAT_REG4 + 2) +static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { + 0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31 +}; + /* --------------------------------------------------------------------- */ /* Instrucion forms */ /* --------------------------------------------------------------------- */ @@ -146,9 +150,6 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #define FNEG (HI(63) | LO(40)) #define FSUB (HI(63) | LO(20)) #define LD (HI(58) | 0) -#define LFD (HI(50)) -#define LFDUX (HI(31) | LO(631)) -#define LFDX (HI(31) | LO(599)) #define LWZ (HI(32)) #define MFCR (HI(31) | LO(19)) #define MFLR (HI(31) | LO(339) | 0x80000) @@ -182,9 +183,6 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #define STD (HI(62) | 0) #define STDU (HI(62) | 1) #define STDUX (HI(31) | LO(181)) -#define STFD (HI(54)) -#define STFDUX (HI(31) | LO(759)) -#define STFDX (HI(31) | LO(727)) #define STW (HI(36)) #define STWU (HI(37)) #define STWUX (HI(31) | LO(183)) @@ -200,10 +198,6 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #define SIMM_MIN (-0x8000) #define UIMM_MAX (0xffff) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { - 0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31 -}; - #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_w addr, void* func) { @@ -423,19 +417,26 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #endif } +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + /* inp_flags: */ /* Creates an index in data_transfer_insts array. */ +#define LOAD_DATA 0x01 +#define INDEXED 0x02 +#define WRITE_BACK 0x04 #define WORD_DATA 0x00 -#define BYTE_DATA 0x01 -#define HALF_DATA 0x02 -#define INT_DATA 0x03 -#define SIGNED_DATA 0x04 -#define LOAD_DATA 0x08 -#define WRITE_BACK 0x10 -#define INDEXED 0x20 +#define BYTE_DATA 0x08 +#define HALF_DATA 0x10 +#define INT_DATA 0x18 +#define SIGNED_DATA 0x20 +/* Separates integer and floating point registers */ +#define GPR_REG 0x3f +#define DOUBLE_DATA 0x40 -#define MEM_MASK 0x3f +#define MEM_MASK 0x7f /* Other inp_flags. */ @@ -480,11 +481,6 @@ ALT_FORM6 0x200000 */ #define STACK_LOAD LD #endif -static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, - int dst, sljit_w dstw, - int src1, sljit_w src1w, - int src2, sljit_w src2w); - SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { CHECK_ERROR(); @@ -567,7 +563,6 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, { CHECK_ERROR(); check_sljit_emit_return(compiler, op, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); @@ -617,110 +612,127 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, #define UPDATE_REQ 0x20000 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) -#define ARCH_DEPEND(a, b) a -#define GET_INST_CODE(inst) (inst) +#define ARCH_32_64(a, b) a +#define INST_CODE_AND_DST(inst, flags, reg) \ + ((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) #else -#define ARCH_DEPEND(a, b) b -#define GET_INST_CODE(index) ((inst) & ~(ADDR_MODE2 | UPDATE_REQ)) +#define ARCH_32_64(a, b) b +#define INST_CODE_AND_DST(inst, flags, reg) \ + (((inst) & ~(ADDR_MODE2 | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) #endif -static SLJIT_CONST sljit_ins data_transfer_insts[64] = { +static SLJIT_CONST sljit_ins data_transfer_insts[64 + 4] = { + +/* -------- Unsigned -------- */ + +/* Word. */ + +/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */), +/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */), +/* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), +/* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), + +/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */), +/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */), +/* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), +/* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), -/* No write-back. */ +/* Byte. */ -/* i n s u w */ ARCH_DEPEND(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */), -/* i n s u b */ HI(38) /* stb */, -/* i n s u h */ HI(44) /* sth*/, -/* i n s u i */ HI(36) /* stw */, +/* u b n i s */ HI(38) /* stb */, +/* u b n i l */ HI(34) /* lbz */, +/* u b n x s */ HI(31) | LO(215) /* stbx */, +/* u b n x l */ HI(31) | LO(87) /* lbzx */, -/* i n s s w */ ARCH_DEPEND(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */), -/* i n s s b */ HI(38) /* stb */, -/* i n s s h */ HI(44) /* sth*/, -/* i n s s i */ HI(36) /* stw */, +/* u b w i s */ HI(39) /* stbu */, +/* u b w i l */ HI(35) /* lbzu */, +/* u b w x s */ HI(31) | LO(247) /* stbux */, +/* u b w x l */ HI(31) | LO(119) /* lbzux */, -/* i n l u w */ ARCH_DEPEND(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */), -/* i n l u b */ HI(34) /* lbz */, -/* i n l u h */ HI(40) /* lhz */, -/* i n l u i */ HI(32) /* lwz */, +/* Half. */ -/* i n l s w */ ARCH_DEPEND(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */), -/* i n l s b */ HI(34) /* lbz */ /* EXTS_REQ */, -/* i n l s h */ HI(42) /* lha */, -/* i n l s i */ ARCH_DEPEND(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x2 /* lwa */), +/* u h n i s */ HI(44) /* sth */, +/* u h n i l */ HI(40) /* lhz */, +/* u h n x s */ HI(31) | LO(407) /* sthx */, +/* u h n x l */ HI(31) | LO(279) /* lhzx */, -/* Write-back. */ +/* u h w i s */ HI(45) /* sthu */, +/* u h w i l */ HI(41) /* lhzu */, +/* u h w x s */ HI(31) | LO(439) /* sthux */, +/* u h w x l */ HI(31) | LO(311) /* lhzux */, -/* i w s u w */ ARCH_DEPEND(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */), -/* i w s u b */ HI(39) /* stbu */, -/* i w s u h */ HI(45) /* sthu */, -/* i w s u i */ HI(37) /* stwu */, +/* Int. */ -/* i w s s w */ ARCH_DEPEND(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */), -/* i w s s b */ HI(39) /* stbu */, -/* i w s s h */ HI(45) /* sthu */, -/* i w s s i */ HI(37) /* stwu */, +/* u i n i s */ HI(36) /* stw */, +/* u i n i l */ HI(32) /* lwz */, +/* u i n x s */ HI(31) | LO(151) /* stwx */, +/* u i n x l */ HI(31) | LO(23) /* lwzx */, -/* i w l u w */ ARCH_DEPEND(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */), -/* i w l u b */ HI(35) /* lbzu */, -/* i w l u h */ HI(41) /* lhzu */, -/* i w l u i */ HI(33) /* lwzu */, +/* u i w i s */ HI(37) /* stwu */, +/* u i w i l */ HI(33) /* lwzu */, +/* u i w x s */ HI(31) | LO(183) /* stwux */, +/* u i w x l */ HI(31) | LO(55) /* lwzux */, -/* i w l s w */ ARCH_DEPEND(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */), -/* i w l s b */ HI(35) /* lbzu */ /* EXTS_REQ */, -/* i w l s h */ HI(43) /* lhau */, -/* i w l s i */ ARCH_DEPEND(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | UPDATE_REQ | 0x2 /* lwa */), +/* -------- Signed -------- */ -/* ---------- */ -/* Indexed */ -/* ---------- */ +/* Word. */ -/* No write-back. */ +/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */), +/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */), +/* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), +/* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), -/* x n s u w */ ARCH_DEPEND(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), -/* x n s u b */ HI(31) | LO(215) /* stbx */, -/* x n s u h */ HI(31) | LO(407) /* sthx */, -/* x n s u i */ HI(31) | LO(151) /* stwx */, +/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */), +/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */), +/* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), +/* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), -/* x n s s w */ ARCH_DEPEND(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), -/* x n s s b */ HI(31) | LO(215) /* stbx */, -/* x n s s h */ HI(31) | LO(407) /* sthx */, -/* x n s s i */ HI(31) | LO(151) /* stwx */, +/* Byte. */ -/* x n l u w */ ARCH_DEPEND(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), -/* x n l u b */ HI(31) | LO(87) /* lbzx */, -/* x n l u h */ HI(31) | LO(279) /* lhzx */, -/* x n l u i */ HI(31) | LO(23) /* lwzx */, +/* s b n i s */ HI(38) /* stb */, +/* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */, +/* s b n x s */ HI(31) | LO(215) /* stbx */, +/* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */, -/* x n l s w */ ARCH_DEPEND(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), -/* x n l s b */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */, -/* x n l s h */ HI(31) | LO(343) /* lhax */, -/* x n l s i */ ARCH_DEPEND(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */), +/* s b w i s */ HI(39) /* stbu */, +/* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */, +/* s b w x s */ HI(31) | LO(247) /* stbux */, +/* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */, -/* Write-back. */ +/* Half. */ -/* x w s u w */ ARCH_DEPEND(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), -/* x w s u b */ HI(31) | LO(247) /* stbux */, -/* x w s u h */ HI(31) | LO(439) /* sthux */, -/* x w s u i */ HI(31) | LO(183) /* stwux */, +/* s h n i s */ HI(44) /* sth */, +/* s h n i l */ HI(42) /* lha */, +/* s h n x s */ HI(31) | LO(407) /* sthx */, +/* s h n x l */ HI(31) | LO(343) /* lhax */, -/* x w s s w */ ARCH_DEPEND(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), -/* x w s s b */ HI(31) | LO(247) /* stbux */, -/* x w s s h */ HI(31) | LO(439) /* sthux */, -/* x w s s i */ HI(31) | LO(183) /* stwux */, +/* s h w i s */ HI(45) /* sthu */, +/* s h w i l */ HI(43) /* lhau */, +/* s h w x s */ HI(31) | LO(439) /* sthux */, +/* s h w x l */ HI(31) | LO(375) /* lhaux */, -/* x w l u w */ ARCH_DEPEND(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), -/* x w l u b */ HI(31) | LO(119) /* lbzux */, -/* x w l u h */ HI(31) | LO(311) /* lhzux */, -/* x w l u i */ HI(31) | LO(55) /* lwzux */, +/* Int. */ -/* x w l s w */ ARCH_DEPEND(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), -/* x w l s b */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */, -/* x w l s h */ HI(31) | LO(375) /* lhaux */, -/* x w l s i */ ARCH_DEPEND(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */) +/* s i n i s */ HI(36) /* stw */, +/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x2 /* lwa */), +/* s i n x s */ HI(31) | LO(151) /* stwx */, +/* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */), + +/* s i w i s */ HI(37) /* stwu */, +/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | UPDATE_REQ | 0x2 /* lwa */), +/* s i w x s */ HI(31) | LO(183) /* stwux */, +/* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */), + +/* -------- Double -------- */ + +/* d n i s */ HI(54) /* stfd */, +/* d n i l */ HI(50) /* lfd */, +/* d n x s */ HI(31) | LO(727) /* stfdx */, +/* d n x l */ HI(31) | LO(599) /* lfdx */, }; -#undef ARCH_DEPEND +#undef ARCH_32_64 /* Simple cases, (no caching is required). */ static int getput_arg_fast(struct sljit_compiler *compiler, int inp_flags, int reg, int arg, sljit_w argw) @@ -739,7 +751,7 @@ static int getput_arg_fast(struct sljit_compiler *compiler, int inp_flags, int r inst = data_transfer_insts[(inp_flags & ~WRITE_BACK) & MEM_MASK]; SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - push_inst(compiler, GET_INST_CODE(inst) | D(reg) | IMM(argw)); + push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | IMM(argw)); return -1; } #else @@ -749,11 +761,11 @@ static int getput_arg_fast(struct sljit_compiler *compiler, int inp_flags, int r if (inp_flags & ARG_TEST) return 1; - push_inst(compiler, GET_INST_CODE(inst) | D(reg) | IMM(argw)); + push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | IMM(argw)); return -1; } #endif - return (inp_flags & ARG_TEST) ? SLJIT_SUCCESS : 0; + return 0; } if (!(arg & 0xf0)) { @@ -764,7 +776,7 @@ static int getput_arg_fast(struct sljit_compiler *compiler, int inp_flags, int r inst = data_transfer_insts[inp_flags & MEM_MASK]; SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | IMM(argw)); + push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | IMM(argw)); return -1; } #else @@ -780,7 +792,7 @@ static int getput_arg_fast(struct sljit_compiler *compiler, int inp_flags, int r arg = tmp_reg | SLJIT_MEM; argw = 0; } - push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | IMM(argw)); + push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | IMM(argw)); return -1; } #endif @@ -790,10 +802,10 @@ static int getput_arg_fast(struct sljit_compiler *compiler, int inp_flags, int r return 1; inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B((arg >> 4) & 0xf)); + push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B((arg >> 4) & 0xf)); return -1; } - return (inp_flags & ARG_TEST) ? SLJIT_SUCCESS : 0; + return 0; } /* See getput_arg below. @@ -801,17 +813,13 @@ static int getput_arg_fast(struct sljit_compiler *compiler, int inp_flags, int r uses word arguments without write back. */ static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw) { - SLJIT_ASSERT(arg & SLJIT_MEM); - SLJIT_ASSERT(next_arg & SLJIT_MEM); + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); - if (!(arg & 0xf)) { - if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX)) - return 1; - return 0; - } + if (!(arg & 0xf)) + return (next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX); if (arg & 0xf0) - return 0; + return ((arg & 0xf0) == (next_arg & 0xf0) && (argw & 0x3) == (next_argw & 0x3)); if (argw <= SIMM_MAX && argw >= SIMM_MIN) { if (arg == next_arg && (next_argw >= SIMM_MAX && next_argw <= SIMM_MIN)) @@ -844,14 +852,10 @@ static int getput_arg(struct sljit_compiler *compiler, int inp_flags, int reg, i SLJIT_ASSERT(arg & SLJIT_MEM); - tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3; - if ((arg & 0xf) == tmp_r) { - /* Special case for "mov reg, [reg, ... ]". - Caching would not happen anyway. */ - tmp_r = TMP_REG3; - compiler->cache_arg = 0; - compiler->cache_argw = 0; - } + tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1; + /* Special case for "mov reg, [reg, ... ]". */ + if ((arg & 0xf) == tmp_r) + tmp_r = TMP_REG1; if (!(arg & 0xf)) { inst = data_transfer_insts[(inp_flags & ~WRITE_BACK) & MEM_MASK]; @@ -859,7 +863,7 @@ static int getput_arg(struct sljit_compiler *compiler, int inp_flags, int reg, i argw = argw - compiler->cache_argw; ADJUST_CACHED_IMM(argw); SLJIT_ASSERT(!(inst & UPDATE_REQ)); - return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(TMP_REG3) | IMM(argw)); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(argw)); } if ((next_arg & SLJIT_MEM) && (argw - next_argw <= SIMM_MAX || next_argw - argw <= SIMM_MAX)) { @@ -871,21 +875,31 @@ static int getput_arg(struct sljit_compiler *compiler, int inp_flags, int reg, i } FAIL_IF(load_immediate(compiler, tmp_r, argw)); - return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(tmp_r)); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r)); } if (SLJIT_UNLIKELY(arg & 0xf0)) { argw &= 0x3; /* Otherwise getput_arg_fast would capture it. */ SLJIT_ASSERT(argw); + + if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg && argw == compiler->cache_argw) + tmp_r = TMP_REG3; + else { + if ((arg & 0xf0) == (next_arg & 0xf0) && argw == (next_argw & 0x3)) { + compiler->cache_arg = SLJIT_MEM | (arg & 0xf0); + compiler->cache_argw = argw; + tmp_r = TMP_REG3; + } #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - FAIL_IF(push_inst(compiler, RLWINM | S((arg >> 4) & 0xf) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1))); + FAIL_IF(push_inst(compiler, RLWINM | S((arg >> 4) & 0xf) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1))); #else - FAIL_IF(push_inst(compiler, RLDI(tmp_r, (arg >> 4) & 0xf, argw, 63 - argw, 1))); + FAIL_IF(push_inst(compiler, RLDI(tmp_r, (arg >> 4) & 0xf, argw, 63 - argw, 1))); #endif + } inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B(tmp_r)); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(tmp_r)); } inst = data_transfer_insts[inp_flags & MEM_MASK]; @@ -894,13 +908,13 @@ static int getput_arg(struct sljit_compiler *compiler, int inp_flags, int reg, i SLJIT_ASSERT(!(inp_flags & WRITE_BACK)); argw = argw - compiler->cache_argw; ADJUST_CACHED_IMM(argw); - return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(TMP_REG3) | IMM(argw)); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(argw)); } if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) { inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B(TMP_REG3)); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(TMP_REG3)); } if (argw == next_argw && (next_arg & SLJIT_MEM)) { @@ -912,7 +926,7 @@ static int getput_arg(struct sljit_compiler *compiler, int inp_flags, int reg, i inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B(TMP_REG3)); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(TMP_REG3)); } if (arg == next_arg && !(inp_flags & WRITE_BACK) && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX)) { @@ -923,17 +937,24 @@ static int getput_arg(struct sljit_compiler *compiler, int inp_flags, int reg, i compiler->cache_arg = arg; compiler->cache_argw = argw; - return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(TMP_REG3)); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3)); } /* Get the indexed version instead of the normal one. */ inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); FAIL_IF(load_immediate(compiler, tmp_r, argw)); - return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B(tmp_r)); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(tmp_r)); } -static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, +static SLJIT_INLINE int emit_op_mem2(struct sljit_compiler *compiler, int flags, int reg, int arg1, sljit_w arg1w, int arg2, sljit_w arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +static int emit_op(struct sljit_compiler *compiler, int op, int input_flags, int dst, sljit_w dstw, int src1, sljit_w src1w, int src2, sljit_w src2w) @@ -946,7 +967,7 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, int src1_r; int src2_r; int sugg_src2_r = TMP_REG2; - int flags = inp_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS); + int flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS); compiler->cache_arg = 0; compiler->cache_argw = 0; @@ -965,7 +986,7 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, } else { SLJIT_ASSERT(dst & SLJIT_MEM); - if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) { + if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) { flags |= FAST_DEST; dst_r = TMP_REG2; } @@ -982,8 +1003,9 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, } else if (src1 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if ((inp_flags & 0x3) == INT_DATA) { - if (inp_flags & SIGNED_DATA) + SLJIT_COMPILE_ASSERT(INT_DATA == 0x18, int_data_check1); + if ((input_flags & 0x18) == INT_DATA) { + if (input_flags & SIGNED_DATA) src1w = (signed int)src1w; else src1w = (unsigned int)src1w; @@ -992,7 +1014,7 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); src1_r = TMP_REG1; } - else if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) { + else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) { FAIL_IF(compiler->error); src1_r = TMP_REG1; } @@ -1008,8 +1030,9 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, } else if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if ((inp_flags & 0x3) == INT_DATA) { - if (inp_flags & SIGNED_DATA) + SLJIT_COMPILE_ASSERT(INT_DATA == 0x18, int_data_check2); + if ((input_flags & 0x18) == INT_DATA) { + if (input_flags & SIGNED_DATA) src2w = (signed int)src2w; else src2w = (unsigned int)src2w; @@ -1018,7 +1041,7 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); src2_r = sugg_src2_r; } - else if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) { + else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) { FAIL_IF(compiler->error); src2_r = sugg_src2_r; } @@ -1029,26 +1052,26 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, All arguments are complex addressing modes, and it is a binary operator. */ if (src1_r == 0 && src2_r == 0 && dst_r == 0) { if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); } else { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); } src1_r = TMP_REG1; src2_r = TMP_REG2; } else if (src1_r == 0 && src2_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); src1_r = TMP_REG1; } else if (src1_r == 0 && dst_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); src1_r = TMP_REG1; } else if (src2_r == 0 && dst_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); src2_r = sugg_src2_r; } @@ -1056,12 +1079,12 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, dst_r = TMP_REG2; if (src1_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0)); + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0)); src1_r = TMP_REG1; } if (src2_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0)); + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0)); src2_r = sugg_src2_r; } @@ -1069,9 +1092,9 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, if (flags & (FAST_DEST | SLOW_DEST)) { if (flags & FAST_DEST) - FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw)); + FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw)); else - FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0)); + FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0)); } return SLJIT_SUCCESS; } @@ -1118,23 +1141,26 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int return SLJIT_SUCCESS; } +#define EMIT_MOV(type, type_flags, type_cast) \ + emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw) + SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int src, sljit_w srcw) { - int inp_flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0; + int flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0; CHECK_ERROR(); check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); - if ((src & SLJIT_IMM) && srcw == 0) + if ((src & SLJIT_IMM) && srcw == 0 && GET_OPCODE(op) >= SLJIT_NOT) src = ZERO_REG; #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (op & SLJIT_INT_OP) { - inp_flags |= INT_DATA | SIGNED_DATA; + flags |= INT_DATA | SIGNED_DATA; if (src & SLJIT_IMM) srcw = (int)srcw; } @@ -1144,64 +1170,66 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int switch (GET_OPCODE(op)) { case SLJIT_MOV: - return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOV_UI: - return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOV_SI: - return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOV_UB: - return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw); + return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA, (unsigned char)); case SLJIT_MOV_SB: - return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); + return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, (signed char)); case SLJIT_MOV_UH: - return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw); + return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA, (unsigned short)); case SLJIT_MOV_SH: - return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); + return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, (signed short)); case SLJIT_MOVU: - return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOVU_UI: - return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOVU_SI: - return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_MOVU_UB: - return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw); + return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, (unsigned char)); case SLJIT_MOVU_SB: - return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); + return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (signed char)); case SLJIT_MOVU_UH: - return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw); + return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, (unsigned short)); case SLJIT_MOVU_SH: - return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); + return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, (signed short)); case SLJIT_NOT: - return emit_op(compiler, SLJIT_NOT, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_NEG: - return emit_op(compiler, SLJIT_NEG, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_CLZ: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - return emit_op(compiler, SLJIT_CLZ, inp_flags | (!(op & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_CLZ, flags | (!(op & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); #else - return emit_op(compiler, SLJIT_CLZ, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw); #endif } return SLJIT_SUCCESS; } +#undef EMIT_MOV + #define TEST_SL_IMM(src, srcw) \ (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN) @@ -1240,7 +1268,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int int src1, sljit_w src1w, int src2, sljit_w src2w) { - int inp_flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0; + int flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0; CHECK_ERROR(); check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); @@ -1255,13 +1283,13 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (op & SLJIT_INT_OP) { - inp_flags |= INT_DATA | SIGNED_DATA; + flags |= INT_DATA | SIGNED_DATA; if (src1 & SLJIT_IMM) src1w = (src1w << 32) >> 32; if (src2 & SLJIT_IMM) src2w = (src2w << 32) >> 32; if (GET_FLAGS(op)) - inp_flags |= ALT_SIGN_EXT; + flags |= ALT_SIGN_EXT; } #endif if (op & SLJIT_SET_O) @@ -1272,63 +1300,63 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { if (TEST_SL_IMM(src2, src2w)) { compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); } if (TEST_SH_IMM(src2, src2w)) { compiler->imm = (src2w >> 16) & 0xffff; - return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SH_IMM(src1, src1w)) { compiler->imm = (src1w >> 16) & 0xffff; - return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); } /* Range between -1 and -32768 is covered above. */ if (TEST_ADD_IMM(src2, src2w)) { compiler->imm = src2w & 0xffffffff; - return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_ADD_IMM(src1, src1w)) { compiler->imm = src1w & 0xffffffff; - return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); } } if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) { if (TEST_SL_IMM(src2, src2w)) { compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); } } - return emit_op(compiler, SLJIT_ADD, inp_flags, dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: - return emit_op(compiler, SLJIT_ADDC, inp_flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { if (TEST_SL_IMM(src2, -src2w)) { compiler->imm = (-src2w) & 0xffff; - return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); } if (TEST_SH_IMM(src2, -src2w)) { compiler->imm = ((-src2w) >> 16) & 0xffff; - return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); } /* Range between -1 and -32768 is covered above. */ if (TEST_ADD_IMM(src2, -src2w)) { compiler->imm = -src2w & 0xffffffff; - return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } } if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) { @@ -1336,55 +1364,55 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ if (TEST_SL_IMM(src2, src2w)) { compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); } if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) { compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); } } if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) { /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ if (TEST_UL_IMM(src2, src2w)) { compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } - return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); } if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) { compiler->imm = src2w; - return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } - return emit_op(compiler, SLJIT_SUB, inp_flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); } if (!(op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O))) { if (TEST_SL_IMM(src2, -src2w)) { compiler->imm = (-src2w) & 0xffff; - return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } } /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ - return emit_op(compiler, SLJIT_SUB, inp_flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: - return emit_op(compiler, SLJIT_SUBC, inp_flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (op & SLJIT_INT_OP) - inp_flags |= ALT_FORM2; + flags |= ALT_FORM2; #endif if (!GET_FLAGS(op)) { if (TEST_SL_IMM(src2, src2w)) { compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_MUL, inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_MUL, inp_flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); } } - return emit_op(compiler, SLJIT_MUL, inp_flags, dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w); case SLJIT_AND: case SLJIT_OR: @@ -1393,45 +1421,45 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) { if (TEST_UL_IMM(src2, src2w)) { compiler->imm = src2w; - return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_UL_IMM(src1, src1w)) { compiler->imm = src1w; - return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); } if (TEST_UH_IMM(src2, src2w)) { compiler->imm = (src2w >> 16) & 0xffff; - return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_UH_IMM(src1, src1w)) { compiler->imm = (src1w >> 16) & 0xffff; - return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); } } if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) { if (TEST_UI_IMM(src2, src2w)) { compiler->imm = src2w; - return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_UI_IMM(src1, src1w)) { compiler->imm = src1w; - return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); } } - return emit_op(compiler, GET_OPCODE(op), inp_flags, dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SHL: case SLJIT_LSHR: case SLJIT_ASHR: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (op & SLJIT_INT_OP) - inp_flags |= ALT_FORM2; + flags |= ALT_FORM2; #endif if (src2 & SLJIT_IMM) { compiler->imm = src2w; - return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); } - return emit_op(compiler, GET_OPCODE(op), inp_flags, dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w); } return SLJIT_SUCCESS; @@ -1463,44 +1491,6 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void) return 1; } -static int emit_fpu_data_transfer(struct sljit_compiler *compiler, int fpu_reg, int load, int arg, sljit_w argw) -{ - SLJIT_ASSERT(arg & SLJIT_MEM); - - /* Fast loads and stores. */ - if (!(arg & 0xf0)) { - /* Both for (arg & 0xf) == SLJIT_UNUSED and (arg & 0xf) != SLJIT_UNUSED. */ - if (argw <= SIMM_MAX && argw >= SIMM_MIN) - return push_inst(compiler, (load ? LFD : STFD) | FD(fpu_reg) | A(arg & 0xf) | IMM(argw)); - } - - if (arg & 0xf0) { - argw &= 0x3; - if (argw) { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - FAIL_IF(push_inst(compiler, RLWINM | S((arg >> 4) & 0xf) | A(TMP_REG2) | (argw << 11) | ((31 - argw) << 1))); -#else - FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, (arg >> 4) & 0xf, argw, 63 - argw, 1))); -#endif - return push_inst(compiler, (load ? LFDX : STFDX) | FD(fpu_reg) | A(arg & 0xf) | B(TMP_REG2)); - } - return push_inst(compiler, (load ? LFDX : STFDX) | FD(fpu_reg) | A(arg & 0xf) | B((arg >> 4) & 0xf)); - } - - /* Use cache. */ - if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) - return push_inst(compiler, (load ? LFD : STFD) | FD(fpu_reg) | A(TMP_REG3) | IMM(argw - compiler->cache_argw)); - - /* Put value to cache. */ - compiler->cache_arg = arg; - compiler->cache_argw = argw; - - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - if (!(arg & 0xf)) - return push_inst(compiler, (load ? LFDX : STFDX) | FD(fpu_reg) | A(0) | B(TMP_REG3)); - return push_inst(compiler, (load ? LFDUX : STFDUX) | FD(fpu_reg) | A(TMP_REG3) | B(arg & 0xf)); -} - SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int src, sljit_w srcw) @@ -1515,20 +1505,22 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, in if (GET_OPCODE(op) == SLJIT_FCMP) { if (dst > SLJIT_FLOAT_REG4) { - FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, dst, dstw)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw)); dst = TMP_FREG1; } + if (src > SLJIT_FLOAT_REG4) { - FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src, srcw)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0)); src = TMP_FREG2; } + return push_inst(compiler, FCMPU | CRD(4) | FA(dst) | FB(src)); } dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst; if (src > SLJIT_FLOAT_REG4) { - FAIL_IF(emit_fpu_data_transfer(compiler, dst_fr, 1, src, srcw)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); src = dst_fr; } @@ -1545,8 +1537,11 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, in break; } - if (dst_fr == TMP_FREG1) - FAIL_IF(emit_fpu_data_transfer(compiler, src, 0, dst, dstw)); + if (dst_fr == TMP_FREG1) { + if (op == SLJIT_FMOV) + dst_fr = src; + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, dst_fr, dst, dstw, 0, 0)); + } return SLJIT_SUCCESS; } @@ -1556,7 +1551,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, in int src1, sljit_w src1w, int src2, sljit_w src2w) { - int dst_fr; + int dst_fr, flags = 0; CHECK_ERROR(); check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); @@ -1564,17 +1559,43 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, in compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst; + dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG2 : dst; + + if (src1 > SLJIT_FLOAT_REG4) { + if (getput_arg_fast(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= ALT_FORM1; + } if (src2 > SLJIT_FLOAT_REG4) { - FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w)); - src2 = TMP_FREG2; + if (getput_arg_fast(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= ALT_FORM2; } - if (src1 > SLJIT_FLOAT_REG4) { - FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, src1, src1w)); - src1 = TMP_FREG1; + if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + } } + else if (flags & ALT_FORM1) + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + else if (flags & ALT_FORM2) + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + + if (flags & ALT_FORM1) + src1 = TMP_FREG1; + if (flags & ALT_FORM2) + src2 = TMP_FREG2; switch (op) { case SLJIT_FADD: @@ -1594,8 +1615,8 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, in break; } - if (dst_fr == TMP_FREG1) - FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 0, dst, dstw)); + if (dst_fr == TMP_FREG2) + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG2, dst, dstw, 0, 0)); return SLJIT_SUCCESS; } @@ -1617,6 +1638,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compil return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); } + /* SLJIT_UNUSED is also possible, although highly unlikely. */ return SLJIT_SUCCESS; } @@ -1658,7 +1680,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi return label; } -static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, int type) +static sljit_ins get_bo_bi_flags(int type) { switch (type) { case SLJIT_C_EQUAL: @@ -1709,10 +1731,10 @@ static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, int type) case SLJIT_C_FLOAT_NOT_EQUAL: return (4 << 21) | ((4 + 2) << 16); - case SLJIT_C_FLOAT_NAN: + case SLJIT_C_FLOAT_UNORDERED: return (12 << 21) | ((4 + 3) << 16); - case SLJIT_C_FLOAT_NOT_NAN: + case SLJIT_C_FLOAT_ORDERED: return (4 << 21) | ((4 + 3) << 16); default: @@ -1729,7 +1751,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile CHECK_ERROR_PTR(); check_sljit_emit_jump(compiler, type); - bo_bi_flags = get_bo_bi_flags(compiler, type & 0xff); + bo_bi_flags = get_bo_bi_flags(type & 0xff); if (!bo_bi_flags) return NULL; @@ -1751,7 +1773,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw) { - sljit_ins bo_bi_flags; struct sljit_jump *jump = NULL; int src_r; @@ -1759,9 +1780,6 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, i check_sljit_emit_ijump(compiler, type, src, srcw); ADJUST_LOCAL_OFFSET(src, srcw); - bo_bi_flags = get_bo_bi_flags(compiler, type); - FAIL_IF(!bo_bi_flags); - if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) src_r = src; else if (src & SLJIT_IMM) { @@ -1781,7 +1799,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, i FAIL_IF(push_inst(compiler, MTCTR | S(src_r))); if (jump) jump->addr = compiler->size; - return push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)); + return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0)); } /* Get a bit from CR, all other bits are zeroed. */ @@ -1875,11 +1893,11 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compil INVERT_BIT(reg); break; - case SLJIT_C_FLOAT_NAN: + case SLJIT_C_FLOAT_UNORDERED: GET_CR_BIT(4 + 3, reg); break; - case SLJIT_C_FLOAT_NOT_NAN: + case SLJIT_C_FLOAT_ORDERED: GET_CR_BIT(4 + 3, reg); INVERT_BIT(reg); break; @@ -1890,11 +1908,9 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compil } if (GET_OPCODE(op) == SLJIT_OR) - return emit_op(compiler, GET_OPCODE(op), GET_FLAGS(op) ? ALT_SET_FLAGS : 0, dst, dstw, dst, dstw, TMP_REG2, 0); + return emit_op(compiler, SLJIT_OR, GET_FLAGS(op) ? ALT_SET_FLAGS : 0, dst, dstw, dst, dstw, TMP_REG2, 0); - if (reg == TMP_REG2) - return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); - return SLJIT_SUCCESS; + return (reg == TMP_REG2) ? emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0) : SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value) diff --git a/sljit/sljitNativeSPARC_32.c b/sljit/sljitNativeSPARC_32.c new file mode 100644 index 0000000..8c4a84d --- /dev/null +++ b/sljit/sljitNativeSPARC_32.c @@ -0,0 +1,163 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +static int load_immediate(struct sljit_compiler *compiler, int dst, sljit_w imm) +{ + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, OR | D(dst) | S1(0) | IMM(imm), DR(dst)); + + FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((imm >> 10) & 0x3fffff), DR(dst))); + return (imm & 0x3ff) ? push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (imm & 0x3ff), DR(dst)) : SLJIT_SUCCESS; +} + +#define ARG2(flags, src2) ((flags & SRC2_IMM) ? IMM(src2) : S2(src2)) + +static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, int flags, + int dst, int src1, sljit_w src2) +{ + SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same); + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_UB) + return push_inst(compiler, AND | D(dst) | S1(src2) | IMM(0xff), DR(dst)); + FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst))); + return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst))); + return push_inst(compiler, (op == SLJIT_MOV_SH ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + /* sparc 32 does not support SLJIT_KEEP_FLAGS. Not sure I can fix this. */ + FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS)); + FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS | (flags & SET_FLAGS))); + FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst))); + + /* Loop. */ + FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS)); + FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS)); + return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS | (flags & SET_FLAGS)); + + case SLJIT_ADD: + return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_ADDC: + return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_SUB: + return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_SUBC: + return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_MUL: + FAIL_IF(push_inst(compiler, SMUL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); + if (!(flags & SET_FLAGS)) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, RDY | D(TMP_REG4), DR(TMP_REG4))); + return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_REG4), MOVABLE_INS | SET_FLAGS); + + case SLJIT_AND: + return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_OR: + return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_XOR: + return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_SHL: + FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); + return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); + + case SLJIT_LSHR: + FAIL_IF(push_inst(compiler, SRL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); + return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); + + case SLJIT_ASHR: + FAIL_IF(push_inst(compiler, SRA | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); + return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); + } + + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE int emit_const(struct sljit_compiler *compiler, int dst, sljit_w init_value) +{ + FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((init_value >> 10) & 0x3fffff), DR(dst))); + return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff), DR(dst)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffc00000) | ((new_addr >> 10) & 0x3fffff); + inst[1] = (inst[1] & 0xfffffc00) | (new_addr & 0x3ff); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffc00000) | ((new_constant >> 10) & 0x3fffff); + inst[1] = (inst[1] & 0xfffffc00) | (new_constant & 0x3ff); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} diff --git a/sljit/sljitNativeSPARC_common.c b/sljit/sljitNativeSPARC_common.c new file mode 100644 index 0000000..85a320c --- /dev/null +++ b/sljit/sljitNativeSPARC_common.c @@ -0,0 +1,1286 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +{ + return "SPARC" SLJIT_CPUINFO; +} + +/* Length of an instruction word + Both for sparc-32 and sparc-64 */ +typedef sljit_ui sljit_ins; + +/* TMP_REG2 is not used by getput_arg */ +#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) +#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) +#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) +#define TMP_REG4 (SLJIT_NO_REGISTERS + 4) +#define LINK_REG (SLJIT_NO_REGISTERS + 5) + +#define TMP_FREG1 ((SLJIT_FLOAT_REG4 + 1) << 1) +#define TMP_FREG2 ((SLJIT_FLOAT_REG4 + 2) << 1) + +static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { + 0, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 14, 1, 24, 25, 26, 15 +}; + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ + +#define D(d) (reg_map[d] << 25) +#define DA(d) ((d) << 25) +#define S1(s1) (reg_map[s1] << 14) +#define S2(s2) (reg_map[s2]) +#define S1A(s1) ((s1) << 14) +#define S2A(s2) (s2) +#define IMM_ARG 0x2000 +#define DOP(op) ((op) << 5) +#define IMM(imm) (((imm) & 0x1fff) | IMM_ARG) + +#define DR(dr) (reg_map[dr]) +#define OPC1(opcode) ((opcode) << 30) +#define OPC2(opcode) ((opcode) << 22) +#define OPC3(opcode) ((opcode) << 19) +#define SET_FLAGS OPC3(0x10) + +#define ADD (OPC1(0x2) | OPC3(0x00)) +#define ADDC (OPC1(0x2) | OPC3(0x08)) +#define AND (OPC1(0x2) | OPC3(0x01)) +#define ANDN (OPC1(0x2) | OPC3(0x05)) +#define CALL (OPC1(0x1)) +#define FABSS (OPC1(0x2) | OPC3(0x34) | DOP(0x09)) +#define FADDD (OPC1(0x2) | OPC3(0x34) | DOP(0x42)) +#define FCMPD (OPC1(0x2) | OPC3(0x35) | DOP(0x52)) +#define FDIVD (OPC1(0x2) | OPC3(0x34) | DOP(0x4e)) +#define FMOVS (OPC1(0x2) | OPC3(0x34) | DOP(0x01)) +#define FMULD (OPC1(0x2) | OPC3(0x34) | DOP(0x4a)) +#define FNEGS (OPC1(0x2) | OPC3(0x34) | DOP(0x05)) +#define FSUBD (OPC1(0x2) | OPC3(0x34) | DOP(0x46)) +#define JMPL (OPC1(0x2) | OPC3(0x38)) +#define NOP (OPC1(0x0) | OPC2(0x04)) +#define OR (OPC1(0x2) | OPC3(0x02)) +#define ORN (OPC1(0x2) | OPC3(0x06)) +#define RDY (OPC1(0x2) | OPC3(0x28) | S1A(0)) +#define RESTORE (OPC1(0x2) | OPC3(0x3d)) +#define SAVE (OPC1(0x2) | OPC3(0x3c)) +#define SETHI (OPC1(0x0) | OPC2(0x04)) +#define SLL (OPC1(0x2) | OPC3(0x25)) +#define SLLX (OPC1(0x2) | OPC3(0x25) | (1 << 12)) +#define SRA (OPC1(0x2) | OPC3(0x27)) +#define SRAX (OPC1(0x2) | OPC3(0x27) | (1 << 12)) +#define SRL (OPC1(0x2) | OPC3(0x26)) +#define SRLX (OPC1(0x2) | OPC3(0x26) | (1 << 12)) +#define SUB (OPC1(0x2) | OPC3(0x04)) +#define SUBC (OPC1(0x2) | OPC3(0x0c)) +#define TA (OPC1(0x2) | OPC3(0x3a) | (8 << 25)) +#define WRY (OPC1(0x2) | OPC3(0x30) | DA(0)) +#define XOR (OPC1(0x2) | OPC3(0x03)) +#define XNOR (OPC1(0x2) | OPC3(0x07)) + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#define MAX_DISP (0x1fffff) +#define MIN_DISP (-0x200000) +#define DISP_MASK (0x3fffff) + +#define BICC (OPC1(0x0) | OPC2(0x2)) +#define FBFCC (OPC1(0x0) | OPC2(0x6)) +#define SLL_W SLL +#define SDIV (OPC1(0x2) | OPC3(0x0f)) +#define SMUL (OPC1(0x2) | OPC3(0x0b)) +#define UDIV (OPC1(0x2) | OPC3(0x0e)) +#define UMUL (OPC1(0x2) | OPC3(0x0a)) +#else +#define SLL_W SLLX +#endif + +#define SIMM_MAX (0x0fff) +#define SIMM_MIN (-0x1000) + +/* dest_reg is the absolute name of the register + Useful for reordering instructions in the delay slot. */ +static int push_inst(struct sljit_compiler *compiler, sljit_ins ins, int delay_slot) +{ + SLJIT_ASSERT((delay_slot & DST_INS_MASK) == UNMOVABLE_INS + || (delay_slot & DST_INS_MASK) == MOVABLE_INS + || (delay_slot & DST_INS_MASK) == ((ins >> 25) & 0x1f)); + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + compiler->delay_slot = delay_slot; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_ins* optimize_jump(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +{ + sljit_w diff; + sljit_uw target_addr; + sljit_ins *inst; + sljit_ins saved_inst; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return code_ptr; + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size); + } + inst = (sljit_ins*)jump->addr; + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + if (jump->flags & IS_CALL) { + /* Call is always patchable on sparc 32. */ + jump->flags |= PATCH_CALL; + if (jump->flags & IS_MOVABLE) { + inst[0] = inst[-1]; + inst[-1] = CALL; + jump->addr -= sizeof(sljit_ins); + return inst; + } + inst[0] = CALL; + inst[1] = NOP; + return inst + 1; + } +#else + /* Both calls and BPr instructions shall not pass this point. */ +#error "Implementation required" +#endif + + if (jump->flags & IS_COND) + inst--; + + if (jump->flags & IS_MOVABLE) { + diff = ((sljit_w)target_addr - (sljit_w)(inst - 1)) >> 2; + if (diff <= MAX_DISP && diff >= MIN_DISP) { + jump->flags |= PATCH_B; + inst--; + if (jump->flags & IS_COND) { + saved_inst = inst[0]; + inst[0] = inst[1] ^ (1 << 28); + inst[1] = saved_inst; + } else { + inst[1] = inst[0]; + inst[0] = BICC | DA(0x8); + } + jump->addr = (sljit_uw)inst; + return inst + 1; + } + } + + diff = ((sljit_w)target_addr - (sljit_w)(inst)) >> 2; + if (diff <= MAX_DISP && diff >= MIN_DISP) { + jump->flags |= PATCH_B; + if (jump->flags & IS_COND) + inst[0] ^= (1 << 28); + else + inst[0] = BICC | DA(0x8); + inst[1] = NOP; + jump->addr = (sljit_uw)inst; + return inst + 1; + } + + return code_ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + check_sljit_generate_code(compiler); + reverse_buf(compiler); + + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + /* These structures are ordered by their address. */ + if (label && label->size == word_count) { + /* Just recording the address. */ + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + jump->addr = (sljit_uw)(code_ptr - 3); +#else + jump->addr = (sljit_uw)(code_ptr - 6); +#endif + code_ptr = optimize_jump(jump, code_ptr, code); + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + /* Just recording the address. */ + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + code_ptr ++; + word_count ++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr - code <= (int)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins*)jump->addr; + + if (jump->flags & PATCH_CALL) { + addr = (sljit_w)(addr - jump->addr) >> 2; + SLJIT_ASSERT((sljit_w)addr <= 0x1fffffff && (sljit_w)addr >= -0x20000000); + buf_ptr[0] = CALL | (addr & 0x3fffffff); + break; + } + if (jump->flags & PATCH_B) { + addr = (sljit_w)(addr - jump->addr) >> 2; + SLJIT_ASSERT((sljit_w)addr <= MAX_DISP && (sljit_w)addr >= MIN_DISP); + buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK); + break; + } + + /* Set the fields of immediate loads. */ +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + buf_ptr[0] = (buf_ptr[0] & 0xffc00000) | ((addr >> 10) & 0x3fffff); + buf_ptr[1] = (buf_ptr[1] & 0xfffffc00) | (addr & 0x3ff); +#else +#error "Implementation required" +#endif + } while (0); + jump = jump->next; + } + + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_size = compiler->size * sizeof(sljit_ins); + SLJIT_CACHE_FLUSH(code, code_ptr); + return code; +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* Creates an index in data_transfer_insts array. */ +#define LOAD_DATA 0x01 +#define WORD_DATA 0x00 +#define BYTE_DATA 0x02 +#define HALF_DATA 0x04 +#define INT_DATA 0x06 +#define SIGNED_DATA 0x08 +/* Separates integer and floating point registers */ +#define GPR_REG 0x0f +#define DOUBLE_DATA 0x10 + +#define MEM_MASK 0x1f + +#define WRITE_BACK 0x00020 +#define ARG_TEST 0x00040 +#define CUMULATIVE_OP 0x00080 +#define IMM_OP 0x00100 +#define SRC2_IMM 0x00200 + +#define REG_DEST 0x00400 +#define REG2_SOURCE 0x00800 +#define SLOW_SRC1 0x01000 +#define SLOW_SRC2 0x02000 +#define SLOW_DEST 0x04000 +/* SET_FLAGS (0x10 << 19) also belong here! */ + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#include "sljitNativeSPARC_32.c" +#else +#include "sljitNativeSPARC_64.c" +#endif + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) +{ + CHECK_ERROR(); + check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size); + + compiler->temporaries = temporaries; + compiler->saveds = saveds; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->logical_local_size = local_size; +#endif + + local_size += 23 * sizeof(sljit_w); + local_size = (local_size + 7) & ~0x7; + compiler->local_size = local_size; + + if (local_size <= SIMM_MAX) { + FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | IMM(-local_size), UNMOVABLE_INS)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size)); + FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | S2(TMP_REG1), UNMOVABLE_INS)); + } + + if (args >= 1) + FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG1) | S1(0) | S2A(24), DR(SLJIT_SAVED_REG1))); + if (args >= 2) + FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG2) | S1(0) | S2A(25), DR(SLJIT_SAVED_REG2))); + if (args >= 3) + FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG3) | S1(0) | S2A(26), DR(SLJIT_SAVED_REG3))); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) +{ + CHECK_ERROR_VOID(); + check_sljit_set_context(compiler, args, temporaries, saveds, local_size); + + compiler->temporaries = temporaries; + compiler->saveds = saveds; +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->logical_local_size = local_size; +#endif + + local_size += 23 * sizeof(sljit_w); + compiler->local_size = (local_size + 7) & ~0x7; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw) +{ + CHECK_ERROR(); + check_sljit_emit_return(compiler, op, src, srcw); + + if (op != SLJIT_MOV || !(src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)) { + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + src = SLJIT_TEMPORARY_REG1; + } + + FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS)); + return push_inst(compiler, RESTORE | D(SLJIT_TEMPORARY_REG1) | S1(src) | S2(0), UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#define ARCH_32_64(a, b) a +#else +#define ARCH_32_64(a, b) b +#endif + +static SLJIT_CONST sljit_ins data_transfer_insts[16 + 2] = { +/* u w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */), +/* u w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */), +/* u b s */ OPC1(3) | OPC3(0x05) /* stb */, +/* u b l */ OPC1(3) | OPC3(0x01) /* ldub */, +/* u h s */ OPC1(3) | OPC3(0x06) /* sth */, +/* u h l */ OPC1(3) | OPC3(0x02) /* lduh */, +/* u i s */ OPC1(3) | OPC3(0x04) /* stw */, +/* u i l */ OPC1(3) | OPC3(0x00) /* lduw */, + +/* s w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */), +/* s w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */), +/* s b s */ OPC1(3) | OPC3(0x05) /* stb */, +/* s b l */ OPC1(3) | OPC3(0x09) /* ldsb */, +/* s h s */ OPC1(3) | OPC3(0x06) /* sth */, +/* s h l */ OPC1(3) | OPC3(0x0a) /* ldsh */, +/* s i s */ OPC1(3) | OPC3(0x04) /* stw */, +/* s i l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x08) /* ldsw */), + +/* d s */ OPC1(3) | OPC3(0x27), +/* d l */ OPC1(3) | OPC3(0x23), +}; + +#undef ARCH_32_64 + +/* Can perform an operation using at most 1 instruction. */ +static int getput_arg_fast(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw) +{ + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (!(flags & WRITE_BACK)) { + if ((!(arg & 0xf0) && argw <= SIMM_MAX && argw >= SIMM_MIN) + || ((arg & 0xf0) && (argw & 0x3) == 0)) { + /* Works for both absoulte and relative addresses (immediate case). */ + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] + | ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg)) + | S1(arg & 0xf) | ((arg & 0xf0) ? S2((arg >> 4) & 0xf) : IMM(argw)), + ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS)); + return -1; + } + } + return 0; +} + +/* See getput_arg below. + Note: can_cache is called only for binary operators. Those + operators always uses word arguments without write back. */ +static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw) +{ + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); + + /* Simple operation except for updates. */ + if (arg & 0xf0) { + argw &= 0x3; + SLJIT_ASSERT(argw); + next_argw &= 0x3; + if ((arg & 0xf0) == (next_arg & 0xf0) && argw == next_argw) + return 1; + return 0; + } + + if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN)) + return 1; + return 0; +} + +/* Emit the necessary instructions. See can_cache above. */ +static int getput_arg(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw, int next_arg, sljit_w next_argw) +{ + int base, arg2, delay_slot; + sljit_ins dest; + + SLJIT_ASSERT(arg & SLJIT_MEM); + if (!(next_arg & SLJIT_MEM)) { + next_arg = 0; + next_argw = 0; + } + + base = arg & 0xf; + if (SLJIT_UNLIKELY(arg & 0xf0)) { + argw &= 0x3; + SLJIT_ASSERT(argw != 0); + + /* Using the cache. */ + if (((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg) && (argw == compiler->cache_argw)) + arg2 = TMP_REG3; + else { + if ((arg & 0xf0) == (next_arg & 0xf0) && argw == (next_argw & 0x3)) { + compiler->cache_arg = SLJIT_MEM | (arg & 0xf0); + compiler->cache_argw = argw; + arg2 = TMP_REG3; + } + else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base && (reg << 4) != (arg & 0xf0)) + arg2 = reg; + else /* It must be a mov operation, so tmp1 must be free to use. */ + arg2 = TMP_REG1; + FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1((arg >> 4) & 0xf) | IMM_ARG | argw, DR(arg2))); + } + } + else { + /* Using the cache. */ + if ((compiler->cache_arg == SLJIT_MEM) && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) { + if (argw != compiler->cache_argw) { + FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | S1(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); + compiler->cache_argw = argw; + } + arg2 = TMP_REG3; + } else { + if ((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) { + compiler->cache_arg = SLJIT_MEM; + compiler->cache_argw = argw; + arg2 = TMP_REG3; + } + else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base) + arg2 = reg; + else /* It must be a mov operation, so tmp1 must be free to use. */ + arg2 = TMP_REG1; + FAIL_IF(load_immediate(compiler, arg2, argw)); + } + } + + dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg)); + delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS; + if (!base) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot); + if (!(flags & WRITE_BACK)) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot); + FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot)); + return push_inst(compiler, ADD | D(base) | S1(base) | S2(arg2), DR(base)); +} + +static SLJIT_INLINE int emit_op_mem(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw) +{ + if (getput_arg_fast(compiler, flags, reg, arg, argw)) + return compiler->error; + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, flags, reg, arg, argw, 0, 0); +} + +static SLJIT_INLINE int emit_op_mem2(struct sljit_compiler *compiler, int flags, int reg, int arg1, sljit_w arg1w, int arg2, sljit_w arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +static int emit_op(struct sljit_compiler *compiler, int op, int flags, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + int dst_r = TMP_REG2; + int src1_r; + sljit_w src2_r = 0; + int sugg_src2_r = TMP_REG2; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REG3) { + dst_r = dst; + flags |= REG_DEST; + if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) + sugg_src2_r = dst_r; + } + else if (dst == SLJIT_UNUSED) { + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + return SLJIT_SUCCESS; + } + else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) + flags |= SLOW_DEST; + + if (flags & IMM_OP) { + if ((src2 & SLJIT_IMM) && src2w) { + if (src2w <= SIMM_MAX && src2w >= SIMM_MIN) { + flags |= SRC2_IMM; + src2_r = src2w; + } + } + if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { + if (src1w <= SIMM_MAX && src1w >= SIMM_MIN) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. */ + } + } + } + + /* Source 1. */ + if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= TMP_REG3) + src1_r = src1; + else if (src1 & SLJIT_IMM) { + if (src1w) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } + else + src1_r = 0; + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + src1_r = TMP_REG1; + } + + /* Source 2. */ + if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REG3) { + src2_r = src2; + flags |= REG2_SOURCE; + if (!(flags & REG_DEST) && GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) + dst_r = src2_r; + } + else if (src2 & SLJIT_IMM) { + if (!(flags & SRC2_IMM)) { + if (src2w || (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI)) { + FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); + src2_r = sugg_src2_r; + } + else + src2_r = 0; + } + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + src2_r = sugg_src2_r; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + SLJIT_ASSERT(src2_r == TMP_REG2); + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, flags, dst_r, dst, dstw); + return compiler->error; + } + return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op) +{ + CHECK_ERROR(); + check_sljit_emit_op0(compiler, op); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, TA, UNMOVABLE_INS); + case SLJIT_NOP: + return push_inst(compiler, NOP, UNMOVABLE_INS); + case SLJIT_UMUL: + case SLJIT_SMUL: +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? UMUL : SMUL) | D(SLJIT_TEMPORARY_REG1) | S1(SLJIT_TEMPORARY_REG1) | S2(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG1))); + return push_inst(compiler, RDY | D(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2)); +#else +#error "Implementation required" +#endif + case SLJIT_UDIV: + case SLJIT_SDIV: +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + if (op == SLJIT_UDIV) + FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS)); + else { + FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_TEMPORARY_REG1) | IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS)); + } + FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_TEMPORARY_REG1), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? UDIV : SDIV) | D(SLJIT_TEMPORARY_REG1) | S1(SLJIT_TEMPORARY_REG1) | S2(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG1))); + FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_TEMPORARY_REG2) | S1(SLJIT_TEMPORARY_REG1) | S2(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2))); + FAIL_IF(push_inst(compiler, SUB | D(SLJIT_TEMPORARY_REG2) | S1(TMP_REG2) | S2(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2))); + return SLJIT_SUCCESS; +#else +#error "Implementation required" +#endif + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + int flags = GET_FLAGS(op) ? SET_FLAGS : 0; + + CHECK_ERROR(); + check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_MOV: + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_UI: + return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_SI: + return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_UB: + return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw); + + case SLJIT_MOV_SB: + return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); + + case SLJIT_MOV_UH: + return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw); + + case SLJIT_MOV_SH: + return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); + + case SLJIT_MOVU: + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_UI: + return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_SI: + return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_UB: + return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw); + + case SLJIT_MOVU_SB: + return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); + + case SLJIT_MOVU_UH: + return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw); + + case SLJIT_MOVU_SH: + return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); + + case SLJIT_NOT: + case SLJIT_CLZ: + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_NEG: + return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + int flags = GET_FLAGS(op) ? SET_FLAGS : 0; + + CHECK_ERROR(); + check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_ADD: + case SLJIT_ADDC: + case SLJIT_MUL: + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + if (src2 & SLJIT_IMM) + src2w &= 0x1f; +#else + SLJIT_ASSERT_STOP(); +#endif + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg) +{ + check_sljit_get_register_index(reg); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, int size) +{ + CHECK_ERROR(); + check_sljit_emit_op_custom(compiler, instruction, size); + SLJIT_ASSERT(size == 4); + + return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void) +{ + return 1; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + int dst_fr; + + CHECK_ERROR(); + check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (GET_OPCODE(op) == SLJIT_FCMP) { + if (dst > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw)); + dst = TMP_FREG1; + } + else + dst <<= 1; + + if (src > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0)); + src = TMP_FREG2; + } + else + src <<= 1; + + return push_inst(compiler, FCMPD | S1A(dst) | S2A(src), FCC_IS_SET | MOVABLE_INS); + } + + dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : (dst << 1); + + if (src > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); + src = dst_fr; + } + else + src <<= 1; + + switch (op) { + case SLJIT_FMOV: + if (src != dst_fr && dst_fr != TMP_FREG1) { + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr) | S2A(src), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS)); + } + break; + case SLJIT_FNEG: + FAIL_IF(push_inst(compiler, FNEGS | DA(dst_fr) | S2A(src), MOVABLE_INS)); + if (dst_fr != src) + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS)); + break; + case SLJIT_FABS: + FAIL_IF(push_inst(compiler, FABSS | DA(dst_fr) | S2A(src), MOVABLE_INS)); + if (dst_fr != src) + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS)); + break; + } + + if (dst_fr == TMP_FREG1) { + if (op == SLJIT_FMOV) + dst_fr = src; + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, dst_fr, dst, dstw, 0, 0)); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + int dst_fr, flags = 0; + + CHECK_ERROR(); + check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG2 : (dst << 1); + + if (src1 > SLJIT_FLOAT_REG4) { + if (getput_arg_fast(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= SLOW_SRC1; + } + else + src1 <<= 1; + + if (src2 > SLJIT_FLOAT_REG4) { + if (getput_arg_fast(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= SLOW_SRC2; + } + else + src2 <<= 1; + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + + if (flags & SLOW_SRC1) + src1 = TMP_FREG1; + if (flags & SLOW_SRC2) + src2 = TMP_FREG2; + + switch (op) { + case SLJIT_FADD: + FAIL_IF(push_inst(compiler, FADDD | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); + break; + + case SLJIT_FSUB: + FAIL_IF(push_inst(compiler, FSUBD | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); + break; + + case SLJIT_FMUL: + FAIL_IF(push_inst(compiler, FMULD | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); + break; + + case SLJIT_FDIV: + FAIL_IF(push_inst(compiler, FDIVD | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); + break; + } + + if (dst_fr == TMP_FREG2) + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG2, dst, dstw, 0, 0)); + + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw) +{ + CHECK_ERROR(); + check_sljit_emit_fast_enter(compiler, dst, dstw); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) + return push_inst(compiler, OR | D(dst) | S1(0) | S2(LINK_REG), DR(dst)); + else if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_DATA, LINK_REG, dst, dstw); + + /* SLJIT_UNUSED is also possible, although highly unlikely. */ + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + CHECK_ERROR(); + check_sljit_emit_fast_return(compiler, src, srcw); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) + FAIL_IF(push_inst(compiler, OR | D(LINK_REG) | S1(0) | S2(src), DR(LINK_REG))); + else if (src & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, LINK_REG, src, srcw)); + else if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, LINK_REG, srcw)); + + FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(LINK_REG) | IMM(8), UNMOVABLE_INS)); + return push_inst(compiler, NOP, UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + check_sljit_emit_label(compiler); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + compiler->delay_slot = UNMOVABLE_INS; + return label; +} + +static sljit_ins get_cc(int type) +{ + switch (type) { + case SLJIT_C_EQUAL: + case SLJIT_C_MUL_NOT_OVERFLOW: + return DA(0x1); + + case SLJIT_C_NOT_EQUAL: + case SLJIT_C_MUL_OVERFLOW: + return DA(0x9); + + case SLJIT_C_LESS: + return DA(0x5); + + case SLJIT_C_GREATER_EQUAL: + return DA(0xd); + + case SLJIT_C_GREATER: + return DA(0xc); + + case SLJIT_C_LESS_EQUAL: + return DA(0x4); + + case SLJIT_C_SIG_LESS: + return DA(0x3); + + case SLJIT_C_SIG_GREATER_EQUAL: + return DA(0xb); + + case SLJIT_C_SIG_GREATER: + return DA(0xa); + + case SLJIT_C_SIG_LESS_EQUAL: + return DA(0x2); + + case SLJIT_C_OVERFLOW: + return DA(0x7); + + case SLJIT_C_NOT_OVERFLOW: + return DA(0xf); + + case SLJIT_C_FLOAT_EQUAL: + return DA(0x9); + + case SLJIT_C_FLOAT_NOT_EQUAL: /* Unordered. */ + return DA(0x1); + + case SLJIT_C_FLOAT_LESS: + return DA(0x4); + + case SLJIT_C_FLOAT_GREATER_EQUAL: /* Unordered. */ + return DA(0xc); + + case SLJIT_C_FLOAT_LESS_EQUAL: + return DA(0xd); + + case SLJIT_C_FLOAT_GREATER: /* Unordered. */ + return DA(0x5); + + case SLJIT_C_FLOAT_UNORDERED: + return DA(0x7); + + case SLJIT_C_FLOAT_ORDERED: + return DA(0xf); + + default: + SLJIT_ASSERT_STOP(); + return DA(0x8); + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type) +{ + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + check_sljit_emit_jump(compiler, type); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type < SLJIT_C_FLOAT_EQUAL) { + jump->flags |= IS_COND; + if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET)) + jump->flags |= IS_MOVABLE; +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(type ^ 1) | 5, UNMOVABLE_INS)); +#else +#error "Implementation required" +#endif + } + else if (type < SLJIT_JUMP) { + jump->flags |= IS_COND; + if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & FCC_IS_SET)) + jump->flags |= IS_MOVABLE; +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(type ^ 1) | 5, UNMOVABLE_INS)); +#else +#error "Implementation required" +#endif + } else { + if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + if (type >= SLJIT_FAST_CALL) + jump->flags |= IS_CALL; + } + + PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? LINK_REG : 0) | S1(TMP_REG2) | IMM(0), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw) +{ + struct sljit_jump *jump = NULL; + int src_r; + + CHECK_ERROR(); + check_sljit_emit_ijump(compiler, type, src, srcw); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) + src_r = src; + else if (src & SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR); + jump->u.target = srcw; + if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + if (type >= SLJIT_FAST_CALL) + jump->flags |= IS_CALL; + + FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + src_r = TMP_REG2; + } + else { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw)); + src_r = TMP_REG2; + } + + FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? LINK_REG : 0) | S1(src_r) | IMM(0), UNMOVABLE_INS)); + if (jump) + jump->addr = compiler->size; + return push_inst(compiler, NOP, UNMOVABLE_INS); +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type) +{ + int reg; + + CHECK_ERROR(); + check_sljit_emit_cond_value(compiler, op, dst, dstw, type); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2; + + if (type < SLJIT_C_FLOAT_EQUAL) + FAIL_IF(push_inst(compiler, BICC | get_cc(type) | 3, UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, FBFCC | get_cc(type) | 3, UNMOVABLE_INS)); + + FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS)); + + if (GET_OPCODE(op) == SLJIT_OR) + return emit_op(compiler, SLJIT_OR, (GET_FLAGS(op) ? SET_FLAGS : 0) | CUMULATIVE_OP | IMM_OP, dst, dstw, dst, dstw, TMP_REG2, 0); + + return (reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS; +#else +#error "Implementation required" +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value) +{ + int reg; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + check_sljit_emit_const(compiler, dst, dstw, init_value); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2; + + PTR_FAIL_IF(emit_const(compiler, reg, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + + return const_; +} diff --git a/sljit/sljitNativeX86_32.c b/sljit/sljitNativeX86_32.c index e955825..b65d538 100644 --- a/sljit/sljitNativeX86_32.c +++ b/sljit/sljitNativeX86_32.c @@ -194,7 +194,6 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, CHECK_ERROR(); check_sljit_emit_return(compiler, op, src, srcw); SLJIT_ASSERT(compiler->args >= 0); - ADJUST_LOCAL_OFFSET(src, srcw); compiler->flags_saved = 0; FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); diff --git a/sljit/sljitNativeX86_64.c b/sljit/sljitNativeX86_64.c index 480cebc..acb27ee 100644 --- a/sljit/sljitNativeX86_64.c +++ b/sljit/sljitNativeX86_64.c @@ -273,7 +273,6 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, CHECK_ERROR(); check_sljit_emit_return(compiler, op, src, srcw); - ADJUST_LOCAL_OFFSET(src, srcw); compiler->flags_saved = 0; FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); diff --git a/sljit/sljitNativeX86_common.c b/sljit/sljitNativeX86_common.c index 019e587..083465d 100644 --- a/sljit/sljitNativeX86_common.c +++ b/sljit/sljitNativeX86_common.c @@ -24,7 +24,7 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) { return "x86" SLJIT_CPUINFO; } @@ -67,7 +67,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1) static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = { - 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5 + 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5 }; #define CHECK_EXTRA_REGS(p, w, do) \ @@ -95,20 +95,20 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = { #ifndef _WIN64 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9 + 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9 }; /* low-map. reg_map & 0x7. */ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1 + 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1 }; #else /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9 + 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9 }; /* low-map. reg_map & 0x7. */ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1 + 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1 }; #endif @@ -203,10 +203,10 @@ static sljit_ub get_jump_code(int type) case SLJIT_C_MUL_NOT_OVERFLOW: return 0x81; - case SLJIT_C_FLOAT_NAN: + case SLJIT_C_FLOAT_UNORDERED: return 0x8a; - case SLJIT_C_FLOAT_NOT_NAN: + case SLJIT_C_FLOAT_ORDERED: return 0x8b; } return 0; @@ -2021,7 +2021,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compile static sljit_i sse2_data[3 + 4 + 4]; static sljit_i *sse2_buffer; -static void init_compiler() +static void init_compiler(void) { sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf); sse2_buffer[0] = 0; @@ -2477,11 +2477,11 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compil cond_set = 0x91; break; - case SLJIT_C_FLOAT_NAN: + case SLJIT_C_FLOAT_UNORDERED: cond_set = 0x9a; break; - case SLJIT_C_FLOAT_NOT_NAN: + case SLJIT_C_FLOAT_ORDERED: cond_set = 0x9b; break; } |