From 1376f89edfb2d1f692f96c6e871f27a18759be39 Mon Sep 17 00:00:00 2001 From: zherczeg Date: Thu, 27 May 2021 08:11:15 +0000 Subject: JIT compiler update git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1312 6239d852-aaf2-0410-a92c-79f79f948069 --- src/pcre2_jit_compile.c | 16 +- src/sljit/sljitConfigInternal.h | 12 + src/sljit/sljitLir.c | 25 +- src/sljit/sljitLir.h | 76 +- src/sljit/sljitNativeARM_32.c | 23 +- src/sljit/sljitNativeARM_64.c | 21 +- src/sljit/sljitNativeARM_T2_32.c | 21 +- src/sljit/sljitNativeMIPS_32.c | 2 +- src/sljit/sljitNativeMIPS_64.c | 2 +- src/sljit/sljitNativeMIPS_common.c | 16 +- src/sljit/sljitNativePPC_32.c | 26 +- src/sljit/sljitNativePPC_64.c | 32 +- src/sljit/sljitNativePPC_common.c | 93 +- src/sljit/sljitNativeS390X.c | 1737 ++++++++++++++++++++--------------- src/sljit/sljitNativeSPARC_32.c | 3 + src/sljit/sljitNativeSPARC_common.c | 18 +- src/sljit/sljitNativeX86_common.c | 2 - 17 files changed, 1226 insertions(+), 899 deletions(-) diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index f3a26ae..ae0d9a9 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -8135,7 +8135,7 @@ switch(type) } else OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); if (!common->endonly) compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks); @@ -8155,7 +8155,7 @@ switch(type) } else OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); check_partial(common, FALSE); jump[0] = JUMP(SLJIT_JUMP); JUMPHERE(jump[1]); @@ -8195,14 +8195,14 @@ switch(type) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); } else { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); } return cc; @@ -8221,7 +8221,7 @@ switch(type) jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); } - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); jump[0] = JUMP(SLJIT_JUMP); JUMPHERE(jump[1]); @@ -9575,11 +9575,11 @@ free_stack(common, callout_arg_size); /* Check return value. */ OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); -add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32)); +add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER)); if (common->abort_label == NULL) - add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */); + add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */); else - JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label); + JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label); return cc + callout_length; } diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h index ff36e5b..7bb9990 100644 --- a/src/sljit/sljitConfigInternal.h +++ b/src/sljit/sljitConfigInternal.h @@ -761,6 +761,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \ (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) +/********************************/ +/* CPU status flags management. */ +/********************************/ + +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ + || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + || (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ + || (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#define SLJIT_HAS_STATUS_FLAGS_STATE 1 +#endif + /*************************************/ /* Debug and verbose related macros. */ /*************************************/ diff --git a/src/sljit/sljitLir.c b/src/sljit/sljitLir.c index d817c90..a24a99a 100644 --- a/src/sljit/sljitLir.c +++ b/src/sljit/sljitLir.c @@ -532,13 +532,21 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_la put_label->label = label; } +#define SLJIT_CURRENT_FLAGS_ALL \ + (SLJIT_CURRENT_FLAGS_I32_OP | SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE) + SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(current_flags); +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + compiler->status_flags_state = current_flags; +#endif + #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - if ((current_flags & ~(VARIABLE_FLAG_MASK | SLJIT_I32_OP | SLJIT_SET_Z)) == 0) { + compiler->last_flags = 0; + if ((current_flags & ~(VARIABLE_FLAG_MASK | SLJIT_SET_Z | SLJIT_CURRENT_FLAGS_ALL)) == 0) { compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_I32_OP | SLJIT_SET_Z)); } #endif @@ -968,7 +976,7 @@ static const char* fop2_names[] = { }; #define JUMP_POSTFIX(type) \ - ((type & 0xff) <= SLJIT_MUL_NOT_OVERFLOW ? ((type & SLJIT_I32_OP) ? "32" : "") \ + ((type & 0xff) <= SLJIT_NOT_OVERFLOW ? ((type & SLJIT_I32_OP) ? "32" : "") \ : ((type & 0xff) <= SLJIT_ORDERED_F64 ? ((type & SLJIT_F32_OP) ? ".f32" : ".f64") : "")) static char* jump_names[] = { @@ -978,7 +986,6 @@ static char* jump_names[] = { (char*)"sig_less", (char*)"sig_greater_equal", (char*)"sig_greater", (char*)"sig_less_equal", (char*)"overflow", (char*)"not_overflow", - (char*)"mul_overflow", (char*)"mul_not_overflow", (char*)"carry", (char*)"", (char*)"equal", (char*)"not_equal", (char*)"less", (char*)"greater_equal", @@ -1278,7 +1285,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler case SLJIT_MUL: CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) - || GET_FLAG_TYPE(op) == SLJIT_MUL_OVERFLOW); + || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); break; case SLJIT_ADD: CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) @@ -1601,9 +1608,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); else CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) - || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) - || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW)); - CHECK_ARGUMENT((type & SLJIT_I32_OP) == (compiler->last_flags & SLJIT_I32_OP)); + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); } #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -1818,8 +1823,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); else CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) - || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) - || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW)); + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); FUNCTION_CHECK_DST(dst, dstw, 0); @@ -1858,8 +1862,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); else CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) - || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) - || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW)); + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { diff --git a/src/sljit/sljitLir.h b/src/sljit/sljitLir.h index 93d2804..0eb62fc 100644 --- a/src/sljit/sljitLir.h +++ b/src/sljit/sljitLir.h @@ -412,6 +412,10 @@ struct sljit_compiler { /* Executable size for statistical purposes. */ sljit_uw executable_size; +#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE) + sljit_s32 status_flags_state; +#endif + #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) sljit_s32 args; sljit_s32 locals_offset; @@ -460,7 +464,7 @@ struct sljit_compiler { #if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) /* Need to allocate register save area to make calls. */ - sljit_s32 have_save_area; + sljit_s32 mode; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -996,7 +1000,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #define SLJIT_SUBC (SLJIT_OP2_BASE + 3) #define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_I32_OP) /* Note: integer mul - Flags: MUL_OVERFLOW */ + Flags: OVERFLOW */ #define SLJIT_MUL (SLJIT_OP2_BASE + 4) #define SLJIT_MUL32 (SLJIT_MUL | SLJIT_I32_OP) /* Flags: Z */ @@ -1141,89 +1145,69 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi /* Integer comparison types. */ #define SLJIT_EQUAL 0 -#define SLJIT_EQUAL32 (SLJIT_EQUAL | SLJIT_I32_OP) -#define SLJIT_ZERO 0 -#define SLJIT_ZERO32 (SLJIT_ZERO | SLJIT_I32_OP) +#define SLJIT_ZERO SLJIT_EQUAL #define SLJIT_NOT_EQUAL 1 -#define SLJIT_NOT_EQUAL32 (SLJIT_NOT_EQUAL | SLJIT_I32_OP) -#define SLJIT_NOT_ZERO 1 -#define SLJIT_NOT_ZERO32 (SLJIT_NOT_ZERO | SLJIT_I32_OP) +#define SLJIT_NOT_ZERO SLJIT_NOT_EQUAL #define SLJIT_LESS 2 -#define SLJIT_LESS32 (SLJIT_LESS | SLJIT_I32_OP) #define SLJIT_SET_LESS SLJIT_SET(SLJIT_LESS) #define SLJIT_GREATER_EQUAL 3 -#define SLJIT_GREATER_EQUAL32 (SLJIT_GREATER_EQUAL | SLJIT_I32_OP) #define SLJIT_SET_GREATER_EQUAL SLJIT_SET(SLJIT_GREATER_EQUAL) #define SLJIT_GREATER 4 -#define SLJIT_GREATER32 (SLJIT_GREATER | SLJIT_I32_OP) #define SLJIT_SET_GREATER SLJIT_SET(SLJIT_GREATER) #define SLJIT_LESS_EQUAL 5 -#define SLJIT_LESS_EQUAL32 (SLJIT_LESS_EQUAL | SLJIT_I32_OP) #define SLJIT_SET_LESS_EQUAL SLJIT_SET(SLJIT_LESS_EQUAL) #define SLJIT_SIG_LESS 6 -#define SLJIT_SIG_LESS32 (SLJIT_SIG_LESS | SLJIT_I32_OP) #define SLJIT_SET_SIG_LESS SLJIT_SET(SLJIT_SIG_LESS) #define SLJIT_SIG_GREATER_EQUAL 7 -#define SLJIT_SIG_GREATER_EQUAL32 (SLJIT_SIG_GREATER_EQUAL | SLJIT_I32_OP) #define SLJIT_SET_SIG_GREATER_EQUAL SLJIT_SET(SLJIT_SIG_GREATER_EQUAL) #define SLJIT_SIG_GREATER 8 -#define SLJIT_SIG_GREATER32 (SLJIT_SIG_GREATER | SLJIT_I32_OP) #define SLJIT_SET_SIG_GREATER SLJIT_SET(SLJIT_SIG_GREATER) #define SLJIT_SIG_LESS_EQUAL 9 -#define SLJIT_SIG_LESS_EQUAL32 (SLJIT_SIG_LESS_EQUAL | SLJIT_I32_OP) #define SLJIT_SET_SIG_LESS_EQUAL SLJIT_SET(SLJIT_SIG_LESS_EQUAL) #define SLJIT_OVERFLOW 10 -#define SLJIT_OVERFLOW32 (SLJIT_OVERFLOW | SLJIT_I32_OP) #define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW) #define SLJIT_NOT_OVERFLOW 11 -#define SLJIT_NOT_OVERFLOW32 (SLJIT_NOT_OVERFLOW | SLJIT_I32_OP) - -#define SLJIT_MUL_OVERFLOW 12 -#define SLJIT_MUL_OVERFLOW32 (SLJIT_MUL_OVERFLOW | SLJIT_I32_OP) -#define SLJIT_SET_MUL_OVERFLOW SLJIT_SET(SLJIT_MUL_OVERFLOW) -#define SLJIT_MUL_NOT_OVERFLOW 13 -#define SLJIT_MUL_NOT_OVERFLOW32 (SLJIT_MUL_NOT_OVERFLOW | SLJIT_I32_OP) /* There is no SLJIT_CARRY or SLJIT_NOT_CARRY. */ -#define SLJIT_SET_CARRY SLJIT_SET(14) +#define SLJIT_SET_CARRY SLJIT_SET(12) /* Floating point comparison types. */ -#define SLJIT_EQUAL_F64 16 +#define SLJIT_EQUAL_F64 14 #define SLJIT_EQUAL_F32 (SLJIT_EQUAL_F64 | SLJIT_F32_OP) #define SLJIT_SET_EQUAL_F SLJIT_SET(SLJIT_EQUAL_F64) -#define SLJIT_NOT_EQUAL_F64 17 +#define SLJIT_NOT_EQUAL_F64 15 #define SLJIT_NOT_EQUAL_F32 (SLJIT_NOT_EQUAL_F64 | SLJIT_F32_OP) #define SLJIT_SET_NOT_EQUAL_F SLJIT_SET(SLJIT_NOT_EQUAL_F64) -#define SLJIT_LESS_F64 18 +#define SLJIT_LESS_F64 16 #define SLJIT_LESS_F32 (SLJIT_LESS_F64 | SLJIT_F32_OP) #define SLJIT_SET_LESS_F SLJIT_SET(SLJIT_LESS_F64) -#define SLJIT_GREATER_EQUAL_F64 19 +#define SLJIT_GREATER_EQUAL_F64 17 #define SLJIT_GREATER_EQUAL_F32 (SLJIT_GREATER_EQUAL_F64 | SLJIT_F32_OP) #define SLJIT_SET_GREATER_EQUAL_F SLJIT_SET(SLJIT_GREATER_EQUAL_F64) -#define SLJIT_GREATER_F64 20 +#define SLJIT_GREATER_F64 18 #define SLJIT_GREATER_F32 (SLJIT_GREATER_F64 | SLJIT_F32_OP) #define SLJIT_SET_GREATER_F SLJIT_SET(SLJIT_GREATER_F64) -#define SLJIT_LESS_EQUAL_F64 21 +#define SLJIT_LESS_EQUAL_F64 19 #define SLJIT_LESS_EQUAL_F32 (SLJIT_LESS_EQUAL_F64 | SLJIT_F32_OP) #define SLJIT_SET_LESS_EQUAL_F SLJIT_SET(SLJIT_LESS_EQUAL_F64) -#define SLJIT_UNORDERED_F64 22 +#define SLJIT_UNORDERED_F64 20 #define SLJIT_UNORDERED_F32 (SLJIT_UNORDERED_F64 | SLJIT_F32_OP) #define SLJIT_SET_UNORDERED_F SLJIT_SET(SLJIT_UNORDERED_F64) -#define SLJIT_ORDERED_F64 23 +#define SLJIT_ORDERED_F64 21 #define SLJIT_ORDERED_F32 (SLJIT_ORDERED_F64 | SLJIT_F32_OP) #define SLJIT_SET_ORDERED_F SLJIT_SET(SLJIT_ORDERED_F64) /* Unconditional jump types. */ -#define SLJIT_JUMP 24 +#define SLJIT_JUMP 22 /* Fast calling method. See sljit_emit_fast_enter / SLJIT_FAST_RETURN. */ -#define SLJIT_FAST_CALL 25 +#define SLJIT_FAST_CALL 23 /* Called function must be declared with the SLJIT_FUNC attribute. */ -#define SLJIT_CALL 26 +#define SLJIT_CALL 24 /* Called function must be declared with cdecl attribute. This is the default attribute for C functions. */ -#define SLJIT_CALL_CDECL 27 +#define SLJIT_CALL_CDECL 25 /* The target can be changed during runtime (see: sljit_set_jump_addr). */ #define SLJIT_REWRITABLE_JUMP 0x1000 @@ -1534,8 +1518,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, void *instruction, sljit_s32 size); -/* Define the currently available CPU status flags. It is usually used after an - sljit_emit_op_custom call to define which flags are set. */ +/* Flags were set by a 32 bit operation. */ +#define SLJIT_CURRENT_FLAGS_I32_OP SLJIT_I32_OP + +/* Flags were set by an ADD, ADDC, SUB, SUBC, or NEG operation. */ +#define SLJIT_CURRENT_FLAGS_ADD_SUB 0x01 + +/* Flags were set by a SUB with unused destination. + Must be combined with SLJIT_CURRENT_FLAGS_ADD_SUB. */ +#define SLJIT_CURRENT_FLAGS_COMPARE 0x02 + +/* Define the currently available CPU status flags. It is usually used after + an sljit_emit_label or sljit_emit_op_custom operations to define which CPU + status flags are available. + + The current_flags must be a valid combination of SLJIT_SET_* and + SLJIT_CURRENT_FLAGS_* constants. */ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags); diff --git a/src/sljit/sljitNativeARM_32.c b/src/sljit/sljitNativeARM_32.c index ae8479f..74cf55f 100644 --- a/src/sljit/sljitNativeARM_32.c +++ b/src/sljit/sljitNativeARM_32.c @@ -1197,6 +1197,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_ADD: SLJIT_ASSERT(!(flags & INV_IMM)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); @@ -1207,6 +1209,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_SUB: SLJIT_ASSERT(!(flags & INV_IMM)); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS) @@ -1220,6 +1224,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_MUL: SLJIT_ASSERT(!(flags & INV_IMM)); SLJIT_ASSERT(!(src2 & SRC2_IMM)); + compiler->status_flags_state = 0; if (!HAS_FLAGS(op)) return push_inst(compiler, MUL | (reg_map[dst] << 16) | (reg_map[src2] << 8) | reg_map[src1]); @@ -2153,16 +2158,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * /* Conditional instructions */ /* --------------------------------------------------------------------- */ -static sljit_uw get_cc(sljit_s32 type) +static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_MUL_NOT_OVERFLOW: case SLJIT_EQUAL_F64: return 0x00000000; case SLJIT_NOT_EQUAL: - case SLJIT_MUL_OVERFLOW: case SLJIT_NOT_EQUAL_F64: return 0x10000000; @@ -2195,10 +2198,16 @@ static sljit_uw get_cc(sljit_s32 type) return 0xd0000000; case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x10000000; + case SLJIT_UNORDERED_F64: return 0x60000000; case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x00000000; + case SLJIT_ORDERED_F64: return 0x70000000; @@ -2242,7 +2251,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (type >= SLJIT_FAST_CALL) PTR_FAIL_IF(prepare_blx(compiler)); PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, - type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0)); + type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0)); if (jump->flags & SLJIT_REWRITABLE_JUMP) { jump->addr = compiler->size; @@ -2260,7 +2269,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (type >= SLJIT_FAST_CALL) jump->flags |= IS_BL; PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); - PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type))); + PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type))); jump->addr = compiler->size; #endif return jump; @@ -2589,7 +2598,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co ADJUST_LOCAL_OFFSET(dst, dstw); op = GET_OPCODE(op); - cc = get_cc(type & 0xff); + cc = get_cc(compiler, type & 0xff); dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; if (op < SLJIT_ADD) { @@ -2629,7 +2638,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil dst_reg &= ~SLJIT_I32_OP; - cc = get_cc(type & 0xff); + cc = get_cc(compiler, type & 0xff); if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { tmp = get_imm(srcw); diff --git a/src/sljit/sljitNativeARM_64.c b/src/sljit/sljitNativeARM_64.c index 52267e7..3f0f5fc 100644 --- a/src/sljit/sljitNativeARM_64.c +++ b/src/sljit/sljitNativeARM_64.c @@ -644,6 +644,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s imm = -imm; /* Fall through. */ case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; if (imm == 0) { CHECK_FLAGS(1 << 29); return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg)); @@ -781,6 +782,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s break; /* Set flags. */ case SLJIT_NEG: SLJIT_ASSERT(arg1 == TMP_REG1); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; if (flags & SET_FLAGS) inv_bits |= 1 << 29; return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); @@ -789,17 +791,20 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)); case SLJIT_ADD: CHECK_FLAGS(1 << 29); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_ADDC: CHECK_FLAGS(1 << 29); return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_SUB: CHECK_FLAGS(1 << 29); + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_SUBC: CHECK_FLAGS(1 << 29); return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_MUL: + compiler->status_flags_state = 0; if (!(flags & SET_FLAGS)) return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)); if (flags & INT_OP) { @@ -1600,16 +1605,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * /* Conditional instructions */ /* --------------------------------------------------------------------- */ -static sljit_uw get_cc(sljit_s32 type) +static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_MUL_NOT_OVERFLOW: case SLJIT_EQUAL_F64: return 0x1; case SLJIT_NOT_EQUAL: - case SLJIT_MUL_OVERFLOW: case SLJIT_NOT_EQUAL_F64: return 0x0; @@ -1642,10 +1645,16 @@ static sljit_uw get_cc(sljit_s32 type) return 0xc; case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x0; + case SLJIT_UNORDERED_F64: return 0x7; case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x1; + case SLJIT_ORDERED_F64: return 0x6; @@ -1685,7 +1694,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (type < SLJIT_JUMP) { jump->flags |= IS_COND; - PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(type))); + PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(compiler, type))); } else if (type >= SLJIT_FAST_CALL) jump->flags |= IS_BL; @@ -1799,7 +1808,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); - cc = get_cc(type & 0xff); + cc = get_cc(compiler, type & 0xff); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; if (GET_OPCODE(op) < SLJIT_ADD) { @@ -1854,7 +1863,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil srcw = 0; } - cc = get_cc(type & 0xff); + cc = get_cc(compiler, type & 0xff); dst_reg &= ~SLJIT_I32_OP; return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src)); diff --git a/src/sljit/sljitNativeARM_T2_32.c b/src/sljit/sljitNativeARM_T2_32.c index 4624882..e35dbe9 100644 --- a/src/sljit/sljitNativeARM_T2_32.c +++ b/src/sljit/sljitNativeARM_T2_32.c @@ -610,6 +610,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s Although some clever things could be done here, "NOT IMM" does not worth the efforts. */ break; case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; nimm = -(sljit_sw)imm; if (IS_2_LO_REGS(reg, dst)) { if (imm <= 0x7) @@ -643,6 +644,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s break; case SLJIT_SUB: /* SUB operation can be replaced by ADD because of the negative carry flag. */ + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; if (flags & ARG1_IMM) { if (imm == 0 && IS_2_LO_REGS(reg, dst)) return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); @@ -801,6 +803,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2))); return SLJIT_SUCCESS; case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; if (IS_3_LO_REGS(dst, arg1, arg2)) return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); if (dst == arg1 && !(flags & SET_FLAGS)) @@ -811,6 +814,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; if (flags & UNUSED_RETURN) { if (IS_2_LO_REGS(arg1, arg2)) return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2)); @@ -824,6 +828,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_MUL: + compiler->status_flags_state = 0; if (!(flags & SET_FLAGS)) return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2)); SLJIT_ASSERT(dst != TMP_REG2); @@ -1760,16 +1765,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * /* Conditional instructions */ /* --------------------------------------------------------------------- */ -static sljit_uw get_cc(sljit_s32 type) +static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_MUL_NOT_OVERFLOW: case SLJIT_EQUAL_F64: return 0x0; case SLJIT_NOT_EQUAL: - case SLJIT_MUL_OVERFLOW: case SLJIT_NOT_EQUAL_F64: return 0x1; @@ -1802,10 +1805,16 @@ static sljit_uw get_cc(sljit_s32 type) return 0xd; case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x1; + case SLJIT_UNORDERED_F64: return 0x6; case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return 0x0; + case SLJIT_ORDERED_F64: return 0x7; @@ -1847,7 +1856,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); if (type < SLJIT_JUMP) { jump->flags |= IS_COND; - cc = get_cc(type); + cc = get_cc(compiler, type); jump->flags |= cc << 8; PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); } @@ -2177,7 +2186,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co ADJUST_LOCAL_OFFSET(dst, dstw); op = GET_OPCODE(op); - cc = get_cc(type & 0xff); + cc = get_cc(compiler, type & 0xff); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; if (op < SLJIT_ADD) { @@ -2229,7 +2238,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil dst_reg &= ~SLJIT_I32_OP; - cc = get_cc(type & 0xff); + cc = get_cc(compiler, type & 0xff); if (!(src & SLJIT_IMM)) { FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); diff --git a/src/sljit/sljitNativeMIPS_32.c b/src/sljit/sljitNativeMIPS_32.c index f887ee1..a90345f 100644 --- a/src/sljit/sljitNativeMIPS_32.c +++ b/src/sljit/sljitNativeMIPS_32.c @@ -367,7 +367,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_MUL: SLJIT_ASSERT(!(flags & SRC2_IMM)); - if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) { + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) { #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); #else /* SLJIT_MIPS_REV < 1 */ diff --git a/src/sljit/sljitNativeMIPS_64.c b/src/sljit/sljitNativeMIPS_64.c index 5ab9b7d..1f22e49 100644 --- a/src/sljit/sljitNativeMIPS_64.c +++ b/src/sljit/sljitNativeMIPS_64.c @@ -458,7 +458,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_MUL: SLJIT_ASSERT(!(flags & SRC2_IMM)); - if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) { + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) { #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)); #elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) diff --git a/src/sljit/sljitNativeMIPS_common.c b/src/sljit/sljitNativeMIPS_common.c index ecf4dac..fd74769 100644 --- a/src/sljit/sljitNativeMIPS_common.c +++ b/src/sljit/sljitNativeMIPS_common.c @@ -1377,6 +1377,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_NEG: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); case SLJIT_CLZ: @@ -1424,13 +1425,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: + compiler->status_flags_state = 0; return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_AND: @@ -1860,7 +1864,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_SIG_LESS: case SLJIT_SIG_GREATER: case SLJIT_OVERFLOW: - case SLJIT_MUL_OVERFLOW: BR_Z(OTHER_FLAG); break; case SLJIT_GREATER_EQUAL: @@ -1868,7 +1871,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_SIG_GREATER_EQUAL: case SLJIT_SIG_LESS_EQUAL: case SLJIT_NOT_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: BR_NZ(OTHER_FLAG); break; case SLJIT_NOT_EQUAL_F64: @@ -2127,8 +2129,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); src_ar = dst_ar; break; - case SLJIT_MUL_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB) { + src_ar = OTHER_FLAG; + break; + } FAIL_IF(push_inst(compiler, SLTIU | SA(OTHER_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); src_ar = dst_ar; type ^= 0x1; /* Flip type bit for the XORI below. */ @@ -2219,7 +2225,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil case SLJIT_SIG_LESS: case SLJIT_SIG_GREATER: case SLJIT_OVERFLOW: - case SLJIT_MUL_OVERFLOW: ins = MOVN | TA(OTHER_FLAG); break; case SLJIT_GREATER_EQUAL: @@ -2227,7 +2232,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil case SLJIT_SIG_GREATER_EQUAL: case SLJIT_SIG_LESS_EQUAL: case SLJIT_NOT_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: ins = MOVZ | TA(OTHER_FLAG); break; case SLJIT_EQUAL_F64: diff --git a/src/sljit/sljitNativePPC_32.c b/src/sljit/sljitNativePPC_32.c index 7d9ec53..6ddb550 100644 --- a/src/sljit/sljitNativePPC_32.c +++ b/src/sljit/sljitNativePPC_32.c @@ -119,9 +119,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); } + SLJIT_ASSERT(!(flags & ALT_FORM4)); if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); - if (flags & ALT_FORM4) + if (flags & ALT_FORM5) return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2)); @@ -143,24 +144,29 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } if (flags & ALT_FORM2) { + if (flags & ALT_FORM3) { + FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2))); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM3) { /* Setting XER SO is not enough, CR SO is also needed. */ return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); } - if (flags & ALT_FORM3) { + if (flags & ALT_FORM4) { /* Flags does not set: BIN_IMM_EXTS unnecessary. */ SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (flags & ALT_FORM4) { - if (flags & ALT_FORM5) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm); - } - return push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2)); - } - if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); if (flags & ALT_FORM5) diff --git a/src/sljit/sljitNativePPC_64.c b/src/sljit/sljitNativePPC_64.c index 92147d2..cbdf2dd 100644 --- a/src/sljit/sljitNativePPC_64.c +++ b/src/sljit/sljitNativePPC_64.c @@ -252,10 +252,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl BIN_IMM_EXTS(); return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); } + if (flags & ALT_FORM4) { + if (flags & ALT_FORM5) + FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm)); + else + FAIL_IF(push_inst(compiler, ADD | D(dst) | A(src1) | B(src2))); + return push_inst(compiler, CMPI | A(dst) | 0); + } if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); BIN_EXTS(); - if (flags & ALT_FORM4) + if (flags & ALT_FORM5) return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2)); @@ -278,6 +285,19 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } if (flags & ALT_FORM2) { + if (flags & ALT_FORM3) { + FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); + if (!(flags & ALT_FORM4)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM3) { if (flags & ALT_SIGN_EXT) { FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); src1 = TMP_REG1; @@ -291,20 +311,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; } - if (flags & ALT_FORM3) { + if (flags & ALT_FORM4) { /* Flags does not set: BIN_IMM_EXTS unnecessary. */ SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (flags & ALT_FORM4) { - if (flags & ALT_FORM5) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm); - } - return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)); - } - if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); BIN_EXTS(); diff --git a/src/sljit/sljitNativePPC_common.c b/src/sljit/sljitNativePPC_common.c index d84562c..2174dbb 100644 --- a/src/sljit/sljitNativePPC_common.c +++ b/src/sljit/sljitNativePPC_common.c @@ -1324,6 +1324,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ((src) & SLJIT_IMM) #endif +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_ADD_FORM1(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ + || (op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_I32_OP | SLJIT_SET_Z | SLJIT_SET_CARRY)) +#define TEST_SUB_FORM2(op) \ + ((GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) \ + || (op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_I32_OP | SLJIT_SET_Z)) +#define TEST_SUB_FORM3(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW \ + || (op & (SLJIT_I32_OP | SLJIT_SET_Z)) == (SLJIT_I32_OP | SLJIT_SET_Z)) +#else +#define TEST_ADD_FORM1(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) +#define TEST_SUB_FORM2(op) \ + (GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) +#define TEST_SUB_FORM3(op) \ + (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) +#endif + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, @@ -1362,7 +1381,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: - if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) + if (TEST_ADD_FORM1(op)) return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { @@ -1392,6 +1411,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); } } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((op & (SLJIT_I32_OP | SLJIT_SET_Z)) == (SLJIT_I32_OP | SLJIT_SET_Z)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src2, src2w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); + } +#endif if (HAS_FLAGS(op)) { if (TEST_SL_IMM(src2, src2w)) { compiler->imm = src2w & 0xffff; @@ -1402,7 +1435,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); } } - return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM4 : 0), dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w); @@ -1424,18 +1457,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); } - if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) + if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w); + } - if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { - if (TEST_SL_IMM(src2, -src2w)) { - compiler->imm = (-src2w) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + if (TEST_SUB_FORM2(op)) { + if ((src2 & SLJIT_IMM) && src2w >= -SIMM_MAX && src2w <= SIMM_MAX) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); + } + + if (TEST_SUB_FORM3(op)) + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); + + if (TEST_SL_IMM(src2, -src2w)) { + compiler->imm = (-src2w) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | (!HAS_FLAGS(op) ? ALT_FORM2 : ALT_FORM3), dst, dstw, src1, src1w, TMP_REG2, 0); + } + + if (TEST_SL_IMM(src1, src1w) && !(op & SLJIT_SET_Z)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); + } + + if (!HAS_FLAGS(op)) { if (TEST_SH_IMM(src2, -src2w)) { compiler->imm = ((-src2w) >> 16) & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1447,18 +1498,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile } } - if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) != GET_FLAG_TYPE(SLJIT_SET_CARRY)) { - if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0); - } - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); - } - - if (TEST_SL_IMM(src2, -src2w)) { - compiler->imm = (-src2w) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); - } /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */ return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); @@ -1536,6 +1575,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return SLJIT_SUCCESS; } +#undef TEST_ADD_FORM1 +#undef TEST_SUB_FORM2 +#undef TEST_SUB_FORM3 + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -1941,11 +1984,9 @@ static sljit_ins get_bo_bi_flags(sljit_s32 type) return (4 << 21) | ((4 + 1) << 16); case SLJIT_OVERFLOW: - case SLJIT_MUL_OVERFLOW: return (12 << 21) | (3 << 16); case SLJIT_NOT_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: return (4 << 21) | (3 << 16); case SLJIT_EQUAL_F64: @@ -2143,12 +2184,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co break; case SLJIT_OVERFLOW: - case SLJIT_MUL_OVERFLOW: cr_bit = 3; break; case SLJIT_NOT_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: cr_bit = 3; invert = 1; break; diff --git a/src/sljit/sljitNativeS390X.c b/src/sljit/sljitNativeS390X.c index 3d007fe..716491e 100644 --- a/src/sljit/sljitNativeS390X.c +++ b/src/sljit/sljitNativeS390X.c @@ -45,7 +45,7 @@ typedef sljit_uw sljit_ins; static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { - 14, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1 + 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1 }; /* there are also a[2-15] available, but they are slower to access and @@ -120,8 +120,7 @@ struct sljit_s390x_const { /* Convert SLJIT register to hardware register. */ static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r) { - SLJIT_ASSERT(r != SLJIT_UNUSED); - SLJIT_ASSERT(r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0]))); + SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0]))); return reg_map[r]; } @@ -172,51 +171,93 @@ static sljit_s32 encode_inst(void **ptr, sljit_ins ins) return SLJIT_SUCCESS; } +#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \ + (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE)) == SLJIT_CURRENT_FLAGS_ADD_SUB) + /* Map the given type to a 4-bit condition code mask. */ -static SLJIT_INLINE sljit_u8 get_cc(sljit_s32 type) { - const sljit_u8 eq = 1 << 3; /* equal {,to zero} */ - const sljit_u8 lt = 1 << 2; /* less than {,zero} */ - const sljit_u8 gt = 1 << 1; /* greater than {,zero} */ - const sljit_u8 ov = 1 << 0; /* {overflow,NaN} */ +static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) { + const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */ + const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */ + const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */ + const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */ switch (type) { case SLJIT_EQUAL: + if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) { + sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state); + if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL) + return cc0; + if (type == SLJIT_OVERFLOW) + return (cc0 | cc3); + return (cc0 | cc2); + } + case SLJIT_EQUAL_F64: - return eq; + return cc0; case SLJIT_NOT_EQUAL: + if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) { + sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state); + if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL) + return (cc1 | cc2 | cc3); + if (type == SLJIT_OVERFLOW) + return (cc1 | cc2); + return (cc1 | cc3); + } + case SLJIT_NOT_EQUAL_F64: - return ~eq; + return (cc1 | cc2 | cc3); case SLJIT_LESS: + return cc1; + + case SLJIT_GREATER_EQUAL: + return (cc0 | cc2 | cc3); + + case SLJIT_GREATER: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE) + return cc2; + return cc3; + + case SLJIT_LESS_EQUAL: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE) + return (cc0 | cc1); + return (cc0 | cc1 | cc2); + case SLJIT_SIG_LESS: case SLJIT_LESS_F64: - return lt; + return cc1; - case SLJIT_LESS_EQUAL: case SLJIT_SIG_LESS_EQUAL: case SLJIT_LESS_EQUAL_F64: - return (lt | eq); + return (cc0 | cc1); - case SLJIT_GREATER: case SLJIT_SIG_GREATER: - case SLJIT_GREATER_F64: - return gt; + /* Overflow is considered greater, see SLJIT_SUB. */ + return cc2 | cc3; - case SLJIT_GREATER_EQUAL: case SLJIT_SIG_GREATER_EQUAL: - case SLJIT_GREATER_EQUAL_F64: - return (gt | eq); + return (cc0 | cc2 | cc3); case SLJIT_OVERFLOW: - case SLJIT_MUL_OVERFLOW: + if (compiler->status_flags_state & SLJIT_SET_Z) + return (cc2 | cc3); + case SLJIT_UNORDERED_F64: - return ov; + return cc3; case SLJIT_NOT_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: + if (compiler->status_flags_state & SLJIT_SET_Z) + return (cc0 | cc1); + case SLJIT_ORDERED_F64: - return ~ov; + return (cc0 | cc1 | cc2); + + case SLJIT_GREATER_F64: + return cc2; + + case SLJIT_GREATER_EQUAL_F64: + return (cc0 | cc2); } SLJIT_UNREACHABLE(); @@ -346,19 +387,20 @@ HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY) #define is_u32(d) (0 <= (d) && (d) <= 0xffffffffL) #define CHECK_SIGNED(v, bitlen) \ - ((v) == (((v) << (sizeof(v) * 8 - bitlen)) >> (sizeof(v) * 8 - bitlen))) + ((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1))) +#define is_s8(d) CHECK_SIGNED((d), 8) #define is_s16(d) CHECK_SIGNED((d), 16) #define is_s20(d) CHECK_SIGNED((d), 20) -#define is_s32(d) CHECK_SIGNED((d), 32) +#define is_s32(d) ((d) == (sljit_s32)(d)) -static SLJIT_INLINE sljit_uw disp_s20(sljit_s32 d) +static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d) { + SLJIT_ASSERT(is_s20(d)); + sljit_uw dh = (d >> 12) & 0xff; sljit_uw dl = (d << 8) & 0xfff00; - - SLJIT_ASSERT(is_s20(d)); - return dh | dl; + return (dh | dl) << 8; } /* TODO(carenas): variadic macro is not strictly needed */ @@ -372,12 +414,6 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \ } -/* ADD */ -SLJIT_S390X_RR(ar, 0x1a00) - -/* ADD LOGICAL */ -SLJIT_S390X_RR(alr, 0x1e00) - /* AND */ SLJIT_S390X_RR(nr, 0x1400) @@ -387,12 +423,6 @@ SLJIT_S390X_RR(basr, 0x0d00) /* BRANCH ON CONDITION */ SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? */ -/* COMPARE */ -SLJIT_S390X_RR(cr, 0x1900) - -/* COMPARE LOGICAL */ -SLJIT_S390X_RR(clr, 0x1500) - /* DIVIDE */ SLJIT_S390X_RR(dr, 0x1d00) @@ -408,12 +438,6 @@ SLJIT_S390X_RR(lcr, 0x1300) /* OR */ SLJIT_S390X_RR(or, 0x1600) -/* SUBTRACT */ -SLJIT_S390X_RR(sr, 0x1b00) - -/* SUBTRACT LOGICAL */ -SLJIT_S390X_RR(slr, 0x1f00) - #undef SLJIT_S390X_RR /* RRE form instructions */ @@ -423,25 +447,9 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \ return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \ } -/* ADD */ -SLJIT_S390X_RRE(agr, 0xb9080000) - -/* ADD LOGICAL */ -SLJIT_S390X_RRE(algr, 0xb90a0000) - -/* ADD LOGICAL WITH CARRY */ -SLJIT_S390X_RRE(alcr, 0xb9980000) -SLJIT_S390X_RRE(alcgr, 0xb9880000) - /* AND */ SLJIT_S390X_RRE(ngr, 0xb9800000) -/* COMPARE */ -SLJIT_S390X_RRE(cgr, 0xb9200000) - -/* COMPARE LOGICAL */ -SLJIT_S390X_RRE(clgr, 0xb9210000) - /* DIVIDE LOGICAL */ SLJIT_S390X_RRE(dlr, 0xb9970000) SLJIT_S390X_RRE(dlgr, 0xb9870000) @@ -482,8 +490,6 @@ SLJIT_S390X_RRE(llghr, 0xb9850000) SLJIT_S390X_RRE(mlgr, 0xb9860000) /* MULTIPLY SINGLE */ -SLJIT_S390X_RRE(msr, 0xb2520000) -SLJIT_S390X_RRE(msgr, 0xb90c0000) SLJIT_S390X_RRE(msgfr, 0xb91c0000) /* OR */ @@ -492,13 +498,6 @@ SLJIT_S390X_RRE(ogr, 0xb9810000) /* SUBTRACT */ SLJIT_S390X_RRE(sgr, 0xb9090000) -/* SUBTRACT LOGICAL */ -SLJIT_S390X_RRE(slgr, 0xb90b0000) - -/* SUBTRACT LOGICAL WITH BORROW */ -SLJIT_S390X_RRE(slbr, 0xb9990000) -SLJIT_S390X_RRE(slbgr, 0xb9890000) - #undef SLJIT_S390X_RRE /* RI-a form instructions */ @@ -509,13 +508,8 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ } /* ADD HALFWORD IMMEDIATE */ -SLJIT_S390X_RIA(ahi, 0xa70a0000, sljit_s16) SLJIT_S390X_RIA(aghi, 0xa70b0000, sljit_s16) -/* COMPARE HALFWORD IMMEDIATE */ -SLJIT_S390X_RIA(chi, 0xa70e0000, sljit_s16) -SLJIT_S390X_RIA(cghi, 0xa70f0000, sljit_s16) - /* LOAD HALFWORD IMMEDIATE */ SLJIT_S390X_RIA(lhi, 0xa7080000, sljit_s16) SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16) @@ -533,9 +527,6 @@ SLJIT_S390X_RIA(mghi, 0xa70d0000, sljit_s16) /* OR IMMEDIATE */ SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16) -/* TEST UNDER MASK */ -SLJIT_S390X_RIA(tmlh, 0xa7000000, sljit_u16) - #undef SLJIT_S390X_RIA /* RIL-a form instructions (requires extended immediate facility) */ @@ -547,30 +538,13 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \ } /* ADD IMMEDIATE */ -SLJIT_S390X_RILA(afi, 0xc20900000000, sljit_s32) SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32) /* ADD IMMEDIATE HIGH */ SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */ -/* ADD LOGICAL IMMEDIATE */ -SLJIT_S390X_RILA(alfi, 0xc20b00000000, sljit_u32) -SLJIT_S390X_RILA(algfi, 0xc20a00000000, sljit_u32) - /* AND IMMEDIATE */ SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32) -SLJIT_S390X_RILA(nilf, 0xc00b00000000, sljit_u32) - -/* COMPARE IMMEDIATE */ -SLJIT_S390X_RILA(cfi, 0xc20d00000000, sljit_s32) -SLJIT_S390X_RILA(cgfi, 0xc20c00000000, sljit_s32) - -/* COMPARE IMMEDIATE HIGH */ -SLJIT_S390X_RILA(cih, 0xcc0d00000000, sljit_s32) /* TODO(mundaym): high-word facility? */ - -/* COMPARE LOGICAL IMMEDIATE */ -SLJIT_S390X_RILA(clfi, 0xc20f00000000, sljit_u32) -SLJIT_S390X_RILA(clgfi, 0xc20e00000000, sljit_u32) /* EXCLUSIVE OR IMMEDIATE */ SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32) @@ -586,8 +560,8 @@ SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32) SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32) SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32) -/* OR IMMEDIATE */ -SLJIT_S390X_RILA(oilf, 0xc00d00000000, sljit_u32) +/* SUBTRACT LOGICAL IMMEDIATE */ +SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32) #undef SLJIT_S390X_RILA @@ -606,18 +580,6 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_u16 d, sljit_gpr x, sljit_gpr b return (pattern) | ri | xi | bi | di; \ } -/* ADD */ -SLJIT_S390X_RXA(a, 0x5a000000) - -/* ADD LOGICAL */ -SLJIT_S390X_RXA(al, 0x5e000000) - -/* AND */ -SLJIT_S390X_RXA(n, 0x54000000) - -/* EXCLUSIVE OR */ -SLJIT_S390X_RXA(x, 0x57000000) - /* LOAD */ SLJIT_S390X_RXA(l, 0x58000000) @@ -630,9 +592,6 @@ SLJIT_S390X_RXA(lh, 0x48000000) /* MULTIPLY SINGLE */ SLJIT_S390X_RXA(ms, 0x71000000) -/* OR */ -SLJIT_S390X_RXA(o, 0x56000000) - /* STORE */ SLJIT_S390X_RXA(st, 0x50000000) @@ -642,12 +601,6 @@ SLJIT_S390X_RXA(stc, 0x42000000) /* STORE HALFWORD */ SLJIT_S390X_RXA(sth, 0x40000000) -/* SUBTRACT */ -SLJIT_S390X_RXA(s, 0x5b000000) - -/* SUBTRACT LOGICAL */ -SLJIT_S390X_RXA(sl, 0x5f000000) - #undef SLJIT_S390X_RXA /* RXY-a instructions */ @@ -660,31 +613,11 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b ri = (sljit_ins)(r & 0xf) << 36; \ xi = (sljit_ins)(x & 0xf) << 32; \ bi = (sljit_ins)(b & 0xf) << 28; \ - di = (sljit_ins)disp_s20(d) << 8; \ + di = disp_s20(d); \ \ return (pattern) | ri | xi | bi | di; \ } -/* ADD */ -SLJIT_S390X_RXYA(ay, 0xe3000000005a, have_ldisp()) -SLJIT_S390X_RXYA(ag, 0xe30000000008, 1) - -/* ADD LOGICAL */ -SLJIT_S390X_RXYA(aly, 0xe3000000005e, have_ldisp()) -SLJIT_S390X_RXYA(alg, 0xe3000000000a, 1) - -/* ADD LOGICAL WITH CARRY */ -SLJIT_S390X_RXYA(alc, 0xe30000000098, 1) -SLJIT_S390X_RXYA(alcg, 0xe30000000088, 1) - -/* AND */ -SLJIT_S390X_RXYA(ny, 0xe30000000054, have_ldisp()) -SLJIT_S390X_RXYA(ng, 0xe30000000080, 1) - -/* EXCLUSIVE OR */ -SLJIT_S390X_RXYA(xy, 0xe30000000057, have_ldisp()) -SLJIT_S390X_RXYA(xg, 0xe30000000082, 1) - /* LOAD */ SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp()) SLJIT_S390X_RXYA(lg, 0xe30000000004, 1) @@ -713,10 +646,6 @@ SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1) SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp()) SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1) -/* OR */ -SLJIT_S390X_RXYA(oy, 0xe30000000056, have_ldisp()) -SLJIT_S390X_RXYA(og, 0xe30000000081, 1) - /* STORE */ SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp()) SLJIT_S390X_RXYA(stg, 0xe30000000024, 1) @@ -727,41 +656,8 @@ SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp()) /* STORE HALFWORD */ SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp()) -/* SUBTRACT */ -SLJIT_S390X_RXYA(sy, 0xe3000000005b, have_ldisp()) -SLJIT_S390X_RXYA(sg, 0xe30000000009, 1) - -/* SUBTRACT LOGICAL */ -SLJIT_S390X_RXYA(sly, 0xe3000000005f, have_ldisp()) -SLJIT_S390X_RXYA(slg, 0xe3000000000b, 1) - -/* SUBTRACT LOGICAL WITH BORROW */ -SLJIT_S390X_RXYA(slb, 0xe30000000099, 1) -SLJIT_S390X_RXYA(slbg, 0xe30000000089, 1) - #undef SLJIT_S390X_RXYA -/* RS-a instructions */ -#define SLJIT_S390X_RSA(name, pattern) \ -SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw d, sljit_gpr b) \ -{ \ - sljit_ins r1 = (sljit_ins)(reg & 0xf) << 20; \ - sljit_ins b2 = (sljit_ins)(b & 0xf) << 12; \ - sljit_ins d2 = (sljit_ins)(d & 0xfff); \ - return (pattern) | r1 | b2 | d2; \ -} - -/* SHIFT LEFT SINGLE LOGICAL */ -SLJIT_S390X_RSA(sll, 0x89000000) - -/* SHIFT RIGHT SINGLE */ -SLJIT_S390X_RSA(sra, 0x8a000000) - -/* SHIFT RIGHT SINGLE LOGICAL */ -SLJIT_S390X_RSA(srl, 0x88000000) - -#undef SLJIT_S390X_RSA - /* RSY-a instructions */ #define SLJIT_S390X_RSYA(name, pattern, cond) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_sw d, sljit_gpr b) \ @@ -772,7 +668,7 @@ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_sw d, sljit_gp r1 = (sljit_ins)(dst & 0xf) << 36; \ r3 = (sljit_ins)(src & 0xf) << 32; \ b2 = (sljit_ins)(b & 0xf) << 28; \ - d2 = (sljit_ins)disp_s20(d) << 8; \ + d2 = disp_s20(d); \ \ return (pattern) | r1 | r3 | b2 | d2; \ } @@ -786,9 +682,6 @@ SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1) /* SHIFT RIGHT SINGLE */ SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1) -/* SHIFT RIGHT SINGLE LOGICAL */ -SLJIT_S390X_RSYA(srlg, 0xeb000000000c, 1) - /* STORE MULTIPLE */ SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1) @@ -831,26 +724,6 @@ SLJIT_S390X_RIEF(risbhg, 0xec000000005d) #undef SLJIT_S390X_RIEF -/* RRF-a instructions */ -#define SLJIT_S390X_RRFA(name, pattern, cond) \ -SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src1, sljit_gpr src2) \ -{ \ - sljit_ins r1, r2, r3; \ -\ - SLJIT_ASSERT(cond); \ - r1 = (sljit_ins)(dst & 0xf) << 4; \ - r2 = (sljit_ins)(src1 & 0xf); \ - r3 = (sljit_ins)(src2 & 0xf) << 12; \ -\ - return (pattern) | r3 | r1 | r2; \ -} - -/* MULTIPLY */ -SLJIT_S390X_RRFA(msrkc, 0xb9fd0000, have_misc2()) -SLJIT_S390X_RRFA(msgrkc, 0xb9ed0000, have_misc2()) - -#undef SLJIT_S390X_RRFA - /* RRF-c instructions (require load/store-on-condition 1 facility) */ #define SLJIT_S390X_RRFC(name, pattern) \ SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \ @@ -919,6 +792,13 @@ SLJIT_S390X_INSTRUCTION(br, sljit_gpr target) return 0x07f0 | target; } +SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target) +{ + sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20; + sljit_ins ri2 = (sljit_ins)target & 0xffff; + return 0xa7040000L | m1 | ri2; +} + SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target) { sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36; @@ -940,6 +820,12 @@ SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst) return 0xb2220000 | ((sljit_ins)(dst & 0xf) << 4); } +/* SET PROGRAM MASK */ +SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst) +{ + return 0x0400 | ((sljit_ins)(dst & 0xf) << 4); +} + /* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */ SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) { @@ -948,30 +834,20 @@ SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, s #undef SLJIT_S390X_INSTRUCTION -/* load condition code as needed to match type */ -static sljit_s32 push_load_cc(struct sljit_compiler *compiler, sljit_s32 type) +static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r) { - type &= ~SLJIT_I32_OP; - switch (type) { - case SLJIT_ZERO: - case SLJIT_NOT_ZERO: - return push_inst(compiler, cih(flag_r, 0)); - break; - default: - return push_inst(compiler, tmlh(flag_r, 0x3000)); - break; - } - return SLJIT_SUCCESS; -} - -static sljit_s32 push_store_zero_flag(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr source) -{ - /* insert low 32-bits into high 32-bits of flag register */ - FAIL_IF(push_inst(compiler, risbhgz(flag_r, source, 0, 31, 32))); - if (!(op & SLJIT_I32_OP)) { - /* OR high 32-bits with high 32-bits of flag register */ - return push_inst(compiler, rosbg(flag_r, source, 0, 31, 0)); - } + /* Condition codes: bits 18 and 19. + Transformation: + 0 (zero and no overflow) : unchanged + 1 (non-zero and no overflow) : unchanged + 2 (zero and overflow) : decreased by 1 + 3 (non-zero and overflow) : decreased by 1 if non-zero */ + FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_I32_OP) ? 1 : 2) + 2 + 3 + 1))); + FAIL_IF(push_inst(compiler, ipm(flag_r))); + FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? or(dst_r, dst_r) : ogr(dst_r, dst_r))); + FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3))); + FAIL_IF(push_inst(compiler, slfi(flag_r, 0x10000000))); + FAIL_IF(push_inst(compiler, spm(flag_r))); return SLJIT_SUCCESS; } @@ -1088,18 +964,19 @@ static sljit_s32 make_addr_bx(struct sljit_compiler *compiler, #define WHEN(cond, r, i1, i2, addr) \ (cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr) +/* May clobber tmp1. */ static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst, sljit_s32 src, sljit_sw srcw, - sljit_gpr tmp /* clobbered */, sljit_s32 is_32bit) + sljit_s32 is_32bit) { struct addr addr; sljit_ins ins; SLJIT_ASSERT(src & SLJIT_MEM); if (have_ldisp() || !is_32bit) - FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp)); + FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1)); else - FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp)); + FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1)); if (is_32bit) ins = WHEN(is_u12(addr.offset), dst, l, ly, addr); @@ -1109,18 +986,19 @@ static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst, return push_inst(compiler, ins); } +/* May clobber tmp1. */ static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src, sljit_s32 dst, sljit_sw dstw, - sljit_gpr tmp /* clobbered */, sljit_s32 is_32bit) + sljit_s32 is_32bit) { struct addr addr; sljit_ins ins; SLJIT_ASSERT(dst & SLJIT_MEM); if (have_ldisp() || !is_32bit) - FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp)); + FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1)); else - FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp)); + FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1)); if (is_32bit) ins = WHEN(is_u12(addr.offset), src, st, sty, addr); @@ -1132,6 +1010,358 @@ static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src, #undef WHEN +static sljit_s32 emit_move(struct sljit_compiler *compiler, + sljit_gpr dst_r, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_ASSERT(!SLOW_IS_REG(src) || dst_r != gpr(src & REG_MASK)); + + if (src & SLJIT_IMM) + return push_load_imm_inst(compiler, dst_r, srcw); + + if (src & SLJIT_MEM) + return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_I32_OP) != 0); + + sljit_gpr src_r = gpr(src & REG_MASK); + return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, src_r) : lgr(dst_r, src_r)); +} + +static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_gpr dst_r = tmp0; + sljit_gpr src_r = tmp1; + sljit_s32 needs_move = 1; + + if (SLOW_IS_REG(dst)) { + dst_r = gpr(dst & REG_MASK); + + if (dst == src1) + needs_move = 0; + else if (dst == src2) { + dst_r = tmp0; + needs_move = 2; + } + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (FAST_IS_REG(src2)) + src_r = gpr(src2 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + + FAIL_IF(push_inst(compiler, ins | (dst_r << 4) | src_r)); + + if (needs_move != 2) + return SLJIT_SUCCESS; + + dst_r = gpr(dst & REG_MASK); + return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); +} + +static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr src1_r = tmp0; + sljit_gpr src2_r = tmp1; + + if (FAST_IS_REG(src1)) + src1_r = gpr(src1 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if (FAST_IS_REG(src2)) + src2_r = gpr(src2 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + + return push_inst(compiler, ins | (dst_r << 4) | src1_r | (src2_r << 12)); +} + +typedef enum { + RI_A, + RIL_A, +} emit_ril_type; + +static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_sw src2w, + emit_ril_type type) +{ + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + + if (SLOW_IS_REG(dst)) { + dst_r = gpr(dst & REG_MASK); + + if (dst == src1) + needs_move = 0; + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (type == RIL_A) + return push_inst(compiler, ins | (dst_r << 36) | (src2w & 0xffffffff)); + return push_inst(compiler, ins | (dst_r << 20) | (src2w & 0xffff)); +} + +static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_sw src2w) +{ + sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr src_r = tmp0; + + if (!SLOW_IS_REG(src1)) + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + else + src_r = gpr(src1 & REG_MASK); + + return push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (src2w & 0xffff) << 16); +} + +typedef enum { + RX_A, + RXY_A, +} emit_rx_type; + +static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w, + emit_rx_type type) +{ + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + sljit_gpr base, index; + + SLJIT_ASSERT(src2 & SLJIT_MEM); + + if (SLOW_IS_REG(dst)) { + dst_r = gpr(dst); + + if (dst == src1) + needs_move = 0; + else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) { + dst_r = tmp0; + needs_move = 2; + } + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + base = gpr(src2 & REG_MASK); + index = tmp0; + + if (src2 & OFFS_REG_MASK) { + index = gpr(OFFS_REG(src2)); + + if (src2w != 0) { + FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0))); + src2w = 0; + index = tmp1; + } + } else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w)); + + if (src2 & REG_MASK) + index = tmp1; + else + base = tmp1; + src2w = 0; + } + + if (type == RX_A) + ins |= (dst_r << 20) | (index << 16) | (base << 12) | src2w; + else + ins |= (dst_r << 36) | (index << 32) | (base << 28) | disp_s20(src2w); + + FAIL_IF(push_inst(compiler, ins)); + + if (needs_move != 2) + return SLJIT_SUCCESS; + + dst_r = gpr(dst); + return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); +} + +static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, sljit_sw dstw, + sljit_sw srcw) +{ + SLJIT_ASSERT(dst & SLJIT_MEM); + + sljit_gpr dst_r = tmp1; + + if (dst & OFFS_REG_MASK) { + sljit_gpr index = tmp1; + + if ((dstw & 0x3) == 0) + index = gpr(OFFS_REG(dst)); + else + FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0))); + + FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, index))); + dstw = 0; + } + else if (!is_s20(dstw)) { + FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw)); + + if (dst & REG_MASK) + FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, tmp1))); + + dstw = 0; + } + else + dst_r = gpr(dst & REG_MASK); + + return push_inst(compiler, ins | ((srcw & 0xff) << 32) | (dst_r << 28) | disp_s20(dstw)); +} + +struct ins_forms { + sljit_ins op_r; + sljit_ins op_gr; + sljit_ins op_rk; + sljit_ins op_grk; + sljit_ins op; + sljit_ins op_y; + sljit_ins op_g; +}; + +static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 mode = compiler->mode; + sljit_ins ins, ins_k; + + if ((src1 | src2) & SLJIT_MEM) { + sljit_ins ins12, ins20; + + if (mode & SLJIT_I32_OP) { + ins12 = forms->op; + ins20 = forms->op_y; + } + else { + ins12 = 0; + ins20 = forms->op_g; + } + + if (ins12 && ins20) { + /* Extra instructions needed for address computation can be executed independently. */ + if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM) + || ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) { + if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w)) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + + if (src1 & SLJIT_MEM) { + if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w)) + return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A); + + return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A); + } + } + else if (ins12 || ins20) { + emit_rx_type rx_type; + + if (ins12) { + rx_type = RX_A; + ins = ins12; + } + else { + rx_type = RXY_A; + ins = ins20; + } + + if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM) + || ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w))))) + return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type); + + if (src1 & SLJIT_MEM) + return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type); + } + } + + if (mode & SLJIT_I32_OP) { + ins = forms->op_r; + ins_k = forms->op_rk; + } + else { + ins = forms->op_gr; + ins_k = forms->op_grk; + } + + SLJIT_ASSERT(ins != 0 || ins_k != 0); + + if (ins && SLOW_IS_REG(dst)) { + if (dst == src1) + return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w); + + if (dst == src2) + return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w); + } + + if (ins_k == 0) + return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w); + + return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w); +} + +static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 mode = compiler->mode; + sljit_ins ins; + + if (src2 & SLJIT_MEM) { + sljit_ins ins12, ins20; + + if (mode & SLJIT_I32_OP) { + ins12 = forms->op; + ins20 = forms->op_y; + } + else { + ins12 = 0; + ins20 = forms->op_g; + } + + if (ins12 && ins20) { + if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w)) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + else if (ins12) + return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A); + else if (ins20) + return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A); + } + + ins = (mode & SLJIT_I32_OP) ? forms->op_rk : forms->op_grk; + + if (ins == 0 || (SLOW_IS_REG(dst) && dst == src1)) + return emit_rr(compiler, (mode & SLJIT_I32_OP) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w); + + return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w); +} + SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) { struct sljit_label *label; @@ -1560,6 +1790,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile /* TODO(carenas): implement prefetch? */ return SLJIT_SUCCESS; } + if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) { /* LOAD REGISTER */ if (FAST_IS_REG(dst) && FAST_IS_REG(src)) { @@ -1610,11 +1841,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile SLJIT_UNREACHABLE(); } FAIL_IF(push_inst(compiler, ins)); - if (HAS_FLAGS(op)) { - /* only handle zero flag */ - SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK)); - return push_store_zero_flag(compiler, op, dst_r); - } return SLJIT_SUCCESS; } /* LOAD IMMEDIATE */ @@ -1691,11 +1917,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile SLJIT_UNREACHABLE(); } FAIL_IF(push_inst(compiler, ins)); - if (HAS_FLAGS(op)) { - /* only handle zero flag */ - SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK)); - return push_store_zero_flag(compiler, op, reg); - } return SLJIT_SUCCESS; } /* STORE and STORE IMMEDIATE */ @@ -1724,11 +1945,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_MOV_P: case SLJIT_MOV: FAIL_IF(push_inst(compiler, LEVAL(stg))); - if (HAS_FLAGS(op)) { - /* only handle zero flag */ - SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK)); - return push_store_zero_flag(compiler, op, reg); - } return SLJIT_SUCCESS; default: SLJIT_UNREACHABLE(); @@ -1768,11 +1984,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1)); FAIL_IF(push_inst(compiler, EVAL(stg, tmp0, mem))); - if (HAS_FLAGS(op)) { - /* only handle zero flag */ - SLJIT_ASSERT(!(op & VARIABLE_FLAG_MASK)); - return push_store_zero_flag(compiler, op, tmp0); - } return SLJIT_SUCCESS; default: SLJIT_UNREACHABLE(); @@ -1786,7 +1997,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile dst_r = SLOW_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0; src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0; if (src & SLJIT_MEM) - FAIL_IF(load_word(compiler, src_r, src, srcw, tmp1, src & SLJIT_I32_OP)); + FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_I32_OP)); + + compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); /* TODO(mundaym): optimize loads and stores */ switch (opcode | (op & SLJIT_I32_OP)) { @@ -1811,9 +2024,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile } break; case SLJIT_NEG: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; FAIL_IF(push_inst(compiler, lcgr(dst_r, src_r))); break; case SLJIT_NEG32: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; FAIL_IF(push_inst(compiler, lcr(dst_r, src_r))); break; case SLJIT_CLZ: @@ -1840,17 +2055,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile SLJIT_UNREACHABLE(); } - /* write condition code to emulated flag register */ - if (op & VARIABLE_FLAG_MASK) - FAIL_IF(push_inst(compiler, ipm(flag_r))); - - /* write zero flag to emulated flag register */ - if (op & SLJIT_SET_Z) - FAIL_IF(push_store_zero_flag(compiler, op, dst_r)); + if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW)) + FAIL_IF(update_zero_overflow(compiler, op, dst_r)); /* TODO(carenas): doesn't need FAIL_IF */ if ((dst != SLJIT_UNUSED) && (dst & SLJIT_MEM)) - FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP)); + FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP)); return SLJIT_SUCCESS; } @@ -1888,530 +2098,554 @@ static SLJIT_INLINE int sets_signed_flag(sljit_s32 op) return 0; } -/* Report whether we have an instruction for: - op dst src imm - where dst and src are separate registers. */ -static int have_op_3_imm(sljit_s32 op, sljit_sw imm) { - return 0; /* TODO(mundaym): implement */ -} - -/* Report whether we have an instruction for: - op reg imm - where reg is both a source and the destination. */ -static int have_op_2_imm(sljit_s32 op, sljit_sw imm) { - switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) { - case SLJIT_ADD32: - case SLJIT_ADD: - if (!HAS_FLAGS(op) || sets_signed_flag(op)) - return have_eimm() ? is_s32(imm) : is_s16(imm); +static const struct ins_forms add_forms = { + 0x1a00, /* ar */ + 0xb9080000, /* agr */ + 0xb9f80000, /* ark */ + 0xb9e80000, /* agrk */ + 0x5a000000, /* a */ + 0xe3000000005a, /* ay */ + 0xe30000000008, /* ag */ +}; - return have_eimm() && is_u32(imm); - case SLJIT_MUL32: - case SLJIT_MUL: - /* TODO(mundaym): general extension check */ - /* for ms{,g}fi */ - if (op & VARIABLE_FLAG_MASK) - return 0; - - return have_genext() && is_s16(imm); - case SLJIT_OR32: - case SLJIT_XOR32: - case SLJIT_AND32: - /* only use if have extended immediate facility */ - /* this ensures flags are set correctly */ - return have_eimm(); - case SLJIT_AND: - case SLJIT_OR: - case SLJIT_XOR: - /* TODO(mundaym): make this more flexible */ - /* avoid using immediate variations, flags */ - /* won't be set correctly */ - return 0; - case SLJIT_ADDC32: - case SLJIT_ADDC: - /* no ADD LOGICAL WITH CARRY IMMEDIATE */ - return 0; - case SLJIT_SUB: - case SLJIT_SUB32: - case SLJIT_SUBC: - case SLJIT_SUBC32: - /* no SUBTRACT IMMEDIATE */ - /* TODO(mundaym): SUBTRACT LOGICAL IMMEDIATE */ - return 0; - } - return 0; -} +static const struct ins_forms logical_add_forms = { + 0x1e00, /* alr */ + 0xb90a0000, /* algr */ + 0xb9fa0000, /* alrk */ + 0xb9ea0000, /* algrk */ + 0x5e000000, /* al */ + 0xe3000000005e, /* aly */ + 0xe3000000000a, /* alg */ +}; -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, +static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src1, src1w); - ADJUST_LOCAL_OFFSET(src2, src2w); - - if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) - return SLJIT_SUCCESS; - - sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW; + int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); + const struct ins_forms *forms; + sljit_ins ins; - if (is_commutative(op)) { - #define SWAP_ARGS \ - do { \ - sljit_s32 t = src1; \ - sljit_sw tw = src1w; \ - src1 = src2; \ - src1w = src2w; \ - src2 = t; \ - src2w = tw; \ - } while(0); - - /* prefer immediate in src2 */ - if (src1 & SLJIT_IMM) { - SWAP_ARGS + if (src2 & SLJIT_IMM) { + if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { + if (sets_overflow) + ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + else + ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + return emit_siy(compiler, ins, dst, dstw, src2w); } - /* prefer to have src1 use same register as dst */ - if (FAST_IS_REG(src2) && gpr(src2 & REG_MASK) == dst_r) { - SWAP_ARGS + if (is_s16(src2w)) { + if (sets_overflow) + ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + else + ins = (op & SLJIT_I32_OP) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w)); + goto done; } - /* prefer memory argument in src2 */ - if (FAST_IS_REG(src2) && (src1 & SLJIT_MEM)) { - SWAP_ARGS + if (!sets_overflow) { + if ((op & SLJIT_I32_OP) || is_u32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; + } + if (is_u32(-src2w)) { + FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A)); + goto done; + } + } + else if ((op & SLJIT_I32_OP) || is_s32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; } - #undef SWAP_ARGS } - /* src1 must be in a register */ - sljit_gpr src1_r = FAST_IS_REG(src1) ? gpr(src1 & REG_MASK) : tmp0; - if (src1 & SLJIT_IMM) - FAIL_IF(push_load_imm_inst(compiler, src1_r, src1w)); - - if (src1 & SLJIT_MEM) - FAIL_IF(load_word(compiler, src1_r, src1, src1w, tmp1, op & SLJIT_I32_OP)); - - /* emit comparison before subtract */ - if (GET_OPCODE(op) == SLJIT_SUB && (op & VARIABLE_FLAG_MASK)) { - sljit_sw cmp = 0; - switch (GET_FLAG_TYPE(op)) { - case SLJIT_LESS: - case SLJIT_LESS_EQUAL: - case SLJIT_GREATER: - case SLJIT_GREATER_EQUAL: - cmp = 1; /* unsigned */ - break; - case SLJIT_EQUAL: - case SLJIT_SIG_LESS: - case SLJIT_SIG_LESS_EQUAL: - case SLJIT_SIG_GREATER: - case SLJIT_SIG_GREATER_EQUAL: - cmp = -1; /* signed */ - break; - } - if (cmp) { - /* clear flags - no need to generate now */ - op &= ~VARIABLE_FLAG_MASK; - sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1; - if (src2 & SLJIT_IMM) { - #define LEVAL(i) i(src1_r, src2w) - if (cmp > 0 && is_u32(src2w)) { - /* unsigned */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, clfi, clgfi))); - } - else if (cmp < 0 && is_s16(src2w)) { - /* signed */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, chi, cghi))); - } - else if (cmp < 0 && is_s32(src2w)) { - /* signed */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, cfi, cgfi))); - } - #undef LEVAL - #define LEVAL(i) i(src1_r, src2_r) - else { - FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w)); - if (cmp > 0) { - /* unsigned */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, clr, clgr))); - } - if (cmp < 0) { - /* signed */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, cr, cgr))); - } + forms = sets_overflow ? &add_forms : &logical_add_forms; + FAIL_IF(emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w)); + +done: + if (sets_zero_overflow) + FAIL_IF(update_zero_overflow(compiler, op, SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0)); + + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + + return SLJIT_SUCCESS; +} + +static const struct ins_forms sub_forms = { + 0x1b00, /* sr */ + 0xb9090000, /* sgr */ + 0xb9f90000, /* srk */ + 0xb9e90000, /* sgrk */ + 0x5b000000, /* s */ + 0xe3000000005b, /* sy */ + 0xe30000000009, /* sg */ +}; + +static const struct ins_forms logical_sub_forms = { + 0x1f00, /* slr */ + 0xb90b0000, /* slgr */ + 0xb9fb0000, /* slrk */ + 0xb9eb0000, /* slgrk */ + 0x5f000000, /* sl */ + 0xe3000000005f, /* sly */ + 0xe3000000000b, /* slg */ +}; + +static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + int sets_signed = sets_signed_flag(op); + int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); + const struct ins_forms *forms; + sljit_ins ins; + + if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + int compare_signed = GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS; + + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE; + + if (src2 & SLJIT_IMM) { + if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) + { + if ((op & SLJIT_I32_OP) || is_s32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */; + return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); } } else { - if (src2 & SLJIT_MEM) { - /* TODO(mundaym): comparisons with memory */ - /* load src2 into register */ - FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP)); + if ((op & SLJIT_I32_OP) || is_u32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */; + return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A); } - if (cmp > 0) { - /* unsigned */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, clr, clgr))); - } - if (cmp < 0) { - /* signed */ - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, cr, cgr))); - } - #undef LEVAL + if (is_s16(src2w)) + return emit_rie_d(compiler, 0xec00000000db /* alghsik */, SLJIT_UNUSED, src1, src1w, src2w); } - FAIL_IF(push_inst(compiler, ipm(flag_r))); } - } + else if (src2 & SLJIT_MEM) { + if ((op & SLJIT_I32_OP) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) { + ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */; + return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A); + } - if (!HAS_FLAGS(op) && dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; + if (compare_signed) + ins = (op & SLJIT_I32_OP) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */; + else + ins = (op & SLJIT_I32_OP) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */; + return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A); + } - /* need to specify signed or logical operation */ - int signed_flags = sets_signed_flag(op); + if (compare_signed) + ins = (op & SLJIT_I32_OP) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */; + else + ins = (op & SLJIT_I32_OP) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */; + return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w); + } - if (is_shift(op)) { - /* handle shifts first, they have more constraints than other operations */ - sljit_sw d = 0; - sljit_gpr b = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : r0; - if (src2 & SLJIT_IMM) - d = src2w & ((op & SLJIT_I32_OP) ? 31 : 63); + if (src2 & SLJIT_IMM) { + sljit_sw neg_src2w = -src2w; - if (src2 & SLJIT_MEM) { - /* shift amount (b) cannot be in r0 (i.e. tmp0) */ - FAIL_IF(load_word(compiler, tmp1, src2, src2w, tmp1, op & SLJIT_I32_OP)); - b = tmp1; - } - /* src1 and dst share the same register in the base 32-bit ISA */ - /* TODO(mundaym): not needed when distinct-operand facility is available */ - int workaround_alias = op & SLJIT_I32_OP && src1_r != dst_r; - if (workaround_alias) { - /* put src1 into tmp0 so we can overwrite it */ - FAIL_IF(push_inst(compiler, lr(tmp0, src1_r))); - src1_r = tmp0; - } - switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) { - case SLJIT_SHL: - FAIL_IF(push_inst(compiler, sllg(dst_r, src1_r, d, b))); - break; - case SLJIT_SHL32: - FAIL_IF(push_inst(compiler, sll(src1_r, d, b))); - break; - case SLJIT_LSHR: - FAIL_IF(push_inst(compiler, srlg(dst_r, src1_r, d, b))); - break; - case SLJIT_LSHR32: - FAIL_IF(push_inst(compiler, srl(src1_r, d, b))); - break; - case SLJIT_ASHR: - FAIL_IF(push_inst(compiler, srag(dst_r, src1_r, d, b))); - break; - case SLJIT_ASHR32: - FAIL_IF(push_inst(compiler, sra(src1_r, d, b))); - break; - default: - SLJIT_UNREACHABLE(); + if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) { + if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) { + if (sets_signed) + ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */; + else + ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */; + return emit_siy(compiler, ins, dst, dstw, neg_src2w); + } + + if (is_s16(neg_src2w)) { + if (sets_signed) + ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */; + else + ins = (op & SLJIT_I32_OP) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */; + FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w)); + goto done; + } } - if (workaround_alias && dst_r != src1_r) - FAIL_IF(push_inst(compiler, lr(dst_r, src1_r))); - } - else if ((GET_OPCODE(op) == SLJIT_MUL) && HAS_FLAGS(op)) { - /* multiply instructions do not generally set flags so we need to manually */ - /* detect overflow conditions */ - /* TODO(mundaym): 64-bit overflow */ - SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_MUL_OVERFLOW || - GET_FLAG_TYPE(op) == SLJIT_MUL_NOT_OVERFLOW); - sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1; - if (src2 & SLJIT_IMM) { - /* load src2 into register */ - FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w)); + if (!sets_signed) { + if ((op & SLJIT_I32_OP) || is_u32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A)); + goto done; + } + if (is_u32(neg_src2w)) { + FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A)); + goto done; + } } - if (src2 & SLJIT_MEM) { - /* load src2 into register */ - FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP)); + else if ((op & SLJIT_I32_OP) || is_s32(neg_src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */; + FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A)); + goto done; } - if (have_misc2()) { - #define LEVAL(i) i(dst_r, src1_r, src2_r) - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, msrkc, msgrkc))); - #undef LEVAL + } + + forms = sets_signed ? &sub_forms : &logical_sub_forms; + FAIL_IF(emit_non_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w)); + +done: + if (sets_signed) { + sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + + if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) { + /* In case of overflow, the sign bit of the two source operands must be different, and + - the first operand is greater if the sign bit of the result is set + - the first operand is less if the sign bit of the result is not set + The -result operation sets the corrent sign, because the result cannot be zero. + The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */ + FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2))); + FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r))); } - else if (op & SLJIT_I32_OP) { - op &= ~VARIABLE_FLAG_MASK; - FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r))); - FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r))); - if (dst_r != tmp0) { - FAIL_IF(push_inst(compiler, lr(dst_r, tmp0))); - } - FAIL_IF(push_inst(compiler, aih(tmp0, 1))); - FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U))); - FAIL_IF(push_inst(compiler, ipm(flag_r))); - FAIL_IF(push_inst(compiler, oilh(flag_r, 0x2000))); + else if (op & SLJIT_SET_Z) + FAIL_IF(update_zero_overflow(compiler, op, dst_r)); + } + + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + + return SLJIT_SUCCESS; +} + +static const struct ins_forms multiply_forms = { + 0xb2520000, /* msr */ + 0xb90c0000, /* msgr */ + 0xb9fd0000, /* msrkc */ + 0xb9ed0000, /* msgrkc */ + 0x71000000, /* ms */ + 0xe30000000051, /* msy */ + 0xe3000000000c, /* msg */ +}; + +static const struct ins_forms multiply_overflow_forms = { + 0, + 0, + 0xb9fd0000, /* msrkc */ + 0xb9ed0000, /* msgrkc */ + 0, + 0xe30000000053, /* msc */ + 0xe30000000083, /* msgc */ +}; + +static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins ins; + + if (HAS_FLAGS(op)) { + /* if have_misc2 fails, this operation should be emulated. 32 bit emulation: + FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r))); + FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r))); + if (dst_r != tmp0) { + FAIL_IF(push_inst(compiler, lr(dst_r, tmp0))); } - else - return SLJIT_ERR_UNSUPPORTED; + FAIL_IF(push_inst(compiler, aih(tmp0, 1))); + FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U))); + FAIL_IF(push_inst(compiler, ipm(flag_r))); + FAIL_IF(push_inst(compiler, oilh(flag_r, 0x2000))); */ + return emit_commutative(compiler, &multiply_overflow_forms, dst, dstw, src1, src1w, src2, src2w); } - else if ((GET_OPCODE(op) == SLJIT_SUB) && (op & SLJIT_SET_Z) && !signed_flags) { - /* subtract logical instructions do not set the right flags unfortunately */ - /* instead, negate src2 and issue an add logical */ - /* TODO(mundaym): distinct operand facility where needed */ - if (src1_r != dst_r && src1_r != tmp0) { - #define LEVAL(i) i(tmp0, src1_r) - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, lr, lgr))); - src1_r = tmp0; - #undef LEVAL - } - sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1; - if (src2 & SLJIT_IMM) { - /* load src2 into register */ - FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w)); + + if (src2 & SLJIT_IMM) { + if (is_s16(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */; + return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A); } - if (src2 & SLJIT_MEM) { - /* load src2 into register */ - FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP)); + + if (is_s32(src2w)) { + ins = (op & SLJIT_I32_OP) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */; + return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A); } - if (op & SLJIT_I32_OP) { - FAIL_IF(push_inst(compiler, lcr(tmp1, src2_r))); - FAIL_IF(push_inst(compiler, alr(src1_r, tmp1))); - if (src1_r != dst_r) - FAIL_IF(push_inst(compiler, lr(dst_r, src1_r))); + } + + return emit_commutative(compiler, &multiply_forms, dst, dstw, src1, src1w, src2, src2w); +} + +static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_uw imm, sljit_s32 count16) +{ + sljit_s32 mode = compiler->mode; + sljit_gpr dst_r = tmp0; + sljit_s32 needs_move = 1; + + if (SLOW_IS_REG(dst)) { + dst_r = gpr(dst & REG_MASK); + if (dst == src1) + needs_move = 0; + } + + if (needs_move) + FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); + + if (type == SLJIT_AND) { + if (!(mode & SLJIT_I32_OP)) + FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | (dst_r << 36) | (imm >> 32))); + return push_inst(compiler, 0xc00b00000000 /* nilf */ | (dst_r << 36) | (imm & 0xffffffff)); + } + else if (type == SLJIT_OR) { + if (count16 >= 3) { + FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32))); + return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff)); } - else { - FAIL_IF(push_inst(compiler, lcgr(tmp1, src2_r))); - FAIL_IF(push_inst(compiler, algr(src1_r, tmp1))); - if (src1_r != dst_r) - FAIL_IF(push_inst(compiler, lgr(dst_r, src1_r))); + + if (count16 >= 2) { + if ((imm & 0x00000000ffffffffull) == 0) + return push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32)); + if ((imm & 0xffffffff00000000ull) == 0) + return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff)); } + + if ((imm & 0xffff000000000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | (dst_r << 20) | (imm >> 48))); + if ((imm & 0x0000ffff00000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | (dst_r << 20) | ((imm >> 32) & 0xffff))); + if ((imm & 0x00000000ffff0000ull) != 0) + FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | (dst_r << 20) | ((imm >> 16) & 0xffff))); + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + return push_inst(compiler, 0xa50b0000 /* oill */ | (dst_r << 20) | (imm & 0xffff)); + return SLJIT_SUCCESS; } - else if ((src2 & SLJIT_IMM) && (src1_r == dst_r) && have_op_2_imm(op, src2w)) { - switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) { - #define LEVAL(i) i(dst_r, src2w) - case SLJIT_ADD: - if (!HAS_FLAGS(op) || signed_flags) { - FAIL_IF(push_inst(compiler, - WHEN2(is_s16(src2w), aghi, agfi))); - } - else - FAIL_IF(push_inst(compiler, LEVAL(algfi))); - break; - case SLJIT_ADD32: - if (!HAS_FLAGS(op) || signed_flags) - FAIL_IF(push_inst(compiler, - WHEN2(is_s16(src2w), ahi, afi))); + if ((imm & 0xffffffff00000000ull) != 0) + FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | (dst_r << 36) | (imm >> 32))); + if ((imm & 0x00000000ffffffffull) != 0 || imm == 0) + return push_inst(compiler, 0xc00700000000 /* xilf */ | (dst_r << 36) | (imm & 0xffffffff)); + return SLJIT_SUCCESS; +} + +static const struct ins_forms bitwise_and_forms = { + 0x1400, /* nr */ + 0xb9800000, /* ngr */ + 0xb9f40000, /* nrk */ + 0xb9e40000, /* ngrk */ + 0x54000000, /* n */ + 0xe30000000054, /* ny */ + 0xe30000000080, /* ng */ +}; + +static const struct ins_forms bitwise_or_forms = { + 0x1600, /* or */ + 0xb9810000, /* ogr */ + 0xb9f60000, /* ork */ + 0xb9e60000, /* ogrk */ + 0x56000000, /* o */ + 0xe30000000056, /* oy */ + 0xe30000000081, /* og */ +}; + +static const struct ins_forms bitwise_xor_forms = { + 0x1700, /* xr */ + 0xb9820000, /* xgr */ + 0xb9f70000, /* xrk */ + 0xb9e70000, /* xgrk */ + 0x57000000, /* x */ + 0xe30000000057, /* xy */ + 0xe30000000082, /* xg */ +}; + +static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 type = GET_OPCODE(op); + const struct ins_forms *forms; + + if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == SLJIT_UNUSED))) { + sljit_s32 count16 = 0; + sljit_uw imm = (sljit_uw)src2w; + + if (op & SLJIT_I32_OP) + imm &= 0xffffffffull; + + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + count16++; + if ((imm & 0x00000000ffff0000ull) != 0) + count16++; + if ((imm & 0x0000ffff00000000ull) != 0) + count16++; + if ((imm & 0xffff000000000000ull) != 0) + count16++; + + if (type == SLJIT_AND && dst == SLJIT_UNUSED && count16 == 1) { + sljit_gpr src_r = tmp0; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1 & REG_MASK); else - FAIL_IF(push_inst(compiler, LEVAL(alfi))); - - break; - #undef LEVAL /* TODO(carenas): move down and refactor? */ - case SLJIT_MUL: - FAIL_IF(push_inst(compiler, mhi(dst_r, src2w))); - break; - case SLJIT_MUL32: - FAIL_IF(push_inst(compiler, mghi(dst_r, src2w))); - break; - case SLJIT_OR32: - FAIL_IF(push_inst(compiler, oilf(dst_r, src2w))); - break; - case SLJIT_XOR32: - FAIL_IF(push_inst(compiler, xilf(dst_r, src2w))); - break; - case SLJIT_AND32: - FAIL_IF(push_inst(compiler, nilf(dst_r, src2w))); - break; - default: - SLJIT_UNREACHABLE(); + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if ((imm & 0x000000000000ffffull) != 0 || imm == 0) + return push_inst(compiler, 0xa7010000 | (src_r << 20) | imm); + if ((imm & 0x00000000ffff0000ull) != 0) + return push_inst(compiler, 0xa7000000 | (src_r << 20) | (imm >> 16)); + if ((imm & 0x0000ffff00000000ull) != 0) + return push_inst(compiler, 0xa7030000 | (src_r << 20) | (imm >> 32)); + return push_inst(compiler, 0xa7020000 | (src_r << 20) | (imm >> 48)); } + + if (!(op & SLJIT_SET_Z)) + return sljit_emit_bitwise_imm(compiler, type, dst, dstw, src1, src1w, imm, count16); } - else if ((src2 & SLJIT_IMM) && have_op_3_imm(op, src2w)) { - abort(); /* TODO(mundaym): implement */ + + if (type == SLJIT_AND) + forms = &bitwise_and_forms; + else if (type == SLJIT_OR) + forms = &bitwise_or_forms; + else + forms = &bitwise_xor_forms; + + return emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w); +} + +static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 type = GET_OPCODE(op); + sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr src_r = tmp0; + sljit_gpr base_r = tmp0; + sljit_ins imm = 0; + sljit_ins ins; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1 & REG_MASK); + else + FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); + + if (src2 & SLJIT_IMM) + imm = src2w & ((op & SLJIT_I32_OP) ? 0x1f : 0x3f); + else if (FAST_IS_REG(src2)) + base_r = gpr(src2 & REG_MASK); + else { + FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); + base_r = tmp1; } - else if ((src2 & SLJIT_MEM) && (dst_r == src1_r)) { - /* most 32-bit instructions can only handle 12-bit immediate offsets */ - int need_u12 = !have_ldisp() && - (op & SLJIT_I32_OP) && - (GET_OPCODE(op) != SLJIT_ADDC) && - (GET_OPCODE(op) != SLJIT_SUBC); - struct addr mem; - if (need_u12) - FAIL_IF(make_addr_bx(compiler, &mem, src2, src2w, tmp1)); + + if ((op & SLJIT_I32_OP) && dst_r == src_r) { + if (type == SLJIT_SHL) + ins = 0x89000000 /* sll */; + else if (type == SLJIT_LSHR) + ins = 0x88000000 /* srl */; else - FAIL_IF(make_addr_bxy(compiler, &mem, src2, src2w, tmp1)); - - int can_u12 = is_u12(mem.offset) ? 1 : 0; - sljit_ins ins = 0; - switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) { - /* 64-bit ops */ - #define LEVAL(i) EVAL(i, dst_r, mem) - case SLJIT_ADD: - ins = WHEN2(signed_flags, ag, alg); - break; - case SLJIT_SUB: - ins = WHEN2(signed_flags, sg, slg); - break; - case SLJIT_ADDC: - ins = LEVAL(alcg); - break; - case SLJIT_SUBC: - ins = LEVAL(slbg); - break; - case SLJIT_MUL: - ins = LEVAL(msg); - break; - case SLJIT_OR: - ins = LEVAL(og); - break; - case SLJIT_XOR: - ins = LEVAL(xg); - break; - case SLJIT_AND: - ins = LEVAL(ng); - break; - /* 32-bit ops */ - case SLJIT_ADD32: - if (signed_flags) - ins = WHEN2(can_u12, a, ay); - else - ins = WHEN2(can_u12, al, aly); - break; - case SLJIT_SUB32: - if (signed_flags) - ins = WHEN2(can_u12, s, sy); - else - ins = WHEN2(can_u12, sl, sly); - break; - case SLJIT_ADDC32: - ins = LEVAL(alc); - break; - case SLJIT_SUBC32: - ins = LEVAL(slb); - break; - case SLJIT_MUL32: - ins = WHEN2(can_u12, ms, msy); - break; - case SLJIT_OR32: - ins = WHEN2(can_u12, o, oy); - break; - case SLJIT_XOR32: - ins = WHEN2(can_u12, x, xy); - break; - case SLJIT_AND32: - ins = WHEN2(can_u12, n, ny); - break; - #undef LEVAL - default: - SLJIT_UNREACHABLE(); - } - FAIL_IF(push_inst(compiler, ins)); + ins = 0x8a000000 /* sra */; + + FAIL_IF(push_inst(compiler, ins | (dst_r << 20) | (base_r << 12) | imm)); } else { - sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1; - if (src2 & SLJIT_IMM) { - /* load src2 into register */ - FAIL_IF(push_load_imm_inst(compiler, src2_r, src2w)); - } - if (src2 & SLJIT_MEM) { - /* load src2 into register */ - FAIL_IF(load_word(compiler, src2_r, src2, src2w, tmp1, op & SLJIT_I32_OP)); - } - /* TODO(mundaym): distinct operand facility where needed */ - #define LEVAL(i) i(tmp0, src1_r) - if (src1_r != dst_r && src1_r != tmp0) { - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, lr, lgr))); - src1_r = tmp0; - } - #undef LEVAL - sljit_ins ins = 0; - switch (GET_OPCODE(op) | (op & SLJIT_I32_OP)) { - #define LEVAL(i) i(src1_r, src2_r) - /* 64-bit ops */ - case SLJIT_ADD: - ins = WHEN2(signed_flags, agr, algr); - break; - case SLJIT_SUB: - ins = WHEN2(signed_flags, sgr, slgr); - break; - case SLJIT_ADDC: - ins = LEVAL(alcgr); - break; - case SLJIT_SUBC: - ins = LEVAL(slbgr); - break; - case SLJIT_MUL: - ins = LEVAL(msgr); - break; - case SLJIT_AND: - ins = LEVAL(ngr); - break; - case SLJIT_OR: - ins = LEVAL(ogr); - break; - case SLJIT_XOR: - ins = LEVAL(xgr); - break; - /* 32-bit ops */ - case SLJIT_ADD32: - ins = WHEN2(signed_flags, ar, alr); - break; - case SLJIT_SUB32: - ins = WHEN2(signed_flags, sr, slr); - break; - case SLJIT_ADDC32: - ins = LEVAL(alcr); - break; - case SLJIT_SUBC32: - ins = LEVAL(slbr); - break; - case SLJIT_MUL32: - ins = LEVAL(msr); - break; - case SLJIT_AND32: - ins = LEVAL(nr); - break; - case SLJIT_OR32: - ins = LEVAL(or); - break; - case SLJIT_XOR32: - ins = LEVAL(xr); - break; - #undef LEVAL - default: - SLJIT_UNREACHABLE(); - } - FAIL_IF(push_inst(compiler, ins)); - #define LEVAL(i) i(dst_r, src1_r) - if (src1_r != dst_r) - FAIL_IF(push_inst(compiler, - WHEN2(op & SLJIT_I32_OP, lr, lgr))); - #undef LEVAL + if (type == SLJIT_SHL) + ins = (op & SLJIT_I32_OP) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */; + else if (type == SLJIT_LSHR) + ins = (op & SLJIT_I32_OP) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */; + else + ins = (op & SLJIT_I32_OP) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */; + + FAIL_IF(push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (base_r << 28) | (imm << 16))); } - /* write condition code to emulated flag register */ - if (op & VARIABLE_FLAG_MASK) - FAIL_IF(push_inst(compiler, ipm(flag_r))); + if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR) + return push_inst(compiler, (op & SLJIT_I32_OP) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)); + + return SLJIT_SUCCESS; +} + +static const struct ins_forms addc_forms = { + 0xb9980000, /* alcr */ + 0xb9880000, /* alcgr */ + 0, + 0, + 0, + 0xe30000000098, /* alc */ + 0xe30000000088, /* alcg */ +}; + +static const struct ins_forms subc_forms = { + 0xb9990000, /* slbr */ + 0xb9890000, /* slbgr */ + 0, + 0, + 0, + 0xe30000000099, /* slb */ + 0xe30000000089, /* slbg */ +}; + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; - /* write zero flag to emulated flag register */ - if (op & SLJIT_SET_Z) - FAIL_IF(push_store_zero_flag(compiler, op, dst_r)); + compiler->mode = op & SLJIT_I32_OP; + compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); + + if (GET_OPCODE(op) >= SLJIT_ADD || GET_OPCODE(op) <= SLJIT_SUBC) + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; + + if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) { + src1 ^= src2; + src2 ^= src1; + src1 ^= src2; + + src1w ^= src2w; + src2w ^= src1w; + src1w ^= src2w; + } - /* finally write the result to memory if required */ - if (dst & SLJIT_MEM) { - SLJIT_ASSERT(dst_r != tmp1); - /* TODO(carenas): s/FAIL_IF/ return */ - FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP)); + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ADDC: + FAIL_IF(emit_commutative(compiler, &addc_forms, dst, dstw, src1, src1w, src2, src2w)); + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return SLJIT_SUCCESS; + case SLJIT_SUB: + return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUBC: + FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, dstw, src1, src1w, src2, src2w)); + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); + return SLJIT_SUCCESS; + case SLJIT_MUL: + FAIL_IF(sljit_emit_multiply(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + break; + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + FAIL_IF(sljit_emit_bitwise(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + break; + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: + FAIL_IF(sljit_emit_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + break; } + if (dst & SLJIT_MEM) + return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP); return SLJIT_SUCCESS; } @@ -2429,7 +2663,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src( case SLJIT_FAST_RETURN: src_r = FAST_IS_REG(src) ? gpr(src) : tmp1; if (src & SLJIT_MEM) - FAIL_IF(load_word(compiler, tmp1, src, srcw, tmp1, 0)); + FAIL_IF(load_word(compiler, tmp1, src, srcw, 0)); return push_inst(compiler, br(src_r)); case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: @@ -2508,7 +2742,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * return push_inst(compiler, lgr(gpr(dst), fast_link_r)); /* memory */ - return store_word(compiler, fast_link_r, dst, dstw, tmp1, 0); + return store_word(compiler, fast_link_r, dst, dstw, 0); } /* --------------------------------------------------------------------- */ @@ -2533,15 +2767,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { - sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(type & 0xff) : 0xf; + sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_jump(compiler, type)); - /* reload condition code */ - if (mask != 0xf) - PTR_FAIL_IF(push_load_cc(compiler, type & 0xff)); - /* record jump */ struct sljit_jump *jump = (struct sljit_jump *) ensure_abuf(compiler, sizeof(struct sljit_jump)); @@ -2586,7 +2816,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi FAIL_IF(push_load_imm_inst(compiler, src_r, srcw)); } else if (src & SLJIT_MEM) - FAIL_IF(load_word(compiler, src_r, src, srcw, tmp1, 0 /* 64-bit */)); + FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */)); /* emit jump instruction */ if (type >= SLJIT_FAST_CALL) @@ -2614,7 +2844,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_s32 dst, sljit_sw dstw, sljit_s32 type) { - sljit_u8 mask = get_cc(type & 0xff); + sljit_u8 mask = get_cc(compiler, type & 0xff); CHECK_ERROR(); CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); @@ -2625,9 +2855,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co case SLJIT_AND: case SLJIT_OR: case SLJIT_XOR: + compiler->status_flags_state = op & SLJIT_SET_Z; + /* dst is also source operand */ if (dst & SLJIT_MEM) - FAIL_IF(load_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP)); + FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP)); break; case SLJIT_MOV: @@ -2639,9 +2871,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co SLJIT_UNREACHABLE(); } - if (mask != 0xf) - FAIL_IF(push_load_cc(compiler, type & 0xff)); - /* TODO(mundaym): fold into cmov helper function? */ #define LEVAL(i) i(loc_r, 1, mask) if (have_lscond2()) { @@ -2672,14 +2901,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co #undef LEVAL } - /* set zero flag if needed */ - if (op & SLJIT_SET_Z) - FAIL_IF(push_store_zero_flag(compiler, op, dst_r)); - /* store result to memory if required */ - /* TODO(carenas): s/FAIL_IF/ return */ if (dst & SLJIT_MEM) - FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, op & SLJIT_I32_OP)); + return store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP); return SLJIT_SUCCESS; } @@ -2688,16 +2912,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil sljit_s32 dst_reg, sljit_s32 src, sljit_sw srcw) { - sljit_u8 mask = get_cc(type & 0xff); + sljit_u8 mask = get_cc(compiler, type & 0xff); sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_I32_OP); sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp0; CHECK_ERROR(); CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); - if (mask != 0xf) - FAIL_IF(push_load_cc(compiler, type & 0xff)); - if (src & SLJIT_IMM) { /* TODO(mundaym): fast path with lscond2 */ FAIL_IF(push_load_imm_inst(compiler, src_r, srcw)); @@ -2751,7 +2972,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi } if (dst & SLJIT_MEM) - PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, 0 /* always 64-bit */)); + PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */)); return (struct sljit_const*)const_; } @@ -2798,7 +3019,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label( } if (dst & SLJIT_MEM) - PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, tmp1, 0)); + PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0)); return put_label; } diff --git a/src/sljit/sljitNativeSPARC_32.c b/src/sljit/sljitNativeSPARC_32.c index e5167f0..2888640 100644 --- a/src/sljit/sljitNativeSPARC_32.c +++ b/src/sljit/sljitNativeSPARC_32.c @@ -93,18 +93,21 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS); case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); case SLJIT_ADDC: return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); case SLJIT_SUBC: return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); case SLJIT_MUL: + compiler->status_flags_state = 0; FAIL_IF(push_inst(compiler, SMUL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); if (!(flags & SET_FLAGS)) return SLJIT_SUCCESS; diff --git a/src/sljit/sljitNativeSPARC_common.c b/src/sljit/sljitNativeSPARC_common.c index 544d80d..e833f09 100644 --- a/src/sljit/sljitNativeSPARC_common.c +++ b/src/sljit/sljitNativeSPARC_common.c @@ -1275,16 +1275,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi return label; } -static sljit_ins get_cc(sljit_s32 type) +static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_MUL_NOT_OVERFLOW: case SLJIT_NOT_EQUAL_F64: /* Unordered. */ return DA(0x1); case SLJIT_NOT_EQUAL: - case SLJIT_MUL_OVERFLOW: case SLJIT_EQUAL_F64: return DA(0x9); @@ -1317,10 +1315,16 @@ static sljit_ins get_cc(sljit_s32 type) return DA(0x2); case SLJIT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return DA(0x9); + case SLJIT_UNORDERED_F64: return DA(0x7); case SLJIT_NOT_OVERFLOW: + if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + return DA(0x1); + case SLJIT_ORDERED_F64: return DA(0xf); @@ -1347,7 +1351,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET)) jump->flags |= IS_MOVABLE; #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(type ^ 1) | 5, UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(compiler, type ^ 1) | 5, UNMOVABLE_INS)); #else #error "Implementation required" #endif @@ -1357,7 +1361,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & FCC_IS_SET)) jump->flags |= IS_MOVABLE; #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(type ^ 1) | 5, UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(compiler, type ^ 1) | 5, UNMOVABLE_INS)); #else #error "Implementation required" #endif @@ -1474,9 +1478,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co type &= 0xff; if (type < SLJIT_EQUAL_F64) - FAIL_IF(push_inst(compiler, BICC | get_cc(type) | 3, UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, BICC | get_cc(compiler, type) | 3, UNMOVABLE_INS)); else - FAIL_IF(push_inst(compiler, FBFCC | get_cc(type) | 3, UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, FBFCC | get_cc(compiler, type) | 3, UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS)); diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c index ddcc5eb..515d98a 100644 --- a/src/sljit/sljitNativeX86_common.c +++ b/src/sljit/sljitNativeX86_common.c @@ -411,11 +411,9 @@ static sljit_u8 get_jump_code(sljit_s32 type) return 0x8e /* jle */; case SLJIT_OVERFLOW: - case SLJIT_MUL_OVERFLOW: return 0x80 /* jo */; case SLJIT_NOT_OVERFLOW: - case SLJIT_MUL_NOT_OVERFLOW: return 0x81 /* jno */; case SLJIT_UNORDERED_F64: -- cgit v1.2.1