diff options
Diffstat (limited to 'pcre/sljit/sljitNativePPC_common.c')
-rw-r--r-- | pcre/sljit/sljitNativePPC_common.c | 598 |
1 files changed, 383 insertions, 215 deletions
diff --git a/pcre/sljit/sljitNativePPC_common.c b/pcre/sljit/sljitNativePPC_common.c index 67e6898a17d..5e06f2fd8ee 100644 --- a/pcre/sljit/sljitNativePPC_common.c +++ b/pcre/sljit/sljitNativePPC_common.c @@ -33,10 +33,19 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) Both for ppc-32 and ppc-64. */ typedef sljit_ui sljit_ins; +#if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_PPC_STACK_FRAME_V2 1 +#endif + #ifdef _AIX #include <sys/cache.h> #endif +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1 +#endif + static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) { #ifdef _AIX @@ -81,13 +90,19 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #define TMP_REG1 (SLJIT_NO_REGISTERS + 1) #define TMP_REG2 (SLJIT_NO_REGISTERS + 2) #define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define ZERO_REG (SLJIT_NO_REGISTERS + 4) +#define TMP_ZERO (SLJIT_NO_REGISTERS + 4) + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) +#define TMP_CALL_REG (SLJIT_NO_REGISTERS + 5) +#else +#define TMP_CALL_REG TMP_REG2 +#endif #define TMP_FREG1 (0) #define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { - 0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { + 0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31, 12 }; /* --------------------------------------------------------------------- */ @@ -224,13 +239,19 @@ static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins) return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_si optimize_jump(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) { sljit_sw diff; sljit_uw target_addr; + sljit_sw extra_jump_flags; +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) + return 0; +#else if (jump->flags & SLJIT_REWRITABLE_JUMP) return 0; +#endif if (jump->flags & JUMP_ADDR) target_addr = jump->u.target; @@ -238,28 +259,52 @@ static SLJIT_INLINE sljit_si optimize_jump(struct sljit_jump *jump, sljit_ins *c SLJIT_ASSERT(jump->flags & JUMP_LABEL); target_addr = (sljit_uw)(code + jump->u.label->size); } + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (jump->flags & IS_CALL) + goto keep_address; +#endif + diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l; - if (jump->flags & UNCOND_B) { - if (diff <= 0x01ffffff && diff >= -0x02000000) { - jump->flags |= PATCH_B; - return 1; - } - if (target_addr <= 0x03ffffff) { - jump->flags |= PATCH_B | ABSOLUTE_B; - return 1; - } - } - else { + extra_jump_flags = 0; + if (jump->flags & IS_COND) { if (diff <= 0x7fff && diff >= -0x8000) { jump->flags |= PATCH_B; return 1; } if (target_addr <= 0xffff) { - jump->flags |= PATCH_B | ABSOLUTE_B; + jump->flags |= PATCH_B | PATCH_ABS_B; return 1; } + extra_jump_flags = REMOVE_COND; + + diff -= sizeof(sljit_ins); + } + + if (diff <= 0x01ffffff && diff >= -0x02000000) { + jump->flags |= PATCH_B | extra_jump_flags; + return 1; } + if (target_addr <= 0x03ffffff) { + jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags; + return 1; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) +keep_address: +#endif + if (target_addr <= 0x7fffffff) { + jump->flags |= PATCH_ABS32; + return 1; + } + if (target_addr <= 0x7fffffffffffl) { + jump->flags |= PATCH_ABS48; + return 1; + } +#endif + return 0; } @@ -318,19 +363,42 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #else jump->addr = (sljit_uw)(code_ptr - 6); #endif - if (optimize_jump(jump, code_ptr, code)) { + if (detect_jump_type(jump, code_ptr, code)) { #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) code_ptr[-3] = code_ptr[0]; code_ptr -= 3; #else - code_ptr[-6] = code_ptr[0]; - code_ptr -= 6; + if (jump->flags & PATCH_ABS32) { + code_ptr -= 3; + code_ptr[-1] = code_ptr[2]; + code_ptr[0] = code_ptr[3]; + } + else if (jump->flags & PATCH_ABS48) { + code_ptr--; + code_ptr[-1] = code_ptr[0]; + code_ptr[0] = code_ptr[1]; + /* rldicr rX,rX,32,31 -> rX,rX,16,47 */ + SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6); + code_ptr[-3] ^= 0x8422; + /* oris -> ori */ + code_ptr[-2] ^= 0x4000000; + } + else { + code_ptr[-6] = code_ptr[0]; + code_ptr -= 6; + } #endif + if (jump->flags & REMOVE_COND) { + code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001); + code_ptr++; + jump->addr += sizeof(sljit_ins); + code_ptr[0] = Bx; + jump->flags -= IS_COND; + } } jump = jump->next; } if (const_ && const_->addr == word_count) { - /* Just recording the address. */ const_->addr = (sljit_uw)code_ptr; const_ = const_->next; } @@ -362,29 +430,27 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; buf_ptr = (sljit_ins*)jump->addr; if (jump->flags & PATCH_B) { - if (jump->flags & UNCOND_B) { - if (!(jump->flags & ABSOLUTE_B)) { + if (jump->flags & IS_COND) { + if (!(jump->flags & PATCH_ABS_B)) { addr = addr - jump->addr; - SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); - *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1); + SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); + *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); } else { - SLJIT_ASSERT(addr <= 0x03ffffff); - *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); + SLJIT_ASSERT(addr <= 0xffff); + *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); } } else { - if (!(jump->flags & ABSOLUTE_B)) { + if (!(jump->flags & PATCH_ABS_B)) { addr = addr - jump->addr; - SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); - *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); + SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); + *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1); } else { - addr = addr & ~0x3l; - SLJIT_ASSERT(addr <= 0xffff); - *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); + SLJIT_ASSERT(addr <= 0x03ffffff); + *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); } - } break; } @@ -393,6 +459,19 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); #else + if (jump->flags & PATCH_ABS32) { + SLJIT_ASSERT(addr <= 0x7fffffff); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); + break; + } + if (jump->flags & PATCH_ABS48) { + SLJIT_ASSERT(addr <= 0x7fffffffffff); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff); + break; + } buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff); buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff); buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff); @@ -498,7 +577,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil #endif FAIL_IF(push_inst(compiler, MFLR | D(0))); - FAIL_IF(push_inst(compiler, STACK_STORE | S(ZERO_REG) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) )); + FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) )); if (saveds >= 1) FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) )); if (saveds >= 2) @@ -509,9 +588,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) )); if (saveds >= 5) FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) )); +#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw)) )); +#else FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)) )); +#endif - FAIL_IF(push_inst(compiler, ADDI | D(ZERO_REG) | A(0) | 0)); + FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0)); if (args >= 1) FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(SLJIT_SAVED_REG1) | B(SLJIT_SCRATCH_REG1))); if (args >= 2) @@ -519,7 +602,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil if (args >= 3) FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG3) | A(SLJIT_SAVED_REG3) | B(SLJIT_SCRATCH_REG3))); -#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size; #else compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size; @@ -556,7 +639,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, compiler->logical_local_size = local_size; #endif -#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size; #else compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size; @@ -578,7 +661,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, ADD | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0))); } +#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) + FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw)))); +#else FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)))); +#endif if (compiler->saveds >= 5) FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) )); if (compiler->saveds >= 4) @@ -589,7 +676,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) )); if (compiler->saveds >= 1) FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) )); - FAIL_IF(push_inst(compiler, STACK_LOAD | D(ZERO_REG) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) )); + FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) )); FAIL_IF(push_inst(compiler, MTLR | S(0))); FAIL_IF(push_inst(compiler, BLR)); @@ -612,7 +699,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi It contans 32 items, but not all are different. */ /* 64 bit only: [reg+imm] must be aligned to 4 bytes. */ -#define ADDR_MODE2 0x10000 +#define INT_ALIGNED 0x10000 /* 64-bit only: there is no lwau instruction. */ #define UPDATE_REQ 0x20000 @@ -623,7 +710,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi #else #define ARCH_32_64(a, b) b #define INST_CODE_AND_DST(inst, flags, reg) \ - (((inst) & ~(ADDR_MODE2 | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) + (((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) #endif static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = { @@ -632,13 +719,13 @@ static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = { /* Word. */ -/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */), -/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */), +/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */), +/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */), /* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), /* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), -/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */), -/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */), +/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */), +/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */), /* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), /* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), @@ -682,13 +769,13 @@ static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = { /* Word. */ -/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */), -/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */), +/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */), +/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */), /* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), /* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), -/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */), -/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */), +/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */), +/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */), /* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), /* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), @@ -719,12 +806,12 @@ static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = { /* Int. */ /* s i n i s */ HI(36) /* stw */, -/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x2 /* lwa */), +/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */), /* s i n x s */ HI(31) | LO(151) /* stwx */, /* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */), /* s i w i s */ HI(37) /* stwu */, -/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | UPDATE_REQ | 0x2 /* lwa */), +/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */), /* s i w x s */ HI(31) | LO(183) /* stwux */, /* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */), @@ -748,74 +835,48 @@ static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = { static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw) { sljit_ins inst; -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - sljit_si tmp_reg; -#endif + /* Should work when (arg & REG_MASK) == 0. */ + SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0); SLJIT_ASSERT(arg & SLJIT_MEM); - if (!(arg & 0xf)) { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - if (argw <= SIMM_MAX && argw >= SIMM_MIN) { - if (inp_flags & ARG_TEST) - return 1; - - inst = data_transfer_insts[(inp_flags & ~WRITE_BACK) & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | IMM(argw)); - return -1; - } -#else - inst = data_transfer_insts[(inp_flags & ~WRITE_BACK) & MEM_MASK]; - if (argw <= SIMM_MAX && argw >= SIMM_MIN && - (!(inst & ADDR_MODE2) || (argw & 0x3) == 0)) { - if (inp_flags & ARG_TEST) - return 1; - - push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | IMM(argw)); - return -1; - } -#endif - return 0; - } - if (!(arg & 0xf0)) { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - if (argw <= SIMM_MAX && argw >= SIMM_MIN) { - if (inp_flags & ARG_TEST) - return 1; - - inst = data_transfer_insts[inp_flags & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | IMM(argw)); - return -1; - } -#else - inst = data_transfer_insts[inp_flags & MEM_MASK]; - if (argw <= SIMM_MAX && argw >= SIMM_MIN && (!(inst & ADDR_MODE2) || (argw & 0x3) == 0)) { - if (inp_flags & ARG_TEST) - return 1; - - if ((inp_flags & WRITE_BACK) && (inst & UPDATE_REQ)) { - tmp_reg = (inp_flags & LOAD_DATA) ? (arg & 0xf) : TMP_REG3; - if (push_inst(compiler, ADDI | D(tmp_reg) | A(arg & 0xf) | IMM(argw))) - return -1; - arg = tmp_reg | SLJIT_MEM; - argw = 0; - } - push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | IMM(argw)); - return -1; - } -#endif - } - else if (!(argw & 0x3)) { + if (arg & OFFS_REG_MASK) { + if (argw & 0x3) + return 0; if (inp_flags & ARG_TEST) return 1; + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B((arg >> 4) & 0xf)); + SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ))); + FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg)))); return -1; } - return 0; + + if (SLJIT_UNLIKELY(!(arg & REG_MASK))) + inp_flags &= ~WRITE_BACK; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + inst = data_transfer_insts[inp_flags & MEM_MASK]; + SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ)); + + if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ)) + return 0; + if (inp_flags & ARG_TEST) + return 1; +#endif + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + if (argw > SIMM_MAX || argw < SIMM_MIN) + return 0; + if (inp_flags & ARG_TEST) + return 1; + + inst = data_transfer_insts[inp_flags & MEM_MASK]; + SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ))); +#endif + + FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw))); + return -1; } /* See getput_arg below. @@ -823,35 +884,50 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl uses word arguments without write back. */ static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) { + sljit_sw high_short, next_high_short; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_sw diff; +#endif + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); - if (!(arg & 0xf)) - return (next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX); + if (arg & OFFS_REG_MASK) + return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3)); - if (arg & 0xf0) - return ((arg & 0xf0) == (next_arg & 0xf0) && (argw & 0x3) == (next_argw & 0x3)); + if (next_arg & OFFS_REG_MASK) + return 0; - if (argw <= SIMM_MAX && argw >= SIMM_MIN) { - if (arg == next_arg && (next_argw >= SIMM_MAX && next_argw <= SIMM_MIN)) +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff; + next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff; + return high_short == next_high_short; +#else + if (argw <= 0x7fffffffl && argw >= -0x80000000l) { + high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff; + next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff; + if (high_short == next_high_short) return 1; } - if (arg == next_arg && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX)) + diff = argw - next_argw; + if (!(arg & REG_MASK)) + return diff <= SIMM_MAX && diff >= SIMM_MIN; + + if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN) return 1; return 0; +#endif } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #define ADJUST_CACHED_IMM(imm) \ - if ((inst & ADDR_MODE2) && (imm & 0x3)) { \ + if ((inst & INT_ALIGNED) && (imm & 0x3)) { \ /* Adjust cached value. Fortunately this is really a rare case */ \ compiler->cache_argw += imm & 0x3; \ FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \ imm &= ~0x3; \ } -#else -#define ADJUST_CACHED_IMM(imm) #endif /* Emit the necessary instructions. See can_cache above. */ @@ -859,72 +935,125 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, { sljit_si tmp_r; sljit_ins inst; + sljit_sw high_short, next_high_short; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_sw diff; +#endif SLJIT_ASSERT(arg & SLJIT_MEM); tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1; /* Special case for "mov reg, [reg, ... ]". */ - if ((arg & 0xf) == tmp_r) + if ((arg & REG_MASK) == tmp_r) tmp_r = TMP_REG1; - if (!(arg & 0xf)) { - inst = data_transfer_insts[(inp_flags & ~WRITE_BACK) & MEM_MASK]; - if ((compiler->cache_arg & SLJIT_IMM) && (((sljit_uw)argw - (sljit_uw)compiler->cache_argw) <= SIMM_MAX || ((sljit_uw)compiler->cache_argw - (sljit_uw)argw) <= SIMM_MAX)) { - argw = argw - compiler->cache_argw; - ADJUST_CACHED_IMM(argw); - SLJIT_ASSERT(!(inst & UPDATE_REQ)); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(argw)); - } - - if ((next_arg & SLJIT_MEM) && (argw - next_argw <= SIMM_MAX || next_argw - argw <= SIMM_MAX)) { - SLJIT_ASSERT(inp_flags & LOAD_DATA); - - compiler->cache_arg = SLJIT_IMM; - compiler->cache_argw = argw; - tmp_r = TMP_REG3; - } - - FAIL_IF(load_immediate(compiler, tmp_r, argw)); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r)); - } - - if (SLJIT_UNLIKELY(arg & 0xf0)) { + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { argw &= 0x3; /* Otherwise getput_arg_fast would capture it. */ SLJIT_ASSERT(argw); - if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg && argw == compiler->cache_argw) + if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw) tmp_r = TMP_REG3; else { - if ((arg & 0xf0) == (next_arg & 0xf0) && argw == (next_argw & 0x3)) { - compiler->cache_arg = SLJIT_MEM | (arg & 0xf0); + if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) { + compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); compiler->cache_argw = argw; tmp_r = TMP_REG3; } #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - FAIL_IF(push_inst(compiler, RLWINM | S((arg >> 4) & 0xf) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1))); + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1))); #else - FAIL_IF(push_inst(compiler, RLDI(tmp_r, (arg >> 4) & 0xf, argw, 63 - argw, 1))); + FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1))); #endif } inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(tmp_r)); + SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ))); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r)); } + if (SLJIT_UNLIKELY(!(arg & REG_MASK))) + inp_flags &= ~WRITE_BACK; + inst = data_transfer_insts[inp_flags & MEM_MASK]; + SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (argw <= 0x7fff7fffl && argw >= -0x80000000l + && (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) { +#endif + + arg &= REG_MASK; + high_short = (sljit_si)(argw + ((argw & 0x8000) << 1)) & ~0xffff; + /* The getput_arg_fast should handle this otherwise. */ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l); +#else + SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ))); +#endif - if (compiler->cache_arg == arg && ((sljit_uw)argw - (sljit_uw)compiler->cache_argw <= SIMM_MAX || (sljit_uw)compiler->cache_argw - (sljit_uw)argw <= SIMM_MAX)) { - SLJIT_ASSERT(!(inp_flags & WRITE_BACK)); - argw = argw - compiler->cache_argw; - ADJUST_CACHED_IMM(argw); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(argw)); + if (inp_flags & WRITE_BACK) { + if (arg == reg) { + FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg))); + reg = tmp_r; + } + tmp_r = arg; + FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16))); + } + else if (compiler->cache_arg != arg || high_short != compiler->cache_argw) { + if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) { + next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff; + if (high_short == next_high_short) { + compiler->cache_arg = SLJIT_IMM | arg; + compiler->cache_argw = next_high_short; + tmp_r = TMP_REG3; + } + } + FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16))); + } + else + tmp_r = TMP_REG3; + + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + } + + /* Everything else is PPC-64 only. */ + if (SLJIT_UNLIKELY(!(arg & REG_MASK))) { + diff = argw - compiler->cache_argw; + if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) { + ADJUST_CACHED_IMM(diff); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff)); + } + + diff = argw - next_argw; + if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + + compiler->cache_arg = SLJIT_IMM; + compiler->cache_argw = argw; + tmp_r = TMP_REG3; + } + + FAIL_IF(load_immediate(compiler, tmp_r, argw)); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r)); + } + + diff = argw - compiler->cache_argw; + if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) { + SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ)); + ADJUST_CACHED_IMM(diff); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff)); } - if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) { + if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) { inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(TMP_REG3)); + SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ))); + if (compiler->cache_argw != argw) { + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff))); + compiler->cache_argw = argw; + } + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3)); } if (argw == next_argw && (next_arg & SLJIT_MEM)) { @@ -935,14 +1064,15 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, compiler->cache_argw = argw; inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(TMP_REG3)); + SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ))); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3)); } - if (arg == next_arg && !(inp_flags & WRITE_BACK) && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX)) { + diff = argw - next_argw; + if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) { SLJIT_ASSERT(inp_flags & LOAD_DATA); FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & 0xf))); + FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK))); compiler->cache_arg = arg; compiler->cache_argw = argw; @@ -950,11 +1080,22 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3)); } + if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + + compiler->cache_arg = SLJIT_IMM; + compiler->cache_argw = argw; + tmp_r = TMP_REG3; + } + else + FAIL_IF(load_immediate(compiler, tmp_r, argw)); + /* Get the indexed version instead of the normal one. */ inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; - SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); - FAIL_IF(load_immediate(compiler, tmp_r, argw)); - return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & 0xf) | B(tmp_r)); + SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ))); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r)); +#endif } static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) @@ -990,7 +1131,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si i return SLJIT_SUCCESS; dst_r = TMP_REG2; } - else if (dst <= ZERO_REG) { + else if (FAST_IS_REG(dst)) { dst_r = dst; flags |= REG_DEST; if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) @@ -1009,7 +1150,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si i } /* Source 1. */ - if (src1 <= ZERO_REG) { + if (FAST_IS_REG(src1)) { src1_r = src1; flags |= REG1_SOURCE; } @@ -1025,7 +1166,7 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si i src1_r = 0; /* Source 2. */ - if (src2 <= ZERO_REG) { + if (FAST_IS_REG(src2)) { src2_r = src2; flags |= REG2_SOURCE; if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) @@ -1095,38 +1236,42 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si i SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) { +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_si int_op = op & SLJIT_INT_OP; +#endif + CHECK_ERROR(); check_sljit_emit_op0(compiler, op); - switch (GET_OPCODE(op)) { + op = GET_OPCODE(op); + switch (op) { case SLJIT_BREAKPOINT: case SLJIT_NOP: return push_inst(compiler, NOP); - break; case SLJIT_UMUL: case SLJIT_SMUL: FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1))); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, (GET_OPCODE(op) == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)); + return push_inst(compiler, (op == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)); #else FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, (GET_OPCODE(op) == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)); + return push_inst(compiler, (op == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)); #endif case SLJIT_UDIV: case SLJIT_SDIV: FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1))); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op & SLJIT_INT_OP) { - FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); + if (int_op) { + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1)); + } else { + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); } - FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1)); #else - FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1)); #endif @@ -1152,14 +1297,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler op = GET_OPCODE(op); if ((src & SLJIT_IMM) && srcw == 0) - src = ZERO_REG; + src = TMP_ZERO; if (op_flags & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, MTXER | S(ZERO_REG))); + FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); if (op_flags & SLJIT_INT_OP) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { - if (src <= ZERO_REG && src == dst) { + if (op < SLJIT_NOT) { + if (FAST_IS_REG(src) && src == dst) { if (!TYPE_CAST_NEEDED(op)) return SLJIT_SUCCESS; } @@ -1268,7 +1413,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #define TEST_SH_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= SLJIT_W(0x7fffffff) && (srcw) >= SLJIT_W(-0x80000000)) + (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l) #else #define TEST_SH_IMM(src, srcw) \ (((src) & SLJIT_IMM) && !((srcw) & 0xffff)) @@ -1279,7 +1424,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #define TEST_ADD_IMM(src, srcw) \ - (((src) & SLJIT_IMM) && (srcw) <= SLJIT_W(0x7fff7fff) && (srcw) >= SLJIT_W(-0x80000000)) + (((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l) #else #define TEST_ADD_IMM(src, srcw) \ ((src) & SLJIT_IMM) @@ -1307,9 +1452,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler ADJUST_LOCAL_OFFSET(src2, src2w); if ((src1 & SLJIT_IMM) && src1w == 0) - src1 = ZERO_REG; + src1 = TMP_ZERO; if ((src2 & SLJIT_IMM) && src2w == 0) - src2 = ZERO_REG; + src2 = TMP_ZERO; #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (op & SLJIT_INT_OP) { @@ -1324,7 +1469,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler } #endif if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, MTXER | S(ZERO_REG))); + FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); if (src2 == TMP_REG2) flags |= ALT_KEEP_CACHE; @@ -1392,7 +1537,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } } - if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) { + if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) { if (!(op & SLJIT_SET_U)) { /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ if (TEST_SL_IMM(src2, src2w)) { @@ -1418,7 +1563,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler } return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); } - if (!(op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O))) { + if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) { if (TEST_SL_IMM(src2, -src2w)) { compiler->imm = (-src2w) & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1529,8 +1674,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) { - /* Always available. */ +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ return 1; +#endif } #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 6)) @@ -1550,12 +1699,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile compiler->cache_argw = 0; if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst > SLJIT_FLOAT_REG6) { + if (dst & SLJIT_MEM) { FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw)); dst = TMP_FREG1; } - if (src > SLJIT_FLOAT_REG6) { + if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0)); src = TMP_FREG2; } @@ -1563,9 +1712,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile return push_inst(compiler, FCMPU | CRD(4) | FA(dst) | FB(src)); } - dst_fr = (dst > SLJIT_FLOAT_REG6) ? TMP_FREG1 : dst; + dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1; - if (src > SLJIT_FLOAT_REG6) { + if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); src = dst_fr; } @@ -1605,9 +1754,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_fr = (dst > SLJIT_FLOAT_REG6) ? TMP_FREG2 : dst; + dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG2; - if (src1 > SLJIT_FLOAT_REG6) { + if (src1 & SLJIT_MEM) { if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { FAIL_IF(compiler->error); src1 = TMP_FREG1; @@ -1615,7 +1764,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile flags |= ALT_FORM1; } - if (src2 > SLJIT_FLOAT_REG6) { + if (src2 & SLJIT_MEM) { if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { FAIL_IF(compiler->error); src2 = TMP_FREG2; @@ -1684,7 +1833,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c if (dst == SLJIT_UNUSED) return SLJIT_SUCCESS; - if (dst <= ZERO_REG) + if (FAST_IS_REG(dst)) return push_inst(compiler, MFLR | D(dst)); /* Memory. */ @@ -1698,7 +1847,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * check_sljit_emit_fast_return(compiler, src, srcw); ADJUST_LOCAL_OFFSET(src, srcw); - if (src <= ZERO_REG) + if (FAST_IS_REG(src)) FAIL_IF(push_inst(compiler, MTLR | S(src))); else { if (src & SLJIT_MEM) @@ -1811,11 +1960,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile type &= 0xff; /* In PPC, we don't need to touch the arguments. */ - if (type >= SLJIT_JUMP) - jump->flags |= UNCOND_B; + if (type < SLJIT_JUMP) + jump->flags |= IS_COND; +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL0) + jump->flags |= IS_CALL; +#endif - PTR_FAIL_IF(emit_const(compiler, TMP_REG1, 0)); - PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_REG1))); + PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); + PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG))); jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0))); return jump; @@ -1830,20 +1983,32 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil check_sljit_emit_ijump(compiler, type, src, srcw); ADJUST_LOCAL_OFFSET(src, srcw); - if (src <= ZERO_REG) + if (FAST_IS_REG(src)) { +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL0) { + FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); + src_r = TMP_CALL_REG; + } + else + src_r = src; +#else src_r = src; - else if (src & SLJIT_IMM) { +#endif + } else if (src & SLJIT_IMM) { jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); - set_jump(jump, compiler, JUMP_ADDR | UNCOND_B); + set_jump(jump, compiler, JUMP_ADDR); jump->u.target = srcw; - - FAIL_IF(emit_const(compiler, TMP_REG2, 0)); - src_r = TMP_REG2; +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL0) + jump->flags |= IS_CALL; +#endif + FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); + src_r = TMP_CALL_REG; } else { - FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); - src_r = TMP_REG2; + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw)); + src_r = TMP_CALL_REG; } FAIL_IF(push_inst(compiler, MTCTR | S(src_r))); @@ -1867,6 +2032,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com { sljit_si reg, input_flags; sljit_si flags = GET_ALL_FLAGS(op); + sljit_sw original_dstw = dstw; CHECK_ERROR(); check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); @@ -1876,7 +2042,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com return SLJIT_SUCCESS; op = GET_OPCODE(op); - reg = (op < SLJIT_ADD && dst <= ZERO_REG) ? dst : TMP_REG2; + reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2; compiler->cache_arg = 0; compiler->cache_argw = 0; @@ -1988,13 +2154,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com op = SLJIT_MOV; input_flags = WORD_DATA; #endif - return (reg == TMP_REG2) ? emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0) : SLJIT_SUCCESS; + if (reg != TMP_REG2) + return SLJIT_SUCCESS; + return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); } #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->skip_checks = 1; #endif - return sljit_emit_op2(compiler, op | flags, dst, dstw, src, srcw, TMP_REG2, 0); + return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) @@ -2010,7 +2178,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi PTR_FAIL_IF(!const_); set_const(const_, compiler); - reg = (dst <= ZERO_REG) ? dst : TMP_REG2; + reg = SLOW_IS_REG(dst) ? dst : TMP_REG2; PTR_FAIL_IF(emit_const(compiler, reg, init_value)); |