diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2014-07-07 07:11:16 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2014-07-07 07:11:16 +0000 |
commit | 70a819e440bc8aa4d46d52ae0598396b79cb56dd (patch) | |
tree | 7756166aa21b67f709df30a814e95f6ae501d6c6 /sljit | |
parent | 0b076090dde9e8e3486815f03f20d2578486f704 (diff) | |
download | pcre-70a819e440bc8aa4d46d52ae0598396b79cb56dd.tar.gz |
Major JIT compiler update.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1491 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'sljit')
-rw-r--r-- | sljit/sljitConfigInternal.h | 109 | ||||
-rw-r--r-- | sljit/sljitLir.c | 249 | ||||
-rw-r--r-- | sljit/sljitLir.h | 276 | ||||
-rw-r--r-- | sljit/sljitNativeARM_32.c | 121 | ||||
-rw-r--r-- | sljit/sljitNativeARM_64.c | 135 | ||||
-rw-r--r-- | sljit/sljitNativeARM_T2_32.c | 120 | ||||
-rw-r--r-- | sljit/sljitNativeMIPS_32.c | 46 | ||||
-rw-r--r-- | sljit/sljitNativeMIPS_64.c | 46 | ||||
-rw-r--r-- | sljit/sljitNativeMIPS_common.c | 149 | ||||
-rw-r--r-- | sljit/sljitNativePPC_common.c | 175 | ||||
-rw-r--r-- | sljit/sljitNativeSPARC_32.c | 4 | ||||
-rw-r--r-- | sljit/sljitNativeSPARC_common.c | 68 | ||||
-rw-r--r-- | sljit/sljitNativeTILEGX_64.c | 22 | ||||
-rw-r--r-- | sljit/sljitNativeX86_32.c | 135 | ||||
-rw-r--r-- | sljit/sljitNativeX86_64.c | 256 | ||||
-rw-r--r-- | sljit/sljitNativeX86_common.c | 148 |
16 files changed, 1145 insertions, 914 deletions
diff --git a/sljit/sljitConfigInternal.h b/sljit/sljitConfigInternal.h index dc9963f..26a614d 100644 --- a/sljit/sljitConfigInternal.h +++ b/sljit/sljitConfigInternal.h @@ -28,31 +28,43 @@ #define _SLJIT_CONFIG_INTERNAL_H_ /* - SLJIT defines the following macros depending on the target architecture: - - Feature detection (boolean) macros: - SLJIT_32BIT_ARCHITECTURE : 32 bit architecture - SLJIT_64BIT_ARCHITECTURE : 64 bit architecture - SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index - SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing a double precision floating point array by index - SLJIT_SINGLE_SHIFT : the shift required to apply when accessing a single precision floating point array by index - SLJIT_LITTLE_ENDIAN : little endian architecture - SLJIT_BIG_ENDIAN : big endian architecture - SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) - SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information - SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address - - Types and useful macros: - sljit_sb, sljit_ub : signed and unsigned 8 bit byte - sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type - sljit_si, sljit_ui : signed and unsigned 32 bit integer type - sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer - sljit_p : unsgined pointer value (usually the same as sljit_uw, but - some 64 bit ABIs may use 32 bit pointers) - sljit_s : single precision floating point value - sljit_d : double precision floating point value - SLJIT_CALL : C calling convention define for both calling JIT form C and C callbacks for JIT - SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper) + SLJIT defines the following architecture dependent types and macros: + + Types: + sljit_sb, sljit_ub : signed and unsigned 8 bit byte + sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type + sljit_si, sljit_ui : signed and unsigned 32 bit integer type + sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer + sljit_p : unsgined pointer value (usually the same as sljit_uw, but + some 64 bit ABIs may use 32 bit pointers) + sljit_s : single precision floating point value + sljit_d : double precision floating point value + + Macros for feature detection (boolean): + SLJIT_32BIT_ARCHITECTURE : 32 bit architecture + SLJIT_64BIT_ARCHITECTURE : 64 bit architecture + SLJIT_LITTLE_ENDIAN : little endian architecture + SLJIT_BIG_ENDIAN : big endian architecture + SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) + SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information + + Constants: + SLJIT_NUMBER_OF_REGISTERS : number of available registers + SLJIT_NUMBER_OF_SCRATCH_REGISTERS : number of available scratch registers + SLJIT_NUMBER_OF_SAVED_REGISTERS : number of available saved registers + SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers + SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers + SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index + SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing + a double precision floating point array by index + SLJIT_SINGLE_SHIFT : the shift required to apply when accessing + a single precision floating point array by index + SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address + + Other macros: + SLJIT_CALL : C calling convention define for both calling JIT form C and C callbacks for JIT + SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper) */ #if !((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ @@ -156,6 +168,53 @@ #define SLJIT_CONFIG_SPARC 1 #endif +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#define SLJIT_NUMBER_OF_REGISTERS 10 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7 +#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#ifndef _WIN64 +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6 +#else +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#endif /* _WIN64 */ +#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +#define SLJIT_NUMBER_OF_REGISTERS 11 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +#define SLJIT_NUMBER_OF_REGISTERS 11 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7 +#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +#define SLJIT_NUMBER_OF_REGISTERS 23 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) +#define SLJIT_NUMBER_OF_REGISTERS 22 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17 +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) +#define SLJIT_NUMBER_OF_REGISTERS 17 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) +#define SLJIT_NUMBER_OF_REGISTERS 18 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14 +#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#define SLJIT_NUMBER_OF_REGISTERS 0 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0 +#endif + +#define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \ + (SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS) + +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 6 +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && (defined _WIN64) +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 1 +#else +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 +#endif + +#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \ + (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) + #if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) /* These libraries are needed for the macros below. */ diff --git a/sljit/sljitLir.c b/sljit/sljitLir.c index 9acaf10..29fc322 100644 --- a/sljit/sljitLir.c +++ b/sljit/sljitLir.c @@ -229,13 +229,25 @@ # define FCC_IS_SET (1 << 24) #endif +/* Stack management. */ + +#define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \ + (((scratches < SLJIT_NUMBER_OF_SCRATCH_REGISTERS ? 0 : (scratches - SLJIT_NUMBER_OF_SCRATCH_REGISTERS)) + \ + (saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? saveds : SLJIT_NUMBER_OF_SAVED_REGISTERS) + \ + extra) * sizeof(sljit_sw)) + #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -#define SLJIT_HAS_VARIABLE_LOCALS_OFFSET 1 -#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) -#define FIXED_LOCALS_OFFSET (3 * sizeof(sljit_sw)) -#endif +#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) +#define FIXED_LOCALS_OFFSET ((2 + 4) * sizeof(sljit_sw)) +#else +/* Maximum 3 arguments are passed on the stack. */ +#define FIXED_LOCALS_OFFSET ((3 + 4) * sizeof(sljit_sw)) #endif +#endif /* SLJIT_CONFIG_X86_32 */ + #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 #ifdef _WIN64 @@ -281,13 +293,13 @@ #if (defined SLJIT_HAS_VARIABLE_LOCALS_OFFSET && SLJIT_HAS_VARIABLE_LOCALS_OFFSET) #define ADJUST_LOCAL_OFFSET(p, i) \ - if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ + if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ (i) += compiler->locals_offset; #elif (defined SLJIT_HAS_FIXED_LOCALS_OFFSET && SLJIT_HAS_FIXED_LOCALS_OFFSET) #define ADJUST_LOCAL_OFFSET(p, i) \ - if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ + if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ (i) += FIXED_LOCALS_OFFSET; #else @@ -361,6 +373,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void) compiler->scratches = -1; compiler->saveds = -1; + compiler->fscratches = -1; + compiler->fsaveds = -1; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) compiler->args = -1; @@ -632,8 +646,15 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp #define FUNCTION_CHECK_IS_REG(r) \ ((r) == SLJIT_UNUSED || \ - ((r) >= SLJIT_SCRATCH_REG1 && (r) <= SLJIT_SCRATCH_REG1 - 1 + compiler->scratches) || \ - ((r) >= SLJIT_SAVED_REG1 && (r) <= SLJIT_SAVED_REG1 - 1 + compiler->saveds)) + ((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \ + ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0)) + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#define FUNCTION_ASSERT_IF_VIRTUAL(p) \ + SLJIT_ASSERT((p) < SLJIT_R3 || (p) > SLJIT_R6); +#else +#define FUNCTION_ASSERT_IF_VIRTUAL(p) +#endif #define FUNCTION_CHECK_SRC(p, i) \ SLJIT_ASSERT(compiler->scratches != -1 && compiler->saveds != -1); \ @@ -641,12 +662,14 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp SLJIT_ASSERT((i) == 0 && (p) != SLJIT_UNUSED); \ else if ((p) == SLJIT_IMM) \ ; \ - else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ + else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \ else if ((p) & SLJIT_MEM) { \ SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \ + FUNCTION_ASSERT_IF_VIRTUAL((p) & REG_MASK); \ if ((p) & OFFS_REG_MASK) { \ SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ + FUNCTION_ASSERT_IF_VIRTUAL(OFFS_REG(p)); \ SLJIT_ASSERT(!((i) & ~0x3)); \ } \ SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ @@ -658,12 +681,14 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp SLJIT_ASSERT(compiler->scratches != -1 && compiler->saveds != -1); \ if (FUNCTION_CHECK_IS_REG(p)) \ SLJIT_ASSERT((i) == 0); \ - else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ + else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \ else if ((p) & SLJIT_MEM) { \ SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \ + FUNCTION_ASSERT_IF_VIRTUAL((p) & REG_MASK); \ if ((p) & OFFS_REG_MASK) { \ SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ + FUNCTION_ASSERT_IF_VIRTUAL(OFFS_REG(p)); \ SLJIT_ASSERT(!((i) & ~0x3)); \ } \ SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ @@ -672,15 +697,19 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp SLJIT_ASSERT_STOP(); #define FUNCTION_FCHECK(p, i) \ - if ((p) >= SLJIT_FLOAT_REG1 && (p) <= SLJIT_FLOAT_REG6) \ + SLJIT_ASSERT(compiler->fscratches != -1 && compiler->fsaveds != -1); \ + if (((p) >= SLJIT_FR0 && (p) < (SLJIT_FR0 + compiler->fscratches)) || \ + ((p) > (SLJIT_FS0 - compiler->fsaveds) && (p) <= SLJIT_FS0)) \ SLJIT_ASSERT(i == 0); \ - else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ + else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \ else if ((p) & SLJIT_MEM) { \ SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \ + FUNCTION_ASSERT_IF_VIRTUAL((p) & REG_MASK); \ if ((p) & OFFS_REG_MASK) { \ SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ - SLJIT_ASSERT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_LOCALS_REG) && !(i & ~0x3)); \ + FUNCTION_ASSERT_IF_VIRTUAL(OFFS_REG(p)); \ + SLJIT_ASSERT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SP) && !(i & ~0x3)); \ } else \ SLJIT_ASSERT(OFFS_REG(p) == 0); \ SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ @@ -690,8 +719,8 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp #define FUNCTION_CHECK_OP1() \ if (GET_OPCODE(op) >= SLJIT_MOVU && GET_OPCODE(op) <= SLJIT_MOVU_P) { \ - SLJIT_ASSERT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_LOCALS_REG); \ - SLJIT_ASSERT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_LOCALS_REG); \ + SLJIT_ASSERT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_SP); \ + SLJIT_ASSERT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_SP); \ if ((src & SLJIT_MEM) && (src & REG_MASK)) \ SLJIT_ASSERT((dst & REG_MASK) != (src & REG_MASK) && OFFS_REG(dst) != (src & REG_MASK)); \ } @@ -705,17 +734,6 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp compiler->verbose = verbose; } -static char* reg_names[] = { - (char*)"unused", (char*)"s1", (char*)"s2", (char*)"s3", - (char*)"se1", (char*)"se2", (char*)"p1", (char*)"p2", - (char*)"p3", (char*)"pe1", (char*)"pe2", (char*)"lc" -}; - -static char* freg_names[] = { - (char*)"unused", (char*)"f1", (char*)"f2", (char*)"f3", - (char*)"f4", (char*)"f5", (char*)"f6" -}; - #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) #ifdef _WIN64 # define SLJIT_PRINT_D "I64" @@ -726,48 +744,62 @@ static char* freg_names[] = { # define SLJIT_PRINT_D "" #endif -#define sljit_verbose_param(p, i) \ +#define sljit_verbose_reg(compiler, r) \ + do { \ + if ((r) < (SLJIT_R0 + compiler->scratches)) \ + fprintf(compiler->verbose, "r%d", (r) - SLJIT_R0); \ + else \ + fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - (r)); \ + } while (0) + +#define sljit_verbose_param(compiler, p, i) \ if ((p) & SLJIT_IMM) \ fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); \ else if ((p) & SLJIT_MEM) { \ if ((p) & REG_MASK) { \ - if (i) { \ - if ((p) & OFFS_REG_MASK) \ - fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)], 1 << (i)); \ - else \ - fprintf(compiler->verbose, "[%s + #%" SLJIT_PRINT_D "d]", reg_names[(p) & REG_MASK], (i)); \ - } \ - else { \ - if ((p) & OFFS_REG_MASK) \ - fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)]); \ - else \ - fprintf(compiler->verbose, "[%s]", reg_names[(p) & REG_MASK]); \ + fputc('[', compiler->verbose); \ + sljit_verbose_reg(compiler, (p) & REG_MASK); \ + if ((p) & OFFS_REG_MASK) { \ + fprintf(compiler->verbose, " + "); \ + sljit_verbose_reg(compiler, OFFS_REG(p)); \ + if (i) \ + fprintf(compiler->verbose, " * %d", 1 << (i)); \ } \ + else if (i) \ + fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); \ + fputc(']', compiler->verbose); \ } \ else \ fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \ - } else \ - fprintf(compiler->verbose, "%s", reg_names[p]); -#define sljit_verbose_fparam(p, i) \ + } else if (p) \ + sljit_verbose_reg(compiler, p); \ + else \ + fprintf(compiler->verbose, "unused"); + +#define sljit_verbose_fparam(compiler, p, i) \ if ((p) & SLJIT_MEM) { \ if ((p) & REG_MASK) { \ - if (i) { \ - if ((p) & OFFS_REG_MASK) \ - fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)], 1 << (i)); \ - else \ - fprintf(compiler->verbose, "[%s + #%" SLJIT_PRINT_D "d]", reg_names[(p) & REG_MASK], (i)); \ - } \ - else { \ - if ((p) & OFFS_REG_MASK) \ - fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)]); \ - else \ - fprintf(compiler->verbose, "[%s]", reg_names[(p) & REG_MASK]); \ + fputc('[', compiler->verbose); \ + sljit_verbose_reg(compiler, (p) & REG_MASK); \ + if ((p) & OFFS_REG_MASK) { \ + fprintf(compiler->verbose, " + "); \ + sljit_verbose_reg(compiler, OFFS_REG(p)); \ + if (i) \ + fprintf(compiler->verbose, "%d", 1 << (i)); \ } \ + else if (i) \ + fprintf(compiler->verbose, "%" SLJIT_PRINT_D "d", (i)); \ + fputc(']', compiler->verbose); \ } \ else \ fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \ - } else \ - fprintf(compiler->verbose, "%s", freg_names[p]); + } \ + else { \ + if ((p) < (SLJIT_FR0 + compiler->fscratches)) \ + fprintf(compiler->verbose, "fr%d", (p) - SLJIT_FR0); \ + else \ + fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - (p)); \ + } static SLJIT_CONST char* op_names[] = { /* op0 */ @@ -832,7 +864,9 @@ static SLJIT_INLINE void check_sljit_generate_code(struct sljit_compiler *compil #endif } -static SLJIT_INLINE void check_sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +static SLJIT_INLINE void check_sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { /* If debug and verbose are disabled, all arguments are unused. */ SLJIT_UNUSED_ARG(compiler); @@ -842,17 +876,24 @@ static SLJIT_INLINE void check_sljit_emit_enter(struct sljit_compiler *compiler, SLJIT_UNUSED_ARG(local_size); SLJIT_ASSERT(args >= 0 && args <= 3); - SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NO_TMP_REGISTERS); - SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS); + SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + SLJIT_ASSERT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); SLJIT_ASSERT(args <= saveds); + SLJIT_ASSERT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + SLJIT_ASSERT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + SLJIT_ASSERT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " enter args=%d scratches=%d saveds=%d local_size=%d\n", args, scratches, saveds, local_size); + fprintf(compiler->verbose, " enter args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + args, scratches, saveds, fscratches, fsaveds, local_size); #endif } -static SLJIT_INLINE void check_sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +static SLJIT_INLINE void check_sljit_set_context(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { /* If debug and verbose are disabled, all arguments are unused. */ SLJIT_UNUSED_ARG(compiler); @@ -869,13 +910,18 @@ static SLJIT_INLINE void check_sljit_set_context(struct sljit_compiler *compiler #endif SLJIT_ASSERT(args >= 0 && args <= 3); - SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NO_TMP_REGISTERS); - SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS); + SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + SLJIT_ASSERT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); SLJIT_ASSERT(args <= saveds); + SLJIT_ASSERT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + SLJIT_ASSERT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + SLJIT_ASSERT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " set_context args=%d scratches=%d saveds=%d local_size=%d\n", args, scratches, saveds, local_size); + fprintf(compiler->verbose, " set_context args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + args, scratches, saveds, fscratches, fsaveds, local_size); #endif } @@ -901,7 +947,7 @@ static SLJIT_INLINE void check_sljit_emit_return(struct sljit_compiler *compiler fprintf(compiler->verbose, " return\n"); else { fprintf(compiler->verbose, " return %s ", op_names[op]); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } } @@ -921,7 +967,7 @@ static SLJIT_INLINE void check_sljit_emit_fast_enter(struct sljit_compiler *comp #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " fast_enter "); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, "\n"); } #endif @@ -940,7 +986,7 @@ static SLJIT_INLINE void check_sljit_emit_fast_return(struct sljit_compiler *com #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " fast_return "); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif @@ -954,6 +1000,7 @@ static SLJIT_INLINE void check_sljit_emit_op0(struct sljit_compiler *compiler, s SLJIT_ASSERT((op >= SLJIT_BREAKPOINT && op <= SLJIT_SMUL) || ((op & ~SLJIT_INT_OP) >= SLJIT_UDIV && (op & ~SLJIT_INT_OP) <= SLJIT_SDIV)); + SLJIT_ASSERT(op < SLJIT_UMUL || compiler->scratches >= 2); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) fprintf(compiler->verbose, " %s%s\n", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)]); @@ -991,9 +1038,9 @@ static SLJIT_INLINE void check_sljit_emit_op1(struct sljit_compiler *compiler, s fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)], !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s", !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif @@ -1033,11 +1080,11 @@ static SLJIT_INLINE void check_sljit_emit_op2(struct sljit_compiler *compiler, s fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)], !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s", !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_param(src1, src1w); + sljit_verbose_param(compiler, src1, src1w); fprintf(compiler->verbose, ", "); - sljit_verbose_param(src2, src2w); + sljit_verbose_param(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } #endif @@ -1046,13 +1093,13 @@ static SLJIT_INLINE void check_sljit_emit_op2(struct sljit_compiler *compiler, s static SLJIT_INLINE void check_sljit_get_register_index(sljit_si reg) { SLJIT_UNUSED_ARG(reg); - SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_REGISTERS); + SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS); } static SLJIT_INLINE void check_sljit_get_float_register_index(sljit_si reg) { SLJIT_UNUSED_ARG(reg); - SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_FLOAT_REGISTERS); + SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); } static SLJIT_INLINE void check_sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -1121,9 +1168,9 @@ static SLJIT_INLINE void check_sljit_emit_fop1(struct sljit_compiler *compiler, (GET_OPCODE(op) == SLJIT_CONVD_FROMS) ? ((op & SLJIT_SINGLE_OP) ? "s.fromd" : "d.froms") : ((op & SLJIT_SINGLE_OP) ? "s" : "d")); - sljit_verbose_fparam(dst, dstw); + sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src, srcw); + sljit_verbose_fparam(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif @@ -1158,9 +1205,9 @@ static SLJIT_INLINE void check_sljit_emit_fop1_cmp(struct sljit_compiler *compil if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s%s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d", !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_S) ? "" : ".s"); - sljit_verbose_fparam(src1, src1w); + sljit_verbose_fparam(compiler, src1, src1w); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src2, src2w); + sljit_verbose_fparam(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } #endif @@ -1196,9 +1243,9 @@ static SLJIT_INLINE void check_sljit_emit_fop1_convw_fromd(struct sljit_compiler fprintf(compiler->verbose, " %s%s.from%s ", op_names[GET_OPCODE(op)], (GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? "i" : "w", (op & SLJIT_SINGLE_OP) ? "s" : "d"); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src, srcw); + sljit_verbose_fparam(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif @@ -1234,9 +1281,9 @@ static SLJIT_INLINE void check_sljit_emit_fop1_convd_fromw(struct sljit_compiler fprintf(compiler->verbose, " %s%s.from%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d", (GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? "i" : "w"); - sljit_verbose_fparam(dst, dstw); + sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif @@ -1268,11 +1315,11 @@ static SLJIT_INLINE void check_sljit_emit_fop2(struct sljit_compiler *compiler, #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d"); - sljit_verbose_fparam(dst, dstw); + sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src1, src1w); + sljit_verbose_fparam(compiler, src1, src1w); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src2, src2w); + sljit_verbose_fparam(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } #endif @@ -1330,9 +1377,9 @@ static SLJIT_INLINE void check_sljit_emit_cmp(struct sljit_compiler *compiler, s #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %scmp%s.%s ", !(type & SLJIT_INT_OP) ? "" : "i", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]); - sljit_verbose_param(src1, src1w); + sljit_verbose_param(compiler, src1, src1w); fprintf(compiler->verbose, ", "); - sljit_verbose_param(src2, src2w); + sljit_verbose_param(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } #endif @@ -1360,9 +1407,9 @@ static SLJIT_INLINE void check_sljit_emit_fcmp(struct sljit_compiler *compiler, if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %scmp%s.%s ", (type & SLJIT_SINGLE_OP) ? "s" : "d", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]); - sljit_verbose_fparam(src1, src1w); + sljit_verbose_fparam(compiler, src1, src1w); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src2, src2w); + sljit_verbose_fparam(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } #endif @@ -1390,7 +1437,7 @@ static SLJIT_INLINE void check_sljit_emit_ijump(struct sljit_compiler *compiler, #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " ijump.%s ", jump_names[type]); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif @@ -1427,10 +1474,10 @@ static SLJIT_INLINE void check_sljit_emit_op_flags(struct sljit_compiler *compil if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %sflags.%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)], !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); if (src != SLJIT_UNUSED) { fprintf(compiler->verbose, ", "); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); } fprintf(compiler->verbose, ", %s\n", jump_names[type]); } @@ -1450,7 +1497,7 @@ static SLJIT_INLINE void check_sljit_get_local_base(struct sljit_compiler *compi #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " local_base "); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", offset); } #endif @@ -1470,7 +1517,7 @@ static SLJIT_INLINE void check_sljit_emit_const(struct sljit_compiler *compiler, #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " const "); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", init_value); } #endif @@ -1658,13 +1705,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *co CHECK_ERROR(); check_sljit_get_local_base(compiler, dst, dstw, offset); - ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset); + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->skip_checks = 1; #endif if (offset != 0) - return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset); - return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_LOCALS_REG, 0); + return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0); } #endif @@ -1720,23 +1767,31 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) SLJIT_ASSERT_STOP(); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(scratches); SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); SLJIT_UNUSED_ARG(local_size); SLJIT_ASSERT_STOP(); return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(scratches); SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); SLJIT_UNUSED_ARG(local_size); SLJIT_ASSERT_STOP(); } diff --git a/sljit/sljitLir.h b/sljit/sljitLir.h index 8cc6c30..1ca4599 100644 --- a/sljit/sljitLir.h +++ b/sljit/sljitLir.h @@ -56,8 +56,6 @@ Disadvantages: - No automatic register allocation, and temporary results are not stored on the stack. (hence the name comes) - - Limited number of registers (only 6+4 integer registers, max 3+2 - scratch, max 3+2 saved and 6 floating point registers) In practice: - This approach is very effective for interpreters - One of the saved registers typically points to a stack interface @@ -104,71 +102,155 @@ of sljitConfigInternal.h */ /* Registers */ /* --------------------------------------------------------------------- */ +/* + Scratch (R) registers: registers whose may not preserve their values + across function calls. + + Saved (S) registers: registers whose preserve their values across + function calls. + + The scratch and saved register sets are overlap. The last scratch register + is the first saved register, the one before the last is the second saved + register, and so on. + + If an architecture provides two scratch and three saved registers, + its scratch and saved register sets are the following: + + R0 | [S4] | R0 and S4 represent the same physical register + R1 | [S3] | R1 and S3 represent the same physical register + [R2] | S2 | R2 and S2 represent the same physical register + [R3] | S1 | R3 and S1 represent the same physical register + [R4] | S0 | R4 and S0 represent the same physical register + + Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and + SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture. + + Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 10 + and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 5. However, 4 registers + are virtual on x86-32. See below. + + The purpose of this definition is convenience. Although a register + is either scratch register or saved register, SLJIT allows accessing + them from the other set. For example, four registers can be used as + scratch registers and the fifth one as saved register on the architecture + above. Of course the last two scratch registers (R2 and R3) from this + four will be saved on the stack, because they are defined as saved + registers in the application binary interface. Still R2 and R3 can be + used for referencing to these registers instead of S2 and S1, which + makes easier to write platform independent code. Scratch registers + can be saved registers in a similar way, but these extra saved + registers will not be preserved across function calls! Hence the + application must save them on those platforms, where the number of + saved registers is too low. This can be done by copy them onto + the stack and restore them after a function call. + + Note: To emphasize that registers assigned to R2-R4 are saved + registers, they are enclosed by square brackets. S3-S4 + are marked in a similar way. + + Note: sljit_emit_enter and sljit_set_context defines whether a register + is S or R register. E.g: when 3 scratches and 1 saved is mapped + by sljit_emit_enter, the allowed register set will be: R0-R2 and + S0. Although S2 is mapped to the same position as R2, it does not + available in the current configuration. Furthermore the R3 (S1) + register does not available as well. +*/ + +/* When SLJIT_UNUSED is specified as destination, the result is discarded. */ #define SLJIT_UNUSED 0 -/* Scratch (temporary) registers whose may not preserve their values - across function calls. */ -#define SLJIT_SCRATCH_REG1 1 -#define SLJIT_SCRATCH_REG2 2 -#define SLJIT_SCRATCH_REG3 3 -/* Note: extra registers cannot be used for memory addressing. */ -/* Note: on x86-32, these registers are emulated (using stack - loads & stores). */ -#define SLJIT_SCRATCH_EREG1 4 -#define SLJIT_SCRATCH_EREG2 5 - -/* Saved registers whose preserve their values across function calls. */ -#define SLJIT_SAVED_REG1 6 -#define SLJIT_SAVED_REG2 7 -#define SLJIT_SAVED_REG3 8 -/* Note: extra registers cannot be used for memory addressing. */ -/* Note: on x86-32, these registers are emulated (using stack - loads & stores). */ -#define SLJIT_SAVED_EREG1 9 -#define SLJIT_SAVED_EREG2 10 - -/* Read-only register (cannot be the destination of an operation). - Only SLJIT_MEM1(SLJIT_LOCALS_REG) addressing mode is allowed since - several ABIs has certain limitations about the stack layout. However - sljit_get_local_base() can be used to obtain the offset of a value - on the stack. */ -#define SLJIT_LOCALS_REG 11 - -/* Number of registers. */ -#define SLJIT_NO_TMP_REGISTERS 5 -#define SLJIT_NO_GEN_REGISTERS 5 -#define SLJIT_NO_REGISTERS 11 +/* Scratch registers. */ +#define SLJIT_R0 1 +#define SLJIT_R1 2 +#define SLJIT_R2 3 +/* Note: on x86-32, R3 - R6 are emulated (using stack loads & stores), + so they cannot be used for memory addressing. There is no such + limitation on other CPUs. */ +#define SLJIT_R3 4 +#define SLJIT_R4 5 +#define SLJIT_R5 6 +#define SLJIT_R6 7 +#define SLJIT_R7 8 +#define SLJIT_R8 9 +#define SLJIT_R9 10 +/* All R registers provided by the architecture can be accessed by SLJIT_R(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_REGISTERS. */ +#define SLJIT_R(i) (1 + (i)) + +/* Saved registers. */ +#define SLJIT_S0 (SLJIT_NUMBER_OF_REGISTERS) +#define SLJIT_S1 (SLJIT_NUMBER_OF_REGISTERS - 1) +#define SLJIT_S2 (SLJIT_NUMBER_OF_REGISTERS - 2) +/* Note: on x86-32, S3 - S6 are emulated (using stack loads & stores), + so they cannot be used for memory addressing. There is no such + limitation on other CPUs. */ +#define SLJIT_S3 (SLJIT_NUMBER_OF_REGISTERS - 3) +#define SLJIT_S4 (SLJIT_NUMBER_OF_REGISTERS - 4) +#define SLJIT_S5 (SLJIT_NUMBER_OF_REGISTERS - 5) +#define SLJIT_S6 (SLJIT_NUMBER_OF_REGISTERS - 6) +#define SLJIT_S7 (SLJIT_NUMBER_OF_REGISTERS - 7) +#define SLJIT_S8 (SLJIT_NUMBER_OF_REGISTERS - 8) +#define SLJIT_S9 (SLJIT_NUMBER_OF_REGISTERS - 9) +/* All S registers provided by the architecture can be accessed by SLJIT_S(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_REGISTERS. */ +#define SLJIT_S(i) (SLJIT_NUMBER_OF_REGISTERS - (i)) + +/* Registers >= SLJIT_FIRST_SAVED_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_REG (SLJIT_S0 - SLJIT_NUMBER_OF_SAVED_REGISTERS + 1) + +/* The SLJIT_SP provides direct access to the linear stack space allocated by + sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP). + The immediate offset is extended by the relative stack offset automatically. + The sljit_get_local_base can be used to obtain the absolute offset. */ +#define SLJIT_SP (SLJIT_NUMBER_OF_REGISTERS + 1) /* Return with machine word. */ -#define SLJIT_RETURN_REG SLJIT_SCRATCH_REG1 +#define SLJIT_RETURN_REG SLJIT_R0 /* x86 prefers specific registers for special purposes. In case of shift - by register it supports only SLJIT_SCRATCH_REG3 for shift argument + by register it supports only SLJIT_R2 for shift argument (which is the src2 argument of sljit_emit_op2). If another register is used, sljit must exchange data between registers which cause a minor slowdown. Other architectures has no such limitation. */ -#define SLJIT_PREF_SHIFT_REG SLJIT_SCRATCH_REG3 +#define SLJIT_PREF_SHIFT_REG SLJIT_R2 /* --------------------------------------------------------------------- */ /* Floating point registers */ /* --------------------------------------------------------------------- */ -/* Note: SLJIT_UNUSED as destination is not valid for floating point - operations, since they cannot be used for setting flags. */ - -/* Floating point operations are performed on double or - single precision values. */ +/* Each floating point register can store a double or single precision + value. The FR and FS register sets are overlap in the same way as R + and S register sets. See above. */ -#define SLJIT_FLOAT_REG1 1 -#define SLJIT_FLOAT_REG2 2 -#define SLJIT_FLOAT_REG3 3 -#define SLJIT_FLOAT_REG4 4 -#define SLJIT_FLOAT_REG5 5 -#define SLJIT_FLOAT_REG6 6 - -#define SLJIT_NO_FLOAT_REGISTERS 6 +/* Note: SLJIT_UNUSED as destination is not valid for floating point + operations, since they cannot be used for setting flags. */ + +/* Floating point scratch registers. */ +#define SLJIT_FR0 1 +#define SLJIT_FR1 2 +#define SLJIT_FR2 3 +#define SLJIT_FR3 4 +#define SLJIT_FR4 5 +#define SLJIT_FR5 6 +/* All FR registers provided by the architecture can be accessed by SLJIT_FR(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */ +#define SLJIT_FR(i) (1 + (i)) + +/* Floating point saved registers. */ +#define SLJIT_FS0 (SLJIT_NUMBER_OF_FLOAT_REGISTERS) +#define SLJIT_FS1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1) +#define SLJIT_FS2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2) +#define SLJIT_FS3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3) +#define SLJIT_FS4 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4) +#define SLJIT_FS5 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5) +/* All S registers provided by the architecture can be accessed by SLJIT_FS(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */ +#define SLJIT_FS(i) (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i)) + +/* Float registers >= SLJIT_FIRST_SAVED_FLOAT_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1) /* --------------------------------------------------------------------- */ /* Main structures and functions */ @@ -216,10 +298,14 @@ struct sljit_compiler { struct sljit_memory_fragment *buf; struct sljit_memory_fragment *abuf; - /* Used local registers. */ + /* Used scratch registers. */ sljit_si scratches; /* Used saved registers. */ sljit_si saveds; + /* Used float scratch registers. */ + sljit_si fscratches; + /* Used float saved registers. */ + sljit_si fsaveds; /* Local stack size. */ sljit_si local_size; /* Code size. */ @@ -229,9 +315,6 @@ struct sljit_compiler { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) sljit_si args; - sljit_si locals_offset; - sljit_si scratches_start; - sljit_si saveds_start; #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -361,46 +444,61 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler error, they return with SLJIT_SUCCESS. */ /* - The executable code is basically a function call from the viewpoint of - the C language. The function calls must obey to the ABI (Application - Binary Interface) of the platform, which specify the purpose of machine - registers and stack handling among other things. The sljit_emit_enter - function emits the necessary instructions for setting up a new context - for the executable code and moves function arguments to the saved - registers. The number of arguments are specified in the "args" - parameter and the first argument goes to SLJIT_SAVED_REG1, the second - goes to SLJIT_SAVED_REG2 and so on. The number of scratch and - saved registers are passed in "scratches" and "saveds" arguments - respectively. Since the saved registers contains the arguments, - "args" must be less or equal than "saveds". The sljit_emit_enter - is also capable of allocating a stack space for local variables. The - "local_size" argument contains the size in bytes of this local area - and its staring address is stored in SLJIT_LOCALS_REG. However - the SLJIT_LOCALS_REG is not necessary the machine stack pointer. - The memory bytes between SLJIT_LOCALS_REG (inclusive) and - SLJIT_LOCALS_REG + local_size (exclusive) can be modified freely - until the function returns. The stack space is uninitialized. + The executable code is a function call from the viewpoint of the C + language. The function calls must obey to the ABI (Application + Binary Interface) of the platform, which specify the purpose of + all machine registers and stack handling among other things. The + sljit_emit_enter function emits the necessary instructions for + setting up a new context for the executable code and moves function + arguments to the saved registers. The number of sljit_sw arguments + passed to the function are specified in the "args" parameter. The + number of arguments must be less than or equal to 3. The first + argument goes to SLJIT_S0, the second goes to SLJIT_S1 and so on. + The register set used by the function must be declared as well. + The number of scratch and saved registers used by the function must + be passed to sljit_emit_enter. Only R registers between R0 and + "scratches" argument can be used later. E.g. if "scratches" is set + to 2, the register set will be limited to R0 and R1. The S registers + and the floating point registers ("fscratches" and "fsaveds") + are specified in a similar way. The sljit_emit_enter is also capable + of allocating a stack space for local variables. The "local_size" + argument contains the size in bytes of this local area and its + staring address is stored in SLJIT_SP. The memory area between + SLJIT_SP (inclusive) and SLJIT_SP + local_size (exclusive) can be + modified freely until the function returns. The stack space is not + initialized. + + Note: the following conditions must met: + 0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS + 0 <= saveds <= SLJIT_NUMBER_OF_REGISTERS + scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS + 0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + 0 <= fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS Note: every call of sljit_emit_enter and sljit_set_context - overwrites the previous context. */ + overwrites the previous context. +*/ #define SLJIT_MAX_LOCAL_SIZE 65536 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size); + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size); /* The machine code has a context (which contains the local stack space size, number of used registers, etc.) which initialized by sljit_emit_enter. Several functions (like sljit_emit_return) requres this context to be able to generate the appropriate code. However, some code fragments (like inline cache) may have - no normal entry point so their context is unknown for the compiler. Using the - function below we can specify their context. + no normal entry point so their context is unknown for the compiler. Their context + can be provided to the compiler by the sljit_set_context function. Note: every call of sljit_emit_enter and sljit_set_context overwrites the previous context. */ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, - sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size); + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size); /* Return from machine code. The op argument can be SLJIT_UNUSED which means the function does not return with anything or any opcode between SLJIT_MOV and @@ -558,23 +656,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * it can even decrease the runtime in a few cases. */ #define SLJIT_NOP 1 /* Flags: - (may destroy flags) - Unsigned multiplication of SLJIT_SCRATCH_REG1 and SLJIT_SCRATCH_REG2. - Result goes to SLJIT_SCRATCH_REG2:SLJIT_SCRATCH_REG1 (high:low) word */ + Unsigned multiplication of SLJIT_R0 and SLJIT_R1. + Result goes to SLJIT_R1:SLJIT_R0 (high:low) word */ #define SLJIT_UMUL 2 /* Flags: - (may destroy flags) - Signed multiplication of SLJIT_SCRATCH_REG1 and SLJIT_SCRATCH_REG2. - Result goes to SLJIT_SCRATCH_REG2:SLJIT_SCRATCH_REG1 (high:low) word */ + Signed multiplication of SLJIT_R0 and SLJIT_R1. + Result goes to SLJIT_R1:SLJIT_R0 (high:low) word */ #define SLJIT_SMUL 3 /* Flags: I - (may destroy flags) - Unsigned divide of the value in SLJIT_SCRATCH_REG1 by the value in SLJIT_SCRATCH_REG2. - The result is placed in SLJIT_SCRATCH_REG1 and the remainder goes to SLJIT_SCRATCH_REG2. - Note: if SLJIT_SCRATCH_REG2 contains 0, the behaviour is undefined. */ + Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed in SLJIT_R0 and the remainder goes to SLJIT_R1. + Note: if SLJIT_R1 contains 0, the behaviour is undefined. */ #define SLJIT_UDIV 4 #define SLJIT_IUDIV (SLJIT_UDIV | SLJIT_INT_OP) /* Flags: I - (may destroy flags) - Signed divide of the value in SLJIT_SCRATCH_REG1 by the value in SLJIT_SCRATCH_REG2. - The result is placed in SLJIT_SCRATCH_REG1 and the remainder goes to SLJIT_SCRATCH_REG2. - Note: if SLJIT_SCRATCH_REG2 contains 0, the behaviour is undefined. */ + Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed in SLJIT_R0 and the remainder goes to SLJIT_R1. + Note: if SLJIT_R1 contains 0, the behaviour is undefined. */ #define SLJIT_SDIV 5 #define SLJIT_ISDIV (SLJIT_SDIV | SLJIT_INT_OP) @@ -906,7 +1004,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com sljit_si src, sljit_sw srcw, sljit_si type); -/* Copies the base address of SLJIT_LOCALS_REG+offset to dst. +/* Copies the base address of SLJIT_SP + offset to dst. Flags: - (never set any flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset); diff --git a/sljit/sljitNativeARM_32.c b/sljit/sljitNativeARM_32.c index 99601f9..0c44664 100644 --- a/sljit/sljitNativeARM_32.c +++ b/sljit/sljitNativeARM_32.c @@ -36,13 +36,13 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) } /* Last register + 1. */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_PC (SLJIT_NO_REGISTERS + 4) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5) #define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) /* In ARM instruction words. Cache lines are usually 32 byte aligned. */ @@ -55,8 +55,8 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) (((max_diff) / (sljit_si)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1)) /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { - 0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 0, 1, 2, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 12, 14, 15 }; #define RM(rm) (reg_map[rm]) @@ -823,16 +823,20 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si i sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w); -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si size; + sljit_si size, i, tmp; sljit_uw push; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif @@ -840,67 +844,57 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil /* Push saved registers, temporary registers stmdb sp!, {..., lr} */ push = PUSH | (1 << 14); - if (scratches >= 5) - push |= 1 << 11; - if (scratches >= 4) - push |= 1 << 10; - if (saveds >= 5) - push |= 1 << 8; - if (saveds >= 4) - push |= 1 << 7; - if (saveds >= 3) - push |= 1 << 6; - if (saveds >= 2) - push |= 1 << 5; - if (saveds >= 1) - push |= 1 << 4; + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + push |= 1 << reg_map[i]; + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + push |= 1 << reg_map[i]; + FAIL_IF(push_inst(compiler, push)); /* Stack must be aligned to 8 bytes: */ - size = (1 + saveds) * sizeof(sljit_uw); - if (scratches >= 4) - size += (scratches - 3) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + local_size = ((size + local_size + 7) & ~7) - size; compiler->local_size = local_size; if (local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size)); + FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); if (args >= 1) - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG1)))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S0, SLJIT_UNUSED, RM(SLJIT_R0)))); if (args >= 2) - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG2, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2)))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S1, SLJIT_UNUSED, RM(SLJIT_R1)))); if (args >= 3) - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG3, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG3)))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S2, SLJIT_UNUSED, RM(SLJIT_R2)))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_si size; CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - size = (1 + saveds) * sizeof(sljit_uw); - if (scratches >= 4) - size += (scratches - 3) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; - compiler->local_size = local_size; + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + compiler->local_size = ((size + local_size + 7) & ~7) - size; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { + sljit_si i, tmp; sljit_uw pop; CHECK_ERROR(); @@ -909,25 +903,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); if (compiler->local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); + FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); - pop = POP | (1 << 15); /* Push saved registers, temporary registers ldmia sp!, {..., pc} */ - if (compiler->scratches >= 5) - pop |= 1 << 11; - if (compiler->scratches >= 4) - pop |= 1 << 10; - if (compiler->saveds >= 5) - pop |= 1 << 8; - if (compiler->saveds >= 4) - pop |= 1 << 7; - if (compiler->saveds >= 3) - pop |= 1 << 6; - if (compiler->saveds >= 2) - pop |= 1 << 5; - if (compiler->saveds >= 1) - pop |= 1 << 4; + pop = POP | (1 << 15); + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + pop |= 1 << reg_map[i]; + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + pop |= 1 << reg_map[i]; return push_inst(compiler, pop); } @@ -1845,16 +1832,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler case SLJIT_SMUL: #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) - | (reg_map[SLJIT_SCRATCH_REG2] << 16) - | (reg_map[SLJIT_SCRATCH_REG1] << 12) - | (reg_map[SLJIT_SCRATCH_REG1] << 8) - | reg_map[SLJIT_SCRATCH_REG2]); + | (reg_map[SLJIT_R1] << 16) + | (reg_map[SLJIT_R0] << 12) + | (reg_map[SLJIT_R0] << 8) + | reg_map[SLJIT_R1]); #else - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2)))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_R1)))); return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) - | (reg_map[SLJIT_SCRATCH_REG2] << 16) - | (reg_map[SLJIT_SCRATCH_REG1] << 12) - | (reg_map[SLJIT_SCRATCH_REG1] << 8) + | (reg_map[SLJIT_R1] << 16) + | (reg_map[SLJIT_R0] << 12) + | (reg_map[SLJIT_R0] << 8) | reg_map[TMP_REG1]); #endif case SLJIT_UDIV: diff --git a/sljit/sljitNativeARM_64.c b/sljit/sljitNativeARM_64.c index 31c2876..cad4c2e 100644 --- a/sljit/sljitNativeARM_64.c +++ b/sljit/sljitNativeARM_64.c @@ -34,18 +34,18 @@ typedef sljit_ui sljit_ins; #define TMP_ZERO 0 -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_REG4 (SLJIT_NO_REGISTERS + 4) -#define TMP_LR (SLJIT_NO_REGISTERS + 5) -#define TMP_SP (SLJIT_NO_REGISTERS + 6) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_REG4 (SLJIT_NUMBER_OF_REGISTERS + 5) +#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 6) +#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 7) #define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { - 31, 0, 1, 2, 3, 4, 19, 20, 21, 22, 23, 29, 9, 10, 11, 12, 30, 31 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { + 31, 0, 1, 2, 3, 4, 5, 6, 7, 13, 14, 15, 16, 17, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 12, 30, 31 }; #define W_OP (1 << 31) @@ -1061,17 +1061,23 @@ static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit /* Entry, exit */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { + sljit_si i, tmp, offs, prev; + CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - compiler->locals_offset = (2 + saveds) * sizeof(sljit_sw); + compiler->locals_offset = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); local_size = (compiler->locals_offset + local_size + 15) & ~15; compiler->local_size = local_size; @@ -1089,64 +1095,97 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) | RN(TMP_SP) | (0x40 << 15))); } - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_LOCALS_REG) | RN(TMP_SP))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP))); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + offs = 2 << 15; + prev = -1; + for (i = SLJIT_S0; i >= tmp; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } - if (saveds >= 2) - FAIL_IF(push_inst(compiler, STP | RT(SLJIT_SAVED_REG1) | RT2(SLJIT_SAVED_REG2) | RN(TMP_SP) | (2 << 15))); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, STP | RT(SLJIT_SAVED_REG3) | RT2(SLJIT_SAVED_EREG1) | RN(TMP_SP) | (4 << 15))); - if (saveds == 1) - FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_REG1) | RN(TMP_SP) | (2 << 10))); - if (saveds == 3) - FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_REG3) | RN(TMP_SP) | (4 << 10))); - if (saveds == 5) - FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_EREG2) | RN(TMP_SP) | (6 << 10))); + if (prev != -1) + FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(TMP_SP) | (offs >> 5))); if (args >= 1) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0))); if (args >= 2) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG2) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1))); if (args >= 3) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG3) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG3))); + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - compiler->locals_offset = (2 + saveds) * sizeof(sljit_sw); + compiler->locals_offset = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); compiler->local_size = (compiler->locals_offset + local_size + 15) & ~15; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { - sljit_si saveds, local_size; + sljit_si local_size; + sljit_si i, tmp, offs, prev; CHECK_ERROR(); check_sljit_emit_return(compiler, op, src, srcw); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - saveds = compiler->saveds; + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + offs = 2 << 15; + prev = -1; + for (i = SLJIT_S0; i >= tmp; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } - if (saveds >= 2) - FAIL_IF(push_inst(compiler, LDP | RT(SLJIT_SAVED_REG1) | RT2(SLJIT_SAVED_REG2) | RN(TMP_SP) | (2 << 15))); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, LDP | RT(SLJIT_SAVED_REG3) | RT2(SLJIT_SAVED_EREG1) | RN(TMP_SP) | (4 << 15))); - if (saveds == 1) - FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_REG1) | RN(TMP_SP) | (2 << 10))); - if (saveds == 3) - FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_REG3) | RN(TMP_SP) | (4 << 10))); - if (saveds == 5) - FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_EREG2) | RN(TMP_SP) | (6 << 10))); + if (prev != -1) + FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(TMP_SP) | (offs >> 5))); local_size = compiler->local_size; @@ -1187,15 +1226,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return push_inst(compiler, NOP); case SLJIT_UMUL: case SLJIT_SMUL: - FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1))); - FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_SCRATCH_REG1) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2) | RT2(TMP_ZERO))); - return push_inst(compiler, (op == SLJIT_SMUL ? SMULH : UMULH) | RD(SLJIT_SCRATCH_REG2) | RN(TMP_REG1) | RM(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); + return push_inst(compiler, (op == SLJIT_SMUL ? SMULH : UMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); case SLJIT_UDIV: case SLJIT_SDIV: - FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1))); - FAIL_IF(push_inst(compiler, ((op == SLJIT_SDIV ? SDIV : UDIV) ^ inv_bits) | RD(SLJIT_SCRATCH_REG1) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_SCRATCH_REG2) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2) | RT2(TMP_ZERO))); - return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_SCRATCH_REG2) | RN(TMP_REG1) | RM(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, ((op == SLJIT_SDIV ? SDIV : UDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); + return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); } return SLJIT_SUCCESS; diff --git a/sljit/sljitNativeARM_T2_32.c b/sljit/sljitNativeARM_T2_32.c index 5d2b3a8..cfdc08c 100644 --- a/sljit/sljitNativeARM_T2_32.c +++ b/sljit/sljitNativeARM_T2_32.c @@ -33,17 +33,17 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) typedef sljit_ui sljit_ins; /* Last register + 1. */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_PC (SLJIT_NO_REGISTERS + 4) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5) #define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { - 0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 13, 3, 4, 14, 15 }; #define COPY_BITS(src, from, to, bits) \ @@ -138,9 +138,9 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define ORRI 0xf0400000 #define ORRS 0x4300 #define ORR_W 0xea400000 -#define POP 0xbd00 +#define POP 0xbc00 #define POP_W 0xe8bd0000 -#define PUSH 0xb500 +#define PUSH 0xb400 #define PUSH_W 0xe92d0000 #define RSB_WI 0xf1c00000 #define RSBSI 0x4240 @@ -960,7 +960,7 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, } /* SP based immediate. */ - if (SLJIT_UNLIKELY(arg == SLJIT_LOCALS_REG) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) { + if (SLJIT_UNLIKELY(arg == SLJIT_SP) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) { FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2))); return -1; } @@ -1127,82 +1127,82 @@ static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit /* Entry, exit */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si size; + sljit_si size, i, tmp; sljit_ins push; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif push = (1 << 4); - if (saveds >= 5) - push |= 1 << 11; - if (saveds >= 4) - push |= 1 << 10; - if (saveds >= 3) - push |= 1 << 8; - if (saveds >= 2) - push |= 1 << 7; - if (saveds >= 1) - push |= 1 << 6; - if (scratches >= 5) - push |= 1 << 5; - FAIL_IF(saveds >= 3 + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + push |= 1 << reg_map[i]; + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + push |= 1 << reg_map[i]; + + FAIL_IF((push & 0xff00) ? push_inst32(compiler, PUSH_W | (1 << 14) | push) - : push_inst16(compiler, PUSH | push)); + : push_inst16(compiler, PUSH | (1 << 8) | push)); - /* Stack must be aligned to 8 bytes: */ - size = (3 + saveds) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; + /* Stack must be aligned to 8 bytes: (LR, R4) */ + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + local_size = ((size + local_size + 7) & ~7) - size; compiler->local_size = local_size; if (local_size > 0) { if (local_size <= (127 << 2)) FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2))); else - FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, local_size)); + FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size)); } if (args >= 1) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG1, SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0, SLJIT_R0))); if (args >= 2) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG2, SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S1, SLJIT_R1))); if (args >= 3) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG3, SLJIT_SCRATCH_REG3))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_si size; CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - size = (3 + saveds) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; - compiler->local_size = local_size; + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + compiler->local_size = ((size + local_size + 7) & ~7) - size; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { + sljit_si i, tmp; sljit_ins pop; CHECK_ERROR(); @@ -1214,25 +1214,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi if (compiler->local_size <= (127 << 2)) FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2))); else - FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, compiler->local_size)); + FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size)); } pop = (1 << 4); - if (compiler->saveds >= 5) - pop |= 1 << 11; - if (compiler->saveds >= 4) - pop |= 1 << 10; - if (compiler->saveds >= 3) - pop |= 1 << 8; - if (compiler->saveds >= 2) - pop |= 1 << 7; - if (compiler->saveds >= 1) - pop |= 1 << 6; - if (compiler->scratches >= 5) - pop |= 1 << 5; - return compiler->saveds >= 3 + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + pop |= 1 << reg_map[i]; + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + pop |= 1 << reg_map[i]; + + return (pop & 0xff00) ? push_inst32(compiler, POP_W | (1 << 15) | pop) - : push_inst16(compiler, POP | pop); + : push_inst16(compiler, POP | (1 << 8) | pop); } /* --------------------------------------------------------------------- */ @@ -1268,10 +1264,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler case SLJIT_UMUL: case SLJIT_SMUL: return push_inst32(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) - | (reg_map[SLJIT_SCRATCH_REG2] << 8) - | (reg_map[SLJIT_SCRATCH_REG1] << 12) - | (reg_map[SLJIT_SCRATCH_REG1] << 16) - | reg_map[SLJIT_SCRATCH_REG2]); + | (reg_map[SLJIT_R1] << 8) + | (reg_map[SLJIT_R0] << 12) + | (reg_map[SLJIT_R0] << 16) + | reg_map[SLJIT_R1]); case SLJIT_UDIV: case SLJIT_SDIV: if (compiler->scratches >= 4) { diff --git a/sljit/sljitNativeMIPS_32.c b/sljit/sljitNativeMIPS_32.c index cb7c695..55b1f33 100644 --- a/sljit/sljitNativeMIPS_32.c +++ b/sljit/sljitNativeMIPS_32.c @@ -154,9 +154,9 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_O) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); @@ -174,7 +174,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (op & (SLJIT_SET_C | SLJIT_SET_O)) @@ -189,8 +189,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); if (!(op & SLJIT_SET_O)) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); return push_inst(compiler, SLL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); @@ -198,21 +198,21 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_C) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); else { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } } FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); } else { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst))); } if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); if (!(op & SLJIT_SET_C)) @@ -221,7 +221,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); /* Set carry flag. */ - return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(TMP_EREG1) | DA(ULESS_FLAG), ULESS_FLAG); + return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG); case SLJIT_SUB: if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) { @@ -233,9 +233,9 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_O) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); @@ -247,7 +247,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O)) @@ -265,8 +265,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (!(op & SLJIT_SET_O)) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); return push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); @@ -279,22 +279,22 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); } else { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); } if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(TMP_EREG2), TMP_EREG2)); + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG)); FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); - return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; + return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; case SLJIT_MUL: SLJIT_ASSERT(!(flags & SRC2_IMM)); @@ -307,10 +307,10 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj #endif } FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MFHI | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG)); FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); - FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(TMP_EREG2) | SH_IMM(31), TMP_EREG2)); - return push_inst(compiler, SUBU | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); + FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG)); + return push_inst(compiler, SUBU | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); case SLJIT_AND: EMIT_LOGICAL(ANDI, AND); diff --git a/sljit/sljitNativeMIPS_64.c b/sljit/sljitNativeMIPS_64.c index df22eba..1c5ca15 100644 --- a/sljit/sljitNativeMIPS_64.c +++ b/sljit/sljitNativeMIPS_64.c @@ -246,9 +246,9 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_O) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); @@ -266,7 +266,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (op & (SLJIT_SET_C | SLJIT_SET_O)) @@ -281,8 +281,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); if (!(op & SLJIT_SET_O)) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); return push_inst(compiler, SELECT_OP(DSRL32, SLL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); @@ -290,21 +290,21 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_C) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); else { - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } } FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); } else { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); } if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); if (!(op & SLJIT_SET_C)) @@ -313,7 +313,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); /* Set carry flag. */ - return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(TMP_EREG1) | DA(ULESS_FLAG), ULESS_FLAG); + return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG); case SLJIT_SUB: if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) { @@ -325,9 +325,9 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_O) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); @@ -339,7 +339,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O)) @@ -357,8 +357,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (!(op & SLJIT_SET_O)) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); @@ -371,22 +371,22 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); } else { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); } if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(TMP_EREG2), TMP_EREG2)); + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG)); FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); - return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; + return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; case SLJIT_MUL: SLJIT_ASSERT(!(flags & SRC2_IMM)); @@ -402,10 +402,10 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj #endif } FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MFHI | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG)); FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); - FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(TMP_EREG2) | SH_IMM(31), TMP_EREG2)); - return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG)); + return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); case SLJIT_AND: EMIT_LOGICAL(ANDI, AND); diff --git a/sljit/sljitNativeMIPS_common.c b/sljit/sljitNativeMIPS_common.c index 77be7f9..6ba356c 100644 --- a/sljit/sljitNativeMIPS_common.c +++ b/sljit/sljitNativeMIPS_common.c @@ -40,35 +40,32 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) Both for mips-32 and mips-64 */ typedef sljit_ui sljit_ins; -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) /* For position independent code, t9 must contain the function address. */ #define PIC_ADDR_REG TMP_REG2 -/* TMP_EREGs are used mainly for arithmetic operations. */ -#define TMP_EREG1 15 -#define TMP_EREG2 24 /* Floating point status register. */ #define FCSR_REG 31 /* Return address register. */ #define RETURN_ADDR_REG 31 -/* Flags are keept in volatile registers. */ -#define EQUAL_FLAG 7 +/* Flags are kept in volatile registers. */ +#define EQUAL_FLAG 12 /* And carry flag as well. */ -#define ULESS_FLAG 10 -#define UGREATER_FLAG 11 -#define LESS_FLAG 12 -#define GREATER_FLAG 13 -#define OVERFLOW_FLAG 14 +#define ULESS_FLAG 13 +#define UGREATER_FLAG 14 +#define LESS_FLAG 15 +#define GREATER_FLAG 31 +#define OVERFLOW_FLAG 1 #define TMP_FREG1 (0) -#define TMP_FREG2 ((SLJIT_FLOAT_REG6 + 1) << 1) +#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 2, 5, 6, 3, 8, 16, 17, 18, 19, 20, 29, 4, 25, 9 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 2, 5, 6, 7, 8, 9, 10, 11, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4 }; /* --------------------------------------------------------------------- */ @@ -537,20 +534,25 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #include "sljitNativeMIPS_64.c" #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_ins base; + sljit_si i, tmp, offs; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - local_size += ((saveds + 1) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) local_size = (local_size + 15) & ~0xf; #else @@ -560,51 +562,57 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil if (local_size <= SIMM_MAX) { /* Frequent case. */ - FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(-local_size), DR(SLJIT_LOCALS_REG))); - base = S(SLJIT_LOCALS_REG); + FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP))); + base = S(SLJIT_SP); } else { FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(SLJIT_LOCALS_REG), DR(SLJIT_LOCALS_REG))); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP))); base = S(TMP_REG2); local_size = 0; } - FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); + offs = local_size - (sljit_sw)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS)); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + } if (args >= 1) - FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_SAVED_REG1), DR(SLJIT_SAVED_REG1))); + FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_S0), DR(SLJIT_S0))); if (args >= 2) - FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_SAVED_REG2), DR(SLJIT_SAVED_REG2))); + FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_S1), DR(SLJIT_S1))); if (args >= 3) - FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_SAVED_REG3), DR(SLJIT_SAVED_REG3))); + FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_S2), DR(SLJIT_S2))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - local_size += ((saveds + 1) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) compiler->local_size = (local_size + 15) & ~0xf; #else @@ -614,7 +622,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { - sljit_si local_size; + sljit_si local_size, i, tmp, offs; sljit_ins base; CHECK_ERROR(); @@ -624,31 +632,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi local_size = compiler->local_size; if (local_size <= SIMM_MAX) - base = S(SLJIT_LOCALS_REG); + base = S(SLJIT_SP); else { FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1))); base = S(TMP_REG1); local_size = 0; } - FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (sljit_si)sizeof(sljit_sw)), RETURN_ADDR_REG)); - if (compiler->saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_EREG2))); - if (compiler->saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_EREG1))); - if (compiler->saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG3))); - if (compiler->saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG2))); - if (compiler->saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG1))); + FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - (sljit_si)sizeof(sljit_sw)), RETURN_ADDR_REG)); + offs = local_size - (sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + SLJIT_ASSERT(offs == local_size - (sljit_sw)(sizeof(sljit_sw))); FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); if (compiler->local_size <= SIMM_MAX) - return push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(compiler->local_size), UNMOVABLE_INS); + return push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(compiler->local_size), UNMOVABLE_INS); else - return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_LOCALS_REG), UNMOVABLE_INS); + return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_SP), UNMOVABLE_INS); } #undef STACK_STORE @@ -1043,12 +1056,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler case SLJIT_UMUL: case SLJIT_SMUL: #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? DMULTU : DMULT) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #else - FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #endif - FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_SCRATCH_REG1), DR(SLJIT_SCRATCH_REG1))); - return push_inst(compiler, MFHI | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); case SLJIT_UDIV: case SLJIT_SDIV: #if !(defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) @@ -1058,15 +1071,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) if (int_op) - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); else - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DDIVU : DDIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #else - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #endif - FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_SCRATCH_REG1), DR(SLJIT_SCRATCH_REG1))); - return push_inst(compiler, MFHI | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); } return SLJIT_SUCCESS; @@ -1702,7 +1715,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); jump->addr = compiler->size; /* A NOP if type < CALL1. */ - PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS)); } return jump; } @@ -1963,12 +1976,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil } FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); /* We need an extra instruction in any case. */ - return push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), UNMOVABLE_INS); + return push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS); } /* Register input. */ if (type >= SLJIT_CALL1) - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), 4)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), 4)); FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); return push_inst(compiler, ADDU_W | S(src_r) | TA(0) | D(PIC_ADDR_REG), UNMOVABLE_INS); } diff --git a/sljit/sljitNativePPC_common.c b/sljit/sljitNativePPC_common.c index 037681e..8a81375 100644 --- a/sljit/sljitNativePPC_common.c +++ b/sljit/sljitNativePPC_common.c @@ -87,22 +87,22 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #endif /* _AIX */ } -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_ZERO (SLJIT_NO_REGISTERS + 4) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_ZERO (SLJIT_NUMBER_OF_REGISTERS + 5) #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) -#define TMP_CALL_REG (SLJIT_NO_REGISTERS + 5) +#define TMP_CALL_REG (SLJIT_NUMBER_OF_REGISTERS + 6) #else #define TMP_CALL_REG TMP_REG2 #endif #define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { - 0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31, 12 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { + 0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12 }; /* --------------------------------------------------------------------- */ @@ -571,111 +571,134 @@ ALT_FORM6 0x200000 */ #define STACK_LOAD LD #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { + sljit_si i, tmp, offs; + CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif FAIL_IF(push_inst(compiler, MFLR | D(0))); - FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) )); + offs = -(sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + } + + SLJIT_ASSERT(offs == -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1)); + #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw)) )); + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); #else - FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)) )); + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); #endif FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0)); if (args >= 1) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(SLJIT_SAVED_REG1) | B(SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0))); if (args >= 2) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG2) | A(SLJIT_SAVED_REG2) | B(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1))); if (args >= 3) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG3) | A(SLJIT_SAVED_REG3) | B(SLJIT_SCRATCH_REG3))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2))); - local_size += ((1 + saveds) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET; local_size = (local_size + 15) & ~0xf; compiler->local_size = local_size; #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) if (local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, STWU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-local_size))); + FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); else { FAIL_IF(load_immediate(compiler, 0, -local_size)); - FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0))); + FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); } #else if (local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, STDU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-local_size))); + FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); else { FAIL_IF(load_immediate(compiler, 0, -local_size)); - FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0))); + FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); } #endif return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - local_size += ((1 + saveds) * sizeof(sljit_sw)) + FIXED_LOCALS_OFFSET; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET; compiler->local_size = (local_size + 15) & ~0xf; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { + sljit_si i, tmp, offs; + CHECK_ERROR(); check_sljit_emit_return(compiler, op, src, srcw); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); if (compiler->local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(compiler->local_size))); + FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size))); else { FAIL_IF(load_immediate(compiler, 0, compiler->local_size)); - FAIL_IF(push_inst(compiler, ADD | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0))); + FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0))); } #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw)))); + FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); #else - FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)))); + FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); #endif - if (compiler->saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) )); - FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) )); + + offs = -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); + SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw))); FAIL_IF(push_inst(compiler, MTLR | S(0))); FAIL_IF(push_inst(compiler, BLR)); @@ -1249,30 +1272,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return push_inst(compiler, NOP); case SLJIT_UMUL: case SLJIT_SMUL: - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, (op == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); #else - FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, (op == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); #endif case SLJIT_UDIV: case SLJIT_SDIV: - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (int_op) { - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); } else { - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); } - return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1)); + return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1)); #else - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); + return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1)); #endif } @@ -1718,8 +1741,8 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler * if (op == SLJIT_CONVW_FROMD) { if (FAST_IS_REG(dst)) { - FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, 0, 0)); - return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, 0, 0); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0)); + return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0); } return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0); } @@ -1733,8 +1756,8 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler * if (FAST_IS_REG(dst)) { FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET)); - FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_LOCALS_REG) | B(TMP_REG1))); - return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, 0, 0); + FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1))); + return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0); } SLJIT_ASSERT(dst & SLJIT_MEM); @@ -1785,13 +1808,13 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler * if (FAST_IS_REG(src)) FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1))); else - FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); src = TMP_REG1; } if (FAST_IS_REG(src)) { - FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET)); - FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, dst, dstw)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw)); } else FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); @@ -1815,7 +1838,7 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler * invert_sign = 0; } else if (!FAST_IS_REG(src)) { - FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW)); src = TMP_REG1; } @@ -1827,12 +1850,12 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler * FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330)); if (invert_sign) FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000)); - FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET)); - FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_HI)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI)); FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000)); - FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW)); - FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET)); - FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET_LOW)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW)); FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2))); diff --git a/sljit/sljitNativeSPARC_32.c b/sljit/sljitNativeSPARC_32.c index 80479bf..4a2e629 100644 --- a/sljit/sljitNativeSPARC_32.c +++ b/sljit/sljitNativeSPARC_32.c @@ -110,8 +110,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (!(flags & SET_FLAGS)) return SLJIT_SUCCESS; FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, RDY | D(TMP_REG4), DR(TMP_REG4))); - return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_REG4), MOVABLE_INS | SET_FLAGS); + FAIL_IF(push_inst(compiler, RDY | D(TMP_LINK), DR(TMP_LINK))); + return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS); case SLJIT_AND: return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); diff --git a/sljit/sljitNativeSPARC_common.c b/sljit/sljitNativeSPARC_common.c index e01a5f6..3d8ab3c 100644 --- a/sljit/sljitNativeSPARC_common.c +++ b/sljit/sljitNativeSPARC_common.c @@ -83,17 +83,16 @@ static void sparc_cache_flush(sljit_ins *from, sljit_ins *to) } /* TMP_REG2 is not used by getput_arg */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_REG4 (SLJIT_NO_REGISTERS + 4) -#define TMP_LINK (SLJIT_NO_REGISTERS + 5) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_LINK (SLJIT_NUMBER_OF_REGISTERS + 5) #define TMP_FREG1 (0) -#define TMP_FREG2 ((SLJIT_FLOAT_REG6 + 1) << 1) +#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { - 0, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 14, 1, 24, 25, 26, 15 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 8, 9, 10, 13, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 11, 12, 15 }; /* --------------------------------------------------------------------- */ @@ -419,13 +418,17 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #include "sljitNativeSPARC_64.c" #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif @@ -434,30 +437,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil compiler->local_size = local_size; if (local_size <= SIMM_MAX) { - FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | IMM(-local_size), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS)); } else { FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size)); - FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | S2(TMP_REG1), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS)); } - if (args >= 1) - FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG1) | S1(0) | S2A(24), DR(SLJIT_SAVED_REG1))); - if (args >= 2) - FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG2) | S1(0) | S2A(25), DR(SLJIT_SAVED_REG2))); - if (args >= 3) - FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG3) | S1(0) | S2A(26), DR(SLJIT_SAVED_REG3))); + /* Arguments are in their appropriate registers. */ return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif @@ -472,11 +474,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi if (op != SLJIT_MOV || !FAST_IS_REG(src)) { FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - src = SLJIT_SCRATCH_REG1; + src = SLJIT_R0; } FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS)); - return push_inst(compiler, RESTORE | D(SLJIT_SCRATCH_REG1) | S1(src) | S2(0), UNMOVABLE_INS); + return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(src) | S2(0), UNMOVABLE_INS); } /* --------------------------------------------------------------------- */ @@ -783,8 +785,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler case SLJIT_UMUL: case SLJIT_SMUL: #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? UMUL : SMUL) | D(SLJIT_SCRATCH_REG1) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG1))); - return push_inst(compiler, RDY | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? UMUL : SMUL) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + return push_inst(compiler, RDY | D(SLJIT_R1), DR(SLJIT_R1)); #else #error "Implementation required" #endif @@ -794,13 +796,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler if (op == SLJIT_UDIV) FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS)); else { - FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_SCRATCH_REG1) | IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS)); } - FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_SCRATCH_REG1), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? UDIV : SDIV) | D(SLJIT_SCRATCH_REG1) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG1))); - FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_SCRATCH_REG2) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, SUB | D(SLJIT_SCRATCH_REG2) | S1(TMP_REG2) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1))); + FAIL_IF(push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1))); return SLJIT_SUCCESS; #else #error "Implementation required" @@ -977,8 +979,8 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler * return SLJIT_SUCCESS; if (FAST_IS_REG(dst)) { - FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET)); - return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET); + FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET); } /* Store the integer value from a VFP register. */ @@ -1002,8 +1004,8 @@ static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler * } if (FAST_IS_REG(src)) { - FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_LOCALS_REG), FLOAT_TMP_MEM_OFFSET)); - src = SLJIT_MEM1(SLJIT_LOCALS_REG); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + src = SLJIT_MEM1(SLJIT_SP); srcw = FLOAT_TMP_MEM_OFFSET; } diff --git a/sljit/sljitNativeTILEGX_64.c b/sljit/sljitNativeTILEGX_64.c index d0b392e..d438c63 100644 --- a/sljit/sljitNativeTILEGX_64.c +++ b/sljit/sljitNativeTILEGX_64.c @@ -1173,16 +1173,20 @@ static sljit_si emit_const_64(struct sljit_compiler *compiler, sljit_si dst_ar, return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_ins base; sljit_ins bundle = 0; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif @@ -1233,13 +1237,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif @@ -2370,7 +2378,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); } - FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO)); + FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); FAIL_IF(ADDI_SOLO(54, 54, -16)); @@ -2381,7 +2389,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil /* Register input. */ if (type >= SLJIT_CALL1) - FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO)); + FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO)); @@ -2511,7 +2519,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compil SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2); /* Cannot be optimized out if type is >= CALL0. */ jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0); - PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO)); + PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); jump->addr = compiler->size; PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped)); } diff --git a/sljit/sljitNativeX86_32.c b/sljit/sljitNativeX86_32.c index 5a94c3d..fb68706 100644 --- a/sljit/sljitNativeX86_32.c +++ b/sljit/sljitNativeX86_32.c @@ -63,27 +63,31 @@ static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ return code_ptr; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_si size; - sljit_si locals_offset; sljit_ub *inst; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; compiler->args = args; compiler->flags_saved = 0; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif + size = 1 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); + size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); #else - size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (2 + args * 3) : 0); + size += (args > 0 ? (2 + args * 3) : 0); #endif inst = (sljit_ub*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); @@ -96,76 +100,66 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */; } #endif - if (saveds > 2) - PUSH_REG(reg_map[SLJIT_SAVED_REG3]); - if (saveds > 1) - PUSH_REG(reg_map[SLJIT_SAVED_REG2]); - if (saveds > 0) - PUSH_REG(reg_map[SLJIT_SAVED_REG1]); + if (saveds > 2 || scratches > 7) + PUSH_REG(reg_map[SLJIT_S2]); + if (saveds > 1 || scratches > 8) + PUSH_REG(reg_map[SLJIT_S1]); + if (saveds > 0 || scratches > 9) + PUSH_REG(reg_map[SLJIT_S0]); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (args > 0) { *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[SLJIT_SCRATCH_REG3]; + *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2]; } if (args > 1) { *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[SLJIT_SCRATCH_REG2]; + *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1]; } if (args > 2) { *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x4 /* esp */; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */; *inst++ = 0x24; *inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */ } #else if (args > 0) { *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[TMP_REG1]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1]; *inst++ = sizeof(sljit_sw) * 2; } if (args > 1) { *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[TMP_REG1]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1]; *inst++ = sizeof(sljit_sw) * 3; } if (args > 2) { *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | reg_map[TMP_REG1]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1]; *inst++ = sizeof(sljit_sw) * 4; } #endif -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - locals_offset = 2 * sizeof(sljit_uw); -#else - SLJIT_COMPILE_ASSERT(FIXED_LOCALS_OFFSET >= 2 * sizeof(sljit_uw), require_at_least_two_words); - locals_offset = FIXED_LOCALS_OFFSET; -#endif - compiler->scratches_start = locals_offset; - if (scratches > 3) - locals_offset += (scratches - 3) * sizeof(sljit_uw); - compiler->saveds_start = locals_offset; - if (saveds > 3) - locals_offset += (saveds - 3) * sizeof(sljit_uw); - compiler->locals_offset = locals_offset; + SLJIT_COMPILE_ASSERT(FIXED_LOCALS_OFFSET >= (2 + 4) * sizeof(sljit_uw), require_at_least_two_words); #if defined(__APPLE__) - saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); - local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds; + /* Ignore pushed registers and FIXED_LOCALS_OFFSET when + computing the aligned local size. */ + saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); + local_size = ((FIXED_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; #else - local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1)); + local_size = FIXED_LOCALS_OFFSET + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1)); #endif compiler->local_size = local_size; #ifdef _WIN32 if (local_size > 1024) { #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size)); + FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); #else local_size -= FIXED_LOCALS_OFFSET; - FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size)); + FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, FIXED_LOCALS_OFFSET)); + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, FIXED_LOCALS_OFFSET)); #endif FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); } @@ -173,40 +167,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil SLJIT_ASSERT(local_size > 0); return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size); + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si locals_offset; - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; compiler->args = args; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - locals_offset = 2 * sizeof(sljit_uw); -#else - locals_offset = FIXED_LOCALS_OFFSET; -#endif - compiler->scratches_start = locals_offset; - if (scratches > 3) - locals_offset += (scratches - 3) * sizeof(sljit_uw); - compiler->saveds_start = locals_offset; - if (saveds > 3) - locals_offset += (saveds - 3) * sizeof(sljit_uw); - compiler->locals_offset = locals_offset; #if defined(__APPLE__) - saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); - compiler->local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds; + saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); + compiler->local_size = ((FIXED_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; #else - compiler->local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1)); + compiler->local_size = FIXED_LOCALS_OFFSET + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1)); #endif } @@ -224,9 +208,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi SLJIT_ASSERT(compiler->local_size > 0); FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); - size = 2 + (compiler->saveds <= 3 ? compiler->saveds : 3); + size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) + + (compiler->saveds <= 3 ? compiler->saveds : 3); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (compiler->args > 2) size += 2; @@ -239,12 +224,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi INC_SIZE(size); - if (compiler->saveds > 0) - POP_REG(reg_map[SLJIT_SAVED_REG1]); - if (compiler->saveds > 1) - POP_REG(reg_map[SLJIT_SAVED_REG2]); - if (compiler->saveds > 2) - POP_REG(reg_map[SLJIT_SAVED_REG3]); + if (compiler->saveds > 0 || compiler->scratches > 9) + POP_REG(reg_map[SLJIT_S0]); + if (compiler->saveds > 1 || compiler->scratches > 8) + POP_REG(reg_map[SLJIT_S1]); + if (compiler->saveds > 2 || compiler->scratches > 7) + POP_REG(reg_map[SLJIT_S2]); POP_REG(reg_map[TMP_REG1]); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (compiler->args > 2) @@ -307,8 +292,8 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si inst_size += sizeof(sljit_sw); } - if ((b & REG_MASK) == SLJIT_LOCALS_REG && !(b & OFFS_REG_MASK)) - b |= TO_OFFS_REG(SLJIT_LOCALS_REG); + if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK)) + b |= TO_OFFS_REG(SLJIT_SP); if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) inst_size += 1; /* SIB byte. */ @@ -379,7 +364,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si if (!(b & SLJIT_MEM)) *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b); else if ((b & REG_MASK) != SLJIT_UNUSED) { - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) { + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { if (immb != 0) { if (immb <= 127 && immb >= -128) *buf_ptr |= 0x40; @@ -440,28 +425,28 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2); if (type >= SLJIT_CALL3) - PUSH_REG(reg_map[SLJIT_SCRATCH_REG3]); + PUSH_REG(reg_map[SLJIT_R2]); *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SCRATCH_REG3] << 3) | reg_map[SLJIT_SCRATCH_REG1]; + *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]; #else inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0)); FAIL_IF(!inst); INC_SIZE(4 * (type - SLJIT_CALL0)); *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG1] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */; + *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; *inst++ = 0; if (type >= SLJIT_CALL2) { *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG2] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */; + *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; *inst++ = sizeof(sljit_sw); } if (type >= SLJIT_CALL3) { *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG3] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */; + *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; *inst++ = 2 * sizeof(sljit_sw); } #endif diff --git a/sljit/sljitNativeX86_64.c b/sljit/sljitNativeX86_64.c index ac0fca2..58d460a 100644 --- a/sljit/sljitNativeX86_64.c +++ b/sljit/sljitNativeX86_64.c @@ -87,118 +87,94 @@ static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si return code_ptr; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si size, pushed_size; + sljit_si i, tmp, size, allocated_size; sljit_ub *inst; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; compiler->flags_saved = 0; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - size = saveds; /* Including the return address saved by the call instruction. */ - pushed_size = (saveds + 1) * sizeof(sljit_sw); -#ifndef _WIN64 - if (saveds >= 2) - size += saveds - 1; -#else - if (saveds >= 4) - size += saveds - 3; - if (scratches >= 5) { - size += (5 - 4) * 2; - pushed_size += sizeof(sljit_sw); - } -#endif - size += args * 3; - if (size > 0) { + allocated_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + size = reg_map[i] >= 8 ? 2 : 1; inst = (sljit_ub*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); - INC_SIZE(size); - if (saveds >= 5) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg); - *inst++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]); - } - if (saveds >= 4) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg); - *inst++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]); - } - if (saveds >= 3) { -#ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg); - *inst++ = REX_B; -#else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg); -#endif - PUSH_REG(reg_lmap[SLJIT_SAVED_REG3]); - } - if (saveds >= 2) { -#ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg); + if (reg_map[i] >= 8) *inst++ = REX_B; -#else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg); -#endif - PUSH_REG(reg_lmap[SLJIT_SAVED_REG2]); - } - if (saveds >= 1) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG1] < 8, saved_reg1_is_loreg); - PUSH_REG(reg_lmap[SLJIT_SAVED_REG1]); - } -#ifdef _WIN64 - if (scratches >= 5) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_EREG2] >= 8, temporary_ereg2_is_hireg); + PUSH_REG(reg_lmap[i]); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) *inst++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_SCRATCH_EREG2]); - } -#endif + PUSH_REG(reg_lmap[i]); + } + + if (args > 0) { + size = args * 3; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); #ifndef _WIN64 if (args > 0) { *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7 /* rdi */; + *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */; } if (args > 1) { *inst++ = REX_W | REX_R; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6 /* rsi */; + *inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */; } if (args > 2) { *inst++ = REX_W | REX_R; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2 /* rdx */; + *inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */; } #else if (args > 0) { *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1 /* rcx */; + *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */; } if (args > 1) { *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2 /* rdx */; + *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */; } if (args > 2) { *inst++ = REX_W | REX_B; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0 /* r8 */; + *inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */; } #endif } - local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size; + local_size = ((local_size + FIXED_LOCALS_OFFSET + allocated_size + 16 - 1) & ~(16 - 1)) - allocated_size; compiler->local_size = local_size; + #ifdef _WIN64 if (local_size > 1024) { /* Allocate stack for the callback, which grows the stack. */ @@ -208,9 +184,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil *inst++ = REX_W; *inst++ = GROUP_BINARY_83; *inst++ = MOD_REG | SUB | 4; - /* Pushed size must be divisible by 8. */ - SLJIT_ASSERT(!(pushed_size & 0x7)); - if (pushed_size & 0x8) { + /* Allocated size for registers must be divisible by 8. */ + SLJIT_ASSERT(!(allocated_size & 0x7)); + /* Aligned to 16 byte. */ + if (allocated_size & 0x8) { *inst++ = 5 * sizeof(sljit_sw); local_size -= 5 * sizeof(sljit_sw); } else { @@ -218,10 +195,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil local_size -= 4 * sizeof(sljit_sw); } /* Second instruction */ - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] < 8, temporary_reg1_is_loreg); + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] < 8, temporary_reg1_is_loreg); *inst++ = REX_W; *inst++ = MOV_rm_i32; - *inst++ = MOD_REG | reg_lmap[SLJIT_SCRATCH_REG1]; + *inst++ = MOD_REG | reg_lmap[SLJIT_R0]; *(sljit_si*)inst = local_size; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->skip_checks = 1; @@ -229,6 +206,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); } #endif + SLJIT_ASSERT(local_size > 0); if (local_size <= 127) { inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); @@ -249,43 +227,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil *(sljit_si*)inst = local_size; inst += sizeof(sljit_si); } + #ifdef _WIN64 - /* Save xmm6 with MOVAPS instruction. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = GROUP_0F; - *(sljit_si*)inst = 0x20247429; + /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */ + if (fscratches >= 6 || fsaveds >= 1) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + *inst++ = GROUP_0F; + *(sljit_si*)inst = 0x20247429; + } #endif return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si pushed_size; + sljit_si allocated_size; CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif /* Including the return address saved by the call instruction. */ - pushed_size = (saveds + 1) * sizeof(sljit_sw); -#ifdef _WIN64 - if (scratches >= 5) - pushed_size += sizeof(sljit_sw); -#endif - compiler->local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size; + allocated_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + compiler->local_size = ((local_size + FIXED_LOCALS_OFFSET + allocated_size + 16 - 1) & ~(16 - 1)) - allocated_size; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { - sljit_si size; + sljit_si i, tmp, size; sljit_ub *inst; CHECK_ERROR(); @@ -295,13 +276,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); #ifdef _WIN64 - /* Restore xmm6 with MOVAPS instruction. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = GROUP_0F; - *(sljit_si*)inst = 0x20247428; + /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */ + if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + *inst++ = GROUP_0F; + *(sljit_si*)inst = 0x20247428; + } #endif + SLJIT_ASSERT(compiler->local_size > 0); if (compiler->local_size <= 127) { inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); @@ -322,50 +306,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi *(sljit_si*)inst = compiler->local_size; } - size = 1 + compiler->saveds; -#ifndef _WIN64 - if (compiler->saveds >= 2) - size += compiler->saveds - 1; -#else - if (compiler->saveds >= 4) - size += compiler->saveds - 3; - if (compiler->scratches >= 5) - size += (5 - 4) * 2; -#endif - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - - INC_SIZE(size); - -#ifdef _WIN64 - if (compiler->scratches >= 5) { - *inst++ = REX_B; - POP_REG(reg_lmap[SLJIT_SCRATCH_EREG2]); - } -#endif - if (compiler->saveds >= 1) - POP_REG(reg_map[SLJIT_SAVED_REG1]); - if (compiler->saveds >= 2) { -#ifndef _WIN64 - *inst++ = REX_B; -#endif - POP_REG(reg_lmap[SLJIT_SAVED_REG2]); - } - if (compiler->saveds >= 3) { -#ifndef _WIN64 - *inst++ = REX_B; -#endif - POP_REG(reg_lmap[SLJIT_SAVED_REG3]); - } - if (compiler->saveds >= 4) { - *inst++ = REX_B; - POP_REG(reg_lmap[SLJIT_SAVED_EREG1]); + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); } - if (compiler->saveds >= 5) { - *inst++ = REX_B; - POP_REG(reg_lmap[SLJIT_SAVED_EREG2]); + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); } + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); RET(); return SLJIT_SUCCESS; } @@ -442,7 +407,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si b |= TMP_REG3; } else if (reg_lmap[b & REG_MASK] == 4) - b |= TO_OFFS_REG(SLJIT_LOCALS_REG); + b |= TO_OFFS_REG(SLJIT_SP); } if ((b & REG_MASK) == SLJIT_UNUSED) @@ -450,13 +415,16 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si else { if (reg_map[b & REG_MASK] >= 8) rex |= REX_B; - if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG))) { + + if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) { /* Immediate operand. */ if (immb <= 127 && immb >= -128) inst_size += sizeof(sljit_sb); else inst_size += sizeof(sljit_si); } + else if (reg_lmap[b & REG_MASK] == 5) + inst_size += sizeof(sljit_sb); if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) { inst_size += 1; /* SIB byte. */ @@ -540,8 +508,8 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si if (!(b & SLJIT_MEM)) *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b); else if ((b & REG_MASK) != SLJIT_UNUSED) { - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) { - if (immb != 0) { + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { + if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { if (immb <= 127 && immb >= -128) *buf_ptr |= 0x40; else @@ -555,7 +523,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3); } - if (immb != 0) { + if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { if (immb <= 127 && immb >= -128) *buf_ptr++ = immb; /* 8 bit displacement. */ else { @@ -565,8 +533,12 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si } } else { + if (reg_lmap[b & REG_MASK] == 5) + *buf_ptr |= 0x40; *buf_ptr++ |= 0x04; *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6); + if (reg_lmap[b & REG_MASK] == 5) + *buf_ptr++ = 0; } } else { @@ -597,7 +569,7 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj sljit_ub *inst; #ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 6 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers); + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers); inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); FAIL_IF(!inst); @@ -605,13 +577,13 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj if (type >= SLJIT_CALL3) { *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3]; + *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2]; } *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1]; + *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0]; #else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 2 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers); + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers); inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); FAIL_IF(!inst); @@ -619,11 +591,11 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj if (type >= SLJIT_CALL3) { *inst++ = REX_W | REX_R; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3]; + *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2]; } *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1]; + *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0]; #endif return SLJIT_SUCCESS; } diff --git a/sljit/sljitNativeX86_common.c b/sljit/sljitNativeX86_common.c index 815a46a..71442fe 100644 --- a/sljit/sljitNativeX86_common.c +++ b/sljit/sljitNativeX86_common.c @@ -64,51 +64,46 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) /* Last register + 1. */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = { - 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { + 0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5 }; #define CHECK_EXTRA_REGS(p, w, do) \ - if (p >= SLJIT_SCRATCH_EREG1 && p <= SLJIT_SCRATCH_EREG2) { \ - w = compiler->scratches_start + (p - SLJIT_SCRATCH_EREG1) * sizeof(sljit_sw); \ - p = SLJIT_MEM1(SLJIT_LOCALS_REG); \ - do; \ - } \ - else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \ - w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \ - p = SLJIT_MEM1(SLJIT_LOCALS_REG); \ + if (p >= SLJIT_R3 && p <= SLJIT_R6) { \ + w = FIXED_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \ + p = SLJIT_MEM1(SLJIT_SP); \ do; \ } #else /* SLJIT_CONFIG_X86_32 */ /* Last register + 1. */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) /* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present Note: avoid to use r12 and r13 for memory addessing therefore r12 is better for SAVED_EREG than SAVED_REG. */ #ifndef _WIN64 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9 }; /* low-map. reg_map & 0x7. */ -static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1 +static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1 }; #else /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9 }; /* low-map. reg_map & 0x7. */ -static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1 +static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1 }; #endif @@ -752,14 +747,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #ifdef _WIN64 SLJIT_COMPILE_ASSERT( - reg_map[SLJIT_SCRATCH_REG1] == 0 - && reg_map[SLJIT_SCRATCH_REG2] == 2 + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] == 2 && reg_map[TMP_REG1] > 7, invalid_register_assignment_for_div_mul); #else SLJIT_COMPILE_ASSERT( - reg_map[SLJIT_SCRATCH_REG1] == 0 - && reg_map[SLJIT_SCRATCH_REG2] < 7 + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] < 7 && reg_map[TMP_REG1] == 2, invalid_register_assignment_for_div_mul); #endif @@ -769,8 +764,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler op = GET_OPCODE(op); if (op == SLJIT_UDIV) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0); - inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0); #else inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); #endif @@ -780,7 +775,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler if (op == SLJIT_SDIV) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); #endif #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -809,7 +804,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler FAIL_IF(!inst); INC_SIZE(2); *inst++ = GROUP_F7; - *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_SCRATCH_REG2]); + *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); #else #ifdef _WIN64 size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2; @@ -825,12 +820,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler else if (op >= SLJIT_UDIV) *inst++ = REX_B; *inst++ = GROUP_F7; - *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_SCRATCH_REG2]); + *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); #else if (!compiler->mode32) *inst++ = REX_W; *inst++ = GROUP_F7; - *inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2]; + *inst = MOD_REG | reg_map[SLJIT_R1]; #endif #endif switch (op) { @@ -848,7 +843,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler break; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) - EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REG1, 0); + EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); #endif break; } @@ -957,22 +952,22 @@ static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (dst_r == TMP_REG1) { /* Find a non-used register, whose reg_map[src] < 4. */ - if ((dst & REG_MASK) == SLJIT_SCRATCH_REG1) { - if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SCRATCH_REG2)) - work_r = SLJIT_SCRATCH_REG3; + if ((dst & REG_MASK) == SLJIT_R0) { + if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1)) + work_r = SLJIT_R2; else - work_r = SLJIT_SCRATCH_REG2; + work_r = SLJIT_R1; } else { - if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1)) - work_r = SLJIT_SCRATCH_REG1; - else if ((dst & REG_MASK) == SLJIT_SCRATCH_REG2) - work_r = SLJIT_SCRATCH_REG3; + if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) + work_r = SLJIT_R0; + else if ((dst & REG_MASK) == SLJIT_R1) + work_r = SLJIT_R2; else - work_r = SLJIT_SCRATCH_REG2; + work_r = SLJIT_R1; } - if (work_r == SLJIT_SCRATCH_REG1) { + if (work_r == SLJIT_R0) { ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); } else { @@ -985,7 +980,7 @@ static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, FAIL_IF(!inst); *inst = MOV_rm8_r8; - if (work_r == SLJIT_SCRATCH_REG1) { + if (work_r == SLJIT_R0) { ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); } else { @@ -1180,12 +1175,12 @@ static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, dst_r = dst; else { /* Find an unused temporary register. */ - if ((dst & REG_MASK) != SLJIT_SCRATCH_REG1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1)) - dst_r = SLJIT_SCRATCH_REG1; - else if ((dst & REG_MASK) != SLJIT_SCRATCH_REG2 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG2)) - dst_r = SLJIT_SCRATCH_REG2; + if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) + dst_r = SLJIT_R0; + else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1)) + dst_r = SLJIT_R1; else - dst_r = SLJIT_SCRATCH_REG3; + dst_r = SLJIT_R2; EMIT_MOV(compiler, dst, dstw, dst_r, 0); } EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31); @@ -1341,7 +1336,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { - SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG)); + SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); dst = TMP_REG1; } #endif @@ -1379,7 +1374,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1) - return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REG1, 0); + return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0); #endif if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) { @@ -1471,9 +1466,9 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler, if (dst == src1 && dstw == src1w) { if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) { + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(op_eax_imm, src2w); } @@ -1505,9 +1500,9 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler, if (dst == src2 && dstw == src2w) { if (src1 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { #else - if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) { + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) { #endif BINARY_EAX_IMM(op_eax_imm, src1w); } @@ -1587,9 +1582,9 @@ static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, if (dst == src1 && dstw == src1w) { if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) { + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(op_eax_imm, src2w); } @@ -1841,9 +1836,9 @@ static sljit_si emit_cmp_binary(struct sljit_compiler *compiler, sljit_ub* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(CMP_EAX_i32, src2w); return SLJIT_SUCCESS; @@ -1892,18 +1887,18 @@ static sljit_si emit_test_binary(struct sljit_compiler *compiler, sljit_ub* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(TEST_EAX_i32, src2w); return SLJIT_SUCCESS; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src2 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { + if (src2 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { #else - if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { + if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { #endif BINARY_EAX_IMM(TEST_EAX_i32, src1w); return SLJIT_SUCCESS; @@ -2065,7 +2060,7 @@ static sljit_si emit_shift(struct sljit_compiler *compiler, EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); #else /* [esp+0] contains the flags. */ - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0); #endif EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); @@ -2074,7 +2069,7 @@ static sljit_si emit_shift(struct sljit_compiler *compiler, #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); #else - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw)); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); #endif EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); } @@ -2216,8 +2211,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) { check_sljit_get_register_index(reg); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (reg == SLJIT_SCRATCH_EREG1 || reg == SLJIT_SCRATCH_EREG2 - || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2) + if (reg >= SLJIT_R3 && reg <= SLJIT_R6) return -1; #endif return reg_map[reg]; @@ -2601,16 +2595,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil if (type >= SLJIT_CALL1) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (src == SLJIT_SCRATCH_REG3) { + if (src == SLJIT_R2) { EMIT_MOV(compiler, TMP_REG1, 0, src, 0); src = TMP_REG1; } - if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3) + if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3) srcw += sizeof(sljit_sw); #endif #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64) - if (src == SLJIT_SCRATCH_REG3) { + if (src == SLJIT_R2) { EMIT_MOV(compiler, TMP_REG1, 0, src, 0); src = TMP_REG1; } @@ -2776,8 +2770,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com } if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax); - if (dst != SLJIT_SCRATCH_REG1) { + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax); + if (dst != SLJIT_R0) { inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); FAIL_IF(!inst); INC_SIZE(1 + 3 + 2 + 1); @@ -2846,23 +2840,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *co compiler->mode32 = 0; #endif - ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset); + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (NOT_HALFWORD(offset)) { FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset)); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) - SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); + SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); return compiler->error; #else - return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0); + return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0); #endif } #endif if (offset != 0) - return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset); - return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0); + return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return emit_mov(compiler, dst, dstw, SLJIT_SP, 0); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) |