diff options
author | Sergei Golubchik <sergii@pisem.net> | 2014-11-18 18:07:55 +0100 |
---|---|---|
committer | Sergei Golubchik <sergii@pisem.net> | 2014-11-18 18:07:55 +0100 |
commit | 553b437d3835764f260df43c74b2e50dacda9c54 (patch) | |
tree | 958037ad2350bd096ea4fd022086db502e6cd8f5 /pcre/sljit | |
parent | 8cc5973f1a18654e2e09076adeece2897a768411 (diff) | |
download | mariadb-git-553b437d3835764f260df43c74b2e50dacda9c54.tar.gz |
8.36
Diffstat (limited to 'pcre/sljit')
-rw-r--r-- | pcre/sljit/sljitConfigInternal.h | 140 | ||||
-rw-r--r-- | pcre/sljit/sljitLir.c | 484 | ||||
-rw-r--r-- | pcre/sljit/sljitLir.h | 444 | ||||
-rw-r--r-- | pcre/sljit/sljitNativeARM_32.c | 367 | ||||
-rw-r--r-- | pcre/sljit/sljitNativeARM_64.c | 262 | ||||
-rw-r--r-- | pcre/sljit/sljitNativeARM_T2_32.c | 241 | ||||
-rw-r--r-- | pcre/sljit/sljitNativeMIPS_32.c | 54 | ||||
-rw-r--r-- | pcre/sljit/sljitNativeMIPS_64.c | 50 | ||||
-rw-r--r-- | pcre/sljit/sljitNativeMIPS_common.c | 422 | ||||
-rw-r--r-- | pcre/sljit/sljitNativePPC_common.c | 441 | ||||
-rw-r--r-- | pcre/sljit/sljitNativeSPARC_32.c | 4 | ||||
-rw-r--r-- | pcre/sljit/sljitNativeSPARC_common.c | 246 | ||||
-rw-r--r-- | pcre/sljit/sljitNativeTILEGX_64.c | 24 | ||||
-rw-r--r-- | pcre/sljit/sljitNativeX86_32.c | 156 | ||||
-rw-r--r-- | pcre/sljit/sljitNativeX86_64.c | 326 | ||||
-rw-r--r-- | pcre/sljit/sljitNativeX86_common.c | 316 |
16 files changed, 2492 insertions, 1485 deletions
diff --git a/pcre/sljit/sljitConfigInternal.h b/pcre/sljit/sljitConfigInternal.h index 89be38bd468..6351e5b1685 100644 --- a/pcre/sljit/sljitConfigInternal.h +++ b/pcre/sljit/sljitConfigInternal.h @@ -28,30 +28,43 @@ #define _SLJIT_CONFIG_INTERNAL_H_ /* - SLJIT defines the following macros depending on the target architecture: - - Feature detection (boolean) macros: - SLJIT_32BIT_ARCHITECTURE : 32 bit architecture - SLJIT_64BIT_ARCHITECTURE : 64 bit architecture - SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index - SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing a double array by index - SLJIT_LITTLE_ENDIAN : little endian architecture - SLJIT_BIG_ENDIAN : big endian architecture - SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) - SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information - SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address - - Types and useful macros: - sljit_sb, sljit_ub : signed and unsigned 8 bit byte - sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type - sljit_si, sljit_ui : signed and unsigned 32 bit integer type - sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer - sljit_p : unsgined pointer value (usually the same as sljit_uw, but - some 64 bit ABIs may use 32 bit pointers) - sljit_s : single precision floating point value - sljit_d : double precision floating point value - SLJIT_CALL : C calling convention define for both calling JIT form C and C callbacks for JIT - SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper) + SLJIT defines the following architecture dependent types and macros: + + Types: + sljit_sb, sljit_ub : signed and unsigned 8 bit byte + sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type + sljit_si, sljit_ui : signed and unsigned 32 bit integer type + sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer + sljit_p : unsgined pointer value (usually the same as sljit_uw, but + some 64 bit ABIs may use 32 bit pointers) + sljit_s : single precision floating point value + sljit_d : double precision floating point value + + Macros for feature detection (boolean): + SLJIT_32BIT_ARCHITECTURE : 32 bit architecture + SLJIT_64BIT_ARCHITECTURE : 64 bit architecture + SLJIT_LITTLE_ENDIAN : little endian architecture + SLJIT_BIG_ENDIAN : big endian architecture + SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) + SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information + + Constants: + SLJIT_NUMBER_OF_REGISTERS : number of available registers + SLJIT_NUMBER_OF_SCRATCH_REGISTERS : number of available scratch registers + SLJIT_NUMBER_OF_SAVED_REGISTERS : number of available saved registers + SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers + SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers + SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index + SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing + a double precision floating point array by index + SLJIT_SINGLE_SHIFT : the shift required to apply when accessing + a single precision floating point array by index + SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address + + Other macros: + SLJIT_CALL : C calling convention define for both calling JIT form C and C callbacks for JIT + SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper) */ #if !((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ @@ -142,6 +155,70 @@ #undef SLJIT_EXECUTABLE_ALLOCATOR #endif +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +#define SLJIT_CONFIG_ARM_32 1 +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#define SLJIT_CONFIG_X86 1 +#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +#define SLJIT_CONFIG_ARM 1 +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_CONFIG_PPC 1 +#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#define SLJIT_CONFIG_MIPS 1 +#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) || (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64) +#define SLJIT_CONFIG_SPARC 1 +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#define SLJIT_NUMBER_OF_REGISTERS 10 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7 +#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#ifndef _WIN64 +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6 +#else +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#endif /* _WIN64 */ +#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +#define SLJIT_NUMBER_OF_REGISTERS 11 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +#define SLJIT_NUMBER_OF_REGISTERS 11 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7 +#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +#define SLJIT_NUMBER_OF_REGISTERS 23 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) +#define SLJIT_NUMBER_OF_REGISTERS 22 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17 +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) +#define SLJIT_NUMBER_OF_REGISTERS 17 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) +#define SLJIT_NUMBER_OF_REGISTERS 18 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14 +#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#define SLJIT_NUMBER_OF_REGISTERS 0 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0 +#endif + +#define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \ + (SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS) + +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 6 +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && (defined _WIN64) +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 1 +#else +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 +#endif + +#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \ + (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) + #if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) /* These libraries are needed for the macros below. */ @@ -219,7 +296,7 @@ #ifndef SLJIT_CACHE_FLUSH -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) /* Not required to implement on archs with unified caches. */ #define SLJIT_CACHE_FLUSH(from, to) @@ -240,7 +317,7 @@ #define SLJIT_CACHE_FLUSH(from, to) \ cacheflush((long)(from), (long)(to), 0) -#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) /* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */ #define SLJIT_CACHE_FLUSH(from, to) \ @@ -314,6 +391,7 @@ typedef double sljit_d; /* Shift for double precision sized data. */ #define SLJIT_DOUBLE_SHIFT 3 +#define SLJIT_SINGLE_SHIFT 2 #ifndef SLJIT_W @@ -418,22 +496,12 @@ typedef double sljit_d; #endif #endif /* SLJIT_RETURN_ADDRESS_OFFSET */ -#ifndef SLJIT_SSE2 - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -/* Turn on SSE2 support on x86. */ -#define SLJIT_SSE2 1 - #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) /* Auto detect SSE2 support using CPUID. On 64 bit x86 cpus, sse2 must be present. */ #define SLJIT_DETECT_SSE2 1 #endif -#endif /* (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) */ - -#endif /* !SLJIT_SSE2 */ - #ifndef SLJIT_UNALIGNED #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ diff --git a/pcre/sljit/sljitLir.c b/pcre/sljit/sljitLir.c index 1acecba8b43..69c4b109165 100644 --- a/pcre/sljit/sljitLir.c +++ b/pcre/sljit/sljitLir.c @@ -117,7 +117,7 @@ #define JUMP_ADDR 0x2 /* SLJIT_REWRITABLE_JUMP is 0x1000. */ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) # define PATCH_MB 0x4 # define PATCH_MW 0x8 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -162,7 +162,7 @@ # define PATCH_ABS64 0x100 #endif -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) # define IS_COND 0x004 # define IS_CALL 0x008 # define PATCH_B 0x010 @@ -174,7 +174,7 @@ # define REMOVE_COND 0x100 #endif -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) # define IS_MOVABLE 0x004 # define IS_JAL 0x008 # define IS_CALL 0x010 @@ -229,13 +229,25 @@ # define FCC_IS_SET (1 << 24) #endif +/* Stack management. */ + +#define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \ + (((scratches < SLJIT_NUMBER_OF_SCRATCH_REGISTERS ? 0 : (scratches - SLJIT_NUMBER_OF_SCRATCH_REGISTERS)) + \ + (saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? saveds : SLJIT_NUMBER_OF_SAVED_REGISTERS) + \ + extra) * sizeof(sljit_sw)) + #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -#define SLJIT_HAS_VARIABLE_LOCALS_OFFSET 1 -#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) -#define FIXED_LOCALS_OFFSET (3 * sizeof(sljit_sw)) -#endif +#define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) +#define FIXED_LOCALS_OFFSET ((2 + 4) * sizeof(sljit_sw)) +#else +/* Maximum 3 arguments are passed on the stack. */ +#define FIXED_LOCALS_OFFSET ((3 + 4) * sizeof(sljit_sw)) #endif +#endif /* SLJIT_CONFIG_X86_32 */ + #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #define SLJIT_HAS_FIXED_LOCALS_OFFSET 1 #ifdef _WIN64 @@ -254,7 +266,7 @@ #ifdef _AIX #define FIXED_LOCALS_OFFSET ((6 + 8) * sizeof(sljit_sw)) #else -#define FIXED_LOCALS_OFFSET (2 * sizeof(sljit_sw)) +#define FIXED_LOCALS_OFFSET (3 * sizeof(sljit_sw)) #endif #endif @@ -281,13 +293,13 @@ #if (defined SLJIT_HAS_VARIABLE_LOCALS_OFFSET && SLJIT_HAS_VARIABLE_LOCALS_OFFSET) #define ADJUST_LOCAL_OFFSET(p, i) \ - if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ + if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ (i) += compiler->locals_offset; #elif (defined SLJIT_HAS_FIXED_LOCALS_OFFSET && SLJIT_HAS_FIXED_LOCALS_OFFSET) #define ADJUST_LOCAL_OFFSET(p, i) \ - if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ + if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ (i) += FIXED_LOCALS_OFFSET; #else @@ -307,15 +319,11 @@ #include "sljitExecAllocator.c" #endif -#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) && !(defined SLJIT_SSE2 && SLJIT_SSE2) -#error SLJIT_SSE2_AUTO cannot be enabled without SLJIT_SSE2 -#endif - /* --------------------------------------------------------------------- */ /* Public functions */ /* --------------------------------------------------------------------- */ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || ((defined SLJIT_SSE2 && SLJIT_SSE2) && ((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64))) +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) #define SLJIT_NEEDS_COMPILER_INIT 1 static sljit_si compiler_initialized = 0; /* A thread safe initialization. */ @@ -365,6 +373,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void) compiler->scratches = -1; compiler->saveds = -1; + compiler->fscratches = -1; + compiler->fsaveds = -1; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) compiler->args = -1; @@ -382,7 +392,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void) compiler->cpool_diff = 0xffffffff; #endif -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) compiler->delay_slot = UNMOVABLE_INS; #endif @@ -593,10 +603,6 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp case SLJIT_MUL: \ SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \ break; \ - case SLJIT_CMPD: \ - SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ - SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_SET_S))); \ - break; \ case SLJIT_ADD: \ SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_S))); \ break; \ @@ -625,10 +631,30 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp break; \ } +#define FUNCTION_CHECK_FOP() \ + SLJIT_ASSERT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \ + switch (GET_OPCODE(op)) { \ + case SLJIT_CMPD: \ + SLJIT_ASSERT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_SET_S))); \ + break; \ + default: \ + /* Only SLJIT_INT_OP or SLJIT_SINGLE_OP is allowed. */ \ + SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + break; \ + } + #define FUNCTION_CHECK_IS_REG(r) \ ((r) == SLJIT_UNUSED || \ - ((r) >= SLJIT_SCRATCH_REG1 && (r) <= SLJIT_SCRATCH_REG1 - 1 + compiler->scratches) || \ - ((r) >= SLJIT_SAVED_REG1 && (r) <= SLJIT_SAVED_REG1 - 1 + compiler->saveds)) + ((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \ + ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0)) + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#define FUNCTION_ASSERT_IF_VIRTUAL(p) \ + SLJIT_ASSERT((p) < SLJIT_R3 || (p) > SLJIT_R6); +#else +#define FUNCTION_ASSERT_IF_VIRTUAL(p) +#endif #define FUNCTION_CHECK_SRC(p, i) \ SLJIT_ASSERT(compiler->scratches != -1 && compiler->saveds != -1); \ @@ -636,12 +662,14 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp SLJIT_ASSERT((i) == 0 && (p) != SLJIT_UNUSED); \ else if ((p) == SLJIT_IMM) \ ; \ - else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ + else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \ else if ((p) & SLJIT_MEM) { \ SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \ + FUNCTION_ASSERT_IF_VIRTUAL((p) & REG_MASK); \ if ((p) & OFFS_REG_MASK) { \ SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ + FUNCTION_ASSERT_IF_VIRTUAL(OFFS_REG(p)); \ SLJIT_ASSERT(!((i) & ~0x3)); \ } \ SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ @@ -653,12 +681,14 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp SLJIT_ASSERT(compiler->scratches != -1 && compiler->saveds != -1); \ if (FUNCTION_CHECK_IS_REG(p)) \ SLJIT_ASSERT((i) == 0); \ - else if ((p) == (SLJIT_MEM1(SLJIT_LOCALS_REG))) \ + else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \ else if ((p) & SLJIT_MEM) { \ SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \ + FUNCTION_ASSERT_IF_VIRTUAL((p) & REG_MASK); \ if ((p) & OFFS_REG_MASK) { \ SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ + FUNCTION_ASSERT_IF_VIRTUAL(OFFS_REG(p)); \ SLJIT_ASSERT(!((i) & ~0x3)); \ } \ SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ @@ -667,13 +697,19 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp SLJIT_ASSERT_STOP(); #define FUNCTION_FCHECK(p, i) \ - if ((p) >= SLJIT_FLOAT_REG1 && (p) <= SLJIT_FLOAT_REG6) \ + SLJIT_ASSERT(compiler->fscratches != -1 && compiler->fsaveds != -1); \ + if (((p) >= SLJIT_FR0 && (p) < (SLJIT_FR0 + compiler->fscratches)) || \ + ((p) > (SLJIT_FS0 - compiler->fsaveds) && (p) <= SLJIT_FS0)) \ SLJIT_ASSERT(i == 0); \ + else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ + SLJIT_ASSERT((i) >= 0 && (i) < compiler->logical_local_size); \ else if ((p) & SLJIT_MEM) { \ SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & REG_MASK)); \ + FUNCTION_ASSERT_IF_VIRTUAL((p) & REG_MASK); \ if ((p) & OFFS_REG_MASK) { \ SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ - SLJIT_ASSERT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_LOCALS_REG) && !(i & ~0x3)); \ + FUNCTION_ASSERT_IF_VIRTUAL(OFFS_REG(p)); \ + SLJIT_ASSERT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SP) && !(i & ~0x3)); \ } else \ SLJIT_ASSERT(OFFS_REG(p) == 0); \ SLJIT_ASSERT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ @@ -683,8 +719,8 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp #define FUNCTION_CHECK_OP1() \ if (GET_OPCODE(op) >= SLJIT_MOVU && GET_OPCODE(op) <= SLJIT_MOVU_P) { \ - SLJIT_ASSERT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_LOCALS_REG); \ - SLJIT_ASSERT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_LOCALS_REG); \ + SLJIT_ASSERT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_SP); \ + SLJIT_ASSERT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_SP); \ if ((src & SLJIT_MEM) && (src & REG_MASK)) \ SLJIT_ASSERT((dst & REG_MASK) != (src & REG_MASK) && OFFS_REG(dst) != (src & REG_MASK)); \ } @@ -698,17 +734,6 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp compiler->verbose = verbose; } -static char* reg_names[] = { - (char*)"unused", (char*)"s1", (char*)"s2", (char*)"s3", - (char*)"se1", (char*)"se2", (char*)"p1", (char*)"p2", - (char*)"p3", (char*)"pe1", (char*)"pe2", (char*)"lc" -}; - -static char* freg_names[] = { - (char*)"unused", (char*)"f1", (char*)"f2", (char*)"f3", - (char*)"f4", (char*)"f5", (char*)"f6" -}; - #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) #ifdef _WIN64 # define SLJIT_PRINT_D "I64" @@ -719,66 +744,89 @@ static char* freg_names[] = { # define SLJIT_PRINT_D "" #endif -#define sljit_verbose_param(p, i) \ +#define sljit_verbose_reg(compiler, r) \ + do { \ + if ((r) < (SLJIT_R0 + compiler->scratches)) \ + fprintf(compiler->verbose, "r%d", (r) - SLJIT_R0); \ + else \ + fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - (r)); \ + } while (0) + +#define sljit_verbose_param(compiler, p, i) \ if ((p) & SLJIT_IMM) \ fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); \ else if ((p) & SLJIT_MEM) { \ if ((p) & REG_MASK) { \ - if (i) { \ - if ((p) & OFFS_REG_MASK) \ - fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)], 1 << (i)); \ - else \ - fprintf(compiler->verbose, "[%s + #%" SLJIT_PRINT_D "d]", reg_names[(p) & REG_MASK], (i)); \ - } \ - else { \ - if ((p) & OFFS_REG_MASK) \ - fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)]); \ - else \ - fprintf(compiler->verbose, "[%s]", reg_names[(p) & REG_MASK]); \ + fputc('[', compiler->verbose); \ + sljit_verbose_reg(compiler, (p) & REG_MASK); \ + if ((p) & OFFS_REG_MASK) { \ + fprintf(compiler->verbose, " + "); \ + sljit_verbose_reg(compiler, OFFS_REG(p)); \ + if (i) \ + fprintf(compiler->verbose, " * %d", 1 << (i)); \ } \ + else if (i) \ + fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); \ + fputc(']', compiler->verbose); \ } \ else \ fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \ - } else \ - fprintf(compiler->verbose, "%s", reg_names[p]); -#define sljit_verbose_fparam(p, i) \ + } else if (p) \ + sljit_verbose_reg(compiler, p); \ + else \ + fprintf(compiler->verbose, "unused"); + +#define sljit_verbose_fparam(compiler, p, i) \ if ((p) & SLJIT_MEM) { \ if ((p) & REG_MASK) { \ - if (i) { \ - if ((p) & OFFS_REG_MASK) \ - fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)], 1 << (i)); \ - else \ - fprintf(compiler->verbose, "[%s + #%" SLJIT_PRINT_D "d]", reg_names[(p) & REG_MASK], (i)); \ - } \ - else { \ - if ((p) & OFFS_REG_MASK) \ - fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & REG_MASK], reg_names[OFFS_REG(p)]); \ - else \ - fprintf(compiler->verbose, "[%s]", reg_names[(p) & REG_MASK]); \ + fputc('[', compiler->verbose); \ + sljit_verbose_reg(compiler, (p) & REG_MASK); \ + if ((p) & OFFS_REG_MASK) { \ + fprintf(compiler->verbose, " + "); \ + sljit_verbose_reg(compiler, OFFS_REG(p)); \ + if (i) \ + fprintf(compiler->verbose, "%d", 1 << (i)); \ } \ + else if (i) \ + fprintf(compiler->verbose, "%" SLJIT_PRINT_D "d", (i)); \ + fputc(']', compiler->verbose); \ } \ else \ fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \ - } else \ - fprintf(compiler->verbose, "%s", freg_names[p]); + } \ + else { \ + if ((p) < (SLJIT_FR0 + compiler->fscratches)) \ + fprintf(compiler->verbose, "fr%d", (p) - SLJIT_FR0); \ + else \ + fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - (p)); \ + } -static SLJIT_CONST char* op_names[] = { - /* op0 */ +static SLJIT_CONST char* op0_names[] = { (char*)"breakpoint", (char*)"nop", (char*)"umul", (char*)"smul", (char*)"udiv", (char*)"sdiv", - /* op1 */ +}; + +static SLJIT_CONST char* op1_names[] = { (char*)"mov", (char*)"mov.ub", (char*)"mov.sb", (char*)"mov.uh", (char*)"mov.sh", (char*)"mov.ui", (char*)"mov.si", (char*)"mov.p", (char*)"movu", (char*)"movu.ub", (char*)"movu.sb", (char*)"movu.uh", (char*)"movu.sh", (char*)"movu.ui", (char*)"movu.si", (char*)"movu.p", (char*)"not", (char*)"neg", (char*)"clz", - /* op2 */ +}; + +static SLJIT_CONST char* op2_names[] = { (char*)"add", (char*)"addc", (char*)"sub", (char*)"subc", (char*)"mul", (char*)"and", (char*)"or", (char*)"xor", (char*)"shl", (char*)"lshr", (char*)"ashr", - /* fop1 */ - (char*)"cmp", (char*)"mov", (char*)"neg", (char*)"abs", - /* fop2 */ +}; + +static SLJIT_CONST char* fop1_names[] = { + (char*)"mov", (char*)"conv", (char*)"conv", (char*)"conv", + (char*)"conv", (char*)"conv", (char*)"cmp", (char*)"neg", + (char*)"abs", +}; + +static SLJIT_CONST char* fop2_names[] = { (char*)"add", (char*)"sub", (char*)"mul", (char*)"div" }; @@ -823,33 +871,49 @@ static SLJIT_INLINE void check_sljit_generate_code(struct sljit_compiler *compil #endif } -static SLJIT_INLINE void check_sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +static SLJIT_INLINE void check_sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { /* If debug and verbose are disabled, all arguments are unused. */ SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(options); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(scratches); SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); SLJIT_UNUSED_ARG(local_size); + SLJIT_ASSERT(options == 0); SLJIT_ASSERT(args >= 0 && args <= 3); - SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NO_TMP_REGISTERS); - SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS); + SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + SLJIT_ASSERT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); SLJIT_ASSERT(args <= saveds); + SLJIT_ASSERT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + SLJIT_ASSERT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + SLJIT_ASSERT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " enter args=%d scratches=%d saveds=%d local_size=%d\n", args, scratches, saveds, local_size); + fprintf(compiler->verbose, " enter options:none args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + args, scratches, saveds, fscratches, fsaveds, local_size); #endif } -static SLJIT_INLINE void check_sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +static SLJIT_INLINE void check_sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { /* If debug and verbose are disabled, all arguments are unused. */ SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(options); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(scratches); SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); SLJIT_UNUSED_ARG(local_size); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) @@ -859,14 +923,20 @@ static SLJIT_INLINE void check_sljit_set_context(struct sljit_compiler *compiler } #endif + SLJIT_ASSERT(options == 0); SLJIT_ASSERT(args >= 0 && args <= 3); - SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NO_TMP_REGISTERS); - SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS); + SLJIT_ASSERT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + SLJIT_ASSERT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); SLJIT_ASSERT(args <= saveds); + SLJIT_ASSERT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + SLJIT_ASSERT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + SLJIT_ASSERT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " set_context args=%d scratches=%d saveds=%d local_size=%d\n", args, scratches, saveds, local_size); + fprintf(compiler->verbose, " set_context options:none args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + args, scratches, saveds, fscratches, fsaveds, local_size); #endif } @@ -891,8 +961,8 @@ static SLJIT_INLINE void check_sljit_emit_return(struct sljit_compiler *compiler if (op == SLJIT_UNUSED) fprintf(compiler->verbose, " return\n"); else { - fprintf(compiler->verbose, " return %s ", op_names[op]); - sljit_verbose_param(src, srcw); + fprintf(compiler->verbose, " return.%s ", op1_names[op - SLJIT_OP1_BASE]); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } } @@ -912,7 +982,7 @@ static SLJIT_INLINE void check_sljit_emit_fast_enter(struct sljit_compiler *comp #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " fast_enter "); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, "\n"); } #endif @@ -931,7 +1001,7 @@ static SLJIT_INLINE void check_sljit_emit_fast_return(struct sljit_compiler *com #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " fast_return "); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif @@ -945,9 +1015,10 @@ static SLJIT_INLINE void check_sljit_emit_op0(struct sljit_compiler *compiler, s SLJIT_ASSERT((op >= SLJIT_BREAKPOINT && op <= SLJIT_SMUL) || ((op & ~SLJIT_INT_OP) >= SLJIT_UDIV && (op & ~SLJIT_INT_OP) <= SLJIT_SDIV)); + SLJIT_ASSERT(op < SLJIT_UMUL || compiler->scratches >= 2); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " %s%s\n", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)]); + fprintf(compiler->verbose, " %s%s\n", !(op & SLJIT_INT_OP) ? "" : "i", op0_names[GET_OPCODE(op) - SLJIT_OP0_BASE]); #endif } @@ -979,12 +1050,12 @@ static SLJIT_INLINE void check_sljit_emit_op1(struct sljit_compiler *compiler, s #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)], + fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s", !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif @@ -1021,14 +1092,14 @@ static SLJIT_INLINE void check_sljit_emit_op2(struct sljit_compiler *compiler, s #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)], + fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s", !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_param(src1, src1w); + sljit_verbose_param(compiler, src1, src1w); fprintf(compiler->verbose, ", "); - sljit_verbose_param(src2, src2w); + sljit_verbose_param(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } #endif @@ -1037,13 +1108,13 @@ static SLJIT_INLINE void check_sljit_emit_op2(struct sljit_compiler *compiler, s static SLJIT_INLINE void check_sljit_get_register_index(sljit_si reg) { SLJIT_UNUSED_ARG(reg); - SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_REGISTERS); + SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS); } static SLJIT_INLINE void check_sljit_get_float_register_index(sljit_si reg) { SLJIT_UNUSED_ARG(reg); - SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_FLOAT_REGISTERS); + SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); } static SLJIT_INLINE void check_sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -1055,6 +1126,32 @@ static SLJIT_INLINE void check_sljit_emit_op_custom(struct sljit_compiler *compi SLJIT_ASSERT(instruction); } +#define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \ + SLJIT_ASSERT(sljit_is_fpu_available()); \ + SLJIT_COMPILE_ASSERT(!(SLJIT_CONVW_FROMD & 0x1) && !(SLJIT_CONVD_FROMW & 0x1), \ + invalid_float_opcodes); \ + if (GET_OPCODE(op) >= SLJIT_CONVW_FROMD && GET_OPCODE(op) <= SLJIT_CMPD) { \ + if (GET_OPCODE(op) == SLJIT_CMPD) { \ + check_sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw); \ + } \ + if ((GET_OPCODE(op) | 0x1) == SLJIT_CONVI_FROMD) { \ + check_sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw); \ + } \ + check_sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw); \ + } \ + check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); + static SLJIT_INLINE void check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) @@ -1074,20 +1171,134 @@ static SLJIT_INLINE void check_sljit_emit_fop1(struct sljit_compiler *compiler, } #endif - SLJIT_ASSERT(sljit_is_fpu_available()); - SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_CMPD && GET_OPCODE(op) <= SLJIT_ABSD); + SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_MOVD && GET_OPCODE(op) <= SLJIT_ABSD); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_OP(); + FUNCTION_CHECK_FOP(); FUNCTION_FCHECK(src, srcw); FUNCTION_FCHECK(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d", + fprintf(compiler->verbose, " %s%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (GET_OPCODE(op) == SLJIT_CONVD_FROMS) + ? ((op & SLJIT_SINGLE_OP) ? "s.fromd" : "d.froms") + : ((op & SLJIT_SINGLE_OP) ? "s" : "d")); + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + return; + } +#endif + + SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_CMPD); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + FUNCTION_CHECK_FOP(); + FUNCTION_FCHECK(src1, src1w); + FUNCTION_FCHECK(src2, src2w); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], (op & SLJIT_SINGLE_OP) ? "s" : "d", !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_S) ? "" : ".s"); - sljit_verbose_fparam(dst, dstw); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + return; + } +#endif + + SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_CONVW_FROMD && GET_OPCODE(op) <= SLJIT_CONVI_FROMD); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + FUNCTION_CHECK_FOP(); + FUNCTION_FCHECK(src, srcw); + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? "i" : "w", + (op & SLJIT_SINGLE_OP) ? "s" : "d"); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src, srcw); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + return; + } +#endif + + SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_CONVD_FROMW && GET_OPCODE(op) <= SLJIT_CONVD_FROMI); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + FUNCTION_CHECK_FOP(); + FUNCTION_CHECK_SRC(src, srcw); + FUNCTION_FCHECK(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (op & SLJIT_SINGLE_OP) ? "s" : "d", + (GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? "i" : "w"); + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif @@ -1111,19 +1322,19 @@ static SLJIT_INLINE void check_sljit_emit_fop2(struct sljit_compiler *compiler, SLJIT_ASSERT(sljit_is_fpu_available()); SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_ADDD && GET_OPCODE(op) <= SLJIT_DIVD); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) - FUNCTION_CHECK_OP(); + FUNCTION_CHECK_FOP(); FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); FUNCTION_FCHECK(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s ", op_names[GET_OPCODE(op)], (op & SLJIT_SINGLE_OP) ? "s" : "d"); - sljit_verbose_fparam(dst, dstw); + fprintf(compiler->verbose, " %s%s ", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE], (op & SLJIT_SINGLE_OP) ? "s" : "d"); + sljit_verbose_fparam(compiler, dst, dstw); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src1, src1w); + sljit_verbose_fparam(compiler, src1, src1w); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src2, src2w); + sljit_verbose_fparam(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } #endif @@ -1181,9 +1392,9 @@ static SLJIT_INLINE void check_sljit_emit_cmp(struct sljit_compiler *compiler, s #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %scmp%s.%s ", !(type & SLJIT_INT_OP) ? "" : "i", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]); - sljit_verbose_param(src1, src1w); + sljit_verbose_param(compiler, src1, src1w); fprintf(compiler->verbose, ", "); - sljit_verbose_param(src2, src2w); + sljit_verbose_param(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } #endif @@ -1211,9 +1422,9 @@ static SLJIT_INLINE void check_sljit_emit_fcmp(struct sljit_compiler *compiler, if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %scmp%s.%s ", (type & SLJIT_SINGLE_OP) ? "s" : "d", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]); - sljit_verbose_fparam(src1, src1w); + sljit_verbose_fparam(compiler, src1, src1w); fprintf(compiler->verbose, ", "); - sljit_verbose_fparam(src2, src2w); + sljit_verbose_fparam(compiler, src2, src2w); fprintf(compiler->verbose, "\n"); } #endif @@ -1241,7 +1452,7 @@ static SLJIT_INLINE void check_sljit_emit_ijump(struct sljit_compiler *compiler, #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " ijump.%s ", jump_names[type]); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); fprintf(compiler->verbose, "\n"); } #endif @@ -1277,11 +1488,12 @@ static SLJIT_INLINE void check_sljit_emit_op_flags(struct sljit_compiler *compil #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %sflags.%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", - op_names[GET_OPCODE(op)], !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); - sljit_verbose_param(dst, dstw); + GET_OPCODE(op) >= SLJIT_OP2_BASE ? op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE] : op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], + !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); + sljit_verbose_param(compiler, dst, dstw); if (src != SLJIT_UNUSED) { fprintf(compiler->verbose, ", "); - sljit_verbose_param(src, srcw); + sljit_verbose_param(compiler, src, srcw); } fprintf(compiler->verbose, ", %s\n", jump_names[type]); } @@ -1301,7 +1513,7 @@ static SLJIT_INLINE void check_sljit_get_local_base(struct sljit_compiler *compi #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " local_base "); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", offset); } #endif @@ -1321,7 +1533,7 @@ static SLJIT_INLINE void check_sljit_emit_const(struct sljit_compiler *compiler, #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " const "); - sljit_verbose_param(dst, dstw); + sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", init_value); } #endif @@ -1374,9 +1586,7 @@ static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compi #define SLJIT_CPUINFO SLJIT_CPUINFO_PART1 SLJIT_CPUINFO_PART2 SLJIT_CPUINFO_PART3 -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -# include "sljitNativeX86_common.c" -#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) # include "sljitNativeX86_common.c" #elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) # include "sljitNativeARM_32.c" @@ -1386,21 +1596,17 @@ static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compi # include "sljitNativeARM_T2_32.c" #elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) # include "sljitNativeARM_64.c" -#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) # include "sljitNativePPC_common.c" -#elif (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) -# include "sljitNativePPC_common.c" -#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# include "sljitNativeMIPS_common.c" -#elif (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) # include "sljitNativeMIPS_common.c" -#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) # include "sljitNativeSPARC_common.c" #elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) # include "sljitNativeTILEGX_64.c" #endif -#if !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, sljit_si src1, sljit_sw src1w, @@ -1508,20 +1714,20 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile #endif -#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) { CHECK_ERROR(); check_sljit_get_local_base(compiler, dst, dstw, offset); - ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset); + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->skip_checks = 1; #endif if (offset != 0) - return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset); - return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_LOCALS_REG, 0); + return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0); } #endif @@ -1577,23 +1783,33 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) SLJIT_ASSERT_STOP(); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(options); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(scratches); SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); SLJIT_UNUSED_ARG(local_size); SLJIT_ASSERT_STOP(); return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(options); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(scratches); SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); SLJIT_UNUSED_ARG(local_size); SLJIT_ASSERT_STOP(); } diff --git a/pcre/sljit/sljitLir.h b/pcre/sljit/sljitLir.h index e2cd21846df..97188719390 100644 --- a/pcre/sljit/sljitLir.h +++ b/pcre/sljit/sljitLir.h @@ -56,8 +56,6 @@ Disadvantages: - No automatic register allocation, and temporary results are not stored on the stack. (hence the name comes) - - Limited number of registers (only 6+4 integer registers, max 3+2 - scratch, max 3+2 saved and 6 floating point registers) In practice: - This approach is very effective for interpreters - One of the saved registers typically points to a stack interface @@ -104,76 +102,169 @@ of sljitConfigInternal.h */ /* Registers */ /* --------------------------------------------------------------------- */ +/* + Scratch (R) registers: registers whose may not preserve their values + across function calls. + + Saved (S) registers: registers whose preserve their values across + function calls. + + The scratch and saved register sets are overlap. The last scratch register + is the first saved register, the one before the last is the second saved + register, and so on. + + If an architecture provides two scratch and three saved registers, + its scratch and saved register sets are the following: + + R0 | [S4] | R0 and S4 represent the same physical register + R1 | [S3] | R1 and S3 represent the same physical register + [R2] | S2 | R2 and S2 represent the same physical register + [R3] | S1 | R3 and S1 represent the same physical register + [R4] | S0 | R4 and S0 represent the same physical register + + Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and + SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture. + + Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 10 + and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 5. However, 4 registers + are virtual on x86-32. See below. + + The purpose of this definition is convenience. Although a register + is either scratch register or saved register, SLJIT allows accessing + them from the other set. For example, four registers can be used as + scratch registers and the fifth one as saved register on the architecture + above. Of course the last two scratch registers (R2 and R3) from this + four will be saved on the stack, because they are defined as saved + registers in the application binary interface. Still R2 and R3 can be + used for referencing to these registers instead of S2 and S1, which + makes easier to write platform independent code. Scratch registers + can be saved registers in a similar way, but these extra saved + registers will not be preserved across function calls! Hence the + application must save them on those platforms, where the number of + saved registers is too low. This can be done by copy them onto + the stack and restore them after a function call. + + Note: To emphasize that registers assigned to R2-R4 are saved + registers, they are enclosed by square brackets. S3-S4 + are marked in a similar way. + + Note: sljit_emit_enter and sljit_set_context defines whether a register + is S or R register. E.g: when 3 scratches and 1 saved is mapped + by sljit_emit_enter, the allowed register set will be: R0-R2 and + S0. Although S2 is mapped to the same position as R2, it does not + available in the current configuration. Furthermore the R3 (S1) + register does not available as well. +*/ + +/* When SLJIT_UNUSED is specified as destination, the result is discarded. */ #define SLJIT_UNUSED 0 -/* Scratch (temporary) registers whose may not preserve their values - across function calls. */ -#define SLJIT_SCRATCH_REG1 1 -#define SLJIT_SCRATCH_REG2 2 -#define SLJIT_SCRATCH_REG3 3 -/* Note: extra registers cannot be used for memory addressing. */ -/* Note: on x86-32, these registers are emulated (using stack - loads & stores). */ -#define SLJIT_TEMPORARY_EREG1 4 -#define SLJIT_TEMPORARY_EREG2 5 - -/* Saved registers whose preserve their values across function calls. */ -#define SLJIT_SAVED_REG1 6 -#define SLJIT_SAVED_REG2 7 -#define SLJIT_SAVED_REG3 8 -/* Note: extra registers cannot be used for memory addressing. */ -/* Note: on x86-32, these registers are emulated (using stack - loads & stores). */ -#define SLJIT_SAVED_EREG1 9 -#define SLJIT_SAVED_EREG2 10 - -/* Read-only register (cannot be the destination of an operation). - Only SLJIT_MEM1(SLJIT_LOCALS_REG) addressing mode is allowed since - several ABIs has certain limitations about the stack layout. However - sljit_get_local_base() can be used to obtain the offset of a value - on the stack. */ -#define SLJIT_LOCALS_REG 11 - -/* Number of registers. */ -#define SLJIT_NO_TMP_REGISTERS 5 -#define SLJIT_NO_GEN_REGISTERS 5 -#define SLJIT_NO_REGISTERS 11 +/* Scratch registers. */ +#define SLJIT_R0 1 +#define SLJIT_R1 2 +#define SLJIT_R2 3 +/* Note: on x86-32, R3 - R6 (same as S3 - S6) are emulated (they + are allocated on the stack). These registers are called virtual + and cannot be used for memory addressing (cannot be part of + any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such + limitation on other CPUs. See sljit_get_register_index(). */ +#define SLJIT_R3 4 +#define SLJIT_R4 5 +#define SLJIT_R5 6 +#define SLJIT_R6 7 +#define SLJIT_R7 8 +#define SLJIT_R8 9 +#define SLJIT_R9 10 +/* All R registers provided by the architecture can be accessed by SLJIT_R(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_REGISTERS. */ +#define SLJIT_R(i) (1 + (i)) + +/* Saved registers. */ +#define SLJIT_S0 (SLJIT_NUMBER_OF_REGISTERS) +#define SLJIT_S1 (SLJIT_NUMBER_OF_REGISTERS - 1) +#define SLJIT_S2 (SLJIT_NUMBER_OF_REGISTERS - 2) +/* Note: on x86-32, S3 - S6 (same as R3 - R6) are emulated (they + are allocated on the stack). These registers are called virtual + and cannot be used for memory addressing (cannot be part of + any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such + limitation on other CPUs. See sljit_get_register_index(). */ +#define SLJIT_S3 (SLJIT_NUMBER_OF_REGISTERS - 3) +#define SLJIT_S4 (SLJIT_NUMBER_OF_REGISTERS - 4) +#define SLJIT_S5 (SLJIT_NUMBER_OF_REGISTERS - 5) +#define SLJIT_S6 (SLJIT_NUMBER_OF_REGISTERS - 6) +#define SLJIT_S7 (SLJIT_NUMBER_OF_REGISTERS - 7) +#define SLJIT_S8 (SLJIT_NUMBER_OF_REGISTERS - 8) +#define SLJIT_S9 (SLJIT_NUMBER_OF_REGISTERS - 9) +/* All S registers provided by the architecture can be accessed by SLJIT_S(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_REGISTERS. */ +#define SLJIT_S(i) (SLJIT_NUMBER_OF_REGISTERS - (i)) + +/* Registers >= SLJIT_FIRST_SAVED_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_REG (SLJIT_S0 - SLJIT_NUMBER_OF_SAVED_REGISTERS + 1) + +/* The SLJIT_SP provides direct access to the linear stack space allocated by + sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP). + The immediate offset is extended by the relative stack offset automatically. + The sljit_get_local_base can be used to obtain the absolute offset. */ +#define SLJIT_SP (SLJIT_NUMBER_OF_REGISTERS + 1) /* Return with machine word. */ -#define SLJIT_RETURN_REG SLJIT_SCRATCH_REG1 +#define SLJIT_RETURN_REG SLJIT_R0 /* x86 prefers specific registers for special purposes. In case of shift - by register it supports only SLJIT_SCRATCH_REG3 for shift argument + by register it supports only SLJIT_R2 for shift argument (which is the src2 argument of sljit_emit_op2). If another register is used, sljit must exchange data between registers which cause a minor slowdown. Other architectures has no such limitation. */ -#define SLJIT_PREF_SHIFT_REG SLJIT_SCRATCH_REG3 +#define SLJIT_PREF_SHIFT_REG SLJIT_R2 /* --------------------------------------------------------------------- */ /* Floating point registers */ /* --------------------------------------------------------------------- */ -/* Note: SLJIT_UNUSED as destination is not valid for floating point - operations, since they cannot be used for setting flags. */ +/* Each floating point register can store a double or single precision + value. The FR and FS register sets are overlap in the same way as R + and S register sets. See above. */ -/* Floating point operations are performed on double or - single precision values. */ - -#define SLJIT_FLOAT_REG1 1 -#define SLJIT_FLOAT_REG2 2 -#define SLJIT_FLOAT_REG3 3 -#define SLJIT_FLOAT_REG4 4 -#define SLJIT_FLOAT_REG5 5 -#define SLJIT_FLOAT_REG6 6 - -#define SLJIT_NO_FLOAT_REGISTERS 6 +/* Note: SLJIT_UNUSED as destination is not valid for floating point + operations, since they cannot be used for setting flags. */ + +/* Floating point scratch registers. */ +#define SLJIT_FR0 1 +#define SLJIT_FR1 2 +#define SLJIT_FR2 3 +#define SLJIT_FR3 4 +#define SLJIT_FR4 5 +#define SLJIT_FR5 6 +/* All FR registers provided by the architecture can be accessed by SLJIT_FR(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */ +#define SLJIT_FR(i) (1 + (i)) + +/* Floating point saved registers. */ +#define SLJIT_FS0 (SLJIT_NUMBER_OF_FLOAT_REGISTERS) +#define SLJIT_FS1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1) +#define SLJIT_FS2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2) +#define SLJIT_FS3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3) +#define SLJIT_FS4 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4) +#define SLJIT_FS5 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5) +/* All S registers provided by the architecture can be accessed by SLJIT_FS(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */ +#define SLJIT_FS(i) (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i)) + +/* Float registers >= SLJIT_FIRST_SAVED_FLOAT_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1) /* --------------------------------------------------------------------- */ /* Main structures and functions */ /* --------------------------------------------------------------------- */ +/* + The following structures are private, and can be changed in the + future. Keeping them here allows code inlining. +*/ + struct sljit_memory_fragment { struct sljit_memory_fragment *next; sljit_uw used_size; @@ -205,6 +296,7 @@ struct sljit_const { struct sljit_compiler { sljit_si error; + sljit_si options; struct sljit_label *labels; struct sljit_jump *jumps; @@ -216,10 +308,14 @@ struct sljit_compiler { struct sljit_memory_fragment *buf; struct sljit_memory_fragment *abuf; - /* Used local registers. */ + /* Used scratch registers. */ sljit_si scratches; /* Used saved registers. */ sljit_si saveds; + /* Used float scratch registers. */ + sljit_si fscratches; + /* Used float saved registers. */ + sljit_si fsaveds; /* Local stack size. */ sljit_si local_size; /* Code size. */ @@ -229,16 +325,13 @@ struct sljit_compiler { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) sljit_si args; - sljit_si locals_offset; - sljit_si scratches_start; - sljit_si saveds_start; #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) sljit_si mode32; #endif -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) sljit_si flags_saved; #endif @@ -271,13 +364,13 @@ struct sljit_compiler { sljit_sw cache_argw; #endif -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) sljit_sw imm; sljit_si cache_arg; sljit_sw cache_argw; #endif -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) sljit_si delay_slot; sljit_si cache_arg; sljit_sw cache_argw; @@ -361,46 +454,64 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler error, they return with SLJIT_SUCCESS. */ /* - The executable code is basically a function call from the viewpoint of - the C language. The function calls must obey to the ABI (Application - Binary Interface) of the platform, which specify the purpose of machine - registers and stack handling among other things. The sljit_emit_enter - function emits the necessary instructions for setting up a new context - for the executable code and moves function arguments to the saved - registers. The number of arguments are specified in the "args" - parameter and the first argument goes to SLJIT_SAVED_REG1, the second - goes to SLJIT_SAVED_REG2 and so on. The number of scratch and - saved registers are passed in "scratches" and "saveds" arguments - respectively. Since the saved registers contains the arguments, - "args" must be less or equal than "saveds". The sljit_emit_enter - is also capable of allocating a stack space for local variables. The - "local_size" argument contains the size in bytes of this local area - and its staring address is stored in SLJIT_LOCALS_REG. However - the SLJIT_LOCALS_REG is not necessary the machine stack pointer. - The memory bytes between SLJIT_LOCALS_REG (inclusive) and - SLJIT_LOCALS_REG + local_size (exclusive) can be modified freely - until the function returns. The stack space is uninitialized. + The executable code is a function call from the viewpoint of the C + language. The function calls must obey to the ABI (Application + Binary Interface) of the platform, which specify the purpose of + all machine registers and stack handling among other things. The + sljit_emit_enter function emits the necessary instructions for + setting up a new context for the executable code and moves function + arguments to the saved registers. Furthermore the options argument + can be used to pass configuration options to the compiler. Currently + there are no options, so it must be set to 0. + + The number of sljit_sw arguments passed to the generated function + are specified in the "args" parameter. The number of arguments must + be less than or equal to 3. The first argument goes to SLJIT_S0, + the second goes to SLJIT_S1 and so on. The register set used by + the function must be declared as well. The number of scratch and + saved registers used by the function must be passed to sljit_emit_enter. + Only R registers between R0 and "scratches" argument can be used + later. E.g. if "scratches" is set to 2, the register set will be + limited to R0 and R1. The S registers and the floating point + registers ("fscratches" and "fsaveds") are specified in a similar + way. The sljit_emit_enter is also capable of allocating a stack + space for local variables. The "local_size" argument contains the + size in bytes of this local area and its staring address is stored + in SLJIT_SP. The memory area between SLJIT_SP (inclusive) and + SLJIT_SP + local_size (exclusive) can be modified freely until + the function returns. The stack space is not initialized. + + Note: the following conditions must met: + 0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS + 0 <= saveds <= SLJIT_NUMBER_OF_REGISTERS + scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS + 0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + 0 <= fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS Note: every call of sljit_emit_enter and sljit_set_context - overwrites the previous context. */ + overwrites the previous context. +*/ #define SLJIT_MAX_LOCAL_SIZE 65536 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, - sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size); + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size); /* The machine code has a context (which contains the local stack space size, number of used registers, etc.) which initialized by sljit_emit_enter. Several functions (like sljit_emit_return) requres this context to be able to generate the appropriate code. However, some code fragments (like inline cache) may have - no normal entry point so their context is unknown for the compiler. Using the - function below we can specify their context. + no normal entry point so their context is unknown for the compiler. Their context + can be provided to the compiler by the sljit_set_context function. Note: every call of sljit_emit_enter and sljit_set_context overwrites the previous context. */ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, - sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size); + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size); /* Return from machine code. The op argument can be SLJIT_UNUSED which means the function does not return with anything or any opcode between SLJIT_MOV and @@ -549,37 +660,43 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * the instruction does not set flags (See: SLJIT_KEEP_FLAGS). - flag combinations: '|' means 'logical or'. */ +/* Starting index of opcodes for sljit_emit_op0. */ +#define SLJIT_OP0_BASE 0 + /* Flags: - (never set any flags) Note: breakpoint instruction is not supported by all architectures (namely ppc) It falls back to SLJIT_NOP in those cases. */ -#define SLJIT_BREAKPOINT 0 +#define SLJIT_BREAKPOINT (SLJIT_OP0_BASE + 0) /* Flags: - (never set any flags) Note: may or may not cause an extra cycle wait it can even decrease the runtime in a few cases. */ -#define SLJIT_NOP 1 +#define SLJIT_NOP (SLJIT_OP0_BASE + 1) /* Flags: - (may destroy flags) - Unsigned multiplication of SLJIT_SCRATCH_REG1 and SLJIT_SCRATCH_REG2. - Result goes to SLJIT_SCRATCH_REG2:SLJIT_SCRATCH_REG1 (high:low) word */ -#define SLJIT_UMUL 2 + Unsigned multiplication of SLJIT_R0 and SLJIT_R1. + Result goes to SLJIT_R1:SLJIT_R0 (high:low) word */ +#define SLJIT_UMUL (SLJIT_OP0_BASE + 2) /* Flags: - (may destroy flags) - Signed multiplication of SLJIT_SCRATCH_REG1 and SLJIT_SCRATCH_REG2. - Result goes to SLJIT_SCRATCH_REG2:SLJIT_SCRATCH_REG1 (high:low) word */ -#define SLJIT_SMUL 3 + Signed multiplication of SLJIT_R0 and SLJIT_R1. + Result goes to SLJIT_R1:SLJIT_R0 (high:low) word */ +#define SLJIT_SMUL (SLJIT_OP0_BASE + 3) /* Flags: I - (may destroy flags) - Unsigned divide of the value in SLJIT_SCRATCH_REG1 by the value in SLJIT_SCRATCH_REG2. - The result is placed in SLJIT_SCRATCH_REG1 and the remainder goes to SLJIT_SCRATCH_REG2. - Note: if SLJIT_SCRATCH_REG2 contains 0, the behaviour is undefined. */ -#define SLJIT_UDIV 4 + Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed in SLJIT_R0 and the remainder goes to SLJIT_R1. + Note: if SLJIT_R1 contains 0, the behaviour is undefined. */ +#define SLJIT_UDIV (SLJIT_OP0_BASE + 4) #define SLJIT_IUDIV (SLJIT_UDIV | SLJIT_INT_OP) /* Flags: I - (may destroy flags) - Signed divide of the value in SLJIT_SCRATCH_REG1 by the value in SLJIT_SCRATCH_REG2. - The result is placed in SLJIT_SCRATCH_REG1 and the remainder goes to SLJIT_SCRATCH_REG2. - Note: if SLJIT_SCRATCH_REG2 contains 0, the behaviour is undefined. */ -#define SLJIT_SDIV 5 + Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed in SLJIT_R0 and the remainder goes to SLJIT_R1. + Note: if SLJIT_R1 contains 0, the behaviour is undefined. */ +#define SLJIT_SDIV (SLJIT_OP0_BASE + 5) #define SLJIT_ISDIV (SLJIT_SDIV | SLJIT_INT_OP) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op); +/* Starting index of opcodes for sljit_emit_op1. */ +#define SLJIT_OP1_BASE 32 + /* Notes for MOV instructions: U = Mov with update (pre form). If source or destination defined as SLJIT_MEM1(r1) or SLJIT_MEM2(r1, r2), r1 is increased by the sum of r2 and the constant argument @@ -592,115 +709,118 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler P = pointer (sljit_p) size */ /* Flags: - (never set any flags) */ -#define SLJIT_MOV 6 +#define SLJIT_MOV (SLJIT_OP1_BASE + 0) /* Flags: I - (never set any flags) */ -#define SLJIT_MOV_UB 7 +#define SLJIT_MOV_UB (SLJIT_OP1_BASE + 1) #define SLJIT_IMOV_UB (SLJIT_MOV_UB | SLJIT_INT_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOV_SB 8 +#define SLJIT_MOV_SB (SLJIT_OP1_BASE + 2) #define SLJIT_IMOV_SB (SLJIT_MOV_SB | SLJIT_INT_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOV_UH 9 +#define SLJIT_MOV_UH (SLJIT_OP1_BASE + 3) #define SLJIT_IMOV_UH (SLJIT_MOV_UH | SLJIT_INT_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOV_SH 10 +#define SLJIT_MOV_SH (SLJIT_OP1_BASE + 4) #define SLJIT_IMOV_SH (SLJIT_MOV_SH | SLJIT_INT_OP) /* Flags: I - (never set any flags) Note: see SLJIT_INT_OP for further details. */ -#define SLJIT_MOV_UI 11 +#define SLJIT_MOV_UI (SLJIT_OP1_BASE + 5) /* No SLJIT_INT_OP form, since it is the same as SLJIT_IMOV. */ /* Flags: I - (never set any flags) Note: see SLJIT_INT_OP for further details. */ -#define SLJIT_MOV_SI 12 +#define SLJIT_MOV_SI (SLJIT_OP1_BASE + 6) #define SLJIT_IMOV (SLJIT_MOV_SI | SLJIT_INT_OP) /* Flags: - (never set any flags) */ -#define SLJIT_MOV_P 13 +#define SLJIT_MOV_P (SLJIT_OP1_BASE + 7) /* Flags: - (never set any flags) */ -#define SLJIT_MOVU 14 +#define SLJIT_MOVU (SLJIT_OP1_BASE + 8) /* Flags: I - (never set any flags) */ -#define SLJIT_MOVU_UB 15 +#define SLJIT_MOVU_UB (SLJIT_OP1_BASE + 9) #define SLJIT_IMOVU_UB (SLJIT_MOVU_UB | SLJIT_INT_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOVU_SB 16 +#define SLJIT_MOVU_SB (SLJIT_OP1_BASE + 10) #define SLJIT_IMOVU_SB (SLJIT_MOVU_SB | SLJIT_INT_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOVU_UH 17 +#define SLJIT_MOVU_UH (SLJIT_OP1_BASE + 11) #define SLJIT_IMOVU_UH (SLJIT_MOVU_UH | SLJIT_INT_OP) /* Flags: I - (never set any flags) */ -#define SLJIT_MOVU_SH 18 +#define SLJIT_MOVU_SH (SLJIT_OP1_BASE + 12) #define SLJIT_IMOVU_SH (SLJIT_MOVU_SH | SLJIT_INT_OP) /* Flags: I - (never set any flags) Note: see SLJIT_INT_OP for further details. */ -#define SLJIT_MOVU_UI 19 +#define SLJIT_MOVU_UI (SLJIT_OP1_BASE + 13) /* No SLJIT_INT_OP form, since it is the same as SLJIT_IMOVU. */ /* Flags: I - (never set any flags) Note: see SLJIT_INT_OP for further details. */ -#define SLJIT_MOVU_SI 20 +#define SLJIT_MOVU_SI (SLJIT_OP1_BASE + 14) #define SLJIT_IMOVU (SLJIT_MOVU_SI | SLJIT_INT_OP) /* Flags: - (never set any flags) */ -#define SLJIT_MOVU_P 21 +#define SLJIT_MOVU_P (SLJIT_OP1_BASE + 15) /* Flags: I | E | K */ -#define SLJIT_NOT 22 +#define SLJIT_NOT (SLJIT_OP1_BASE + 16) #define SLJIT_INOT (SLJIT_NOT | SLJIT_INT_OP) /* Flags: I | E | O | K */ -#define SLJIT_NEG 23 +#define SLJIT_NEG (SLJIT_OP1_BASE + 17) #define SLJIT_INEG (SLJIT_NEG | SLJIT_INT_OP) /* Count leading zeroes Flags: I | E | K Important note! Sparc 32 does not support K flag, since the required popc instruction is introduced only in sparc 64. */ -#define SLJIT_CLZ 24 +#define SLJIT_CLZ (SLJIT_OP1_BASE + 18) #define SLJIT_ICLZ (SLJIT_CLZ | SLJIT_INT_OP) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw); +/* Starting index of opcodes for sljit_emit_op2. */ +#define SLJIT_OP2_BASE 96 + /* Flags: I | E | O | C | K */ -#define SLJIT_ADD 25 +#define SLJIT_ADD (SLJIT_OP2_BASE + 0) #define SLJIT_IADD (SLJIT_ADD | SLJIT_INT_OP) /* Flags: I | C | K */ -#define SLJIT_ADDC 26 +#define SLJIT_ADDC (SLJIT_OP2_BASE + 1) #define SLJIT_IADDC (SLJIT_ADDC | SLJIT_INT_OP) /* Flags: I | E | U | S | O | C | K */ -#define SLJIT_SUB 27 +#define SLJIT_SUB (SLJIT_OP2_BASE + 2) #define SLJIT_ISUB (SLJIT_SUB | SLJIT_INT_OP) /* Flags: I | C | K */ -#define SLJIT_SUBC 28 +#define SLJIT_SUBC (SLJIT_OP2_BASE + 3) #define SLJIT_ISUBC (SLJIT_SUBC | SLJIT_INT_OP) /* Note: integer mul Flags: I | O (see SLJIT_C_MUL_*) | K */ -#define SLJIT_MUL 29 +#define SLJIT_MUL (SLJIT_OP2_BASE + 4) #define SLJIT_IMUL (SLJIT_MUL | SLJIT_INT_OP) /* Flags: I | E | K */ -#define SLJIT_AND 30 +#define SLJIT_AND (SLJIT_OP2_BASE + 5) #define SLJIT_IAND (SLJIT_AND | SLJIT_INT_OP) /* Flags: I | E | K */ -#define SLJIT_OR 31 +#define SLJIT_OR (SLJIT_OP2_BASE + 6) #define SLJIT_IOR (SLJIT_OR | SLJIT_INT_OP) /* Flags: I | E | K */ -#define SLJIT_XOR 32 +#define SLJIT_XOR (SLJIT_OP2_BASE + 7) #define SLJIT_IXOR (SLJIT_XOR | SLJIT_INT_OP) /* Flags: I | E | K Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 - to bit_length - 1, the operation is undefined. */ -#define SLJIT_SHL 33 + to bit_length - 1, the result is undefined. */ +#define SLJIT_SHL (SLJIT_OP2_BASE + 8) #define SLJIT_ISHL (SLJIT_SHL | SLJIT_INT_OP) /* Flags: I | E | K Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 - to bit_length - 1, the operation is undefined. */ -#define SLJIT_LSHR 34 + to bit_length - 1, the result is undefined. */ +#define SLJIT_LSHR (SLJIT_OP2_BASE + 9) #define SLJIT_ILSHR (SLJIT_LSHR | SLJIT_INT_OP) /* Flags: I | E | K Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 - to bit_length - 1, the operation is undefined. */ -#define SLJIT_ASHR 35 + to bit_length - 1, the result is undefined. */ +#define SLJIT_ASHR (SLJIT_OP2_BASE + 10) #define SLJIT_IASHR (SLJIT_ASHR | SLJIT_INT_OP) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, @@ -709,15 +829,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler sljit_si src2, sljit_sw src2w); /* The following function is a helper function for sljit_emit_op_custom. - It returns with the real machine register index of any SLJIT_SCRATCH - SLJIT_SAVED or SLJIT_LOCALS register. - Note: it returns with -1 for virtual registers (all EREGs on x86-32). */ + It returns with the real machine register index ( >=0 ) of any SLJIT_R, + SLJIT_S and SLJIT_SP registers. + + Note: it returns with -1 for virtual registers (only on x86-32). */ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg); /* The following function is a helper function for sljit_emit_op_custom. It returns with the real machine register index of any SLJIT_FLOAT register. - Note: the index is divided by 2 on ARM 32 bit architectures. */ + + Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg); @@ -738,37 +860,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void); -/* Note: dst is the left and src is the right operand for SLJIT_FCMP. - Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED is set, - the comparison result is unpredictable. - Flags: SP | E | S (see SLJIT_C_FLOAT_*) */ -#define SLJIT_CMPD 36 -#define SLJIT_CMPS (SLJIT_CMPD | SLJIT_SINGLE_OP) +/* Starting index of opcodes for sljit_emit_fop1. */ +#define SLJIT_FOP1_BASE 128 + /* Flags: SP - (never set any flags) */ -#define SLJIT_MOVD 37 +#define SLJIT_MOVD (SLJIT_FOP1_BASE + 0) #define SLJIT_MOVS (SLJIT_MOVD | SLJIT_SINGLE_OP) +/* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE] + SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int + Rounding mode when the destination is W or I: round towards zero. */ +/* Flags: SP - (never set any flags) */ +#define SLJIT_CONVD_FROMS (SLJIT_FOP1_BASE + 1) +#define SLJIT_CONVS_FROMD (SLJIT_CONVD_FROMS | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_CONVW_FROMD (SLJIT_FOP1_BASE + 2) +#define SLJIT_CONVW_FROMS (SLJIT_CONVW_FROMD | SLJIT_SINGLE_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_NEGD 38 +#define SLJIT_CONVI_FROMD (SLJIT_FOP1_BASE + 3) +#define SLJIT_CONVI_FROMS (SLJIT_CONVI_FROMD | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_CONVD_FROMW (SLJIT_FOP1_BASE + 4) +#define SLJIT_CONVS_FROMW (SLJIT_CONVD_FROMW | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_CONVD_FROMI (SLJIT_FOP1_BASE + 5) +#define SLJIT_CONVS_FROMI (SLJIT_CONVD_FROMI | SLJIT_SINGLE_OP) +/* Note: dst is the left and src is the right operand for SLJIT_CMPD. + Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED flag + is set, the comparison result is unpredictable. + Flags: SP | E | S (see SLJIT_C_FLOAT_*) */ +#define SLJIT_CMPD (SLJIT_FOP1_BASE + 6) +#define SLJIT_CMPS (SLJIT_CMPD | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_NEGD (SLJIT_FOP1_BASE + 7) #define SLJIT_NEGS (SLJIT_NEGD | SLJIT_SINGLE_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_ABSD 39 +#define SLJIT_ABSD (SLJIT_FOP1_BASE + 8) #define SLJIT_ABSS (SLJIT_ABSD | SLJIT_SINGLE_OP) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw); +/* Starting index of opcodes for sljit_emit_fop2. */ +#define SLJIT_FOP2_BASE 160 + /* Flags: SP - (never set any flags) */ -#define SLJIT_ADDD 40 +#define SLJIT_ADDD (SLJIT_FOP2_BASE + 0) #define SLJIT_ADDS (SLJIT_ADDD | SLJIT_SINGLE_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_SUBD 41 +#define SLJIT_SUBD (SLJIT_FOP2_BASE + 1) #define SLJIT_SUBS (SLJIT_SUBD | SLJIT_SINGLE_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_MULD 42 +#define SLJIT_MULD (SLJIT_FOP2_BASE + 2) #define SLJIT_MULS (SLJIT_MULD | SLJIT_SINGLE_OP) /* Flags: SP - (never set any flags) */ -#define SLJIT_DIVD 43 +#define SLJIT_DIVD (SLJIT_FOP2_BASE + 3) #define SLJIT_DIVS (SLJIT_DIVD | SLJIT_SINGLE_OP) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, @@ -888,7 +1034,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com sljit_si src, sljit_sw srcw, sljit_si type); -/* Copies the base address of SLJIT_LOCALS_REG+offset to dst. +/* Copies the base address of SLJIT_SP + offset to dst. Flags: - (never set any flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset); @@ -912,7 +1058,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta /* --------------------------------------------------------------------- */ #define SLJIT_MAJOR_VERSION 0 -#define SLJIT_MINOR_VERSION 91 +#define SLJIT_MINOR_VERSION 92 /* Get the human readable name of the platform. Can be useful on platforms like ARM, where ARM and Thumb2 functions can be mixed, and diff --git a/pcre/sljit/sljitNativeARM_32.c b/pcre/sljit/sljitNativeARM_32.c index 6747c4f6174..0998423b86c 100644 --- a/pcre/sljit/sljitNativeARM_32.c +++ b/pcre/sljit/sljitNativeARM_32.c @@ -36,13 +36,13 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) } /* Last register + 1. */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_PC (SLJIT_NO_REGISTERS + 4) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5) #define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) /* In ARM instruction words. Cache lines are usually 32 byte aligned. */ @@ -55,8 +55,8 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) (((max_diff) / (sljit_si)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1)) /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { - 0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 0, 1, 2, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 12, 14, 15 }; #define RM(rm) (reg_map[rm]) @@ -102,8 +102,12 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define VABS_F32 0xeeb00ac0 #define VADD_F32 0xee300a00 #define VCMP_F32 0xeeb40a40 +#define VCVT_F32_S32 0xeeb80ac0 +#define VCVT_F64_F32 0xeeb70ac0 +#define VCVT_S32_F32 0xeebd0ac0 #define VDIV_F32 0xee800a00 #define VMOV_F32 0xeeb00a40 +#define VMOV 0xee000a10 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 @@ -810,9 +814,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #define SIGNED_DATA 0x40 #define LOAD_DATA 0x80 -#define EMIT_INSTRUCTION(inst) \ - FAIL_IF(push_inst(compiler, (inst))) - /* Condition: AL. */ #define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \ (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2)) @@ -822,16 +823,21 @@ static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si i sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w); -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si size; + sljit_si size, i, tmp; sljit_uw push; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif @@ -839,67 +845,58 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil /* Push saved registers, temporary registers stmdb sp!, {..., lr} */ push = PUSH | (1 << 14); - if (scratches >= 5) - push |= 1 << 11; - if (scratches >= 4) - push |= 1 << 10; - if (saveds >= 5) - push |= 1 << 8; - if (saveds >= 4) - push |= 1 << 7; - if (saveds >= 3) - push |= 1 << 6; - if (saveds >= 2) - push |= 1 << 5; - if (saveds >= 1) - push |= 1 << 4; - EMIT_INSTRUCTION(push); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + push |= 1 << reg_map[i]; + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + push |= 1 << reg_map[i]; + + FAIL_IF(push_inst(compiler, push)); /* Stack must be aligned to 8 bytes: */ - size = (1 + saveds) * sizeof(sljit_uw); - if (scratches >= 4) - size += (scratches - 3) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + local_size = ((size + local_size + 7) & ~7) - size; compiler->local_size = local_size; if (local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size)); + FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); if (args >= 1) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S0, SLJIT_UNUSED, RM(SLJIT_R0)))); if (args >= 2) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG2, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S1, SLJIT_UNUSED, RM(SLJIT_R1)))); if (args >= 3) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG3, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG3))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S2, SLJIT_UNUSED, RM(SLJIT_R2)))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_si size; CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - size = (1 + saveds) * sizeof(sljit_uw); - if (scratches >= 4) - size += (scratches - 3) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; - compiler->local_size = local_size; + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + compiler->local_size = ((size + local_size + 7) & ~7) - size; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { + sljit_si i, tmp; sljit_uw pop; CHECK_ERROR(); @@ -908,25 +905,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); if (compiler->local_size > 0) - FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); + FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); - pop = POP | (1 << 15); /* Push saved registers, temporary registers ldmia sp!, {..., pc} */ - if (compiler->scratches >= 5) - pop |= 1 << 11; - if (compiler->scratches >= 4) - pop |= 1 << 10; - if (compiler->saveds >= 5) - pop |= 1 << 8; - if (compiler->saveds >= 4) - pop |= 1 << 7; - if (compiler->saveds >= 3) - pop |= 1 << 6; - if (compiler->saveds >= 2) - pop |= 1 << 5; - if (compiler->saveds >= 1) - pop |= 1 << 4; + pop = POP | (1 << 15); + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + pop |= 1 << reg_map[i]; + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + pop |= 1 << reg_map[i]; return push_inst(compiler, pop); } @@ -1031,7 +1021,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) if (op == SLJIT_MOV_UB) return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2]))); return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst])); #else return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2)); @@ -1050,7 +1040,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2]))); return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst])); #else return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2)); @@ -1303,8 +1293,8 @@ static sljit_si generate_int(struct sljit_compiler *compiler, sljit_si reg, slji return 0; } - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2))); return 1; } #endif @@ -1320,16 +1310,12 @@ static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sl /* Create imm by 1 inst. */ tmp = get_imm(imm); - if (tmp) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp)); - return SLJIT_SUCCESS; - } + if (tmp) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp)); tmp = get_imm(~imm); - if (tmp) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp)); - return SLJIT_SUCCESS; - } + if (tmp) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp)); #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) /* Create imm by 2 inst. */ @@ -1369,14 +1355,14 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl if (imm) { if (inp_flags & ARG_TEST) return 1; - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm))); return -1; } imm = get_imm(~argw); if (imm) { if (inp_flags & ARG_TEST) return 1; - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm))); return -1; } return 0; @@ -1394,8 +1380,8 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl if (inp_flags & ARG_TEST) return 1; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, - RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7))); + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, + RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7)))); return -1; } @@ -1403,13 +1389,13 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl if (argw >= 0 && argw <= 0xfff) { if (inp_flags & ARG_TEST) return 1; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw)); + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw))); return -1; } if (argw < 0 && argw >= -0xfff) { if (inp_flags & ARG_TEST) return 1; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw)); + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw))); return -1; } } @@ -1417,14 +1403,14 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_fl if (argw >= 0 && argw <= 0xff) { if (inp_flags & ARG_TEST) return 1; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))); + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)))); return -1; } if (argw < 0 && argw >= -0xff) { if (inp_flags & ARG_TEST) return 1; argw = -argw; - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw))); + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)))); return -1; } } @@ -1477,7 +1463,7 @@ static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_ /* This can only happen for stores */ \ /* since ldr reg, [reg, ...]! has no meaning */ \ SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \ - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg))); \ + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg)))); \ reg = TMP_REG3; \ } \ } @@ -1537,9 +1523,8 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00)); if (inp_flags & WRITE_BACK) tmp_r = arg & REG_MASK; - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))); - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0))); - return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)))); + return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0))); } imm = (sljit_uw)(argw - compiler->cache_argw); @@ -1558,7 +1543,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, imm = get_imm(argw & ~max_delta); if (imm) { TEST_WRITE_BACK(); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm))); GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta); return SLJIT_SUCCESS; } @@ -1567,15 +1552,14 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, if (imm) { argw = -argw; TEST_WRITE_BACK(); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm))); GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta); return SLJIT_SUCCESS; } if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) { TEST_WRITE_BACK(); - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); - return SLJIT_SUCCESS; + return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); } if (argw == next_argw && (next_arg & SLJIT_MEM)) { @@ -1586,15 +1570,14 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, compiler->cache_argw = argw; TEST_WRITE_BACK(); - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); - return SLJIT_SUCCESS; + return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); } imm = (sljit_uw)(argw - next_argw); if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { SLJIT_ASSERT(inp_flags & LOAD_DATA); FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK])); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK]))); compiler->cache_arg = arg; compiler->cache_argw = argw; @@ -1610,8 +1593,7 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, } FAIL_IF(load_immediate(compiler, tmp_r, argw)); - EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0))); - return SLJIT_SUCCESS; + return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0))); } static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) @@ -1843,31 +1825,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler op = GET_OPCODE(op); switch (op) { case SLJIT_BREAKPOINT: - EMIT_INSTRUCTION(BKPT); + FAIL_IF(push_inst(compiler, BKPT)); break; case SLJIT_NOP: - EMIT_INSTRUCTION(NOP); + FAIL_IF(push_inst(compiler, NOP)); break; case SLJIT_UMUL: case SLJIT_SMUL: #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) - | (reg_map[SLJIT_SCRATCH_REG2] << 16) - | (reg_map[SLJIT_SCRATCH_REG1] << 12) - | (reg_map[SLJIT_SCRATCH_REG1] << 8) - | reg_map[SLJIT_SCRATCH_REG2]); + | (reg_map[SLJIT_R1] << 16) + | (reg_map[SLJIT_R0] << 12) + | (reg_map[SLJIT_R0] << 8) + | reg_map[SLJIT_R1]); #else - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_R1)))); return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) - | (reg_map[SLJIT_SCRATCH_REG2] << 16) - | (reg_map[SLJIT_SCRATCH_REG1] << 12) - | (reg_map[SLJIT_SCRATCH_REG1] << 8) + | (reg_map[SLJIT_R1] << 16) + | (reg_map[SLJIT_R0] << 12) + | (reg_map[SLJIT_R0] << 8) | reg_map[TMP_REG1]); #endif case SLJIT_UDIV: case SLJIT_SDIV: if (compiler->scratches >= 3) - EMIT_INSTRUCTION(0xe52d2008 /* str r2, [sp, #-8]! */); + FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */)); #if defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, (op == SLJIT_UDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); @@ -1995,7 +1977,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) { check_sljit_get_float_register_index(reg); - return reg; + return reg << 1; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -2064,7 +2046,7 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl SLJIT_ASSERT(arg & SLJIT_MEM); if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)))); arg = SLJIT_MEM | TMP_REG1; argw = 0; } @@ -2097,13 +2079,13 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl } imm = get_imm(argw & ~0x3fc); if (imm) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm))); return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2)); } imm = get_imm(-argw & ~0x3fc); if (imm) { argw = -argw; - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm))); return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2)); } } @@ -2112,7 +2094,7 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl compiler->cache_argw = argw; if (arg & REG_MASK) { FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1])); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1]))); } else FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); @@ -2120,60 +2102,114 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0)); } +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_SINGLE_OP, TMP_FREG1, src, 0))); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16)); + + /* Store the integer value from a VFP register. */ + return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16))); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. */ + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16))); + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_SINGLE_OP, dst_r, TMP_FREG1, 0))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, src1, src2, 0))); + return push_inst(compiler, VMRS); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_si dst_fr; + sljit_si dst_r; CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); - compiler->cache_arg = 0; compiler->cache_argw = 0; - op ^= SLJIT_SINGLE_OP; + if (GET_OPCODE(op) != SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw)); - dst = TMP_FREG1; - } - if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw)); - src = TMP_FREG2; - } - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, dst, src, 0)); - EMIT_INSTRUCTION(VMRS); - return SLJIT_SUCCESS; - } + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_fr, src, srcw)); - src = dst_fr; + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw)); + src = dst_r; } switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_fr && dst_fr != TMP_FREG1) - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0)); - break; - case SLJIT_NEGD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0)); - break; - case SLJIT_ABSD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_fr, src, 0)); - break; - } - - if (dst_fr == TMP_FREG1) { - if (GET_OPCODE(op) == SLJIT_MOVD) - dst_fr = src; - FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_fr, dst, dstw)); + case SLJIT_MOVD: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + else + dst_r = src; + } + break; + case SLJIT_NEGD: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + break; + case SLJIT_ABSD: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + op ^= SLJIT_SINGLE_OP; + break; } + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw); return SLJIT_SUCCESS; } @@ -2182,16 +2218,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_si dst_fr; + sljit_si dst_r; CHECK_ERROR(); check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); compiler->cache_arg = 0; compiler->cache_argw = 0; op ^= SLJIT_SINGLE_OP; - dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src2 & SLJIT_MEM) { FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); @@ -2205,23 +2244,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADDD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1)); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); break; case SLJIT_SUBD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1)); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); break; case SLJIT_MULD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1)); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); break; case SLJIT_DIVD: - EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_fr, src2, src1)); + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); break; } - if (dst_fr == TMP_FREG1) + if (dst_r == TMP_FREG1) FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw)); return SLJIT_SUCCESS; @@ -2252,7 +2291,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw)) return compiler->error; /* TMP_REG3 is used for caching. */ - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3)))); compiler->cache_arg = 0; compiler->cache_argw = 0; return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0); @@ -2265,7 +2304,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * ADJUST_LOCAL_OFFSET(src, srcw); if (FAST_IS_REG(src)) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src)))); else if (src & SLJIT_MEM) { if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw)) FAIL_IF(compiler->error); @@ -2273,7 +2312,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * compiler->cache_arg = 0; compiler->cache_argw = 0; FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0)); - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2)))); } } else if (src & SLJIT_IMM) @@ -2454,14 +2493,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; if (op < SLJIT_ADD) { - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0)); - EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0))); + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc)); return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS; } ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP)); if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) { - EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc); + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc)); /* The condition must always be set, even if the ORR/EOR is not executed above. */ return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS; } @@ -2478,8 +2517,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com srcw = 0; } - EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc); - EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)); + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc)); + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000))); if (dst_r == TMP_REG2) FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0)); diff --git a/pcre/sljit/sljitNativeARM_64.c b/pcre/sljit/sljitNativeARM_64.c index cfd1a38242f..fba664ec9b9 100644 --- a/pcre/sljit/sljitNativeARM_64.c +++ b/pcre/sljit/sljitNativeARM_64.c @@ -34,18 +34,18 @@ typedef sljit_ui sljit_ins; #define TMP_ZERO 0 -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_REG4 (SLJIT_NO_REGISTERS + 4) -#define TMP_LR (SLJIT_NO_REGISTERS + 5) -#define TMP_SP (SLJIT_NO_REGISTERS + 6) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_REG4 (SLJIT_NUMBER_OF_REGISTERS + 5) +#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 6) +#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 7) #define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { - 31, 0, 1, 2, 3, 4, 19, 20, 21, 22, 23, 29, 9, 10, 11, 12, 30, 31 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { + 31, 0, 1, 2, 3, 4, 5, 6, 7, 13, 14, 15, 16, 17, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 12, 30, 31 }; #define W_OP (1 << 31) @@ -83,6 +83,8 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { #define FABS 0x1e60c000 #define FADD 0x1e602800 #define FCMP 0x1e602000 +#define FCVT 0x1e224000 +#define FCVTZS 0x9e780000 #define FDIV 0x1e601800 #define FMOV 0x1e604000 #define FMUL 0x1e600800 @@ -104,6 +106,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { #define RET 0xd65f0000 #define SBC 0xda000000 #define SBFM 0x93000000 +#define SCVTF 0x9e620000 #define SDIV 0x9ac00c00 #define SMADDL 0x9b200000 #define SMULH 0x9b403c00 @@ -1058,17 +1061,24 @@ static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit /* Entry, exit */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { + sljit_si i, tmp, offs, prev; + CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - compiler->locals_offset = (2 + saveds) * sizeof(sljit_sw); + compiler->locals_offset = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); local_size = (compiler->locals_offset + local_size + 15) & ~15; compiler->local_size = local_size; @@ -1086,64 +1096,98 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) | RN(TMP_SP) | (0x40 << 15))); } - FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_LOCALS_REG) | RN(TMP_SP))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP))); - if (saveds >= 2) - FAIL_IF(push_inst(compiler, STP | RT(SLJIT_SAVED_REG1) | RT2(SLJIT_SAVED_REG2) | RN(TMP_SP) | (2 << 15))); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, STP | RT(SLJIT_SAVED_REG3) | RT2(SLJIT_SAVED_EREG1) | RN(TMP_SP) | (4 << 15))); - if (saveds == 1) - FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_REG1) | RN(TMP_SP) | (2 << 10))); - if (saveds == 3) - FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_REG3) | RN(TMP_SP) | (4 << 10))); - if (saveds == 5) - FAIL_IF(push_inst(compiler, STRI | RT(SLJIT_SAVED_EREG2) | RN(TMP_SP) | (6 << 10))); + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + offs = 2 << 15; + prev = -1; + for (i = SLJIT_S0; i >= tmp; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } + + if (prev != -1) + FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(TMP_SP) | (offs >> 5))); if (args >= 1) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0))); if (args >= 2) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG2) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1))); if (args >= 3) - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_SAVED_REG3) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG3))); + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - compiler->locals_offset = (2 + saveds) * sizeof(sljit_sw); + compiler->locals_offset = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); compiler->local_size = (compiler->locals_offset + local_size + 15) & ~15; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { - sljit_si saveds, local_size; + sljit_si local_size; + sljit_si i, tmp, offs, prev; CHECK_ERROR(); check_sljit_emit_return(compiler, op, src, srcw); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - saveds = compiler->saveds; + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + offs = 2 << 15; + prev = -1; + for (i = SLJIT_S0; i >= tmp; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } - if (saveds >= 2) - FAIL_IF(push_inst(compiler, LDP | RT(SLJIT_SAVED_REG1) | RT2(SLJIT_SAVED_REG2) | RN(TMP_SP) | (2 << 15))); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, LDP | RT(SLJIT_SAVED_REG3) | RT2(SLJIT_SAVED_EREG1) | RN(TMP_SP) | (4 << 15))); - if (saveds == 1) - FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_REG1) | RN(TMP_SP) | (2 << 10))); - if (saveds == 3) - FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_REG3) | RN(TMP_SP) | (4 << 10))); - if (saveds == 5) - FAIL_IF(push_inst(compiler, LDRI | RT(SLJIT_SAVED_EREG2) | RN(TMP_SP) | (6 << 10))); + if (prev != -1) + FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(TMP_SP) | (offs >> 5))); local_size = compiler->local_size; @@ -1184,15 +1228,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return push_inst(compiler, NOP); case SLJIT_UMUL: case SLJIT_SMUL: - FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1))); - FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_SCRATCH_REG1) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2) | RT2(TMP_ZERO))); - return push_inst(compiler, (op == SLJIT_SMUL ? SMULH : UMULH) | RD(SLJIT_SCRATCH_REG2) | RN(TMP_REG1) | RM(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); + return push_inst(compiler, (op == SLJIT_SMUL ? SMULH : UMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); case SLJIT_UDIV: case SLJIT_SDIV: - FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_SCRATCH_REG1))); - FAIL_IF(push_inst(compiler, ((op == SLJIT_SDIV ? SDIV : UDIV) ^ inv_bits) | RD(SLJIT_SCRATCH_REG1) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_SCRATCH_REG2) | RN(SLJIT_SCRATCH_REG1) | RM(SLJIT_SCRATCH_REG2) | RT2(TMP_ZERO))); - return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_SCRATCH_REG2) | RN(TMP_REG1) | RM(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, ((op == SLJIT_SDIV ? SDIV : UDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); + return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); } return SLJIT_SUCCESS; @@ -1524,41 +1568,107 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3)); } +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONVI_FROMD) + inv_bits |= (1 << 31); + + if (src & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src))); + + if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) + return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + inv_bits |= (1 << 31); + + if (src & SLJIT_MEM) { + emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw); + src = TMP_REG1; + } else if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; +#endif + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, ((op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE; - sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + sljit_ins inv_bits; CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - compiler->cache_arg = 0; compiler->cache_argw = 0; - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, TMP_FREG1, dst, dstw); - dst = TMP_FREG1; - } - if (src & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, TMP_FREG2, src, srcw); - src = TMP_FREG2; - } - return push_inst(compiler, (FCMP ^ inv_bits) | VN(dst) | VM(src)); - } + SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, dst_r, src, srcw); + emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONVD_FROMS) ? (mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw); src = dst_r; } switch (GET_OPCODE(op)) { case SLJIT_MOVD: - if (src != dst_r) - FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src))); + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src))); + else + dst_r = src; + } break; case SLJIT_NEGD: FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src))); @@ -1566,11 +1676,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile case SLJIT_ABSD: FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src))); break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_SINGLE_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); + break; } - if (!(dst & SLJIT_MEM)) - return SLJIT_SUCCESS; - return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw); + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw); + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, @@ -1583,11 +1696,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile CHECK_ERROR(); check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src1 & SLJIT_MEM) { emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); src1 = TMP_FREG1; @@ -1631,7 +1747,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c if (dst == SLJIT_UNUSED) return SLJIT_SUCCESS; - if (dst <= REG_MASK) + if (FAST_IS_REG(dst)) return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR)); /* Memory. */ @@ -1644,7 +1760,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * check_sljit_emit_fast_return(compiler, src, srcw); ADJUST_LOCAL_OFFSET(src, srcw); - if (src <= REG_MASK) + if (FAST_IS_REG(src)) FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src))); else if (src & SLJIT_MEM) FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw)); @@ -1833,7 +1949,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com return SLJIT_SUCCESS; cc = get_cc(type); - dst_r = (dst <= REG_MASK) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; if (GET_OPCODE(op) < SLJIT_ADD) { FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO))); diff --git a/pcre/sljit/sljitNativeARM_T2_32.c b/pcre/sljit/sljitNativeARM_T2_32.c index 682f964e357..7ea8cd7d823 100644 --- a/pcre/sljit/sljitNativeARM_T2_32.c +++ b/pcre/sljit/sljitNativeARM_T2_32.c @@ -33,17 +33,17 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) typedef sljit_ui sljit_ins; /* Last register + 1. */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_PC (SLJIT_NO_REGISTERS + 4) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5) #define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { - 0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 13, 3, 4, 14, 15 }; #define COPY_BITS(src, from, to, bits) \ @@ -138,9 +138,9 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define ORRI 0xf0400000 #define ORRS 0x4300 #define ORR_W 0xea400000 -#define POP 0xbd00 +#define POP 0xbc00 #define POP_W 0xe8bd0000 -#define PUSH 0xb500 +#define PUSH 0xb400 #define PUSH_W 0xe92d0000 #define RSB_WI 0xf1c00000 #define RSBSI 0x4240 @@ -169,8 +169,12 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define VABS_F32 0xeeb00ac0 #define VADD_F32 0xee300a00 #define VCMP_F32 0xeeb40a40 +#define VCVT_F32_S32 0xeeb80ac0 +#define VCVT_F64_F32 0xeeb70ac0 +#define VCVT_S32_F32 0xeebd0ac0 #define VDIV_F32 0xee800a00 #define VMOV_F32 0xeeb00a40 +#define VMOV 0xee000a10 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 @@ -956,7 +960,7 @@ static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, } /* SP based immediate. */ - if (SLJIT_UNLIKELY(arg == SLJIT_LOCALS_REG) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) { + if (SLJIT_UNLIKELY(arg == SLJIT_SP) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) { FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2))); return -1; } @@ -1123,82 +1127,84 @@ static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit /* Entry, exit */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si size; + sljit_si size, i, tmp; sljit_ins push; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif push = (1 << 4); - if (saveds >= 5) - push |= 1 << 11; - if (saveds >= 4) - push |= 1 << 10; - if (saveds >= 3) - push |= 1 << 8; - if (saveds >= 2) - push |= 1 << 7; - if (saveds >= 1) - push |= 1 << 6; - if (scratches >= 5) - push |= 1 << 5; - FAIL_IF(saveds >= 3 + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + push |= 1 << reg_map[i]; + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + push |= 1 << reg_map[i]; + + FAIL_IF((push & 0xff00) ? push_inst32(compiler, PUSH_W | (1 << 14) | push) - : push_inst16(compiler, PUSH | push)); + : push_inst16(compiler, PUSH | (1 << 8) | push)); - /* Stack must be aligned to 8 bytes: */ - size = (3 + saveds) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; + /* Stack must be aligned to 8 bytes: (LR, R4) */ + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + local_size = ((size + local_size + 7) & ~7) - size; compiler->local_size = local_size; if (local_size > 0) { if (local_size <= (127 << 2)) FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2))); else - FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, local_size)); + FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size)); } if (args >= 1) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG1, SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0, SLJIT_R0))); if (args >= 2) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG2, SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S1, SLJIT_R1))); if (args >= 3) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG3, SLJIT_SCRATCH_REG3))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_si size; CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - size = (3 + saveds) * sizeof(sljit_uw); - local_size += size; - local_size = (local_size + 7) & ~7; - local_size -= size; - compiler->local_size = local_size; + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + compiler->local_size = ((size + local_size + 7) & ~7) - size; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { + sljit_si i, tmp; sljit_ins pop; CHECK_ERROR(); @@ -1210,25 +1216,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi if (compiler->local_size <= (127 << 2)) FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2))); else - FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, compiler->local_size)); + FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size)); } pop = (1 << 4); - if (compiler->saveds >= 5) - pop |= 1 << 11; - if (compiler->saveds >= 4) - pop |= 1 << 10; - if (compiler->saveds >= 3) - pop |= 1 << 8; - if (compiler->saveds >= 2) - pop |= 1 << 7; - if (compiler->saveds >= 1) - pop |= 1 << 6; - if (compiler->scratches >= 5) - pop |= 1 << 5; - return compiler->saveds >= 3 + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + pop |= 1 << reg_map[i]; + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + pop |= 1 << reg_map[i]; + + return (pop & 0xff00) ? push_inst32(compiler, POP_W | (1 << 15) | pop) - : push_inst16(compiler, POP | pop); + : push_inst16(compiler, POP | (1 << 8) | pop); } /* --------------------------------------------------------------------- */ @@ -1264,10 +1266,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler case SLJIT_UMUL: case SLJIT_SMUL: return push_inst32(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) - | (reg_map[SLJIT_SCRATCH_REG2] << 8) - | (reg_map[SLJIT_SCRATCH_REG1] << 12) - | (reg_map[SLJIT_SCRATCH_REG1] << 16) - | reg_map[SLJIT_SCRATCH_REG2]); + | (reg_map[SLJIT_R1] << 8) + | (reg_map[SLJIT_R0] << 12) + | (reg_map[SLJIT_R0] << 16) + | reg_map[SLJIT_R1]); case SLJIT_UDIV: case SLJIT_SDIV: if (compiler->scratches >= 4) { @@ -1512,7 +1514,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) { check_sljit_get_float_register_index(reg); - return reg; + return reg << 1; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -1607,6 +1609,69 @@ static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sl return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg)); } +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_SINGLE_OP) | DD4(TMP_FREG1) | DM4(src))); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1)); + + /* Store the integer value from a VFP register. */ + return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1))); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. */ + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1))); + } + + FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(TMP_FREG1))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(src1) | DM4(src2))); + return push_inst32(compiler, VMRS); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) @@ -1614,27 +1679,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile sljit_si dst_r; CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); - compiler->cache_arg = 0; compiler->cache_argw = 0; - op ^= SLJIT_SINGLE_OP; + if (GET_OPCODE(op) != SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, dst, dstw); - dst = TMP_FREG1; - } - if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src, srcw); - src = TMP_FREG2; - } - FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst) | DM4(src))); - return push_inst32(compiler, VMRS); - } + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw); src = dst_r; @@ -1642,8 +1696,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_MOVD: - if (src != dst_r) - FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); + else + dst_r = src; + } break; case SLJIT_NEGD: FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); @@ -1651,11 +1709,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile case SLJIT_ABSD: FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); + op ^= SLJIT_SINGLE_OP; + break; } - if (!(dst & SLJIT_MEM)) - return SLJIT_SUCCESS; - return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw); + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, @@ -1667,12 +1729,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile CHECK_ERROR(); check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); compiler->cache_arg = 0; compiler->cache_argw = 0; op ^= SLJIT_SINGLE_OP; - dst_r = (dst <= REG_MASK) ? dst : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src1 & SLJIT_MEM) { emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); src1 = TMP_FREG1; @@ -1718,7 +1783,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *c if (dst == SLJIT_UNUSED) return SLJIT_SUCCESS; - if (dst <= REG_MASK) + if (FAST_IS_REG(dst)) return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3)); /* Memory. */ @@ -1737,7 +1802,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler * check_sljit_emit_fast_return(compiler, src, srcw); ADJUST_LOCAL_OFFSET(src, srcw); - if (src <= REG_MASK) + if (FAST_IS_REG(src)) FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src))); else if (src & SLJIT_MEM) { if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw)) diff --git a/pcre/sljit/sljitNativeMIPS_32.c b/pcre/sljit/sljitNativeMIPS_32.c index cb7c6951685..b2b60d7a4c9 100644 --- a/pcre/sljit/sljitNativeMIPS_32.c +++ b/pcre/sljit/sljitNativeMIPS_32.c @@ -84,7 +84,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { if (op == SLJIT_MOV_SB) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); #else FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); @@ -102,7 +102,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { if (op == SLJIT_MOV_SH) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); #else FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); @@ -125,7 +125,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (CHECK_FLAGS(SLJIT_SET_E)) @@ -154,9 +154,9 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_O) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); @@ -174,7 +174,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (op & (SLJIT_SET_C | SLJIT_SET_O)) @@ -189,8 +189,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); if (!(op & SLJIT_SET_O)) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); return push_inst(compiler, SLL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); @@ -198,21 +198,21 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_C) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); else { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } } FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); } else { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst))); } if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); if (!(op & SLJIT_SET_C)) @@ -221,7 +221,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); /* Set carry flag. */ - return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(TMP_EREG1) | DA(ULESS_FLAG), ULESS_FLAG); + return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG); case SLJIT_SUB: if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) { @@ -233,9 +233,9 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_O) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); @@ -247,7 +247,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O)) @@ -265,8 +265,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (!(op & SLJIT_SET_O)) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); return push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); @@ -279,27 +279,27 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); } else { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); } if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(TMP_EREG2), TMP_EREG2)); + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG)); FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); - return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; + return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; case SLJIT_MUL: SLJIT_ASSERT(!(flags & SRC2_IMM)); if (!(op & SLJIT_SET_O)) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); #else FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); @@ -307,10 +307,10 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj #endif } FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MFHI | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG)); FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); - FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(TMP_EREG2) | SH_IMM(31), TMP_EREG2)); - return push_inst(compiler, SUBU | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); + FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG)); + return push_inst(compiler, SUBU | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); case SLJIT_AND: EMIT_LOGICAL(ANDI, AND); diff --git a/pcre/sljit/sljitNativeMIPS_64.c b/pcre/sljit/sljitNativeMIPS_64.c index df22ebaf554..185fb5768e2 100644 --- a/pcre/sljit/sljitNativeMIPS_64.c +++ b/pcre/sljit/sljitNativeMIPS_64.c @@ -217,7 +217,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (CHECK_FLAGS(SLJIT_SET_E)) @@ -246,9 +246,9 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_O) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); @@ -266,7 +266,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (op & (SLJIT_SET_C | SLJIT_SET_O)) @@ -281,8 +281,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); if (!(op & SLJIT_SET_O)) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); return push_inst(compiler, SELECT_OP(DSRL32, SLL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); @@ -290,21 +290,21 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_C) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); else { - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } } FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); } else { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); } if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); if (!(op & SLJIT_SET_C)) @@ -313,7 +313,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); /* Set carry flag. */ - return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(TMP_EREG1) | DA(ULESS_FLAG), ULESS_FLAG); + return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG); case SLJIT_SUB: if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) { @@ -325,9 +325,9 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_O) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); } if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); @@ -339,7 +339,7 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj } else { if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); if (op & SLJIT_SET_E) FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O)) @@ -357,8 +357,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (!(op & SLJIT_SET_O)) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | SA(TMP_EREG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); @@ -371,27 +371,27 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (flags & SRC2_IMM) { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); } else { if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); } if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(TMP_EREG2), TMP_EREG2)); + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG)); FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); - return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; + return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; case SLJIT_MUL: SLJIT_ASSERT(!(flags & SRC2_IMM)); if (!(op & SLJIT_SET_O)) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) if (op & SLJIT_INT_OP) return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS)); @@ -402,10 +402,10 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj #endif } FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MFHI | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG)); FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); - FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(TMP_EREG2) | SH_IMM(31), TMP_EREG2)); - return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG)); + return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); case SLJIT_AND: EMIT_LOGICAL(ANDI, AND); diff --git a/pcre/sljit/sljitNativeMIPS_common.c b/pcre/sljit/sljitNativeMIPS_common.c index 011d8874c27..f1f9f70c678 100644 --- a/pcre/sljit/sljitNativeMIPS_common.c +++ b/pcre/sljit/sljitNativeMIPS_common.c @@ -25,13 +25,17 @@ */ /* Latest MIPS architecture. */ -/* Automatically detect SLJIT_MIPS_32_64 */ +/* Automatically detect SLJIT_MIPS_R1 */ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) { -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) - return "MIPS V" SLJIT_CPUINFO; +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return "MIPS32-R1" SLJIT_CPUINFO; #else + return "MIPS64-R1" SLJIT_CPUINFO; +#endif +#else /* SLJIT_MIPS_R1 */ return "MIPS III" SLJIT_CPUINFO; #endif } @@ -40,35 +44,32 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) Both for mips-32 and mips-64 */ typedef sljit_ui sljit_ins; -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) /* For position independent code, t9 must contain the function address. */ #define PIC_ADDR_REG TMP_REG2 -/* TMP_EREGs are used mainly for arithmetic operations. */ -#define TMP_EREG1 15 -#define TMP_EREG2 24 /* Floating point status register. */ #define FCSR_REG 31 /* Return address register. */ #define RETURN_ADDR_REG 31 -/* Flags are keept in volatile registers. */ -#define EQUAL_FLAG 7 +/* Flags are kept in volatile registers. */ +#define EQUAL_FLAG 12 /* And carry flag as well. */ -#define ULESS_FLAG 10 -#define UGREATER_FLAG 11 -#define LESS_FLAG 12 -#define GREATER_FLAG 13 -#define OVERFLOW_FLAG 14 +#define ULESS_FLAG 13 +#define UGREATER_FLAG 14 +#define LESS_FLAG 15 +#define GREATER_FLAG 31 +#define OVERFLOW_FLAG 1 #define TMP_FREG1 (0) -#define TMP_FREG2 ((SLJIT_FLOAT_REG6 + 1) << 1) +#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 2, 5, 6, 3, 8, 16, 17, 18, 19, 20, 29, 4, 25, 9 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 2, 5, 6, 7, 8, 9, 10, 11, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4 }; /* --------------------------------------------------------------------- */ @@ -92,10 +93,10 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { #define HI(opcode) ((opcode) << 26) #define LO(opcode) (opcode) /* S = (16 << 21) D = (17 << 21) */ -#define FMT_SD (16 << 21) +#define FMT_S (16 << 21) -#define ABS_fmt (HI(17) | FMT_SD | LO(5)) -#define ADD_fmt (HI(17) | FMT_SD | LO(0)) +#define ABS_S (HI(17) | FMT_S | LO(5)) +#define ADD_S (HI(17) | FMT_S | LO(0)) #define ADDIU (HI(9)) #define ADDU (HI(0) | LO(33)) #define AND (HI(0) | LO(36)) @@ -112,17 +113,18 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { #define BNE (HI(5)) #define BREAK (HI(0) | LO(13)) #define CFC1 (HI(17) | (2 << 21)) -#define C_UN_fmt (HI(17) | FMT_SD | LO(49)) -#define C_UEQ_fmt (HI(17) | FMT_SD | LO(51)) -#define C_ULE_fmt (HI(17) | FMT_SD | LO(55)) -#define C_ULT_fmt (HI(17) | FMT_SD | LO(53)) +#define C_UN_S (HI(17) | FMT_S | LO(49)) +#define C_UEQ_S (HI(17) | FMT_S | LO(51)) +#define C_ULE_S (HI(17) | FMT_S | LO(55)) +#define C_ULT_S (HI(17) | FMT_S | LO(53)) +#define CVT_S_S (HI(17) | FMT_S | LO(32)) #define DADDIU (HI(25)) #define DADDU (HI(0) | LO(45)) #define DDIV (HI(0) | LO(30)) #define DDIVU (HI(0) | LO(31)) #define DIV (HI(0) | LO(26)) #define DIVU (HI(0) | LO(27)) -#define DIV_fmt (HI(17) | FMT_SD | LO(3)) +#define DIV_S (HI(17) | FMT_S | LO(3)) #define DMULT (HI(0) | LO(28)) #define DMULTU (HI(0) | LO(29)) #define DSLL (HI(0) | LO(56)) @@ -142,13 +144,15 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { #define LD (HI(55)) #define LUI (HI(15)) #define LW (HI(35)) +#define MFC1 (HI(17)) #define MFHI (HI(0) | LO(16)) #define MFLO (HI(0) | LO(18)) -#define MOV_fmt (HI(17) | FMT_SD | LO(6)) -#define MUL_fmt (HI(17) | FMT_SD | LO(2)) +#define MOV_S (HI(17) | FMT_S | LO(6)) +#define MTC1 (HI(17) | (4 << 21)) +#define MUL_S (HI(17) | FMT_S | LO(2)) #define MULT (HI(0) | LO(24)) #define MULTU (HI(0) | LO(25)) -#define NEG_fmt (HI(17) | FMT_SD | LO(7)) +#define NEG_S (HI(17) | FMT_S | LO(7)) #define NOP (HI(0) | LO(0)) #define NOR (HI(0) | LO(39)) #define OR (HI(0) | LO(37)) @@ -164,13 +168,14 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { #define SRLV (HI(0) | LO(6)) #define SRA (HI(0) | LO(3)) #define SRAV (HI(0) | LO(7)) -#define SUB_fmt (HI(17) | FMT_SD | LO(1)) +#define SUB_S (HI(17) | FMT_S | LO(1)) #define SUBU (HI(0) | LO(35)) #define SW (HI(43)) +#define TRUNC_W_S (HI(17) | FMT_S | LO(13)) #define XOR (HI(0) | LO(38)) #define XORI (HI(14)) -#if (defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) #define CLZ (HI(28) | LO(32)) #define DCLZ (HI(28) | LO(36)) #define MUL (HI(28) | LO(2)) @@ -495,6 +500,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* Separates integer and floating point registers */ #define GPR_REG 0x0f #define DOUBLE_DATA 0x10 +#define SINGLE_DATA 0x12 #define MEM_MASK 0x1f @@ -532,20 +538,26 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #include "sljitNativeMIPS_64.c" #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_ins base; + sljit_si i, tmp, offs; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - local_size += (saveds + 1 + 4) * sizeof(sljit_sw); + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) local_size = (local_size + 15) & ~0xf; #else @@ -555,51 +567,58 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil if (local_size <= SIMM_MAX) { /* Frequent case. */ - FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(-local_size), DR(SLJIT_LOCALS_REG))); - base = S(SLJIT_LOCALS_REG); + FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP))); + base = S(SLJIT_SP); } else { FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(SLJIT_LOCALS_REG), DR(SLJIT_LOCALS_REG))); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP))); base = S(TMP_REG2); local_size = 0; } - FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); - if (saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (sljit_si)sizeof(sljit_sw)), MOVABLE_INS)); + offs = local_size - (sljit_sw)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS)); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + } if (args >= 1) - FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_SAVED_REG1), DR(SLJIT_SAVED_REG1))); + FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_S0), DR(SLJIT_S0))); if (args >= 2) - FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_SAVED_REG2), DR(SLJIT_SAVED_REG2))); + FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_S1), DR(SLJIT_S1))); if (args >= 3) - FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_SAVED_REG3), DR(SLJIT_SAVED_REG3))); + FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_S2), DR(SLJIT_S2))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - local_size += (saveds + 1 + 4) * sizeof(sljit_sw); + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) compiler->local_size = (local_size + 15) & ~0xf; #else @@ -609,7 +628,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { - sljit_si local_size; + sljit_si local_size, i, tmp, offs; sljit_ins base; CHECK_ERROR(); @@ -619,31 +638,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi local_size = compiler->local_size; if (local_size <= SIMM_MAX) - base = S(SLJIT_LOCALS_REG); + base = S(SLJIT_SP); else { FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_LOCALS_REG) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1))); base = S(TMP_REG1); local_size = 0; } - FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (sljit_si)sizeof(sljit_sw)), RETURN_ADDR_REG)); - if (compiler->saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 6 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_EREG2))); - if (compiler->saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 5 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_EREG1))); - if (compiler->saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 4 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG3))); - if (compiler->saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 3 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG2))); - if (compiler->saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 2 * (sljit_si)sizeof(sljit_sw)), DR(SLJIT_SAVED_REG1))); + FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - (sljit_si)sizeof(sljit_sw)), RETURN_ADDR_REG)); + offs = local_size - (sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + SLJIT_ASSERT(offs == local_size - (sljit_sw)(sizeof(sljit_sw))); FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); if (compiler->local_size <= SIMM_MAX) - return push_inst(compiler, ADDIU_W | S(SLJIT_LOCALS_REG) | T(SLJIT_LOCALS_REG) | IMM(compiler->local_size), UNMOVABLE_INS); + return push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(compiler->local_size), UNMOVABLE_INS); else - return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_LOCALS_REG), UNMOVABLE_INS); + return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_SP), UNMOVABLE_INS); } #undef STACK_STORE @@ -1038,30 +1062,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler case SLJIT_UMUL: case SLJIT_SMUL: #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? DMULTU : DMULT) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #else - FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #endif - FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_SCRATCH_REG1), DR(SLJIT_SCRATCH_REG1))); - return push_inst(compiler, MFHI | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); case SLJIT_UDIV: case SLJIT_SDIV: -#if !(defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) +#if !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); #endif #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) if (int_op) - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); else - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DDIVU : DDIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #else - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_SCRATCH_REG1) | T(SLJIT_SCRATCH_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #endif - FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_SCRATCH_REG1), DR(SLJIT_SCRATCH_REG1))); - return push_inst(compiler, MFHI | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); } return SLJIT_SUCCESS; @@ -1278,83 +1302,164 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7)) #define FMT(op) (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) << (21 - 8)) +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags 0 +#else + sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVW_FROMD) << 21; +#endif + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } + else + src <<= 1; + + FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS)); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS); + + /* Store the integer value from a VFP register. */ + return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0); + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef is_long +#endif +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags 0 +#else + sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVD_FROMW) << 21; +#endif + + sljit_si dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS)); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. */ + FAIL_IF(emit_op_mem2(compiler, ((flags) ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + } + else { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; +#endif + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); + } + + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef flags +#endif +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + else + src1 <<= 1; + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + else + src2 <<= 1; + + /* src2 and src1 are swapped. */ + if (op & SLJIT_SET_E) { + FAIL_IF(push_inst(compiler, C_UEQ_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG)); + } + if (op & SLJIT_SET_S) { + /* Mixing the instructions for the two checks. */ + FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src1) | FS(src2), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG)); + FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG)); + FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG)); + } + return push_inst(compiler, C_UN_S | FMT(op) | FT(src2) | FS(src1), FCSR_FCC); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_si dst_fr; + sljit_si dst_r; CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); - compiler->cache_arg = 0; compiler->cache_argw = 0; - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw)); - dst = TMP_FREG1; - } - else - dst <<= 1; + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0)); - src = TMP_FREG2; - } - else - src <<= 1; - - /* src and dst are swapped. */ - if (op & SLJIT_SET_E) { - FAIL_IF(push_inst(compiler, C_UEQ_fmt | FMT(op) | FT(src) | FS(dst), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG)); - } - if (op & SLJIT_SET_S) { - /* Mixing the instructions for the two checks. */ - FAIL_IF(push_inst(compiler, C_ULT_fmt | FMT(op) | FT(src) | FS(dst), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, C_ULT_fmt | FMT(op) | FT(dst) | FS(src), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG)); - FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG)); - FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG)); - } - return push_inst(compiler, C_UN_fmt | FMT(op) | FT(src) | FS(dst), FCSR_FCC); - } + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; - dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); - src = dst_fr; + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; } else src <<= 1; switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_fr && dst_fr != TMP_FREG1) - FAIL_IF(push_inst(compiler, MOV_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS)); - break; - case SLJIT_NEGD: - FAIL_IF(push_inst(compiler, NEG_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS)); - break; - case SLJIT_ABSD: - FAIL_IF(push_inst(compiler, ABS_fmt | FMT(op) | FS(src) | FD(dst_fr), MOVABLE_INS)); - break; - } - - if (dst_fr == TMP_FREG1) { - if (GET_OPCODE(op) == SLJIT_MOVD) - dst_fr = src; - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0)); + case SLJIT_MOVD: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, MOV_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + else + dst_r = src; + } + break; + case SLJIT_NEGD: + FAIL_IF(push_inst(compiler, NEG_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_ABSD: + FAIL_IF(push_inst(compiler, ABS_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst(compiler, CVT_S_S | ((op & SLJIT_SINGLE_OP) ? 1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS)); + op ^= SLJIT_SINGLE_OP; + break; } + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0); return SLJIT_SUCCESS; } @@ -1363,15 +1468,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_si dst_fr, flags = 0; + sljit_si dst_r, flags = 0; CHECK_ERROR(); check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2; + dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2; if (src1 & SLJIT_MEM) { if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { @@ -1415,23 +1523,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADDD: - FAIL_IF(push_inst(compiler, ADD_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; case SLJIT_SUBD: - FAIL_IF(push_inst(compiler, SUB_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SUB_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; case SLJIT_MULD: - FAIL_IF(push_inst(compiler, MUL_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MUL_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; case SLJIT_DIVD: - FAIL_IF(push_inst(compiler, DIV_fmt | FMT(op) | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, DIV_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); break; } - if (dst_fr == TMP_FREG2) + if (dst_r == TMP_FREG2) FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); return SLJIT_SUCCESS; @@ -1613,7 +1721,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); jump->addr = compiler->size; /* A NOP if type < CALL1. */ - PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS)); } return jump; } @@ -1794,36 +1902,36 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile switch (type & 0xff) { case SLJIT_C_FLOAT_EQUAL: - inst = C_UEQ_fmt; + inst = C_UEQ_S; if_true = 1; break; case SLJIT_C_FLOAT_NOT_EQUAL: - inst = C_UEQ_fmt; + inst = C_UEQ_S; if_true = 0; break; case SLJIT_C_FLOAT_LESS: - inst = C_ULT_fmt; + inst = C_ULT_S; if_true = 1; break; case SLJIT_C_FLOAT_GREATER_EQUAL: - inst = C_ULT_fmt; + inst = C_ULT_S; if_true = 0; break; case SLJIT_C_FLOAT_GREATER: - inst = C_ULE_fmt; + inst = C_ULE_S; if_true = 0; break; case SLJIT_C_FLOAT_LESS_EQUAL: - inst = C_ULE_fmt; + inst = C_ULE_S; if_true = 1; break; case SLJIT_C_FLOAT_UNORDERED: - inst = C_UN_fmt; + inst = C_UN_S; if_true = 1; break; case SLJIT_C_FLOAT_ORDERED: default: /* Make compilers happy. */ - inst = C_UN_fmt; + inst = C_UN_S; if_true = 0; break; } @@ -1874,12 +1982,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil } FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); /* We need an extra instruction in any case. */ - return push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), UNMOVABLE_INS); + return push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS); } /* Register input. */ if (type >= SLJIT_CALL1) - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SCRATCH_REG1) | TA(0) | DA(4), 4)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), 4)); FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); return push_inst(compiler, ADDU_W | S(src_r) | TA(0) | D(PIC_ADDR_REG), UNMOVABLE_INS); } diff --git a/pcre/sljit/sljitNativePPC_common.c b/pcre/sljit/sljitNativePPC_common.c index 5e06f2fd8ee..01cb771409b 100644 --- a/pcre/sljit/sljitNativePPC_common.c +++ b/pcre/sljit/sljitNativePPC_common.c @@ -87,22 +87,22 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #endif /* _AIX */ } -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_ZERO (SLJIT_NO_REGISTERS + 4) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_ZERO (SLJIT_NUMBER_OF_REGISTERS + 5) #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) -#define TMP_CALL_REG (SLJIT_NO_REGISTERS + 5) +#define TMP_CALL_REG (SLJIT_NUMBER_OF_REGISTERS + 6) #else #define TMP_CALL_REG TMP_REG2 #endif #define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_FLOAT_REG6 + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { - 0, 3, 4, 5, 6, 7, 30, 29, 28, 27, 26, 1, 8, 9, 10, 31, 12 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { + 0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12 }; /* --------------------------------------------------------------------- */ @@ -114,6 +114,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { #define B(b) (reg_map[b] << 11) #define C(c) (reg_map[c] << 6) #define FD(fd) ((fd) << 21) +#define FS(fs) ((fs) << 21) #define FA(fa) ((fa) << 16) #define FB(fb) ((fb) << 11) #define FC(fc) ((fc) << 6) @@ -159,13 +160,17 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { #define FABS (HI(63) | LO(264)) #define FADD (HI(63) | LO(21)) #define FADDS (HI(59) | LO(21)) +#define FCFID (HI(63) | LO(846)) #define FCMPU (HI(63) | LO(0)) +#define FCTIDZ (HI(63) | LO(815)) +#define FCTIWZ (HI(63) | LO(15)) #define FDIV (HI(63) | LO(18)) #define FDIVS (HI(59) | LO(18)) #define FMR (HI(63) | LO(72)) #define FMUL (HI(63) | LO(25)) #define FMULS (HI(59) | LO(25)) #define FNEG (HI(63) | LO(40)) +#define FRSP (HI(63) | LO(12)) #define FSUB (HI(63) | LO(20)) #define FSUBS (HI(59) | LO(20)) #define LD (HI(58) | 0) @@ -202,6 +207,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { #define STD (HI(62) | 0) #define STDU (HI(62) | 1) #define STDUX (HI(31) | LO(181)) +#define STFIWX (HI(31) | LO(983)) #define STW (HI(36)) #define STWU (HI(37)) #define STWUX (HI(31) | LO(183)) @@ -565,118 +571,136 @@ ALT_FORM6 0x200000 */ #define STACK_LOAD LD #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { + sljit_si i, tmp, offs; + CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif FAIL_IF(push_inst(compiler, MFLR | D(0))); - FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) )); - if (saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) )); + offs = -(sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + } + + SLJIT_ASSERT(offs == -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1)); + #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw)) )); + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); #else - FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)) )); + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); #endif FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0)); if (args >= 1) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(SLJIT_SAVED_REG1) | B(SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0))); if (args >= 2) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG2) | A(SLJIT_SAVED_REG2) | B(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1))); if (args >= 3) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG3) | A(SLJIT_SAVED_REG3) | B(SLJIT_SCRATCH_REG3))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2))); -#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size; -#else - compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size; -#endif - compiler->local_size = (compiler->local_size + 15) & ~0xf; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET; + local_size = (local_size + 15) & ~0xf; + compiler->local_size = local_size; #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - if (compiler->local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, STWU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size))); + if (local_size <= SIMM_MAX) + FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); else { - FAIL_IF(load_immediate(compiler, 0, -compiler->local_size)); - FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0))); + FAIL_IF(load_immediate(compiler, 0, -local_size)); + FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); } #else - if (compiler->local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, STDU | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(-compiler->local_size))); + if (local_size <= SIMM_MAX) + FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); else { - FAIL_IF(load_immediate(compiler, 0, -compiler->local_size)); - FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0))); + FAIL_IF(load_immediate(compiler, 0, -local_size)); + FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); } #endif return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif -#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - compiler->local_size = (1 + saveds + 6 + 8) * sizeof(sljit_sw) + local_size; -#else - compiler->local_size = (1 + saveds + 2) * sizeof(sljit_sw) + local_size; -#endif - compiler->local_size = (compiler->local_size + 15) & ~0xf; + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET; + compiler->local_size = (local_size + 15) & ~0xf; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { + sljit_si i, tmp, offs; + CHECK_ERROR(); check_sljit_emit_return(compiler, op, src, srcw); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); if (compiler->local_size <= SIMM_MAX) - FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | IMM(compiler->local_size))); + FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size))); else { FAIL_IF(load_immediate(compiler, 0, compiler->local_size)); - FAIL_IF(push_inst(compiler, ADD | D(SLJIT_LOCALS_REG) | A(SLJIT_LOCALS_REG) | B(0))); + FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0))); } #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(2 * sizeof(sljit_sw)))); + FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); #else - FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_LOCALS_REG) | IMM(sizeof(sljit_sw)))); + FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); #endif - if (compiler->saveds >= 5) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG2) | A(SLJIT_LOCALS_REG) | IMM(-6 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 4) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG1) | A(SLJIT_LOCALS_REG) | IMM(-5 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 3) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG3) | A(SLJIT_LOCALS_REG) | IMM(-4 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 2) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG2) | A(SLJIT_LOCALS_REG) | IMM(-3 * (sljit_si)(sizeof(sljit_sw))) )); - if (compiler->saveds >= 1) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG1) | A(SLJIT_LOCALS_REG) | IMM(-2 * (sljit_si)(sizeof(sljit_sw))) )); - FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_LOCALS_REG) | IMM(-(sljit_si)(sizeof(sljit_sw))) )); + + offs = -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); + SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw))); FAIL_IF(push_inst(compiler, MTLR | S(0))); FAIL_IF(push_inst(compiler, BLR)); @@ -999,12 +1023,12 @@ static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, tmp_r = arg; FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16))); } - else if (compiler->cache_arg != arg || high_short != compiler->cache_argw) { + else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) { if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) { next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff; if (high_short == next_high_short) { - compiler->cache_arg = SLJIT_IMM | arg; - compiler->cache_argw = next_high_short; + compiler->cache_arg = SLJIT_MEM | arg; + compiler->cache_argw = high_short; tmp_r = TMP_REG3; } } @@ -1250,30 +1274,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler return push_inst(compiler, NOP); case SLJIT_UMUL: case SLJIT_SMUL: - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, (op == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); #else - FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, (op == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_SCRATCH_REG2) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); #endif case SLJIT_UDIV: case SLJIT_SDIV: - FAIL_IF(push_inst(compiler, OR | S(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG1))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (int_op) { - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); } else { - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); } - return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1)); + return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1)); #else - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_SCRATCH_REG1) | A(TMP_REG1) | B(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG1) | B(SLJIT_SCRATCH_REG2))); - return push_inst(compiler, SUBF | D(SLJIT_SCRATCH_REG2) | A(SLJIT_SCRATCH_REG2) | B(TMP_REG1)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); + return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1)); #endif } @@ -1685,59 +1709,233 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 6)) #define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double) -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw)) +#else +#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw)) + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw)) +#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw)) +#else +#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw)) +#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw)) +#endif + +#endif /* SLJIT_CONFIG_PPC_64 */ + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_si dst_fr; + if (src & SLJIT_MEM) { + /* We can ignore the temporary data store on the stack from caching point of view. */ + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } - CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + op = GET_OPCODE(op); + FAIL_IF(push_inst(compiler, (op == SLJIT_CONVI_FROMD ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src))); - compiler->cache_arg = 0; - compiler->cache_argw = 0; + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw)); - dst = TMP_FREG1; + if (op == SLJIT_CONVW_FROMD) { + if (FAST_IS_REG(dst)) { + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0)); + return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0); } + return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0); + } + +#else + FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src))); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; +#endif + + if (FAST_IS_REG(dst)) { + FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1))); + return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0); + } + + SLJIT_ASSERT(dst & SLJIT_MEM); - if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0)); - src = TMP_FREG2; + if (dst & OFFS_REG_MASK) { + dstw &= 0x3; + if (dstw) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1))); +#else + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1))); +#endif + dstw = TMP_REG1; + } + else + dstw = OFFS_REG(dst); + } + else { + if ((dst & REG_MASK) && !dstw) { + dstw = dst & REG_MASK; + dst = 0; + } + else { + /* This works regardless we have SLJIT_MEM1 or SLJIT_MEM0. */ + FAIL_IF(load_immediate(compiler, TMP_REG1, dstw)); + dstw = TMP_REG1; } + } + + return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw)); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - return push_inst(compiler, FCMPU | CRD(4) | FA(dst) | FB(src)); + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_IMM) { + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + else if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) { + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1))); + else + FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + src = TMP_REG1; + } + + if (FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw)); + } + else + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + + FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1))); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + if (op & SLJIT_SINGLE_OP) + return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); + return SLJIT_SUCCESS; + +#else + + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_si invert_sign = 1; + + if (src & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000)); + src = TMP_REG1; + invert_sign = 0; + } + else if (!FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW)); + src = TMP_REG1; + } + + /* First, a special double floating point value is constructed: (2^53 + (input xor (2^31))) + The double precision format has exactly 53 bit precision, so the lower 32 bit represents + the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000 + to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating + point value, we need to substract 2^53 + 2^31 from the constructed value. */ + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330)); + if (invert_sign) + FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI)); + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW)); + + FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2))); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + if (op & SLJIT_SINGLE_OP) + return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); + return SLJIT_SUCCESS; + +#endif +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; } - dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG1; + return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); - src = dst_fr; + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; } switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_fr && dst_fr != TMP_FREG1) - FAIL_IF(push_inst(compiler, FMR | FD(dst_fr) | FB(src))); - break; - case SLJIT_NEGD: - FAIL_IF(push_inst(compiler, FNEG | FD(dst_fr) | FB(src))); - break; - case SLJIT_ABSD: - FAIL_IF(push_inst(compiler, FABS | FD(dst_fr) | FB(src))); + case SLJIT_CONVD_FROMS: + op ^= SLJIT_SINGLE_OP; + if (op & SLJIT_SINGLE_OP) { + FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src))); break; + } + /* Fall through. */ + case SLJIT_MOVD: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src))); + else + dst_r = src; + } + break; + case SLJIT_NEGD: + FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src))); + break; + case SLJIT_ABSD: + FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src))); + break; } - if (dst_fr == TMP_FREG1) { - if (GET_OPCODE(op) == SLJIT_MOVD) - dst_fr = src; - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0)); - } - + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0)); return SLJIT_SUCCESS; } @@ -1746,15 +1944,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_si dst_fr, flags = 0; + sljit_si dst_r, flags = 0; CHECK_ERROR(); check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_fr = FAST_IS_REG(dst) ? dst : TMP_FREG2; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2; if (src1 & SLJIT_MEM) { if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { @@ -1794,23 +1995,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADDD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_fr) | FA(src1) | FB(src2))); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2))); break; case SLJIT_SUBD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_fr) | FA(src1) | FB(src2))); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2))); break; case SLJIT_MULD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_fr) | FA(src1) | FC(src2) /* FMUL use FC as src2 */)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */)); break; case SLJIT_DIVD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_fr) | FA(src1) | FB(src2))); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2))); break; } - if (dst_fr == TMP_FREG2) + if (dst_r == TMP_FREG2) FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); return SLJIT_SUCCESS; diff --git a/pcre/sljit/sljitNativeSPARC_32.c b/pcre/sljit/sljitNativeSPARC_32.c index 80479bfe2b6..4a2e6293de5 100644 --- a/pcre/sljit/sljitNativeSPARC_32.c +++ b/pcre/sljit/sljitNativeSPARC_32.c @@ -110,8 +110,8 @@ static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, slj if (!(flags & SET_FLAGS)) return SLJIT_SUCCESS; FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, RDY | D(TMP_REG4), DR(TMP_REG4))); - return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_REG4), MOVABLE_INS | SET_FLAGS); + FAIL_IF(push_inst(compiler, RDY | D(TMP_LINK), DR(TMP_LINK))); + return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS); case SLJIT_AND: return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); diff --git a/pcre/sljit/sljitNativeSPARC_common.c b/pcre/sljit/sljitNativeSPARC_common.c index d6a1e12bfef..8926a606260 100644 --- a/pcre/sljit/sljitNativeSPARC_common.c +++ b/pcre/sljit/sljitNativeSPARC_common.c @@ -83,17 +83,16 @@ static void sparc_cache_flush(sljit_ins *from, sljit_ins *to) } /* TMP_REG2 is not used by getput_arg */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) -#define TMP_REG4 (SLJIT_NO_REGISTERS + 4) -#define TMP_LINK (SLJIT_NO_REGISTERS + 5) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_LINK (SLJIT_NUMBER_OF_REGISTERS + 5) #define TMP_FREG1 (0) -#define TMP_FREG2 ((SLJIT_FLOAT_REG6 + 1) << 1) +#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { - 0, 8, 9, 10, 11, 12, 16, 17, 18, 19, 20, 14, 1, 24, 25, 26, 15 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 8, 9, 10, 13, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 11, 12, 15 }; /* --------------------------------------------------------------------- */ @@ -128,10 +127,16 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 7] = { #define FCMPS (OPC1(0x2) | OPC3(0x35) | DOP(0x51)) #define FDIVD (OPC1(0x2) | OPC3(0x34) | DOP(0x4e)) #define FDIVS (OPC1(0x2) | OPC3(0x34) | DOP(0x4d)) +#define FDTOI (OPC1(0x2) | OPC3(0x34) | DOP(0xd2)) +#define FDTOS (OPC1(0x2) | OPC3(0x34) | DOP(0xc6)) +#define FITOD (OPC1(0x2) | OPC3(0x34) | DOP(0xc8)) +#define FITOS (OPC1(0x2) | OPC3(0x34) | DOP(0xc4)) #define FMOVS (OPC1(0x2) | OPC3(0x34) | DOP(0x01)) #define FMULD (OPC1(0x2) | OPC3(0x34) | DOP(0x4a)) #define FMULS (OPC1(0x2) | OPC3(0x34) | DOP(0x49)) #define FNEGS (OPC1(0x2) | OPC3(0x34) | DOP(0x05)) +#define FSTOD (OPC1(0x2) | OPC3(0x34) | DOP(0xc9)) +#define FSTOI (OPC1(0x2) | OPC3(0x34) | DOP(0xd1)) #define FSUBD (OPC1(0x2) | OPC3(0x34) | DOP(0x46)) #define FSUBS (OPC1(0x2) | OPC3(0x34) | DOP(0x45)) #define JMPL (OPC1(0x2) | OPC3(0x38)) @@ -388,6 +393,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* Separates integer and floating point registers */ #define GPR_REG 0x0f #define DOUBLE_DATA 0x10 +#define SINGLE_DATA 0x12 #define MEM_MASK 0x1f @@ -412,52 +418,55 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #include "sljitNativeSPARC_64.c" #endif -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - local_size += 23 * sizeof(sljit_sw); - local_size = (local_size + 7) & ~0x7; + local_size = (local_size + FIXED_LOCALS_OFFSET + 7) & ~0x7; compiler->local_size = local_size; if (local_size <= SIMM_MAX) { - FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | IMM(-local_size), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS)); } else { FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size)); - FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_LOCALS_REG) | S1(SLJIT_LOCALS_REG) | S2(TMP_REG1), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS)); } - if (args >= 1) - FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG1) | S1(0) | S2A(24), DR(SLJIT_SAVED_REG1))); - if (args >= 2) - FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG2) | S1(0) | S2A(25), DR(SLJIT_SAVED_REG2))); - if (args >= 3) - FAIL_IF(push_inst(compiler, OR | D(SLJIT_SAVED_REG3) | S1(0) | S2A(26), DR(SLJIT_SAVED_REG3))); + /* Arguments are in their appropriate registers. */ return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - local_size += 23 * sizeof(sljit_sw); - compiler->local_size = (local_size + 7) & ~0x7; + compiler->local_size = (local_size + FIXED_LOCALS_OFFSET + 7) & ~0x7; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) @@ -467,11 +476,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi if (op != SLJIT_MOV || !FAST_IS_REG(src)) { FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - src = SLJIT_SCRATCH_REG1; + src = SLJIT_R0; } FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS)); - return push_inst(compiler, RESTORE | D(SLJIT_SCRATCH_REG1) | S1(src) | S2(0), UNMOVABLE_INS); + return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(src) | S2(0), UNMOVABLE_INS); } /* --------------------------------------------------------------------- */ @@ -778,8 +787,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler case SLJIT_UMUL: case SLJIT_SMUL: #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? UMUL : SMUL) | D(SLJIT_SCRATCH_REG1) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG1))); - return push_inst(compiler, RDY | D(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2)); + FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? UMUL : SMUL) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + return push_inst(compiler, RDY | D(SLJIT_R1), DR(SLJIT_R1)); #else #error "Implementation required" #endif @@ -789,13 +798,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler if (op == SLJIT_UDIV) FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS)); else { - FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_SCRATCH_REG1) | IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS)); } - FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_SCRATCH_REG1), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? UDIV : SDIV) | D(SLJIT_SCRATCH_REG1) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG1))); - FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_SCRATCH_REG2) | S1(SLJIT_SCRATCH_REG1) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2))); - FAIL_IF(push_inst(compiler, SUB | D(SLJIT_SCRATCH_REG2) | S1(TMP_REG2) | S2(SLJIT_SCRATCH_REG2), DR(SLJIT_SCRATCH_REG2))); + FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1))); + FAIL_IF(push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1))); return SLJIT_SUCCESS; #else #error "Implementation required" @@ -953,73 +962,139 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7)) #define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double) +#define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw)) + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } + else + src <<= 1; + + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | DA(TMP_FREG1) | S2A(src), MOVABLE_INS)); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) { + FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET); + } + + /* Store the integer value from a VFP register. */ + return emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + + if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; +#endif + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + srcw = 0; + } + + if (FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + src = SLJIT_MEM1(SLJIT_SP); + srcw = FLOAT_TMP_MEM_OFFSET; + } + + FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | DA(dst_r) | S2A(TMP_FREG1), MOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + else + src1 <<= 1; + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + else + src2 <<= 1; + + return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(src1) | S2A(src2), FCC_IS_SET | MOVABLE_INS); +} SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_si dst_fr; + sljit_si dst_r; CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); - compiler->cache_arg = 0; compiler->cache_argw = 0; - if (GET_OPCODE(op) == SLJIT_CMPD) { - if (dst & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, dst, dstw, src, srcw)); - dst = TMP_FREG1; - } - else - dst <<= 1; - - if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src, srcw, 0, 0)); - src = TMP_FREG2; - } - else - src <<= 1; + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(dst) | S2A(src), FCC_IS_SET | MOVABLE_INS); - } + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; - dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_fr, src, srcw, dst, dstw)); - src = dst_fr; + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; } else src <<= 1; switch (GET_OPCODE(op)) { - case SLJIT_MOVD: - if (src != dst_fr && dst_fr != TMP_FREG1) { - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr) | S2A(src), MOVABLE_INS)); + case SLJIT_MOVD: + if (src != dst_r) { + if (dst_r != TMP_FREG1) { + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r) | S2A(src), MOVABLE_INS)); if (!(op & SLJIT_SINGLE_OP)) - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); } - break; - case SLJIT_NEGD: - FAIL_IF(push_inst(compiler, FNEGS | DA(dst_fr) | S2A(src), MOVABLE_INS)); - if (dst_fr != src && !(op & SLJIT_SINGLE_OP)) - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS)); - break; - case SLJIT_ABSD: - FAIL_IF(push_inst(compiler, FABSS | DA(dst_fr) | S2A(src), MOVABLE_INS)); - if (dst_fr != src && !(op & SLJIT_SINGLE_OP)) - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_fr | 1) | S2A(src | 1), MOVABLE_INS)); - break; - } - - if (dst_fr == TMP_FREG1) { - if (GET_OPCODE(op) == SLJIT_MOVD) - dst_fr = src; - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_fr, dst, dstw, 0, 0)); + else + dst_r = src; + } + break; + case SLJIT_NEGD: + FAIL_IF(push_inst(compiler, FNEGS | DA(dst_r) | S2A(src), MOVABLE_INS)); + if (dst_r != src && !(op & SLJIT_SINGLE_OP)) + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); + break; + case SLJIT_ABSD: + FAIL_IF(push_inst(compiler, FABSS | DA(dst_r) | S2A(src), MOVABLE_INS)); + if (dst_r != src && !(op & SLJIT_SINGLE_OP)) + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); + break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | DA(dst_r) | S2A(src), MOVABLE_INS)); + op ^= SLJIT_SINGLE_OP; + break; } + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0)); return SLJIT_SUCCESS; } @@ -1028,15 +1103,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) { - sljit_si dst_fr, flags = 0; + sljit_si dst_r, flags = 0; CHECK_ERROR(); check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_fr = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2; + dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2; if (src1 & SLJIT_MEM) { if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { @@ -1080,23 +1158,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADDD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); break; case SLJIT_SUBD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); break; case SLJIT_MULD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); break; case SLJIT_DIVD: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_fr) | S1A(src1) | S2A(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); break; } - if (dst_fr == TMP_FREG2) + if (dst_r == TMP_FREG2) FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); return SLJIT_SUCCESS; diff --git a/pcre/sljit/sljitNativeTILEGX_64.c b/pcre/sljit/sljitNativeTILEGX_64.c index d0b392e7a48..fb4cd383b5f 100644 --- a/pcre/sljit/sljitNativeTILEGX_64.c +++ b/pcre/sljit/sljitNativeTILEGX_64.c @@ -1173,16 +1173,21 @@ static sljit_si emit_const_64(struct sljit_compiler *compiler, sljit_si dst_ar, return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_ins base; sljit_ins bundle = 0; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif @@ -1233,13 +1238,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif @@ -2370,7 +2380,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); } - FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO)); + FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); FAIL_IF(ADDI_SOLO(54, 54, -16)); @@ -2381,7 +2391,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil /* Register input. */ if (type >= SLJIT_CALL1) - FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO)); + FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO)); @@ -2511,7 +2521,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compil SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2); /* Cannot be optimized out if type is >= CALL0. */ jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0); - PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_SCRATCH_REG1], ZERO)); + PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); jump->addr = compiler->size; PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped)); } diff --git a/pcre/sljit/sljitNativeX86_32.c b/pcre/sljit/sljitNativeX86_32.c index dd03f26618b..515bf7e746d 100644 --- a/pcre/sljit/sljitNativeX86_32.c +++ b/pcre/sljit/sljitNativeX86_32.c @@ -63,27 +63,32 @@ static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ return code_ptr; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { sljit_si size; - sljit_si locals_offset; sljit_ub *inst; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; compiler->args = args; compiler->flags_saved = 0; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif + size = 1 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); + size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); #else - size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (2 + args * 3) : 0); + size += (args > 0 ? (2 + args * 3) : 0); #endif inst = (sljit_ub*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); @@ -96,76 +101,66 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */; } #endif - if (saveds > 2) - PUSH_REG(reg_map[SLJIT_SAVED_REG3]); - if (saveds > 1) - PUSH_REG(reg_map[SLJIT_SAVED_REG2]); - if (saveds > 0) - PUSH_REG(reg_map[SLJIT_SAVED_REG1]); + if (saveds > 2 || scratches > 7) + PUSH_REG(reg_map[SLJIT_S2]); + if (saveds > 1 || scratches > 8) + PUSH_REG(reg_map[SLJIT_S1]); + if (saveds > 0 || scratches > 9) + PUSH_REG(reg_map[SLJIT_S0]); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (args > 0) { *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[SLJIT_SCRATCH_REG3]; + *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2]; } if (args > 1) { *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[SLJIT_SCRATCH_REG2]; + *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1]; } if (args > 2) { *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x4 /* esp */; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */; *inst++ = 0x24; *inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */ } #else if (args > 0) { *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[TMP_REG1]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1]; *inst++ = sizeof(sljit_sw) * 2; } if (args > 1) { *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[TMP_REG1]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1]; *inst++ = sizeof(sljit_sw) * 3; } if (args > 2) { *inst++ = MOV_r_rm; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SAVED_REG3] << 3) | reg_map[TMP_REG1]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1]; *inst++ = sizeof(sljit_sw) * 4; } #endif -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - locals_offset = 2 * sizeof(sljit_uw); -#else - SLJIT_COMPILE_ASSERT(FIXED_LOCALS_OFFSET >= 2 * sizeof(sljit_uw), require_at_least_two_words); - locals_offset = FIXED_LOCALS_OFFSET; -#endif - compiler->scratches_start = locals_offset; - if (scratches > 3) - locals_offset += (scratches - 3) * sizeof(sljit_uw); - compiler->saveds_start = locals_offset; - if (saveds > 3) - locals_offset += (saveds - 3) * sizeof(sljit_uw); - compiler->locals_offset = locals_offset; + SLJIT_COMPILE_ASSERT(FIXED_LOCALS_OFFSET >= (2 + 4) * sizeof(sljit_uw), require_at_least_two_words); #if defined(__APPLE__) - saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); - local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds; + /* Ignore pushed registers and FIXED_LOCALS_OFFSET when + computing the aligned local size. */ + saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); + local_size = ((FIXED_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; #else - local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1)); + local_size = FIXED_LOCALS_OFFSET + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1)); #endif compiler->local_size = local_size; #ifdef _WIN32 if (local_size > 1024) { #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size)); + FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); #else local_size -= FIXED_LOCALS_OFFSET; - FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_SCRATCH_REG1], local_size)); + FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, FIXED_LOCALS_OFFSET)); + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, FIXED_LOCALS_OFFSET)); #endif FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); } @@ -173,40 +168,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil SLJIT_ASSERT(local_size > 0); return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size); + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si locals_offset; - CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; compiler->args = args; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - locals_offset = 2 * sizeof(sljit_uw); -#else - locals_offset = FIXED_LOCALS_OFFSET; -#endif - compiler->scratches_start = locals_offset; - if (scratches > 3) - locals_offset += (scratches - 3) * sizeof(sljit_uw); - compiler->saveds_start = locals_offset; - if (saveds > 3) - locals_offset += (saveds - 3) * sizeof(sljit_uw); - compiler->locals_offset = locals_offset; #if defined(__APPLE__) - saveds = (2 + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); - compiler->local_size = ((locals_offset + saveds + local_size + 15) & ~15) - saveds; + saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); + compiler->local_size = ((FIXED_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; #else - compiler->local_size = locals_offset + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1)); + compiler->local_size = FIXED_LOCALS_OFFSET + ((local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1)); #endif } @@ -224,9 +210,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi SLJIT_ASSERT(compiler->local_size > 0); FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); - size = 2 + (compiler->saveds <= 3 ? compiler->saveds : 3); + size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) + + (compiler->saveds <= 3 ? compiler->saveds : 3); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (compiler->args > 2) size += 2; @@ -239,12 +226,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi INC_SIZE(size); - if (compiler->saveds > 0) - POP_REG(reg_map[SLJIT_SAVED_REG1]); - if (compiler->saveds > 1) - POP_REG(reg_map[SLJIT_SAVED_REG2]); - if (compiler->saveds > 2) - POP_REG(reg_map[SLJIT_SAVED_REG3]); + if (compiler->saveds > 0 || compiler->scratches > 9) + POP_REG(reg_map[SLJIT_S0]); + if (compiler->saveds > 1 || compiler->scratches > 8) + POP_REG(reg_map[SLJIT_S1]); + if (compiler->saveds > 2 || compiler->scratches > 7) + POP_REG(reg_map[SLJIT_S2]); POP_REG(reg_map[TMP_REG1]); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (compiler->args > 2) @@ -280,21 +267,17 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); /* Both size flags cannot be switched on. */ SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); -#if (defined SLJIT_SSE2 && SLJIT_SSE2) /* SSE2 and immediate is not possible. */ SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); -#endif size &= 0xf; inst_size = size; -#if (defined SLJIT_SSE2 && SLJIT_SSE2) if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) inst_size++; -#endif if (flags & EX86_PREF_66) inst_size++; @@ -311,8 +294,8 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si inst_size += sizeof(sljit_sw); } - if ((b & REG_MASK) == SLJIT_LOCALS_REG && !(b & OFFS_REG_MASK)) - b |= TO_OFFS_REG(SLJIT_LOCALS_REG); + if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK)) + b |= TO_OFFS_REG(SLJIT_SP); if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) inst_size += 1; /* SIB byte. */ @@ -348,12 +331,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si /* Encoding the byte. */ INC_SIZE(inst_size); -#if (defined SLJIT_SSE2 && SLJIT_SSE2) if (flags & EX86_PREF_F2) *inst++ = 0xf2; if (flags & EX86_PREF_F3) *inst++ = 0xf3; -#endif if (flags & EX86_PREF_66) *inst++ = 0x66; @@ -366,15 +347,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si if ((a & SLJIT_IMM) || (a == 0)) *buf_ptr = 0; -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - else if (!(flags & EX86_SSE2)) + else if (!(flags & EX86_SSE2_OP1)) *buf_ptr = reg_map[a] << 3; else *buf_ptr = a << 3; -#else - else - *buf_ptr = reg_map[a] << 3; -#endif } else { if (a & SLJIT_IMM) { @@ -388,13 +364,9 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si } if (!(b & SLJIT_MEM)) -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_map[b] : b); -#else - *buf_ptr++ |= MOD_REG + reg_map[b]; -#endif + *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b); else if ((b & REG_MASK) != SLJIT_UNUSED) { - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) { + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { if (immb != 0) { if (immb <= 127 && immb >= -128) *buf_ptr |= 0x40; @@ -455,28 +427,28 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2); if (type >= SLJIT_CALL3) - PUSH_REG(reg_map[SLJIT_SCRATCH_REG3]); + PUSH_REG(reg_map[SLJIT_R2]); *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SCRATCH_REG3] << 3) | reg_map[SLJIT_SCRATCH_REG1]; + *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]; #else inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0)); FAIL_IF(!inst); INC_SIZE(4 * (type - SLJIT_CALL0)); *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG1] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */; + *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; *inst++ = 0; if (type >= SLJIT_CALL2) { *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG2] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */; + *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; *inst++ = sizeof(sljit_sw); } if (type >= SLJIT_CALL3) { *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_SCRATCH_REG3] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_LOCALS_REG]; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */; + *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; *inst++ = 2 * sizeof(sljit_sw); } #endif diff --git a/pcre/sljit/sljitNativeX86_64.c b/pcre/sljit/sljitNativeX86_64.c index 967f3c34440..f356f1c39c6 100644 --- a/pcre/sljit/sljitNativeX86_64.c +++ b/pcre/sljit/sljitNativeX86_64.c @@ -87,118 +87,95 @@ static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si return code_ptr; } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si size, pushed_size; + sljit_si i, tmp, size, saved_register_size; sljit_ub *inst; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, scratches, saveds, local_size); + check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; compiler->flags_saved = 0; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif - size = saveds; /* Including the return address saved by the call instruction. */ - pushed_size = (saveds + 1) * sizeof(sljit_sw); -#ifndef _WIN64 - if (saveds >= 2) - size += saveds - 1; -#else - if (saveds >= 4) - size += saveds - 3; - if (scratches >= 5) { - size += (5 - 4) * 2; - pushed_size += sizeof(sljit_sw); - } -#endif - size += args * 3; - if (size > 0) { + saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + size = reg_map[i] >= 8 ? 2 : 1; inst = (sljit_ub*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); - INC_SIZE(size); - if (saveds >= 5) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg); - *inst++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]); - } - if (saveds >= 4) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg); - *inst++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]); - } - if (saveds >= 3) { -#ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg); - *inst++ = REX_B; -#else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg); -#endif - PUSH_REG(reg_lmap[SLJIT_SAVED_REG3]); - } - if (saveds >= 2) { -#ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg); + if (reg_map[i] >= 8) *inst++ = REX_B; -#else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg); -#endif - PUSH_REG(reg_lmap[SLJIT_SAVED_REG2]); - } - if (saveds >= 1) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG1] < 8, saved_reg1_is_loreg); - PUSH_REG(reg_lmap[SLJIT_SAVED_REG1]); - } -#ifdef _WIN64 - if (scratches >= 5) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8, temporary_ereg2_is_hireg); + PUSH_REG(reg_lmap[i]); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) *inst++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]); - } -#endif + PUSH_REG(reg_lmap[i]); + } + + if (args > 0) { + size = args * 3; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); #ifndef _WIN64 if (args > 0) { *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7 /* rdi */; + *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */; } if (args > 1) { *inst++ = REX_W | REX_R; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6 /* rsi */; + *inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */; } if (args > 2) { *inst++ = REX_W | REX_R; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2 /* rdx */; + *inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */; } #else if (args > 0) { *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1 /* rcx */; + *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */; } if (args > 1) { *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2 /* rdx */; + *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */; } if (args > 2) { *inst++ = REX_W | REX_B; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0 /* r8 */; + *inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */; } #endif } - local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size; + local_size = ((local_size + FIXED_LOCALS_OFFSET + saved_register_size + 16 - 1) & ~(16 - 1)) - saved_register_size; compiler->local_size = local_size; + #ifdef _WIN64 if (local_size > 1024) { /* Allocate stack for the callback, which grows the stack. */ @@ -208,9 +185,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil *inst++ = REX_W; *inst++ = GROUP_BINARY_83; *inst++ = MOD_REG | SUB | 4; - /* Pushed size must be divisible by 8. */ - SLJIT_ASSERT(!(pushed_size & 0x7)); - if (pushed_size & 0x8) { + /* Allocated size for registers must be divisible by 8. */ + SLJIT_ASSERT(!(saved_register_size & 0x7)); + /* Aligned to 16 byte. */ + if (saved_register_size & 0x8) { *inst++ = 5 * sizeof(sljit_sw); local_size -= 5 * sizeof(sljit_sw); } else { @@ -218,10 +196,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil local_size -= 4 * sizeof(sljit_sw); } /* Second instruction */ - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] < 8, temporary_reg1_is_loreg); + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] < 8, temporary_reg1_is_loreg); *inst++ = REX_W; *inst++ = MOV_rm_i32; - *inst++ = MOD_REG | reg_lmap[SLJIT_SCRATCH_REG1]; + *inst++ = MOD_REG | reg_lmap[SLJIT_R0]; *(sljit_si*)inst = local_size; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->skip_checks = 1; @@ -229,6 +207,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); } #endif + SLJIT_ASSERT(local_size > 0); if (local_size <= 127) { inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); @@ -249,43 +228,47 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compil *(sljit_si*)inst = local_size; inst += sizeof(sljit_si); } + #ifdef _WIN64 - /* Save xmm6 with MOVAPS instruction. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = GROUP_0F; - *(sljit_si*)inst = 0x20247429; + /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */ + if (fscratches >= 6 || fsaveds >= 1) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + *inst++ = GROUP_0F; + *(sljit_si*)inst = 0x20247429; + } #endif return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, sljit_si args, sljit_si scratches, sljit_si saveds, sljit_si local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) { - sljit_si pushed_size; + sljit_si saved_register_size; CHECK_ERROR_VOID(); - check_sljit_set_context(compiler, args, scratches, saveds, local_size); + check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + compiler->options = options; compiler->scratches = scratches; compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->logical_local_size = local_size; #endif /* Including the return address saved by the call instruction. */ - pushed_size = (saveds + 1) * sizeof(sljit_sw); -#ifdef _WIN64 - if (scratches >= 5) - pushed_size += sizeof(sljit_sw); -#endif - compiler->local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size; + saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + compiler->local_size = ((local_size + FIXED_LOCALS_OFFSET + saved_register_size + 16 - 1) & ~(16 - 1)) - saved_register_size; } SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) { - sljit_si size; + sljit_si i, tmp, size; sljit_ub *inst; CHECK_ERROR(); @@ -295,13 +278,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); #ifdef _WIN64 - /* Restore xmm6 with MOVAPS instruction. */ - inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = GROUP_0F; - *(sljit_si*)inst = 0x20247428; + /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */ + if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + *inst++ = GROUP_0F; + *(sljit_si*)inst = 0x20247428; + } #endif + SLJIT_ASSERT(compiler->local_size > 0); if (compiler->local_size <= 127) { inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); @@ -322,50 +308,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compi *(sljit_si*)inst = compiler->local_size; } - size = 1 + compiler->saveds; -#ifndef _WIN64 - if (compiler->saveds >= 2) - size += compiler->saveds - 1; -#else - if (compiler->saveds >= 4) - size += compiler->saveds - 3; - if (compiler->scratches >= 5) - size += (5 - 4) * 2; -#endif - inst = (sljit_ub*)ensure_buf(compiler, 1 + size); - FAIL_IF(!inst); - - INC_SIZE(size); - -#ifdef _WIN64 - if (compiler->scratches >= 5) { - *inst++ = REX_B; - POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]); - } -#endif - if (compiler->saveds >= 1) - POP_REG(reg_map[SLJIT_SAVED_REG1]); - if (compiler->saveds >= 2) { -#ifndef _WIN64 - *inst++ = REX_B; -#endif - POP_REG(reg_lmap[SLJIT_SAVED_REG2]); - } - if (compiler->saveds >= 3) { -#ifndef _WIN64 - *inst++ = REX_B; -#endif - POP_REG(reg_lmap[SLJIT_SAVED_REG3]); - } - if (compiler->saveds >= 4) { - *inst++ = REX_B; - POP_REG(reg_lmap[SLJIT_SAVED_EREG1]); + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); } - if (compiler->saveds >= 5) { - *inst++ = REX_B; - POP_REG(reg_lmap[SLJIT_SAVED_EREG2]); + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); } + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); RET(); return SLJIT_SUCCESS; } @@ -409,72 +376,67 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); /* Both size flags cannot be switched on. */ SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); -#if (defined SLJIT_SSE2 && SLJIT_SSE2) /* SSE2 and immediate is not possible. */ SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); -#endif size &= 0xf; inst_size = size; - if ((b & SLJIT_MEM) && !(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) { - if (emit_load_imm64(compiler, TMP_REG3, immb)) - return NULL; - immb = 0; - if (b & REG_MASK) - b |= TO_OFFS_REG(TMP_REG3); - else - b |= TMP_REG3; - } - if (!compiler->mode32 && !(flags & EX86_NO_REXW)) rex |= REX_W; else if (flags & EX86_REX) rex |= REX; -#if (defined SLJIT_SSE2 && SLJIT_SSE2) if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) inst_size++; -#endif if (flags & EX86_PREF_66) inst_size++; /* Calculate size of b. */ inst_size += 1; /* mod r/m byte. */ if (b & SLJIT_MEM) { + if (!(b & OFFS_REG_MASK)) { + if (NOT_HALFWORD(immb)) { + if (emit_load_imm64(compiler, TMP_REG3, immb)) + return NULL; + immb = 0; + if (b & REG_MASK) + b |= TO_OFFS_REG(TMP_REG3); + else + b |= TMP_REG3; + } + else if (reg_lmap[b & REG_MASK] == 4) + b |= TO_OFFS_REG(SLJIT_SP); + } + if ((b & REG_MASK) == SLJIT_UNUSED) inst_size += 1 + sizeof(sljit_si); /* SIB byte required to avoid RIP based addressing. */ else { if (reg_map[b & REG_MASK] >= 8) rex |= REX_B; - if (immb != 0 && !(b & OFFS_REG_MASK)) { + + if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) { /* Immediate operand. */ if (immb <= 127 && immb >= -128) inst_size += sizeof(sljit_sb); else inst_size += sizeof(sljit_si); } - } + else if (reg_lmap[b & REG_MASK] == 5) + inst_size += sizeof(sljit_sb); - if ((b & REG_MASK) == SLJIT_LOCALS_REG && !(b & OFFS_REG_MASK)) - b |= TO_OFFS_REG(SLJIT_LOCALS_REG); - - if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) { - inst_size += 1; /* SIB byte. */ - if (reg_map[OFFS_REG(b)] >= 8) - rex |= REX_X; + if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) { + inst_size += 1; /* SIB byte. */ + if (reg_map[OFFS_REG(b)] >= 8) + rex |= REX_X; + } } } -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - else if (!(flags & EX86_SSE2) && reg_map[b] >= 8) + else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8) rex |= REX_B; -#else - else if (reg_map[b] >= 8) - rex |= REX_B; -#endif if (a & SLJIT_IMM) { if (flags & EX86_BIN_INS) { @@ -500,13 +462,8 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si else { SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */ -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - if (!(flags & EX86_SSE2) && reg_map[a] >= 8) - rex |= REX_R; -#else - if (reg_map[a] >= 8) + if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8) rex |= REX_R; -#endif } if (rex) @@ -517,12 +474,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si /* Encoding the byte. */ INC_SIZE(inst_size); -#if (defined SLJIT_SSE2 && SLJIT_SSE2) if (flags & EX86_PREF_F2) *inst++ = 0xf2; if (flags & EX86_PREF_F3) *inst++ = 0xf3; -#endif if (flags & EX86_PREF_66) *inst++ = 0x66; if (rex) @@ -536,15 +491,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si if ((a & SLJIT_IMM) || (a == 0)) *buf_ptr = 0; -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - else if (!(flags & EX86_SSE2)) + else if (!(flags & EX86_SSE2_OP1)) *buf_ptr = reg_lmap[a] << 3; else *buf_ptr = a << 3; -#else - else - *buf_ptr = reg_lmap[a] << 3; -#endif } else { if (a & SLJIT_IMM) { @@ -558,14 +508,10 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si } if (!(b & SLJIT_MEM)) -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2)) ? reg_lmap[b] : b); -#else - *buf_ptr++ |= MOD_REG + reg_lmap[b]; -#endif + *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b); else if ((b & REG_MASK) != SLJIT_UNUSED) { - if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_LOCALS_REG)) { - if (immb != 0) { + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { + if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { if (immb <= 127 && immb >= -128) *buf_ptr |= 0x40; else @@ -579,7 +525,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3); } - if (immb != 0) { + if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { if (immb <= 127 && immb >= -128) *buf_ptr++ = immb; /* 8 bit displacement. */ else { @@ -589,8 +535,12 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si } } else { + if (reg_lmap[b & REG_MASK] == 5) + *buf_ptr |= 0x40; *buf_ptr++ |= 0x04; *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6); + if (reg_lmap[b & REG_MASK] == 5) + *buf_ptr++ = 0; } } else { @@ -621,7 +571,7 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj sljit_ub *inst; #ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 6 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers); + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers); inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); FAIL_IF(!inst); @@ -629,13 +579,13 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj if (type >= SLJIT_CALL3) { *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3]; + *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2]; } *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1]; + *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0]; #else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG2] == 2 && reg_map[SLJIT_SCRATCH_REG1] < 8 && reg_map[SLJIT_SCRATCH_REG3] < 8, args_registers); + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers); inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); FAIL_IF(!inst); @@ -643,11 +593,11 @@ static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, slj if (type >= SLJIT_CALL3) { *inst++ = REX_W | REX_R; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_SCRATCH_REG3]; + *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2]; } *inst++ = REX_W; *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_SCRATCH_REG1]; + *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0]; #endif return SLJIT_SUCCESS; } diff --git a/pcre/sljit/sljitNativeX86_common.c b/pcre/sljit/sljitNativeX86_common.c index 653705f6ca6..71442fef254 100644 --- a/pcre/sljit/sljitNativeX86_common.c +++ b/pcre/sljit/sljitNativeX86_common.c @@ -64,51 +64,46 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) /* Last register + 1. */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = { - 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { + 0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5 }; #define CHECK_EXTRA_REGS(p, w, do) \ - if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \ - w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \ - p = SLJIT_MEM1(SLJIT_LOCALS_REG); \ - do; \ - } \ - else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \ - w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \ - p = SLJIT_MEM1(SLJIT_LOCALS_REG); \ + if (p >= SLJIT_R3 && p <= SLJIT_R6) { \ + w = FIXED_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \ + p = SLJIT_MEM1(SLJIT_SP); \ do; \ } #else /* SLJIT_CONFIG_X86_32 */ /* Last register + 1. */ -#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) -#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) -#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) /* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present Note: avoid to use r12 and r13 for memory addessing therefore r12 is better for SAVED_EREG than SAVED_REG. */ #ifndef _WIN64 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9 }; /* low-map. reg_map & 0x7. */ -static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1 +static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1 }; #else /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */ -static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9 +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9 }; /* low-map. reg_map & 0x7. */ -static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { - 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1 +static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1 }; #endif @@ -133,9 +128,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { #endif /* SLJIT_CONFIG_X86_32 */ -#if (defined SLJIT_SSE2 && SLJIT_SSE2) #define TMP_FREG (0) -#endif /* Size flags for emit_x86_instruction: */ #define EX86_BIN_INS 0x0010 @@ -145,12 +138,11 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { #define EX86_BYTE_ARG 0x0100 #define EX86_HALF_ARG 0x0200 #define EX86_PREF_66 0x0400 - -#if (defined SLJIT_SSE2 && SLJIT_SSE2) -#define EX86_SSE2 0x0800 -#define EX86_PREF_F2 0x1000 -#define EX86_PREF_F3 0x2000 -#endif +#define EX86_PREF_F2 0x0800 +#define EX86_PREF_F3 0x1000 +#define EX86_SSE2_OP1 0x2000 +#define EX86_SSE2_OP2 0x4000 +#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -179,6 +171,9 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { #define CMP_EAX_i32 0x3d #define CMP_r_rm 0x3b #define CMP_rm_r 0x39 +#define CVTPD2PS_x_xm 0x5a +#define CVTSI2SD_x_rm 0x2a +#define CVTTSD2SI_r_xm 0x2c #define DIV (/* GROUP_F7 */ 6 << 3) #define DIVSD_x_xm 0x5e #define INT3 0xcc @@ -239,6 +234,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { #define TEST_EAX_i32 0xa9 #define TEST_rm_r 0x85 #define UCOMISD_x_xm 0x2e +#define UNPCKLPD_x_xm 0x14 #define XCHG_EAX_r 0x90 #define XCHG_r_rm 0x87 #define XOR (/* BINARY */ 6 << 3) @@ -271,7 +267,7 @@ static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { /* Multithreading does not affect these static variables, since they store built-in CPU features. Therefore they can be overwritten by different threads if they detect the CPU features in the same time. */ -#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) static sljit_si cpu_has_sse2 = -1; #endif static sljit_si cpu_has_cmov = -1; @@ -325,7 +321,7 @@ static void get_cpu_features(void) #endif /* _MSC_VER && _MSC_VER >= 1400 */ -#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) cpu_has_sse2 = (features >> 26) & 0x1; #endif cpu_has_cmov = (features >> 15) & 0x1; @@ -751,14 +747,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #ifdef _WIN64 SLJIT_COMPILE_ASSERT( - reg_map[SLJIT_SCRATCH_REG1] == 0 - && reg_map[SLJIT_SCRATCH_REG2] == 2 + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] == 2 && reg_map[TMP_REG1] > 7, invalid_register_assignment_for_div_mul); #else SLJIT_COMPILE_ASSERT( - reg_map[SLJIT_SCRATCH_REG1] == 0 - && reg_map[SLJIT_SCRATCH_REG2] < 7 + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] < 7 && reg_map[TMP_REG1] == 2, invalid_register_assignment_for_div_mul); #endif @@ -768,8 +764,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler op = GET_OPCODE(op); if (op == SLJIT_UDIV) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0); - inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0); #else inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); #endif @@ -779,7 +775,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler if (op == SLJIT_SDIV) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SCRATCH_REG2, 0); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); #endif #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -808,7 +804,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler FAIL_IF(!inst); INC_SIZE(2); *inst++ = GROUP_F7; - *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_SCRATCH_REG2]); + *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); #else #ifdef _WIN64 size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2; @@ -824,12 +820,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler else if (op >= SLJIT_UDIV) *inst++ = REX_B; *inst++ = GROUP_F7; - *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_SCRATCH_REG2]); + *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); #else if (!compiler->mode32) *inst++ = REX_W; *inst++ = GROUP_F7; - *inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2]; + *inst = MOD_REG | reg_map[SLJIT_R1]; #endif #endif switch (op) { @@ -847,7 +843,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler break; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) - EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REG1, 0); + EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); #endif break; } @@ -956,22 +952,22 @@ static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (dst_r == TMP_REG1) { /* Find a non-used register, whose reg_map[src] < 4. */ - if ((dst & REG_MASK) == SLJIT_SCRATCH_REG1) { - if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SCRATCH_REG2)) - work_r = SLJIT_SCRATCH_REG3; + if ((dst & REG_MASK) == SLJIT_R0) { + if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1)) + work_r = SLJIT_R2; else - work_r = SLJIT_SCRATCH_REG2; + work_r = SLJIT_R1; } else { - if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1)) - work_r = SLJIT_SCRATCH_REG1; - else if ((dst & REG_MASK) == SLJIT_SCRATCH_REG2) - work_r = SLJIT_SCRATCH_REG3; + if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) + work_r = SLJIT_R0; + else if ((dst & REG_MASK) == SLJIT_R1) + work_r = SLJIT_R2; else - work_r = SLJIT_SCRATCH_REG2; + work_r = SLJIT_R1; } - if (work_r == SLJIT_SCRATCH_REG1) { + if (work_r == SLJIT_R0) { ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); } else { @@ -984,7 +980,7 @@ static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, FAIL_IF(!inst); *inst = MOV_rm8_r8; - if (work_r == SLJIT_SCRATCH_REG1) { + if (work_r == SLJIT_R0) { ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); } else { @@ -1179,12 +1175,12 @@ static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, dst_r = dst; else { /* Find an unused temporary register. */ - if ((dst & REG_MASK) != SLJIT_SCRATCH_REG1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG1)) - dst_r = SLJIT_SCRATCH_REG1; - else if ((dst & REG_MASK) != SLJIT_SCRATCH_REG2 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SCRATCH_REG2)) - dst_r = SLJIT_SCRATCH_REG2; + if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) + dst_r = SLJIT_R0; + else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1)) + dst_r = SLJIT_R1; else - dst_r = SLJIT_SCRATCH_REG3; + dst_r = SLJIT_R2; EMIT_MOV(compiler, dst, dstw, dst_r, 0); } EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31); @@ -1340,7 +1336,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { - SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG)); + SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); dst = TMP_REG1; } #endif @@ -1378,7 +1374,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1) - return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REG1, 0); + return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0); #endif if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) { @@ -1470,9 +1466,9 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler, if (dst == src1 && dstw == src1w) { if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) { + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(op_eax_imm, src2w); } @@ -1504,9 +1500,9 @@ static sljit_si emit_cum_binary(struct sljit_compiler *compiler, if (dst == src2 && dstw == src2w) { if (src1 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { #else - if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) { + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) { #endif BINARY_EAX_IMM(op_eax_imm, src1w); } @@ -1586,9 +1582,9 @@ static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, if (dst == src1 && dstw == src1w) { if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) { + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(op_eax_imm, src2w); } @@ -1840,9 +1836,9 @@ static sljit_si emit_cmp_binary(struct sljit_compiler *compiler, sljit_ub* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(CMP_EAX_i32, src2w); return SLJIT_SUCCESS; @@ -1891,18 +1887,18 @@ static sljit_si emit_test_binary(struct sljit_compiler *compiler, sljit_ub* inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { #else - if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { #endif BINARY_EAX_IMM(TEST_EAX_i32, src2w); return SLJIT_SUCCESS; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src2 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { + if (src2 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { #else - if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { + if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { #endif BINARY_EAX_IMM(TEST_EAX_i32, src1w); return SLJIT_SUCCESS; @@ -2064,7 +2060,7 @@ static sljit_si emit_shift(struct sljit_compiler *compiler, EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); #else /* [esp+0] contains the flags. */ - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0); #endif EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); @@ -2073,7 +2069,7 @@ static sljit_si emit_shift(struct sljit_compiler *compiler, #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); #else - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw)); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); #endif EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); } @@ -2215,8 +2211,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) { check_sljit_get_register_index(reg); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2 - || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2) + if (reg >= SLJIT_R3 && reg <= SLJIT_R6) return -1; #endif return reg_map[reg]; @@ -2248,8 +2243,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *co /* Floating point operators */ /* --------------------------------------------------------------------- */ -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - /* Alignment + 2 * 16 bytes. */ static sljit_si sse2_data[3 + (4 + 4) * 2]; static sljit_si *sse2_buffer; @@ -2267,27 +2260,19 @@ static void init_compiler(void) sse2_buffer[13] = 0x7fffffff; } -#endif - SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) { #ifdef SLJIT_IS_FPU_AVAILABLE return SLJIT_IS_FPU_AVAILABLE; -#elif (defined SLJIT_SSE2 && SLJIT_SSE2) -#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) if (cpu_has_sse2 == -1) get_cpu_features(); return cpu_has_sse2; #else /* SLJIT_DETECT_SSE2 */ return 1; #endif /* SLJIT_DETECT_SSE2 */ -#else /* SLJIT_SSE2 */ - return 0; -#endif } -#if (defined SLJIT_SSE2 && SLJIT_SSE2) - static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode, sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) { @@ -2324,31 +2309,89 @@ static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler, return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); } -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) { - sljit_si dst_r; + sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + sljit_ub *inst; - CHECK_ERROR(); - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVW_FROMD) + compiler->mode32 = 0; +#endif + + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CVTTSD2SI_r_xm; + + if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + sljit_ub *inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMW) + compiler->mode32 = 0; +#endif + + if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; +#endif + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + srcw = 0; + } + + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CVTSI2SD_x_rm; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 1; #endif + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} - if (GET_OPCODE(op) == SLJIT_CMPD) { - compiler->flags_saved = 0; - if (FAST_IS_REG(dst)) - dst_r = dst; - else { - dst_r = TMP_FREG; - FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw)); - } - return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw); +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + compiler->flags_saved = 0; + if (!FAST_IS_REG(src1)) { + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); + src1 = TMP_FREG; } + return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w); +} - if (op == SLJIT_MOVD) { +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + CHECK_ERROR(); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_MOVD) { if (FAST_IS_REG(dst)) return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw); if (FAST_IS_REG(src)) @@ -2357,6 +2400,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compile return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); } + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) { + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + if (FAST_IS_REG(src)) { + /* We overwrite the high bits of source. From SLJIT point of view, + this is not an issue. + Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */ + FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0)); + } + else { + FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw)); + src = TMP_FREG; + } + + FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0)); + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; + } + if (SLOW_IS_REG(dst)) { dst_r = dst; if (dst != src) @@ -2391,6 +2453,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile CHECK_ERROR(); check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 1; @@ -2440,33 +2505,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compile return SLJIT_SUCCESS; } -#else - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src, sljit_sw srcw) -{ - CHECK_ERROR(); - /* Should cause an assertion fail. */ - check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); - compiler->error = SLJIT_ERR_UNSUPPORTED; - return SLJIT_ERR_UNSUPPORTED; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, - sljit_si dst, sljit_sw dstw, - sljit_si src1, sljit_sw src1w, - sljit_si src2, sljit_sw src2w) -{ - CHECK_ERROR(); - /* Should cause an assertion fail. */ - check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); - compiler->error = SLJIT_ERR_UNSUPPORTED; - return SLJIT_ERR_UNSUPPORTED; -} - -#endif - /* --------------------------------------------------------------------- */ /* Conditional instructions */ /* --------------------------------------------------------------------- */ @@ -2557,16 +2595,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compil if (type >= SLJIT_CALL1) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (src == SLJIT_SCRATCH_REG3) { + if (src == SLJIT_R2) { EMIT_MOV(compiler, TMP_REG1, 0, src, 0); src = TMP_REG1; } - if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3) + if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3) srcw += sizeof(sljit_sw); #endif #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64) - if (src == SLJIT_SCRATCH_REG3) { + if (src == SLJIT_R2) { EMIT_MOV(compiler, TMP_REG1, 0, src, 0); src = TMP_REG1; } @@ -2732,8 +2770,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *com } if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax); - if (dst != SLJIT_SCRATCH_REG1) { + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax); + if (dst != SLJIT_R0) { inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); FAIL_IF(!inst); INC_SIZE(1 + 3 + 2 + 1); @@ -2802,23 +2840,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *co compiler->mode32 = 0; #endif - ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset); + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (NOT_HALFWORD(offset)) { FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset)); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) - SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); + SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); return compiler->error; #else - return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REG1, 0); + return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0); #endif } #endif if (offset != 0) - return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset); - return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0); + return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return emit_mov(compiler, dst, dstw, SLJIT_SP, 0); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) |