author     ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>   2011-08-22 14:35:22 +0000
committer  ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>   2011-08-22 14:35:22 +0000
commit     6830d6ce7bb4a5178b8cb866e6e262268f00b8d8 (patch)
tree       672f743c90198ec1d45b506e55a7e99701d5b8ea
parent     726ab5e2bc4e0cca8ea937d0eaa89007d3080d91 (diff)
download   pcre-6830d6ce7bb4a5178b8cb866e6e262268f00b8d8.tar.gz
JIT compiler source
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@662 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--  sljit/sljitConfig.h              96
-rw-r--r--  sljit/sljitConfigInternal.h     350
-rw-r--r--  sljit/sljitExecAllocator.c      274
-rw-r--r--  sljit/sljitLir.c               1443
-rw-r--r--  sljit/sljitLir.h                737
-rw-r--r--  sljit/sljitNativeARM_Thumb2.c  1841
-rw-r--r--  sljit/sljitNativeARM_v5.c      2355
-rw-r--r--  sljit/sljitNativeMIPS_32.c      366
-rw-r--r--  sljit/sljitNativeMIPS_common.c 1705
-rw-r--r--  sljit/sljitNativePPC_32.c       246
-rw-r--r--  sljit/sljitNativePPC_64.c       415
-rw-r--r--  sljit/sljitNativePPC_common.c  1784
-rw-r--r--  sljit/sljitNativeX86_32.c       521
-rw-r--r--  sljit/sljitNativeX86_64.c       836
-rw-r--r--  sljit/sljitNativeX86_common.c  2690
-rw-r--r--  sljit/sljitUtils.c              244
16 files changed, 15903 insertions, 0 deletions
diff --git a/sljit/sljitConfig.h b/sljit/sljitConfig.h
new file mode 100644
index 0000000..b71c03e
--- /dev/null
+++ b/sljit/sljitConfig.h
@@ -0,0 +1,96 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ *    conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *    of conditions and the following disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SLJIT_CONFIG_H_
+#define _SLJIT_CONFIG_H_
+
+/* --------------------------------------------------------------------- */
+/* Architecture */
+/* --------------------------------------------------------------------- */
+
+/* Architecture selection */
+/* #define SLJIT_CONFIG_X86_32 1 */
+/* #define SLJIT_CONFIG_X86_64 1 */
+/* #define SLJIT_CONFIG_ARM_V5 1 */
+/* #define SLJIT_CONFIG_ARM_V7 1 */
+/* #define SLJIT_CONFIG_ARM_THUMB2 1 */
+/* #define SLJIT_CONFIG_PPC_32 1 */
+/* #define SLJIT_CONFIG_PPC_64 1 */
+/* #define SLJIT_CONFIG_MIPS_32 1 */
+
+/* #define SLJIT_CONFIG_AUTO 1 */
+/* #define SLJIT_CONFIG_UNSUPPORTED 1 */
+
+/* --------------------------------------------------------------------- */
+/* Utilities */
+/* --------------------------------------------------------------------- */
+
+/* Useful for thread-safe compiling of global functions. */
+#ifndef SLJIT_UTIL_GLOBAL_LOCK
+/* Enabled by default */
+#define SLJIT_UTIL_GLOBAL_LOCK 1
+#endif
+
+/* Implements a stack like data structure (by using mmap / VirtualAlloc). */
+#ifndef SLJIT_UTIL_STACK
+/* Enabled by default */
+#define SLJIT_UTIL_STACK 1
+#endif
+
+/* --------------------------------------------------------------------- */
+/* Configuration */
+/* --------------------------------------------------------------------- */
+
+/* If SLJIT_STD_MACROS_DEFINED is not defined, the application should
+   define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMMOVE, and NULL. */
+#ifndef SLJIT_STD_MACROS_DEFINED
+/* Disabled by default. */
+#define SLJIT_STD_MACROS_DEFINED 0
+#endif
+
+/* Executable code allocation:
+   If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should
+   define both SLJIT_MALLOC_EXEC and SLJIT_FREE_EXEC. */
+#ifndef SLJIT_EXECUTABLE_ALLOCATOR
+/* Enabled by default. */
+#define SLJIT_EXECUTABLE_ALLOCATOR 1
+#endif
+
+/* Debug checks (assertions, etc.). */
+#ifndef SLJIT_DEBUG
+/* Enabled by default */
+#define SLJIT_DEBUG 1
+#endif
+
+/* Verbose operations */
+#ifndef SLJIT_VERBOSE
+/* Enabled by default */
+#define SLJIT_VERBOSE 1
+#endif
+
+/* See the beginning of sljitConfigInternal.h */
+
+#endif
diff --git a/sljit/sljitConfigInternal.h b/sljit/sljitConfigInternal.h
new file mode 100644
index 0000000..1293f76
--- /dev/null
+++ b/sljit/sljitConfigInternal.h
@@ -0,0 +1,350 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ *    conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *    of conditions and the following disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SLJIT_CONFIG_INTERNAL_H_
+#define _SLJIT_CONFIG_INTERNAL_H_
+
+/*
+   SLJIT defines the following variables itself depending on the configuration:
+   sljit_b, sljit_ub : signed and unsigned 8 bit byte
+   sljit_h, sljit_uh : signed and unsigned 16 bit half-word (short) type
+   sljit_i, sljit_ui : signed and unsigned 32 bit integer type
+   sljit_w, sljit_uw : signed and unsigned machine word, enough to store a pointer (same as intptr_t)
+   SLJIT_CALL : C calling convention for both calling JIT and C callbacks from JIT
+   SLJIT_32BIT_ARCHITECTURE : 32 bit architecture
+   SLJIT_64BIT_ARCHITECTURE : 64 bit architecture
+   SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_w/sljit_uw array by index
+   SLJIT_FLOAT_SHIFT : the shift required to apply when accessing a double array by index
+   SLJIT_BIG_ENDIAN : big endian architecture
+   SLJIT_LITTLE_ENDIAN : little endian architecture
+   SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET()
+   SLJIT_W : for defining 64 bit constants on 64 bit architectures (compiler workaround)
+   SLJIT_UNALIGNED : allows unaligned memory accesses for integer arithmetic (only!)
+*/ + +#if !((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + || (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ + || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + || (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ + || (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)) +#error "An architecture must be selected" +#endif + +/* Sanity check. */ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + + (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + + (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ + + (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + + (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ + + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2 +#error "Multiple architectures are selected" +#endif + +/* Auto select option (requires compiler support) */ +#if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) + +#ifndef _WIN32 + +#if defined(__i386__) || defined(__i386) +#define SLJIT_CONFIG_X86_32 1 +#elif defined(__x86_64__) +#define SLJIT_CONFIG_X86_64 1 +#elif defined(__arm__) || defined(__ARM__) +#ifdef __thumb2__ +#define SLJIT_CONFIG_ARM_THUMB2 1 +#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) +#define SLJIT_CONFIG_ARM_V7 1 +#else +#define SLJIT_CONFIG_ARM_V5 1 +#endif +#elif defined(__ppc64__) || defined(__powerpc64__) +#define SLJIT_CONFIG_PPC_64 1 +#elif defined(__ppc__) || defined(__powerpc__) +#define SLJIT_CONFIG_PPC_32 1 +#elif defined(__mips__) +#define SLJIT_CONFIG_MIPS_32 1 +#else +/* Unsupported architecture */ +#define SLJIT_CONFIG_UNSUPPORTED 1 +#endif + +#else /* !_WIN32 */ + +#if defined(_M_X64) || defined(__x86_64__) +#define SLJIT_CONFIG_X86_64 1 +#elif defined(_ARM_) +#define SLJIT_CONFIG_ARM_V5 1 +#else +#define SLJIT_CONFIG_X86_32 1 +#endif + +#endif /* !WIN32 */ +#endif /* SLJIT_CONFIG_AUTO */ + +#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#undef SLJIT_EXECUTABLE_ALLOCATOR +#endif + +#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) + +#include <stdlib.h> +#include <string.h> + +/* General libraries: + Note: SLJIT is designed to be independent from them as possible. + + In release mode (SLJIT_DEBUG is not defined) only the following macros are needed: */ + +/* General allocation. */ +#define SLJIT_MALLOC(size) malloc(size) +#define SLJIT_FREE(ptr) free(ptr) +#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len) + +#endif /* STD_MACROS_DEFINED */ + +#if !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) + +#if defined(__GNUC__) && (__GNUC__ >= 3) +#define SLJIT_LIKELY(x) __builtin_expect((x), 1) +#define SLJIT_UNLIKELY(x) __builtin_expect((x), 0) +#else +#define SLJIT_LIKELY(x) (x) +#define SLJIT_UNLIKELY(x) (x) +#endif + +#endif /* !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) */ + +#ifndef SLJIT_INLINE +/* Inline functions. */ +#define SLJIT_INLINE __inline +#endif + +#ifndef SLJIT_CONST +/* Const variables. 
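
Every switch above is wrapped in #ifndef, so an embedding application can pre-define any of them before the sljit headers are included. A minimal sketch of overriding the default heap macros (editor's illustration, not part of this commit; my_alloc and my_free are hypothetical application routines):

#include <stddef.h>	/* NULL must be visible when the std macros are off */
#include <string.h>	/* memmove */

void *my_alloc(size_t size);	/* hypothetical application allocator */
void my_free(void *ptr);

#define SLJIT_CONFIG_AUTO 1		/* pick the architecture from compiler predefines */
#define SLJIT_STD_MACROS_DEFINED 1	/* suppress the default malloc/free/memmove macros */
#define SLJIT_MALLOC(size) my_alloc(size)
#define SLJIT_FREE(ptr) my_free(ptr)
#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len)

#include "sljitLir.h"
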
*/ +#define SLJIT_CONST const +#endif + +#ifndef SLJIT_UNUSED_ARG +/* Unused arguments. */ +#define SLJIT_UNUSED_ARG(arg) (void)arg +#endif + +#ifndef SLJIT_CACHE_FLUSH + +#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) && !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* Just call __ARM_NR_cacheflush on Linux. */ +#define SLJIT_CACHE_FLUSH(from, to) \ + __clear_cache((char*)(from), (char*)(to)) +#else + /* Not required to implement on archs with unified caches. */ +#define SLJIT_CACHE_FLUSH(from, to) +#endif + +#endif /* !SLJIT_CACHE_FLUSH */ + +/* 8 bit byte type. */ +typedef unsigned char sljit_ub; +typedef signed char sljit_b; + +/* 16 bit half-word type. */ +typedef unsigned short int sljit_uh; +typedef signed short int sljit_h; + +/* 32 bit integer type. */ +typedef unsigned int sljit_ui; +typedef signed int sljit_i; + +/* Machine word type. Can encapsulate a pointer. + 32 bit for 32 bit machines. + 64 bit for 64 bit machines. */ +#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_32BIT_ARCHITECTURE 1 +#define SLJIT_WORD_SHIFT 2 +typedef unsigned int sljit_uw; +typedef int sljit_w; +#else +#define SLJIT_64BIT_ARCHITECTURE 1 +#define SLJIT_WORD_SHIFT 3 +#ifdef _WIN32 +typedef unsigned __int64 sljit_uw; +typedef __int64 sljit_w; +#else +typedef unsigned long int sljit_uw; +typedef long int sljit_w; +#endif +#endif + +/* Double precision. */ +#define SLJIT_FLOAT_SHIFT 3 + +#ifndef SLJIT_W + +/* Defining long constants. */ +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define SLJIT_W(w) (w##ll) +#else +#define SLJIT_W(w) (w) +#endif + +#endif /* !SLJIT_W */ + +#ifndef SLJIT_CALL + +/* ABI (Application Binary Interface) types. */ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + +#ifdef __GNUC__ +#define SLJIT_CALL __attribute__ ((fastcall)) +#define SLJIT_X86_32_FASTCALL 1 +#elif defined(_WIN32) +#define SLJIT_CALL __fastcall +#define SLJIT_X86_32_FASTCALL 1 +#else +#define SLJIT_CALL __stdcall +#endif + +#else /* Other architectures. */ + +#define SLJIT_CALL + +#endif /* SLJIT_CONFIG_X86_32 */ + +#endif /* !SLJIT_CALL */ + +#if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) + +/* These macros are useful for the application. */ +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define SLJIT_BIG_ENDIAN 1 +#else +#define SLJIT_LITTLE_ENDIAN 1 +#endif + +#endif /* !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) */ + +/* Sanity check. */ +#if (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#error "Exactly one endianness must be selected" +#endif + +#if !(defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && !(defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#error "Exactly one endianness must be selected" +#endif + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +/* It seems ppc64 compilers use an indirect addressing for functions. + It makes things really complicated. */ +#define SLJIT_INDIRECT_CALL 1 +#endif + +#ifndef SLJIT_SSE2 + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +/* Turn on SSE2 support on x86 (operating on doubles). + (Better performance than legacy fpu instructions). 
*/ +#define SLJIT_SSE2 1 + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +/* Auto detect SSE2 support using CPUID. + On 64 bit x86 cpus, sse2 must be present. */ +#define SLJIT_SSE2_AUTO 1 +#endif + +#endif /* (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) */ + +#endif /* !SLJIT_SSE2 */ + +#ifndef SLJIT_UNALIGNED + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_UNALIGNED 1 +#endif + +#endif /* !SLJIT_UNALIGNED */ + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) +void* sljit_malloc_exec(sljit_uw size); +void sljit_free_exec(void* ptr); +#define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size) +#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr) +#endif + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +#include <stdio.h> +#endif + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + +/* Feel free to redefine these two macros. */ +#ifndef SLJIT_ASSERT + +#define SLJIT_ASSERT(x) \ + do { \ + if (SLJIT_UNLIKELY(!(x))) { \ + printf("Assertion failed at " __FILE__ ":%d\n", __LINE__); \ + *((int*)0) = 0; \ + } \ + } while (0) + +#endif /* !SLJIT_ASSERT */ + +#ifndef SLJIT_ASSERT_STOP + +#define SLJIT_ASSERT_STOP() \ + do { \ + printf("Should never been reached " __FILE__ ":%d\n", __LINE__); \ + *((int*)0) = 0; \ + } while (0) + +#endif /* !SLJIT_ASSERT_STOP */ + +#else /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ + +#undef SLJIT_ASSERT +#undef SLJIT_ASSERT_STOP + +#define SLJIT_ASSERT(x) \ + do { } while (0) +#define SLJIT_ASSERT_STOP() \ + do { } while (0) + +#endif /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ + +#endif diff --git a/sljit/sljitExecAllocator.c b/sljit/sljitExecAllocator.c new file mode 100644 index 0000000..fecdb29 --- /dev/null +++ b/sljit/sljitExecAllocator.c @@ -0,0 +1,274 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + This file contains a simple executable memory allocator + + It is assumed, that executable code blocks are usually medium (or sometimes + large) memory blocks, and the allocator is not too frequently called (less + optimized than other allocators). Thus, using it as a generic allocator is + not suggested. + + How does it work: + Memory is allocated in continuous memory areas called chunks by alloc_chunk() + Chunk format: + [ block ][ block ] ... [ block ][ block terminator ] + + All blocks and the block terminator is started with block_header. The block + header contains the size of the previous and the next block. These sizes + can also contain special values. + Block size: + 0 - The block is a free_block, with a different size member. + 1 - The block is a block terminator. + n - The block is used at the moment, and the value contains its size. + Previous block size: + 0 - This is the first block of the memory chunk. + n - The size of the previous block. + + Using these size values we can go forward or backward on the block chain. + The unused blocks are stored in a chain list pointed by free_blocks. This + list is useful if we need to find a suitable memory area when the allocator + is called. + + When a block is freed, the new free block is connected to its adjacent free + blocks if possible. + + [ free block ][ used block ][ free block ] + and "used block" is freed, the three blocks are connected together: + [ one big free block ] +*/ + +/* --------------------------------------------------------------------- */ +/* System (OS) functions */ +/* --------------------------------------------------------------------- */ + +/* 64 KByte. */ +#define CHUNK_SIZE 0x10000 + +/* + alloc_chunk / free_chunk : + * allocate executable system memory chunks + * the size is always divisible by CHUNK_SIZE + allocator_grab_lock / allocator_release_lock : + * make the allocator thread safe + * can be empty if the OS (or the application) does not support threading + * only the allocator requires this lock, sljit is fully thread safe + as it only uses local variables +*/ + +#ifdef _WIN32 + +static SLJIT_INLINE void* alloc_chunk(sljit_uw size) +{ + return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); +} + +static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size) +{ + SLJIT_UNUSED_ARG(size); + VirtualFree(chunk, 0, MEM_RELEASE); +} + +#else + +#include <sys/mman.h> + +static SLJIT_INLINE void* alloc_chunk(sljit_uw size) +{ + void* retval = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); + return (retval != MAP_FAILED) ? 
retval : NULL; +} + +static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size) +{ + munmap(chunk, size); +} + +#endif + +/* --------------------------------------------------------------------- */ +/* Common functions */ +/* --------------------------------------------------------------------- */ + +#define CHUNK_MASK (~(CHUNK_SIZE - 1)) + +struct block_header { + sljit_uw size; + sljit_uw prev_size; +}; + +struct free_block { + struct block_header header; + struct free_block *next; + struct free_block *prev; + sljit_uw size; +}; + +#define AS_BLOCK_HEADER(base, offset) \ + ((struct block_header*)(((sljit_ub*)base) + offset)) +#define AS_FREE_BLOCK(base, offset) \ + ((struct free_block*)(((sljit_ub*)base) + offset)) +#define MEM_START(base) ((void*)(((sljit_ub*)base) + sizeof(struct block_header))) +#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7) + +static struct free_block* free_blocks; +static sljit_uw allocated_size; +static sljit_uw total_size; + +static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size) +{ + free_block->header.size = 0; + free_block->size = size; + + free_block->next = free_blocks; + free_block->prev = 0; + if (free_blocks) + free_blocks->prev = free_block; + free_blocks = free_block; +} + +static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block) +{ + if (free_block->next) + free_block->next->prev = free_block->prev; + + if (free_block->prev) + free_block->prev->next = free_block->next; + else { + SLJIT_ASSERT(free_blocks == free_block); + free_blocks = free_block->next; + } +} + +void* sljit_malloc_exec(sljit_uw size) +{ + struct block_header *header; + struct block_header *next_header; + struct free_block *free_block; + sljit_uw chunk_size; + + allocator_grab_lock(); + if (size < sizeof(struct free_block)) + size = sizeof(struct free_block); + size = ALIGN_SIZE(size); + + free_block = free_blocks; + while (free_block) { + if (free_block->size >= size) { + chunk_size = free_block->size; + if (chunk_size > size + 64) { + /* We just cut a block from the end of the free block. */ + chunk_size -= size; + free_block->size = chunk_size; + header = AS_BLOCK_HEADER(free_block, chunk_size); + header->prev_size = chunk_size; + AS_BLOCK_HEADER(header, size)->prev_size = size; + } + else { + sljit_remove_free_block(free_block); + header = (struct block_header*)free_block; + size = chunk_size; + } + allocated_size += size; + header->size = size; + allocator_release_lock(); + return MEM_START(header); + } + free_block = free_block->next; + } + + chunk_size = (size + sizeof(struct block_header) + CHUNK_SIZE - 1) & CHUNK_MASK; + header = (struct block_header*)alloc_chunk(chunk_size); + PTR_FAIL_IF(!header); + + chunk_size -= sizeof(struct block_header); + total_size += chunk_size; + + header->prev_size = 0; + if (chunk_size > size + 64) { + /* Cut the allocated space into a free and a used block. */ + allocated_size += size; + header->size = size; + chunk_size -= size; + + free_block = AS_FREE_BLOCK(header, size); + free_block->header.prev_size = size; + sljit_insert_free_block(free_block, chunk_size); + next_header = AS_BLOCK_HEADER(free_block, chunk_size); + } + else { + /* All space belongs to this allocation. 
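
To make the rounding above concrete, here is a worked example (editor's sketch, not in the commit; it assumes a 32-bit build, where struct block_header is two 4-byte words):

#include <assert.h>

static void align_example(void)
{
	/* ALIGN_SIZE adds the header and rounds up to 8 bytes:
	   (40 + 8 + 7) & ~7 == 48 */
	assert(ALIGN_SIZE(40) == 48);

	/* a fresh chunk is rounded up to a multiple of the 64 KByte CHUNK_SIZE:
	   (48 + 8 + 0x10000 - 1) & CHUNK_MASK == 0x10000 */
	assert(((48 + sizeof(struct block_header) + CHUNK_SIZE - 1) & CHUNK_MASK) == 0x10000);
}
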
*/ + allocated_size += chunk_size; + header->size = chunk_size; + next_header = AS_BLOCK_HEADER(header, chunk_size); + } + next_header->size = 1; + next_header->prev_size = chunk_size; + allocator_release_lock(); + return MEM_START(header); +} + +void sljit_free_exec(void* ptr) +{ + struct block_header *header; + struct free_block* free_block; + + allocator_grab_lock(); + header = AS_BLOCK_HEADER(ptr, -sizeof(struct block_header)); + allocated_size -= header->size; + + /* Connecting free blocks together if possible. */ + + /* If header->prev_size == 0, free_block will equal to header. + In this case, free_block->header.size will be > 0. */ + free_block = AS_FREE_BLOCK(header, -header->prev_size); + if (SLJIT_UNLIKELY(!free_block->header.size)) { + free_block->size += header->size; + header = AS_BLOCK_HEADER(free_block, free_block->size); + header->prev_size = free_block->size; + } + else { + free_block = (struct free_block*)header; + sljit_insert_free_block(free_block, header->size); + } + + header = AS_BLOCK_HEADER(free_block, free_block->size); + if (SLJIT_UNLIKELY(!header->size)) { + free_block->size += ((struct free_block*)header)->size; + sljit_remove_free_block((struct free_block*)header); + header = AS_BLOCK_HEADER(free_block, free_block->size); + header->prev_size = free_block->size; + } + + if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) { + if (total_size - free_block->size > (allocated_size * 3 / 2)) { + sljit_remove_free_block(free_block); + free_chunk(free_block, free_block->size + sizeof(struct block_header)); + } + } + + allocator_release_lock(); +} diff --git a/sljit/sljitLir.c b/sljit/sljitLir.c new file mode 100644 index 0000000..5d99598 --- /dev/null +++ b/sljit/sljitLir.c @@ -0,0 +1,1443 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
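
The size/prev_size convention that sljit_free_exec relies on above can be visualised with a small traversal helper (editor's sketch, not in the commit):

static void walk_chunk(struct block_header *h)
{
	/* 'h' is assumed to point at the first block_header of a chunk */
	while (h->size != 1) {	/* size == 1 marks the block terminator */
		if (h->size == 0) {
			/* free block: the real length lives in free_block.size */
			h = AS_BLOCK_HEADER(h, ((struct free_block*)h)->size);
		}
		else {
			/* used block: size is the block length itself */
			h = AS_BLOCK_HEADER(h, h->size);
		}
	}
}
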
+ */ + +#include "sljitLir.h" + +#define CHECK_ERROR() \ + do { \ + if (SLJIT_UNLIKELY(compiler->error)) \ + return compiler->error; \ + } while (0) + +#define CHECK_ERROR_PTR() \ + do { \ + if (SLJIT_UNLIKELY(compiler->error)) \ + return NULL; \ + } while (0) + +#define CHECK_ERROR_VOID() \ + do { \ + if (SLJIT_UNLIKELY(compiler->error)) \ + return; \ + } while (0) + +#define FAIL_IF(expr) \ + do { \ + if (SLJIT_UNLIKELY(expr)) \ + return compiler->error; \ + } while (0) + +#define PTR_FAIL_IF(expr) \ + do { \ + if (SLJIT_UNLIKELY(expr)) \ + return NULL; \ + } while (0) + +#define FAIL_IF_NULL(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_ALLOC_FAILED; \ + return SLJIT_ERR_ALLOC_FAILED; \ + } \ + } while (0) + +#define PTR_FAIL_IF_NULL(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_ALLOC_FAILED; \ + return NULL; \ + } \ + } while (0) + +#define PTR_FAIL_WITH_EXEC_IF(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_EX_ALLOC_FAILED; \ + return NULL; \ + } \ + } while (0) + +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#define GET_OPCODE(op) \ + ((op) & ~(SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS)) + +#define GET_FLAGS(op) \ + ((op) & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C)) + +#define GET_ALL_FLAGS(op) \ + ((op) & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS)) + +#define BUF_SIZE 4096 + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) +#define ABUF_SIZE 2048 +#else +#define ABUF_SIZE 4096 +#endif + +/* Jump flags. */ +#define JUMP_LABEL 0x1 +#define JUMP_ADDR 0x2 +/* SLJIT_REWRITABLE_JUMP is 0x1000. 
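
The CHECK_ERROR/FAIL_IF family above makes compiler->error sticky: the first failing emitter records its error, and every later call returns immediately. A sketch of how they compose inside an emitting helper (editor's illustration; emit_two_nops is hypothetical):

static int emit_two_nops(struct sljit_compiler *compiler)
{
	CHECK_ERROR();	/* bail out if a previous call already failed */
	FAIL_IF(sljit_emit_op0(compiler, SLJIT_NOP));	/* propagates compiler->error */
	FAIL_IF(sljit_emit_op0(compiler, SLJIT_NOP));
	return SLJIT_SUCCESS;
}
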
*/ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + #define PATCH_MB 0x4 + #define PATCH_MW 0x8 +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + #define PATCH_MD 0x10 +#endif +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + #define IS_BL 0x4 + #define PATCH_B 0x8 +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + #define CPOOL_SIZE 512 +#endif + +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + #define IS_CONDITIONAL 0x04 + #define IS_BL 0x08 + /* cannot be encoded as branch */ + #define B_TYPE0 0x00 + /* conditional + imm8 */ + #define B_TYPE1 0x10 + /* conditional + imm20 */ + #define B_TYPE2 0x20 + /* IT + imm24 */ + #define B_TYPE3 0x30 + /* imm11 */ + #define B_TYPE4 0x40 + /* imm24 */ + #define B_TYPE5 0x50 + /* BL + imm24 */ + #define BL_TYPE6 0x60 + /* 0xf00 cc code for branches */ +#endif + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + #define UNCOND_B 0x04 + #define PATCH_B 0x08 + #define ABSOLUTE_B 0x10 +#endif + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + #define IS_MOVABLE 0x04 + #define IS_JAL 0x08 + #define IS_BIT26_COND 0x10 + #define IS_BIT16_COND 0x20 + + #define IS_COND (IS_BIT26_COND | IS_BIT16_COND) + + #define PATCH_B 0x40 + #define PATCH_J 0x80 + + /* instruction types */ + #define UNMOVABLE_INS 0 + /* 1 - 31 last destination register */ + /* 32 - 39 FCSR FCC bits */ + #define FCSR_FCC 32 + /* no destination (i.e: store) */ + #define MOVABLE_INS 40 +#endif + +#endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */ + +/* Utils can still be used even if SLJIT_CONFIG_UNSUPPORTED is set. */ +#include "sljitUtils.c" + +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) +#include "sljitExecAllocator.c" +#endif + +#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) && !(defined SLJIT_SSE2 && SLJIT_SSE2) +#error SLJIT_SSE2_AUTO cannot be enabled without SLJIT_SSE2 +#endif + +/* --------------------------------------------------------------------- */ +/* Public functions */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || ((defined SLJIT_SSE2 && SLJIT_SSE2) && ((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64))) +#define SLJIT_NEEDS_COMPILER_INIT 1 +static int compiler_initialized = 0; +/* A thread safe initialization. */ +static void init_compiler(void); +#endif + +struct sljit_compiler* sljit_create_compiler(void) +{ + struct sljit_compiler *compiler = (struct sljit_compiler*)SLJIT_MALLOC(sizeof(struct sljit_compiler)); + /* Compile time assert. */ + SLJIT_CONST int minus1[sizeof(sljit_b) == 1 && sizeof(sljit_h) == 2 && + sizeof(sljit_i) == 4 && (sizeof(sljit_w) == 4 || sizeof(sljit_w) == 8) ? 
1 : -1] = { -1 }; + + if (!compiler) + return NULL; + + compiler->error = SLJIT_SUCCESS; + + compiler->labels = NULL; + compiler->jumps = NULL; + compiler->consts = NULL; + compiler->last_label = NULL; + compiler->last_jump = NULL; + compiler->last_const = NULL; + + compiler->buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE); + compiler->abuf = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE); + + if (!compiler->buf || !compiler->abuf) { + if (compiler->buf) + SLJIT_FREE(compiler->buf); + if (compiler->abuf) + SLJIT_FREE(compiler->abuf); + SLJIT_FREE(compiler); + return NULL; + } + + compiler->buf->next = NULL; + compiler->buf->used_size = 0; + compiler->abuf->next = NULL; + compiler->abuf->used_size = 0; + + compiler->temporaries = minus1[0]; + compiler->generals = minus1[0]; + compiler->local_size = 0; + compiler->size = 0; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + compiler->args = -1; +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->flags_saved = 0; +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + compiler->cpool = (sljit_uw*)SLJIT_MALLOC(CPOOL_SIZE * sizeof(sljit_uw) + CPOOL_SIZE * sizeof(sljit_ub)); + if (!compiler->cpool) { + SLJIT_FREE(compiler->buf); + SLJIT_FREE(compiler->abuf); + SLJIT_FREE(compiler); + return NULL; + } + compiler->cpool_unique = (sljit_ub*)(compiler->cpool + CPOOL_SIZE); + compiler->cpool_diff = 0xffffffff; + compiler->cpool_fill = 0; + compiler->patches = 0; +#endif + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->has_locals = 0; + compiler->delay_slot = UNMOVABLE_INS; +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + compiler->verbose = NULL; +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->skip_checks = 0; +#endif + +#if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT) + if (!compiler_initialized) { + init_compiler(); + compiler_initialized = 1; + } +#endif + + return compiler; +} + +void sljit_free_compiler(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + struct sljit_memory_fragment *curr; + + buf = compiler->buf; + while (buf) { + curr = buf; + buf = buf->next; + SLJIT_FREE(curr); + } + + buf = compiler->abuf; + while (buf) { + curr = buf; + buf = buf->next; + SLJIT_FREE(curr); + } + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + SLJIT_FREE(compiler->cpool); +#endif + SLJIT_FREE(compiler); +} + +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +void sljit_free_code(void* code) +{ + /* Remove thumb mode flag. */ + SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~0x1)); +} +#elif (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +void sljit_free_code(void* code) +{ + /* Resolve indirection. 
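
Taken together, sljit_create_compiler, sljit_free_compiler and the sljit_free_code variants above give the usual lifecycle (editor's sketch; the emit calls are elided):

static void *compile_example(void)
{
	struct sljit_compiler *compiler = sljit_create_compiler();
	void *code;

	if (!compiler)
		return NULL;
	/* ... sljit_emit_enter() and sljit_emit_op*() calls go here ... */
	code = sljit_generate_code(compiler);
	sljit_free_compiler(compiler);	/* the compiler is no longer needed */
	return code;	/* release later with sljit_free_code(code) */
}
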
*/ + code = (void*)(*(sljit_uw*)code); + SLJIT_FREE_EXEC(code); +} +#else +void sljit_free_code(void* code) +{ + SLJIT_FREE_EXEC(code); +} +#endif + +void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) +{ + if (SLJIT_LIKELY(!!jump) && SLJIT_LIKELY(!!label)) { + jump->flags &= ~JUMP_ADDR; + jump->flags |= JUMP_LABEL; + jump->u.label = label; + } +} + +void sljit_set_target(struct sljit_jump *jump, sljit_uw target) +{ + if (SLJIT_LIKELY(!!jump)) { + SLJIT_ASSERT(jump->flags & SLJIT_REWRITABLE_JUMP); + + jump->flags &= ~JUMP_LABEL; + jump->flags |= JUMP_ADDR; + jump->u.target = target; + } +} + +/* --------------------------------------------------------------------- */ +/* Private functions */ +/* --------------------------------------------------------------------- */ + +static void* ensure_buf(struct sljit_compiler *compiler, int size) +{ + sljit_ub *ret; + struct sljit_memory_fragment *new_frag; + + if (compiler->buf->used_size + size <= (int)(BUF_SIZE - sizeof(sljit_uw) - sizeof(void*))) { + ret = compiler->buf->memory + compiler->buf->used_size; + compiler->buf->used_size += size; + return ret; + } + new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE); + PTR_FAIL_IF_NULL(new_frag); + new_frag->next = compiler->buf; + compiler->buf = new_frag; + new_frag->used_size = size; + return new_frag->memory; +} + +static void* ensure_abuf(struct sljit_compiler *compiler, int size) +{ + sljit_ub *ret; + struct sljit_memory_fragment *new_frag; + + if (compiler->abuf->used_size + size <= (int)(ABUF_SIZE - sizeof(sljit_uw) - sizeof(void*))) { + ret = compiler->abuf->memory + compiler->abuf->used_size; + compiler->abuf->used_size += size; + return ret; + } + new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE); + PTR_FAIL_IF_NULL(new_frag); + new_frag->next = compiler->abuf; + compiler->abuf = new_frag; + new_frag->used_size = size; + return new_frag->memory; +} + +void* sljit_alloc_memory(struct sljit_compiler *compiler, int size) +{ + CHECK_ERROR_PTR(); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + if (size <= 0 || size > 128) + return NULL; + size = (size + 7) & ~7; +#else + if (size <= 0 || size > 64) + return NULL; + size = (size + 3) & ~3; +#endif + return ensure_abuf(compiler, size); +} + +static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf = compiler->buf; + struct sljit_memory_fragment *prev = NULL; + struct sljit_memory_fragment *tmp; + + do { + tmp = buf->next; + buf->next = prev; + prev = buf; + buf = tmp; + } while (buf != NULL); + + compiler->buf = prev; +} + +static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compiler *compiler) +{ + label->next = NULL; + label->size = compiler->size; + if (compiler->last_label) + compiler->last_label->next = label; + else + compiler->labels = label; + compiler->last_label = label; +} + +static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, int flags) +{ + jump->next = NULL; + jump->flags = flags; + if (compiler->last_jump) + compiler->last_jump->next = jump; + else + compiler->jumps = jump; + compiler->last_jump = jump; +} + +static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_compiler *compiler) +{ + const_->next = NULL; + const_->addr = compiler->size; + if (compiler->last_const) + compiler->last_const->next = const_; + else + compiler->consts = const_; + compiler->last_const = const_; +} + +#define depends_on(exp, reg) \ + (((exp) & 
SLJIT_MEM) && (((exp) & 0xf) == reg || (((exp) >> 4) & 0xf) == reg)) + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) +#define FUNCTION_CHECK_OP() \ + SLJIT_ASSERT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \ + switch (GET_OPCODE(op)) { \ + case SLJIT_NOT: \ + case SLJIT_CLZ: \ + case SLJIT_AND: \ + case SLJIT_OR: \ + case SLJIT_XOR: \ + case SLJIT_SHL: \ + case SLJIT_LSHR: \ + case SLJIT_ASHR: \ + SLJIT_ASSERT(!(op & (SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C))); \ + break; \ + case SLJIT_NEG: \ + SLJIT_ASSERT(!(op & (SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_C))); \ + break; \ + case SLJIT_MUL: \ + SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_C))); \ + break; \ + case SLJIT_FCMP: \ + SLJIT_ASSERT(!(op & (SLJIT_INT_OP | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + SLJIT_ASSERT((op & (SLJIT_SET_E | SLJIT_SET_S))); \ + break; \ + case SLJIT_ADD: \ + SLJIT_ASSERT(!(op & (SLJIT_SET_S | SLJIT_SET_U))); \ + break; \ + case SLJIT_SUB: \ + break; \ + case SLJIT_ADDC: \ + case SLJIT_SUBC: \ + SLJIT_ASSERT(!(op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O))); \ + break; \ + default: \ + /* Nothing allowed */ \ + SLJIT_ASSERT(!(op & (SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + break; \ + } + +#define FUNCTION_CHECK_IS_REG(r) \ + ((r) == SLJIT_UNUSED || (r) == SLJIT_LOCALS_REG || \ + ((r) >= SLJIT_TEMPORARY_REG1 && (r) <= SLJIT_TEMPORARY_REG3 && (r) <= SLJIT_TEMPORARY_REG1 - 1 + compiler->temporaries) || \ + ((r) >= SLJIT_GENERAL_REG1 && (r) <= SLJIT_GENERAL_REG3 && (r) <= SLJIT_GENERAL_REG1 - 1 + compiler->generals)) \ + +#define FUNCTION_CHECK_SRC(p, i) \ + SLJIT_ASSERT(compiler->temporaries != -1 && compiler->generals != -1); \ + if (((p) >= SLJIT_TEMPORARY_REG1 && (p) <= SLJIT_TEMPORARY_REG1 - 1 + compiler->temporaries) || \ + ((p) >= SLJIT_GENERAL_REG1 && (p) <= SLJIT_GENERAL_REG1 - 1 + compiler->generals) || \ + (p) == SLJIT_LOCALS_REG) \ + SLJIT_ASSERT(i == 0); \ + else if ((p) == SLJIT_IMM) \ + ; \ + else if ((p) & SLJIT_MEM) { \ + SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & 0xf)); \ + if ((p) & 0xf0) { \ + SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(((p) >> 4) & 0xf)); \ + SLJIT_ASSERT(((p) & 0xf0) != (SLJIT_LOCALS_REG << 4) && !(i & ~0x3)); \ + } else \ + SLJIT_ASSERT((((p) >> 4) & 0xf) == 0); \ + SLJIT_ASSERT(((p) >> 9) == 0); \ + } \ + else \ + SLJIT_ASSERT_STOP(); + +#define FUNCTION_CHECK_DST(p, i) \ + SLJIT_ASSERT(compiler->temporaries != -1 && compiler->generals != -1); \ + if (((p) >= SLJIT_TEMPORARY_REG1 && (p) <= SLJIT_TEMPORARY_REG1 - 1 + compiler->temporaries) || \ + ((p) >= SLJIT_GENERAL_REG1 && (p) <= SLJIT_GENERAL_REG1 - 1 + compiler->generals) || \ + (p) == SLJIT_UNUSED) \ + SLJIT_ASSERT(i == 0); \ + else if ((p) & SLJIT_MEM) { \ + SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & 0xf)); \ + if ((p) & 0xf0) { \ + SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(((p) >> 4) & 0xf)); \ + SLJIT_ASSERT(((p) & 0xf0) != (SLJIT_LOCALS_REG << 4) && !(i & ~0x3)); \ + } else \ + SLJIT_ASSERT((((p) >> 4) & 0xf) == 0); \ + SLJIT_ASSERT(((p) >> 9) == 0); \ + } \ + else \ + SLJIT_ASSERT_STOP(); + +#define FUNCTION_FCHECK(p, i) \ + if ((p) >= SLJIT_FLOAT_REG1 && (p) <= SLJIT_FLOAT_REG4) \ + SLJIT_ASSERT(i == 0); \ + else if ((p) & SLJIT_MEM) { \ + SLJIT_ASSERT(FUNCTION_CHECK_IS_REG((p) & 0xf)); \ + if ((p) & 0xf0) { \ + SLJIT_ASSERT(FUNCTION_CHECK_IS_REG(((p) >> 4) & 0xf)); \ + SLJIT_ASSERT(((p) & 0xf0) != (SLJIT_LOCALS_REG << 4) && !(i & ~0x3)); \ + } else \ + SLJIT_ASSERT((((p) >> 4) & 0xf) == 
0); \ + SLJIT_ASSERT(((p) >> 9) == 0); \ + } \ + else \ + SLJIT_ASSERT_STOP(); + +#define FUNCTION_CHECK_OP1() \ + if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) { \ + SLJIT_ASSERT(!GET_ALL_FLAGS(op)); \ + } \ + if (GET_OPCODE(op) >= SLJIT_MOVU && GET_OPCODE(op) <= SLJIT_MOVU_SI) { \ + SLJIT_ASSERT(!(src & SLJIT_MEM) || (src & 0xf) != SLJIT_LOCALS_REG); \ + SLJIT_ASSERT(!(dst & SLJIT_MEM) || (dst & 0xf) != SLJIT_LOCALS_REG); \ + if ((src & SLJIT_MEM) && (src & 0xf)) \ + SLJIT_ASSERT((dst & 0xf) != (src & 0xf) && ((dst >> 4) & 0xf) != (src & 0xf)); \ + } + +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + +void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose) +{ + compiler->verbose = verbose; +} + +static char* reg_names[] = { + (char*)"<noreg>", (char*)"tmp_r1", (char*)"tmp_r2", (char*)"tmp_r3", + (char*)"tmp_er1", (char*)"tmp_er2", (char*)"gen_r1", (char*)"gen_r2", + (char*)"gen_r3", (char*)"gen_er1", (char*)"gen_er2", (char*)"stack_r" +}; + +static char* freg_names[] = { + (char*)"<noreg>", (char*)"float_r1", (char*)"float_r2", (char*)"float_r3", (char*)"float_r4" +}; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#ifdef _WIN64 + #define SLJIT_PRINT_D "I64" +#else + #define SLJIT_PRINT_D "l" +#endif +#else + #define SLJIT_PRINT_D "" +#endif + +#define sljit_verbose_param(p, i) \ + if ((p) & SLJIT_IMM) \ + fprintf(compiler->verbose, "#%"SLJIT_PRINT_D"d", (i)); \ + else if ((p) & SLJIT_MEM) { \ + if ((p) & 0xf) { \ + if (i) { \ + if (((p) >> 4) & 0xf) \ + fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & 0xF], reg_names[((p) >> 4)& 0xF], 1 << (i)); \ + else \ + fprintf(compiler->verbose, "[%s + #%"SLJIT_PRINT_D"d]", reg_names[(p) & 0xF], (i)); \ + } \ + else { \ + if (((p) >> 4) & 0xf) \ + fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & 0xF], reg_names[((p) >> 4)& 0xF]); \ + else \ + fprintf(compiler->verbose, "[%s]", reg_names[(p) & 0xF]); \ + } \ + } \ + else \ + fprintf(compiler->verbose, "[#%"SLJIT_PRINT_D"d]", (i)); \ + } else \ + fprintf(compiler->verbose, "%s", reg_names[p]); +#define sljit_verbose_fparam(p, i) \ + if ((p) & SLJIT_MEM) { \ + if ((p) & 0xf) { \ + if (i) { \ + if (((p) >> 4) & 0xf) \ + fprintf(compiler->verbose, "[%s + %s * %d]", reg_names[(p) & 0xF], reg_names[((p) >> 4)& 0xF], 1 << (i)); \ + else \ + fprintf(compiler->verbose, "[%s + #%"SLJIT_PRINT_D"d]", reg_names[(p) & 0xF], (i)); \ + } \ + else { \ + if (((p) >> 4) & 0xF) \ + fprintf(compiler->verbose, "[%s + %s]", reg_names[(p) & 0xF], reg_names[((p) >> 4)& 0xF]); \ + else \ + fprintf(compiler->verbose, "[%s]", reg_names[(p) & 0xF]); \ + } \ + } \ + else \ + fprintf(compiler->verbose, "[#%"SLJIT_PRINT_D"d]", (i)); \ + } else \ + fprintf(compiler->verbose, "%s", freg_names[p]); + +static SLJIT_CONST char* op_names[] = { + /* op0 */ + (char*)"breakpoint", (char*)"nop", + /* op1 */ + (char*)"mov", (char*)"mov.ub", (char*)"mov.sb", (char*)"mov.uh", + (char*)"mov.sh", (char*)"mov.ui", (char*)"mov.si", (char*)"movu", + (char*)"movu.ub", (char*)"movu.sb", (char*)"movu.uh", (char*)"movu.sh", + (char*)"movu.ui", (char*)"movu.si", (char*)"not", (char*)"neg", + (char*)"clz", + /* op2 */ + (char*)"add", (char*)"addc", (char*)"sub", (char*)"subc", + (char*)"mul", (char*)"and", (char*)"or", (char*)"xor", + (char*)"shl", (char*)"lshr", (char*)"ashr", + /* fop1 */ + (char*)"fcmp", (char*)"fmov", (char*)"fneg", (char*)"fabs", + /* fop2 */ + (char*)"fadd", (char*)"fsub", (char*)"fmul", 
(char*)"fdiv" +}; + +static char* jump_names[] = { + (char*)"c_equal", (char*)"c_not_equal", + (char*)"c_less", (char*)"c_greater_equal", + (char*)"c_greater", (char*)"c_less_equal", + (char*)"c_sig_less", (char*)"c_sig_greater_equal", + (char*)"c_sig_greater", (char*)"c_sig_less_equal", + (char*)"c_overflow", (char*)"c_not_overflow", + (char*)"c_mul_overflow", (char*)"c_mul_not_overflow", + (char*)"c_float_equal", (char*)"c_float_not_equal", + (char*)"c_float_less", (char*)"c_float_greater_equal", + (char*)"c_float_greater", (char*)"c_float_less_equal", + (char*)"c_float_nan", (char*)"c_float_not_nan", + (char*)"jump", + (char*)"call0", (char*)"call1", (char*)"call2", (char*)"call3" +}; + +#endif + +/* --------------------------------------------------------------------- */ +/* Arch dependent */ +/* --------------------------------------------------------------------- */ + +static SLJIT_INLINE void check_sljit_generate_code(struct sljit_compiler *compiler) +{ +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + struct sljit_jump *jump; +#endif + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + + SLJIT_ASSERT(compiler->size > 0); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + jump = compiler->jumps; + while (jump) { + /* All jumps have target. */ + SLJIT_ASSERT(jump->flags & (JUMP_LABEL | JUMP_ADDR)); + jump = jump->next; + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(temporaries); + SLJIT_UNUSED_ARG(generals); + SLJIT_UNUSED_ARG(local_size); + + SLJIT_ASSERT(args >= 0 && args <= 3); + SLJIT_ASSERT(temporaries >= 0 && temporaries <= SLJIT_NO_TMP_REGISTERS); + SLJIT_ASSERT(generals >= 0 && generals <= SLJIT_NO_GEN_REGISTERS); + SLJIT_ASSERT(args <= generals); + SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, " enter args=%d temporaries=%d generals=%d local_size=%d\n", args, temporaries, generals, local_size); +#endif +} + +static SLJIT_INLINE void check_sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(temporaries); + SLJIT_UNUSED_ARG(generals); + SLJIT_UNUSED_ARG(local_size); + + SLJIT_ASSERT(args >= 0 && args <= 3); + SLJIT_ASSERT(temporaries >= 0 && temporaries <= SLJIT_NO_TMP_REGISTERS); + SLJIT_ASSERT(generals >= 0 && generals <= SLJIT_NO_GEN_REGISTERS); + SLJIT_ASSERT(args <= generals); + SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, " fake_enter args=%d temporaries=%d generals=%d local_size=%d\n", args, temporaries, generals, local_size); +#endif +} + +static SLJIT_INLINE void check_sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + /* If debug and verbose are disabled, all arguments are unused. 
*/ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (src != SLJIT_UNUSED) { + FUNCTION_CHECK_SRC(src, srcw); + } + else + SLJIT_ASSERT(srcw == 0); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " return "); + sljit_verbose_param(src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(temporaries); + SLJIT_UNUSED_ARG(generals); + SLJIT_UNUSED_ARG(local_size); + + SLJIT_ASSERT(args >= 0 && args <= 3); + SLJIT_ASSERT(temporaries >= 0 && temporaries <= SLJIT_NO_TMP_REGISTERS); + SLJIT_ASSERT(generals >= 0 && generals <= SLJIT_NO_GEN_REGISTERS); + SLJIT_ASSERT(args <= generals); + SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->temporaries = temporaries; + compiler->generals = generals; + FUNCTION_CHECK_DST(dst, dstw); + compiler->temporaries = -1; + compiler->generals = -1; +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fast_enter "); + sljit_verbose_param(dst, dstw); + fprintf(compiler->verbose, " args=%d temporaries=%d generals=%d local_size=%d\n", args, temporaries, generals, local_size); + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + FUNCTION_CHECK_SRC(src, srcw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fast_return "); + sljit_verbose_param(src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_op0(struct sljit_compiler *compiler, int op) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + + SLJIT_ASSERT(op >= SLJIT_BREAKPOINT && op <= SLJIT_NOP); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, " %s\n", op_names[op]); +#endif +} + +static SLJIT_INLINE void check_sljit_emit_op1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + + SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + FUNCTION_CHECK_OP(); + FUNCTION_CHECK_SRC(src, srcw); + FUNCTION_CHECK_DST(dst, dstw); + FUNCTION_CHECK_OP1(); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)], + !(op & SLJIT_SET_E) ? 
"" : "E", !(op & SLJIT_SET_S) ? "" : "S", !(op & SLJIT_SET_U) ? "" : "U", !(op & SLJIT_SET_O) ? "" : "O", !(op & SLJIT_SET_C) ? "" : "C", !(op & SLJIT_KEEP_FLAGS) ? "" : "K"); + sljit_verbose_param(dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_op2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + return; + } +#endif + + SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ASHR); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + FUNCTION_CHECK_OP(); + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op_names[GET_OPCODE(op)], + !(op & SLJIT_SET_E) ? "" : "E", !(op & SLJIT_SET_S) ? "" : "S", !(op & SLJIT_SET_U) ? "" : "U", !(op & SLJIT_SET_O) ? "" : "O", !(op & SLJIT_SET_C) ? "" : "C", !(op & SLJIT_KEEP_FLAGS) ? "" : "K"); + sljit_verbose_param(dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_fop1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + + SLJIT_ASSERT(sljit_is_fpu_available()); + SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_FCMP && GET_OPCODE(op) <= SLJIT_FABS); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + FUNCTION_CHECK_OP(); + FUNCTION_FCHECK(src, srcw); + FUNCTION_FCHECK(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s ", op_names[GET_OPCODE(op)], + !(op & SLJIT_SET_E) ? "" : "E", !(op & SLJIT_SET_S) ? "" : "S"); + sljit_verbose_fparam(dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_fop2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + /* If debug and verbose are disabled, all arguments are unused. 
*/ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + + SLJIT_ASSERT(sljit_is_fpu_available()); + SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_FADD && GET_OPCODE(op) <= SLJIT_FDIV); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + FUNCTION_CHECK_OP(); + FUNCTION_FCHECK(src1, src1w); + FUNCTION_FCHECK(src2, src2w); + FUNCTION_FCHECK(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s ", op_names[GET_OPCODE(op)]); + sljit_verbose_fparam(dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_label(struct sljit_compiler *compiler) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, "label:\n"); +#endif +} + +static SLJIT_INLINE void check_sljit_emit_jump(struct sljit_compiler *compiler, int type) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + return; + } +#endif + + SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP))); + SLJIT_ASSERT((type & 0xff) >= SLJIT_C_EQUAL && (type & 0xff) <= SLJIT_CALL3); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, " jump%s <%s>\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : "R", jump_names[type & 0xff]); +#endif +} + +static SLJIT_INLINE void check_sljit_emit_cmp(struct sljit_compiler *compiler, int type, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + + SLJIT_ASSERT(!(type & ~(0xff | SLJIT_INT_OP | SLJIT_REWRITABLE_JUMP))); + SLJIT_ASSERT((type & 0xff) >= SLJIT_C_EQUAL && (type & 0xff) <= SLJIT_C_SIG_LESS_EQUAL); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %scmp%s <%s> ", !(type & SLJIT_INT_OP) ? "" : "i", !(type & SLJIT_REWRITABLE_JUMP) ? "" : "R", jump_names[type & 0xff]); + sljit_verbose_param(src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw) +{ + /* If debug and verbose are disabled, all arguments are unused. 
*/ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + FUNCTION_CHECK_SRC(src, srcw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " ijump <%s> ", jump_names[type]); + sljit_verbose_param(src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(type); + + SLJIT_ASSERT(type >= SLJIT_C_EQUAL && type < SLJIT_JUMP); + SLJIT_ASSERT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_OR); + SLJIT_ASSERT(GET_ALL_FLAGS(op) == 0 || GET_ALL_FLAGS(op) == SLJIT_SET_E || GET_ALL_FLAGS(op) == SLJIT_KEEP_FLAGS); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " cond_set%s%s <%s> ", !(op & SLJIT_SET_E) ? "" : "E", + !(op & SLJIT_KEEP_FLAGS) ? "" : "K", op_names[GET_OPCODE(op)]); + sljit_verbose_param(dst, dstw); + fprintf(compiler->verbose, ", <%s>\n", jump_names[type]); + } +#endif +} + +static SLJIT_INLINE void check_sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value) +{ + /* If debug and verbose are disabled, all arguments are unused. */ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(init_value); + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " const "); + sljit_verbose_param(dst, dstw); + fprintf(compiler->verbose, ", #%"SLJIT_PRINT_D"d\n", init_value); + } +#endif +} + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + #include "sljitNativeX86_common.c" +#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + #include "sljitNativeX86_common.c" +#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + #include "sljitNativeARM_v5.c" +#elif (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + #include "sljitNativeARM_v5.c" +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + #include "sljitNativeARM_Thumb2.c" +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + #include "sljitNativePPC_common.c" +#elif (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + #include "sljitNativePPC_common.c" +#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + #include "sljitNativeMIPS_common.c" +#endif + +#if !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, int type, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + /* Default compare for most architectures. */ + int flags, tmp_src, condition; + sljit_w tmp_srcw; + + CHECK_ERROR_PTR(); + check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w); + + condition = type & 0xff; + if (SLJIT_UNLIKELY((src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM))) { + /* Immediate is preferred as second argument by most architectures.
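+ For example, a hypothetical "less(imm, reg)" comparison is rewritten + below as "greater(reg, imm)": the operands are swapped and the condition + is mirrored, so the emitted jump still tests the same relation.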
*/ + switch (condition) { + case SLJIT_C_LESS: + condition = SLJIT_C_GREATER; + break; + case SLJIT_C_GREATER_EQUAL: + condition = SLJIT_C_LESS_EQUAL; + break; + case SLJIT_C_GREATER: + condition = SLJIT_C_LESS; + break; + case SLJIT_C_LESS_EQUAL: + condition = SLJIT_C_GREATER_EQUAL; + break; + case SLJIT_C_SIG_LESS: + condition = SLJIT_C_SIG_GREATER; + break; + case SLJIT_C_SIG_GREATER_EQUAL: + condition = SLJIT_C_SIG_LESS_EQUAL; + break; + case SLJIT_C_SIG_GREATER: + condition = SLJIT_C_SIG_LESS; + break; + case SLJIT_C_SIG_LESS_EQUAL: + condition = SLJIT_C_SIG_GREATER_EQUAL; + break; + } + type = condition | (type & (SLJIT_INT_OP | SLJIT_REWRITABLE_JUMP)); + tmp_src = src1; + src1 = src2; + src2 = tmp_src; + tmp_srcw = src1w; + src1w = src2w; + src2w = tmp_srcw; + } + + if (condition <= SLJIT_C_NOT_ZERO) + flags = SLJIT_SET_E; + else if (condition <= SLJIT_C_LESS_EQUAL) + flags = SLJIT_SET_U; + else + flags = SLJIT_SET_S; + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->skip_checks = 1; +#endif + PTR_FAIL_IF(sljit_emit_op2(compiler, SLJIT_SUB | flags | (type & SLJIT_INT_OP), + SLJIT_UNUSED, 0, src1, src1w, src2, src2w)); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->skip_checks = 1; +#endif + return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP)); +} +#endif + +#else /* SLJIT_CONFIG_UNSUPPORTED */ + +/* Empty function bodies for those machines, which are not (yet) supported. */ + +SLJIT_CONST char* sljit_get_platform_name() +{ + return "unsupported"; +} + +struct sljit_compiler* sljit_create_compiler(void) +{ + SLJIT_ASSERT_STOP(); + return NULL; +} + +void sljit_free_compiler(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_ASSERT_STOP(); +} + +void* sljit_alloc_memory(struct sljit_compiler *compiler, int size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(size); + SLJIT_ASSERT_STOP(); + return NULL; +} + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(verbose); + SLJIT_ASSERT_STOP(); +} +#endif + +void* sljit_generate_code(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_ASSERT_STOP(); + return NULL; +} + +void sljit_free_code(void* code) +{ + SLJIT_UNUSED_ARG(code); + SLJIT_ASSERT_STOP(); +} + +int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(temporaries); + SLJIT_UNUSED_ARG(generals); + SLJIT_UNUSED_ARG(local_size); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(temporaries); + SLJIT_UNUSED_ARG(generals); + SLJIT_UNUSED_ARG(local_size); + SLJIT_ASSERT_STOP(); +} + +int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(args); + 
SLJIT_UNUSED_ARG(temporaries); + SLJIT_UNUSED_ARG(generals); + SLJIT_UNUSED_ARG(local_size); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +int sljit_emit_op0(struct sljit_compiler *compiler, int op) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +int sljit_emit_op1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +int sljit_emit_op2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +int sljit_is_fpu_available(void) +{ + SLJIT_ASSERT_STOP(); + return 0; +} + +int sljit_emit_fop1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +int sljit_emit_fop2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_ASSERT_STOP(); + return NULL; +} + +struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_ASSERT_STOP(); + return NULL; +} + +struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, int type, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_ASSERT_STOP(); + return NULL; +} + +void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) +{ + SLJIT_UNUSED_ARG(jump); + SLJIT_UNUSED_ARG(label); + SLJIT_ASSERT_STOP(); +} + +void sljit_set_target(struct sljit_jump *jump, sljit_uw target) +{ + SLJIT_UNUSED_ARG(jump); + SLJIT_UNUSED_ARG(target); + SLJIT_ASSERT_STOP(); +} + +int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + 
SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(type); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w initval) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(initval); + SLJIT_ASSERT_STOP(); + return NULL; +} + +void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + SLJIT_UNUSED_ARG(addr); + SLJIT_UNUSED_ARG(new_addr); + SLJIT_ASSERT_STOP(); +} + +void sljit_set_const(sljit_uw addr, sljit_w new_constant) +{ + SLJIT_UNUSED_ARG(addr); + SLJIT_UNUSED_ARG(new_constant); + SLJIT_ASSERT_STOP(); +} + +#endif diff --git a/sljit/sljitLir.h b/sljit/sljitLir.h new file mode 100644 index 0000000..6e26d06 --- /dev/null +++ b/sljit/sljitLir.h @@ -0,0 +1,737 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SLJIT_LIR_H_ +#define _SLJIT_LIR_H_ + +/* + ------------------------------------------------------------------------ + Stack-Less JIT compiler for multiple architectures (x86, ARM, PowerPC) + ------------------------------------------------------------------------ + + Short description + Advantages: + - The execution can be continued from any LIR instruction + In other words, jump into and out of the code is safe + - Both target of (conditional) jump and call instructions + and constants can be dynamically modified during runtime + - although it is not suggested to do it frequently + - very effective to cache an important value once + - A fixed stack space can be allocated for local variables + - The compiler is thread-safe + Disadvantages: + - Limited number of registers (only 6+4 integer registers, max 3+2 + temporary and max 3+2 general, and 4 floating point registers) + In practice: + - This approach is very effective for interpreters + - One of the general registers typically points to a stack interface + - It can jump to any exception handler anytime (even for another + function. It is safe for SLJIT.) 
+ - Fast paths can be modified at runtime, reflecting the changes + of the fastest execution path of the dynamic language + - SLJIT supports complex memory addressing modes + - mainly position independent code + - Optimizations (perhaps later) + - Only for basic blocks (when no labels are inserted between LIR instructions) +*/ + +#if !(defined SLJIT_NO_DEFAULT_CONFIG && SLJIT_NO_DEFAULT_CONFIG) +#include "sljitConfig.h" +#endif +#include "sljitConfigInternal.h" + +/* --------------------------------------------------------------------- */ +/* Error codes */ +/* --------------------------------------------------------------------- */ + +/* Indicates no error. */ +#define SLJIT_SUCCESS 0 +/* After the call of sljit_generate_code(), the error code of the compiler + is set to this value to avoid future sljit calls (in debug mode at least). + The compiler should be freed after sljit_generate_code(). */ +#define SLJIT_ERR_COMPILED 1 +/* Cannot allocate non-executable memory. */ +#define SLJIT_ERR_ALLOC_FAILED 2 +/* Cannot allocate executable memory. + Only for sljit_generate_code() */ +#define SLJIT_ERR_EX_ALLOC_FAILED 3 +/* Return value for the SLJIT_CONFIG_UNSUPPORTED empty architecture. */ +#define SLJIT_ERR_UNSUPPORTED 4 + +/* --------------------------------------------------------------------- */ +/* Registers */ +/* --------------------------------------------------------------------- */ + +#define SLJIT_UNUSED 0 + +#define SLJIT_TEMPORARY_REG1 1 +#define SLJIT_TEMPORARY_REG2 2 +#define SLJIT_TEMPORARY_REG3 3 +/* Note: Extra Registers cannot be used for memory addressing. */ +/* Note: on x86-32, these registers are emulated (using stack loads & stores). */ +#define SLJIT_TEMPORARY_EREG1 4 +#define SLJIT_TEMPORARY_EREG2 5 + +#define SLJIT_GENERAL_REG1 6 +#define SLJIT_GENERAL_REG2 7 +#define SLJIT_GENERAL_REG3 8 +/* Note: Extra Registers cannot be used for memory addressing. */ +/* Note: on x86-32, these registers are emulated (using stack loads & stores). */ +#define SLJIT_GENERAL_EREG1 9 +#define SLJIT_GENERAL_EREG2 10 + +/* Read-only register (cannot be the destination of an operation). */ +/* Note: SLJIT_MEM2( ... , SLJIT_LOCALS_REG) is not supported (x86 limitation). */ +/* Note: SLJIT_LOCALS_REG is not necessarily the real stack pointer. See sljit_emit_enter. */ +#define SLJIT_LOCALS_REG 11 + +/* Number of registers. */ +#define SLJIT_NO_TMP_REGISTERS 5 +#define SLJIT_NO_GEN_REGISTERS 5 +#define SLJIT_NO_REGISTERS 11 + +/* Return with machine word. */ + +#define SLJIT_RETURN_REG SLJIT_TEMPORARY_REG1 + +/* x86 prefers temporary registers for special purposes. If other + registers are used for such purposes, it costs a small performance + penalty. It doesn't matter on other archs. */ + +#define SLJIT_PREF_SHIFT_REG SLJIT_TEMPORARY_REG3 + +/* --------------------------------------------------------------------- */ +/* Floating point registers */ +/* --------------------------------------------------------------------- */ + +/* Note: SLJIT_UNUSED as destination is not valid for floating point + operations, since they cannot be used for setting flags. */ + +/* Floating point operations are performed on double precision values.
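+ A short usage sketch (operand choices are illustrative only): + + sljit_emit_fop1(compiler, SLJIT_FNEG, SLJIT_FLOAT_REG1, 0, + SLJIT_MEM1(SLJIT_LOCALS_REG), 0); + + loads the double at the start of the locals area, negates it, and leaves + the result in SLJIT_FLOAT_REG1.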
*/ + +#define SLJIT_FLOAT_REG1 1 +#define SLJIT_FLOAT_REG2 2 +#define SLJIT_FLOAT_REG3 3 +#define SLJIT_FLOAT_REG4 4 + +/* --------------------------------------------------------------------- */ +/* Main structures and functions */ +/* --------------------------------------------------------------------- */ + +struct sljit_memory_fragment { + struct sljit_memory_fragment *next; + sljit_uw used_size; + sljit_ub memory[1]; +}; + +struct sljit_label { + struct sljit_label *next; + sljit_uw addr; + /* The maximum size difference. */ + sljit_uw size; +}; + +struct sljit_jump { + struct sljit_jump *next; + sljit_uw addr; + sljit_w flags; + union { + sljit_uw target; + struct sljit_label* label; + } u; +}; + +struct sljit_const { + struct sljit_const *next; + sljit_uw addr; +}; + +struct sljit_compiler { + int error; + + struct sljit_label *labels; + struct sljit_jump *jumps; + struct sljit_const *consts; + struct sljit_label *last_label; + struct sljit_jump *last_jump; + struct sljit_const *last_const; + + struct sljit_memory_fragment *buf; + struct sljit_memory_fragment *abuf; + + /* Used local registers. */ + int temporaries; + /* Used general registers. */ + int generals; + /* Local stack size. */ + int local_size; + /* Code size. */ + sljit_uw size; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + int args; + int temporaries_start; + int generals_start; +#endif + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + int mode32; +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + int flags_saved; +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + /* Constant pool handling. */ + sljit_uw *cpool; + sljit_ub *cpool_unique; + sljit_uw cpool_diff; + sljit_uw cpool_fill; + /* General fields. */ + /* Contains pointer, "ldr pc, [...]" pairs. */ + sljit_uw patches; +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + /* Temporary fields. */ + sljit_uw shift_imm; + int cache_arg; + sljit_w cache_argw; +#endif + +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + int cache_arg; + sljit_w cache_argw; +#endif + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + int has_locals; + sljit_w imm; + int cache_arg; + sljit_w cache_argw; +#endif + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + int has_locals; + int delay_slot; + int cache_arg; + sljit_w cache_argw; +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + FILE* verbose; +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + int skip_checks; +#endif +}; + +/* --------------------------------------------------------------------- */ +/* Main functions */ +/* --------------------------------------------------------------------- */ + +/* Creates an sljit compiler. + Returns NULL if failed. */ +struct sljit_compiler* sljit_create_compiler(void); +/* Free everything except the codes. */ +void sljit_free_compiler(struct sljit_compiler *compiler); + +static SLJIT_INLINE int sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; } + +/* + Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit, + and <= 128 bytes on 64 bit architectures. The memory area is owned by the compiler, + and freed by sljit_free_compiler. The returned pointer is sizeof(sljit_w) aligned. 
+ Excellent for allocating small blocks during compilation, with no need to worry + about freeing them. The size is enough to contain at most 16 pointers. + If the size is outside of this range, the function returns NULL, + but this return value does not indicate that there is no more memory (does + not set the compiler to out-of-memory status). +*/ +void* sljit_alloc_memory(struct sljit_compiler *compiler, int size); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +/* Passing NULL disables verbose. */ +void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose); +#endif + +void* sljit_generate_code(struct sljit_compiler *compiler); +void sljit_free_code(void* code); + +/* Instruction generation. Returns with error code. */ + +/* + Entry instruction. The instruction has "args" number of arguments + and will use the first "generals" number of general registers. + The arguments are passed into the general registers (arg1 to general_reg1, and so on). + Thus, "args" must be less than or equal to "generals". An extra local_size bytes of + stack space is allocated for the jit code (local_size must be less than or equal to + SLJIT_MAX_LOCAL_SIZE), which can be accessed through SLJIT_LOCALS_REG (see + the notes there). SLJIT_LOCALS_REG is not necessarily the real stack pointer! + It just points somewhere in the stack if local_size > 0 (!). Thus, the only + thing which is known is that the memory area between SLJIT_LOCALS_REG and + SLJIT_LOCALS_REG + local_size is a valid stack area if local_size > 0 +*/ + +/* Note: multiple calls of this function overwrite the previous call. */ + +#define SLJIT_MAX_LOCAL_SIZE 65536 + +int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size); + +/* Since sljit_emit_return (and many asserts) uses variables which are initialized + by sljit_emit_enter, a simple return is not possible if these variables are not + initialized. sljit_fake_enter does not emit any instruction; it just initializes + those variables. */ + +/* Note: multiple calls of this function overwrite the previous call. */ + +void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size); + +/* Return from jit. See below the possible values for src and srcw. */ +int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw); + +/* Really fast calling method for utility functions inside sljit (see SLJIT_FAST_CALL). + All registers and even the stack frame are passed to the callee. The return address is + preserved in dst/dstw by sljit_emit_fast_enter, and sljit_emit_fast_return can + use it as the return address later. */ + +/* Note: only for sljit specific, non-ABI-compliant calls. Fast, since only a few machine instructions + are needed. Excellent for small utility functions, where saving general registers and setting up + a new stack frame would be too expensive. However, it is still possible to return + to the address of the caller (or anywhere else). */ + +/* Note: flags are not changed (unlike sljit_emit_enter / sljit_emit_return). */ + +/* Note: although sljit_emit_fast_return could be replaced by an ijump, it is not suggested, + since many architectures do clever branch prediction on call / return instruction pairs.
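+ A minimal sketch of the intended pattern (register and argument choices + are illustrative only): + + sljit_emit_fast_enter(compiler, SLJIT_GENERAL_REG3, 0, 1, 2, 3, 0); + ... body of the utility function ... + sljit_emit_fast_return(compiler, SLJIT_GENERAL_REG3, 0); + + where the caller reaches the function with sljit_emit_jump(compiler, SLJIT_FAST_CALL) + or an equivalent sljit_emit_ijump.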
*/ + +int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size); +int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw); + +/* + Source and destination values for arithmetical instructions + imm - a simple immediate value (cannot be used as a destination) + reg - any of the registers (immediate argument must be 0) + [imm] - absolute immediate memory address + [reg+imm] - indirect memory address + [reg+(reg<<imm)] - indirect indexed memory address (shift must be between 0 and 3) + useful for (byte, half, int, sljit_w) array access + (fully supported by both x86 and ARM architectures, and a cheap operation on others) +*/ + +/* + IMPORTANT NOTE: memory access MUST be naturally aligned. + length | alignment + ---------+----------- + byte | 1 byte (not aligned) + half | 2 byte (real_address & 0x1 == 0) + int | 4 byte (real_address & 0x3 == 0) + sljit_w | 4 byte if SLJIT_32BIT_ARCHITECTURE defined + | 8 byte if SLJIT_64BIT_ARCHITECTURE defined + (This is a strict requirement for embedded systems.) + + Note: different architectures have different addressing limitations. + Thus sljit may generate several instructions for other addressing modes. + x86: all addressing modes supported, but write-back is not supported + (requires an extra instruction). On x86-64 only 32 bit signed + integers are supported by the architecture. + arm: [reg+imm] supported for small immediates (-4095 <= imm <= 4095 + or -255 <= imm <= 255 for loading signed bytes, any halves or doubles) + [reg+(reg<<imm)] is supported or requires only two instructions + Write back is limited to small immediates on Thumb2 + ppc: [reg+imm], -65535 <= imm <= 65535. 64 bit moves require immediates + divisible by 4. [reg+reg] supported, write-back supported + [reg+(reg<<imm)] (imm != 0) is cheap (requires two instructions) +*/ + +/* Register output: simply the name of the register. + For destination, you can use SLJIT_UNUSED as well. */ +#define SLJIT_MEM 0x100 +#define SLJIT_MEM0() (SLJIT_MEM) +#define SLJIT_MEM1(r1) (SLJIT_MEM | (r1)) +#define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 4)) +#define SLJIT_IMM 0x200 + +/* Set 32 bit operation mode (I) on 64 bit CPUs. The flag is totally ignored on + 32 bit CPUs. The arithmetic instruction uses only the lower 32 bit of the + input register(s), and sets the flags according to the 32 bit result. If the + destination is a register, the higher 32 bit of the result is undefined. + The addressing modes (SLJIT_MEM1/SLJIT_MEM2 macros) are unaffected by this flag. */ +#define SLJIT_INT_OP 0x100 + +/* Common CPU status flags for all architectures (x86, ARM, PPC) + - carry flag + - overflow flag + - zero flag + - negative/positive flag (depends on arch) + On MIPS, these flags are emulated in software. */ + +/* By default, the instructions may or may not set the CPU status flags. + Setting or keeping status flags can be forced with the following flags: */ + +/* Note: sljit tries to emit the minimum number of instructions. Using these + flags can increase the instruction count, so use them wisely to avoid unnecessary code generation. */ + +/* Set Equal (Zero) status flag (E). */ +#define SLJIT_SET_E 0x0200 +/* Set signed status flag (S). */ +#define SLJIT_SET_S 0x0400 +/* Set unsigned status flag (U). */ +#define SLJIT_SET_U 0x0800 +/* Set signed overflow flag (O). */ +#define SLJIT_SET_O 0x1000 +/* Set carry flag (C). + Note: Kinda unsigned overflow, but behaves differently on various cpus.
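+ For example, a double-word add on a 32 bit target can be sketched as + (register choices are illustrative only): + + sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_SET_C, SLJIT_TEMPORARY_REG1, 0, + SLJIT_TEMPORARY_REG1, 0, SLJIT_GENERAL_REG1, 0); + sljit_emit_op2(compiler, SLJIT_ADDC, SLJIT_TEMPORARY_REG2, 0, + SLJIT_TEMPORARY_REG2, 0, SLJIT_GENERAL_REG2, 0); + + where the first op sets the carry and SLJIT_ADDC consumes it for the high words.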
*/ +#define SLJIT_SET_C 0x2000 +/* Do not modify the flags (K). + Note: This flag cannot be combined with any other SLJIT_SET_* flag. */ +#define SLJIT_KEEP_FLAGS 0x4000 + +/* Notes: + - you cannot postpone conditional jump instructions except when it is noted that + the instruction does not set flags (See: SLJIT_KEEP_FLAGS). + - flag combinations: '|' means 'logical or'. */ + +/* Flags: - (never set any flags) + Note: breakpoint instruction is not supported by all architectures (namely ppc). + It falls back to SLJIT_NOP in those cases. */ +#define SLJIT_BREAKPOINT 0 +/* Flags: - (never set any flags) + Note: may or may not cause an extra cycle wait; + it can even decrease the runtime in a few cases. */ +#define SLJIT_NOP 1 + +int sljit_emit_op0(struct sljit_compiler *compiler, int op); + +/* Notes for MOV instructions: + U = Mov with update (post form). If source or destination is defined as SLJIT_MEM1(r1) + or SLJIT_MEM2(r1, r2), r1 is increased by the sum of r2 and the constant argument. + UB = unsigned byte (8 bit) + SB = signed byte (8 bit) + UH = unsigned half (16 bit) + SH = signed half (16 bit) */ + +/* Flags: - (never set any flags) */ +#define SLJIT_MOV 2 +/* Flags: - (never set any flags) */ +#define SLJIT_MOV_UB 3 +/* Flags: - (never set any flags) */ +#define SLJIT_MOV_SB 4 +/* Flags: - (never set any flags) */ +#define SLJIT_MOV_UH 5 +/* Flags: - (never set any flags) */ +#define SLJIT_MOV_SH 6 +/* Flags: - (never set any flags) */ +#define SLJIT_MOV_UI 7 +/* Flags: - (never set any flags) */ +#define SLJIT_MOV_SI 8 +/* Flags: - (never set any flags) */ +#define SLJIT_MOVU 9 +/* Flags: - (never set any flags) */ +#define SLJIT_MOVU_UB 10 +/* Flags: - (never set any flags) */ +#define SLJIT_MOVU_SB 11 +/* Flags: - (never set any flags) */ +#define SLJIT_MOVU_UH 12 +/* Flags: - (never set any flags) */ +#define SLJIT_MOVU_SH 13 +/* Flags: - (never set any flags) */ +#define SLJIT_MOVU_UI 14 +/* Flags: - (never set any flags) */ +#define SLJIT_MOVU_SI 15 +/* Flags: I | E | K */ +#define SLJIT_NOT 16 +/* Flags: I | E | O | K */ +#define SLJIT_NEG 17 +/* Count leading zeroes + Flags: I | E | K */ +#define SLJIT_CLZ 18 + +int sljit_emit_op1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw); + +/* Flags: I | E | O | C | K */ +#define SLJIT_ADD 19 +/* Flags: I | C | K */ +#define SLJIT_ADDC 20 +/* Flags: I | E | S | U | O | C | K */ +#define SLJIT_SUB 21 +/* Flags: I | C | K */ +#define SLJIT_SUBC 22 +/* Note: integer mul */ +/* Flags: I | O (see SLJIT_C_MUL_*) | K */ +#define SLJIT_MUL 23 +/* Flags: I | E | K */ +#define SLJIT_AND 24 +/* Flags: I | E | K */ +#define SLJIT_OR 25 +/* Flags: I | E | K */ +#define SLJIT_XOR 26 +/* Flags: I | E | K */ +#define SLJIT_SHL 27 +/* Flags: I | E | K */ +#define SLJIT_LSHR 28 +/* Flags: I | E | K */ +#define SLJIT_ASHR 29 + +int sljit_emit_op2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w); + +int sljit_is_fpu_available(void); + +/* Note: dst is the left and src is the right operand for SLJIT_FCMP. + Note: NaN check is always performed. If SLJIT_C_FLOAT_NAN is set, + the comparison result is unpredictable.
+ Flags: E | S (see SLJIT_C_FLOAT_*) */ +#define SLJIT_FCMP 30 +/* Flags: - (never set any flags) */ +#define SLJIT_FMOV 31 +/* Flags: - (never set any flags) */ +#define SLJIT_FNEG 32 +/* Flags: - (never set any flags) */ +#define SLJIT_FABS 33 + +int sljit_emit_fop1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw); + +/* Flags: - (never set any flags) */ +#define SLJIT_FADD 34 +/* Flags: - (never set any flags) */ +#define SLJIT_FSUB 35 +/* Flags: - (never set any flags) */ +#define SLJIT_FMUL 36 +/* Flags: - (never set any flags) */ +#define SLJIT_FDIV 37 + +int sljit_emit_fop2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w); + +/* Label and jump instructions. */ + +struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler); + +/* Invert conditional instruction: xor (^) with 0x1 */ +#define SLJIT_C_EQUAL 0 +#define SLJIT_C_ZERO 0 +#define SLJIT_C_NOT_EQUAL 1 +#define SLJIT_C_NOT_ZERO 1 + +#define SLJIT_C_LESS 2 +#define SLJIT_C_GREATER_EQUAL 3 +#define SLJIT_C_GREATER 4 +#define SLJIT_C_LESS_EQUAL 5 +#define SLJIT_C_SIG_LESS 6 +#define SLJIT_C_SIG_GREATER_EQUAL 7 +#define SLJIT_C_SIG_GREATER 8 +#define SLJIT_C_SIG_LESS_EQUAL 9 + +#define SLJIT_C_OVERFLOW 10 +#define SLJIT_C_NOT_OVERFLOW 11 + +#define SLJIT_C_MUL_OVERFLOW 12 +#define SLJIT_C_MUL_NOT_OVERFLOW 13 + +#define SLJIT_C_FLOAT_EQUAL 14 +#define SLJIT_C_FLOAT_NOT_EQUAL 15 +#define SLJIT_C_FLOAT_LESS 16 +#define SLJIT_C_FLOAT_GREATER_EQUAL 17 +#define SLJIT_C_FLOAT_GREATER 18 +#define SLJIT_C_FLOAT_LESS_EQUAL 19 +#define SLJIT_C_FLOAT_NAN 20 +#define SLJIT_C_FLOAT_NOT_NAN 21 + +#define SLJIT_JUMP 22 +#define SLJIT_CALL0 23 +#define SLJIT_CALL1 24 +#define SLJIT_CALL2 25 +#define SLJIT_CALL3 26 + +/* Fast calling method. See sljit_emit_fast_enter / sljit_emit_fast_return. */ +#define SLJIT_FAST_CALL SLJIT_CALL0 + +/* The target can be changed during runtime (see: sljit_set_jump_addr). */ +#define SLJIT_REWRITABLE_JUMP 0x1000 + +/* Emit a jump instruction. The destination is not set, only the type of the jump. + type must be between SLJIT_C_EQUAL and SLJIT_CALL3 + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP + Flags: - (never set any flags) for both conditional and unconditional jumps. + Flags: destroy all flags for calls. */ +struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type); + +/* Basic arithmetic comparison. On most architectures it is equal to + an SLJIT_SUB operation (with SLJIT_UNUSED destination) followed by a + sljit_emit_jump. However, some architectures (e.g. MIPS) may employ + special optimizations here. It is suggested to use this comparison + form when flags are unimportant. + type must be between SLJIT_C_EQUAL and SLJIT_C_SIG_LESS_EQUAL + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP or SLJIT_INT_OP + Flags: destroy flags. */ +struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, int type, + int src1, sljit_w src1w, + int src2, sljit_w src2w); + +/* Set the destination of the jump to this label. */ +void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label); +/* Only for jumps defined with SLJIT_REWRITABLE_JUMP flag. + Note: use sljit_emit_ijump for fixed jumps. */ +void sljit_set_target(struct sljit_jump *jump, sljit_uw target); + +/* Call function or jump anywhere.
Both direct and indirect forms are supported. + type must be between SLJIT_JUMP and SLJIT_CALL3 + Direct form: set src to SLJIT_IMM() and srcw to the address + Indirect form: any other valid addressing mode + Flags: - (never set any flags) for unconditional jumps. + Flags: destroy all flags for calls. */ +int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw); + +/* If op == SLJIT_MOV: + Set dst to 1 if the condition is fulfilled, 0 otherwise + type must be between SLJIT_C_EQUAL and SLJIT_C_FLOAT_NOT_NAN + Flags: - (never set any flags) + If op == SLJIT_OR + dst is used as src as well, and its lowest bit is set to 1 if + the condition is fulfilled; otherwise it does nothing. + Flags: E | K + Note: sljit_emit_cond_value does nothing if dst is SLJIT_UNUSED (regardless of op). */ +int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type); + +/* The constant can be changed at runtime (see: sljit_set_const) + Flags: - (never set any flags) */ +struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value); + +/* After the code generation, the addresses for label, jump and const instructions + are computed. Since these structures are freed by sljit_free_compiler, the + addresses must be preserved by the user program elsewhere. */ +static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { return label->addr; } +static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; } +static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; } + +/* Only the address is required to rewrite the code. */ +void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr); +void sljit_set_const(sljit_uw addr, sljit_w new_constant); + +/* --------------------------------------------------------------------- */ +/* Miscellaneous utility functions */ +/* --------------------------------------------------------------------- */ + +#define SLJIT_MAJOR_VERSION 0 +#define SLJIT_MINOR_VERSION 82 + +/* Get the human-readable name of the platform. + Can be useful for debugging on platforms like ARM, where ARM and + Thumb2 functions can be mixed. */ +SLJIT_CONST char* sljit_get_platform_name(void); + +/* Portable helper function to get an offset of a member. */ +#define SLJIT_OFFSETOF(base, member) ((sljit_w)(&((base*)0x10)->member) - 0x10) + +#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) +/* This global lock is useful when compiling common functions. */ +void SLJIT_CALL sljit_grab_lock(void); +void SLJIT_CALL sljit_release_lock(void); +#endif + +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) + +/* The sljit_stack is a utility feature of sljit, which allocates a + writable memory region between base (inclusive) and limit (exclusive). + Both base and limit are pointers, and base is always <= limit. + It uses the "address space reserve" feature + of modern operating systems. Basically we don't need to allocate a + huge memory block in one step for the worst case; we can start with + a smaller chunk and extend it later. Since the address space is + reserved, the data is never copied to other regions, thus it is safe + to store pointers here. */ + +/* Note: The base field is aligned to PAGE_SIZE bytes (usually 4k or more). + Note: stack growing should not happen in small steps: 4k, 16k or even + bigger growth is better. + Note: this structure may not be supported by all operating systems.
+ Some kind of fallback mechanism is suggested when SLJIT_UTIL_STACK + is not defined. */ + +struct sljit_stack { + /* User data, anything can be stored here. + Starting with the same value as base. */ + sljit_uw top; + /* These members are read only. */ + sljit_uw base; + sljit_uw limit; + sljit_uw max_limit; +}; + +/* Returns NULL if unsuccessful. + Note: limit and max_limit contain the size for stack allocation. + Note: the top field is initialized to base. */ +struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_w limit, sljit_w max_limit); +void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack); + +/* Can be used to increase (allocate) or decrease (free) the memory area. + Returns with a non-zero value if unsuccessful. If new_limit is greater than + max_limit, it will fail. It is very easy to implement a stack data structure, + since the growth ratio can be added to the current limit, and sljit_stack_resize + will do all the necessary checks. The fields of the stack are not changed if + sljit_stack_resize fails. */ +sljit_w SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_w new_limit); + +#endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */ + +#if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + +/* Get the entry address of a given function. */ +#define SLJIT_FUNC_OFFSET(func_name) ((sljit_w)func_name) + +#else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ + +/* All JIT related code should be placed in the same context (library, binary, etc.). */ + +#define SLJIT_FUNC_OFFSET(func_name) ((sljit_w)*(void**)func_name) + +/* For powerpc64, the function pointers point to a context descriptor. */ +struct sljit_function_context { + sljit_w addr; + sljit_w r2; + sljit_w r11; +}; + +/* Fill the context arguments using the addr and the function. + If func_ptr is NULL, it will not be set to the address of the context. + If addr is NULL, the function address also comes from the func pointer. */ +void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_w addr, void* func); + +#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ + +#endif /* _SLJIT_LIR_H_ */ diff --git a/sljit/sljitNativeARM_Thumb2.c b/sljit/sljitNativeARM_Thumb2.c new file mode 100644 index 0000000..de68d7b --- /dev/null +++ b/sljit/sljitNativeARM_Thumb2.c @@ -0,0 +1,1841 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_CONST char* sljit_get_platform_name() +{ + return "arm-thumb2"; +} + +/* Last register + 1. */ +#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) +#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) +#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) +#define TMP_PC (SLJIT_NO_REGISTERS + 4) + +#define TMP_FREG1 (SLJIT_FLOAT_REG4 + 1) +#define TMP_FREG2 (SLJIT_FLOAT_REG4 + 2) + +/* See sljit_emit_enter if you want to change them. */ +static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { + 0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15 +}; + +#define COPY_BITS(src, from, to, bits) \ + ((from >= to ? (src >> (from - to)) : (src << (to - from))) & (((1 << bits) - 1) << to)) + +/* Thumb16 encodings. */ +#define RD3(rd) (reg_map[rd]) +#define RN3(rn) (reg_map[rn] << 3) +#define RM3(rm) (reg_map[rm] << 6) +#define RDN3(rdn) (reg_map[rdn] << 8) +#define IMM3(imm) (imm << 6) +#define IMM8(imm) (imm) + +/* Thumb16 helpers. */ +#define SET_REGS44(rd, rn) \ + ((reg_map[rn] << 3) | (reg_map[rd] & 0x7) | ((reg_map[rd] & 0x8) << 4)) +#define IS_2_LO_REGS(reg1, reg2) \ + (reg_map[reg1] <= 7 && reg_map[reg2] <= 7) +#define IS_3_LO_REGS(reg1, reg2, reg3) \ + (reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7) + +/* Thumb32 encodings. */ +#define RD4(rd) (reg_map[rd] << 8) +#define RN4(rn) (reg_map[rn] << 16) +#define RM4(rm) (reg_map[rm]) +#define RT4(rt) (reg_map[rt] << 12) +#define DD4(dd) ((dd) << 12) +#define DN4(dn) ((dn) << 16) +#define DM4(dm) (dm) +#define IMM5(imm) \ + (COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6)) +#define IMM12(imm) \ + (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)) + +typedef sljit_ui sljit_ins; + +/* --------------------------------------------------------------------- */ +/* Instruction forms */ +/* --------------------------------------------------------------------- */ + +/* dot '.' changed to _ + I immediate form (possibly followed by number of immediate bits).
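+ For example: ADD_W encodes the wide Thumb32 "add.w", ADDSI3 is the 16 bit + "adds" with a 3 bit immediate, and MOV_WI is "mov.w" with a modified + immediate operand.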
*/ +#define ADCI 0xf1400000 +#define ADCS 0x4140 +#define ADC_W 0xeb400000 +#define ADD 0x4400 +#define ADDS 0x1800 +#define ADDSI3 0x1c00 +#define ADDSI8 0x3000 +#define ADD_W 0xeb000000 +#define ADDWI 0xf2000000 +#define ADD_SP 0xb000 +#define ADD_W 0xeb000000 +#define ADD_WI 0xf1000000 +#define ANDI 0xf0000000 +#define ANDS 0x4000 +#define AND_W 0xea000000 +#define ASRS 0x4100 +#define ASRSI 0x1000 +#define ASR_W 0xfa40f000 +#define ASR_WI 0xea4f0020 +#define BICI 0xf0200000 +#define BKPT 0xbe00 +#define BLX 0x4780 +#define BX 0x4700 +#define CLZ 0xfab0f080 +#define CMPI 0x2800 +#define CMP_W 0xebb00f00 +#define EORI 0xf0800000 +#define EORS 0x4040 +#define EOR_W 0xea800000 +#define IT 0xbf00 +#define LSLS 0x4080 +#define LSLSI 0x0000 +#define LSL_W 0xfa00f000 +#define LSL_WI 0xea4f0000 +#define LSRS 0x40c0 +#define LSRSI 0x0800 +#define LSR_W 0xfa20f000 +#define LSR_WI 0xea4f0010 +#define MOV 0x4600 +#define MOVSI 0x2000 +#define MOVT 0xf2c00000 +#define MOVW 0xf2400000 +#define MOV_WI 0xf04f0000 +#define MUL 0xfb00f000 +#define MVNS 0x43c0 +#define MVN_W 0xea6f0000 +#define MVN_WI 0xf06f0000 +#define NOP 0xbf00 +#define ORNI 0xf0600000 +#define ORRI 0xf0400000 +#define ORRS 0x4300 +#define ORR_W 0xea400000 +#define POP 0xbd00 +#define POP_W 0xe8bd0000 +#define PUSH 0xb500 +#define PUSH_W 0xe92d0000 +#define RSB_WI 0xf1c00000 +#define RSBSI 0x4240 +#define SBCI 0xf1600000 +#define SBCS 0x4180 +#define SBC_W 0xeb600000 +#define SMULL 0xfb800000 +#define STR_SP 0x9000 +#define SUBS 0x1a00 +#define SUBSI3 0x1e00 +#define SUBSI8 0x3800 +#define SUB_W 0xeba00000 +#define SUBWI 0xf2a00000 +#define SUB_SP 0xb080 +#define SUB_WI 0xf1a00000 +#define SXTB 0xb240 +#define SXTB_W 0xfa4ff080 +#define SXTH 0xb200 +#define SXTH_W 0xfa0ff080 +#define TST 0x4200 +#define UXTB 0xb2c0 +#define UXTB_W 0xfa5ff080 +#define UXTH 0xb280 +#define UXTH_W 0xfa1ff080 +#define VABS_F64 0xeeb00bc0 +#define VADD_F64 0xee300b00 +#define VCMP_F64 0xeeb40b40 +#define VDIV_F64 0xee800b00 +#define VMOV_F64 0xeeb00b40 +#define VMRS 0xeef1fa10 +#define VMUL_F64 0xee200b00 +#define VNEG_F64 0xeeb10b40 +#define VSTR 0xed000b00 +#define VSUB_F64 0xee300b40 + +static int push_inst16(struct sljit_compiler *compiler, sljit_ins inst) +{ + sljit_uh *ptr; + SLJIT_ASSERT(!(inst & 0xffff0000)); + + ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_uh)); + FAIL_IF(!ptr); + *ptr = inst; + compiler->size++; + return SLJIT_SUCCESS; +} + +static int push_inst32(struct sljit_compiler *compiler, sljit_ins inst) +{ + sljit_uh *ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr++ = inst >> 16; + *ptr = inst; + compiler->size += 2; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE int emit_imm32_const(struct sljit_compiler *compiler, int dst, sljit_uw imm) +{ + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) | + COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); + return push_inst32(compiler, MOVT | RD4(dst) | + COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); +} + +static SLJIT_INLINE void modify_imm32_const(sljit_uh* inst, sljit_uw new_imm) +{ + int dst = inst[1] & 0x0f00; + SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00)); + inst[0] = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1); + inst[1] = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff); + inst[2] = (MOVT >> 16) | 
COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1); + inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16); +} + +static SLJIT_INLINE int detect_jump_type(struct sljit_jump *jump, sljit_uh *code_ptr, sljit_uh *code) +{ + sljit_w diff; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return 0; + + if (jump->flags & JUMP_ADDR) { + /* Branch to ARM code is not optimized yet. */ + if (!(jump->u.target & 0x1)) + return 0; + diff = ((sljit_w)jump->u.target - (sljit_w)(code_ptr + 2)) >> 1; + } + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + diff = ((sljit_w)(code + jump->u.label->size) - (sljit_w)(code_ptr + 2)) >> 1; + } + + if (jump->flags & IS_CONDITIONAL) { + SLJIT_ASSERT(!(jump->flags & IS_BL)); + if (diff <= 127 && diff >= -128) { + jump->flags |= B_TYPE1; + return 5; + } + if (diff <= 524287 && diff >= -524288) { + jump->flags |= B_TYPE2; + return 4; + } + /* +1 comes from the prefix IT instruction. */ + diff--; + if (diff <= 8388607 && diff >= -8388608) { + jump->flags |= B_TYPE3; + return 3; + } + } + else if (jump->flags & IS_BL) { + if (diff <= 8388607 && diff >= -8388608) { + jump->flags |= BL_TYPE6; + return 3; + } + } + else { + if (diff <= 1023 && diff >= -1024) { + jump->flags |= B_TYPE4; + return 4; + } + if (diff <= 8388607 && diff >= -8388608) { + jump->flags |= B_TYPE5; + return 3; + } + } + + return 0; +} + +static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, int flush) +{ + sljit_uh* inst = (sljit_uh*)addr; + modify_imm32_const(inst, new_addr); + if (flush) { + SLJIT_CACHE_FLUSH(inst, inst + 3); + } +} + +static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump) +{ + int type = (jump->flags >> 4) & 0xf; + sljit_w diff; + sljit_uh *jump_inst; + int s, j1, j2; + + if (SLJIT_UNLIKELY(type == 0)) { + inline_set_jump_addr(jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); + return; + } + + if (jump->flags & JUMP_ADDR) { + SLJIT_ASSERT(jump->u.target & 0x1); + diff = ((sljit_w)jump->u.target - (sljit_w)(jump->addr + 4)) >> 1; + } + else + diff = ((sljit_w)(jump->u.label->addr) - (sljit_w)(jump->addr + 4)) >> 1; + jump_inst = (sljit_uh*)jump->addr; + + switch (type) { + case 1: + /* Encoding T1 of 'B' instruction */ + SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_CONDITIONAL)); + jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff); + return; + case 2: + /* Encoding T3 of 'B' instruction */ + SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_CONDITIONAL)); + jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1); + jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff); + return; + case 3: + SLJIT_ASSERT(jump->flags & IS_CONDITIONAL); + *jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8; + diff--; + type = 5; + break; + case 4: + /* Encoding T2 of 'B' instruction */ + SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_CONDITIONAL)); + jump_inst[0] = 0xe000 | (diff & 0x7ff); + return; + } + + SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608); + + /* Really complex instruction form for branches. */ + s = (diff >> 23) & 0x1; + j1 = (~(diff >> 21) ^ s) & 0x1; + j2 = (~(diff >> 22) ^ s) & 0x1; + jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10); + jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff); + + /* The others have a common form. 
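+ (type 5 is encoding T4 of 'B' and type 6 is encoding T1 of 'BL'; they + share the halfwords built above and differ only in the bits OR'ed in below.)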
*/ + if (type == 5) /* Encoding T4 of 'B' instruction */ + jump_inst[1] |= 0x9000; + else if (type == 6) /* Encoding T1 of 'BL' instruction */ + jump_inst[1] |= 0xd000; + else + SLJIT_ASSERT_STOP(); +} + +void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_uh *code; + sljit_uh *code_ptr; + sljit_uh *buf_ptr; + sljit_uh *buf_end; + sljit_uw half_count; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + check_sljit_generate_code(compiler); + reverse_buf(compiler); + + code = (sljit_uh*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_uh)); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + half_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + + do { + buf_ptr = (sljit_uh*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 1); + do { + *code_ptr = *buf_ptr++; + /* These structures are ordered by their address. */ + SLJIT_ASSERT(!label || label->size >= half_count); + SLJIT_ASSERT(!jump || jump->addr >= half_count); + SLJIT_ASSERT(!const_ || const_->addr >= half_count); + if (label && label->size == half_count) { + label->addr = ((sljit_uw)code_ptr) | 0x1; + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == half_count) { + jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_CONDITIONAL) ? 10 : 8); + code_ptr -= detect_jump_type(jump, code_ptr, code); + jump = jump->next; + } + if (const_ && const_->addr == half_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + code_ptr ++; + half_count ++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == half_count) { + label->addr = ((sljit_uw)code_ptr) | 0x1; + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr - code <= (int)compiler->size); + + jump = compiler->jumps; + while (jump) { + set_jump_instruction(jump); + jump = jump->next; + } + + SLJIT_CACHE_FLUSH(code, code_ptr); + compiler->error = SLJIT_ERR_COMPILED; + /* Set thumb mode flag. */ + return (void*)((sljit_uw)code | 0x1); +} + +#define INVALID_IMM 0x80000000 +static sljit_uw get_imm(sljit_uw imm) +{ + /* Thumb immediate form. */ + int counter; + + if (imm <= 0xff) + return imm; + + if ((imm & 0xffff) == (imm >> 16)) { + /* Some special cases. */ + if (!(imm & 0xff00)) + return (1 << 12) | (imm & 0xff); + if (!(imm & 0xff)) + return (2 << 12) | ((imm >> 8) & 0xff); + if ((imm & 0xff00) == ((imm & 0xff) << 8)) + return (3 << 12) | (imm & 0xff); + } + + /* Assembly optimization: count leading zeroes? */ + counter = 8; + if (!(imm & 0xffff0000)) { + counter += 16; + imm <<= 16; + } + if (!(imm & 0xff000000)) { + counter += 8; + imm <<= 8; + } + if (!(imm & 0xf0000000)) { + counter += 4; + imm <<= 4; + } + if (!(imm & 0xc0000000)) { + counter += 2; + imm <<= 2; + } + if (!(imm & 0x80000000)) { + counter += 1; + imm <<= 1; + } + /* Since imm >= 128, this must be true. */ + SLJIT_ASSERT(counter <= 31); + + if (imm & 0x00ffffff) + return INVALID_IMM; /* Cannot be encoded. 
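+ After normalization the value must fit into 8 consecutive bits, so for + example 0x17, 0x00ab00ab, 0xab00ab00, 0xabababab and 0x02300000 are + encodable, while 0x00012345 is not.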
*/ + + return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1); +} + +static int load_immediate(struct sljit_compiler *compiler, int dst, sljit_uw imm) +{ + sljit_uw tmp; + + if (imm >= 0x10000) { + tmp = get_imm(imm); + if (tmp != INVALID_IMM) + return push_inst32(compiler, MOV_WI | RD4(dst) | tmp); + tmp = get_imm(~imm); + if (tmp != INVALID_IMM) + return push_inst32(compiler, MVN_WI | RD4(dst) | tmp); + } + + /* set low 16 bits, set high 16 bits to 0. */ + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) | + COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); + + /* set high 16 bits if needed. */ + if (imm >= 0x10000) + return push_inst32(compiler, MOVT | RD4(dst) | + COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); + return SLJIT_SUCCESS; +} + +#define ARG1_IMM 0x0010000 +#define ARG2_IMM 0x0020000 +#define KEEP_FLAGS 0x0040000 +#define SET_MULOV 0x0080000 +/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */ +#define SET_FLAGS 0x0100000 +#define UNUSED_RETURN 0x0200000 +#define SLOW_DEST 0x0400000 +#define SLOW_SRC1 0x0800000 +#define SLOW_SRC2 0x1000000 + +static int emit_op_imm(struct sljit_compiler *compiler, int flags, int dst, sljit_uw arg1, sljit_uw arg2) +{ + /* dst must be register, TMP_REG1 + arg1 must be register, TMP_REG1, imm + arg2 must be register, TMP_REG2, imm */ + int reg; + sljit_uw imm; + + if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { + /* Both are immediates. */ + flags &= ~ARG1_IMM; + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + + if (flags & (ARG1_IMM | ARG2_IMM)) { + reg = (flags & ARG2_IMM) ? arg1 : arg2; + imm = (flags & ARG2_IMM) ? arg2 : arg1; + + switch (flags & 0xffff) { + case SLJIT_MOV: + SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1); + return load_immediate(compiler, dst, imm); + case SLJIT_NOT: + if (!(flags & SET_FLAGS)) + return load_immediate(compiler, dst, ~imm); + /* Since the flags should be set, we just fall back to the register mode. + Although I could do some clever things here, "NOT IMM" is not worth the effort. */ + break; + case SLJIT_CLZ: + /* No form with immediate operand.
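+ (the immediate is materialized into a temporary register by the common + fall-through path below)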
*/ + break; + case SLJIT_ADD: + if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) { + if (imm <= 0x7) + return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); + if (reg == dst && imm <= 0xff) + return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst)); + } + if (imm <= 0xfff && !(flags & SET_FLAGS)) + return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm)); + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_ADDC: + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_SUB: + if (flags & ARG2_IMM) { + if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) { + if (imm <= 0x7) + return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); + if (imm <= 0xff) { + if (reg == dst) + return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst)); + if (flags & UNUSED_RETURN) + return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg)); + } + } + if (imm <= 0xfff && !(flags & SET_FLAGS)) + return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm)); + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + } + else { + if (!(flags & KEEP_FLAGS) && imm == 0 && IS_2_LO_REGS(reg, dst)) + return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + } + break; + case SLJIT_SUBC: + if (flags & ARG2_IMM) { + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + } + break; + case SLJIT_MUL: + /* No form with immediate operand. */ + break; + case SLJIT_AND: + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, ANDI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + imm = get_imm(~((flags & ARG2_IMM) ? arg2 : arg1)); + if (imm != INVALID_IMM) + return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_OR: + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + imm = get_imm(~((flags & ARG2_IMM) ? 
arg2 : arg1)); + if (imm != INVALID_IMM) + return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_XOR: + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_SHL: + if (flags & ARG2_IMM) { + imm &= 0x1f; + if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + } + break; + case SLJIT_LSHR: + if (flags & ARG2_IMM) { + imm &= 0x1f; + if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + } + break; + case SLJIT_ASHR: + if (flags & ARG2_IMM) { + imm &= 0x1f; + if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + } + break; + default: + SLJIT_ASSERT_STOP(); + break; + } + + if (flags & ARG2_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG2, arg2)); + arg2 = TMP_REG2; + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + } + + /* Both arguments are registers. */ + switch (flags & 0xffff) { + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + case SLJIT_MOVU: + case SLJIT_MOVU_UI: + case SLJIT_MOVU_SI: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + return push_inst16(compiler, MOV | SET_REGS44(dst, arg2)); + case SLJIT_MOV_UB: + case SLJIT_MOVU_UB: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_SB: + case SLJIT_MOVU_SB: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_UH: + case SLJIT_MOVU_UH: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_SH: + case SLJIT_MOVU_SH: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2)); + case SLJIT_NOT: + SLJIT_ASSERT(arg1 == TMP_REG1); + if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2)); + case SLJIT_CLZ: + SLJIT_ASSERT(arg1 == TMP_REG1); + FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2))); + if (flags & SET_FLAGS) { + if (reg_map[dst] <= 7) + return push_inst16(compiler, CMPI | RDN3(dst)); + return push_inst32(compiler, ADD_WI | SET_FLAGS | RN4(dst) | RD4(dst)); + } + return SLJIT_SUCCESS; + case SLJIT_ADD: + if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2)) + return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); + if (dst == arg1 && !(flags & SET_FLAGS)) + 
return push_inst16(compiler, ADD | SET_REGS44(dst, arg2)); + return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_ADDC: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_SUB: + if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2)) + return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2)); + return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_SUBC: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MUL: + if (!(flags & SET_FLAGS)) + return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2)); + SLJIT_ASSERT(reg_map[TMP_REG2] <= 7 && dst != TMP_REG2); + FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2))); + /* cmp TMP_REG2, dst asr #31. */ + return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst)); + case SLJIT_AND: + if (!(flags & KEEP_FLAGS)) { + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2)); + if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2)) + return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2)); + } + return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_OR: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_XOR: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_SHL: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_LSHR: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_ASHR: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + } + + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +#define STORE 0x01 +#define SIGNED 0x02 + +#define WORD_SIZE 0x00 +#define BYTE_SIZE 0x04 +#define HALF_SIZE 0x08 + +#define UPDATE 0x10 +#define ARG_TEST 0x20 + +#define IS_WORD_SIZE(flags) (!(flags & (BYTE_SIZE | HALF_SIZE))) +#define OFFSET_CHECK(imm, shift) (!(argw & ~(imm << shift))) + +/* + 1st letter: + w = word + b = byte + h = half + + 2nd letter: + s = signed + u = unsigned + + 3rd letter: + l = load + s = store +*/ + +static SLJIT_CONST sljit_uw sljit_mem16[12] = { +/* w u l */ 0x5800 /* ldr */, +/* w u s */ 0x5000 /* str */, +/* w s l */ 0x5800 /* ldr */, +/* w s s */ 0x5000 /* str */, + +/* 
b u l */ 0x5c00 /* ldrb */,
+/* b u s */ 0x5400 /* strb */,
+/* b s l */ 0x5600 /* ldrsb */,
+/* b s s */ 0x5400 /* strb */,
+
+/* h u l */ 0x5a00 /* ldrh */,
+/* h u s */ 0x5200 /* strh */,
+/* h s l */ 0x5e00 /* ldrsh */,
+/* h s s */ 0x5200 /* strh */,
+};
+
+static SLJIT_CONST sljit_uw sljit_mem16_imm5[12] = {
+/* w u l */ 0x6800 /* ldr imm5 */,
+/* w u s */ 0x6000 /* str imm5 */,
+/* w s l */ 0x6800 /* ldr imm5 */,
+/* w s s */ 0x6000 /* str imm5 */,
+
+/* b u l */ 0x7800 /* ldrb imm5 */,
+/* b u s */ 0x7000 /* strb imm5 */,
+/* b s l */ 0x0000 /* not allowed */,
+/* b s s */ 0x7000 /* strb imm5 */,
+
+/* h u l */ 0x8800 /* ldrh imm5 */,
+/* h u s */ 0x8000 /* strh imm5 */,
+/* h s l */ 0x0000 /* not allowed */,
+/* h s s */ 0x8000 /* strh imm5 */,
+};
+
+#define MEM_IMM8	0xc00
+#define MEM_IMM12	0x800000
+static SLJIT_CONST sljit_uw sljit_mem32[12] = {
+/* w u l */ 0xf8500000 /* ldr.w */,
+/* w u s */ 0xf8400000 /* str.w */,
+/* w s l */ 0xf8500000 /* ldr.w */,
+/* w s s */ 0xf8400000 /* str.w */,
+
+/* b u l */ 0xf8100000 /* ldrb.w */,
+/* b u s */ 0xf8000000 /* strb.w */,
+/* b s l */ 0xf9100000 /* ldrsb.w */,
+/* b s s */ 0xf8000000 /* strb.w */,
+
+/* h u l */ 0xf8300000 /* ldrh.w */,
+/* h u s */ 0xf8200000 /* strh.w */,
+/* h s l */ 0xf9300000 /* ldrsh.w */,
+/* h s s */ 0xf8200000 /* strh.w */,
+};
+
+/* Helper function. Sets dst to reg + value using at most one instruction; the flags are not set. */
+static int emit_set_delta(struct sljit_compiler *compiler, int dst, int reg, sljit_w value)
+{
+	if (value >= 0) {
+		if (value <= 0xfff)
+			return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value));
+		value = get_imm(value);
+		if (value != INVALID_IMM)
+			return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | value);
+	}
+	else {
+		value = -value;
+		if (value <= 0xfff)
+			return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value));
+		value = get_imm(value);
+		if (value != INVALID_IMM)
+			return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | value);
+	}
+	return SLJIT_ERR_UNSUPPORTED;
+}
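+
+/* Editor's note, not part of the original patch: emit_set_delta either emits
+   exactly one ADD/SUB encoding or returns SLJIT_ERR_UNSUPPORTED without
+   emitting anything, so callers can probe it and fall back to a full 32-bit
+   immediate load. A minimal sketch of the calling convention used by
+   getput_arg and emit_fop_mem below:
+
+	if (emit_set_delta(compiler, TMP_REG3, reg, diff) != SLJIT_ERR_UNSUPPORTED)
+		FAIL_IF(compiler->error);
+	else
+		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
+
+   A successful emit_set_delta can still record an allocation error, which is
+   why compiler->error has to be checked afterwards. */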
+
+/* Can perform an operation using at most one instruction. */
+static int getput_arg_fast(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw)
+{
+	int tmp;
+
+	SLJIT_ASSERT(arg & SLJIT_MEM);
+
+	if (SLJIT_UNLIKELY(flags & UPDATE)) {
+		if ((arg & 0xf) && !(arg & 0xf0) && argw <= 0xff && argw >= -0xff) {
+			flags &= ~UPDATE;
+			arg &= 0xf;
+			if (SLJIT_UNLIKELY(flags & ARG_TEST))
+				return 1;
+
+			if (argw >= 0)
+				argw |= 0x200;
+			else
+				argw = -argw;
+			SLJIT_ASSERT(argw >= 0 && (argw & 0xff) <= 0xff);
+			FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw));
+			return -1;
+		}
+		return (flags & ARG_TEST) ? SLJIT_SUCCESS : 0;
+	}
+
+	if (SLJIT_UNLIKELY(arg & 0xf0)) {
+		argw &= 0x3;
+		tmp = (arg >> 4) & 0xf;
+		arg &= 0xf;
+		if (SLJIT_UNLIKELY(flags & ARG_TEST))
+			return 1;
+
+		if (!argw && IS_3_LO_REGS(reg, arg, tmp))
+			FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp)));
+		else
+			FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp) | (argw << 4)));
+		return -1;
+	}
+
+	if (!(arg & 0xf) || argw > 0xfff || argw < -0xff)
+		return (flags & ARG_TEST) ? SLJIT_SUCCESS : 0;
+
+	if (SLJIT_UNLIKELY(flags & ARG_TEST))
+		return 1;
+
+	arg &= 0xf;
+	if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
+		tmp = 3;
+		if (IS_WORD_SIZE(flags)) {
+			if (OFFSET_CHECK(0x1f, 2))
+				tmp = 2;
+		}
+		else if (flags & BYTE_SIZE) {
+			if (OFFSET_CHECK(0x1f, 0))
+				tmp = 0;
+		}
+		else {
+			SLJIT_ASSERT(flags & HALF_SIZE);
+			if (OFFSET_CHECK(0x1f, 1))
+				tmp = 1;
+		}
+
+		if (tmp != 3) {
+			FAIL_IF(push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - tmp))));
+			return -1;
+		}
+	}
+
+	/* SP based immediate. */
+	if (SLJIT_UNLIKELY(arg == SLJIT_LOCALS_REG) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) {
+		FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2)));
+		return -1;
+	}
+
+	if (argw >= 0)
+		FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
+	else
+		FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw));
+	return -1;
+}
+
+/* See getput_arg below.
+   Note: can_cache is called only for binary operators. Those
+   operators always use word arguments without write-back. */
+static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw)
+{
+	/* Simple operation except for updates. */
+	if ((arg & 0xf0) || !(next_arg & SLJIT_MEM))
+		return 0;
+
+	if (!(arg & 0xf)) {
+		if ((sljit_uw)(argw - next_argw) <= 0xfff || (sljit_uw)(next_argw - argw) <= 0xfff)
+			return 1;
+		return 0;
+	}
+
+	if (argw == next_argw)
+		return 1;
+
+	if (arg == next_arg && ((sljit_uw)(argw - next_argw) <= 0xfff || (sljit_uw)(next_argw - argw) <= 0xfff))
+		return 1;
+
+	return 0;
+}
+
+/* Emit the necessary instructions. See can_cache above. */
+static int getput_arg(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw, int next_arg, sljit_w next_argw)
+{
+	int tmp_r;
+	sljit_w tmp;
+
+	SLJIT_ASSERT(arg & SLJIT_MEM);
+	if (!(next_arg & SLJIT_MEM)) {
+		next_arg = 0;
+		next_argw = 0;
+	}
+
+	tmp_r = (flags & STORE) ? TMP_REG3 : reg;
+
+	if (SLJIT_UNLIKELY(flags & UPDATE)) {
+		flags &= ~UPDATE;
+		/* Update only applies if a base register exists. */
+		if (arg & 0xf) {
+			/* There is no caching here.
*/ + tmp = (arg & 0xf0) >> 4; + arg &= 0xf; + + if (!tmp) { + if (!(argw & ~0xfff)) { + FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw)); + return push_inst32(compiler, ADDWI | RD4(arg) | RN4(arg) | IMM12(argw)); + } + + if (compiler->cache_arg == SLJIT_MEM) { + if (argw == compiler->cache_argw) { + tmp = TMP_REG3; + argw = 0; + } + else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + compiler->cache_argw = argw; + tmp = TMP_REG3; + argw = 0; + } + } + + if (argw) { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + compiler->cache_arg = SLJIT_MEM; + compiler->cache_argw = argw; + tmp = TMP_REG3; + argw = 0; + } + } + + argw &= 0x3; + if (!argw && IS_3_LO_REGS(reg, arg, tmp)) { + FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp))); + return push_inst16(compiler, ADD | SET_REGS44(arg, tmp)); + } + FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp) | (argw << 4))); + return push_inst32(compiler, ADD_W | RD4(arg) | RN4(arg) | RM4(tmp) | (argw << 6)); + } + } + + SLJIT_ASSERT(!(arg & 0xf0)); + + if (compiler->cache_arg == arg) { + if (!((argw - compiler->cache_argw) & ~0xfff)) + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | (argw - compiler->cache_argw)); + if (!((compiler->cache_argw - argw) & ~0xff)) + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(TMP_REG3) | (compiler->cache_argw - argw)); + if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0); + } + } + + next_arg = (arg & 0xf) && (arg == next_arg); + arg &= 0xf; + if (arg && compiler->cache_arg == SLJIT_MEM && compiler->cache_argw == argw) + return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3)); + + compiler->cache_argw = argw; + if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + compiler->cache_arg = SLJIT_MEM | arg; + arg = 0; + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + compiler->cache_arg = SLJIT_MEM; + + if (next_arg) { + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, arg))); + compiler->cache_arg = SLJIT_MEM | arg; + arg = 0; + } + } + + if (arg) + return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3)); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0); +} + +static SLJIT_INLINE int emit_op_mem(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw) +{ + if (getput_arg_fast(compiler, flags, reg, arg, argw)) + return compiler->error; + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, flags, reg, arg, argw, 0, 0); +} + +int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + int size; + sljit_ins push; + + CHECK_ERROR(); + check_sljit_emit_enter(compiler, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + + push = (1 << 4); + if (generals >= 5) + push |= 1 << 11; + if (generals >= 4) + push |= 1 << 10; + if (generals >= 3) + push |= 1 << 8; + if (generals >= 2) + push |= 1 << 7; + if (generals >= 1) + push |= 1 
<< 6; + if (temporaries >= 5) + push |= 1 << 5; + FAIL_IF(generals >= 3 + ? push_inst32(compiler, PUSH_W | (1 << 14) | push) + : push_inst16(compiler, PUSH | push)); + + /* Stack must be aligned to 8 bytes: */ + size = (3 + generals) * sizeof(sljit_uw); + local_size += size; + local_size = (local_size + 7) & ~7; + local_size -= size; + compiler->local_size = local_size; + if (local_size > 0) { + if (local_size <= (127 << 2)) + FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2))); + else + FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, local_size)); + } + + if (args >= 1) + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_GENERAL_REG1, SLJIT_TEMPORARY_REG1))); + if (args >= 2) + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_GENERAL_REG2, SLJIT_TEMPORARY_REG2))); + if (args >= 3) + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_GENERAL_REG3, SLJIT_TEMPORARY_REG3))); + + return SLJIT_SUCCESS; +} + +void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + int size; + + CHECK_ERROR_VOID(); + check_sljit_fake_enter(compiler, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + + size = (3 + generals) * sizeof(sljit_uw); + local_size += size; + local_size = (local_size + 7) & ~7; + local_size -= size; + compiler->local_size = local_size; +} + +int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + sljit_ins pop; + + CHECK_ERROR(); + check_sljit_emit_return(compiler, src, srcw); + + if (src != SLJIT_UNUSED && src != SLJIT_RETURN_REG) { + if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REG3) + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_RETURN_REG, src))); + else + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, SLJIT_RETURN_REG, src, srcw)); + } + + if (compiler->local_size > 0) { + if (compiler->local_size <= (127 << 2)) + FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2))); + else + FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, compiler->local_size)); + } + + pop = (1 << 4); + if (compiler->generals >= 5) + pop |= 1 << 11; + if (compiler->generals >= 4) + pop |= 1 << 10; + if (compiler->generals >= 3) + pop |= 1 << 8; + if (compiler->generals >= 2) + pop |= 1 << 7; + if (compiler->generals >= 1) + pop |= 1 << 6; + if (compiler->temporaries >= 5) + pop |= 1 << 5; + return compiler->generals >= 3 + ? push_inst32(compiler, POP_W | (1 << 15) | pop) + : push_inst16(compiler, POP | pop); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +int sljit_emit_op0(struct sljit_compiler *compiler, int op) +{ + CHECK_ERROR(); + check_sljit_emit_op0(compiler, op); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + push_inst16(compiler, BKPT); + break; + case SLJIT_NOP: + push_inst16(compiler, NOP); + break; + } + + return SLJIT_SUCCESS; +} + +int sljit_emit_op1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + int op_type, dst_r, flags; + + CHECK_ERROR(); + check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + op_type = GET_OPCODE(op); + dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? 
dst : TMP_REG1; + + if (op_type >= SLJIT_MOV && op_type <= SLJIT_MOVU_SI) { + switch (op_type) { + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + flags = WORD_SIZE; + break; + case SLJIT_MOV_UB: + flags = BYTE_SIZE; + if (src & SLJIT_IMM) + srcw = (unsigned char)srcw; + break; + case SLJIT_MOV_SB: + flags = BYTE_SIZE | SIGNED; + if (src & SLJIT_IMM) + srcw = (signed char)srcw; + break; + case SLJIT_MOV_UH: + flags = HALF_SIZE; + if (src & SLJIT_IMM) + srcw = (unsigned short)srcw; + break; + case SLJIT_MOV_SH: + flags = HALF_SIZE | SIGNED; + if (src & SLJIT_IMM) + srcw = (signed short)srcw; + break; + case SLJIT_MOVU: + case SLJIT_MOVU_UI: + case SLJIT_MOVU_SI: + flags = WORD_SIZE | UPDATE; + break; + case SLJIT_MOVU_UB: + flags = BYTE_SIZE | UPDATE; + if (src & SLJIT_IMM) + srcw = (unsigned char)srcw; + break; + case SLJIT_MOVU_SB: + flags = BYTE_SIZE | SIGNED | UPDATE; + if (src & SLJIT_IMM) + srcw = (signed char)srcw; + break; + case SLJIT_MOVU_UH: + flags = HALF_SIZE | UPDATE; + if (src & SLJIT_IMM) + srcw = (unsigned short)srcw; + break; + case SLJIT_MOVU_SH: + flags = HALF_SIZE | SIGNED | UPDATE; + if (src & SLJIT_IMM) + srcw = (signed short)srcw; + break; + default: + SLJIT_ASSERT_STOP(); + flags = 0; + break; + } + + if (src & SLJIT_IMM) + FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw)); + else if (src & SLJIT_MEM) { + if (getput_arg_fast(compiler, flags, dst_r, src, srcw)) + FAIL_IF(compiler->error); + else + FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw)); + } else { + if (dst_r != TMP_REG1) + return emit_op_imm(compiler, op_type, dst_r, TMP_REG1, src); + dst_r = src; + } + + if (dst & SLJIT_MEM) { + if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw)) + return compiler->error; + else + return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0); + } + return SLJIT_SUCCESS; + } + + if (op_type == SLJIT_NEG) { +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, GET_FLAGS(op) | SLJIT_SUB, dst, dstw, SLJIT_IMM, 0, src, srcw); + } + + flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0); + if (src & SLJIT_MEM) { + if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src, srcw)) + FAIL_IF(compiler->error); + else + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw)); + src = TMP_REG2; + } + + if (src & SLJIT_IMM) + flags |= ARG2_IMM; + else + srcw = src; + + emit_op_imm(compiler, flags | op_type, dst_r, TMP_REG1, srcw); + + if (dst & SLJIT_MEM) { + if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw)) + return compiler->error; + else + return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0); + } + return SLJIT_SUCCESS; +} + +int sljit_emit_op2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + int dst_r, flags; + + CHECK_ERROR(); + check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG1; + flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ? 
KEEP_FLAGS : 0); + + if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, WORD_SIZE | STORE | ARG_TEST, TMP_REG1, dst, dstw)) + flags |= SLOW_DEST; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG1, src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + } + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw)); + + if (src1 & SLJIT_MEM) + src1 = TMP_REG1; + if (src2 & SLJIT_MEM) + src2 = TMP_REG2; + + if (src1 & SLJIT_IMM) + flags |= ARG1_IMM; + else + src1w = src1; + if (src2 & SLJIT_IMM) + flags |= ARG2_IMM; + else + src2w = src2; + + if (dst == SLJIT_UNUSED) + flags |= UNUSED_RETURN; + + if (GET_OPCODE(op) == SLJIT_MUL && (op & SLJIT_SET_O)) + flags |= SET_MULOV; + + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, WORD_SIZE | STORE, dst_r, dst, dstw); + return compiler->error; + } + return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, 0, 0); + } + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +int sljit_is_fpu_available(void) +{ + return 1; +} + +static int emit_fop_mem(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw) +{ + sljit_w tmp; + sljit_w inst = VSTR | ((flags & STORE) ? 0 : 0x00100000); + + SLJIT_ASSERT(arg & SLJIT_MEM); + + /* Fast loads and stores. 
*/ + if (SLJIT_UNLIKELY(arg & 0xf0)) { + FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG2) | RN4(arg & 0xf) | RM4((arg & 0xf0) >> 4) | ((argw & 0x3) << 6))); + arg = SLJIT_MEM | TMP_REG2; + argw = 0; + } + + if (arg & 0xf) { + if (!(argw & ~0x3fc)) + return push_inst32(compiler, inst | 0x800000 | RN4(arg & 0xf) | DD4(reg) | (argw >> 2)); + if (!(-argw & ~0x3fc)) + return push_inst32(compiler, inst | RN4(arg & 0xf) | DD4(reg) | (-argw >> 2)); + } + + SLJIT_ASSERT(!(arg & 0xf0)); + if (compiler->cache_arg == arg) { + tmp = argw - compiler->cache_argw; + if (!(tmp & ~0x3fc)) + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg) | (tmp >> 2)); + if (!(-tmp & ~0x3fc)) + return push_inst32(compiler, inst | RN4(TMP_REG3) | DD4(reg) | (-tmp >> 2)); + if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + compiler->cache_argw = argw; + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg)); + } + } + + compiler->cache_arg = arg; + compiler->cache_argw = argw; + + if (SLJIT_UNLIKELY(!(arg & 0xf))) + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + else if (emit_set_delta(compiler, TMP_REG3, arg & 0xf, argw) != SLJIT_ERR_UNSUPPORTED) + FAIL_IF(compiler->error); + else { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + if (arg & 0xf) + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, (arg & 0xf)))); + } + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg)); +} + +int sljit_emit_fop1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + int dst_r; + + CHECK_ERROR(); + check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (GET_OPCODE(op) == SLJIT_FCMP) { + if (dst & SLJIT_MEM) { + emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); + dst = TMP_FREG1; + } + if (src & SLJIT_MEM) { + emit_fop_mem(compiler, 0, TMP_FREG2, src, srcw); + src = TMP_FREG2; + } + FAIL_IF(push_inst32(compiler, VCMP_F64 | DD4(dst) | DM4(src))); + return push_inst32(compiler, VMRS); + } + + dst_r = (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? dst : TMP_FREG1; + if (src & SLJIT_MEM) { + emit_fop_mem(compiler, 0, dst_r, src, srcw); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_FMOV: + if (src != dst_r) + FAIL_IF(push_inst32(compiler, VMOV_F64 | DD4(dst_r) | DM4(src))); + break; + case SLJIT_FNEG: + FAIL_IF(push_inst32(compiler, VNEG_F64 | DD4(dst_r) | DM4(src))); + break; + case SLJIT_FABS: + FAIL_IF(push_inst32(compiler, VABS_F64 | DD4(dst_r) | DM4(src))); + break; + } + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, STORE, TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +int sljit_emit_fop2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + int dst_r; + + CHECK_ERROR(); + check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? 
dst : TMP_FREG1; + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, 0, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, 0, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + switch (GET_OPCODE(op)) { + case SLJIT_FADD: + FAIL_IF(push_inst32(compiler, VADD_F64 | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + case SLJIT_FSUB: + FAIL_IF(push_inst32(compiler, VSUB_F64 | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + case SLJIT_FMUL: + FAIL_IF(push_inst32(compiler, VMUL_F64 | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + case SLJIT_FDIV: + FAIL_IF(push_inst32(compiler, VDIV_F64 | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + } + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, STORE, TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +{ + int size; + + CHECK_ERROR(); + check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + + size = (3 + generals) * sizeof(sljit_uw); + local_size += size; + local_size = (local_size + 7) & ~7; + local_size -= size; + compiler->local_size = local_size; + + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) + return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3)); + else if (dst & SLJIT_MEM) { + if (getput_arg_fast(compiler, WORD_SIZE | STORE, TMP_REG3, dst, dstw)) + return compiler->error; + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, TMP_REG3))); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + CHECK_ERROR(); + check_sljit_emit_fast_return(compiler, src, srcw); + + if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src))); + else if (src & SLJIT_MEM) { + if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw)) + FAIL_IF(compiler->error); + else { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, 0, 0)); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, TMP_REG2))); + } + } + else if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, TMP_REG3, srcw)); + return push_inst16(compiler, BLX | RN3(TMP_REG3)); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_uw get_cc(int type) +{ + switch (type) { + case SLJIT_C_EQUAL: + case SLJIT_C_MUL_NOT_OVERFLOW: + case SLJIT_C_FLOAT_EQUAL: + return 0x0; + + case SLJIT_C_NOT_EQUAL: + case SLJIT_C_MUL_OVERFLOW: + case SLJIT_C_FLOAT_NOT_EQUAL: + return 0x1; + + case SLJIT_C_LESS: + case SLJIT_C_FLOAT_LESS: + return 0x3; + + case SLJIT_C_GREATER_EQUAL: + case SLJIT_C_FLOAT_GREATER_EQUAL: + return 0x2; + + case SLJIT_C_GREATER: + case SLJIT_C_FLOAT_GREATER: + return 0x8; + + case SLJIT_C_LESS_EQUAL: + case SLJIT_C_FLOAT_LESS_EQUAL: + return 0x9; + + case SLJIT_C_SIG_LESS: + return 0xb; + + case 
SLJIT_C_SIG_GREATER_EQUAL: + return 0xa; + + case SLJIT_C_SIG_GREATER: + return 0xc; + + case SLJIT_C_SIG_LESS_EQUAL: + return 0xd; + + case SLJIT_C_OVERFLOW: + case SLJIT_C_FLOAT_NAN: + return 0x6; + + case SLJIT_C_NOT_OVERFLOW: + case SLJIT_C_FLOAT_NOT_NAN: + return 0x7; + + default: /* SLJIT_JUMP */ + return 0xe; + } +} + +struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + check_sljit_emit_label(compiler); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type) +{ + struct sljit_jump *jump; + int cc; + + CHECK_ERROR_PTR(); + check_sljit_emit_jump(compiler, type); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + /* In ARM, we don't need to touch the arguments. */ + PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); + if (type < SLJIT_JUMP) { + jump->flags |= IS_CONDITIONAL; + cc = get_cc(type); + jump->flags |= cc << 8; + PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + } + + jump->addr = compiler->size; + if (type <= SLJIT_JUMP) + PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1))); + else { + jump->flags |= IS_BL; + PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1))); + } + + return jump; +} + +int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + check_sljit_emit_ijump(compiler, type, src, srcw); + + /* In ARM, we don't need to touch the arguments. */ + if (src & SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_CALL0) ? IS_BL : 0)); + jump->u.target = srcw; + + FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); + jump->addr = compiler->size; + if (type <= SLJIT_JUMP) + FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1))); + else + FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1))); + } + else { + if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) { + if (type <= SLJIT_JUMP) + return push_inst16(compiler, BX | RN3(src)); + else + return push_inst16(compiler, BLX | RN3(src)); + } + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? 
TMP_PC : TMP_REG1, src, srcw)); + if (type >= SLJIT_CALL0) + return push_inst16(compiler, BLX | RN3(TMP_REG1)); + } + return SLJIT_SUCCESS; +} + +int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type) +{ + int dst_r; + sljit_uw cc; + + CHECK_ERROR(); + check_sljit_emit_cond_value(compiler, op, dst, dstw, type); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + cc = get_cc(type); + if (GET_OPCODE(op) == SLJIT_OR && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + FAIL_IF(push_inst32(compiler, ORRI | RN4(dst) | RD4(dst) | 0x1)); + if (op & SLJIT_SET_E) { + if (reg_map[dst] <= 7) + return push_inst16(compiler, ORRS | RD3(dst) | RN3(dst)); + return push_inst32(compiler, ORR_W | SET_FLAGS | RD4(TMP_REG1) | RN4(dst) | RM4(dst)); + } + return SLJIT_SUCCESS; + } + + dst_r = TMP_REG2; + if (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && reg_map[dst] <= 7) + dst_r = dst; + + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); + FAIL_IF(push_inst16(compiler, MOVSI | 0x1 | RDN3(dst_r))); + FAIL_IF(push_inst16(compiler, MOVSI | 0x0 | RDN3(dst_r))); + + if (dst_r == TMP_REG2) { + if (GET_OPCODE(op) == SLJIT_OR) { +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG2, 0); + } + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw); + else + return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2)); + } + + return SLJIT_SUCCESS; +} + +struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value) +{ + struct sljit_const *const_; + int dst_r; + + CHECK_ERROR_PTR(); + check_sljit_emit_const(compiler, dst, dstw, init_value); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG1; + PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw)); + return const_; +} + +void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + inline_set_jump_addr(addr, new_addr, 1); +} + +void sljit_set_const(sljit_uw addr, sljit_w new_constant) +{ + sljit_uh* inst = (sljit_uh*)addr; + modify_imm32_const(inst, new_constant); + SLJIT_CACHE_FLUSH(inst, inst + 3); +} diff --git a/sljit/sljitNativeARM_v5.c b/sljit/sljitNativeARM_v5.c new file mode 100644 index 0000000..9cf17a2 --- /dev/null +++ b/sljit/sljitNativeARM_v5.c @@ -0,0 +1,2355 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_CONST char* sljit_get_platform_name()
+{
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+	return "arm-v7";
+#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+	return "arm-v5";
+#else
+#error "Internal error: Unknown ARM architecture"
+#endif
+}
+
+/* Last register + 1. */
+#define TMP_REG1	(SLJIT_NO_REGISTERS + 1)
+#define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
+#define TMP_REG3	(SLJIT_NO_REGISTERS + 3)
+#define TMP_PC		(SLJIT_NO_REGISTERS + 4)
+
+#define TMP_FREG1	(SLJIT_FLOAT_REG4 + 1)
+#define TMP_FREG2	(SLJIT_FLOAT_REG4 + 2)
+
+/* In ARM instruction words.
+   Cache lines are usually 32-byte aligned. */
+#define CONST_POOL_ALIGNMENT	8
+#define CONST_POOL_EMPTY	0xffffffff
+
+#define ALIGN_INSTRUCTION(ptr) \
+	(sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1))
+#define MAX_DIFFERENCE(max_diff) \
+	(((max_diff) / (int)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))
+
+/* See sljit_emit_enter if you want to change them. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
+	0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15
+};
+
+#define RM(rm) (reg_map[rm])
+#define RD(rd) (reg_map[rd] << 12)
+#define RN(rn) (reg_map[rn] << 16)
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                     */
+/* --------------------------------------------------------------------- */
+
+/* The instruction includes the AL condition.
+   INST_NAME - CONDITIONAL removes this flag. */
+#define COND_MASK	0xf0000000
+#define CONDITIONAL	0xe0000000
+#define PUSH_POOL	0xff000000
+
+/* DP - Data Processing instruction (use with EMIT_DATA_PROCESS_INS). */
+#define ADC_DP		0x5
+#define ADD_DP		0x4
+#define AND_DP		0x0
+#define B		0xea000000
+#define BIC_DP		0xe
+#define BL		0xeb000000
+#define BLX		0xe12fff30
+#define BX		0xe12fff10
+#define CLZ		0xe16f0f10
+#define CMP_DP		0xa
+#define DEBUGGER	0xe1200070
+#define EOR_DP		0x1
+#define MOV_DP		0xd
+#define MUL		0xe0000090
+#define MVN_DP		0xf
+#define NOP		0xe1a00000
+#define ORR_DP		0xc
+#define PUSH		0xe92d0000
+#define POP		0xe8bd0000
+#define RSB_DP		0x3
+#define RSC_DP		0x7
+#define SBC_DP		0x6
+#define SMULL		0xe0c00090
+#define SUB_DP		0x2
+#define VABS_F64	0xeeb00bc0
+#define VADD_F64	0xee300b00
+#define VCMP_F64	0xeeb40b40
+#define VDIV_F64	0xee800b00
+#define VMOV_F64	0xeeb00b40
+#define VMRS		0xeef1fa10
+#define VMUL_F64	0xee200b00
+#define VNEG_F64	0xeeb10b40
+#define VSTR		0xed000b00
+#define VSUB_F64	0xee300b40
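+
+/* Editor's sketch, not part of the original patch: the *_DP values above are
+   the 4-bit ARM data-processing opcodes. EMIT_DATA_PROCESS_INS (defined
+   later in this file) shifts them into bits 21-24 of an AL-conditional
+   instruction word. With the reg_map above, for example:
+
+	EMIT_DATA_PROCESS_INS(ADD_DP, 0, SLJIT_TEMPORARY_REG1,
+		SLJIT_TEMPORARY_REG2, RM(SLJIT_TEMPORARY_REG3))
+
+   evaluates to 0xe0810002, i.e. "add r0, r1, r2". */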
+
+#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
+/* ARMv7-specific instructions. */
+#define MOVW		0xe3000000
+#define MOVT		0xe3400000
+#define SXTB		0xe6af0070
+#define SXTH		0xe6bf0070
+#define UXTB		0xe6ef0070
+#define UXTH		0xe6ff0070
+#endif
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+
+static int push_cpool(struct sljit_compiler *compiler)
+{
+	/* Pushing the constant pool into the instruction stream. */
+	sljit_uw* inst;
+	sljit_uw* cpool_ptr;
+	sljit_uw* cpool_end;
+	int i;
+
+	/* The label could point to the address after the constant pool. */
+	if (compiler->last_label && compiler->last_label->size == compiler->size)
+		compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;
+
+	SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
+	inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+	FAIL_IF(!inst);
+	compiler->size++;
+	*inst = 0xff000000 | compiler->cpool_fill;
+
+	for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
+		inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+		FAIL_IF(!inst);
+		compiler->size++;
+		*inst = 0;
+	}
+
+	cpool_ptr = compiler->cpool;
+	cpool_end = cpool_ptr + compiler->cpool_fill;
+	while (cpool_ptr < cpool_end) {
+		inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+		FAIL_IF(!inst);
+		compiler->size++;
+		*inst = *cpool_ptr++;
+	}
+	compiler->cpool_diff = CONST_POOL_EMPTY;
+	compiler->cpool_fill = 0;
+	return SLJIT_SUCCESS;
+}
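+
+/* Editor's note, not part of the original patch: compiler->cpool_diff
+   remembers the buffer position of the oldest instruction that still waits
+   for a literal. A pc-relative LDR on ARM has a 12-bit byte offset, so the
+   pool must be flushed before it drifts more than 4095 bytes away; 4092 is
+   the largest word-aligned value, and MAX_DIFFERENCE subtracts a further
+   CONST_POOL_ALIGNMENT - 1 words for worst-case alignment padding. Hence
+   the guard repeated by the push_inst helpers below:
+
+	if (compiler->cpool_diff != CONST_POOL_EMPTY
+			&& compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))
+		FAIL_IF(push_cpool(compiler));
+*/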
+
+static int push_inst(struct sljit_compiler *compiler, sljit_uw inst)
+{
+	sljit_uw* ptr;
+
+	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
+		FAIL_IF(push_cpool(compiler));
+
+	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+	FAIL_IF(!ptr);
+	compiler->size++;
+	*ptr = inst;
+	return SLJIT_SUCCESS;
+}
+
+static int push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
+{
+	sljit_uw* ptr;
+	sljit_uw cpool_index = CPOOL_SIZE;
+	sljit_uw* cpool_ptr;
+	sljit_uw* cpool_end;
+	sljit_ub* cpool_unique_ptr;
+
+	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
+		FAIL_IF(push_cpool(compiler));
+	else if (compiler->cpool_fill > 0) {
+		cpool_ptr = compiler->cpool;
+		cpool_end = cpool_ptr + compiler->cpool_fill;
+		cpool_unique_ptr = compiler->cpool_unique;
+		do {
+			if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
+				cpool_index = cpool_ptr - compiler->cpool;
+				break;
+			}
+			cpool_ptr++;
+			cpool_unique_ptr++;
+		} while (cpool_ptr < cpool_end);
+	}
+
+	if (cpool_index == CPOOL_SIZE) {
+		/* Must allocate a new entry in the literal pool. */
+		if (compiler->cpool_fill < CPOOL_SIZE) {
+			cpool_index = compiler->cpool_fill;
+			compiler->cpool_fill++;
+		}
+		else {
+			FAIL_IF(push_cpool(compiler));
+			cpool_index = 0;
+			compiler->cpool_fill = 1;
+		}
+	}
+
+	SLJIT_ASSERT((inst & 0xfff) == 0);
+	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+	FAIL_IF(!ptr);
+	compiler->size++;
+	*ptr = inst | cpool_index;
+
+	compiler->cpool[cpool_index] = literal;
+	compiler->cpool_unique[cpool_index] = 0;
+	if (compiler->cpool_diff == CONST_POOL_EMPTY)
+		compiler->cpool_diff = compiler->size;
+	return SLJIT_SUCCESS;
+}
+
+static int push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
+{
+	sljit_uw* ptr;
+	if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
+		FAIL_IF(push_cpool(compiler));
+
+	SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
+	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+	FAIL_IF(!ptr);
+	compiler->size++;
+	*ptr = inst | compiler->cpool_fill;
+
+	compiler->cpool[compiler->cpool_fill] = literal;
+	compiler->cpool_unique[compiler->cpool_fill] = 1;
+	compiler->cpool_fill++;
+	if (compiler->cpool_diff == CONST_POOL_EMPTY)
+		compiler->cpool_diff = compiler->size;
+	return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE int prepare_blx(struct sljit_compiler *compiler)
+{
+	/* Place for at least two instructions (it does not matter whether the first one has a literal). */
+	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
+		return push_cpool(compiler);
+	return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE int emit_blx(struct sljit_compiler *compiler)
+{
+	/* Must tightly follow the previous instruction (so that it can be converted to a bl instruction). */
+	SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
+	return push_inst(compiler, BLX | RM(TMP_REG1));
+}
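+
+/* Editor's note, not part of the original patch: push_cpool does not place
+   the literals at their final location. It writes a marker word, alignment
+   padding and the raw literal values into the intermediate buffer:
+
+	*inst = 0xff000000 | compiler->cpool_fill;
+
+   The second pass in sljit_generate_code recognizes any word carrying the
+   PUSH_POOL tag (0xff000000), re-reads the pool size as
+   "cpool_size = *buf_ptr++ & ~PUSH_POOL", realigns the pool, and lets
+   patch_pc_relative_loads below rewrite the pc-relative loads to their
+   final offsets. */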
+
+static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
+{
+	sljit_uw diff;
+	sljit_uw ind;
+	sljit_uw counter = 0;
+	sljit_uw* clear_const_pool = const_pool;
+	sljit_uw* clear_const_pool_end = const_pool + cpool_size;
+
+	SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
+	/* Set the unused flag for all literals in the constant pool.
+	   Unused literals can belong to branches that were encoded as B or BL;
+	   the constant pool can be "compressed" by discarding these literals. */
+	while (clear_const_pool < clear_const_pool_end)
+		*clear_const_pool++ = (sljit_uw)(-1);
+
+	while (last_pc_patch < code_ptr) {
+		/* Data transfer instruction with Rn == r15. */
+		if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) {
+			diff = const_pool - last_pc_patch;
+			ind = (*last_pc_patch) & 0xfff;
+
+			/* Must be a load instruction with immediate offset. */
+			SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
+			if ((int)const_pool[ind] < 0) {
+				const_pool[ind] = counter;
+				ind = counter;
+				counter++;
+			}
+			else
+				ind = const_pool[ind];
+
+			SLJIT_ASSERT(diff >= 1);
+			if (diff >= 2 || ind > 0) {
+				diff = (diff + ind - 2) << 2;
+				SLJIT_ASSERT(diff <= 0xfff);
+				*last_pc_patch = (*last_pc_patch & ~0xfff) | diff;
+			}
+			else
+				*last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004;
+		}
+		last_pc_patch++;
+	}
+	return counter;
+}
+
+/* On some rare occasions we may need future patches. The probability is close to 0 in practice. */
+struct future_patch {
+	struct future_patch* next;
+	int index;
+	int value;
+};
+
+static SLJIT_INLINE int resolve_const_pool_index(struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
+{
+	int value;
+	struct future_patch *curr_patch, *prev_patch;
+
+	/* Using the values generated by patch_pc_relative_loads. */
+	if (!*first_patch)
+		value = (int)cpool_start_address[cpool_current_index];
+	else {
+		curr_patch = *first_patch;
+		prev_patch = 0;
+		while (1) {
+			if (!curr_patch) {
+				value = (int)cpool_start_address[cpool_current_index];
+				break;
+			}
+			if ((sljit_uw)curr_patch->index == cpool_current_index) {
+				value = curr_patch->value;
+				if (prev_patch)
+					prev_patch->next = curr_patch->next;
+				else
+					*first_patch = curr_patch->next;
+				SLJIT_FREE(curr_patch);
+				break;
+			}
+			prev_patch = curr_patch;
+			curr_patch = curr_patch->next;
+		}
+	}
+
+	if (value >= 0) {
+		if ((sljit_uw)value > cpool_current_index) {
+			curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch));
+			if (!curr_patch) {
+				while (*first_patch) {
+					curr_patch = *first_patch;
+					*first_patch = (*first_patch)->next;
+					SLJIT_FREE(curr_patch);
+				}
+				return SLJIT_ERR_ALLOC_FAILED;
+			}
+			curr_patch->next = *first_patch;
+			curr_patch->index = value;
+			curr_patch->value = cpool_start_address[value];
+			*first_patch = curr_patch;
+		}
+		cpool_start_address[value] = *buf_ptr;
+	}
+	return SLJIT_SUCCESS;
+}
+
+#else
+
+static int push_inst(struct sljit_compiler *compiler, sljit_uw inst)
+{
+	sljit_uw* ptr;
+
+	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
+	FAIL_IF(!ptr);
+	compiler->size++;
+	*ptr = inst;
+	return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE int emit_imm(struct sljit_compiler *compiler, int reg, sljit_w imm)
+{
+	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
+	return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
+}
+
+#endif
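+
+/* Editor's sketch, not part of the original patch: on ARMv7, emit_imm above
+   materializes a 32-bit constant with a MOVW/MOVT pair instead of a literal
+   pool entry. Each imm16 operand is packed as imm4:imm12 into bits 19-16
+   and 11-0 of the word, so emit_imm(compiler, reg, 0x12345678) expands to:
+
+	movw reg, #0x5678
+	movt reg, #0x1234
+*/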
+
+static SLJIT_INLINE int detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code)
+{
+	sljit_w diff;
+
+	if (jump->flags & SLJIT_REWRITABLE_JUMP)
+		return 0;
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+	if (jump->flags & IS_BL)
+		code_ptr--;
+
+	if (jump->flags & JUMP_ADDR)
+		diff = ((sljit_w)jump->u.target - (sljit_w)(code_ptr + 2));
+	else {
+		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+		diff = ((sljit_w)(code + jump->u.label->size) - (sljit_w)(code_ptr + 2));
+	}
+
+	/* Branches to Thumb code have not been optimized yet. */
+	if (diff & 0x3)
+		return 0;
+
+	diff >>= 2;
+	if (jump->flags & IS_BL) {
+		if (diff <= 0x01ffffff && diff >= -0x02000000) {
+			*code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
+			jump->flags |= PATCH_B;
+			return 1;
+		}
+	}
+	else {
+		if (diff <= 0x01ffffff && diff >= -0x02000000) {
+			*code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
+			jump->flags |= PATCH_B;
+		}
+	}
+#else
+	if (jump->flags & JUMP_ADDR)
+		diff = ((sljit_w)jump->u.target - (sljit_w)code_ptr);
+	else {
+		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+		diff = ((sljit_w)(code + jump->u.label->size) - (sljit_w)code_ptr);
+	}
+
+	/* Branches to Thumb code have not been optimized yet. */
+	if (diff & 0x3)
+		return 0;
+
+	diff >>= 2;
+	if (diff <= 0x01ffffff && diff >= -0x02000000) {
+		code_ptr -= 2;
+		*code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
+		jump->flags |= PATCH_B;
+		return 1;
+	}
+#endif
+	return 0;
+}
+
+static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, int flush)
+{
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+	sljit_uw *ptr = (sljit_uw*)addr;
+	sljit_uw *inst = (sljit_uw*)ptr[0];
+	sljit_uw mov_pc = ptr[1];
+	int bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
+	sljit_w diff = (sljit_w)(((sljit_w)new_addr - (sljit_w)(inst + 2)) >> 2);
+
+	if (diff <= 0x7fffff && diff >= -0x800000) {
+		/* Turn into a branch. */
+		if (!bl) {
+			inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
+			if (flush) {
+				SLJIT_CACHE_FLUSH(inst, inst + 1);
+			}
+		} else {
+			inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
+			inst[1] = NOP;
+			if (flush) {
+				SLJIT_CACHE_FLUSH(inst, inst + 2);
+			}
+		}
+	} else {
+		/* Get the position of the constant. */
+		if (mov_pc & (1 << 23))
+			ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
+		else
+			ptr = inst + 1;
+
+		if (*inst != mov_pc) {
+			inst[0] = mov_pc;
+			if (!bl) {
+				if (flush) {
+					SLJIT_CACHE_FLUSH(inst, inst + 1);
+				}
+			} else {
+				inst[1] = BLX | RM(TMP_REG1);
+				if (flush) {
+					SLJIT_CACHE_FLUSH(inst, inst + 2);
+				}
+			}
+		}
+		*ptr = new_addr;
+	}
+#else
+	sljit_uw *inst = (sljit_uw*)addr;
+	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
+	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
+	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);
+	if (flush) {
+		SLJIT_CACHE_FLUSH(inst, inst + 2);
+	}
+#endif
+}
+
+static sljit_uw get_immediate(sljit_uw imm);
+
+static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_w new_constant, int flush)
+{
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+	sljit_uw *ptr = (sljit_uw*)addr;
+	sljit_uw *inst = (sljit_uw*)ptr[0];
+	sljit_uw ldr_literal = ptr[1];
+	sljit_uw src2;
+
+	src2 = get_immediate(new_constant);
+	if (src2) {
+		*inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;
+		if (flush) {
+			SLJIT_CACHE_FLUSH(inst, inst + 1);
+		}
+		return;
+	}
+
+	src2 = get_immediate(~new_constant);
+	if (src2) {
+		*inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;
+		if (flush) {
+			SLJIT_CACHE_FLUSH(inst, inst + 1);
+		}
+		return;
+	}
+
+	if (ldr_literal & (1 << 23))
+		ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
+	else
+		ptr = inst + 1;
+
+	if (*inst != ldr_literal) {
+		*inst = ldr_literal;
+		if (flush) {
+			SLJIT_CACHE_FLUSH(inst, inst + 1);
+		}
+	}
+	*ptr = new_constant;
+#else
+	sljit_uw *inst = (sljit_uw*)addr;
+	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
+	inst[0] =
MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff); + inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff); + if (flush) { + SLJIT_CACHE_FLUSH(inst, inst + 2); + } +#endif +} + +void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_uw *code; + sljit_uw *code_ptr; + sljit_uw *buf_ptr; + sljit_uw *buf_end; + sljit_uw size; + sljit_uw word_count; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_uw cpool_size; + sljit_uw cpool_skip_alignment; + sljit_uw cpool_current_index; + sljit_uw *cpool_start_address; + sljit_uw *last_pc_patch; + struct future_patch *first_patch; +#endif + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + check_sljit_generate_code(compiler); + reverse_buf(compiler); + + /* Second code generation pass. */ +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + size = compiler->size + (compiler->patches << 1); + if (compiler->cpool_fill > 0) + size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1; +#else + size = compiler->size; +#endif + code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw)); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + cpool_size = 0; + cpool_skip_alignment = 0; + cpool_current_index = 0; + cpool_start_address = NULL; + first_patch = NULL; + last_pc_patch = code; +#endif + + code_ptr = code; + word_count = 0; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + + if (label && label->size == 0) { + label->addr = (sljit_uw)code; + label->size = 0; + label = label->next; + } + + do { + buf_ptr = (sljit_uw*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + word_count++; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (cpool_size > 0) { + if (cpool_skip_alignment > 0) { + buf_ptr++; + cpool_skip_alignment--; + } + else { + if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { + SLJIT_FREE_EXEC(code); + compiler->error = SLJIT_ERR_ALLOC_FAILED; + return NULL; + } + buf_ptr++; + if (++cpool_current_index >= cpool_size) { + SLJIT_ASSERT(!first_patch); + cpool_size = 0; + if (label && label->size == word_count) { + /* Points after the current instruction. */ + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + } + } + } + else if ((*buf_ptr & 0xff000000) != PUSH_POOL) { +#endif + *code_ptr = *buf_ptr++; + /* These structures are ordered by their address. */ + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (detect_jump_type(jump, code_ptr, code)) + code_ptr--; + jump->addr = (sljit_uw)code_ptr; +#else + jump->addr = (sljit_uw)(code_ptr - 2); + if (detect_jump_type(jump, code_ptr, code)) + code_ptr -= 2; +#endif + jump = jump->next; + } + if (label && label->size == word_count) { + /* code_ptr can be affected above. 
*/ + label->addr = (sljit_uw)(code_ptr + 1); + label->size = (code_ptr + 1) - code; + label = label->next; + } + if (const_ && const_->addr == word_count) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + const_->addr = (sljit_uw)code_ptr; +#else + const_->addr = (sljit_uw)(code_ptr - 1); +#endif + const_ = const_->next; + } + code_ptr++; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + } + else { + /* Fortunately, no need to shift. */ + cpool_size = *buf_ptr++ & ~PUSH_POOL; + SLJIT_ASSERT(cpool_size > 0); + cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1); + cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size); + if (cpool_current_index > 0) { + /* Unconditional branch. */ + *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); + code_ptr = cpool_start_address + cpool_current_index; + } + cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1; + cpool_current_index = 0; + last_pc_patch = code_ptr; + } +#endif + } while (buf_ptr < buf_end); + buf = buf->next; + } while (buf); + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + SLJIT_ASSERT(cpool_size == 0); + if (compiler->cpool_fill > 0) { + cpool_start_address = ALIGN_INSTRUCTION(code_ptr); + cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill); + if (cpool_current_index > 0) + code_ptr = cpool_start_address + cpool_current_index; + + buf_ptr = compiler->cpool; + buf_end = buf_ptr + compiler->cpool_fill; + cpool_current_index = 0; + while (buf_ptr < buf_end) { + if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { + SLJIT_FREE_EXEC(code); + compiler->error = SLJIT_ERR_ALLOC_FAILED; + return NULL; + } + buf_ptr++; + cpool_current_index++; + } + SLJIT_ASSERT(!first_patch); + } +#endif + + jump = compiler->jumps; + while (jump) { + buf_ptr = (sljit_uw*)jump->addr; + + if (jump->flags & PATCH_B) { + if (!(jump->flags & JUMP_ADDR)) { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(((sljit_w)jump->u.label->addr - (sljit_w)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_w)jump->u.label->addr - (sljit_w)(buf_ptr + 2)) >= -0x02000000); + *buf_ptr |= (((sljit_w)jump->u.label->addr - (sljit_w)(buf_ptr + 2)) >> 2) & 0x00ffffff; + } + else { + SLJIT_ASSERT(((sljit_w)jump->u.target - (sljit_w)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_w)jump->u.target - (sljit_w)(buf_ptr + 2)) >= -0x02000000); + *buf_ptr |= (((sljit_w)jump->u.target - (sljit_w)(buf_ptr + 2)) >> 2) & 0x00ffffff; + } + } + else if (jump->flags & SLJIT_REWRITABLE_JUMP) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + jump->addr = (sljit_uw)code_ptr; + code_ptr[0] = (sljit_uw)buf_ptr; + code_ptr[1] = *buf_ptr; + inline_set_jump_addr((sljit_uw)code_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); + code_ptr += 2; +#else + inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); +#endif + } + else { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (jump->flags & IS_BL) + buf_ptr--; + if (*buf_ptr & (1 << 23)) + buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; + else + buf_ptr += 1; + *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; +#else + inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target, 0); +#endif + } + jump = jump->next; + } + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + const_ = compiler->consts; + while (const_) { + buf_ptr = (sljit_uw*)const_->addr; + const_->addr = (sljit_uw)code_ptr; + + code_ptr[0] = (sljit_uw)buf_ptr; + code_ptr[1] = *buf_ptr; + if (*buf_ptr & (1 << 23)) + buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; + else + buf_ptr += 1; + /* Set the value again (can be a simple constant). */ + inline_set_const((sljit_uw)code_ptr, *buf_ptr, 0); + code_ptr += 2; + + const_ = const_->next; + } +#endif + + SLJIT_ASSERT(code_ptr - code <= (int)size); + + SLJIT_CACHE_FLUSH(code, code_ptr); + compiler->error = SLJIT_ERR_COMPILED; + return code; +} + +/* emit_op inp_flags. + WRITE_BACK must be the first, since it is a flag. */ +#define WRITE_BACK 0x01 +#define ALLOW_IMM 0x02 +#define ALLOW_INV_IMM 0x04 +#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) +#define ARG_TEST 0x08 + +/* Creates an index in data_transfer_insts array. */ +#define WORD_DATA 0x00 +#define BYTE_DATA 0x10 +#define HALF_DATA 0x20 +#define SIGNED_DATA 0x40 +#define LOAD_DATA 0x80 + +#define EMIT_INSTRUCTION(inst) \ + FAIL_IF(push_inst(compiler, (inst))) + +/* Condition: AL. */ +#define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \ + (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2)) + +static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w); + +int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + int size; + sljit_uw push; + + CHECK_ERROR(); + check_sljit_emit_enter(compiler, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + + /* Push general registers, temporary registers + stmdb sp!, {..., lr} */ + push = PUSH | (1 << 14); + if (temporaries >= 5) + push |= 1 << 11; + if (temporaries >= 4) + push |= 1 << 10; + if (generals >= 5) + push |= 1 << 8; + if (generals >= 4) + push |= 1 << 7; + if (generals >= 3) + push |= 1 << 6; + if (generals >= 2) + push |= 1 << 5; + if (generals >= 1) + push |= 1 << 4; + EMIT_INSTRUCTION(push); + + /* Stack must be aligned to 8 bytes: */ + size = (1 + generals) * sizeof(sljit_uw); + if (temporaries >= 4) + size += (temporaries - 3) * sizeof(sljit_uw); + local_size += size; + local_size = (local_size + 7) & ~7; + local_size -= size; + compiler->local_size = local_size; + if (local_size > 0) + FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size)); + + if (args >= 1) + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_GENERAL_REG1, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG1))); + if (args >= 2) + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_GENERAL_REG2, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG2))); + if (args >= 3) + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_GENERAL_REG3, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG3))); + + return SLJIT_SUCCESS; +} + +void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + int size; + + CHECK_ERROR_VOID(); + check_sljit_fake_enter(compiler, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + + size = (1 + generals) * sizeof(sljit_uw); + if (temporaries >= 4) + size += (temporaries - 3) * sizeof(sljit_uw); + local_size += 
size;
+	local_size = (local_size + 7) & ~7;
+	local_size -= size;
+	compiler->local_size = local_size;
+}
+
+int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw)
+{
+	sljit_uw pop;
+
+	CHECK_ERROR();
+	check_sljit_emit_return(compiler, src, srcw);
+
+	if (src != SLJIT_UNUSED && src != SLJIT_RETURN_REG)
+		FAIL_IF(emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, SLJIT_RETURN_REG, 0, TMP_REG1, 0, src, srcw));
+
+	if (compiler->local_size > 0)
+		FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size));
+
+	pop = POP | (1 << 15);
+	/* Pop general registers, temporary registers
+	   ldmia sp!, {..., pc} */
+	if (compiler->temporaries >= 5)
+		pop |= 1 << 11;
+	if (compiler->temporaries >= 4)
+		pop |= 1 << 10;
+	if (compiler->generals >= 5)
+		pop |= 1 << 8;
+	if (compiler->generals >= 4)
+		pop |= 1 << 7;
+	if (compiler->generals >= 3)
+		pop |= 1 << 6;
+	if (compiler->generals >= 2)
+		pop |= 1 << 5;
+	if (compiler->generals >= 1)
+		pop |= 1 << 4;
+
+	return push_inst(compiler, pop);
+}
+
+/* --------------------------------------------------------------------- */
+/* Operators */
+/* --------------------------------------------------------------------- */
+
+/* s/l - store/load (1 bit)
+   u/s - signed/unsigned (1 bit)
+   w/b/h/N - word/byte/half/NOT allowed (2 bit)
+   It contains 16 items, but not all of them are different. */
+
+static sljit_w data_transfer_insts[16] = {
+/* s u w */ 0xe5000000 /* str */,
+/* s u b */ 0xe5400000 /* strb */,
+/* s u h */ 0xe10000b0 /* strh */,
+/* s u N */ 0x00000000 /* not allowed */,
+/* s s w */ 0xe5000000 /* str */,
+/* s s b */ 0xe5400000 /* strb */,
+/* s s h */ 0xe10000b0 /* strh */,
+/* s s N */ 0x00000000 /* not allowed */,
+
+/* l u w */ 0xe5100000 /* ldr */,
+/* l u b */ 0xe5500000 /* ldrb */,
+/* l u h */ 0xe11000b0 /* ldrh */,
+/* l u N */ 0x00000000 /* not allowed */,
+/* l s w */ 0xe5100000 /* ldr */,
+/* l s b */ 0xe11000d0 /* ldrsb */,
+/* l s h */ 0xe11000f0 /* ldrsh */,
+/* l s N */ 0x00000000 /* not allowed */,
+};
+
+#define EMIT_DATA_TRANSFER(type, add, wb, target, base1, base2) \
+	(data_transfer_insts[(type) >> 4] | ((add) << 23) | ((wb) << 21) | (reg_map[target] << 12) | (reg_map[base1] << 16) | (base2))
+/* Normal ldr/str instruction.
+   Type2: ldrsb, ldrh, ldrsh */
+#define IS_TYPE1_TRANSFER(type) \
+	(data_transfer_insts[(type) >> 4] & 0x04000000)
+#define TYPE2_TRANSFER_IMM(imm) \
+	(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
+
+/* flags: */
+  /* Arguments are swapped. */
+#define ARGS_SWAPPED	0x01
+  /* Inverted immediate. */
+#define INV_IMM		0x02
+  /* Source and destination are registers. */
+#define REG_DEST	0x04
+#define REG_SOURCE	0x08
+  /* One instruction is enough. */
+#define FAST_DEST	0x10
+  /* Multiple instructions are required. */
+#define SLOW_DEST	0x20
+/* SET_FLAGS must be (1 << 20) as it is also the value of the S bit (can be used for optimization). */
+#define SET_FLAGS	(1 << 20)
+/* dst: reg
+   src1: reg
+   src2: reg or imm (if allowed)
+   SRC2_IMM must be (1 << 25) as it is also the value of the I bit (can be used for optimization). */
+#define SRC2_IMM	(1 << 25)
+
+#define EMIT_DATA_PROCESS_INS_AND_RETURN(opcode) \
+	return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? 
src2 : RM(src2)))
+
+#define EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(opcode, dst, src1, src2) \
+	return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, src2))
+
+#define EMIT_SHIFT_INS_AND_RETURN(opcode) \
+	SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \
+	if (compiler->shift_imm != 0x20) { \
+		SLJIT_ASSERT(src1 == TMP_REG1); \
+		SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \
+		return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | reg_map[src2])); \
+	} \
+	return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | ((flags & ARGS_SWAPPED) ? reg_map[src2] : reg_map[src1])));
+
+static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, int flags,
+	int dst, int src1, int src2)
+{
+	sljit_w mul_inst;
+
+	switch (GET_OPCODE(op)) {
+	case SLJIT_ADD:
+		SLJIT_ASSERT(!(flags & INV_IMM));
+		EMIT_DATA_PROCESS_INS_AND_RETURN(ADD_DP);
+
+	case SLJIT_ADDC:
+		SLJIT_ASSERT(!(flags & INV_IMM));
+		EMIT_DATA_PROCESS_INS_AND_RETURN(ADC_DP);
+
+	case SLJIT_SUB:
+		SLJIT_ASSERT(!(flags & INV_IMM));
+		if (!(flags & ARGS_SWAPPED))
+			EMIT_DATA_PROCESS_INS_AND_RETURN(SUB_DP);
+		EMIT_DATA_PROCESS_INS_AND_RETURN(RSB_DP);
+
+	case SLJIT_SUBC:
+		SLJIT_ASSERT(!(flags & INV_IMM));
+		if (!(flags & ARGS_SWAPPED))
+			EMIT_DATA_PROCESS_INS_AND_RETURN(SBC_DP);
+		EMIT_DATA_PROCESS_INS_AND_RETURN(RSC_DP);
+
+	case SLJIT_MUL:
+		SLJIT_ASSERT(!(flags & INV_IMM));
+		SLJIT_ASSERT(!(src2 & SRC2_IMM));
+		if (SLJIT_UNLIKELY(op & SLJIT_SET_O))
+			mul_inst = SMULL | (reg_map[TMP_REG3] << 16) | (reg_map[dst] << 12);
+		else
+			mul_inst = MUL | (reg_map[dst] << 16);
+
+		if (dst != src2)
+			FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src1] << 8) | reg_map[src2]));
+		else if (dst != src1)
+			FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[src1]));
+		else {
+			/* Rm and Rd must not be the same register. */
+			SLJIT_ASSERT(dst != TMP_REG1);
+			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, reg_map[src2])));
+			FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[TMP_REG1]));
+		}
+
+		if (!(op & SLJIT_SET_O))
+			return SLJIT_SUCCESS;
+
+		/* We need to use TMP_REG3. */
+		compiler->cache_arg = 0;
+		compiler->cache_argw = 0;
+		/* cmp TMP_REG3, dst asr #31. 
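+		   The operand RM(dst) | 0xfc0 encodes "dst, asr #31":
+		   0xfc0 == (31 << 7) | (2 << 5), shift amount 31 with shift
+		   type 2 (ASR), i.e. the sign bit of dst copied to all 32 bits.
+		   SMULL left the high word of the product in TMP_REG3, and the
+		   signed multiply overflowed exactly when that high word
+		   differs from this sign extension.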
*/ + return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG3, RM(dst) | 0xfc0)); + + case SLJIT_AND: + if (!(flags & INV_IMM)) + EMIT_DATA_PROCESS_INS_AND_RETURN(AND_DP); + EMIT_DATA_PROCESS_INS_AND_RETURN(BIC_DP); + + case SLJIT_OR: + SLJIT_ASSERT(!(flags & INV_IMM)); + EMIT_DATA_PROCESS_INS_AND_RETURN(ORR_DP); + + case SLJIT_XOR: + SLJIT_ASSERT(!(flags & INV_IMM)); + EMIT_DATA_PROCESS_INS_AND_RETURN(EOR_DP); + + case SLJIT_SHL: + EMIT_SHIFT_INS_AND_RETURN(0); + + case SLJIT_LSHR: + EMIT_SHIFT_INS_AND_RETURN(1); + + case SLJIT_ASHR: + EMIT_SHIFT_INS_AND_RETURN(2); + + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if (dst != src2) { + if (src2 & SRC2_IMM) { + if (flags & INV_IMM) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + } + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (op == SLJIT_MOV_UB) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff)); + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2])); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst])); +#else + return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2)); +#endif + } + else if (dst != src2) { + SLJIT_ASSERT(src2 & SRC2_IMM); + if (flags & INV_IMM) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2])); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst])); +#else + return push_inst(compiler, (op == SLJIT_MOV_UH ? 
UXTH : SXTH) | RD(dst) | RM(src2)); +#endif + } + else if (dst != src2) { + SLJIT_ASSERT(src2 & SRC2_IMM); + if (flags & INV_IMM) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + if (src2 & SRC2_IMM) { + if (flags & INV_IMM) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); + } + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(!(flags & INV_IMM)); + SLJIT_ASSERT(!(src2 & SRC2_IMM)); + FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2))); + if (flags & SET_FLAGS) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM); + return SLJIT_SUCCESS; + } + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +#undef EMIT_DATA_PROCESS_INS_AND_RETURN +#undef EMIT_FULL_DATA_PROCESS_INS_AND_RETURN +#undef EMIT_SHIFT_INS_AND_RETURN + +/* Tests whether the immediate can be stored in the 12 bit imm field. + Returns with 0 if not possible. */ +static sljit_uw get_immediate(sljit_uw imm) +{ + int rol; + + if (imm <= 0xff) + return SRC2_IMM | imm; + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol = 8; + } + else { + imm = (imm << 24) | (imm >> 8); + rol = 0; + } + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol += 4; + } + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + return SRC2_IMM | (imm >> 24) | (rol << 8); + else + return 0; +} + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +static int generate_int(struct sljit_compiler *compiler, int reg, sljit_uw imm, int positive) +{ + sljit_uw mask; + sljit_uw imm1; + sljit_uw imm2; + int rol; + + /* Step1: Search a zero byte (8 continous zero bit). */ + mask = 0xff000000; + rol = 8; + while(1) { + if (!(imm & mask)) { + /* Rol imm by rol. */ + imm = (imm << rol) | (imm >> (32 - rol)); + /* Calculate arm rol. */ + rol = 4 + (rol >> 1); + break; + } + rol += 2; + mask >>= 2; + if (mask & 0x3) { + /* rol by 8. */ + imm = (imm << 8) | (imm >> 24); + mask = 0xff00; + rol = 24; + while (1) { + if (!(imm & mask)) { + /* Rol imm by rol. */ + imm = (imm << rol) | (imm >> (32 - rol)); + /* Calculate arm rol. */ + rol = (rol >> 1) - 8; + break; + } + rol += 2; + mask >>= 2; + if (mask & 0x3) + return 0; + } + break; + } + } + + /* The low 8 bit must be zero. 
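+	   After the rotation above, the remaining significant bits can be
+	   split into two 8-bit chunks, each a valid rotated ARM immediate,
+	   so the value is built with a mov/orr (or mvn/bic) pair. For
+	   example, 0x123400 can be loaded as roughly:
+	       mov reg, #0x120000
+	       orr reg, reg, #0x3400
+	   (the exact split chosen by the code below may differ).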
*/ + SLJIT_ASSERT(!(imm & 0xff)); + + if (!(imm & 0xff000000)) { + imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8); + imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8); + } + else if (imm & 0xc0000000) { + imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); + imm <<= 8; + rol += 4; + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol += 4; + } + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); + else + return 0; + } + else { + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); + imm <<= 8; + rol += 4; + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); + else + return 0; + } + + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1)); + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2)); + return 1; +} +#endif + +static int load_immediate(struct sljit_compiler *compiler, int reg, sljit_uw imm) +{ + sljit_uw tmp; + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + if (!(imm & ~0xffff)) + return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)); +#endif + + /* Create imm by 1 inst. */ + tmp = get_immediate(imm); + if (tmp) { + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp)); + return SLJIT_SUCCESS; + } + + tmp = get_immediate(~imm); + if (tmp) { + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp)); + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + /* Create imm by 2 inst. */ + FAIL_IF(generate_int(compiler, reg, imm, 1)); + FAIL_IF(generate_int(compiler, reg, ~imm, 0)); + + /* Load integer. */ + return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm); +#else + return emit_imm(compiler, reg, imm); +#endif +} + +/* Can perform an operation using at most 1 instruction. */ +static int getput_arg_fast(struct sljit_compiler *compiler, int inp_flags, int reg, int arg, sljit_w argw) +{ + sljit_uw imm; + + if (arg & SLJIT_IMM) { + imm = get_immediate(argw); + if (imm) { + if (inp_flags & ARG_TEST) + return 1; + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm)); + return -1; + } + imm = get_immediate(~argw); + if (imm) { + if (inp_flags & ARG_TEST) + return 1; + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm)); + return -1; + } + return (inp_flags & ARG_TEST) ? SLJIT_SUCCESS : 0; + } + + SLJIT_ASSERT(arg & SLJIT_MEM); + + /* Fast loads/stores. 
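+	   Informally, the single-instruction forms handled here are:
+	       ldr/str   reg, [base, #+/-imm12]       (type1: word/byte)
+	       ldrh/strh reg, [base, #+/-imm8]        (type2: half/signed)
+	       ldr/str   reg, [base, index, lsl #0-3] (register offset)
+	   anything else falls back to the slower getput_arg path.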
*/ + if (arg & 0xf) { + if (!(arg & 0xf0)) { + if (IS_TYPE1_TRANSFER(inp_flags)) { + if (argw >= 0 && argw <= 0xfff) { + if (inp_flags & ARG_TEST) + return 1; + EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, argw)); + return -1; + } + if (argw < 0 && argw >= -0xfff) { + if (inp_flags & ARG_TEST) + return 1; + EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & 0xf, -argw)); + return -1; + } + } + else { + if (argw >= 0 && argw <= 0xff) { + if (inp_flags & ARG_TEST) + return 1; + EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, TYPE2_TRANSFER_IMM(argw))); + return -1; + } + if (argw < 0 && argw >= -0xff) { + if (inp_flags & ARG_TEST) + return 1; + argw = -argw; + EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & 0xf, TYPE2_TRANSFER_IMM(argw))); + return -1; + } + } + } + else if ((argw & 0x3) == 0 || IS_TYPE1_TRANSFER(inp_flags)) { + if (inp_flags & ARG_TEST) + return 1; + EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, + RM((arg >> 4) & 0xf) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7))); + return -1; + } + } + + return (inp_flags & ARG_TEST) ? SLJIT_SUCCESS : 0; +} + +/* See getput_arg below. + Note: can_cache is called only for binary operators. Those + operators always uses word arguments without write back. */ +static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw) +{ + /* Immediate caching is not supported as it would be an operation on constant arguments. */ + if (arg & SLJIT_IMM) + return 0; + + /* Always a simple operation. */ + if (arg & 0xf0) + return 0; + + if (!(arg & 0xf)) { + /* Immediate access. */ + if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff)) + return 1; + return 0; + } + + if (argw <= 0xfffff && argw >= -0xfffff) + return 0; + + if (argw == next_argw && (next_arg & SLJIT_MEM)) + return 1; + + if (arg == next_arg && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff)) + return 1; + + return 0; +} + +#define GETPUT_ARG_DATA_TRANSFER(add, wb, target, base, imm) \ + if (max_delta & 0xf00) \ + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, imm))); \ + else \ + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, TYPE2_TRANSFER_IMM(imm)))); + +#define TEST_WRITE_BACK() \ + if (inp_flags & WRITE_BACK) { \ + tmp_r = arg & 0xf; \ + if (reg == tmp_r) { \ + /* This can only happen for stores */ \ + /* since ldr reg, [reg, ...]! has no meaning */ \ + SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \ + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg))); \ + reg = TMP_REG3; \ + } \ + } + +/* Emit the necessary instructions. See can_cache above. */ +static int getput_arg(struct sljit_compiler *compiler, int inp_flags, int reg, int arg, sljit_w argw, int next_arg, sljit_w next_argw) +{ + int tmp_r; + sljit_w max_delta; + sljit_w sign; + + if (arg & SLJIT_IMM) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + return load_immediate(compiler, reg, argw); + } + + SLJIT_ASSERT(arg & SLJIT_MEM); + + tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3; + max_delta = IS_TYPE1_TRANSFER(inp_flags) ? 0xfff : 0xff; + + if ((arg & 0xf) == SLJIT_UNUSED) { + /* Write back is not used. 
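+		   This is the absolute-address case: if an earlier access left a
+		   nearby constant address in TMP_REG3 (tracked by cache_arg /
+		   cache_argw), a single transfer with a small +/- delta from
+		   that base is enough; otherwise the address is materialized
+		   below, caching it when the next argument can reuse it.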
*/ + if ((compiler->cache_arg & SLJIT_IMM) && (((sljit_uw)argw - (sljit_uw)compiler->cache_argw) <= (sljit_uw)max_delta || ((sljit_uw)compiler->cache_argw - (sljit_uw)argw) <= (sljit_uw)max_delta)) { + if (((sljit_uw)argw - (sljit_uw)compiler->cache_argw) <= (sljit_uw)max_delta) { + sign = 1; + argw = argw - compiler->cache_argw; + } + else { + sign = 0; + argw = compiler->cache_argw - argw; + } + + if (max_delta & 0xf00) { + EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, sign, 0, reg, TMP_REG3, argw)); + } + else { + EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, sign, 0, reg, TMP_REG3, TYPE2_TRANSFER_IMM(argw))); + } + return SLJIT_SUCCESS; + } + + /* With write back, we can create some sophisticated loads, but + it is hard to decide whether we should convert downward (0s) or upward (1s). */ + if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= (sljit_uw)max_delta || (sljit_uw)next_argw - (sljit_uw)argw <= (sljit_uw)max_delta)) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + + compiler->cache_arg = SLJIT_IMM; + compiler->cache_argw = argw; + tmp_r = TMP_REG3; + } + + FAIL_IF(load_immediate(compiler, tmp_r, argw)); + GETPUT_ARG_DATA_TRANSFER(1, 0, reg, tmp_r, 0); + return SLJIT_SUCCESS; + } + + /* Extended imm addressing for [reg+imm] format. */ + sign = (max_delta << 8) | 0xff; + if (!(arg & 0xf0) && argw <= sign && argw >= -sign) { + TEST_WRITE_BACK(); + if (argw >= 0) { + sign = 1; + } + else { + sign = 0; + argw = -argw; + } + + /* Optimization: add is 0x4, sub is 0x2. Sign is 1 for add and 0 for sub. */ + if (max_delta & 0xf00) + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP << sign, 0, tmp_r, arg & 0xf, SRC2_IMM | (argw >> 12) | 0xa00)); + else + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP << sign, 0, tmp_r, arg & 0xf, SRC2_IMM | (argw >> 8) | 0xc00)); + + argw &= max_delta; + GETPUT_ARG_DATA_TRANSFER(sign, inp_flags & WRITE_BACK, reg, tmp_r, argw); + return SLJIT_SUCCESS; + } + + if (arg & 0xf0) { + SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00)); + if (inp_flags & WRITE_BACK) + tmp_r = arg & 0xf; + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & 0xf, RM((arg >> 4) & 0xf) | ((argw & 0x3) << 7))); + EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0))); + return SLJIT_SUCCESS; + } + + if (compiler->cache_arg == arg && ((sljit_uw)argw - (sljit_uw)compiler->cache_argw) <= (sljit_uw)max_delta) { + SLJIT_ASSERT(!(inp_flags & WRITE_BACK)); + argw = argw - compiler->cache_argw; + GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, argw); + return SLJIT_SUCCESS; + } + + if (compiler->cache_arg == arg && ((sljit_uw)compiler->cache_argw - (sljit_uw)argw) <= (sljit_uw)max_delta) { + SLJIT_ASSERT(!(inp_flags & WRITE_BACK)); + argw = compiler->cache_argw - argw; + GETPUT_ARG_DATA_TRANSFER(0, 0, reg, TMP_REG3, argw); + return SLJIT_SUCCESS; + } + + if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) { + TEST_WRITE_BACK(); + EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); + return SLJIT_SUCCESS; + } + + if (argw == next_argw && (next_arg & SLJIT_MEM)) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + + compiler->cache_arg = SLJIT_IMM; + compiler->cache_argw = argw; + + TEST_WRITE_BACK(); + EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, RM(TMP_REG3) | (max_delta & 0xf00 ? 
SRC2_IMM : 0))); + return SLJIT_SUCCESS; + } + + if (arg == next_arg && !(inp_flags & WRITE_BACK) && ((sljit_uw)argw - (sljit_uw)next_argw <= (sljit_uw)max_delta || (sljit_uw)next_argw - (sljit_uw)argw <= (sljit_uw)max_delta)) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & 0xf])); + + compiler->cache_arg = arg; + compiler->cache_argw = argw; + + GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, 0); + return SLJIT_SUCCESS; + } + + FAIL_IF(load_immediate(compiler, tmp_r, argw)); + EMIT_INSTRUCTION(EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & 0xf, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0))); + return SLJIT_SUCCESS; +} + +static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + + /* We prefers register and simple consts. */ + int dst_r; + int src1_r; + int src2_r = 0; + int sugg_src2_r = TMP_REG2; + int flags = GET_FLAGS(op) ? SET_FLAGS : 0; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + /* Destination check. */ + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REG3) { + dst_r = dst; + flags |= REG_DEST; + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + sugg_src2_r = dst_r; + } + else if (dst == SLJIT_UNUSED) { + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + return SLJIT_SUCCESS; + dst_r = TMP_REG2; + } + else { + SLJIT_ASSERT(dst & SLJIT_MEM); + if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) { + flags |= FAST_DEST; + dst_r = TMP_REG2; + } + else { + flags |= SLOW_DEST; + dst_r = 0; + } + } + + /* Source 1. */ + if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= TMP_REG3) + src1_r = src1; + else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REG3) { + flags |= ARGS_SWAPPED; + src1_r = src2; + src2 = src1; + src2w = src1w; + } + else { + if ((inp_flags & ALLOW_ANY_IMM) && (src1 & SLJIT_IMM)) { + /* The second check will generate a hit. */ + src2_r = get_immediate(src1w); + if (src2_r) { + flags |= ARGS_SWAPPED; + src1 = src2; + src1w = src2w; + } + if (inp_flags & ALLOW_INV_IMM) { + src2_r = get_immediate(~src1w); + if (src2_r) { + flags |= ARGS_SWAPPED | INV_IMM; + src1 = src2; + src1w = src2w; + } + } + } + + src1_r = 0; + if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1_r = TMP_REG1; + } + } + + /* Source 2. */ + if (src2_r == 0) { + if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REG3) { + src2_r = src2; + flags |= REG_SOURCE; + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + dst_r = src2_r; + } + else do { /* do { } while(0) is used because of breaks. */ + if ((inp_flags & ALLOW_ANY_IMM) && (src2 & SLJIT_IMM)) { + src2_r = get_immediate(src2w); + if (src2_r) + break; + if (inp_flags & ALLOW_INV_IMM) { + src2_r = get_immediate(~src2w); + if (src2_r) { + flags |= INV_IMM; + break; + } + } + } + + /* src2_r is 0. */ + if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) { + FAIL_IF(compiler->error); + src2_r = sugg_src2_r; + } + } while (0); + } + + /* src1_r, src2_r and dst_r can be zero (=unprocessed) or non-zero. + If they are zero, they must not be registers. 
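+	   The cases below, in order: all three unprocessed (load both
+	   sources, picking the cache direction that also helps the store);
+	   both sources unprocessed (load src1, caching for src2); one
+	   source plus the destination (load it, caching for the store);
+	   anything still unprocessed afterwards is loaded without caching.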
*/ + if (src1_r == 0 && src2_r == 0 && dst_r == 0) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); + flags |= ARGS_SWAPPED; + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); + } + src1_r = TMP_REG1; + src2_r = TMP_REG2; + } + else if (src1_r == 0 && src2_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + src1_r = TMP_REG1; + } + else if (src1_r == 0 && dst_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + src1_r = TMP_REG1; + } + else if (src2_r == 0 && dst_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + src2_r = sugg_src2_r; + } + + if (dst_r == 0) + dst_r = TMP_REG2; + + if (src1_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0)); + src1_r = TMP_REG1; + } + + if (src2_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0)); + src2_r = sugg_src2_r; + } + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (flags & (FAST_DEST | SLOW_DEST)) { + if (flags & FAST_DEST) + FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw)); + else + FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0)); + } + return SLJIT_SUCCESS; +} + +int sljit_emit_op0(struct sljit_compiler *compiler, int op) +{ + CHECK_ERROR(); + check_sljit_emit_op0(compiler, op); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + EMIT_INSTRUCTION(DEBUGGER); + break; + case SLJIT_NOP: + EMIT_INSTRUCTION(NOP); + break; + } + + return SLJIT_SUCCESS; +} + +int sljit_emit_op1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + CHECK_ERROR(); + check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_UB: + return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw); + + case SLJIT_MOV_SB: + return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); + + case SLJIT_MOV_UH: + return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw); + + case SLJIT_MOV_SH: + return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); + + case SLJIT_MOVU: + case SLJIT_MOVU_UI: + case SLJIT_MOVU_SI: + return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_UB: + return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(unsigned char)srcw : srcw); + + case SLJIT_MOVU_SB: + return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); + + case SLJIT_MOVU_UH: + return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw); + + case SLJIT_MOVU_SH: + return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); + + case SLJIT_NOT: + return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_NEG: +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, SLJIT_SUB | GET_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw); + + case SLJIT_CLZ: + return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); + } + + return SLJIT_SUCCESS; +} + +int sljit_emit_op2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + CHECK_ERROR(); + check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + case SLJIT_SUB: + case SLJIT_SUBC: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: + if (src2 & SLJIT_IMM) { + compiler->shift_imm = src2w & 0x1f; + return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w); + } + else { + compiler->shift_imm = 0x20; + return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); + } + } + + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + +/* 0 - no fpu + 1 - vfp */ +static int arm_fpu_type = -1; + +static void init_compiler() +{ + if (arm_fpu_type != -1) + return; + + /* TODO: Only the OS can help to determine the correct fpu type. */ + arm_fpu_type = 1; +} + +int sljit_is_fpu_available(void) +{ + if (arm_fpu_type == -1) + init_compiler(); + return arm_fpu_type; +} + +#else + +#define arm_fpu_type 1 + +int sljit_is_fpu_available(void) +{ + /* Always available. */ + return 1; +} + +#endif + +#define EMIT_FPU_DATA_TRANSFER(add, load, base, freg, offs) \ + (VSTR | ((add) << 23) | ((load) << 20) | (reg_map[base] << 16) | (freg << 12) | (offs)) +#define EMIT_FPU_OPERATION(opcode, dst, src1, src2) \ + ((opcode) | ((dst) << 12) | (src1) | ((src2) << 16)) + +static int emit_fpu_data_transfer(struct sljit_compiler *compiler, int fpu_reg, int load, int arg, sljit_w argw) +{ + SLJIT_ASSERT(arg & SLJIT_MEM); + + /* Fast loads and stores. 
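+	   vldr/vstr take an 8-bit word offset, so a single instruction
+	   covers base +/- 0x3fc bytes when argw is word aligned (e.g.
+	   argw == 0x3f8 gives an offset field of 0x3f8 >> 2 == 0xfe);
+	   somewhat larger offsets are split below into an add/sub of the
+	   upper bits plus a transfer of the remaining 10 bits.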
*/ + if ((arg & 0xf) && !(arg & 0xf0) && (argw & 0x3) == 0) { + if (argw >= 0 && argw <= 0x3ff) { + EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(1, load, arg & 0xf, fpu_reg, argw >> 2)); + return SLJIT_SUCCESS; + } + if (argw < 0 && argw >= -0x3ff) { + EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(0, load, arg & 0xf, fpu_reg, (-argw) >> 2)); + return SLJIT_SUCCESS; + } + if (argw >= 0 && argw <= 0x3ffff) { + SLJIT_ASSERT(get_immediate(argw & 0x3fc00)); + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & 0xf, get_immediate(argw & 0x3fc00))); + argw &= 0x3ff; + EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(1, load, TMP_REG1, fpu_reg, argw >> 2)); + return SLJIT_SUCCESS; + } + if (argw < 0 && argw >= -0x3ffff) { + argw = -argw; + SLJIT_ASSERT(get_immediate(argw & 0x3fc00)); + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & 0xf, get_immediate(argw & 0x3fc00))); + argw &= 0x3ff; + EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(0, load, TMP_REG1, fpu_reg, argw >> 2)); + return SLJIT_SUCCESS; + } + } + + if (arg & 0xf0) { + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & 0xf, RM((arg >> 4) & 0xf) | ((argw & 0x3) << 7))); + EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(1, load, TMP_REG1, fpu_reg, 0)); + return SLJIT_SUCCESS; + } + + if (compiler->cache_arg == arg && ((argw - compiler->cache_argw) & 0x3) == 0) { + if (((sljit_uw)argw - (sljit_uw)compiler->cache_argw) <= 0x3ff) { + EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(1, load, TMP_REG3, fpu_reg, (argw - compiler->cache_argw) >> 2)); + return SLJIT_SUCCESS; + } + if (((sljit_uw)compiler->cache_argw - (sljit_uw)argw) <= 0x3ff) { + EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(0, load, TMP_REG3, fpu_reg, (compiler->cache_argw - argw) >> 2)); + return SLJIT_SUCCESS; + } + } + + compiler->cache_arg = arg; + compiler->cache_argw = argw; + if (arg & 0xf) { + FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & 0xf, reg_map[TMP_REG1])); + } + else + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + + EMIT_INSTRUCTION(EMIT_FPU_DATA_TRANSFER(1, load, TMP_REG3, fpu_reg, 0)); + return SLJIT_SUCCESS; +} + +int sljit_emit_fop1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + int dst_freg; + + CHECK_ERROR(); + check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (GET_OPCODE(op) == SLJIT_FCMP) { + if (dst > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, dst, dstw)); + dst = TMP_FREG1; + } + if (src > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src, srcw)); + src = TMP_FREG2; + } + EMIT_INSTRUCTION(VCMP_F64 | (dst << 12) | src); + EMIT_INSTRUCTION(VMRS); + return SLJIT_SUCCESS; + } + + dst_freg = (dst > SLJIT_FLOAT_REG4) ? 
TMP_FREG1 : dst; + + if (src > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, dst_freg, 1, src, srcw)); + src = dst_freg; + } + + switch (op) { + case SLJIT_FMOV: + if (src != dst_freg && dst_freg != TMP_FREG1) + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMOV_F64, dst_freg, src, 0)); + break; + case SLJIT_FNEG: + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VNEG_F64, dst_freg, src, 0)); + break; + case SLJIT_FABS: + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VABS_F64, dst_freg, src, 0)); + break; + } + + if (dst_freg == TMP_FREG1) + FAIL_IF(emit_fpu_data_transfer(compiler, src, 0, dst, dstw)); + + return SLJIT_SUCCESS; +} + +int sljit_emit_fop2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + int dst_freg; + + CHECK_ERROR(); + check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_freg = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst; + + if (src2 > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w)); + src2 = TMP_FREG2; + } + + if (src1 > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, src1, src1w)); + src1 = TMP_FREG1; + } + + switch (op) { + case SLJIT_FADD: + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VADD_F64, dst_freg, src2, src1)); + break; + + case SLJIT_FSUB: + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VSUB_F64, dst_freg, src2, src1)); + break; + + case SLJIT_FMUL: + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VMUL_F64, dst_freg, src2, src1)); + break; + + case SLJIT_FDIV: + EMIT_INSTRUCTION(EMIT_FPU_OPERATION(VDIV_F64, dst_freg, src2, src1)); + break; + } + + if (dst_freg == TMP_FREG1) + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 0, dst, dstw)); + + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +{ + int size; + + CHECK_ERROR(); + check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + + size = (1 + generals) * sizeof(sljit_uw); + if (temporaries >= 4) + size += (temporaries - 3) * sizeof(sljit_uw); + local_size += size; + local_size = (local_size + 7) & ~7; + local_size -= size; + compiler->local_size = local_size; + + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG3))); + else if (dst & SLJIT_MEM) { + if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw)) + return compiler->error; + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3))); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + CHECK_ERROR(); + check_sljit_emit_fast_return(compiler, src, srcw); + + if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src))); + else if (src & SLJIT_MEM) { + if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw)) + 
FAIL_IF(compiler->error); + else { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0)); + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2))); + } + } + else if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, TMP_REG3, srcw)); + return push_inst(compiler, BLX | RM(TMP_REG3)); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_uw get_cc(int type) +{ + switch (type) { + case SLJIT_C_EQUAL: + case SLJIT_C_MUL_NOT_OVERFLOW: + case SLJIT_C_FLOAT_EQUAL: + return 0x00000000; + + case SLJIT_C_NOT_EQUAL: + case SLJIT_C_MUL_OVERFLOW: + case SLJIT_C_FLOAT_NOT_EQUAL: + return 0x10000000; + + case SLJIT_C_LESS: + case SLJIT_C_FLOAT_LESS: + return 0x30000000; + + case SLJIT_C_GREATER_EQUAL: + case SLJIT_C_FLOAT_GREATER_EQUAL: + return 0x20000000; + + case SLJIT_C_GREATER: + case SLJIT_C_FLOAT_GREATER: + return 0x80000000; + + case SLJIT_C_LESS_EQUAL: + case SLJIT_C_FLOAT_LESS_EQUAL: + return 0x90000000; + + case SLJIT_C_SIG_LESS: + return 0xb0000000; + + case SLJIT_C_SIG_GREATER_EQUAL: + return 0xa0000000; + + case SLJIT_C_SIG_GREATER: + return 0xc0000000; + + case SLJIT_C_SIG_LESS_EQUAL: + return 0xd0000000; + + case SLJIT_C_OVERFLOW: + case SLJIT_C_FLOAT_NAN: + return 0x60000000; + + case SLJIT_C_NOT_OVERFLOW: + case SLJIT_C_FLOAT_NOT_NAN: + return 0x70000000; + + default: /* SLJIT_JUMP */ + return 0xe0000000; + } +} + +struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + check_sljit_emit_label(compiler); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type) +{ + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + check_sljit_emit_jump(compiler, type); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + /* In ARM, we don't need to touch the arguments. */ +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (type >= SLJIT_CALL0) + PTR_FAIL_IF(prepare_blx(compiler)); + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, + type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0)); + + if (jump->flags & SLJIT_REWRITABLE_JUMP) { + jump->addr = compiler->size; + compiler->patches++; + } + + if (type >= SLJIT_CALL0) { + jump->flags |= IS_BL; + PTR_FAIL_IF(emit_blx(compiler)); + } + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) + jump->addr = compiler->size; +#else + if (type >= SLJIT_CALL0) + jump->flags |= IS_BL; + PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); + PTR_FAIL_IF(push_inst(compiler, (((type < SLJIT_CALL0 ? 
BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type))); + jump->addr = compiler->size; +#endif + return jump; +} + +int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + check_sljit_emit_ijump(compiler, type, src, srcw); + + /* In ARM, we don't need to touch the arguments. */ + if (src & SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_CALL0) ? IS_BL : 0)); + jump->u.target = srcw; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (type >= SLJIT_CALL0) + FAIL_IF(prepare_blx(compiler)); + FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0)); + if (type >= SLJIT_CALL0) + FAIL_IF(emit_blx(compiler)); +#else + FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); + FAIL_IF(push_inst(compiler, (type < SLJIT_CALL0 ? BX : BLX) | RM(TMP_REG1))); +#endif + jump->addr = compiler->size; + } + else { + if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) + return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src)); + + SLJIT_ASSERT(src & SLJIT_MEM); + FAIL_IF(emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); + return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)); + } + + return SLJIT_SUCCESS; +} + +int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type) +{ + int reg; + sljit_uw cc; + + CHECK_ERROR(); + check_sljit_emit_cond_value(compiler, op, dst, dstw, type); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + cc = get_cc(type); + if (GET_OPCODE(op) == SLJIT_OR) { + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(ORR_DP, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc); + if (op & SLJIT_SET_E) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))); + return SLJIT_SUCCESS; + } + + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, SRC2_IMM | 0)); + EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->skip_checks = 1; +#endif + return emit_op(compiler, op, ALLOW_IMM, dst, dstw, TMP_REG1, 0, dst, dstw); + } + + reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2; + + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, SRC2_IMM | 0)); + EMIT_INSTRUCTION((EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc); + + if (reg == TMP_REG2) + return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); + return SLJIT_SUCCESS; +} + +struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value) +{ + struct sljit_const *const_; + int reg; + + CHECK_ERROR_PTR(); + check_sljit_emit_const(compiler, dst, dstw, init_value); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + + reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? 
dst : TMP_REG2; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), init_value)); + compiler->patches++; +#else + PTR_FAIL_IF(emit_imm(compiler, reg, init_value)); +#endif + set_const(const_, compiler); + + if (reg == TMP_REG2 && dst != SLJIT_UNUSED) + if (emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)) + return NULL; + return const_; +} + +void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + inline_set_jump_addr(addr, new_addr, 1); +} + +void sljit_set_const(sljit_uw addr, sljit_w new_constant) +{ + inline_set_const(addr, new_constant, 1); +} diff --git a/sljit/sljitNativeMIPS_32.c b/sljit/sljitNativeMIPS_32.c new file mode 100644 index 0000000..c025073 --- /dev/null +++ b/sljit/sljitNativeMIPS_32.c @@ -0,0 +1,366 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* mips 32-bit arch dependent functions. */ + +static int load_immediate(struct sljit_compiler *compiler, int dst_ar, sljit_w imm) +{ + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + if (imm < 0 && imm >= SIMM_MIN) + return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar)); + return (imm & 0xffff) ? 
push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS; +} + +#define EMIT_LOGICAL(op_imm, op_norm) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \ + } \ + else { \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \ + } + +#define EMIT_SHIFT(op_imm, op_norm) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ + } \ + else { \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_inst(compiler, op_norm | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_inst(compiler, op_norm | S(src2) | T(src1) | D(dst), DR(dst))); \ + } + +static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, int flags, + int dst, int src1, sljit_w src2) +{ + int overflow_ra = 0; + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_O) { + FAIL_IF(push_inst(compiler, SRL | T(src1) | DA(TMP_EREG1) | SH_IMM(31), TMP_EREG1)); + if (src2 < 0) + FAIL_IF(push_inst(compiler, XORI | SA(TMP_EREG1) | TA(TMP_EREG1) | IMM(1), TMP_EREG1)); + } + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + if (op & SLJIT_SET_C) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); + else { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); + } + } + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); + if (op & SLJIT_SET_O) { + FAIL_IF(push_inst(compiler, SRL | T(dst) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); + if (src2 < 0) + FAIL_IF(push_inst(compiler, XORI | SA(OVERFLOW_FLAG) | TA(OVERFLOW_FLAG) | IMM(1), OVERFLOW_FLAG)); + } + } + else { + if (op & SLJIT_SET_O) { + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, SRL | TA(TMP_EREG1) | DA(TMP_EREG1) | SH_IMM(31), TMP_EREG1)); + if (src1 != dst) + overflow_ra = DR(src1); + else if (src2 != dst) + overflow_ra = DR(src2); + else { + /* Rare ocasion. */ + FAIL_IF(push_inst(compiler, ADDU | S(src1) | TA(0) | DA(TMP_EREG2), TMP_EREG2)); + overflow_ra = TMP_EREG2; + } + } + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG)); + /* dst may be the same as src1 or src2. 
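+		   For SLJIT_SET_O the usual signed-overflow argument applies:
+		   overflow is only possible when the operands have equal signs
+		   (TMP_EREG1 == 0 above), and it occurred exactly when the sign
+		   of the result differs from theirs - checked below via
+		   dst ^ overflow_ra, with movn cancelling the mixed-sign case.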
*/
+ if (CHECK_FLAGS(SLJIT_SET_E))
+ FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
+ if (op & SLJIT_SET_O) {
+ FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(overflow_ra) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG));
+ FAIL_IF(push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG));
+ }
+ }
+
+ /* a + b >= a | b (otherwise, the carry should be set to 1). */
+ if (op & SLJIT_SET_C)
+ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG));
+ if (op & SLJIT_SET_O)
+ return push_inst(compiler, MOVN | SA(0) | TA(TMP_EREG1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG);
+ return SLJIT_SUCCESS;
+
+ case SLJIT_ADDC:
+ if (flags & SRC2_IMM) {
+ if (op & SLJIT_SET_C) {
+ if (src2 >= 0)
+ FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
+ else {
+ FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(TMP_EREG1) | IMM(src2), TMP_EREG1));
+ FAIL_IF(push_inst(compiler, OR | S(src1) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1));
+ }
+ }
+ FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
+ } else {
+ if (op & SLJIT_SET_C)
+ FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+ /* dst may be the same as src1 or src2. */
+ FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
+ }
+ if (op & SLJIT_SET_C)
+ FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(TMP_EREG1) | DA(TMP_EREG1), TMP_EREG1));
+
+ FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst)));
+ if (!(op & SLJIT_SET_C))
+ return SLJIT_SUCCESS;
+
+ /* Set TMP_EREG2 (dst == 0) && (ULESS_FLAG == 1). */
+ FAIL_IF(push_inst(compiler, SLTIU | S(dst) | TA(TMP_EREG2) | IMM(1), TMP_EREG2));
+ FAIL_IF(push_inst(compiler, AND | SA(TMP_EREG2) | TA(ULESS_FLAG) | DA(TMP_EREG2), TMP_EREG2));
+ /* Set carry flag. */
+ return push_inst(compiler, OR | SA(TMP_EREG2) | TA(TMP_EREG1) | DA(ULESS_FLAG), ULESS_FLAG);
+
+ case SLJIT_SUB:
+ if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_S | SLJIT_SET_U)) || src2 == SIMM_MIN)) {
+ FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+ src2 = TMP_REG2;
+ flags &= ~SRC2_IMM;
+ }
+
+ if (flags & SRC2_IMM) {
+ if (op & SLJIT_SET_O) {
+ FAIL_IF(push_inst(compiler, SRL | T(src1) | DA(TMP_EREG1) | SH_IMM(31), TMP_EREG1));
+ if (src2 < 0)
+ FAIL_IF(push_inst(compiler, XORI | SA(TMP_EREG1) | TA(TMP_EREG1) | IMM(1), TMP_EREG1));
+ if (src1 != dst)
+ overflow_ra = DR(src1);
+ else {
+ /* Rare occasion. */
+ FAIL_IF(push_inst(compiler, ADDU | S(src1) | TA(0) | DA(TMP_EREG2), TMP_EREG2));
+ overflow_ra = TMP_EREG2;
+ }
+ }
+ if (op & SLJIT_SET_E)
+ FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
+ if (op & SLJIT_SET_C)
+ FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG));
+ /* dst may be the same as src1 or src2. */
+ if (CHECK_FLAGS(SLJIT_SET_E))
+ FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
+ }
+ else {
+ if (op & SLJIT_SET_O) {
+ FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1));
+ FAIL_IF(push_inst(compiler, SRL | TA(TMP_EREG1) | DA(TMP_EREG1) | SH_IMM(31), TMP_EREG1));
+ if (src1 != dst)
+ overflow_ra = DR(src1);
+ else {
+ /* Rare occasion. 
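The ADDU below copies src1 aside because dst aliases it here: the final
   overflow check XORs the sign of the result with the original sign of src1,
   which the subtraction would otherwise have destroyed.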
*/ + FAIL_IF(push_inst(compiler, ADDU | S(src1) | TA(0) | DA(TMP_EREG2), TMP_EREG2)); + overflow_ra = TMP_EREG2; + } + } + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (op & (SLJIT_SET_U | SLJIT_SET_C)) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG)); + if (op & SLJIT_SET_U) + FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG)); + if (op & SLJIT_SET_S) { + FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG)); + FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG)); + } + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_C)) + FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (op & SLJIT_SET_O) { + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(overflow_ra) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG)); + return push_inst(compiler, MOVZ | SA(0) | TA(TMP_EREG1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); + } + return SLJIT_SUCCESS; + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(TMP_EREG1) | IMM(-src2), TMP_EREG1)); + /* dst may be the same as src1 or src2. */ + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(TMP_EREG1), TMP_EREG1)); + /* dst may be the same as src1 or src2. 
*/ + FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, MOVZ | SA(ULESS_FLAG) | T(dst) | DA(TMP_EREG1), TMP_EREG1)); + + FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); + + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, ADDU | SA(TMP_EREG1) | TA(0) | DA(ULESS_FLAG), ULESS_FLAG)); + + return SLJIT_SUCCESS; + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & SRC2_IMM)); + if (!(op & SLJIT_SET_O)) + return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); + FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MFHI | DA(TMP_EREG1), TMP_EREG1)); + FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); + FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(TMP_EREG2) | SH_IMM(31), TMP_EREG2)); + return push_inst(compiler, SUBU | SA(TMP_EREG1) | TA(TMP_EREG2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); + + case SLJIT_AND: + EMIT_LOGICAL(ANDI, AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(ORI, OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + EMIT_LOGICAL(XORI, XOR); + return SLJIT_SUCCESS; + + case SLJIT_SHL: + EMIT_SHIFT(SLL, SLLV); + return SLJIT_SUCCESS; + + case SLJIT_LSHR: + EMIT_SHIFT(SRL, SRLV); + return SLJIT_SUCCESS; + + case SLJIT_ASHR: + EMIT_SHIFT(SRA, SRAV); + return SLJIT_SUCCESS; + + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + SLJIT_ASSERT(src1 == TMP_REG1); + if (dst != src2) + return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SB) + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SH) + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); + return SLJIT_SUCCESS; + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst))); + return SLJIT_SUCCESS; + } + + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE int emit_const(struct sljit_compiler *compiler, int reg, sljit_w init_value) +{ + FAIL_IF(push_inst(compiler, LUI | T(reg) | IMM(init_value >> 16), DR(reg))); + return push_inst(compiler, ORI | S(reg) | T(reg) | IMM(init_value), DR(reg)); +} + +void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | 
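/* Patching sketch (illustrative, hypothetical value): the constant was emitted
   as a LUI/ORI pair, so for new_addr == 0xdeadbeef these two stores keep the
   opcode and register fields (the 0xffff0000 mask) and rewrite the 16-bit
   immediates to 0xdead and 0xbeef. */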
(new_addr & 0xffff);
+ SLJIT_CACHE_FLUSH(inst, inst + 2);
+}
+
+void sljit_set_const(sljit_uw addr, sljit_w new_constant)
+{
+ sljit_ins *inst = (sljit_ins*)addr;
+
+ inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
+ inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff);
+ SLJIT_CACHE_FLUSH(inst, inst + 2);
+}
diff --git a/sljit/sljitNativeMIPS_common.c b/sljit/sljitNativeMIPS_common.c
new file mode 100644
index 0000000..1f10c06
--- /dev/null
+++ b/sljit/sljitNativeMIPS_common.c
@@ -0,0 +1,1705 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_CONST char* sljit_get_platform_name()
+{
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+ return "mips-32";
+#else
+#error "mips-64 is not yet supported"
+#endif
+}
+
+/* Length of an instruction word
+ Both for mips-32 and mips-64 */
+typedef sljit_ui sljit_ins;
+
+#define TMP_REG1 (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
+#define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
+#define REAL_STACK_PTR (SLJIT_NO_REGISTERS + 4)
+
+/* TMP_EREG1 is used mainly for literal encoding on 64 bit. */
+#define TMP_EREG1 24
+#define TMP_EREG2 25
+
+/* Flags are kept in volatile registers. */
+#define EQUAL_FLAG 7
+/* And carry flag as well. */
+#define ULESS_FLAG 11
+#define UGREATER_FLAG 12
+#define LESS_FLAG 13
+#define GREATER_FLAG 14
+#define OVERFLOW_FLAG 15
+
+#define UNORD_BIT 1
+#define EQUAL_BIT 2
+#define LESS_BIT 3
+#define GREATER_BIT 4
+
+#define TMP_FREG1 (SLJIT_FLOAT_REG4 + 1)
+#define TMP_FREG2 (SLJIT_FLOAT_REG4 + 2)
+
+/* --------------------------------------------------------------------- */
+/* Instruction forms */
+/* --------------------------------------------------------------------- */
+
+#define S(s) (reg_map[s] << 21)
+#define T(t) (reg_map[t] << 16)
+#define D(d) (reg_map[d] << 11)
+/* Absolute registers. 
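The SA/TA/DA forms below encode a hardware register number directly, while
   S/T/D above translate a virtual register through reg_map first; e.g.
   S(SLJIT_TEMPORARY_REG1) places reg_map[1] == 2 ($v0) in bits 21-25, whereas
   SA(4) encodes $a0 itself.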
*/ +#define SA(s) ((s) << 21) +#define TA(t) ((t) << 16) +#define DA(d) ((d) << 11) +#define FT(t) ((t) << (16 + 1)) +#define FS(s) ((s) << (11 + 1)) +#define FD(d) ((d) << (6 + 1)) +#define IMM(imm) ((imm) & 0xffff) +#define SH_IMM(imm) ((imm & 0x1f) << 6) + +#define DR(dr) (reg_map[dr]) +#define HI(opcode) ((opcode) << 26) +#define LO(opcode) (opcode) +#define FMT_D (17 << 21) + +#define ABS_D (HI(17) | FMT_D | LO(5)) +#define ADD_D (HI(17) | FMT_D | LO(0)) +#define ADDU (HI(0) | LO(33)) +#define ADDIU (HI(9)) +#define AND (HI(0) | LO(36)) +#define ANDI (HI(12)) +#define B (HI(4)) +#define BAL (HI(1) | (17 << 16)) +#define BC1F (HI(17) | (8 << 21)) +#define BC1T (HI(17) | (8 << 21) | (1 << 16)) +#define BEQ (HI(4)) +#define BGEZ (HI(1) | (1 << 16)) +#define BGTZ (HI(7)) +#define BLEZ (HI(6)) +#define BLTZ (HI(1) | (0 << 16)) +#define BNE (HI(5)) +#define BREAK (HI(0) | LO(13)) +#define C_UN_D (HI(17) | FMT_D | LO(49)) +#define C_UEQ_D (HI(17) | FMT_D | LO(51)) +#define C_ULT_D (HI(17) | FMT_D | LO(53)) +#define CLZ (HI(28) | LO(32)) +#define DIV_D (HI(17) | FMT_D | LO(3)) +#define EXT (HI(31) | LO(0)) +#define J (HI(2)) +#define JAL (HI(3)) +#define JALR (HI(0) | LO(9)) +#define JR (HI(0) | LO(8)) +#define LD (HI(55)) +#define LDC1 (HI(53)) +#define LUI (HI(15)) +#define LW (HI(35)) +#define NEG_D (HI(17) | FMT_D | LO(7)) +#define MFHI (HI(0) | LO(16)) +#define MFLO (HI(0) | LO(18)) +#define MOV_D (HI(17) | FMT_D | LO(6)) +#define CFC1 (HI(17) | (2 << 21)) +#define MOVN (HI(0) | LO(11)) +#define MOVZ (HI(0) | LO(10)) +#define MUL (HI(28) | LO(2)) +#define MUL_D (HI(17) | FMT_D | LO(2)) +#define MULT (HI(0) | LO(24)) +#define NOP (HI(0) | LO(0)) +#define NOR (HI(0) | LO(39)) +#define OR (HI(0) | LO(37)) +#define ORI (HI(13)) +#define SD (HI(63)) +#define SDC1 (HI(61)) +#define SEB (HI(31) | (16 << 6) | LO(32)) +#define SEH (HI(31) | (24 << 6) | LO(32)) +#define SLT (HI(0) | LO(42)) +#define SLTI (HI(10)) +#define SLTIU (HI(11)) +#define SLTU (HI(0) | LO(43)) +#define SLL (HI(0) | LO(0)) +#define SLLV (HI(0) | LO(4)) +#define SRL (HI(0) | LO(2)) +#define SRLV (HI(0) | LO(6)) +#define SRA (HI(0) | LO(3)) +#define SRAV (HI(0) | LO(7)) +#define SUB_D (HI(17) | FMT_D | LO(1)) +#define SUBU (HI(0) | LO(35)) +#define SW (HI(43)) +#define XOR (HI(0) | LO(38)) +#define XORI (HI(14)) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define ADDU_W ADDU +#define ADDIU_W ADDIU +#define EXT_W EXT +#define SLL_W SLL +#define SUBU_W SUBU +#else +#define ADDU_W DADDU +#define ADDIU_W DADDIU +#define EXT_W DEXT +#define SLL_W DSLL +#define SUBU_W DSUBU +#endif + +#define SIMM_MAX (0x7fff) +#define SIMM_MIN (-0x8000) +#define UIMM_MAX (0xffff) + +static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { + 0, 2, 5, 6, 3, 4, 17, 18, 19, 20, 21, 16, 8, 9, 10, 29 +}; + +/* dest_reg is the absolute name of the register + Useful for reordering instructions in the delay slot. */ +static int push_inst(struct sljit_compiler *compiler, sljit_ins ins, int delay_slot) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + compiler->delay_slot = delay_slot; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_ins invert_branch(int flags) +{ + return (flags & IS_BIT26_COND) ? 
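/* BEQ (opcode 4) and BNE (opcode 5) differ only in instruction bit 26, and the
   BLTZ/BGEZ pair differs only in bit 16 of the rt field, so XOR-ing a branch
   with the mask returned here inverts its sense. */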
(1 << 26) : (1 << 16);
+}
+
+static SLJIT_INLINE sljit_ins* optimize_jump(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
+{
+ sljit_w diff;
+ sljit_uw target_addr;
+ sljit_ins *inst;
+ sljit_ins saved_inst;
+
+ if (jump->flags & SLJIT_REWRITABLE_JUMP)
+ return code_ptr;
+
+ if (jump->flags & JUMP_ADDR)
+ target_addr = jump->u.target;
+ else {
+ SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+ target_addr = (sljit_uw)(code + jump->u.label->size);
+ }
+ inst = (sljit_ins*)jump->addr;
+ if (jump->flags & IS_COND)
+ inst--;
+
+ /* B instructions. */
+ if (jump->flags & IS_MOVABLE) {
+ diff = ((sljit_w)target_addr - (sljit_w)(inst)) >> 2;
+ if (diff <= SIMM_MAX && diff >= SIMM_MIN) {
+ jump->flags |= PATCH_B;
+
+ if (!(jump->flags & IS_COND)) {
+ inst[0] = inst[-1];
+ inst[-1] = (jump->flags & IS_JAL) ? BAL : B;
+ jump->addr -= sizeof(sljit_ins);
+ return inst;
+ }
+ saved_inst = inst[0];
+ inst[0] = inst[-1];
+ inst[-1] = saved_inst ^ invert_branch(jump->flags);
+ jump->addr -= 2 * sizeof(sljit_ins);
+ return inst;
+ }
+ }
+
+ diff = ((sljit_w)target_addr - (sljit_w)(inst + 1)) >> 2;
+ if (diff <= SIMM_MAX && diff >= SIMM_MIN) {
+ jump->flags |= PATCH_B;
+
+ if (!(jump->flags & IS_COND)) {
+ inst[0] = (jump->flags & IS_JAL) ? BAL : B;
+ inst[1] = NOP;
+ return inst + 1;
+ }
+ inst[0] = inst[0] ^ invert_branch(jump->flags);
+ inst[1] = NOP;
+ jump->addr -= sizeof(sljit_ins);
+ return inst + 1;
+ }
+
+ if (jump->flags & IS_COND) {
+ if ((target_addr & ~0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~0xfffffff)) {
+ jump->flags |= PATCH_J;
+ inst[0] = (inst[0] & 0xffff0000) | 3;
+ inst[1] = NOP;
+ inst[2] = J;
+ inst[3] = NOP;
+ jump->addr += sizeof(sljit_ins);
+ return inst + 3;
+ }
+ return code_ptr;
+ }
+
+ /* J instructions. */
+ if (jump->flags & IS_MOVABLE) {
+ if ((target_addr & ~0xfffffff) == (jump->addr & ~0xfffffff)) {
+ jump->flags |= PATCH_J;
+ inst[0] = inst[-1];
+ inst[-1] = (jump->flags & IS_JAL) ? JAL : J;
+ jump->addr -= sizeof(sljit_ins);
+ return inst;
+ }
+ }
+
+ if ((target_addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)) {
+ jump->flags |= PATCH_J;
+ inst[0] = (jump->flags & IS_JAL) ? JAL : J;
+ inst[1] = NOP;
+ return inst + 1;
+ }
+
+ return code_ptr;
+}
+
+#ifdef __GNUC__
+static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_ptr)
+{
+ SLJIT_CACHE_FLUSH(code, code_ptr);
+}
+#endif
+
+void* sljit_generate_code(struct sljit_compiler *compiler)
+{
+ struct sljit_memory_fragment *buf;
+ sljit_ins *code;
+ sljit_ins *code_ptr;
+ sljit_ins *buf_ptr;
+ sljit_ins *buf_end;
+ sljit_uw word_count;
+ sljit_uw addr;
+
+ struct sljit_label *label;
+ struct sljit_jump *jump;
+ struct sljit_const *const_;
+
+ CHECK_ERROR_PTR();
+ check_sljit_generate_code(compiler);
+ reverse_buf(compiler);
+
+ code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
+ PTR_FAIL_WITH_EXEC_IF(code);
+ buf = compiler->buf;
+
+ code_ptr = code;
+ word_count = 0;
+ label = compiler->labels;
+ jump = compiler->jumps;
+ const_ = compiler->consts;
+ do {
+ buf_ptr = (sljit_ins*)buf->memory;
+ buf_end = buf_ptr + (buf->used_size >> 2);
+ do {
+ *code_ptr = *buf_ptr++;
+ SLJIT_ASSERT(!label || label->size >= word_count);
+ SLJIT_ASSERT(!jump || jump->addr >= word_count);
+ SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+ /* These structures are ordered by their address. */
+ if (label && label->size == word_count) {
+ /* Just recording the address. 
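A label only needs its final position. Jumps, in contrast, were emitted as a
   worst-case load-address plus JR/JALR sequence, and optimize_jump above
   shrinks them to a single relative B/BAL or absolute J/JAL whenever the
   displacement or the 256 MB region check permits.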
*/ + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + jump->addr = (sljit_uw)(code_ptr - 3); +#else + jump->addr = (sljit_uw)(code_ptr - 6); +#endif + code_ptr = optimize_jump(jump, code_ptr, code); + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + /* Just recording the address. */ + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + code_ptr ++; + word_count ++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr - code <= (int)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins*)jump->addr; + + if (jump->flags & PATCH_B) { + addr = (sljit_w)(addr - (jump->addr + sizeof(sljit_ins))) >> 2; + SLJIT_ASSERT((sljit_w)addr <= SIMM_MAX && (sljit_w)addr >= SIMM_MIN); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | (addr & 0xffff); + break; + } + if (jump->flags & PATCH_J) { + SLJIT_ASSERT((addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)); + buf_ptr[0] |= (addr >> 2) & 0x0cffffff; + break; + } + + /* Set the fields of immediate loads. */ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); +#else + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff); + buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff); +#endif + } while (0); + jump = jump->next; + } + + compiler->error = SLJIT_ERR_COMPILED; +#ifndef __GNUC__ + SLJIT_CACHE_FLUSH(code, code_ptr); +#else + /* GCC workaround for invalid code generation with -O2. */ + sljit_cache_flush(code, code_ptr); +#endif + return code; +} + +/* Creates an index in data_transfer_insts array. */ +#define WORD_DATA 0x00 +#define BYTE_DATA 0x01 +#define HALF_DATA 0x02 +#define INT_DATA 0x03 +#define SIGNED_DATA 0x04 +#define LOAD_DATA 0x08 + +#define MEM_MASK 0x0f + +#define WRITE_BACK 0x00010 +#define ARG_TEST 0x00020 +#define CUMULATIVE_OP 0x00040 +#define LOGICAL_OP 0x00080 +#define IMM_OP 0x00100 +#define SRC2_IMM 0x00200 + +#define UNUSED_DEST 0x00400 +#define REG_DEST 0x00800 +#define REG1_SOURCE 0x01000 +#define REG2_SOURCE 0x02000 +#define SLOW_SRC1 0x04000 +#define SLOW_SRC2 0x08000 +#define SLOW_DEST 0x10000 + +/* Only these flags are set. UNUSED_DEST is not set when no flags should be set. 
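For example, an AND with SLJIT_SET_E and an unused destination sets
   UNUSED_DEST and requests no other flag, so CHECK_FLAGS(SLJIT_SET_E) below
   evaluates to false in emit_single_op and the destination write is skipped;
   only the EQUAL_FLAG computation remains.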
*/ +#define CHECK_FLAGS(list) \ + (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list)))) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#include "sljitNativeMIPS_32.c" +#else +#include "sljitNativeMIPS_64.c" +#endif + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define STACK_STORE SW +#define STACK_LOAD LW +#else +#define STACK_STORE SD +#define STACK_LOAD LD +#endif + +static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w); + +int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + sljit_ins base; + + CHECK_ERROR(); + check_sljit_emit_enter(compiler, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + + compiler->has_locals = local_size > 0; + local_size += (generals + 2 + 4) * sizeof(sljit_w); + local_size = (local_size + 15) & ~0xf; + compiler->local_size = local_size; + + if (local_size <= SIMM_MAX) { + /* Frequent case. */ + FAIL_IF(push_inst(compiler, ADDIU_W | S(REAL_STACK_PTR) | T(REAL_STACK_PTR) | IMM(-local_size), DR(REAL_STACK_PTR))); + base = S(REAL_STACK_PTR); + } + else { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); + FAIL_IF(push_inst(compiler, ADDU_W | S(REAL_STACK_PTR) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SUBU_W | S(REAL_STACK_PTR) | T(TMP_REG1) | D(REAL_STACK_PTR), DR(REAL_STACK_PTR))); + base = S(TMP_REG2); + local_size = 0; + } + + FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(31) | IMM(local_size - 1 * (int)sizeof(sljit_w)), MOVABLE_INS)); + if (compiler->has_locals) + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_LOCALS_REG) | IMM(local_size - 2 * (int)sizeof(sljit_w)), MOVABLE_INS)); + if (generals >= 1) + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_GENERAL_REG1) | IMM(local_size - 3 * (int)sizeof(sljit_w)), MOVABLE_INS)); + if (generals >= 2) + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_GENERAL_REG2) | IMM(local_size - 4 * (int)sizeof(sljit_w)), MOVABLE_INS)); + if (generals >= 3) + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_GENERAL_REG3) | IMM(local_size - 5 * (int)sizeof(sljit_w)), MOVABLE_INS)); + if (generals >= 4) + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_GENERAL_EREG1) | IMM(local_size - 6 * (int)sizeof(sljit_w)), MOVABLE_INS)); + if (generals >= 5) + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_GENERAL_EREG2) | IMM(local_size - 7 * (int)sizeof(sljit_w)), MOVABLE_INS)); + + if (compiler->has_locals) + FAIL_IF(push_inst(compiler, ADDIU_W | S(REAL_STACK_PTR) | T(SLJIT_LOCALS_REG) | IMM(4 * sizeof(sljit_w)), DR(SLJIT_LOCALS_REG))); + + if (args >= 1) + FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_GENERAL_REG1), DR(SLJIT_GENERAL_REG1))); + if (args >= 2) + FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_GENERAL_REG2), DR(SLJIT_GENERAL_REG2))); + if (args >= 3) + FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_GENERAL_REG3), DR(SLJIT_GENERAL_REG3))); + + return SLJIT_SUCCESS; +} + +void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + CHECK_ERROR_VOID(); + check_sljit_fake_enter(compiler, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + + compiler->has_locals = local_size > 0; + local_size += (generals + 2 + 
4) * sizeof(sljit_w);
+ compiler->local_size = (local_size + 15) & ~0xf;
+}
+
+int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw)
+{
+ int local_size;
+ sljit_ins base;
+
+ CHECK_ERROR();
+ check_sljit_emit_return(compiler, src, srcw);
+
+ local_size = compiler->local_size;
+
+ if (src != SLJIT_UNUSED && src != SLJIT_RETURN_REG)
+ FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, SLJIT_RETURN_REG, 0, TMP_REG1, 0, src, srcw));
+
+ if (local_size <= SIMM_MAX)
+ base = S(REAL_STACK_PTR);
+ else {
+ FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
+ FAIL_IF(push_inst(compiler, ADDU_W | S(REAL_STACK_PTR) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1)));
+ base = S(TMP_REG1);
+ local_size = 0;
+ }
+
+ FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(31) | IMM(local_size - 1 * (int)sizeof(sljit_w)), 31));
+ if (compiler->generals >= 5)
+ FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_GENERAL_EREG2) | IMM(local_size - 7 * (int)sizeof(sljit_w)), DR(SLJIT_GENERAL_EREG2)));
+ if (compiler->generals >= 4)
+ FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_GENERAL_EREG1) | IMM(local_size - 6 * (int)sizeof(sljit_w)), DR(SLJIT_GENERAL_EREG1)));
+ if (compiler->generals >= 3)
+ FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_GENERAL_REG3) | IMM(local_size - 5 * (int)sizeof(sljit_w)), DR(SLJIT_GENERAL_REG3)));
+ if (compiler->generals >= 2)
+ FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_GENERAL_REG2) | IMM(local_size - 4 * (int)sizeof(sljit_w)), DR(SLJIT_GENERAL_REG2)));
+ if (compiler->generals >= 1)
+ FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_GENERAL_REG1) | IMM(local_size - 3 * (int)sizeof(sljit_w)), DR(SLJIT_GENERAL_REG1)));
+ if (compiler->has_locals)
+ FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_LOCALS_REG) | IMM(local_size - 2 * (int)sizeof(sljit_w)), DR(SLJIT_LOCALS_REG)));
+
+ FAIL_IF(push_inst(compiler, JR | SA(31), UNMOVABLE_INS));
+ if (compiler->local_size <= SIMM_MAX)
+ return push_inst(compiler, ADDIU_W | S(REAL_STACK_PTR) | T(REAL_STACK_PTR) | IMM(compiler->local_size), UNMOVABLE_INS);
+ else
+ return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(REAL_STACK_PTR), UNMOVABLE_INS);
+}
+
+#undef STACK_STORE
+#undef STACK_LOAD
+
+/* --------------------------------------------------------------------- */
+/* Operators */
+/* --------------------------------------------------------------------- */
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+#define ARCH_DEPEND(a, b) a
+#else
+#define ARCH_DEPEND(a, b) b
+#endif
+
+static SLJIT_CONST sljit_ins data_transfer_insts[16] = {
+/* s u w */ ARCH_DEPEND(HI(43) /* sw */, HI(63) /* sd */),
+/* s u b */ HI(40) /* sb */,
+/* s u h */ HI(41) /* sh */,
+/* s u i */ HI(43) /* sw */,
+
+/* s s w */ ARCH_DEPEND(HI(43) /* sw */, HI(63) /* sd */),
+/* s s b */ HI(40) /* sb */,
+/* s s h */ HI(41) /* sh */,
+/* s s i */ HI(43) /* sw */,
+
+/* l u w */ ARCH_DEPEND(HI(35) /* lw */, HI(55) /* ld */),
+/* l u b */ HI(36) /* lbu */,
+/* l u h */ HI(37) /* lhu */,
+/* l u i */ ARCH_DEPEND(HI(35) /* lw */, HI(39) /* lwu */),
+
+/* l s w */ ARCH_DEPEND(HI(35) /* lw */, HI(55) /* ld */),
+/* l s b */ HI(32) /* lb */,
+/* l s h */ HI(33) /* lh */,
+/* l s i */ HI(35) /* lw */,
+};
+
+/* reg_ar is an absolute register! */
+
+/* Can perform an operation using at most 1 instruction. 
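E.g. a word load from [SLJIT_GENERAL_REG1 + 8] becomes a single LW, the offset
   fitting the signed 16-bit displacement. The function returns -1 after
   emitting the access, 1 for a successful ARG_TEST probe, and 0 when the
   slower getput_arg path must be used.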
*/
+static int getput_arg_fast(struct sljit_compiler *compiler, int flags, int reg_ar, int arg, sljit_w argw)
+{
+ SLJIT_ASSERT(arg & SLJIT_MEM);
+
+ if (!(flags & WRITE_BACK) && !(arg & 0xf0) && argw <= SIMM_MAX && argw >= SIMM_MIN) {
+ /* Works for both absolute and relative addresses. */
+ if (SLJIT_UNLIKELY(flags & ARG_TEST))
+ return 1;
+ FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(arg & 0xf) | TA(reg_ar) | IMM(argw), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS));
+ return -1;
+ }
+ return (flags & ARG_TEST) ? SLJIT_SUCCESS : 0;
+}
+
+/* See getput_arg below.
+ Note: can_cache is called only for binary operators. Those
+ operators always use word arguments without write back. */
+static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw)
+{
+ if (!(next_arg & SLJIT_MEM))
+ return 0;
+
+ /* Simple operation except for updates. */
+ if (arg & 0xf0) {
+ argw &= 0x3;
+ next_argw &= 0x3;
+ if (argw && argw == next_argw && (arg == next_arg || (arg & 0xf0) == (next_arg & 0xf0)))
+ return 1;
+ return 0;
+ }
+
+ if (arg == next_arg) {
+ if (((sljit_uw)(next_argw - argw) <= SIMM_MAX && (sljit_uw)(next_argw - argw) >= SIMM_MIN))
+ return 1;
+ return 0;
+ }
+
+ return 0;
+}
+
+/* Emit the necessary instructions. See can_cache above. */
+static int getput_arg(struct sljit_compiler *compiler, int flags, int reg_ar, int arg, sljit_w argw, int next_arg, sljit_w next_argw)
+{
+ int tmp_ar;
+ int base;
+
+ SLJIT_ASSERT(arg & SLJIT_MEM);
+ if (!(next_arg & SLJIT_MEM)) {
+ next_arg = 0;
+ next_argw = 0;
+ }
+
+ tmp_ar = (flags & LOAD_DATA) ? reg_ar : DR(TMP_REG3);
+ base = arg & 0xf;
+
+ if (SLJIT_UNLIKELY(arg & 0xf0)) {
+ argw &= 0x3;
+ if ((flags & WRITE_BACK) && reg_ar == DR(base)) {
+ SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar);
+ FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
+ reg_ar = DR(TMP_REG1);
+ }
+
+ /* Using the cache. */
+ if (argw == compiler->cache_argw) {
+ if (!(flags & WRITE_BACK)) {
+ if (arg == compiler->cache_arg)
+ return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+ if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg) {
+ if (arg == next_arg && argw == (next_argw & 0x3)) {
+ compiler->cache_arg = arg;
+ compiler->cache_argw = argw;
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3)));
+ return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+ }
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar));
+ return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS);
+ }
+ }
+ else {
+ if ((SLJIT_MEM | (arg & 0xf0)) == compiler->cache_arg) {
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base)));
+ return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), (flags & LOAD_DATA) ? 
reg_ar : MOVABLE_INS); + } + } + } + + if (SLJIT_UNLIKELY(argw)) { + compiler->cache_arg = SLJIT_MEM | (arg & 0xf0); + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, SLL_W | T((arg >> 4) & 0xf) | D(TMP_REG3) | SH_IMM(argw), DR(TMP_REG3))); + } + + if (!(flags & WRITE_BACK)) { + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? ((arg >> 4) & 0xf) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); + tmp_ar = DR(TMP_REG3); + } + else + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? ((arg >> 4) & 0xf) : TMP_REG3) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + } + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? ((arg >> 4) & 0xf) : TMP_REG3) | D(base), DR(base))); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + } + + if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) { + /* Update only applies if a base register exists. */ + if (reg_ar == DR(base)) { + SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar); + if (argw <= SIMM_MAX && argw >= SIMM_MIN) { + FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar) | IMM(argw), MOVABLE_INS)); + if (argw) + return push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base)); + return SLJIT_SUCCESS; + } + FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + reg_ar = DR(TMP_REG1); + } + + if (argw <= SIMM_MAX && argw >= SIMM_MIN) { + if (argw) + FAIL_IF(push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base))); + } + else { + if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { + if (argw != compiler->cache_argw) { + FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); + compiler->cache_argw = argw; + } + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base))); + } + else { + compiler->cache_arg = SLJIT_MEM; + compiler->cache_argw = argw; + FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw)); + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base))); + } + } + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + } + + if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { + if (argw != compiler->cache_argw) { + FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); + compiler->cache_argw = argw; + } + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? 
reg_ar : MOVABLE_INS); + } + + if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { + if (argw != compiler->cache_argw) + FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); + } + else { + compiler->cache_arg = SLJIT_MEM; + FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw)); + } + compiler->cache_argw = argw; + + if (!base) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + + if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) { + compiler->cache_arg = arg; + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | D(TMP_REG3), DR(TMP_REG3))); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); + } + + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), (flags & LOAD_DATA) ? reg_ar : MOVABLE_INS); +} + +static SLJIT_INLINE int emit_op_mem(struct sljit_compiler *compiler, int flags, int reg_ar, int arg, sljit_w argw) +{ + if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) + return compiler->error; + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0); +} + +static int emit_op(struct sljit_compiler *compiler, int op, int flags, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + int dst_r = TMP_REG2; + int src1_r; + sljit_w src2_r = 0; + int sugg_src2_r = TMP_REG2; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REG3) { + dst_r = dst; + flags |= REG_DEST; + if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) + sugg_src2_r = dst_r; + } + else if (dst == SLJIT_UNUSED) { + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + return SLJIT_SUCCESS; + if (GET_FLAGS(op)) + flags |= UNUSED_DEST; + } + else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw)) + flags |= SLOW_DEST; + + if (flags & IMM_OP) { + if ((src2 & SLJIT_IMM) && src2w) { + if ((!(flags & LOGICAL_OP) && (src2w <= SIMM_MAX && src2w >= SIMM_MIN)) + || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_MAX))) { + flags |= SRC2_IMM; + src2_r = src2w; + } + } + if ((src1 & SLJIT_IMM) && src1w && (flags & CUMULATIVE_OP) && !(flags & SRC2_IMM)) { + if ((!(flags & LOGICAL_OP) && (src1w <= SIMM_MAX && src1w >= SIMM_MIN)) + || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_MAX))) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. */ + } + } + } + + /* Source 1. 
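Note the swap above: for cumulative operations such as ADD, an immediate
   first operand is exchanged with src2 so that the single-instruction
   immediate forms (ADDIU, ORI and friends) still apply; "imm + reg" is
   rewritten as "reg + imm".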
*/ + if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= TMP_REG3) { + src1_r = src1; + flags |= REG1_SOURCE; + } + else if (src1 & SLJIT_IMM) { + if (src1w) { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); + src1_r = TMP_REG1; + } + else + src1_r = 0; + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + src1_r = TMP_REG1; + } + + /* Source 2. */ + if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REG3) { + src2_r = src2; + flags |= REG2_SOURCE; + if (!(flags & REG_DEST) && GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) + dst_r = src2_r; + } + else if (src2 & SLJIT_IMM) { + if (!(flags & SRC2_IMM)) { + if (src2w || (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI)) { + FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w)); + src2_r = sugg_src2_r; + } + else + src2_r = 0; + } + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + src2_r = sugg_src2_r; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + SLJIT_ASSERT(src2_r == TMP_REG2); + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w, dst, dstw)); + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, flags, DR(dst_r), dst, dstw); + return compiler->error; + } + return getput_arg(compiler, flags, DR(dst_r), dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +int sljit_emit_op0(struct sljit_compiler *compiler, int op) +{ + CHECK_ERROR(); + check_sljit_emit_op0(compiler, op); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, BREAK, UNMOVABLE_INS); + case SLJIT_NOP: + return push_inst(compiler, NOP, UNMOVABLE_INS); + } + + return SLJIT_SUCCESS; +} + +int sljit_emit_op1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + #define inp_flags 0 +#endif + + CHECK_ERROR(); + check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + + SLJIT_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_UI: + return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_SI: + return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_UB: + return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
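/* Immediates are pre-truncated to the width of the move; e.g. a hypothetical
   srcw of 0x1ff stored as an unsigned byte becomes 0xff, matching what a later
   LBU of the stored value would produce. */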
(unsigned char)srcw : srcw); + + case SLJIT_MOV_SB: + return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); + + case SLJIT_MOV_UH: + return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw); + + case SLJIT_MOV_SH: + return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); + + case SLJIT_MOVU: + return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_UI: + return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_SI: + return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_UB: + return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw); + + case SLJIT_MOVU_SB: + return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); + + case SLJIT_MOVU_UH: + return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw); + + case SLJIT_MOVU_SH: + return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); + + case SLJIT_NOT: + return emit_op(compiler, op, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_NEG: + return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), inp_flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); + + case SLJIT_CLZ: + return emit_op(compiler, op, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw); + } + + return SLJIT_SUCCESS; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + #undef inp_flags +#endif +} + +int sljit_emit_op2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + #define inp_flags 0 +#endif + + CHECK_ERROR(); + check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + return emit_op(compiler, op, inp_flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + return emit_op(compiler, op, inp_flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + return emit_op(compiler, op, inp_flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, inp_flags | CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (src2 & SLJIT_IMM) + src2w &= 0x1f; +#else + if (src2 & SLJIT_IMM) + src2w &= 0x3f; +#endif + return emit_op(compiler, op, inp_flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + #undef inp_flags +#endif +} + +/* 
--------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +int sljit_is_fpu_available(void) +{ +#if 0 + sljit_w fir; + asm ("cfc1 %0, $0" : "=r"(fir)); + return (fir >> 22) & 0x1; +#endif + /* Qemu says fir is 0 by default. */ + return 1; +} + +static int emit_fpu_data_transfer(struct sljit_compiler *compiler, int fpu_reg, int load, int arg, sljit_w argw) +{ + int hi_reg; + + SLJIT_ASSERT(arg & SLJIT_MEM); + + /* Fast loads and stores. */ + if (!(arg & 0xf0)) { + /* Both for (arg & 0xf) == SLJIT_UNUSED and (arg & 0xf) != SLJIT_UNUSED. */ + if (argw <= SIMM_MAX && argw >= SIMM_MIN) + return push_inst(compiler, (load ? LDC1 : SDC1) | S(arg & 0xf) | FT(fpu_reg) | IMM(argw), MOVABLE_INS); + } + + if (arg & 0xf0) { + argw &= 0x3; + hi_reg = (arg >> 4) & 0xf; + if (argw) { + FAIL_IF(push_inst(compiler, SLL_W | T(hi_reg) | D(TMP_REG1) | SH_IMM(argw), DR(TMP_REG1))); + hi_reg = TMP_REG1; + } + FAIL_IF(push_inst(compiler, ADDU_W | S(hi_reg) | T(arg & 0xf) | D(TMP_REG1), DR(TMP_REG1))); + return push_inst(compiler, (load ? LDC1 : SDC1) | S(TMP_REG1) | FT(fpu_reg) | IMM(0), MOVABLE_INS); + } + + /* Use cache. */ + if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) + return push_inst(compiler, (load ? LDC1 : SDC1) | S(TMP_REG3) | FT(fpu_reg) | IMM(argw - compiler->cache_argw), MOVABLE_INS); + + /* Put value to cache. */ + compiler->cache_arg = arg; + compiler->cache_argw = argw; + + FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw)); + if (arg & 0xf) + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(arg & 0xf) | D(TMP_REG3), DR(TMP_REG3))); + return push_inst(compiler, (load ? LDC1 : SDC1) | S(TMP_REG3) | FT(fpu_reg) | IMM(0), MOVABLE_INS); +} + +int sljit_emit_fop1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + int dst_fr; + + CHECK_ERROR(); + check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (GET_OPCODE(op) == SLJIT_FCMP) { + if (dst > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, dst, dstw)); + dst = TMP_FREG1; + } + if (src > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src, srcw)); + src = TMP_FREG2; + } + + /* src and dst are swapped. */ + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, C_UEQ_D | FT(src) | FS(dst) | (EQUAL_BIT << 8), FCSR_FCC + EQUAL_BIT)); + if (op & SLJIT_SET_S) { + FAIL_IF(push_inst(compiler, C_ULT_D | FT(src) | FS(dst) | (LESS_BIT << 8), FCSR_FCC + LESS_BIT)); + FAIL_IF(push_inst(compiler, C_ULT_D | FT(dst) | FS(src) | (GREATER_BIT << 8), FCSR_FCC + GREATER_BIT)); + } + return push_inst(compiler, C_UN_D | FT(src) | FS(dst) | (UNORD_BIT << 8), FCSR_FCC + UNORD_BIT); + } + + dst_fr = (dst > SLJIT_FLOAT_REG4) ? 
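/* When dst is not a plain FPU register (an SLJIT_MEM operand), the result is
   computed into TMP_FREG1 and written back at the end of this function by
   emit_fpu_data_transfer. */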
TMP_FREG1 : dst; + + if (src > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, dst_fr, 1, src, srcw)); + src = dst_fr; + } + + switch (op) { + case SLJIT_FMOV: + if (src != dst_fr && dst_fr != TMP_FREG1) + FAIL_IF(push_inst(compiler, MOV_D | FS(src) | FD(dst_fr), MOVABLE_INS)); + break; + case SLJIT_FNEG: + FAIL_IF(push_inst(compiler, NEG_D | FS(src) | FD(dst_fr), MOVABLE_INS)); + break; + case SLJIT_FABS: + FAIL_IF(push_inst(compiler, ABS_D | FS(src) | FD(dst_fr), MOVABLE_INS)); + break; + } + + if (dst_fr == TMP_FREG1) + FAIL_IF(emit_fpu_data_transfer(compiler, src, 0, dst, dstw)); + + return SLJIT_SUCCESS; +} + +int sljit_emit_fop2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + int dst_fr; + + CHECK_ERROR(); + check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst; + + if (src2 > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w)); + src2 = TMP_FREG2; + } + + if (src1 > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, src1, src1w)); + src1 = TMP_FREG1; + } + + switch (op) { + case SLJIT_FADD: + FAIL_IF(push_inst(compiler, ADD_D | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); + break; + + case SLJIT_FSUB: + FAIL_IF(push_inst(compiler, SUB_D | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); + break; + + case SLJIT_FMUL: + FAIL_IF(push_inst(compiler, MUL_D | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); + break; + + case SLJIT_FDIV: + FAIL_IF(push_inst(compiler, DIV_D | FT(src2) | FS(src1) | FD(dst_fr), MOVABLE_INS)); + break; + } + + if (dst_fr == TMP_FREG1) + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 0, dst, dstw)); + + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +{ + CHECK_ERROR(); + check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + + compiler->has_locals = local_size > 0; + local_size += (generals + 2 + 4) * sizeof(sljit_w); + compiler->local_size = (local_size + 15) & ~0xf; + + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) + return push_inst(compiler, ADDU_W | SA(31) | TA(0) | D(dst), DR(dst)); + else if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_DATA, 31, dst, dstw); + return SLJIT_SUCCESS; +} + +int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + CHECK_ERROR(); + check_sljit_emit_fast_return(compiler, src, srcw); + + if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) + FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(31), 31)); + else if (src & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, 31, src, srcw)); + else if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, 31, srcw)); + + FAIL_IF(push_inst(compiler, JR | SA(31), UNMOVABLE_INS)); + return push_inst(compiler, NOP, UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* 
--------------------------------------------------------------------- */ + +struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + check_sljit_emit_label(compiler); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + compiler->delay_slot = UNMOVABLE_INS; + return label; +} + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define JUMP_LENGTH 4 +#else +#define JUMP_LENGTH 7 +#endif + +#define BR_Z(src) \ + inst = BEQ | SA(src) | TA(0) | JUMP_LENGTH; \ + flags = IS_BIT26_COND; \ + delay_check = src; + +#define BR_NZ(src) \ + inst = BNE | SA(src) | TA(0) | JUMP_LENGTH; \ + flags = IS_BIT26_COND; \ + delay_check = src; + +#define BR_T(bit) \ + inst = BC1T | (bit << 18) | JUMP_LENGTH; \ + flags = IS_BIT16_COND; \ + delay_check = FCSR_FCC + bit; + +#define BR_F(bit) \ + inst = BC1F | (bit << 18) | JUMP_LENGTH; \ + flags = IS_BIT16_COND; \ + delay_check = FCSR_FCC + bit; + +struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type) +{ + struct sljit_jump *jump; + sljit_ins inst; + int flags = 0; + int delay_check = UNMOVABLE_INS; + + CHECK_ERROR_PTR(); + check_sljit_emit_jump(compiler, type); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + switch (type) { + case SLJIT_C_EQUAL: + BR_NZ(EQUAL_FLAG); + break; + case SLJIT_C_NOT_EQUAL: + BR_Z(EQUAL_FLAG); + break; + case SLJIT_C_LESS: + BR_Z(ULESS_FLAG); + break; + case SLJIT_C_GREATER_EQUAL: + BR_NZ(ULESS_FLAG); + break; + case SLJIT_C_GREATER: + BR_Z(UGREATER_FLAG); + break; + case SLJIT_C_LESS_EQUAL: + BR_NZ(UGREATER_FLAG); + break; + case SLJIT_C_SIG_LESS: + BR_Z(LESS_FLAG); + break; + case SLJIT_C_SIG_GREATER_EQUAL: + BR_NZ(LESS_FLAG); + break; + case SLJIT_C_SIG_GREATER: + BR_Z(GREATER_FLAG); + break; + case SLJIT_C_SIG_LESS_EQUAL: + BR_NZ(GREATER_FLAG); + break; + case SLJIT_C_OVERFLOW: + case SLJIT_C_MUL_OVERFLOW: + BR_Z(OVERFLOW_FLAG); + break; + case SLJIT_C_NOT_OVERFLOW: + case SLJIT_C_MUL_NOT_OVERFLOW: + BR_NZ(OVERFLOW_FLAG); + break; + case SLJIT_C_FLOAT_EQUAL: + BR_F(EQUAL_BIT); + break; + case SLJIT_C_FLOAT_NOT_EQUAL: + BR_T(EQUAL_BIT); + break; + case SLJIT_C_FLOAT_LESS: + BR_F(LESS_BIT); + break; + case SLJIT_C_FLOAT_GREATER_EQUAL: + BR_T(LESS_BIT); + break; + case SLJIT_C_FLOAT_GREATER: + BR_F(GREATER_BIT); + break; + case SLJIT_C_FLOAT_LESS_EQUAL: + BR_T(GREATER_BIT); + break; + case SLJIT_C_FLOAT_NAN: + BR_F(UNORD_BIT); + break; + case SLJIT_C_FLOAT_NOT_NAN: + BR_T(UNORD_BIT); + break; + default: + /* Not conditional branch. 
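For conditional types, the code below emits an inverted branch whose offset
   (JUMP_LENGTH) skips the absolute jump, then a two-instruction constant load
   into TMP_REG2, a JR or JALR through it, and a NOP in the delay slot;
   optimize_jump may later rewrite this long form into one short branch.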
*/ + inst = 0; + break; + } + + jump->flags |= flags; + if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != delay_check)) + jump->flags |= IS_MOVABLE; + + if (inst) + PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS)); + + if (type >= SLJIT_CALL1) + PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_TEMPORARY_REG1) | TA(0) | DA(4), 4)); + + PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + if (type <= SLJIT_JUMP) + PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + else { + jump->flags |= IS_JAL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(31), UNMOVABLE_INS)); + } + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + return jump; +} + +#define RESOLVE_IMM1() \ + if (src1 & SLJIT_IMM) { \ + if (src1w) { \ + PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); \ + src1 = TMP_REG1; \ + } \ + else \ + src1 = 0; \ + } + +#define RESOLVE_IMM2() \ + if (src2 & SLJIT_IMM) { \ + if (src2w) { \ + PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \ + src2 = TMP_REG2; \ + } \ + else \ + src2 = 0; \ + } + +struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, int type, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + struct sljit_jump *jump; + int flags; + sljit_ins inst; + + CHECK_ERROR_PTR(); + check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + flags = ((type & SLJIT_INT_OP) ? INT_DATA : WORD_DATA) | LOAD_DATA; + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, flags, DR(TMP_REG1), src1, src1w)) + PTR_FAIL_IF(compiler->error); + else + PTR_FAIL_IF(getput_arg(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w)); + src1 = TMP_REG1; + } + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, flags, DR(TMP_REG2), src2, src2w)) + PTR_FAIL_IF(compiler->error); + else + PTR_FAIL_IF(getput_arg(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0)); + src2 = TMP_REG2; + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type <= SLJIT_C_NOT_EQUAL) { + RESOLVE_IMM1(); + RESOLVE_IMM2(); + jump->flags |= IS_BIT26_COND; + if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2))) + jump->flags |= IS_MOVABLE; + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_C_EQUAL ? 
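/* The sense is inverted (BNE for an equality jump) because this short branch
   skips over the absolute JR sequence emitted below; taking the long jump is
   what realizes the condition. */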
BNE : BEQ) | S(src1) | T(src2) | JUMP_LENGTH, UNMOVABLE_INS)); + } + else if (type >= SLJIT_C_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) { + inst = NOP; + if ((src1 & SLJIT_IMM) && (src1w == 0)) { + RESOLVE_IMM2(); + switch (type) { + case SLJIT_C_SIG_LESS: + inst = BLEZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_C_SIG_GREATER_EQUAL: + inst = BGTZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_C_SIG_GREATER: + inst = BGEZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_C_SIG_LESS_EQUAL: + inst = BLTZ; + jump->flags |= IS_BIT16_COND; + break; + } + src1 = src2; + } + else { + RESOLVE_IMM1(); + switch (type) { + case SLJIT_C_SIG_LESS: + inst = BGEZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_C_SIG_GREATER_EQUAL: + inst = BLTZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_C_SIG_GREATER: + inst = BLEZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_C_SIG_LESS_EQUAL: + inst = BGTZ; + jump->flags |= IS_BIT26_COND; + break; + } + } + PTR_FAIL_IF(push_inst(compiler, inst | S(src1) | JUMP_LENGTH, UNMOVABLE_INS)); + } + else { + if (type == SLJIT_C_LESS || type == SLJIT_C_GREATER_EQUAL || type == SLJIT_C_SIG_LESS || type == SLJIT_C_SIG_GREATER_EQUAL) { + RESOLVE_IMM1(); + if ((src2 & SLJIT_IMM) && src2w <= SIMM_MAX && src2w >= SIMM_MIN) + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1))); + else { + RESOLVE_IMM2(); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTU : SLT) | S(src1) | T(src2) | D(TMP_REG1), DR(TMP_REG1))); + } + type = (type == SLJIT_C_LESS || type == SLJIT_C_SIG_LESS) ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL; + } + else { + RESOLVE_IMM2(); + if ((src1 & SLJIT_IMM) && src1w <= SIMM_MAX && src1w >= SIMM_MIN) + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1))); + else { + RESOLVE_IMM1(); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_C_LESS_EQUAL ? SLTU : SLT) | S(src2) | T(src1) | D(TMP_REG1), DR(TMP_REG1))); + } + type = (type == SLJIT_C_GREATER || type == SLJIT_C_SIG_GREATER) ? SLJIT_C_NOT_EQUAL : SLJIT_C_EQUAL; + } + + jump->flags |= IS_BIT26_COND; + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_C_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | JUMP_LENGTH, UNMOVABLE_INS)); + } + + PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + return jump; +} + +#undef RESOLVE_IMM1 +#undef RESOLVE_IMM2 + +#undef JUMP_LENGTH +#undef BR_Z +#undef BR_NZ +#undef BR_T +#undef BR_F + +int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw) +{ + int src_r = TMP_REG2; + struct sljit_jump *jump = NULL; + + CHECK_ERROR(); + check_sljit_emit_ijump(compiler, type, src, srcw); + + if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) { + if (DR(src) < 4 || DR(src) > 6) + src_r = src; + else + FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + } + + if (type >= SLJIT_CALL1) + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_TEMPORARY_REG1) | TA(0) | DA(4), 4)); + + if (src & SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_CALL0) ? 
IS_JAL : 0)); + jump->u.target = srcw; + + if (compiler->delay_slot != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + + FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + } + else if (src & SLJIT_MEM) + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); + + if (type <= SLJIT_JUMP) + FAIL_IF(push_inst(compiler, JR | S(src_r), UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(31), UNMOVABLE_INS)); + if (jump) + jump->addr = compiler->size; + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + return SLJIT_SUCCESS; +} + +int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type) +{ + int sugg_dst_ar, dst_ar; + + CHECK_ERROR(); + check_sljit_emit_cond_value(compiler, op, dst, dstw, type); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + sugg_dst_ar = DR((op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2); + + switch (type) { + case SLJIT_C_EQUAL: + case SLJIT_C_NOT_EQUAL: + FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); + dst_ar = sugg_dst_ar; + break; + case SLJIT_C_LESS: + case SLJIT_C_GREATER_EQUAL: + dst_ar = ULESS_FLAG; + break; + case SLJIT_C_GREATER: + case SLJIT_C_LESS_EQUAL: + dst_ar = UGREATER_FLAG; + break; + case SLJIT_C_SIG_LESS: + case SLJIT_C_SIG_GREATER_EQUAL: + dst_ar = LESS_FLAG; + break; + case SLJIT_C_SIG_GREATER: + case SLJIT_C_SIG_LESS_EQUAL: + dst_ar = GREATER_FLAG; + break; + case SLJIT_C_OVERFLOW: + case SLJIT_C_NOT_OVERFLOW: + dst_ar = OVERFLOW_FLAG; + break; + case SLJIT_C_MUL_OVERFLOW: + case SLJIT_C_MUL_NOT_OVERFLOW: + FAIL_IF(push_inst(compiler, SLTIU | SA(OVERFLOW_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); + dst_ar = sugg_dst_ar; + type ^= 0x1; /* Flip type bit for the XORI below. 
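+			   The SLTIU above computed (OVERFLOW_FLAG == 0), so the
+			   register already holds the MUL_NOT_OVERFLOW truth value.
+			   Assuming the usual even/odd pairing of sljit condition
+			   codes, flipping the low bit sends SLJIT_C_MUL_OVERFLOW
+			   through the inverting XORI and lets
+			   SLJIT_C_MUL_NOT_OVERFLOW skip it.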
*/ + break; + default: + if (type >= SLJIT_C_FLOAT_EQUAL && type <= SLJIT_C_FLOAT_NOT_NAN) { + FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(31), sugg_dst_ar)); + switch (type) { + case SLJIT_C_FLOAT_EQUAL: + case SLJIT_C_FLOAT_NOT_EQUAL: + dst_ar = EQUAL_BIT + 24; + break; + case SLJIT_C_FLOAT_LESS: + case SLJIT_C_FLOAT_GREATER_EQUAL: + dst_ar = LESS_BIT + 24; + break; + case SLJIT_C_FLOAT_GREATER: + case SLJIT_C_FLOAT_LESS_EQUAL: + dst_ar = GREATER_BIT + 24; + break; + case SLJIT_C_FLOAT_NAN: + case SLJIT_C_FLOAT_NOT_NAN: + dst_ar = UNORD_BIT + 24; + break; + } + FAIL_IF(push_inst(compiler, EXT_W | SA(sugg_dst_ar) | TA(sugg_dst_ar) | (dst_ar << 6), sugg_dst_ar)); + } + dst_ar = sugg_dst_ar; + break; + } + + if (type & 0x1) { + FAIL_IF(push_inst(compiler, XORI | SA(dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); + dst_ar = sugg_dst_ar; + } + + if (GET_OPCODE(op) == SLJIT_OR) { + if (DR(TMP_REG2) != dst_ar) + FAIL_IF(push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, dst, dstw, TMP_REG2, 0); + } + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw); + + if (sugg_dst_ar != dst_ar) + return push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | DA(sugg_dst_ar), sugg_dst_ar); + return SLJIT_SUCCESS; +} + +struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value) +{ + struct sljit_const *const_; + int reg; + + CHECK_ERROR_PTR(); + check_sljit_emit_const(compiler, dst, dstw, init_value); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2; + + PTR_FAIL_IF(emit_const(compiler, reg, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + return const_; +} diff --git a/sljit/sljitNativePPC_32.c b/sljit/sljitNativePPC_32.c new file mode 100644 index 0000000..5dedfe5 --- /dev/null +++ b/sljit/sljitNativePPC_32.c @@ -0,0 +1,246 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ppc 32-bit arch dependent functions. */ + +static int load_immediate(struct sljit_compiler *compiler, int reg, sljit_w imm) +{ + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; +} + +#define INS_CLEAR_LEFT(dst, src, from) \ + (RLWINM | S(src) | A(dst) | ((from) << 6) | (31 << 1)) + +static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, int flags, + int dst, int src1, int src2) +{ + switch (op) { + case SLJIT_ADD: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); + } + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); + return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + + case SLJIT_ADDC: + if (flags & ALT_FORM1) { + FAIL_IF(push_inst(compiler, MFXER | S(0))); + FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2))); + return push_inst(compiler, MTXER | S(0)); + } + return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); + + case SLJIT_SUB: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, CMPLI | CRD(4) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM4) + return push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2)); + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + if (flags & ALT_FORM5) + FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2))); + return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + + case SLJIT_SUBC: + if (flags & ALT_FORM1) { + FAIL_IF(push_inst(compiler, MFXER | S(0))); + FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1))); + return push_inst(compiler, MTXER | S(0)); + } + return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); + + case SLJIT_MUL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); + } + return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1)); + + case SLJIT_AND: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm); + 
} + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm); + } + return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_OR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm))); + return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); + } + return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_XOR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm))); + return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); + } + return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_SHL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + compiler->imm &= 0x1f; + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1)); + } + return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_LSHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + compiler->imm &= 0x1f; + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1)); + } + return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ASHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + compiler->imm &= 0x1f; + return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)); + } + return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + SLJIT_ASSERT(src1 == TMP_REG1); + if (dst != src2) + return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SB) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); + } + else if ((flags & REG_DEST) && op == SLJIT_MOV_SB) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SH) + return push_inst(compiler, EXTSH | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); + + case SLJIT_NEG: + SLJIT_ASSERT(src1 == TMP_REG1); + 
return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst)); + } + + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE int emit_const(struct sljit_compiler *compiler, int reg, sljit_w init_value) +{ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); +} + +void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} + +void sljit_set_const(sljit_uw addr, sljit_w new_constant) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} diff --git a/sljit/sljitNativePPC_64.c b/sljit/sljitNativePPC_64.c new file mode 100644 index 0000000..5e9b7fb --- /dev/null +++ b/sljit/sljitNativePPC_64.c @@ -0,0 +1,415 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ppc 64-bit arch dependent functions. 
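+   For example (illustrative), emit_const below materialises
+   0x123456789abcdef0 with the full five-instruction pattern:
+     addis  reg, 0, 0x1234      ; reg = 0x0000000012340000
+     ori    reg, reg, 0x5678    ; reg = 0x0000000012345678
+     rldicr reg, reg, 32, 31    ; reg = 0x1234567800000000
+     oris   reg, reg, 0x9abc    ; reg = 0x123456789abc0000
+     ori    reg, reg, 0xdef0    ; reg = 0x123456789abcdef0
+   load_immediate below takes shortcuts whenever the value fits in
+   fewer instructions.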
*/ + +#ifdef __GNUC__ +#define ASM_SLJIT_CLZ(src, dst) \ + asm volatile ( "cntlzd %0, %1" : "=r"(dst) : "r"(src) ) +#else +#error "Must implement count leading zeroes" +#endif + +#define RLDI(dst, src, sh, mb, type) \ + (HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20)) + +#define PUSH_RLDICR(reg, shift) \ + push_inst(compiler, RLDI(reg, reg, 63 - shift, shift, 1)) + +static int load_immediate(struct sljit_compiler *compiler, int reg, sljit_w imm) +{ + sljit_uw tmp; + sljit_uw shift; + sljit_uw tmp2; + sljit_uw shift2; + + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + + if (imm <= SLJIT_W(0x7fffffff) && imm >= SLJIT_W(-0x80000000)) { + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; + } + + /* Count leading zeroes. */ + tmp = (imm >= 0) ? imm : ~imm; + ASM_SLJIT_CLZ(tmp, shift); + SLJIT_ASSERT(shift > 0); + shift--; + tmp = (imm << shift); + + if ((tmp & ~0xffff000000000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + shift += 15; + return PUSH_RLDICR(reg, shift); + } + + if ((tmp & ~0xffffffff00000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32))); + shift += 31; + return PUSH_RLDICR(reg, shift); + } + + /* Cut out the 16 bit from immediate. */ + shift += 15; + tmp2 = imm & ((1ul << (63 - shift)) - 1); + + if (tmp2 <= 0xffff) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(PUSH_RLDICR(reg, shift)); + return push_inst(compiler, ORI | S(reg) | A(reg) | tmp2); + } + + if (tmp2 <= 0xffffffff) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(PUSH_RLDICR(reg, shift)); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (tmp2 >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS; + } + + ASM_SLJIT_CLZ(tmp2, shift2); + tmp2 <<= shift2; + + if ((tmp2 & ~0xffff000000000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + shift2 += 15; + shift += (63 - shift2); + FAIL_IF(PUSH_RLDICR(reg, shift)); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (tmp2 >> 48))); + return PUSH_RLDICR(reg, shift2); + } + + /* The general version. */ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32))); + FAIL_IF(PUSH_RLDICR(reg, 31)); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)); +} + +/* Simplified mnemonics: clrldi. */ +#define INS_CLEAR_LEFT(dst, src, from) \ + (RLDICL | S(src) | A(dst) | ((from) << 6) | (1 << 5)) + +/* Sign extension for integer operations. 
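+   On ppc-64 a 32-bit (SLJIT_INT_OP) operation that sets flags must see
+   sign-extended operands, since comparisons and overflow detection are
+   performed in 64 bits: e.g. the 32-bit pattern 0xffffffff has to
+   compare as -1, not as 4294967295. The extsw copies below widen such
+   operands into the temporary registers first.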
*/ +#define UN_EXTS() \ + if ((flags & (ALT_SIGN_EXT | REG2_SOURCE)) == (ALT_SIGN_EXT | REG2_SOURCE)) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \ + src2 = TMP_REG2; \ + } + +#define BIN_EXTS() \ + if (flags & ALT_SIGN_EXT) { \ + if (flags & REG1_SOURCE) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \ + src1 = TMP_REG1; \ + } \ + if (flags & REG2_SOURCE) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \ + src2 = TMP_REG2; \ + } \ + } + +#define BIN_IMM_EXTS() \ + if ((flags & (ALT_SIGN_EXT | REG1_SOURCE)) == (ALT_SIGN_EXT | REG1_SOURCE)) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \ + src1 = TMP_REG1; \ + } + +static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, int flags, + int dst, int src1, int src2) +{ + switch (op) { + case SLJIT_ADD: + if (flags & ALT_FORM1) { + /* Flags not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM2) { + /* Flags not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + BIN_IMM_EXTS(); + return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); + } + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); + BIN_EXTS(); + return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + + case SLJIT_ADDC: + if (flags & ALT_FORM1) { + FAIL_IF(push_inst(compiler, MFXER | S(0))); + FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2))); + return push_inst(compiler, MTXER | S(0)); + } + BIN_EXTS(); + return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); + + case SLJIT_SUB: + if (flags & ALT_FORM1) { + /* Flags not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, CMPLI | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM4) + return push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)); + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + BIN_EXTS(); + if (flags & ALT_FORM5) + FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 
0 : 1)) | A(src1) | B(src2))); + return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + + case SLJIT_SUBC: + if (flags & ALT_FORM1) { + FAIL_IF(push_inst(compiler, MFXER | S(0))); + FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1))); + return push_inst(compiler, MTXER | S(0)); + } + BIN_EXTS(); + return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); + + case SLJIT_MUL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); + } + BIN_EXTS(); + if (flags & ALT_FORM2) + return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, MULLD | OERC(flags) | D(dst) | A(src2) | B(src1)); + + case SLJIT_AND: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm); + } + return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_OR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm))); + return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); + } + return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_XOR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm))); + return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); + } + return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_SHL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + if (flags & ALT_FORM2) { + compiler->imm &= 0x1f; + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1)); + } + else { + compiler->imm &= 0x3f; + return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags)); + } + } + if (flags & ALT_FORM2) + return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2)); + return push_inst(compiler, SLD | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_LSHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + if (flags & ALT_FORM2) { + compiler->imm &= 0x1f; + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1)); + } + else { + compiler->imm &= 0x3f; + return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags)); + } + } + if (flags & ALT_FORM2) + return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2)); + return push_inst(compiler, SRD | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ASHR: + if (flags & ALT_FORM1) { + 
SLJIT_ASSERT(src2 == TMP_REG2); + if (flags & ALT_FORM2) { + compiler->imm &= 0x1f; + return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)); + } + else { + compiler->imm &= 0x3f; + return push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4)); + } + } + if (flags & ALT_FORM2) + return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2)); + return push_inst(compiler, SRAD | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1); + if (dst != src2) + return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SI) + return push_inst(compiler, EXTSW | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SB) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); + } + else if ((flags & REG_DEST) && op == SLJIT_MOV_SB) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SH) + return push_inst(compiler, EXTSH | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1); + UN_EXTS(); + return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); + + case SLJIT_NEG: + SLJIT_ASSERT(src1 == TMP_REG1); + UN_EXTS(); + return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1); + if (flags & ALT_FORM1) + return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst)); + return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst)); + } + + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE int emit_const(struct sljit_compiler *compiler, int reg, sljit_w init_value) +{ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value >> 32))); + FAIL_IF(PUSH_RLDICR(reg, 31)); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(init_value >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); +} + +void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff); + inst[4] = (inst[4] & 0xffff0000) | (new_addr & 0xffff); + SLJIT_CACHE_FLUSH(inst, inst + 5); +} + +void sljit_set_const(sljit_uw addr, sljit_w new_constant) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff); + 
inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff); + inst[4] = (inst[4] & 0xffff0000) | (new_constant & 0xffff); + SLJIT_CACHE_FLUSH(inst, inst + 5); +} + +void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_w addr, void* func) +{ + sljit_w* ptrs; + if (func_ptr) + *func_ptr = (void*)context; + ptrs = (sljit_w*)func; + context->addr = addr ? addr : ptrs[0]; + context->r2 = ptrs[1]; + context->r11 = ptrs[2]; +} diff --git a/sljit/sljitNativePPC_common.c b/sljit/sljitNativePPC_common.c new file mode 100644 index 0000000..0ad1a35 --- /dev/null +++ b/sljit/sljitNativePPC_common.c @@ -0,0 +1,1784 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_CONST char* sljit_get_platform_name() +{ +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + return "ppc-32"; +#else + return "ppc-64"; +#endif +} + +/* Length of an instruction word. + Both for ppc-32 and ppc-64. */ +typedef sljit_ui sljit_ins; + +#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) +#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) +#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) +#define ZERO_REG (SLJIT_NO_REGISTERS + 4) +#define REAL_STACK_PTR (SLJIT_NO_REGISTERS + 5) + +#define TMP_FREG1 (SLJIT_FLOAT_REG4 + 1) +#define TMP_FREG2 (SLJIT_FLOAT_REG4 + 2) + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ +#define D(d) (reg_map[d] << 21) +#define S(s) (reg_map[s] << 21) +#define A(a) (reg_map[a] << 16) +#define B(b) (reg_map[b] << 11) +#define C(c) (reg_map[c] << 6) +#define FD(fd) ((fd) << 21) +#define FA(fa) ((fa) << 16) +#define FB(fb) ((fb) << 11) +#define FC(fc) ((fc) << 6) +#define IMM(imm) ((imm) & 0xffff) +#define CRD(d) ((d) << 21) + +/* Instruction bit sections. + OE and Rc flag (see ALT_SET_FLAGS). */ +#define OERC(flags) (((flags & ALT_SET_FLAGS) >> 15) | ((flags & ALT_SET_FLAGS) >> 5)) +/* Rc flag (see ALT_SET_FLAGS). 
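+   Since ALT_SET_FLAGS is 0x8000, OERC(ALT_SET_FLAGS) above is
+   (0x8000 >> 15) | (0x8000 >> 5) == 0x401: 0x1 is the Rc bit (record
+   the result into CR0) and 0x400 is the OE bit (record overflow into
+   XER). E.g. ADDC | OERC(ALT_SET_FLAGS) encodes addco., while RC()
+   below sets only the Rc bit.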
*/ +#define RC(flags) ((flags & ALT_SET_FLAGS) >> 15) +#define HI(opcode) ((opcode) << 26) +#define LO(opcode) ((opcode) << 1) + +#define ADD (HI(31) | LO(266)) +#define ADDC (HI(31) | LO(10)) +#define ADDE (HI(31) | LO(138)) +#define ADDI (HI(14)) +#define ADDIC (HI(13)) +#define ADDIS (HI(15)) +#define ADDME (HI(31) | LO(234)) +#define AND (HI(31) | LO(28)) +#define ANDI (HI(28)) +#define ANDIS (HI(29)) +#define Bx (HI(18)) +#define BCx (HI(16)) +#define BCCTR (HI(19) | LO(528) | (3 << 11)) +#define BLR (HI(19) | LO(16) | (0x14 << 21)) +#define CNTLZD (HI(31) | LO(58)) +#define CNTLZW (HI(31) | LO(26)) +#define CMPI (HI(11)) +#define CMPL (HI(31) | LO(32)) +#define CMPLI (HI(10)) +#define CROR (HI(19) | LO(449)) +#define EXTSB (HI(31) | LO(954)) +#define EXTSH (HI(31) | LO(922)) +#define EXTSW (HI(31) | LO(986)) +#define FABS (HI(63) | LO(264)) +#define FADD (HI(63) | LO(21)) +#define FCMPU (HI(63) | LO(0)) +#define FDIV (HI(63) | LO(18)) +#define FMR (HI(63) | LO(72)) +#define FMUL (HI(63) | LO(25)) +#define FNEG (HI(63) | LO(40)) +#define FSUB (HI(63) | LO(20)) +#define LD (HI(58) | 0) +#define LFD (HI(50)) +#define LFDUX (HI(31) | LO(631)) +#define LFDX (HI(31) | LO(599)) +#define LWZ (HI(32)) +#define MFCR (HI(31) | LO(19)) +#define MFLR (HI(31) | LO(339) | 0x80000) +#define MFXER (HI(31) | LO(339) | 0x10000) +#define MTCTR (HI(31) | LO(467) | 0x90000) +#define MTLR (HI(31) | LO(467) | 0x80000) +#define MTXER (HI(31) | LO(467) | 0x10000) +#define MULLD (HI(31) | LO(233)) +#define MULLI (HI(7)) +#define MULLW (HI(31) | LO(235)) +#define NEG (HI(31) | LO(104)) +#define NOP (HI(24)) +#define NOR (HI(31) | LO(124)) +#define OR (HI(31) | LO(444)) +#define ORI (HI(24)) +#define ORIS (HI(25)) +#define RLDICL (HI(30)) +#define RLWINM (HI(21)) +#define SLD (HI(31) | LO(27)) +#define SLW (HI(31) | LO(24)) +#define SRAD (HI(31) | LO(794)) +#define SRADI (HI(31) | LO(413 << 1)) +#define SRAW (HI(31) | LO(792)) +#define SRAWI (HI(31) | LO(824)) +#define SRD (HI(31) | LO(539)) +#define SRW (HI(31) | LO(536)) +#define STD (HI(62) | 0) +#define STDU (HI(62) | 1) +#define STDUX (HI(31) | LO(181)) +#define STFD (HI(54)) +#define STFDUX (HI(31) | LO(759)) +#define STFDX (HI(31) | LO(727)) +#define STW (HI(36)) +#define STWU (HI(37)) +#define STWUX (HI(31) | LO(183)) +#define SUBF (HI(31) | LO(40)) +#define SUBFC (HI(31) | LO(8)) +#define SUBFE (HI(31) | LO(136)) +#define SUBFIC (HI(8)) +#define XOR (HI(31) | LO(316)) +#define XORI (HI(26)) +#define XORIS (HI(27)) + +#define SIMM_MAX (0x7fff) +#define SIMM_MIN (-0x8000) +#define UIMM_MAX (0xffff) + +/* SLJIT_LOCALS_REG is not the real stack register, since it must + point to the head of the stack chain. 
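+   Reading reg_map below against the defines above: the five temporary
+   registers map to the volatile r3-r7, so the first three arguments
+   arrive in place; the five general registers map to the callee-saved
+   r29-r25; SLJIT_LOCALS_REG to r31; TMP_REG1-3 to r8-r10; ZERO_REG
+   (kept at zero by sljit_emit_enter) to r30; and REAL_STACK_PTR to r1,
+   the ABI stack pointer.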
*/ +static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 6] = { + 0, 3, 4, 5, 6, 7, 29, 28, 27, 26, 25, 31, 8, 9, 10, 30, 1 +}; + +static int push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE int optimize_jump(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +{ + sljit_w diff; + sljit_uw target_addr; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return 0; + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size); + } + diff = ((sljit_w)target_addr - (sljit_w)(code_ptr)) & ~0x3l; + + if (jump->flags & UNCOND_B) { + if (diff <= 0x01ffffff && diff >= -0x02000000) { + jump->flags |= PATCH_B; + return 1; + } + if (target_addr <= 0x03ffffff) { + jump->flags |= PATCH_B | ABSOLUTE_B; + return 1; + } + } + else { + if (diff <= 0x7fff && diff >= -0x8000) { + jump->flags |= PATCH_B; + return 1; + } + if (target_addr <= 0xffff) { + jump->flags |= PATCH_B | ABSOLUTE_B; + return 1; + } + } + return 0; +} + +void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + check_sljit_generate_code(compiler); + reverse_buf(compiler); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins)); +#endif + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + /* These structures are ordered by their address. */ + if (label && label->size == word_count) { + /* Just recording the address. */ + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + jump->addr = (sljit_uw)(code_ptr - 3); +#else + jump->addr = (sljit_uw)(code_ptr - 6); +#endif + if (optimize_jump(jump, code_ptr, code)) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + code_ptr[-3] = code_ptr[0]; + code_ptr -= 3; +#else + code_ptr[-6] = code_ptr[0]; + code_ptr -= 6; +#endif + } + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + /* Just recording the address. 
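+			   const_->addr ends up pointing at the first
+			   instruction of the emit_const sequence, which is
+			   what sljit_set_const() patches later.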
 */
+				const_->addr = (sljit_uw)code_ptr;
+				const_ = const_->next;
+			}
+			code_ptr ++;
+			word_count ++;
+		} while (buf_ptr < buf_end);
+
+		buf = buf->next;
+	} while (buf);
+
+	if (label && label->size == word_count) {
+		label->addr = (sljit_uw)code_ptr;
+		label->size = code_ptr - code;
+		label = label->next;
+	}
+
+	SLJIT_ASSERT(!label);
+	SLJIT_ASSERT(!jump);
+	SLJIT_ASSERT(!const_);
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+	SLJIT_ASSERT(code_ptr - code <= (int)compiler->size - ((compiler->size & 0x1) ? 3 : 2));
+#else
+	SLJIT_ASSERT(code_ptr - code <= (int)compiler->size);
+#endif
+
+	jump = compiler->jumps;
+	while (jump) {
+		do {
+			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+			buf_ptr = (sljit_ins*)jump->addr;
+			if (jump->flags & PATCH_B) {
+				if (jump->flags & UNCOND_B) {
+					if (!(jump->flags & ABSOLUTE_B)) {
+						addr = addr - jump->addr;
+						SLJIT_ASSERT((sljit_w)addr <= 0x01ffffff && (sljit_w)addr >= -0x02000000);
+						*buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
+					}
+					else {
+						SLJIT_ASSERT(addr <= 0x03ffffff);
+						*buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
+					}
+				}
+				else {
+					if (!(jump->flags & ABSOLUTE_B)) {
+						addr = addr - jump->addr;
+						SLJIT_ASSERT((sljit_w)addr <= 0x7fff && (sljit_w)addr >= -0x8000);
+						*buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
+					}
+					else {
+						addr = addr & ~0x3l;
+						SLJIT_ASSERT(addr <= 0xffff);
+						*buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
+					}
+
+				}
+				break;
+			}
+			/* Set the fields of immediate loads. */
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
+			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
+#else
+			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
+			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
+			buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
+			buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
+#endif
+		} while (0);
+		jump = jump->next;
+	}
+
+	SLJIT_CACHE_FLUSH(code, code_ptr);
+	compiler->error = SLJIT_ERR_COMPILED;
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+	if (((sljit_w)code_ptr) & 0x4)
+		code_ptr++;
+	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_w)code, sljit_generate_code);
+	return code_ptr;
+#else
+	return code;
+#endif
+}
+
+/* inp_flags: */
+
+/* Creates an index in data_transfer_insts array. */
+#define WORD_DATA 0x00
+#define BYTE_DATA 0x01
+#define HALF_DATA 0x02
+#define INT_DATA 0x03
+#define SIGNED_DATA 0x04
+#define LOAD_DATA 0x08
+#define WRITE_BACK 0x10
+#define INDEXED 0x20
+
+#define MEM_MASK 0x3f
+
+/* Other inp_flags. */
+
+#define ARG_TEST 0x0100
+#define ALT_FORM1 0x0200
+#define ALT_FORM2 0x0400
+#define ALT_FORM3 0x0800
+#define ALT_FORM4 0x1000
+#define ALT_FORM5 0x2000
+/* Integer operation and set flags -> requires sign extension (extsw) on 64-bit systems. */
+#define ALT_SIGN_EXT 0x4000
+/* This flag affects the RC() and OERC() macros. */
+#define ALT_SET_FLAGS 0x8000
+
+ /* Source and destination are registers. */
+#define REG_DEST 0x0001
+#define REG1_SOURCE 0x0002
+#define REG2_SOURCE 0x0004
+ /* getput_arg_fast returned true. */
+#define FAST_DEST 0x0008
+ /* Multiple instructions are required.
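+    FAST_DEST means getput_arg_fast() can store the result with a single
+    instruction once it is computed; SLOW_DEST means the store has to go
+    through getput_arg(), possibly building the address in TMP_REG3.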
*/ +#define SLOW_DEST 0x0010 +/* ALT_FORM1 0x0200 + ALT_FORM2 0x0400 + ALT_FORM3 0x0800 + ALT_FORM4 0x1000 + ALT_FORM5 0x2000 + ALT_SIGN_EXT 0x4000 + ALT_SET_FLAGS 0x8000 */ + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#include "sljitNativePPC_32.c" +#else +#include "sljitNativePPC_64.c" +#endif + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#define STACK_STORE STW +#define STACK_LOAD LWZ +#else +#define STACK_STORE STD +#define STACK_LOAD LD +#endif + +static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w); + +int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + CHECK_ERROR(); + check_sljit_emit_enter(compiler, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + compiler->has_locals = local_size > 0; + + FAIL_IF(push_inst(compiler, MFLR | D(0))); + if (compiler->has_locals) + FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_LOCALS_REG) | A(REAL_STACK_PTR) | IMM(-(int)(sizeof(sljit_w))) )); + FAIL_IF(push_inst(compiler, STACK_STORE | S(ZERO_REG) | A(REAL_STACK_PTR) | IMM(-2 * (int)(sizeof(sljit_w))) )); + if (generals >= 1) + FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_GENERAL_REG1) | A(REAL_STACK_PTR) | IMM(-3 * (int)(sizeof(sljit_w))) )); + if (generals >= 2) + FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_GENERAL_REG2) | A(REAL_STACK_PTR) | IMM(-4 * (int)(sizeof(sljit_w))) )); + if (generals >= 3) + FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_GENERAL_REG3) | A(REAL_STACK_PTR) | IMM(-5 * (int)(sizeof(sljit_w))) )); + if (generals >= 4) + FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_GENERAL_EREG1) | A(REAL_STACK_PTR) | IMM(-6 * (int)(sizeof(sljit_w))) )); + if (generals >= 5) + FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_GENERAL_EREG2) | A(REAL_STACK_PTR) | IMM(-7 * (int)(sizeof(sljit_w))) )); + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(REAL_STACK_PTR) | IMM(sizeof(sljit_w)) )); + + FAIL_IF(push_inst(compiler, ADDI | D(ZERO_REG) | A(0) | 0)); + if (args >= 1) + FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG1) | A(SLJIT_GENERAL_REG1) | B(SLJIT_TEMPORARY_REG1))); + if (args >= 2) + FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG2) | A(SLJIT_GENERAL_REG2) | B(SLJIT_TEMPORARY_REG2))); + if (args >= 3) + FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG3) | A(SLJIT_GENERAL_REG3) | B(SLJIT_TEMPORARY_REG3))); + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + compiler->local_size = (2 + generals + 2) * sizeof(sljit_w) + local_size; +#else + compiler->local_size = (2 + generals + 7 + 8) * sizeof(sljit_w) + local_size; +#endif + compiler->local_size = (compiler->local_size + 15) & ~0xf; + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + if (compiler->local_size <= SIMM_MAX) + FAIL_IF(push_inst(compiler, STWU | S(REAL_STACK_PTR) | A(REAL_STACK_PTR) | IMM(-compiler->local_size))); + else { + FAIL_IF(load_immediate(compiler, 0, -compiler->local_size)); + FAIL_IF(push_inst(compiler, STWUX | S(REAL_STACK_PTR) | A(REAL_STACK_PTR) | B(0))); + } + if (compiler->has_locals) + FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_LOCALS_REG) | A(REAL_STACK_PTR) | IMM(2 * sizeof(sljit_w)))); +#else + if (compiler->local_size <= SIMM_MAX) + FAIL_IF(push_inst(compiler, STDU | S(REAL_STACK_PTR) | A(REAL_STACK_PTR) | IMM(-compiler->local_size))); + else { + FAIL_IF(load_immediate(compiler, 0, 
-compiler->local_size));
+		FAIL_IF(push_inst(compiler, STDUX | S(REAL_STACK_PTR) | A(REAL_STACK_PTR) | B(0)));
+	}
+	if (compiler->has_locals)
+		FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_LOCALS_REG) | A(REAL_STACK_PTR) | IMM((7 + 8) * sizeof(sljit_w))));
+#endif
+
+	return SLJIT_SUCCESS;
+}
+
+void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size)
+{
+	CHECK_ERROR_VOID();
+	check_sljit_fake_enter(compiler, args, temporaries, generals, local_size);
+
+	compiler->temporaries = temporaries;
+	compiler->generals = generals;
+
+	compiler->has_locals = local_size > 0;
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+	compiler->local_size = (2 + generals + 2) * sizeof(sljit_w) + local_size;
+#else
+	compiler->local_size = (2 + generals + 7 + 8) * sizeof(sljit_w) + local_size;
+#endif
+	compiler->local_size = (compiler->local_size + 15) & ~0xf;
+}
+
+int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw)
+{
+	CHECK_ERROR();
+	check_sljit_emit_return(compiler, src, srcw);
+
+	if (src != SLJIT_UNUSED && src != SLJIT_RETURN_REG)
+		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, SLJIT_RETURN_REG, 0, TMP_REG1, 0, src, srcw));
+
+	if (compiler->local_size <= SIMM_MAX)
+		FAIL_IF(push_inst(compiler, ADDI | D(REAL_STACK_PTR) | A(REAL_STACK_PTR) | IMM(compiler->local_size)));
+	else {
+		FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
+		FAIL_IF(push_inst(compiler, ADD | D(REAL_STACK_PTR) | A(REAL_STACK_PTR) | B(0)));
+	}
+
+	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(REAL_STACK_PTR) | IMM(sizeof(sljit_w))));
+	if (compiler->generals >= 5)
+		FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_GENERAL_EREG2) | A(REAL_STACK_PTR) | IMM(-7 * (int)(sizeof(sljit_w))) ));
+	if (compiler->generals >= 4)
+		FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_GENERAL_EREG1) | A(REAL_STACK_PTR) | IMM(-6 * (int)(sizeof(sljit_w))) ));
+	if (compiler->generals >= 3)
+		FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_GENERAL_REG3) | A(REAL_STACK_PTR) | IMM(-5 * (int)(sizeof(sljit_w))) ));
+	if (compiler->generals >= 2)
+		FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_GENERAL_REG2) | A(REAL_STACK_PTR) | IMM(-4 * (int)(sizeof(sljit_w))) ));
+	if (compiler->generals >= 1)
+		FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_GENERAL_REG1) | A(REAL_STACK_PTR) | IMM(-3 * (int)(sizeof(sljit_w))) ));
+	FAIL_IF(push_inst(compiler, STACK_LOAD | D(ZERO_REG) | A(REAL_STACK_PTR) | IMM(-2 * (int)(sizeof(sljit_w))) ));
+	if (compiler->has_locals)
+		FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_LOCALS_REG) | A(REAL_STACK_PTR) | IMM(-(int)(sizeof(sljit_w))) ));
+
+	FAIL_IF(push_inst(compiler, MTLR | S(0)));
+	FAIL_IF(push_inst(compiler, BLR));
+
+	return SLJIT_SUCCESS;
+}
+
+#undef STACK_STORE
+#undef STACK_LOAD
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                             */
+/* --------------------------------------------------------------------- */
+
+/* i/x - immediate/indexed form
+   n/w - no write-back / write-back (1 bit)
+   s/l - store/load (1 bit)
+   u/s - signed/unsigned (1 bit)
+   w/b/h/i - word/byte/half/int allowed (2 bit)
+   It contains 64 items, but not all are different. */
+
+/* 64-bit only: [reg+imm] must be aligned to 4 bytes. */
+#define ADDR_MODE2 0x10000
+/* 64-bit only: there is no lwau instruction.
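+   UPDATE_REQ therefore marks the lwa-with-write-back entry, whose base
+   update is emulated: getput_arg_fast() first updates the base register
+   with an addi and then issues a plain lwa with a zero displacement.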
 */
+#define UPDATE_REQ 0x20000
+
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+#define ARCH_DEPEND(a, b) a
+#define GET_INST_CODE(inst) (inst)
+#else
+#define ARCH_DEPEND(a, b) b
+#define GET_INST_CODE(inst) ((inst) & ~(ADDR_MODE2 | UPDATE_REQ))
+#endif
+
+static SLJIT_CONST sljit_ins data_transfer_insts[64] = {
+
+/* No write-back. */
+
+/* i n s u w */ ARCH_DEPEND(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */),
+/* i n s u b */ HI(38) /* stb */,
+/* i n s u h */ HI(44) /* sth */,
+/* i n s u i */ HI(36) /* stw */,
+
+/* i n s s w */ ARCH_DEPEND(HI(36) /* stw */, HI(62) | ADDR_MODE2 | 0x0 /* std */),
+/* i n s s b */ HI(38) /* stb */,
+/* i n s s h */ HI(44) /* sth */,
+/* i n s s i */ HI(36) /* stw */,
+
+/* i n l u w */ ARCH_DEPEND(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */),
+/* i n l u b */ HI(34) /* lbz */,
+/* i n l u h */ HI(40) /* lhz */,
+/* i n l u i */ HI(32) /* lwz */,
+
+/* i n l s w */ ARCH_DEPEND(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x0 /* ld */),
+/* i n l s b */ HI(34) /* lbz */ /* EXTS_REQ */,
+/* i n l s h */ HI(42) /* lha */,
+/* i n l s i */ ARCH_DEPEND(HI(32) /* lwz */, HI(58) | ADDR_MODE2 | 0x2 /* lwa */),
+
+/* Write-back. */
+
+/* i w s u w */ ARCH_DEPEND(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */),
+/* i w s u b */ HI(39) /* stbu */,
+/* i w s u h */ HI(45) /* sthu */,
+/* i w s u i */ HI(37) /* stwu */,
+
+/* i w s s w */ ARCH_DEPEND(HI(37) /* stwu */, HI(62) | ADDR_MODE2 | 0x1 /* stdu */),
+/* i w s s b */ HI(39) /* stbu */,
+/* i w s s h */ HI(45) /* sthu */,
+/* i w s s i */ HI(37) /* stwu */,
+
+/* i w l u w */ ARCH_DEPEND(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */),
+/* i w l u b */ HI(35) /* lbzu */,
+/* i w l u h */ HI(41) /* lhzu */,
+/* i w l u i */ HI(33) /* lwzu */,
+
+/* i w l s w */ ARCH_DEPEND(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | 0x1 /* ldu */),
+/* i w l s b */ HI(35) /* lbzu */ /* EXTS_REQ */,
+/* i w l s h */ HI(43) /* lhau */,
+/* i w l s i */ ARCH_DEPEND(HI(33) /* lwzu */, HI(58) | ADDR_MODE2 | UPDATE_REQ | 0x2 /* lwa */),
+
+/* ---------- */
+/* Indexed */
+/* ---------- */
+
+/* No write-back. */
+
+/* x n s u w */ ARCH_DEPEND(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
+/* x n s u b */ HI(31) | LO(215) /* stbx */,
+/* x n s u h */ HI(31) | LO(407) /* sthx */,
+/* x n s u i */ HI(31) | LO(151) /* stwx */,
+
+/* x n s s w */ ARCH_DEPEND(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
+/* x n s s b */ HI(31) | LO(215) /* stbx */,
+/* x n s s h */ HI(31) | LO(407) /* sthx */,
+/* x n s s i */ HI(31) | LO(151) /* stwx */,
+
+/* x n l u w */ ARCH_DEPEND(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
+/* x n l u b */ HI(31) | LO(87) /* lbzx */,
+/* x n l u h */ HI(31) | LO(279) /* lhzx */,
+/* x n l u i */ HI(31) | LO(23) /* lwzx */,
+
+/* x n l s w */ ARCH_DEPEND(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
+/* x n l s b */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
+/* x n l s h */ HI(31) | LO(343) /* lhax */,
+/* x n l s i */ ARCH_DEPEND(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
+
+/* Write-back. */
+
+/* x w s u w */ ARCH_DEPEND(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
+/* x w s u b */ HI(31) | LO(247) /* stbux */,
+/* x w s u h */ HI(31) | LO(439) /* sthux */,
+/* x w s u i */ HI(31) | LO(183) /* stwux */,
+
+/* x w s s w */ ARCH_DEPEND(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
+/* x w s s b */ HI(31) | LO(247) /* stbux */,
+/* x w s s h */ HI(31) | LO(439) /* sthux */,
+/* x w s s i */ HI(31) | LO(183) /* stwux */,
+
+/* x w l u w */ ARCH_DEPEND(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
+/* x w l u b */ HI(31) | LO(119) /* lbzux */,
+/* x w l u h */ HI(31) | LO(311) /* lhzux */,
+/* x w l u i */ HI(31) | LO(55) /* lwzux */,
+
+/* x w l s w */ ARCH_DEPEND(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
+/* x w l s b */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
+/* x w l s h */ HI(31) | LO(375) /* lhaux */,
+/* x w l s i */ ARCH_DEPEND(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */)
+
+};
+
+#undef ARCH_DEPEND
+
+/* Simple cases (no caching is required). */
+static int getput_arg_fast(struct sljit_compiler *compiler, int inp_flags, int reg, int arg, sljit_w argw)
+{
+	sljit_ins inst;
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+	int tmp_reg;
+#endif
+
+	SLJIT_ASSERT(arg & SLJIT_MEM);
+	if (!(arg & 0xf)) {
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+		if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
+			if (inp_flags & ARG_TEST)
+				return 1;
+
+			inst = data_transfer_insts[(inp_flags & ~WRITE_BACK) & MEM_MASK];
+			SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ)));
+			push_inst(compiler, GET_INST_CODE(inst) | D(reg) | IMM(argw));
+			return -1;
+		}
+#else
+		inst = data_transfer_insts[(inp_flags & ~WRITE_BACK) & MEM_MASK];
+		if (argw <= SIMM_MAX && argw >= SIMM_MIN &&
+				(!(inst & ADDR_MODE2) || (argw & 0x3) == 0)) {
+			if (inp_flags & ARG_TEST)
+				return 1;
+
+			push_inst(compiler, GET_INST_CODE(inst) | D(reg) | IMM(argw));
+			return -1;
+		}
+#endif
+		return (inp_flags & ARG_TEST) ? SLJIT_SUCCESS : 0;
+	}
+
+	if (!(arg & 0xf0)) {
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+		if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
+			if (inp_flags & ARG_TEST)
+				return 1;
+
+			inst = data_transfer_insts[inp_flags & MEM_MASK];
+			SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ)));
+			push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | IMM(argw));
+			return -1;
+		}
+#else
+		inst = data_transfer_insts[inp_flags & MEM_MASK];
+		if (argw <= SIMM_MAX && argw >= SIMM_MIN && (!(inst & ADDR_MODE2) || (argw & 0x3) == 0)) {
+			if (inp_flags & ARG_TEST)
+				return 1;
+
+			if ((inp_flags & WRITE_BACK) && (inst & UPDATE_REQ)) {
+				tmp_reg = (inp_flags & LOAD_DATA) ? (arg & 0xf) : TMP_REG3;
+				if (push_inst(compiler, ADDI | D(tmp_reg) | A(arg & 0xf) | IMM(argw)))
+					return -1;
+				arg = tmp_reg | SLJIT_MEM;
+				argw = 0;
+			}
+			push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | IMM(argw));
+			return -1;
+		}
+#endif
+	}
+	else if (!(argw & 0x3)) {
+		if (inp_flags & ARG_TEST)
+			return 1;
+		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
+		SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ)));
+		push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B((arg >> 4) & 0xf));
+		return -1;
+	}
+	return (inp_flags & ARG_TEST) ? SLJIT_SUCCESS : 0;
+}
+
+/* See getput_arg below.
+   Note: can_cache is called only for binary operators. Those operators
+   always use word arguments without write-back.
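+   Caching works through TMP_REG3. For example (illustrative): loading
+   from [reg + 100000] and then from [reg + 100008] makes the first
+   getput_arg() build reg + 100000 into TMP_REG3, so the second access
+   becomes a single load with displacement 8 from TMP_REG3.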
 */
+static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw)
+{
+	SLJIT_ASSERT(arg & SLJIT_MEM);
+	SLJIT_ASSERT(next_arg & SLJIT_MEM);
+
+	if (!(arg & 0xf)) {
+		if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX))
+			return 1;
+		return 0;
+	}
+
+	if (arg & 0xf0)
+		return 0;
+
+	if (argw <= SIMM_MAX && argw >= SIMM_MIN) {
+		if (arg == next_arg && (next_argw <= SIMM_MAX && next_argw >= SIMM_MIN))
+			return 1;
+	}
+
+	if (arg == next_arg && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX))
+		return 1;
+
+	return 0;
+}
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+#define ADJUST_CACHED_IMM(imm) \
+	if ((inst & ADDR_MODE2) && (imm & 0x3)) { \
+		/* Adjust cached value. Fortunately this is really a rare case */ \
+		compiler->cache_argw += imm & 0x3; \
+		FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \
+		imm &= ~0x3; \
+	}
+#else
+#define ADJUST_CACHED_IMM(imm)
+#endif
+
+/* Emit the necessary instructions. See can_cache above. */
+static int getput_arg(struct sljit_compiler *compiler, int inp_flags, int reg, int arg, sljit_w argw, int next_arg, sljit_w next_argw)
+{
+	int tmp_r;
+	sljit_ins inst;
+
+	SLJIT_ASSERT(arg & SLJIT_MEM);
+
+	tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3;
+	if ((arg & 0xf) == tmp_r) {
+		/* Special case for "mov reg, [reg, ... ]".
+		   Caching would not happen anyway. */
+		tmp_r = TMP_REG3;
+		compiler->cache_arg = 0;
+		compiler->cache_argw = 0;
+	}
+
+	if (!(arg & 0xf)) {
+		inst = data_transfer_insts[(inp_flags & ~WRITE_BACK) & MEM_MASK];
+		if ((compiler->cache_arg & SLJIT_IMM) && (((sljit_uw)argw - (sljit_uw)compiler->cache_argw) <= SIMM_MAX || ((sljit_uw)compiler->cache_argw - (sljit_uw)argw) <= SIMM_MAX)) {
+			argw = argw - compiler->cache_argw;
+			ADJUST_CACHED_IMM(argw);
+			SLJIT_ASSERT(!(inst & UPDATE_REQ));
+			return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(TMP_REG3) | IMM(argw));
+		}
+
+		if ((next_arg & SLJIT_MEM) && (argw - next_argw <= SIMM_MAX || next_argw - argw <= SIMM_MAX)) {
+			SLJIT_ASSERT(inp_flags & LOAD_DATA);
+
+			compiler->cache_arg = SLJIT_IMM;
+			compiler->cache_argw = argw;
+			tmp_r = TMP_REG3;
+		}
+
+		FAIL_IF(load_immediate(compiler, tmp_r, argw));
+		return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(tmp_r));
+	}
+
+	if (SLJIT_UNLIKELY(arg & 0xf0)) {
+		argw &= 0x3;
+		/* Otherwise getput_arg_fast would capture it.
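+		   Here argw is the 0-3 shift applied to the index register,
+		   i.e. the access is base + (index << argw); the unshifted
+		   form was already handled by the indexed path of
+		   getput_arg_fast. The rotate below computes index << argw
+		   into tmp_r and an x-form load/store finishes the access.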
*/ + SLJIT_ASSERT(argw); +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, RLWINM | S((arg >> 4) & 0xf) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1))); +#else + FAIL_IF(push_inst(compiler, RLDI(tmp_r, (arg >> 4) & 0xf, argw, 63 - argw, 1))); +#endif + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); + return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B(tmp_r)); + } + + inst = data_transfer_insts[inp_flags & MEM_MASK]; + + if (compiler->cache_arg == arg && ((sljit_uw)argw - (sljit_uw)compiler->cache_argw <= SIMM_MAX || (sljit_uw)compiler->cache_argw - (sljit_uw)argw <= SIMM_MAX)) { + SLJIT_ASSERT(!(inp_flags & WRITE_BACK)); + argw = argw - compiler->cache_argw; + ADJUST_CACHED_IMM(argw); + return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(TMP_REG3) | IMM(argw)); + } + + if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) { + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); + return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B(TMP_REG3)); + } + + if (argw == next_argw && (next_arg & SLJIT_MEM)) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + + compiler->cache_arg = SLJIT_IMM; + compiler->cache_argw = argw; + + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); + return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B(TMP_REG3)); + } + + if (arg == next_arg && !(inp_flags & WRITE_BACK) && ((sljit_uw)argw - (sljit_uw)next_argw <= SIMM_MAX || (sljit_uw)next_argw - (sljit_uw)argw <= SIMM_MAX)) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & 0xf))); + + compiler->cache_arg = arg; + compiler->cache_argw = argw; + + return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(TMP_REG3)); + } + + /* Get the indexed version instead of the normal one. */ + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + SLJIT_ASSERT(!(inst & (ADDR_MODE2 | UPDATE_REQ))); + FAIL_IF(load_immediate(compiler, tmp_r, argw)); + return push_inst(compiler, GET_INST_CODE(inst) | D(reg) | A(arg & 0xf) | B(tmp_r)); +} + +static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + int dst_r; + int src1_r; + int src2_r; + int sugg_src2_r = TMP_REG2; + int flags = inp_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + /* Destination check. 
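+	   Three cases: a register destination is used directly (REG_DEST,
+	   and for moves it also becomes the suggested load target for
+	   src2); SLJIT_UNUSED still evaluates the operation into TMP_REG2
+	   (it may set flags); a memory destination tries the
+	   single-instruction path first (FAST_DEST) and otherwise is
+	   stored through getput_arg() afterwards (SLOW_DEST).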
*/ + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= ZERO_REG) { + dst_r = dst; + flags |= REG_DEST; + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + sugg_src2_r = dst_r; + } + else if (dst == SLJIT_UNUSED) { + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + return SLJIT_SUCCESS; + dst_r = TMP_REG2; + } + else { + SLJIT_ASSERT(dst & SLJIT_MEM); + if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) { + flags |= FAST_DEST; + dst_r = TMP_REG2; + } + else { + flags |= SLOW_DEST; + dst_r = 0; + } + } + + /* Source 1. */ + if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= ZERO_REG) { + src1_r = src1; + flags |= REG1_SOURCE; + } + else if (src1 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((inp_flags & 0x3) == INT_DATA) { + if (inp_flags & SIGNED_DATA) + src1w = (signed int)src1w; + else + src1w = (unsigned int)src1w; + } +#endif + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } + else if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1_r = TMP_REG1; + } + else + src1_r = 0; + + /* Source 2. */ + if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= ZERO_REG) { + src2_r = src2; + flags |= REG2_SOURCE; + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + dst_r = src2_r; + } + else if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if ((inp_flags & 0x3) == INT_DATA) { + if (inp_flags & SIGNED_DATA) + src2w = (signed int)src2w; + else + src2w = (unsigned int)src2w; + } +#endif + FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); + src2_r = sugg_src2_r; + } + else if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) { + FAIL_IF(compiler->error); + src2_r = sugg_src2_r; + } + else + src2_r = 0; + + /* src1_r, src2_r and dst_r can be zero (=unprocessed). + All arguments are complex addressing modes, and it is a binary operator. 
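   For example (illustrative operands, not from this patch):

	sljit_emit_op2(compiler, SLJIT_ADD,
		SLJIT_MEM1(SLJIT_GENERAL_REG1), 0x10000,
		SLJIT_MEM1(SLJIT_GENERAL_REG1), 0x10008,
		SLJIT_MEM1(SLJIT_GENERAL_REG1), 0x10010);

   None of the offsets fits a 16-bit displacement, so the first
   getput_arg() builds GENERAL_REG1 + 0x10008 in TMP_REG3 and caches it;
   the second load then uses displacement +8 and the final store
   displacement -8 from TMP_REG3, a single instruction each.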
*/ + if (src1_r == 0 && src2_r == 0 && dst_r == 0) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); + } + src1_r = TMP_REG1; + src2_r = TMP_REG2; + } + else if (src1_r == 0 && src2_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + src1_r = TMP_REG1; + } + else if (src1_r == 0 && dst_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + src1_r = TMP_REG1; + } + else if (src2_r == 0 && dst_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + src2_r = sugg_src2_r; + } + + if (dst_r == 0) + dst_r = TMP_REG2; + + if (src1_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0)); + src1_r = TMP_REG1; + } + + if (src2_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0)); + src2_r = sugg_src2_r; + } + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (flags & (FAST_DEST | SLOW_DEST)) { + if (flags & FAST_DEST) + FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw)); + else + FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0)); + } + return SLJIT_SUCCESS; +} + +int sljit_emit_op0(struct sljit_compiler *compiler, int op) +{ + CHECK_ERROR(); + check_sljit_emit_op0(compiler, op); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + case SLJIT_NOP: + return push_inst(compiler, NOP); + break; + } + + return SLJIT_SUCCESS; +} + +int sljit_emit_op1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + int inp_flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0; + + CHECK_ERROR(); + check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + + if ((src & SLJIT_IMM) && srcw == 0) + src = ZERO_REG; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_INT_OP) { + inp_flags |= INT_DATA | SIGNED_DATA; + if (src & SLJIT_IMM) + srcw = (int)srcw; + } +#endif + if (op & SLJIT_SET_O) + FAIL_IF(push_inst(compiler, MTXER | S(ZERO_REG))); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_UI: + return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_SI: + return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_UB: + return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw); + + case SLJIT_MOV_SB: + return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); + + case SLJIT_MOV_UH: + return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(unsigned short)srcw : srcw); + + case SLJIT_MOV_SH: + return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); + + case SLJIT_MOVU: + return emit_op(compiler, SLJIT_MOV, inp_flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_UI: + return emit_op(compiler, SLJIT_MOV_UI, inp_flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_SI: + return emit_op(compiler, SLJIT_MOV_SI, inp_flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_UB: + return emit_op(compiler, SLJIT_MOV_UB, inp_flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw); + + case SLJIT_MOVU_SB: + return emit_op(compiler, SLJIT_MOV_SB, inp_flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw); + + case SLJIT_MOVU_UH: + return emit_op(compiler, SLJIT_MOV_UH, inp_flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw); + + case SLJIT_MOVU_SH: + return emit_op(compiler, SLJIT_MOV_SH, inp_flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw); + + case SLJIT_NOT: + return emit_op(compiler, SLJIT_NOT, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_NEG: + return emit_op(compiler, SLJIT_NEG, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_CLZ: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + return emit_op(compiler, SLJIT_CLZ, inp_flags | (!(op & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); +#else + return emit_op(compiler, SLJIT_CLZ, inp_flags, dst, dstw, TMP_REG1, 0, src, srcw); +#endif + } + + return SLJIT_SUCCESS; +} + +#define TEST_SL_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN) + +#define TEST_UL_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & ~0xffff)) + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_SH_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= SLJIT_W(0x7fffffff) && (srcw) >= SLJIT_W(-0x80000000)) +#else +#define TEST_SH_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & 0xffff)) +#endif + +#define TEST_UH_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000)) + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_UI_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff)) +#else +#define TEST_UI_IMM(src, srcw) \ + ((src) & SLJIT_IMM) +#endif + +int sljit_emit_op2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + int inp_flags = GET_FLAGS(op) ? 
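/* The TEST_*_IMM macros above choose among the PPC immediate forms
   (constants are examples only): TEST_SL_IMM, e.g. -12345, fits the
   sign-extended 16-bit field of addi/cmpi; TEST_SH_IMM, e.g. 0x12340000,
   fits addis because its low half is zero; TEST_UL_IMM, e.g. 0xbeef,
   fits the zero-extended field of ori/andi./cmpli; TEST_UH_IMM, e.g.
   0xbeef0000, fits oris/andis. Anything else is built into TMP_REG2 by
   load_immediate(). On 64-bit targets the SH and UI variants additionally
   reject values that do not fit in 32 bits. */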
ALT_SET_FLAGS : 0; + + CHECK_ERROR(); + check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + + if ((src1 & SLJIT_IMM) && src1w == 0) + src1 = ZERO_REG; + if ((src2 & SLJIT_IMM) && src2w == 0) + src2 = ZERO_REG; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_INT_OP) { + inp_flags |= INT_DATA | SIGNED_DATA; + if (src1 & SLJIT_IMM) + src1w = (src1w << 32) >> 32; + if (src2 & SLJIT_IMM) + src2w = (src2w << 32) >> 32; + if (GET_FLAGS(op)) + inp_flags |= ALT_SIGN_EXT; + } +#endif + if (op & SLJIT_SET_O) + FAIL_IF(push_inst(compiler, MTXER | S(ZERO_REG))); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + if (!GET_FLAGS(op)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + } + if (TEST_SH_IMM(src2, src2w)) { + compiler->imm = (src2w >> 16) & 0xffff; + return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SH_IMM(src1, src1w)) { + compiler->imm = (src1w >> 16) & 0xffff; + return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + return emit_op(compiler, SLJIT_ADD, inp_flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_ADDC: + return emit_op(compiler, SLJIT_ADDC, inp_flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + if (!GET_FLAGS(op)) { + if (TEST_SL_IMM(src2, -src2w)) { + compiler->imm = (-src2w) & 0xffff; + return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + } + if (TEST_SH_IMM(src2, -src2w)) { + compiler->imm = ((-src2w) >> 16) & 0xffff; + return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + } + if (dst == SLJIT_UNUSED && !(GET_FLAGS(op) & ~(SLJIT_SET_E | SLJIT_SET_S))) { + /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + if (dst == SLJIT_UNUSED && GET_FLAGS(op) == SLJIT_SET_U) { + /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. 
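   A flag-only subtraction like this degenerates into a plain compare:
   for instance the illustrative call

	sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0,
		SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 100);

   takes the ALT_FORM3 path below and emits a single unsigned compare
   instruction instead of an actual subtract.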
*/ + if (TEST_UL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); + } + if (!(op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O))) { + if (TEST_SL_IMM(src2, -src2w)) { + compiler->imm = (-src2w) & 0xffff; + return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + } + /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ + return emit_op(compiler, SLJIT_SUB, inp_flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM5), dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUBC: + return emit_op(compiler, SLJIT_SUBC, inp_flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_INT_OP) + inp_flags |= ALT_FORM2; +#endif + if (!GET_FLAGS(op)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_MUL, inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_MUL, inp_flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + return emit_op(compiler, SLJIT_MUL, inp_flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + /* Commutative unsigned operations. */ + if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) { + if (TEST_UL_IMM(src2, src2w)) { + compiler->imm = src2w; + return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_UL_IMM(src1, src1w)) { + compiler->imm = src1w; + return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + } + if (TEST_UH_IMM(src2, src2w)) { + compiler->imm = (src2w >> 16) & 0xffff; + return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_UH_IMM(src1, src1w)) { + compiler->imm = (src1w >> 16) & 0xffff; + return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) { + if (TEST_UI_IMM(src2, src2w)) { + compiler->imm = src2w; + return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_UI_IMM(src1, src1w)) { + compiler->imm = src1w; + return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + return emit_op(compiler, GET_OPCODE(op), inp_flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_INT_OP) + inp_flags |= ALT_FORM2; +#endif + if (src2 & SLJIT_IMM) { + compiler->imm = src2w; + return emit_op(compiler, GET_OPCODE(op), inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, GET_OPCODE(op), inp_flags, dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +int sljit_is_fpu_available(void) +{ + /* Always available. 
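   The transfer helper below mirrors the integer path: LFD/STFD when the
   offset fits 16 bits, the indexed LFDX/STFDX forms for two-register
   addresses, and for anything else an address built in TMP_REG3 that
   doubles as a one-entry cache; the update forms LFDUX/STFDUX perform
   the access and leave base + offset in TMP_REG3 in one step. For
   example, the illustrative call

	sljit_emit_fop1(compiler, SLJIT_FMOV,
		SLJIT_MEM1(SLJIT_GENERAL_REG1), 0x12345,
		SLJIT_FLOAT_REG1, 0);

   stores through STFDUX and leaves GENERAL_REG1 + 0x12345 cached for a
   neighbouring access.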
*/ + return 1; +} + +static int emit_fpu_data_transfer(struct sljit_compiler *compiler, int fpu_reg, int load, int arg, sljit_w argw) +{ + SLJIT_ASSERT(arg & SLJIT_MEM); + + /* Fast loads and stores. */ + if (!(arg & 0xf0)) { + /* Both for (arg & 0xf) == SLJIT_UNUSED and (arg & 0xf) != SLJIT_UNUSED. */ + if (argw <= SIMM_MAX && argw >= SIMM_MIN) + return push_inst(compiler, (load ? LFD : STFD) | FD(fpu_reg) | A(arg & 0xf) | IMM(argw)); + } + + if (arg & 0xf0) { + argw &= 0x3; + if (argw) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, RLWINM | S((arg >> 4) & 0xf) | A(TMP_REG2) | (argw << 11) | ((31 - argw) << 1))); +#else + FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, (arg >> 4) & 0xf, argw, 63 - argw, 1))); +#endif + return push_inst(compiler, (load ? LFDX : STFDX) | FD(fpu_reg) | A(arg & 0xf) | B(TMP_REG2)); + } + return push_inst(compiler, (load ? LFDX : STFDX) | FD(fpu_reg) | A(arg & 0xf) | B((arg >> 4) & 0xf)); + } + + /* Use cache. */ + if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) + return push_inst(compiler, (load ? LFD : STFD) | FD(fpu_reg) | A(TMP_REG3) | IMM(argw - compiler->cache_argw)); + + /* Put value to cache. */ + compiler->cache_arg = arg; + compiler->cache_argw = argw; + + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + if (!(arg & 0xf)) + return push_inst(compiler, (load ? LFDX : STFDX) | FD(fpu_reg) | A(0) | B(TMP_REG3)); + return push_inst(compiler, (load ? LFDUX : STFDUX) | FD(fpu_reg) | A(TMP_REG3) | B(arg & 0xf)); +} + +int sljit_emit_fop1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + int dst_fr; + + CHECK_ERROR(); + check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (GET_OPCODE(op) == SLJIT_FCMP) { + if (dst > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, dst, dstw)); + dst = TMP_FREG1; + } + if (src > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src, srcw)); + src = TMP_FREG2; + } + return push_inst(compiler, FCMPU | CRD(4) | FA(dst) | FB(src)); + } + + dst_fr = (dst > SLJIT_FLOAT_REG4) ? TMP_FREG1 : dst; + + if (src > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, dst_fr, 1, src, srcw)); + src = dst_fr; + } + + switch (op) { + case SLJIT_FMOV: + if (src != dst_fr && dst_fr != TMP_FREG1) + FAIL_IF(push_inst(compiler, FMR | FD(dst_fr) | FB(src))); + break; + case SLJIT_FNEG: + FAIL_IF(push_inst(compiler, FNEG | FD(dst_fr) | FB(src))); + break; + case SLJIT_FABS: + FAIL_IF(push_inst(compiler, FABS | FD(dst_fr) | FB(src))); + break; + } + + if (dst_fr == TMP_FREG1) + FAIL_IF(emit_fpu_data_transfer(compiler, src, 0, dst, dstw)); + + return SLJIT_SUCCESS; +} + +int sljit_emit_fop2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + int dst_fr; + + CHECK_ERROR(); + check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_fr = (dst > SLJIT_FLOAT_REG4) ? 
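/* Float compares (SLJIT_FCMP above, an FCMPU into CR field 1) are what
   the SLJIT_C_FLOAT_* conditions read later through bit 4 + n. A typical
   pairing (illustrative):

	sljit_emit_fop1(compiler, SLJIT_FCMP,
		SLJIT_FLOAT_REG1, 0, SLJIT_FLOAT_REG2, 0);
	jump = sljit_emit_jump(compiler, SLJIT_C_FLOAT_LESS);
   */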
TMP_FREG1 : dst; + + if (src2 > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w)); + src2 = TMP_FREG2; + } + + if (src1 > SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, src1, src1w)); + src1 = TMP_FREG1; + } + + switch (op) { + case SLJIT_FADD: + FAIL_IF(push_inst(compiler, FADD | FD(dst_fr) | FA(src1) | FB(src2))); + break; + + case SLJIT_FSUB: + FAIL_IF(push_inst(compiler, FSUB | FD(dst_fr) | FA(src1) | FB(src2))); + break; + + case SLJIT_FMUL: + FAIL_IF(push_inst(compiler, FMUL | FD(dst_fr) | FA(src1) | FC(src2) /* FMUL use FC as src2 */)); + break; + + case SLJIT_FDIV: + FAIL_IF(push_inst(compiler, FDIV | FD(dst_fr) | FA(src1) | FB(src2))); + break; + } + + if (dst_fr == TMP_FREG1) + FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 0, dst, dstw)); + + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +{ + CHECK_ERROR(); + check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + + compiler->has_locals = local_size > 0; +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + compiler->local_size = (2 + generals + 2) * sizeof(sljit_w) + local_size; +#else + compiler->local_size = (2 + generals + 7 + 8) * sizeof(sljit_w) + local_size; +#endif + compiler->local_size = (compiler->local_size + 15) & ~0xf; + + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) + return push_inst(compiler, MFLR | D(dst)); + else if (dst & SLJIT_MEM) { + FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2))); + return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); + } + + return SLJIT_SUCCESS; +} + +int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + CHECK_ERROR(); + check_sljit_emit_fast_return(compiler, src, srcw); + + if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) + FAIL_IF(push_inst(compiler, MTLR | S(src))); + else { + if (src & SLJIT_MEM) + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); + else if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); + FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2))); + } + return push_inst(compiler, BLR); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + check_sljit_emit_label(compiler); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, int type) +{ + switch (type) { + case SLJIT_C_EQUAL: + return (12 << 21) | (2 << 16); + + case SLJIT_C_NOT_EQUAL: + return (4 << 21) | (2 << 16); + + case SLJIT_C_LESS: + case SLJIT_C_FLOAT_LESS: + return (12 << 21) | ((4 + 0) << 16); + + case SLJIT_C_GREATER_EQUAL: + case 
SLJIT_C_FLOAT_GREATER_EQUAL: + return (4 << 21) | ((4 + 0) << 16); + + case SLJIT_C_GREATER: + case SLJIT_C_FLOAT_GREATER: + return (12 << 21) | ((4 + 1) << 16); + + case SLJIT_C_LESS_EQUAL: + case SLJIT_C_FLOAT_LESS_EQUAL: + return (4 << 21) | ((4 + 1) << 16); + + case SLJIT_C_SIG_LESS: + return (12 << 21) | (0 << 16); + + case SLJIT_C_SIG_GREATER_EQUAL: + return (4 << 21) | (0 << 16); + + case SLJIT_C_SIG_GREATER: + return (12 << 21) | (1 << 16); + + case SLJIT_C_SIG_LESS_EQUAL: + return (4 << 21) | (1 << 16); + + case SLJIT_C_OVERFLOW: + case SLJIT_C_MUL_OVERFLOW: + return (12 << 21) | (3 << 16); + + case SLJIT_C_NOT_OVERFLOW: + case SLJIT_C_MUL_NOT_OVERFLOW: + return (4 << 21) | (3 << 16); + + case SLJIT_C_FLOAT_EQUAL: + return (12 << 21) | ((4 + 2) << 16); + + case SLJIT_C_FLOAT_NOT_EQUAL: + return (4 << 21) | ((4 + 2) << 16); + + case SLJIT_C_FLOAT_NAN: + return (12 << 21) | ((4 + 3) << 16); + + case SLJIT_C_FLOAT_NOT_NAN: + return (4 << 21) | ((4 + 3) << 16); + + default: + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); + return (20 << 21); + } +} + +struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type) +{ + struct sljit_jump *jump; + sljit_ins bo_bi_flags; + + CHECK_ERROR_PTR(); + check_sljit_emit_jump(compiler, type); + + bo_bi_flags = get_bo_bi_flags(compiler, type & 0xff); + if (!bo_bi_flags) + return NULL; + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + /* In PPC, we don't need to touch the arguments. */ + if (type >= SLJIT_JUMP) + jump->flags |= UNCOND_B; + + PTR_FAIL_IF(emit_const(compiler, TMP_REG1, 0)); + PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_REG1))); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_CALL0 ? 1 : 0))); + return jump; +} + +int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw) +{ + sljit_ins bo_bi_flags; + struct sljit_jump *jump = NULL; + int src_r; + + CHECK_ERROR(); + check_sljit_emit_ijump(compiler, type, src, srcw); + + bo_bi_flags = get_bo_bi_flags(compiler, type); + FAIL_IF(!bo_bi_flags); + + if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) + src_r = src; + else if (src & SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | UNCOND_B); + jump->u.target = srcw; + + FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + src_r = TMP_REG2; + } + else { + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); + src_r = TMP_REG2; + } + + FAIL_IF(push_inst(compiler, MTCTR | S(src_r))); + if (jump) + jump->addr = compiler->size; + return push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_CALL0 ? 1 : 0)); +} + +/* Get a bit from CR, all other bits are zeroed. */ +#define GET_CR_BIT(bit, dst) \ + FAIL_IF(push_inst(compiler, MFCR | D(dst))); \ + FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1))); + +#define INVERT_BIT(dst) \ + FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1)); + +int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type) +{ + int reg; + + CHECK_ERROR(); + check_sljit_emit_cond_value(compiler, op, dst, dstw, type); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? 
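/* How GET_CR_BIT works: mfcr copies all 32 condition-register bits into a
   general register, then rlwinm rotates left by bit + 1 so the requested
   bit (big-endian numbering, bit 0 being LT of cr0) lands in the least
   significant position, and the MB = ME = 31 mask clears everything else.
   For SLJIT_C_EQUAL below, bit 2 (EQ of cr0) gives a rotate count of 3. */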
dst : TMP_REG2; + + switch (type) { + case SLJIT_C_EQUAL: + GET_CR_BIT(2, reg); + break; + + case SLJIT_C_NOT_EQUAL: + GET_CR_BIT(2, reg); + INVERT_BIT(reg); + break; + + case SLJIT_C_LESS: + case SLJIT_C_FLOAT_LESS: + GET_CR_BIT(4 + 0, reg); + break; + + case SLJIT_C_GREATER_EQUAL: + case SLJIT_C_FLOAT_GREATER_EQUAL: + GET_CR_BIT(4 + 0, reg); + INVERT_BIT(reg); + break; + + case SLJIT_C_GREATER: + case SLJIT_C_FLOAT_GREATER: + GET_CR_BIT(4 + 1, reg); + break; + + case SLJIT_C_LESS_EQUAL: + case SLJIT_C_FLOAT_LESS_EQUAL: + GET_CR_BIT(4 + 1, reg); + INVERT_BIT(reg); + break; + + case SLJIT_C_SIG_LESS: + GET_CR_BIT(0, reg); + break; + + case SLJIT_C_SIG_GREATER_EQUAL: + GET_CR_BIT(0, reg); + INVERT_BIT(reg); + break; + + case SLJIT_C_SIG_GREATER: + GET_CR_BIT(1, reg); + break; + + case SLJIT_C_SIG_LESS_EQUAL: + GET_CR_BIT(1, reg); + INVERT_BIT(reg); + break; + + case SLJIT_C_OVERFLOW: + case SLJIT_C_MUL_OVERFLOW: + GET_CR_BIT(3, reg); + break; + + case SLJIT_C_NOT_OVERFLOW: + case SLJIT_C_MUL_NOT_OVERFLOW: + GET_CR_BIT(3, reg); + INVERT_BIT(reg); + break; + + case SLJIT_C_FLOAT_EQUAL: + GET_CR_BIT(4 + 2, reg); + break; + + case SLJIT_C_FLOAT_NOT_EQUAL: + GET_CR_BIT(4 + 2, reg); + INVERT_BIT(reg); + break; + + case SLJIT_C_FLOAT_NAN: + GET_CR_BIT(4 + 3, reg); + break; + + case SLJIT_C_FLOAT_NOT_NAN: + GET_CR_BIT(4 + 3, reg); + INVERT_BIT(reg); + break; + + default: + SLJIT_ASSERT_STOP(); + break; + } + + if (GET_OPCODE(op) == SLJIT_OR) + return emit_op(compiler, GET_OPCODE(op), GET_FLAGS(op) ? ALT_SET_FLAGS : 0, dst, dstw, dst, dstw, TMP_REG2, 0); + + if (reg == TMP_REG2) + return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); + return SLJIT_SUCCESS; +} + +struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value) +{ + struct sljit_const *const_; + int reg; + + CHECK_ERROR_PTR(); + check_sljit_emit_const(compiler, dst, dstw, init_value); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG2; + + PTR_FAIL_IF(emit_const(compiler, reg, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + return const_; +} diff --git a/sljit/sljitNativeX86_32.c b/sljit/sljitNativeX86_32.c new file mode 100644 index 0000000..a1123cb --- /dev/null +++ b/sljit/sljitNativeX86_32.c @@ -0,0 +1,521 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* x86 32-bit arch dependent functions. */ + +static int emit_do_imm(struct sljit_compiler *compiler, sljit_ub opcode, sljit_w imm) +{ + sljit_ub *buf; + + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_w)); + FAIL_IF(!buf); + INC_SIZE(1 + sizeof(sljit_w)); + *buf++ = opcode; + *(sljit_w*)buf = imm; + return SLJIT_SUCCESS; +} + +static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type) +{ + if (type == SLJIT_JUMP) { + *code_ptr++ = 0xe9; + jump->addr++; + } + else if (type >= SLJIT_CALL0) { + *code_ptr++ = 0xe8; + jump->addr++; + } + else { + *code_ptr++ = 0x0f; + *code_ptr++ = get_jump_code(type); + jump->addr += 2; + } + + if (jump->flags & JUMP_LABEL) + jump->flags |= PATCH_MW; + else + *(sljit_w*)code_ptr = jump->u.target - (jump->addr + 4); + code_ptr += 4; + + return code_ptr; +} + +int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + int size; + sljit_ub *buf; + + CHECK_ERROR(); + check_sljit_emit_enter(compiler, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + compiler->args = args; + compiler->flags_saved = 0; + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + size = 1 + (generals <= 3 ? generals : 3) + (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); +#else + size = 1 + (generals <= 3 ? generals : 3) + (args > 0 ? (2 + args * 3) : 0); +#endif + buf = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!buf); + + INC_SIZE(size); + PUSH_REG(reg_map[TMP_REGISTER]); +#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (args > 0) { + *buf++ = 0x8b; + *buf++ = 0xc4 | (reg_map[TMP_REGISTER] << 3); + } +#endif + if (generals > 2) + PUSH_REG(reg_map[SLJIT_GENERAL_REG3]); + if (generals > 1) + PUSH_REG(reg_map[SLJIT_GENERAL_REG2]); + if (generals > 0) + PUSH_REG(reg_map[SLJIT_GENERAL_REG1]); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (args > 0) { + *buf++ = 0x8b; + *buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG1] << 3) | reg_map[SLJIT_TEMPORARY_REG3]; + } + if (args > 1) { + *buf++ = 0x8b; + *buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG2] << 3) | reg_map[SLJIT_TEMPORARY_REG2]; + } + if (args > 2) { + *buf++ = 0x8b; + *buf++ = 0x44 | (reg_map[SLJIT_GENERAL_REG3] << 3); + *buf++ = 0x24; + *buf++ = sizeof(sljit_w) * (3 + 2); /* generals >= 3 as well. 
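   The offset works out because, at this point, the stack holds the three
   saved general registers, the saved TMP_REGISTER and the return address:
   five words, so the third fastcall argument (the only one that travels
   on the stack) sits at [esp + 5 * sizeof(sljit_w)].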
*/ + } +#else + if (args > 0) { + *buf++ = 0x8b; + *buf++ = 0x40 | (reg_map[SLJIT_GENERAL_REG1] << 3) | reg_map[TMP_REGISTER]; + *buf++ = sizeof(sljit_w) * 2; + } + if (args > 1) { + *buf++ = 0x8b; + *buf++ = 0x40 | (reg_map[SLJIT_GENERAL_REG2] << 3) | reg_map[TMP_REGISTER]; + *buf++ = sizeof(sljit_w) * 3; + } + if (args > 2) { + *buf++ = 0x8b; + *buf++ = 0x40 | (reg_map[SLJIT_GENERAL_REG3] << 3) | reg_map[TMP_REGISTER]; + *buf++ = sizeof(sljit_w) * 4; + } +#endif + + local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1); + compiler->temporaries_start = local_size; + if (temporaries > 3) + local_size += (temporaries - 3) * sizeof(sljit_uw); + compiler->generals_start = local_size; + if (generals > 3) + local_size += (generals - 3) * sizeof(sljit_uw); + +#ifdef _WIN32 + if (local_size > 1024) { + FAIL_IF(emit_do_imm(compiler, 0xb8 + reg_map[SLJIT_TEMPORARY_REG1], local_size)); + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_touch_stack))); + } +#endif + + compiler->local_size = local_size; + if (local_size > 0) + return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d, + SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size); + + /* Mov arguments to general registers. */ + return SLJIT_SUCCESS; +} + +void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + CHECK_ERROR_VOID(); + check_sljit_fake_enter(compiler, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + compiler->args = args; + compiler->local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1); + compiler->temporaries_start = compiler->local_size; + if (temporaries > 3) + compiler->local_size += (temporaries - 3) * sizeof(sljit_uw); + compiler->generals_start = compiler->local_size; + if (generals > 3) + compiler->local_size += (generals - 3) * sizeof(sljit_uw); +} + +int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + int size; + sljit_ub *buf; + + CHECK_ERROR(); + check_sljit_emit_return(compiler, src, srcw); + SLJIT_ASSERT(compiler->args >= 0); + + compiler->flags_saved = 0; + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (src != SLJIT_UNUSED && src != SLJIT_RETURN_REG) + FAIL_IF(emit_mov(compiler, SLJIT_RETURN_REG, 0, src, srcw)); + + if (compiler->local_size > 0) + FAIL_IF(emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05, + SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); + + size = 2 + (compiler->generals <= 3 ? 
compiler->generals : 3); +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (compiler->args > 2) + size += 2; +#else + if (compiler->args > 0) + size += 2; +#endif + buf = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!buf); + + INC_SIZE(size); + + if (compiler->generals > 0) + POP_REG(reg_map[SLJIT_GENERAL_REG1]); + if (compiler->generals > 1) + POP_REG(reg_map[SLJIT_GENERAL_REG2]); + if (compiler->generals > 2) + POP_REG(reg_map[SLJIT_GENERAL_REG3]); + POP_REG(reg_map[TMP_REGISTER]); +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (compiler->args > 2) + RETN(sizeof(sljit_w)); + else + RET(); +#else + if (compiler->args > 0) + RETN(compiler->args * sizeof(sljit_w)); + else + RET(); +#endif + + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +/* Size contains the flags as well. */ +static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, int size, + /* The register or immediate operand. */ + int a, sljit_w imma, + /* The general operand (not immediate). */ + int b, sljit_w immb) +{ + sljit_ub *buf; + sljit_ub *buf_ptr; + int flags = size & ~0xf; + int inst_size; + + /* Both cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); + /* Size flags not allowed for typed instructions. */ + SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); + /* Both size flags cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + /* SSE2 and immediate is not possible. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); +#endif + + size &= 0xf; + inst_size = size; + +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + if (flags & EX86_PREF_F2) + inst_size++; +#endif + if (flags & EX86_PREF_66) + inst_size++; + + /* Calculate size of b. */ + inst_size += 1; /* mod r/m byte. */ + if (b & SLJIT_MEM) { + if ((b & 0x0f) == SLJIT_UNUSED) + inst_size += sizeof(sljit_w); + else if (immb != 0 && !(b & 0xf0)) { + /* Immediate operand. */ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_b); + else + inst_size += sizeof(sljit_w); + } + + if ((b & 0xf) == SLJIT_LOCALS_REG && !(b & 0xf0)) + b |= SLJIT_LOCALS_REG << 4; + + if ((b & 0xf0) != SLJIT_UNUSED) + inst_size += 1; /* SIB byte. */ + } + + /* Calculate size of a. */ + if (a & SLJIT_IMM) { + if (flags & EX86_BIN_INS) { + if (imma <= 127 && imma >= -128) { + inst_size += 1; + flags |= EX86_BYTE_ARG; + } else + inst_size += 4; + } + else if (flags & EX86_SHIFT_INS) { + imma &= 0x1f; + if (imma != 1) { + inst_size ++; + flags |= EX86_BYTE_ARG; + } + } else if (flags & EX86_BYTE_ARG) + inst_size++; + else if (flags & EX86_HALF_ARG) + inst_size += sizeof(short); + else + inst_size += sizeof(sljit_w); + } + else + SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); + + buf = (sljit_ub*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!buf); + + /* Encoding the byte. */ + INC_SIZE(inst_size); +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + if (flags & EX86_PREF_F2) + *buf++ = 0xf2; +#endif + if (flags & EX86_PREF_66) + *buf++ = 0x66; + + buf_ptr = buf + size; + + /* Encode mod/rm byte. */ + if (!(flags & EX86_SHIFT_INS)) { + if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + *buf = (flags & EX86_BYTE_ARG) ? 
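/* 0x83 is the ALU-group opcode with a sign-extended 8-bit immediate and
   0x81 its 32-bit form; the /digit that selects add, or, and, sub, xor or
   cmp is supplied by the caller in the mod r/m byte. For example:

	add eax, 5       ->  83 c0 05
	add eax, 1000    ->  81 c0 e8 03 00 00
   */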
0x83 : 0x81; + + if ((a & SLJIT_IMM) || (a == 0)) + *buf_ptr = 0; +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + else if (!(flags & EX86_SSE2)) + *buf_ptr = reg_map[a] << 3; + else + *buf_ptr = a << 3; +#else + else + *buf_ptr = reg_map[a] << 3; +#endif + } + else { + if (a & SLJIT_IMM) { + if (imma == 1) + *buf = 0xd1; + else + *buf = 0xc1; + } else + *buf = 0xd3; + *buf_ptr = 0; + } + + if (!(b & SLJIT_MEM)) +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + *buf_ptr++ |= 0xc0 + ((!(flags & EX86_SSE2)) ? reg_map[b] : b); +#else + *buf_ptr++ |= 0xc0 + reg_map[b]; +#endif + else if ((b & 0x0f) != SLJIT_UNUSED) { + if ((b & 0xf0) == SLJIT_UNUSED || (b & 0xf0) == (SLJIT_LOCALS_REG << 4)) { + if (immb != 0) { + if (immb <= 127 && immb >= -128) + *buf_ptr |= 0x40; + else + *buf_ptr |= 0x80; + } + + if ((b & 0xf0) == SLJIT_UNUSED) + *buf_ptr++ |= reg_map[b & 0x0f]; + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = reg_map[b & 0x0f] | (reg_map[(b >> 4) & 0x0f] << 3); + } + + if (immb != 0) { + if (immb <= 127 && immb >= -128) + *buf_ptr++ = immb; /* 8 bit displacement. */ + else { + *(sljit_w*)buf_ptr = immb; /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_w); + } + } + } + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = reg_map[b & 0x0f] | (reg_map[(b >> 4) & 0x0f] << 3) | (immb << 6); + } + } + else { + *buf_ptr++ |= 0x05; + *(sljit_w*)buf_ptr = immb; /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_w); + } + + if (a & SLJIT_IMM) { + if (flags & EX86_BYTE_ARG) + *buf_ptr = imma; + else if (flags & EX86_HALF_ARG) + *(short*)buf_ptr = imma; + else if (!(flags & EX86_SHIFT_INS)) + *(sljit_w*)buf_ptr = imma; + } + + return !(flags & EX86_SHIFT_INS) ? buf : (buf + 1); +} + +/* --------------------------------------------------------------------- */ +/* Call / return instructions */ +/* --------------------------------------------------------------------- */ + +static SLJIT_INLINE int call_with_args(struct sljit_compiler *compiler, int type) +{ + sljit_ub *buf; + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + buf = (sljit_ub*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2); + FAIL_IF(!buf); + INC_SIZE(type >= SLJIT_CALL3 ? 
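/* Argument glue for calls: sljit keeps up to three call arguments in
   SLJIT_TEMPORARY_REG1..3. Under fastcall a third argument is pushed
   first, then the mov below copies SLJIT_TEMPORARY_REG1 into
   SLJIT_TEMPORARY_REG3, whose hardware register carries the first
   fastcall argument; the second is assumed to sit in the right register
   already. Under cdecl all three are pushed in reverse order, so
   argument 1 ends up on top of the stack. */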
2 + 1 : 2); + + if (type >= SLJIT_CALL3) + PUSH_REG(reg_map[SLJIT_TEMPORARY_REG3]); + *buf++ = 0x8b; + *buf++ = 0xc0 | (reg_map[SLJIT_TEMPORARY_REG3] << 3) | reg_map[SLJIT_TEMPORARY_REG1]; +#else + buf = (sljit_ub*)ensure_buf(compiler, type - SLJIT_CALL0 + 1); + FAIL_IF(!buf); + INC_SIZE(type - SLJIT_CALL0); + if (type >= SLJIT_CALL3) + PUSH_REG(reg_map[SLJIT_TEMPORARY_REG3]); + if (type >= SLJIT_CALL2) + PUSH_REG(reg_map[SLJIT_TEMPORARY_REG2]); + PUSH_REG(reg_map[SLJIT_TEMPORARY_REG1]); +#endif + return SLJIT_SUCCESS; +} + +int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +{ + sljit_ub *buf; + + CHECK_ERROR(); + check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + compiler->args = args; + compiler->local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1); + compiler->temporaries_start = compiler->local_size; + if (temporaries > 3) + compiler->local_size += (temporaries - 3) * sizeof(sljit_uw); + compiler->generals_start = compiler->local_size; + if (generals > 3) + compiler->local_size += (generals - 3) * sizeof(sljit_uw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!buf); + + INC_SIZE(1); + POP_REG(reg_map[dst]); + return SLJIT_SUCCESS; + } + else if (dst & SLJIT_MEM) { + buf = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!buf); + *buf++ = 0x8f; + return SLJIT_SUCCESS; + } + + /* For UNUSED dst. Uncommon, but possible. */ + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!buf); + + INC_SIZE(1); + POP_REG(reg_map[TMP_REGISTER]); + return SLJIT_SUCCESS; +} + +int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + sljit_ub *buf; + + CHECK_ERROR(); + check_sljit_emit_fast_return(compiler, src, srcw); + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1); + FAIL_IF(!buf); + + INC_SIZE(1 + 1); + PUSH_REG(reg_map[src]); + } + else if (src & SLJIT_MEM) { + buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!buf); + *buf++ = 0xff; + *buf |= 6 << 3; + + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!buf); + INC_SIZE(1); + } + else { + /* SLJIT_IMM. */ + buf = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1); + FAIL_IF(!buf); + + INC_SIZE(5 + 1); + *buf++ = 0x68; + *(sljit_w*)buf = srcw; + buf += sizeof(sljit_w); + } + + RET(); + return SLJIT_SUCCESS; +} diff --git a/sljit/sljitNativeX86_64.c b/sljit/sljitNativeX86_64.c new file mode 100644 index 0000000..c41d15c --- /dev/null +++ b/sljit/sljitNativeX86_64.c @@ -0,0 +1,836 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* x86 64-bit arch dependent functions. */ + +static int emit_load_imm64(struct sljit_compiler *compiler, int reg, sljit_w imm) +{ + sljit_ub *buf; + + buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_w)); + FAIL_IF(!buf); + INC_SIZE(2 + sizeof(sljit_w)); + *buf++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); + *buf++ = 0xb8 + (reg_map[reg] & 0x7); + *(sljit_w*)buf = imm; + return SLJIT_SUCCESS; +} + +static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type) +{ + if (type < SLJIT_JUMP) { + *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10; + *code_ptr++ = 10 + 3; + } + + SLJIT_ASSERT(reg_map[TMP_REG3] == 9); + *code_ptr++ = REX_W | REX_B; + *code_ptr++ = 0xb8 + 1; + jump->addr = (sljit_uw)code_ptr; + + if (jump->flags & JUMP_LABEL) + jump->flags |= PATCH_MD; + else + *(sljit_w*)code_ptr = jump->u.target; + + code_ptr += sizeof(sljit_w); + *code_ptr++ = REX_B; + *code_ptr++ = 0xff; + *code_ptr++ = (type >= SLJIT_CALL0) ? 0xd1 /* call */ : 0xe1 /* jmp */; + + return code_ptr; +} + +static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type) +{ + sljit_w delta = addr - ((sljit_w)code_ptr + 1 + sizeof(sljit_hw)); + + if (delta <= SLJIT_W(0x7fffffff) && delta >= SLJIT_W(-0x80000000)) { + *code_ptr++ = (type == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */; + *(sljit_w*)code_ptr = delta; + } + else { + SLJIT_ASSERT(reg_map[TMP_REG3] == 9); + *code_ptr++ = REX_W | REX_B; + *code_ptr++ = 0xb8 + 1; + *(sljit_w*)code_ptr = addr; + code_ptr += sizeof(sljit_w); + *code_ptr++ = REX_B; + *code_ptr++ = 0xff; + *code_ptr++ = (type == 2) ? 
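/* When the target is farther than +/-2GB away, the jump detours through
   TMP_REG3, which the assertion above pins to r9:

	49 b9 imm64    mov r9, target
	41 ff e1       jmp r9    (41 ff d1, i.e. call r9, when type == 2)
   */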
0xd1 /* call */ : 0xe1 /* jmp */; + } + + return code_ptr; +} + +int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + int size; + sljit_ub *buf; + + CHECK_ERROR(); + check_sljit_emit_enter(compiler, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + compiler->flags_saved = 0; + + size = generals; +#ifndef _WIN64 + if (generals >= 2) + size += generals - 1; +#else + if (local_size > 0) + size += 2; + if (generals >= 4) + size += generals - 3; + if (temporaries >= 5) + size += (5 - 4) * 2; +#endif + size += args * 3; + if (size > 0) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!buf); + + INC_SIZE(size); + if (generals >= 5) { + SLJIT_ASSERT(reg_map[SLJIT_GENERAL_EREG2] >= 8); + *buf++ = REX_B; + PUSH_REG(reg_lmap[SLJIT_GENERAL_EREG2]); + } + if (generals >= 4) { + SLJIT_ASSERT(reg_map[SLJIT_GENERAL_EREG1] >= 8); + *buf++ = REX_B; + PUSH_REG(reg_lmap[SLJIT_GENERAL_EREG1]); + } + if (generals >= 3) { +#ifndef _WIN64 + SLJIT_ASSERT(reg_map[SLJIT_GENERAL_REG3] >= 8); + *buf++ = REX_B; +#else + SLJIT_ASSERT(reg_map[SLJIT_GENERAL_REG3] < 8); +#endif + PUSH_REG(reg_lmap[SLJIT_GENERAL_REG3]); + } + if (generals >= 2) { +#ifndef _WIN64 + SLJIT_ASSERT(reg_map[SLJIT_GENERAL_REG2] >= 8); + *buf++ = REX_B; +#else + SLJIT_ASSERT(reg_map[SLJIT_GENERAL_REG2] < 8); +#endif + PUSH_REG(reg_lmap[SLJIT_GENERAL_REG2]); + } + if (generals >= 1) { + SLJIT_ASSERT(reg_map[SLJIT_GENERAL_REG1] < 8); + PUSH_REG(reg_lmap[SLJIT_GENERAL_REG1]); + } +#ifdef _WIN64 + if (temporaries >= 5) { + SLJIT_ASSERT(reg_map[SLJIT_TEMPORARY_EREG2] >= 8); + *buf++ = REX_B; + PUSH_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]); + } + if (local_size > 0) { + SLJIT_ASSERT(reg_map[SLJIT_LOCALS_REG] >= 8); + *buf++ = REX_B; + PUSH_REG(reg_lmap[SLJIT_LOCALS_REG]); + } +#endif + +#ifndef _WIN64 + if (args > 0) { + *buf++ = REX_W; + *buf++ = 0x8b; + *buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG1] << 3) | 0x7; + } + if (args > 1) { + *buf++ = REX_W | REX_R; + *buf++ = 0x8b; + *buf++ = 0xc0 | (reg_lmap[SLJIT_GENERAL_REG2] << 3) | 0x6; + } + if (args > 2) { + *buf++ = REX_W | REX_R; + *buf++ = 0x8b; + *buf++ = 0xc0 | (reg_lmap[SLJIT_GENERAL_REG3] << 3) | 0x2; + } +#else + if (args > 0) { + *buf++ = REX_W; + *buf++ = 0x8b; + *buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG1] << 3) | 0x1; + } + if (args > 1) { + *buf++ = REX_W; + *buf++ = 0x8b; + *buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG2] << 3) | 0x2; + } + if (args > 2) { + *buf++ = REX_W | REX_B; + *buf++ = 0x8b; + *buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG3] << 3) | 0x0; + } +#endif + } + + local_size = (local_size + 16 - 1) & ~(16 - 1); +#ifdef _WIN64 + local_size += 4 * sizeof(sljit_w); + compiler->local_size = local_size; + if (local_size > 1024) { + /* Allocate the stack for the function itself. 
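   Windows commits stack pages lazily behind a single guard page, so a
   frame this large must have its pages touched in order before rsp moves
   past them; that is what the sljit_touch_stack call below is for (the
   32-bit path does the same for frames over 1024 bytes).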
*/ + buf = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!buf); + INC_SIZE(4); + *buf++ = REX_W; + *buf++ = 0x83; + *buf++ = 0xc0 | (5 << 3) | 4; + *buf++ = 4 * sizeof(sljit_w); + local_size -= 4 * sizeof(sljit_w); + FAIL_IF(emit_load_imm64(compiler, SLJIT_TEMPORARY_REG1, local_size)); + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_touch_stack))); + } +#else + compiler->local_size = local_size; + if (local_size > 0) { +#endif + if (local_size <= 127) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!buf); + INC_SIZE(4); + *buf++ = REX_W; + *buf++ = 0x83; + *buf++ = 0xc0 | (5 << 3) | 4; + *buf++ = local_size; + } + else { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 7); + FAIL_IF(!buf); + INC_SIZE(7); + *buf++ = REX_W; + *buf++ = 0x81; + *buf++ = 0xc0 | (5 << 3) | 4; + *(sljit_hw*)buf = local_size; + buf += sizeof(sljit_hw); + } +#ifndef _WIN64 + } +#endif + +#ifdef _WIN64 + if (local_size > 4 * sizeof(sljit_w)) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!buf); + INC_SIZE(5); + *buf++ = REX_W | REX_R; + *buf++ = 0x8d; + *buf++ = 0x40 | (reg_lmap[SLJIT_LOCALS_REG] << 3) | 0x4; + *buf++ = 0x24; + *buf = 4 * sizeof(sljit_w); + } +#endif + + /* Mov arguments to general registers. */ + return SLJIT_SUCCESS; +} + +void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +{ + CHECK_ERROR_VOID(); + check_sljit_fake_enter(compiler, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + compiler->local_size = (local_size + 16 - 1) & ~(16 - 1); +#ifdef _WIN64 + compiler->local_size += 4 * sizeof(sljit_w); +#endif +} + +int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + int size; + sljit_ub *buf; + + CHECK_ERROR(); + check_sljit_emit_return(compiler, src, srcw); + + compiler->flags_saved = 0; + + if (src != SLJIT_UNUSED && src != SLJIT_RETURN_REG) { + compiler->mode32 = 0; + FAIL_IF(emit_mov(compiler, SLJIT_RETURN_REG, 0, src, srcw)); + } + + if (compiler->local_size > 0) { + if (compiler->local_size <= 127) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!buf); + INC_SIZE(4); + *buf++ = REX_W; + *buf++ = 0x83; + *buf++ = 0xc0 | (0 << 3) | 4; + *buf = compiler->local_size; + } + else { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 7); + FAIL_IF(!buf); + INC_SIZE(7); + *buf++ = REX_W; + *buf++ = 0x81; + *buf++ = 0xc0 | (0 << 3) | 4; + *(sljit_hw*)buf = compiler->local_size; + } + } + + size = 1 + compiler->generals; +#ifndef _WIN64 + if (compiler->generals >= 2) + size += compiler->generals - 1; +#else + if (compiler->local_size > 4 * sizeof(sljit_w)) + size += 2; + if (compiler->generals >= 4) + size += compiler->generals - 3; + if (compiler->temporaries >= 5) + size += (5 - 4) * 2; +#endif + buf = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!buf); + + INC_SIZE(size); + +#ifdef _WIN64 + if (compiler->local_size > 4 * sizeof(sljit_w)) { + *buf++ = REX_B; + POP_REG(reg_lmap[SLJIT_LOCALS_REG]); + } + if (compiler->temporaries >= 5) { + *buf++ = REX_B; + POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]); + } +#endif + if (compiler->generals >= 1) + POP_REG(reg_map[SLJIT_GENERAL_REG1]); + if (compiler->generals >= 2) { +#ifndef _WIN64 + *buf++ = REX_B; +#endif + POP_REG(reg_lmap[SLJIT_GENERAL_REG2]); + } + if (compiler->generals >= 3) { +#ifndef _WIN64 + *buf++ = REX_B; +#endif + POP_REG(reg_lmap[SLJIT_GENERAL_REG3]); + } + if (compiler->generals >= 4) { + *buf++ = REX_B; + 
POP_REG(reg_lmap[SLJIT_GENERAL_EREG1]); + } + if (compiler->generals >= 5) { + *buf++ = REX_B; + POP_REG(reg_lmap[SLJIT_GENERAL_EREG2]); + } + + RET(); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +static int emit_do_imm32(struct sljit_compiler *compiler, sljit_ub rex, sljit_ub opcode, sljit_w imm) +{ + sljit_ub *buf; + + if (rex != 0) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_hw)); + FAIL_IF(!buf); + INC_SIZE(2 + sizeof(sljit_hw)); + *buf++ = rex; + *buf++ = opcode; + *(sljit_hw*)buf = imm; + } + else { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_hw)); + FAIL_IF(!buf); + INC_SIZE(1 + sizeof(sljit_hw)); + *buf++ = opcode; + *(sljit_hw*)buf = imm; + } + return SLJIT_SUCCESS; +} + +static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, int size, + /* The register or immediate operand. */ + int a, sljit_w imma, + /* The general operand (not immediate). */ + int b, sljit_w immb) +{ + sljit_ub *buf; + sljit_ub *buf_ptr; + sljit_ub rex = 0; + int flags = size & ~0xf; + int inst_size; + + /* The immediate operand must be 32 bit. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma)); + /* Both cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); + /* Size flags not allowed for typed instructions. */ + SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); + /* Both size flags cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + /* SSE2 and immediate is not possible. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); +#endif + + size &= 0xf; + inst_size = size; + + if ((b & SLJIT_MEM) && !(b & 0xf0) && NOT_HALFWORD(immb)) { + if (emit_load_imm64(compiler, TMP_REG3, immb)) + return NULL; + immb = 0; + if (b & 0xf) + b |= TMP_REG3 << 4; + else + b |= TMP_REG3; + } + + if (!compiler->mode32 && !(flags & EX86_NO_REXW)) + rex |= REX_W; + else if (flags & EX86_REX) + rex |= REX; + +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + if (flags & EX86_PREF_F2) + inst_size++; +#endif + if (flags & EX86_PREF_66) + inst_size++; + + /* Calculate size of b. */ + inst_size += 1; /* mod r/m byte. */ + if (b & SLJIT_MEM) { + if ((b & 0x0f) == SLJIT_UNUSED) + inst_size += 1 + sizeof(sljit_hw); /* SIB byte required to avoid RIP based addressing. */ + else { + if (reg_map[b & 0x0f] >= 8) + rex |= REX_B; + if (immb != 0 && !(b & 0xf0)) { + /* Immediate operand. */ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_b); + else + inst_size += sizeof(sljit_hw); + } + } + +#ifndef _WIN64 + if ((b & 0xf) == SLJIT_LOCALS_REG && (b & 0xf0) == 0) + b |= SLJIT_LOCALS_REG << 4; +#endif + + if ((b & 0xf0) != SLJIT_UNUSED) { + inst_size += 1; /* SIB byte. 
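   Around this size calculation the rex prefix is assembled bit by bit:
   REX.W from !compiler->mode32, REX.B above for an extended base or r/m
   register, REX.X just below for an extended index, and REX.R further
   down for an extended reg-field operand. A fully extended access such
   as mov r9, [r10 + r11] needs all four: 4f 8b 0c 1a.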
*/ + if (reg_map[(b >> 4) & 0x0f] >= 8) + rex |= REX_X; + } + } +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + else if (!(flags & EX86_SSE2) && reg_map[b] >= 8) + rex |= REX_B; +#else + else if (reg_map[b] >= 8) + rex |= REX_B; +#endif + + if (a & SLJIT_IMM) { + if (flags & EX86_BIN_INS) { + if (imma <= 127 && imma >= -128) { + inst_size += 1; + flags |= EX86_BYTE_ARG; + } else + inst_size += 4; + } + else if (flags & EX86_SHIFT_INS) { + imma &= 0x3f; + if (imma != 1) { + inst_size ++; + flags |= EX86_BYTE_ARG; + } + } else if (flags & EX86_BYTE_ARG) + inst_size++; + else if (flags & EX86_HALF_ARG) + inst_size += sizeof(short); + else + inst_size += sizeof(sljit_hw); + } + else { + SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); + /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */ +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + if (!(flags & EX86_SSE2) && reg_map[a] >= 8) + rex |= REX_R; +#else + if (reg_map[a] >= 8) + rex |= REX_R; +#endif + } + + if (rex) + inst_size++; + + buf = (sljit_ub*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!buf); + + /* Encoding the byte. */ + INC_SIZE(inst_size); +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + if (flags & EX86_PREF_F2) + *buf++ = 0xf2; +#endif + if (flags & EX86_PREF_66) + *buf++ = 0x66; + if (rex) + *buf++ = rex; + buf_ptr = buf + size; + + /* Encode mod/rm byte. */ + if (!(flags & EX86_SHIFT_INS)) { + if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + *buf = (flags & EX86_BYTE_ARG) ? 0x83 : 0x81; + + if ((a & SLJIT_IMM) || (a == 0)) + *buf_ptr = 0; +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + else if (!(flags & EX86_SSE2)) + *buf_ptr = reg_lmap[a] << 3; + else + *buf_ptr = a << 3; +#else + else + *buf_ptr = reg_lmap[a] << 3; +#endif + } + else { + if (a & SLJIT_IMM) { + if (imma == 1) + *buf = 0xd1; + else + *buf = 0xc1; + } else + *buf = 0xd3; + *buf_ptr = 0; + } + + if (!(b & SLJIT_MEM)) +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + *buf_ptr++ |= 0xc0 + ((!(flags & EX86_SSE2)) ? reg_lmap[b] : b); +#else + *buf_ptr++ |= 0xc0 + reg_lmap[b]; +#endif + else if ((b & 0x0f) != SLJIT_UNUSED) { +#ifdef _WIN64 + SLJIT_ASSERT((b & 0xf0) != (SLJIT_LOCALS_REG << 4)); +#endif + if ((b & 0xf0) == SLJIT_UNUSED || (b & 0xf0) == (SLJIT_LOCALS_REG << 4)) { + if (immb != 0) { + if (immb <= 127 && immb >= -128) + *buf_ptr |= 0x40; + else + *buf_ptr |= 0x80; + } + + if ((b & 0xf0) == SLJIT_UNUSED) + *buf_ptr++ |= reg_lmap[b & 0x0f]; + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = reg_lmap[b & 0x0f] | (reg_lmap[(b >> 4) & 0x0f] << 3); + } + + if (immb != 0) { + if (immb <= 127 && immb >= -128) + *buf_ptr++ = immb; /* 8 bit displacement. */ + else { + *(sljit_hw*)buf_ptr = immb; /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_hw); + } + } + } + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = reg_lmap[b & 0x0f] | (reg_lmap[(b >> 4) & 0x0f] << 3) | (immb << 6); + } + } + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = 0x25; + *(sljit_hw*)buf_ptr = immb; /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_hw); + } + + if (a & SLJIT_IMM) { + if (flags & EX86_BYTE_ARG) + *buf_ptr = imma; + else if (flags & EX86_HALF_ARG) + *(short*)buf_ptr = imma; + else if (!(flags & EX86_SHIFT_INS)) + *(sljit_hw*)buf_ptr = imma; + } + + return !(flags & EX86_SHIFT_INS) ? 
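/* Return-value contract, as read from the code above: for ordinary
   instructions the caller receives the position where it stores the real
   opcode byte(s); for the shift group the opcode (d3, or d1/c1 for
   immediate counts) has already been written, so the caller gets the
   mod r/m byte instead and only ORs in its /digit. */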
buf : (buf + 1); +} + +/* --------------------------------------------------------------------- */ +/* Call / return instructions */ +/* --------------------------------------------------------------------- */ + +static SLJIT_INLINE int call_with_args(struct sljit_compiler *compiler, int type) +{ + sljit_ub *buf; + +#ifndef _WIN64 + SLJIT_ASSERT(reg_map[SLJIT_TEMPORARY_REG2] == 6 && reg_map[SLJIT_TEMPORARY_REG1] < 8 && reg_map[SLJIT_TEMPORARY_REG3] < 8); + + buf = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); + FAIL_IF(!buf); + INC_SIZE((type < SLJIT_CALL3) ? 3 : 6); + if (type >= SLJIT_CALL3) { + *buf++ = REX_W; + *buf++ = 0x8b; + *buf++ = 0xc0 | (0x2 << 3) | reg_lmap[SLJIT_TEMPORARY_REG3]; + } + *buf++ = REX_W; + *buf++ = 0x8b; + *buf++ = 0xc0 | (0x7 << 3) | reg_lmap[SLJIT_TEMPORARY_REG1]; +#else + SLJIT_ASSERT(reg_map[SLJIT_TEMPORARY_REG2] == 2 && reg_map[SLJIT_TEMPORARY_REG1] < 8 && reg_map[SLJIT_TEMPORARY_REG3] < 8); + + buf = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); + FAIL_IF(!buf); + INC_SIZE((type < SLJIT_CALL3) ? 3 : 6); + if (type >= SLJIT_CALL3) { + *buf++ = REX_W | REX_R; + *buf++ = 0x8b; + *buf++ = 0xc0 | (0x0 << 3) | reg_lmap[SLJIT_TEMPORARY_REG3]; + } + *buf++ = REX_W; + *buf++ = 0x8b; + *buf++ = 0xc0 | (0x1 << 3) | reg_lmap[SLJIT_TEMPORARY_REG1]; +#endif + return SLJIT_SUCCESS; +} + +int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +{ + sljit_ub *buf; + + CHECK_ERROR(); + check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, generals, local_size); + + compiler->temporaries = temporaries; + compiler->generals = generals; + compiler->local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1); +#ifdef _WIN64 + compiler->local_size += 4 * sizeof(sljit_w); +#endif + + /* For UNUSED dst. Uncommon, but possible. */ + if (dst == SLJIT_UNUSED) + dst = TMP_REGISTER; + + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { + if (reg_map[dst] < 8) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!buf); + + INC_SIZE(1); + POP_REG(reg_lmap[dst]); + } + else { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!buf); + + INC_SIZE(2); + *buf++ = REX_B; + POP_REG(reg_lmap[dst]); + } + } + else if (dst & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* REX_W is not necessary (src is not immediate). */ + compiler->mode32 = 1; +#endif + buf = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!buf); + *buf++ = 0x8f; + } + return SLJIT_SUCCESS; +} + +int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +{ + sljit_ub *buf; + + CHECK_ERROR(); + check_sljit_emit_fast_return(compiler, src, srcw); + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) { + FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, srcw)); + src = TMP_REGISTER; + } + + if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) { + if (reg_map[src] < 8) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1); + FAIL_IF(!buf); + + INC_SIZE(1 + 1); + PUSH_REG(reg_lmap[src]); + } + else { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1); + FAIL_IF(!buf); + + INC_SIZE(2 + 1); + *buf++ = REX_B; + PUSH_REG(reg_lmap[src]); + } + } + else if (src & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* REX_W is not necessary (src is not immediate). 
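+ The push emitted below (0xff with /6) defaults to a 64 bit operand in long mode, so mode32 only suppresses the redundant prefix.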
*/ + compiler->mode32 = 1; +#endif + buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!buf); + *buf++ = 0xff; + *buf |= 6 << 3; + + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!buf); + INC_SIZE(1); + } + else { + SLJIT_ASSERT(IS_HALFWORD(srcw)); + /* SLJIT_IMM. */ + buf = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1); + FAIL_IF(!buf); + + INC_SIZE(5 + 1); + *buf++ = 0x68; + *(sljit_hw*)buf = srcw; + buf += sizeof(sljit_hw); + } + + RET(); + return SLJIT_SUCCESS; +} + + +/* --------------------------------------------------------------------- */ +/* Extend input */ +/* --------------------------------------------------------------------- */ + +static int emit_mov_int(struct sljit_compiler *compiler, int sign, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + sljit_ub* code; + int dst_r; + + compiler->mode32 = 0; + + if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) + return SLJIT_SUCCESS; /* Empty instruction. */ + + if (src & SLJIT_IMM) { + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) + return emit_load_imm64(compiler, dst, srcw); + compiler->mode32 = 1; + code = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_w)(int)srcw, dst, dstw); + FAIL_IF(!code); + *code = 0xc7; + compiler->mode32 = 0; + return SLJIT_SUCCESS; + } + + dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_GENERAL_REG3) ? dst : TMP_REGISTER; + + if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_GENERAL_REG3)) + dst_r = src; + else { + if (sign) { + code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); + FAIL_IF(!code); + *code++ = 0x63; + } + else { + if (dst_r == src) { + compiler->mode32 = 1; + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, 0); + FAIL_IF(!code); + *code++ = 0x8b; + compiler->mode32 = 0; + } + /* xor reg, reg. */ + code = emit_x86_instruction(compiler, 1, dst_r, 0, dst_r, 0); + FAIL_IF(!code); + *code++ = 0x33; + if (dst_r != src) { + compiler->mode32 = 1; + code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); + FAIL_IF(!code); + *code++ = 0x8b; + compiler->mode32 = 0; + } + else { + compiler->mode32 = 1; + code = emit_x86_instruction(compiler, 1, src, 0, TMP_REGISTER, 0); + FAIL_IF(!code); + *code++ = 0x8b; + compiler->mode32 = 0; + } + } + } + + if (dst & SLJIT_MEM) { + compiler->mode32 = 1; + code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!code); + *code = 0x89; + compiler->mode32 = 0; + } + + return SLJIT_SUCCESS; +} diff --git a/sljit/sljitNativeX86_common.c b/sljit/sljitNativeX86_common.c new file mode 100644 index 0000000..ab7b351 --- /dev/null +++ b/sljit/sljitNativeX86_common.c @@ -0,0 +1,2690 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_CONST char* sljit_get_platform_name()
+{
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ return "x86-32";
+#else
+ return "x86-64";
+#endif
+}
+
+/*
+ 32b register indexes:
+ 0 - EAX
+ 1 - ECX
+ 2 - EDX
+ 3 - EBX
+ 4 - none
+ 5 - EBP
+ 6 - ESI
+ 7 - EDI
+*/
+
+/*
+ 64b register indexes:
+ 0 - RAX
+ 1 - RCX
+ 2 - RDX
+ 3 - RBX
+ 4 - none
+ 5 - RBP
+ 6 - RSI
+ 7 - RDI
+ 8 - R8 - From now on a REX prefix is required
+ 9 - R9
+ 10 - R10
+ 11 - R11
+ 12 - R12
+ 13 - R13
+ 14 - R14
+ 15 - R15
+*/
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+
+/* Last register + 1. */
+#define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
+
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
+ 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
+};
+
+#define CHECK_EXTRA_REGS(p, w, do) \
+ if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
+ w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
+ p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
+ do; \
+ } \
+ else if (p >= SLJIT_GENERAL_EREG1 && p <= SLJIT_GENERAL_EREG2) { \
+ w = compiler->generals_start + (p - SLJIT_GENERAL_EREG1) * sizeof(sljit_w); \
+ p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
+ do; \
+ }
+
+#else /* SLJIT_CONFIG_X86_32 */
+
+/* Last register + 1. */
+#define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
+#define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
+#define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
+
+/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
+ Note: avoid using r12 and r13 for memory addressing;
+ therefore r12 is a better fit for GENERAL_EREG than for GENERAL_REG. */
+#ifndef _WIN64
+/* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
+ 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
+};
+/* low-map. reg_map & 0x7. */
+static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
+ 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
+};
+#else
+/* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
+static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
+ 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 12, 15, 10, 8, 9
+};
+/* low-map. reg_map & 0x7. 
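+ This 3 bit value is what goes into the ModRM/SIB register fields; for r8-r15 the missing fourth bit is supplied by the REX prefix (e.g. r12: reg_map value 12, reg_lmap value 4, plus REX.B).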
*/ +static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = { + 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 4, 7, 2, 0, 1 +}; +#endif + +#define REX_W 0x48 +#define REX_R 0x44 +#define REX_X 0x42 +#define REX_B 0x41 +#define REX 0x40 + +typedef unsigned int sljit_uhw; +typedef int sljit_hw; + +#define IS_HALFWORD(x) ((x) <= 0x7fffffffll && (x) >= -0x80000000ll) +#define NOT_HALFWORD(x) ((x) > 0x7fffffffll || (x) < -0x80000000ll) + +#define CHECK_EXTRA_REGS(p, w, do) + +#endif /* SLJIT_CONFIG_X86_32 */ + +#if (defined SLJIT_SSE2 && SLJIT_SSE2) +#define TMP_FREG (SLJIT_FLOAT_REG4 + 1) +#endif + +/* Size flags for emit_x86_instruction: */ +#define EX86_BIN_INS 0x0010 +#define EX86_SHIFT_INS 0x0020 +#define EX86_REX 0x0040 +#define EX86_NO_REXW 0x0080 +#define EX86_BYTE_ARG 0x0100 +#define EX86_HALF_ARG 0x0200 +#define EX86_PREF_66 0x0400 + +#if (defined SLJIT_SSE2 && SLJIT_SSE2) +#define EX86_PREF_F2 0x0800 +#define EX86_SSE2 0x1000 +#endif + +#define INC_SIZE(s) (*buf++ = (s), compiler->size += (s)) +#define INC_CSIZE(s) (*code++ = (s), compiler->size += (s)) + +#define PUSH_REG(r) (*buf++ = (0x50 + (r))) +#define POP_REG(r) (*buf++ = (0x58 + (r))) +#define RET() (*buf++ = (0xc3)) +#define RETN(n) (*buf++ = (0xc2), *buf++ = n, *buf++ = 0) +/* r32, r/m32 */ +#define MOV_RM(mod, reg, rm) (*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm)) + +static sljit_ub get_jump_code(int type) +{ + switch (type) { + case SLJIT_C_EQUAL: + case SLJIT_C_FLOAT_EQUAL: + return 0x84; + + case SLJIT_C_NOT_EQUAL: + case SLJIT_C_FLOAT_NOT_EQUAL: + return 0x85; + + case SLJIT_C_LESS: + case SLJIT_C_FLOAT_LESS: + return 0x82; + + case SLJIT_C_GREATER_EQUAL: + case SLJIT_C_FLOAT_GREATER_EQUAL: + return 0x83; + + case SLJIT_C_GREATER: + case SLJIT_C_FLOAT_GREATER: + return 0x87; + + case SLJIT_C_LESS_EQUAL: + case SLJIT_C_FLOAT_LESS_EQUAL: + return 0x86; + + case SLJIT_C_SIG_LESS: + return 0x8c; + + case SLJIT_C_SIG_GREATER_EQUAL: + return 0x8d; + + case SLJIT_C_SIG_GREATER: + return 0x8f; + + case SLJIT_C_SIG_LESS_EQUAL: + return 0x8e; + + case SLJIT_C_OVERFLOW: + case SLJIT_C_MUL_OVERFLOW: + return 0x80; + + case SLJIT_C_NOT_OVERFLOW: + case SLJIT_C_MUL_NOT_OVERFLOW: + return 0x81; + + case SLJIT_C_FLOAT_NAN: + return 0x8a; + + case SLJIT_C_FLOAT_NOT_NAN: + return 0x8b; + } + return 0; +} + +static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type); +#endif + +static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type) +{ + int short_jump; + sljit_uw label_addr; + + if (jump->flags & JUMP_LABEL) + label_addr = (sljit_uw)(code + jump->u.label->size); + else + label_addr = jump->u.target; + short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll) + return generate_far_jump_code(jump, code_ptr, type); +#endif + + if (type == SLJIT_JUMP) { + if (short_jump) + *code_ptr++ = 0xeb; + else + *code_ptr++ = 0xe9; + jump->addr++; + } + else if (type >= SLJIT_CALL0) { + short_jump = 0; + *code_ptr++ = 0xe8; + jump->addr++; + } + else if (short_jump) { + *code_ptr++ = get_jump_code(type) - 0x10; + jump->addr++; + } + else { + *code_ptr++ = 0x0f; + *code_ptr++ = 
get_jump_code(type); + jump->addr += 2; + } + + if (short_jump) { + jump->flags |= PATCH_MB; + code_ptr += sizeof(sljit_b); + } else { + jump->flags |= PATCH_MW; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + code_ptr += sizeof(sljit_w); +#else + code_ptr += sizeof(sljit_hw); +#endif + } + + return code_ptr; +} + +void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ub *code; + sljit_ub *code_ptr; + sljit_ub *buf_ptr; + sljit_ub *buf_end; + sljit_ub len; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + check_sljit_generate_code(compiler); + reverse_buf(compiler); + + /* Second code generation pass. */ + code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + do { + buf_ptr = buf->memory; + buf_end = buf_ptr + buf->used_size; + do { + len = *buf_ptr++; + if (len > 0) { + /* The code is already generated. */ + SLJIT_MEMMOVE(code_ptr, buf_ptr, len); + code_ptr += len; + buf_ptr += len; + } + else { + if (*buf_ptr >= 4) { + jump->addr = (sljit_uw)code_ptr; + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) + code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4); + else + code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4); + jump = jump->next; + } + else if (*buf_ptr == 0) { + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + else if (*buf_ptr == 1) { + const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w); + const_ = const_->next; + } + else { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + *code_ptr++ = (*buf_ptr == 2) ? 
0xe8 /* call */ : 0xe9 /* jmp */; + buf_ptr++; + *(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w)); + code_ptr += sizeof(sljit_w); + buf_ptr += sizeof(sljit_w) - 1; +#else + code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr); + buf_ptr += sizeof(sljit_w); +#endif + } + buf_ptr++; + } + } while (buf_ptr < buf_end); + SLJIT_ASSERT(buf_ptr == buf_end); + buf = buf->next; + } while (buf); + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + + jump = compiler->jumps; + while (jump) { + if (jump->flags & PATCH_MB) { + SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127); + *(sljit_ub*)jump->addr = jump->u.label->addr - (jump->addr + sizeof(sljit_b)); + } else if (jump->flags & PATCH_MW) { + if (jump->flags & JUMP_LABEL) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + *(sljit_w*)jump->addr = jump->u.label->addr - (jump->addr + sizeof(sljit_w)); +#else + SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll); + *(sljit_hw*)jump->addr = jump->u.label->addr - (jump->addr + sizeof(sljit_hw)); +#endif + } + else { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + *(sljit_w*)jump->addr = jump->u.target - (jump->addr + sizeof(sljit_w)); +#else + SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll); + *(sljit_hw*)jump->addr = jump->u.target - (jump->addr + sizeof(sljit_hw)); +#endif + } + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + else if (jump->flags & PATCH_MD) + *(sljit_w*)jump->addr = jump->u.label->addr; +#endif + + jump = jump->next; + } + + /* Maybe we waste some space because of short jumps. 
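+ The first pass reserves room for the 32 bit displacement forms (5 or 6 bytes); jumps whose target turns out to lie within -128..127 bytes are emitted in their 2 byte short form instead, so the end of the allocated block may stay unused.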
*/ + SLJIT_ASSERT(code_ptr <= code + compiler->size); + compiler->error = SLJIT_ERR_COMPILED; + return (void*)code; +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +static int emit_cum_binary(struct sljit_compiler *compiler, + sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w); + +static int emit_non_cum_binary(struct sljit_compiler *compiler, + sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w); + +static int emit_mov(struct sljit_compiler *compiler, + int dst, sljit_w dstw, + int src, sljit_w srcw); + +static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler) +{ + sljit_ub *buf; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + buf = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!buf); + INC_SIZE(5); + *buf++ = 0x9c; /* pushfd */ +#else + buf = (sljit_ub*)ensure_buf(compiler, 1 + 6); + FAIL_IF(!buf); + INC_SIZE(6); + *buf++ = 0x9c; /* pushfq */ + *buf++ = 0x48; +#endif + *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */ + *buf++ = 0x64; + *buf++ = 0x24; + *buf++ = sizeof(sljit_w); + compiler->flags_saved = 1; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags) +{ + sljit_ub *buf; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + buf = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!buf); + INC_SIZE(5); +#else + buf = (sljit_ub*)ensure_buf(compiler, 1 + 6); + FAIL_IF(!buf); + INC_SIZE(6); + *buf++ = 0x48; +#endif + *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */ + *buf++ = 0x64; + *buf++ = 0x24; + *buf++ = (sljit_ub)-(int)sizeof(sljit_w); + *buf++ = 0x9d; /* popfd / popfq */ + compiler->flags_saved = keep_flags; + return SLJIT_SUCCESS; +} + +#ifdef _WIN32 +#include <malloc.h> + +static void SLJIT_CALL sljit_touch_stack(sljit_w local_size) +{ + /* Workaround for calling _chkstk. */ + alloca(local_size); +} +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#include "sljitNativeX86_32.c" +#else +#include "sljitNativeX86_64.c" +#endif + +int sljit_emit_op0(struct sljit_compiler *compiler, int op) +{ + sljit_ub *buf; + + CHECK_ERROR(); + check_sljit_emit_op0(compiler, op); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!buf); + INC_SIZE(1); + *buf = 0xcc; + break; + case SLJIT_NOP: + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!buf); + INC_SIZE(1); + *buf = 0x90; + break; + } + + return SLJIT_SUCCESS; +} + +static int emit_mov(struct sljit_compiler *compiler, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + sljit_ub* code; + + if (dst == SLJIT_UNUSED) { + /* No destination, doesn't need to setup flags. 
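+ A load from a memory source is still emitted, so the memory access itself is preserved.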
*/ + if (src & SLJIT_MEM) { + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw); + FAIL_IF(!code); + *code = 0x8b; + } + return SLJIT_SUCCESS; + } + if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) { + code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); + FAIL_IF(!code); + *code = 0x89; + return SLJIT_SUCCESS; + } + if (src & SLJIT_IMM) { + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); +#else + if (!compiler->mode32) { + if (NOT_HALFWORD(srcw)) + return emit_load_imm64(compiler, dst, srcw); + } + else + return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw); +#endif + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (!compiler->mode32 && NOT_HALFWORD(srcw)) { + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw)); + code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw); + FAIL_IF(!code); + *code = 0x89; + return SLJIT_SUCCESS; + } +#endif + code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!code); + *code = 0xc7; + return SLJIT_SUCCESS; + } + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { + code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); + FAIL_IF(!code); + *code = 0x8b; + return SLJIT_SUCCESS; + } + + /* Memory to memory move. Requires two instruction. */ + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw); + FAIL_IF(!code); + *code = 0x8b; + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); + FAIL_IF(!code); + *code = 0x89; + return SLJIT_SUCCESS; +} + +#define EMIT_MOV(compiler, dst, dstw, src, srcw) \ + FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); + +#define ENCODE_PREFIX(prefix) \ + do { \ + code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \ + FAIL_IF(!code); \ + INC_CSIZE(1); \ + *code = (prefix); \ + } while (0) + +static int emit_mov_byte(struct sljit_compiler *compiler, int sign, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + sljit_ub* code; + int dst_r; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + int work_r; +#endif + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) + return SLJIT_SUCCESS; /* Empty instruction. */ + + if (src & SLJIT_IMM) { + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); +#else + return emit_load_imm64(compiler, dst, srcw); +#endif + } + code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!code); + *code = 0xc6; + return SLJIT_SUCCESS; + } + + dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER; + + if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (reg_map[src] >= 4) { + SLJIT_ASSERT(dst_r == TMP_REGISTER); + EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0); + } else + dst_r = src; +#else + dst_r = src; +#endif + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) { + /* src, dst are registers. 
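+ Only EAX, ECX, EDX and EBX (map values 0-3) have byte-accessible low registers on x86-32, so a source with reg_map[src] >= 4 needs extra care.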
*/ + SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER); + if (reg_map[dst] < 4) { + if (dst != src) + EMIT_MOV(compiler, dst, 0, src, 0); + code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0); + FAIL_IF(!code); + *code++ = 0x0f; + *code = sign ? 0xbe : 0xb6; + } + else { + if (dst != src) + EMIT_MOV(compiler, dst, 0, src, 0); + if (sign) { + /* shl reg, 24 */ + code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); + FAIL_IF(!code); + *code |= 0x4 << 3; + code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); + FAIL_IF(!code); + /* shr/sar reg, 24 */ + *code |= 0x7 << 3; + } + else { + /* and dst, 0xff */ + code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0); + FAIL_IF(!code); + *(code + 1) |= 0x4 << 3; + } + } + return SLJIT_SUCCESS; + } +#endif + else { + /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */ + code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!code); + *code++ = 0x0f; + *code = sign ? 0xbe : 0xb6; + } + + if (dst & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (dst_r == TMP_REGISTER) { + /* Find a non-used register, whose reg_map[src] < 4. */ + if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) { + if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4)) + work_r = SLJIT_TEMPORARY_REG3; + else + work_r = SLJIT_TEMPORARY_REG2; + } + else { + if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4)) + work_r = SLJIT_TEMPORARY_REG1; + else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2) + work_r = SLJIT_TEMPORARY_REG3; + else + work_r = SLJIT_TEMPORARY_REG2; + } + + if (work_r == SLJIT_TEMPORARY_REG1) { + ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]); + } + else { + code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); + FAIL_IF(!code); + *code = 0x87; + } + + code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw); + FAIL_IF(!code); + *code = 0x88; + + if (work_r == SLJIT_TEMPORARY_REG1) { + ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]); + } + else { + code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); + FAIL_IF(!code); + *code = 0x87; + } + } + else { + code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!code); + *code = 0x88; + } +#else + code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); + FAIL_IF(!code); + *code = 0x88; +#endif + } + + return SLJIT_SUCCESS; +} + +static int emit_mov_half(struct sljit_compiler *compiler, int sign, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + sljit_ub* code; + int dst_r; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) + return SLJIT_SUCCESS; /* Empty instruction. */ + + if (src & SLJIT_IMM) { + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); +#else + return emit_load_imm64(compiler, dst, srcw); +#endif + } + code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!code); + *code = 0xc7; + return SLJIT_SUCCESS; + } + + dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? 
dst : TMP_REGISTER; + + if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)) + dst_r = src; + else { + code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!code); + *code++ = 0x0f; + *code = sign ? 0xbf : 0xb7; + } + + if (dst & SLJIT_MEM) { + code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); + FAIL_IF(!code); + *code = 0x89; + } + + return SLJIT_SUCCESS; +} + +static int emit_unary(struct sljit_compiler *compiler, int un_index, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + sljit_ub* code; + + if (dst == SLJIT_UNUSED) { + EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); + code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!code); + *code++ = 0xf7; + *code |= (un_index) << 3; + return SLJIT_SUCCESS; + } + if (dst == src && dstw == srcw) { + /* Same input and output */ + code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!code); + *code++ = 0xf7; + *code |= (un_index) << 3; + return SLJIT_SUCCESS; + } + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + EMIT_MOV(compiler, dst, 0, src, srcw); + code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!code); + *code++ = 0xf7; + *code |= (un_index) << 3; + return SLJIT_SUCCESS; + } + EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); + code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!code); + *code++ = 0xf7; + *code |= (un_index) << 3; + EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); + return SLJIT_SUCCESS; +} + +static int emit_not_with_flags(struct sljit_compiler *compiler, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + sljit_ub* code; + + if (dst == SLJIT_UNUSED) { + EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); + code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!code); + *code++ = 0xf7; + *code |= 0x2 << 3; + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); + FAIL_IF(!code); + *code = 0x0b; + return SLJIT_SUCCESS; + } + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + EMIT_MOV(compiler, dst, 0, src, srcw); + code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!code); + *code++ = 0xf7; + *code |= 0x2 << 3; + code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); + FAIL_IF(!code); + *code = 0x0b; + return SLJIT_SUCCESS; + } + EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); + code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!code); + *code++ = 0xf7; + *code |= 0x2 << 3; + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); + FAIL_IF(!code); + *code = 0x0b; + EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); + return SLJIT_SUCCESS; +} + +static int emit_clz(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + sljit_ub* code; + int dst_r; + + if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { + /* Just set the zero flag. */ + EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); + code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0); + FAIL_IF(!code); + *code++ = 0xf7; + *code |= 0x2 << 3; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0); +#else + code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 
63 : 31, TMP_REGISTER, 0); +#endif + FAIL_IF(!code); + *code |= 0x5 << 3; + return SLJIT_SUCCESS; + } + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); + src = TMP_REGISTER; + srcw = 0; + } + + code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw); + FAIL_IF(!code); + *code++ = 0x0f; + *code = 0xbd; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) + dst_r = dst; + else { + /* Find an unused temporary register. */ + if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4)) + dst_r = SLJIT_TEMPORARY_REG1; + else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4)) + dst_r = SLJIT_TEMPORARY_REG2; + else + dst_r = SLJIT_TEMPORARY_REG3; + EMIT_MOV(compiler, dst, dstw, dst_r, 0); + } + EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31); +#else + dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2; + compiler->mode32 = 0; + EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31); + compiler->mode32 = op & SLJIT_INT_OP; +#endif + + code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0); + FAIL_IF(!code); + *code++ = 0x0f; + *code = 0x45; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); +#else + code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0); +#endif + FAIL_IF(!code); + *(code + 1) |= 0x6 << 3; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (dst & SLJIT_MEM) { + code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!code); + *code = 0x87; + } +#else + if (dst & SLJIT_MEM) + EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0); +#endif + return SLJIT_SUCCESS; +} + +int sljit_emit_op1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + sljit_ub* code; + int update = 0; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + int dst_is_ereg = 0; + int src_is_ereg = 0; +#else + #define src_is_ereg 0 +#endif + + CHECK_ERROR(); + check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_INT_OP; +#endif + CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); + CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1); + + if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) { + op = GET_OPCODE(op); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + SLJIT_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU); + if (op >= SLJIT_MOVU) { + update = 1; + op -= 7; + } + + if (src & SLJIT_IMM) { + switch (op) { + case SLJIT_MOV_UB: + srcw = (unsigned char)srcw; + break; + case SLJIT_MOV_SB: + srcw = (signed char)srcw; + break; + case SLJIT_MOV_UH: + srcw = (unsigned short)srcw; + break; + case SLJIT_MOV_SH: + srcw = (signed short)srcw; + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case SLJIT_MOV_UI: + srcw = (unsigned int)srcw; + break; + case SLJIT_MOV_SI: + srcw = (signed int)srcw; + break; +#endif + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg)) + return emit_mov(compiler, dst, dstw, src, srcw); +#endif + } + + if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) { + code = 
emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw); + FAIL_IF(!code); + *code = 0x8d; + src &= SLJIT_MEM | 0xf; + srcw = 0; + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) { + SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG)); + dst = TMP_REGISTER; + } +#endif + + switch (op) { + case SLJIT_MOV: +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: +#endif + FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_UB: + FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw)); + break; + case SLJIT_MOV_SB: + FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw)); + break; + case SLJIT_MOV_UH: + FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw)); + break; + case SLJIT_MOV_SH: + FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw)); + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case SLJIT_MOV_UI: + FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw)); + break; + case SLJIT_MOV_SI: + FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw)); + break; +#endif + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER) + return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0); +#endif + + if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) { + code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw); + FAIL_IF(!code); + *code = 0x8d; + } + return SLJIT_SUCCESS; + } + + if (SLJIT_UNLIKELY(GET_FLAGS(op))) + compiler->flags_saved = 0; + + switch (GET_OPCODE(op)) { + case SLJIT_NOT: + if (SLJIT_UNLIKELY(op & SLJIT_SET_E)) + return emit_not_with_flags(compiler, dst, dstw, src, srcw); + return emit_unary(compiler, 0x2, dst, dstw, src, srcw); + + case SLJIT_NEG: + if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) + FAIL_IF(emit_save_flags(compiler)); + return emit_unary(compiler, 0x3, dst, dstw, src, srcw); + + case SLJIT_CLZ: + if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) + FAIL_IF(emit_save_flags(compiler)); + return emit_clz(compiler, op, dst, dstw, src, srcw); + } + + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + #undef src_is_ereg +#endif +} + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \ + if (IS_HALFWORD(immw) || compiler->mode32) { \ + code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!code); \ + *(code + 1) |= (_op_imm_); \ + } \ + else { \ + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \ + code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \ + FAIL_IF(!code); \ + *code = (_op_mr_); \ + } + +#define BINARY_EAX_IMM(_op_eax_imm_, immw) \ + FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? 
REX_W : 0, (_op_eax_imm_), immw)) + +#else + +#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \ + code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!code); \ + *(code + 1) |= (_op_imm_); + +#define BINARY_EAX_IMM(_op_eax_imm_, immw) \ + FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw)) + +#endif + +static int emit_cum_binary(struct sljit_compiler *compiler, + sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + sljit_ub* code; + + if (dst == SLJIT_UNUSED) { + EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); + } + else { + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!code); + *code = op_rm; + } + return SLJIT_SUCCESS; + } + + if (dst == src1 && dstw == src1w) { + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src2w); + } + else { + BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); + } + } + else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!code); + *code = op_rm; + } + else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) { + /* Special exception for sljit_emit_cond_value. */ + code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!code); + *code = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w); + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); + FAIL_IF(!code); + *code = op_mr; + } + return SLJIT_SUCCESS; + } + + /* Only for cumulative operations. */ + if (dst == src2 && dstw == src2w) { + if (src1 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { +#else + if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src1w); + } + else { + BINARY_IMM(op_imm, op_mr, src1w, dst, dstw); + } + } + else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w); + FAIL_IF(!code); + *code = op_rm; + } + else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) { + code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw); + FAIL_IF(!code); + *code = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); + FAIL_IF(!code); + *code = op_mr; + } + return SLJIT_SUCCESS; + } + + /* General version. */ + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, dst, 0); + } + else { + code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!code); + *code = op_rm; + } + } + else { + /* This version requires less memory writing. 
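+ The operation runs in TMP_REGISTER and only the final result is stored to dst.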
*/ + EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); + } + else { + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!code); + *code = op_rm; + } + EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); + } + + return SLJIT_SUCCESS; +} + +static int emit_non_cum_binary(struct sljit_compiler *compiler, + sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + sljit_ub* code; + + if (dst == SLJIT_UNUSED) { + EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); + } + else { + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!code); + *code = op_rm; + } + return SLJIT_SUCCESS; + } + + if (dst == src1 && dstw == src1w) { + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src2w); + } + else { + BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); + } + } + else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!code); + *code = op_rm; + } + else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) { + code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!code); + *code = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w); + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw); + FAIL_IF(!code); + *code = op_mr; + } + return SLJIT_SUCCESS; + } + + /* General version. */ + if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, dst, 0); + } + else { + code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!code); + *code = op_rm; + } + } + else { + /* This version requires less memory writing. */ + EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0); + } + else { + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!code); + *code = op_rm; + } + EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); + } + + return SLJIT_SUCCESS; +} + +static int emit_mul(struct sljit_compiler *compiler, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + sljit_ub* code; + int dst_r; + + dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER; + + /* Register destination. 
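+ One source already in the destination register allows the two byte 0x0f 0xaf (imul reg, reg/mem) form to be used directly (e.g. imul eax, ecx is 0x0f 0xaf 0xc1).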
*/
+ if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
+ code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
+ FAIL_IF(!code);
+ *code++ = 0x0f;
+ *code = 0xaf;
+ }
+ else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
+ code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
+ FAIL_IF(!code);
+ *code++ = 0x0f;
+ *code = 0xaf;
+ }
+ else if (src1 & SLJIT_IMM) {
+ if (src2 & SLJIT_IMM) {
+ EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
+ src2 = dst_r;
+ src2w = 0;
+ }
+
+ if (src1w <= 127 && src1w >= -128) {
+ code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
+ FAIL_IF(!code);
+ *code = 0x6b;
+ code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+ FAIL_IF(!code);
+ INC_CSIZE(1);
+ *code = (sljit_b)src1w;
+ }
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ else {
+ code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
+ FAIL_IF(!code);
+ *code = 0x69;
+ code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+ FAIL_IF(!code);
+ INC_CSIZE(4);
+ *(sljit_w*)code = src1w;
+ }
+#else
+ else if (IS_HALFWORD(src1w)) {
+ code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
+ FAIL_IF(!code);
+ *code = 0x69;
+ code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+ FAIL_IF(!code);
+ INC_CSIZE(4);
+ *(sljit_hw*)code = src1w;
+ }
+ else {
+ EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
+ if (dst_r != src2)
+ EMIT_MOV(compiler, dst_r, 0, src2, src2w);
+ code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
+ FAIL_IF(!code);
+ *code++ = 0x0f;
+ *code = 0xaf;
+ }
+#endif
+ }
+ else if (src2 & SLJIT_IMM) {
+ /* Note: src1 is NOT immediate. */
+
+ if (src2w <= 127 && src2w >= -128) {
+ code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
+ FAIL_IF(!code);
+ *code = 0x6b;
+ code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
+ FAIL_IF(!code);
+ INC_CSIZE(1);
+ *code = (sljit_b)src2w;
+ }
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+ else {
+ code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
+ FAIL_IF(!code);
+ *code = 0x69;
+ code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+ FAIL_IF(!code);
+ INC_CSIZE(4);
+ *(sljit_w*)code = src2w;
+ }
+#else
+ else if (IS_HALFWORD(src2w)) {
+ code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
+ FAIL_IF(!code);
+ *code = 0x69;
+ code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
+ FAIL_IF(!code);
+ INC_CSIZE(4);
+ *(sljit_hw*)code = src2w;
+ }
+ else {
+ EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
+ if (dst_r != src1)
+ EMIT_MOV(compiler, dst_r, 0, src1, src1w);
+ code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
+ FAIL_IF(!code);
+ *code++ = 0x0f;
+ *code = 0xaf;
+ }
+#endif
+ }
+ else {
+ /* Neither argument is immediate. */
+ if (depends_on(src2, dst_r))
+ dst_r = TMP_REGISTER;
+ EMIT_MOV(compiler, dst_r, 0, src1, src1w);
+ code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
+ FAIL_IF(!code);
+ *code++ = 0x0f;
+ *code = 0xaf;
+ }
+
+ if (dst_r == TMP_REGISTER)
+ EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
+
+ return SLJIT_SUCCESS;
+}
+
+static int emit_lea_binary(struct sljit_compiler *compiler,
+ int dst, sljit_w dstw,
+ int src1, sljit_w src1w,
+ int src2, sljit_w src2w)
+{
+ sljit_ub* code;
+ int dst_r, done = 0;
+
+ /* These cases are better left to the normal code path. */
+ if (dst == src1 && dstw == src1w)
+ return SLJIT_ERR_UNSUPPORTED;
+ if (dst == src2 && dstw == src2w)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
+
+ if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
+ if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
+ /* src1 and src2 cannot both be SLJIT_LOCALS_REG. */
+ if (src1 != SLJIT_LOCALS_REG || src2 != SLJIT_LOCALS_REG) {
+ code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
+ FAIL_IF(!code);
+ *code = 0x8d;
+ done = 1;
+ }
+ }
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+ code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
+#else
+ if (src2 & SLJIT_IMM) {
+ code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
+#endif
+ FAIL_IF(!code);
+ *code = 0x8d;
+ done = 1;
+ }
+ }
+ else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+ code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
+#else
+ if (src1 & SLJIT_IMM) {
+ code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
+#endif
+ FAIL_IF(!code);
+ *code = 0x8d;
+ done = 1;
+ }
+ }
+
+ if (done) {
+ if (dst_r == TMP_REGISTER)
+ return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
+ return SLJIT_SUCCESS;
+ }
+ return SLJIT_ERR_UNSUPPORTED;
+}
+
+static int emit_cmp_binary(struct sljit_compiler *compiler,
+ int src1, sljit_w src1w,
+ int src2, sljit_w src2w)
+{
+ sljit_ub* code;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+#else
+ if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+#endif
+ BINARY_EAX_IMM(0x3d, src2w);
+ return SLJIT_SUCCESS;
+ }
+
+ if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
+ if (src2 & SLJIT_IMM) {
+ BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
+ }
+ else {
+ code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
+ FAIL_IF(!code);
+ *code = 0x3b;
+ }
+ return SLJIT_SUCCESS;
+ }
+
+ if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
+ code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
+ FAIL_IF(!code);
+ *code = 0x39;
+ return SLJIT_SUCCESS;
+ }
+
+ if (src2 & SLJIT_IMM) {
+ if (src1 & SLJIT_IMM) {
+ EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
+ src1 = TMP_REGISTER;
+ src1w = 0;
+ }
+ BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
+ }
+ else {
+ EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
+ code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
+ FAIL_IF(!code);
+ *code = 0x3b;
+ }
+ return SLJIT_SUCCESS;
+}
+
+static int emit_test_binary(struct sljit_compiler *compiler,
+ int src1, sljit_w src1w,
+ int src2, sljit_w src2w)
+{
+ sljit_ub* code;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
+#else
+ if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
+#endif
+ BINARY_EAX_IMM(0xa9, src2w);
+ return SLJIT_SUCCESS;
+ }
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
+#else
+ if (src2 
== SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { +#endif + BINARY_EAX_IMM(0xa9, src1w); + return SLJIT_SUCCESS; + } + + if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) { + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src2w) || compiler->mode32) { + code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); + FAIL_IF(!code); + *code = 0xf7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); + code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0); + FAIL_IF(!code); + *code = 0x85; + } +#else + code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); + FAIL_IF(!code); + *code = 0xf7; +#endif + } + else { + code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); + FAIL_IF(!code); + *code = 0x85; + } + return SLJIT_SUCCESS; + } + + if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) { + if (src1 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src1w) || compiler->mode32) { + code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0); + FAIL_IF(!code); + *code = 0xf7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); + code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0); + FAIL_IF(!code); + *code = 0x85; + } +#else + code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0); + FAIL_IF(!code); + *code = 0xf7; +#endif + } + else { + code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); + FAIL_IF(!code); + *code = 0x85; + } + return SLJIT_SUCCESS; + } + + EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src2w) || compiler->mode32) { + code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0); + FAIL_IF(!code); + *code = 0xf7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); + code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0); + FAIL_IF(!code); + *code = 0x85; + } +#else + code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0); + FAIL_IF(!code); + *code = 0xf7; +#endif + } + else { + code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w); + FAIL_IF(!code); + *code = 0x85; + } + return SLJIT_SUCCESS; +} + +static int emit_shift(struct sljit_compiler *compiler, + sljit_ub mode, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + sljit_ub* code; + + if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) { + if (dst == src1 && dstw == src1w) { + code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); + FAIL_IF(!code); + *code |= mode; + return SLJIT_SUCCESS; + } + if (dst == SLJIT_UNUSED) { + EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); + code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0); + FAIL_IF(!code); + *code |= mode; + return SLJIT_SUCCESS; + } + if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { + EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); + code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); + FAIL_IF(!code); + *code |= mode; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); + return SLJIT_SUCCESS; + } + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + code = emit_x86_instruction(compiler, 1 | 
EX86_SHIFT_INS, src2, src2w, dst, 0);
+ FAIL_IF(!code);
+ *code |= mode;
+ return SLJIT_SUCCESS;
+ }
+
+ EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
+ code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
+ FAIL_IF(!code);
+ *code |= mode;
+ EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
+ return SLJIT_SUCCESS;
+ }
+
+ if (dst == SLJIT_PREF_SHIFT_REG) {
+ EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+ code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
+ FAIL_IF(!code);
+ *code |= mode;
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
+ }
+ else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !depends_on(src2, dst)) {
+ if (src1 != dst)
+ EMIT_MOV(compiler, dst, 0, src1, src1w);
+ EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+ code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
+ FAIL_IF(!code);
+ *code |= mode;
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
+ }
+ else {
+ /* This case is really difficult, since ecx can be used for
+ addressing as well, and we must make it work even in that case. */
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
+#else
+ /* [esp - 4] is reserved for eflags. */
+ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)), SLJIT_PREF_SHIFT_REG, 0);
+#endif
+
+ EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+ code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
+ FAIL_IF(!code);
+ *code |= mode;
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
+#else
+ /* [esp - 4] is reserved for eflags. */
+ EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)));
+#endif
+ EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
+ }
+
+ return SLJIT_SUCCESS;
+}
+
+int sljit_emit_op2(struct sljit_compiler *compiler, int op,
+ int dst, sljit_w dstw,
+ int src1, sljit_w src1w,
+ int src2, sljit_w src2w)
+{
+ CHECK_ERROR();
+ check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+ compiler->mode32 = op & SLJIT_INT_OP;
+#endif
+ CHECK_EXTRA_REGS(dst, dstw, (void)0);
+ CHECK_EXTRA_REGS(src1, src1w, (void)0);
+ CHECK_EXTRA_REGS(src2, src2w, (void)0);
+
+ if (GET_OPCODE(op) >= SLJIT_MUL) {
+ if (SLJIT_UNLIKELY(GET_FLAGS(op)))
+ compiler->flags_saved = 0;
+ else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
+ FAIL_IF(emit_save_flags(compiler));
+ }
+
+ switch (GET_OPCODE(op)) {
+ case SLJIT_ADD:
+ if (!GET_FLAGS(op)) {
+ if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
+ return compiler->error;
+ }
+ else
+ compiler->flags_saved = 0;
+ if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
+ FAIL_IF(emit_save_flags(compiler));
+ return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
+ dst, dstw, src1, src1w, src2, src2w);
+ case SLJIT_ADDC:
+ if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. 
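+ The 0x13/0x11 opcodes below encode adc, which consumes the carry bit of the saved status word.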
*/ + FAIL_IF(emit_restore_flags(compiler, 1)); + else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS)) + FAIL_IF(emit_save_flags(compiler)); + if (SLJIT_UNLIKELY(GET_FLAGS(op))) + compiler->flags_saved = 0; + return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUB: + if (!GET_FLAGS(op)) { + if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) + return compiler->error; + } + else + compiler->flags_saved = 0; + if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) + FAIL_IF(emit_save_flags(compiler)); + if (dst == SLJIT_UNUSED) + return emit_cmp_binary(compiler, src1, src1w, src2, src2w); + return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUBC: + if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */ + FAIL_IF(emit_restore_flags(compiler, 1)); + else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS)) + FAIL_IF(emit_save_flags(compiler)); + if (SLJIT_UNLIKELY(GET_FLAGS(op))) + compiler->flags_saved = 0; + return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_MUL: + return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_AND: + if (dst == SLJIT_UNUSED) + return emit_test_binary(compiler, src1, src1w, src2, src2w); + return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_OR: + return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_XOR: + return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SHL: + return emit_shift(compiler, 0x4 << 3, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_LSHR: + return emit_shift(compiler, 0x5 << 3, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ASHR: + return emit_shift(compiler, 0x7 << 3, + dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) +static int sse2_available = 0; +#endif + +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + +/* Alignment + 2 * 16 bytes. */ +static int sse2_data[3 + 4 + 4]; +static int *sse2_buffer; + +static void init_compiler() +{ +#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) + int features = 0; +#endif + + sse2_buffer = (int*)(((sljit_uw)sse2_data + 15) & ~0xf); + sse2_buffer[0] = 0; + sse2_buffer[1] = 0x80000000; + sse2_buffer[4] = 0xffffffff; + sse2_buffer[5] = 0x7fffffff; + +#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) +#ifdef __GNUC__ + /* AT&T syntax. */ + asm ( + "pushl %%ebx\n" + "movl $0x1, %%eax\n" + "cpuid\n" + "popl %%ebx\n" + "movl %%edx, %0\n" + : "=g" (features) + : + : "%eax", "%ecx", "%edx" + ); +#elif defined(_MSC_VER) + /* Intel syntax. */ + __asm { + mov eax, 1 + push ebx + cpuid + pop ebx + mov features, edx + } +#else + #error "SLJIT_SSE2_AUTO is not implemented for this C compiler" +#endif + sse2_available = (features >> 26) & 0x1; +#endif +} + +#endif + +int sljit_is_fpu_available(void) +{ + /* Always available. 
*/ + return 1; +} + +#if (defined SLJIT_SSE2 && SLJIT_SSE2) + +static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode, + int xmm1, int xmm2, sljit_w xmm2w) +{ + sljit_ub *buf; + + buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + FAIL_IF(!buf); + *buf++ = 0x0f; + *buf = opcode; + return SLJIT_SUCCESS; +} + +static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode, + int xmm1, int xmm2, sljit_w xmm2w) +{ + sljit_ub *buf; + + buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + FAIL_IF(!buf); + *buf++ = 0x0f; + *buf = opcode; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler, + int dst, int src, sljit_w srcw) +{ + return emit_sse2(compiler, 0x10, dst, src, srcw); +} + +static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler, + int dst, sljit_w dstw, int src) +{ + return emit_sse2(compiler, 0x11, src, dst, dstw); +} + +#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) +int sljit_emit_fop1(struct sljit_compiler *compiler, int op, +#else +static int sljit_emit_sse2_fop1(struct sljit_compiler *compiler, int op, +#endif + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + int dst_r; + + CHECK_ERROR(); + check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + if (GET_OPCODE(op) == SLJIT_FCMP) { + compiler->flags_saved = 0; + if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) + dst_r = dst; + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw)); + } + return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw); + } + + if (op == SLJIT_FMOV) { + if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) + return emit_sse2_load(compiler, dst, src, srcw); + if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) + return emit_sse2_store(compiler, dst, dstw, src); + FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw)); + return emit_sse2_store(compiler, dst, dstw, TMP_FREG); + } + + if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) { + dst_r = dst; + if (dst != src) + FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw)); + } + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw)); + } + + switch (op) { + case SLJIT_FNEG: + FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer)); + break; + + case SLJIT_FABS: + FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4))); + break; + } + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) +int sljit_emit_fop2(struct sljit_compiler *compiler, int op, +#else +static int sljit_emit_sse2_fop2(struct sljit_compiler *compiler, int op, +#endif + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + int dst_r; + + CHECK_ERROR(); + check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) { + dst_r = dst; + if (dst == src1) + ; /* Do nothing here. */ + else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) { + /* Swap arguments. 
*/ + src2 = src1; + src2w = src1w; + } + else if (dst != src2) + FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w)); + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w)); + } + } + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w)); + } + + switch (op) { + case SLJIT_FADD: + FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w)); + break; + + case SLJIT_FSUB: + FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w)); + break; + + case SLJIT_FMUL: + FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w)); + break; + + case SLJIT_FDIV: + FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w)); + break; + } + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +#endif + +#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) || !(defined SLJIT_SSE2 && SLJIT_SSE2) + +static int emit_fld(struct sljit_compiler *compiler, + int src, sljit_w srcw) +{ + sljit_ub *buf; + + if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!buf); + INC_SIZE(2); + *buf++ = 0xd9; + *buf = 0xc0 + src - 1; + return SLJIT_SUCCESS; + } + + buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!buf); + *buf = 0xdd; + return SLJIT_SUCCESS; +} + +static int emit_fop(struct sljit_compiler *compiler, + sljit_ub st_arg, sljit_ub st_arg2, + sljit_ub m64fp_arg, sljit_ub m64fp_arg2, + int src, sljit_w srcw) +{ + sljit_ub *buf; + + if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!buf); + INC_SIZE(2); + *buf++ = st_arg; + *buf = st_arg2 + src; + return SLJIT_SUCCESS; + } + + buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!buf); + *buf++ = m64fp_arg; + *buf |= m64fp_arg2; + return SLJIT_SUCCESS; +} + +static int emit_fop_regs(struct sljit_compiler *compiler, + sljit_ub st_arg, sljit_ub st_arg2, + int src) +{ + sljit_ub *buf; + + buf = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!buf); + INC_SIZE(2); + *buf++ = st_arg; + *buf = st_arg2 + src; + return SLJIT_SUCCESS; +} + +#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) +int sljit_emit_fop1(struct sljit_compiler *compiler, int op, +#else +static int sljit_emit_fpu_fop1(struct sljit_compiler *compiler, int op, +#endif + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ +#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_ub *buf; +#endif + + CHECK_ERROR(); + check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + if (GET_OPCODE(op) == SLJIT_FCMP) { + compiler->flags_saved = 0; +#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + FAIL_IF(emit_fld(compiler, dst, dstw)); + FAIL_IF(emit_fop(compiler, 0xd8, 0xd8, 0xdc, 0x3 << 3, src, srcw)); + + /* Copy flags. */ + EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0); + buf = (sljit_ub*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!buf); + INC_SIZE(3); + *buf++ = 0xdf; + *buf++ = 0xe0; + /* Note: lahf is not supported on all x86-64 architectures. 
*/ + *buf++ = 0x9e; + EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0); +#else + if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) { + FAIL_IF(emit_fld(compiler, dst, dstw)); + FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src)); + } else { + FAIL_IF(emit_fld(compiler, src, srcw)); + FAIL_IF(emit_fld(compiler, dst + ((dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? 1 : 0), dstw)); + FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src)); + FAIL_IF(emit_fop_regs(compiler, 0xdd, 0xd8, 0)); + } +#endif + return SLJIT_SUCCESS; + } + + FAIL_IF(emit_fld(compiler, src, srcw)); + + switch (op) { + case SLJIT_FNEG: + FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe0, 0)); + break; + case SLJIT_FABS: + FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe1, 0)); + break; + } + + FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw)); + + return SLJIT_SUCCESS; +} + +#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) +int sljit_emit_fop2(struct sljit_compiler *compiler, int op, +#else +static int sljit_emit_fpu_fop2(struct sljit_compiler *compiler, int op, +#endif + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + CHECK_ERROR(); + check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + if (src1 >= SLJIT_FLOAT_REG1 && src1 <= SLJIT_FLOAT_REG4 && dst == src1) { + FAIL_IF(emit_fld(compiler, src2, src2w)); + + switch (op) { + case SLJIT_FADD: + FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src1)); + break; + case SLJIT_FSUB: + FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe8, src1)); + break; + case SLJIT_FMUL: + FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src1)); + break; + case SLJIT_FDIV: + FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf8, src1)); + break; + } + return SLJIT_SUCCESS; + } + + FAIL_IF(emit_fld(compiler, src1, src1w)); + + if (src2 >= SLJIT_FLOAT_REG1 && src2 <= SLJIT_FLOAT_REG4 && dst == src2) { + switch (op) { + case SLJIT_FADD: + FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src2)); + break; + case SLJIT_FSUB: + FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe0, src2)); + break; + case SLJIT_FMUL: + FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src2)); + break; + case SLJIT_FDIV: + FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf0, src2)); + break; + } + return SLJIT_SUCCESS; + } + + switch (op) { + case SLJIT_FADD: + FAIL_IF(emit_fop(compiler, 0xd8, 0xc0, 0xdc, 0x0 << 3, src2, src2w)); + break; + case SLJIT_FSUB: + FAIL_IF(emit_fop(compiler, 0xd8, 0xe0, 0xdc, 0x4 << 3, src2, src2w)); + break; + case SLJIT_FMUL: + FAIL_IF(emit_fop(compiler, 0xd8, 0xc8, 0xdc, 0x1 << 3, src2, src2w)); + break; + case SLJIT_FDIV: + FAIL_IF(emit_fop(compiler, 0xd8, 0xf0, 0xdc, 0x6 << 3, src2, src2w)); + break; + } + + FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw)); + + return SLJIT_SUCCESS; +} +#endif + +#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) + +int sljit_emit_fop1(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src, sljit_w srcw) +{ + if (sse2_available) + return sljit_emit_sse2_fop1(compiler, op, dst, dstw, src, srcw); + else + return sljit_emit_fpu_fop1(compiler, op, dst, dstw, src, srcw); +} + +int sljit_emit_fop2(struct sljit_compiler *compiler, int op, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + if (sse2_available) + return sljit_emit_sse2_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + else + return sljit_emit_fpu_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w); +} + 
+#endif
+
+/* --------------------------------------------------------------------- */
+/* Conditional instructions */
+/* --------------------------------------------------------------------- */
+
+struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+	sljit_ub *buf;
+	struct sljit_label *label;
+
+	CHECK_ERROR_PTR();
+	check_sljit_emit_label(compiler);
+
+	/* We should restore the flags before the label,
+	   since other taken jumps have their own flags as well. */
+	if (SLJIT_UNLIKELY(compiler->flags_saved))
+		PTR_FAIL_IF(emit_restore_flags(compiler, 0));
+
+	if (compiler->last_label && compiler->last_label->size == compiler->size)
+		return compiler->last_label;
+
+	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+	PTR_FAIL_IF(!label);
+	set_label(label, compiler);
+
+	buf = (sljit_ub*)ensure_buf(compiler, 2);
+	PTR_FAIL_IF(!buf);
+
+	*buf++ = 0;
+	*buf++ = 0;
+
+	return label;
+}
+
+struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
+{
+	sljit_ub *buf;
+	struct sljit_jump *jump;
+
+	CHECK_ERROR_PTR();
+	check_sljit_emit_jump(compiler, type);
+
+	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
+		if ((type & 0xff) <= SLJIT_JUMP)
+			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
+		compiler->flags_saved = 0;
+	}
+
+	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+	PTR_FAIL_IF_NULL(jump);
+	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+	type &= 0xff;
+
+	if (type >= SLJIT_CALL1)
+		PTR_FAIL_IF(call_with_args(compiler, type));
+
+	/* Worst case size. */
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
+#else
+	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
+#endif
+
+	buf = (sljit_ub*)ensure_buf(compiler, 2);
+	PTR_FAIL_IF_NULL(buf);
+
+	*buf++ = 0;
+	*buf++ = type + 4;
+	return jump;
+}
+
+int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
+{
+	sljit_ub *code;
+	struct sljit_jump *jump;
+
+	CHECK_ERROR();
+	check_sljit_emit_ijump(compiler, type, src, srcw);
+
+	CHECK_EXTRA_REGS(src, srcw, (void)0);
+	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
+		if (type <= SLJIT_JUMP)
+			FAIL_IF(emit_restore_flags(compiler, 0));
+		compiler->flags_saved = 0;
+	}
+
+	if (type >= SLJIT_CALL1) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+		if (src == SLJIT_TEMPORARY_REG3) {
+			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
+			src = TMP_REGISTER;
+		}
+		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG && type >= SLJIT_CALL3) {
+			if (src & 0xf0) {
+				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
+				src = TMP_REGISTER;
+			}
+			else
+				srcw += sizeof(sljit_w);
+		}
+#else
+		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG) {
+			if (src & 0xf0) {
+				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
+				src = TMP_REGISTER;
+			}
+			else
+				srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
+		}
+#endif
+#endif
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
+		if (src == SLJIT_TEMPORARY_REG3) {
+			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
+			src = TMP_REGISTER;
+		}
+#endif
+		FAIL_IF(call_with_args(compiler, type));
+	}
+
+	if (src == SLJIT_IMM) {
+		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+		FAIL_IF_NULL(jump);
+		set_jump(jump, compiler, JUMP_ADDR);
+		jump->u.target = srcw;
+
+		/* Worst case size.
*/ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + compiler->size += 5; +#else + compiler->size += 10 + 3; +#endif + + code = (sljit_ub*)ensure_buf(compiler, 2); + FAIL_IF_NULL(code); + + *code++ = 0; + *code++ = type + 4; + } + else { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* REX_W is not necessary (src is not immediate). */ + compiler->mode32 = 1; +#endif + code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!code); + *code++ = 0xff; + *code |= (type >= SLJIT_CALL0) ? (2 << 3) : (4 << 3); + } + return SLJIT_SUCCESS; +} + +int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type) +{ + sljit_ub *buf; + sljit_ub cond_set = 0; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + int reg; +#endif + + CHECK_ERROR(); + check_sljit_emit_cond_value(compiler, op, dst, dstw, type); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + if (SLJIT_UNLIKELY(compiler->flags_saved)) + FAIL_IF(emit_restore_flags(compiler, 0)); + + switch (type) { + case SLJIT_C_EQUAL: + case SLJIT_C_FLOAT_EQUAL: + cond_set = 0x94; + break; + + case SLJIT_C_NOT_EQUAL: + case SLJIT_C_FLOAT_NOT_EQUAL: + cond_set = 0x95; + break; + + case SLJIT_C_LESS: + case SLJIT_C_FLOAT_LESS: + cond_set = 0x92; + break; + + case SLJIT_C_GREATER_EQUAL: + case SLJIT_C_FLOAT_GREATER_EQUAL: + cond_set = 0x93; + break; + + case SLJIT_C_GREATER: + case SLJIT_C_FLOAT_GREATER: + cond_set = 0x97; + break; + + case SLJIT_C_LESS_EQUAL: + case SLJIT_C_FLOAT_LESS_EQUAL: + cond_set = 0x96; + break; + + case SLJIT_C_SIG_LESS: + cond_set = 0x9c; + break; + + case SLJIT_C_SIG_GREATER_EQUAL: + cond_set = 0x9d; + break; + + case SLJIT_C_SIG_GREATER: + cond_set = 0x9f; + break; + + case SLJIT_C_SIG_LESS_EQUAL: + cond_set = 0x9e; + break; + + case SLJIT_C_OVERFLOW: + case SLJIT_C_MUL_OVERFLOW: + cond_set = 0x90; + break; + + case SLJIT_C_NOT_OVERFLOW: + case SLJIT_C_MUL_NOT_OVERFLOW: + cond_set = 0x91; + break; + + case SLJIT_C_FLOAT_NAN: + cond_set = 0x9a; + break; + + case SLJIT_C_FLOAT_NOT_NAN: + cond_set = 0x9b; + break; + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER; + + buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4); + FAIL_IF(!buf); + INC_SIZE(4 + 4); + /* Set low register to conditional flag. */ + *buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B; + *buf++ = 0x0f; + *buf++ = cond_set; + *buf++ = 0xC0 | reg_lmap[reg]; + *buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); + *buf++ = 0x0f; + *buf++ = 0xb6; + *buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg]; + + if (reg == TMP_REGISTER) { + if (op == SLJIT_MOV) { + compiler->mode32 = 0; + EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0); + } + else { +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0); + } + } +#else + if (op == SLJIT_MOV) { + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3); + FAIL_IF(!buf); + INC_SIZE(3 + 3); + /* Set low byte to conditional flag. 
*/ + *buf++ = 0x0f; + *buf++ = cond_set; + *buf++ = 0xC0 | reg_map[dst]; + + *buf++ = 0x0f; + *buf++ = 0xb6; + *buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst]; + } + else { + EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0); + + buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3); + FAIL_IF(!buf); + INC_SIZE(3 + 3); + /* Set al to conditional flag. */ + *buf++ = 0x0f; + *buf++ = cond_set; + *buf++ = 0xC0; + + *buf++ = 0x0f; + *buf++ = 0xb6; + if (dst >= SLJIT_GENERAL_REG1 && dst <= SLJIT_NO_REGISTERS) + *buf = 0xC0 | (reg_map[dst] << 3); + else { + *buf = 0xC0; + EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0); + } + + EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0); + } + } + else { + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) { + EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0); + buf = (sljit_ub*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!buf); + INC_SIZE(3); + + *buf++ = 0x0f; + *buf++ = cond_set; + *buf++ = 0xC0 | reg_map[dst]; + } + else { + EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0); + + buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1); + FAIL_IF(!buf); + INC_SIZE(3 + 3 + 1); + /* Set al to conditional flag. */ + *buf++ = 0x0f; + *buf++ = cond_set; + *buf++ = 0xC0; + + *buf++ = 0x0f; + *buf++ = 0xb6; + *buf++ = 0xC0; + + *buf++ = 0x90 + reg_map[TMP_REGISTER]; + } +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0); + } +#endif + + return SLJIT_SUCCESS; +} + +struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value) +{ + sljit_ub *buf; + struct sljit_const *const_; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + int reg; +#endif + + CHECK_ERROR_PTR(); + check_sljit_emit_const(compiler, dst, dstw, init_value); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; + reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER; + + if (emit_load_imm64(compiler, reg, init_value)) + return NULL; +#else + if (dst == SLJIT_UNUSED) + dst = TMP_REGISTER; + + if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value)) + return NULL; +#endif + + buf = (sljit_ub*)ensure_buf(compiler, 2); + PTR_FAIL_IF(!buf); + + *buf++ = 0; + *buf++ = 1; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (reg == TMP_REGISTER && dst != SLJIT_UNUSED) + if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0)) + return NULL; +#endif + + return const_; +} + +void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + *(sljit_w*)addr = new_addr - (addr + 4); +#else + *(sljit_uw*)addr = new_addr; +#endif +} + +void sljit_set_const(sljit_uw addr, sljit_w new_constant) +{ + *(sljit_w*)addr = new_constant; +} diff --git a/sljit/sljitUtils.c b/sljit/sljitUtils.c new file mode 100644 index 0000000..13904b8 --- /dev/null +++ b/sljit/sljitUtils.c @@ -0,0 +1,244 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* ------------------------------------------------------------------------ */
+/* Locks */
+/* ------------------------------------------------------------------------ */
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) || (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
+
+#ifdef _WIN32
+
+#include "windows.h"
+
+#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
+
+static HANDLE allocator_mutex = 0;
+
+static SLJIT_INLINE void allocator_grab_lock(void)
+{
+	/* No idea what to do if an error occurs. Static mutexes should never fail... */
+	if (!allocator_mutex)
+		allocator_mutex = CreateMutex(NULL, TRUE, NULL);
+	else
+		WaitForSingleObject(allocator_mutex, INFINITE);
+}
+
+static SLJIT_INLINE void allocator_release_lock(void)
+{
+	ReleaseMutex(allocator_mutex);
+}
+
+#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
+
+#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK)
+
+static HANDLE global_mutex = 0;
+
+void SLJIT_CALL sljit_grab_lock(void)
+{
+	/* No idea what to do if an error occurs. Static mutexes should never fail...
*/ + if (!global_mutex) + global_mutex = CreateMutex(NULL, TRUE, NULL); + else + WaitForSingleObject(global_mutex, INFINITE); +} + +void SLJIT_CALL sljit_release_lock(void) +{ + ReleaseMutex(global_mutex); +} + +#endif /* SLJIT_UTIL_GLOBAL_LOCK */ + +#else /* _WIN32 */ + +#include "pthread.h" + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + +static pthread_mutex_t allocator_mutex = PTHREAD_MUTEX_INITIALIZER; + +static SLJIT_INLINE void allocator_grab_lock(void) +{ + pthread_mutex_lock(&allocator_mutex); +} + +static SLJIT_INLINE void allocator_release_lock(void) +{ + pthread_mutex_unlock(&allocator_mutex); +} + +#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ + +#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) + +static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER; + +void SLJIT_CALL sljit_grab_lock(void) +{ + pthread_mutex_lock(&global_mutex); +} + +void SLJIT_CALL sljit_release_lock(void) +{ + pthread_mutex_unlock(&global_mutex); +} + +#endif /* SLJIT_UTIL_GLOBAL_LOCK */ + +#endif /* _WIN32 */ + +/* ------------------------------------------------------------------------ */ +/* Stack */ +/* ------------------------------------------------------------------------ */ + +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) + +#ifdef _WIN32 +#include "windows.h" +#else +#include <sys/mman.h> +#include <unistd.h> +#endif + +/* Planning to make it even more clever in the future. */ +static sljit_w sljit_page_align = 0; + +struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_w limit, sljit_w max_limit) +{ + struct sljit_stack *stack; + union { + void *ptr; + sljit_uw uw; + } base; +#ifdef _WIN32 + SYSTEM_INFO si; +#endif + + if (limit > max_limit) + return NULL; + +#ifdef _WIN32 + if (!sljit_page_align) { + GetSystemInfo(&si); + sljit_page_align = si.dwPageSize - 1; + } +#else + if (!sljit_page_align) { + sljit_page_align = sysconf(_SC_PAGESIZE); + /* Should never happen. */ + if (sljit_page_align < 0) + sljit_page_align = 4096; + sljit_page_align--; + } +#endif + + /* Align limit and max_limit. 
*/
+	max_limit = (max_limit + sljit_page_align) & ~sljit_page_align;
+
+	stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack));
+	if (!stack)
+		return NULL;
+
+#ifdef _WIN32
+	base.ptr = VirtualAlloc(0, max_limit, MEM_RESERVE, PAGE_READWRITE);
+	if (!base.ptr) {
+		SLJIT_FREE(stack);
+		return NULL;
+	}
+	stack->base = base.uw;
+	stack->limit = stack->base;
+	stack->max_limit = stack->base + max_limit;
+	if (sljit_stack_resize(stack, stack->base + limit)) {
+		sljit_free_stack(stack);
+		return NULL;
+	}
+#else
+	base.ptr = mmap(0, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (base.ptr == MAP_FAILED) {
+		SLJIT_FREE(stack);
+		return NULL;
+	}
+	stack->base = base.uw;
+	stack->limit = stack->base + limit;
+	stack->max_limit = stack->base + max_limit;
+#endif
+	stack->top = stack->base;
+	return stack;
+}
+
+#undef PAGE_ALIGN
+
+void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack)
+{
+#ifdef _WIN32
+	VirtualFree((void*)stack->base, 0, MEM_RELEASE);
+#else
+	munmap((void*)stack->base, stack->max_limit - stack->base);
+#endif
+	SLJIT_FREE(stack);
+}
+
+sljit_w SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_w new_limit)
+{
+	sljit_w aligned_old_limit;
+	sljit_w aligned_new_limit;
+
+	if ((new_limit > stack->max_limit) || (new_limit < stack->base))
+		return -1;
+#ifdef _WIN32
+	aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align;
+	aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align;
+	if (aligned_new_limit != aligned_old_limit) {
+		if (aligned_new_limit > aligned_old_limit) {
+			if (!VirtualAlloc((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, MEM_COMMIT, PAGE_READWRITE))
+				return -1;
+		}
+		else {
+			if (!VirtualFree((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MEM_DECOMMIT))
+				return -1;
+		}
+	}
+	stack->limit = new_limit;
+	return 0;
+#else
+	if (new_limit >= stack->limit) {
+		stack->limit = new_limit;
+		return 0;
+	}
+	aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align;
+	aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align;
+	if (aligned_new_limit < aligned_old_limit)
+		madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MADV_DONTNEED);
+	stack->limit = new_limit;
+	return 0;
+#endif
+}
+
+#endif /* SLJIT_UTIL_STACK */
+
+#endif
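
A few usage notes on the interfaces added in this commit. First, sljit_emit_op2 is the public entry point for all binary operators in sljitNativeX86_common.c. A minimal call sketch, using only names visible in this commit; creating the compiler and emitting the function prologue are assumed to happen elsewhere and are omitted:

/* Sketch: TEMPORARY_REG1 = TEMPORARY_REG1 + 42. No flag bits are set in
   the opcode, so the SLJIT_ADD case above is free to try emit_lea_binary
   first and encode the addition as a flag-neutral lea. */
int err = sljit_emit_op2(compiler, SLJIT_ADD,
	SLJIT_TEMPORARY_REG1, 0,	/* dst, dstw */
	SLJIT_TEMPORARY_REG1, 0,	/* src1, src1w */
	SLJIT_IMM, 42);			/* src2, src2w */
/* err is SLJIT_SUCCESS on success, an sljit error code otherwise. */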
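
Second, the two-instruction sequence emitted by sljit_emit_cond_value is easier to check against an opcode table with one concrete decode. Assuming the 32-bit reg_map assigns encoding 0, i.e. eax, to SLJIT_TEMPORARY_REG1 (reg_map is defined outside this hunk, so that value is an assumption), the 3 + 3 byte sequence for SLJIT_C_EQUAL (cond_set = 0x94) is:

/* 0f 94 c0	sete al		(0x0f, cond_set, 0xC0 | reg_map[dst])
   0f b6 c0	movzx eax, al	(0x0f, 0xb6, 0xC0 | (reg_map[dst] << 3) | reg_map[dst])
   That is: materialize ZF into al, then zero-extend al to the full register. */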
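
Finally, the stack utility in sljitUtils.c is self-contained enough for a usage sketch. This is an illustration only: stack_demo and the size values are hypothetical, and it assumes SLJIT_UTIL_STACK is enabled and the declarations are visible through sljitLir.h:

#include "sljitLir.h"

static int stack_demo(void)
{
	/* Reserve 64K of address space; the first argument is the initial limit. */
	struct sljit_stack *stack = sljit_allocate_stack(8192, 65536);
	if (!stack)
		return -1;

	/* ... generated code works inside [stack->base, stack->limit) ... */

	/* sljit_stack_resize takes an absolute address, not a size, and
	   returns -1 if the request falls outside [base, max_limit]. */
	if (sljit_stack_resize(stack, stack->base + 32768)) {
		sljit_free_stack(stack);
		return -1;
	}

	sljit_free_stack(stack);
	return 0;
}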