diff options
-rw-r--r-- | Makefile.am | 3 | ||||
-rw-r--r-- | configure.ac | 11 | ||||
-rw-r--r-- | include/ffi.h.in | 5 | ||||
-rw-r--r-- | include/ffi_common.h | 4 | ||||
-rw-r--r-- | include/tramp.h | 24 | ||||
-rw-r--r-- | src/aarch64/ffi.c | 34 | ||||
-rw-r--r-- | src/aarch64/internal.h | 10 | ||||
-rw-r--r-- | src/aarch64/sysv.S | 70 | ||||
-rw-r--r-- | src/arm/ffi.c | 32 | ||||
-rw-r--r-- | src/arm/internal.h | 10 | ||||
-rw-r--r-- | src/arm/sysv.S | 47 | ||||
-rw-r--r-- | src/closures.c | 47 | ||||
-rw-r--r-- | src/tramp.c | 729 | ||||
-rw-r--r-- | src/x86/ffi.c | 36 | ||||
-rw-r--r-- | src/x86/ffi64.c | 32 | ||||
-rw-r--r-- | src/x86/ffitarget.h | 6 | ||||
-rw-r--r-- | src/x86/ffiw64.c | 15 | ||||
-rw-r--r-- | src/x86/internal.h | 14 | ||||
-rw-r--r-- | src/x86/internal64.h | 14 | ||||
-rw-r--r-- | src/x86/sysv.S | 89 | ||||
-rw-r--r-- | src/x86/unix64.S | 75 | ||||
-rw-r--r-- | src/x86/win64.S | 14 | ||||
-rw-r--r-- | testsuite/libffi.closures/closure_loc_fn0.c | 3 |
23 files changed, 1316 insertions, 8 deletions
diff --git a/Makefile.am b/Makefile.am index 7654bf5..1b18198 100644 --- a/Makefile.am +++ b/Makefile.am @@ -38,7 +38,8 @@ toolexeclib_LTLIBRARIES = libffi.la noinst_LTLIBRARIES = libffi_convenience.la libffi_la_SOURCES = src/prep_cif.c src/types.c \ - src/raw_api.c src/java_raw_api.c src/closures.c + src/raw_api.c src/java_raw_api.c src/closures.c \ + src/tramp.c if FFI_DEBUG libffi_la_SOURCES += src/debug.c diff --git a/configure.ac b/configure.ac index 093b87d..0f141f4 100644 --- a/configure.ac +++ b/configure.ac @@ -67,7 +67,7 @@ AC_CHECK_HEADERS(sys/memfd.h) AC_CHECK_FUNCS([memfd_create]) AC_CHECK_HEADERS(sys/mman.h) -AC_CHECK_FUNCS([mmap mkostemp]) +AC_CHECK_FUNCS([mmap mkostemp mkstemp]) AC_FUNC_MMAP_BLACKLIST dnl The -no-testsuite modules omit the test subdir. @@ -363,6 +363,15 @@ AC_ARG_ENABLE(raw-api, AC_DEFINE(FFI_NO_RAW_API, 1, [Define this if you do not want support for the raw API.]) fi) +case "$target" in + *-cygwin*) + ;; + *arm*-*-linux-* | aarch64*-*-linux-* | i*86-*-linux-* | x86_64-*-linux-*) + AC_DEFINE(FFI_EXEC_STATIC_TRAMP, 1, + [Define this if you want statically defined trampolines]) + ;; +esac + AC_ARG_ENABLE(purify-safety, [ --enable-purify-safety purify-safe mode], if test "$enable_purify_safety" = "yes"; then diff --git a/include/ffi.h.in b/include/ffi.h.in index 38885b0..14ec807 100644 --- a/include/ffi.h.in +++ b/include/ffi.h.in @@ -310,7 +310,10 @@ typedef struct { void *trampoline_table; void *trampoline_table_entry; #else - char tramp[FFI_TRAMPOLINE_SIZE]; + union { + char tramp[FFI_TRAMPOLINE_SIZE]; + void *ftramp; + }; #endif ffi_cif *cif; void (*fun)(ffi_cif*,void*,void**,void*); diff --git a/include/ffi_common.h b/include/ffi_common.h index 76b9dd6..d3db33a 100644 --- a/include/ffi_common.h +++ b/include/ffi_common.h @@ -103,6 +103,10 @@ ffi_status ffi_prep_cif_core(ffi_cif *cif, some targets. 
*/ void *ffi_data_to_code_pointer (void *data) FFI_HIDDEN; +/* The arch code calls this to determine if a given closure has a + static trampoline. */ +int ffi_tramp_is_present (void *closure); + /* Extended cif, used in callback from assembly routine */ typedef struct { diff --git a/include/tramp.h b/include/tramp.h new file mode 100644 index 0000000..cc096a4 --- /dev/null +++ b/include/tramp.h @@ -0,0 +1,24 @@ +/* ----------------------------------------------------------------------- + tramp.h - Copyright (C) 2021 Microsoft, Inc. + + Static trampoline definitions. + ----------------------------------------------------------------------- */ + +#ifndef FFI_TRAMP_H +#define FFI_TRAMP_H + +#ifdef __cplusplus +extern "C" { +#endif + +int ffi_tramp_is_supported(void); +void *ffi_tramp_alloc (int flags); +void ffi_tramp_set_parms (void *tramp, void *target, void *data); /* target: code the trampoline jumps to; data: data for that target code */ +void *ffi_tramp_get_addr (void *tramp); +void ffi_tramp_free (void *tramp); + +#ifdef __cplusplus +} +#endif + +#endif /* FFI_TRAMP_H */ diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c index ef09f4d..8e24a96 100644 --- a/src/aarch64/ffi.c +++ b/src/aarch64/ffi.c @@ -30,6 +30,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifdef _WIN32 #include <windows.h> /* FlushInstructionCache */ #endif +#include <tramp.h> /* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; all further uses in this file will refer to the 128-bit type. 
*/ @@ -782,6 +783,10 @@ ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, extern void ffi_closure_SYSV (void) FFI_HIDDEN; extern void ffi_closure_SYSV_V (void) FFI_HIDDEN; +#if defined(FFI_EXEC_STATIC_TRAMP) +extern void ffi_closure_SYSV_alt (void) FFI_HIDDEN; +extern void ffi_closure_SYSV_V_alt (void) FFI_HIDDEN; +#endif ffi_status ffi_prep_closure_loc (ffi_closure *closure, @@ -816,7 +821,21 @@ ffi_prep_closure_loc (ffi_closure *closure, 0x00, 0x02, 0x1f, 0xd6 /* br x16 */ }; char *tramp = closure->tramp; - + +#if defined(FFI_EXEC_STATIC_TRAMP) + if (ffi_tramp_is_present(closure)) + { + /* Initialize the static trampoline's parameters. */ + if (start == ffi_closure_SYSV_V) + start = ffi_closure_SYSV_V_alt; + else + start = ffi_closure_SYSV_alt; + ffi_tramp_set_parms (closure->ftramp, start, closure); + goto out; + } +#endif + + /* Initialize the dynamic trampoline. */ memcpy (tramp, trampoline, sizeof(trampoline)); *(UINT64 *)(tramp + 16) = (uintptr_t)start; @@ -832,6 +851,7 @@ ffi_prep_closure_loc (ffi_closure *closure, unsigned char *tramp_code = ffi_data_to_code_pointer (tramp); #endif ffi_clear_cache (tramp_code, tramp_code + FFI_TRAMPOLINE_SIZE); +out: #endif closure->cif = cif; @@ -1022,4 +1042,16 @@ ffi_closure_SYSV_inner (ffi_cif *cif, return flags; } +#if defined(FFI_EXEC_STATIC_TRAMP) +void * +ffi_tramp_arch (size_t *tramp_size, size_t *map_size) +{ + extern void *trampoline_code_table; + + *tramp_size = AARCH64_TRAMP_SIZE; + *map_size = AARCH64_TRAMP_MAP_SIZE; + return &trampoline_code_table; +} +#endif + #endif /* (__aarch64__) || defined(__arm64__)|| defined (_M_ARM64)*/ diff --git a/src/aarch64/internal.h b/src/aarch64/internal.h index 3d4d035..de55755 100644 --- a/src/aarch64/internal.h +++ b/src/aarch64/internal.h @@ -66,3 +66,13 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #define N_X_ARG_REG 8 #define N_V_ARG_REG 8 #define CALL_CONTEXT_SIZE (N_V_ARG_REG * 16 + N_X_ARG_REG * 8) + +#if defined(FFI_EXEC_STATIC_TRAMP) +/* + * For the trampoline code table mapping, a mapping size of 16K is chosen to + * cover the base page sizes of 4K and 16K. + */ +#define AARCH64_TRAMP_MAP_SHIFT 14 +#define AARCH64_TRAMP_MAP_SIZE (1 << AARCH64_TRAMP_MAP_SHIFT) +#define AARCH64_TRAMP_SIZE 32 +#endif diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S index b720a92..a3c1508 100644 --- a/src/aarch64/sysv.S +++ b/src/aarch64/sysv.S @@ -367,6 +367,76 @@ CNAME(ffi_closure_SYSV): .size CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV) #endif +#if defined(FFI_EXEC_STATIC_TRAMP) + .align 4 +CNAME(ffi_closure_SYSV_V_alt): + /* See the comments above trampoline_code_table. */ + ldr x17, [sp, #8] /* Load closure in x17 */ + add sp, sp, #16 /* Restore the stack */ + b CNAME(ffi_closure_SYSV_V) + + .globl CNAME(ffi_closure_SYSV_V_alt) + FFI_HIDDEN(CNAME(ffi_closure_SYSV_V_alt)) +#ifdef __ELF__ + .type CNAME(ffi_closure_SYSV_V_alt), #function + .size CNAME(ffi_closure_SYSV_V_alt), . - CNAME(ffi_closure_SYSV_V_alt) +#endif + + .align 4 +CNAME(ffi_closure_SYSV_alt): + /* See the comments above trampoline_code_table. */ + ldr x17, [sp, #8] /* Load closure in x17 */ + add sp, sp, #16 /* Restore the stack */ + b CNAME(ffi_closure_SYSV) + + .globl CNAME(ffi_closure_SYSV_alt) + FFI_HIDDEN(CNAME(ffi_closure_SYSV_alt)) +#ifdef __ELF__ + .type CNAME(ffi_closure_SYSV_alt), #function + .size CNAME(ffi_closure_SYSV_alt), . - CNAME(ffi_closure_SYSV_alt) +#endif + +/* + * Below is the definition of the trampoline code table. Each element in + * the code table is a trampoline. + */ +/* + * The trampoline uses register x17. It saves the original value of x17 on + * the stack. + * + * The trampoline has two parameters - target code to jump to and data for + * the target code. The trampoline extracts the parameters from its parameter + * block (see tramp_table_map()). 
The trampoline saves the data address on + * the stack. Finally, it jumps to the target code. + * + * The target code can choose to: + * + * - restore the value of x17 + * - load the data address in a register + * - restore the stack pointer to what it was when the trampoline was invoked. + */ + .align AARCH64_TRAMP_MAP_SHIFT +CNAME(trampoline_code_table): + .rept AARCH64_TRAMP_MAP_SIZE / AARCH64_TRAMP_SIZE + sub sp, sp, #16 /* Make space on the stack */ + str x17, [sp] /* Save x17 on stack */ + adr x17, #16376 /* Get data address */ + ldr x17, [x17] /* Copy data into x17 */ + str x17, [sp, #8] /* Save data on stack */ + adr x17, #16372 /* Get code address */ + ldr x17, [x17] /* Load code address into x17 */ + br x17 /* Jump to code */ + .endr + + .globl CNAME(trampoline_code_table) + FFI_HIDDEN(CNAME(trampoline_code_table)) +#ifdef __ELF__ + .type CNAME(trampoline_code_table), #function + .size CNAME(trampoline_code_table), . - CNAME(trampoline_code_table) +#endif + .align AARCH64_TRAMP_MAP_SHIFT +#endif /* FFI_EXEC_STATIC_TRAMP */ + #if FFI_EXEC_TRAMPOLINE_TABLE #ifdef __MACH__ diff --git a/src/arm/ffi.c b/src/arm/ffi.c index 0058390..5ad59ce 100644 --- a/src/arm/ffi.c +++ b/src/arm/ffi.c @@ -34,6 +34,7 @@ #include <ffi_common.h> #include <stdint.h> #include <stdlib.h> +#include <tramp.h> #include "internal.h" #if defined(_MSC_VER) && defined(_M_ARM) @@ -571,6 +572,10 @@ ffi_closure_inner_VFP (ffi_cif *cif, void ffi_closure_SYSV (void) FFI_HIDDEN; void ffi_closure_VFP (void) FFI_HIDDEN; +#if defined(FFI_EXEC_STATIC_TRAMP) +void ffi_closure_SYSV_alt (void) FFI_HIDDEN; +void ffi_closure_VFP_alt (void) FFI_HIDDEN; +#endif #ifdef FFI_GO_CLOSURES void ffi_go_closure_SYSV (void) FFI_HIDDEN; @@ -612,6 +617,20 @@ ffi_prep_closure_loc (ffi_closure * closure, config[1] = closure_func; #else +#if defined(FFI_EXEC_STATIC_TRAMP) + if (ffi_tramp_is_present(closure)) + { + /* Initialize the static trampoline's parameters. 
*/ + if (closure_func == ffi_closure_SYSV) + closure_func = ffi_closure_SYSV_alt; + else + closure_func = ffi_closure_VFP_alt; + ffi_tramp_set_parms (closure->ftramp, closure_func, closure); + goto out; + } +#endif + + /* Initialize the dynamic trampoline. */ #ifndef _M_ARM memcpy(closure->tramp, ffi_arm_trampoline, 8); #else @@ -633,6 +652,7 @@ ffi_prep_closure_loc (ffi_closure * closure, #else *(void (**)(void))(closure->tramp + 8) = closure_func; #endif +out: #endif closure->cif = cif; @@ -873,4 +893,16 @@ layout_vfp_args (ffi_cif * cif) } } +#if defined(FFI_EXEC_STATIC_TRAMP) +void * +ffi_tramp_arch (size_t *tramp_size, size_t *map_size) +{ + extern void *trampoline_code_table; + + *tramp_size = ARM_TRAMP_SIZE; + *map_size = ARM_TRAMP_MAP_SIZE; + return &trampoline_code_table; +} +#endif + #endif /* __arm__ or _M_ARM */ diff --git a/src/arm/internal.h b/src/arm/internal.h index 6cf0b2a..fa8ab0b 100644 --- a/src/arm/internal.h +++ b/src/arm/internal.h @@ -5,3 +5,13 @@ #define ARM_TYPE_INT 4 #define ARM_TYPE_VOID 5 #define ARM_TYPE_STRUCT 6 + +#if defined(FFI_EXEC_STATIC_TRAMP) +/* + * For the trampoline table mapping, a mapping size of 4K (base page size) + * is chosen. + */ +#define ARM_TRAMP_MAP_SHIFT 12 +#define ARM_TRAMP_MAP_SIZE (1 << ARM_TRAMP_MAP_SHIFT) +#define ARM_TRAMP_SIZE 20 +#endif diff --git a/src/arm/sysv.S b/src/arm/sysv.S index 74bc53f..49c561e 100644 --- a/src/arm/sysv.S +++ b/src/arm/sysv.S @@ -354,6 +354,53 @@ E(ARM_TYPE_STRUCT) cfi_endproc ARM_FUNC_END(ffi_closure_ret) +#if defined(FFI_EXEC_STATIC_TRAMP) +ARM_FUNC_START(ffi_closure_SYSV_alt) + /* See the comments above trampoline_code_table. */ + ldr ip, [sp, #4] /* Load closure in ip */ + add sp, sp, 8 /* Restore the stack */ + b CNAME(ffi_closure_SYSV) +ARM_FUNC_END(ffi_closure_SYSV_alt) + +ARM_FUNC_START(ffi_closure_VFP_alt) + /* See the comments above trampoline_code_table. 
*/ + ldr ip, [sp, #4] /* Load closure in ip */ + add sp, sp, 8 /* Restore the stack */ + b CNAME(ffi_closure_VFP) +ARM_FUNC_END(ffi_closure_VFP_alt) + +/* + * Below is the definition of the trampoline code table. Each element in + * the code table is a trampoline. + */ +/* + * The trampoline uses register ip (r12). It saves the original value of ip + * on the stack. + * + * The trampoline has two parameters - target code to jump to and data for + * the target code. The trampoline extracts the parameters from its parameter + * block (see tramp_table_map()). The trampoline saves the data address on + * the stack. Finally, it jumps to the target code. + * + * The target code can choose to: + * + * - restore the value of ip + * - load the data address in a register + * - restore the stack pointer to what it was when the trampoline was invoked. + */ + .align ARM_TRAMP_MAP_SHIFT +ARM_FUNC_START(trampoline_code_table) + .rept ARM_TRAMP_MAP_SIZE / ARM_TRAMP_SIZE + sub sp, sp, #8 /* Make space on the stack */ + str ip, [sp] /* Save ip on stack */ + ldr ip, [pc, #4080] /* Copy data into ip */ + str ip, [sp, #4] /* Save data on stack */ + ldr pc, [pc, #4076] /* Copy code into PC */ + .endr +ARM_FUNC_END(trampoline_code_table) + .align ARM_TRAMP_MAP_SHIFT +#endif /* FFI_EXEC_STATIC_TRAMP */ + #if FFI_EXEC_TRAMPOLINE_TABLE #ifdef __MACH__ diff --git a/src/closures.c b/src/closures.c index dfc2f68..3a63c32 100644 --- a/src/closures.c +++ b/src/closures.c @@ -34,6 +34,7 @@ #include <fficonfig.h> #include <ffi.h> #include <ffi_common.h> +#include <tramp.h> #ifdef __NetBSD__ #include <sys/param.h> @@ -112,6 +113,12 @@ ffi_closure_free (void *ptr) munmap(dataseg, rounded_size); munmap(codeseg, rounded_size); } + +int +ffi_tramp_is_present (__attribute__((unused)) void *ptr) +{ + return 0; +} #else /* !NetBSD with PROT_MPROTECT */ #if !FFI_MMAP_EXEC_WRIT && !FFI_EXEC_TRAMPOLINE_TABLE @@ -860,6 +867,12 @@ dlmmap (void *start, size_t length, int prot, && flags == (MAP_PRIVATE | 
MAP_ANONYMOUS) && fd == -1 && offset == 0); + if (execfd == -1 && ffi_tramp_is_supported ()) + { + ptr = mmap (start, length, prot & ~PROT_EXEC, flags, fd, offset); + return ptr; + } + if (execfd == -1 && is_emutramp_enabled ()) { ptr = mmap (start, length, prot & ~PROT_EXEC, flags, fd, offset); @@ -939,7 +952,7 @@ segment_holding_code (mstate m, char* addr) void * ffi_closure_alloc (size_t size, void **code) { - void *ptr; + void *ptr, *ftramp; if (!code) return NULL; @@ -951,6 +964,17 @@ ffi_closure_alloc (size_t size, void **code) msegmentptr seg = segment_holding (gm, ptr); *code = add_segment_exec_offset (ptr, seg); + if (!ffi_tramp_is_supported ()) + return ptr; + + ftramp = ffi_tramp_alloc (0); + if (ftramp == NULL) + { + dlfree (FFI_RESTORE_PTR (ptr)); + return NULL; + } + *code = ffi_tramp_get_addr (ftramp); + ((ffi_closure *) ptr)->ftramp = ftramp; } return ptr; @@ -965,7 +989,11 @@ ffi_data_to_code_pointer (void *data) burden of managing this memory themselves, in which case this we'll just return data. */ if (seg) - return add_segment_exec_offset (data, seg); + { + if (!ffi_tramp_is_supported ()) + return add_segment_exec_offset (data, seg); + return ffi_tramp_get_addr (((ffi_closure *) data)->ftramp); + } else return data; } @@ -983,10 +1011,19 @@ ffi_closure_free (void *ptr) if (seg) ptr = sub_segment_exec_offset (ptr, seg); #endif + if (ffi_tramp_is_supported ()) + ffi_tramp_free (((ffi_closure *) ptr)->ftramp); dlfree (FFI_RESTORE_PTR (ptr)); } +int +ffi_tramp_is_present (void *ptr) +{ + msegmentptr seg = segment_holding (gm, ptr); + return seg != NULL && ffi_tramp_is_supported(); +} + # else /* ! FFI_MMAP_EXEC_WRIT */ /* On many systems, memory returned by malloc is writable and @@ -1015,6 +1052,12 @@ ffi_data_to_code_pointer (void *data) return data; } +int +ffi_tramp_is_present (__attribute__((unused)) void *ptr) +{ + return 0; +} + # endif /* ! 
FFI_MMAP_EXEC_WRIT */ #endif /* FFI_CLOSURES */ diff --git a/src/tramp.c b/src/tramp.c new file mode 100644 index 0000000..265aeaa --- /dev/null +++ b/src/tramp.c @@ -0,0 +1,729 @@ +/* ----------------------------------------------------------------------- + tramp.c - Copyright (c) 2020 Madhavan T. Venkataraman + + API and support functions for managing statically defined closure + trampolines. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <fficonfig.h> + +#ifdef FFI_EXEC_STATIC_TRAMP + +/* -------------------------- Headers and Definitions ---------------------*/ +/* + * Add support for other OSes later. For now, it is just Linux. 
+ */ + +#if defined __linux__ +#ifdef __linux__ +#define _GNU_SOURCE 1 +#endif +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <fcntl.h> +#include <pthread.h> +#include <sys/mman.h> +#include <tramp.h> +#ifdef __linux__ +#include <linux/limits.h> +#include <linux/types.h> +#endif +#endif /* __linux__ */ + +/* + * Each architecture defines static code for a trampoline code table. The + * trampoline code table is mapped into the address space of a process. + * + * The following architecture specific function returns: + * + * - the address of the trampoline code table in the text segment + * - the size of each trampoline in the trampoline code table + * - the size of the mapping for the whole trampoline code table + */ +void __attribute__((weak)) *ffi_tramp_arch (size_t *tramp_size, + size_t *map_size); + +/* ------------------------- Trampoline Data Structures --------------------*/ + +struct tramp; + +/* + * Trampoline table. Manages one trampoline code table and one trampoline + * parameter table. + * + * prev, next Links in the global trampoline table list. + * code_table Trampoline code table mapping. + * parm_table Trampoline parameter table mapping. + * array Array of trampolines malloced. + * free List of free trampolines. + * nfree Number of free trampolines. + */ +struct tramp_table +{ + struct tramp_table *prev; + struct tramp_table *next; + void *code_table; + void *parm_table; + struct tramp *array; + struct tramp *free; + int nfree; +}; + +/* + * Parameters for each trampoline. + * + * data + * Data for the target code that the trampoline jumps to. + * target + * Target code that the trampoline jumps to. + */ +struct tramp_parm +{ + void *data; + void *target; +}; + +/* + * Trampoline structure for each trampoline. + * + * prev, next Links in the trampoline free list of a trampoline table. + * table Trampoline table to which this trampoline belongs. 
+ * code Address of this trampoline in the code table mapping. + * parm Address of this trampoline's parameters in the parameter + * table mapping. + */ +struct tramp +{ + struct tramp *prev; + struct tramp *next; + struct tramp_table *table; + void *code; + struct tramp_parm *parm; +}; + +enum tramp_globals_status { + TRAMP_GLOBALS_UNINITIALIZED = 0, + TRAMP_GLOBALS_PASSED, + TRAMP_GLOBALS_FAILED, +}; + +/* + * Trampoline globals. + * + * fd + * File descriptor of binary file that contains the trampoline code table. + * offset + * Offset of the trampoline code table in that file. + * text + * Address of the trampoline code table in the text segment. + * map_size + * Size of the trampoline code table mapping. + * size + * Size of one trampoline in the trampoline code table. + * ntramp + * Total number of trampolines in the trampoline code table. + * free_tables + * List of trampoline tables that contain free trampolines. + * nfree_tables + * Number of trampoline tables that contain free trampolines. + * status + * Initialization status. + */ +struct tramp_globals +{ + int fd; + off_t offset; + void *text; + size_t map_size; + size_t size; + int ntramp; + struct tramp_table *free_tables; + int nfree_tables; + enum tramp_globals_status status; +}; + +static struct tramp_globals tramp_globals; + +/* --------------------- Trampoline File Initialization --------------------*/ + +/* + * The trampoline file is the file used to map the trampoline code table into + * the address space of a process. There are two ways to get this file: + * + * - From the OS. E.g., on Linux, /proc/<pid>/maps lists all the memory + * mappings for <pid>. For file-backed mappings, maps supplies the file name + * and the file offset. Using this, we can locate the mapping that maps + * libffi and get the path to the libffi binary. And, we can compute the + * offset of the trampoline code table within that binary. 
+ * + * - Else, if we can create a temporary file, we can write the trampoline code + * table from the text segment into the temporary file. + * + * The first method is the preferred one. If the OS security subsystem + * disallows mapping unsigned files with PROT_EXEC, then the second method + * will fail. + * + * If an OS allows the trampoline code table in the text segment to be + * directly remapped (e.g., MACH vm_remap ()), then we don't need the + * trampoline file. + */ +static int tramp_table_alloc (void); + +#if defined __linux__ + +static int +ffi_tramp_get_libffi (void) +{ + FILE *fp; + char file[PATH_MAX+100], line[PATH_MAX+100], perm[10], dev[10]; /* file is as large as line so the unbounded %s below cannot overflow it */ + unsigned long start, end, offset, inode; + uintptr_t addr = (uintptr_t) tramp_globals.text; + int nfields, found; + + snprintf (file, PATH_MAX, "/proc/%d/maps", getpid()); + fp = fopen (file, "r"); + if (fp == NULL) + return 0; + + found = 0; + while (feof (fp) == 0) { + if (fgets (line, sizeof (line), fp) == 0) + break; + + nfields = sscanf (line, "%lx-%lx %9s %lx %9s %lu %s", + &start, &end, perm, &offset, dev, &inode, file); + if (nfields != 7) + continue; + + if (addr >= start && addr < end) { + tramp_globals.offset = offset + (addr - start); + found = 1; + break; + } + } + fclose (fp); + + if (!found) + return 0; + + tramp_globals.fd = open (file, O_RDONLY); + if (tramp_globals.fd == -1) + return 0; + + /* + * Allocate a trampoline table just to make sure that the trampoline code + * table can be mapped. 
+ */ + if (!tramp_table_alloc ()) + { + close (tramp_globals.fd); + tramp_globals.fd = -1; + return 0; + } + return 1; +} + +#endif /* __linux__ */ + +#if defined __linux__ + +#if defined HAVE_MKSTEMP + +static int +ffi_tramp_get_temp_file (void) +{ + char template[12] = "/tmp/XXXXXX"; + ssize_t count; + + tramp_globals.offset = 0; + tramp_globals.fd = mkstemp (template); + if (tramp_globals.fd == -1) + return 0; + + unlink (template); + /* + * Write the trampoline code table into the temporary file and allocate a + * trampoline table to make sure that the temporary file can be mapped. + */ + count = write(tramp_globals.fd, tramp_globals.text, tramp_globals.map_size); + if (count == tramp_globals.map_size && tramp_table_alloc ()) + return 1; + + close (tramp_globals.fd); + tramp_globals.fd = -1; + return 0; +} + +#else /* !defined HAVE_MKSTEMP */ + +/* + * TODO: + * src/closures.c contains code for finding temp file that has EXEC + * permissions. Maybe some of that code can be shared with static + * trampolines. 
+ */ +static int +ffi_tramp_get_temp_file (void) +{ + tramp_globals.offset = 0; + tramp_globals.fd = -1; + return 0; +} + +#endif /* defined HAVE_MKSTEMP */ + +#endif /* __linux__ */ + +/* ------------------------ OS-specific Initialization ----------------------*/ + +#if defined __linux__ + +static int +ffi_tramp_init_os (void) +{ + if (ffi_tramp_get_libffi ()) + return 1; + return ffi_tramp_get_temp_file (); +} + +#endif /* __linux__ */ + +/* --------------------------- OS-specific Locking -------------------------*/ + +#if defined __linux__ + +static pthread_mutex_t tramp_globals_mutex = PTHREAD_MUTEX_INITIALIZER; + +static void +ffi_tramp_lock(void) +{ + pthread_mutex_lock (&tramp_globals_mutex); +} + +static void +ffi_tramp_unlock() +{ + pthread_mutex_unlock (&tramp_globals_mutex); +} + +#endif /* __linux__ */ + +/* ------------------------ OS-specific Memory Mapping ----------------------*/ + +/* + * Create a trampoline code table mapping and a trampoline parameter table + * mapping. The two mappings must be adjacent to each other for PC-relative + * access. + * + * For each trampoline in the code table, there is a corresponding parameter + * block in the parameter table. The size of the parameter block is the same + * as the size of the trampoline. This means that the parameter block is at + * a fixed offset from its trampoline making it easy for a trampoline to find + * its parameters using PC-relative access. + * + * The parameter block will contain a struct tramp_parm. This means that + * sizeof (struct tramp_parm) cannot exceed the size of a parameter block. + */ + +#if defined __linux__ + +static int +tramp_table_map (struct tramp_table *table) +{ + char *addr; + + /* + * Create an anonymous mapping twice the map size. The top half will be used + * for the code table. The bottom half will be used for the parameter table. 
+ */ + addr = mmap (NULL, tramp_globals.map_size * 2, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) + return 0; + + /* + * Replace the top half of the anonymous mapping with the code table mapping. + */ + table->code_table = mmap (addr, tramp_globals.map_size, PROT_READ | PROT_EXEC, + MAP_PRIVATE | MAP_FIXED, tramp_globals.fd, tramp_globals.offset); + if (table->code_table == MAP_FAILED) + { + (void) munmap (addr, tramp_globals.map_size * 2); + return 0; + } + table->parm_table = table->code_table + tramp_globals.map_size; + return 1; +} + +static void +tramp_table_unmap (struct tramp_table *table) +{ + (void) munmap (table->code_table, tramp_globals.map_size); + (void) munmap (table->parm_table, tramp_globals.map_size); +} + +#endif /* __linux__ */ + +/* ------------------------ Trampoline Initialization ----------------------*/ + +/* + * Initialize the static trampoline feature. + */ +static int +ffi_tramp_init (void) +{ + if (tramp_globals.status == TRAMP_GLOBALS_PASSED) + return 1; + + if (tramp_globals.status == TRAMP_GLOBALS_FAILED) + return 0; + + if (ffi_tramp_arch == NULL) + { + tramp_globals.status = TRAMP_GLOBALS_FAILED; + return 0; + } + + tramp_globals.free_tables = NULL; + tramp_globals.nfree_tables = 0; + + /* + * Get trampoline code table information from the architecture. + */ + tramp_globals.text = ffi_tramp_arch (&tramp_globals.size, + &tramp_globals.map_size); + tramp_globals.ntramp = tramp_globals.map_size / tramp_globals.size; + + if (sysconf (_SC_PAGESIZE) > tramp_globals.map_size) + return 0; + + if (ffi_tramp_init_os ()) + { + tramp_globals.status = TRAMP_GLOBALS_PASSED; + return 1; + } + + tramp_globals.status = TRAMP_GLOBALS_FAILED; + return 0; +} + +/* ---------------------- Trampoline Table functions ---------------------- */ + +/* This code assumes that malloc () is available on all OSes. */ + +static void tramp_add (struct tramp *tramp); + +/* + * Allocate and initialize a trampoline table. 
+ */ +static int +tramp_table_alloc (void) +{ + struct tramp_table *table; + struct tramp *tramp_array, *tramp; + size_t size; + char *code, *parm; + int i; + + /* + * If we already have tables with free trampolines, there is no need to + * allocate a new table. + */ + if (tramp_globals.nfree_tables > 0) + return 1; + + /* + * Allocate a new trampoline table structure. + */ + table = malloc (sizeof (*table)); + if (table == NULL) + return 0; + + /* + * Allocate new trampoline structures. + */ + tramp_array = malloc (sizeof (*tramp) * tramp_globals.ntramp); + if (tramp_array == NULL) + goto free_table; + + /* + * Map a code table and a parameter table into the caller's address space. + */ + if (!tramp_table_map (table)) + { + /* + * Failed to map the code and parameter tables. + */ + goto free_tramp_array; + } + + /* + * Initialize the trampoline table. + */ + table->array = tramp_array; + table->free = NULL; + table->nfree = 0; + + /* + * Populate the trampoline table free list. This will also add the trampoline + * table to the global list of trampoline tables. + */ + size = tramp_globals.size; + code = table->code_table; + parm = table->parm_table; + for (i = 0; i < tramp_globals.ntramp; i++) + { + tramp = &tramp_array[i]; + tramp->table = table; + tramp->code = code; + tramp->parm = (struct tramp_parm *) parm; + tramp_add (tramp); + + code += size; + parm += size; + } + /* Success */ + return 1; + +/* Failure */ +free_tramp_array: + free (tramp_array); +free_table: + free (table); + return 0; +} + +/* + * Free a trampoline table. + */ +static void +tramp_table_free (struct tramp_table *table) +{ + tramp_table_unmap (table); + free (table->array); + free (table); +} + +/* + * Add a new trampoline table to the global table list. 
+ */ +static void +tramp_table_add (struct tramp_table *table) +{ + table->next = tramp_globals.free_tables; + table->prev = NULL; + if (tramp_globals.free_tables != NULL) + tramp_globals.free_tables->prev = table; + tramp_globals.free_tables = table; + tramp_globals.nfree_tables++; +} + +/* + * Delete a trampoline table from the global table list. + */ +static void +tramp_table_del (struct tramp_table *table) +{ + tramp_globals.nfree_tables--; + if (table->prev != NULL) + table->prev->next = table->next; + if (table->next != NULL) + table->next->prev = table->prev; + if (tramp_globals.free_tables == table) + tramp_globals.free_tables = table->next; +} + +/* ------------------------- Trampoline functions ------------------------- */ + +/* + * Add a trampoline to its trampoline table. + */ +static void +tramp_add (struct tramp *tramp) +{ + struct tramp_table *table = tramp->table; + + tramp->next = table->free; + tramp->prev = NULL; + if (table->free != NULL) + table->free->prev = tramp; + table->free = tramp; + table->nfree++; + + if (table->nfree == 1) + tramp_table_add (table); + + /* + * We don't want to keep too many free trampoline tables lying around. + */ + if (table->nfree == tramp_globals.ntramp && + tramp_globals.nfree_tables > 1) + { + tramp_table_del (table); + tramp_table_free (table); + } +} + +/* + * Remove a trampoline from its trampoline table. 
+ */ +static void +tramp_del (struct tramp *tramp) +{ + struct tramp_table *table = tramp->table; + + table->nfree--; + if (tramp->prev != NULL) + tramp->prev->next = tramp->next; + if (tramp->next != NULL) + tramp->next->prev = tramp->prev; + if (table->free == tramp) + table->free = tramp->next; + + if (table->nfree == 0) + tramp_table_del (table); +} + +/* ------------------------ Trampoline API functions ------------------------ */ + +int +ffi_tramp_is_supported(void) +{ + int ret; + + ffi_tramp_lock(); + ret = ffi_tramp_init (); + ffi_tramp_unlock(); + return ret; +} + +/* + * Allocate a trampoline and return its opaque address. + */ +void * +ffi_tramp_alloc (int flags) +{ + struct tramp *tramp; + + ffi_tramp_lock(); + + if (!ffi_tramp_init () || flags != 0) + { + ffi_tramp_unlock(); + return NULL; + } + + if (!tramp_table_alloc ()) + { + ffi_tramp_unlock(); + return NULL; + } + + tramp = tramp_globals.free_tables->free; + tramp_del (tramp); + + ffi_tramp_unlock(); + + return tramp; +} + +/* + * Set the parameters for a trampoline. + */ +void +ffi_tramp_set_parms (void *arg, void *target, void *data) +{ + struct tramp *tramp = arg; + + ffi_tramp_lock(); + tramp->parm->target = target; + tramp->parm->data = data; + ffi_tramp_unlock(); +} + +/* + * Get the invocation address of a trampoline. + */ +void * +ffi_tramp_get_addr (void *arg) +{ + struct tramp *tramp = arg; + void *addr; + + ffi_tramp_lock(); + addr = tramp->code; + ffi_tramp_unlock(); + + return addr; +} + +/* + * Free a trampoline. 
+ */ +void +ffi_tramp_free (void *arg) +{ + struct tramp *tramp = arg; + + ffi_tramp_lock(); + tramp_add (tramp); + ffi_tramp_unlock(); +} + +/* ------------------------------------------------------------------------- */ + +#else /* !FFI_EXEC_STATIC_TRAMP */ + +#include <stddef.h> + +int +ffi_tramp_is_supported(void) +{ + return 0; +} + +void * +ffi_tramp_alloc (int flags) +{ + return NULL; +} + +void +ffi_tramp_set_parms (void *arg, void *target, void *data) +{ +} + +void * +ffi_tramp_get_addr (void *arg) +{ + return NULL; +} + +void +ffi_tramp_free (void *arg) +{ +} + +#endif /* FFI_EXEC_STATIC_TRAMP */ diff --git a/src/x86/ffi.c b/src/x86/ffi.c index 5f7fd81..b4d0d39 100644 --- a/src/x86/ffi.c +++ b/src/x86/ffi.c @@ -34,6 +34,7 @@ #include <ffi_common.h> #include <stdint.h> #include <stdlib.h> +#include <tramp.h> #include "internal.h" /* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; @@ -411,6 +412,11 @@ ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, void FFI_HIDDEN ffi_closure_i386(void); void FFI_HIDDEN ffi_closure_STDCALL(void); void FFI_HIDDEN ffi_closure_REGISTER(void); +#if defined(FFI_EXEC_STATIC_TRAMP) +void FFI_HIDDEN ffi_closure_i386_alt(void); +void FFI_HIDDEN ffi_closure_STDCALL_alt(void); +void FFI_HIDDEN ffi_closure_REGISTER_alt(void); +#endif struct closure_frame { @@ -559,6 +565,22 @@ ffi_prep_closure_loc (ffi_closure* closure, return FFI_BAD_ABI; } +#if defined(FFI_EXEC_STATIC_TRAMP) + if (ffi_tramp_is_present(closure)) + { + /* Initialize the static trampoline's parameters. */ + if (dest == ffi_closure_i386) + dest = ffi_closure_i386_alt; + else if (dest == ffi_closure_STDCALL) + dest = ffi_closure_STDCALL_alt; + else + dest = ffi_closure_REGISTER_alt; + ffi_tramp_set_parms (closure->ftramp, dest, closure); + goto out; + } +#endif + + /* Initialize the dynamic trampoline. */ /* endbr32. 
*/ *(UINT32 *) tramp = 0xfb1e0ff3; @@ -570,6 +592,7 @@ ffi_prep_closure_loc (ffi_closure* closure, tramp[9] = 0xe9; *(unsigned *)(tramp + 10) = (unsigned)dest - ((unsigned)codeloc + 14); +out: closure->cif = cif; closure->fun = fun; closure->user_data = user_data; @@ -767,4 +790,17 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue) ffi_call_i386 (frame, stack); } #endif /* !FFI_NO_RAW_API */ + +#if defined(FFI_EXEC_STATIC_TRAMP) +void * +ffi_tramp_arch (size_t *tramp_size, size_t *map_size) +{ + extern void *trampoline_code_table; + + *map_size = X86_TRAMP_MAP_SIZE; + *tramp_size = X86_TRAMP_SIZE; + return &trampoline_code_table; +} +#endif + #endif /* __i386__ */ diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c index 39f9598..5264cdf 100644 --- a/src/x86/ffi64.c +++ b/src/x86/ffi64.c @@ -33,6 +33,7 @@ #include <stdlib.h> #include <stdarg.h> #include <stdint.h> +#include <tramp.h> #include "internal64.h" #ifdef __x86_64__ @@ -714,6 +715,10 @@ ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, extern void ffi_closure_unix64(void) FFI_HIDDEN; extern void ffi_closure_unix64_sse(void) FFI_HIDDEN; +#if defined(FFI_EXEC_STATIC_TRAMP) +extern void ffi_closure_unix64_alt(void) FFI_HIDDEN; +extern void ffi_closure_unix64_sse_alt(void) FFI_HIDDEN; +#endif #ifndef __ILP32__ extern ffi_status @@ -756,9 +761,24 @@ ffi_prep_closure_loc (ffi_closure* closure, else dest = ffi_closure_unix64; +#if defined(FFI_EXEC_STATIC_TRAMP) + if (ffi_tramp_is_present(closure)) + { + /* Initialize the static trampoline's parameters. */ + if (dest == ffi_closure_unix64_sse) + dest = ffi_closure_unix64_sse_alt; + else + dest = ffi_closure_unix64_alt; + ffi_tramp_set_parms (closure->ftramp, dest, closure); + goto out; + } +#endif + + /* Initialize the dynamic trampoline. 
*/ memcpy (tramp, trampoline, sizeof(trampoline)); *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)dest; +out: closure->cif = cif; closure->fun = fun; closure->user_data = user_data; @@ -892,4 +912,16 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, #endif /* FFI_GO_CLOSURES */ +#if defined(FFI_EXEC_STATIC_TRAMP) +void * +ffi_tramp_arch (size_t *tramp_size, size_t *map_size) +{ + extern void *trampoline_code_table; + + *map_size = UNIX64_TRAMP_MAP_SIZE; + *tramp_size = UNIX64_TRAMP_SIZE; + return &trampoline_code_table; +} +#endif + #endif /* __x86_64__ */ diff --git a/src/x86/ffitarget.h b/src/x86/ffitarget.h index a34f3e5..f454341 100644 --- a/src/x86/ffitarget.h +++ b/src/x86/ffitarget.h @@ -147,9 +147,11 @@ typedef enum ffi_abi { # define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */ #endif -#if !defined(GENERATE_LIBFFI_MAP) && defined(__ASSEMBLER__) \ - && defined(__CET__) +#if !defined(GENERATE_LIBFFI_MAP) && defined(__CET__) # include <cet.h> +# if (__CET__ & 1) != 0 +# define ENDBR_PRESENT +# endif # define _CET_NOTRACK notrack #else # define _CET_ENDBR diff --git a/src/x86/ffiw64.c b/src/x86/ffiw64.c index a43a9eb..6c28d59 100644 --- a/src/x86/ffiw64.c +++ b/src/x86/ffiw64.c @@ -30,6 +30,7 @@ #include <ffi_common.h> #include <stdlib.h> #include <stdint.h> +#include <tramp.h> #ifdef X86_WIN64 #define EFI64(name) name @@ -187,6 +188,9 @@ EFI64(ffi_call_go)(ffi_cif *cif, void (*fn)(void), void *rvalue, extern void ffi_closure_win64(void) FFI_HIDDEN; +#if defined(FFI_EXEC_STATIC_TRAMP) +extern void ffi_closure_win64_alt(void) FFI_HIDDEN; +#endif #ifdef FFI_GO_CLOSURES extern void ffi_go_closure_win64(void) FFI_HIDDEN; @@ -220,9 +224,20 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure, return FFI_BAD_ABI; } +#if defined(FFI_EXEC_STATIC_TRAMP) + if (ffi_tramp_is_present(closure)) + { + /* Initialize the static trampoline's parameters. 
*/ + ffi_tramp_set_parms (closure->ftramp, ffi_closure_win64_alt, closure); + goto out; + } +#endif + + /* Initialize the dynamic trampoline. */ memcpy (tramp, trampoline, sizeof(trampoline)); *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)ffi_closure_win64; +out: closure->cif = cif; closure->fun = fun; closure->user_data = user_data; diff --git a/src/x86/internal.h b/src/x86/internal.h index 09771ba..23be7a2 100644 --- a/src/x86/internal.h +++ b/src/x86/internal.h @@ -27,3 +27,17 @@ #else # define HAVE_FASTCALL 1 #endif + +#if defined(FFI_EXEC_STATIC_TRAMP) +/* + * For the trampoline code table mapping, a mapping size of 4K (base page size) + * is chosen. + */ +#define X86_TRAMP_MAP_SHIFT 12 +#define X86_TRAMP_MAP_SIZE (1 << X86_TRAMP_MAP_SHIFT) +#ifdef ENDBR_PRESENT +#define X86_TRAMP_SIZE 44 +#else +#define X86_TRAMP_SIZE 40 +#endif +#endif diff --git a/src/x86/internal64.h b/src/x86/internal64.h index 512e955..282b408 100644 --- a/src/x86/internal64.h +++ b/src/x86/internal64.h @@ -20,3 +20,17 @@ #define UNIX64_FLAG_RET_IN_MEM (1 << 10) #define UNIX64_FLAG_XMM_ARGS (1 << 11) #define UNIX64_SIZE_SHIFT 12 + +#if defined(FFI_EXEC_STATIC_TRAMP) +/* + * For the trampoline code table mapping, a mapping size of 4K (base page size) + * is chosen. + */ +#define UNIX64_TRAMP_MAP_SHIFT 12 +#define UNIX64_TRAMP_MAP_SIZE (1 << UNIX64_TRAMP_MAP_SHIFT) +#ifdef ENDBR_PRESENT +#define UNIX64_TRAMP_SIZE 40 +#else +#define UNIX64_TRAMP_SIZE 32 +#endif +#endif diff --git a/src/x86/sysv.S b/src/x86/sysv.S index d8ab4b0..7110f02 100644 --- a/src/x86/sysv.S +++ b/src/x86/sysv.S @@ -573,6 +573,94 @@ L(UW31): # cfi_endproc ENDF(C(ffi_closure_STDCALL)) +#if defined(FFI_EXEC_STATIC_TRAMP) + .balign 16 + .globl C(ffi_closure_i386_alt) + FFI_HIDDEN(C(ffi_closure_i386_alt)) +C(ffi_closure_i386_alt): + /* See the comments above trampoline_code_table. 
*/ + _CET_ENDBR + movl 4(%esp), %eax /* Load closure in eax */ + add $8, %esp /* Restore the stack */ + jmp C(ffi_closure_i386) +ENDF(C(ffi_closure_i386_alt)) + + .balign 16 + .globl C(ffi_closure_REGISTER_alt) + FFI_HIDDEN(C(ffi_closure_REGISTER_alt)) +C(ffi_closure_REGISTER_alt): + /* See the comments above trampoline_code_table. */ + _CET_ENDBR + movl (%esp), %eax /* Restore eax */ + add $4, %esp /* Leave closure on stack */ + jmp C(ffi_closure_REGISTER) +ENDF(C(ffi_closure_REGISTER_alt)) + + .balign 16 + .globl C(ffi_closure_STDCALL_alt) + FFI_HIDDEN(C(ffi_closure_STDCALL_alt)) +C(ffi_closure_STDCALL_alt): + /* See the comments above trampoline_code_table. */ + _CET_ENDBR + movl 4(%esp), %eax /* Load closure in eax */ + add $8, %esp /* Restore the stack */ + jmp C(ffi_closure_STDCALL) +ENDF(C(ffi_closure_STDCALL_alt)) + +/* + * Below is the definition of the trampoline code table. Each element in + * the code table is a trampoline. + * + * Because we jump to the trampoline, we place a _CET_ENDBR at the + * beginning of the trampoline to mark it as a valid branch target. This is + * part of the Intel CET (Control Flow Enforcement Technology). + */ +/* + * The trampoline uses register eax. It saves the original value of eax on + * the stack. + * + * The trampoline has two parameters - target code to jump to and data for + * the target code. The trampoline extracts the parameters from its parameter + * block (see tramp_table_map()). The trampoline saves the data address on + * the stack. Finally, it jumps to the target code. + * + * The target code can choose to: + * + * - restore the value of eax + * - load the data address in a register + * - restore the stack pointer to what it was when the trampoline was invoked. 
+ */ +#ifdef ENDBR_PRESENT +#define X86_DATA_OFFSET 4081 +#define X86_CODE_OFFSET 4070 +#else +#define X86_DATA_OFFSET 4085 +#define X86_CODE_OFFSET 4074 +#endif + + .align X86_TRAMP_MAP_SIZE + .globl C(trampoline_code_table) + FFI_HIDDEN(C(trampoline_code_table)) +C(trampoline_code_table): + .rept X86_TRAMP_MAP_SIZE / X86_TRAMP_SIZE + _CET_ENDBR + sub $8, %esp + movl %eax, (%esp) /* Save %eax on stack */ + call 1f /* Get next PC into %eax */ + movl X86_DATA_OFFSET(%eax), %eax /* Copy data into %eax */ + movl %eax, 4(%esp) /* Save data on stack */ + call 1f /* Get next PC into %eax */ + movl X86_CODE_OFFSET(%eax), %eax /* Copy code into %eax */ + jmp *%eax /* Jump to code */ +1: + mov (%esp), %eax + ret + .align 4 + .endr +ENDF(C(trampoline_code_table)) + .align X86_TRAMP_MAP_SIZE +#endif /* FFI_EXEC_STATIC_TRAMP */ + #if !FFI_NO_RAW_API #define raw_closure_S_FS (16+16+12) @@ -1131,6 +1219,7 @@ L(EFDE9): #endif /* __APPLE__ */ #endif /* ifndef _MSC_VER */ + #endif /* ifdef __i386__ */ #if defined __ELF__ && defined __linux__ diff --git a/src/x86/unix64.S b/src/x86/unix64.S index 89d7db1..ca6fe0c 100644 --- a/src/x86/unix64.S +++ b/src/x86/unix64.S @@ -456,6 +456,81 @@ L(sse_entry2): L(UW17): ENDF(C(ffi_go_closure_unix64)) +#if defined(FFI_EXEC_STATIC_TRAMP) + .balign 8 + .globl C(ffi_closure_unix64_sse_alt) + FFI_HIDDEN(C(ffi_closure_unix64_sse_alt)) + +C(ffi_closure_unix64_sse_alt): + /* See the comments above trampoline_code_table. */ + _CET_ENDBR + movq 8(%rsp), %r10 /* Load closure in r10 */ + addq $16, %rsp /* Restore the stack */ + jmp C(ffi_closure_unix64_sse) +ENDF(C(ffi_closure_unix64_sse_alt)) + + .balign 8 + .globl C(ffi_closure_unix64_alt) + FFI_HIDDEN(C(ffi_closure_unix64_alt)) + +C(ffi_closure_unix64_alt): + /* See the comments above trampoline_code_table. 
*/ + _CET_ENDBR + movq 8(%rsp), %r10 /* Load closure in r10 */ + addq $16, %rsp /* Restore the stack */ + jmp C(ffi_closure_unix64) + ENDF(C(ffi_closure_unix64_alt)) + +/* + * Below is the definition of the trampoline code table. Each element in + * the code table is a trampoline. + * + * Because we jump to the trampoline, we place a _CET_ENDBR at the + * beginning of the trampoline to mark it as a valid branch target. This is + * part of the Intel CET (Control Flow Enforcement Technology). + */ +/* + * The trampoline uses register r10. It saves the original value of r10 on + * the stack. + * + * The trampoline has two parameters - target code to jump to and data for + * the target code. The trampoline extracts the parameters from its parameter + * block (see tramp_table_map()). The trampoline saves the data address on + * the stack. Finally, it jumps to the target code. + * + * The target code can choose to: + * + * - restore the value of r10 + * - load the data address in a register + * - restore the stack pointer to what it was when the trampoline was invoked. + */ +#ifdef ENDBR_PRESENT +#define X86_DATA_OFFSET 4077 +#define X86_CODE_OFFSET 4073 +#else +#define X86_DATA_OFFSET 4081 +#define X86_CODE_OFFSET 4077 +#endif + + .align UNIX64_TRAMP_MAP_SIZE + .globl trampoline_code_table + FFI_HIDDEN(C(trampoline_code_table)) + +C(trampoline_code_table): + .rept UNIX64_TRAMP_MAP_SIZE / UNIX64_TRAMP_SIZE + _CET_ENDBR + subq $16, %rsp /* Make space on the stack */ + movq %r10, (%rsp) /* Save %r10 on stack */ + movq X86_DATA_OFFSET(%rip), %r10 /* Copy data into %r10 */ + movq %r10, 8(%rsp) /* Save data on stack */ + movq X86_CODE_OFFSET(%rip), %r10 /* Copy code into %r10 */ + jmp *%r10 /* Jump to code */ + .align 8 + .endr +ENDF(C(trampoline_code_table)) + .align UNIX64_TRAMP_MAP_SIZE +#endif /* FFI_EXEC_STATIC_TRAMP */ + /* Sadly, OSX cctools-as doesn't understand .cfi directives at all. 
*/ #ifdef __APPLE__ diff --git a/src/x86/win64.S b/src/x86/win64.S index 8315e8b..f3ace8d 100644 --- a/src/x86/win64.S +++ b/src/x86/win64.S @@ -234,6 +234,20 @@ C(ffi_closure_win64): cfi_endproc SEH(.seh_endproc) + +#if defined(FFI_EXEC_STATIC_TRAMP) + .align 8 + .globl C(ffi_closure_win64_alt) + FFI_HIDDEN(C(ffi_closure_win64_alt)) + + SEH(.seh_proc ffi_closure_win64_alt) +C(ffi_closure_win64_alt): + _CET_ENDBR + movq 8(%rsp), %r10 + addq $16, %rsp + jmp C(ffi_closure_win64) + SEH(.seh_endproc) +#endif #endif /* __x86_64__ */ #if defined __ELF__ && defined __linux__ diff --git a/testsuite/libffi.closures/closure_loc_fn0.c b/testsuite/libffi.closures/closure_loc_fn0.c index b3afa0b..ad488ac 100644 --- a/testsuite/libffi.closures/closure_loc_fn0.c +++ b/testsuite/libffi.closures/closure_loc_fn0.c @@ -83,7 +83,10 @@ int main (void) CHECK(ffi_prep_closure_loc(pcl, &cif, closure_loc_test_fn0, (void *) 3 /* userdata */, codeloc) == FFI_OK); +#ifndef FFI_EXEC_STATIC_TRAMP + /* With static trampolines, the codeloc does not point to closure */ CHECK(memcmp(pcl, codeloc, sizeof(*pcl)) == 0); +#endif res = (*((closure_loc_test_type0)codeloc)) (1LL, 2, 3LL, 4, 127, 429LL, 7, 8, 9.5, 10, 11, 12, 13, |