From 5a4774cd4d90f9ea7e7f9e34b15de29463aba4c4 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Fri, 22 Jul 2022 05:56:30 +0800 Subject: static trampoline for LoongArch (#723) For the benefits and technical details of static trampolines, see https://github.com/libffi/libffi/pull/624. Since LoongArch is a new architecture, let's be "safer" from the start. The change survived the libffi testsuite on loongarch64-linux-gnu. --- src/loongarch64/ffi.c | 30 +++++++++++++++++++++++++++--- src/loongarch64/sysv.S | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/loongarch64/ffi.c b/src/loongarch64/ffi.c index 7a28892..ed9c15f 100644 --- a/src/loongarch64/ffi.c +++ b/src/loongarch64/ffi.c @@ -519,8 +519,16 @@ ffi_prep_closure_loc (ffi_closure *closure, ffi_cif *cif, if (cif->abi <= FFI_FIRST_ABI || cif->abi >= FFI_LAST_ABI) return FFI_BAD_ABI; - /* We will call ffi_closure_inner with codeloc, not closure, but as long - as the memory is readable it should work. */ +#if defined(FFI_EXEC_STATIC_TRAMP) + if (ffi_tramp_is_present(closure)) + { + ffi_tramp_set_parms (closure->ftramp, ffi_closure_asm, closure); + goto out; + } +#endif + + /* Fill the dynamic trampoline. We will call ffi_closure_inner with codeloc, + not closure, but as long as the memory is readable it should work. */ tramp[0] = 0x1800000c; /* pcaddi $t0, 0 (i.e. 
$t0 <- tramp) */ tramp[1] = 0x28c0418d; /* ld.d $t1, $t0, 16 */ tramp[2] = 0x4c0001a0; /* jirl $zero, $t1, 0 */ @@ -528,11 +536,13 @@ ffi_prep_closure_loc (ffi_closure *closure, ffi_cif *cif, tramp[4] = fn; tramp[5] = fn >> 32; + __builtin___clear_cache (codeloc, codeloc + FFI_TRAMPOLINE_SIZE); + +out: closure->cif = cif; closure->fun = fun; closure->user_data = user_data; - __builtin___clear_cache (codeloc, codeloc + FFI_TRAMPOLINE_SIZE); return FFI_OK; } @@ -593,3 +603,17 @@ ffi_closure_inner (ffi_cif *cif, marshal (&cb, cif->rtype, 0, rvalue); } } + +#if defined(FFI_EXEC_STATIC_TRAMP) +void * +ffi_tramp_arch (size_t *tramp_size, size_t *map_size) +{ + extern void *trampoline_code_table; + + *tramp_size = 16; + /* A mapping size of 64K is chosen to cover the page sizes of 4K, 16K, and + 64K. */ + *map_size = 1 << 16; + return &trampoline_code_table; +} +#endif diff --git a/src/loongarch64/sysv.S b/src/loongarch64/sysv.S index 9e0da11..aa7bde2 100644 --- a/src/loongarch64/sysv.S +++ b/src/loongarch64/sysv.S @@ -147,7 +147,7 @@ ffi_call_asm: .size ffi_call_asm, .-ffi_call_asm -/* ffi_closure_asm. Expects address of the passed-in ffi_closure in t1. +/* ffi_closure_asm. Expects address of the passed-in ffi_closure in t0. void ffi_closure_inner (ffi_cif *cif, void (*fun)(ffi_cif *, void *, void **, void *), void *user_data, @@ -219,6 +219,37 @@ ffi_closure_asm: .cfi_endproc .size ffi_closure_asm, .-ffi_closure_asm +/* Static trampoline code table, in which each element is a trampoline. + + The trampoline clobbers t0 and t1, but we don't save them on the stack + because our psABI explicitly says they are scratch registers, at least for + ELF. Our dynamic trampoline is already clobbering them anyway. + + The trampoline has two parameters - target code to jump to and data for + the target code. The trampoline extracts the parameters from its parameter + block (see tramp_table_map()). The trampoline saves the data address in + t0 and jumps to the target code. 
As ffi_closure_asm() already expects the + data address to be in t0, we don't need a "ffi_closure_asm_alt". */ + +#if defined(FFI_EXEC_STATIC_TRAMP) + .align 16 + .globl trampoline_code_table + .hidden trampoline_code_table + .type trampoline_code_table, @function + +trampoline_code_table: + + .rept 65536 / 16 + pcaddu12i $t1, 16 # 65536 >> 12 + ld.d $t0, $t1, 0 + ld.d $t1, $t1, 8 + jirl $zero, $t1, 0 + .endr + .size trampoline_code_table, .-trampoline_code_table + + .align 2 +#endif + /* ffi_go_closure_asm. Expects address of the passed-in ffi_go_closure in t2. void ffi_closure_inner (ffi_cif *cif, void (*fun)(ffi_cif *, void *, void **, void *), -- cgit v1.2.1