diff options
author | Martin Storsjö <martin@martin.st> | 2021-04-07 05:42:10 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-06 22:42:10 -0400 |
commit | dd5bd03075149d7cf8441875c1a344e8beb57dde (patch) | |
tree | 3a119b5e06927287cd2cf2bd38879617461aa815 /src | |
parent | 95ef857d5c6ed15c6c6ba5f8a5b26c0a38b417ab (diff) | |
download | libffi-dd5bd03075149d7cf8441875c1a344e8beb57dde.tar.gz |
Fix building for arm windows with mingw toolchains (#631)
* arm: Check _WIN32 instead of _M_ARM or _MSC_VER for detecting windows
This matches what was done for ARM64 in
c06468fa6674d3783a0edb1d0fae9afc8bc28513.
* arm: Only use armasm source when building with MSVC
When building for windows/arm with clang, the normal gas style .S
source works fine (if fixed up to support thumb and other windows
specifics).
This matches what was done for ARM64 in
c06468fa6674d3783a0edb1d0fae9afc8bc28513.
* arm: Fix sysv.S to work in thumb mode
Align cases in jump tables (adding nop padding to make sure each
case starts where expected).
Rewrite instructions that add directly to the pc register.
For ffi_closure_ret, factor out a call_epilogue subroutine that
restores both sp and pc from the stack; the thumb version of ldm
can't load into the sp register. To avoid excessive ifdeffing, also
branch to call_epilogue in arm mode, but keep the shorter
"ldm sp, {sp, pc}" sequence as its body in that case.
* arm: Add win32 version of trampoline to sysv.S
This matches the version of it in sysv_msvc_arm32.S. The calling
C code expects a specific form of the trampoline on windows; make
sure these work the same on windows regardless of the form of
assembly used.
* arm: Avoid optimizing out clearing the thumb bit of ffi_arm_trampoline
We clear the thumb bit of ffi_arm_trampoline with a bitmask before
memcpying its instructions into closure->tramp.
If the bit isn't cleared, the memcpy of the trampoline function
copies the wrong instructions.
If the ffi_arm_trampoline symbol is declared as an array of int,
the compiler can assume that it is aligned to a 4-byte boundary,
conclude that the bitmask operation is a no-op, and optimize it out.
See https://godbolt.org/z/dE3jE1WTz; both Clang and GCC optimize
out the bitmask as it is, while MSVC doesn't. By declaring the
trampoline as an array of unsigned char, the bitmask works as
intended.
Diffstat (limited to 'src')
-rw-r--r-- | src/arm/ffi.c | 19 | ||||
-rw-r--r-- | src/arm/ffitarget.h | 6 | ||||
-rw-r--r-- | src/arm/sysv.S | 72 |
3 files changed, 77 insertions, 20 deletions
diff --git a/src/arm/ffi.c b/src/arm/ffi.c index b2f60d1..593ab4d 100644 --- a/src/arm/ffi.c +++ b/src/arm/ffi.c @@ -37,7 +37,7 @@ #include <tramp.h> #include "internal.h" -#if defined(_MSC_VER) && defined(_M_ARM) +#if defined(_WIN32) #define WIN32_LEAN_AND_MEAN #include <windows.h> #endif @@ -49,10 +49,13 @@ #endif #else -#ifndef _M_ARM +#ifndef _WIN32 extern unsigned int ffi_arm_trampoline[2] FFI_HIDDEN; #else -extern unsigned int ffi_arm_trampoline[3] FFI_HIDDEN; +// Declare this as an array of char, instead of array of int, +// otherwise Clang optimizes out the "& 0xFFFFFFFE" for clearing +// the thumb bit. +extern unsigned char ffi_arm_trampoline[12] FFI_HIDDEN; #endif #endif @@ -104,13 +107,13 @@ ffi_put_arg (ffi_type *ty, void *src, void *dst) case FFI_TYPE_SINT32: case FFI_TYPE_UINT32: case FFI_TYPE_POINTER: -#ifndef _MSC_VER +#ifndef _WIN32 case FFI_TYPE_FLOAT: #endif *(UINT32 *)dst = *(UINT32 *)src; break; -#ifdef _MSC_VER +#ifdef _WIN32 // casting a float* to a UINT32* doesn't work on Windows case FFI_TYPE_FLOAT: *(uintptr_t *)dst = 0; @@ -633,7 +636,7 @@ ffi_prep_closure_loc (ffi_closure * closure, #endif /* Initialize the dynamic trampoline. 
*/ -#ifndef _M_ARM +#ifndef _WIN32 memcpy(closure->tramp, ffi_arm_trampoline, 8); #else // cast away function type so MSVC doesn't set the lower bit of the function pointer @@ -643,13 +646,13 @@ ffi_prep_closure_loc (ffi_closure * closure, #if defined (__QNX__) msync(closure->tramp, 8, 0x1000000); /* clear data map */ msync(codeloc, 8, 0x1000000); /* clear insn map */ -#elif defined(_MSC_VER) +#elif defined(_WIN32) FlushInstructionCache(GetCurrentProcess(), closure->tramp, FFI_TRAMPOLINE_SIZE); #else __clear_cache(closure->tramp, closure->tramp + 8); /* clear data map */ __clear_cache(codeloc, codeloc + 8); /* clear insn map */ #endif -#ifdef _M_ARM +#ifdef _WIN32 *(void(**)(void))(closure->tramp + FFI_TRAMPOLINE_CLOSURE_FUNCTION) = closure_func; #else *(void (**)(void))(closure->tramp + 8) = closure_func; diff --git a/src/arm/ffitarget.h b/src/arm/ffitarget.h index cb57b84..12d5d20 100644 --- a/src/arm/ffitarget.h +++ b/src/arm/ffitarget.h @@ -43,7 +43,7 @@ typedef enum ffi_abi { FFI_SYSV, FFI_VFP, FFI_LAST_ABI, -#if defined(__ARM_PCS_VFP) || defined(_M_ARM) +#if defined(__ARM_PCS_VFP) || defined(_WIN32) FFI_DEFAULT_ABI = FFI_VFP, #else FFI_DEFAULT_ABI = FFI_SYSV, @@ -57,7 +57,7 @@ typedef enum ffi_abi { signed char vfp_args[16] \ #define FFI_TARGET_SPECIFIC_VARIADIC -#ifndef _M_ARM +#ifndef _WIN32 #define FFI_TARGET_HAS_COMPLEX_TYPE #endif @@ -77,7 +77,7 @@ typedef enum ffi_abi { #endif #else -#ifdef _MSC_VER +#ifdef _WIN32 #define FFI_TRAMPOLINE_SIZE 16 #define FFI_TRAMPOLINE_CLOSURE_FUNCTION 12 #else diff --git a/src/arm/sysv.S b/src/arm/sysv.S index e816e32..fb36213 100644 --- a/src/arm/sysv.S +++ b/src/arm/sysv.S @@ -92,9 +92,25 @@ #define ARM_FUNC_END(name) \ SIZE(name) + .text + .syntax unified +#if defined(_WIN32) + /* Windows on ARM is thumb-only */ + .thumb +#else + /* Keep the assembly in ARM mode in other cases, for simplicity + * (to avoid interworking issues). 
*/ +#undef __thumb__ + .arm +#endif + /* Aid in defining a jump table with 8 bytes between entries. */ +#ifdef __thumb__ +/* In thumb mode, instructions can be shorter than expected in arm mode, so + * we need to align the start of each case. */ +# define E(index) .align 3 +#elif defined(__clang__) /* ??? The clang assembler doesn't handle .if with symbolic expressions. */ -#ifdef __clang__ # define E(index) #else # define E(index) \ @@ -103,9 +119,6 @@ .endif #endif - .text - .syntax unified - .arm #ifndef __clang__ /* We require interworking on LDM, which implies ARMv5T, @@ -128,6 +141,7 @@ ARM_FUNC_START(ffi_call_VFP) cfi_startproc cmp r3, #3 @ load only d0 if possible + ite le #ifdef __clang__ vldrle d0, [r0] vldmgt r0, {d0-d7} @@ -167,9 +181,16 @@ ARM_FUNC_START(ffi_call_SYSV) cfi_def_cfa_register(sp) @ Store values stored in registers. +#ifndef __thumb__ .align 3 add pc, pc, r3, lsl #3 nop +#else + adr ip, 0f + add ip, ip, r3, lsl #3 + mov pc, ip + .align 3 +#endif 0: E(ARM_TYPE_VFP_S) #ifdef __clang__ @@ -228,6 +249,9 @@ ARM_FUNC_END(ffi_go_closure_SYSV) ARM_FUNC_START(ffi_closure_SYSV) UNWIND(.fnstart) cfi_startproc +#ifdef _WIN32 + ldmfd sp!, {r0, ip} @ restore fp (r0 is used for stack alignment) +#endif stmdb sp!, {r0-r3} @ save argument regs cfi_adjust_cfa_offset(16) @@ -256,7 +280,12 @@ ARM_FUNC_START(ffi_closure_SYSV) @ Load values returned in registers. add r2, sp, #8+64 @ load result adr r3, CNAME(ffi_closure_ret) +#ifndef __thumb__ add pc, r3, r0, lsl #3 +#else + add r3, r3, r0, lsl #3 + mov pc, r3 +#endif cfi_endproc UNWIND(.fnend) ARM_FUNC_END(ffi_closure_SYSV) @@ -275,6 +304,9 @@ ARM_FUNC_END(ffi_go_closure_VFP) ARM_FUNC_START(ffi_closure_VFP) UNWIND(.fnstart) cfi_startproc +#ifdef _WIN32 + ldmfd sp!, {r0, ip} @ restore fp (r0 is used for stack alignment) +#endif stmdb sp!, {r0-r3} @ save argument regs cfi_adjust_cfa_offset(16) @@ -306,7 +338,12 @@ ARM_FUNC_START(ffi_closure_VFP) @ Load values returned in registers. 
add r2, sp, #8+64 @ load result adr r3, CNAME(ffi_closure_ret) +#ifndef __thumb__ add pc, r3, r0, lsl #3 +#else + add r3, r3, r0, lsl #3 + mov pc, r3 +#endif cfi_endproc UNWIND(.fnend) ARM_FUNC_END(ffi_closure_VFP) @@ -326,32 +363,40 @@ E(ARM_TYPE_VFP_S) #else ldc p10, cr0, [r2] @ vldr s0, [r2] #endif - ldm sp, {sp,pc} + b call_epilogue E(ARM_TYPE_VFP_D) #ifdef __clang__ vldr d0, [r2] #else ldc p11, cr0, [r2] @ vldr d0, [r2] #endif - ldm sp, {sp,pc} + b call_epilogue E(ARM_TYPE_VFP_N) #ifdef __clang__ vldm r2, {d0-d3} #else ldc p11, cr0, [r2], {8} @ vldm r2, {d0-d3} #endif - ldm sp, {sp,pc} + b call_epilogue E(ARM_TYPE_INT64) ldr r1, [r2, #4] nop E(ARM_TYPE_INT) ldr r0, [r2] - ldm sp, {sp,pc} + b call_epilogue E(ARM_TYPE_VOID) - ldm sp, {sp,pc} + b call_epilogue nop E(ARM_TYPE_STRUCT) + b call_epilogue +call_epilogue: +#ifndef __thumb__ ldm sp, {sp,pc} +#else + ldm sp, {ip,lr} + mov sp, ip + bx lr +#endif cfi_endproc ARM_FUNC_END(ffi_closure_ret) @@ -419,6 +464,15 @@ ARM_FUNC_START(ffi_closure_trampoline_table_page) ARM_FUNC_END(ffi_closure_trampoline_table_page) #endif +#elif defined(_WIN32) + +ARM_FUNC_START(ffi_arm_trampoline) +0: adr ip, 0b + stmdb sp!, {r0, ip} + ldr pc, 1f +1: .long 0 +ARM_FUNC_END(ffi_arm_trampoline) + #else ARM_FUNC_START(ffi_arm_trampoline) |