summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Martin Storsjö <martin@martin.st> 2021-04-07 05:42:10 +0300
committer: GitHub <noreply@github.com> 2021-04-06 22:42:10 -0400
commit: dd5bd03075149d7cf8441875c1a344e8beb57dde (patch)
tree: 3a119b5e06927287cd2cf2bd38879617461aa815
parent: 95ef857d5c6ed15c6c6ba5f8a5b26c0a38b417ab (diff)
download: libffi-dd5bd03075149d7cf8441875c1a344e8beb57dde.tar.gz
Fix building for arm windows with mingw toolchains (#631)
* arm: Check _WIN32 instead of _M_ARM or _MSC_VER for detecting windows

  This matches what was done for ARM64 in c06468fa6674d3783a0edb1d0fae9afc8bc28513.

* arm: Only use armasm source when building with MSVC

  When building for windows/arm with clang, the normal gas style .S source works fine (if fixed up to support thumb and other windows specifics).

  This matches what was done for ARM64 in c06468fa6674d3783a0edb1d0fae9afc8bc28513.

* arm: Fix sysv.S to work in thumb mode

  Align cases in jump tables (adding nop padding to make sure each case starts where expected).

  Rewrite instructions that add directly to the pc register.

  For ffi_closure_ret, factor out a call_epilogue subroutine that restores both sp and pc from the stack; the thumb version of ldm can't load into the sp register. To avoid excessive ifdeffing, keep using call_epilogue in arm mode, but keep the shorter "ldm sp, {sp, pc}" epilogue in that case.

* arm: Add win32 version of trampoline to sysv.S

  This matches the version of it in sysv_msvc_arm32.S. The calling C code expects a specific form of the trampoline on windows; make sure these work the same on windows regardless of the form of assembly used.

* arm: Avoid optimizing out clearing the thumb bit of ffi_arm_trampoline

  We clear the thumb bit of ffi_arm_trampoline with a bitmask before memcpying its instructions into closure->tramp.

  If the bit isn't cleared, the memcpy of the trampoline function copies the wrong instructions.

  If the ffi_arm_trampoline symbol is declared as an array of int, the compiler can assume that it is aligned to a 4 byte boundary and the bitmask operation is a no-op, and optimize it out.

  See https://godbolt.org/z/dE3jE1WTz; both Clang and GCC optimize out the bitmask as it is, while MSVC doesn't. By declaring the trampoline as an array of unsigned char, the bitmask works as intended.
-rw-r--r--  configure.host       10
-rw-r--r--  src/arm/ffi.c        19
-rw-r--r--  src/arm/ffitarget.h   6
-rw-r--r--  src/arm/sysv.S       72
4 files changed, 85 insertions, 22 deletions
diff --git a/configure.host b/configure.host
index 257b784..2682671 100644
--- a/configure.host
+++ b/configure.host
@@ -32,7 +32,9 @@ case "${host}" in
arm*-*-cygwin* | arm*-*-mingw* | arm*-*-win* )
TARGET=ARM_WIN32; TARGETDIR=arm
- MSVC=1
+ if test "${ax_cv_c_compiler_vendor}" = "microsoft"; then
+ MSVC=1
+ fi
;;
arm*-*-*)
@@ -264,7 +266,11 @@ esac
# ... but some of the cases above share configury.
case "${TARGET}" in
ARM_WIN32)
- SOURCES="ffi.c sysv_msvc_arm32.S"
+ if test "$MSVC" = 1; then
+ SOURCES="ffi.c sysv_msvc_arm32.S"
+ else
+ SOURCES="ffi.c sysv.S"
+ fi
;;
ARM_WIN64)
if test "$MSVC" = 1; then
diff --git a/src/arm/ffi.c b/src/arm/ffi.c
index b2f60d1..593ab4d 100644
--- a/src/arm/ffi.c
+++ b/src/arm/ffi.c
@@ -37,7 +37,7 @@
#include <tramp.h>
#include "internal.h"
-#if defined(_MSC_VER) && defined(_M_ARM)
+#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
@@ -49,10 +49,13 @@
#endif
#else
-#ifndef _M_ARM
+#ifndef _WIN32
extern unsigned int ffi_arm_trampoline[2] FFI_HIDDEN;
#else
-extern unsigned int ffi_arm_trampoline[3] FFI_HIDDEN;
+// Declare this as an array of char, instead of array of int,
+// otherwise Clang optimizes out the "& 0xFFFFFFFE" for clearing
+// the thumb bit.
+extern unsigned char ffi_arm_trampoline[12] FFI_HIDDEN;
#endif
#endif
@@ -104,13 +107,13 @@ ffi_put_arg (ffi_type *ty, void *src, void *dst)
case FFI_TYPE_SINT32:
case FFI_TYPE_UINT32:
case FFI_TYPE_POINTER:
-#ifndef _MSC_VER
+#ifndef _WIN32
case FFI_TYPE_FLOAT:
#endif
*(UINT32 *)dst = *(UINT32 *)src;
break;
-#ifdef _MSC_VER
+#ifdef _WIN32
// casting a float* to a UINT32* doesn't work on Windows
case FFI_TYPE_FLOAT:
*(uintptr_t *)dst = 0;
@@ -633,7 +636,7 @@ ffi_prep_closure_loc (ffi_closure * closure,
#endif
/* Initialize the dynamic trampoline. */
-#ifndef _M_ARM
+#ifndef _WIN32
memcpy(closure->tramp, ffi_arm_trampoline, 8);
#else
// cast away function type so MSVC doesn't set the lower bit of the function pointer
@@ -643,13 +646,13 @@ ffi_prep_closure_loc (ffi_closure * closure,
#if defined (__QNX__)
msync(closure->tramp, 8, 0x1000000); /* clear data map */
msync(codeloc, 8, 0x1000000); /* clear insn map */
-#elif defined(_MSC_VER)
+#elif defined(_WIN32)
FlushInstructionCache(GetCurrentProcess(), closure->tramp, FFI_TRAMPOLINE_SIZE);
#else
__clear_cache(closure->tramp, closure->tramp + 8); /* clear data map */
__clear_cache(codeloc, codeloc + 8); /* clear insn map */
#endif
-#ifdef _M_ARM
+#ifdef _WIN32
*(void(**)(void))(closure->tramp + FFI_TRAMPOLINE_CLOSURE_FUNCTION) = closure_func;
#else
*(void (**)(void))(closure->tramp + 8) = closure_func;
diff --git a/src/arm/ffitarget.h b/src/arm/ffitarget.h
index cb57b84..12d5d20 100644
--- a/src/arm/ffitarget.h
+++ b/src/arm/ffitarget.h
@@ -43,7 +43,7 @@ typedef enum ffi_abi {
FFI_SYSV,
FFI_VFP,
FFI_LAST_ABI,
-#if defined(__ARM_PCS_VFP) || defined(_M_ARM)
+#if defined(__ARM_PCS_VFP) || defined(_WIN32)
FFI_DEFAULT_ABI = FFI_VFP,
#else
FFI_DEFAULT_ABI = FFI_SYSV,
@@ -57,7 +57,7 @@ typedef enum ffi_abi {
signed char vfp_args[16] \
#define FFI_TARGET_SPECIFIC_VARIADIC
-#ifndef _M_ARM
+#ifndef _WIN32
#define FFI_TARGET_HAS_COMPLEX_TYPE
#endif
@@ -77,7 +77,7 @@ typedef enum ffi_abi {
#endif
#else
-#ifdef _MSC_VER
+#ifdef _WIN32
#define FFI_TRAMPOLINE_SIZE 16
#define FFI_TRAMPOLINE_CLOSURE_FUNCTION 12
#else
diff --git a/src/arm/sysv.S b/src/arm/sysv.S
index e816e32..fb36213 100644
--- a/src/arm/sysv.S
+++ b/src/arm/sysv.S
@@ -92,9 +92,25 @@
#define ARM_FUNC_END(name) \
SIZE(name)
+ .text
+ .syntax unified
+#if defined(_WIN32)
+ /* Windows on ARM is thumb-only */
+ .thumb
+#else
+ /* Keep the assembly in ARM mode in other cases, for simplicity
+ * (to avoid interworking issues). */
+#undef __thumb__
+ .arm
+#endif
+
/* Aid in defining a jump table with 8 bytes between entries. */
+#ifdef __thumb__
+/* In thumb mode, instructions can be shorter than expected in arm mode, so
+ * we need to align the start of each case. */
+# define E(index) .align 3
+#elif defined(__clang__)
/* ??? The clang assembler doesn't handle .if with symbolic expressions. */
-#ifdef __clang__
# define E(index)
#else
# define E(index) \
@@ -103,9 +119,6 @@
.endif
#endif
- .text
- .syntax unified
- .arm
#ifndef __clang__
/* We require interworking on LDM, which implies ARMv5T,
@@ -128,6 +141,7 @@ ARM_FUNC_START(ffi_call_VFP)
cfi_startproc
cmp r3, #3 @ load only d0 if possible
+ ite le
#ifdef __clang__
vldrle d0, [r0]
vldmgt r0, {d0-d7}
@@ -167,9 +181,16 @@ ARM_FUNC_START(ffi_call_SYSV)
cfi_def_cfa_register(sp)
@ Store values stored in registers.
+#ifndef __thumb__
.align 3
add pc, pc, r3, lsl #3
nop
+#else
+ adr ip, 0f
+ add ip, ip, r3, lsl #3
+ mov pc, ip
+ .align 3
+#endif
0:
E(ARM_TYPE_VFP_S)
#ifdef __clang__
@@ -228,6 +249,9 @@ ARM_FUNC_END(ffi_go_closure_SYSV)
ARM_FUNC_START(ffi_closure_SYSV)
UNWIND(.fnstart)
cfi_startproc
+#ifdef _WIN32
+ ldmfd sp!, {r0, ip} @ restore fp (r0 is used for stack alignment)
+#endif
stmdb sp!, {r0-r3} @ save argument regs
cfi_adjust_cfa_offset(16)
@@ -256,7 +280,12 @@ ARM_FUNC_START(ffi_closure_SYSV)
@ Load values returned in registers.
add r2, sp, #8+64 @ load result
adr r3, CNAME(ffi_closure_ret)
+#ifndef __thumb__
add pc, r3, r0, lsl #3
+#else
+ add r3, r3, r0, lsl #3
+ mov pc, r3
+#endif
cfi_endproc
UNWIND(.fnend)
ARM_FUNC_END(ffi_closure_SYSV)
@@ -275,6 +304,9 @@ ARM_FUNC_END(ffi_go_closure_VFP)
ARM_FUNC_START(ffi_closure_VFP)
UNWIND(.fnstart)
cfi_startproc
+#ifdef _WIN32
+ ldmfd sp!, {r0, ip} @ restore fp (r0 is used for stack alignment)
+#endif
stmdb sp!, {r0-r3} @ save argument regs
cfi_adjust_cfa_offset(16)
@@ -306,7 +338,12 @@ ARM_FUNC_START(ffi_closure_VFP)
@ Load values returned in registers.
add r2, sp, #8+64 @ load result
adr r3, CNAME(ffi_closure_ret)
+#ifndef __thumb__
add pc, r3, r0, lsl #3
+#else
+ add r3, r3, r0, lsl #3
+ mov pc, r3
+#endif
cfi_endproc
UNWIND(.fnend)
ARM_FUNC_END(ffi_closure_VFP)
@@ -326,32 +363,40 @@ E(ARM_TYPE_VFP_S)
#else
ldc p10, cr0, [r2] @ vldr s0, [r2]
#endif
- ldm sp, {sp,pc}
+ b call_epilogue
E(ARM_TYPE_VFP_D)
#ifdef __clang__
vldr d0, [r2]
#else
ldc p11, cr0, [r2] @ vldr d0, [r2]
#endif
- ldm sp, {sp,pc}
+ b call_epilogue
E(ARM_TYPE_VFP_N)
#ifdef __clang__
vldm r2, {d0-d3}
#else
ldc p11, cr0, [r2], {8} @ vldm r2, {d0-d3}
#endif
- ldm sp, {sp,pc}
+ b call_epilogue
E(ARM_TYPE_INT64)
ldr r1, [r2, #4]
nop
E(ARM_TYPE_INT)
ldr r0, [r2]
- ldm sp, {sp,pc}
+ b call_epilogue
E(ARM_TYPE_VOID)
- ldm sp, {sp,pc}
+ b call_epilogue
nop
E(ARM_TYPE_STRUCT)
+ b call_epilogue
+call_epilogue:
+#ifndef __thumb__
ldm sp, {sp,pc}
+#else
+ ldm sp, {ip,lr}
+ mov sp, ip
+ bx lr
+#endif
cfi_endproc
ARM_FUNC_END(ffi_closure_ret)
@@ -419,6 +464,15 @@ ARM_FUNC_START(ffi_closure_trampoline_table_page)
ARM_FUNC_END(ffi_closure_trampoline_table_page)
#endif
+#elif defined(_WIN32)
+
+ARM_FUNC_START(ffi_arm_trampoline)
+0: adr ip, 0b
+ stmdb sp!, {r0, ip}
+ ldr pc, 1f
+1: .long 0
+ARM_FUNC_END(ffi_arm_trampoline)
+
#else
ARM_FUNC_START(ffi_arm_trampoline)