summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2022-05-03 13:13:08 -0400
committerMarge Bot <ben+marge-bot@smart-cactus.org>2022-05-05 20:24:37 -0400
commitbe7102e5a75a652b7c073360cc1c3766e8f984bf (patch)
treea78bb7017d34b8ac55337d319d29447667bfc7f6
parentaacb15a3d68216cac0729e15d708be2f13b0eae8 (diff)
downloadhaskell-be7102e5a75a652b7c073360cc1c3766e8f984bf.tar.gz
rts: Ensure that XMM registers are preserved on Win64
Previously we only preserved the bottom 64 bits of the callee-saved 128-bit XMM registers, in violation of the Win64 calling convention. Fix this. Fixes #21465.
-rw-r--r--compiler/GHC/CmmToAsm/X86/Regs.hs6
-rw-r--r--rts/StgCRun.c63
-rw-r--r--rts/include/rts/Constants.h2
3 files changed, 41 insertions, 30 deletions
diff --git a/compiler/GHC/CmmToAsm/X86/Regs.hs b/compiler/GHC/CmmToAsm/X86/Regs.hs
index cc880f6e98..38d4fdc422 100644
--- a/compiler/GHC/CmmToAsm/X86/Regs.hs
+++ b/compiler/GHC/CmmToAsm/X86/Regs.hs
@@ -382,9 +382,9 @@ callClobberedRegs platform
| target32Bit platform = [eax,ecx,edx] ++ map regSingle (floatregnos platform)
| platformOS platform == OSMinGW32
= [rax,rcx,rdx,r8,r9,r10,r11]
- -- Only xmm0-5 are caller-saves registers on 64bit windows.
- -- ( https://docs.microsoft.com/en-us/cpp/build/register-usage )
- -- For details check the Win64 ABI.
+ -- Only xmm0-5 are caller-saves registers on 64-bit Windows.
+ -- For details check the Win64 ABI:
+ -- https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions
++ map xmm [0 .. 5]
| otherwise
-- all xmm regs are caller-saves
diff --git a/rts/StgCRun.c b/rts/StgCRun.c
index 7f5b6d169b..4fefc326e5 100644
--- a/rts/StgCRun.c
+++ b/rts/StgCRun.c
@@ -115,8 +115,9 @@ StgFunPtr StgReturn(void)
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* On X86 (both 32bit and 64bit) we keep the stack aligned on function calls at
* a 16-byte boundary. This is done because on a number of architectures the
- * ABI requires this (x64, Mac OSX 32bit/64bit) as well as interfacing with
- * other libraries through the FFI.
+ * ABI requires this (e.g. the System V AMD64 ABI, Mac OS X 32-bit/64-bit ABIs,
+ * and the Win64 ABI) as well as interfacing with other libraries through the
+ * FFI.
*
* As part of this arrangement we must maintain the stack at a 16-byte boundary
* - word_size-bytes (so 16n - 4 for i386 and 16n - 8 for x64) on entry to a
@@ -405,19 +406,27 @@ StgRunIsImplementedInAssembler(void)
* Additional callee saved registers on Win64. This must match
* callClobberedRegs in compiler/GHC/CmmToAsm/X86/Regs.hs as
* both represent the Win64 calling convention.
+ *
+ * Note that we must save the entire 128-bit width of the XMM
+ * registers, as noted in #21465. Moreover, note that, due to the
+ * presence of the return address on the stack, %rsp+8 is
+ * 16-byte aligned. Since MOVAPS requires memory operands to be aligned
+ * to 16 bytes, we must add a word (8 bytes) of padding here.
*/
- "movq %%rdi,48(%%rax)\n\t"
- "movq %%rsi,56(%%rax)\n\t"
- "movq %%xmm6, 64(%%rax)\n\t"
- "movq %%xmm7, 72(%%rax)\n\t"
- "movq %%xmm8, 80(%%rax)\n\t"
- "movq %%xmm9, 88(%%rax)\n\t"
- "movq %%xmm10, 96(%%rax)\n\t"
- "movq %%xmm11,104(%%rax)\n\t"
- "movq %%xmm12,112(%%rax)\n\t"
- "movq %%xmm13,120(%%rax)\n\t"
- "movq %%xmm14,128(%%rax)\n\t"
- "movq %%xmm15,136(%%rax)\n\t"
+ "movq %%rdi, 48(%%rax)\n\t"
+ "movq %%rsi, 56(%%rax)\n\t"
+ /* 8 bytes of padding for alignment */
+ "movaps %%xmm6, 72(%%rax)\n\t"
+ "movaps %%xmm7, 88(%%rax)\n\t"
+ "movaps %%xmm8, 104(%%rax)\n\t"
+ "movaps %%xmm9, 120(%%rax)\n\t"
+ "movaps %%xmm10,136(%%rax)\n\t"
+ "movaps %%xmm11,152(%%rax)\n\t"
+ "movaps %%xmm12,168(%%rax)\n\t"
+ "movaps %%xmm13,184(%%rax)\n\t"
+ "movaps %%xmm14,200(%%rax)\n\t"
+ "movaps %%xmm15,216(%%rax)\n\t"
+ /* 8 bytes of padding for alignment */
#endif
#if defined(ENABLE_UNWINDING)
@@ -506,18 +515,20 @@ StgRunIsImplementedInAssembler(void)
"movq 32(%%rsp),%%r14\n\t"
"movq 40(%%rsp),%%r15\n\t"
#if defined(mingw32_HOST_OS)
- "movq 48(%%rsp),%%rdi\n\t"
- "movq 56(%%rsp),%%rsi\n\t"
- "movq 64(%%rsp),%%xmm6\n\t"
- "movq 72(%%rsp),%%xmm7\n\t"
- "movq 80(%%rsp),%%xmm8\n\t"
- "movq 88(%%rsp),%%xmm9\n\t"
- "movq 96(%%rsp),%%xmm10\n\t"
- "movq 104(%%rsp),%%xmm11\n\t"
- "movq 112(%%rsp),%%xmm12\n\t"
- "movq 120(%%rsp),%%xmm13\n\t"
- "movq 128(%%rsp),%%xmm14\n\t"
- "movq 136(%%rsp),%%xmm15\n\t"
+ "movq 48(%%rsp),%%rdi\n\t"
+ "movq 56(%%rsp),%%rsi\n\t"
+ /* 8 bytes of padding for alignment */
+ "movaps 72(%%rsp),%%xmm6\n\t"
+ "movaps 88(%%rsp),%%xmm7\n\t"
+ "movaps 104(%%rsp),%%xmm8\n\t"
+ "movaps 120(%%rsp),%%xmm9\n\t"
+ "movaps 136(%%rsp),%%xmm10\n\t"
+ "movaps 152(%%rsp),%%xmm11\n\t"
+ "movaps 168(%%rsp),%%xmm12\n\t"
+ "movaps 184(%%rsp),%%xmm13\n\t"
+ "movaps 200(%%rsp),%%xmm14\n\t"
+ "movaps 216(%%rsp),%%xmm15\n\t"
+ /* 8 bytes of padding for alignment */
#endif
"addq %1, %%rsp\n\t"
"retq"
diff --git a/rts/include/rts/Constants.h b/rts/include/rts/Constants.h
index b601999f88..3bf5a7a2d5 100644
--- a/rts/include/rts/Constants.h
+++ b/rts/include/rts/Constants.h
@@ -123,7 +123,7 @@
-------------------------------------------------------------------------- */
#if defined(x86_64_HOST_ARCH)
# if defined(mingw32_HOST_OS)
-# define STG_RUN_STACK_FRAME_SIZE 144
+# define STG_RUN_STACK_FRAME_SIZE 240
# else
# define STG_RUN_STACK_FRAME_SIZE 48
# endif