diff options
author | Ben Gamari <ben@smart-cactus.org> | 2022-05-03 13:13:08 -0400 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-05-05 20:24:37 -0400 |
commit | be7102e5a75a652b7c073360cc1c3766e8f984bf (patch) | |
tree | a78bb7017d34b8ac55337d319d29447667bfc7f6 | |
parent | aacb15a3d68216cac0729e15d708be2f13b0eae8 (diff) | |
download | haskell-be7102e5a75a652b7c073360cc1c3766e8f984bf.tar.gz |
rts: Ensure that XMM registers are preserved on Win64
Previously we only preserved the lower 64 bits of the callee-saved
128-bit XMM registers (xmm6-xmm15), in violation of the Win64 calling convention.
Save and restore the full 128-bit registers instead.
Fixes #21465.
-rw-r--r-- | compiler/GHC/CmmToAsm/X86/Regs.hs | 6 | ||||
-rw-r--r-- | rts/StgCRun.c | 63 | ||||
-rw-r--r-- | rts/include/rts/Constants.h | 2 |
3 files changed, 41 insertions, 30 deletions
diff --git a/compiler/GHC/CmmToAsm/X86/Regs.hs b/compiler/GHC/CmmToAsm/X86/Regs.hs index cc880f6e98..38d4fdc422 100644 --- a/compiler/GHC/CmmToAsm/X86/Regs.hs +++ b/compiler/GHC/CmmToAsm/X86/Regs.hs @@ -382,9 +382,9 @@ callClobberedRegs platform | target32Bit platform = [eax,ecx,edx] ++ map regSingle (floatregnos platform) | platformOS platform == OSMinGW32 = [rax,rcx,rdx,r8,r9,r10,r11] - -- Only xmm0-5 are caller-saves registers on 64bit windows. - -- ( https://docs.microsoft.com/en-us/cpp/build/register-usage ) - -- For details check the Win64 ABI. + -- Only xmm0-5 are caller-saves registers on 64-bit windows. + -- For details check the Win64 ABI: + -- https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions ++ map xmm [0 .. 5] | otherwise -- all xmm regs are caller-saves diff --git a/rts/StgCRun.c b/rts/StgCRun.c index 7f5b6d169b..4fefc326e5 100644 --- a/rts/StgCRun.c +++ b/rts/StgCRun.c @@ -115,8 +115,9 @@ StgFunPtr StgReturn(void) * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * On X86 (both 32bit and 64bit) we keep the stack aligned on function calls at * a 16-byte boundary. This is done because on a number of architectures the - * ABI requires this (x64, Mac OSX 32bit/64bit) as well as interfacing with - * other libraries through the FFI. + * ABI requires this (e.g. the System V AMD64 ABI, Mac OS X 32-bit/64-bit ABIs, + * and the Win64 ABI) as well as interfacing with * other libraries through the + * FFI. * * As part of this arrangement we must maintain the stack at a 16-byte boundary * - word_size-bytes (so 16n - 4 for i386 and 16n - 8 for x64) on entry to a @@ -405,19 +406,27 @@ StgRunIsImplementedInAssembler(void) * Additional callee saved registers on Win64. This must match * callClobberedRegisters in compiler/GHC/CmmToAsm/X86/Regs.hs as * both represent the Win64 calling convention. + * + * Note that we must save the entire 128-bit width of the XMM + * registers, as noted in #21465. 
Moreover, note that, due to the + * presence of the return address on the stack, %rsp+8 is + * 16-byte aligned. Since MOVAPS requires memory operands to be aligned + * to 16-bytes, we must add a word of padding here. */ - "movq %%rdi,48(%%rax)\n\t" - "movq %%rsi,56(%%rax)\n\t" - "movq %%xmm6, 64(%%rax)\n\t" - "movq %%xmm7, 72(%%rax)\n\t" - "movq %%xmm8, 80(%%rax)\n\t" - "movq %%xmm9, 88(%%rax)\n\t" - "movq %%xmm10, 96(%%rax)\n\t" - "movq %%xmm11,104(%%rax)\n\t" - "movq %%xmm12,112(%%rax)\n\t" - "movq %%xmm13,120(%%rax)\n\t" - "movq %%xmm14,128(%%rax)\n\t" - "movq %%xmm15,136(%%rax)\n\t" + "movq %%rdi, 48(%%rax)\n\t" + "movq %%rsi, 56(%%rax)\n\t" + /* 8 bytes of padding for alignment */ + "movaps %%xmm6, 72(%%rax)\n\t" + "movaps %%xmm7, 88(%%rax)\n\t" + "movaps %%xmm8, 104(%%rax)\n\t" + "movaps %%xmm9, 120(%%rax)\n\t" + "movaps %%xmm10,136(%%rax)\n\t" + "movaps %%xmm11,152(%%rax)\n\t" + "movaps %%xmm12,168(%%rax)\n\t" + "movaps %%xmm13,184(%%rax)\n\t" + "movaps %%xmm14,200(%%rax)\n\t" + "movaps %%xmm15,216(%%rax)\n\t" + /* 8 bytes of padding for alignment */ #endif #if defined(ENABLE_UNWINDING) @@ -506,18 +515,20 @@ StgRunIsImplementedInAssembler(void) "movq 32(%%rsp),%%r14\n\t" "movq 40(%%rsp),%%r15\n\t" #if defined(mingw32_HOST_OS) - "movq 48(%%rsp),%%rdi\n\t" - "movq 56(%%rsp),%%rsi\n\t" - "movq 64(%%rsp),%%xmm6\n\t" - "movq 72(%%rsp),%%xmm7\n\t" - "movq 80(%%rsp),%%xmm8\n\t" - "movq 88(%%rsp),%%xmm9\n\t" - "movq 96(%%rsp),%%xmm10\n\t" - "movq 104(%%rsp),%%xmm11\n\t" - "movq 112(%%rsp),%%xmm12\n\t" - "movq 120(%%rsp),%%xmm13\n\t" - "movq 128(%%rsp),%%xmm14\n\t" - "movq 136(%%rsp),%%xmm15\n\t" + "movq 48(%%rsp),%%rdi\n\t" + "movq 56(%%rsp),%%rsi\n\t" + /* 8 bytes of padding for alignment */ + "movaps 72(%%rsp),%%xmm6\n\t" + "movaps 88(%%rsp),%%xmm7\n\t" + "movaps 104(%%rsp),%%xmm8\n\t" + "movaps 120(%%rsp),%%xmm9\n\t" + "movaps 136(%%rsp),%%xmm10\n\t" + "movaps 152(%%rsp),%%xmm11\n\t" + "movaps 168(%%rsp),%%xmm12\n\t" + "movaps 184(%%rsp),%%xmm13\n\t" + "movaps 
200(%%rsp),%%xmm14\n\t" + "movaps 216(%%rsp),%%xmm15\n\t" + /* 8 bytes of padding for alignment */ #endif "addq %1, %%rsp\n\t" "retq" diff --git a/rts/include/rts/Constants.h b/rts/include/rts/Constants.h index b601999f88..3bf5a7a2d5 100644 --- a/rts/include/rts/Constants.h +++ b/rts/include/rts/Constants.h @@ -123,7 +123,7 @@ -------------------------------------------------------------------------- */ #if defined(x86_64_HOST_ARCH) # if defined(mingw32_HOST_OS) -# define STG_RUN_STACK_FRAME_SIZE 144 +# define STG_RUN_STACK_FRAME_SIZE 240 # else # define STG_RUN_STACK_FRAME_SIZE 48 # endif |