diff options
-rw-r--r-- | compiler/nativeGen/X86/Regs.hs | 6 | ||||
-rw-r--r-- | includes/rts/Constants.h | 8 | ||||
-rw-r--r-- | rts/StgCRun.c | 33 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T14619.hs | 46 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/T14619.stdout | 1 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_run/all.T | 1 |
6 files changed, 86 insertions, 9 deletions
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs index 169d4020a0..d6983b7009 100644 --- a/compiler/nativeGen/X86/Regs.hs +++ b/compiler/nativeGen/X86/Regs.hs @@ -238,7 +238,6 @@ xmmregnos platform = [firstxmm .. lastxmm platform] floatregnos :: Platform -> [RegNo] floatregnos platform = fakeregnos ++ xmmregnos platform - -- argRegs is the set of regs which are read for an n-argument call to C. -- For archs which pass all args on the stack (x86), is empty. -- Sparc passes up to the first 6 args in regs. @@ -408,7 +407,10 @@ callClobberedRegs platform | target32Bit platform = [eax,ecx,edx] ++ map regSingle (floatregnos platform) | platformOS platform == OSMinGW32 = [rax,rcx,rdx,r8,r9,r10,r11] - ++ map regSingle (floatregnos platform) + -- Only xmm0-5 are caller-saves registers on 64bit windows. + -- ( https://docs.microsoft.com/en-us/cpp/build/register-usage ) + -- For details check the Win64 ABI. + ++ map regSingle fakeregnos ++ map xmm [0 .. 5] | otherwise -- all xmm regs are caller-saves -- caller-saves registers diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h index 27097bf45b..5774bd7a0e 100644 --- a/includes/rts/Constants.h +++ b/includes/rts/Constants.h @@ -113,11 +113,15 @@ /* ----------------------------------------------------------------------------- How large is the stack frame saved by StgRun? world. Used in StgCRun.c. + + The size has to be enough to save the registers (see StgCRun) + plus padding if the result is not 16 byte aligned. + See the Note [Stack Alignment on X86] in StgCRun.c for details. + -------------------------------------------------------------------------- */ #if defined(x86_64_HOST_ARCH) # if defined(mingw32_HOST_OS) -/* 8 larger than necessary to make the alignment right*/ -# define STG_RUN_STACK_FRAME_SIZE 80 +# define STG_RUN_STACK_FRAME_SIZE 144 # else # define STG_RUN_STACK_FRAME_SIZE 48 # endif diff --git a/rts/StgCRun.c b/rts/StgCRun.c index 5460598ced..ab66c649fc 100644 --- a/rts/StgCRun.c +++ b/rts/StgCRun.c @@ -236,7 +236,7 @@ StgRunIsImplementedInAssembler(void) ); } -#endif +#endif // defined(i386_HOST_ARCH) /* ---------------------------------------------------------------------------- x86-64 is almost the same as plain x86. @@ -279,9 +279,23 @@ StgRunIsImplementedInAssembler(void) "movq %%r14,32(%%rax)\n\t" "movq %%r15,40(%%rax)\n\t" #if defined(mingw32_HOST_OS) + /* + * Additional callee saved registers on Win64. This must match + * callClobberedRegisters in compiler/nativeGen/X86/Regs.hs as + * both represent the Win64 calling convention. + */ "movq %%rdi,48(%%rax)\n\t" "movq %%rsi,56(%%rax)\n\t" - "movq %%xmm6,64(%%rax)\n\t" + "movq %%xmm6, 64(%%rax)\n\t" + "movq %%xmm7, 72(%%rax)\n\t" + "movq %%xmm8, 80(%%rax)\n\t" + "movq %%xmm9, 88(%%rax)\n\t" + "movq %%xmm10, 96(%%rax)\n\t" + "movq %%xmm11,104(%%rax)\n\t" + "movq %%xmm12,112(%%rax)\n\t" + "movq %%xmm13,120(%%rax)\n\t" + "movq %%xmm14,128(%%rax)\n\t" + "movq %%xmm15,136(%%rax)\n\t" #endif /* * Set BaseReg @@ -317,9 +331,18 @@ StgRunIsImplementedInAssembler(void) "movq 32(%%rsp),%%r14\n\t" "movq 40(%%rsp),%%r15\n\t" #if defined(mingw32_HOST_OS) - "movq 48(%%rsp),%%rdi\n\t" - "movq 56(%%rsp),%%rsi\n\t" - "movq 64(%%rsp),%%xmm6\n\t" + "movq 48(%%rsp),%%rdi\n\t" + "movq 56(%%rsp),%%rsi\n\t" + "movq 64(%%rsp),%%xmm6\n\t" + "movq 72(%%rax),%%xmm7\n\t" + "movq 80(%%rax),%%xmm8\n\t" + "movq 88(%%rax),%%xmm9\n\t" + "movq 96(%%rax),%%xmm10\n\t" + "movq 104(%%rax),%%xmm11\n\t" + "movq 112(%%rax),%%xmm12\n\t" + "movq 120(%%rax),%%xmm13\n\t" + "movq 128(%%rax),%%xmm14\n\t" + "movq 136(%%rax),%%xmm15\n\t" #endif "addq %1, %%rsp\n\t" "retq" diff --git a/testsuite/tests/codeGen/should_run/T14619.hs b/testsuite/tests/codeGen/should_run/T14619.hs new file mode 100644 index 0000000000..7af16dff67 --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T14619.hs @@ -0,0 +1,46 @@ +{-# OPTIONS_GHC -O1 #-} + +{- + On windows some xmm registers are callee saved. This means + they can't be used as scratch registers before a call to C. + + In #14619 this wasn't respected which lead to a wrong value + ending up in xmm6 and being returned in the final result. + + This code compiles to a non trivial fp computation followed + by a call to sqrt at O1+. If xmm6 isn't properly handled it + will be used as a scratch register failing the test. + + The original code used regular sqrt which on 8.2 generated + a C call in the backend. To imitate this behaviour on 8.4+ + we force a call to a C function instead. +-} + +module Main (main) where + + + +import Prelude hiding((*>), (<*)) +import Foreign.C +import Unsafe.Coerce + +foreign import ccall unsafe "sqrt" call_sqrt :: CDouble -> CDouble + +type V3 = (Double, Double, Double) + +absf :: V3 -> V3 -> Double +absf (x, y, z) (x', y', z') = x*x' +y*y'+z*z' + + +{-# NOINLINE sphereIntersection #-} +sphereIntersection :: V3 -> V3 -> (V3) +sphereIntersection orig dir@(_, _, dirz) + | b < 0 = undefined + | t1 > 0 = dir + | t1 < 0 = orig + | otherwise = undefined + where b = orig `absf` dir + sqrtDisc = realToFrac . call_sqrt $ CDouble b + t1 = b - sqrtDisc + +main = print $ sphereIntersection (11, 22, 33) (44, 55, 66) diff --git a/testsuite/tests/codeGen/should_run/T14619.stdout b/testsuite/tests/codeGen/should_run/T14619.stdout new file mode 100644 index 0000000000..a11c04de2e --- /dev/null +++ b/testsuite/tests/codeGen/should_run/T14619.stdout @@ -0,0 +1 @@ +(44.0,55.0,66.0) diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T index 42d8a2f767..145365e802 100644 --- a/testsuite/tests/codeGen/should_run/all.T +++ b/testsuite/tests/codeGen/should_run/all.T @@ -165,3 +165,4 @@ test('T13825-unit', extra_run_opts('"' + config.libdir + '"'), compile_and_run, ['-package ghc']) +test('T14619', normal, compile_and_run, ['']) |