diff options
author | Ben Gamari <ben@smart-cactus.org> | 2022-02-06 21:45:41 -0500 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-02-13 03:26:14 -0500 |
commit | 191dfd2d88f6d2fd7a879f14da26fe88e616c8ea (patch) | |
tree | 6c3996135341d7e74d43788a20fe865003d4e480 | |
parent | afdfaff0fd4ed8162997fe904924d2ce528be285 (diff) | |
download | haskell-191dfd2d88f6d2fd7a879f14da26fe88e616c8ea.tar.gz |
rts/adjustor: Split Windows path out of NativeAmd64
-rw-r--r-- | rts/adjustor/NativeAmd64.c | 165 | ||||
-rw-r--r-- | rts/adjustor/NativeAmd64Mingw.c | 220 | ||||
-rw-r--r-- | rts/ghc.mk | 4 | ||||
-rw-r--r-- | rts/rts.cabal.in | 5 |
4 files changed, 229 insertions, 165 deletions
diff --git a/rts/adjustor/NativeAmd64.c b/rts/adjustor/NativeAmd64.c index 2d59bd0bc0..045d984b2d 100644 --- a/rts/adjustor/NativeAmd64.c +++ b/rts/adjustor/NativeAmd64.c @@ -37,12 +37,6 @@ static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void) ".globl " UNDERSCORE "obscure_ccall_ret_code\n" UNDERSCORE "obscure_ccall_ret_code:\n\t" "addq $0x8, %rsp\n\t" -#if defined(mingw32_HOST_OS) - /* On Win64, we had to put the original return address after the - arg 1-4 spill slots, ro now we have to move it back */ - "movq 0x20(%rsp), %rcx\n" - "movq %rcx, (%rsp)\n" -#endif /* defined(mingw32_HOST_OS) */ "ret" ); } @@ -58,150 +52,6 @@ createAdjustor(int cconv, StgStablePtr hptr, switch (cconv) { case 1: /* _ccall */ -#if defined(mingw32_HOST_OS) - /* - stack at call: - argn - ... - arg5 - return address - %rcx,%rdx,%r8,%r9 = arg1..arg4 - - if there are <4 integer args, then we can just push the - StablePtr into %rcx and shuffle the other args up. - - If there are >=4 integer args, then we have to flush one arg - to the stack, and arrange to adjust the stack ptr on return. - The stack will be rearranged to this: - - argn - ... - arg5 - return address *** <-- dummy arg in stub fn. - arg4 - obscure_ccall_ret_code - - This unfortunately means that the type of the stub function - must have a dummy argument for the original return address - pointer inserted just after the 4th integer argument. - - Code for the simple case: - - 0: 4d 89 c1 mov %r8,%r9 - 3: 49 89 d0 mov %rdx,%r8 - 6: 48 89 ca mov %rcx,%rdx - 9: f2 0f 10 da movsd %xmm2,%xmm3 - d: f2 0f 10 d1 movsd %xmm1,%xmm2 - 11: f2 0f 10 c8 movsd %xmm0,%xmm1 - 15: 48 8b 0d 0c 00 00 00 mov 0xc(%rip),%rcx # 28 <.text+0x28> - 1c: ff 25 0e 00 00 00 jmpq *0xe(%rip) # 30 <.text+0x30> - 22: 90 nop - [...] - - - And the version for >=4 integer arguments: - -[we want to push the 4th argument (either %r9 or %xmm3, depending on - whether it is a floating arg or not) and the return address onto the - stack. However, slots 1-4 are reserved for code we call to spill its - args 1-4 into, so we can't just push them onto the bottom of the stack. - So first put the 4th argument onto the stack, above what will be the - spill slots.] - 0: 48 83 ec 08 sub $0x8,%rsp -[if non-floating arg, then do this:] - 4: 90 nop - 5: 4c 89 4c 24 20 mov %r9,0x20(%rsp) -[else if floating arg then do this:] - 4: f2 0f 11 5c 24 20 movsd %xmm3,0x20(%rsp) -[end if] -[Now push the new return address onto the stack] - a: ff 35 30 00 00 00 pushq 0x30(%rip) # 40 <.text+0x40> -[But the old return address has been moved up into a spill slot, so - we need to move it above them] - 10: 4c 8b 4c 24 10 mov 0x10(%rsp),%r9 - 15: 4c 89 4c 24 30 mov %r9,0x30(%rsp) -[Now we do the normal register shuffle-up etc] - 1a: 4d 89 c1 mov %r8,%r9 - 1d: 49 89 d0 mov %rdx,%r8 - 20: 48 89 ca mov %rcx,%rdx - 23: f2 0f 10 da movsd %xmm2,%xmm3 - 27: f2 0f 10 d1 movsd %xmm1,%xmm2 - 2b: f2 0f 10 c8 movsd %xmm0,%xmm1 - 2f: 48 8b 0d 12 00 00 00 mov 0x12(%rip),%rcx # 48 <.text+0x48> - 36: ff 25 14 00 00 00 jmpq *0x14(%rip) # 50 <.text+0x50> - 3c: 90 nop - 3d: 90 nop - 3e: 90 nop - 3f: 90 nop - [...] - - */ - { - // determine whether we have 4 or more integer arguments, - // and therefore need to flush one to the stack. - if ((typeString[0] == '\0') || - (typeString[1] == '\0') || - (typeString[2] == '\0') || - (typeString[3] == '\0')) { - - ExecPage *page = allocateExecPage(); - if (page == NULL) { - barf("createAdjustor: failed to allocate executable page\n"); - } - StgWord8 *adj_code = (StgWord8*) page; - - *(StgInt32 *)adj_code = 0x49c1894d; - *(StgInt32 *)(adj_code+0x4) = 0x8948d089; - *(StgInt32 *)(adj_code+0x8) = 0x100ff2ca; - *(StgInt32 *)(adj_code+0xc) = 0x100ff2da; - *(StgInt32 *)(adj_code+0x10) = 0x100ff2d1; - *(StgInt32 *)(adj_code+0x14) = 0x0d8b48c8; - *(StgInt32 *)(adj_code+0x18) = 0x0000000c; - - *(StgInt32 *)(adj_code+0x1c) = 0x000e25ff; - *(StgInt32 *)(adj_code+0x20) = 0x00000000; - *(StgInt64 *)(adj_code+0x28) = (StgInt64)hptr; - *(StgInt64 *)(adj_code+0x30) = (StgInt64)wptr; - - freezeExecPage(page); - return page; - } - else - { - bool fourthFloating = (typeString[3] == 'f' || typeString[3] == 'd'); - ExecPage *page = allocateExecPage(); - if (page == NULL) { - barf("createAdjustor: failed to allocate executable page\n"); - } - StgWord8 *adj_code = (StgWord8*) page; - - *(StgInt32 *)adj_code = 0x08ec8348; - *(StgInt32 *)(adj_code+0x4) = fourthFloating ? 0x5c110ff2 - : 0x4c894c90; - *(StgInt32 *)(adj_code+0x8) = 0x35ff2024; - *(StgInt32 *)(adj_code+0xc) = 0x00000030; - *(StgInt32 *)(adj_code+0x10) = 0x244c8b4c; - *(StgInt32 *)(adj_code+0x14) = 0x4c894c10; - *(StgInt32 *)(adj_code+0x18) = 0x894d3024; - *(StgInt32 *)(adj_code+0x1c) = 0xd08949c1; - *(StgInt32 *)(adj_code+0x20) = 0xf2ca8948; - *(StgInt32 *)(adj_code+0x24) = 0xf2da100f; - *(StgInt32 *)(adj_code+0x28) = 0xf2d1100f; - *(StgInt32 *)(adj_code+0x2c) = 0x48c8100f; - *(StgInt32 *)(adj_code+0x30) = 0x00120d8b; - *(StgInt32 *)(adj_code+0x34) = 0x25ff0000; - *(StgInt32 *)(adj_code+0x38) = 0x00000014; - *(StgInt32 *)(adj_code+0x3c) = 0x90909090; - *(StgInt64 *)(adj_code+0x40) = (StgInt64)obscure_ccall_ret_code; - *(StgInt64 *)(adj_code+0x48) = (StgInt64)hptr; - *(StgInt64 *)(adj_code+0x50) = (StgInt64)wptr; - - freezeExecPage(page); - return page; - } - } - -# else /* stack at call: argn @@ -316,7 +166,6 @@ createAdjustor(int cconv, StgStablePtr hptr, return page; } } -#endif /* defined(mingw32_HOST_OS) */ default: barf("createAdjustor: Unsupported calling convention"); @@ -327,21 +176,9 @@ createAdjustor(int cconv, StgStablePtr hptr, void freeHaskellFunctionPtr(void* ptr) { if ( *(StgWord16 *)ptr == 0x894d ) { - freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+ -#if defined(mingw32_HOST_OS) - 0x28 -#else - 0x20 -#endif - )); -#if !defined(mingw32_HOST_OS) + freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x20)); } else if ( *(StgWord16 *)ptr == 0x5141 ) { freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x30)); -#endif -#if defined(mingw32_HOST_OS) - } else if ( *(StgWord16 *)ptr == 0x8348 ) { - freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x48)); -#endif } else { errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); return; diff --git a/rts/adjustor/NativeAmd64Mingw.c b/rts/adjustor/NativeAmd64Mingw.c new file mode 100644 index 0000000000..555d539c63 --- /dev/null +++ b/rts/adjustor/NativeAmd64Mingw.c @@ -0,0 +1,220 @@ +/* ----------------------------------------------------------------------------- + * AMD64/Windows architecture adjustor thunk logic. + * ---------------------------------------------------------------------------*/ + +#include "rts/PosixSource.h" +#include "Rts.h" + +#include "RtsUtils.h" +#include "StablePtr.h" + +#if defined(LEADING_UNDERSCORE) +#define UNDERSCORE "_" +#else +#define UNDERSCORE "" +#endif + +/* + Now here's something obscure for you: + + When generating an adjustor thunk that uses the C calling + convention, we have to make sure that the thunk kicks off + the process of jumping into Haskell with a tail jump. Why? + Because as a result of jumping in into Haskell we may end + up freeing the very adjustor thunk we came from using + freeHaskellFunctionPtr(). Hence, we better not return to + the adjustor code on our way out, since it could by then + point to junk. + + The fix is readily at hand, just include the opcodes + for the C stack fixup code that we need to perform when + returning in some static piece of memory and arrange + to return to it before tail jumping from the adjustor thunk. +*/ +static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void) +{ + __asm__ ( + ".globl " UNDERSCORE "obscure_ccall_ret_code\n" + UNDERSCORE "obscure_ccall_ret_code:\n\t" + "addq $0x8, %rsp\n\t" + /* On Win64, we had to put the original return address after the + arg 1-4 spill slots, ro now we have to move it back */ + "movq 0x20(%rsp), %rcx\n" + "movq %rcx, (%rsp)\n" + "ret" + ); +} + +extern void obscure_ccall_ret_code(void); + +void* +createAdjustor(int cconv, StgStablePtr hptr, + StgFunPtr wptr, + char *typeString + ) +{ + switch (cconv) + { + case 1: /* _ccall */ + /* + stack at call: + argn + ... + arg5 + return address + %rcx,%rdx,%r8,%r9 = arg1..arg4 + + if there are <4 integer args, then we can just push the + StablePtr into %rcx and shuffle the other args up. + + If there are >=4 integer args, then we have to flush one arg + to the stack, and arrange to adjust the stack ptr on return. + The stack will be rearranged to this: + + argn + ... + arg5 + return address *** <-- dummy arg in stub fn. + arg4 + obscure_ccall_ret_code + + This unfortunately means that the type of the stub function + must have a dummy argument for the original return address + pointer inserted just after the 4th integer argument. + + Code for the simple case: + + 0: 4d 89 c1 mov %r8,%r9 + 3: 49 89 d0 mov %rdx,%r8 + 6: 48 89 ca mov %rcx,%rdx + 9: f2 0f 10 da movsd %xmm2,%xmm3 + d: f2 0f 10 d1 movsd %xmm1,%xmm2 + 11: f2 0f 10 c8 movsd %xmm0,%xmm1 + 15: 48 8b 0d 0c 00 00 00 mov 0xc(%rip),%rcx # 28 <.text+0x28> + 1c: ff 25 0e 00 00 00 jmpq *0xe(%rip) # 30 <.text+0x30> + 22: 90 nop + [...] + + + And the version for >=4 integer arguments: + +[we want to push the 4th argument (either %r9 or %xmm3, depending on + whether it is a floating arg or not) and the return address onto the + stack. However, slots 1-4 are reserved for code we call to spill its + args 1-4 into, so we can't just push them onto the bottom of the stack. + So first put the 4th argument onto the stack, above what will be the + spill slots.] + 0: 48 83 ec 08 sub $0x8,%rsp +[if non-floating arg, then do this:] + 4: 90 nop + 5: 4c 89 4c 24 20 mov %r9,0x20(%rsp) +[else if floating arg then do this:] + 4: f2 0f 11 5c 24 20 movsd %xmm3,0x20(%rsp) +[end if] +[Now push the new return address onto the stack] + a: ff 35 30 00 00 00 pushq 0x30(%rip) # 40 <.text+0x40> +[But the old return address has been moved up into a spill slot, so + we need to move it above them] + 10: 4c 8b 4c 24 10 mov 0x10(%rsp),%r9 + 15: 4c 89 4c 24 30 mov %r9,0x30(%rsp) +[Now we do the normal register shuffle-up etc] + 1a: 4d 89 c1 mov %r8,%r9 + 1d: 49 89 d0 mov %rdx,%r8 + 20: 48 89 ca mov %rcx,%rdx + 23: f2 0f 10 da movsd %xmm2,%xmm3 + 27: f2 0f 10 d1 movsd %xmm1,%xmm2 + 2b: f2 0f 10 c8 movsd %xmm0,%xmm1 + 2f: 48 8b 0d 12 00 00 00 mov 0x12(%rip),%rcx # 48 <.text+0x48> + 36: ff 25 14 00 00 00 jmpq *0x14(%rip) # 50 <.text+0x50> + 3c: 90 nop + 3d: 90 nop + 3e: 90 nop + 3f: 90 nop + [...] + + */ + { + // determine whether we have 4 or more integer arguments, + // and therefore need to flush one to the stack. + if ((typeString[0] == '\0') || + (typeString[1] == '\0') || + (typeString[2] == '\0') || + (typeString[3] == '\0')) { + + ExecPage *page = allocateExecPage(); + if (page == NULL) { + barf("createAdjustor: failed to allocate executable page\n"); + } + StgWord8 *adj_code = (StgWord8*) page; + + *(StgInt32 *)adj_code = 0x49c1894d; + *(StgInt32 *)(adj_code+0x4) = 0x8948d089; + *(StgInt32 *)(adj_code+0x8) = 0x100ff2ca; + *(StgInt32 *)(adj_code+0xc) = 0x100ff2da; + *(StgInt32 *)(adj_code+0x10) = 0x100ff2d1; + *(StgInt32 *)(adj_code+0x14) = 0x0d8b48c8; + *(StgInt32 *)(adj_code+0x18) = 0x0000000c; + + *(StgInt32 *)(adj_code+0x1c) = 0x000e25ff; + *(StgInt32 *)(adj_code+0x20) = 0x00000000; + *(StgInt64 *)(adj_code+0x28) = (StgInt64)hptr; + *(StgInt64 *)(adj_code+0x30) = (StgInt64)wptr; + + freezeExecPage(page); + return page; + } + else + { + bool fourthFloating = (typeString[3] == 'f' || typeString[3] == 'd'); + ExecPage *page = allocateExecPage(); + if (page == NULL) { + barf("createAdjustor: failed to allocate executable page\n"); + } + StgWord8 *adj_code = (StgWord8*) page; + + *(StgInt32 *)adj_code = 0x08ec8348; + *(StgInt32 *)(adj_code+0x4) = fourthFloating ? 0x5c110ff2 + : 0x4c894c90; + *(StgInt32 *)(adj_code+0x8) = 0x35ff2024; + *(StgInt32 *)(adj_code+0xc) = 0x00000030; + *(StgInt32 *)(adj_code+0x10) = 0x244c8b4c; + *(StgInt32 *)(adj_code+0x14) = 0x4c894c10; + *(StgInt32 *)(adj_code+0x18) = 0x894d3024; + *(StgInt32 *)(adj_code+0x1c) = 0xd08949c1; + *(StgInt32 *)(adj_code+0x20) = 0xf2ca8948; + *(StgInt32 *)(adj_code+0x24) = 0xf2da100f; + *(StgInt32 *)(adj_code+0x28) = 0xf2d1100f; + *(StgInt32 *)(adj_code+0x2c) = 0x48c8100f; + *(StgInt32 *)(adj_code+0x30) = 0x00120d8b; + *(StgInt32 *)(adj_code+0x34) = 0x25ff0000; + *(StgInt32 *)(adj_code+0x38) = 0x00000014; + *(StgInt32 *)(adj_code+0x3c) = 0x90909090; + *(StgInt64 *)(adj_code+0x40) = (StgInt64)obscure_ccall_ret_code; + *(StgInt64 *)(adj_code+0x48) = (StgInt64)hptr; + *(StgInt64 *)(adj_code+0x50) = (StgInt64)wptr; + + freezeExecPage(page); + return page; + } + } + + default: + barf("createAdjustor: Unsupported calling convention"); + break; + } +} + +void freeHaskellFunctionPtr(void* ptr) +{ + if ( *(StgWord16 *)ptr == 0x894d ) { + freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x28)); + } else if ( *(StgWord16 *)ptr == 0x8348 ) { + freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x48)); + } else { + errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); + return; + } + + freeExecPage((ExecPage *) ptr); +} + diff --git a/rts/ghc.mk b/rts/ghc.mk index 6d6ff4bb90..370c88b01f 100644 --- a/rts/ghc.mk +++ b/rts/ghc.mk @@ -72,7 +72,11 @@ rts_S_SRCS += rts/AdjustorAsm.S rts_C_SRCS += rts/adjustor/Nativei386.c else ifneq "$(findstring $(TargetArch_CPP), x86_64)" "" +ifneq "$(findstring $(TargetOS_CPP), mingw32)" "" +rts_C_SRCS += rts/adjustor/NativeAmd64Mingw.c +else rts_C_SRCS += rts/adjustor/NativeAmd64.c +endif else ifneq "$(findstring $(TargetArch_CPP), powerpc64le powerpc)" "" rts_S_SRCS += rts/AdjustorAsm.S diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in index e2abeb5df1..0c40c2f6bb 100644 --- a/rts/rts.cabal.in +++ b/rts/rts.cabal.in @@ -455,7 +455,10 @@ library asm-sources: AdjustorAsm.S c-sources: adjustor/Nativei386.c if arch(x86_64) - c-sources: adjustor/NativeAmd64.c + if opsys(mingw32) + c-sources: adjustor/NativeAmd64Mingw.c + else + c-sources: adjustor/NativeAmd64.c if arch(ppc) || arch(ppc64) asm-sources: AdjustorAsm.S c-sources: adjustor/NativePowerPC.c |