summaryrefslogtreecommitdiff
path: root/rts
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2022-02-06 21:45:41 -0500
committerMarge Bot <ben+marge-bot@smart-cactus.org>2022-02-13 03:26:14 -0500
commit191dfd2d88f6d2fd7a879f14da26fe88e616c8ea (patch)
tree6c3996135341d7e74d43788a20fe865003d4e480 /rts
parentafdfaff0fd4ed8162997fe904924d2ce528be285 (diff)
downloadhaskell-191dfd2d88f6d2fd7a879f14da26fe88e616c8ea.tar.gz
rts/adjustor: Split Windows path out of NativeAmd64
Diffstat (limited to 'rts')
-rw-r--r--rts/adjustor/NativeAmd64.c165
-rw-r--r--rts/adjustor/NativeAmd64Mingw.c220
-rw-r--r--rts/ghc.mk4
-rw-r--r--rts/rts.cabal.in5
4 files changed, 229 insertions, 165 deletions
diff --git a/rts/adjustor/NativeAmd64.c b/rts/adjustor/NativeAmd64.c
index 2d59bd0bc0..045d984b2d 100644
--- a/rts/adjustor/NativeAmd64.c
+++ b/rts/adjustor/NativeAmd64.c
@@ -37,12 +37,6 @@ static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
".globl " UNDERSCORE "obscure_ccall_ret_code\n"
UNDERSCORE "obscure_ccall_ret_code:\n\t"
"addq $0x8, %rsp\n\t"
-#if defined(mingw32_HOST_OS)
- /* On Win64, we had to put the original return address after the
- arg 1-4 spill slots, ro now we have to move it back */
- "movq 0x20(%rsp), %rcx\n"
- "movq %rcx, (%rsp)\n"
-#endif /* defined(mingw32_HOST_OS) */
"ret"
);
}
@@ -58,150 +52,6 @@ createAdjustor(int cconv, StgStablePtr hptr,
switch (cconv)
{
case 1: /* _ccall */
-#if defined(mingw32_HOST_OS)
- /*
- stack at call:
- argn
- ...
- arg5
- return address
- %rcx,%rdx,%r8,%r9 = arg1..arg4
-
- if there are <4 integer args, then we can just push the
- StablePtr into %rcx and shuffle the other args up.
-
- If there are >=4 integer args, then we have to flush one arg
- to the stack, and arrange to adjust the stack ptr on return.
- The stack will be rearranged to this:
-
- argn
- ...
- arg5
- return address *** <-- dummy arg in stub fn.
- arg4
- obscure_ccall_ret_code
-
- This unfortunately means that the type of the stub function
- must have a dummy argument for the original return address
- pointer inserted just after the 4th integer argument.
-
- Code for the simple case:
-
- 0: 4d 89 c1 mov %r8,%r9
- 3: 49 89 d0 mov %rdx,%r8
- 6: 48 89 ca mov %rcx,%rdx
- 9: f2 0f 10 da movsd %xmm2,%xmm3
- d: f2 0f 10 d1 movsd %xmm1,%xmm2
- 11: f2 0f 10 c8 movsd %xmm0,%xmm1
- 15: 48 8b 0d 0c 00 00 00 mov 0xc(%rip),%rcx # 28 <.text+0x28>
- 1c: ff 25 0e 00 00 00 jmpq *0xe(%rip) # 30 <.text+0x30>
- 22: 90 nop
- [...]
-
-
- And the version for >=4 integer arguments:
-
-[we want to push the 4th argument (either %r9 or %xmm3, depending on
- whether it is a floating arg or not) and the return address onto the
- stack. However, slots 1-4 are reserved for code we call to spill its
- args 1-4 into, so we can't just push them onto the bottom of the stack.
- So first put the 4th argument onto the stack, above what will be the
- spill slots.]
- 0: 48 83 ec 08 sub $0x8,%rsp
-[if non-floating arg, then do this:]
- 4: 90 nop
- 5: 4c 89 4c 24 20 mov %r9,0x20(%rsp)
-[else if floating arg then do this:]
- 4: f2 0f 11 5c 24 20 movsd %xmm3,0x20(%rsp)
-[end if]
-[Now push the new return address onto the stack]
- a: ff 35 30 00 00 00 pushq 0x30(%rip) # 40 <.text+0x40>
-[But the old return address has been moved up into a spill slot, so
- we need to move it above them]
- 10: 4c 8b 4c 24 10 mov 0x10(%rsp),%r9
- 15: 4c 89 4c 24 30 mov %r9,0x30(%rsp)
-[Now we do the normal register shuffle-up etc]
- 1a: 4d 89 c1 mov %r8,%r9
- 1d: 49 89 d0 mov %rdx,%r8
- 20: 48 89 ca mov %rcx,%rdx
- 23: f2 0f 10 da movsd %xmm2,%xmm3
- 27: f2 0f 10 d1 movsd %xmm1,%xmm2
- 2b: f2 0f 10 c8 movsd %xmm0,%xmm1
- 2f: 48 8b 0d 12 00 00 00 mov 0x12(%rip),%rcx # 48 <.text+0x48>
- 36: ff 25 14 00 00 00 jmpq *0x14(%rip) # 50 <.text+0x50>
- 3c: 90 nop
- 3d: 90 nop
- 3e: 90 nop
- 3f: 90 nop
- [...]
-
- */
- {
- // determine whether we have 4 or more integer arguments,
- // and therefore need to flush one to the stack.
- if ((typeString[0] == '\0') ||
- (typeString[1] == '\0') ||
- (typeString[2] == '\0') ||
- (typeString[3] == '\0')) {
-
- ExecPage *page = allocateExecPage();
- if (page == NULL) {
- barf("createAdjustor: failed to allocate executable page\n");
- }
- StgWord8 *adj_code = (StgWord8*) page;
-
- *(StgInt32 *)adj_code = 0x49c1894d;
- *(StgInt32 *)(adj_code+0x4) = 0x8948d089;
- *(StgInt32 *)(adj_code+0x8) = 0x100ff2ca;
- *(StgInt32 *)(adj_code+0xc) = 0x100ff2da;
- *(StgInt32 *)(adj_code+0x10) = 0x100ff2d1;
- *(StgInt32 *)(adj_code+0x14) = 0x0d8b48c8;
- *(StgInt32 *)(adj_code+0x18) = 0x0000000c;
-
- *(StgInt32 *)(adj_code+0x1c) = 0x000e25ff;
- *(StgInt32 *)(adj_code+0x20) = 0x00000000;
- *(StgInt64 *)(adj_code+0x28) = (StgInt64)hptr;
- *(StgInt64 *)(adj_code+0x30) = (StgInt64)wptr;
-
- freezeExecPage(page);
- return page;
- }
- else
- {
- bool fourthFloating = (typeString[3] == 'f' || typeString[3] == 'd');
- ExecPage *page = allocateExecPage();
- if (page == NULL) {
- barf("createAdjustor: failed to allocate executable page\n");
- }
- StgWord8 *adj_code = (StgWord8*) page;
-
- *(StgInt32 *)adj_code = 0x08ec8348;
- *(StgInt32 *)(adj_code+0x4) = fourthFloating ? 0x5c110ff2
- : 0x4c894c90;
- *(StgInt32 *)(adj_code+0x8) = 0x35ff2024;
- *(StgInt32 *)(adj_code+0xc) = 0x00000030;
- *(StgInt32 *)(adj_code+0x10) = 0x244c8b4c;
- *(StgInt32 *)(adj_code+0x14) = 0x4c894c10;
- *(StgInt32 *)(adj_code+0x18) = 0x894d3024;
- *(StgInt32 *)(adj_code+0x1c) = 0xd08949c1;
- *(StgInt32 *)(adj_code+0x20) = 0xf2ca8948;
- *(StgInt32 *)(adj_code+0x24) = 0xf2da100f;
- *(StgInt32 *)(adj_code+0x28) = 0xf2d1100f;
- *(StgInt32 *)(adj_code+0x2c) = 0x48c8100f;
- *(StgInt32 *)(adj_code+0x30) = 0x00120d8b;
- *(StgInt32 *)(adj_code+0x34) = 0x25ff0000;
- *(StgInt32 *)(adj_code+0x38) = 0x00000014;
- *(StgInt32 *)(adj_code+0x3c) = 0x90909090;
- *(StgInt64 *)(adj_code+0x40) = (StgInt64)obscure_ccall_ret_code;
- *(StgInt64 *)(adj_code+0x48) = (StgInt64)hptr;
- *(StgInt64 *)(adj_code+0x50) = (StgInt64)wptr;
-
- freezeExecPage(page);
- return page;
- }
- }
-
-# else
/*
stack at call:
argn
@@ -316,7 +166,6 @@ createAdjustor(int cconv, StgStablePtr hptr,
return page;
}
}
-#endif /* defined(mingw32_HOST_OS) */
default:
barf("createAdjustor: Unsupported calling convention");
@@ -327,21 +176,9 @@ createAdjustor(int cconv, StgStablePtr hptr,
void freeHaskellFunctionPtr(void* ptr)
{
if ( *(StgWord16 *)ptr == 0x894d ) {
- freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+
-#if defined(mingw32_HOST_OS)
- 0x28
-#else
- 0x20
-#endif
- ));
-#if !defined(mingw32_HOST_OS)
+ freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x20));
} else if ( *(StgWord16 *)ptr == 0x5141 ) {
freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x30));
-#endif
-#if defined(mingw32_HOST_OS)
- } else if ( *(StgWord16 *)ptr == 0x8348 ) {
- freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x48));
-#endif
} else {
errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
return;
diff --git a/rts/adjustor/NativeAmd64Mingw.c b/rts/adjustor/NativeAmd64Mingw.c
new file mode 100644
index 0000000000..555d539c63
--- /dev/null
+++ b/rts/adjustor/NativeAmd64Mingw.c
@@ -0,0 +1,220 @@
+/* -----------------------------------------------------------------------------
+ * AMD64/Windows architecture adjustor thunk logic.
+ * ---------------------------------------------------------------------------*/
+
+#include "rts/PosixSource.h"
+#include "Rts.h"
+
+#include "RtsUtils.h"
+#include "StablePtr.h"
+
+#if defined(LEADING_UNDERSCORE)
+#define UNDERSCORE "_"
+#else
+#define UNDERSCORE ""
+#endif
+
+/*
+ Now here's something obscure for you:
+
+ When generating an adjustor thunk that uses the C calling
+ convention, we have to make sure that the thunk kicks off
+ the process of jumping into Haskell with a tail jump. Why?
+ Because as a result of jumping in into Haskell we may end
+ up freeing the very adjustor thunk we came from using
+ freeHaskellFunctionPtr(). Hence, we better not return to
+ the adjustor code on our way out, since it could by then
+ point to junk.
+
+ The fix is readily at hand, just include the opcodes
+ for the C stack fixup code that we need to perform when
+ returning in some static piece of memory and arrange
+ to return to it before tail jumping from the adjustor thunk.
+*/
+static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
+{
+ __asm__ (
+ ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
+ UNDERSCORE "obscure_ccall_ret_code:\n\t"
+ "addq $0x8, %rsp\n\t"
+ /* On Win64, we had to put the original return address after the
+ arg 1-4 spill slots, ro now we have to move it back */
+ "movq 0x20(%rsp), %rcx\n"
+ "movq %rcx, (%rsp)\n"
+ "ret"
+ );
+}
+
+extern void obscure_ccall_ret_code(void);
+
+void*
+createAdjustor(int cconv, StgStablePtr hptr,
+ StgFunPtr wptr,
+ char *typeString
+ )
+{
+ switch (cconv)
+ {
+ case 1: /* _ccall */
+ /*
+ stack at call:
+ argn
+ ...
+ arg5
+ return address
+ %rcx,%rdx,%r8,%r9 = arg1..arg4
+
+ if there are <4 integer args, then we can just push the
+ StablePtr into %rcx and shuffle the other args up.
+
+ If there are >=4 integer args, then we have to flush one arg
+ to the stack, and arrange to adjust the stack ptr on return.
+ The stack will be rearranged to this:
+
+ argn
+ ...
+ arg5
+ return address *** <-- dummy arg in stub fn.
+ arg4
+ obscure_ccall_ret_code
+
+ This unfortunately means that the type of the stub function
+ must have a dummy argument for the original return address
+ pointer inserted just after the 4th integer argument.
+
+ Code for the simple case:
+
+ 0: 4d 89 c1 mov %r8,%r9
+ 3: 49 89 d0 mov %rdx,%r8
+ 6: 48 89 ca mov %rcx,%rdx
+ 9: f2 0f 10 da movsd %xmm2,%xmm3
+ d: f2 0f 10 d1 movsd %xmm1,%xmm2
+ 11: f2 0f 10 c8 movsd %xmm0,%xmm1
+ 15: 48 8b 0d 0c 00 00 00 mov 0xc(%rip),%rcx # 28 <.text+0x28>
+ 1c: ff 25 0e 00 00 00 jmpq *0xe(%rip) # 30 <.text+0x30>
+ 22: 90 nop
+ [...]
+
+
+ And the version for >=4 integer arguments:
+
+[we want to push the 4th argument (either %r9 or %xmm3, depending on
+ whether it is a floating arg or not) and the return address onto the
+ stack. However, slots 1-4 are reserved for code we call to spill its
+ args 1-4 into, so we can't just push them onto the bottom of the stack.
+ So first put the 4th argument onto the stack, above what will be the
+ spill slots.]
+ 0: 48 83 ec 08 sub $0x8,%rsp
+[if non-floating arg, then do this:]
+ 4: 90 nop
+ 5: 4c 89 4c 24 20 mov %r9,0x20(%rsp)
+[else if floating arg then do this:]
+ 4: f2 0f 11 5c 24 20 movsd %xmm3,0x20(%rsp)
+[end if]
+[Now push the new return address onto the stack]
+ a: ff 35 30 00 00 00 pushq 0x30(%rip) # 40 <.text+0x40>
+[But the old return address has been moved up into a spill slot, so
+ we need to move it above them]
+ 10: 4c 8b 4c 24 10 mov 0x10(%rsp),%r9
+ 15: 4c 89 4c 24 30 mov %r9,0x30(%rsp)
+[Now we do the normal register shuffle-up etc]
+ 1a: 4d 89 c1 mov %r8,%r9
+ 1d: 49 89 d0 mov %rdx,%r8
+ 20: 48 89 ca mov %rcx,%rdx
+ 23: f2 0f 10 da movsd %xmm2,%xmm3
+ 27: f2 0f 10 d1 movsd %xmm1,%xmm2
+ 2b: f2 0f 10 c8 movsd %xmm0,%xmm1
+ 2f: 48 8b 0d 12 00 00 00 mov 0x12(%rip),%rcx # 48 <.text+0x48>
+ 36: ff 25 14 00 00 00 jmpq *0x14(%rip) # 50 <.text+0x50>
+ 3c: 90 nop
+ 3d: 90 nop
+ 3e: 90 nop
+ 3f: 90 nop
+ [...]
+
+ */
+ {
+ // determine whether we have 4 or more integer arguments,
+ // and therefore need to flush one to the stack.
+ if ((typeString[0] == '\0') ||
+ (typeString[1] == '\0') ||
+ (typeString[2] == '\0') ||
+ (typeString[3] == '\0')) {
+
+ ExecPage *page = allocateExecPage();
+ if (page == NULL) {
+ barf("createAdjustor: failed to allocate executable page\n");
+ }
+ StgWord8 *adj_code = (StgWord8*) page;
+
+ *(StgInt32 *)adj_code = 0x49c1894d;
+ *(StgInt32 *)(adj_code+0x4) = 0x8948d089;
+ *(StgInt32 *)(adj_code+0x8) = 0x100ff2ca;
+ *(StgInt32 *)(adj_code+0xc) = 0x100ff2da;
+ *(StgInt32 *)(adj_code+0x10) = 0x100ff2d1;
+ *(StgInt32 *)(adj_code+0x14) = 0x0d8b48c8;
+ *(StgInt32 *)(adj_code+0x18) = 0x0000000c;
+
+ *(StgInt32 *)(adj_code+0x1c) = 0x000e25ff;
+ *(StgInt32 *)(adj_code+0x20) = 0x00000000;
+ *(StgInt64 *)(adj_code+0x28) = (StgInt64)hptr;
+ *(StgInt64 *)(adj_code+0x30) = (StgInt64)wptr;
+
+ freezeExecPage(page);
+ return page;
+ }
+ else
+ {
+ bool fourthFloating = (typeString[3] == 'f' || typeString[3] == 'd');
+ ExecPage *page = allocateExecPage();
+ if (page == NULL) {
+ barf("createAdjustor: failed to allocate executable page\n");
+ }
+ StgWord8 *adj_code = (StgWord8*) page;
+
+ *(StgInt32 *)adj_code = 0x08ec8348;
+ *(StgInt32 *)(adj_code+0x4) = fourthFloating ? 0x5c110ff2
+ : 0x4c894c90;
+ *(StgInt32 *)(adj_code+0x8) = 0x35ff2024;
+ *(StgInt32 *)(adj_code+0xc) = 0x00000030;
+ *(StgInt32 *)(adj_code+0x10) = 0x244c8b4c;
+ *(StgInt32 *)(adj_code+0x14) = 0x4c894c10;
+ *(StgInt32 *)(adj_code+0x18) = 0x894d3024;
+ *(StgInt32 *)(adj_code+0x1c) = 0xd08949c1;
+ *(StgInt32 *)(adj_code+0x20) = 0xf2ca8948;
+ *(StgInt32 *)(adj_code+0x24) = 0xf2da100f;
+ *(StgInt32 *)(adj_code+0x28) = 0xf2d1100f;
+ *(StgInt32 *)(adj_code+0x2c) = 0x48c8100f;
+ *(StgInt32 *)(adj_code+0x30) = 0x00120d8b;
+ *(StgInt32 *)(adj_code+0x34) = 0x25ff0000;
+ *(StgInt32 *)(adj_code+0x38) = 0x00000014;
+ *(StgInt32 *)(adj_code+0x3c) = 0x90909090;
+ *(StgInt64 *)(adj_code+0x40) = (StgInt64)obscure_ccall_ret_code;
+ *(StgInt64 *)(adj_code+0x48) = (StgInt64)hptr;
+ *(StgInt64 *)(adj_code+0x50) = (StgInt64)wptr;
+
+ freezeExecPage(page);
+ return page;
+ }
+ }
+
+ default:
+ barf("createAdjustor: Unsupported calling convention");
+ break;
+ }
+}
+
+void freeHaskellFunctionPtr(void* ptr)
+{
+ if ( *(StgWord16 *)ptr == 0x894d ) {
+ freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x28));
+ } else if ( *(StgWord16 *)ptr == 0x8348 ) {
+ freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x48));
+ } else {
+ errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
+ return;
+ }
+
+ freeExecPage((ExecPage *) ptr);
+}
+
diff --git a/rts/ghc.mk b/rts/ghc.mk
index 6d6ff4bb90..370c88b01f 100644
--- a/rts/ghc.mk
+++ b/rts/ghc.mk
@@ -72,7 +72,11 @@ rts_S_SRCS += rts/AdjustorAsm.S
rts_C_SRCS += rts/adjustor/Nativei386.c
else
ifneq "$(findstring $(TargetArch_CPP), x86_64)" ""
+ifneq "$(findstring $(TargetOS_CPP), mingw32)" ""
+rts_C_SRCS += rts/adjustor/NativeAmd64Mingw.c
+else
rts_C_SRCS += rts/adjustor/NativeAmd64.c
+endif
else
ifneq "$(findstring $(TargetArch_CPP), powerpc64le powerpc)" ""
rts_S_SRCS += rts/AdjustorAsm.S
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
index e2abeb5df1..0c40c2f6bb 100644
--- a/rts/rts.cabal.in
+++ b/rts/rts.cabal.in
@@ -455,7 +455,10 @@ library
asm-sources: AdjustorAsm.S
c-sources: adjustor/Nativei386.c
if arch(x86_64)
- c-sources: adjustor/NativeAmd64.c
+ if opsys(mingw32)
+ c-sources: adjustor/NativeAmd64Mingw.c
+ else
+ c-sources: adjustor/NativeAmd64.c
if arch(ppc) || arch(ppc64)
asm-sources: AdjustorAsm.S
c-sources: adjustor/NativePowerPC.c