summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2022-02-07 14:39:58 -0500
committerMarge Bot <ben+marge-bot@smart-cactus.org>2022-02-13 03:26:14 -0500
commit974e73afd519e1189f01ab3e96d3fe0212e74b05 (patch)
treefa01bb8b3f42b18398557d81c84b79c6dd050deb
parenteab37902d0cbfc4a8362aa0f0b2cf8eb3e9ce27f (diff)
downloadhaskell-974e73afd519e1189f01ab3e96d3fe0212e74b05.tar.gz
adjustors/NativeAmd64Mingw: Use AdjustorPool
-rw-r--r--rts/adjustor/NativeAmd64Mingw.c196
-rw-r--r--rts/adjustor/NativeAmd64MingwAsm.S140
-rw-r--r--rts/ghc.mk1
-rw-r--r--rts/rts.cabal.in3
4 files changed, 189 insertions, 151 deletions
diff --git a/rts/adjustor/NativeAmd64Mingw.c b/rts/adjustor/NativeAmd64Mingw.c
index d9c831c36d..5acde07b1f 100644
--- a/rts/adjustor/NativeAmd64Mingw.c
+++ b/rts/adjustor/NativeAmd64Mingw.c
@@ -7,54 +7,51 @@
#include "RtsUtils.h"
#include "StablePtr.h"
-
-#if defined(LEADING_UNDERSCORE)
-#define UNDERSCORE "_"
-#else
-#define UNDERSCORE ""
-#endif
-
-/*
- Now here's something obscure for you:
-
- When generating an adjustor thunk that uses the C calling
- convention, we have to make sure that the thunk kicks off
- the process of jumping into Haskell with a tail jump. Why?
- Because as a result of jumping in into Haskell we may end
- up freeing the very adjustor thunk we came from using
- freeHaskellFunctionPtr(). Hence, we better not return to
- the adjustor code on our way out, since it could by then
- point to junk.
-
- The fix is readily at hand, just include the opcodes
- for the C stack fixup code that we need to perform when
- returning in some static piece of memory and arrange
- to return to it before tail jumping from the adjustor thunk.
-*/
-static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
+#include "Adjustor.h"
+#include "AdjustorPool.h"
+
+#define DECLARE_ADJUSTOR_TEMPLATE(NAME) \
+ extern uint8_t NAME ## _adjustor; \
+ extern uint8_t NAME ## _adjustor_context; \
+ extern uint8_t NAME ## _adjustor_end; \
+ const struct AdjustorTemplate NAME ## _adjustor_template = { \
+ .code_start = (uint8_t *) &NAME ## _adjustor, \
+ .code_end = (uint8_t *) &NAME ## _adjustor_end, \
+ .context_ptr = (const struct AdjustorContext **) &NAME ## _adjustor_context, \
+ };
+
+/* adjustors to handle calls with less than 4 integer arguments */
+DECLARE_ADJUSTOR_TEMPLATE(simple_ccall);
+static struct AdjustorPool *simple_ccall_pool;
+
+/* adjustors to handle calls with 4 or more integer arguments where the fourth
+ * argument is a float */
+DECLARE_ADJUSTOR_TEMPLATE(complex_float_ccall);
+static struct AdjustorPool *complex_float_ccall_pool;
+
+/* adjustors to handle calls with 4 or more integer arguments where the fourth
+ * argument is not a float */
+DECLARE_ADJUSTOR_TEMPLATE(complex_nofloat_ccall);
+static struct AdjustorPool *complex_nofloat_ccall_pool;
+
+void initAdjustors()
{
- __asm__ (
- ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
- UNDERSCORE "obscure_ccall_ret_code:\n\t"
- "addq $0x8, %rsp\n\t"
- /* On Win64, we had to put the original return address after the
- arg 1-4 spill slots, ro now we have to move it back */
- "movq 0x20(%rsp), %rcx\n"
- "movq %rcx, (%rsp)\n"
- "ret"
- );
+ simple_ccall_pool = new_adjustor_pool_from_template(&simple_ccall_adjustor_template);
+ complex_float_ccall_pool = new_adjustor_pool_from_template(&complex_float_ccall_adjustor_template);
+ complex_nofloat_ccall_pool = new_adjustor_pool_from_template(&complex_nofloat_ccall_adjustor_template);
}
-extern void obscure_ccall_ret_code(void);
-
-void initAdjustors() { }
-
void*
createAdjustor(int cconv, StgStablePtr hptr,
StgFunPtr wptr,
char *typeString
)
{
+ struct AdjustorContext context = {
+ .hptr = hptr,
+ .wptr = wptr,
+ };
+
switch (cconv)
{
case 1: /* _ccall */
@@ -84,56 +81,7 @@ createAdjustor(int cconv, StgStablePtr hptr,
must have a dummy argument for the original return address
pointer inserted just after the 4th integer argument.
- Code for the simple case:
-
- 0: 4d 89 c1 mov %r8,%r9
- 3: 49 89 d0 mov %rdx,%r8
- 6: 48 89 ca mov %rcx,%rdx
- 9: f2 0f 10 da movsd %xmm2,%xmm3
- d: f2 0f 10 d1 movsd %xmm1,%xmm2
- 11: f2 0f 10 c8 movsd %xmm0,%xmm1
- 15: 48 8b 0d 0c 00 00 00 mov 0xc(%rip),%rcx # 28 <.text+0x28>
- 1c: ff 25 0e 00 00 00 jmpq *0xe(%rip) # 30 <.text+0x30>
- 22: 90 nop
- [...]
-
-
- And the version for >=4 integer arguments:
-
-[we want to push the 4th argument (either %r9 or %xmm3, depending on
- whether it is a floating arg or not) and the return address onto the
- stack. However, slots 1-4 are reserved for code we call to spill its
- args 1-4 into, so we can't just push them onto the bottom of the stack.
- So first put the 4th argument onto the stack, above what will be the
- spill slots.]
- 0: 48 83 ec 08 sub $0x8,%rsp
-[if non-floating arg, then do this:]
- 4: 90 nop
- 5: 4c 89 4c 24 20 mov %r9,0x20(%rsp)
-[else if floating arg then do this:]
- 4: f2 0f 11 5c 24 20 movsd %xmm3,0x20(%rsp)
-[end if]
-[Now push the new return address onto the stack]
- a: ff 35 30 00 00 00 pushq 0x30(%rip) # 40 <.text+0x40>
-[But the old return address has been moved up into a spill slot, so
- we need to move it above them]
- 10: 4c 8b 4c 24 10 mov 0x10(%rsp),%r9
- 15: 4c 89 4c 24 30 mov %r9,0x30(%rsp)
-[Now we do the normal register shuffle-up etc]
- 1a: 4d 89 c1 mov %r8,%r9
- 1d: 49 89 d0 mov %rdx,%r8
- 20: 48 89 ca mov %rcx,%rdx
- 23: f2 0f 10 da movsd %xmm2,%xmm3
- 27: f2 0f 10 d1 movsd %xmm1,%xmm2
- 2b: f2 0f 10 c8 movsd %xmm0,%xmm1
- 2f: 48 8b 0d 12 00 00 00 mov 0x12(%rip),%rcx # 48 <.text+0x48>
- 36: ff 25 14 00 00 00 jmpq *0x14(%rip) # 50 <.text+0x50>
- 3c: 90 nop
- 3d: 90 nop
- 3e: 90 nop
- 3f: 90 nop
- [...]
-
+ See NativeAmd64MingwAsm.S.
*/
{
// determine whether we have 4 or more integer arguments,
@@ -141,62 +89,18 @@ createAdjustor(int cconv, StgStablePtr hptr,
if ((typeString[0] == '\0') ||
(typeString[1] == '\0') ||
(typeString[2] == '\0') ||
- (typeString[3] == '\0')) {
-
- ExecPage *page = allocateExecPage();
- if (page == NULL) {
- barf("createAdjustor: failed to allocate executable page\n");
- }
- StgWord8 *adj_code = (StgWord8*) page;
-
- *(StgInt32 *)adj_code = 0x49c1894d;
- *(StgInt32 *)(adj_code+0x4) = 0x8948d089;
- *(StgInt32 *)(adj_code+0x8) = 0x100ff2ca;
- *(StgInt32 *)(adj_code+0xc) = 0x100ff2da;
- *(StgInt32 *)(adj_code+0x10) = 0x100ff2d1;
- *(StgInt32 *)(adj_code+0x14) = 0x0d8b48c8;
- *(StgInt32 *)(adj_code+0x18) = 0x0000000c;
-
- *(StgInt32 *)(adj_code+0x1c) = 0x000e25ff;
- *(StgInt32 *)(adj_code+0x20) = 0x00000000;
- *(StgInt64 *)(adj_code+0x28) = (StgInt64)hptr;
- *(StgInt64 *)(adj_code+0x30) = (StgInt64)wptr;
-
- freezeExecPage(page);
- return page;
+ (typeString[3] == '\0'))
+ {
+ return alloc_adjustor(simple_ccall_pool, context);
}
else
{
bool fourthFloating = (typeString[3] == 'f' || typeString[3] == 'd');
- ExecPage *page = allocateExecPage();
- if (page == NULL) {
- barf("createAdjustor: failed to allocate executable page\n");
+ if (fourthFloating) {
+ return alloc_adjustor(complex_float_ccall_pool, context);
+ } else {
+ return alloc_adjustor(complex_nofloat_ccall_pool, context);
}
- StgWord8 *adj_code = (StgWord8*) page;
-
- *(StgInt32 *)adj_code = 0x08ec8348;
- *(StgInt32 *)(adj_code+0x4) = fourthFloating ? 0x5c110ff2
- : 0x4c894c90;
- *(StgInt32 *)(adj_code+0x8) = 0x35ff2024;
- *(StgInt32 *)(adj_code+0xc) = 0x00000030;
- *(StgInt32 *)(adj_code+0x10) = 0x244c8b4c;
- *(StgInt32 *)(adj_code+0x14) = 0x4c894c10;
- *(StgInt32 *)(adj_code+0x18) = 0x894d3024;
- *(StgInt32 *)(adj_code+0x1c) = 0xd08949c1;
- *(StgInt32 *)(adj_code+0x20) = 0xf2ca8948;
- *(StgInt32 *)(adj_code+0x24) = 0xf2da100f;
- *(StgInt32 *)(adj_code+0x28) = 0xf2d1100f;
- *(StgInt32 *)(adj_code+0x2c) = 0x48c8100f;
- *(StgInt32 *)(adj_code+0x30) = 0x00120d8b;
- *(StgInt32 *)(adj_code+0x34) = 0x25ff0000;
- *(StgInt32 *)(adj_code+0x38) = 0x00000014;
- *(StgInt32 *)(adj_code+0x3c) = 0x90909090;
- *(StgInt64 *)(adj_code+0x40) = (StgInt64)obscure_ccall_ret_code;
- *(StgInt64 *)(adj_code+0x48) = (StgInt64)hptr;
- *(StgInt64 *)(adj_code+0x50) = (StgInt64)wptr;
-
- freezeExecPage(page);
- return page;
}
}
@@ -208,15 +112,7 @@ createAdjustor(int cconv, StgStablePtr hptr,
void freeHaskellFunctionPtr(void* ptr)
{
- if ( *(StgWord16 *)ptr == 0x894d ) {
- freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x28));
- } else if ( *(StgWord16 *)ptr == 0x8348 ) {
- freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x48));
- } else {
- errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
- return;
- }
-
- freeExecPage((ExecPage *) ptr);
+ struct AdjustorContext context = free_adjustor(ptr);
+ freeStablePtr(context.hptr);
}
diff --git a/rts/adjustor/NativeAmd64MingwAsm.S b/rts/adjustor/NativeAmd64MingwAsm.S
new file mode 100644
index 0000000000..ae80f9b86b
--- /dev/null
+++ b/rts/adjustor/NativeAmd64MingwAsm.S
@@ -0,0 +1,140 @@
+#include "include/ghcconfig.h"
+
+#define HPTR_OFF 0
+#define WPTR_OFF 8
+
+#if defined(LEADING_UNDERSCORE)
+#define CSYM(x) _ ## x
+#else
+#define CSYM(x) x
+#endif
+
+#define DECLARE_CSYM(x) \
+ .globl CSYM(x) ; \
+ CSYM(x):
+
+/* ------------------------------------------------------------------
+ * Adjustor for a call with less than 4 integer arguments.
+ * ------------------------------------------------------------------ */
+DECLARE_CSYM(simple_ccall_adjustor)
+ // Shuffle the argument registers down
+ mov %r8, %r9
+ mov %rdx, %r8
+ mov %rcx, %rdx
+ movsd %xmm2, %xmm3
+ movsd %xmm1, %xmm2
+ movsd %xmm0, %xmm1
+ mov lcl_simple_ccall_adjustor_context(%rip), %rax // load the address of the context
+ mov HPTR_OFF(%rax), %rcx // load the StablePtr
+ jmp *WPTR_OFF(%rax) // jump to the entrypoint
+
+.align 8
+DECLARE_CSYM(simple_ccall_adjustor_context)
+// See Note [Adjustors: Local symbol references] in NativeAmd64Asm.S
+lcl_simple_ccall_adjustor_context:
+ // this will be overwritten with a pointer to the AdjustorContext
+ .quad 0
+DECLARE_CSYM(simple_ccall_adjustor_end)
+
+/* ------------------------------------------------------------------
+ * Adjustor for a call with 4 or more integer arguments and where the fourth
+ * argument is not floating-point.
+ * ------------------------------------------------------------------ */
+DECLARE_CSYM(complex_nofloat_ccall_adjustor)
+ sub $8,%rsp
+ // Handle the fourth argument; this is the only difference between the
+ // float/non-float cases
+ mov %r9, 0x20(%rsp)
+ // Push the new return address onto the stack
+ pushq complex_nofloat_ccall_ret_code_ptr(%rip)
+ // But the old return address has been moved up into a spill slot, so we
+ // need to move it above them
+ mov 0x10(%rsp), %r9
+ mov %r9, 0x30(%rsp)
+ // Now do the normal argument shuffle
+ mov %r8, %r9
+ mov %rdx, %r8
+ mov %rcx, %rdx
+ movsd %xmm2, %xmm3
+ movsd %xmm1, %xmm2
+ movsd %xmm0, %xmm1
+ // Load the address of the context
+ mov lcl_complex_nofloat_ccall_adjustor_context(%rip), %rax
+ mov HPTR_OFF(%rax), %rcx
+ jmpq *WPTR_OFF(%rax)
+
+.align 8
+complex_nofloat_ccall_ret_code_ptr:
+ .quad complex_ccall_ret_code
+DECLARE_CSYM(complex_nofloat_ccall_adjustor_context)
+// See Note [Adjustors: Local symbol references] in NativeAmd64Asm.S
+lcl_complex_nofloat_ccall_adjustor_context:
+ // this will be overwritten with a pointer to the AdjustorContext
+ .quad 0x0
+DECLARE_CSYM(complex_nofloat_ccall_adjustor_end)
+
+/* ------------------------------------------------------------------
+ * Adjustor for a call with 4 or more integer arguments and where the fourth
+ * argument is floating point.
+ * ------------------------------------------------------------------ */
+DECLARE_CSYM(complex_float_ccall_adjustor)
+    sub $8,%rsp
+    // Handle the fourth argument; this is the only difference between the
+    // float/non-float cases
+    movsd %xmm3,0x20(%rsp)
+    // Push the new return address onto the stack
+    pushq complex_float_ccall_ret_code_ptr(%rip)
+    // But the old return address has been moved up into a spill slot, so we
+    // need to move it above them
+    mov 0x10(%rsp),%r9
+    mov %r9,0x30(%rsp)
+    // Now do the normal argument shuffle
+    mov %r8, %r9
+    mov %rdx, %r8
+    mov %rcx, %rdx
+    movsd %xmm2, %xmm3
+    movsd %xmm1, %xmm2
+    movsd %xmm0, %xmm1
+    // Load the address of the context. This must go through the lcl_ alias
+    // (as the other adjustors above do); see Note [Adjustors: Local symbol
+    // references] in NativeAmd64Asm.S. A reference to the global symbol may
+    // be resolved via toolchain indirection, which would be wrong once this
+    // template has been copied into an AdjustorPool slot.
+    mov lcl_complex_float_ccall_adjustor_context(%rip), %rax
+    mov HPTR_OFF(%rax), %rcx
+    jmpq *WPTR_OFF(%rax)
+
+.align 8
+complex_float_ccall_ret_code_ptr:
+    .quad complex_ccall_ret_code
+DECLARE_CSYM(complex_float_ccall_adjustor_context)
+// See Note [Adjustors: Local symbol references] in NativeAmd64Asm.S
+lcl_complex_float_ccall_adjustor_context:
+    // this will be overwritten with a pointer to the AdjustorContext
+    .quad 0x0
+DECLARE_CSYM(complex_float_ccall_adjustor_end)
+
+
+/*
+ When generating an adjustor thunk that uses the C calling
+ convention, we have to make sure that the thunk kicks off
+ the process of jumping into Haskell with a tail jump. Why?
+ Because as a result of jumping into Haskell we may end
+ up freeing the very adjustor thunk we came from using
+ freeHaskellFunctionPtr(). Hence, we better not return to
+ the adjustor code on our way out, since it could by then
+ point to junk.
+
+ The fix is readily at hand, just include the opcodes
+ for the C stack fixup code that we need to perform when
+ returning in some static piece of memory and arrange
+ to return to it before tail jumping from the adjustor thunk.
+*/
+complex_ccall_ret_code:
+ addq $0x8, %rsp
+ /* On Win64, we had to put the original return address after the
+ arg 1-4 spill slots, so now we have to move it back */
+ movq 0x20(%rsp), %rcx
+ movq %rcx, (%rsp)
+ ret
+
+/* mark stack as nonexecutable */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",@progbits
+#endif
diff --git a/rts/ghc.mk b/rts/ghc.mk
index e00de976f1..4c7a54977f 100644
--- a/rts/ghc.mk
+++ b/rts/ghc.mk
@@ -74,6 +74,7 @@ rts_C_SRCS += rts/adjustor/Nativei386.c
else
ifneq "$(findstring $(TargetArch_CPP), x86_64)" ""
ifneq "$(findstring $(TargetOS_CPP), mingw32)" ""
+rts_S_SRCS += rts/adjustor/NativeAmd64MingwAsm.S
rts_C_SRCS += rts/adjustor/NativeAmd64Mingw.c
else
rts_S_SRCS += rts/adjustor/NativeAmd64Asm.S
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
index 9b8c51a644..641fccc437 100644
--- a/rts/rts.cabal.in
+++ b/rts/rts.cabal.in
@@ -455,7 +455,8 @@ library
asm-sources: AdjustorAsm.S
c-sources: adjustor/Nativei386.c
if arch(x86_64)
- if opsys(mingw32)
+ if os(mingw32)
+ asm-sources: adjustor/NativeAmd64MingwAsm.S
c-sources: adjustor/NativeAmd64Mingw.c
else
asm-sources: adjustor/NativeAmd64Asm.S