diff options
author | Ben Gamari <ben@smart-cactus.org> | 2022-02-07 14:39:58 -0500 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2022-02-13 03:26:14 -0500 |
commit | 974e73afd519e1189f01ab3e96d3fe0212e74b05 (patch) | |
tree | fa01bb8b3f42b18398557d81c84b79c6dd050deb | |
parent | eab37902d0cbfc4a8362aa0f0b2cf8eb3e9ce27f (diff) | |
download | haskell-974e73afd519e1189f01ab3e96d3fe0212e74b05.tar.gz |
adjustors/NativeAmd64Mingw: Use AdjustorPool
-rw-r--r-- | rts/adjustor/NativeAmd64Mingw.c | 196 | ||||
-rw-r--r-- | rts/adjustor/NativeAmd64MingwAsm.S | 140 | ||||
-rw-r--r-- | rts/ghc.mk | 1 | ||||
-rw-r--r-- | rts/rts.cabal.in | 3 |
4 files changed, 189 insertions, 151 deletions
diff --git a/rts/adjustor/NativeAmd64Mingw.c b/rts/adjustor/NativeAmd64Mingw.c index d9c831c36d..5acde07b1f 100644 --- a/rts/adjustor/NativeAmd64Mingw.c +++ b/rts/adjustor/NativeAmd64Mingw.c @@ -7,54 +7,51 @@ #include "RtsUtils.h" #include "StablePtr.h" - -#if defined(LEADING_UNDERSCORE) -#define UNDERSCORE "_" -#else -#define UNDERSCORE "" -#endif - -/* - Now here's something obscure for you: - - When generating an adjustor thunk that uses the C calling - convention, we have to make sure that the thunk kicks off - the process of jumping into Haskell with a tail jump. Why? - Because as a result of jumping in into Haskell we may end - up freeing the very adjustor thunk we came from using - freeHaskellFunctionPtr(). Hence, we better not return to - the adjustor code on our way out, since it could by then - point to junk. - - The fix is readily at hand, just include the opcodes - for the C stack fixup code that we need to perform when - returning in some static piece of memory and arrange - to return to it before tail jumping from the adjustor thunk. -*/ -static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void) +#include "Adjustor.h" +#include "AdjustorPool.h" + +#define DECLARE_ADJUSTOR_TEMPLATE(NAME) \ + extern uint8_t NAME ## _adjustor; \ + extern uint8_t NAME ## _adjustor_context; \ + extern uint8_t NAME ## _adjustor_end; \ + const struct AdjustorTemplate NAME ## _adjustor_template = { \ + .code_start = (uint8_t *) &NAME ## _adjustor, \ + .code_end = (uint8_t *) &NAME ## _adjustor_end, \ + .context_ptr = (const struct AdjustorContext **) &NAME ## _adjustor_context, \ + }; + +/* adjustors to handle calls with less than 6 integer arguments */ +DECLARE_ADJUSTOR_TEMPLATE(simple_ccall); +static struct AdjustorPool *simple_ccall_pool; + +/* adjustors to handle calls with 6 or more integer arguments where the fourth + * argument is a float */ +DECLARE_ADJUSTOR_TEMPLATE(complex_float_ccall); +static struct AdjustorPool *complex_float_ccall_pool; + +/* adjustors to handle calls with 6 or more integer arguments where the fourth + * argument is not a float */ +DECLARE_ADJUSTOR_TEMPLATE(complex_nofloat_ccall); +static struct AdjustorPool *complex_nofloat_ccall_pool; + +void initAdjustors() { - __asm__ ( - ".globl " UNDERSCORE "obscure_ccall_ret_code\n" - UNDERSCORE "obscure_ccall_ret_code:\n\t" - "addq $0x8, %rsp\n\t" - /* On Win64, we had to put the original return address after the - arg 1-4 spill slots, ro now we have to move it back */ - "movq 0x20(%rsp), %rcx\n" - "movq %rcx, (%rsp)\n" - "ret" - ); + simple_ccall_pool = new_adjustor_pool_from_template(&simple_ccall_adjustor_template); + complex_float_ccall_pool = new_adjustor_pool_from_template(&complex_float_ccall_adjustor_template); + complex_nofloat_ccall_pool = new_adjustor_pool_from_template(&complex_nofloat_ccall_adjustor_template); } -extern void obscure_ccall_ret_code(void); - -void initAdjustors() { } - void* createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr, char *typeString ) { + struct AdjustorContext context = { + .hptr = hptr, + .wptr = wptr, + }; + switch (cconv) { case 1: /* _ccall */ @@ -84,56 +81,7 @@ createAdjustor(int cconv, StgStablePtr hptr, must have a dummy argument for the original return address pointer inserted just after the 4th integer argument. - Code for the simple case: - - 0: 4d 89 c1 mov %r8,%r9 - 3: 49 89 d0 mov %rdx,%r8 - 6: 48 89 ca mov %rcx,%rdx - 9: f2 0f 10 da movsd %xmm2,%xmm3 - d: f2 0f 10 d1 movsd %xmm1,%xmm2 - 11: f2 0f 10 c8 movsd %xmm0,%xmm1 - 15: 48 8b 0d 0c 00 00 00 mov 0xc(%rip),%rcx # 28 <.text+0x28> - 1c: ff 25 0e 00 00 00 jmpq *0xe(%rip) # 30 <.text+0x30> - 22: 90 nop - [...] - - - And the version for >=4 integer arguments: - -[we want to push the 4th argument (either %r9 or %xmm3, depending on - whether it is a floating arg or not) and the return address onto the - stack. However, slots 1-4 are reserved for code we call to spill its - args 1-4 into, so we can't just push them onto the bottom of the stack. - So first put the 4th argument onto the stack, above what will be the - spill slots.] - 0: 48 83 ec 08 sub $0x8,%rsp -[if non-floating arg, then do this:] - 4: 90 nop - 5: 4c 89 4c 24 20 mov %r9,0x20(%rsp) -[else if floating arg then do this:] - 4: f2 0f 11 5c 24 20 movsd %xmm3,0x20(%rsp) -[end if] -[Now push the new return address onto the stack] - a: ff 35 30 00 00 00 pushq 0x30(%rip) # 40 <.text+0x40> -[But the old return address has been moved up into a spill slot, so - we need to move it above them] - 10: 4c 8b 4c 24 10 mov 0x10(%rsp),%r9 - 15: 4c 89 4c 24 30 mov %r9,0x30(%rsp) -[Now we do the normal register shuffle-up etc] - 1a: 4d 89 c1 mov %r8,%r9 - 1d: 49 89 d0 mov %rdx,%r8 - 20: 48 89 ca mov %rcx,%rdx - 23: f2 0f 10 da movsd %xmm2,%xmm3 - 27: f2 0f 10 d1 movsd %xmm1,%xmm2 - 2b: f2 0f 10 c8 movsd %xmm0,%xmm1 - 2f: 48 8b 0d 12 00 00 00 mov 0x12(%rip),%rcx # 48 <.text+0x48> - 36: ff 25 14 00 00 00 jmpq *0x14(%rip) # 50 <.text+0x50> - 3c: 90 nop - 3d: 90 nop - 3e: 90 nop - 3f: 90 nop - [...] - + See NativeAmd64MingwAsm.S. */ { // determine whether we have 4 or more integer arguments, @@ -141,62 +89,18 @@ createAdjustor(int cconv, StgStablePtr hptr, if ((typeString[0] == '\0') || (typeString[1] == '\0') || (typeString[2] == '\0') || - (typeString[3] == '\0')) { - - ExecPage *page = allocateExecPage(); - if (page == NULL) { - barf("createAdjustor: failed to allocate executable page\n"); - } - StgWord8 *adj_code = (StgWord8*) page; - - *(StgInt32 *)adj_code = 0x49c1894d; - *(StgInt32 *)(adj_code+0x4) = 0x8948d089; - *(StgInt32 *)(adj_code+0x8) = 0x100ff2ca; - *(StgInt32 *)(adj_code+0xc) = 0x100ff2da; - *(StgInt32 *)(adj_code+0x10) = 0x100ff2d1; - *(StgInt32 *)(adj_code+0x14) = 0x0d8b48c8; - *(StgInt32 *)(adj_code+0x18) = 0x0000000c; - - *(StgInt32 *)(adj_code+0x1c) = 0x000e25ff; - *(StgInt32 *)(adj_code+0x20) = 0x00000000; - *(StgInt64 *)(adj_code+0x28) = (StgInt64)hptr; - *(StgInt64 *)(adj_code+0x30) = (StgInt64)wptr; - - freezeExecPage(page); - return page; + (typeString[3] == '\0')) + { + return alloc_adjustor(simple_ccall_pool, context); } else { bool fourthFloating = (typeString[3] == 'f' || typeString[3] == 'd'); - ExecPage *page = allocateExecPage(); - if (page == NULL) { - barf("createAdjustor: failed to allocate executable page\n"); + if (fourthFloating) { + return alloc_adjustor(complex_float_ccall_pool, context); + } else { + return alloc_adjustor(complex_nofloat_ccall_pool, context); } - StgWord8 *adj_code = (StgWord8*) page; - - *(StgInt32 *)adj_code = 0x08ec8348; - *(StgInt32 *)(adj_code+0x4) = fourthFloating ? 0x5c110ff2 - : 0x4c894c90; - *(StgInt32 *)(adj_code+0x8) = 0x35ff2024; - *(StgInt32 *)(adj_code+0xc) = 0x00000030; - *(StgInt32 *)(adj_code+0x10) = 0x244c8b4c; - *(StgInt32 *)(adj_code+0x14) = 0x4c894c10; - *(StgInt32 *)(adj_code+0x18) = 0x894d3024; - *(StgInt32 *)(adj_code+0x1c) = 0xd08949c1; - *(StgInt32 *)(adj_code+0x20) = 0xf2ca8948; - *(StgInt32 *)(adj_code+0x24) = 0xf2da100f; - *(StgInt32 *)(adj_code+0x28) = 0xf2d1100f; - *(StgInt32 *)(adj_code+0x2c) = 0x48c8100f; - *(StgInt32 *)(adj_code+0x30) = 0x00120d8b; - *(StgInt32 *)(adj_code+0x34) = 0x25ff0000; - *(StgInt32 *)(adj_code+0x38) = 0x00000014; - *(StgInt32 *)(adj_code+0x3c) = 0x90909090; - *(StgInt64 *)(adj_code+0x40) = (StgInt64)obscure_ccall_ret_code; - *(StgInt64 *)(adj_code+0x48) = (StgInt64)hptr; - *(StgInt64 *)(adj_code+0x50) = (StgInt64)wptr; - - freezeExecPage(page); - return page; } } @@ -208,15 +112,7 @@ createAdjustor(int cconv, StgStablePtr hptr, void freeHaskellFunctionPtr(void* ptr) { - if ( *(StgWord16 *)ptr == 0x894d ) { - freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x28)); - } else if ( *(StgWord16 *)ptr == 0x8348 ) { - freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x48)); - } else { - errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); - return; - } - - freeExecPage((ExecPage *) ptr); + struct AdjustorContext context = free_adjustor(ptr); + freeStablePtr(context.hptr); } diff --git a/rts/adjustor/NativeAmd64MingwAsm.S b/rts/adjustor/NativeAmd64MingwAsm.S new file mode 100644 index 0000000000..ae80f9b86b --- /dev/null +++ b/rts/adjustor/NativeAmd64MingwAsm.S @@ -0,0 +1,140 @@ +#include "include/ghcconfig.h" + +#define HPTR_OFF 0 +#define WPTR_OFF 8 + +#if defined(LEADING_UNDERSCORE) +#define CSYM(x) _ ## x +#else +#define CSYM(x) x +#endif + +#define DECLARE_CSYM(x) \ + .globl CSYM(x) ; \ + CSYM(x): + +/* ------------------------------------------------------------------ + * Adjustor for a call with less than 4 integer arguments. + * ------------------------------------------------------------------ */ +DECLARE_CSYM(simple_ccall_adjustor) + // Shuffle the argument registers down + mov %r8, %r9 + mov %rdx, %r8 + mov %rcx, %rdx + movsd %xmm2, %xmm3 + movsd %xmm1, %xmm2 + movsd %xmm0, %xmm1 + mov lcl_simple_ccall_adjustor_context(%rip), %rax // load the address of the context + mov HPTR_OFF(%rax), %rcx // load the StablePtr + jmp *WPTR_OFF(%rax) // jump to the entrypoint + +.align 8 +DECLARE_CSYM(simple_ccall_adjustor_context) +// See Note [Adjustors: Local symbol references] in NativeAmd64Asm.S +lcl_simple_ccall_adjustor_context: + // this will be overwritten with a pointer to the AdjustorContext + .quad 0 +DECLARE_CSYM(simple_ccall_adjustor_end) + +/* ------------------------------------------------------------------ + * Adjustor for a call with 4 or more integer arguments and where the fourth + * argument is not floating-point. + * ------------------------------------------------------------------ */ +DECLARE_CSYM(complex_nofloat_ccall_adjustor) + sub $8,%rsp + // Handle the fourth argument; this is the only difference between the + // float/non-float cases + mov %r9, 0x20(%rsp) + // Push the new return address onto the stack + pushq complex_nofloat_ccall_ret_code_ptr(%rip) + // But the old return address has been moved up into a spill slot, so we + // need to move it above them + mov 0x10(%rsp), %r9 + mov %r9, 0x30(%rsp) + // Now do the normal argument shuffle + mov %r8, %r9 + mov %rdx, %r8 + mov %rcx, %rdx + movsd %xmm2, %xmm3 + movsd %xmm1, %xmm2 + movsd %xmm0, %xmm1 + // Load the address of the context + mov lcl_complex_nofloat_ccall_adjustor_context(%rip), %rax + mov HPTR_OFF(%rax), %rcx + jmpq *WPTR_OFF(%rax) + +.align 8 +complex_nofloat_ccall_ret_code_ptr: + .quad complex_ccall_ret_code +DECLARE_CSYM(complex_nofloat_ccall_adjustor_context) +// See Note [Adjustors: Local symbol references] in NativeAmd64Asm.S +lcl_complex_nofloat_ccall_adjustor_context: + // this will be overwritten with a pointer to the AdjustorContext + .quad 0x0 +DECLARE_CSYM(complex_nofloat_ccall_adjustor_end) + +/* ------------------------------------------------------------------ + * Adjustor for a call with 4 or more integer arguments and where the fourth + * argument is floating point. + * ------------------------------------------------------------------ */ +DECLARE_CSYM(complex_float_ccall_adjustor) + sub $8,%rsp + // Handle the fourth argument; this is the only difference between the + // float/non-float cases + movsd %xmm3,0x20(%rsp) + // Push the new return address onto the stack + pushq complex_float_ccall_ret_code_ptr(%rip) + // But the old return address has been moved up into a spill slot, so we + // need to move it above them + mov 0x10(%rsp),%r9 + mov %r9,0x30(%rsp) + // Now do the normal argument shuffle + mov %r8, %r9 + mov %rdx, %r8 + mov %rcx, %rdx + movsd %xmm2, %xmm3 + movsd %xmm1, %xmm2 + movsd %xmm0, %xmm1 + // Load the address of the context + mov complex_float_ccall_adjustor_context(%rip), %rax + mov HPTR_OFF(%rax), %rcx + jmpq *WPTR_OFF(%rax) + +.align 8 +complex_float_ccall_ret_code_ptr: + .quad complex_ccall_ret_code +DECLARE_CSYM(complex_float_ccall_adjustor_context) +// See Note [Adjustors: Local symbol references] in NativeAmd64Asm.S +lcl_complex_float_ccall_adjustor_context: + // this will be overwritten with a pointer to the AdjustorContext + .quad 0x0 +DECLARE_CSYM(complex_float_ccall_adjustor_end) + + +/* + When generating an adjustor thunk that uses the C calling + convention, we have to make sure that the thunk kicks off + the process of jumping into Haskell with a tail jump. Why? + Because as a result of jumping in into Haskell we may end + up freeing the very adjustor thunk we came from using + freeHaskellFunctionPtr(). Hence, we better not return to + the adjustor code on our way out, since it could by then + point to junk. + + The fix is readily at hand, just include the opcodes + for the C stack fixup code that we need to perform when + returning in some static piece of memory and arrange + to return to it before tail jumping from the adjustor thunk. +*/ +complex_ccall_ret_code: + addq $0x8, %rsp + /* On Win64, we had to put the original return address after the + arg 1-4 spill slots, ro now we have to move it back */ + movq 0x20(%rsp), %rcx + movq %rcx, (%rsp) + ret + +/* mark stack as nonexecutable */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",@progbits +#endif diff --git a/rts/ghc.mk b/rts/ghc.mk index e00de976f1..4c7a54977f 100644 --- a/rts/ghc.mk +++ b/rts/ghc.mk @@ -74,6 +74,7 @@ rts_C_SRCS += rts/adjustor/Nativei386.c else ifneq "$(findstring $(TargetArch_CPP), x86_64)" "" ifneq "$(findstring $(TargetOS_CPP), mingw32)" "" +rts_S_SRCS += rts/adjustor/NativeAmd64MingwAsm.S rts_C_SRCS += rts/adjustor/NativeAmd64Mingw.c else rts_S_SRCS += rts/adjustor/NativeAmd64Asm.S diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in index 9b8c51a644..641fccc437 100644 --- a/rts/rts.cabal.in +++ b/rts/rts.cabal.in @@ -455,7 +455,8 @@ library asm-sources: AdjustorAsm.S c-sources: adjustor/Nativei386.c if arch(x86_64) - if opsys(mingw32) + if os(mingw32) + asm-sources: adjustor/NativeAmd64MingwAsm.S c-sources: adjustor/NativeAmd64Mingw.c else asm-sources: adjustor/NativeAmd64Asm.S |