diff options
-rw-r--r-- | rts/adjustor/NativeAmd64.c | 168 | ||||
-rw-r--r-- | rts/adjustor/NativeAmd64Asm.S | 114 | ||||
-rw-r--r-- | rts/ghc.mk | 1 | ||||
-rw-r--r-- | rts/rts.cabal.in | 1 |
4 files changed, 160 insertions, 124 deletions
diff --git a/rts/adjustor/NativeAmd64.c b/rts/adjustor/NativeAmd64.c index e58a7b79b0..19e186a6f4 100644 --- a/rts/adjustor/NativeAmd64.c +++ b/rts/adjustor/NativeAmd64.c @@ -7,50 +7,44 @@ #include "RtsUtils.h" #include "StablePtr.h" - -#if defined(LEADING_UNDERSCORE) -#define UNDERSCORE "_" -#else -#define UNDERSCORE "" -#endif - -/* - Now here's something obscure for you: - - When generating an adjustor thunk that uses the C calling - convention, we have to make sure that the thunk kicks off - the process of jumping into Haskell with a tail jump. Why? - Because as a result of jumping in into Haskell we may end - up freeing the very adjustor thunk we came from using - freeHaskellFunctionPtr(). Hence, we better not return to - the adjustor code on our way out, since it could by then - point to junk. - - The fix is readily at hand, just include the opcodes - for the C stack fixup code that we need to perform when - returning in some static piece of memory and arrange - to return to it before tail jumping from the adjustor thunk. -*/ -static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void) +#include "Adjustor.h" +#include "AdjustorPool.h" + +#define DECLARE_ADJUSTOR_TEMPLATE(NAME) \ + extern uint8_t NAME ## _adjustor; \ + extern uint8_t NAME ## _adjustor_context; \ + extern uint8_t NAME ## _adjustor_end; \ + const struct AdjustorTemplate NAME ## _adjustor_template = { \ + .code_start = (uint8_t *) &NAME ## _adjustor, \ + .code_end = (uint8_t *) &NAME ## _adjustor_end, \ + .context_ptr = (const struct AdjustorContext **) &NAME ## _adjustor_context, \ + }; + +/* adjustors to handle calls with less than 6 integer arguments */ +DECLARE_ADJUSTOR_TEMPLATE(simple_ccall); +static struct AdjustorPool *simple_ccall_pool; + +/* adjustors to handle calls with 6 or more integer arguments */ +DECLARE_ADJUSTOR_TEMPLATE(complex_ccall); +static struct AdjustorPool *complex_ccall_pool; + +void initAdjustors() { - __asm__ ( - ".globl " UNDERSCORE "obscure_ccall_ret_code\n" - UNDERSCORE "obscure_ccall_ret_code:\n\t" - "addq $0x8, %rsp\n\t" - "ret" - ); + simple_ccall_pool = new_adjustor_pool_from_template(&simple_ccall_adjustor_template); + complex_ccall_pool = new_adjustor_pool_from_template(&complex_ccall_adjustor_template); } -extern void obscure_ccall_ret_code(void); - -void initAdjustors() { } - void* createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr, char *typeString ) { + struct AdjustorContext context = { + .hptr = hptr, + .wptr = wptr, + }; + switch (cconv) { case 1: /* _ccall */ @@ -79,94 +73,28 @@ createAdjustor(int cconv, StgStablePtr hptr, This unfortunately means that the type of the stub function must have a dummy argument for the original return address pointer inserted just after the 6th integer argument. - - Code for the simple case: - - 0: 4d 89 c1 mov %r8,%r9 - 3: 49 89 c8 mov %rcx,%r8 - 6: 48 89 d1 mov %rdx,%rcx - 9: 48 89 f2 mov %rsi,%rdx - c: 48 89 fe mov %rdi,%rsi - f: 48 8b 3d 0a 00 00 00 mov 10(%rip),%rdi - 16: ff 25 0c 00 00 00 jmpq *12(%rip) - ... - 20: .quad 0 # aligned on 8-byte boundary - 28: .quad 0 # aligned on 8-byte boundary - - - And the version for >=6 integer arguments: - - 0: 41 51 push %r9 - 2: ff 35 20 00 00 00 pushq 32(%rip) # 28 <ccall_adjustor+0x28> - 8: 4d 89 c1 mov %r8,%r9 - b: 49 89 c8 mov %rcx,%r8 - e: 48 89 d1 mov %rdx,%rcx - 11: 48 89 f2 mov %rsi,%rdx - 14: 48 89 fe mov %rdi,%rsi - 17: 48 8b 3d 12 00 00 00 mov 18(%rip),%rdi # 30 <ccall_adjustor+0x30> - 1e: ff 25 14 00 00 00 jmpq *20(%rip) # 38 <ccall_adjustor+0x38> - ... - 28: .quad 0 # aligned on 8-byte boundary - 30: .quad 0 # aligned on 8-byte boundary - 38: .quad 0 # aligned on 8-byte boundary */ { - int i = 0; - char *c; + int n_int_args = 0; // determine whether we have 6 or more integer arguments, // and therefore need to flush one to the stack. - for (c = typeString; *c != '\0'; c++) { - if (*c != 'f' && *c != 'd') i++; - if (i == 6) break; - } - - if (i < 6) { - ExecPage *page = allocateExecPage(); - if (page == NULL) { - barf("createAdjustor: failed to allocate executable page\n"); + for (char *c = typeString; *c != '\0'; c++) { + if (*c != 'f' && *c != 'd') { + n_int_args++; } - StgWord8 *adj_code = (StgWord8*) page; - - *(StgInt32 *)adj_code = 0x49c1894d; - *(StgInt32 *)(adj_code+0x4) = 0x8948c889; - *(StgInt32 *)(adj_code+0x8) = 0xf28948d1; - *(StgInt32 *)(adj_code+0xc) = 0x48fe8948; - *(StgInt32 *)(adj_code+0x10) = 0x000a3d8b; - *(StgInt32 *)(adj_code+0x14) = 0x25ff0000; - *(StgInt32 *)(adj_code+0x18) = 0x0000000c; - *(StgInt64 *)(adj_code+0x20) = (StgInt64)hptr; - *(StgInt64 *)(adj_code+0x28) = (StgInt64)wptr; - - freezeExecPage(page); - return page; - } - else - { - ExecPage *page = allocateExecPage(); - if (page == NULL) { - barf("createAdjustor: failed to allocate executable page\n"); + if (n_int_args == 6) { + break; } - StgWord8 *adj_code = (StgWord8*) page; - - *(StgInt32 *)adj_code = 0x35ff5141; - *(StgInt32 *)(adj_code+0x4) = 0x00000020; - *(StgInt32 *)(adj_code+0x8) = 0x49c1894d; - *(StgInt32 *)(adj_code+0xc) = 0x8948c889; - *(StgInt32 *)(adj_code+0x10) = 0xf28948d1; - *(StgInt32 *)(adj_code+0x14) = 0x48fe8948; - *(StgInt32 *)(adj_code+0x18) = 0x00123d8b; - *(StgInt32 *)(adj_code+0x1c) = 0x25ff0000; - *(StgInt32 *)(adj_code+0x20) = 0x00000014; - - *(StgInt64 *)(adj_code+0x28) = (StgInt64)obscure_ccall_ret_code; - *(StgInt64 *)(adj_code+0x30) = (StgInt64)hptr; - *(StgInt64 *)(adj_code+0x38) = (StgInt64)wptr; - - freezeExecPage(page); - return page; } + + if (n_int_args < 6) { + return alloc_adjustor(simple_ccall_pool, context); + } else { + return alloc_adjustor(complex_ccall_pool, context); + } + break; } default: @@ -177,14 +105,6 @@ createAdjustor(int cconv, StgStablePtr hptr, void freeHaskellFunctionPtr(void* ptr) { - if ( *(StgWord16 *)ptr == 0x894d ) { - freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x20)); - } else if ( *(StgWord16 *)ptr == 0x5141 ) { - freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x30)); - } else { - errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); - return; - } - - freeExecPage((ExecPage *) ptr); + struct AdjustorContext context = free_adjustor(ptr); + freeStablePtr(context.hptr); } diff --git a/rts/adjustor/NativeAmd64Asm.S b/rts/adjustor/NativeAmd64Asm.S new file mode 100644 index 0000000000..32e8da50fd --- /dev/null +++ b/rts/adjustor/NativeAmd64Asm.S @@ -0,0 +1,114 @@ +#include "include/ghcconfig.h" + +#define HPTR_OFF 0 +#define WPTR_OFF 8 + +#if defined(LEADING_UNDERSCORE) +#define CSYM(x) _ ## x +#else +#define CSYM(x) x +#endif + +#define DECLARE_CSYM(x) \ + .globl CSYM(x) ; \ + CSYM(x): + + +#if defined(darwin_HOST_OS) +/* + * Note [Adjustor templates live in data section on Darwin] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Apple Darwin's linker does not allow relocations in the text section; in + * principle this should be mitigated by only using local symbol references, as + * described in Note [Adjustors: Local symbol references]. However, for reasons + * that remain a mystery the assembler produces a relocations regardless. + * To work around this we must declare this code to be data. This is okay since + * we will never execute it here; it will always be copied to an executable + * page first. + */ +.section __DATA,__data +#endif + +/* + * Note [Adjustors: Local symbol references] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Some platforms (e.g. Darwin) don't allow relocations in text sections. However, + * the assembler tends to produce relocations for references to a global symbols. + * Consequently we must refer to RIP-relative things only via local symbols. + */ + +/* ------------------------------------------------------------------ + * Adjustor for a call with less than 6 integer arguments. + * ------------------------------------------------------------------ */ +DECLARE_CSYM(simple_ccall_adjustor) + // Shuffle the argument registers down + mov %r8, %r9 + mov %rcx, %r8 + mov %rdx, %rcx + mov %rsi, %rdx + mov %rdi, %rsi + mov lcl_simple_ccall_adjustor_context(%rip), %rax // load the address of the context + mov HPTR_OFF(%rax), %rdi // load the StablePtr + jmp *WPTR_OFF(%rax) // jump to the entrypoint + +.align 8 +DECLARE_CSYM(simple_ccall_adjustor_context) +lcl_simple_ccall_adjustor_context: // See Note [Adjustors: Local symbol references] + // this will be overwritten with a pointer to the AdjustorContext + .quad 0 +DECLARE_CSYM(simple_ccall_adjustor_end) + +/* ------------------------------------------------------------------ + * Adjustor for a call with 6 or more integer arguments. + * ------------------------------------------------------------------ */ +DECLARE_CSYM(complex_ccall_adjustor) + push %r9 + pushq complex_ccall_ret_code_ptr(%rip) + // Shuffle the argument registers down + mov %r8, %r9 + mov %rcx, %r8 + mov %rdx, %rcx + mov %rsi, %rdx + mov %rdi, %rsi + mov lcl_complex_ccall_adjustor_context(%rip), %rax // load the address of the context + mov HPTR_OFF(%rax), %rdi // load the StablePtr + jmpq *WPTR_OFF(%rax) // jump to the entrypoint + +.align 8 +complex_ccall_ret_code_ptr: + .quad complex_ccall_ret_code +DECLARE_CSYM(complex_ccall_adjustor_context) +lcl_complex_ccall_adjustor_context: // See Note [Adjustors: Local symbol references] + // this will be overwritten with a pointer to the AdjustorContext + .quad 0x0 +DECLARE_CSYM(complex_ccall_adjustor_end) + + +#if defined(darwin_HOST_OS) +/* See Note [Adjustor templates live in data section on Darwin]. */ +.section __TEXT,__text +#endif + +/* + When generating an adjustor thunk that uses the C calling + convention, we have to make sure that the thunk kicks off + the process of jumping into Haskell with a tail jump. Why? + Because as a result of jumping in into Haskell we may end + up freeing the very adjustor thunk we came from using + freeHaskellFunctionPtr(). Hence, we better not return to + the adjustor code on our way out, since it could by then + point to junk. + + The fix is readily at hand, just include the opcodes + for the C stack fixup code that we need to perform when + returning in some static piece of memory and arrange + to return to it before tail jumping from the adjustor thunk. +*/ +complex_ccall_ret_code: + addq $0x8, %rsp + ret + +/* mark stack as nonexecutable */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",@progbits +#endif diff --git a/rts/ghc.mk b/rts/ghc.mk index d9a1d60c21..e00de976f1 100644 --- a/rts/ghc.mk +++ b/rts/ghc.mk @@ -76,6 +76,7 @@ ifneq "$(findstring $(TargetArch_CPP), x86_64)" "" ifneq "$(findstring $(TargetOS_CPP), mingw32)" "" rts_C_SRCS += rts/adjustor/NativeAmd64Mingw.c else +rts_S_SRCS += rts/adjustor/NativeAmd64Asm.S rts_C_SRCS += rts/adjustor/NativeAmd64.c endif else diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in index 6465a1f8e6..9b8c51a644 100644 --- a/rts/rts.cabal.in +++ b/rts/rts.cabal.in @@ -458,6 +458,7 @@ library if opsys(mingw32) c-sources: adjustor/NativeAmd64Mingw.c else + asm-sources: adjustor/NativeAmd64Asm.S c-sources: adjustor/NativeAmd64.c if arch(ppc) || arch(ppc64) asm-sources: AdjustorAsm.S |