summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--rts/adjustor/NativeAmd64.c168
-rw-r--r--rts/adjustor/NativeAmd64Asm.S114
-rw-r--r--rts/ghc.mk1
-rw-r--r--rts/rts.cabal.in1
4 files changed, 160 insertions, 124 deletions
diff --git a/rts/adjustor/NativeAmd64.c b/rts/adjustor/NativeAmd64.c
index e58a7b79b0..19e186a6f4 100644
--- a/rts/adjustor/NativeAmd64.c
+++ b/rts/adjustor/NativeAmd64.c
@@ -7,50 +7,44 @@
#include "RtsUtils.h"
#include "StablePtr.h"
-
-#if defined(LEADING_UNDERSCORE)
-#define UNDERSCORE "_"
-#else
-#define UNDERSCORE ""
-#endif
-
-/*
- Now here's something obscure for you:
-
- When generating an adjustor thunk that uses the C calling
- convention, we have to make sure that the thunk kicks off
- the process of jumping into Haskell with a tail jump. Why?
- Because as a result of jumping in into Haskell we may end
- up freeing the very adjustor thunk we came from using
- freeHaskellFunctionPtr(). Hence, we better not return to
- the adjustor code on our way out, since it could by then
- point to junk.
-
- The fix is readily at hand, just include the opcodes
- for the C stack fixup code that we need to perform when
- returning in some static piece of memory and arrange
- to return to it before tail jumping from the adjustor thunk.
-*/
-static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
+#include "Adjustor.h"
+#include "AdjustorPool.h"
+
+#define DECLARE_ADJUSTOR_TEMPLATE(NAME) \
+ extern uint8_t NAME ## _adjustor; \
+ extern uint8_t NAME ## _adjustor_context; \
+ extern uint8_t NAME ## _adjustor_end; \
+ const struct AdjustorTemplate NAME ## _adjustor_template = { \
+ .code_start = (uint8_t *) &NAME ## _adjustor, \
+ .code_end = (uint8_t *) &NAME ## _adjustor_end, \
+ .context_ptr = (const struct AdjustorContext **) &NAME ## _adjustor_context, \
+ };
+
+/* adjustors to handle calls with less than 6 integer arguments */
+DECLARE_ADJUSTOR_TEMPLATE(simple_ccall);
+static struct AdjustorPool *simple_ccall_pool;
+
+/* adjustors to handle calls with 6 or more integer arguments */
+DECLARE_ADJUSTOR_TEMPLATE(complex_ccall);
+static struct AdjustorPool *complex_ccall_pool;
+
+void initAdjustors()
{
- __asm__ (
- ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
- UNDERSCORE "obscure_ccall_ret_code:\n\t"
- "addq $0x8, %rsp\n\t"
- "ret"
- );
+ simple_ccall_pool = new_adjustor_pool_from_template(&simple_ccall_adjustor_template);
+ complex_ccall_pool = new_adjustor_pool_from_template(&complex_ccall_adjustor_template);
}
-extern void obscure_ccall_ret_code(void);
-
-void initAdjustors() { }
-
void*
createAdjustor(int cconv, StgStablePtr hptr,
StgFunPtr wptr,
char *typeString
)
{
+ struct AdjustorContext context = {
+ .hptr = hptr,
+ .wptr = wptr,
+ };
+
switch (cconv)
{
case 1: /* _ccall */
@@ -79,94 +73,28 @@ createAdjustor(int cconv, StgStablePtr hptr,
This unfortunately means that the type of the stub function
must have a dummy argument for the original return address
pointer inserted just after the 6th integer argument.
-
- Code for the simple case:
-
- 0: 4d 89 c1 mov %r8,%r9
- 3: 49 89 c8 mov %rcx,%r8
- 6: 48 89 d1 mov %rdx,%rcx
- 9: 48 89 f2 mov %rsi,%rdx
- c: 48 89 fe mov %rdi,%rsi
- f: 48 8b 3d 0a 00 00 00 mov 10(%rip),%rdi
- 16: ff 25 0c 00 00 00 jmpq *12(%rip)
- ...
- 20: .quad 0 # aligned on 8-byte boundary
- 28: .quad 0 # aligned on 8-byte boundary
-
-
- And the version for >=6 integer arguments:
-
- 0: 41 51 push %r9
- 2: ff 35 20 00 00 00 pushq 32(%rip) # 28 <ccall_adjustor+0x28>
- 8: 4d 89 c1 mov %r8,%r9
- b: 49 89 c8 mov %rcx,%r8
- e: 48 89 d1 mov %rdx,%rcx
- 11: 48 89 f2 mov %rsi,%rdx
- 14: 48 89 fe mov %rdi,%rsi
- 17: 48 8b 3d 12 00 00 00 mov 18(%rip),%rdi # 30 <ccall_adjustor+0x30>
- 1e: ff 25 14 00 00 00 jmpq *20(%rip) # 38 <ccall_adjustor+0x38>
- ...
- 28: .quad 0 # aligned on 8-byte boundary
- 30: .quad 0 # aligned on 8-byte boundary
- 38: .quad 0 # aligned on 8-byte boundary
*/
{
- int i = 0;
- char *c;
+ int n_int_args = 0;
// determine whether we have 6 or more integer arguments,
// and therefore need to flush one to the stack.
- for (c = typeString; *c != '\0'; c++) {
- if (*c != 'f' && *c != 'd') i++;
- if (i == 6) break;
- }
-
- if (i < 6) {
- ExecPage *page = allocateExecPage();
- if (page == NULL) {
- barf("createAdjustor: failed to allocate executable page\n");
+ for (char *c = typeString; *c != '\0'; c++) {
+ if (*c != 'f' && *c != 'd') {
+ n_int_args++;
}
- StgWord8 *adj_code = (StgWord8*) page;
-
- *(StgInt32 *)adj_code = 0x49c1894d;
- *(StgInt32 *)(adj_code+0x4) = 0x8948c889;
- *(StgInt32 *)(adj_code+0x8) = 0xf28948d1;
- *(StgInt32 *)(adj_code+0xc) = 0x48fe8948;
- *(StgInt32 *)(adj_code+0x10) = 0x000a3d8b;
- *(StgInt32 *)(adj_code+0x14) = 0x25ff0000;
- *(StgInt32 *)(adj_code+0x18) = 0x0000000c;
- *(StgInt64 *)(adj_code+0x20) = (StgInt64)hptr;
- *(StgInt64 *)(adj_code+0x28) = (StgInt64)wptr;
-
- freezeExecPage(page);
- return page;
- }
- else
- {
- ExecPage *page = allocateExecPage();
- if (page == NULL) {
- barf("createAdjustor: failed to allocate executable page\n");
+ if (n_int_args == 6) {
+ break;
}
- StgWord8 *adj_code = (StgWord8*) page;
-
- *(StgInt32 *)adj_code = 0x35ff5141;
- *(StgInt32 *)(adj_code+0x4) = 0x00000020;
- *(StgInt32 *)(adj_code+0x8) = 0x49c1894d;
- *(StgInt32 *)(adj_code+0xc) = 0x8948c889;
- *(StgInt32 *)(adj_code+0x10) = 0xf28948d1;
- *(StgInt32 *)(adj_code+0x14) = 0x48fe8948;
- *(StgInt32 *)(adj_code+0x18) = 0x00123d8b;
- *(StgInt32 *)(adj_code+0x1c) = 0x25ff0000;
- *(StgInt32 *)(adj_code+0x20) = 0x00000014;
-
- *(StgInt64 *)(adj_code+0x28) = (StgInt64)obscure_ccall_ret_code;
- *(StgInt64 *)(adj_code+0x30) = (StgInt64)hptr;
- *(StgInt64 *)(adj_code+0x38) = (StgInt64)wptr;
-
- freezeExecPage(page);
- return page;
}
+
+ if (n_int_args < 6) {
+ return alloc_adjustor(simple_ccall_pool, context);
+ } else {
+ return alloc_adjustor(complex_ccall_pool, context);
+ }
+ break;
}
default:
@@ -177,14 +105,6 @@ createAdjustor(int cconv, StgStablePtr hptr,
void freeHaskellFunctionPtr(void* ptr)
{
- if ( *(StgWord16 *)ptr == 0x894d ) {
- freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x20));
- } else if ( *(StgWord16 *)ptr == 0x5141 ) {
- freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x30));
- } else {
- errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
- return;
- }
-
- freeExecPage((ExecPage *) ptr);
+ struct AdjustorContext context = free_adjustor(ptr);
+ freeStablePtr(context.hptr);
}
diff --git a/rts/adjustor/NativeAmd64Asm.S b/rts/adjustor/NativeAmd64Asm.S
new file mode 100644
index 0000000000..32e8da50fd
--- /dev/null
+++ b/rts/adjustor/NativeAmd64Asm.S
@@ -0,0 +1,114 @@
+#include "include/ghcconfig.h"
+
+#define HPTR_OFF 0
+#define WPTR_OFF 8
+
+#if defined(LEADING_UNDERSCORE)
+#define CSYM(x) _ ## x
+#else
+#define CSYM(x) x
+#endif
+
+#define DECLARE_CSYM(x) \
+ .globl CSYM(x) ; \
+ CSYM(x):
+
+
+#if defined(darwin_HOST_OS)
+/*
+ * Note [Adjustor templates live in data section on Darwin]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Apple Darwin's linker does not allow relocations in the text section; in
+ * principle this should be mitigated by only using local symbol references, as
+ * described in Note [Adjustors: Local symbol references]. However, for reasons
+ * that remain a mystery the assembler produces a relocations regardless.
+ * To work around this we must declare this code to be data. This is okay since
+ * we will never execute it here; it will always be copied to an executable
+ * page first.
+ */
+.section __DATA,__data
+#endif
+
+/*
+ * Note [Adjustors: Local symbol references]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Some platforms (e.g. Darwin) don't allow relocations in text sections. However,
+ * the assembler tends to produce relocations for references to a global symbols.
+ * Consequently we must refer to RIP-relative things only via local symbols.
+ */
+
+/* ------------------------------------------------------------------
+ * Adjustor for a call with less than 6 integer arguments.
+ * ------------------------------------------------------------------ */
+DECLARE_CSYM(simple_ccall_adjustor)
+ // Shuffle the argument registers down
+ mov %r8, %r9
+ mov %rcx, %r8
+ mov %rdx, %rcx
+ mov %rsi, %rdx
+ mov %rdi, %rsi
+ mov lcl_simple_ccall_adjustor_context(%rip), %rax // load the address of the context
+ mov HPTR_OFF(%rax), %rdi // load the StablePtr
+ jmp *WPTR_OFF(%rax) // jump to the entrypoint
+
+.align 8
+DECLARE_CSYM(simple_ccall_adjustor_context)
+lcl_simple_ccall_adjustor_context: // See Note [Adjustors: Local symbol references]
+ // this will be overwritten with a pointer to the AdjustorContext
+ .quad 0
+DECLARE_CSYM(simple_ccall_adjustor_end)
+
+/* ------------------------------------------------------------------
+ * Adjustor for a call with 6 or more integer arguments.
+ * ------------------------------------------------------------------ */
+DECLARE_CSYM(complex_ccall_adjustor)
+ push %r9
+ pushq complex_ccall_ret_code_ptr(%rip)
+ // Shuffle the argument registers down
+ mov %r8, %r9
+ mov %rcx, %r8
+ mov %rdx, %rcx
+ mov %rsi, %rdx
+ mov %rdi, %rsi
+ mov lcl_complex_ccall_adjustor_context(%rip), %rax // load the address of the context
+ mov HPTR_OFF(%rax), %rdi // load the StablePtr
+ jmpq *WPTR_OFF(%rax) // jump to the entrypoint
+
+.align 8
+complex_ccall_ret_code_ptr:
+ .quad complex_ccall_ret_code
+DECLARE_CSYM(complex_ccall_adjustor_context)
+lcl_complex_ccall_adjustor_context: // See Note [Adjustors: Local symbol references]
+ // this will be overwritten with a pointer to the AdjustorContext
+ .quad 0x0
+DECLARE_CSYM(complex_ccall_adjustor_end)
+
+
+#if defined(darwin_HOST_OS)
+/* See Note [Adjustor templates live in data section on Darwin]. */
+.section __TEXT,__text
+#endif
+
+/*
+ When generating an adjustor thunk that uses the C calling
+ convention, we have to make sure that the thunk kicks off
+ the process of jumping into Haskell with a tail jump. Why?
+ Because as a result of jumping in into Haskell we may end
+ up freeing the very adjustor thunk we came from using
+ freeHaskellFunctionPtr(). Hence, we better not return to
+ the adjustor code on our way out, since it could by then
+ point to junk.
+
+ The fix is readily at hand, just include the opcodes
+ for the C stack fixup code that we need to perform when
+ returning in some static piece of memory and arrange
+ to return to it before tail jumping from the adjustor thunk.
+*/
+complex_ccall_ret_code:
+ addq $0x8, %rsp
+ ret
+
+/* mark stack as nonexecutable */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",@progbits
+#endif
diff --git a/rts/ghc.mk b/rts/ghc.mk
index d9a1d60c21..e00de976f1 100644
--- a/rts/ghc.mk
+++ b/rts/ghc.mk
@@ -76,6 +76,7 @@ ifneq "$(findstring $(TargetArch_CPP), x86_64)" ""
ifneq "$(findstring $(TargetOS_CPP), mingw32)" ""
rts_C_SRCS += rts/adjustor/NativeAmd64Mingw.c
else
+rts_S_SRCS += rts/adjustor/NativeAmd64Asm.S
rts_C_SRCS += rts/adjustor/NativeAmd64.c
endif
else
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
index 6465a1f8e6..9b8c51a644 100644
--- a/rts/rts.cabal.in
+++ b/rts/rts.cabal.in
@@ -458,6 +458,7 @@ library
if opsys(mingw32)
c-sources: adjustor/NativeAmd64Mingw.c
else
+ asm-sources: adjustor/NativeAmd64Asm.S
c-sources: adjustor/NativeAmd64.c
if arch(ppc) || arch(ppc64)
asm-sources: AdjustorAsm.S