summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2022-02-06 21:36:52 -0500
committerMarge Bot <ben+marge-bot@smart-cactus.org>2022-04-06 13:02:40 -0400
commit9b645ee1a9fff64b66b36dc73d8809ff82025829 (patch)
tree4fa2f9927eade7982de16a0d1572a4f2ab8e767c
parent3f31825bf3126073ab527d5e6661bf0aff4e8c66 (diff)
downloadhaskell-9b645ee1a9fff64b66b36dc73d8809ff82025829.tar.gz
adjustors/i386: Use AdjustorPool
In !7511 (closed) I introduced a new allocator for adjustors, AdjustorPool, which eliminates the address space fragmentation issues which adjustors can introduce. In that work I focused on amd64 since that was the platform where I observed issues. However, in #21132 we noted that the size of adjustors is also a cause of CI fragility on i386. In this MR I port i386 to use AdjustorPool. Sadly the complexity of the i386 adjustor code does cause require a bit of generalization which makes the code a bit more opaque but such is the world. Closes #21132.
-rw-r--r--rts/AdjustorAsm.S59
-rw-r--r--rts/adjustor/Nativei386.c176
-rw-r--r--rts/adjustor/Nativei386Asm.S57
-rw-r--r--rts/ghc.mk2
-rw-r--r--rts/rts.cabal.in2
5 files changed, 163 insertions, 133 deletions
diff --git a/rts/AdjustorAsm.S b/rts/AdjustorAsm.S
index 59ac2b49cb..cc2f99df62 100644
--- a/rts/AdjustorAsm.S
+++ b/rts/AdjustorAsm.S
@@ -21,7 +21,7 @@
#define WS 4
#define LOAD lwz
#define STORE stw
-#endif
+#endif /* defined(powerpc64_HOST_ARCH) */
/* Some info about stack frame layout */
#define LINK_SLOT (2*WS)
@@ -42,7 +42,8 @@
.csect .text[PR]
#else
.text
-#endif
+#endif /* defined(aix_HOST_OS) */
+
#if LEADING_UNDERSCORE
.globl _adjustorCode
_adjustorCode:
@@ -53,7 +54,7 @@ _adjustorCode:
in createAdjustor().
*/
adjustorCode:
-#endif
+#endif /* LEADING_UNDERSCORE */
/* On entry, r2 will point to the AdjustorStub data structure. */
/* save the link */
@@ -66,7 +67,7 @@ adjustorCode:
stdux 1, 1, 12
#else
stwux 1, 1, 12
-#endif
+#endif /* defined(powerpc64_HOST_ARCH) */
/* Save some regs so that we can use them.
Note that we use the "Red Zone" below the stack pointer.
@@ -117,54 +118,8 @@ L2:
LOAD 0, LINK_SLOT(1)
mtlr 0
blr
-#endif
-
-/* ********************************* i386 ********************************** */
-
-#elif defined(i386_HOST_ARCH)
-
-#define WS 4
-#define RETVAL_OFF 5
-#define HEADER_BYTES 8
-
-#define HPTR_OFF HEADER_BYTES
-#define WPTR_OFF (HEADER_BYTES + 1*WS)
-#define FRAMESIZE_OFF (HEADER_BYTES + 2*WS)
-#define ARGWORDS_OFF (HEADER_BYTES + 3*WS)
-
-#if defined(LEADING_UNDERSCORE)
- .globl _adjustorCode
-_adjustorCode:
-#else
- .globl adjustorCode
-adjustorCode:
-#endif
- popl %eax
- subl $RETVAL_OFF, %eax
-
- pushl %ebp
- movl %esp, %ebp
-
- subl FRAMESIZE_OFF(%eax), %esp
-
- pushl %esi
- pushl %edi
-
- leal 8(%ebp), %esi
- leal 12(%esp), %edi
- movl ARGWORDS_OFF(%eax), %ecx
- rep
- movsl
-
- popl %edi
- popl %esi
-
- pushl HPTR_OFF(%eax)
- call *WPTR_OFF(%eax)
-
- leave
- ret
-#endif
+#endif /* !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)) */
+#endif /* defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) */
/* mark stack as nonexecutable */
#if defined(__linux__) && defined(__ELF__)
diff --git a/rts/adjustor/Nativei386.c b/rts/adjustor/Nativei386.c
index 55ad1bf310..71143b00d3 100644
--- a/rts/adjustor/Nativei386.c
+++ b/rts/adjustor/Nativei386.c
@@ -8,97 +8,119 @@
#include "RtsUtils.h"
#include "StablePtr.h"
#include "Adjustor.h"
+#include "AdjustorPool.h"
#if defined(_WIN32)
#include <windows.h>
#endif
-extern void adjustorCode(void);
+// Defined in Nativei386Asm.S
+extern void ccall_adjustor(void);
-/* !!! !!! WARNING: !!! !!!
- * This structure is accessed from AdjustorAsm.s
- * Any changes here have to be mirrored in the offsets there.
- */
+/***************************************
+ * ccall adjustor
+ ***************************************/
-typedef struct AdjustorStub {
- unsigned char call[8];
+// Matches constants in Nativei386Asm.S
+struct CCallContext {
StgStablePtr hptr;
StgFunPtr wptr;
StgInt frame_size;
StgInt argument_size;
-} AdjustorStub;
+};
-void initAdjustors() { }
+#define CCALL_CONTEXT_LEN sizeof(struct CCallContext)
+#define CCALL_ADJUSTOR_LEN 10
-void*
-createAdjustor(int cconv, StgStablePtr hptr,
- StgFunPtr wptr,
- char *typeString STG_UNUSED
- )
+static void mk_ccall_adjustor(uint8_t *code, const void *context, void *user_data STG_UNUSED)
{
- switch (cconv)
- {
- case 0: /* _stdcall */
+ /*
+ Most of the trickiness here is due to the need to keep the
+ stack pointer 16-byte aligned (see #5250). That means we
+ can't just push another argument on the stack and call the
+ wrapper, we may have to shuffle the whole argument block.
+ */
+
+ // MOVL context, %eax
+ code[0] = 0xb8;
+ *(const void **) &code[1] = context;
+
+ // JMP ccall_adjustor
+ int32_t jmp_off = (uint8_t *) &ccall_adjustor - &code[10];
+ code[5] = 0xe9;
+ *(int32_t *) &code[6] = jmp_off;
+}
+
+/* adjustors to handle ccalls */
+static struct AdjustorPool *ccall_pool;
+
+/***************************************
+ * stdcall adjustor
+ ***************************************/
+
#if !defined(darwin_HOST_OS)
+#define STDCALL_ADJUSTOR_LEN 0x0c
+
+static void mk_stdcall_adjustor(uint8_t *code, const void *context, void *user_data STG_UNUSED)
+{
/* Magic constant computed by inspecting the code length of
the following assembly language snippet
(offset and machine code prefixed):
- <0>: 58 popl %eax # temp. remove ret addr..
- <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to
- # hold a StgStablePtr
- <6>: 50 pushl %eax # put back ret. addr
- <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr
- <c>: ff e0 jmp %eax # and jump to it.
+ <0>: 58 popl %eax # temp. remove return addr.
+ <1>: b9 fd fc fe fa movl 0xfafefcfd, %ecx # constant is addr. of AdjustorContext
+ <6>: ff 31 pushl (%ecx) # push hptr
+ <8>: 50 pushl %eax # put back return addr.
+ <9>: ff 61 04 jmp *4(%ecx) # and jump to wptr
# the callee cleans up the stack
*/
+ code[0x00] = 0x58; /* popl %eax */
- {
- ExecPage *page = allocateExecPage();
- if (page == NULL) {
- barf("createAdjustor: failed to allocate executable page\n");
- }
- uint8_t *adj_code = (uint8_t *) page;
- adj_code[0x00] = 0x58; /* popl %eax */
+ code[0x01] = 0xb9; /* movl context (which is a dword immediate), %ecx */
+ *((const void **) &(code[0x02])) = context;
- adj_code[0x01] = 0x68; /* pushl hptr (which is a dword immediate ) */
- *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
+ code[0x06] = 0xff; /* pushl (%ecx) */
+ code[0x07] = 0x31;
- adj_code[0x06] = 0x50; /* pushl %eax */
+ code[0x08] = 0x50; /* pushl %eax */
- adj_code[0x07] = 0xb8; /* movl $wptr, %eax */
- *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
+ code[0x09] = 0xff; /* jmp *4(%ecx) */
+ code[0x0a] = 0x61;
+ code[0x0b] = 0x04;
+}
- adj_code[0x0c] = 0xff; /* jmp %eax */
- adj_code[0x0d] = 0xe0;
+static struct AdjustorPool *stdcall_pool;
+#endif
- freezeExecPage(page);
- return page;
- }
+void initAdjustors() {
+ ccall_pool = new_adjustor_pool(sizeof(struct CCallContext), CCALL_ADJUSTOR_LEN, mk_ccall_adjustor, NULL);
+#if !defined(darwin_HOST_OS)
+ stdcall_pool = new_adjustor_pool(sizeof(struct AdjustorContext), STDCALL_ADJUSTOR_LEN, mk_stdcall_adjustor, NULL);
+#endif
+}
+
+void*
+createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr,
+ char *typeString STG_UNUSED
+ )
+{
+
+ switch (cconv)
+ {
+ case 0: { /* _stdcall */
+#if defined(darwin_HOST_OS)
+ barf("stdcall is not supported on Darwin")
+#else
+ struct AdjustorContext context = {
+ .hptr = hptr,
+ .wptr = wptr,
+ };
+ return alloc_adjustor(stdcall_pool, &context);
#endif /* !defined(darwin_HOST_OS) */
+ }
case 1: /* _ccall */
{
- /*
- Most of the trickiness here is due to the need to keep the
- stack pointer 16-byte aligned (see #5250). That means we
- can't just push another argument on the stack and call the
- wrapper, we may have to shuffle the whole argument block.
-
- We offload most of the work to AdjustorAsm.S.
- */
- ExecPage *page = allocateExecPage();
- if (page == NULL) {
- barf("createAdjustor: failed to allocate executable page\n");
- }
- AdjustorStub *adjustorStub = (AdjustorStub *) page;
- int sz = totalArgumentSize(typeString);
-
- adjustorStub->call[0] = 0xe8;
- *(long*)&adjustorStub->call[1] = ((char*)&adjustorCode) - ((char*)page + 5);
- adjustorStub->hptr = hptr;
- adjustorStub->wptr = wptr;
-
// The adjustor puts the following things on the stack:
// 1.) %ebp link
// 2.) padding and (a copy of) the arguments
@@ -107,16 +129,21 @@ createAdjustor(int cconv, StgStablePtr hptr,
// 5.) return address (for returning to the adjustor)
// All these have to add up to a multiple of 16.
+ int sz = totalArgumentSize(typeString);
// first, include everything in frame_size
- adjustorStub->frame_size = sz * 4 + 16;
+ StgInt frame_size = sz * 4 + 16;
// align to 16 bytes
- adjustorStub->frame_size = (adjustorStub->frame_size + 15) & ~15;
+ frame_size = (frame_size + 15) & ~15;
// only count 2.) and 3.) as part of frame_size
- adjustorStub->frame_size -= 12;
- adjustorStub->argument_size = sz;
-
- freezeExecPage(page);
- return page;
+ frame_size -= 12;
+
+ struct CCallContext context = {
+ .hptr = hptr,
+ .wptr = wptr,
+ .frame_size = frame_size,
+ .argument_size = sz,
+ };
+ return alloc_adjustor(ccall_pool, &context);
}
default:
@@ -127,16 +154,7 @@ createAdjustor(int cconv, StgStablePtr hptr,
void
freeHaskellFunctionPtr(void* ptr)
{
- if ( *(unsigned char*)ptr != 0xe8 &&
- *(unsigned char*)ptr != 0x58 ) {
- errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
- return;
- }
- if (*(unsigned char*)ptr == 0xe8) { /* Aha, a ccall adjustor! */
- freeStablePtr(((AdjustorStub*)ptr)->hptr);
- } else {
- freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
- }
-
- freeExecPage((ExecPage *) ptr);
+ struct AdjustorContext context;
+ free_adjustor(ptr, &context);
+ freeStablePtr(context.hptr);
}
diff --git a/rts/adjustor/Nativei386Asm.S b/rts/adjustor/Nativei386Asm.S
new file mode 100644
index 0000000000..4ed5a0359e
--- /dev/null
+++ b/rts/adjustor/Nativei386Asm.S
@@ -0,0 +1,57 @@
+#include "include/ghcconfig.h"
+
+#define WS 4
+
+#define RETVAL_OFF 5
+
+// Mathces layout of struct CCallAdjustor in Nativei386.c
+#define HPTR_OFF (0*WS)
+#define WPTR_OFF (1*WS)
+#define FRAME_SIZE_OFF (2*WS)
+#define ARGUMENT_WORDS_OFF (3*WS)
+
+#if defined(LEADING_UNDERSCORE)
+#define CSYM(x) _ ## x
+#else
+#define CSYM(x) x
+#endif
+
+#define DECLARE_CSYM(x) \
+ .globl CSYM(x) ; \
+ CSYM(x):
+
+DECLARE_CSYM(ccall_adjustor)
+ // At entry %eax contains a pointer to the CCallContext
+
+ // Record a frame pointer. Paired with the `leave` below.
+ pushl %ebp
+ movl %esp, %ebp
+
+ subl FRAME_SIZE_OFF(%eax), %esp
+
+ // Save %esi and %edi as we need to clobber them to perform the shuffle
+ pushl %esi
+ pushl %edi
+
+ // Shuffle the stack down...
+ leal 8(%ebp), %esi
+ leal 12(%esp), %edi
+ movl ARGUMENT_WORDS_OFF(%eax), %ecx
+ rep
+ movsl
+
+ // Restore %edi and %esi
+ popl %edi
+ popl %esi
+
+ // Perform the call
+ pushl HPTR_OFF(%eax)
+ call *WPTR_OFF(%eax)
+
+ leave
+ ret
+
+/* mark stack as nonexecutable */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",@progbits
+#endif
diff --git a/rts/ghc.mk b/rts/ghc.mk
index 4c7a54977f..36a82f9f2c 100644
--- a/rts/ghc.mk
+++ b/rts/ghc.mk
@@ -69,7 +69,7 @@ ifeq "$(UseLibffiForAdjustors)" "YES"
rts_C_SRCS += rts/adjustor/LibffiAdjustor.c
else
ifneq "$(findstring $(TargetArch_CPP), i386)" ""
-rts_S_SRCS += rts/AdjustorAsm.S
+rts_S_SRCS += rts/adjustor/Nativei386Asm.S
rts_C_SRCS += rts/adjustor/Nativei386.c
else
ifneq "$(findstring $(TargetArch_CPP), x86_64)" ""
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
index 641fccc437..5e53b38be2 100644
--- a/rts/rts.cabal.in
+++ b/rts/rts.cabal.in
@@ -452,7 +452,7 @@ library
else
-- Use GHC's native adjustors
if arch(i386)
- asm-sources: AdjustorAsm.S
+ asm-sources: adjustor/Nativei386Asm.S
c-sources: adjustor/Nativei386.c
if arch(x86_64)
if os(mingw32)