From 9b645ee1a9fff64b66b36dc73d8809ff82025829 Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Sun, 6 Feb 2022 21:36:52 -0500 Subject: adjustors/i386: Use AdjustorPool In !7511 (closed) I introduced a new allocator for adjustors, AdjustorPool, which eliminates the address space fragmentation issues which adjustors can introduce. In that work I focused on amd64 since that was the platform where I observed issues. However, in #21132 we noted that the size of adjustors is also a cause of CI fragility on i386. In this MR I port i386 to use AdjustorPool. Sadly the complexity of the i386 adjustor code does cause require a bit of generalization which makes the code a bit more opaque but such is the world. Closes #21132. --- rts/AdjustorAsm.S | 59 ++------------- rts/adjustor/Nativei386.c | 176 ++++++++++++++++++++++++------------------- rts/adjustor/Nativei386Asm.S | 57 ++++++++++++++ rts/ghc.mk | 2 +- rts/rts.cabal.in | 2 +- 5 files changed, 163 insertions(+), 133 deletions(-) create mode 100644 rts/adjustor/Nativei386Asm.S diff --git a/rts/AdjustorAsm.S b/rts/AdjustorAsm.S index 59ac2b49cb..cc2f99df62 100644 --- a/rts/AdjustorAsm.S +++ b/rts/AdjustorAsm.S @@ -21,7 +21,7 @@ #define WS 4 #define LOAD lwz #define STORE stw -#endif +#endif /* defined(powerpc64_HOST_ARCH) */ /* Some info about stack frame layout */ #define LINK_SLOT (2*WS) @@ -42,7 +42,8 @@ .csect .text[PR] #else .text -#endif +#endif /* defined(aix_HOST_OS) */ + #if LEADING_UNDERSCORE .globl _adjustorCode _adjustorCode: @@ -53,7 +54,7 @@ _adjustorCode: in createAdjustor(). */ adjustorCode: -#endif +#endif /* LEADING_UNDERSCORE */ /* On entry, r2 will point to the AdjustorStub data structure. */ /* save the link */ @@ -66,7 +67,7 @@ adjustorCode: stdux 1, 1, 12 #else stwux 1, 1, 12 -#endif +#endif /* defined(powerpc64_HOST_ARCH) */ /* Save some regs so that we can use them. Note that we use the "Red Zone" below the stack pointer. @@ -117,54 +118,8 @@ L2: LOAD 0, LINK_SLOT(1) mtlr 0 blr -#endif - -/* ********************************* i386 ********************************** */ - -#elif defined(i386_HOST_ARCH) - -#define WS 4 -#define RETVAL_OFF 5 -#define HEADER_BYTES 8 - -#define HPTR_OFF HEADER_BYTES -#define WPTR_OFF (HEADER_BYTES + 1*WS) -#define FRAMESIZE_OFF (HEADER_BYTES + 2*WS) -#define ARGWORDS_OFF (HEADER_BYTES + 3*WS) - -#if defined(LEADING_UNDERSCORE) - .globl _adjustorCode -_adjustorCode: -#else - .globl adjustorCode -adjustorCode: -#endif - popl %eax - subl $RETVAL_OFF, %eax - - pushl %ebp - movl %esp, %ebp - - subl FRAMESIZE_OFF(%eax), %esp - - pushl %esi - pushl %edi - - leal 8(%ebp), %esi - leal 12(%esp), %edi - movl ARGWORDS_OFF(%eax), %ecx - rep - movsl - - popl %edi - popl %esi - - pushl HPTR_OFF(%eax) - call *WPTR_OFF(%eax) - - leave - ret -#endif +#endif /* !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)) */ +#endif /* defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) */ /* mark stack as nonexecutable */ #if defined(__linux__) && defined(__ELF__) diff --git a/rts/adjustor/Nativei386.c b/rts/adjustor/Nativei386.c index 55ad1bf310..71143b00d3 100644 --- a/rts/adjustor/Nativei386.c +++ b/rts/adjustor/Nativei386.c @@ -8,97 +8,119 @@ #include "RtsUtils.h" #include "StablePtr.h" #include "Adjustor.h" +#include "AdjustorPool.h" #if defined(_WIN32) #include #endif -extern void adjustorCode(void); +// Defined in Nativei386Asm.S +extern void ccall_adjustor(void); -/* !!! !!! WARNING: !!! !!! - * This structure is accessed from AdjustorAsm.s - * Any changes here have to be mirrored in the offsets there. - */ +/*************************************** + * ccall adjustor + ***************************************/ -typedef struct AdjustorStub { - unsigned char call[8]; +// Matches constants in Nativei386Asm.S +struct CCallContext { StgStablePtr hptr; StgFunPtr wptr; StgInt frame_size; StgInt argument_size; -} AdjustorStub; +}; -void initAdjustors() { } +#define CCALL_CONTEXT_LEN sizeof(struct CCallContext) +#define CCALL_ADJUSTOR_LEN 10 -void* -createAdjustor(int cconv, StgStablePtr hptr, - StgFunPtr wptr, - char *typeString STG_UNUSED - ) +static void mk_ccall_adjustor(uint8_t *code, const void *context, void *user_data STG_UNUSED) { - switch (cconv) - { - case 0: /* _stdcall */ + /* + Most of the trickiness here is due to the need to keep the + stack pointer 16-byte aligned (see #5250). That means we + can't just push another argument on the stack and call the + wrapper, we may have to shuffle the whole argument block. + */ + + // MOVL context, %eax + code[0] = 0xb8; + *(const void **) &code[1] = context; + + // JMP ccall_adjustor + int32_t jmp_off = (uint8_t *) &ccall_adjustor - &code[10]; + code[5] = 0xe9; + *(int32_t *) &code[6] = jmp_off; +} + +/* adjustors to handle ccalls */ +static struct AdjustorPool *ccall_pool; + +/*************************************** + * stdcall adjustor + ***************************************/ + #if !defined(darwin_HOST_OS) +#define STDCALL_ADJUSTOR_LEN 0x0c + +static void mk_stdcall_adjustor(uint8_t *code, const void *context, void *user_data STG_UNUSED) +{ /* Magic constant computed by inspecting the code length of the following assembly language snippet (offset and machine code prefixed): - <0>: 58 popl %eax # temp. remove ret addr.. - <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to - # hold a StgStablePtr - <6>: 50 pushl %eax # put back ret. addr - <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr - : ff e0 jmp %eax # and jump to it. + <0>: 58 popl %eax # temp. remove return addr. + <1>: b9 fd fc fe fa movl 0xfafefcfd, %ecx # constant is addr. of AdjustorContext + <6>: ff 31 pushl (%ecx) # push hptr + <8>: 50 pushl %eax # put back return addr. + <9>: ff 61 04 jmp *4(%ecx) # and jump to wptr # the callee cleans up the stack */ + code[0x00] = 0x58; /* popl %eax */ - { - ExecPage *page = allocateExecPage(); - if (page == NULL) { - barf("createAdjustor: failed to allocate executable page\n"); - } - uint8_t *adj_code = (uint8_t *) page; - adj_code[0x00] = 0x58; /* popl %eax */ + code[0x01] = 0xb9; /* movl context (which is a dword immediate), %ecx */ + *((const void **) &(code[0x02])) = context; - adj_code[0x01] = 0x68; /* pushl hptr (which is a dword immediate ) */ - *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr; + code[0x06] = 0xff; /* pushl (%ecx) */ + code[0x07] = 0x31; - adj_code[0x06] = 0x50; /* pushl %eax */ + code[0x08] = 0x50; /* pushl %eax */ - adj_code[0x07] = 0xb8; /* movl $wptr, %eax */ - *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr; + code[0x09] = 0xff; /* jmp *4(%ecx) */ + code[0x0a] = 0x61; + code[0x0b] = 0x04; +} - adj_code[0x0c] = 0xff; /* jmp %eax */ - adj_code[0x0d] = 0xe0; +static struct AdjustorPool *stdcall_pool; +#endif - freezeExecPage(page); - return page; - } +void initAdjustors() { + ccall_pool = new_adjustor_pool(sizeof(struct CCallContext), CCALL_ADJUSTOR_LEN, mk_ccall_adjustor, NULL); +#if !defined(darwin_HOST_OS) + stdcall_pool = new_adjustor_pool(sizeof(struct AdjustorContext), STDCALL_ADJUSTOR_LEN, mk_stdcall_adjustor, NULL); +#endif +} + +void* +createAdjustor(int cconv, StgStablePtr hptr, StgFunPtr wptr, + char *typeString STG_UNUSED + ) +{ + + switch (cconv) + { + case 0: { /* _stdcall */ +#if defined(darwin_HOST_OS) + barf("stdcall is not supported on Darwin") +#else + struct AdjustorContext context = { + .hptr = hptr, + .wptr = wptr, + }; + return alloc_adjustor(stdcall_pool, &context); #endif /* !defined(darwin_HOST_OS) */ + } case 1: /* _ccall */ { - /* - Most of the trickiness here is due to the need to keep the - stack pointer 16-byte aligned (see #5250). That means we - can't just push another argument on the stack and call the - wrapper, we may have to shuffle the whole argument block. - - We offload most of the work to AdjustorAsm.S. - */ - ExecPage *page = allocateExecPage(); - if (page == NULL) { - barf("createAdjustor: failed to allocate executable page\n"); - } - AdjustorStub *adjustorStub = (AdjustorStub *) page; - int sz = totalArgumentSize(typeString); - - adjustorStub->call[0] = 0xe8; - *(long*)&adjustorStub->call[1] = ((char*)&adjustorCode) - ((char*)page + 5); - adjustorStub->hptr = hptr; - adjustorStub->wptr = wptr; - // The adjustor puts the following things on the stack: // 1.) %ebp link // 2.) padding and (a copy of) the arguments @@ -107,16 +129,21 @@ createAdjustor(int cconv, StgStablePtr hptr, // 5.) return address (for returning to the adjustor) // All these have to add up to a multiple of 16. + int sz = totalArgumentSize(typeString); // first, include everything in frame_size - adjustorStub->frame_size = sz * 4 + 16; + StgInt frame_size = sz * 4 + 16; // align to 16 bytes - adjustorStub->frame_size = (adjustorStub->frame_size + 15) & ~15; + frame_size = (frame_size + 15) & ~15; // only count 2.) and 3.) as part of frame_size - adjustorStub->frame_size -= 12; - adjustorStub->argument_size = sz; - - freezeExecPage(page); - return page; + frame_size -= 12; + + struct CCallContext context = { + .hptr = hptr, + .wptr = wptr, + .frame_size = frame_size, + .argument_size = sz, + }; + return alloc_adjustor(ccall_pool, &context); } default: @@ -127,16 +154,7 @@ createAdjustor(int cconv, StgStablePtr hptr, void freeHaskellFunctionPtr(void* ptr) { - if ( *(unsigned char*)ptr != 0xe8 && - *(unsigned char*)ptr != 0x58 ) { - errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); - return; - } - if (*(unsigned char*)ptr == 0xe8) { /* Aha, a ccall adjustor! */ - freeStablePtr(((AdjustorStub*)ptr)->hptr); - } else { - freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02))); - } - - freeExecPage((ExecPage *) ptr); + struct AdjustorContext context; + free_adjustor(ptr, &context); + freeStablePtr(context.hptr); } diff --git a/rts/adjustor/Nativei386Asm.S b/rts/adjustor/Nativei386Asm.S new file mode 100644 index 0000000000..4ed5a0359e --- /dev/null +++ b/rts/adjustor/Nativei386Asm.S @@ -0,0 +1,57 @@ +#include "include/ghcconfig.h" + +#define WS 4 + +#define RETVAL_OFF 5 + +// Mathces layout of struct CCallAdjustor in Nativei386.c +#define HPTR_OFF (0*WS) +#define WPTR_OFF (1*WS) +#define FRAME_SIZE_OFF (2*WS) +#define ARGUMENT_WORDS_OFF (3*WS) + +#if defined(LEADING_UNDERSCORE) +#define CSYM(x) _ ## x +#else +#define CSYM(x) x +#endif + +#define DECLARE_CSYM(x) \ + .globl CSYM(x) ; \ + CSYM(x): + +DECLARE_CSYM(ccall_adjustor) + // At entry %eax contains a pointer to the CCallContext + + // Record a frame pointer. Paired with the `leave` below. + pushl %ebp + movl %esp, %ebp + + subl FRAME_SIZE_OFF(%eax), %esp + + // Save %esi and %edi as we need to clobber them to perform the shuffle + pushl %esi + pushl %edi + + // Shuffle the stack down... + leal 8(%ebp), %esi + leal 12(%esp), %edi + movl ARGUMENT_WORDS_OFF(%eax), %ecx + rep + movsl + + // Restore %edi and %esi + popl %edi + popl %esi + + // Perform the call + pushl HPTR_OFF(%eax) + call *WPTR_OFF(%eax) + + leave + ret + +/* mark stack as nonexecutable */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",@progbits +#endif diff --git a/rts/ghc.mk b/rts/ghc.mk index 4c7a54977f..36a82f9f2c 100644 --- a/rts/ghc.mk +++ b/rts/ghc.mk @@ -69,7 +69,7 @@ ifeq "$(UseLibffiForAdjustors)" "YES" rts_C_SRCS += rts/adjustor/LibffiAdjustor.c else ifneq "$(findstring $(TargetArch_CPP), i386)" "" -rts_S_SRCS += rts/AdjustorAsm.S +rts_S_SRCS += rts/adjustor/Nativei386Asm.S rts_C_SRCS += rts/adjustor/Nativei386.c else ifneq "$(findstring $(TargetArch_CPP), x86_64)" "" diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in index 641fccc437..5e53b38be2 100644 --- a/rts/rts.cabal.in +++ b/rts/rts.cabal.in @@ -452,7 +452,7 @@ library else -- Use GHC's native adjustors if arch(i386) - asm-sources: AdjustorAsm.S + asm-sources: adjustor/Nativei386Asm.S c-sources: adjustor/Nativei386.c if arch(x86_64) if os(mingw32) -- cgit v1.2.1