summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTamar Christina <tamar@zhox.com>2020-08-31 10:35:56 +0100
committerBen Gamari <ben@smart-cactus.org>2020-10-19 23:16:22 -0400
commit658362c68a64f9b999367875ae7d75db07eb2620 (patch)
tree649c2b8a475b0776cbd93f30f0f272316cd840e7
parent5a4d0c3dcd2d83744293ea82a7e5606ad2038109 (diff)
downloadhaskell-wip/backport-MR3949.tar.gz
rts: fix race condition in StgCRunwip/backport-MR3949
On windows the stack has to be allocated 4k at a time, otherwise we get a segfault. This is done by using a helper ___chkstk_ms that is provided by libgcc. The Haskell side already knows how to handle this but we need to do the same from STG. Previously we would drop the stack in StgRun but would only make it valid whenever the scheduler loop ran. This approach was fundamentally broken in that it falls apart when you take a signal from the OS. We see it less often because you initially get allocated a 1MB stack block which you have to blow past first. Concretely this means we must always keep the stack valid. Fixes #18601. (cherry picked from commit fd984d68e5ec4b04bc79395c099434e653eb1060)
-rw-r--r--compiler/nativeGen/X86/Instr.hs5
-rw-r--r--rts/RtsSymbols.c2
-rw-r--r--rts/Schedule.c17
-rw-r--r--rts/StgCRun.c49
-rw-r--r--rts/StgRun.h4
5 files changed, 33 insertions, 44 deletions
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 7e47860143..d4efd83be7 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -819,13 +819,14 @@ x86_mkJumpInstr id
-- In essense each allocation larger than a page size needs to be chunked and
-- a probe emitted after each page allocation. You have to hit the guard
-- page so the kernel can map in the next page, otherwise you'll segfault.
+-- See Note [Windows stack allocations].
--
needs_probe_call :: Platform -> Int -> Bool
needs_probe_call platform amount
= case platformOS platform of
OSMinGW32 -> case platformArch platform of
ArchX86 -> amount > (4 * 1024)
- ArchX86_64 -> amount > (8 * 1024)
+ ArchX86_64 -> amount > (4 * 1024)
_ -> False
_ -> False
@@ -849,7 +850,7 @@ x86_mkStackAllocInstr platform amount
--
-- We emit a call because the stack probes are quite involved and
-- would bloat code size a lot. GHC doesn't really have an -Os.
- -- __chkstk is guaranteed to leave all nonvolatile registers and AX
+ -- ___chkstk is guaranteed to leave all nonvolatile registers and AX
-- untouched. It's part of the standard prologue code for any Windows
-- function dropping the stack more than a page.
-- See Note [Windows stack layout]
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index 442b92a31a..d1d041c0f1 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -134,7 +134,7 @@
SymI_HasProto(rts_InstallConsoleEvent) \
SymI_HasProto(rts_ConsoleHandlerDone) \
SymI_HasProto(atexit) \
- RTS_WIN32_ONLY(SymI_NeedsProto(__chkstk_ms)) \
+ RTS_WIN32_ONLY(SymI_NeedsProto(___chkstk_ms)) \
RTS_WIN64_ONLY(SymI_NeedsProto(___chkstk_ms)) \
RTS_WIN32_ONLY(SymI_HasProto(_imp___environ)) \
RTS_WIN64_ONLY(SymI_HasProto(__imp__environ)) \
diff --git a/rts/Schedule.c b/rts/Schedule.c
index 9323915dfe..22787c4cfb 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -136,7 +136,6 @@ static Capability *schedule (Capability *initialCapability, Task *task);
// abstracted only to make the structure and control flow of the
// scheduler clearer.
//
-static void schedulePreLoop (void);
static void scheduleFindWork (Capability **pcap);
#if defined(THREADED_RTS)
static void scheduleYield (Capability **pcap, Task *task);
@@ -205,8 +204,6 @@ schedule (Capability *initialCapability, Task *task)
debugTrace (DEBUG_sched, "cap %d: schedule()", initialCapability->no);
- schedulePreLoop();
-
// -----------------------------------------------------------
// Scheduler loop starts here:
@@ -599,20 +596,6 @@ promoteInRunQueue (Capability *cap, StgTSO *tso)
pushOnRunQueue(cap, tso);
}
-/* ----------------------------------------------------------------------------
- * Setting up the scheduler loop
- * ------------------------------------------------------------------------- */
-
-static void
-schedulePreLoop(void)
-{
- // initialisation for scheduler - what cannot go into initScheduler()
-
-#if defined(mingw32_HOST_OS) && !defined(USE_MINIINTERPRETER)
- win32AllocStack();
-#endif
-}
-
/* -----------------------------------------------------------------------------
* scheduleFindWork()
*
diff --git a/rts/StgCRun.c b/rts/StgCRun.c
index 934926e0f3..ddb19b468b 100644
--- a/rts/StgCRun.c
+++ b/rts/StgCRun.c
@@ -89,25 +89,6 @@ StgFunPtr StgReturn(void)
}
#else /* !USE_MINIINTERPRETER */
-
-#if defined(mingw32_HOST_OS)
-/*
- * Note [Windows Stack allocations]
- *
- * On windows the stack has to be allocated 4k at a time, otherwise
- * we get a segfault. The C compiler knows how to do this (it calls
- * _alloca()), so we make sure that we can allocate as much stack as
- * we need. However since we are doing a local stack allocation and the value
- * isn't valid outside the frame, compilers are free to optimize this allocation
- * and the corresponding stack check away. So to prevent that we request that
- * this function never be optimized (See #14669). */
-STG_NO_OPTIMIZE StgWord8 *win32AllocStack(void)
-{
- StgWord8 stack[RESERVED_C_STACK_BYTES + 16 + 12];
- return stack;
-}
-#endif
-
/* -----------------------------------------------------------------------------
x86 architecture
-------------------------------------------------------------------------- */
@@ -159,7 +140,21 @@ STG_NO_OPTIMIZE StgWord8 *win32AllocStack(void)
*
* If you edit the sequence below be sure to update the unwinding information
* for stg_stop_thread in StgStartup.cmm.
- */
+ *
+ * Note [Windows Stack allocations]
+ *
+ * On windows the stack has to be allocated 4k at a time, otherwise
+ * we get a segfault. This is done by using a helper ___chkstk_ms that is
+ * provided by libgcc. The Haskell side already knows how to handle this
+(see GHC.CmmToAsm.X86.Instr.needs_probe_call)
+ * but we need to do the same from STG. Previously we would drop the stack
+ * in StgRun but would only make it valid whenever the scheduler loop ran.
+ *
+ * This approach was fundamentally broken in that it falls apart when you
+ * take a signal from the OS (See #14669, #18601, #18548 and #18496).
+ * Concretely this means we must always keep the stack valid.
+ * */
+
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
@@ -179,7 +174,13 @@ StgRunIsImplementedInAssembler(void)
* bytes from here - this is a requirement of the C ABI, so
* that C code can assign SSE2 registers directly to/from
* stack locations.
+ *
+ * See Note [Windows Stack allocations]
*/
+#if defined(mingw32_HOST_OS)
+ "movl %0, %%eax\n\t"
+ "call ___chkstk_ms\n\t"
+#endif
"subl %0, %%esp\n\t"
/*
@@ -376,6 +377,14 @@ StgRunIsImplementedInAssembler(void)
STG_HIDDEN STG_RUN "\n"
#endif
STG_RUN ":\n\t"
+ /*
+ * See Note [Windows Stack allocations]
+ */
+#if defined(mingw32_HOST_OS)
+ "movq %1, %%rax\n\t"
+ "addq %0, %%rax\n\t"
+ "callq ___chkstk_ms\n\t"
+#endif
"subq %1, %%rsp\n\t"
"movq %%rsp, %%rax\n\t"
"subq %0, %%rsp\n\t"
diff --git a/rts/StgRun.h b/rts/StgRun.h
index 356c40dba2..f7c042c853 100644
--- a/rts/StgRun.h
+++ b/rts/StgRun.h
@@ -9,7 +9,3 @@
#pragma once
RTS_PRIVATE StgRegTable * StgRun (StgFunPtr f, StgRegTable *basereg);
-
-#if defined(mingw32_HOST_OS)
-StgWord8 *win32AllocStack(void);
-#endif