summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTamar Christina <tamar@zhox.com>2020-08-31 10:35:56 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-10-09 08:41:50 -0400
commitfd984d68e5ec4b04bc79395c099434e653eb1060 (patch)
tree9b3e3725cc82fbd8686f3e63ba5a62860afdcd58
parenta566c83d4fc3a90b209b33131a2972ea53ec81b2 (diff)
downloadhaskell-fd984d68e5ec4b04bc79395c099434e653eb1060.tar.gz
rts: fix race condition in StgCRun
On windows the stack has to be allocated 4k at a time, otherwise we get a segfault. This is done by using a helper ___chkstk_ms that is provided by libgcc. The Haskell side already knows how to handle this but we need to do the same from STG. Previously we would drop the stack in StgRun but would only make it valid whenever the scheduler loop ran. This approach was fundamentally broken in that it falls apart when you take a signal from the OS. We see it less often because you initially get allocated a 1MB stack block which you have to blow past first. Concretely this means we must always keep the stack valid. Fixes #18601.
-rw-r--r--compiler/GHC/CmmToAsm/X86/Instr.hs5
-rw-r--r--rts/RtsStartup.c2
-rw-r--r--rts/RtsSymbols.c2
-rw-r--r--rts/Schedule.c17
-rw-r--r--rts/StgCRun.c49
-rw-r--r--rts/StgRun.h4
6 files changed, 34 insertions, 45 deletions
diff --git a/compiler/GHC/CmmToAsm/X86/Instr.hs b/compiler/GHC/CmmToAsm/X86/Instr.hs
index caf50ce2fc..da6ca0a662 100644
--- a/compiler/GHC/CmmToAsm/X86/Instr.hs
+++ b/compiler/GHC/CmmToAsm/X86/Instr.hs
@@ -832,13 +832,14 @@ mkJumpInstr id
-- In essence each allocation larger than a page size needs to be chunked and
-- a probe emitted after each page allocation. You have to hit the guard
-- page so the kernel can map in the next page, otherwise you'll segfault.
+-- See Note [Windows stack allocations].
--
needs_probe_call :: Platform -> Int -> Bool
needs_probe_call platform amount
= case platformOS platform of
OSMinGW32 -> case platformArch platform of
ArchX86 -> amount > (4 * 1024)
- ArchX86_64 -> amount > (8 * 1024)
+ ArchX86_64 -> amount > (4 * 1024)
_ -> False
_ -> False
@@ -862,7 +863,7 @@ mkStackAllocInstr platform amount
--
-- We emit a call because the stack probes are quite involved and
-- would bloat code size a lot. GHC doesn't really have an -Os.
- -- __chkstk is guaranteed to leave all nonvolatile registers and AX
+ -- ___chkstk is guaranteed to leave all nonvolatile registers and AX
-- untouched. It's part of the standard prologue code for any Windows
-- function dropping the stack more than a page.
-- See Note [Windows stack layout]
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
index b916010d34..a3dddb03f5 100644
--- a/rts/RtsStartup.c
+++ b/rts/RtsStartup.c
@@ -71,7 +71,7 @@ static bool rts_shutdown = false;
#if defined(mingw32_HOST_OS)
/* Indicates CodePage to set program to after exit. */
-static int64_t __codePage = 0;
+static int64_t __codePage = -1;
#endif
static void flushStdHandles(void);
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index 8d8bd1115f..3a2e37fd63 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -135,7 +135,7 @@
SymI_HasProto(rts_InstallConsoleEvent) \
SymI_HasProto(rts_ConsoleHandlerDone) \
SymI_HasProto(atexit) \
- RTS_WIN32_ONLY(SymI_NeedsProto(__chkstk_ms)) \
+ RTS_WIN32_ONLY(SymI_NeedsProto(___chkstk_ms)) \
RTS_WIN64_ONLY(SymI_NeedsProto(___chkstk_ms)) \
RTS_WIN32_ONLY(SymI_HasProto(_imp___environ)) \
RTS_WIN64_ONLY(SymI_HasProto(__imp__environ)) \
diff --git a/rts/Schedule.c b/rts/Schedule.c
index 6b10326859..41d0dba953 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -137,7 +137,6 @@ static Capability *schedule (Capability *initialCapability, Task *task);
// abstracted only to make the structure and control flow of the
// scheduler clearer.
//
-static void schedulePreLoop (void);
static void scheduleFindWork (Capability **pcap);
#if defined(THREADED_RTS)
static void scheduleYield (Capability **pcap, Task *task);
@@ -207,8 +206,6 @@ schedule (Capability *initialCapability, Task *task)
debugTrace (DEBUG_sched, "cap %d: schedule()", initialCapability->no);
- schedulePreLoop();
-
// -----------------------------------------------------------
// Scheduler loop starts here:
@@ -613,20 +610,6 @@ promoteInRunQueue (Capability *cap, StgTSO *tso)
pushOnRunQueue(cap, tso);
}
-/* ----------------------------------------------------------------------------
- * Setting up the scheduler loop
- * ------------------------------------------------------------------------- */
-
-static void
-schedulePreLoop(void)
-{
- // initialisation for scheduler - what cannot go into initScheduler()
-
-#if defined(mingw32_HOST_OS) && !defined(USE_MINIINTERPRETER)
- win32AllocStack();
-#endif
-}
-
/* -----------------------------------------------------------------------------
* scheduleFindWork()
*
diff --git a/rts/StgCRun.c b/rts/StgCRun.c
index 2d1cc16b28..31dc9dd42a 100644
--- a/rts/StgCRun.c
+++ b/rts/StgCRun.c
@@ -96,25 +96,6 @@ StgFunPtr StgReturn(void)
}
#else /* !USE_MINIINTERPRETER */
-
-#if defined(mingw32_HOST_OS)
-/*
- * Note [Windows Stack allocations]
- *
- * On windows the stack has to be allocated 4k at a time, otherwise
- * we get a segfault. The C compiler knows how to do this (it calls
- * _alloca()), so we make sure that we can allocate as much stack as
- * we need. However since we are doing a local stack allocation and the value
- * isn't valid outside the frame, compilers are free to optimize this allocation
- * and the corresponding stack check away. So to prevent that we request that
- * this function never be optimized (See #14669). */
-STG_NO_OPTIMIZE StgWord8 *win32AllocStack(void)
-{
- StgWord8 stack[RESERVED_C_STACK_BYTES + 16 + 12];
- return stack;
-}
-#endif
-
/* -----------------------------------------------------------------------------
x86 architecture
-------------------------------------------------------------------------- */
@@ -166,7 +147,21 @@ STG_NO_OPTIMIZE StgWord8 *win32AllocStack(void)
*
* If you edit the sequence below be sure to update the unwinding information
* for stg_stop_thread in StgStartup.cmm.
- */
+ *
+ * Note [Windows Stack allocations]
+ *
+ * On windows the stack has to be allocated 4k at a time, otherwise
+ * we get a segfault. This is done by using a helper ___chkstk_ms that is
+ * provided by libgcc. The Haskell side already knows how to handle this
+(see GHC.CmmToAsm.X86.Instr.needs_probe_call)
+ * but we need to do the same from STG. Previously we would drop the stack
+ * in StgRun but would only make it valid whenever the scheduler loop ran.
+ *
+ * This approach was fundamentally broken in that it falls apart when you
+ * take a signal from the OS (See #14669, #18601, #18548 and #18496).
+ * Concretely this means we must always keep the stack valid.
+ * */
+
static void GNUC3_ATTRIBUTE(used)
StgRunIsImplementedInAssembler(void)
@@ -186,7 +181,13 @@ StgRunIsImplementedInAssembler(void)
* bytes from here - this is a requirement of the C ABI, so
* that C code can assign SSE2 registers directly to/from
* stack locations.
+ *
+ * See Note [Windows Stack allocations]
*/
+#if defined(mingw32_HOST_OS)
+ "movl %0, %%eax\n\t"
+ "call ___chkstk_ms\n\t"
+#endif
"subl %0, %%esp\n\t"
/*
@@ -383,6 +384,14 @@ StgRunIsImplementedInAssembler(void)
STG_HIDDEN STG_RUN "\n"
#endif
STG_RUN ":\n\t"
+ /*
+ * See Note [Windows Stack allocations]
+ */
+#if defined(mingw32_HOST_OS)
+ "movq %1, %%rax\n\t"
+ "addq %0, %%rax\n\t"
+ "callq ___chkstk_ms\n\t"
+#endif
"subq %1, %%rsp\n\t"
"movq %%rsp, %%rax\n\t"
"subq %0, %%rsp\n\t"
diff --git a/rts/StgRun.h b/rts/StgRun.h
index 356c40dba2..f7c042c853 100644
--- a/rts/StgRun.h
+++ b/rts/StgRun.h
@@ -9,7 +9,3 @@
#pragma once
RTS_PRIVATE StgRegTable * StgRun (StgFunPtr f, StgRegTable *basereg);
-
-#if defined(mingw32_HOST_OS)
-StgWord8 *win32AllocStack(void);
-#endif