49 files changed, 1043 insertions, 961 deletions
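This commit replaces the old scheme, in which each TSO carried a single contiguous stack and was copied to a larger block on overflow, with a chain of separately allocated STACK objects. As a condensed sketch of the new layout (abbreviated from the includes/rts/storage/TSO.h and Closures.h hunks below; not a verbatim copy):

    /* A thread's stack is now a heap object of its own (closure type STACK): */
    typedef struct StgStack_ {
        StgHeader header;
        StgWord32 stack_size;             // size of this chunk, in words
        StgWord32 dirty;                  // non-zero => scan this chunk at GC
        StgPtr    sp;                     // current stack pointer
        StgWord   stack[FLEXIBLE_ARRAY];  // the stack itself
    } StgStack;

    struct StgTSO_ {
        /* ... */
        struct StgStack_ *stackobj;       // the thread's current (top) chunk
        StgWord32 tot_stack_size;         // sum of all chunk sizes, in words
        /* ... */
    };

    /* Chunks are chained by an UNDERFLOW_FRAME at the bottom of each chunk: */
    typedef struct {
        const StgInfoTable *info;         // stg_stack_underflow_frame
        struct StgStack_   *next_chunk;
    } StgUnderflowFrame;

When a chunk overflows, a new one is chained on (up to the -K limit); when execution reaches the UNDERFLOW_FRAME at the bottom of a chunk, the thread continues in next_chunk. Because TSOs no longer move when their stacks grow, ThreadRelocated, deRefTSO() and the loops that followed relocation links all disappear in the hunks below.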
diff --git a/compiler/cmm/CmmCPSGen.hs b/compiler/cmm/CmmCPSGen.hs index 924ce9d4ab..45d0aebe3c 100644 --- a/compiler/cmm/CmmCPSGen.hs +++ b/compiler/cmm/CmmCPSGen.hs @@ -331,8 +331,8 @@ nursery_bdescr_start = cmmOffset stgCurrentNursery oFFSET_bdescr_start nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks tso_SP, tso_STACK, tso_CCCS :: ByteOff -tso_SP = tsoFieldB oFFSET_StgTSO_sp -tso_STACK = tsoFieldB oFFSET_StgTSO_stack +tso_SP = tsoFieldB undefined --oFFSET_StgTSO_sp +tso_STACK = tsoFieldB undefined --oFFSET_StgTSO_stack tso_CCCS = tsoProfFieldB oFFSET_StgTSO_CCCS -- The TSO struct has a variable header, and an optional StgTSOProfInfo in diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs index 8e8e34d77b..cdaccc98a8 100644 --- a/compiler/codeGen/CgForeignCall.hs +++ b/compiler/codeGen/CgForeignCall.hs @@ -202,8 +202,9 @@ maybe_assign_temp e emitSaveThreadState :: Code emitSaveThreadState = do - -- CurrentTSO->sp = Sp; - stmtC $ CmmStore (cmmOffset stgCurrentTSO tso_SP) stgSp + -- CurrentTSO->stackobj->sp = Sp; + stmtC $ CmmStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) + stack_SP) stgSp emitCloseNursery -- and save the current cost centre stack in the TSO when profiling: when opt_SccProfilingOn $ @@ -216,14 +217,17 @@ emitCloseNursery = stmtC $ CmmStore nursery_bdescr_free (cmmOffsetW stgHp 1) emitLoadThreadState :: Code emitLoadThreadState = do tso <- newTemp bWord -- TODO FIXME NOW + stack <- newTemp bWord -- TODO FIXME NOW stmtsC [ - -- tso = CurrentTSO; - CmmAssign (CmmLocal tso) stgCurrentTSO, - -- Sp = tso->sp; - CmmAssign sp (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_SP) - bWord), - -- SpLim = tso->stack + RESERVED_STACK_WORDS; - CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg (CmmLocal tso)) tso_STACK) + -- tso = CurrentTSO + CmmAssign (CmmLocal tso) stgCurrentTSO, + -- stack = tso->stackobj + CmmAssign (CmmLocal stack) (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_stackobj) bWord), + -- Sp = stack->sp; + CmmAssign sp (CmmLoad (cmmOffset (CmmReg (CmmLocal stack)) stack_SP) + bWord), + -- SpLim = stack->stack + RESERVED_STACK_WORDS; + CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg (CmmLocal stack)) stack_STACK) rESERVED_STACK_WORDS), -- HpAlloc = 0; -- HpAlloc is assumed to be set to non-zero only by a failed @@ -234,7 +238,7 @@ emitLoadThreadState = do -- and load the current cost centre stack from the TSO when profiling: when opt_SccProfilingOn $ stmtC (CmmStore curCCSAddr - (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord)) + (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord)) emitOpenNursery :: Code emitOpenNursery = stmtsC [ @@ -262,20 +266,14 @@ nursery_bdescr_free = cmmOffset stgCurrentNursery oFFSET_bdescr_free nursery_bdescr_start = cmmOffset stgCurrentNursery oFFSET_bdescr_start nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks -tso_SP, tso_STACK, tso_CCCS :: ByteOff -tso_SP = tsoFieldB oFFSET_StgTSO_sp -tso_STACK = tsoFieldB oFFSET_StgTSO_stack -tso_CCCS = tsoProfFieldB oFFSET_StgTSO_CCCS +tso_stackobj, tso_CCCS, stack_STACK, stack_SP :: ByteOff +tso_stackobj = closureField oFFSET_StgTSO_stackobj +tso_CCCS = closureField oFFSET_StgTSO_CCCS +stack_STACK = closureField oFFSET_StgStack_stack +stack_SP = closureField oFFSET_StgStack_sp --- The TSO struct has a variable header, and an optional StgTSOProfInfo in --- the middle. The fields we're interested in are after the StgTSOProfInfo. 
-tsoFieldB :: ByteOff -> ByteOff -tsoFieldB off - | opt_SccProfilingOn = off + sIZEOF_StgTSOProfInfo + fixedHdrSize * wORD_SIZE - | otherwise = off + fixedHdrSize * wORD_SIZE - -tsoProfFieldB :: ByteOff -> ByteOff -tsoProfFieldB off = off + fixedHdrSize * wORD_SIZE +closureField :: ByteOff -> ByteOff +closureField off = off + fixedHdrSize * wORD_SIZE stgSp, stgHp, stgCurrentTSO, stgCurrentNursery :: CmmExpr stgSp = CmmReg sp diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs index 83c430143e..7ddf597f40 100644 --- a/compiler/codeGen/StgCmmForeign.hs +++ b/compiler/codeGen/StgCmmForeign.hs @@ -243,10 +243,12 @@ nursery_bdescr_start = cmmOffset stgCurrentNursery oFFSET_bdescr_start nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks tso_SP, tso_STACK, tso_CCCS :: ByteOff -tso_SP = tsoFieldB oFFSET_StgTSO_sp -tso_STACK = tsoFieldB oFFSET_StgTSO_stack tso_CCCS = tsoProfFieldB oFFSET_StgTSO_CCCS + --ToDo: needs merging with changes to CgForeign +tso_STACK = tsoFieldB undefined +tso_SP = tsoFieldB undefined + -- The TSO struct has a variable header, and an optional StgTSOProfInfo in -- the middle. The fields we're interested in are after the StgTSOProfInfo. tsoFieldB :: ByteOff -> ByteOff diff --git a/docs/users_guide/runtime_control.xml b/docs/users_guide/runtime_control.xml index 40409131cc..0e13f0e933 100644 --- a/docs/users_guide/runtime_control.xml +++ b/docs/users_guide/runtime_control.xml @@ -424,22 +424,88 @@ <varlistentry> <term> - <option>-k</option><replaceable>size</replaceable> + <option>-ki</option><replaceable>size</replaceable> <indexterm><primary><option>-k</option></primary><secondary>RTS option</secondary></indexterm> - <indexterm><primary>stack, minimum size</primary></indexterm> + <indexterm><primary>stack, initial size</primary></indexterm> </term> <listitem> - <para>[Default: 1k] Set the initial stack size for - new threads. Thread stacks (including the main thread's - stack) live on the heap, and grow as required. The default - value is good for concurrent applications with lots of small - threads; if your program doesn't fit this model then - increasing this option may help performance.</para> - - <para>The main thread is normally started with a slightly - larger heap to cut down on unnecessary stack growth while - the program is starting up.</para> - </listitem> + <para> + [Default: 1k] Set the initial stack size for new + threads. (Note: this flag used to be + simply <option>-k</option>, but was renamed + to <option>-ki</option> in GHC 7.2.1. The old name is + still accepted for backwards compatibility, but that may + be removed in a future version). + </para> + + <para> + Thread stacks (including the main thread's stack) live on + the heap. As the stack grows, new stack chunks are added + as required; if the stack shrinks again, these extra stack + chunks are reclaimed by the garbage collector. The + default initial stack size is deliberately small, in order + to keep the time and space overhead for thread creation to + a minimum, and to make it practical to spawn threads for + even tiny pieces of work. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term> + <option>-kc</option><replaceable>size</replaceable> + <indexterm><primary><option>-kc</option></primary><secondary>RTS + option</secondary></indexterm> + <indexterm><primary>stack</primary><secondary>chunk size</secondary></indexterm> + </term> + <listitem> + <para> + [Default: 32k] Set the size of “stack + chunks”. 
When a thread's current stack overflows, a + new stack chunk is created and added to the thread's + stack, until the limit set by <option>-K</option> is + reached. + </para> + + <para> + The advantage of smaller stack chunks is that the garbage + collector can avoid traversing stack chunks if they are + known to be unmodified since the last collection, so + reducing the chunk size means that the garbage collector + can identify more stack as unmodified, and the GC overhead + might be reduced. On the other hand, making stack chunks + too small adds some overhead as there will be more + overflow/underflow between chunks. The default setting of + 32k appears to be a reasonable compromise in most cases. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term> + <option>-kb</option><replaceable>size</replaceable> + <indexterm><primary><option>-kc</option></primary><secondary>RTS + option</secondary></indexterm> + <indexterm><primary>stack</primary><secondary>chunk buffer size</secondary></indexterm> + </term> + <listitem> + <para> + [Default: 1k] Sets the stack chunk buffer size. + When a stack chunk overflows and a new stack chunk is + created, some of the data from the previous stack chunk is + moved into the new chunk, to avoid an immediate underflow + and repeated overflow/underflow at the boundary. The + amount of stack moved is set by the <option>-kb</option> + option. + </para> + <para> + Note that to avoid wasting space, this value should + typically be less than 10% of the size of a stack + chunk (<option>-kc</option>), because in a chain of stack + chunks, each chunk will have a gap of unused space of this + size. + </para> + </listitem> </varlistentry> <varlistentry> @@ -451,9 +517,14 @@ <listitem> <para>[Default: 8M] Set the maximum stack size for an individual thread to <replaceable>size</replaceable> - bytes. This option is there purely to stop the program - eating up all the available memory in the machine if it gets - into an infinite loop.</para> + bytes. If the thread attempts to exceed this limit, it will + be send the <literal>StackOverflow</literal> exception. + </para> + <para> + This option is there mainly to stop the program eating up + all the available memory in the machine if it gets into an + infinite loop. 
+ </para> </listitem> </varlistentry> diff --git a/includes/Cmm.h b/includes/Cmm.h index 0088c1aa05..6abe760be5 100644 --- a/includes/Cmm.h +++ b/includes/Cmm.h @@ -467,6 +467,12 @@ #define mutArrPtrsCardWords(n) \ ROUNDUP_BYTES_TO_WDS(((n) + (1 << MUT_ARR_PTRS_CARD_BITS) - 1) >> MUT_ARR_PTRS_CARD_BITS) +#if defined(PROFILING) || (!defined(THREADED_RTS) && defined(DEBUG)) +#define OVERWRITING_CLOSURE(c) foreign "C" overwritingClosure(c "ptr") +#else +#define OVERWRITING_CLOSURE(c) /* nothing */ +#endif + /* ----------------------------------------------------------------------------- Voluntary Yields/Blocks diff --git a/includes/mkDerivedConstants.c b/includes/mkDerivedConstants.c index ade104a4be..0ed7ec67da 100644 --- a/includes/mkDerivedConstants.c +++ b/includes/mkDerivedConstants.c @@ -296,9 +296,12 @@ main(int argc, char *argv[]) closure_field(StgTSO, dirty); closure_field(StgTSO, bq); closure_field_("StgTSO_CCCS", StgTSO, prof.CCCS); - tso_field(StgTSO, sp); - tso_field_offset(StgTSO, stack); - tso_field(StgTSO, stack_size); + closure_field(StgTSO, stackobj); + + closure_field(StgStack, sp); + closure_field_offset(StgStack, stack); + closure_field(StgStack, stack_size); + closure_field(StgStack, dirty); struct_size(StgTSOProfInfo); diff --git a/includes/rts/Constants.h b/includes/rts/Constants.h index e21a893bbc..a4114ab999 100644 --- a/includes/rts/Constants.h +++ b/includes/rts/Constants.h @@ -198,8 +198,7 @@ #define ThreadRunGHC 1 /* return to address on top of stack */ #define ThreadInterpret 2 /* interpret this thread */ #define ThreadKilled 3 /* thread has died, don't run it */ -#define ThreadRelocated 4 /* thread has moved, link points to new locn */ -#define ThreadComplete 5 /* thread has finished */ +#define ThreadComplete 4 /* thread has finished */ /* * Constants for the why_blocked field of a TSO @@ -266,11 +265,6 @@ #define TSO_STOPPED_ON_BREAKPOINT 16 /* - * TSO_LINK_DIRTY is set when a TSO's link field is modified - */ -#define TSO_LINK_DIRTY 32 - -/* * Used by the sanity checker to check whether TSOs are on the correct * mutable list. 
*/ diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h index 8bfadaa0cd..75525d8984 100644 --- a/includes/rts/Flags.h +++ b/includes/rts/Flags.h @@ -29,6 +29,8 @@ struct GC_FLAGS { nat maxStkSize; /* in *words* */ nat initialStkSize; /* in *words* */ + nat stkChunkSize; /* in *words* */ + nat stkChunkBufferSize; /* in *words* */ nat maxHeapSize; /* in *blocks* */ nat minAllocAreaSize; /* in *blocks* */ diff --git a/includes/rts/prof/LDV.h b/includes/rts/prof/LDV.h index 77d873cceb..64266911bd 100644 --- a/includes/rts/prof/LDV.h +++ b/includes/rts/prof/LDV.h @@ -31,25 +31,16 @@ #ifdef CMINUSMINUS -#define LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(c) \ - foreign "C" LDV_recordDead_FILL_SLOP_DYNAMIC(c "ptr") - #else #define LDV_RECORD_CREATE(c) \ LDVW((c)) = ((StgWord)RTS_DEREF(era) << LDV_SHIFT) | LDV_STATE_CREATE -void LDV_recordDead_FILL_SLOP_DYNAMIC( StgClosure *p ); - -#define LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(c) \ - LDV_recordDead_FILL_SLOP_DYNAMIC(c) - #endif #else /* !PROFILING */ #define LDV_RECORD_CREATE(c) /* nothing */ -#define LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(c) /* nothing */ #endif /* PROFILING */ diff --git a/includes/rts/storage/ClosureMacros.h b/includes/rts/storage/ClosureMacros.h index aead2edd04..7123c20587 100644 --- a/includes/rts/storage/ClosureMacros.h +++ b/includes/rts/storage/ClosureMacros.h @@ -131,9 +131,9 @@ // Use when changing a closure from one kind to another #define OVERWRITE_INFO(c, new_info) \ - LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC((StgClosure *)(c)); \ - SET_INFO((c), (new_info)); \ - LDV_RECORD_CREATE(c); + OVERWRITING_CLOSURE((StgClosure *)(c)); \ + SET_INFO((c), (new_info)); \ + LDV_RECORD_CREATE(c); /* ----------------------------------------------------------------------------- How to get hold of the static link field for a static closure. @@ -289,8 +289,8 @@ INLINE_HEADER StgOffset arr_words_sizeW( StgArrWords* x ) INLINE_HEADER StgOffset mut_arr_ptrs_sizeW( StgMutArrPtrs* x ) { return sizeofW(StgMutArrPtrs) + x->size; } -INLINE_HEADER StgWord tso_sizeW ( StgTSO *tso ) -{ return TSO_STRUCT_SIZEW + tso->stack_size; } +INLINE_HEADER StgWord stack_sizeW ( StgStack *stack ) +{ return sizeofW(StgStack) + stack->stack_size; } INLINE_HEADER StgWord bco_sizeW ( StgBCO *bco ) { return bco->size; } @@ -339,7 +339,9 @@ closure_sizeW_ (StgClosure *p, StgInfoTable *info) case MUT_ARR_PTRS_FROZEN0: return mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); case TSO: - return tso_sizeW((StgTSO *)p); + return sizeofW(StgTSO); + case STACK: + return stack_sizeW((StgStack*)p); case BCO: return bco_sizeW((StgBCO *)p); case TREC_CHUNK: @@ -417,4 +419,62 @@ INLINE_HEADER StgWord8 *mutArrPtrsCard (StgMutArrPtrs *a, lnat n) return ((StgWord8 *)&(a->payload[a->ptrs]) + n); } +/* ----------------------------------------------------------------------------- + Replacing a closure with a different one. We must call + OVERWRITING_CLOSURE(p) on the old closure that is about to be + overwritten. + + In PROFILING mode, LDV profiling requires that we fill the slop + with zeroes, and record the old closure as dead (LDV_recordDead()). + + In DEBUG mode, we must overwrite the slop with zeroes, because the + sanity checker wants to walk through the heap checking all the + pointers. + + In multicore mode, we *cannot* overwrite slop with zeroes, because + another thread might be reading it. So, + + PROFILING is not compatible with +RTS -N<n> (for n > 1) + + THREADED_RTS can be used with DEBUG, but full heap sanity + checking is disabled. 
+ + -------------------------------------------------------------------------- */ + +#if defined(PROFILING) || (!defined(THREADED_RTS) && defined(DEBUG)) +#define OVERWRITING_CLOSURE(c) overwritingClosure(c) +#else +#define OVERWRITING_CLOSURE(c) /* nothing */ +#endif + +#ifdef PROFILING +void LDV_recordDead (StgClosure *c, nat size); +#endif + +#ifdef KEEP_INLINES +void overwritingClosure (StgClosure *p); +#else +INLINE_HEADER +#endif +void +overwritingClosure (StgClosure *p) +{ + nat size, i; + +#if defined(PROFILING) + if (era <= 0) return; +#endif + + size = closure_sizeW(p); + + // For LDV profiling, we need to record the closure as dead +#if defined(PROFILING) + LDV_recordDead((StgClosure *)(p), size); +#endif + + for (i = 0; i < size - sizeofW(StgThunkHeader); i++) { + ((StgThunk *)(p))->payload[i] = 0; + } +} + #endif /* RTS_STORAGE_CLOSUREMACROS_H */ diff --git a/includes/rts/storage/ClosureTypes.h b/includes/rts/storage/ClosureTypes.h index b7489c90c1..75ec08bf18 100644 --- a/includes/rts/storage/ClosureTypes.h +++ b/includes/rts/storage/ClosureTypes.h @@ -56,27 +56,29 @@ #define RET_FUN 35 #define UPDATE_FRAME 36 #define CATCH_FRAME 37 -#define STOP_FRAME 38 -#define BLOCKING_QUEUE 39 -#define BLACKHOLE 40 -#define MVAR_CLEAN 41 -#define MVAR_DIRTY 42 -#define ARR_WORDS 43 -#define MUT_ARR_PTRS_CLEAN 44 -#define MUT_ARR_PTRS_DIRTY 45 -#define MUT_ARR_PTRS_FROZEN0 46 -#define MUT_ARR_PTRS_FROZEN 47 -#define MUT_VAR_CLEAN 48 -#define MUT_VAR_DIRTY 49 -#define WEAK 50 -#define PRIM 51 -#define MUT_PRIM 52 -#define TSO 53 -#define TREC_CHUNK 54 -#define ATOMICALLY_FRAME 55 -#define CATCH_RETRY_FRAME 56 -#define CATCH_STM_FRAME 57 -#define WHITEHOLE 58 -#define N_CLOSURE_TYPES 59 +#define UNDERFLOW_FRAME 38 +#define STOP_FRAME 39 +#define BLOCKING_QUEUE 40 +#define BLACKHOLE 41 +#define MVAR_CLEAN 42 +#define MVAR_DIRTY 43 +#define ARR_WORDS 44 +#define MUT_ARR_PTRS_CLEAN 45 +#define MUT_ARR_PTRS_DIRTY 46 +#define MUT_ARR_PTRS_FROZEN0 47 +#define MUT_ARR_PTRS_FROZEN 48 +#define MUT_VAR_CLEAN 49 +#define MUT_VAR_DIRTY 50 +#define WEAK 51 +#define PRIM 52 +#define MUT_PRIM 53 +#define TSO 54 +#define STACK 55 +#define TREC_CHUNK 56 +#define ATOMICALLY_FRAME 57 +#define CATCH_RETRY_FRAME 58 +#define CATCH_STM_FRAME 59 +#define WHITEHOLE 60 +#define N_CLOSURE_TYPES 61 #endif /* RTS_STORAGE_CLOSURETYPES_H */ diff --git a/includes/rts/storage/Closures.h b/includes/rts/storage/Closures.h index 2683ce7d49..f3929ee36f 100644 --- a/includes/rts/storage/Closures.h +++ b/includes/rts/storage/Closures.h @@ -166,6 +166,11 @@ typedef struct { } StgCatchFrame; typedef struct { + const StgInfoTable* info; + struct StgStack_ *next_chunk; +} StgUnderflowFrame; + +typedef struct { StgHeader header; } StgStopFrame; diff --git a/includes/rts/storage/TSO.h b/includes/rts/storage/TSO.h index 0e9883f1a6..04e673fb12 100644 --- a/includes/rts/storage/TSO.h +++ b/includes/rts/storage/TSO.h @@ -83,7 +83,7 @@ typedef struct StgTSO_ { Currently used for linking TSOs on: * cap->run_queue_{hd,tl} * (non-THREADED_RTS); the blocked_queue - * and pointing to the relocated version of a ThreadRelocated + * and pointing to the next chunk for a ThreadOldStack NOTE!!! do not modify _link directly, it is subject to a write barrier for generational GC. 
Instead use the @@ -97,7 +97,11 @@ typedef struct StgTSO_ { struct StgTSO_* global_link; // Links threads on the // generation->threads lists - StgWord dirty; /* non-zero => dirty */ + /* + * The thread's stack + */ + struct StgStack_ *stackobj; + /* * The tso->dirty flag indicates that this TSO's stack should be * scanned during garbage collection. It also indicates that this @@ -110,10 +114,6 @@ typedef struct StgTSO_ { * * tso->dirty is set by dirty_TSO(), and unset by the garbage * collector (only). - * - * The link field has a separate dirty bit of its own, namely the - * bit TSO_LINK_DIRTY in the tso->flags field, set by - * setTSOLink(). */ StgWord16 what_next; // Values defined in Constants.h @@ -121,21 +121,21 @@ typedef struct StgTSO_ { StgWord32 flags; // Values defined in Constants.h StgTSOBlockInfo block_info; StgThreadID id; - int saved_errno; + StgWord32 saved_errno; + StgWord32 dirty; /* non-zero => dirty */ struct InCall_* bound; struct Capability_* cap; + struct StgTRecHeader_ * trec; /* STM transaction record */ /* - A list of threads blocked on this TSO waiting to throw - exceptions. In order to access this field, the TSO must be - locked using lockClosure/unlockClosure (see SMP.h). + * A list of threads blocked on this TSO waiting to throw exceptions. */ struct MessageThrowTo_ * blocked_exceptions; /* - A list of StgBlockingQueue objects, representing threads blocked - on thunks that are under evaluation by this thread. + * A list of StgBlockingQueue objects, representing threads + * blocked on thunks that are under evaluation by this thread. */ struct StgBlockingQueue_ *bq; @@ -149,14 +149,36 @@ typedef struct StgTSO_ { StgWord32 saved_winerror; #endif - /* The thread stack... */ - StgWord32 stack_size; /* stack size in *words* */ - StgWord32 max_stack_size; /* maximum stack size in *words* */ - StgPtr sp; - - StgWord stack[FLEXIBLE_ARRAY]; + /* + * sum of the sizes of all stack chunks (in words), used to decide + * whether to throw the StackOverflow exception when the stack + * overflows, or whether to just chain on another stack chunk. + * + * Note that this overestimates the real stack size, because each + * chunk will have a gap at the end, of +RTS -kb<size> words. + * This means stack overflows are not entirely accurate, because + * the more gaps there are, the sooner the stack will run into the + * hard +RTS -K<size> limit. + */ + StgWord32 tot_stack_size; + } *StgTSOPtr; +typedef struct StgStack_ { + StgHeader header; + StgWord32 stack_size; // stack size in *words* + StgWord32 dirty; // non-zero => dirty + StgPtr sp; // current stack pointer + StgWord stack[FLEXIBLE_ARRAY]; +} StgStack; + +// Calculate SpLim from a TSO (reads tso->stackobj, but no fields from +// the stackobj itself). +INLINE_HEADER StgPtr tso_SpLim (StgTSO* tso) +{ + return tso->stackobj->stack + RESERVED_STACK_WORDS; +} + /* ----------------------------------------------------------------------------- functions -------------------------------------------------------------------------- */ @@ -165,17 +187,7 @@ void dirty_TSO (Capability *cap, StgTSO *tso); void setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target); void setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target); -// Apply to a TSO before looking at it if you are not sure whether it -// might be ThreadRelocated or not (basically, that's most of the time -// unless the TSO is the current TSO). 
-// -INLINE_HEADER StgTSO * deRefTSO(StgTSO *tso) -{ - while (tso->what_next == ThreadRelocated) { - tso = tso->_link; - } - return tso; -} +void dirty_STACK (Capability *cap, StgStack *stack); /* ----------------------------------------------------------------------------- Invariants: @@ -232,18 +244,6 @@ INLINE_HEADER StgTSO * deRefTSO(StgTSO *tso) ---------------------------------------------------------------------------- */ -/* Workaround for a bug/quirk in gcc on certain architectures. - * symptom is that (&tso->stack - &tso->header) /= sizeof(StgTSO) - * in other words, gcc pads the structure at the end. - */ - -extern StgTSO dummy_tso; - -#define TSO_STRUCT_SIZE \ - ((char *)&dummy_tso.stack - (char *)&dummy_tso.header) - -#define TSO_STRUCT_SIZEW (TSO_STRUCT_SIZE / sizeof(W_)) - /* this is the NIL ptr for a TSO queue (e.g. runnable queue) */ #define END_TSO_QUEUE ((StgTSO *)(void*)&stg_END_TSO_QUEUE_closure) diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h index 8a1b84a5cc..c52a3c9702 100644 --- a/includes/stg/MiscClosures.h +++ b/includes/stg/MiscClosures.h @@ -61,6 +61,7 @@ RTS_RET(stg_catch_stm_frame); RTS_RET(stg_unmaskAsyncExceptionszh_ret); RTS_RET(stg_maskUninterruptiblezh_ret); RTS_RET(stg_maskAsyncExceptionszh_ret); +RTS_RET(stg_stack_underflow_frame); // RTS_FUN(stg_interp_constr_entry); // @@ -100,6 +101,7 @@ RTS_ENTRY(stg_STABLE_NAME); RTS_ENTRY(stg_MVAR_CLEAN); RTS_ENTRY(stg_MVAR_DIRTY); RTS_ENTRY(stg_TSO); +RTS_ENTRY(stg_STACK); RTS_ENTRY(stg_ARR_WORDS); RTS_ENTRY(stg_MUT_ARR_WORDS); RTS_ENTRY(stg_MUT_ARR_PTRS_CLEAN); @@ -119,6 +121,7 @@ RTS_ENTRY(stg_PAP); RTS_ENTRY(stg_AP); RTS_ENTRY(stg_AP_NOUPD); RTS_ENTRY(stg_AP_STACK); +RTS_ENTRY(stg_AP_STACK_NOUPD); RTS_ENTRY(stg_dummy_ret); RTS_ENTRY(stg_raise); RTS_ENTRY(stg_raise_ret); diff --git a/includes/stg/Ticky.h b/includes/stg/Ticky.h index 2ede8ebdf9..a811aec4eb 100644 --- a/includes/stg/Ticky.h +++ b/includes/stg/Ticky.h @@ -190,7 +190,8 @@ EXTERN StgInt RET_SEMI_loads_avoided INIT(0); #define TICK_UPD_SQUEEZED() #define TICK_ALLOC_HEAP_NOCTR(x) #define TICK_GC_FAILED_PROMOTION() -#define TICK_ALLOC_TSO(g,s) +#define TICK_ALLOC_TSO() +#define TICK_ALLOC_STACK(g) #define TICK_ALLOC_UP_THK(g,s) #define TICK_ALLOC_SE_THK(g,s) diff --git a/rts/Apply.cmm b/rts/Apply.cmm index 9af9b11a97..f9ac3b353c 100644 --- a/rts/Apply.cmm +++ b/rts/Apply.cmm @@ -350,3 +350,56 @@ for: ENTER(); } + +/* ----------------------------------------------------------------------------- + AP_STACK_NOUPD - exactly like AP_STACK, but doesn't push an update frame. + -------------------------------------------------------------------------- */ + +INFO_TABLE(stg_AP_STACK_NOUPD,/*special layout*/0,0,AP_STACK, + "AP_STACK_NOUPD","AP_STACK_NOUPD") +{ + W_ Words; + W_ ap; + + ap = R1; + + Words = StgAP_STACK_size(ap); + + /* + * Check for stack overflow. IMPORTANT: use a _NP check here, + * because if the check fails, we might end up blackholing this very + * closure, in which case we must enter the blackhole on return rather + * than continuing to evaluate the now-defunct closure. + */ + STK_CHK_NP(WDS(Words) + WDS(AP_STACK_SPLIM)); + /* ensure there is at least AP_STACK_SPLIM words of headroom available + * after unpacking the AP_STACK. 
See bug #1466 */ + + Sp = Sp - WDS(Words); + + TICK_ENT_AP(); + LDV_ENTER(ap); + + // Enter PAP cost centre + ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL + + // Reload the stack + W_ i; + W_ p; + p = ap + SIZEOF_StgHeader + OFFSET_StgAP_STACK_payload; + i = 0; +for: + if (i < Words) { + Sp(i) = W_[p]; + p = p + WDS(1); + i = i + 1; + goto for; + } + + // Off we go! + TICK_ENT_VIA_NODE(); + + R1 = StgAP_STACK_fun(ap); + + ENTER(); +} diff --git a/rts/ClosureFlags.c b/rts/ClosureFlags.c index d5181cae22..41810f4025 100644 --- a/rts/ClosureFlags.c +++ b/rts/ClosureFlags.c @@ -59,8 +59,9 @@ StgWord16 closure_flags[] = { [RET_FUN] = ( 0 ), [UPDATE_FRAME] = ( _BTM ), [CATCH_FRAME] = ( _BTM ), - [STOP_FRAME] = ( _BTM ), - [BLACKHOLE] = ( _NS| _UPT ), + [UNDERFLOW_FRAME] = ( _BTM ), + [STOP_FRAME] = ( _BTM ), + [BLACKHOLE] = ( _NS| _UPT ), [BLOCKING_QUEUE] = ( _NS| _MUT|_UPT ), [MVAR_CLEAN] = (_HNF| _NS| _MUT|_UPT ), [MVAR_DIRTY] = (_HNF| _NS| _MUT|_UPT ), @@ -74,7 +75,8 @@ StgWord16 closure_flags[] = { [WEAK] = (_HNF| _NS| _UPT ), [PRIM] = (_HNF| _NS| _UPT ), [MUT_PRIM] = (_HNF| _NS| _MUT|_UPT ), - [TSO] = (_HNF| _NS| _MUT|_UPT ), + [TSO] = (_HNF| _NS| _MUT|_UPT ), + [STACK] = (_HNF| _NS| _MUT|_UPT ), [TREC_CHUNK] = ( _NS| _MUT|_UPT ), [ATOMICALLY_FRAME] = ( _BTM ), [CATCH_RETRY_FRAME] = ( _BTM ), @@ -82,6 +84,6 @@ StgWord16 closure_flags[] = { [WHITEHOLE] = ( 0 ) }; -#if N_CLOSURE_TYPES != 59 +#if N_CLOSURE_TYPES != 61 #error Closure types changed: update ClosureFlags.c! #endif diff --git a/rts/Exception.cmm b/rts/Exception.cmm index 581dafdf4f..24da1c690e 100644 --- a/rts/Exception.cmm +++ b/rts/Exception.cmm @@ -283,11 +283,6 @@ stg_killThreadzh * If the exception went to a catch frame, we'll just continue from * the handler. */ - loop: - if (StgTSO_what_next(target) == ThreadRelocated::I16) { - target = StgTSO__link(target); - goto loop; - } if (target == CurrentTSO) { /* * So what should happen if a thread calls "throwTo self" inside @@ -436,9 +431,9 @@ stg_raisezh #endif retry_pop_stack: - StgTSO_sp(CurrentTSO) = Sp; + SAVE_THREAD_STATE(); (frame_type) = foreign "C" raiseExceptionHelper(BaseReg "ptr", CurrentTSO "ptr", exception "ptr") []; - Sp = StgTSO_sp(CurrentTSO); + LOAD_THREAD_STATE(); if (frame_type == ATOMICALLY_FRAME) { /* The exception has reached the edge of a memory transaction. Check that * the transaction is valid. If not then perhaps the exception should @@ -511,8 +506,10 @@ retry_pop_stack: * We will leave the stack in a GC'able state, see the stg_stop_thread * entry code in StgStartup.cmm. */ - Sp = CurrentTSO + TSO_OFFSET_StgTSO_stack - + WDS(TO_W_(StgTSO_stack_size(CurrentTSO))) - WDS(2); + W_ stack; + stack = StgTSO_stackobj(CurrentTSO); + Sp = stack + OFFSET_StgStack_stack + + WDS(TO_W_(StgStack_stack_size(stack))) - WDS(2); Sp(1) = exception; /* save the exception */ Sp(0) = stg_enter_info; /* so that GC can traverse this stack */ StgTSO_what_next(CurrentTSO) = ThreadKilled::I16; diff --git a/rts/Interpreter.c b/rts/Interpreter.c index fa4a46fd12..ade4ad18ed 100644 --- a/rts/Interpreter.c +++ b/rts/Interpreter.c @@ -65,13 +65,13 @@ #define BCO_LIT(n) literals[n] #define LOAD_STACK_POINTERS \ - Sp = cap->r.rCurrentTSO->sp; \ + Sp = cap->r.rCurrentTSO->stackobj->sp; \ /* We don't change this ... 
*/ \ - SpLim = cap->r.rCurrentTSO->stack + RESERVED_STACK_WORDS; + SpLim = tso_SpLim(cap->r.rCurrentTSO); #define SAVE_STACK_POINTERS \ ASSERT(Sp > SpLim); \ - cap->r.rCurrentTSO->sp = Sp + cap->r.rCurrentTSO->stackobj->sp = Sp #define RETURN_TO_SCHEDULER(todo,retcode) \ SAVE_STACK_POINTERS; \ @@ -266,7 +266,7 @@ eval_obj: debugBelch("Sp = %p\n", Sp); debugBelch("\n" ); - printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size); + printStackChunk(Sp,cap->r.rCurrentTSO->stackobj->stack+cap->r.rCurrentTSO->stackobj->stack_size); debugBelch("\n\n"); ); @@ -381,11 +381,11 @@ do_return: debugBelch("Returning: "); printObj(obj); debugBelch("Sp = %p\n", Sp); debugBelch("\n" ); - printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size); + printStackChunk(Sp,cap->r.rCurrentTSO->stackobj->stack+cap->r.rCurrentTSO->stackobj->stack_size); debugBelch("\n\n"); ); - IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size)); + IF_DEBUG(sanity,checkStackChunk(Sp, cap->r.rCurrentTSO->stackobj->stack+cap->r.rCurrentTSO->stackobj->stack_size)); switch (get_itbl((StgClosure *)Sp)->type) { @@ -466,7 +466,7 @@ do_return: INTERP_TICK(it_retto_other); IF_DEBUG(interpreter, debugBelch("returning to unknown frame -- yielding to sched\n"); - printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size); + printStackChunk(Sp,cap->r.rCurrentTSO->stackobj->stack+cap->r.rCurrentTSO->stackobj->stack_size); ); Sp -= 2; Sp[1] = (W_)tagged_obj; @@ -529,8 +529,8 @@ do_return_unboxed: INTERP_TICK(it_retto_other); IF_DEBUG(interpreter, debugBelch("returning to unknown frame -- yielding to sched\n"); - printStackChunk(Sp,cap->r.rCurrentTSO->stack+cap->r.rCurrentTSO->stack_size); - ); + printStackChunk(Sp,cap->r.rCurrentTSO->stackobj->stack+cap->r.rCurrentTSO->stackobj->stack_size); + ); RETURN_TO_SCHEDULER_NO_PAUSE(ThreadRunGHC, ThreadYielding); } } diff --git a/rts/LdvProfile.c b/rts/LdvProfile.c index 021ecf0846..acec0572d7 100644 --- a/rts/LdvProfile.c +++ b/rts/LdvProfile.c @@ -168,6 +168,7 @@ processHeapClosureForDead( StgClosure *c ) // stack objects case UPDATE_FRAME: case CATCH_FRAME: + case UNDERFLOW_FRAME: case STOP_FRAME: case RET_DYN: case RET_BCO: diff --git a/rts/Messages.c b/rts/Messages.c index 5e0fa2544f..1730278930 100644 --- a/rts/Messages.c +++ b/rts/Messages.c @@ -98,11 +98,13 @@ loop: r = throwToMsg(cap, t); switch (r) { - case THROWTO_SUCCESS: + case THROWTO_SUCCESS: { // this message is done - unlockClosure((StgClosure*)m, &stg_MSG_NULL_info); - tryWakeupThread(cap, t->source); + StgTSO *source = t->source; + doneWithMsgThrowTo(t); + tryWakeupThread(cap, source); break; + } case THROWTO_BLOCKED: // unlock the message unlockClosure((StgClosure*)m, &stg_MSG_THROWTO_info); @@ -203,7 +205,7 @@ loop: else if (info == &stg_TSO_info) { - owner = deRefTSO((StgTSO *)p); + owner = (StgTSO*)p; #ifdef THREADED_RTS if (owner->cap != cap) { @@ -265,7 +267,7 @@ loop: ASSERT(bq->bh == bh); - owner = deRefTSO(bq->owner); + owner = bq->owner; ASSERT(owner != END_TSO_QUEUE); diff --git a/rts/Messages.h b/rts/Messages.h index 54650fd018..febb839ee9 100644 --- a/rts/Messages.h +++ b/rts/Messages.h @@ -15,4 +15,15 @@ void executeMessage (Capability *cap, Message *m); void sendMessage (Capability *from_cap, Capability *to_cap, Message *msg); #endif +#include "Capability.h" +#include "Updates.h" // for DEBUG_FILL_SLOP + +INLINE_HEADER void +doneWithMsgThrowTo (MessageThrowTo *m) +{ + OVERWRITING_CLOSURE((StgClosure*)m); + 
unlockClosure((StgClosure*)m, &stg_MSG_NULL_info); + LDV_RECORD_CREATE(m); +} + #include "EndPrivate.h" diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index 0cf26b2b7f..4f6c2526d4 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -634,11 +634,6 @@ stg_threadStatuszh W_ ret; tso = R1; - loop: - if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) { - tso = StgTSO__link(tso); - goto loop; - } what_next = TO_W_(StgTSO_what_next(tso)); why_blocked = TO_W_(StgTSO_why_blocked(tso)); @@ -939,9 +934,9 @@ stg_retryzh // Find the enclosing ATOMICALLY_FRAME or CATCH_RETRY_FRAME retry_pop_stack: - StgTSO_sp(CurrentTSO) = Sp; - (frame_type) = foreign "C" findRetryFrameHelper(CurrentTSO "ptr") []; - Sp = StgTSO_sp(CurrentTSO); + SAVE_THREAD_STATE(); + (frame_type) = foreign "C" findRetryFrameHelper(MyCapability(), CurrentTSO "ptr") []; + LOAD_THREAD_STATE(); frame = Sp; trec = StgTSO_trec(CurrentTSO); outer = StgTRecHeader_enclosing_trec(trec); @@ -1138,13 +1133,13 @@ stg_newMVarzh } -#define PerformTake(tso, value) \ - W_[StgTSO_sp(tso) + WDS(1)] = value; \ - W_[StgTSO_sp(tso) + WDS(0)] = stg_gc_unpt_r1_info; +#define PerformTake(stack, value) \ + W_[StgStack_sp(stack) + WDS(1)] = value; \ + W_[StgStack_sp(stack) + WDS(0)] = stg_gc_unpt_r1_info; -#define PerformPut(tso,lval) \ - StgTSO_sp(tso) = StgTSO_sp(tso) + WDS(3); \ - lval = W_[StgTSO_sp(tso) - WDS(1)]; +#define PerformPut(stack,lval) \ + StgStack_sp(stack) = StgStack_sp(stack) + WDS(3); \ + lval = W_[StgStack_sp(stack) - WDS(1)]; stg_takeMVarzh { @@ -1224,24 +1219,20 @@ loop: StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } -loop2: - if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) { - tso = StgTSO__link(tso); - goto loop2; - } - ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16); ASSERT(StgTSO_block_info(tso) == mvar); // actually perform the putMVar for the thread that we just woke up - PerformPut(tso,StgMVar_value(mvar)); + W_ stack; + stack = StgTSO_stackobj(tso); + PerformPut(stack, StgMVar_value(mvar)); // indicate that the MVar operation has now completed. StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; // no need to mark the TSO dirty, we have only written END_TSO_QUEUE. - foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) []; + foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; unlockClosure(mvar, stg_MVAR_DIRTY_info); RET_P(val); @@ -1303,24 +1294,20 @@ loop: StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } -loop2: - if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) { - tso = StgTSO__link(tso); - goto loop2; - } - ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16); ASSERT(StgTSO_block_info(tso) == mvar); // actually perform the putMVar for the thread that we just woke up - PerformPut(tso,StgMVar_value(mvar)); + W_ stack; + stack = StgTSO_stackobj(tso); + PerformPut(stack, StgMVar_value(mvar)); // indicate that the MVar operation has now completed. StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; // no need to mark the TSO dirty, we have only written END_TSO_QUEUE. 
- foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) []; + foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; unlockClosure(mvar, stg_MVAR_DIRTY_info); RET_NP(1,val); @@ -1395,26 +1382,22 @@ loop: StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } -loop2: - if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) { - tso = StgTSO__link(tso); - goto loop2; - } - ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16); ASSERT(StgTSO_block_info(tso) == mvar); // actually perform the takeMVar - PerformTake(tso, val); + W_ stack; + stack = StgTSO_stackobj(tso); + PerformTake(stack, val); // indicate that the MVar operation has now completed. StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; - - if (TO_W_(StgTSO_dirty(tso)) == 0) { - foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; + + if (TO_W_(StgStack_dirty(stack)) == 0) { + foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") []; } - foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) []; + foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; unlockClosure(mvar, stg_MVAR_DIRTY_info); jump %ENTRY_CODE(Sp(0)); @@ -1468,26 +1451,22 @@ loop: StgMVar_tail(mvar) = stg_END_TSO_QUEUE_closure; } -loop2: - if (TO_W_(StgTSO_what_next(tso)) == ThreadRelocated) { - tso = StgTSO__link(tso); - goto loop2; - } - ASSERT(StgTSO_why_blocked(tso) == BlockedOnMVar::I16); ASSERT(StgTSO_block_info(tso) == mvar); // actually perform the takeMVar - PerformTake(tso, val); + W_ stack; + stack = StgTSO_stackobj(tso); + PerformTake(stack, val); // indicate that the MVar operation has now completed. StgTSO__link(tso) = stg_END_TSO_QUEUE_closure; - if (TO_W_(StgTSO_dirty(tso)) == 0) { - foreign "C" dirty_TSO(MyCapability() "ptr", tso "ptr") []; + if (TO_W_(StgStack_dirty(stack)) == 0) { + foreign "C" dirty_STACK(MyCapability() "ptr", stack "ptr") []; } - foreign "C" tryWakeupThread_(MyCapability() "ptr", tso) []; + foreign "C" tryWakeupThread(MyCapability() "ptr", tso) []; unlockClosure(mvar, stg_MVAR_DIRTY_info); RET_N(1); diff --git a/rts/Printer.c b/rts/Printer.c index 565a11ed77..fcc483dce6 100644 --- a/rts/Printer.c +++ b/rts/Printer.c @@ -276,6 +276,15 @@ printClosure( StgClosure *obj ) break; } + case UNDERFLOW_FRAME: + { + StgUnderflowFrame* u = (StgUnderflowFrame*)obj; + debugBelch("UNDERFLOW_FRAME("); + printPtr((StgPtr)u->next_chunk); + debugBelch(")\n"); + break; + } + case STOP_FRAME: { StgStopFrame* u = (StgStopFrame*)obj; @@ -461,13 +470,11 @@ printStackChunk( StgPtr sp, StgPtr spBottom ) case UPDATE_FRAME: case CATCH_FRAME: - printObj((StgClosure*)sp); + case UNDERFLOW_FRAME: + case STOP_FRAME: + printObj((StgClosure*)sp); continue; - case STOP_FRAME: - printObj((StgClosure*)sp); - return; - case RET_DYN: { StgRetDyn* r; @@ -559,7 +566,8 @@ printStackChunk( StgPtr sp, StgPtr spBottom ) void printTSO( StgTSO *tso ) { - printStackChunk( tso->sp, tso->stack+tso->stack_size); + printStackChunk( tso->stackobj->sp, + tso->stackobj->stack+tso->stackobj->stack_size); } /* -------------------------------------------------------------------------- @@ -1039,7 +1047,6 @@ char *what_next_strs[] = { [ThreadRunGHC] = "ThreadRunGHC", [ThreadInterpret] = "ThreadInterpret", [ThreadKilled] = "ThreadKilled", - [ThreadRelocated] = "ThreadRelocated", [ThreadComplete] = "ThreadComplete" }; @@ -1102,6 +1109,7 @@ char *closure_type_names[] = { [RET_FUN] = "RET_FUN", [UPDATE_FRAME] = "UPDATE_FRAME", [CATCH_FRAME] = "CATCH_FRAME", + [UNDERFLOW_FRAME] = "UNDERFLOW_FRAME", [STOP_FRAME] = "STOP_FRAME", [BLACKHOLE] = "BLACKHOLE", [BLOCKING_QUEUE] = 
"BLOCKING_QUEUE", @@ -1118,6 +1126,7 @@ char *closure_type_names[] = { [PRIM] = "PRIM", [MUT_PRIM] = "MUT_PRIM", [TSO] = "TSO", + [STACK] = "STACK", [TREC_CHUNK] = "TREC_CHUNK", [ATOMICALLY_FRAME] = "ATOMICALLY_FRAME", [CATCH_RETRY_FRAME] = "CATCH_RETRY_FRAME", diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c index d398afd66f..39b64d4c51 100644 --- a/rts/ProfHeap.c +++ b/rts/ProfHeap.c @@ -947,19 +947,35 @@ heapCensusChain( Census *census, bdescr *bd ) prim = rtsTrue; #ifdef PROFILING if (RtsFlags.ProfFlags.includeTSOs) { - size = tso_sizeW((StgTSO *)p); + size = sizeofW(StgTSO); break; } else { // Skip this TSO and move on to the next object - p += tso_sizeW((StgTSO *)p); + p += sizeofW(StgTSO); continue; } #else - size = tso_sizeW((StgTSO *)p); + size = sizeofW(StgTSO); break; #endif - case TREC_CHUNK: + case STACK: + prim = rtsTrue; +#ifdef PROFILING + if (RtsFlags.ProfFlags.includeTSOs) { + size = stack_sizeW((StgStack*)p); + break; + } else { + // Skip this TSO and move on to the next object + p += stack_sizeW((StgStack*)p); + continue; + } +#else + size = stack_sizeW((StgStack*)p); + break; +#endif + + case TREC_CHUNK: prim = rtsTrue; size = sizeofW(StgTRecChunk); break; diff --git a/rts/ProfHeap.h b/rts/ProfHeap.h index 48b5bafa48..c4a92e200b 100644 --- a/rts/ProfHeap.h +++ b/rts/ProfHeap.h @@ -14,7 +14,6 @@ void heapCensus (void); nat initHeapProfiling (void); void endHeapProfiling (void); -void LDV_recordDead (StgClosure *c, nat size); rtsBool strMatchesSelector (char* str, char* sel); #include "EndPrivate.h" diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c index 7abccde0a5..550f703e8d 100644 --- a/rts/RaiseAsync.c +++ b/rts/RaiseAsync.c @@ -23,11 +23,11 @@ #include "win32/IOManager.h" #endif -static void raiseAsync (Capability *cap, - StgTSO *tso, - StgClosure *exception, - rtsBool stop_at_atomically, - StgUpdateFrame *stop_here); +static StgTSO* raiseAsync (Capability *cap, + StgTSO *tso, + StgClosure *exception, + rtsBool stop_at_atomically, + StgUpdateFrame *stop_here); static void removeFromQueues(Capability *cap, StgTSO *tso); @@ -61,11 +61,9 @@ static void throwToSingleThreaded__ (Capability *cap, StgTSO *tso, StgClosure *exception, rtsBool stop_at_atomically, StgUpdateFrame *stop_here) { - tso = deRefTSO(tso); - // Thread already dead? if (tso->what_next == ThreadComplete || tso->what_next == ThreadKilled) { - return; + return; } // Remove it from any blocking queues @@ -81,13 +79,13 @@ throwToSingleThreaded (Capability *cap, StgTSO *tso, StgClosure *exception) } void -throwToSingleThreaded_ (Capability *cap, StgTSO *tso, StgClosure *exception, +throwToSingleThreaded_ (Capability *cap, StgTSO *tso, StgClosure *exception, rtsBool stop_at_atomically) { throwToSingleThreaded__ (cap, tso, exception, stop_at_atomically, NULL); } -void +void // cannot return a different TSO suspendComputation (Capability *cap, StgTSO *tso, StgUpdateFrame *stop_here) { throwToSingleThreaded__ (cap, tso, NULL, rtsFalse, stop_here); @@ -192,9 +190,6 @@ retry: check_target: ASSERT(target != END_TSO_QUEUE); - // follow ThreadRelocated links in the target first - target = deRefTSO(target); - // Thread already dead? if (target->what_next == ThreadComplete || target->what_next == ThreadKilled) { @@ -268,7 +263,7 @@ check_target: // might as well just do it now. The message will // be a no-op when it arrives. 
unlockClosure((StgClosure*)m, i); - tryWakeupThread_(cap, target); + tryWakeupThread(cap, target); goto retry; } @@ -286,7 +281,7 @@ check_target: } // nobody else can wake up this TSO after we claim the message - unlockClosure((StgClosure*)m, &stg_MSG_NULL_info); + doneWithMsgThrowTo(m); raiseAsync(cap, target, msg->exception, rtsFalse, NULL); return THROWTO_SUCCESS; @@ -315,12 +310,7 @@ check_target: info = lockClosure((StgClosure *)mvar); - if (target->what_next == ThreadRelocated) { - target = target->_link; - unlockClosure((StgClosure *)mvar,info); - goto retry; - } - // we have the MVar, let's check whether the thread + // we have the MVar, let's check whether the thread // is still blocked on the same MVar. if (target->why_blocked != BlockedOnMVar || (StgMVar *)target->block_info.closure != mvar) { @@ -334,7 +324,7 @@ check_target: // thread now anyway and ignore the message when it // arrives. unlockClosure((StgClosure *)mvar, info); - tryWakeupThread_(cap, target); + tryWakeupThread(cap, target); goto retry; } @@ -505,7 +495,8 @@ maybePerformBlockedException (Capability *cap, StgTSO *tso) { MessageThrowTo *msg; const StgInfoTable *i; - + StgTSO *source; + if (tso->what_next == ThreadComplete || tso->what_next == ThreadFinished) { if (tso->blocked_exceptions != END_BLOCKED_EXCEPTIONS_QUEUE) { awakenBlockedExceptionQueue(cap,tso); @@ -537,8 +528,9 @@ maybePerformBlockedException (Capability *cap, StgTSO *tso) } throwToSingleThreaded(cap, msg->target, msg->exception); - unlockClosure((StgClosure*)msg,&stg_MSG_NULL_info); - tryWakeupThread(cap, msg->source); + source = msg->source; + doneWithMsgThrowTo(msg); + tryWakeupThread(cap, source); return 1; } return 0; @@ -552,13 +544,15 @@ awakenBlockedExceptionQueue (Capability *cap, StgTSO *tso) { MessageThrowTo *msg; const StgInfoTable *i; + StgTSO *source; for (msg = tso->blocked_exceptions; msg != END_BLOCKED_EXCEPTIONS_QUEUE; msg = (MessageThrowTo*)msg->link) { i = lockClosure((StgClosure *)msg); if (i != &stg_MSG_NULL_info) { - unlockClosure((StgClosure *)msg,&stg_MSG_NULL_info); - tryWakeupThread(cap, msg->source); + source = msg->source; + doneWithMsgThrowTo(msg); + tryWakeupThread(cap, source); } else { unlockClosure((StgClosure *)msg,i); } @@ -653,7 +647,7 @@ removeFromQueues(Capability *cap, StgTSO *tso) // ASSERT(m->header.info == &stg_WHITEHOLE_info); // unlock and revoke it at the same time - unlockClosure((StgClosure*)m,&stg_MSG_NULL_info); + doneWithMsgThrowTo(m); break; } @@ -724,7 +718,7 @@ removeFromQueues(Capability *cap, StgTSO *tso) * * -------------------------------------------------------------------------- */ -static void +static StgTSO * raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, rtsBool stop_at_atomically, StgUpdateFrame *stop_here) { @@ -732,6 +726,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, StgPtr sp, frame; StgClosure *updatee; nat i; + StgStack *stack; debugTraceCap(DEBUG_sched, cap, "raising exception in thread %ld.", (long)tso->id); @@ -747,25 +742,21 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, fprintCCS_stderr(tso->prof.CCCS); } #endif - // ASSUMES: the thread is not already complete or dead, or - // ThreadRelocated. Upper layers should deal with that. + // ASSUMES: the thread is not already complete or dead + // Upper layers should deal with that. 
ASSERT(tso->what_next != ThreadComplete && - tso->what_next != ThreadKilled && - tso->what_next != ThreadRelocated); + tso->what_next != ThreadKilled); // only if we own this TSO (except that deleteThread() calls this ASSERT(tso->cap == cap); - // wake it up - if (tso->why_blocked != NotBlocked) { - tso->why_blocked = NotBlocked; - appendToRunQueue(cap,tso); - } + stack = tso->stackobj; // mark it dirty; we're about to change its stack. dirty_TSO(cap, tso); + dirty_STACK(cap, stack); - sp = tso->sp; + sp = stack->sp; if (stop_here != NULL) { updatee = stop_here->updatee; @@ -801,10 +792,13 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, // // 5. If it's a STOP_FRAME, then kill the thread. // - // NB: if we pass an ATOMICALLY_FRAME then abort the associated + // 6. If it's an UNDERFLOW_FRAME, then continue with the next + // stack chunk. + // + // NB: if we pass an ATOMICALLY_FRAME then abort the associated // transaction - info = get_ret_itbl((StgClosure *)frame); + info = get_ret_itbl((StgClosure *)frame); switch (info->i.type) { @@ -859,12 +853,46 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, continue; //no need to bump frame } - case STOP_FRAME: + case UNDERFLOW_FRAME: + { + StgAP_STACK * ap; + nat words; + + // First build an AP_STACK consisting of the stack chunk above the + // current update frame, with the top word on the stack as the + // fun field. + // + words = frame - sp - 1; + ap = (StgAP_STACK *)allocate(cap,AP_STACK_sizeW(words)); + + ap->size = words; + ap->fun = (StgClosure *)sp[0]; + sp++; + for(i=0; i < (nat)words; ++i) { + ap->payload[i] = (StgClosure *)*sp++; + } + + SET_HDR(ap,&stg_AP_STACK_NOUPD_info, + ((StgClosure *)frame)->header.prof.ccs /* ToDo */); + TICK_ALLOC_SE_THK(words+1,0); + + stack->sp = sp; + threadStackUnderflow(cap,tso); + stack = tso->stackobj; + sp = stack->sp; + + sp--; + sp[0] = (W_)ap; + frame = sp + 1; + continue; + } + + case STOP_FRAME: { // We've stripped the entire stack, the thread is now dead. tso->what_next = ThreadKilled; - tso->sp = frame + sizeofW(StgStopFrame); - return; + stack->sp = frame + sizeofW(StgStopFrame); + goto done; } case CATCH_FRAME: @@ -906,17 +934,16 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, */ sp[0] = (W_)raise; sp[-1] = (W_)&stg_enter_info; - tso->sp = sp-1; + stack->sp = sp-1; tso->what_next = ThreadRunGHC; - IF_DEBUG(sanity, checkTSO(tso)); - return; + goto done; } case ATOMICALLY_FRAME: if (stop_at_atomically) { ASSERT(tso->trec->enclosing_trec == NO_TREC); stmCondemnTransaction(cap, tso -> trec); - tso->sp = frame - 2; + stack->sp = frame - 2; // The ATOMICALLY_FRAME expects to be returned a // result from the transaction, which it stores in the // stack frame. Hence we arrange to return a dummy @@ -925,10 +952,10 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, // ATOMICALLY_FRAME instance for condemned // transactions, but I don't fully understand the // interaction with STM invariants. - tso->sp[1] = (W_)&stg_NO_TREC_closure; - tso->sp[0] = (W_)&stg_gc_unpt_r1_info; - tso->what_next = ThreadRunGHC; - return; + stack->sp[1] = (W_)&stg_NO_TREC_closure; + stack->sp[0] = (W_)&stg_gc_unpt_r1_info; + tso->what_next = ThreadRunGHC; + goto done; } // Not stop_at_atomically... fall through and abort the // transaction. 
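The UNDERFLOW_FRAME case above shows the pattern that every stack traversal in the RTS must now follow: a walk no longer ends at tso->stack + tso->stack_size but hops to next_chunk until it reaches a STOP_FRAME. A minimal sketch of a chunk-aware walk, using the existing helpers seen above (visitFrame is a hypothetical callback, not part of this commit):

    static void walkThreadStack (StgTSO *tso)
    {
        StgStack *stack = tso->stackobj;
        StgPtr frame = stack->sp;

        for (;;) {
            const StgRetInfoTable *info = get_ret_itbl((StgClosure *)frame);
            switch (info->i.type) {
            case UNDERFLOW_FRAME:
                // continue the walk in the next chunk
                stack = ((StgUnderflowFrame *)frame)->next_chunk;
                frame = stack->sp;
                break;
            case STOP_FRAME:
                return;                    // bottom of the whole stack
            default:
                visitFrame((StgClosure *)frame);
                frame += stack_frame_sizeW((StgClosure *)frame);
                break;
            }
        }
    }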
@@ -950,7 +977,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, stmAbortTransaction(cap, trec); stmFreeAbortedTRec(cap, trec); tso -> trec = outer; - break; + break; }; default: @@ -961,8 +988,16 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, frame += stack_frame_sizeW((StgClosure *)frame); } - // if we got here, then we stopped at stop_here - ASSERT(stop_here != NULL); +done: + IF_DEBUG(sanity, checkTSO(tso)); + + // wake it up + if (tso->why_blocked != NotBlocked) { + tso->why_blocked = NotBlocked; + appendToRunQueue(cap,tso); + } + + return tso; } diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c index c5a7bf7897..48473d2480 100644 --- a/rts/RetainerProfile.c +++ b/rts/RetainerProfile.c @@ -597,11 +597,13 @@ push( StgClosure *c, retainer c_child_r, StgClosure **first_child ) case AP: case AP_STACK: case TSO: + case STACK: case IND_STATIC: case CONSTR_NOCAF_STATIC: // stack objects case UPDATE_FRAME: case CATCH_FRAME: + case UNDERFLOW_FRAME: case STOP_FRAME: case RET_DYN: case RET_BCO: @@ -925,13 +927,15 @@ pop( StgClosure **c, StgClosure **cp, retainer *r ) case AP: case AP_STACK: case TSO: - case IND_STATIC: + case STACK: + case IND_STATIC: case CONSTR_NOCAF_STATIC: // stack objects case RET_DYN: case UPDATE_FRAME: case CATCH_FRAME: - case STOP_FRAME: + case UNDERFLOW_FRAME: + case STOP_FRAME: case RET_BCO: case RET_SMALL: case RET_BIG: @@ -1001,6 +1005,7 @@ isRetainer( StgClosure *c ) // // TSOs MUST be retainers: they constitute the set of roots. case TSO: + case STACK: // mutable objects case MUT_PRIM: @@ -1080,6 +1085,7 @@ isRetainer( StgClosure *c ) // legal objects during retainer profiling. case UPDATE_FRAME: case CATCH_FRAME: + case UNDERFLOW_FRAME: case STOP_FRAME: case RET_DYN: case RET_BCO: @@ -1257,8 +1263,8 @@ retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r) * RSET(c) and RSET(c_child_r) are valid, i.e., their * interpretation conforms to the current value of flip (even when they * are interpreted to be NULL). - * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete, - * or ThreadKilled, which means that its stack is ready to process. + * If *c is TSO, its state is not ThreadComplete,or ThreadKilled, + * which means that its stack is ready to process. * Note: * This code was almost plagiarzied from GC.c! For each pointer, * retainClosure() is invoked instead of evacuate(). @@ -1291,11 +1297,8 @@ retainStack( StgClosure *c, retainer c_child_r, // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary); #endif - ASSERT(get_itbl(c)->type != TSO || - (((StgTSO *)c)->what_next != ThreadRelocated && - ((StgTSO *)c)->what_next != ThreadComplete && - ((StgTSO *)c)->what_next != ThreadKilled)); - + ASSERT(get_itbl(c)->type == STACK); + p = stackStart; while (p < stackEnd) { info = get_ret_itbl((StgClosure *)p); @@ -1307,7 +1310,8 @@ retainStack( StgClosure *c, retainer c_child_r, p += sizeofW(StgUpdateFrame); continue; - case STOP_FRAME: + case UNDERFLOW_FRAME: + case STOP_FRAME: case CATCH_FRAME: case CATCH_STM_FRAME: case CATCH_RETRY_FRAME: @@ -1560,14 +1564,7 @@ inner_loop: #endif goto loop; } - if (((StgTSO *)c)->what_next == ThreadRelocated) { -#ifdef DEBUG_RETAINER - debugBelch("ThreadRelocated encountered in retainClosure()\n"); -#endif - c = (StgClosure *)((StgTSO *)c)->_link; - goto inner_loop; - } - break; + break; case IND_STATIC: // We just skip IND_STATIC, so its retainer set is never computed. 
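The same mechanical migration repeats through the profiler and API code in the remaining hunks: every direct use of tso->sp, tso->stack and tso->stack_size becomes an access through tso->stackobj. The idiom, as a sketch (checkStackChunk is the existing sanity helper already used in Interpreter.c above):

    StgStack *stack  = tso->stackobj;
    StgPtr    sp     = stack->sp;
    StgPtr    bottom = stack->stack + stack->stack_size;
    checkStackChunk(sp, bottom);   // note: bounds cover one chunk, not the
                                   // thread's whole (chained) stack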
@@ -1681,10 +1678,10 @@ inner_loop: // than attempting to save the current position, because doing so // would be hard. switch (typeOfc) { - case TSO: + case STACK: retainStack(c, c_child_r, - ((StgTSO *)c)->sp, - ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size); + ((StgStack *)c)->sp, + ((StgStack *)c)->stack + ((StgStack *)c)->stack_size); goto loop; case PAP: diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c index 53628dc61a..8fcf8ce812 100644 --- a/rts/RtsAPI.c +++ b/rts/RtsAPI.c @@ -375,8 +375,8 @@ rts_getBool (HaskellObj p) -------------------------------------------------------------------------- */ INLINE_HEADER void pushClosure (StgTSO *tso, StgWord c) { - tso->sp--; - tso->sp[0] = (W_) c; + tso->stackobj->sp--; + tso->stackobj->sp[0] = (W_) c; } StgTSO * diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index c11cc3e925..b0dd42b38c 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -69,6 +69,8 @@ void initRtsFlagsDefaults(void) RtsFlags.GcFlags.maxStkSize = (8 * 1024 * 1024) / sizeof(W_); RtsFlags.GcFlags.initialStkSize = 1024 / sizeof(W_); + RtsFlags.GcFlags.stkChunkSize = (32 * 1024) / sizeof(W_); + RtsFlags.GcFlags.stkChunkBufferSize = (1 * 1024) / sizeof(W_); RtsFlags.GcFlags.minAllocAreaSize = (512 * 1024) / BLOCK_SIZE; RtsFlags.GcFlags.minOldGenSize = (1024 * 1024) / BLOCK_SIZE; @@ -194,7 +196,9 @@ usage_text[] = { " --info Print information about the RTS used by this program", "", " -K<size> Sets the maximum stack size (default 8M) Egs: -K32k -K512k", -" -k<size> Sets the initial thread stack size (default 1k) Egs: -k4k -k2m", +" -ki<size> Sets the initial thread stack size (default 1k) Egs: -ki4k -ki2m", +" -kc<size> Sets the stack chunk size (default 32k)", +" -kb<size> Sets the stack chunk buffer size (default 1k)", "", " -A<size> Sets the minimum allocation area size (default 512k) Egs: -A1m -A10k", " -M<size> Sets the maximum heap size (default unlimited) Egs: -M256k -M1G", @@ -693,15 +697,31 @@ error = rtsTrue; case 'K': RtsFlags.GcFlags.maxStkSize = - decodeSize(rts_argv[arg], 2, 1, HS_WORD_MAX) / sizeof(W_); + decodeSize(rts_argv[arg], 2, sizeof(W_), HS_WORD_MAX) / sizeof(W_); break; case 'k': + switch(rts_argv[arg][2]) { + case 'c': + RtsFlags.GcFlags.stkChunkSize = + decodeSize(rts_argv[arg], 3, sizeof(W_), HS_WORD_MAX) / sizeof(W_); + break; + case 'b': + RtsFlags.GcFlags.stkChunkBufferSize = + decodeSize(rts_argv[arg], 3, sizeof(W_), HS_WORD_MAX) / sizeof(W_); + break; + case 'i': + RtsFlags.GcFlags.initialStkSize = + decodeSize(rts_argv[arg], 3, sizeof(W_), HS_WORD_MAX) / sizeof(W_); + break; + default: RtsFlags.GcFlags.initialStkSize = - decodeSize(rts_argv[arg], 2, 1, HS_WORD_MAX) / sizeof(W_); + decodeSize(rts_argv[arg], 2, sizeof(W_), HS_WORD_MAX) / sizeof(W_); break; + } + break; - case 'M': + case 'M': RtsFlags.GcFlags.maxHeapSize = decodeSize(rts_argv[arg], 2, BLOCK_SIZE, HS_WORD_MAX) / BLOCK_SIZE; /* user give size in *bytes* but "maxHeapSize" is in *blocks* */ @@ -1203,6 +1223,12 @@ error = rtsTrue; RtsFlags.ProfFlags.profileIntervalTicks = 0; } + if (RtsFlags.GcFlags.stkChunkBufferSize > + RtsFlags.GcFlags.stkChunkSize / 2) { + errorBelch("stack chunk buffer size (-kb) must be less than 50%% of the stack chunk size (-kc)"); + error = rtsTrue; + } + if (error) { const char **p; diff --git a/rts/Schedule.c b/rts/Schedule.c index 0b1dec4085..c115d2bde6 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -140,9 +140,7 @@ static void scheduleActivateSpark(Capability *cap); #endif static void schedulePostRunThread(Capability *cap, StgTSO *t); static rtsBool 
scheduleHandleHeapOverflow( Capability *cap, StgTSO *t ); -static void scheduleHandleStackOverflow( Capability *cap, Task *task, - StgTSO *t); -static rtsBool scheduleHandleYield( Capability *cap, StgTSO *t, +static rtsBool scheduleHandleYield( Capability *cap, StgTSO *t, nat prev_what_next ); static void scheduleHandleThreadBlocked( StgTSO *t ); static rtsBool scheduleHandleThreadFinished( Capability *cap, Task *task, @@ -151,9 +149,6 @@ static rtsBool scheduleNeedHeapProfile(rtsBool ready_to_gc); static Capability *scheduleDoGC(Capability *cap, Task *task, rtsBool force_major); -static StgTSO *threadStackOverflow(Capability *cap, StgTSO *tso); -static StgTSO *threadStackUnderflow(Capability *cap, Task *task, StgTSO *tso); - static void deleteThread (Capability *cap, StgTSO *tso); static void deleteAllThreads (Capability *cap); @@ -426,6 +421,7 @@ run_thread: cap->in_haskell = rtsTrue; dirty_TSO(cap,t); + dirty_STACK(cap,t->stackobj); #if defined(THREADED_RTS) if (recent_activity == ACTIVITY_DONE_GC) { @@ -503,10 +499,6 @@ run_thread: schedulePostRunThread(cap,t); - if (ret != StackOverflow) { - t = threadStackUnderflow(cap,task,t); - } - ready_to_gc = rtsFalse; switch (ret) { @@ -515,8 +507,11 @@ run_thread: break; case StackOverflow: - scheduleHandleStackOverflow(cap,task,t); - break; + // just adjust the stack for this thread, then pop it back + // on the run queue. + threadStackOverflow(cap, t); + pushOnRunQueue(cap,t); + break; case ThreadYielding: if (scheduleHandleYield(cap, t, prev_what_next)) { @@ -729,8 +724,7 @@ schedulePushWork(Capability *cap USED_IF_THREADS, for (; t != END_TSO_QUEUE; t = next) { next = t->_link; t->_link = END_TSO_QUEUE; - if (t->what_next == ThreadRelocated - || t->bound == task->incall // don't move my bound thread + if (t->bound == task->incall // don't move my bound thread || tsoLocked(t)) { // don't move a locked thread setTSOLink(cap, prev, t); setTSOPrev(cap, t, prev); @@ -1098,30 +1092,6 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t ) } /* ----------------------------------------------------------------------------- - * Handle a thread that returned to the scheduler with ThreadStackOverflow - * -------------------------------------------------------------------------- */ - -static void -scheduleHandleStackOverflow (Capability *cap, Task *task, StgTSO *t) -{ - /* just adjust the stack for this thread, then pop it back - * on the run queue. - */ - { - /* enlarge the stack */ - StgTSO *new_t = threadStackOverflow(cap, t); - - /* The TSO attached to this Task may have moved, so update the - * pointer to it. 
- */ - if (task->incall->tso == t) { - task->incall->tso = new_t; - } - pushOnRunQueue(cap,new_t); - } -} - -/* ----------------------------------------------------------------------------- * Handle a thread that returned to the scheduler with ThreadYielding * -------------------------------------------------------------------------- */ @@ -1241,8 +1211,8 @@ scheduleHandleThreadFinished (Capability *cap STG_UNUSED, Task *task, StgTSO *t) if (t->what_next == ThreadComplete) { if (task->incall->ret) { - // NOTE: return val is tso->sp[1] (see StgStartup.hc) - *(task->incall->ret) = (StgClosure *)task->incall->tso->sp[1]; + // NOTE: return val is stack->sp[1] (see StgStartup.hc) + *(task->incall->ret) = (StgClosure *)task->incall->tso->stackobj->sp[1]; } task->incall->stat = Success; } else { @@ -1578,10 +1548,7 @@ forkProcess(HsStablePtr *entry for (g = 0; g < RtsFlags.GcFlags.generations; g++) { for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) { - if (t->what_next == ThreadRelocated) { - next = t->_link; - } else { - next = t->global_link; + next = t->global_link; // don't allow threads to catch the ThreadKilled // exception, but we do want to raiseAsync() because these // threads may be evaluating thunks that we need later. @@ -1593,7 +1560,6 @@ forkProcess(HsStablePtr *entry // won't get a chance to exit in the usual way (see // also scheduleHandleThreadFinished). t->bound = NULL; - } } } @@ -1661,12 +1627,8 @@ deleteAllThreads ( Capability *cap ) debugTrace(DEBUG_sched,"deleting all threads"); for (g = 0; g < RtsFlags.GcFlags.generations; g++) { for (t = generations[g].threads; t != END_TSO_QUEUE; t = next) { - if (t->what_next == ThreadRelocated) { - next = t->_link; - } else { next = t->global_link; deleteThread(cap,t); - } } } @@ -1850,6 +1812,7 @@ resumeThread (void *task_) /* We might have GC'd, mark the TSO dirty again */ dirty_TSO(cap,tso); + dirty_STACK(cap,tso->stackobj); IF_DEBUG(sanity, checkTSO(tso)); @@ -2108,189 +2071,6 @@ performMajorGC(void) performGC_(rtsTrue); } -/* ----------------------------------------------------------------------------- - Stack overflow - - If the thread has reached its maximum stack size, then raise the - StackOverflow exception in the offending thread. Otherwise - relocate the TSO into a larger chunk of memory and adjust its stack - size appropriately. - -------------------------------------------------------------------------- */ - -static StgTSO * -threadStackOverflow(Capability *cap, StgTSO *tso) -{ - nat new_stack_size, stack_words; - lnat new_tso_size; - StgPtr new_sp; - StgTSO *dest; - - IF_DEBUG(sanity,checkTSO(tso)); - - if (tso->stack_size >= tso->max_stack_size - && !(tso->flags & TSO_BLOCKEX)) { - // NB. never raise a StackOverflow exception if the thread is - // inside Control.Exceptino.block. It is impractical to protect - // against stack overflow exceptions, since virtually anything - // can raise one (even 'catch'), so this is the only sensible - // thing to do here. See bug #767. - // - - if (tso->flags & TSO_SQUEEZED) { - return tso; - } - // #3677: In a stack overflow situation, stack squeezing may - // reduce the stack size, but we don't know whether it has been - // reduced enough for the stack check to succeed if we try - // again. Fortunately stack squeezing is idempotent, so all we - // need to do is record whether *any* squeezing happened. If we - // are at the stack's absolute -K limit, and stack squeezing - // happened, then we try running the thread again. 
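A note on the new flag syntax: the -k handling added in rts/RtsFlags.c above dispatches on the character that follows "-k", and startup now rejects a chunk buffer (-kb) of half a chunk (-kc) or more. The following self-contained toy reproduces that dispatch and check; decode_size is a simplified stand-in for the RTS's decodeSize(), and nothing here is real RTS code:

    #include <stdio.h>
    #include <stdlib.h>

    /* simplified stand-in for the RTS's decodeSize(): parse "4k", "2m", ...
       and convert bytes to words */
    static size_t decode_size(const char *s)
    {
        char *end;
        double n = strtod(s, &end);
        if (*end == 'k' || *end == 'K') n *= 1024;
        if (*end == 'm' || *end == 'M') n *= 1024 * 1024;
        if (*end == 'g' || *end == 'G') n *= 1024.0 * 1024 * 1024;
        return (size_t)n / sizeof(size_t);
    }

    int main(void)
    {
        size_t initial = 1024 / sizeof(size_t);          /* -ki, default 1k  */
        size_t chunk   = (32 * 1024) / sizeof(size_t);   /* -kc, default 32k */
        size_t buffer  = (1 * 1024) / sizeof(size_t);    /* -kb, default 1k  */

        const char *args[] = { "-ki4k", "-kc64k", "-kb2k" };
        for (int i = 0; i < 3; i++) {
            const char *arg = args[i];
            switch (arg[2]) {             /* the character after "-k" */
            case 'c': chunk   = decode_size(arg + 3); break;
            case 'b': buffer  = decode_size(arg + 3); break;
            case 'i': initial = decode_size(arg + 3); break;
            default:  initial = decode_size(arg + 2); break;  /* plain -k<size> */
            }
        }

        /* same consistency check as the patch: a buffer of half a chunk or
           more could make overflow/underflow ping-pong */
        if (buffer > chunk / 2) {
            fprintf(stderr, "-kb must be less than 50%% of -kc\n");
            return 1;
        }
        printf("initial=%zu chunk=%zu buffer=%zu (words)\n", initial, chunk, buffer);
        return 0;
    }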
The - // TSO_SQUEEZED flag is set by threadPaused() to tell us whether - // squeezing happened or not. - - debugTrace(DEBUG_gc, - "threadStackOverflow of TSO %ld (%p): stack too large (now %ld; max is %ld)", - (long)tso->id, tso, (long)tso->stack_size, (long)tso->max_stack_size); - IF_DEBUG(gc, - /* If we're debugging, just print out the top of the stack */ - printStackChunk(tso->sp, stg_min(tso->stack+tso->stack_size, - tso->sp+64))); - - // Send this thread the StackOverflow exception - throwToSingleThreaded(cap, tso, (StgClosure *)stackOverflow_closure); - return tso; - } - - - // We also want to avoid enlarging the stack if squeezing has - // already released some of it. However, we don't want to get into - // a pathalogical situation where a thread has a nearly full stack - // (near its current limit, but not near the absolute -K limit), - // keeps allocating a little bit, squeezing removes a little bit, - // and then it runs again. So to avoid this, if we squeezed *and* - // there is still less than BLOCK_SIZE_W words free, then we enlarge - // the stack anyway. - if ((tso->flags & TSO_SQUEEZED) && - ((W_)(tso->sp - tso->stack) >= BLOCK_SIZE_W)) { - return tso; - } - - /* Try to double the current stack size. If that takes us over the - * maximum stack size for this thread, then use the maximum instead - * (that is, unless we're already at or over the max size and we - * can't raise the StackOverflow exception (see above), in which - * case just double the size). Finally round up so the TSO ends up as - * a whole number of blocks. - */ - if (tso->stack_size >= tso->max_stack_size) { - new_stack_size = tso->stack_size * 2; - } else { - new_stack_size = stg_min(tso->stack_size * 2, tso->max_stack_size); - } - new_tso_size = (lnat)BLOCK_ROUND_UP(new_stack_size * sizeof(W_) + - TSO_STRUCT_SIZE)/sizeof(W_); - new_tso_size = round_to_mblocks(new_tso_size); /* Be MBLOCK-friendly */ - new_stack_size = new_tso_size - TSO_STRUCT_SIZEW; - - debugTrace(DEBUG_sched, - "increasing stack size from %ld words to %d.", - (long)tso->stack_size, new_stack_size); - - dest = (StgTSO *)allocate(cap,new_tso_size); - TICK_ALLOC_TSO(new_stack_size,0); - - /* copy the TSO block and the old stack into the new area */ - memcpy(dest,tso,TSO_STRUCT_SIZE); - stack_words = tso->stack + tso->stack_size - tso->sp; - new_sp = (P_)dest + new_tso_size - stack_words; - memcpy(new_sp, tso->sp, stack_words * sizeof(W_)); - - /* relocate the stack pointers... */ - dest->sp = new_sp; - dest->stack_size = new_stack_size; - - /* Mark the old TSO as relocated. We have to check for relocated - * TSOs in the garbage collector and any primops that deal with TSOs. - * - * It's important to set the sp value to just beyond the end - * of the stack, so we don't attempt to scavenge any part of the - * dead TSO's stack. 
- */ - setTSOLink(cap,tso,dest); - write_barrier(); // other threads seeing ThreadRelocated will look at _link - tso->what_next = ThreadRelocated; - tso->sp = (P_)&(tso->stack[tso->stack_size]); - tso->why_blocked = NotBlocked; - - IF_DEBUG(sanity,checkTSO(dest)); -#if 0 - IF_DEBUG(scheduler,printTSO(dest)); -#endif - - return dest; -} - -static StgTSO * -threadStackUnderflow (Capability *cap, Task *task, StgTSO *tso) -{ - bdescr *bd, *new_bd; - lnat free_w, tso_size_w; - StgTSO *new_tso; - - tso_size_w = tso_sizeW(tso); - - if (tso_size_w < MBLOCK_SIZE_W || - // TSO is less than 2 mblocks (since the first mblock is - // shorter than MBLOCK_SIZE_W) - (tso_size_w - BLOCKS_PER_MBLOCK*BLOCK_SIZE_W) % MBLOCK_SIZE_W != 0 || - // or TSO is not a whole number of megablocks (ensuring - // precondition of splitLargeBlock() below) - (tso_size_w <= round_up_to_mblocks(RtsFlags.GcFlags.initialStkSize)) || - // or TSO is smaller than the minimum stack size (rounded up) - (nat)(tso->stack + tso->stack_size - tso->sp) > tso->stack_size / 4) - // or stack is using more than 1/4 of the available space - { - // then do nothing - return tso; - } - - // this is the number of words we'll free - free_w = round_to_mblocks(tso_size_w/2); - - bd = Bdescr((StgPtr)tso); - new_bd = splitLargeBlock(bd, free_w / BLOCK_SIZE_W); - bd->free = bd->start + TSO_STRUCT_SIZEW; - - new_tso = (StgTSO *)new_bd->start; - memcpy(new_tso,tso,TSO_STRUCT_SIZE); - new_tso->stack_size = new_bd->free - new_tso->stack; - - // The original TSO was dirty and probably on the mutable - // list. The new TSO is not yet on the mutable list, so we better - // put it there. - new_tso->dirty = 0; - new_tso->flags &= ~TSO_LINK_DIRTY; - dirty_TSO(cap, new_tso); - - debugTrace(DEBUG_sched, "thread %ld: reducing TSO size from %lu words to %lu", - (long)tso->id, tso_size_w, tso_sizeW(new_tso)); - - tso->_link = new_tso; // no write barrier reqd: same generation - write_barrier(); // other threads seeing ThreadRelocated will look at _link - tso->what_next = ThreadRelocated; - - // The TSO attached to this Task may have moved, so update the - // pointer to it. - if (task->incall->tso == tso) { - task->incall->tso = new_tso; - } - - IF_DEBUG(sanity,checkTSO(new_tso)); - - return new_tso; -} - /* --------------------------------------------------------------------------- Interrupt execution - usually called inside a signal handler so it mustn't do anything fancy. @@ -2337,7 +2117,7 @@ void wakeUpRts(void) exception. -------------------------------------------------------------------------- */ -static void +static void deleteThread (Capability *cap STG_UNUSED, StgTSO *tso) { // NOTE: must only be called on a TSO that we have exclusive @@ -2347,12 +2127,12 @@ deleteThread (Capability *cap STG_UNUSED, StgTSO *tso) if (tso->why_blocked != BlockedOnCCall && tso->why_blocked != BlockedOnCCall_Interruptible) { - throwToSingleThreaded(tso->cap,tso,NULL); + throwToSingleThreaded(tso->cap,tso,NULL); } } #ifdef FORKPROCESS_PRIMOP_SUPPORTED -static void +static void deleteThread_(Capability *cap, StgTSO *tso) { // for forkProcess only: // like deleteThread(), but we delete threads in foreign calls, too. @@ -2406,7 +2186,7 @@ raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception) // we update any closures pointed to from update frames with the // raise closure that we just built. 
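For contrast with the chunked scheme this patch introduces, the threadStackOverflow deleted above grew a thread by relocating the whole TSO, roughly doubling the stack each time. A toy model of that sizing policy; the constants are invented, and the real code also rounded to whole megablocks:

    #include <stdio.h>

    #define WORD        sizeof(unsigned long)
    #define BLOCK_BYTES 4096ul   /* invented block size */
    #define TSO_WORDS   16ul     /* stand-in for TSO_STRUCT_SIZEW */

    static unsigned long min_ul(unsigned long a, unsigned long b)
    { return a < b ? a : b; }

    /* old policy: double the stack, clamp to the -K maximum unless we are
       already past it, then round TSO + stack up to whole blocks */
    static unsigned long grow(unsigned long cur_words, unsigned long max_words)
    {
        unsigned long ns = (cur_words >= max_words)
                             ? cur_words * 2
                             : min_ul(cur_words * 2, max_words);
        unsigned long bytes   = (ns + TSO_WORDS) * WORD;
        unsigned long rounded = ((bytes + BLOCK_BYTES - 1) / BLOCK_BYTES) * BLOCK_BYTES;
        return rounded / WORD - TSO_WORDS;
    }

    int main(void)
    {
        unsigned long stack = 1024 / WORD;                /* 1k initial stack */
        unsigned long max   = (8ul * 1024 * 1024) / WORD; /* -K8m */
        for (int i = 0; i < 4; i++) {
            stack = grow(stack, max);
            printf("relocate and grow -> %lu words\n", stack);
        }
        return 0;
    }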
// - p = tso->sp; + p = tso->stackobj->sp; while(1) { info = get_ret_itbl((StgClosure *)p); next = p + stack_frame_sizeW((StgClosure *)p); @@ -2427,20 +2207,25 @@ raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception) case ATOMICALLY_FRAME: debugTrace(DEBUG_stm, "found ATOMICALLY_FRAME at %p", p); - tso->sp = p; + tso->stackobj->sp = p; return ATOMICALLY_FRAME; case CATCH_FRAME: - tso->sp = p; + tso->stackobj->sp = p; return CATCH_FRAME; case CATCH_STM_FRAME: debugTrace(DEBUG_stm, "found CATCH_STM_FRAME at %p", p); - tso->sp = p; + tso->stackobj->sp = p; return CATCH_STM_FRAME; - case STOP_FRAME: - tso->sp = p; + case UNDERFLOW_FRAME: + threadStackUnderflow(cap,tso); + p = tso->stackobj->sp; + continue; + + case STOP_FRAME: + tso->stackobj->sp = p; return STOP_FRAME; case CATCH_RETRY_FRAME: @@ -2470,12 +2255,12 @@ raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception) -------------------------------------------------------------------------- */ StgWord -findRetryFrameHelper (StgTSO *tso) +findRetryFrameHelper (Capability *cap, StgTSO *tso) { StgPtr p, next; StgRetInfoTable *info; - p = tso -> sp; + p = tso->stackobj->sp; while (1) { info = get_ret_itbl((StgClosure *)p); next = p + stack_frame_sizeW((StgClosure *)p); @@ -2484,13 +2269,13 @@ findRetryFrameHelper (StgTSO *tso) case ATOMICALLY_FRAME: debugTrace(DEBUG_stm, "found ATOMICALLY_FRAME at %p during retry", p); - tso->sp = p; + tso->stackobj->sp = p; return ATOMICALLY_FRAME; case CATCH_RETRY_FRAME: debugTrace(DEBUG_stm, "found CATCH_RETRY_FRAME at %p during retrry", p); - tso->sp = p; + tso->stackobj->sp = p; return CATCH_RETRY_FRAME; case CATCH_STM_FRAME: { @@ -2499,13 +2284,17 @@ findRetryFrameHelper (StgTSO *tso) debugTrace(DEBUG_stm, "found CATCH_STM_FRAME at %p during retry", p); debugTrace(DEBUG_stm, "trec=%p outer=%p", trec, outer); - stmAbortTransaction(tso -> cap, trec); - stmFreeAbortedTRec(tso -> cap, trec); + stmAbortTransaction(cap, trec); + stmFreeAbortedTRec(cap, trec); tso -> trec = outer; p = next; continue; } + case UNDERFLOW_FRAME: + threadStackUnderflow(cap,tso); + p = tso->stackobj->sp; + continue; default: ASSERT(info->i.type != CATCH_FRAME); diff --git a/rts/Schedule.h b/rts/Schedule.h index a00d81af4f..edba8f5f16 100644 --- a/rts/Schedule.h +++ b/rts/Schedule.h @@ -44,7 +44,7 @@ void wakeUpRts(void); StgWord raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception); /* findRetryFrameHelper */ -StgWord findRetryFrameHelper (StgTSO *tso); +StgWord findRetryFrameHelper (Capability *cap, StgTSO *tso); /* Entry point for a new worker */ void scheduleWorker (Capability *cap, Task *task); diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index c981cbec18..b4a037d5d6 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -19,6 +19,23 @@ import EnterCriticalSection; import LeaveCriticalSection; /* ---------------------------------------------------------------------------- + Stack underflow + ------------------------------------------------------------------------- */ + +INFO_TABLE_RET (stg_stack_underflow_frame, UNDERFLOW_FRAME, P_ unused) +{ + W_ new_tso; + W_ ret_off; + + SAVE_THREAD_STATE(); + ("ptr" ret_off) = foreign "C" threadStackUnderflow(MyCapability(), + CurrentTSO); + LOAD_THREAD_STATE(); + + jump %ENTRY_CODE(Sp(ret_off)); +} + +/* ---------------------------------------------------------------------------- Support for the bytecode interpreter. 
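raiseExceptionHelper and findRetryFrameHelper above now treat UNDERFLOW_FRAME as "keep walking in the next chunk", which is also what stg_stack_underflow_frame arranges at runtime. A minimal sketch of that traversal over a chain of chunks; the frame encoding and types are invented for illustration:

    #include <stdio.h>

    /* invented one-word frame tags for the sketch */
    enum { RET_SMALL, UNDERFLOW_FRAME, STOP_FRAME };

    typedef struct Chunk {
        int frames[8];        /* each "frame" is one word: its tag */
        int sp;               /* index of the topmost frame */
        struct Chunk *next;   /* what an underflow frame's next_chunk points to */
    } Chunk;

    /* visit every frame of a chunked stack, hopping chunks at underflow frames */
    static void walk(Chunk *c)
    {
        int i = c->sp;
        for (;;) {
            int tag = c->frames[i];
            if (tag == UNDERFLOW_FRAME) {
                c = c->next;          /* continue in the older chunk */
                i = c->sp;
                continue;
            }
            printf("frame tag %d\n", tag);
            if (tag == STOP_FRAME) return;  /* bottom of the whole stack */
            i++;                            /* next (older) frame */
        }
    }

    int main(void)
    {
        Chunk old_chunk = { { RET_SMALL, RET_SMALL, STOP_FRAME }, 0, NULL };
        Chunk top_chunk = { { RET_SMALL, UNDERFLOW_FRAME }, 0, &old_chunk };
        walk(&top_chunk);
        return 0;
    }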
------------------------------------------------------------------------- */ @@ -353,6 +370,9 @@ loop: INFO_TABLE(stg_TSO, 0,0,TSO, "TSO", "TSO") { foreign "C" barf("TSO object entered!") never returns; } +INFO_TABLE(stg_STACK, 0,0, STACK, "STACK", "STACK") +{ foreign "C" barf("STACK object entered!") never returns; } + /* ---------------------------------------------------------------------------- Weak pointers diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c index 94a5a15f46..aeae1d4128 100644 --- a/rts/ThreadPaused.c +++ b/rts/ThreadPaused.c @@ -44,13 +44,13 @@ stackSqueeze(Capability *cap, StgTSO *tso, StgPtr bottom) // contains two values: the size of the gap, and the distance // to the next gap (or the stack top). - frame = tso->sp; + frame = tso->stackobj->sp; ASSERT(frame < bottom); prev_was_update_frame = rtsFalse; current_gap_size = 0; - gap = (struct stack_gap *) (tso->sp - sizeofW(StgUpdateFrame)); + gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame)); while (frame <= bottom) { @@ -150,7 +150,7 @@ stackSqueeze(Capability *cap, StgTSO *tso, StgPtr bottom) next_gap_start = (StgWord8*)gap + sizeof(StgUpdateFrame); sp = next_gap_start; - while ((StgPtr)gap > tso->sp) { + while ((StgPtr)gap > tso->stackobj->sp) { // we're working in *bytes* now... gap_start = next_gap_start; @@ -164,7 +164,7 @@ stackSqueeze(Capability *cap, StgTSO *tso, StgPtr bottom) memmove(sp, next_gap_start, chunk_size); } - tso->sp = (StgPtr)sp; + tso->stackobj->sp = (StgPtr)sp; } } @@ -201,27 +201,27 @@ threadPaused(Capability *cap, StgTSO *tso) // blackholing, or eager blackholing consistently. See Note // [upd-black-hole] in sm/Scav.c. - stack_end = &tso->stack[tso->stack_size]; + stack_end = tso->stackobj->stack + tso->stackobj->stack_size; - frame = (StgClosure *)tso->sp; + frame = (StgClosure *)tso->stackobj->sp; - while (1) { - // If we've already marked this frame, then stop here. - if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) { - if (prev_was_update_frame) { - words_to_squeeze += sizeofW(StgUpdateFrame); - weight += weight_pending; - weight_pending = 0; - } - goto end; - } - - info = get_ret_itbl(frame); + while ((P_)frame < stack_end) { + info = get_ret_itbl(frame); switch (info->i.type) { - + case UPDATE_FRAME: + // If we've already marked this frame, then stop here. + if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) { + if (prev_was_update_frame) { + words_to_squeeze += sizeofW(StgUpdateFrame); + weight += weight_pending; + weight_pending = 0; + } + goto end; + } + SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info); bh = ((StgUpdateFrame *)frame)->updatee; @@ -235,7 +235,7 @@ threadPaused(Capability *cap, StgTSO *tso) { debugTrace(DEBUG_squeeze, "suspending duplicate work: %ld words of stack", - (long)((StgPtr)frame - tso->sp)); + (long)((StgPtr)frame - tso->stackobj->sp)); // If this closure is already an indirection, then // suspend the computation up to this point. @@ -245,25 +245,22 @@ threadPaused(Capability *cap, StgTSO *tso) // Now drop the update frame, and arrange to return // the value to the frame underneath: - tso->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2; - tso->sp[1] = (StgWord)bh; + tso->stackobj->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2; + tso->stackobj->sp[1] = (StgWord)bh; ASSERT(bh->header.info != &stg_TSO_info); - tso->sp[0] = (W_)&stg_enter_info; + tso->stackobj->sp[0] = (W_)&stg_enter_info; // And continue with threadPaused; there might be // yet more computation to suspend. 
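The squeezing pass in rts/ThreadPaused.c above accumulates words_to_squeeze over runs of adjacent update frames before deciding whether squeezing pays off. Here is the accounting in isolation, as a toy with one word per frame and an invented update-frame size:

    #include <stdio.h>

    enum { UPDATE_FRAME, OTHER_FRAME };

    int main(void)
    {
        /* a stack, top first: each adjacent pair of update frames can be
           squeezed into one, saving one update frame's worth of words */
        int frames[] = { UPDATE_FRAME, UPDATE_FRAME, OTHER_FRAME,
                         UPDATE_FRAME, UPDATE_FRAME, UPDATE_FRAME };
        int n = sizeof(frames) / sizeof(frames[0]);

        int prev_was_update = 0, words_to_squeeze = 0;
        const int upd_frame_words = 2;  /* stand-in for sizeofW(StgUpdateFrame) */

        for (int i = 0; i < n; i++) {
            if (frames[i] == UPDATE_FRAME) {
                if (prev_was_update) words_to_squeeze += upd_frame_words;
                prev_was_update = 1;
            } else {
                prev_was_update = 0;
            }
        }
        printf("would squeeze %d words\n", words_to_squeeze);
        return 0;
    }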
- frame = (StgClosure *)(tso->sp + 2); + frame = (StgClosure *)(tso->stackobj->sp + 2); prev_was_update_frame = rtsFalse; continue; } + // zero out the slop so that the sanity checker can tell // where the next closure is. - DEBUG_FILL_SLOP(bh); - - // @LDV profiling - // We pretend that bh is now dead. - LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC((StgClosure *)bh); + OVERWRITING_CLOSURE(bh); // an EAGER_BLACKHOLE or CAF_BLACKHOLE gets turned into a // BLACKHOLE here. @@ -301,7 +298,8 @@ threadPaused(Capability *cap, StgTSO *tso) prev_was_update_frame = rtsTrue; break; - case STOP_FRAME: + case UNDERFLOW_FRAME: + case STOP_FRAME: goto end; // normal stack frames; do nothing except advance the pointer diff --git a/rts/Threads.c b/rts/Threads.c index f6b1bac1a7..d6fe0e7697 100644 --- a/rts/Threads.c +++ b/rts/Threads.c @@ -18,8 +18,14 @@ #include "ThreadLabels.h" #include "Updates.h" #include "Messages.h" +#include "RaiseAsync.h" +#include "Prelude.h" +#include "Printer.h" +#include "sm/Sanity.h" #include "sm/Storage.h" +#include <string.h> + /* Next thread ID to allocate. * LOCK: sched_mutex */ @@ -54,57 +60,67 @@ StgTSO * createThread(Capability *cap, nat size) { StgTSO *tso; + StgStack *stack; nat stack_size; /* sched_mutex is *not* required */ - /* First check whether we should create a thread at all */ - - // ToDo: check whether size = stack_size - TSO_STRUCT_SIZEW - /* catch ridiculously small stack sizes */ - if (size < MIN_STACK_WORDS + TSO_STRUCT_SIZEW) { - size = MIN_STACK_WORDS + TSO_STRUCT_SIZEW; + if (size < MIN_STACK_WORDS + sizeofW(StgStack)) { + size = MIN_STACK_WORDS + sizeofW(StgStack); } - size = round_to_mblocks(size); - tso = (StgTSO *)allocate(cap, size); - - stack_size = size - TSO_STRUCT_SIZEW; - TICK_ALLOC_TSO(stack_size, 0); - + /* The size argument we are given includes all the per-thread + * overheads: + * + * - The TSO structure + * - The STACK header + * + * This is so that we can use a nice round power of 2 for the + * default stack size (e.g. 1k), and if we're allocating lots of + * threads back-to-back they'll fit nicely in a block. It's a bit + * of a benchmark hack, but it doesn't do any harm. 
+ */ + stack_size = round_to_mblocks(size - sizeofW(StgTSO)); + stack = (StgStack *)allocate(cap, stack_size); + TICK_ALLOC_STACK(stack_size); + SET_HDR(stack, &stg_STACK_info, CCS_SYSTEM); + stack->stack_size = stack_size - sizeofW(StgStack); + stack->sp = stack->stack + stack->stack_size; + stack->dirty = 1; + + tso = (StgTSO *)allocate(cap, sizeofW(StgTSO)); + TICK_ALLOC_TSO(); SET_HDR(tso, &stg_TSO_info, CCS_SYSTEM); // Always start with the compiled code evaluator tso->what_next = ThreadRunGHC; - tso->why_blocked = NotBlocked; tso->block_info.closure = (StgClosure *)END_TSO_QUEUE; tso->blocked_exceptions = END_BLOCKED_EXCEPTIONS_QUEUE; tso->bq = (StgBlockingQueue *)END_TSO_QUEUE; tso->flags = 0; tso->dirty = 1; - + tso->_link = END_TSO_QUEUE; + tso->saved_errno = 0; tso->bound = NULL; tso->cap = cap; - tso->stack_size = stack_size; - tso->max_stack_size = round_to_mblocks(RtsFlags.GcFlags.maxStkSize) - - TSO_STRUCT_SIZEW; - tso->sp = (P_)&(tso->stack) + stack_size; + tso->stackobj = stack; + tso->tot_stack_size = stack->stack_size; tso->trec = NO_TREC; - + #ifdef PROFILING tso->prof.CCCS = CCS_MAIN; #endif - /* put a stop frame on the stack */ - tso->sp -= sizeofW(StgStopFrame); - SET_HDR((StgClosure*)tso->sp,(StgInfoTable *)&stg_stop_thread_info,CCS_SYSTEM); - tso->_link = END_TSO_QUEUE; - + // put a stop frame on the stack + stack->sp -= sizeofW(StgStopFrame); + SET_HDR((StgClosure*)stack->sp, + (StgInfoTable *)&stg_stop_thread_info,CCS_SYSTEM); + /* Link the new thread on the global thread list. */ ACQUIRE_LOCK(&sched_mutex); @@ -220,12 +236,6 @@ removeThreadFromDeQueue (Capability *cap, void tryWakeupThread (Capability *cap, StgTSO *tso) { - tryWakeupThread_(cap, deRefTSO(tso)); -} - -void -tryWakeupThread_ (Capability *cap, StgTSO *tso) -{ traceEventThreadWakeup (cap, tso, tso->cap->no); #ifdef THREADED_RTS @@ -267,8 +277,8 @@ tryWakeupThread_ (Capability *cap, StgTSO *tso) } // remove the block frame from the stack - ASSERT(tso->sp[0] == (StgWord)&stg_block_throwto_info); - tso->sp += 3; + ASSERT(tso->stackobj->sp[0] == (StgWord)&stg_block_throwto_info); + tso->stackobj->sp += 3; goto unblock; } @@ -416,7 +426,7 @@ updateThunk (Capability *cap, StgTSO *tso, StgClosure *thunk, StgClosure *val) i = v->header.info; if (i == &stg_TSO_info) { - owner = deRefTSO((StgTSO*)v); + owner = (StgTSO*)v; if (owner != tso) { checkBlockingQueues(cap, tso); } @@ -429,7 +439,7 @@ updateThunk (Capability *cap, StgTSO *tso, StgClosure *thunk, StgClosure *val) return; } - owner = deRefTSO(((StgBlockingQueue*)v)->owner); + owner = ((StgBlockingQueue*)v)->owner; if (owner != tso) { checkBlockingQueues(cap, tso); @@ -466,6 +476,202 @@ isThreadBound(StgTSO* tso USED_IF_THREADS) return rtsFalse; } +/* ----------------------------------------------------------------------------- + Stack overflow + + If the thread has reached its maximum stack size, then raise the + StackOverflow exception in the offending thread. Otherwise + allocate a new stack chunk, link it onto the thread's chain of + stack chunks, and continue execution there. + -------------------------------------------------------------------------- */ + +void +threadStackOverflow (Capability *cap, StgTSO *tso) +{ + StgStack *new_stack, *old_stack; + StgUnderflowFrame *frame; + + IF_DEBUG(sanity,checkTSO(tso)); + + if (tso->tot_stack_size >= RtsFlags.GcFlags.maxStkSize + && !(tso->flags & TSO_BLOCKEX)) { + // NB. never raise a StackOverflow exception if the thread is + // inside Control.Exception.
It is impractical to protect + // against stack overflow exceptions, since virtually anything + // can raise one (even 'catch'), so this is the only sensible + // thing to do here. See bug #767. + // + + if (tso->flags & TSO_SQUEEZED) { + return; + } + // #3677: In a stack overflow situation, stack squeezing may + // reduce the stack size, but we don't know whether it has been + // reduced enough for the stack check to succeed if we try + // again. Fortunately stack squeezing is idempotent, so all we + // need to do is record whether *any* squeezing happened. If we + // are at the stack's absolute -K limit, and stack squeezing + // happened, then we try running the thread again. The + // TSO_SQUEEZED flag is set by threadPaused() to tell us whether + // squeezing happened or not. + + debugTrace(DEBUG_gc, + "threadStackOverflow of TSO %ld (%p): stack too large (now %ld; max is %ld)", + (long)tso->id, tso, (long)tso->stackobj->stack_size, + RtsFlags.GcFlags.maxStkSize); + IF_DEBUG(gc, + /* If we're debugging, just print out the top of the stack */ + printStackChunk(tso->stackobj->sp, + stg_min(tso->stackobj->stack + tso->stackobj->stack_size, + tso->stackobj->sp+64))); + + // Send this thread the StackOverflow exception + throwToSingleThreaded(cap, tso, (StgClosure *)stackOverflow_closure); + } + + + // We also want to avoid enlarging the stack if squeezing has + // already released some of it. However, we don't want to get into + // a pathalogical situation where a thread has a nearly full stack + // (near its current limit, but not near the absolute -K limit), + // keeps allocating a little bit, squeezing removes a little bit, + // and then it runs again. So to avoid this, if we squeezed *and* + // there is still less than BLOCK_SIZE_W words free, then we enlarge + // the stack anyway. + if ((tso->flags & TSO_SQUEEZED) && + ((W_)(tso->stackobj->sp - tso->stackobj->stack) >= BLOCK_SIZE_W)) { + return; + } + + debugTraceCap(DEBUG_sched, cap, + "allocating new stack chunk of size %d bytes", + RtsFlags.GcFlags.stkChunkSize * sizeof(W_)); + + old_stack = tso->stackobj; + + new_stack = (StgStack*) allocate(cap, RtsFlags.GcFlags.stkChunkSize); + SET_HDR(new_stack, &stg_STACK_info, CCS_SYSTEM); + TICK_ALLOC_STACK(RtsFlags.GcFlags.stkChunkSize); + + new_stack->dirty = 0; // begin clean, we'll mark it dirty below + new_stack->stack_size = RtsFlags.GcFlags.stkChunkSize - sizeofW(StgStack); + new_stack->sp = new_stack->stack + new_stack->stack_size; + + tso->tot_stack_size += new_stack->stack_size; + + new_stack->sp -= sizeofW(StgUnderflowFrame); + frame = (StgUnderflowFrame*)new_stack->sp; + frame->info = &stg_stack_underflow_frame_info; + frame->next_chunk = old_stack; + + { + StgWord *sp; + nat chunk_words, size; + + // find the boundary of the chunk of old stack we're going to + // copy to the new stack. We skip over stack frames until we + // reach the smaller of + // + // * the chunk buffer size (+RTS -kb) + // * the end of the old stack + // + for (sp = old_stack->sp; + sp < stg_min(old_stack->sp + RtsFlags.GcFlags.stkChunkBufferSize, + old_stack->stack + old_stack->stack_size); ) + { + size = stack_frame_sizeW((StgClosure*)sp); + + // if including this frame would exceed the size of the + // new stack (taking into account the underflow frame), + // then stop at the previous frame. 
+ if (sp + size > old_stack->stack + (new_stack->stack_size - + sizeofW(StgUnderflowFrame))) { + break; + } + sp += size; + } + + // copy the stack chunk between old_stack->sp and sp to + // new_stack->sp + (old_stack->sp - sp) + chunk_words = sp - old_stack->sp; + + memcpy(/* dest */ new_stack->sp - chunk_words, + /* source */ old_stack->sp, + /* size */ chunk_words * sizeof(W_)); + + old_stack->sp += chunk_words; + new_stack->sp -= chunk_words; + } + + // if the old stack chunk is now empty, discard it. With the + // default settings, -ki1k -kb1k, this means the first stack chunk + // will be discarded after the first overflow, being replaced by a + // non-moving 32k chunk. + if (old_stack->sp == old_stack->stack + old_stack->stack_size) { + frame->next_chunk = new_stack; + } + + tso->stackobj = new_stack; + + // we're about to run it, better mark it dirty + dirty_STACK(cap, new_stack); + + IF_DEBUG(sanity,checkTSO(tso)); + // IF_DEBUG(scheduler,printTSO(new_tso)); +} + + +/* --------------------------------------------------------------------------- + Stack underflow - called from the stg_stack_underflow_frame + ------------------------------------------------------------------------ */ + +nat // returns offset to the return address +threadStackUnderflow (Capability *cap, StgTSO *tso) +{ + StgStack *new_stack, *old_stack; + StgUnderflowFrame *frame; + nat retvals; + + debugTraceCap(DEBUG_sched, cap, "stack underflow"); + + old_stack = tso->stackobj; + + frame = (StgUnderflowFrame*)(old_stack->stack + old_stack->stack_size + - sizeofW(StgUnderflowFrame)); + ASSERT(frame->info == &stg_stack_underflow_frame_info); + + new_stack = (StgStack*)frame->next_chunk; + tso->stackobj = new_stack; + + retvals = (P_)frame - old_stack->sp; + if (retvals != 0) + { + // we have some return values to copy down to the new stack + if ((new_stack->sp - new_stack->stack) < retvals) + { + barf("threadStackUnderflow: not enough space for return values"); + } + + new_stack->sp -= retvals; + + memcpy(/* dest */ new_stack->sp, + /* src */ old_stack->sp, + /* size */ retvals * sizeof(W_)); + } + + // empty the old stack. The GC may still visit this object + // because it is on the mutable list.
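The boundary scan above advances over whole frames until it reaches either the -kb buffer limit or the end of the old chunk, and it never copies more than the new chunk can hold. An approximate, self-contained rendering of that scan, with invented frame sizes and limits:

    #include <stdio.h>

    int main(void)
    {
        /* sizes (in words) of the frames at the top of the old chunk */
        int frame_sizes[] = { 3, 5, 2, 8, 4 };
        int nframes = sizeof(frame_sizes) / sizeof(frame_sizes[0]);

        int buffer_words = 12;  /* stand-in for RtsFlags.GcFlags.stkChunkBufferSize */
        int room_words   = 20;  /* free space in the new chunk, underflow frame deducted */

        int copied = 0;
        for (int i = 0; i < nframes; i++) {
            if (copied >= buffer_words) break;               /* buffer quota reached */
            if (copied + frame_sizes[i] > room_words) break; /* would overflow new chunk */
            copied += frame_sizes[i];                        /* take the whole frame */
        }
        printf("carry %d words into the new chunk\n", copied);
        return 0;
    }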
old_stack->sp = old_stack->stack + old_stack->stack_size; + + // restore the stack parameters, and update tot_stack_size + tso->tot_stack_size -= old_stack->stack_size; + + // we're about to run it, better mark it dirty + dirty_STACK(cap, new_stack); + + return retvals; +} + /* ---------------------------------------------------------------------------- * Debugging: why is a thread blocked * ------------------------------------------------------------------------- */ @@ -529,10 +735,7 @@ printThreadStatus(StgTSO *t) void *label = lookupThreadLabel(t->id); if (label) debugBelch("[\"%s\"] ",(char *)label); } - if (t->what_next == ThreadRelocated) { - debugBelch("has been relocated...\n"); - } else { - switch (t->what_next) { + switch (t->what_next) { case ThreadKilled: debugBelch("has been killed"); break; @@ -544,11 +747,8 @@ printThreadStatus(StgTSO *t) } if (t->dirty) { debugBelch(" (TSO_DIRTY)"); - } else if (t->flags & TSO_LINK_DIRTY) { - debugBelch(" (TSO_LINK_DIRTY)"); } debugBelch("\n"); - } } void @@ -574,11 +774,7 @@ printAllThreads(void) if (t->why_blocked != NotBlocked) { printThreadStatus(t); } - if (t->what_next == ThreadRelocated) { - next = t->_link; - } else { - next = t->global_link; - } + next = t->global_link; } } } diff --git a/rts/Threads.h b/rts/Threads.h index 776dd93cab..857658a2d0 100644 --- a/rts/Threads.h +++ b/rts/Threads.h @@ -21,9 +21,6 @@ void wakeBlockingQueue (Capability *cap, StgBlockingQueue *bq); void tryWakeupThread (Capability *cap, StgTSO *tso); void migrateThread (Capability *from, StgTSO *tso, Capability *to); -// like tryWakeupThread(), but assumes the TSO is not ThreadRelocated -void tryWakeupThread_ (Capability *cap, StgTSO *tso); - // Wakes up a thread on a Capability (probably a different Capability // from the one held by the current Task). // @@ -41,6 +38,10 @@ rtsBool removeThreadFromDeQueue (Capability *cap, StgTSO **head, StgTSO **tail StgBool isThreadBound (StgTSO* tso); +// Overflow/underflow +void threadStackOverflow (Capability *cap, StgTSO *tso); +nat threadStackUnderflow (Capability *cap, StgTSO *tso); + #ifdef DEBUG void printThreadBlockage (StgTSO *tso); void printThreadStatus (StgTSO *t); diff --git a/rts/Trace.h b/rts/Trace.h index 97d9514751..27de60e4ed 100644 --- a/rts/Trace.h +++ b/rts/Trace.h @@ -265,7 +265,7 @@ void dtraceUserMsgWrapper(Capability *cap, char *msg); INLINE_HEADER void traceEventCreateThread(Capability *cap STG_UNUSED, StgTSO *tso STG_UNUSED) { - traceSchedEvent(cap, EVENT_CREATE_THREAD, tso, tso->stack_size); + traceSchedEvent(cap, EVENT_CREATE_THREAD, tso, tso->stackobj->stack_size); dtraceCreateThread((EventCapNo)cap->no, (EventThreadID)tso->id); } diff --git a/rts/Updates.h b/rts/Updates.h index 2258c988bb..954f02afe1 100644 --- a/rts/Updates.h +++ b/rts/Updates.h @@ -18,101 +18,12 @@ -------------------------------------------------------------------------- */ /* LDV profiling: - * We call LDV_recordDead_FILL_SLOP_DYNAMIC(p1) regardless of the generation in - * which p1 resides. - * - * Note: * After all, we do *NOT* need to call LDV_RECORD_CREATE() for IND * closures because they are inherently used. But, it corrupts * the invariants that every closure keeps its creation time in the profiling * field. So, we call LDV_RECORD_CREATE(). */ -/* In the DEBUG case, we also zero out the slop of the old closure, * so that the sanity checker can tell where the next closure is.
- * - * Two important invariants: we should never try to update a closure - * to point to itself, and the closure being updated should not - * already have been updated (the mutable list will get messed up - * otherwise). - * - * NB. We do *not* do this in THREADED_RTS mode, because when we have the - * possibility of multiple threads entering the same closure, zeroing - * the slop in one of the threads would have a disastrous effect on - * the other (seen in the wild!). - */ -#ifdef CMINUSMINUS - -#define FILL_SLOP(p) \ - W_ inf; \ - W_ sz; \ - W_ i; \ - inf = %GET_STD_INFO(p); \ - if (%INFO_TYPE(inf) != HALF_W_(BLACKHOLE)) { \ - if (%INFO_TYPE(inf) == HALF_W_(THUNK_SELECTOR)) { \ - sz = BYTES_TO_WDS(SIZEOF_StgSelector_NoThunkHdr); \ - } else { \ - if (%INFO_TYPE(inf) == HALF_W_(AP_STACK)) { \ - sz = StgAP_STACK_size(p) + BYTES_TO_WDS(SIZEOF_StgAP_STACK_NoThunkHdr); \ - } else { \ - if (%INFO_TYPE(inf) == HALF_W_(AP)) { \ - sz = TO_W_(StgAP_n_args(p)) + BYTES_TO_WDS(SIZEOF_StgAP_NoThunkHdr); \ - } else { \ - sz = TO_W_(%INFO_PTRS(inf)) + TO_W_(%INFO_NPTRS(inf)); \ - } \ - } \ - } \ - i = 0; \ - for: \ - if (i < sz) { \ - StgThunk_payload(p,i) = 0; \ - i = i + 1; \ - goto for; \ - } \ - } - -#else /* !CMINUSMINUS */ - -INLINE_HEADER void -FILL_SLOP(StgClosure *p) -{ - StgInfoTable *inf = get_itbl(p); - nat i, sz; - - switch (inf->type) { - case BLACKHOLE: - goto no_slop; - // we already filled in the slop when we overwrote the thunk - // with BLACKHOLE, and also an evacuated BLACKHOLE is only the - // size of an IND. - case THUNK_SELECTOR: - sz = sizeofW(StgSelector) - sizeofW(StgThunkHeader); - break; - case AP: - sz = ((StgAP *)p)->n_args + sizeofW(StgAP) - sizeofW(StgThunkHeader); - break; - case AP_STACK: - sz = ((StgAP_STACK *)p)->size + sizeofW(StgAP_STACK) - sizeofW(StgThunkHeader); - break; - default: - sz = inf->layout.payload.ptrs + inf->layout.payload.nptrs; - break; - } - for (i = 0; i < sz; i++) { - ((StgThunk *)p)->payload[i] = 0; - } -no_slop: - ; -} - -#endif /* CMINUSMINUS */ - -#if !defined(DEBUG) || defined(THREADED_RTS) -#define DEBUG_FILL_SLOP(p) /* do nothing */ -#else -#define DEBUG_FILL_SLOP(p) FILL_SLOP(p) -#endif - /* We have two versions of this macro (sadly), one for use in C-- code, * and the other for C. 
* @@ -128,9 +39,8 @@ no_slop: #define updateWithIndirection(p1, p2, and_then) \ W_ bd; \ \ - DEBUG_FILL_SLOP(p1); \ - LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1); \ - StgInd_indirectee(p1) = p2; \ + OVERWRITING_CLOSURE(p1); \ + StgInd_indirectee(p1) = p2; \ prim %write_barrier() []; \ SET_INFO(p1, stg_BLACKHOLE_info); \ LDV_RECORD_CREATE(p1); \ @@ -155,8 +65,7 @@ INLINE_HEADER void updateWithIndirection (Capability *cap, ASSERT( (P_)p1 != (P_)p2 ); /* not necessarily true: ASSERT( !closure_IND(p1) ); */ /* occurs in RaiseAsync.c:raiseAsync() */ - DEBUG_FILL_SLOP(p1); - LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC(p1); + OVERWRITING_CLOSURE(p1); ((StgInd *)p1)->indirectee = p2; write_barrier(); SET_INFO(p1, &stg_BLACKHOLE_info); diff --git a/rts/posix/Select.c b/rts/posix/Select.c index 0127b3cef4..3c87fbdc70 100644 --- a/rts/posix/Select.c +++ b/rts/posix/Select.c @@ -63,10 +63,6 @@ wakeUpSleepingThreads(lnat ticks) while (sleeping_queue != END_TSO_QUEUE) { tso = sleeping_queue; - if (tso->what_next == ThreadRelocated) { - sleeping_queue = tso->_link; - continue; - } if (((long)ticks - (long)tso->block_info.target) < 0) { break; } @@ -259,11 +255,7 @@ awaitEvent(rtsBool wait) for(tso = blocked_queue_hd; tso != END_TSO_QUEUE; tso = next) { next = tso->_link; - if (tso->what_next == ThreadRelocated) { - continue; - } - - switch (tso->why_blocked) { + switch (tso->why_blocked) { case BlockedOnRead: ready = unblock_all || FD_ISSET(tso->block_info.fd, &rfd); break; diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c index 75c88326ad..bf0c5e6a3b 100644 --- a/rts/sm/BlockAlloc.c +++ b/rts/sm/BlockAlloc.c @@ -577,48 +577,6 @@ freeChain_lock(bdescr *bd) RELEASE_SM_LOCK; } -// splitBlockGroup(bd,B) splits bd in two. Afterward, bd will have B -// blocks, and a new block descriptor pointing to the remainder is -// returned. -bdescr * -splitBlockGroup (bdescr *bd, nat blocks) -{ - bdescr *new_bd; - - if (bd->blocks <= blocks) { - barf("splitLargeBlock: too small"); - } - - if (bd->blocks > BLOCKS_PER_MBLOCK) { - nat low_mblocks, high_mblocks; - void *new_mblock; - if ((blocks - BLOCKS_PER_MBLOCK) % (MBLOCK_SIZE / BLOCK_SIZE) != 0) { - barf("splitLargeBlock: not a multiple of a megablock"); - } - low_mblocks = 1 + (blocks - BLOCKS_PER_MBLOCK) / (MBLOCK_SIZE / BLOCK_SIZE); - high_mblocks = (bd->blocks - blocks) / (MBLOCK_SIZE / BLOCK_SIZE); - - new_mblock = (void *) ((P_)MBLOCK_ROUND_DOWN(bd) + (W_)low_mblocks * MBLOCK_SIZE_W); - initMBlock(new_mblock); - new_bd = FIRST_BDESCR(new_mblock); - new_bd->blocks = MBLOCK_GROUP_BLOCKS(high_mblocks); - - ASSERT(blocks + new_bd->blocks == - bd->blocks + BLOCKS_PER_MBLOCK - MBLOCK_SIZE/BLOCK_SIZE); - } - else - { - // NB. we're not updating all the bdescrs in the split groups to - // point to the new heads, so this can only be used for large - // objects which do not start in the non-head block. 
- new_bd = bd + blocks; - new_bd->blocks = bd->blocks - blocks; - } - bd->blocks = blocks; - - return new_bd; -} - static void initMBlock(void *mblock) { diff --git a/rts/sm/Compact.c b/rts/sm/Compact.c index 977e31d811..4f3dcf2525 100644 --- a/rts/sm/Compact.c +++ b/rts/sm/Compact.c @@ -335,8 +335,9 @@ thread_stack(StgPtr p, StgPtr stack_end) case CATCH_STM_FRAME: case ATOMICALLY_FRAME: case UPDATE_FRAME: - case STOP_FRAME: - case CATCH_FRAME: + case UNDERFLOW_FRAME: + case STOP_FRAME: + case CATCH_FRAME: case RET_SMALL: bitmap = BITMAP_BITS(info->i.layout.bitmap); size = BITMAP_SIZE(info->i.layout.bitmap); @@ -480,8 +481,8 @@ thread_TSO (StgTSO *tso) thread_(&tso->trec); - thread_stack(tso->sp, &(tso->stack[tso->stack_size])); - return (StgPtr)tso + tso_sizeW(tso); + thread_(&tso->stackobj); + return (StgPtr)tso + sizeofW(StgTSO); } @@ -521,9 +522,12 @@ update_fwd_large( bdescr *bd ) continue; } - case TSO: - thread_TSO((StgTSO *)p); - continue; + case STACK: + { + StgStack *stack = (StgStack*)p; + thread_stack(stack->sp, stack->stack + stack->stack_size); + continue; + } case AP_STACK: thread_AP_STACK((StgAP_STACK *)p); @@ -706,6 +710,13 @@ thread_obj (StgInfoTable *info, StgPtr p) case TSO: return thread_TSO((StgTSO *)p); + case STACK: + { + StgStack *stack = (StgStack*)p; + thread_stack(stack->sp, stack->stack + stack->stack_size); + return p + stack_sizeW(stack); + } + case TREC_CHUNK: { StgWord i; @@ -899,8 +910,8 @@ update_bkwd_compact( generation *gen ) } // relocate TSOs - if (info->type == TSO) { - move_TSO((StgTSO *)p, (StgTSO *)free); + if (info->type == STACK) { + move_STACK((StgStack *)p, (StgStack *)free); } free += size; diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c index 61cf10bcbe..65da0762f2 100644 --- a/rts/sm/Evac.c +++ b/rts/sm/Evac.c @@ -485,14 +485,7 @@ loop: /* evacuate large objects by re-linking them onto a different list. */ if (bd->flags & BF_LARGE) { - info = get_itbl(q); - if (info->type == TSO && - ((StgTSO *)q)->what_next == ThreadRelocated) { - q = (StgClosure *)((StgTSO *)q)->_link; - *p = q; - goto loop; - } - evacuate_large((P_)q); + evacuate_large((P_)q); return; } @@ -675,6 +668,7 @@ loop: case RET_BIG: case RET_DYN: case UPDATE_FRAME: + case UNDERFLOW_FRAME: case STOP_FRAME: case CATCH_FRAME: case CATCH_STM_FRAME: @@ -709,31 +703,28 @@ loop: return; case TSO: - { - StgTSO *tso = (StgTSO *)q; + copy(p,info,q,sizeofW(StgTSO),gen); + evacuate((StgClosure**)&(((StgTSO*)(*p))->stackobj)); + return; - /* Deal with redirected TSOs (a TSO that's had its stack enlarged). - */ - if (tso->what_next == ThreadRelocated) { - q = (StgClosure *)tso->_link; - *p = q; - goto loop; - } + case STACK: + { + StgStack *stack = (StgStack *)q; - /* To evacuate a small TSO, we need to adjust the stack pointer + /* To evacuate a small STACK, we need to adjust the stack pointer */ { - StgTSO *new_tso; + StgStack *new_stack; StgPtr r, s; rtsBool mine; - mine = copyPart(p,(StgClosure *)tso, tso_sizeW(tso), - sizeofW(StgTSO), gen); + mine = copyPart(p,(StgClosure *)stack, stack_sizeW(stack), + sizeofW(StgStack), gen); if (mine) { - new_tso = (StgTSO *)*p; - move_TSO(tso, new_tso); - for (r = tso->sp, s = new_tso->sp; - r < tso->stack+tso->stack_size;) { + new_stack = (StgStack *)*p; + move_STACK(stack, new_stack); + for (r = stack->sp, s = new_stack->sp; + r < stack->stack + stack->stack_size;) { *s++ = *r++; } } @@ -952,7 +943,7 @@ selector_loop: // For the purposes of LDV profiling, we have destroyed // the original selector thunk, p. 
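Several hunks in this patch collapse the old DEBUG_FILL_SLOP / LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC pair into a single OVERWRITING_CLOSURE() call. The point of slop-zeroing, shown on a toy object layout that is not the RTS's:

    #include <stdio.h>

    /* a toy heap object: a size word followed by its payload */
    typedef struct { size_t size; long payload[8]; } Obj;

    /* overwrite obj with a smaller object, zeroing the leftover slop so a
       linear heap walker never misreads stale payload words as a header */
    static void overwrite(Obj *o, size_t new_size)
    {
        for (size_t i = new_size; i < o->size; i++)
            o->payload[i] = 0;          /* the "slop" */
        o->size = new_size;
    }

    int main(void)
    {
        Obj o = { 8, { 1, 2, 3, 4, 5, 6, 7, 8 } };
        overwrite(&o, 2);               /* e.g. thunk turned into indirection */
        for (int i = 0; i < 8; i++) printf("%ld ", o.payload[i]);
        printf("\n");
        return 0;
    }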
SET_INFO(p, (StgInfoTable *)info_ptr); - LDV_RECORD_DEAD_FILL_SLOP_DYNAMIC((StgClosure *)p); + OVERWRITING_CLOSURE(p); SET_INFO(p, &stg_WHITEHOLE_info); #endif diff --git a/rts/sm/GCAux.c b/rts/sm/GCAux.c index f69c81d5e0..97af17a02c 100644 --- a/rts/sm/GCAux.c +++ b/rts/sm/GCAux.c @@ -67,12 +67,7 @@ isAlive(StgClosure *p) // large objects use the evacuated flag if (bd->flags & BF_LARGE) { - if (get_itbl(q)->type == TSO && - ((StgTSO *)p)->what_next == ThreadRelocated) { - p = (StgClosure *)((StgTSO *)p)->_link; - continue; - } - return NULL; + return NULL; } // check the mark bit for compacted steps @@ -98,13 +93,6 @@ isAlive(StgClosure *p) p = ((StgInd *)q)->indirectee; continue; - case TSO: - if (((StgTSO *)q)->what_next == ThreadRelocated) { - p = (StgClosure *)((StgTSO *)q)->_link; - continue; - } - return NULL; - default: // dead. return NULL; diff --git a/rts/sm/MarkWeak.c b/rts/sm/MarkWeak.c index aadd5757af..72f0ade797 100644 --- a/rts/sm/MarkWeak.c +++ b/rts/sm/MarkWeak.c @@ -260,12 +260,6 @@ static rtsBool tidyThreadList (generation *gen) } ASSERT(get_itbl(t)->type == TSO); - if (t->what_next == ThreadRelocated) { - next = t->_link; - *prev = next; - continue; - } - next = t->global_link; // if the thread is not masking exceptions but there are diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c index dfa98657a0..22b7f64136 100644 --- a/rts/sm/Sanity.c +++ b/rts/sm/Sanity.c @@ -35,6 +35,7 @@ static void checkSmallBitmap ( StgPtr payload, StgWord bitmap, nat ); static void checkLargeBitmap ( StgPtr payload, StgLargeBitmap*, nat ); static void checkClosureShallow ( StgClosure * ); +static void checkSTACK (StgStack *stack); /* ----------------------------------------------------------------------------- Check stack sanity @@ -139,6 +140,7 @@ checkStackFrame( StgPtr c ) case CATCH_STM_FRAME: case CATCH_FRAME: // small bitmap cases (<= 32 entries) + case UNDERFLOW_FRAME: case STOP_FRAME: case RET_SMALL: size = BITMAP_SIZE(info->i.layout.bitmap); @@ -331,7 +333,7 @@ checkClosure( StgClosure* p ) ASSERT(get_itbl(bq->owner)->type == TSO); ASSERT(bq->queue == (MessageBlackHole*)END_TSO_QUEUE - || get_itbl(bq->queue)->type == TSO); + || bq->queue->header.info == &stg_MSG_BLACKHOLE_info); ASSERT(bq->link == (StgBlockingQueue*)END_TSO_QUEUE || get_itbl(bq->link)->type == IND || get_itbl(bq->link)->type == BLOCKING_QUEUE); @@ -384,6 +386,7 @@ checkClosure( StgClosure* p ) case RET_BIG: case RET_DYN: case UPDATE_FRAME: + case UNDERFLOW_FRAME: case STOP_FRAME: case CATCH_FRAME: case ATOMICALLY_FRAME: @@ -431,7 +434,11 @@ checkClosure( StgClosure* p ) case TSO: checkTSO((StgTSO *)p); - return tso_sizeW((StgTSO *)p); + return sizeofW(StgTSO); + + case STACK: + checkSTACK((StgStack*)p); + return stack_sizeW((StgStack*)p); case TREC_CHUNK: { @@ -514,19 +521,21 @@ checkLargeObjects(bdescr *bd) } } -void -checkTSO(StgTSO *tso) +static void +checkSTACK (StgStack *stack) { - StgPtr sp = tso->sp; - StgPtr stack = tso->stack; - StgOffset stack_size = tso->stack_size; - StgPtr stack_end = stack + stack_size; + StgPtr sp = stack->sp; + StgOffset stack_size = stack->stack_size; + StgPtr stack_end = stack->stack + stack_size; - if (tso->what_next == ThreadRelocated) { - checkTSO(tso->_link); - return; - } + ASSERT(stack->stack <= sp && sp <= stack_end); + checkStackChunk(sp, stack_end); +} + +void +checkTSO(StgTSO *tso) +{ if (tso->what_next == ThreadKilled) { /* The garbage collector doesn't bother following any pointers * from dead threads, so don't check sanity here. 
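When the collector copies a STACK (the rts/sm/Evac.c hunk above), move_STACK, in the rts/sm/Storage.c hunk further below, slides the saved sp by exactly the distance the object moved. The pointer arithmetic on its own, with invented types:

    #include <stdio.h>
    #include <string.h>
    #include <stddef.h>

    typedef struct { long *sp; long stack[16]; } Stack;

    /* after memcpy'ing src to dest, redirect the interior sp pointer by the
       same displacement; this is the essence of move_STACK() */
    static void move_stack(Stack *src, Stack *dest)
    {
        memcpy(dest, src, sizeof(Stack));
        ptrdiff_t diff = (char *)dest - (char *)src;
        dest->sp = (long *)((char *)dest->sp + diff);
    }

    int main(void)
    {
        Stack a, b;
        a.sp = &a.stack[10];
        move_stack(&a, &b);
        printf("sp offset preserved: %td\n", b.sp - b.stack);  /* prints 10 */
        return 0;
    }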
@@ -537,16 +546,24 @@ checkTSO(StgTSO *tso) ASSERT(tso->_link == END_TSO_QUEUE || tso->_link->header.info == &stg_MVAR_TSO_QUEUE_info || tso->_link->header.info == &stg_TSO_info); - ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->block_info.closure)); + + if ( tso->why_blocked == BlockedOnMVar + || tso->why_blocked == BlockedOnBlackHole + || tso->why_blocked == BlockedOnMsgThrowTo + || tso->why_blocked == NotBlocked + ) { + ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->block_info.closure)); + } + ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->bq)); ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->blocked_exceptions)); + ASSERT(LOOKS_LIKE_CLOSURE_PTR(tso->stackobj)); - ASSERT(stack <= sp && sp < stack_end); - - checkStackChunk(sp, stack_end); + // XXX are we checking the stack twice? + checkSTACK(tso->stackobj); } -/* +/* Check that all TSOs have been evacuated. Optionally also check the sanity of the TSOs. */ @@ -564,11 +581,9 @@ checkGlobalTSOList (rtsBool checkTSOs) if (checkTSOs) checkTSO(tso); - tso = deRefTSO(tso); - // If this TSO is dirty and in an old generation, it better // be on the mutable list. - if (tso->dirty || (tso->flags & TSO_LINK_DIRTY)) { + if (tso->dirty) { ASSERT(Bdescr((P_)tso)->gen_no == 0 || (tso->flags & TSO_MARKED)); tso->flags &= ~TSO_MARKED; } diff --git a/rts/sm/Scav.c b/rts/sm/Scav.c index d01442b34e..d7e16eaf2a 100644 --- a/rts/sm/Scav.c +++ b/rts/sm/Scav.c @@ -51,14 +51,6 @@ scavengeTSO (StgTSO *tso) { rtsBool saved_eager; - if (tso->what_next == ThreadRelocated) { - // the only way this can happen is if the old TSO was on the - // mutable list. We might have other links to this defunct - // TSO, so we must update its link field. - evacuate((StgClosure**)&tso->_link); - return; - } - debugTrace(DEBUG_gc,"scavenging thread %d",(int)tso->id); // update the pointer from the Task. 
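With the stack split out of the TSO, scavengeTSO above now just evacuates tso->stackobj, and each object carries its own dirty bit (see dirty_STACK in the rts/sm/Storage.c hunk below). A toy of the record-once protocol those helpers follow:

    #include <stdio.h>

    typedef struct { int dirty; } Stack;

    /* toy mutable list */
    static Stack *mut_list[16];
    static int mut_len = 0;

    /* mirror of dirty_STACK(): flag the object and remember it exactly once,
       so the next GC knows to re-scan it */
    static void dirty_stack(Stack *s)
    {
        if (s->dirty == 0) {
            s->dirty = 1;
            mut_list[mut_len++] = s;
        }
    }

    int main(void)
    {
        Stack s = { 0 };
        dirty_stack(&s);
        dirty_stack(&s);            /* second call is a no-op */
        printf("mutable list entries: %d\n", mut_len);  /* prints 1 */
        return 0;
    }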
@@ -69,17 +61,13 @@ scavengeTSO (StgTSO *tso) saved_eager = gct->eager_promotion; gct->eager_promotion = rtsFalse; - evacuate((StgClosure **)&tso->blocked_exceptions); evacuate((StgClosure **)&tso->bq); // scavange current transaction record evacuate((StgClosure **)&tso->trec); - - // scavenge this thread's stack - scavenge_stack(tso->sp, &(tso->stack[tso->stack_size])); - tso->dirty = gct->failed_to_evac; + evacuate((StgClosure **)&tso->stackobj); evacuate((StgClosure **)&tso->_link); if ( tso->why_blocked == BlockedOnMVar @@ -99,11 +87,7 @@ scavengeTSO (StgTSO *tso) } #endif - if (tso->dirty == 0 && gct->failed_to_evac) { - tso->flags |= TSO_LINK_DIRTY; - } else { - tso->flags &= ~TSO_LINK_DIRTY; - } + tso->dirty = gct->failed_to_evac; gct->eager_promotion = saved_eager; } @@ -661,12 +645,25 @@ scavenge_block (bdescr *bd) case TSO: { - StgTSO *tso = (StgTSO *)p; - scavengeTSO(tso); - p += tso_sizeW(tso); + scavengeTSO((StgTSO *)p); + p += sizeofW(StgTSO); break; } + case STACK: + { + StgStack *stack = (StgStack*)p; + + gct->eager_promotion = rtsFalse; + + scavenge_stack(stack->sp, stack->stack + stack->stack_size); + stack->dirty = gct->failed_to_evac; + p += stack_sizeW(stack); + + gct->eager_promotion = saved_eager_promotion; + break; + } + case MUT_PRIM: { StgPtr end; @@ -991,6 +988,19 @@ scavenge_mark_stack(void) break; } + case STACK: + { + StgStack *stack = (StgStack*)p; + + gct->eager_promotion = rtsFalse; + + scavenge_stack(stack->sp, stack->stack + stack->stack_size); + stack->dirty = gct->failed_to_evac; + + gct->eager_promotion = saved_eager_promotion; + break; + } + case MUT_PRIM: { StgPtr end; @@ -1227,6 +1237,19 @@ scavenge_one(StgPtr p) break; } + case STACK: + { + StgStack *stack = (StgStack*)p; + + gct->eager_promotion = rtsFalse; + + scavenge_stack(stack->sp, stack->stack + stack->stack_size); + stack->dirty = gct->failed_to_evac; + + gct->eager_promotion = saved_eager_promotion; + break; + } + case MUT_PRIM: { StgPtr end; @@ -1374,33 +1397,7 @@ scavenge_mutable_list(bdescr *bd, generation *gen) recordMutableGen_GC((StgClosure *)p,gen->no); continue; } - case TSO: { - StgTSO *tso = (StgTSO *)p; - if (tso->dirty == 0) { - // Should be on the mutable list because its link - // field is dirty. However, in parallel GC we may - // have a thread on multiple mutable lists, so - // this assertion would be invalid: - // ASSERT(tso->flags & TSO_LINK_DIRTY); - - evacuate((StgClosure **)&tso->_link); - if ( tso->why_blocked == BlockedOnMVar - || tso->why_blocked == BlockedOnBlackHole - || tso->why_blocked == BlockedOnMsgThrowTo - || tso->why_blocked == NotBlocked - ) { - evacuate((StgClosure **)&tso->block_info.prev); - } - if (gct->failed_to_evac) { - recordMutableGen_GC((StgClosure *)p,gen->no); - gct->failed_to_evac = rtsFalse; - } else { - tso->flags &= ~TSO_LINK_DIRTY; - } - continue; - } - } - default: + default: ; } @@ -1643,6 +1640,7 @@ scavenge_stack(StgPtr p, StgPtr stack_end) case CATCH_STM_FRAME: case CATCH_RETRY_FRAME: case ATOMICALLY_FRAME: + case UNDERFLOW_FRAME: case STOP_FRAME: case CATCH_FRAME: case RET_SMALL: diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 2172f9bdb0..4247d28e05 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -495,12 +495,12 @@ resizeNurseries (nat blocks) /* ----------------------------------------------------------------------------- - move_TSO is called to update the TSO structure after it has been + move_STACK is called to update the TSO structure after it has been moved from one place to another. 
-------------------------------------------------------------------------- */ void -move_TSO (StgTSO *src, StgTSO *dest) +move_STACK (StgStack *src, StgStack *dest) { ptrdiff_t diff; @@ -510,45 +510,6 @@ move_TSO (StgTSO *src, StgTSO *dest) } /* ----------------------------------------------------------------------------- - split N blocks off the front of the given bdescr, returning the - new block group. We add the remainder to the large_blocks list - in the same step as the original block. - -------------------------------------------------------------------------- */ - -bdescr * -splitLargeBlock (bdescr *bd, nat blocks) -{ - bdescr *new_bd; - - ACQUIRE_SM_LOCK; - - ASSERT(countBlocks(bd->gen->large_objects) == bd->gen->n_large_blocks); - - // subtract the original number of blocks from the counter first - bd->gen->n_large_blocks -= bd->blocks; - - new_bd = splitBlockGroup (bd, blocks); - initBdescr(new_bd, bd->gen, bd->gen->to); - new_bd->flags = BF_LARGE | (bd->flags & BF_EVACUATED); - // if new_bd is in an old generation, we have to set BF_EVACUATED - new_bd->free = bd->free; - dbl_link_onto(new_bd, &bd->gen->large_objects); - - ASSERT(new_bd->free <= new_bd->start + new_bd->blocks * BLOCK_SIZE_W); - - // add the new number of blocks to the counter. Due to the gaps - // for block descriptors, new_bd->blocks + bd->blocks might not be - // equal to the original bd->blocks, which is why we do it this way. - bd->gen->n_large_blocks += bd->blocks + new_bd->blocks; - - ASSERT(countBlocks(bd->gen->large_objects) == bd->gen->n_large_blocks); - - RELEASE_SM_LOCK; - - return new_bd; -} - -/* ----------------------------------------------------------------------------- allocate() This allocates memory in the current thread - it is intended for @@ -731,8 +692,8 @@ dirty_MUT_VAR(StgRegTable *reg, StgClosure *p) void setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target) { - if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) { - tso->flags |= TSO_LINK_DIRTY; + if (tso->dirty == 0) { + tso->dirty = 1; recordClosureMutated(cap,(StgClosure*)tso); } tso->_link = target; @@ -741,8 +702,8 @@ setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target) void setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target) { - if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) { - tso->flags |= TSO_LINK_DIRTY; + if (tso->dirty == 0) { + tso->dirty = 1; recordClosureMutated(cap,(StgClosure*)tso); } tso->block_info.prev = target; @@ -751,10 +712,19 @@ setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target) void dirty_TSO (Capability *cap, StgTSO *tso) { - if (tso->dirty == 0 && (tso->flags & TSO_LINK_DIRTY) == 0) { + if (tso->dirty == 0) { + tso->dirty = 1; recordClosureMutated(cap,(StgClosure*)tso); } - tso->dirty = 1; +} + +void +dirty_STACK (Capability *cap, StgStack *stack) +{ + if (stack->dirty == 0) { + stack->dirty = 1; + recordClosureMutated(cap,(StgClosure*)stack); + } } /* diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h index e541193573..3ff3380462 100644 --- a/rts/sm/Storage.h +++ b/rts/sm/Storage.h @@ -146,7 +146,7 @@ extern bdescr *exec_block; #define END_OF_STATIC_LIST ((StgClosure*)1) -void move_TSO (StgTSO *src, StgTSO *dest); +void move_STACK (StgStack *src, StgStack *dest); extern StgClosure * caf_list; extern StgClosure * revertible_caf_list; diff --git a/rts/win32/AsyncIO.c b/rts/win32/AsyncIO.c index 5dedee0c70..ff2e1a2498 100644 --- a/rts/win32/AsyncIO.c +++ b/rts/win32/AsyncIO.c @@ -276,20 +276,7 @@ start: prev = NULL; for(tso = blocked_queue_hd ; tso != END_TSO_QUEUE; tso = 
tso->_link) { - if (tso->what_next == ThreadRelocated) { - /* Drop the TSO from blocked_queue */ - if (prev) { - setTSOLink(&MainCapability, prev, tso->_link); - } else { - blocked_queue_hd = tso->_link; - } - if (blocked_queue_tl == tso) { - blocked_queue_tl = prev ? prev : END_TSO_QUEUE; - } - continue; - } - - switch(tso->why_blocked) { + switch(tso->why_blocked) { case BlockedOnRead: case BlockedOnWrite: case BlockedOnDoProc:
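Taken together, the patch replaces one relocatable, contiguous stack per thread with a chain of fixed-size chunks linked by underflow frames. To close, here is a compact end-to-end toy of the two operations; every type and constant is invented for the sketch, and it is not the RTS implementation:

    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    #define CHUNK_WORDS 16

    typedef struct Chunk {
        long *sp;                 /* grows downward */
        struct Chunk *next;       /* older chunk (what the underflow frame holds) */
        long words[CHUNK_WORDS];
    } Chunk;

    static Chunk *new_chunk(Chunk *older)
    {
        static Chunk pool[8]; static int used = 0;
        Chunk *c = &pool[used++];
        c->sp = c->words + CHUNK_WORDS;   /* empty: sp at the high end */
        c->next = older;
        return c;
    }

    /* overflow: open a fresh chunk and carry `carry` words of the hot end over */
    static Chunk *overflow(Chunk *old, int carry)
    {
        Chunk *top = new_chunk(old);
        top->sp -= carry;
        memcpy(top->sp, old->sp, carry * sizeof(long));
        old->sp += carry;
        return top;
    }

    /* underflow: copy any words left in the finished top chunk (the return
       values) down to the older chunk and resume there */
    static Chunk *underflow(Chunk *top)
    {
        Chunk *older = top->next;
        int retvals = (int)(top->words + CHUNK_WORDS - top->sp);
        assert(older->sp - older->words >= retvals);
        older->sp -= retvals;
        memcpy(older->sp, top->sp, retvals * sizeof(long));
        return older;
    }

    int main(void)
    {
        Chunk *stk = new_chunk(NULL);
        *--stk->sp = 1; *--stk->sp = 2; *--stk->sp = 3;  /* push three words */
        stk = overflow(stk, 2);      /* keep the two hottest words with us */
        *--stk->sp = 4;              /* work in the new chunk */
        stk->sp += 1;                /* ... and pop it again */
        stk = underflow(stk);        /* top chunk done; two leftover words return */
        printf("top of stack: %ld\n", stk->sp[0]);  /* prints 3 */
        return 0;
    }

The real versions additionally respect the -kb buffer quota when choosing how much to carry over, unlink chunks that become empty, and cooperate with the garbage collector through the per-stack dirty bits, as the hunks above show.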