-rw-r--r-- | includes/rts/storage/ClosureMacros.h | 39
-rw-r--r-- | rts/Apply.cmm                         |  2
-rw-r--r-- | rts/ProfHeap.c                        | 17
-rw-r--r-- | rts/sm/Sanity.c                       | 14
-rw-r--r-- | rts/sm/Storage.c                      | 49
5 files changed, 97 insertions, 24 deletions
diff --git a/includes/rts/storage/ClosureMacros.h b/includes/rts/storage/ClosureMacros.h
index 1b3628f2b9..17a0d5375b 100644
--- a/includes/rts/storage/ClosureMacros.h
+++ b/includes/rts/storage/ClosureMacros.h
@@ -474,31 +474,38 @@ INLINE_HEADER StgWord8 *mutArrPtrsCard (StgMutArrPtrs *a, W_ n)
    OVERWRITING_CLOSURE(p) on the old closure that is about to be overwritten.

-   Note [zeroing slop]
-
-   In some scenarios we write zero words into "slop"; memory that is
-   left unoccupied after we overwrite a closure in the heap with a
-   smaller closure.
+   Note [zeroing slop when overwriting closures]
+
+   When we overwrite a closure in the heap with a smaller one, in some
+   scenarios we need to write zero words into "slop", the memory that is
+   left unoccupied. See Note [slop on the heap].

    Zeroing slop is required for:

-    - full-heap sanity checks (DEBUG, and +RTS -DS)
-    - LDV profiling (PROFILING, and +RTS -hb)
-
-   Zeroing slop must be disabled for:
-
-    - THREADED_RTS with +RTS -N2 and greater, because we cannot
-      overwrite slop when another thread might be reading it.
-
-   Hence, slop is zeroed when either:
-
-    - PROFILING && era <= 0 (LDV is on)
-    - !THREADED_RTS && DEBUG
-
-   And additionally:
-
-    - LDV profiling and +RTS -N2 are incompatible
-    - full-heap sanity checks are disabled for THREADED_RTS
+    - full-heap sanity checks (DEBUG, and +RTS -DS), and
+
+    - LDV profiling (PROFILING, and +RTS -hb).
+
+   However, we can get into trouble if we zero slop for ordinarily immutable
+   closures when using multiple threads, since there is nothing preventing
+   another thread from still being in the process of reading the memory we
+   are about to zero.
+
+   Thus, with the THREADED RTS and +RTS -N2 or greater we must not zero an
+   immutable closure's slop.
+
+   Hence, an immutable closure's slop is zeroed when either:
+
+    - PROFILING && era > 0 (LDV is on), or
+    - !THREADED_RTS && DEBUG.
+
+   Additionally:
+
+    - LDV profiling and +RTS -N2 are incompatible, and
+
+    - full-heap sanity checks are disabled for the THREADED RTS, except when
+      they run right after a major GC, when there is no slop.
+      See Note [heap sanity checking with SMP].

    -------------------------------------------------------------------------- */
@@ -534,7 +541,7 @@ EXTERN_INLINE void overwritingClosure_ (StgClosure *p,
 EXTERN_INLINE void overwritingClosure_ (StgClosure *p, uint32_t offset, uint32_t size, bool prim USED_IF_PROFILING)
 {
 #if ZERO_SLOP_FOR_LDV_PROF && !ZERO_SLOP_FOR_SANITY_CHECK
-    // see Note [zeroing slop], also #8402
+    // see Note [zeroing slop when overwriting closures], also #8402
     if (era <= 0) return;
 #endif
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index f23a507402..a706c68194 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -689,7 +689,7 @@ for:
     // Because of eager blackholing the closure no longer has correct size so
     // threadPaused() can't correctly zero the slop, so we do it here. See #15571
-    // and Note [zeroing slop].
+    // and Note [zeroing slop when overwriting closures].
     OVERWRITING_CLOSURE_SIZE(ap, BYTES_TO_WDS(SIZEOF_StgThunkHeader) + 2 + Words);

     ENTER_R1();
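To make the behaviour described in Note [zeroing slop when overwriting closures] concrete, here is a small standalone C sketch. It is not the RTS implementation; the heap is modelled as a plain word array and all names and sizes are illustrative. It only shows what "zeroing slop" amounts to when a closure is overwritten by a smaller one.

/* Standalone sketch: zero the payload words left behind when an old closure
 * of old_size words is overwritten by a closure of only new_size words. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uintptr_t W;   /* one heap word */

static void zero_slop(W *closure, size_t new_size, size_t old_size)
{
    /* The words past the new closure are no longer owned by any closure:
     * zero them so a linear scan can recognise them as slop. */
    if (old_size > new_size)
        memset(closure + new_size, 0, (old_size - new_size) * sizeof(W));
}

int main(void)
{
    W heap[8] = {0xAA, 1, 2, 3, 4, 5, 6, 7};  /* pretend 8-word closure */
    zero_slop(heap, 3, 8);                    /* overwrite it with a 3-word one */
    for (int i = 0; i < 8; i++)
        printf("word %d = %#lx\n", i, (unsigned long)heap[i]);
    return 0;
}

In the threaded RTS this zeroing is exactly what must be suppressed for immutable closures, since another thread may still be reading those words.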
diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c
index 6c26de1699..26f4de468d 100644
--- a/rts/ProfHeap.c
+++ b/rts/ProfHeap.c
@@ -1275,8 +1275,21 @@ heapCensusChain( Census *census, bdescr *bd )
             heapProfObject(census,(StgClosure*)p,size,prim);

             p += size;
-            /* skip over slop */
-            while (p < bd->free && !*p) p++;     // skip slop
+
+            /* skip over slop, see Note [slop on the heap] */
+            while (p < bd->free && !*p) p++;
+            /* Note [skipping slop in the heap profiler]
+             *
+             * We make sure to zero slop that can remain after a major GC, so
+             * here we can assume that any slop words we see before the block's
+             * free pointer are zero. Since info pointers are always nonzero we
+             * can use this to scan for the next valid heap closure.
+             *
+             * Note that not all types of slop are relevant here, only the ones
+             * that can remain after a major GC: essentially just large objects
+             * and pinned objects. All other closures will have been packed
+             * tightly into fresh blocks.
+             */
         }
     }
 }
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 1c4c75514d..3ac926715a 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -475,7 +475,7 @@ void checkHeapChain (bdescr *bd)
             ASSERT( size >= MIN_PAYLOAD_SIZE + sizeofW(StgHeader) );
             p += size;
-            /* skip over slop */
+            /* skip over slop, see Note [slop on the heap] */
             while (p < bd->free &&
                    (*p < 0x1000 || !LOOKS_LIKE_INFO_PTR(*p))) { p++; }
         }
@@ -796,12 +796,16 @@ static void checkGeneration (generation *gen,
     ASSERT(countBlocks(gen->large_objects) == gen->n_large_blocks);

 #if defined(THREADED_RTS)
+    // Note [heap sanity checking with SMP]
+    //
     // heap sanity checking doesn't work with SMP for two reasons:
-    //   * we can't zero the slop (see Updates.h). However, we can sanity-check
-    //     the heap after a major gc, because there is no slop.
     //
-    //   * the nonmoving collector may be mutating its large object lists, unless we
-    //     were in fact called by the nonmoving collector.
+    //   * We can't zero the slop. However, we can sanity-check the heap after
+    //     a major gc, because there is no slop. See also Updates.h and
+    //     Note [zeroing slop when overwriting closures].
+    //
+    //   * The nonmoving collector may be mutating its large object lists,
+    //     unless we were in fact called by the nonmoving collector.
     if (!after_major_gc) return;
 #endif
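Both scan loops touched above rely on slop being recognisable during a linear sweep of a block: the profiler assumes slop words are zero, while the sanity checker looks for plausible info pointers. The following standalone C sketch shows only the heap-profiler variant; the block layout, sizes, and the stand-in for closure_sizeW are all illustrative, not RTS code.

/* Standalone sketch: sweep a block of closures, stepping over zeroed slop.
 * Info pointers are never zero, so a run of zero words up to the block's
 * free pointer must be slop. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uintptr_t W;

int main(void)
{
    /* A toy "block": two fake closures (nonzero header word plus payload)
     * separated by zeroed slop. */
    W block[10] = { 0x1000, 42, 43,      /* 3-word closure */
                    0, 0, 0,             /* zeroed slop    */
                    0x2000, 7, 8, 9 };   /* 4-word closure */
    W *p        = block;
    W *free_ptr = block + 10;

    while (p < free_ptr) {
        size_t size = (*p == 0x1000) ? 3 : 4;   /* stand-in for closure_sizeW */
        printf("closure at offset %td, %zu words\n", p - block, size);
        p += size;
        while (p < free_ptr && !*p) p++;        /* skip over zeroed slop */
    }
    return 0;
}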
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 2a86c19fdb..d9f5fb77ff 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -907,6 +907,53 @@ accountAllocation(Capability *cap, W_ n)
     }
 }

+/* Note [slop on the heap]
+ *
+ * We use the term "slop" to refer to allocated memory on the heap which isn't
+ * occupied by any closure. Usually closures are packed tightly into the heap
+ * blocks, storage for one immediately following another. However there are
+ * situations where slop is left behind:
+ *
+ * - Allocating large objects (BF_LARGE)
+ *
+ *   These are given an entire block, but if they don't fill the entire block
+ *   the rest is slop. See allocateMightFail in Storage.c.
+ *
+ * - Allocating pinned objects with alignment (BF_PINNED)
+ *
+ *   These are packed into blocks like normal closures, however they can have
+ *   alignment constraints, and any memory that had to be skipped for
+ *   alignment becomes slop. See allocatePinned in Storage.c.
+ *
+ * - Shrinking (Small)Mutable(Byte)Array#
+ *
+ *   The size of these closures can be decreased after allocation, leaving any
+ *   now-unused memory behind as slop. See stg_resizzeMutableByteArrayzh,
+ *   stg_shrinkSmallMutableArrayzh, and stg_shrinkMutableByteArrayzh in
+ *   PrimOps.cmm.
+ *
+ *   This type of slop is extra tricky because it can also be pinned and
+ *   large.
+ *
+ * - Overwriting closures
+ *
+ *   During GC the RTS overwrites closures with forwarding pointers; this can
+ *   leave slop behind depending on the size of the closure being
+ *   overwritten. See Note [zeroing slop when overwriting closures].
+ *
+ * In several of these cases we actually zero the slop so that we can linearly
+ * scan over blocks of closures. This trick is used by the sanity checking
+ * code and the heap profiler, see Note [skipping slop in the heap profiler].
+ *
+ * When profiling we zero:
+ *  - Pinned object alignment slop, see MEMSET_IF_PROFILING_W in allocatePinned.
+ *  - Shrunk array slop, see OVERWRITING_MUTABLE_CLOSURE.
+ *
+ * When performing LDV profiling or using a (single-threaded) debug RTS we
+ * zero slop even when overwriting immutable closures, see
+ * Note [zeroing slop when overwriting closures].
+ */
+
 /* -----------------------------------------------------------------------------
    StgPtr allocate (Capability *cap, W_ n)
@@ -1069,6 +1116,8 @@ allocateMightFail (Capability *cap, W_ n)
 /**
  * When profiling we zero the space used for alignment. This allows us to
  * traverse pinned blocks in the heap profiler.
+ *
+ * See Note [skipping slop in the heap profiler].
  */
 #if defined(PROFILING)
 #define MEMSET_IF_PROFILING_W(p, val, len) memset(p, val, (len) * sizeof(W_))
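The alignment slop mentioned for allocatePinned and MEMSET_IF_PROFILING_W can be pictured with a toy bump allocator. The standalone C sketch below is not the RTS allocator: it zeroes the skipped words unconditionally for simplicity, whereas the RTS only does so when profiling, and every name in it is made up for illustration.

/* Standalone sketch: a bump allocator that pads to an alignment boundary and
 * zeroes the padding (the alignment slop) so a later linear scan can step
 * over it. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uintptr_t W;

static W arena[64];
static size_t hp = 0;   /* bump pointer, in words */

/* Allocate n words whose first word is aligned to 'align' words,
 * zeroing any alignment slop. */
static W *alloc_aligned(size_t n, size_t align)
{
    size_t off = ((uintptr_t)(arena + hp) / sizeof(W)) % align;
    size_t pad = off ? align - off : 0;
    memset(arena + hp, 0, pad * sizeof(W));   /* zero the alignment slop */
    hp += pad;
    W *p = arena + hp;
    hp += n;
    return p;
}

int main(void)
{
    W *a = alloc_aligned(3, 1);
    W *b = alloc_aligned(4, 4);   /* may leave up to 3 words of zeroed slop */
    printf("a=%p b=%p slop=%td words\n", (void *)a, (void *)b, (b - a) - 3);
    return 0;
}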