Diffstat (limited to 'rts')
-rw-r--r--   rts/Capability.c      |   5
-rw-r--r--   rts/PrimOps.cmm       |   4
-rw-r--r--   rts/Threads.c         |   4
-rw-r--r--   rts/sm/BlockAlloc.c   | 144
-rw-r--r--   rts/sm/Evac.c         |  15
-rw-r--r--   rts/sm/GC.c           |  21
-rw-r--r--   rts/sm/Sanity.c       |   8
-rw-r--r--   rts/sm/Storage.c      |   4
8 files changed, 180 insertions, 25 deletions
diff --git a/rts/Capability.c b/rts/Capability.c
index 33a94398cd..8b552e0b09 100644
--- a/rts/Capability.c
+++ b/rts/Capability.c
@@ -748,6 +748,8 @@ static Capability * waitForReturnCapability (Task *task)
  * result of the external call back to the Haskell thread that
  * made it.
  *
+ * pCap is strictly an output.
+ *
  * ------------------------------------------------------------------------- */
 
 void waitForCapability (Capability **pCap, Task *task)
@@ -840,6 +842,9 @@ void waitForCapability (Capability **pCap, Task *task)
  * SYNC_GC_PAR), either to do a sequential GC, forkProcess, or
  * setNumCapabilities. We should give up the Capability temporarily.
  *
+ * When yieldCapability returns *pCap will have been updated to the new
+ * capability held by the caller.
+ *
  * ------------------------------------------------------------------------- */
 
 #if defined(THREADED_RTS)
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index d06cde05d9..a2ab3de586 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -1744,7 +1744,7 @@ loop:
     // indicate that the MVar operation has now completed.
     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
 
-    if (TO_W_(StgStack_dirty(stack)) == 0) {
+    if ((TO_W_(StgStack_dirty(stack)) & STACK_DIRTY) == 0) {
         ccall dirty_STACK(MyCapability() "ptr", stack "ptr");
     }
 
@@ -1829,7 +1829,7 @@ loop:
     // indicate that the MVar operation has now completed.
     StgTSO__link(tso) = stg_END_TSO_QUEUE_closure;
 
-    if (TO_W_(StgStack_dirty(stack)) == 0) {
+    if ((TO_W_(StgStack_dirty(stack)) & STACK_DIRTY) == 0) {
         ccall dirty_STACK(MyCapability() "ptr", stack "ptr");
     }
 
diff --git a/rts/Threads.c b/rts/Threads.c
index 2bdcea1c00..3d5b463051 100644
--- a/rts/Threads.c
+++ b/rts/Threads.c
@@ -85,7 +85,7 @@ createThread(Capability *cap, W_ size)
     SET_HDR(stack, &stg_STACK_info, cap->r.rCCCS);
     stack->stack_size = stack_size - sizeofW(StgStack);
     stack->sp = stack->stack + stack->stack_size;
-    stack->dirty = 1;
+    stack->dirty = STACK_DIRTY;
 
     tso = (StgTSO *)allocate(cap, sizeofW(StgTSO));
     TICK_ALLOC_TSO();
@@ -804,7 +804,7 @@ loop:
     // indicate that the MVar operation has now completed.
     tso->_link = (StgTSO*)&stg_END_TSO_QUEUE_closure;
 
-    if (stack->dirty == 0) {
+    if ((stack->dirty & STACK_DIRTY) == 0) {
         dirty_STACK(cap, stack);
     }
 
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index f9e3d11407..b3e1e2ce75 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -310,7 +310,7 @@ setup_tail (bdescr *bd)
 // Take a free block group bd, and split off a group of size n from
 // it.  Adjust the free list as necessary, and return the new group.
 static bdescr *
-split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln)
+split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln /* log_2_ceil(n) */)
 {
     bdescr *fg; // free group
 
@@ -325,6 +325,46 @@ split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln)
     return fg;
 }
 
+// Take N blocks off the end, free the rest.
+static bdescr *
+split_block_high (bdescr *bd, W_ n)
+{
+    ASSERT(bd->blocks > n);
+
+    bdescr* ret = bd + bd->blocks - n; // take n blocks off the end
+    ret->blocks = n;
+    ret->start = ret->free = bd->start + (bd->blocks - n)*BLOCK_SIZE_W;
+    ret->link = NULL;
+
+    bd->blocks -= n;
+
+    setup_tail(ret);
+    setup_tail(bd);
+    freeGroup(bd);
+
+    return ret;
+}
+
+// Like `split_block_high`, but takes n blocks off the beginning rather
+// than the end.
+static bdescr *
+split_block_low (bdescr *bd, W_ n)
+{
+    ASSERT(bd->blocks > n);
+
+    bdescr* bd_ = bd + n;
+    bd_->blocks = bd->blocks - n;
+    bd_->start = bd_->free = bd->start + n*BLOCK_SIZE_W;
+
+    bd->blocks = n;
+
+    setup_tail(bd_);
+    setup_tail(bd);
+    freeGroup(bd_);
+
+    return bd;
+}
+
 /* Only initializes the start pointers on the first megablock and the
  * blocks field of the first bdescr; callers are responsible for calling
  * initGroup afterwards.
@@ -461,6 +501,108 @@ finish:
     return bd;
 }
 
+// Allocate `n` blocks aligned to `n` blocks, e.g. when n = 8, the blocks will
+// be aligned at `8 * BLOCK_SIZE`. For a group with `n` blocks this can be used
+// for easily accessing the beginning of the group from a location p in the
+// group with
+//
+//     p % (BLOCK_SIZE*n)
+//
+// Used by the non-moving collector for allocating segments.
+//
+// Because the storage manager does not support aligned allocations, we have to
+// allocate `2*n - 1` blocks here to make sure we'll be able to find an aligned
+// region in the allocated blocks. After finding the aligned area we want to
+// free slop on the low and high sides, and block allocator doesn't support
+// freeing only some portion of a megablock (we can only free whole megablocks).
+// So we disallow allocating megablocks here, and allow allocating at most
+// `BLOCKS_PER_MBLOCK / 2` blocks.
+bdescr *
+allocAlignedGroupOnNode (uint32_t node, W_ n)
+{
+    // allocate enough blocks to have enough space aligned at n-block boundary
+    // free any slops on the low and high side of this space
+
+    // number of blocks to allocate to make sure we have enough aligned space
+    W_ num_blocks = 2*n - 1;
+
+    if (num_blocks >= BLOCKS_PER_MBLOCK) {
+        barf("allocAlignedGroupOnNode: allocating megablocks is not supported\n"
+             "    requested blocks: %" FMT_Word "\n"
+             "    required for alignment: %" FMT_Word "\n"
+             "    megablock size (in blocks): %" FMT_Word,
+             n, num_blocks, (W_) BLOCKS_PER_MBLOCK);
+    }
+
+    W_ group_size = n * BLOCK_SIZE;
+
+    // To reduce splitting and fragmentation we use allocLargeChunkOnNode here.
+    // Tweak the max allocation to avoid allocating megablocks. Splitting slop
+    // below doesn't work with megablocks (freeGroup can't free only a portion
+    // of a megablock so we can't allocate megablocks and free some parts of
+    // them).
+    W_ max_blocks = stg_min(num_blocks * 3, BLOCKS_PER_MBLOCK - 1);
+    bdescr *bd = allocLargeChunkOnNode(node, num_blocks, max_blocks);
+    // We may allocate more than num_blocks, so update it
+    num_blocks = bd->blocks;
+
+    // slop on the low side
+    W_ slop_low = 0;
+    if ((uintptr_t)bd->start % group_size != 0) {
+        slop_low = group_size - ((uintptr_t)bd->start % group_size);
+    }
+
+    W_ slop_high = (num_blocks * BLOCK_SIZE) - group_size - slop_low;
+
+    ASSERT((slop_low % BLOCK_SIZE) == 0);
+    ASSERT((slop_high % BLOCK_SIZE) == 0);
+
+    W_ slop_low_blocks = slop_low / BLOCK_SIZE;
+    W_ slop_high_blocks = slop_high / BLOCK_SIZE;
+
+    ASSERT(slop_low_blocks + slop_high_blocks + n == num_blocks);
+
+#if defined(DEBUG)
+    checkFreeListSanity();
+    W_ free_before = countFreeList();
+#endif
+
+    if (slop_low_blocks != 0) {
+        bd = split_block_high(bd, num_blocks - slop_low_blocks);
+        ASSERT(countBlocks(bd) == num_blocks - slop_low_blocks);
+    }
+
+#if defined(DEBUG)
+    ASSERT(countFreeList() == free_before + slop_low_blocks);
+    checkFreeListSanity();
+#endif
+
+    // At this point the bd should be aligned, but we may have slop on the high side
+    ASSERT((uintptr_t)bd->start % group_size == 0);
+
+#if defined(DEBUG)
+    free_before = countFreeList();
+#endif
+
+    if (slop_high_blocks != 0) {
+        bd = split_block_low(bd, n);
+        ASSERT(bd->blocks == n);
+    }
+
+#if defined(DEBUG)
+    ASSERT(countFreeList() == free_before + slop_high_blocks);
+    checkFreeListSanity();
+#endif
+
+    // Should still be aligned
+    ASSERT((uintptr_t)bd->start % group_size == 0);
+
+    // Just to make sure I get this right
+    ASSERT(Bdescr(bd->start) == bd);
+
+    return bd;
+}
+
 STATIC_INLINE
 uint32_t nodeWithLeastBlocks (void)
 {
diff --git a/rts/sm/Evac.c b/rts/sm/Evac.c
index 53a473d26c..666daf0e32 100644
--- a/rts/sm/Evac.c
+++ b/rts/sm/Evac.c
@@ -39,7 +39,19 @@ copy_tag(p, info, src, size, stp, tag)
 
 #endif
 
-/* Used to avoid long recursion due to selector thunks
+/* Note [Selector optimisation depth limit]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * MAX_THUNK_SELECTOR_DEPTH is used to avoid long recursion of
+ * eval_thunk_selector due to nested selector thunks. Note that this *only*
+ * counts nested selector thunks, e.g. `fst (fst (... (fst x)))`. The collector
+ * will traverse interleaved selector-constructor pairs without limit, e.g.
+ *
+ *     a = (fst b, _)
+ *     b = (fst c, _)
+ *     c = (fst d, _)
+ *     d = (x, _)
+ *
  */
 #define MAX_THUNK_SELECTOR_DEPTH 16
 
@@ -1257,6 +1269,7 @@ selector_loop:
 
               // recursively evaluate this selector.  We don't want to
               // recurse indefinitely, so we impose a depth bound.
+              // See Note [Selector optimisation depth limit].
              if (gct->thunk_selector_depth >= MAX_THUNK_SELECTOR_DEPTH) {
                   goto bale_out;
               }
diff --git a/rts/sm/GC.c b/rts/sm/GC.c
index 76237f35c2..3f301aeb53 100644
--- a/rts/sm/GC.c
+++ b/rts/sm/GC.c
@@ -1841,21 +1841,16 @@ resize_nursery (void)
 
 #if defined(DEBUG)
 
-static void gcCAFs(void)
+void gcCAFs(void)
 {
-    StgIndStatic *p, *prev;
+    uint32_t i = 0;
+    StgIndStatic *prev = NULL;
 
-    const StgInfoTable *info;
-    uint32_t i;
-
-    i = 0;
-    p = debug_caf_list;
-    prev = NULL;
-
-    for (p = debug_caf_list; p != (StgIndStatic*)END_OF_CAF_LIST;
-         p = (StgIndStatic*)p->saved_info) {
-
-        info = get_itbl((StgClosure*)p);
+    for (StgIndStatic *p = debug_caf_list;
+         p != (StgIndStatic*) END_OF_CAF_LIST;
+         p = (StgIndStatic*) p->saved_info)
+    {
+        const StgInfoTable *info = get_itbl((StgClosure*)p);
         ASSERT(info->type == IND_STATIC);
 
         // See Note [STATIC_LINK fields] in Storage.h
diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c
index 3585bd93b4..289ac542b8 100644
--- a/rts/sm/Sanity.c
+++ b/rts/sm/Sanity.c
@@ -632,9 +632,9 @@ checkGlobalTSOList (bool checkTSOs)
 
           stack = tso->stackobj;
           while (1) {
-              if (stack->dirty & 1) {
-                  ASSERT(Bdescr((P_)stack)->gen_no == 0 || (stack->dirty & TSO_MARKED));
-                  stack->dirty &= ~TSO_MARKED;
+              if (stack->dirty & STACK_DIRTY) {
+                  ASSERT(Bdescr((P_)stack)->gen_no == 0 || (stack->dirty & STACK_SANE));
+                  stack->dirty &= ~STACK_SANE;
               }
               frame = (StgUnderflowFrame*) (stack->stack + stack->stack_size
                                             - sizeofW(StgUnderflowFrame));
@@ -669,7 +669,7 @@ checkMutableList( bdescr *mut_bd, uint32_t gen )
                   ((StgTSO *)p)->flags |= TSO_MARKED;
                   break;
               case STACK:
-                  ((StgStack *)p)->dirty |= TSO_MARKED;
+                  ((StgStack *)p)->dirty |= STACK_SANE;
                   break;
               }
           }
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 0130a08f7c..97c71478ed 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -1139,8 +1139,8 @@ dirty_TSO (Capability *cap, StgTSO *tso)
 void
 dirty_STACK (Capability *cap, StgStack *stack)
 {
-    if (stack->dirty == 0) {
-        stack->dirty = 1;
+    if (! (stack->dirty & STACK_DIRTY)) {
+        stack->dirty = STACK_DIRTY;
         recordClosureMutated(cap,(StgClosure*)stack);
     }
 }
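
The recurring change from `stack->dirty == 0` to `(stack->dirty & STACK_DIRTY) == 0` (in PrimOps.cmm, Threads.c, and Storage.c above) reflects that the stack's dirty field now holds a small bit set rather than a boolean: the Sanity.c hunks store their own mark, STACK_SANE, in the same field. Once a second bit shares the field, an equality test against zero misreads a stack that carries only the sanity bit. Below is a minimal sketch of the flag discipline; the flag values are assumed for illustration, the real constants are defined in the RTS headers.

    #include <assert.h>
    #include <stdint.h>

    /* Assumed values for illustration; the actual constants live in
     * the GHC RTS headers. */
    #define STACK_DIRTY (1 << 0)  /* stack mutated since the last GC       */
    #define STACK_SANE  (1 << 1)  /* mark used only by the sanity checker  */

    int main(void)
    {
        uint32_t dirty = 0;

        /* The sanity checker marks a stack without dirtying it. */
        dirty |= STACK_SANE;

        /* The old test (dirty == 0) now wrongly concludes the stack is
         * already dirty, so it would skip recording the mutation... */
        assert(dirty != 0);

        /* ...whereas testing the dirty bit itself still answers correctly. */
        assert((dirty & STACK_DIRTY) == 0);
        return 0;
    }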
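The alignment property established by allocAlignedGroupOnNode is what lets a consumer (the non-moving collector's segments) find the start of a group from any interior pointer with plain modular arithmetic, as the `p % (BLOCK_SIZE*n)` remark in its comment hints. A sketch of that computation, using an illustrative BLOCK_SIZE and an invented helper name:

    #include <stdint.h>

    #define BLOCK_SIZE 4096  /* illustrative; the RTS defines the real value */

    /* Hypothetical helper: given any address p inside an n-block group
     * returned by allocAlignedGroupOnNode, recover the group's start.
     * This works only because the group is aligned to n * BLOCK_SIZE. */
    static inline uintptr_t
    aligned_group_start (uintptr_t p, uintptr_t n)
    {
        uintptr_t group_size = n * BLOCK_SIZE;
        return p - (p % group_size);  /* strip the offset within the group */
    }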
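It may help to run the slop arithmetic from allocAlignedGroupOnNode on concrete numbers. Working in whole-block units (so BLOCK_SIZE cancels out), suppose n = 8, the allocator returned exactly num_blocks = 2*n - 1 blocks, and the chunk happens to start 3 blocks past an 8-block boundary; all figures here are invented for illustration.

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uintptr_t n          = 8;        /* requested group size, in blocks   */
        uintptr_t num_blocks = 2*n - 1;  /* = 15; always contains an aligned
                                          * n-block region somewhere inside   */
        uintptr_t start      = 19;       /* assumed chunk start, in block
                                          * units: 3 past an 8-boundary       */

        /* Blocks shaved off the low side to reach the next n-boundary;
         * freed by the split_block_high call in allocAlignedGroupOnNode. */
        uintptr_t slop_low  = (start % n == 0) ? 0 : n - (start % n);   /* 5 */

        /* Whatever remains above the aligned group; freed by split_block_low. */
        uintptr_t slop_high = num_blocks - n - slop_low;                /* 2 */

        assert(slop_low + n + slop_high == num_blocks);
        assert((start + slop_low) % n == 0);  /* the kept group is aligned */
        return 0;
    }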
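Finally, a toy model of the depth bound described in Note [Selector optimisation depth limit]: only nested selector applications count toward the limit, and past the limit the collector abandons the optimisation rather than recursing further (the `goto bale_out` above). The structure and names below are invented for illustration; the real logic lives in eval_thunk_selector in Evac.c.

    #include <stddef.h>

    #define MAX_THUNK_SELECTOR_DEPTH 16  /* same constant as in Evac.c */

    /* A fake "selector thunk": either selects from another thunk, or
     * holds a final value (standing in for a constructor). */
    struct thunk { struct thunk *selectee; int value; };

    /* Follow a chain of nested selectors, but only up to the bound; past
     * it, report failure instead of recursing indefinitely. Returns 1 on
     * success (writing the result to *out), 0 when baling out. */
    static int eval_selector (const struct thunk *t, int depth, int *out)
    {
        if (t->selectee == NULL) {                  /* reached a "constructor" */
            *out = t->value;
            return 1;
        }
        if (depth >= MAX_THUNK_SELECTOR_DEPTH) {    /* bale out */
            return 0;
        }
        return eval_selector(t->selectee, depth + 1, out);
    }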