Diffstat (limited to 'rts/sm')
-rw-r--r--   rts/sm/NonMoving.c     | 22
-rw-r--r--   rts/sm/NonMoving.h     |  6
-rw-r--r--   rts/sm/NonMovingMark.c |  5
-rw-r--r--   rts/sm/NonMovingScav.c | 11
-rw-r--r--   rts/sm/Storage.c       | 19
5 files changed, 60 insertions, 3 deletions
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index 41510e7f8e..e774846dbb 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -492,6 +492,24 @@ Mutex concurrent_coll_finished_lock;
  * remembered set during the preparatory GC. This allows us to safely skip the
  * non-moving write barrier without jeopardizing the snapshot invariant.
  *
+ *
+ * Note [Allocating pinned objects into the non-moving heap]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Under the moving collector small, pinned ByteArray#s are allocated by
+ * Storage.c:allocatePinned() into a per-capability accumulator block which is
+ * filled in a bump-pointer fashion. While this scheme is simple, it can lead
+ * to very poor fragmentation behavior as objects become unreachable: a single
+ * live ByteArray# can keep an entire block of memory alive.
+ *
+ * When the non-moving collector is in use we can do better by allocating small
+ * pinned objects directly into the non-moving heap.
+ *
+ * One wrinkle here is that pinned ByteArrays may have alignment requirements
+ * which requires that we insert padding zero-words before the beginning of the
+ * object. We must be certain to account for this padding when inspecting the
+ * object.
+ *
  */
 
 memcount nonmoving_live_words = 0;
@@ -660,8 +678,8 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)
     unsigned int log_block_size = log2_ceil(sz * sizeof(StgWord));
     unsigned int block_count = nonmovingBlockCountFromSize(log_block_size);
 
-    // The max we ever allocate is 3276 bytes (anything larger is a large
-    // object and not moved) which is covered by allocator 9.
+    // The max we ever allocate is NONMOVING_MAX_BLOCK_SZ bytes (anything
+    // larger is a large object and not moved) which is covered by allocator 9.
     ASSERT(log_block_size < NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT);
 
     struct NonmovingAllocator *alloca = nonmovingHeap.allocators[log_block_size - NONMOVING_ALLOCA0];
diff --git a/rts/sm/NonMoving.h b/rts/sm/NonMoving.h
index 12fb9ddaab..0f7860f44c 100644
--- a/rts/sm/NonMoving.h
+++ b/rts/sm/NonMoving.h
@@ -92,11 +92,17 @@ struct NonmovingAllocator {
 
 // allocators cover block sizes of 2^NONMOVING_ALLOCA0 to
 // 2^(NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT) (in bytes)
+// The largest allocator class must be at least LARGE_OBJECT_THRESHOLD in size
+// as Storage.c:allocatePinned will allocate small pinned allocations into the
+// non-moving heap.
 #define NONMOVING_ALLOCA_CNT 12
 
 // maximum number of free segments to hold on to
 #define NONMOVING_MAX_FREE 16
 
+// block size of largest allocator in bytes.
+#define NONMOVING_MAX_BLOCK_SZ (1 << (NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT - 1))
+
 struct NonmovingHeap {
     struct NonmovingAllocator *allocators[NONMOVING_ALLOCA_CNT];
     // free segment list. This is a cache where we keep up to
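As a point of reference for the new bound, the old and new comments can be reconciled with a few lines of standalone C. The sketch below is illustrative only and is not part of the patch; it assumes NONMOVING_ALLOCA0 is 3, which the removed "3276 bytes ... covered by allocator 9" comment implies (log2_ceil(3276) is 12 and 12 - 9 = 3), and it supplies its own stub in place of the RTS's log2_ceil().

#include <assert.h>
#include <stdio.h>

#define NONMOVING_ALLOCA0    3   /* assumed: log2 of the smallest block size */
#define NONMOVING_ALLOCA_CNT 12
#define NONMOVING_MAX_BLOCK_SZ (1 << (NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT - 1))

/* stand-in for the RTS's log2_ceil(): smallest n with 2^n >= x */
static unsigned int log2_ceil(unsigned long x)
{
    unsigned int n = 0;
    while ((1UL << n) < x)
        n++;
    return n;
}

int main(void)
{
    /* A 3276-byte pinned request (the old hard-coded maximum) selects
     * allocator class log2_ceil(3276) - NONMOVING_ALLOCA0 = 12 - 3 = 9,
     * matching the comment removed from nonmovingAllocate(). */
    assert(log2_ceil(3276) - NONMOVING_ALLOCA0 == 9);

    /* The largest class covers 2^(3 + 12 - 1) = 16384-byte blocks, so any
     * allocation below LARGE_OBJECT_THRESHOLD fits in some class. */
    printf("NONMOVING_MAX_BLOCK_SZ = %d bytes\n", NONMOVING_MAX_BLOCK_SZ);
    return 0;
}

Under those assumptions the largest allocator class covers 16 KiB blocks, comfortably above LARGE_OBJECT_THRESHOLD (roughly 3276 bytes, i.e. 8/10 of a 4 KiB block), which is exactly what the new comment in NonMoving.h requires.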
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index d9758b943f..c5c88bba43 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -1380,6 +1380,11 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
     // Trace pointers
     /////////////////////////////////////////////////////
 
+    // Find beginning of object.
+    // See Note [Allocating pinned objects into the non-moving heap].
+    while (*(StgPtr*) p == NULL)
+        p = (StgClosure *) ((StgPtr*) p + 1);
+
     const StgInfoTable *info = get_itbl(p);
     switch (info->type) {
diff --git a/rts/sm/NonMovingScav.c b/rts/sm/NonMovingScav.c
index 9f92563032..8128f0dba3 100644
--- a/rts/sm/NonMovingScav.c
+++ b/rts/sm/NonMovingScav.c
@@ -84,9 +84,18 @@
  */
 void
-nonmovingScavengeOne (StgClosure *q)
+nonmovingScavengeOne (StgClosure *q0)
 {
+    StgClosure *q = q0;
+
+    // N.B. There may be a gap before the first word of the closure in the case
+    // of an aligned ByteArray# as allocated by allocatePinned().
+    // See Note [Allocating pinned objects into the non-moving heap].
+    while (*(StgPtr*) q == NULL)
+        q = (StgClosure *) ((StgPtr*) q + 1);
+    ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
+
     StgPtr p = (StgPtr)q;
     const StgInfoTable *info = get_itbl(q);
     const bool saved_eager_promotion = gct->eager_promotion;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 40353ea180..a05e43721e 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -1248,6 +1248,25 @@ allocatePinned (Capability *cap, W_ n /*words*/, W_ alignment /*bytes*/, W_ alig
 
     const StgWord alignment_w = alignment / sizeof(W_);
 
+    // If the non-moving collector is enabled then we can allocate small,
+    // pinned allocations directly into the non-moving heap. This is a bit more
+    // expensive up-front but reduces fragmentation and is worthwhile since
+    // pinned allocations are often long-lived..
+    //
+    // See Note [Allocating pinned objects into the non-moving heap].
+    if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving)
+        && (n + alignment_w) * sizeof(W_) < NONMOVING_MAX_BLOCK_SZ)
+    {
+        ACQUIRE_SM_LOCK;
+        p = nonmovingAllocate(cap, n + alignment_w);
+        RELEASE_SM_LOCK;
+        W_ off_w = ALIGN_WITH_OFF_W(p, alignment, align_off);
+        MEMSET_SLOP_W(p, 0, off_w);
+        p += off_w;
+        MEMSET_SLOP_W(p + n, 0, alignment_w - off_w - 1);
+        return p;
+    }
+
     // If the request is for a large object, then allocate()
     // will give us a pinned object anyway.
     if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
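To make the cooperation between the new allocatePinned() path and the skip loops added to mark_closure() and nonmovingScavengeOne() concrete, here is a minimal, self-contained model of the padding scheme. It is a sketch under simplifying assumptions, not RTS code: malloc stands in for nonmovingAllocate(), the hypothetical pad_words() stands in for ALIGN_WITH_OFF_W, and the align_off parameter is ignored.

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef uintptr_t W_;              /* one heap word, as in the RTS */
#define WORD_SIZE sizeof(W_)

/* Hypothetical stand-in for ALIGN_WITH_OFF_W: number of words of zero padding
 * needed so that p + (result) is 'alignment'-byte aligned (alignment must be
 * a power of two). */
static size_t pad_words(const W_ *p, size_t alignment)
{
    uintptr_t addr = (uintptr_t)p;
    uintptr_t aligned = (addr + alignment - 1) & ~(uintptr_t)(alignment - 1);
    return (aligned - addr) / WORD_SIZE;
}

int main(void)
{
    size_t n = 4;                              /* object size in words        */
    size_t alignment = 32;                     /* requested alignment (bytes) */
    size_t alignment_w = alignment / WORD_SIZE;

    /* The patched allocatePinned() asks the non-moving allocator for
     * n + alignment_w words so that padding can be carved off the front. */
    W_ *block = malloc((n + alignment_w) * WORD_SIZE);
    assert(block != NULL);

    size_t off_w = pad_words(block, alignment);
    memset(block, 0, off_w * WORD_SIZE);       /* zero-word pre-padding       */
    W_ *obj = block + off_w;                   /* aligned start of the object */
    obj[0] = 0xdeadbeef;                       /* stands in for the info pointer */
    memset(obj + n, 0, (alignment_w - off_w) * WORD_SIZE);  /* trailing slop  */

    /* Consumer side: mark_closure()/nonmovingScavengeOne() recover the real
     * closure by skipping the leading zero words, as in the patch above. */
    W_ *scan = block;
    while (*scan == 0)
        scan++;
    assert(scan == obj);

    free(block);
    return 0;
}

The invariant the sketch relies on is the one stated in the Note: only zero words may precede the object, so a consumer that lands on the start of the allocation can always recover the real closure pointer by skipping NULLs.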