author     Ben Gamari <ben@smart-cactus.org>   2020-07-26 15:20:49 -0400
committer  Ben Gamari <ben@smart-cactus.org>   2022-12-21 15:24:39 -0500
commit     c5fa7410baecd9d8bf7b07565de341fdc0710bff (patch)
tree       91d6da47a8c7651d383d9f0fc2ee3341b26911ac
parent     5e047effac9228f3bdddb66c9056e86621ccbec8 (diff)
download   haskell-wip/gc/nonmoving-pinned.tar.gz
nonmoving: Teach allocatePinned() to allocate into nonmoving heap (wip/gc/nonmoving-pinned)

The allocatePinned() function is used to allocate pinned memory (e.g. for newPinnedByteArray#).
-rw-r--r--  rts/sm/NonMoving.c     | 22
-rw-r--r--  rts/sm/NonMoving.h     |  6
-rw-r--r--  rts/sm/NonMovingMark.c |  5
-rw-r--r--  rts/sm/NonMovingScav.c | 11
-rw-r--r--  rts/sm/Storage.c       | 19
5 files changed, 60 insertions(+), 3 deletions(-)
diff --git a/rts/sm/NonMoving.c b/rts/sm/NonMoving.c
index 41510e7f8e..e774846dbb 100644
--- a/rts/sm/NonMoving.c
+++ b/rts/sm/NonMoving.c
@@ -492,6 +492,24 @@ Mutex concurrent_coll_finished_lock;
* remembered set during the preparatory GC. This allows us to safely skip the
* non-moving write barrier without jeopardizing the snapshot invariant.
*
+ *
+ * Note [Allocating pinned objects into the non-moving heap]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Under the moving collector small, pinned ByteArray#s are allocated by
+ * Storage.c:allocatePinned() into a per-capability accumulator block which is
+ * filled in a bump-pointer fashion. While this scheme is simple, it can lead
+ * to very poor fragmentation behavior as objects become unreachable: a single
+ * live ByteArray# can keep an entire block of memory alive.
+ *
+ * When the non-moving collector is in use we can do better by allocating small
+ * pinned objects directly into the non-moving heap.
+ *
+ * One wrinkle here is that pinned ByteArray#s may have alignment requirements,
+ * which require that we insert padding zero-words before the beginning of the
+ * object. We must be certain to account for this padding when inspecting the
+ * object.
+ *
*/
memcount nonmoving_live_words = 0;
@@ -660,8 +678,8 @@ void *nonmovingAllocate(Capability *cap, StgWord sz)
unsigned int log_block_size = log2_ceil(sz * sizeof(StgWord));
unsigned int block_count = nonmovingBlockCountFromSize(log_block_size);
- // The max we ever allocate is 3276 bytes (anything larger is a large
- // object and not moved) which is covered by allocator 9.
+ // The max we ever allocate is NONMOVING_MAX_BLOCK_SZ bytes (anything
+ // larger is a large object and not moved), which is covered by the largest allocator.
ASSERT(log_block_size < NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT);
struct NonmovingAllocator *alloca = nonmovingHeap.allocators[log_block_size - NONMOVING_ALLOCA0];
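For orientation, here is a standalone sketch (not part of the patch) of the size-class mapping used by nonmovingAllocate() above: the request's byte size is rounded up to the next power of two, and the exponent, offset by the smallest class, selects the allocator. The value 3 standing in for NONMOVING_ALLOCA0 is an assumption made purely for illustration.

/* Sketch only: map a request of sz words to a non-moving allocator index. */
#include <stdio.h>
#include <stdint.h>

#define ALLOCA0 3   /* assumed stand-in for NONMOVING_ALLOCA0 */

/* Smallest k such that 2^k >= x; plays the role of log2_ceil() above. */
static unsigned int log2_ceil_sketch(uint64_t x)
{
    unsigned int k = 0;
    while (((uint64_t)1 << k) < x) k++;
    return k;
}

int main(void)
{
    uint64_t sz_words = 100;                          /* requested size in words */
    uint64_t sz_bytes = sz_words * sizeof(uint64_t);  /* 800 bytes on a 64-bit host */
    unsigned int log_block_size = log2_ceil_sketch(sz_bytes);    /* 10, i.e. 1024-byte blocks */
    printf("allocator index = %u\n", log_block_size - ALLOCA0);  /* 10 - 3 = 7 */
    return 0;
}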
diff --git a/rts/sm/NonMoving.h b/rts/sm/NonMoving.h
index 12fb9ddaab..0f7860f44c 100644
--- a/rts/sm/NonMoving.h
+++ b/rts/sm/NonMoving.h
@@ -92,11 +92,17 @@ struct NonmovingAllocator {
// allocators cover block sizes of 2^NONMOVING_ALLOCA0 to
// 2^(NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT) (in bytes)
+// The largest allocator class must be at least LARGE_OBJECT_THRESHOLD in size,
+// as Storage.c:allocatePinned() allocates small pinned objects into the
+// non-moving heap.
#define NONMOVING_ALLOCA_CNT 12
// maximum number of free segments to hold on to
#define NONMOVING_MAX_FREE 16
+// block size of largest allocator in bytes.
+#define NONMOVING_MAX_BLOCK_SZ (1 << (NONMOVING_ALLOCA0 + NONMOVING_ALLOCA_CNT - 1))
+
struct NonmovingHeap {
struct NonmovingAllocator *allocators[NONMOVING_ALLOCA_CNT];
// free segment list. This is a cache where we keep up to
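To make the size arithmetic concrete: with an assumed NONMOVING_ALLOCA0 of 3 and the NONMOVING_ALLOCA_CNT of 12 above, the largest size class holds 1 << (3 + 12 - 1) = 16384-byte blocks, comfortably above the roughly 3276-byte LARGE_OBJECT_THRESHOLD quoted in the old NonMoving.c comment. A compile-time sketch of the stated invariant, using hypothetical SKETCH_* stand-ins for the real constants, might look like this:

#include <assert.h>

#define SKETCH_ALLOCA0              3      /* assumed stand-in for NONMOVING_ALLOCA0 */
#define SKETCH_ALLOCA_CNT           12     /* mirrors NONMOVING_ALLOCA_CNT */
#define SKETCH_MAX_BLOCK_SZ         (1 << (SKETCH_ALLOCA0 + SKETCH_ALLOCA_CNT - 1))
#define SKETCH_LARGE_OBJ_THRESHOLD  3276   /* value taken from the old comment above */

/* The largest class must cover anything allocatePinned() may send this way. */
static_assert(SKETCH_MAX_BLOCK_SZ >= SKETCH_LARGE_OBJ_THRESHOLD,
              "largest non-moving size class must cover small pinned allocations");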
diff --git a/rts/sm/NonMovingMark.c b/rts/sm/NonMovingMark.c
index d9758b943f..c5c88bba43 100644
--- a/rts/sm/NonMovingMark.c
+++ b/rts/sm/NonMovingMark.c
@@ -1380,6 +1380,11 @@ mark_closure (MarkQueue *queue, const StgClosure *p0, StgClosure **origin)
// Trace pointers
/////////////////////////////////////////////////////
+ // Find beginning of object.
+ // See Note [Allocating pinned objects into the non-moving heap].
+ while (*(StgPtr*) p == NULL)
+ p = (StgClosure *) ((StgPtr*) p + 1);
+
const StgInfoTable *info = get_itbl(p);
switch (info->type) {
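The loop added above relies on the allocation side zeroing the padding words, so any NULL word a scanner encounters at the start of an allocation must be padding rather than an info pointer. A toy, self-contained illustration of that skip idiom (not GHC code):

#include <stdio.h>
#include <stddef.h>

int main(void)
{
    int object = 42;                       /* stands in for a closure */
    /* Two zeroed "padding" words followed by the object's first word; an
     * info pointer is never NULL, so the first non-NULL word is the object. */
    void *heap_words[3] = { NULL, NULL, &object };

    void **p = heap_words;
    while (*p == NULL)                     /* same shape as the loop in mark_closure() */
        p++;

    printf("object found after %td padding words: %d\n",
           (ptrdiff_t)(p - heap_words), *(int *)*p);
    return 0;
}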
diff --git a/rts/sm/NonMovingScav.c b/rts/sm/NonMovingScav.c
index 9f92563032..8128f0dba3 100644
--- a/rts/sm/NonMovingScav.c
+++ b/rts/sm/NonMovingScav.c
@@ -84,9 +84,18 @@
*/
void
-nonmovingScavengeOne (StgClosure *q)
+nonmovingScavengeOne (StgClosure *q0)
{
+ StgClosure *q = q0;
+
+ // N.B. There may be a gap before the first word of the closure in the case
+ // of an aligned ByteArray# as allocated by allocatePinned().
+ // See Note [Allocating pinned objects into the non-moving heap].
+ while (*(StgPtr*) q == NULL)
+ q = (StgClosure *) ((StgPtr*) q + 1);
+
ASSERT(LOOKS_LIKE_CLOSURE_PTR(q));
+
StgPtr p = (StgPtr)q;
const StgInfoTable *info = get_itbl(q);
const bool saved_eager_promotion = gct->eager_promotion;
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 40353ea180..a05e43721e 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -1248,6 +1248,25 @@ allocatePinned (Capability *cap, W_ n /*words*/, W_ alignment /*bytes*/, W_ alig
const StgWord alignment_w = alignment / sizeof(W_);
+ // If the non-moving collector is enabled then we can allocate small,
+ // pinned allocations directly into the non-moving heap. This is a bit more
+ // expensive up-front but reduces fragmentation and is worthwhile since
+ // pinned allocations are often long-lived.
+ //
+ // See Note [Allocating pinned objects into the non-moving heap].
+ if (RTS_UNLIKELY(RtsFlags.GcFlags.useNonmoving)
+ && (n + alignment_w) * sizeof(W_) < NONMOVING_MAX_BLOCK_SZ)
+ {
+ ACQUIRE_SM_LOCK;
+ p = nonmovingAllocate(cap, n + alignment_w);
+ RELEASE_SM_LOCK;
+ W_ off_w = ALIGN_WITH_OFF_W(p, alignment, align_off);
+ MEMSET_SLOP_W(p, 0, off_w);
+ p += off_w;
+ MEMSET_SLOP_W(p + n, 0, alignment_w - off_w - 1);
+ return p;
+ }
+
// If the request is for a large object, then allocate()
// will give us a pinned object anyway.
if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
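To make the alignment bookkeeping above easier to follow, here is a standalone sketch (not the RTS implementation): the request is over-allocated by alignment_w words, the words skipped to reach an aligned address are zeroed so that the mark and scavenge loops can step over them, and the aligned pointer is returned. The helper align_offset_words() is hypothetical; the RTS uses its own ALIGN_WITH_OFF_W macro, and the handling of align_off and of the trailing slop is omitted here for brevity.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

typedef uintptr_t W;                       /* stand-in for StgWord / W_ */

/* Words of padding needed so that p + offset is 'alignment'-byte aligned
 * (alignment is assumed to be a power of two). */
static size_t align_offset_words(const W *p, size_t alignment)
{
    uintptr_t rem = (uintptr_t)p & (alignment - 1);
    return rem == 0 ? 0 : (alignment - rem) / sizeof(W);
}

int main(void)
{
    size_t n = 4;                          /* payload size in words */
    size_t alignment = 32;                 /* requested alignment in bytes */
    size_t alignment_w = alignment / sizeof(W);

    /* Over-allocate, as nonmovingAllocate(cap, n + alignment_w) does above. */
    W *block = malloc((n + alignment_w) * sizeof(W));
    if (block == NULL) return 1;

    size_t off_w = align_offset_words(block, alignment);
    memset(block, 0, off_w * sizeof(W));   /* zero the pre-padding: readers skip NULLs */
    W *p = block + off_w;

    printf("padding words: %zu, aligned: %s\n",
           off_w, ((uintptr_t)p % alignment == 0) ? "yes" : "no");
    free(block);
    return 0;
}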