-rw-r--r--  includes/rts/storage/Block.h            7
-rw-r--r--  rts/Schedule.c                         90
-rw-r--r--  rts/Schedule.h                          6
-rw-r--r--  rts/sm/BlockAlloc.c                   144
-rw-r--r--  testsuite/tests/rts/testblockalloc.c  121
5 files changed, 307 insertions, 61 deletions
diff --git a/includes/rts/storage/Block.h b/includes/rts/storage/Block.h
index ecd6bf5dd8..792a72d717 100644
--- a/includes/rts/storage/Block.h
+++ b/includes/rts/storage/Block.h
@@ -290,6 +290,13 @@ EXTERN_INLINE bdescr* allocBlock(void)
bdescr *allocGroupOnNode(uint32_t node, W_ n);
+// Allocate n blocks, aligned at an n-block boundary. The returned bdescr will
+// satisfy the invariant
+//
+//     bdescr->start % (BLOCK_SIZE*n) == 0
+//
+bdescr *allocAlignedGroupOnNode(uint32_t node, W_ n);
+
EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node);
EXTERN_INLINE bdescr* allocBlockOnNode(uint32_t node)
{
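
To illustrate the invariant documented above: from any pointer into an n-block aligned group, the group's start can be recovered by rounding down to a multiple of n*BLOCK_SIZE. The sketch below is illustrative only and not part of the patch; the helper name is hypothetical, and it assumes the usual RTS types (W_, BLOCK_SIZE) are in scope.

    // Hypothetical helper, not part of this patch: recover the start of an
    // n-block aligned group from an interior pointer p, relying on the
    // invariant bdescr->start % (BLOCK_SIZE*n) == 0.
    static void *alignedGroupStart(void *p, W_ n)
    {
        W_ group_size = n * BLOCK_SIZE;
        // Round p down to the nearest multiple of group_size. A bitmask
        // (~(group_size - 1)) would also work when n is a power of two, as
        // it is for the non-moving collector's segments.
        return (void *)((W_)p - ((W_)p % group_size));
    }
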
diff --git a/rts/Schedule.c b/rts/Schedule.c
index 7ffd44d22f..8d7acc963e 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -110,6 +110,19 @@ Mutex sched_mutex;
#define FORKPROCESS_PRIMOP_SUPPORTED
#endif
+/*
+ * sync_finished_cond allows threads which do not own any capability (e.g. the
+ * concurrent mark thread) to participate in the sync protocol. In particular,
+ * if such a thread requests a sync while sync is already in progress it will
+ * block on sync_finished_cond, which will be signalled when the sync is
+ * finished (by releaseAllCapabilities).
+ */
+#if defined(THREADED_RTS)
+static Condition sync_finished_cond;
+static Mutex sync_finished_mutex;
+#endif
+
+
/* -----------------------------------------------------------------------------
* static function prototypes
* -------------------------------------------------------------------------- */
@@ -130,7 +143,6 @@ static void scheduleYield (Capability **pcap, Task *task);
static bool requestSync (Capability **pcap, Task *task,
PendingSync *sync_type, SyncType *prev_sync_type);
static void acquireAllCapabilities(Capability *cap, Task *task);
-static void releaseAllCapabilities(uint32_t n, Capability *cap, Task *task);
static void startWorkerTasks (uint32_t from USED_IF_THREADS,
uint32_t to USED_IF_THREADS);
#endif
@@ -1368,17 +1380,24 @@ scheduleNeedHeapProfile( bool ready_to_gc )
* change to the system, such as altering the number of capabilities, or
* forking.
*
+ * pCap may be NULL in the event that the caller doesn't yet own a capability.
+ *
* To resume after stopAllCapabilities(), use releaseAllCapabilities().
* -------------------------------------------------------------------------- */
#if defined(THREADED_RTS)
-static void stopAllCapabilities (Capability **pCap, Task *task)
+void stopAllCapabilities (Capability **pCap, Task *task)
+{
+ stopAllCapabilitiesWith(pCap, task, SYNC_OTHER);
+}
+
+void stopAllCapabilitiesWith (Capability **pCap, Task *task, SyncType sync_type)
{
bool was_syncing;
SyncType prev_sync_type;
PendingSync sync = {
- .type = SYNC_OTHER,
+ .type = sync_type,
.idle = NULL,
.task = task
};
@@ -1387,9 +1406,10 @@ static void stopAllCapabilities (Capability **pCap, Task *task)
was_syncing = requestSync(pCap, task, &sync, &prev_sync_type);
} while (was_syncing);
- acquireAllCapabilities(*pCap,task);
+ acquireAllCapabilities(pCap ? *pCap : NULL, task);
pending_sync = 0;
+ signalCondition(&sync_finished_cond);
}
#endif
@@ -1400,6 +1420,16 @@ static void stopAllCapabilities (Capability **pCap, Task *task)
* directly, instead use stopAllCapabilities(). This is used by the GC, which
* has some special synchronisation requirements.
*
+ * Note that this can be called in two ways:
+ *
+ * - where *pcap points to a capability owned by the caller: in this case
+ *   *prev_sync_type will reflect the in-progress sync type on return, if
+ *   one was found.
+ *
+ * - where pcap == NULL: in this case the caller doesn't hold a capability.
+ *   We only return whether or not a pending sync was found, and
+ *   prev_sync_type is unchanged.
+ *
* Returns:
* false if we successfully got a sync
* true if there was another sync request in progress,
@@ -1424,13 +1454,25 @@ static bool requestSync (
// After the sync is completed, we cannot read that struct any
// more because it has been freed.
*prev_sync_type = sync->type;
- do {
- debugTrace(DEBUG_sched, "someone else is trying to sync (%d)...",
- sync->type);
- ASSERT(*pcap);
- yieldCapability(pcap,task,true);
- sync = pending_sync;
- } while (sync != NULL);
+ if (pcap == NULL) {
+        // The caller does not hold a capability (e.g. it may be a concurrent
+        // mark thread). Consequently we must wait until the pending sync is
+        // finished before proceeding, to ensure we don't loop.
+ ACQUIRE_LOCK(&sync_finished_mutex);
+ while (pending_sync) {
+ waitCondition(&sync_finished_cond, &sync_finished_mutex);
+ }
+ RELEASE_LOCK(&sync_finished_mutex);
+ } else {
+ do {
+ debugTrace(DEBUG_sched, "someone else is trying to sync (%d)...",
+ sync->type);
+ ASSERT(*pcap);
+ yieldCapability(pcap,task,true);
+ sync = pending_sync;
+ } while (sync != NULL);
+ }
// NOTE: task->cap might have changed now
return true;
@@ -1445,9 +1487,9 @@ static bool requestSync (
/* -----------------------------------------------------------------------------
* acquireAllCapabilities()
*
- * Grab all the capabilities except the one we already hold. Used
- * when synchronising before a single-threaded GC (SYNC_SEQ_GC), and
- * before a fork (SYNC_OTHER).
+ * Grab all the capabilities except the one we already hold (cap may be NULL if
+ * the caller does not currently hold a capability). Used when synchronising
+ * before a single-threaded GC (SYNC_SEQ_GC), and before a fork (SYNC_OTHER).
*
* Only call this after requestSync(), otherwise a deadlock might
* ensue if another thread is trying to synchronise.
@@ -1477,29 +1519,30 @@ static void acquireAllCapabilities(Capability *cap, Task *task)
}
}
}
- task->cap = cap;
+ task->cap = cap == NULL ? tmpcap : cap;
}
#endif
/* -----------------------------------------------------------------------------
- * releaseAllcapabilities()
+ * releaseAllCapabilities()
*
- * Assuming this thread holds all the capabilities, release them all except for
- * the one passed in as cap.
+ * Assuming this thread holds all the capabilities, release them all (except for
+ * the one passed in as keep_cap, if non-NULL).
* -------------------------------------------------------------------------- */
#if defined(THREADED_RTS)
-static void releaseAllCapabilities(uint32_t n, Capability *cap, Task *task)
+void releaseAllCapabilities(uint32_t n, Capability *keep_cap, Task *task)
{
uint32_t i;
for (i = 0; i < n; i++) {
- if (cap->no != i) {
- task->cap = capabilities[i];
- releaseCapability(capabilities[i]);
+ Capability *tmpcap = capabilities[i];
+ if (keep_cap != tmpcap) {
+ task->cap = tmpcap;
+ releaseCapability(tmpcap);
}
}
- task->cap = cap;
+ task->cap = keep_cap;
}
#endif
@@ -1801,6 +1844,7 @@ delete_threads_and_gc:
// reset pending_sync *before* GC, so that when the GC threads
// emerge they don't immediately re-enter the GC.
pending_sync = 0;
+ signalCondition(&sync_finished_cond);
GarbageCollect(collect_gen, heap_census, gc_type, cap, idle_cap);
#else
GarbageCollect(collect_gen, heap_census, 0, cap, NULL);
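
For context on how the new entry points fit together: a thread that owns no capability (such as the concurrent mark thread mentioned above) can now stop and restart the world directly. The sketch below is an assumption about intended usage, not code from this patch; the function name is hypothetical, SYNC_OTHER stands in for whatever sync type the real caller would use, and n_capabilities is the existing RTS global.

    // Hypothetical usage sketch (not part of this patch).
    static void stopTheWorldWithoutCapability(Task *task)
    {
        // pCap == NULL: we hold no capability. If another sync is already in
        // progress, requestSync() blocks on sync_finished_cond until it ends.
        stopAllCapabilitiesWith(NULL, task, SYNC_OTHER);

        // ... do work that requires all mutators to be stopped ...

        // keep_cap == NULL: release every capability; the task keeps none.
        releaseAllCapabilities(n_capabilities, NULL, task);
    }
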
diff --git a/rts/Schedule.h b/rts/Schedule.h
index 66cf8391f3..a4772295d9 100644
--- a/rts/Schedule.h
+++ b/rts/Schedule.h
@@ -49,6 +49,12 @@ StgWord findRetryFrameHelper (Capability *cap, StgTSO *tso);
/* Entry point for a new worker */
void scheduleWorker (Capability *cap, Task *task);
+#if defined(THREADED_RTS)
+void stopAllCapabilitiesWith (Capability **pCap, Task *task, SyncType sync_type);
+void stopAllCapabilities (Capability **pCap, Task *task);
+void releaseAllCapabilities(uint32_t n, Capability *keep_cap, Task *task);
+#endif
+
/* The state of the scheduler. This is used to control the sequence
* of events during shutdown. See Note [shutdown] in Schedule.c.
*/
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index f9e3d11407..b3e1e2ce75 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -310,7 +310,7 @@ setup_tail (bdescr *bd)
// Take a free block group bd, and split off a group of size n from
// it. Adjust the free list as necessary, and return the new group.
static bdescr *
-split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln)
+split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln /* log_2_ceil(n) */)
{
bdescr *fg; // free group
@@ -325,6 +325,46 @@ split_free_block (bdescr *bd, uint32_t node, W_ n, uint32_t ln)
return fg;
}
+// Take N blocks off the end, free the rest.
+static bdescr *
+split_block_high (bdescr *bd, W_ n)
+{
+ ASSERT(bd->blocks > n);
+
+ bdescr* ret = bd + bd->blocks - n; // take n blocks off the end
+ ret->blocks = n;
+ ret->start = ret->free = bd->start + (bd->blocks - n)*BLOCK_SIZE_W;
+ ret->link = NULL;
+
+ bd->blocks -= n;
+
+ setup_tail(ret);
+ setup_tail(bd);
+ freeGroup(bd);
+
+ return ret;
+}
+
+// Like `split_block_high`, but takes n blocks off the beginning rather
+// than the end.
+static bdescr *
+split_block_low (bdescr *bd, W_ n)
+{
+ ASSERT(bd->blocks > n);
+
+ bdescr* bd_ = bd + n;
+ bd_->blocks = bd->blocks - n;
+ bd_->start = bd_->free = bd->start + n*BLOCK_SIZE_W;
+
+ bd->blocks = n;
+
+ setup_tail(bd_);
+ setup_tail(bd);
+ freeGroup(bd_);
+
+ return bd;
+}
+
/* Only initializes the start pointers on the first megablock and the
* blocks field of the first bdescr; callers are responsible for calling
* initGroup afterwards.
@@ -461,6 +501,108 @@ finish:
return bd;
}
+// Allocate `n` blocks aligned to `n` blocks, e.g. when n = 8, the blocks will
+// be aligned at `8 * BLOCK_SIZE`. For a group with `n` blocks this makes it
+// easy to find the beginning of the group from a location p inside the group:
+// the offset of p within the group is
+//
+//     p % (BLOCK_SIZE*n)
+//
+// so the group starts at p - (p % (BLOCK_SIZE*n)).
+//
+// Used by the non-moving collector for allocating segments.
+//
+// Because the storage manager does not support aligned allocations, we have to
+// allocate `2*n - 1` blocks here to make sure we'll be able to find an aligned
+// region within the allocated blocks. After finding the aligned area we want to
+// free the slop on the low and high sides, and the block allocator doesn't
+// support freeing only a portion of a megablock (we can only free whole
+// megablocks). So we disallow allocating megablocks here, and allow allocating
+// at most `BLOCKS_PER_MBLOCK / 2` blocks.
+bdescr *
+allocAlignedGroupOnNode (uint32_t node, W_ n)
+{
+ // allocate enough blocks to have enough space aligned at n-block boundary
+ // free any slops on the low and high side of this space
+
+ // number of blocks to allocate to make sure we have enough aligned space
+ W_ num_blocks = 2*n - 1;
+
+ if (num_blocks >= BLOCKS_PER_MBLOCK) {
+ barf("allocAlignedGroupOnNode: allocating megablocks is not supported\n"
+ " requested blocks: %" FMT_Word "\n"
+ " required for alignment: %" FMT_Word "\n"
+ " megablock size (in blocks): %" FMT_Word,
+ n, num_blocks, (W_) BLOCKS_PER_MBLOCK);
+ }
+
+ W_ group_size = n * BLOCK_SIZE;
+
+ // To reduce splitting and fragmentation we use allocLargeChunkOnNode here.
+ // Tweak the max allocation to avoid allocating megablocks. Splitting slop
+ // below doesn't work with megablocks (freeGroup can't free only a portion
+ // of a megablock so we can't allocate megablocks and free some parts of
+ // them).
+ W_ max_blocks = stg_min(num_blocks * 3, BLOCKS_PER_MBLOCK - 1);
+ bdescr *bd = allocLargeChunkOnNode(node, num_blocks, max_blocks);
+ // We may allocate more than num_blocks, so update it
+ num_blocks = bd->blocks;
+
+ // slop on the low side
+ W_ slop_low = 0;
+ if ((uintptr_t)bd->start % group_size != 0) {
+ slop_low = group_size - ((uintptr_t)bd->start % group_size);
+ }
+
+ W_ slop_high = (num_blocks * BLOCK_SIZE) - group_size - slop_low;
+
+ ASSERT((slop_low % BLOCK_SIZE) == 0);
+ ASSERT((slop_high % BLOCK_SIZE) == 0);
+
+ W_ slop_low_blocks = slop_low / BLOCK_SIZE;
+ W_ slop_high_blocks = slop_high / BLOCK_SIZE;
+
+ ASSERT(slop_low_blocks + slop_high_blocks + n == num_blocks);
+
+#if defined(DEBUG)
+ checkFreeListSanity();
+ W_ free_before = countFreeList();
+#endif
+
+ if (slop_low_blocks != 0) {
+ bd = split_block_high(bd, num_blocks - slop_low_blocks);
+ ASSERT(countBlocks(bd) == num_blocks - slop_low_blocks);
+ }
+
+#if defined(DEBUG)
+ ASSERT(countFreeList() == free_before + slop_low_blocks);
+ checkFreeListSanity();
+#endif
+
+ // At this point the bd should be aligned, but we may have slop on the high side
+ ASSERT((uintptr_t)bd->start % group_size == 0);
+
+#if defined(DEBUG)
+ free_before = countFreeList();
+#endif
+
+ if (slop_high_blocks != 0) {
+ bd = split_block_low(bd, n);
+ ASSERT(bd->blocks == n);
+ }
+
+#if defined(DEBUG)
+ ASSERT(countFreeList() == free_before + slop_high_blocks);
+ checkFreeListSanity();
+#endif
+
+ // Should still be aligned
+ ASSERT((uintptr_t)bd->start % group_size == 0);
+
+    // Sanity check: the descriptor for the group's start should be bd itself
+ ASSERT(Bdescr(bd->start) == bd);
+
+ return bd;
+}
+
STATIC_INLINE
uint32_t nodeWithLeastBlocks (void)
{
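
A concrete walk-through of the slop arithmetic in allocAlignedGroupOnNode may help; the numbers below are illustrative only and assume the usual 4096-byte BLOCK_SIZE.

    // Worked example (illustrative only). Let n = 4, so group_size = 0x4000,
    // and num_blocks = 2*n - 1 = 7. Suppose allocLargeChunkOnNode() returns
    // exactly 7 blocks whose start falls at offset 0x3000 from an aligned
    // boundary:
    //
    //   slop_low  = group_size - (0x3000 % group_size)    = 0x1000  (1 block)
    //   slop_high = 7*BLOCK_SIZE - group_size - slop_low   = 0x2000  (2 blocks)
    //
    // split_block_high() keeps the upper 6 blocks (whose start is now
    // 0x4000-aligned) and frees the 1 low block; split_block_low() then keeps
    // the first n = 4 blocks and frees the remaining 2. As the assertion
    // requires, slop_low_blocks + slop_high_blocks + n == 1 + 2 + 4 == 7.
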
diff --git a/testsuite/tests/rts/testblockalloc.c b/testsuite/tests/rts/testblockalloc.c
index 577245f45e..53eed24015 100644
--- a/testsuite/tests/rts/testblockalloc.c
+++ b/testsuite/tests/rts/testblockalloc.c
@@ -3,6 +3,7 @@
#include <stdio.h>
extern bdescr *allocGroup_lock(uint32_t n);
+extern bdescr *allocAlignedGroupOnNode (uint32_t node, W_ n);
extern void freeGroup_lock(bdescr *p);
const int ARRSIZE = 256;
@@ -13,64 +14,110 @@ const int SEED = 0xf00f00;
extern StgWord mblocks_allocated;
-int main (int argc, char *argv[])
+static void test_random_alloc(void)
{
- int i, j, b;
-
bdescr *a[ARRSIZE];
- srand(SEED);
+    // repeatedly sweep through the array, allocating new random-sized
+ // objects and deallocating the old ones.
+ for (int i=0; i < LOOPS; i++)
+ {
+ for (int j=0; j < ARRSIZE; j++)
+ {
+ if (i > 0)
+ {
+ IF_DEBUG(block_alloc, debugBelch("A%d: freeing %p, %d blocks @ %p\n", j, a[j], a[j]->blocks, a[j]->start));
+ freeGroup_lock(a[j]);
+ DEBUG_ONLY(checkFreeListSanity());
+ }
+
+ int b = (rand() % MAXALLOC) + 1;
+ a[j] = allocGroup_lock(b);
+ IF_DEBUG(block_alloc, debugBelch("A%d: allocated %p, %d blocks @ %p\n", j, a[j], b, a[j]->start));
+ // allocating zero blocks isn't allowed
+ DEBUG_ONLY(checkFreeListSanity());
+ }
+ }
+ for (int j=0; j < ARRSIZE; j++)
{
- RtsConfig conf = defaultRtsConfig;
- conf.rts_opts_enabled = RtsOptsAll;
- hs_init_ghc(&argc, &argv, conf);
+ freeGroup_lock(a[j]);
}
+}
+
+static void test_sequential_alloc(void)
+{
+ bdescr *a[ARRSIZE];
- // repeatedly sweep though the array, allocating new random-sized
- // objects and deallocating the old ones.
- for (i=0; i < LOOPS; i++)
- {
- for (j=0; j < ARRSIZE; j++)
- {
- if (i > 0)
- {
- IF_DEBUG(block_alloc, debugBelch("A%d: freeing %p, %d blocks @ %p\n", j, a[j], a[j]->blocks, a[j]->start));
- freeGroup_lock(a[j]);
- DEBUG_ONLY(checkFreeListSanity());
- }
- b = (rand() % MAXALLOC) + 1;
- a[j] = allocGroup_lock(b);
- IF_DEBUG(block_alloc, debugBelch("A%d: allocated %p, %d blocks @ %p\n", j, a[j], b, a[j]->start));
- // allocating zero blocks isn't allowed
- DEBUG_ONLY(checkFreeListSanity());
- }
- }
-
- for (j=0; j < ARRSIZE; j++)
- {
- freeGroup_lock(a[j]);
- }
-
// this time, sweep forwards allocating new blocks, and then
// backwards deallocating them.
- for (i=0; i < LOOPS; i++)
+ for (int i=0; i < LOOPS; i++)
{
- for (j=0; j < ARRSIZE; j++)
+ for (int j=0; j < ARRSIZE; j++)
{
- b = (rand() % MAXALLOC) + 1;
+ int b = (rand() % MAXALLOC) + 1;
a[j] = allocGroup_lock(b);
IF_DEBUG(block_alloc, debugBelch("B%d,%d: allocated %p, %d blocks @ %p\n", i, j, a[j], b, a[j]->start));
DEBUG_ONLY(checkFreeListSanity());
}
- for (j=ARRSIZE-1; j >= 0; j--)
+ for (int j=ARRSIZE-1; j >= 0; j--)
{
IF_DEBUG(block_alloc, debugBelch("B%d,%d: freeing %p, %d blocks @ %p\n", i, j, a[j], a[j]->blocks, a[j]->start));
freeGroup_lock(a[j]);
DEBUG_ONLY(checkFreeListSanity());
}
}
-
+}
+
+static void test_aligned_alloc(void)
+{
+ bdescr *a[ARRSIZE];
+
+    // sweep forwards allocating new aligned groups, and then
+    // backwards deallocating them.
+ for (int i=0; i < LOOPS; i++)
+ {
+ for (int j=0; j < ARRSIZE; j++)
+ {
+ // allocAlignedGroupOnNode does not support allocating more than
+ // BLOCKS_PER_MBLOCK/2 blocks.
+ int b = rand() % (BLOCKS_PER_MBLOCK / 2);
+ if (b == 0) { b = 1; }
+ a[j] = allocAlignedGroupOnNode(0, b);
+ if ((((W_)(a[j]->start)) % (b*BLOCK_SIZE)) != 0)
+ {
+ barf("%p is not aligned to allocation size %d", a[j], b);
+ }
+ IF_DEBUG(block_alloc, debugBelch("B%d,%d: allocated %p, %d blocks @ %p\n", i, j, a[j], b, a[j]->start));
+ DEBUG_ONLY(checkFreeListSanity());
+ }
+ for (int j=ARRSIZE-1; j >= 0; j--)
+ {
+ IF_DEBUG(block_alloc, debugBelch("B%d,%d: freeing %p, %d blocks @ %p\n", i, j, a[j], a[j]->blocks, a[j]->start));
+ freeGroup_lock(a[j]);
+ DEBUG_ONLY(checkFreeListSanity());
+ }
+ }
+}
+
+int main (int argc, char *argv[])
+{
+ srand(SEED);
+
+ {
+ RtsConfig conf = defaultRtsConfig;
+ conf.rts_opts_enabled = RtsOptsAll;
+ hs_init_ghc(&argc, &argv, conf);
+ }
+
+ test_random_alloc();
+ test_sequential_alloc();
+ test_aligned_alloc();
+
DEBUG_ONLY(checkFreeListSanity());
hs_exit(); // will do a memory leak test