Diffstat (limited to 'rts/sm/Storage.c')
-rw-r--r--  rts/sm/Storage.c | 232
 1 file changed, 133 insertions(+), 99 deletions(-)
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 717c96ae81..a9a7857d43 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -6,7 +6,7 @@
*
* Documentation on the architecture of the Storage Manager can be
* found in the online commentary:
- *
+ *
* http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
*
* ---------------------------------------------------------------------------*/
@@ -37,7 +37,7 @@
#include "ffi.h"
-/*
+/*
* All these globals require sm_mutex to access in THREADED_RTS mode.
*/
StgIndStatic *dyn_caf_list = NULL;
@@ -54,9 +54,22 @@ generation *generations = NULL; /* all the generations */
generation *g0 = NULL; /* generation 0, for convenience */
generation *oldest_gen = NULL; /* oldest generation, for convenience */
-nursery *nurseries = NULL; /* array of nurseries, size == n_capabilities */
+/*
+ * Array of nurseries, size == n_capabilities
+ *
+ * nursery[i] belongs to NUMA node (i % RtsFlags.GcFlags.nNumaNodes)
+ * This is chosen to be the same convention as capabilities[i], so
+ * that when not using nursery chunks (+RTS -n), we just map
+ * capabilities to nurseries 1:1.
+ */
+nursery *nurseries = NULL;
uint32_t n_nurseries;
-volatile StgWord next_nursery = 0;
+
+/*
+ * When we are using nursery chunks, we need a separate next_nursery
+ * pointer for each NUMA node.
+ */
+volatile StgWord next_nursery[MAX_NUMA_NODES];
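The two comments above pin down the whole NUMA scheme: nursery i lives on node i % nNumaNodes, the same convention already used for capabilities[i]. A minimal sketch of the mapping (not part of the patch; nursery_node is a hypothetical helper):

    /* With nNumaNodes == 2 and n_nurseries == 8, node 0 owns
     * nurseries 0,2,4,6 and node 1 owns nurseries 1,3,5,7. */
    static uint32_t nursery_node (uint32_t i, uint32_t nNumaNodes)
    {
        return i % nNumaNodes;
    }

The capNoToNumaNode() calls introduced later in the patch presumably apply the same rule to capability numbers, which is what makes the 1:1 capability-to-nursery pairing work when nursery chunks are disabled.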
#ifdef THREADED_RTS
/*
@@ -104,7 +117,7 @@ initGeneration (generation *gen, int g)
void
initStorage (void)
{
- uint32_t g;
+ uint32_t g, n;
if (generations != NULL) {
// multi-init protection
@@ -120,22 +133,22 @@ initStorage (void)
ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
-
+
if (RtsFlags.GcFlags.maxHeapSize != 0 &&
- RtsFlags.GcFlags.heapSizeSuggestion >
+ RtsFlags.GcFlags.heapSizeSuggestion >
RtsFlags.GcFlags.maxHeapSize) {
RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
}
if (RtsFlags.GcFlags.maxHeapSize != 0 &&
- RtsFlags.GcFlags.minAllocAreaSize >
+ RtsFlags.GcFlags.minAllocAreaSize >
RtsFlags.GcFlags.maxHeapSize) {
errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
}
initBlockAllocator();
-
+
#if defined(THREADED_RTS)
initMutex(&sm_mutex);
#endif
@@ -143,7 +156,7 @@ initStorage (void)
ACQUIRE_SM_LOCK;
/* allocate generation info array */
- generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
+ generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
* sizeof(struct generation_),
"initStorage: gens");
@@ -161,7 +174,7 @@ initStorage (void)
generations[g].to = &generations[g+1];
}
oldest_gen->to = oldest_gen;
-
+
/* The oldest generation has one step. */
if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
if (RtsFlags.GcFlags.generations == 1) {
@@ -178,7 +191,7 @@ initStorage (void)
dyn_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
-
+
if (RtsFlags.GcFlags.largeAllocLim > 0) {
large_alloc_lim = RtsFlags.GcFlags.largeAllocLim * BLOCK_SIZE_W;
} else {
@@ -196,7 +209,9 @@ initStorage (void)
N = 0;
- next_nursery = 0;
+ for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) {
+ next_nursery[n] = n;
+ }
storageAddCapabilities(0, n_capabilities);
IF_DEBUG(gc, statDescribeGens());
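The loop above seeds one cursor per node: next_nursery[n] starts at n and, as the later hunks show, is always advanced by nNumaNodes, so each node walks its own interleaved slice of the nursery array. A worked example, assuming 2 nodes and 8 nursery chunks:

    node 0: claims nurseries 0, 2, 4, 6, then sees cursor 8 >= n_nurseries
    node 1: claims nurseries 1, 3, 5, 7, then sees cursor 9 >= n_nurseries

The slices are disjoint, so two nodes never contend for the same index; within a node, the cas() in getNewNursery (below) arbitrates between capabilities.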
@@ -257,7 +272,8 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
// allocate a block for each mut list
for (n = from; n < to; n++) {
for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
- capabilities[n]->mut_lists[g] = allocBlock();
+ capabilities[n]->mut_lists[g] =
+ allocBlockOnNode(capNoToNumaNode(n));
}
}
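This is the first occurrence of a pattern repeated throughout the patch: each block-allocator entry point grows an ...OnNode variant that takes the target NUMA node as an extra leading argument, and call sites pass the node of the owning capability so per-capability data lands in node-local memory. The substitutions visible in this diff:

    allocBlock()              ->  allocBlockOnNode(node)
    allocGroup(n_blocks)      ->  allocGroupOnNode(node, n_blocks)
    allocLargeChunk(min, max) ->  allocLargeChunkOnNode(node, min, max)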
@@ -526,7 +542,7 @@ StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf)
-------------------------------------------------------------------------- */
static bdescr *
-allocNursery (bdescr *tail, W_ blocks)
+allocNursery (uint32_t node, bdescr *tail, W_ blocks)
{
bdescr *bd = NULL;
W_ i, n;
@@ -542,7 +558,7 @@ allocNursery (bdescr *tail, W_ blocks)
// allocLargeChunk will prefer large chunks, but will pick up
// small chunks if there are any available. We must allow
// single blocks here to avoid fragmentation (#7257)
- bd = allocLargeChunk(1, n);
+ bd = allocLargeChunkOnNode(node, 1, n);
n = bd->blocks;
blocks -= n;
@@ -584,6 +600,7 @@ assignNurseryToCapability (Capability *cap, uint32_t n)
cap->r.rCurrentNursery = nurseries[n].blocks;
newNurseryBlock(nurseries[n].blocks);
cap->r.rCurrentAlloc = NULL;
+ ASSERT(cap->r.rCurrentNursery->node == cap->node);
}
/*
@@ -593,16 +610,18 @@ assignNurseryToCapability (Capability *cap, uint32_t n)
static void
assignNurseriesToCapabilities (uint32_t from, uint32_t to)
{
- uint32_t i;
+ uint32_t i, node;
for (i = from; i < to; i++) {
- assignNurseryToCapability(capabilities[i], next_nursery++);
+ node = capabilities[i]->node;
+ assignNurseryToCapability(capabilities[i], next_nursery[node]);
+ next_nursery[node] += RtsFlags.GcFlags.nNumaNodes;
}
}
static void
allocNurseries (uint32_t from, uint32_t to)
-{
+{
uint32_t i;
memcount n_blocks;
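Concretely, assignNurseriesToCapabilities now hands each capability the next unclaimed chunk on its own node and bumps that node's cursor by the stride. A worked example, assuming 2 nodes and 4 capabilities (caps 0 and 2 on node 0, caps 1 and 3 on node 1, by the usual modulo convention):

    cap 0 (node 0) gets nursery 0;  next_nursery[0] becomes 2
    cap 1 (node 1) gets nursery 1;  next_nursery[1] becomes 3
    cap 2 (node 0) gets nursery 2;  next_nursery[0] becomes 4
    cap 3 (node 1) gets nursery 3;  next_nursery[1] becomes 5

When nursery chunks are not in use (n_nurseries == n_capabilities) this reduces to the 1:1 capability-to-nursery mapping, and the new ASSERT in assignNurseryToCapability checks that each block's node matches the capability's.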
@@ -613,24 +632,28 @@ allocNurseries (uint32_t from, uint32_t to)
}
for (i = from; i < to; i++) {
- nurseries[i].blocks = allocNursery(NULL, n_blocks);
+ nurseries[i].blocks = allocNursery(capNoToNumaNode(i), NULL, n_blocks);
nurseries[i].n_blocks = n_blocks;
}
}
-
+
void
resetNurseries (void)
{
- next_nursery = 0;
+ uint32_t n;
+
+ for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) {
+ next_nursery[n] = n;
+ }
assignNurseriesToCapabilities(0, n_capabilities);
#ifdef DEBUG
bdescr *bd;
- uint32_t n;
for (n = 0; n < n_nurseries; n++) {
for (bd = nurseries[n].blocks; bd; bd = bd->link) {
ASSERT(bd->gen_no == 0);
ASSERT(bd->gen == g0);
+ ASSERT(bd->node == capNoToNumaNode(n));
IF_DEBUG(sanity, memset(bd->start, 0xaa, BLOCK_SIZE));
}
}
@@ -649,56 +672,54 @@ countNurseryBlocks (void)
return blocks;
}
-static void
-resizeNursery (nursery *nursery, W_ blocks)
-{
- bdescr *bd;
- W_ nursery_blocks;
-
- nursery_blocks = nursery->n_blocks;
- if (nursery_blocks == blocks) return;
-
- if (nursery_blocks < blocks) {
- debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
- blocks);
- nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
- }
- else {
- bdescr *next_bd;
-
- debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
- blocks);
-
- bd = nursery->blocks;
- while (nursery_blocks > blocks) {
- next_bd = bd->link;
- next_bd->u.back = NULL;
- nursery_blocks -= bd->blocks; // might be a large block
- freeGroup(bd);
- bd = next_bd;
- }
- nursery->blocks = bd;
- // might have gone just under, by freeing a large block, so make
- // up the difference.
- if (nursery_blocks < blocks) {
- nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
- }
- }
-
- nursery->n_blocks = blocks;
- ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
-}
-
-//
+//
// Resize each of the nurseries to the specified size.
//
static void
resizeNurseriesEach (W_ blocks)
{
- uint32_t i;
+ uint32_t i, node;
+ bdescr *bd;
+ W_ nursery_blocks;
+ nursery *nursery;
for (i = 0; i < n_nurseries; i++) {
- resizeNursery(&nurseries[i], blocks);
+ nursery = &nurseries[i];
+ nursery_blocks = nursery->n_blocks;
+ if (nursery_blocks == blocks) continue;
+
+ node = capNoToNumaNode(i);
+ if (nursery_blocks < blocks) {
+ debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
+ blocks);
+ nursery->blocks = allocNursery(node, nursery->blocks,
+ blocks-nursery_blocks);
+ }
+ else
+ {
+ bdescr *next_bd;
+
+ debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
+ blocks);
+
+ bd = nursery->blocks;
+ while (nursery_blocks > blocks) {
+ next_bd = bd->link;
+ next_bd->u.back = NULL;
+ nursery_blocks -= bd->blocks; // might be a large block
+ freeGroup(bd);
+ bd = next_bd;
+ }
+ nursery->blocks = bd;
+ // might have gone just under, by freeing a large block, so make
+ // up the difference.
+ if (nursery_blocks < blocks) {
+ nursery->blocks = allocNursery(node, nursery->blocks,
+ blocks-nursery_blocks);
+ }
+ }
+ nursery->n_blocks = blocks;
+ ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
}
}
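The shrink branch folded into resizeNurseriesEach above has one subtlety: freeGroup() releases a whole group, and because allocNursery builds chains out of large chunks, freeing the last group can drop the count below the target, which the final allocNursery call repairs. A simplified, self-contained model of that shape (hypothetical grp type standing in for bdescr; free() standing in for freeGroup()):

    #include <stdlib.h>

    typedef struct grp { struct grp *link; unsigned blocks; } grp;

    /* Free groups from the head of the chain until at most `target`
     * blocks remain; the caller must then re-add target - *current
     * blocks if a multi-block group made us overshoot. */
    static grp *shrink (grp *chain, unsigned *current, unsigned target)
    {
        while (*current > target) {
            grp *next = chain->link;
            *current -= chain->blocks;   /* may be a large group */
            free(chain);
            chain = next;
        }
        return chain;
    }

For instance, shrinking a 13-block chain to 10 when the head group holds 4 blocks lands on 9 blocks, and the trailing allocNursery(node, ...) call adds the missing one back.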
@@ -716,7 +737,7 @@ resizeNurseriesFixed (void)
resizeNurseriesEach(blocks);
}
-//
+//
// Resize the nurseries to the total specified size.
//
void
@@ -731,18 +752,42 @@ rtsBool
getNewNursery (Capability *cap)
{
StgWord i;
+ uint32_t node = cap->node;
+ uint32_t n;
for(;;) {
- i = next_nursery;
- if (i >= n_nurseries) {
+ i = next_nursery[node];
+ if (i < n_nurseries) {
+ if (cas(&next_nursery[node], i,
+ i+RtsFlags.GcFlags.nNumaNodes) == i) {
+ assignNurseryToCapability(cap, i);
+ return rtsTrue;
+ }
+ } else if (RtsFlags.GcFlags.nNumaNodes > 1) {
+ // Try to find an unused nursery chunk on other nodes. We'll get
+ // remote memory, but the rationale is that avoiding GC is better
+ // than avoiding remote memory access.
+ rtsBool lost = rtsFalse;
+ for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) {
+ if (n == node) continue;
+ i = next_nursery[n];
+ if (i < n_nurseries) {
+ if (cas(&next_nursery[n], i,
+ i+RtsFlags.GcFlags.nNumaNodes) == i) {
+ assignNurseryToCapability(cap, i);
+ return rtsTrue;
+ } else {
+ lost = rtsTrue; /* lost a race */
+ }
+ }
+ }
+ if (!lost) return rtsFalse;
+ } else {
return rtsFalse;
}
- if (cas(&next_nursery, i, i+1) == i) {
- assignNurseryToCapability(cap, i);
- return rtsTrue;
- }
}
}
+
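The claim step in getNewNursery is lock-free: the per-node cursor is advanced with cas(), so capabilities on the same node can race, and a loser simply retries the outer loop. The cross-node fallback then distinguishes "every node is exhausted" (return rtsFalse, letting the caller trigger a GC) from "chunks remain but we lost a race" (retry) via the lost flag. A sketch of the claim step in portable C11 atomics (illustrative only; the RTS uses its own cas() primitive and types):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Try to claim one index from a per-node cursor, advancing the
     * cursor by `stride` (the number of NUMA nodes) on success. */
    static bool claim_chunk (_Atomic unsigned *cursor, unsigned limit,
                             unsigned stride, unsigned *out)
    {
        unsigned i = atomic_load (cursor);
        while (i < limit) {
            /* On failure the CAS reloads i with the cursor's current
             * value and we retry; on success, index i is ours. */
            if (atomic_compare_exchange_weak (cursor, &i, i + stride)) {
                *out = i;
                return true;
            }
        }
        return false;   /* cursor ran past limit: node exhausted */
    }

Claiming a chunk on a remote node trades memory locality for postponing a GC, which is exactly the trade-off the new comment spells out.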
/* -----------------------------------------------------------------------------
move_STACK is called to update the TSO structure after it has been
moved from one place to another.
@@ -753,8 +798,8 @@ move_STACK (StgStack *src, StgStack *dest)
{
ptrdiff_t diff;
- // relocate the stack pointer...
- diff = (StgPtr)dest - (StgPtr)src; // In *words*
+ // relocate the stack pointer...
+ diff = (StgPtr)dest - (StgPtr)src; // In *words*
dest->sp = (StgPtr)dest->sp + diff;
}
@@ -818,7 +863,7 @@ allocate (Capability *cap, W_ n)
}
ACQUIRE_SM_LOCK
- bd = allocGroup(req_blocks);
+ bd = allocGroupOnNode(cap->node,req_blocks);
dbl_link_onto(bd, &g0->large_objects);
g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
g0->n_new_large_words += n;
@@ -834,19 +879,19 @@ allocate (Capability *cap, W_ n)
bd = cap->r.rCurrentAlloc;
if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
-
+
if (bd) finishedNurseryBlock(cap,bd);
// The CurrentAlloc block is full, we need to find another
// one. First, we try taking the next block from the
// nursery:
bd = cap->r.rCurrentNursery->link;
-
+
if (bd == NULL) {
// The nursery is empty: allocate a fresh block (we can't
// fail here).
ACQUIRE_SM_LOCK;
- bd = allocBlock();
+ bd = allocBlockOnNode(cap->node);
cap->r.rNursery->n_blocks++;
RELEASE_SM_LOCK;
initBdescr(bd, g0, g0);
@@ -944,7 +989,7 @@ allocatePinned (Capability *cap, W_ n)
}
bd = cap->pinned_object_block;
-
+
// If we don't have a block of pinned objects yet, or the current
// one isn't large enough to hold the new object, get a new one.
if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
@@ -964,24 +1009,13 @@ allocatePinned (Capability *cap, W_ n)
// objects scale really badly if we do this).
//
// So first, we try taking the next block from the nursery, in
- // the same way as allocate(), but note that we can only take
- // an *empty* block, because we're about to mark it as
- // BF_PINNED | BF_LARGE.
+ // the same way as allocate().
bd = cap->r.rCurrentNursery->link;
- if (bd == NULL) { // must be empty!
- // The nursery is empty, or the next block is non-empty:
- // allocate a fresh block (we can't fail here).
-
- // XXX in the case when the next nursery block is
- // non-empty we aren't exerting any pressure to GC soon,
- // so if this case ever happens then we could in theory
- // keep allocating for ever without calling the GC. We
- // can't bump g0->n_new_large_words because that will be
- // counted towards allocation, and we're already counting
- // our pinned obects as allocation in
- // collect_pinned_object_blocks in the GC.
+ if (bd == NULL) {
+ // The nursery is empty: allocate a fresh block (we can't fail
+ // here).
ACQUIRE_SM_LOCK;
- bd = allocBlock();
+ bd = allocBlockOnNode(cap->node);
RELEASE_SM_LOCK;
initBdescr(bd, g0, g0);
} else {
@@ -1217,13 +1251,13 @@ W_ gcThreadLiveBlocks (uint32_t i, uint32_t g)
* that will be collected next time will therefore need twice as many
* blocks since all the data will be copied.
*/
-extern W_
+extern W_
calcNeeded (rtsBool force_major, memcount *blocks_needed)
{
W_ needed = 0, blocks;
uint32_t g, N;
generation *gen;
-
+
if (force_major) {
N = RtsFlags.GcFlags.generations - 1;
} else {
@@ -1238,7 +1272,7 @@ calcNeeded (rtsBool force_major, memcount *blocks_needed)
// we need at least this much space
needed += blocks;
-
+
// are we collecting this gen?
if (g == 0 || // always collect gen 0
blocks > gen->max_blocks)