Diffstat (limited to 'rts/sm/Storage.c')
-rw-r--r--   rts/sm/Storage.c   232
1 file changed, 133 insertions, 99 deletions
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 717c96ae81..a9a7857d43 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -6,7 +6,7 @@
  *
  * Documentation on the architecture of the Storage Manager can be
  * found in the online commentary:
- * 
+ *
  *   http://ghc.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
  *
  * ---------------------------------------------------------------------------*/
@@ -37,7 +37,7 @@
 
 #include "ffi.h"
 
-/* 
+/*
  * All these globals require sm_mutex to access in THREADED_RTS mode.
  */
 StgIndStatic *dyn_caf_list = NULL;
@@ -54,9 +54,22 @@ generation *generations = NULL; /* all the generations */
 generation *g0 = NULL; /* generation 0, for convenience */
 generation *oldest_gen = NULL; /* oldest generation, for convenience */
 
-nursery *nurseries = NULL;     /* array of nurseries, size == n_capabilities */
+/*
+ * Array of nurseries, size == n_capabilities
+ *
+ * nursery[i] belongs to NUMA node (i % RtsFlags.GcFlags.nNumaNodes)
+ * This is chosen to be the same convention as capabilities[i], so
+ * that when not using nursery chunks (+RTS -n), we just map
+ * capabilities to nurseries 1:1.
+ */
+nursery *nurseries = NULL;
 uint32_t n_nurseries;
-volatile StgWord next_nursery = 0;
+
+/*
+ * When we are using nursery chunks, we need a separate next_nursery
+ * pointer for each NUMA node.
+ */
+volatile StgWord next_nursery[MAX_NUMA_NODES];
 
 #ifdef THREADED_RTS
 /*
@@ -104,7 +117,7 @@ initGeneration (generation *gen, int g)
 void
 initStorage (void)
 {
-  uint32_t g;
+  uint32_t g, n;
 
   if (generations != NULL) {
       // multi-init protection
@@ -120,22 +133,22 @@ initStorage (void)
   ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
   ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
   ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
-  
+
   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
-      RtsFlags.GcFlags.heapSizeSuggestion > 
+      RtsFlags.GcFlags.heapSizeSuggestion >
       RtsFlags.GcFlags.maxHeapSize) {
       RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
   }
 
   if (RtsFlags.GcFlags.maxHeapSize != 0 &&
-      RtsFlags.GcFlags.minAllocAreaSize > 
+      RtsFlags.GcFlags.minAllocAreaSize >
       RtsFlags.GcFlags.maxHeapSize) {
      errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
      RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
   }
 
   initBlockAllocator();
-  
+
 #if defined(THREADED_RTS)
   initMutex(&sm_mutex);
 #endif
@@ -143,7 +156,7 @@ initStorage (void)
   ACQUIRE_SM_LOCK;
 
   /* allocate generation info array */
-  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations 
+  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
                                              * sizeof(struct generation_),
                                              "initStorage: gens");
@@ -161,7 +174,7 @@ initStorage (void)
       generations[g].to = &generations[g+1];
   }
   oldest_gen->to = oldest_gen;
-  
+
   /* The oldest generation has one step. */
   if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
       if (RtsFlags.GcFlags.generations == 1) {
@@ -178,7 +191,7 @@ initStorage (void)
   dyn_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
   debug_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
   revertible_caf_list = (StgIndStatic*)END_OF_CAF_LIST;
-   
+
   if (RtsFlags.GcFlags.largeAllocLim > 0) {
       large_alloc_lim = RtsFlags.GcFlags.largeAllocLim * BLOCK_SIZE_W;
   } else {
@@ -196,7 +209,9 @@ initStorage (void)
 
   N = 0;
 
-  next_nursery = 0;
+  for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) {
+      next_nursery[n] = n;
+  }
   storageAddCapabilities(0, n_capabilities);
 
   IF_DEBUG(gc, statDescribeGens());
@@ -257,7 +272,8 @@ void storageAddCapabilities (uint32_t from, uint32_t to)
    // allocate a block for each mut list
    for (n = from; n < to; n++) {
        for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
-            capabilities[n]->mut_lists[g] = allocBlock();
+            capabilities[n]->mut_lists[g] =
+                allocBlockOnNode(capNoToNumaNode(n));
        }
    }
@@ -526,7 +542,7 @@ StgInd* newGCdCAF (StgRegTable *reg, StgIndStatic *caf)
   -------------------------------------------------------------------------- */
 
 static bdescr *
-allocNursery (bdescr *tail, W_ blocks)
+allocNursery (uint32_t node, bdescr *tail, W_ blocks)
 {
    bdescr *bd = NULL;
    W_ i, n;
@@ -542,7 +558,7 @@ allocNursery (bdescr *tail, W_ blocks)
        // allocLargeChunk will prefer large chunks, but will pick up
        // small chunks if there are any available. We must allow
        // single blocks here to avoid fragmentation (#7257)
-        bd = allocLargeChunk(1, n);
+        bd = allocLargeChunkOnNode(node, 1, n);
        n = bd->blocks;
        blocks -= n;
@@ -584,6 +600,7 @@ assignNurseryToCapability (Capability *cap, uint32_t n)
    cap->r.rCurrentNursery = nurseries[n].blocks;
    newNurseryBlock(nurseries[n].blocks);
    cap->r.rCurrentAlloc = NULL;
+    ASSERT(cap->r.rCurrentNursery->node == cap->node);
 }
 
 /*
@@ -593,16 +610,18 @@ assignNurseryToCapability (Capability *cap, uint32_t n)
 static void
 assignNurseriesToCapabilities (uint32_t from, uint32_t to)
 {
-    uint32_t i;
+    uint32_t i, node;
 
    for (i = from; i < to; i++) {
-        assignNurseryToCapability(capabilities[i], next_nursery++);
+        node = capabilities[i]->node;
+        assignNurseryToCapability(capabilities[i], next_nursery[node]);
+        next_nursery[node] += RtsFlags.GcFlags.nNumaNodes;
    }
 }
 
 static void
 allocNurseries (uint32_t from, uint32_t to)
-{ 
+{
    uint32_t i;
    memcount n_blocks;
@@ -613,24 +632,28 @@ allocNurseries (uint32_t from, uint32_t to)
    }
 
    for (i = from; i < to; i++) {
-        nurseries[i].blocks = allocNursery(NULL, n_blocks);
+        nurseries[i].blocks = allocNursery(capNoToNumaNode(i), NULL, n_blocks);
        nurseries[i].n_blocks = n_blocks;
    }
 }
-      
+
 void
 resetNurseries (void)
 {
-    next_nursery = 0;
+    uint32_t n;
+
+    for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) {
+        next_nursery[n] = n;
+    }
    assignNurseriesToCapabilities(0, n_capabilities);
 
 #ifdef DEBUG
    bdescr *bd;
-    uint32_t n;
    for (n = 0; n < n_nurseries; n++) {
        for (bd = nurseries[n].blocks; bd; bd = bd->link) {
            ASSERT(bd->gen_no == 0);
            ASSERT(bd->gen == g0);
+            ASSERT(bd->node == capNoToNumaNode(n));
            IF_DEBUG(sanity, memset(bd->start, 0xaa, BLOCK_SIZE));
        }
    }
@@ -649,56 +672,54 @@ countNurseryBlocks (void)
    return blocks;
 }
 
-static void
-resizeNursery (nursery *nursery, W_ blocks)
-{
-  bdescr *bd;
-  W_ nursery_blocks;
-
-  nursery_blocks = nursery->n_blocks;
-  if (nursery_blocks == blocks) return;
-
-  if (nursery_blocks < blocks) {
-      debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
-                 blocks);
-      nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
-  }
-  else {
-      bdescr *next_bd;
-
-      debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
-                 blocks);
-
-      bd = nursery->blocks;
-      while (nursery_blocks > blocks) {
-          next_bd = bd->link;
-          next_bd->u.back = NULL;
-          nursery_blocks -= bd->blocks; // might be a large block
-          freeGroup(bd);
-          bd = next_bd;
-      }
-      nursery->blocks = bd;
-      // might have gone just under, by freeing a large block, so make
-      // up the difference.
-      if (nursery_blocks < blocks) {
-          nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
-      }
-  }
-
-  nursery->n_blocks = blocks;
-  ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
-}
-
-// 
+//
 // Resize each of the nurseries to the specified size.
 //
 static void
 resizeNurseriesEach (W_ blocks)
 {
-    uint32_t i;
+    uint32_t i, node;
+    bdescr *bd;
+    W_ nursery_blocks;
+    nursery *nursery;
 
    for (i = 0; i < n_nurseries; i++) {
-        resizeNursery(&nurseries[i], blocks);
+        nursery = &nurseries[i];
+        nursery_blocks = nursery->n_blocks;
+        if (nursery_blocks == blocks) continue;
+
+        node = capNoToNumaNode(i);
+        if (nursery_blocks < blocks) {
+            debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks",
+                       blocks);
+            nursery->blocks = allocNursery(node, nursery->blocks,
+                                           blocks-nursery_blocks);
+        }
+        else
+        {
+            bdescr *next_bd;
+
+            debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks",
+                       blocks);
+
+            bd = nursery->blocks;
+            while (nursery_blocks > blocks) {
+                next_bd = bd->link;
+                next_bd->u.back = NULL;
+                nursery_blocks -= bd->blocks; // might be a large block
+                freeGroup(bd);
+                bd = next_bd;
+            }
+            nursery->blocks = bd;
+            // might have gone just under, by freeing a large block, so make
+            // up the difference.
+            if (nursery_blocks < blocks) {
+                nursery->blocks = allocNursery(node, nursery->blocks,
+                                               blocks-nursery_blocks);
+            }
+        }
+        nursery->n_blocks = blocks;
+        ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
    }
 }
@@ -716,7 +737,7 @@ resizeNurseriesFixed (void)
    resizeNurseriesEach(blocks);
 }
 
-// 
+//
 // Resize the nurseries to the total specified size.
 //
 void
@@ -731,18 +752,42 @@ rtsBool
 getNewNursery (Capability *cap)
 {
    StgWord i;
+    uint32_t node = cap->node;
+    uint32_t n;
 
    for(;;) {
-        i = next_nursery;
-        if (i >= n_nurseries) {
+        i = next_nursery[node];
+        if (i < n_nurseries) {
+            if (cas(&next_nursery[node], i,
+                    i+RtsFlags.GcFlags.nNumaNodes) == i) {
+                assignNurseryToCapability(cap, i);
+                return rtsTrue;
+            }
+        } else if (RtsFlags.GcFlags.nNumaNodes > 1) {
+            // Try to find an unused nursery chunk on other nodes. We'll get
+            // remote memory, but the rationale is that avoiding GC is better
+            // than avoiding remote memory access.
+            rtsBool lost = rtsFalse;
+            for (n = 0; n < RtsFlags.GcFlags.nNumaNodes; n++) {
+                if (n == node) continue;
+                i = next_nursery[n];
+                if (i < n_nurseries) {
+                    if (cas(&next_nursery[n], i,
+                            i+RtsFlags.GcFlags.nNumaNodes) == i) {
+                        assignNurseryToCapability(cap, i);
+                        return rtsTrue;
+                    } else {
+                        lost = rtsTrue; /* lost a race */
+                    }
+                }
+            }
+            if (!lost) return rtsFalse;
+        } else {
            return rtsFalse;
        }
-        if (cas(&next_nursery, i, i+1) == i) {
-            assignNurseryToCapability(cap, i);
-            return rtsTrue;
-        }
    }
 }
+
 /* -----------------------------------------------------------------------------
   move_STACK is called to update the TSO structure after it has been
   moved from one place to another.
@@ -753,8 +798,8 @@ move_STACK (StgStack *src, StgStack *dest)
 {
    ptrdiff_t diff;
 
-    // relocate the stack pointer... 
-    diff = (StgPtr)dest - (StgPtr)src; // In *words* 
+    // relocate the stack pointer...
+    diff = (StgPtr)dest - (StgPtr)src; // In *words*
    dest->sp = (StgPtr)dest->sp + diff;
 }
@@ -818,7 +863,7 @@ allocate (Capability *cap, W_ n)
        }
 
        ACQUIRE_SM_LOCK
-        bd = allocGroup(req_blocks);
+        bd = allocGroupOnNode(cap->node,req_blocks);
        dbl_link_onto(bd, &g0->large_objects);
        g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
        g0->n_new_large_words += n;
@@ -834,19 +879,19 @@ allocate (Capability *cap, W_ n)
 
    bd = cap->r.rCurrentAlloc;
    if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
-        
+
        if (bd) finishedNurseryBlock(cap,bd);
 
        // The CurrentAlloc block is full, we need to find another
        // one. First, we try taking the next block from the
        // nursery:
        bd = cap->r.rCurrentNursery->link;
-        
+
        if (bd == NULL) {
            // The nursery is empty: allocate a fresh block (we can't
            // fail here).
            ACQUIRE_SM_LOCK;
-            bd = allocBlock();
+            bd = allocBlockOnNode(cap->node);
            cap->r.rNursery->n_blocks++;
            RELEASE_SM_LOCK;
            initBdescr(bd, g0, g0);
@@ -944,7 +989,7 @@ allocatePinned (Capability *cap, W_ n)
    }
 
    bd = cap->pinned_object_block;
-    
+
    // If we don't have a block of pinned objects yet, or the current
    // one isn't large enough to hold the new object, get a new one.
    if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
@@ -964,24 +1009,13 @@ allocatePinned (Capability *cap, W_ n)
        // objects scale really badly if we do this).
        //
        // So first, we try taking the next block from the nursery, in
-        // the same way as allocate(), but note that we can only take
-        // an *empty* block, because we're about to mark it as
-        // BF_PINNED | BF_LARGE.
+        // the same way as allocate().
        bd = cap->r.rCurrentNursery->link;
-        if (bd == NULL) { // must be empty!
-            // The nursery is empty, or the next block is non-empty:
-            // allocate a fresh block (we can't fail here).
-
-            // XXX in the case when the next nursery block is
-            // non-empty we aren't exerting any pressure to GC soon,
-            // so if this case ever happens then we could in theory
-            // keep allocating for ever without calling the GC. We
-            // can't bump g0->n_new_large_words because that will be
-            // counted towards allocation, and we're already counting
-            // our pinned obects as allocation in
-            // collect_pinned_object_blocks in the GC.
+        if (bd == NULL) {
+            // The nursery is empty: allocate a fresh block (we can't fail
+            // here).
            ACQUIRE_SM_LOCK;
-            bd = allocBlock();
+            bd = allocBlockOnNode(cap->node);
            RELEASE_SM_LOCK;
            initBdescr(bd, g0, g0);
        } else {
@@ -1217,13 +1251,13 @@ W_ gcThreadLiveBlocks (uint32_t i, uint32_t g)
  * that will be collected next time will therefore need twice as many
  * blocks since all the data will be copied.
  */
-extern W_ 
+extern W_
 calcNeeded (rtsBool force_major, memcount *blocks_needed)
 {
    W_ needed = 0, blocks;
    uint32_t g, N;
    generation *gen;
-    
+
    if (force_major) {
        N = RtsFlags.GcFlags.generations - 1;
    } else {
@@ -1238,7 +1272,7 @@ calcNeeded (rtsBool force_major, memcount *blocks_needed)
 
        // we need at least this much space
        needed += blocks;
-        
+
        // are we collecting this gen?
        if (g == 0 || // always collect gen 0
            blocks > gen->max_blocks)
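
The patch's core convention, stated in the new comment above nurseries, is that nursery[i] and capabilities[i] both belong to NUMA node i % RtsFlags.GcFlags.nNumaNodes, so the plain 1:1 capability-to-nursery mapping used without nursery chunks (+RTS -n) is automatically node-local. A minimal standalone sketch of that arithmetic; the names and constants here are illustrative only, and the real capNoToNumaNode is defined elsewhere in the RTS, not in this diff:

    #include <stdio.h>

    /* Stand-ins for RtsFlags.GcFlags.nNumaNodes and n_capabilities;
     * the values are illustrative, not taken from the RTS. */
    enum { N_NODES = 2, N_CAPS = 8 };

    /* Hypothetical reduction of capNoToNumaNode: capability (and
     * nursery) i belongs to NUMA node i % N_NODES. */
    static unsigned cap_no_to_node(unsigned i) { return i % N_NODES; }

    int main(void)
    {
        /* With 2 nodes and 8 capabilities: caps 0,2,4,6 -> node 0 and
         * caps 1,3,5,7 -> node 1, so a capability's own nursery always
         * sits on its own node. */
        for (unsigned i = 0; i < N_CAPS; i++)
            printf("cap/nursery %u -> node %u\n", i, cap_no_to_node(i));
        return 0;
    }

This interleaving is exactly what the per-node next_nursery counters rely on: node n's chunks are the indices congruent to n modulo the node count.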
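getNewNursery above hands out nursery chunks lock-free: next_nursery[node] starts at node (see resetNurseries) and each successful cas() advances it by nNumaNodes, so every node walks its own residue class of indices and two nodes never contend for the same chunk. Only when the home node is exhausted does a capability claim a remote chunk, on the stated rationale that avoiding GC is better than avoiding remote memory access. Below is a self-contained sketch of the same claiming scheme using C11 atomics in place of the RTS's cas(); all names are illustrative, not GHC API:

    #include <stdatomic.h>

    enum { N_NODES = 2, N_NURSERIES = 16 };

    /* next_nursery[n] is initialised to n, mirroring resetNurseries(). */
    static atomic_uint next_nursery[N_NODES] = { 0, 1 };

    /* Claim the next chunk owned by `node`; returns -1 when none are left.
     * Advancing by N_NODES keeps each node on its own residue class, so
     * two nodes never CAS on the same index. */
    static int claim_on_node(unsigned node)
    {
        unsigned i = atomic_load(&next_nursery[node]);
        while (i < N_NURSERIES) {
            /* On failure the CAS reloads i with the current value and we
             * retry, just like the cas() loop in getNewNursery. */
            if (atomic_compare_exchange_weak(&next_nursery[node], &i,
                                             i + N_NODES))
                return (int)i;
        }
        return -1;
    }

    /* Mirrors getNewNursery's fallback: prefer the home node, then accept
     * a remote chunk rather than triggering a GC early. */
    static int claim_any(unsigned home)
    {
        int i = claim_on_node(home);
        for (unsigned n = 0; i < 0 && n < N_NODES; n++)
            if (n != home)
                i = claim_on_node(n);
        return i;
    }

One simplification relative to the diff: the real getNewNursery keeps retrying its outer loop after losing a race on a remote node (the `lost` flag), whereas claim_any makes a single pass; the stride-by-nNumaNodes CAS is the part being illustrated.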