diff options
author | Simon Marlow <marlowsd@gmail.com> | 2014-11-21 17:05:58 +0000 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2014-11-25 14:37:26 +0000 |
commit | 452eb80f15fce8665df52bc9facebfafb5b6267b (patch) | |
tree | a34e76ab765b6d1db49b1c05372f8924e52cacf9 | |
parent | e22bc0dedb9e9da0176ad7ce4a74acbefedc7207 (diff) | |
download | haskell-452eb80f15fce8665df52bc9facebfafb5b6267b.tar.gz |
Add +RTS -n<size>: divide the nursery into chunks
See the documentation for details.
-rw-r--r-- | docs/users_guide/runtime_control.xml | 36 | ||||
-rw-r--r-- | includes/rts/Flags.h | 1 | ||||
-rw-r--r-- | rts/RtsFlags.c | 11 | ||||
-rw-r--r-- | rts/Schedule.c | 6 | ||||
-rw-r--r-- | rts/sm/GC.c | 29 | ||||
-rw-r--r-- | rts/sm/Sanity.c | 9 | ||||
-rw-r--r-- | rts/sm/Storage.c | 117 | ||||
-rw-r--r-- | rts/sm/Storage.h | 4 |
8 files changed, 155 insertions, 58 deletions
diff --git a/docs/users_guide/runtime_control.xml b/docs/users_guide/runtime_control.xml index cdd9fd4997..612a44122a 100644 --- a/docs/users_guide/runtime_control.xml +++ b/docs/users_guide/runtime_control.xml @@ -365,6 +365,42 @@ $ ./prog -f +RTS -H32m -S -RTS -h foo bar </varlistentry> <varlistentry> + <term> + <option>-n</option><replaceable>size</replaceable> + <indexterm><primary><option>-n</option></primary><secondary>RTS option</secondary></indexterm> + <indexterm><primary>allocation area, chunk size</primary></indexterm> + </term> + <listitem> + <para>[Default: 0, Example: + <literal>-n4m</literal>] When set to a non-zero value, + this option divides the allocation area (<option>-A</option> + value) into chunks of the specified size. During execution, + when a processor exhausts its current chunk, it is given + another chunk from the pool until the pool is exhausted, at + which point a collection is triggered.</para> + + <para>This option is only useful when running in parallel + (<option>-N2</option> or greater). It allows the processor + cores to make better use of the available allocation area, + even when cores are allocating at different rates. Without + <option>-n</option>, each core gets a fixed-size allocation + area specified by the <option>-A</option>, and the first + core to exhaust its allocation area triggers a GC across all + the cores. This can result in a collection happening when + the allocation areas of some cores are only partially full, + so the purpose of the <option>-n</option> is to allow cores + that are allocating faster to get more of the allocation + area. This means less frequent GC, leading a lower GC + overhead for the same heap size.</para> + + <para>This is particularly useful in conjunction with larger + <option>-A</option> values, for example <option>-A64m + -n4m</option> is a useful combination on larger core counts + (8+).</para> + </listitem> + </varlistentry> + + <varlistentry> <term> <option>-c</option> <indexterm><primary><option>-c</option></primary><secondary>RTS option</secondary></indexterm> diff --git a/includes/rts/Flags.h b/includes/rts/Flags.h index b707a20edc..1440b14a37 100644 --- a/includes/rts/Flags.h +++ b/includes/rts/Flags.h @@ -41,6 +41,7 @@ typedef struct _GC_FLAGS { nat maxHeapSize; /* in *blocks* */ nat minAllocAreaSize; /* in *blocks* */ + nat nurseryChunkSize; /* in *blocks* */ nat minOldGenSize; /* in *blocks* */ nat heapSizeSuggestion; /* in *blocks* */ rtsBool heapSizeSuggestionAuto; diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index 82e90e5b0e..68667005f0 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -108,6 +108,7 @@ void initRtsFlagsDefaults(void) RtsFlags.GcFlags.stkChunkBufferSize = (1 * 1024) / sizeof(W_); RtsFlags.GcFlags.minAllocAreaSize = (512 * 1024) / BLOCK_SIZE; + RtsFlags.GcFlags.nurseryChunkSize = 0; RtsFlags.GcFlags.minOldGenSize = (1024 * 1024) / BLOCK_SIZE; RtsFlags.GcFlags.maxHeapSize = 0; /* off by default */ RtsFlags.GcFlags.heapSizeSuggestion = 0; /* none */ @@ -735,8 +736,16 @@ error = rtsTrue; break; case 'A': OPTION_UNSAFE; + // minimum two blocks in the nursery, so that we have one to + // grab for allocate(). RtsFlags.GcFlags.minAllocAreaSize - = decodeSize(rts_argv[arg], 2, BLOCK_SIZE, HS_INT_MAX) + = decodeSize(rts_argv[arg], 2, 2*BLOCK_SIZE, HS_INT_MAX) + / BLOCK_SIZE; + break; + case 'n': + OPTION_UNSAFE; + RtsFlags.GcFlags.nurseryChunkSize + = decodeSize(rts_argv[arg], 2, 2*BLOCK_SIZE, HS_INT_MAX) / BLOCK_SIZE; break; diff --git a/rts/Schedule.c b/rts/Schedule.c index 447b70ef52..f25b37288d 100644 --- a/rts/Schedule.c +++ b/rts/Schedule.c @@ -1169,6 +1169,12 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t ) } } + if (getNewNursery(cap)) { + debugTrace(DEBUG_sched, "thread %ld got a new nursery", t->id); + pushOnRunQueue(cap,t); + return rtsFalse; + } + if (cap->r.rHpLim == NULL || cap->context_switch) { // Sometimes we miss a context switch, e.g. when calling // primitives in a tight loop, MAYBE_GC() doesn't check the diff --git a/rts/sm/GC.c b/rts/sm/GC.c index 748dc0d6de..9777f32a3b 100644 --- a/rts/sm/GC.c +++ b/rts/sm/GC.c @@ -408,10 +408,6 @@ GarbageCollect (nat collect_gen, break; } - if (!DEBUG_IS_ON && n_gc_threads != 1) { - clearNursery(cap); - } - shutdown_gc_threads(gct->thread_index); // Now see which stable names are still alive. @@ -646,21 +642,6 @@ GarbageCollect (nat collect_gen, } } - // Reset the nursery: make the blocks empty - if (DEBUG_IS_ON || n_gc_threads == 1) { - for (n = 0; n < n_capabilities; n++) { - clearNursery(capabilities[n]); - } - } else { - // When doing parallel GC, clearNursery() is called by the - // worker threads - for (n = 0; n < n_capabilities; n++) { - if (gc_threads[n]->idle) { - clearNursery(capabilities[n]); - } - } - } - resize_nursery(); resetNurseries(); @@ -1072,10 +1053,6 @@ gcWorkerThread (Capability *cap) scavenge_until_all_done(); - if (!DEBUG_IS_ON) { - clearNursery(cap); - } - #ifdef THREADED_RTS // Now that the whole heap is marked, we discard any sparks that // were found to be unreachable. The main GC thread is currently @@ -1716,9 +1693,9 @@ resize_nursery (void) } else { - // we might have added extra large blocks to the nursery, so - // resize back to minAllocAreaSize again. - resizeNurseriesFixed(RtsFlags.GcFlags.minAllocAreaSize); + // we might have added extra blocks to the nursery, so + // resize back to the original size again. + resizeNurseriesFixed(); } } } diff --git a/rts/sm/Sanity.c b/rts/sm/Sanity.c index dd50ded063..c4a699e59a 100644 --- a/rts/sm/Sanity.c +++ b/rts/sm/Sanity.c @@ -765,8 +765,11 @@ findMemoryLeak (void) markBlocks(generations[g].large_objects); } - for (i = 0; i < n_capabilities; i++) { + for (i = 0; i < n_nurseries; i++) { markBlocks(nurseries[i].blocks); + } + + for (i = 0; i < n_capabilities; i++) { markBlocks(capabilities[i]->pinned_object_block); } @@ -856,9 +859,11 @@ memInventory (rtsBool show) } nursery_blocks = 0; - for (i = 0; i < n_capabilities; i++) { + for (i = 0; i < n_nurseries; i++) { ASSERT(countBlocks(nurseries[i].blocks) == nurseries[i].n_blocks); nursery_blocks += nurseries[i].n_blocks; + } + for (i = 0; i < n_capabilities; i++) { if (capabilities[i]->pinned_object_block != NULL) { nursery_blocks += capabilities[i]->pinned_object_block->blocks; } diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index e4a6984c40..f02c00591c 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -55,6 +55,8 @@ generation *g0 = NULL; /* generation 0, for convenience */ generation *oldest_gen = NULL; /* oldest generation, for convenience */ nursery *nurseries = NULL; /* array of nurseries, size == n_capabilities */ +nat n_nurseries; +volatile StgWord next_nursery = 0; #ifdef THREADED_RTS /* @@ -65,6 +67,7 @@ Mutex sm_mutex; #endif static void allocNurseries (nat from, nat to); +static void assignNurseriesToCapabilities (nat from, nat to); static void initGeneration (generation *gen, int g) @@ -190,6 +193,7 @@ initStorage (void) N = 0; + next_nursery = 0; storageAddCapabilities(0, n_capabilities); IF_DEBUG(gc, statDescribeGens()); @@ -206,13 +210,22 @@ initStorage (void) void storageAddCapabilities (nat from, nat to) { - nat n, g, i; + nat n, g, i, new_n_nurseries; + + if (RtsFlags.GcFlags.nurseryChunkSize == 0) { + new_n_nurseries = to; + } else { + memcount total_alloc = to * RtsFlags.GcFlags.minAllocAreaSize; + new_n_nurseries = + stg_max(to, total_alloc / RtsFlags.GcFlags.nurseryChunkSize); + } if (from > 0) { - nurseries = stgReallocBytes(nurseries, to * sizeof(struct nursery_), + nurseries = stgReallocBytes(nurseries, + new_n_nurseries * sizeof(struct nursery_), "storageAddCapabilities"); } else { - nurseries = stgMallocBytes(to * sizeof(struct nursery_), + nurseries = stgMallocBytes(new_n_nurseries * sizeof(struct nursery_), "storageAddCapabilities"); } @@ -228,7 +241,15 @@ void storageAddCapabilities (nat from, nat to) * don't want it to be a big one. This vague idea is borne out by * rigorous experimental evidence. */ - allocNurseries(from, to); + allocNurseries(n_nurseries, new_n_nurseries); + n_nurseries = new_n_nurseries; + + /* + * Assign each of the new capabilities a nursery. Remember to start from + * next_nursery, because we may have already consumed some of the earlier + * nurseries. + */ + assignNurseriesToCapabilities(from,to); // allocate a block for each mut list for (n = from; n < to; n++) { @@ -525,15 +546,26 @@ allocNursery (bdescr *tail, W_ blocks) return &bd[0]; } +STATIC_INLINE void +assignNurseryToCapability (Capability *cap, nat n) +{ + cap->r.rNursery = &nurseries[n]; + cap->r.rCurrentNursery = nurseries[n].blocks; + newNurseryBlock(nurseries[n].blocks); + cap->r.rCurrentAlloc = NULL; +} + +/* + * Give each Capability a nursery from the pool. No need to do atomic increments + * here, everything must be stopped to call this function. + */ static void assignNurseriesToCapabilities (nat from, nat to) { nat i; for (i = from; i < to; i++) { - capabilities[i]->r.rCurrentNursery = nurseries[i].blocks; - newNurseryBlock(nurseries[i].blocks); - capabilities[i]->r.rCurrentAlloc = NULL; + assignNurseryToCapability(capabilities[i], next_nursery++); } } @@ -541,42 +573,46 @@ static void allocNurseries (nat from, nat to) { nat i; + memcount n_blocks; + + if (RtsFlags.GcFlags.nurseryChunkSize) { + n_blocks = RtsFlags.GcFlags.nurseryChunkSize; + } else { + n_blocks = RtsFlags.GcFlags.minAllocAreaSize; + } for (i = from; i < to; i++) { - nurseries[i].blocks = - allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize); - nurseries[i].n_blocks = - RtsFlags.GcFlags.minAllocAreaSize; + nurseries[i].blocks = allocNursery(NULL, n_blocks); + nurseries[i].n_blocks = n_blocks; } - assignNurseriesToCapabilities(from, to); } void -clearNursery (Capability *cap USED_IF_DEBUG) +resetNurseries (void) { + next_nursery = 0; + assignNurseriesToCapabilities(0, n_capabilities); + #ifdef DEBUG bdescr *bd; - for (bd = nurseries[cap->no].blocks; bd; bd = bd->link) { - ASSERT(bd->gen_no == 0); - ASSERT(bd->gen == g0); - IF_DEBUG(sanity, memset(bd->start, 0xaa, BLOCK_SIZE)); + nat n; + for (n = 0; n < n_nurseries; n++) { + for (bd = nurseries[n].blocks; bd; bd = bd->link) { + ASSERT(bd->gen_no == 0); + ASSERT(bd->gen == g0); + IF_DEBUG(sanity, memset(bd->start, 0xaa, BLOCK_SIZE)); + } } #endif } -void -resetNurseries (void) -{ - assignNurseriesToCapabilities(0, n_capabilities); -} - W_ countNurseryBlocks (void) { nat i; W_ blocks = 0; - for (i = 0; i < n_capabilities; i++) { + for (i = 0; i < n_nurseries; i++) { blocks += nurseries[i].n_blocks; } return blocks; @@ -625,15 +661,30 @@ resizeNursery (nursery *nursery, W_ blocks) // // Resize each of the nurseries to the specified size. // -void -resizeNurseriesFixed (W_ blocks) +static void +resizeNurseriesEach (W_ blocks) { nat i; - for (i = 0; i < n_capabilities; i++) { + + for (i = 0; i < n_nurseries; i++) { resizeNursery(&nurseries[i], blocks); } } +void +resizeNurseriesFixed (void) +{ + nat blocks; + + if (RtsFlags.GcFlags.nurseryChunkSize) { + blocks = RtsFlags.GcFlags.nurseryChunkSize; + } else { + blocks = RtsFlags.GcFlags.minAllocAreaSize; + } + + resizeNurseriesEach(blocks); +} + // // Resize the nurseries to the total specified size. // @@ -642,9 +693,19 @@ resizeNurseries (W_ blocks) { // If there are multiple nurseries, then we just divide the number // of available blocks between them. - resizeNurseriesFixed(blocks / n_capabilities); + resizeNurseriesEach(blocks / n_nurseries); } +rtsBool +getNewNursery (Capability *cap) +{ + StgWord i = atomic_inc(&next_nursery, 1) - 1; + if (i >= n_nurseries) { + return rtsFalse; + } + assignNurseryToCapability(cap, i); + return rtsTrue; +} /* ----------------------------------------------------------------------------- move_STACK is called to update the TSO structure after it has been diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h index 943c3e39b7..a4421db3f2 100644 --- a/rts/sm/Storage.h +++ b/rts/sm/Storage.h @@ -80,12 +80,14 @@ void dirty_TVAR(Capability *cap, StgTVar *p); -------------------------------------------------------------------------- */ extern nursery *nurseries; +extern nat n_nurseries; void resetNurseries ( void ); void clearNursery ( Capability *cap ); void resizeNurseries ( W_ blocks ); -void resizeNurseriesFixed ( W_ blocks ); +void resizeNurseriesFixed ( void ); W_ countNurseryBlocks ( void ); +rtsBool getNewNursery ( Capability *cap ); /* ----------------------------------------------------------------------------- Allocation accounting |