summaryrefslogtreecommitdiff
path: root/rts/sm/Storage.c
diff options
context:
space:
mode:
authorÖmer Sinan Ağacan <omer@well-typed.com>2019-02-05 00:18:44 -0500
committerBen Gamari <ben@smart-cactus.org>2019-10-20 21:15:37 -0400
commit68e0647f432f9d79ae13a23f614ef293bfd297a9 (patch)
tree89211783fbd4d8d9502e2777b2719da493fef851 /rts/sm/Storage.c
parentb3ef2d1a861e9b892d64f22f6a233ea331db86d1 (diff)
downloadhaskell-68e0647f432f9d79ae13a23f614ef293bfd297a9.tar.gz
rts: Non-concurrent mark and sweep
This implements the core heap structure and a serial mark/sweep collector which can be used to manage the oldest-generation heap. This is the first step towards a concurrent mark-and-sweep collector aimed at low-latency applications. The full design of the collector implemented here is described in detail in a technical note B. Gamari. "A Concurrent Garbage Collector For the Glasgow Haskell Compiler" (2018) The basic heap structure used in this design is heavily inspired by K. Ueno & A. Ohori. "A fully concurrent garbage collector for functional programs on multicore processors." /ACM SIGPLAN Notices/ Vol. 51. No. 9 (presented by ICFP 2016) This design is intended to allow both marking and sweeping concurrent to execution of a multi-core mutator. Unlike the Ueno design, which requires no global synchronization pauses, the collector introduced here requires a stop-the-world pause at the beginning and end of the mark phase. To avoid heap fragmentation, the allocator consists of a number of fixed-size /sub-allocators/. Each of these sub-allocators allocators into its own set of /segments/, themselves allocated from the block allocator. Each segment is broken into a set of fixed-size allocation blocks (which back allocations) in addition to a bitmap (used to track the liveness of blocks) and some additional metadata (used also used to track liveness). This heap structure enables collection via mark-and-sweep, which can be performed concurrently via a snapshot-at-the-beginning scheme (although concurrent collection is not implemented in this patch). The mark queue is a fairly straightforward chunked-array structure. The representation is a bit more verbose than a typical mark queue to accomodate a combination of two features: * a mark FIFO, which improves the locality of marking, reducing one of the major overheads seen in mark/sweep allocators (see [1] for details) * the selector optimization and indirection shortcutting, which requires that we track where we found each reference to an object in case we need to update the reference at a later point (e.g. when we find that it is an indirection). See Note [Origin references in the nonmoving collector] (in `NonMovingMark.h`) for details. Beyond this the mark/sweep is fairly run-of-the-mill. [1] R. Garner, S.M. Blackburn, D. Frampton. "Effective Prefetch for Mark-Sweep Garbage Collection." ISMM 2007. Co-Authored-By: Ben Gamari <ben@well-typed.com>
Diffstat (limited to 'rts/sm/Storage.c')
-rw-r--r--rts/sm/Storage.c30
1 files changed, 20 insertions, 10 deletions
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 97c71478ed..9fe68e98b7 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -29,6 +29,7 @@
#include "Trace.h"
#include "GC.h"
#include "Evac.h"
+#include "NonMoving.h"
#if defined(ios_HOST_OS)
#include "Hash.h"
#endif
@@ -82,7 +83,7 @@ Mutex sm_mutex;
static void allocNurseries (uint32_t from, uint32_t to);
static void assignNurseriesToCapabilities (uint32_t from, uint32_t to);
-static void
+void
initGeneration (generation *gen, int g)
{
gen->no = g;
@@ -170,6 +171,18 @@ initStorage (void)
}
oldest_gen->to = oldest_gen;
+ // Nonmoving heap uses oldest_gen so initialize it after initializing oldest_gen
+ nonmovingInit();
+
+#if defined(THREADED_RTS)
+ // nonmovingAddCapabilities allocates segments, which requires taking the gc
+ // sync lock, so initialize it before nonmovingAddCapabilities
+ initSpinLock(&gc_alloc_block_sync);
+#endif
+
+ if (RtsFlags.GcFlags.useNonmoving)
+ nonmovingAddCapabilities(n_capabilities);
+
/* The oldest generation has one step. */
if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
if (RtsFlags.GcFlags.generations == 1) {
@@ -195,9 +208,6 @@ initStorage (void)
exec_block = NULL;
-#if defined(THREADED_RTS)
- initSpinLock(&gc_alloc_block_sync);
-#endif
N = 0;
for (n = 0; n < n_numa_nodes; n++) {
@@ -1232,8 +1242,8 @@ W_ countOccupied (bdescr *bd)
W_ genLiveWords (generation *gen)
{
- return gen->n_words + gen->n_large_words +
- gen->n_compact_blocks * BLOCK_SIZE_W;
+ return (gen->live_estimate ? gen->live_estimate : gen->n_words) +
+ gen->n_large_words + gen->n_compact_blocks * BLOCK_SIZE_W;
}
W_ genLiveBlocks (generation *gen)
@@ -1289,9 +1299,9 @@ calcNeeded (bool force_major, memcount *blocks_needed)
for (uint32_t g = 0; g < RtsFlags.GcFlags.generations; g++) {
generation *gen = &generations[g];
- W_ blocks = gen->n_blocks // or: gen->n_words / BLOCK_SIZE_W (?)
- + gen->n_large_blocks
- + gen->n_compact_blocks;
+ W_ blocks = gen->live_estimate ? (gen->live_estimate / BLOCK_SIZE_W) : gen->n_blocks;
+ blocks += gen->n_large_blocks
+ + gen->n_compact_blocks;
// we need at least this much space
needed += blocks;
@@ -1309,7 +1319,7 @@ calcNeeded (bool force_major, memcount *blocks_needed)
// mark stack:
needed += gen->n_blocks / 100;
}
- if (gen->compact) {
+ if (gen->compact || (RtsFlags.GcFlags.useNonmoving && gen == oldest_gen)) {
continue; // no additional space needed for compaction
} else {
needed += gen->n_blocks;