summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2014-10-07 10:30:36 +0100
committerSimon Marlow <marlowsd@gmail.com>2014-11-25 14:37:26 +0000
commite22bc0dedb9e9da0176ad7ce4a74acbefedc7207 (patch)
tree8a8872279576edf6824c25bf31accd793d970fd8
parente159e08a5e1c1f9f7b6805f3f0775333104c3d6e (diff)
downloadhaskell-e22bc0dedb9e9da0176ad7ce4a74acbefedc7207.tar.gz
Make clearNursery free
Summary: clearNursery resets all the bd->free pointers of nursery blocks to make the blocks empty. In profiles we've seen clearNursery taking significant amounts of time particularly with large -N and -A values. This patch moves the work of clearNursery to the point at which we actually need the new block, thereby introducing an invariant that blocks to the right of the CurrentNursery pointer still need their bd->free pointer reset. This should make things faster overall, because we don't need to clear blocks that we don't use. Test Plan: validate Reviewers: AndreasVoellmy, ezyang, austin Subscribers: thomie, carter, ezyang, simonmar Differential Revision: https://phabricator.haskell.org/D318
-rw-r--r--includes/rts/storage/GC.h21
-rw-r--r--rts/Capability.h6
-rw-r--r--rts/HeapStackCheck.cmm5
-rw-r--r--rts/Schedule.c18
-rw-r--r--rts/Stats.c21
-rw-r--r--rts/sm/Storage.c76
-rw-r--r--rts/sm/Storage.h22
-rw-r--r--testsuite/tests/codeGen/should_run/all.T1
-rw-r--r--testsuite/tests/codeGen/should_run/cgrun074.hs24
-rw-r--r--testsuite/tests/codeGen/should_run/cgrun074.stdout1
-rw-r--r--utils/deriveConstants/DeriveConstants.hs1
11 files changed, 144 insertions, 52 deletions
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h
index c171b67252..db71e951ea 100644
--- a/includes/rts/storage/GC.h
+++ b/includes/rts/storage/GC.h
@@ -66,6 +66,27 @@ typedef struct nursery_ {
memcount n_blocks;
} nursery;
+// Nursery invariants:
+//
+// - cap->r.rNursery points to the nursery for this capability
+//
+// - cap->r.rCurrentNursery points to the block in the nursery that we are
+// currently allocating into. While in Haskell the current heap pointer is
+// in Hp, outside Haskell it is stored in cap->r.rCurrentNursery->free.
+//
+// - the blocks *after* cap->rCurrentNursery in the chain are empty
+// (although their bd->free pointers have not been updated to
+// reflect that)
+//
+// - the blocks *before* cap->rCurrentNursery have been used. Except
+// for rCurrentAlloc.
+//
+// - cap->r.rCurrentAlloc is either NULL, or it points to a block in
+// the nursery *before* cap->r.rCurrentNursery.
+//
+// See also Note [allocation accounting] to understand how total
+// memory allocation is tracked.
+
typedef struct generation_ {
nat no; // generation number
diff --git a/rts/Capability.h b/rts/Capability.h
index 910c92c117..420bfd5c80 100644
--- a/rts/Capability.h
+++ b/rts/Capability.h
@@ -98,6 +98,10 @@ struct Capability_ {
// reset after we have executed the context switch.
int interrupt;
+ // Total words allocated by this cap since rts start
+ // See [Note allocation accounting] in Storage.c
+ W_ total_allocated;
+
#if defined(THREADED_RTS)
// Worker Tasks waiting in the wings. Singly-linked.
Task *spare_workers;
@@ -131,8 +135,6 @@ struct Capability_ {
int io_manager_control_wr_fd;
#endif
#endif
- // Total words allocated by this cap since rts start
- W_ total_allocated;
// Per-capability STM-related data
StgTVarWatchQueue *free_tvar_watch_queues;
diff --git a/rts/HeapStackCheck.cmm b/rts/HeapStackCheck.cmm
index a1fb5d446d..a1e18ca0f0 100644
--- a/rts/HeapStackCheck.cmm
+++ b/rts/HeapStackCheck.cmm
@@ -97,7 +97,12 @@ stg_gc_noregs
&& bdescr_link(CurrentNursery) != NULL) {
HpAlloc = 0;
CLOSE_NURSERY();
+ Capability_total_allocated(MyCapability()) =
+ Capability_total_allocated(MyCapability()) +
+ BYTES_TO_WDS(bdescr_free(CurrentNursery) -
+ bdescr_start(CurrentNursery));
CurrentNursery = bdescr_link(CurrentNursery);
+ bdescr_free(CurrentNursery) = bdescr_start(CurrentNursery);
OPEN_NURSERY();
if (Capability_context_switch(MyCapability()) != 0 :: CInt ||
Capability_interrupt(MyCapability()) != 0 :: CInt ||
diff --git a/rts/Schedule.c b/rts/Schedule.c
index 6a06f792ea..447b70ef52 100644
--- a/rts/Schedule.c
+++ b/rts/Schedule.c
@@ -1125,21 +1125,16 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
// don't do this if the nursery is (nearly) full, we'll GC first.
if (cap->r.rCurrentNursery->link != NULL ||
- cap->r.rNursery->n_blocks == 1) { // paranoia to prevent infinite loop
- // if the nursery has only one block.
+ cap->r.rNursery->n_blocks == 1) { // paranoia to prevent
+ // infinite loop if the
+ // nursery has only one
+ // block.
bd = allocGroup_lock(blocks);
cap->r.rNursery->n_blocks += blocks;
- // link the new group into the list
- bd->link = cap->r.rCurrentNursery;
- bd->u.back = cap->r.rCurrentNursery->u.back;
- if (cap->r.rCurrentNursery->u.back != NULL) {
- cap->r.rCurrentNursery->u.back->link = bd;
- } else {
- cap->r.rNursery->blocks = bd;
- }
- cap->r.rCurrentNursery->u.back = bd;
+ // link the new group after CurrentNursery
+ dbl_link_insert_after(bd, cap->r.rCurrentNursery);
// initialise it as a nursery block. We initialise the
// step, gen_no, and flags field of *every* sub-block in
@@ -1162,6 +1157,7 @@ scheduleHandleHeapOverflow( Capability *cap, StgTSO *t )
IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
// now update the nursery to point to the new block
+ finishedNurseryBlock(cap, cap->r.rCurrentNursery);
cap->r.rCurrentNursery = bd;
// we might be unlucky and have another thread get on the
diff --git a/rts/Stats.c b/rts/Stats.c
index ed345c2894..d5efaa2330 100644
--- a/rts/Stats.c
+++ b/rts/Stats.c
@@ -291,27 +291,8 @@ stat_startGC (Capability *cap, gc_thread *gct)
{
gct->gc_start_faults = getPageFaults();
}
-}
-
-/* -----------------------------------------------------------------------------
- * Calculate the total allocated memory since the start of the
- * program. Also emits events reporting the per-cap allocation
- * totals.
- * -------------------------------------------------------------------------- */
-
-static StgWord
-calcTotalAllocated(void)
-{
- W_ tot_alloc = 0;
- W_ n;
- for (n = 0; n < n_capabilities; n++) {
- tot_alloc += capabilities[n]->total_allocated;
- traceEventHeapAllocated(capabilities[n],
- CAPSET_HEAP_DEFAULT,
- capabilities[n]->total_allocated * sizeof(W_));
- }
- return tot_alloc;
+ updateNurseriesStats();
}
/* -----------------------------------------------------------------------------
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 7174425e04..e4a6984c40 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -532,6 +532,7 @@ assignNurseriesToCapabilities (nat from, nat to)
for (i = from; i < to; i++) {
capabilities[i]->r.rCurrentNursery = nurseries[i].blocks;
+ newNurseryBlock(nurseries[i].blocks);
capabilities[i]->r.rCurrentAlloc = NULL;
}
}
@@ -551,17 +552,16 @@ allocNurseries (nat from, nat to)
}
void
-clearNursery (Capability *cap)
+clearNursery (Capability *cap USED_IF_DEBUG)
{
+#ifdef DEBUG
bdescr *bd;
-
for (bd = nurseries[cap->no].blocks; bd; bd = bd->link) {
- cap->total_allocated += (W_)(bd->free - bd->start);
- bd->free = bd->start;
ASSERT(bd->gen_no == 0);
ASSERT(bd->gen == g0);
- IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
+ IF_DEBUG(sanity, memset(bd->start, 0xaa, BLOCK_SIZE));
}
+#endif
}
void
@@ -734,14 +734,16 @@ StgPtr allocate (Capability *cap, W_ n)
bd = cap->r.rCurrentAlloc;
if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
+ if (bd) finishedNurseryBlock(cap,bd);
+
// The CurrentAlloc block is full, we need to find another
// one. First, we try taking the next block from the
// nursery:
bd = cap->r.rCurrentNursery->link;
- if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
- // The nursery is empty, or the next block is already
- // full: allocate a fresh block (we can't fail here).
+ if (bd == NULL) {
+ // The nursery is empty: allocate a fresh block (we can't
+ // fail here).
ACQUIRE_SM_LOCK;
bd = allocBlock();
cap->r.rNursery->n_blocks++;
@@ -752,6 +754,7 @@ StgPtr allocate (Capability *cap, W_ n)
// pretty quickly now, because MAYBE_GC() will
// notice that CurrentNursery->link is NULL.
} else {
+ newNurseryBlock(bd);
// we have a block in the nursery: take it and put
// it at the *front* of the nursery list, and use it
// to allocate() from.
@@ -846,9 +849,9 @@ allocatePinned (Capability *cap, W_ n)
// next GC cycle these objects will be moved to
// g0->large_objects.
if (bd != NULL) {
- dbl_link_onto(bd, &cap->pinned_object_blocks);
// add it to the allocation stats when the block is full
- cap->total_allocated += bd->free - bd->start;
+ finishedNurseryBlock(cap, bd);
+ dbl_link_onto(bd, &cap->pinned_object_blocks);
}
// We need to find another block. We could just allocate one,
@@ -861,7 +864,7 @@ allocatePinned (Capability *cap, W_ n)
// an *empty* block, because we're about to mark it as
// BF_PINNED | BF_LARGE.
bd = cap->r.rCurrentNursery->link;
- if (bd == NULL || bd->free != bd->start) { // must be empty!
+ if (bd == NULL) { // must be empty!
// The nursery is empty, or the next block is non-empty:
// allocate a fresh block (we can't fail here).
@@ -878,6 +881,7 @@ allocatePinned (Capability *cap, W_ n)
RELEASE_SM_LOCK;
initBdescr(bd, g0, g0);
} else {
+ newNurseryBlock(bd);
// we have a block in the nursery: steal it
cap->r.rCurrentNursery->link = bd->link;
if (bd->link != NULL) {
@@ -1001,21 +1005,57 @@ dirty_MVAR(StgRegTable *reg, StgClosure *p)
* -------------------------------------------------------------------------- */
/* -----------------------------------------------------------------------------
- * updateNurseriesStats()
+ * [Note allocation accounting]
*
- * Update the per-cap total_allocated numbers with an approximation of
- * the amount of memory used in each cap's nursery.
+ * - When cap->r.rCurrentNusery moves to a new block in the nursery,
+ * we add the size of the used portion of the previous block to
+ * cap->total_allocated. (see finishedNurseryBlock())
+ *
+ * - When we start a GC, the allocated portion of CurrentNursery and
+ * CurrentAlloc are added to cap->total_allocated. (see
+ * updateNurseriesStats())
*
- * Since this update is also performed by clearNurseries() then we only
- * need this function for the final stats when the RTS is shutting down.
* -------------------------------------------------------------------------- */
-void updateNurseriesStats (void)
+//
+// Calculate the total allocated memory since the start of the
+// program. Also emits events reporting the per-cap allocation
+// totals.
+//
+StgWord
+calcTotalAllocated (void)
+{
+ W_ tot_alloc = 0;
+ W_ n;
+
+ for (n = 0; n < n_capabilities; n++) {
+ tot_alloc += capabilities[n]->total_allocated;
+
+ traceEventHeapAllocated(capabilities[n],
+ CAPSET_HEAP_DEFAULT,
+ capabilities[n]->total_allocated * sizeof(W_));
+ }
+
+ return tot_alloc;
+}
+
+//
+// Update the per-cap total_allocated numbers with an approximation of
+// the amount of memory used in each cap's nursery.
+//
+void
+updateNurseriesStats (void)
{
nat i;
+ bdescr *bd;
for (i = 0; i < n_capabilities; i++) {
- capabilities[i]->total_allocated += countOccupied(nurseries[i].blocks);
+ // The current nursery block and the current allocate block have not
+ // yet been accounted for in cap->total_allocated, so we add them here.
+ bd = capabilities[i]->r.rCurrentNursery;
+ if (bd) finishedNurseryBlock(capabilities[i], bd);
+ bd = capabilities[i]->r.rCurrentAlloc;
+ if (bd) finishedNurseryBlock(capabilities[i], bd);
}
}
diff --git a/rts/sm/Storage.h b/rts/sm/Storage.h
index 0016876066..943c3e39b7 100644
--- a/rts/sm/Storage.h
+++ b/rts/sm/Storage.h
@@ -88,10 +88,30 @@ void resizeNurseriesFixed ( W_ blocks );
W_ countNurseryBlocks ( void );
/* -----------------------------------------------------------------------------
+ Allocation accounting
+
+ See [Note allocation accounting] in Storage.c
+ -------------------------------------------------------------------------- */
+
+//
+// Called when we are finished allocating into a block; account for the amount
+// allocated in cap->total_allocated.
+//
+INLINE_HEADER void finishedNurseryBlock (Capability *cap, bdescr *bd) {
+ cap->total_allocated += bd->free - bd->start;
+}
+
+INLINE_HEADER void newNurseryBlock (bdescr *bd) {
+ bd->free = bd->start;
+}
+
+void updateNurseriesStats (void);
+StgWord calcTotalAllocated (void);
+
+/* -----------------------------------------------------------------------------
Stats 'n' DEBUG stuff
-------------------------------------------------------------------------- */
-void updateNurseriesStats (void);
W_ countLargeAllocated (void);
W_ countOccupied (bdescr *bd);
W_ calcNeeded (rtsBool force_major, W_ *blocks_needed);
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index 03106d4791..f157287c79 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -123,3 +123,4 @@ test('T9001', normal, compile_and_run, [''])
test('T9013', omit_ways(['ghci']), # ghci doesn't support unboxed tuples
compile_and_run, [''])
test('T9340', normal, compile_and_run, [''])
+test('cgrun074', normal, compile_and_run, [''])
diff --git a/testsuite/tests/codeGen/should_run/cgrun074.hs b/testsuite/tests/codeGen/should_run/cgrun074.hs
new file mode 100644
index 0000000000..31328a0e88
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/cgrun074.hs
@@ -0,0 +1,24 @@
+-- This test exercises the "large block" allocation code in the
+-- scheduler, where the mutator requests more than a block's worth of
+-- memory.
+
+longlistof x = [
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,
+ x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x
+ ]
+
+main = print (sum (concat (map longlistof [1..100])))
diff --git a/testsuite/tests/codeGen/should_run/cgrun074.stdout b/testsuite/tests/codeGen/should_run/cgrun074.stdout
new file mode 100644
index 0000000000..72e9f31b0b
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/cgrun074.stdout
@@ -0,0 +1 @@
+2828000
diff --git a/utils/deriveConstants/DeriveConstants.hs b/utils/deriveConstants/DeriveConstants.hs
index 486f497572..c793e84e85 100644
--- a/utils/deriveConstants/DeriveConstants.hs
+++ b/utils/deriveConstants/DeriveConstants.hs
@@ -349,6 +349,7 @@ wanteds = concat
,structField C "Capability" "context_switch"
,structField C "Capability" "interrupt"
,structField C "Capability" "sparks"
+ ,structField C "Capability" "total_allocated"
,structField C "Capability" "weak_ptr_list_hd"
,structField C "Capability" "weak_ptr_list_tl"