summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Gröber <dxld@darkboxed.org>2019-07-08 16:29:01 +0200
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-02-17 11:21:10 -0500
commitc4ad915089c440ea35e8727dec9133f4aefcb8c9 (patch)
treeb8d9d52132f4eb713ab28734f2960a4deaf38105
parentfd48d8b0aaf65c1c2f12d9e7433091f6984ab182 (diff)
downloadhaskell-c4ad915089c440ea35e8727dec9133f4aefcb8c9.tar.gz
rts: TraverseHeap: Introduce callback for subtree completion
The callback 'return_cb' allows users to be perform additional accounting when the traversal of a subtree is completed. This is needed for example to determine the number or total size of closures reachable from a given closure. This commit also makes the lifetime increase of stackElements from commit "rts: TraverseHeap: Increase lifetime of stackElements" optional based on 'return_cb' being set enabled or not. Note that our definition of "subtree" here includes leaf nodes. So the invariant is that return_cb is called for all nodes in the traversal exactly once.
-rw-r--r--rts/RetainerProfile.c3
-rw-r--r--rts/TraverseHeap.c226
-rw-r--r--rts/TraverseHeap.h33
3 files changed, 185 insertions, 77 deletions
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c
index 1e23d8d856..b89519b05d 100644
--- a/rts/RetainerProfile.c
+++ b/rts/RetainerProfile.c
@@ -255,9 +255,10 @@ associate( StgClosure *c, RetainerSet *s )
}
static bool
-retainVisitClosure( StgClosure *c, const StgClosure *cp, const stackData data, const bool first_visit, stackData *out_data )
+retainVisitClosure( StgClosure *c, const StgClosure *cp, const stackData data, const bool first_visit, stackAccum *acc, stackData *out_data )
{
(void) first_visit;
+ (void) acc;
retainer r = data.c_child_r;
RetainerSet *s, *retainerSetOfc;
diff --git a/rts/TraverseHeap.c b/rts/TraverseHeap.c
index adba9a3c96..20fecfb816 100644
--- a/rts/TraverseHeap.c
+++ b/rts/TraverseHeap.c
@@ -9,6 +9,7 @@
#if defined(PROFILING)
+#include <string.h>
#include "PosixSource.h"
#include "Rts.h"
#include "sm/Storage.h"
@@ -98,13 +99,13 @@ typedef struct {
/**
* An element of the traversal work-stack. Besides the closure itself this also
- * stores it's parent and associated data.
+ * stores it's parent, associated data and an accumulator.
*
* When 'info.type == posTypeFresh' a 'stackElement' represents just one
* closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an
* offset into the children of 'c'. This is to support returning a closure's
* children one-by-one without pushing one element per child onto the stack. See
- * traversePushChildren() and traversePop().
+ * traverseGetChildren() and traversePop().
*
*/
typedef struct stackElement_ {
@@ -112,6 +113,7 @@ typedef struct stackElement_ {
StgClosure *c;
stackElement *sep; // stackElement of parent closure
stackData data;
+ stackAccum accum;
} stackElement;
@@ -358,6 +360,7 @@ traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackEleme
se.info.next.cp = cp;
se.sep = sep;
se.data = data;
+ se.accum = (stackAccum)(StgWord)0;
se.info.type = posTypeFresh;
pushStackElement(ts, se);
@@ -370,15 +373,49 @@ traversePushRoot(traverseState *ts, StgClosure *c, StgClosure *cp, stackData dat
};
/**
- * traversePushChildren() extracts the first child of 'c' in 'first_child' and
- * conceptually pushes all remaining children of 'c' onto the traversal stack
- * while associating 'data' with the pushed elements to be returned upon poping.
+ * Push an empty stackElement onto the traversal work-stack for the sole purpose
+ * of triggering the return callback 'traversalState.return_cb' for the closure
+ * '*c' when traversing of it's children is complete.
*
- * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed
- * onto the stack.
+ * This is needed for code-paths which don't inherently have to push a
+ * stackElement. c.f. traverseWorkStack.
*
- * If 'c' has only one child, 'first_child' is set to that child and nothing is
- * pushed onto the stack.
+ * When return_cb is NULL this function does nothing.
+ */
+STATIC_INLINE stackElement *
+traversePushReturn(traverseState *ts, StgClosure *c, stackAccum acc, stackElement *sep)
+{
+ if(!ts->return_cb)
+ return sep;
+
+ stackElement se;
+ se.c = c;
+ se.info.next.cp = NULL;
+ se.accum = acc;
+ se.sep = sep;
+ memset(&se.data, 0, sizeof(se.data));
+ // return frames never emit closures, traversePop just skips over them. So
+ // the data field is simply never used.
+ se.info.type = posTypeEmpty;
+ return pushStackElement(ts, se);
+};
+
+/**
+ * traverseGetChildren() extracts the first child of 'c' in 'first_child' and if
+ * 'other_children' is true returns a stackElement in 'se' which
+ * conceptually contains all remaining children of 'c'.
+ *
+ * If 'c' has no children, 'first_child' is set to NULL, other_children is set
+ * to false and nothing is returned in 'se'.
+ *
+ * If 'c' has only one child, 'first_child' is set to that child, other_children
+ * is set to false and nothing is returned in 'se'.
+ *
+ * Otherwise 'other_children' is set to true and a stackElement representing the
+ * other children is returned in 'se'.
+ *
+ * Note that when 'se' is set only the fields fields 'se.c' and 'se.info'
+ * are initialized. It is the caller's responsibility to initialize the rest.
*
* Invariants:
*
@@ -386,17 +423,10 @@ traversePushRoot(traverseState *ts, StgClosure *c, StgClosure *cp, stackData dat
* be any stack objects.
*
* Note: SRTs are considered to be children as well.
- *
- * Note: When pushing onto the stack we only really push one 'stackElement'
- * representing all children onto the stack. See traversePop()
*/
STATIC_INLINE void
-traversePushChildren(traverseState *ts, StgClosure *c, stackElement **sep, stackData data, StgClosure **first_child)
+traverseGetChildren(StgClosure *c, StgClosure **first_child, bool *other_children, stackElement *se)
{
- stackElement se;
-
- debug("traversePushChildren(): stackTop = 0x%x\n", ts->stackTop);
-
ASSERT(get_itbl(c)->type != TSO);
ASSERT(get_itbl(c)->type != AP_STACK);
@@ -404,11 +434,11 @@ traversePushChildren(traverseState *ts, StgClosure *c, stackElement **sep, stack
// fill in se
//
- se.c = c;
- se.data = data;
- se.sep = *sep;
+ se->c = c;
+
+ *other_children = false;
- // fill in se.info
+ // fill in se->info
switch (get_itbl(c)->type) {
// no child, no SRT
case CONSTR_0_1:
@@ -434,16 +464,16 @@ traversePushChildren(traverseState *ts, StgClosure *c, stackElement **sep, stack
*first_child = c->payload[0];
return;
- // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
+ // For CONSTR_2_0 and MVAR, we use se->info.step to record the position
// of the next child. We do not write a separate initialization code.
// Also we do not have to initialize info.type;
// two children (fixed), no SRT
- // need to push a stackElement, but nothing to store in se.info
+ // need to push a stackElement, but nothing to store in se->info
case CONSTR_2_0:
*first_child = c->payload[0]; // return the first pointer
- se.info.type = posTypeStep;
- se.info.next.step = 2; // 2 = second
+ se->info.type = posTypeStep;
+ se->info.next.step = 2; // 2 = second
break;
// three children (fixed), no SRT
@@ -453,15 +483,15 @@ traversePushChildren(traverseState *ts, StgClosure *c, stackElement **sep, stack
// head must be TSO and the head of a linked list of TSOs.
// Shoule it be a child? Seems to be yes.
*first_child = (StgClosure *)((StgMVar *)c)->head;
- se.info.type = posTypeStep;
- se.info.next.step = 2; // 2 = second
+ se->info.type = posTypeStep;
+ se->info.next.step = 2; // 2 = second
break;
// three children (fixed), no SRT
case WEAK:
*first_child = ((StgWeak *)c)->key;
- se.info.type = posTypeStep;
- se.info.next.step = 2;
+ se->info.type = posTypeStep;
+ se->info.next.step = 2;
break;
// layout.payload.ptrs, no SRT
@@ -471,9 +501,9 @@ traversePushChildren(traverseState *ts, StgClosure *c, stackElement **sep, stack
case PRIM:
case MUT_PRIM:
case BCO:
- init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
+ init_ptrs(&se->info, get_itbl(c)->layout.payload.ptrs,
(StgPtr)c->payload);
- *first_child = find_ptrs(&se.info);
+ *first_child = find_ptrs(&se->info);
if (*first_child == NULL)
return; // no child
break;
@@ -483,9 +513,9 @@ traversePushChildren(traverseState *ts, StgClosure *c, stackElement **sep, stack
case MUT_ARR_PTRS_DIRTY:
case MUT_ARR_PTRS_FROZEN_CLEAN:
case MUT_ARR_PTRS_FROZEN_DIRTY:
- init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
+ init_ptrs(&se->info, ((StgMutArrPtrs *)c)->ptrs,
(StgPtr)(((StgMutArrPtrs *)c)->payload));
- *first_child = find_ptrs(&se.info);
+ *first_child = find_ptrs(&se->info);
if (*first_child == NULL)
return;
break;
@@ -495,9 +525,9 @@ traversePushChildren(traverseState *ts, StgClosure *c, stackElement **sep, stack
case SMALL_MUT_ARR_PTRS_DIRTY:
case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
- init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
+ init_ptrs(&se->info, ((StgSmallMutArrPtrs *)c)->ptrs,
(StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
- *first_child = find_ptrs(&se.info);
+ *first_child = find_ptrs(&se->info);
if (*first_child == NULL)
return;
break;
@@ -506,8 +536,8 @@ traversePushChildren(traverseState *ts, StgClosure *c, stackElement **sep, stack
case FUN_STATIC:
case FUN: // *c is a heap object.
case FUN_2_0:
- init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
- *first_child = find_ptrs(&se.info);
+ init_ptrs(&se->info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
+ *first_child = find_ptrs(&se->info);
if (*first_child == NULL)
// no child from ptrs, so check SRT
goto fun_srt_only;
@@ -515,9 +545,9 @@ traversePushChildren(traverseState *ts, StgClosure *c, stackElement **sep, stack
case THUNK:
case THUNK_2_0:
- init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
+ init_ptrs(&se->info, get_itbl(c)->layout.payload.ptrs,
(StgPtr)((StgThunk *)c)->payload);
- *first_child = find_ptrs(&se.info);
+ *first_child = find_ptrs(&se->info);
if (*first_child == NULL)
// no child from ptrs, so check SRT
goto thunk_srt_only;
@@ -528,21 +558,21 @@ traversePushChildren(traverseState *ts, StgClosure *c, stackElement **sep, stack
case FUN_1_1:
*first_child = c->payload[0];
ASSERT(*first_child != NULL);
- init_srt_fun(&se.info, get_fun_itbl(c));
+ init_srt_fun(&se->info, get_fun_itbl(c));
break;
case THUNK_1_0:
case THUNK_1_1:
*first_child = ((StgThunk *)c)->payload[0];
ASSERT(*first_child != NULL);
- init_srt_thunk(&se.info, get_thunk_itbl(c));
+ init_srt_thunk(&se->info, get_thunk_itbl(c));
break;
case FUN_0_1: // *c is a heap object.
case FUN_0_2:
fun_srt_only:
- init_srt_fun(&se.info, get_fun_itbl(c));
- *first_child = find_srt(&se.info);
+ init_srt_fun(&se->info, get_fun_itbl(c));
+ *first_child = find_srt(&se->info);
if (*first_child == NULL)
return; // no child
break;
@@ -554,16 +584,16 @@ traversePushChildren(traverseState *ts, StgClosure *c, stackElement **sep, stack
case THUNK_0_1:
case THUNK_0_2:
thunk_srt_only:
- init_srt_thunk(&se.info, get_thunk_itbl(c));
- *first_child = find_srt(&se.info);
+ init_srt_thunk(&se->info, get_thunk_itbl(c));
+ *first_child = find_srt(&se->info);
if (*first_child == NULL)
return; // no child
break;
case TREC_CHUNK:
*first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
- se.info.type = posTypeStep;
- se.info.next.step = 0; // entry no.
+ se->info.type = posTypeStep;
+ se->info.next.step = 0; // entry no.
break;
// cannot appear
@@ -589,22 +619,14 @@ traversePushChildren(traverseState *ts, StgClosure *c, stackElement **sep, stack
return;
}
- // se.info.next.cp has to be initialized when type==posTypeFresh. We don't
+ // se->info.next.cp has to be initialized when type==posTypeFresh. We don't
// do that here though. So type must be !=posTypeFresh.
- ASSERT(se.info.type != posTypeFresh);
+ ASSERT(se->info.type != posTypeFresh);
- *sep = pushStackElement(ts, se);
+ *other_children = true;
}
-/**
- * popStackElement(): Remove a depleted stackElement from the top of the
- * traversal work-stack.
- *
- * Invariants:
- * stackTop cannot be equal to stackLimit unless the whole stack is
- * empty, in which case popStackElement() is not allowed.
- */
-static void
+STATIC_INLINE void
popStackElement(traverseState *ts) {
debug("popStackElement(): stackTop = 0x%x\n", ts->stackTop);
@@ -660,6 +682,26 @@ popStackElement(traverseState *ts) {
}
/**
+ * callReturnAndPopStackElement(): Call 'traversalState.return_cb' and remove a
+ * depleted stackElement from the top of the traversal work-stack.
+ *
+ * Invariants:
+ * stackTop cannot be equal to stackLimit unless the whole stack is
+ * empty, in which case popStackElement() is not allowed.
+ */
+static void
+callReturnAndPopStackElement(traverseState *ts)
+{
+ stackElement *se = ts->stackTop;
+
+ if(ts->return_cb)
+ ts->return_cb(se->c, se->accum,
+ se->sep->c, &se->sep->accum);
+
+ popStackElement(ts);
+}
+
+/**
* Finds the next object to be considered for retainer profiling and store
* its pointer to *c.
*
@@ -699,8 +741,8 @@ traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data,
}
// Note: Below every `break`, where the loop condition is true, must be
- // accompanied by a popStackElement() otherwise this is an infinite
- // loop.
+ // accompanied by a popStackElement()/callReturnAndPopStackElement()
+ // call otherwise this is an infinite loop.
se = ts->stackTop;
*sep = se->sep;
@@ -713,7 +755,7 @@ traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data,
popStackElement(ts);
return;
} else if (se->info.type == posTypeEmpty) {
- popStackElement(ts);
+ callReturnAndPopStackElement(ts);
continue;
}
@@ -886,9 +928,12 @@ out:
*cp = se->c;
*data = se->data;
+ *sep = se;
- if(last)
+ if(last && ts->return_cb)
se->info.type = posTypeEmpty;
+ else if(last)
+ popStackElement(ts);
return;
@@ -1158,6 +1203,7 @@ traverseWorkStack(traverseState *ts, visitClosure_cb visit_cb)
stackData data, child_data;
StgWord typeOfc;
stackElement *sep;
+ bool other_children;
// Now we flip the flip bit.
flip = flip ^ 1;
@@ -1247,10 +1293,12 @@ inner_loop:
break;
}
+ stackAccum accum = {};
+
// If this is the first visit to c, initialize its data.
bool first_visit = traverseMaybeInitClosureData(c);
bool traverse_children
- = visit_cb(c, cp, data, first_visit, (stackData*)&child_data);
+ = visit_cb(c, cp, data, first_visit, &accum, &child_data);
if(!traverse_children)
goto loop;
@@ -1261,6 +1309,7 @@ inner_loop:
// would be hard.
switch (typeOfc) {
case STACK:
+ sep = traversePushReturn(ts, c, accum, sep);
traversePushStack(ts, c, sep, child_data,
((StgStack *)c)->sp,
((StgStack *)c)->stack + ((StgStack *)c)->stack_size);
@@ -1270,6 +1319,8 @@ inner_loop:
{
StgTSO *tso = (StgTSO *)c;
+ sep = traversePushReturn(ts, c, accum, sep);
+
traversePushClosure(ts, (StgClosure *) tso->stackobj, c, sep, child_data);
traversePushClosure(ts, (StgClosure *) tso->blocked_exceptions, c, sep, child_data);
traversePushClosure(ts, (StgClosure *) tso->bq, c, sep, child_data);
@@ -1288,6 +1339,9 @@ inner_loop:
case BLOCKING_QUEUE:
{
StgBlockingQueue *bq = (StgBlockingQueue *)c;
+
+ sep = traversePushReturn(ts, c, accum, sep);
+
traversePushClosure(ts, (StgClosure *) bq->link, c, sep, child_data);
traversePushClosure(ts, (StgClosure *) bq->bh, c, sep, child_data);
traversePushClosure(ts, (StgClosure *) bq->owner, c, sep, child_data);
@@ -1297,6 +1351,9 @@ inner_loop:
case PAP:
{
StgPAP *pap = (StgPAP *)c;
+
+ sep = traversePushReturn(ts, c, accum, sep);
+
traversePAP(ts, c, sep, child_data, pap->fun, pap->payload, pap->n_args);
goto loop;
}
@@ -1304,11 +1361,16 @@ inner_loop:
case AP:
{
StgAP *ap = (StgAP *)c;
+
+ sep = traversePushReturn(ts, c, accum, sep);
+
traversePAP(ts, c, sep, child_data, ap->fun, ap->payload, ap->n_args);
goto loop;
}
case AP_STACK:
+ sep = traversePushReturn(ts, c, accum, sep);
+
traversePushClosure(ts, ((StgAP_STACK *)c)->fun, c, sep, child_data);
traversePushStack(ts, c, sep, child_data,
(StgPtr)((StgAP_STACK *)c)->payload,
@@ -1317,16 +1379,38 @@ inner_loop:
goto loop;
}
- traversePushChildren(ts, c, &sep, child_data, &first_child);
+ stackElement se;
+ traverseGetChildren(c, &first_child, &other_children, &se);
+
// If first_child is null, c has no child.
// If first_child is not null, the top stack element points to the next
- // object. traversePushChildren() may or may not push a stackElement on the
- // stack.
- if (first_child == NULL)
+ // object.
+ if(first_child == NULL && ts->return_cb) { // no children
+ // This is only true when we're pushing additional return frames onto
+ // the stack due to return_cb, so don't get any funny ideas about
+ // replacing 'cp' by sep.
+ ASSERT(sep->c == cp);
+ ts->return_cb(c, accum, cp, &sep->accum);
goto loop;
-
- // sep is now the stack element traversePushChildren just pushed onto the
- // stack.
+ } else if (first_child == NULL) { // no children
+ goto loop;
+ } else if(!other_children) { // one child
+ // Pushing a return frame for one child is pretty inefficent. We could
+ // optimize this by storing a pointer to cp in c's profiling header
+ // instead. I tested this out in a Haskell prototype of this code and it
+ // works out but is rather fiddly.
+ //
+ // See Haskell model code here:
+ //
+ // https://gitlab.haskell.org/ghc/ghc/snippets/1461
+ sep = traversePushReturn(ts, c, accum, sep);
+ } else { // many children
+ se.sep = sep;
+ se.data = child_data;
+ se.accum = accum;
+
+ sep = pushStackElement(ts, se);
+ }
// (c, cp, data) = (first_child, c, child_data)
data = child_data;
diff --git a/rts/TraverseHeap.h b/rts/TraverseHeap.h
index 5e76872fb9..a06a7d2f18 100644
--- a/rts/TraverseHeap.h
+++ b/rts/TraverseHeap.h
@@ -33,6 +33,10 @@ typedef union stackData_ {
retainer c_child_r;
} stackData;
+typedef union stackAccum_ {
+ StgWord subtree_sizeW;
+} stackAccum;
+
typedef struct stackElement_ stackElement;
typedef struct traverseState_ {
@@ -77,13 +81,31 @@ typedef struct traverseState_ {
*
* Note:
*
- * stackSize is just an estimate measure of the depth of the graph. The
- * reason is that some heap objects have only a single child and may not
- * result in a new element being pushed onto the stack. Therefore, at the
- * end of retainer profiling, maxStackSize is some value no greater than
- * the actual depth of the graph.
+ * When return_cb == NULL stackSize is just an estimate measure of the
+ * depth of the graph. The reason is that some heap objects have only a
+ * single child and may not result in a new element being pushed onto the
+ * stack. Therefore, at the end of retainer profiling, maxStackSize is
+ * some value no greater than the actual depth of the graph.
*/
int stackSize, maxStackSize;
+
+ /**
+ * Callback called when processing of a closure 'c' is complete, i.e. when
+ * all it's children have been processed. Note: This includes leaf nodes
+ * without children.
+ *
+ * @param c The closure who's processing just completed.
+ * @param acc The current value of the accumulator for 'c' on the
+ * stack. It's about to be removed, hence the 'const'
+ * qualifier. This is the same accumulator 'visit_cb' got
+ * passed when 'c' was visited.
+ *
+ * @param c_parent The parent closure of 'c'
+ * @param acc_parent The accumulator associated with 'c_parent', currently
+ * on the stack.
+ */
+ void (*return_cb)(StgClosure *c, const stackAccum acc,
+ StgClosure *c_parent, stackAccum *acc_parent);
} traverseState;
/**
@@ -103,6 +125,7 @@ typedef bool (*visitClosure_cb) (
const StgClosure *cp,
const stackData data,
const bool first_visit,
+ stackAccum *accum,
stackData *child_data);
void traverseWorkStack(traverseState *ts, visitClosure_cb visit_cb);