summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--rts/RetainerProfile.c1353
-rw-r--r--rts/TraverseHeap.c1371
-rw-r--r--rts/rts.cabal.in1
3 files changed, 1372 insertions, 1353 deletions
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c
index 301f712e59..6f053c09c4 100644
--- a/rts/RetainerProfile.c
+++ b/rts/RetainerProfile.c
@@ -66,41 +66,6 @@ static uint32_t numObjectVisited; // total number of objects visited
static uint32_t timesAnyObjectVisited; // number of times any objects are
// visited
-/** Note [Profiling heap traversal visited bit]
- *
- * If the RTS is compiled with profiling enabled StgProfHeader can be used by
- * profiling code to store per-heap object information.
- *
- * The generic heap traversal code reserves the least significant bit of the
- * largest members of the 'trav' union to decide whether we've already visited a
- * given closure in the current pass or not. The rest of the field is free to be
- * used by the calling profiler.
- *
- * By doing things this way we implicitly assume that the LSB of the largest
- * field in the 'trav' union is insignificant. This is true at least for the
- * word aligned pointers which the retainer profiler currently stores there and
- * should be maintained by new users of the 'trav' union for example by shifting
- * the real data up by one bit.
- *
- * Since we don't want to have to scan the entire heap a second time just to
- * reset the per-object visitied bit before/after the real traversal we make the
- * interpretation of this bit dependent on the value of a global variable,
- * 'flip'.
- *
- * When the 'trav' bit is equal to the value of 'flip' the closure data is
- * valid otherwise not (see isTravDataValid). We then invert the value of 'flip'
- * on each heap traversal (see traverseWorkStack), in effect marking all
- * closure's data as invalid at once.
- *
- * There are some complications with this approach, namely: static objects and
- * mutable data. There we do just go over all existing objects to reset the bit
- * manually. See 'resetStaticObjectForProfiling' and 'computeRetainerSet'.
- */
-StgWord flip = 0;
-
-#define setTravDataToZero(c) \
- (c)->header.prof.hp.trav.lsb = flip
-
/* -----------------------------------------------------------------------------
* Retainer stack - header
* Note:
@@ -111,181 +76,8 @@ StgWord flip = 0;
* all.
* -------------------------------------------------------------------------- */
-typedef enum {
- // Object with fixed layout. Keeps an information about that
- // element was processed. (stackPos.next.step)
- posTypeStep,
- // Description of the pointers-first heap object. Keeps information
- // about layout. (stackPos.next.ptrs)
- posTypePtrs,
- // Keeps SRT bitmap (stackPos.next.srt)
- posTypeSRT,
- // Keeps a new object that was not inspected yet. Keeps a parent
- // element (stackPos.next.parent)
- posTypeFresh
-} nextPosType;
-
-typedef union {
- // fixed layout or layout specified by a field in the closure
- StgWord step;
-
- // layout.payload
- struct {
- // See StgClosureInfo in InfoTables.h
- StgHalfWord pos;
- StgHalfWord ptrs;
- StgPtr payload;
- } ptrs;
-
- // SRT
- struct {
- StgClosure *srt;
- } srt;
-} nextPos;
-
-/**
- * Position pointer into a closure. Determines what the next element to return
- * for a stackElement is.
- */
-typedef struct {
- nextPosType type;
- nextPos next;
-} stackPos;
-
-/**
- * An element of the traversal work-stack. Besides the closure itself this also
- * stores it's parent and associated data.
- *
- * When 'info.type == posTypeFresh' a 'stackElement' represents just one
- * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an
- * offset into the children of 'c'. This is to support returning a closure's
- * children one-by-one without pushing one element per child onto the stack. See
- * traversePushChildren() and traversePop().
- *
- */
-typedef struct stackElement_ {
- stackPos info;
- StgClosure *c;
- StgClosure *cp; // parent of 'c'. Only used when info.type == posTypeFresh.
- stackData data;
-} stackElement;
-
traverseState g_retainerTraverseState;
-
-#if defined(DEBUG)
-unsigned int g_traversalDebugLevel = 0;
-static inline void debug(const char *s, ...)
-{
- va_list ap;
-
- if(g_traversalDebugLevel == 0)
- return;
-
- va_start(ap,s);
- vdebugBelch(s, ap);
- va_end(ap);
-}
-#else
-#define debug(...)
-#endif
-
-// number of blocks allocated for one stack
-#define BLOCKS_IN_STACK 1
-
-/* -----------------------------------------------------------------------------
- * Add a new block group to the stack.
- * Invariants:
- * currentStack->link == s.
- * -------------------------------------------------------------------------- */
-STATIC_INLINE void
-newStackBlock( traverseState *ts, bdescr *bd )
-{
- ts->currentStack = bd;
- ts->stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
- ts->stackBottom = (stackElement *)bd->start;
- ts->stackLimit = (stackElement *)ts->stackTop;
- bd->free = (StgPtr)ts->stackLimit;
-}
-
-/* -----------------------------------------------------------------------------
- * Return to the previous block group.
- * Invariants:
- * s->link == currentStack.
- * -------------------------------------------------------------------------- */
-STATIC_INLINE void
-returnToOldStack( traverseState *ts, bdescr *bd )
-{
- ts->currentStack = bd;
- ts->stackTop = (stackElement *)bd->free;
- ts->stackBottom = (stackElement *)bd->start;
- ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
- bd->free = (StgPtr)ts->stackLimit;
-}
-
-/**
- * Initializes the traversal work-stack.
- */
-void
-initializeTraverseStack( traverseState *ts )
-{
- if (ts->firstStack != NULL) {
- freeChain(ts->firstStack);
- }
-
- ts->firstStack = allocGroup(BLOCKS_IN_STACK);
- ts->firstStack->link = NULL;
- ts->firstStack->u.back = NULL;
-
- ts->stackSize = 0;
- ts->maxStackSize = 0;
-
- newStackBlock(ts, ts->firstStack);
-}
-
-/**
- * Frees all the block groups in the traversal works-stack.
- *
- * Invariants:
- * firstStack != NULL
- */
-void
-closeTraverseStack( traverseState *ts )
-{
- freeChain(ts->firstStack);
- ts->firstStack = NULL;
-}
-
-int
-getTraverseStackMaxSize(traverseState *ts)
-{
- return ts->maxStackSize;
-}
-
-/* -----------------------------------------------------------------------------
- * Returns true if the whole stack is empty.
- * -------------------------------------------------------------------------- */
-STATIC_INLINE bool
-isEmptyWorkStack( traverseState *ts )
-{
- return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit;
-}
-
-/* -----------------------------------------------------------------------------
- * Returns size of stack
- * -------------------------------------------------------------------------- */
-W_
-traverseWorkStackBlocks(traverseState *ts)
-{
- bdescr* bd;
- W_ res = 0;
-
- for (bd = ts->firstStack; bd != NULL; bd = bd->link)
- res += bd->blocks;
-
- return res;
-}
-
W_
retainerStackBlocks(void)
{
@@ -293,648 +85,6 @@ retainerStackBlocks(void)
}
/* -----------------------------------------------------------------------------
- * Initializes *info from ptrs and payload.
- * Invariants:
- * payload[] begins with ptrs pointers followed by non-pointers.
- * -------------------------------------------------------------------------- */
-STATIC_INLINE void
-init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
-{
- info->type = posTypePtrs;
- info->next.ptrs.pos = 0;
- info->next.ptrs.ptrs = ptrs;
- info->next.ptrs.payload = payload;
-}
-
-/* -----------------------------------------------------------------------------
- * Find the next object from *info.
- * -------------------------------------------------------------------------- */
-STATIC_INLINE StgClosure *
-find_ptrs( stackPos *info )
-{
- if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
- return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
- } else {
- return NULL;
- }
-}
-
-/* -----------------------------------------------------------------------------
- * Initializes *info from SRT information stored in *infoTable.
- * -------------------------------------------------------------------------- */
-STATIC_INLINE void
-init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
-{
- info->type = posTypeSRT;
- if (infoTable->i.srt) {
- info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable);
- } else {
- info->next.srt.srt = NULL;
- }
-}
-
-STATIC_INLINE void
-init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
-{
- info->type = posTypeSRT;
- if (infoTable->i.srt) {
- info->next.srt.srt = (StgClosure*)GET_SRT(infoTable);
- } else {
- info->next.srt.srt = NULL;
- }
-}
-
-/* -----------------------------------------------------------------------------
- * Find the next object from *info.
- * -------------------------------------------------------------------------- */
-STATIC_INLINE StgClosure *
-find_srt( stackPos *info )
-{
- StgClosure *c;
- if (info->type == posTypeSRT) {
- c = info->next.srt.srt;
- info->next.srt.srt = NULL;
- return c;
- }
-
- return NULL;
-}
-
-/**
- * Push a set of closures, represented by a single 'stackElement', onto the
- * traversal work-stack.
- */
-static void
-pushStackElement(traverseState *ts, stackElement *se)
-{
- bdescr *nbd; // Next Block Descriptor
- if (ts->stackTop - 1 < ts->stackBottom) {
- debug("pushStackElement() to the next stack.\n");
-
- // currentStack->free is updated when the active stack is switched
- // to the next stack.
- ts->currentStack->free = (StgPtr)ts->stackTop;
-
- if (ts->currentStack->link == NULL) {
- nbd = allocGroup(BLOCKS_IN_STACK);
- nbd->link = NULL;
- nbd->u.back = ts->currentStack;
- ts->currentStack->link = nbd;
- } else
- nbd = ts->currentStack->link;
-
- newStackBlock(ts, nbd);
- }
-
- // adjust stackTop (acutal push)
- ts->stackTop--;
- // If the size of stackElement was huge, we would better replace the
- // following statement by either a memcpy() call or a switch statement
- // on the type of the element. Currently, the size of stackElement is
- // small enough (5 words) that this direct assignment seems to be enough.
- *ts->stackTop = *se;
-
- ts->stackSize++;
- if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
- ASSERT(ts->stackSize >= 0);
- debug("stackSize = %d\n", ts->stackSize);
-}
-
-/**
- * Push a single closure onto the traversal work-stack.
- *
- * cp - object's parent
- * c - closure
- * data - data associated with closure.
- */
-inline void
-traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackData data) {
- stackElement se;
-
- se.c = c;
- se.cp = cp;
- se.data = data;
- se.info.type = posTypeFresh;
-
- pushStackElement(ts, &se);
-};
-
-/**
- * traversePushChildren() extracts the first child of 'c' in 'first_child' and
- * conceptually pushes all remaining children of 'c' onto the traversal stack
- * while associating 'data' with the pushed elements to be returned upon poping.
- *
- * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed
- * onto the stack.
- *
- * If 'c' has only one child, 'first_child' is set to that child and nothing is
- * pushed onto the stack.
- *
- * Invariants:
- *
- * - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot
- * be any stack objects.
- *
- * Note: SRTs are considered to be children as well.
- *
- * Note: When pushing onto the stack we only really push one 'stackElement'
- * representing all children onto the stack. See traversePop()
- */
-STATIC_INLINE void
-traversePushChildren(traverseState *ts, StgClosure *c, stackData data, StgClosure **first_child)
-{
- stackElement se;
-
- debug("traversePushChildren(): stackTop = 0x%x\n", ts->stackTop);
-
- ASSERT(get_itbl(c)->type != TSO);
- ASSERT(get_itbl(c)->type != AP_STACK);
-
- //
- // fill in se
- //
-
- se.c = c;
- se.data = data;
- // Note: se.cp ommitted on purpose, only traversePushClosure uses that.
-
- // fill in se.info
- switch (get_itbl(c)->type) {
- // no child, no SRT
- case CONSTR_0_1:
- case CONSTR_0_2:
- case ARR_WORDS:
- case COMPACT_NFDATA:
- *first_child = NULL;
- return;
-
- // one child (fixed), no SRT
- case MUT_VAR_CLEAN:
- case MUT_VAR_DIRTY:
- *first_child = ((StgMutVar *)c)->var;
- return;
- case THUNK_SELECTOR:
- *first_child = ((StgSelector *)c)->selectee;
- return;
- case BLACKHOLE:
- *first_child = ((StgInd *)c)->indirectee;
- return;
- case CONSTR_1_0:
- case CONSTR_1_1:
- *first_child = c->payload[0];
- return;
-
- // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
- // of the next child. We do not write a separate initialization code.
- // Also we do not have to initialize info.type;
-
- // two children (fixed), no SRT
- // need to push a stackElement, but nothing to store in se.info
- case CONSTR_2_0:
- *first_child = c->payload[0]; // return the first pointer
- se.info.type = posTypeStep;
- se.info.next.step = 2; // 2 = second
- break;
-
- // three children (fixed), no SRT
- // need to push a stackElement
- case MVAR_CLEAN:
- case MVAR_DIRTY:
- // head must be TSO and the head of a linked list of TSOs.
- // Shoule it be a child? Seems to be yes.
- *first_child = (StgClosure *)((StgMVar *)c)->head;
- se.info.type = posTypeStep;
- se.info.next.step = 2; // 2 = second
- break;
-
- // three children (fixed), no SRT
- case WEAK:
- *first_child = ((StgWeak *)c)->key;
- se.info.type = posTypeStep;
- se.info.next.step = 2;
- break;
-
- // layout.payload.ptrs, no SRT
- case TVAR:
- case CONSTR:
- case CONSTR_NOCAF:
- case PRIM:
- case MUT_PRIM:
- case BCO:
- init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
- (StgPtr)c->payload);
- *first_child = find_ptrs(&se.info);
- if (*first_child == NULL)
- return; // no child
- break;
-
- // StgMutArrPtr.ptrs, no SRT
- case MUT_ARR_PTRS_CLEAN:
- case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN_CLEAN:
- case MUT_ARR_PTRS_FROZEN_DIRTY:
- init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
- (StgPtr)(((StgMutArrPtrs *)c)->payload));
- *first_child = find_ptrs(&se.info);
- if (*first_child == NULL)
- return;
- break;
-
- // StgMutArrPtr.ptrs, no SRT
- case SMALL_MUT_ARR_PTRS_CLEAN:
- case SMALL_MUT_ARR_PTRS_DIRTY:
- case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
- case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
- init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
- (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
- *first_child = find_ptrs(&se.info);
- if (*first_child == NULL)
- return;
- break;
-
- // layout.payload.ptrs, SRT
- case FUN_STATIC:
- case FUN: // *c is a heap object.
- case FUN_2_0:
- init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
- *first_child = find_ptrs(&se.info);
- if (*first_child == NULL)
- // no child from ptrs, so check SRT
- goto fun_srt_only;
- break;
-
- case THUNK:
- case THUNK_2_0:
- init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
- (StgPtr)((StgThunk *)c)->payload);
- *first_child = find_ptrs(&se.info);
- if (*first_child == NULL)
- // no child from ptrs, so check SRT
- goto thunk_srt_only;
- break;
-
- // 1 fixed child, SRT
- case FUN_1_0:
- case FUN_1_1:
- *first_child = c->payload[0];
- ASSERT(*first_child != NULL);
- init_srt_fun(&se.info, get_fun_itbl(c));
- break;
-
- case THUNK_1_0:
- case THUNK_1_1:
- *first_child = ((StgThunk *)c)->payload[0];
- ASSERT(*first_child != NULL);
- init_srt_thunk(&se.info, get_thunk_itbl(c));
- break;
-
- case FUN_0_1: // *c is a heap object.
- case FUN_0_2:
- fun_srt_only:
- init_srt_fun(&se.info, get_fun_itbl(c));
- *first_child = find_srt(&se.info);
- if (*first_child == NULL)
- return; // no child
- break;
-
- // SRT only
- case THUNK_STATIC:
- ASSERT(get_itbl(c)->srt != 0);
- /* fall-thru */
- case THUNK_0_1:
- case THUNK_0_2:
- thunk_srt_only:
- init_srt_thunk(&se.info, get_thunk_itbl(c));
- *first_child = find_srt(&se.info);
- if (*first_child == NULL)
- return; // no child
- break;
-
- case TREC_CHUNK:
- *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
- se.info.type = posTypeStep;
- se.info.next.step = 0; // entry no.
- break;
-
- // cannot appear
- case PAP:
- case AP:
- case AP_STACK:
- case TSO:
- case STACK:
- case IND_STATIC:
- // stack objects
- case UPDATE_FRAME:
- case CATCH_FRAME:
- case UNDERFLOW_FRAME:
- case STOP_FRAME:
- case RET_BCO:
- case RET_SMALL:
- case RET_BIG:
- // invalid objects
- case IND:
- case INVALID_OBJECT:
- default:
- barf("Invalid object *c in push(): %d", get_itbl(c)->type);
- return;
- }
-
- // se.cp has to be initialized when type==posTypeFresh. We don't do that
- // here though. So type must be !=posTypeFresh.
- ASSERT(se.info.type != posTypeFresh);
-
- pushStackElement(ts, &se);
-}
-
-/**
- * popStackElement(): Remove a depleted stackElement from the top of the
- * traversal work-stack.
- *
- * Invariants:
- * stackTop cannot be equal to stackLimit unless the whole stack is
- * empty, in which case popStackElement() is not allowed.
- */
-static void
-popStackElement(traverseState *ts) {
- debug("popStackElement(): stackTop = 0x%x\n", ts->stackTop);
-
- ASSERT(ts->stackTop != ts->stackLimit);
- ASSERT(!isEmptyWorkStack(ts));
-
- // <= (instead of <) is wrong!
- if (ts->stackTop + 1 < ts->stackLimit) {
- ts->stackTop++;
-
- ts->stackSize--;
- if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
- ASSERT(ts->stackSize >= 0);
- debug("stackSize = (--) %d\n", ts->stackSize);
-
- return;
- }
-
- bdescr *pbd; // Previous Block Descriptor
-
- debug("popStackElement() to the previous stack.\n");
-
- ASSERT(ts->stackTop + 1 == ts->stackLimit);
- ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start);
-
- if (ts->firstStack == ts->currentStack) {
- // The stack is completely empty.
- ts->stackTop++;
- ASSERT(ts->stackTop == ts->stackLimit);
-
- ts->stackSize--;
- if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
- ASSERT(ts->stackSize >= 0);
- debug("stackSize = %d\n", ts->stackSize);
-
- return;
- }
-
- // currentStack->free is updated when the active stack is switched back
- // to the previous stack.
- ts->currentStack->free = (StgPtr)ts->stackLimit;
-
- // find the previous block descriptor
- pbd = ts->currentStack->u.back;
- ASSERT(pbd != NULL);
-
- returnToOldStack(ts, pbd);
-
- ts->stackSize--;
- if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
- ASSERT(ts->stackSize >= 0);
- debug("stackSize = %d\n", ts->stackSize);
-}
-
-/**
- * Finds the next object to be considered for retainer profiling and store
- * its pointer to *c.
- *
- * If the unprocessed object was stored in the stack (posTypeFresh), the
- * this object is returned as-is. Otherwise Test if the topmost stack
- * element indicates that more objects are left,
- * and if so, retrieve the first object and store its pointer to *c. Also,
- * set *cp and *data appropriately, both of which are stored in the stack
- * element. The topmost stack element then is overwritten so as for it to now
- * denote the next object.
- *
- * If the topmost stack element indicates no more objects are left, pop
- * off the stack element until either an object can be retrieved or
- * the work-stack becomes empty, indicated by true returned by
- * isEmptyWorkStack(), in which case *c is set to NULL.
- *
- * Note:
- *
- * It is okay to call this function even when the work-stack is empty.
- */
-STATIC_INLINE void
-traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data)
-{
- stackElement *se;
-
- debug("traversePop(): stackTop = 0x%x\n", ts->stackTop);
-
- // Is this the last internal element? If so instead of modifying the current
- // stackElement in place we actually remove it from the stack.
- bool last = false;
-
- do {
- if (isEmptyWorkStack(ts)) {
- *c = NULL;
- return;
- }
-
- // Note: Below every `break`, where the loop condition is true, must be
- // accompanied by a popStackElement() otherwise this is an infinite
- // loop.
- se = ts->stackTop;
-
- // If this is a top-level element, you should pop that out.
- if (se->info.type == posTypeFresh) {
- *cp = se->cp;
- *c = se->c;
- *data = se->data;
- popStackElement(ts);
- return;
- }
-
- // Note: The first ptr of all of these was already returned as
- // *fist_child in push(), so we always start with the second field.
- switch (get_itbl(se->c)->type) {
- // two children (fixed), no SRT
- // nothing in se.info
- case CONSTR_2_0:
- *c = se->c->payload[1];
- last = true;
- goto out;
-
- // three children (fixed), no SRT
- // need to push a stackElement
- case MVAR_CLEAN:
- case MVAR_DIRTY:
- if (se->info.next.step == 2) {
- *c = (StgClosure *)((StgMVar *)se->c)->tail;
- se->info.next.step++; // move to the next step
- // no popStackElement
- } else {
- *c = ((StgMVar *)se->c)->value;
- last = true;
- }
- goto out;
-
- // three children (fixed), no SRT
- case WEAK:
- if (se->info.next.step == 2) {
- *c = ((StgWeak *)se->c)->value;
- se->info.next.step++;
- // no popStackElement
- } else {
- *c = ((StgWeak *)se->c)->finalizer;
- last = true;
- }
- goto out;
-
- case TREC_CHUNK: {
- // These are pretty complicated: we have N entries, each
- // of which contains 3 fields that we want to follow. So
- // we divide the step counter: the 2 low bits indicate
- // which field, and the rest of the bits indicate the
- // entry number (starting from zero).
- TRecEntry *entry;
- uint32_t entry_no = se->info.next.step >> 2;
- uint32_t field_no = se->info.next.step & 3;
- if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
- *c = NULL;
- popStackElement(ts);
- break; // this breaks out of the switch not the loop
- }
- entry = &((StgTRecChunk *)se->c)->entries[entry_no];
- if (field_no == 0) {
- *c = (StgClosure *)entry->tvar;
- } else if (field_no == 1) {
- *c = entry->expected_value;
- } else {
- *c = entry->new_value;
- }
- se->info.next.step++;
- goto out;
- }
-
- case TVAR:
- case CONSTR:
- case PRIM:
- case MUT_PRIM:
- case BCO:
- // StgMutArrPtr.ptrs, no SRT
- case MUT_ARR_PTRS_CLEAN:
- case MUT_ARR_PTRS_DIRTY:
- case MUT_ARR_PTRS_FROZEN_CLEAN:
- case MUT_ARR_PTRS_FROZEN_DIRTY:
- case SMALL_MUT_ARR_PTRS_CLEAN:
- case SMALL_MUT_ARR_PTRS_DIRTY:
- case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
- case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
- *c = find_ptrs(&se->info);
- if (*c == NULL) {
- popStackElement(ts);
- break; // this breaks out of the switch not the loop
- }
- goto out;
-
- // layout.payload.ptrs, SRT
- case FUN: // always a heap object
- case FUN_STATIC:
- case FUN_2_0:
- if (se->info.type == posTypePtrs) {
- *c = find_ptrs(&se->info);
- if (*c != NULL) {
- goto out;
- }
- init_srt_fun(&se->info, get_fun_itbl(se->c));
- }
- goto do_srt;
-
- case THUNK:
- case THUNK_2_0:
- if (se->info.type == posTypePtrs) {
- *c = find_ptrs(&se->info);
- if (*c != NULL) {
- goto out;
- }
- init_srt_thunk(&se->info, get_thunk_itbl(se->c));
- }
- goto do_srt;
-
- // SRT
- do_srt:
- case THUNK_STATIC:
- case FUN_0_1:
- case FUN_0_2:
- case THUNK_0_1:
- case THUNK_0_2:
- case FUN_1_0:
- case FUN_1_1:
- case THUNK_1_0:
- case THUNK_1_1:
- *c = find_srt(&se->info);
- if(*c == NULL) {
- popStackElement(ts);
- break; // this breaks out of the switch not the loop
- }
- goto out;
-
- // no child (fixed), no SRT
- case CONSTR_0_1:
- case CONSTR_0_2:
- case ARR_WORDS:
- // one child (fixed), no SRT
- case MUT_VAR_CLEAN:
- case MUT_VAR_DIRTY:
- case THUNK_SELECTOR:
- case CONSTR_1_1:
- // cannot appear
- case PAP:
- case AP:
- case AP_STACK:
- case TSO:
- case STACK:
- case IND_STATIC:
- case CONSTR_NOCAF:
- // stack objects
- case UPDATE_FRAME:
- case CATCH_FRAME:
- case UNDERFLOW_FRAME:
- case STOP_FRAME:
- case RET_BCO:
- case RET_SMALL:
- case RET_BIG:
- // invalid objects
- case IND:
- case INVALID_OBJECT:
- default:
- barf("Invalid object *c in traversePop(): %d", get_itbl(se->c)->type);
- return;
- }
- } while (*c == NULL);
-
-out:
-
- ASSERT(*c != NULL);
-
- *cp = se->c;
- *data = se->data;
-
- if(last)
- popStackElement(ts);
-
- return;
-
-}
-
-/* -----------------------------------------------------------------------------
* RETAINER PROFILING ENGINE
* -------------------------------------------------------------------------- */
@@ -954,22 +104,6 @@ endRetainerProfiling( void )
outputAllRetainerSet(prof_file);
}
-/**
- * Make sure a closure's profiling data is initialized to zero if it does not
- * conform to the current value of the flip bit, returns true in this case.
- *
- * See Note [Profiling heap traversal visited bit].
- */
-bool
-traverseMaybeInitClosureData(StgClosure *c)
-{
- if (!isTravDataValid(c)) {
- setTravDataToZero(c);
- return true;
- }
- return false;
-}
-
/* -----------------------------------------------------------------------------
* Returns true if *c is a retainer.
* In general the retainers are the objects that may be the roots of the
@@ -1120,214 +254,6 @@ associate( StgClosure *c, RetainerSet *s )
RSET(c) = (RetainerSet *)((StgWord)s | flip);
}
-/* -----------------------------------------------------------------------------
- Call traversePushClosure for each of the closures covered by a large bitmap.
- -------------------------------------------------------------------------- */
-
-static void
-traverseLargeBitmap (traverseState *ts, StgPtr p, StgLargeBitmap *large_bitmap,
- uint32_t size, StgClosure *c, stackData data)
-{
- uint32_t i, b;
- StgWord bitmap;
-
- b = 0;
- bitmap = large_bitmap->bitmap[b];
- for (i = 0; i < size; ) {
- if ((bitmap & 1) == 0) {
- traversePushClosure(ts, (StgClosure *)*p, c, data);
- }
- i++;
- p++;
- if (i % BITS_IN(W_) == 0) {
- b++;
- bitmap = large_bitmap->bitmap[b];
- } else {
- bitmap = bitmap >> 1;
- }
- }
-}
-
-STATIC_INLINE StgPtr
-traverseSmallBitmap (traverseState *ts, StgPtr p, uint32_t size, StgWord bitmap,
- StgClosure *c, stackData data)
-{
- while (size > 0) {
- if ((bitmap & 1) == 0) {
- traversePushClosure(ts, (StgClosure *)*p, c, data);
- }
- p++;
- bitmap = bitmap >> 1;
- size--;
- }
- return p;
-}
-
-/**
- * traversePushStack(ts, cp, data, stackStart, stackEnd) pushes all the objects
- * in the STG stack-chunk from stackStart to stackEnd onto the traversal
- * work-stack with 'c' and 'data' being their parent and associated data,
- * respectively.
- *
- * Invariants:
- *
- * *cp is one of the following: TSO, AP_STACK.
- *
- * stackStart < stackEnd.
- *
- * If *c is TSO, its state is not ThreadComplete,or ThreadKilled,
- * which means that its stack is ready to process.
- *
- * Note:
- *
- * This code was almost plagiarzied from GC.c! For each pointer,
- * traversePushClosure() is invoked instead of evacuate().
- */
-static void
-traversePushStack(traverseState *ts, StgClosure *cp, stackData data,
- StgPtr stackStart, StgPtr stackEnd)
-{
- StgPtr p;
- const StgRetInfoTable *info;
- StgWord bitmap;
- uint32_t size;
-
- ASSERT(get_itbl(cp)->type == STACK);
-
- p = stackStart;
- while (p < stackEnd) {
- info = get_ret_itbl((StgClosure *)p);
-
- switch(info->i.type) {
-
- case UPDATE_FRAME:
- traversePushClosure(ts, ((StgUpdateFrame *)p)->updatee, cp, data);
- p += sizeofW(StgUpdateFrame);
- continue;
-
- case UNDERFLOW_FRAME:
- case STOP_FRAME:
- case CATCH_FRAME:
- case CATCH_STM_FRAME:
- case CATCH_RETRY_FRAME:
- case ATOMICALLY_FRAME:
- case RET_SMALL:
- bitmap = BITMAP_BITS(info->i.layout.bitmap);
- size = BITMAP_SIZE(info->i.layout.bitmap);
- p++;
- p = traverseSmallBitmap(ts, p, size, bitmap, cp, data);
-
- follow_srt:
- if (info->i.srt) {
- traversePushClosure(ts, GET_SRT(info), cp, data);
- }
- continue;
-
- case RET_BCO: {
- StgBCO *bco;
-
- p++;
- traversePushClosure(ts, (StgClosure*)*p, cp, data);
- bco = (StgBCO *)*p;
- p++;
- size = BCO_BITMAP_SIZE(bco);
- traverseLargeBitmap(ts, p, BCO_BITMAP(bco), size, cp, data);
- p += size;
- continue;
- }
-
- // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
- case RET_BIG:
- size = GET_LARGE_BITMAP(&info->i)->size;
- p++;
- traverseLargeBitmap(ts, p, GET_LARGE_BITMAP(&info->i),
- size, cp, data);
- p += size;
- // and don't forget to follow the SRT
- goto follow_srt;
-
- case RET_FUN: {
- StgRetFun *ret_fun = (StgRetFun *)p;
- const StgFunInfoTable *fun_info;
-
- traversePushClosure(ts, ret_fun->fun, cp, data);
- fun_info = get_fun_itbl(UNTAG_CONST_CLOSURE(ret_fun->fun));
-
- p = (P_)&ret_fun->payload;
- switch (fun_info->f.fun_type) {
- case ARG_GEN:
- bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
- size = BITMAP_SIZE(fun_info->f.b.bitmap);
- p = traverseSmallBitmap(ts, p, size, bitmap, cp, data);
- break;
- case ARG_GEN_BIG:
- size = GET_FUN_LARGE_BITMAP(fun_info)->size;
- traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info),
- size, cp, data);
- p += size;
- break;
- default:
- bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
- size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
- p = traverseSmallBitmap(ts, p, size, bitmap, cp, data);
- break;
- }
- goto follow_srt;
- }
-
- default:
- barf("Invalid object found in traversePushStack(): %d",
- (int)(info->i.type));
- }
- }
-}
-
-/* ----------------------------------------------------------------------------
- * Call traversePushClosure for each of the children of a PAP/AP
- * ------------------------------------------------------------------------- */
-
-STATIC_INLINE StgPtr
-traversePAP (traverseState *ts,
- StgClosure *pap, /* NOT tagged */
- stackData data,
- StgClosure *fun, /* tagged */
- StgClosure** payload, StgWord n_args)
-{
- StgPtr p;
- StgWord bitmap;
- const StgFunInfoTable *fun_info;
-
- traversePushClosure(ts, fun, pap, data);
- fun = UNTAG_CLOSURE(fun);
- fun_info = get_fun_itbl(fun);
- ASSERT(fun_info->i.type != PAP);
-
- p = (StgPtr)payload;
-
- switch (fun_info->f.fun_type) {
- case ARG_GEN:
- bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
- p = traverseSmallBitmap(ts, p, n_args, bitmap,
- pap, data);
- break;
- case ARG_GEN_BIG:
- traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info),
- n_args, pap, data);
- p += n_args;
- break;
- case ARG_BCO:
- traverseLargeBitmap(ts, (StgPtr)payload, BCO_BITMAP(fun),
- n_args, pap, data);
- p += n_args;
- break;
- default:
- bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
- p = traverseSmallBitmap(ts, p, n_args, bitmap, pap, data);
- break;
- }
- return p;
-}
-
static bool
retainVisitClosure( StgClosure *c, const StgClosure *cp, const stackData data, const bool first_visit, stackData *out_data )
{
@@ -1408,219 +334,6 @@ retainVisitClosure( StgClosure *c, const StgClosure *cp, const stackData data, c
return 1;
}
-static void
-resetMutableObjects(void)
-{
- uint32_t g, n;
- bdescr *bd;
- StgPtr ml;
-
- // The following code resets the 'trav' field of each unvisited mutable
- // object.
- for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
- // NOT true: even G0 has a block on its mutable list
- // ASSERT(g != 0 || (generations[g].mut_list == NULL));
-
- // Traversing through mut_list is necessary
- // because we can find MUT_VAR objects which have not been
- // visited during heap traversal.
- for (n = 0; n < n_capabilities; n++) {
- for (bd = capabilities[n]->mut_lists[g]; bd != NULL; bd = bd->link) {
- for (ml = bd->start; ml < bd->free; ml++) {
-
- traverseMaybeInitClosureData((StgClosure *)*ml);
- }
- }
- }
- }
-}
-
-/**
- * Traverse all closures on the traversal work-stack, calling 'visit_cb' on each
- * closure. See 'visitClosure_cb' for details. This function flips the 'flip'
- * bit and hence every closure's profiling data will be reset to zero upon
- * visiting. See Note [Profiling heap traversal visited bit].
- */
-void
-traverseWorkStack(traverseState *ts, visitClosure_cb visit_cb)
-{
- // first_child = first child of c
- StgClosure *c, *cp, *first_child;
- stackData data, child_data;
- StgWord typeOfc;
-
- // Now we flip the flip bit.
- flip = flip ^ 1;
-
- // c = Current closure (possibly tagged)
- // cp = Current closure's Parent (NOT tagged)
- // data = current closures' associated data (NOT tagged)
- // data_out = data to associate with current closure's children
-
-loop:
- traversePop(ts, &c, &cp, &data);
-
- if (c == NULL) {
- debug("maxStackSize= %d\n", ts->maxStackSize);
- resetMutableObjects();
- return;
- }
-inner_loop:
- c = UNTAG_CLOSURE(c);
-
- typeOfc = get_itbl(c)->type;
-
- // special cases
- switch (typeOfc) {
- case TSO:
- if (((StgTSO *)c)->what_next == ThreadComplete ||
- ((StgTSO *)c)->what_next == ThreadKilled) {
- debug("ThreadComplete or ThreadKilled encountered in traverseWorkStack()\n");
- goto loop;
- }
- break;
-
- case IND_STATIC:
- // We just skip IND_STATIC, so it's never visited.
- c = ((StgIndStatic *)c)->indirectee;
- goto inner_loop;
-
- case CONSTR_NOCAF:
- // static objects with no pointers out, so goto loop.
-
- // It is not just enough not to visit *c; it is
- // mandatory because CONSTR_NOCAF are not reachable from
- // scavenged_static_objects, the list from which is assumed to traverse
- // all static objects after major garbage collections.
- goto loop;
-
- case THUNK_STATIC:
- if (get_itbl(c)->srt == 0) {
- // No need to visit *c; no dynamic objects are reachable from it.
- //
- // Static objects: if we traverse all the live closures,
- // including static closures, during each heap census then
- // we will observe that some static closures appear and
- // disappear. eg. a closure may contain a pointer to a
- // static function 'f' which is not otherwise reachable
- // (it doesn't indirectly point to any CAFs, so it doesn't
- // appear in any SRTs), so we would find 'f' during
- // traversal. However on the next sweep there may be no
- // closures pointing to 'f'.
- //
- // We must therefore ignore static closures whose SRT is
- // empty, because these are exactly the closures that may
- // "appear". A closure with a non-empty SRT, and which is
- // still required, will always be reachable.
- //
- // But what about CONSTR? Surely these may be able
- // to appear, and they don't have SRTs, so we can't
- // check. So for now, we're calling
- // resetStaticObjectForProfiling() from the
- // garbage collector to reset the retainer sets in all the
- // reachable static objects.
- goto loop;
- }
- /* fall-thru */
-
- case FUN_STATIC: {
- const StgInfoTable *info = get_itbl(c);
- if (info->srt == 0 && info->layout.payload.ptrs == 0) {
- goto loop;
- } else {
- break;
- }
- }
-
- default:
- break;
- }
-
- // If this is the first visit to c, initialize its data.
- bool first_visit = traverseMaybeInitClosureData(c);
- bool traverse_children
- = visit_cb(c, cp, data, first_visit, (stackData*)&child_data);
- if(!traverse_children)
- goto loop;
-
- // process child
-
- // Special case closures: we process these all in one go rather
- // than attempting to save the current position, because doing so
- // would be hard.
- switch (typeOfc) {
- case STACK:
- traversePushStack(ts, c, child_data,
- ((StgStack *)c)->sp,
- ((StgStack *)c)->stack + ((StgStack *)c)->stack_size);
- goto loop;
-
- case TSO:
- {
- StgTSO *tso = (StgTSO *)c;
-
- traversePushClosure(ts, (StgClosure *) tso->stackobj, c, child_data);
- traversePushClosure(ts, (StgClosure *) tso->blocked_exceptions, c, child_data);
- traversePushClosure(ts, (StgClosure *) tso->bq, c, child_data);
- traversePushClosure(ts, (StgClosure *) tso->trec, c, child_data);
- if ( tso->why_blocked == BlockedOnMVar
- || tso->why_blocked == BlockedOnMVarRead
- || tso->why_blocked == BlockedOnBlackHole
- || tso->why_blocked == BlockedOnMsgThrowTo
- ) {
- traversePushClosure(ts, tso->block_info.closure, c, child_data);
- }
- goto loop;
- }
-
- case BLOCKING_QUEUE:
- {
- StgBlockingQueue *bq = (StgBlockingQueue *)c;
- traversePushClosure(ts, (StgClosure *) bq->link, c, child_data);
- traversePushClosure(ts, (StgClosure *) bq->bh, c, child_data);
- traversePushClosure(ts, (StgClosure *) bq->owner, c, child_data);
- goto loop;
- }
-
- case PAP:
- {
- StgPAP *pap = (StgPAP *)c;
- traversePAP(ts, c, child_data, pap->fun, pap->payload, pap->n_args);
- goto loop;
- }
-
- case AP:
- {
- StgAP *ap = (StgAP *)c;
- traversePAP(ts, c, child_data, ap->fun, ap->payload, ap->n_args);
- goto loop;
- }
-
- case AP_STACK:
- traversePushClosure(ts, ((StgAP_STACK *)c)->fun, c, child_data);
- traversePushStack(ts, c, child_data,
- (StgPtr)((StgAP_STACK *)c)->payload,
- (StgPtr)((StgAP_STACK *)c)->payload +
- ((StgAP_STACK *)c)->size);
- goto loop;
- }
-
- traversePushChildren(ts, c, child_data, &first_child);
-
- // If first_child is null, c has no child.
- // If first_child is not null, the top stack element points to the next
- // object. traversePushChildren() may or may not push a stackElement on the
- // stack.
- if (first_child == NULL)
- goto loop;
-
- // (c, cp, data) = (first_child, c, child_data)
- data = child_data;
- cp = c;
- c = first_child;
- goto inner_loop;
-}
-
/**
* Push every object reachable from *tl onto the traversal work stack.
*/
@@ -1684,72 +397,6 @@ computeRetainerSet( traverseState *ts )
}
/* -----------------------------------------------------------------------------
- * Traverse all static objects for which we compute retainer sets,
- * and reset their rs fields to NULL, which is accomplished by
- * invoking traverseMaybeInitClosureData(). This function must be called
- * before zeroing all objects reachable from scavenged_static_objects
- * in the case of major garbage collections. See GarbageCollect() in
- * GC.c.
- * Note:
- * The mut_once_list of the oldest generation must also be traversed?
- * Why? Because if the evacuation of an object pointed to by a static
- * indirection object fails, it is put back to the mut_once_list of
- * the oldest generation.
- * However, this is not necessary because any static indirection objects
- * are just traversed through to reach dynamic objects. In other words,
- * they are not taken into consideration in computing retainer sets.
- *
- * SDM (20/7/2011): I don't think this is doing anything sensible,
- * because it happens before retainerProfile() and at the beginning of
- * retainerProfil() we change the sense of 'flip'. So all of the
- * calls to traverseMaybeInitClosureData() here are initialising retainer sets
- * with the wrong flip. Also, I don't see why this is necessary. I
- * added a traverseMaybeInitClosureData() call to retainRoot(), and that seems
- * to have fixed the assertion failure in retainerSetOf() I was
- * encountering.
- * -------------------------------------------------------------------------- */
-void
-resetStaticObjectForProfiling( StgClosure *static_objects )
-{
- uint32_t count = 0;
- StgClosure *p;
-
- p = static_objects;
- while (p != END_OF_STATIC_OBJECT_LIST) {
- p = UNTAG_STATIC_LIST_PTR(p);
- count++;
-
- switch (get_itbl(p)->type) {
- case IND_STATIC:
- // Since we do not compute the retainer set of any
- // IND_STATIC object, we don't have to reset its retainer
- // field.
- p = (StgClosure*)*IND_STATIC_LINK(p);
- break;
- case THUNK_STATIC:
- traverseMaybeInitClosureData(p);
- p = (StgClosure*)*THUNK_STATIC_LINK(p);
- break;
- case FUN_STATIC:
- case CONSTR:
- case CONSTR_1_0:
- case CONSTR_2_0:
- case CONSTR_1_1:
- case CONSTR_NOCAF:
- traverseMaybeInitClosureData(p);
- p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
- break;
- default:
- barf("resetStaticObjectForProfiling: %p (%lu)",
- p, (unsigned long)get_itbl(p)->type);
- break;
- }
- }
-
- debug("count in scavenged_static_objects = %d\n", count);
-}
-
-/* -----------------------------------------------------------------------------
* Perform retainer profiling.
* N is the oldest generation being profilied, where the generations are
* numbered starting at 0.
diff --git a/rts/TraverseHeap.c b/rts/TraverseHeap.c
new file mode 100644
index 0000000000..bf2584cef4
--- /dev/null
+++ b/rts/TraverseHeap.c
@@ -0,0 +1,1371 @@
+/* -----------------------------------------------------------------------------
+ *
+ * (c) The GHC Team, 2001,2019
+ * Author: Sungwoo Park, Daniel Gröber
+ *
+ * Generalised profiling heap traversal.
+ *
+ * ---------------------------------------------------------------------------*/
+
+#if defined(PROFILING)
+
+#include "PosixSource.h"
+#include "Rts.h"
+#include "sm/Storage.h"
+
+#include "TraverseHeap.h"
+
+/** Note [Profiling heap traversal visited bit]
+ *
+ * If the RTS is compiled with profiling enabled StgProfHeader can be used by
+ * profiling code to store per-heap object information.
+ *
+ * The generic heap traversal code reserves the least significant bit of the
+ * largest members of the 'trav' union to decide whether we've already visited a
+ * given closure in the current pass or not. The rest of the field is free to be
+ * used by the calling profiler.
+ *
+ * By doing things this way we implicitly assume that the LSB of the largest
+ * field in the 'trav' union is insignificant. This is true at least for the
+ * word aligned pointers which the retainer profiler currently stores there and
+ * should be maintained by new users of the 'trav' union for example by shifting
+ * the real data up by one bit.
+ *
+ * Since we don't want to have to scan the entire heap a second time just to
+ * reset the per-object visitied bit before/after the real traversal we make the
+ * interpretation of this bit dependent on the value of a global variable,
+ * 'flip'.
+ *
+ * When the 'trav' bit is equal to the value of 'flip' the closure data is
+ * valid otherwise not (see isTravDataValid). We then invert the value of 'flip'
+ * on each heap traversal (see traverseWorkStack), in effect marking all
+ * closure's data as invalid at once.
+ *
+ * There are some complications with this approach, namely: static objects and
+ * mutable data. There we do just go over all existing objects to reset the bit
+ * manually. See 'resetStaticObjectForProfiling' and 'resetMutableObjects'.
+ */
+StgWord flip = 0;
+
+#define setTravDataToZero(c) \
+ (c)->header.prof.hp.trav.lsb = flip
+
+typedef enum {
+ // Object with fixed layout. Keeps an information about that
+ // element was processed. (stackPos.next.step)
+ posTypeStep,
+ // Description of the pointers-first heap object. Keeps information
+ // about layout. (stackPos.next.ptrs)
+ posTypePtrs,
+ // Keeps SRT bitmap (stackPos.next.srt)
+ posTypeSRT,
+ // Keeps a new object that was not inspected yet. Keeps a parent
+ // element (stackPos.next.parent)
+ posTypeFresh
+} nextPosType;
+
+typedef union {
+ // fixed layout or layout specified by a field in the closure
+ StgWord step;
+
+ // layout.payload
+ struct {
+ // See StgClosureInfo in InfoTables.h
+ StgHalfWord pos;
+ StgHalfWord ptrs;
+ StgPtr payload;
+ } ptrs;
+
+ // SRT
+ struct {
+ StgClosure *srt;
+ } srt;
+} nextPos;
+
+/**
+ * Position pointer into a closure. Determines what the next element to return
+ * for a stackElement is.
+ */
+typedef struct {
+ nextPosType type;
+ nextPos next;
+} stackPos;
+
+/**
+ * An element of the traversal work-stack. Besides the closure itself this also
+ * stores it's parent and associated data.
+ *
+ * When 'info.type == posTypeFresh' a 'stackElement' represents just one
+ * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an
+ * offset into the children of 'c'. This is to support returning a closure's
+ * children one-by-one without pushing one element per child onto the stack. See
+ * traversePushChildren() and traversePop().
+ *
+ */
+typedef struct stackElement_ {
+ stackPos info;
+ StgClosure *c;
+ StgClosure *cp; // parent of 'c'. Only used when info.type == posTypeFresh.
+ stackData data;
+} stackElement;
+
+
+#if defined(DEBUG)
+unsigned int g_traversalDebugLevel = 0;
+static inline void debug(const char *s, ...)
+{
+ va_list ap;
+
+ if(g_traversalDebugLevel == 0)
+ return;
+
+ va_start(ap,s);
+ vdebugBelch(s, ap);
+ va_end(ap);
+}
+#else
+#define debug(...)
+#endif
+
+// number of blocks allocated for one stack
+#define BLOCKS_IN_STACK 1
+
+/* -----------------------------------------------------------------------------
+ * Add a new block group to the stack.
+ * Invariants:
+ * currentStack->link == s.
+ * -------------------------------------------------------------------------- */
+STATIC_INLINE void
+newStackBlock( traverseState *ts, bdescr *bd )
+{
+ ts->currentStack = bd;
+ ts->stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
+ ts->stackBottom = (stackElement *)bd->start;
+ ts->stackLimit = (stackElement *)ts->stackTop;
+ bd->free = (StgPtr)ts->stackLimit;
+}
+
+/* -----------------------------------------------------------------------------
+ * Return to the previous block group.
+ * Invariants:
+ * s->link == currentStack.
+ * -------------------------------------------------------------------------- */
+STATIC_INLINE void
+returnToOldStack( traverseState *ts, bdescr *bd )
+{
+ ts->currentStack = bd;
+ ts->stackTop = (stackElement *)bd->free;
+ ts->stackBottom = (stackElement *)bd->start;
+ ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
+ bd->free = (StgPtr)ts->stackLimit;
+}
+
+/**
+ * Initializes the traversal work-stack.
+ */
+void
+initializeTraverseStack( traverseState *ts )
+{
+ if (ts->firstStack != NULL) {
+ freeChain(ts->firstStack);
+ }
+
+ ts->firstStack = allocGroup(BLOCKS_IN_STACK);
+ ts->firstStack->link = NULL;
+ ts->firstStack->u.back = NULL;
+
+ ts->stackSize = 0;
+ ts->maxStackSize = 0;
+
+ newStackBlock(ts, ts->firstStack);
+}
+
+/**
+ * Frees all the block groups in the traversal works-stack.
+ *
+ * Invariants:
+ * firstStack != NULL
+ */
+void
+closeTraverseStack( traverseState *ts )
+{
+ freeChain(ts->firstStack);
+ ts->firstStack = NULL;
+}
+
+int
+getTraverseStackMaxSize(traverseState *ts)
+{
+ return ts->maxStackSize;
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns true if the whole stack is empty.
+ * -------------------------------------------------------------------------- */
+STATIC_INLINE bool
+isEmptyWorkStack( traverseState *ts )
+{
+ return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit;
+}
+
+/* -----------------------------------------------------------------------------
+ * Returns size of stack
+ * -------------------------------------------------------------------------- */
+W_
+traverseWorkStackBlocks(traverseState *ts)
+{
+ bdescr* bd;
+ W_ res = 0;
+
+ for (bd = ts->firstStack; bd != NULL; bd = bd->link)
+ res += bd->blocks;
+
+ return res;
+}
+
+/* -----------------------------------------------------------------------------
+ * Initializes *info from ptrs and payload.
+ * Invariants:
+ * payload[] begins with ptrs pointers followed by non-pointers.
+ * -------------------------------------------------------------------------- */
+STATIC_INLINE void
+init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
+{
+ info->type = posTypePtrs;
+ info->next.ptrs.pos = 0;
+ info->next.ptrs.ptrs = ptrs;
+ info->next.ptrs.payload = payload;
+}
+
+/* -----------------------------------------------------------------------------
+ * Find the next object from *info.
+ * -------------------------------------------------------------------------- */
+STATIC_INLINE StgClosure *
+find_ptrs( stackPos *info )
+{
+ if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
+ return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
+ } else {
+ return NULL;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Initializes *info from SRT information stored in *infoTable.
+ * -------------------------------------------------------------------------- */
+STATIC_INLINE void
+init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
+{
+ info->type = posTypeSRT;
+ if (infoTable->i.srt) {
+ info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable);
+ } else {
+ info->next.srt.srt = NULL;
+ }
+}
+
+STATIC_INLINE void
+init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
+{
+ info->type = posTypeSRT;
+ if (infoTable->i.srt) {
+ info->next.srt.srt = (StgClosure*)GET_SRT(infoTable);
+ } else {
+ info->next.srt.srt = NULL;
+ }
+}
+
+/* -----------------------------------------------------------------------------
+ * Find the next object from *info.
+ * -------------------------------------------------------------------------- */
+STATIC_INLINE StgClosure *
+find_srt( stackPos *info )
+{
+ StgClosure *c;
+ if (info->type == posTypeSRT) {
+ c = info->next.srt.srt;
+ info->next.srt.srt = NULL;
+ return c;
+ }
+
+ return NULL;
+}
+
+/**
+ * Push a set of closures, represented by a single 'stackElement', onto the
+ * traversal work-stack.
+ */
+static void
+pushStackElement(traverseState *ts, stackElement *se)
+{
+ bdescr *nbd; // Next Block Descriptor
+ if (ts->stackTop - 1 < ts->stackBottom) {
+ debug("pushStackElement() to the next stack.\n");
+
+ // currentStack->free is updated when the active stack is switched
+ // to the next stack.
+ ts->currentStack->free = (StgPtr)ts->stackTop;
+
+ if (ts->currentStack->link == NULL) {
+ nbd = allocGroup(BLOCKS_IN_STACK);
+ nbd->link = NULL;
+ nbd->u.back = ts->currentStack;
+ ts->currentStack->link = nbd;
+ } else
+ nbd = ts->currentStack->link;
+
+ newStackBlock(ts, nbd);
+ }
+
+ // adjust stackTop (acutal push)
+ ts->stackTop--;
+ // If the size of stackElement was huge, we would better replace the
+ // following statement by either a memcpy() call or a switch statement
+ // on the type of the element. Currently, the size of stackElement is
+ // small enough (5 words) that this direct assignment seems to be enough.
+ *ts->stackTop = *se;
+
+ ts->stackSize++;
+ if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
+ ASSERT(ts->stackSize >= 0);
+ debug("stackSize = %d\n", ts->stackSize);
+}
+
+/**
+ * Push a single closure onto the traversal work-stack.
+ *
+ * cp - object's parent
+ * c - closure
+ * data - data associated with closure.
+ */
+inline void
+traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackData data) {
+ stackElement se;
+
+ se.c = c;
+ se.cp = cp;
+ se.data = data;
+ se.info.type = posTypeFresh;
+
+ pushStackElement(ts, &se);
+};
+
+/**
+ * traversePushChildren() extracts the first child of 'c' in 'first_child' and
+ * conceptually pushes all remaining children of 'c' onto the traversal stack
+ * while associating 'data' with the pushed elements to be returned upon poping.
+ *
+ * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed
+ * onto the stack.
+ *
+ * If 'c' has only one child, 'first_child' is set to that child and nothing is
+ * pushed onto the stack.
+ *
+ * Invariants:
+ *
+ * - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot
+ * be any stack objects.
+ *
+ * Note: SRTs are considered to be children as well.
+ *
+ * Note: When pushing onto the stack we only really push one 'stackElement'
+ * representing all children onto the stack. See traversePop()
+ */
+STATIC_INLINE void
+traversePushChildren(traverseState *ts, StgClosure *c, stackData data, StgClosure **first_child)
+{
+ stackElement se;
+
+ debug("traversePushChildren(): stackTop = 0x%x\n", ts->stackTop);
+
+ ASSERT(get_itbl(c)->type != TSO);
+ ASSERT(get_itbl(c)->type != AP_STACK);
+
+ //
+ // fill in se
+ //
+
+ se.c = c;
+ se.data = data;
+ // Note: se.cp ommitted on purpose, only traversePushClosure uses that.
+
+ // fill in se.info
+ switch (get_itbl(c)->type) {
+ // no child, no SRT
+ case CONSTR_0_1:
+ case CONSTR_0_2:
+ case ARR_WORDS:
+ case COMPACT_NFDATA:
+ *first_child = NULL;
+ return;
+
+ // one child (fixed), no SRT
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ *first_child = ((StgMutVar *)c)->var;
+ return;
+ case THUNK_SELECTOR:
+ *first_child = ((StgSelector *)c)->selectee;
+ return;
+ case BLACKHOLE:
+ *first_child = ((StgInd *)c)->indirectee;
+ return;
+ case CONSTR_1_0:
+ case CONSTR_1_1:
+ *first_child = c->payload[0];
+ return;
+
+ // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
+ // of the next child. We do not write a separate initialization code.
+ // Also we do not have to initialize info.type;
+
+ // two children (fixed), no SRT
+ // need to push a stackElement, but nothing to store in se.info
+ case CONSTR_2_0:
+ *first_child = c->payload[0]; // return the first pointer
+ se.info.type = posTypeStep;
+ se.info.next.step = 2; // 2 = second
+ break;
+
+ // three children (fixed), no SRT
+ // need to push a stackElement
+ case MVAR_CLEAN:
+ case MVAR_DIRTY:
+ // head must be TSO and the head of a linked list of TSOs.
+ // Shoule it be a child? Seems to be yes.
+ *first_child = (StgClosure *)((StgMVar *)c)->head;
+ se.info.type = posTypeStep;
+ se.info.next.step = 2; // 2 = second
+ break;
+
+ // three children (fixed), no SRT
+ case WEAK:
+ *first_child = ((StgWeak *)c)->key;
+ se.info.type = posTypeStep;
+ se.info.next.step = 2;
+ break;
+
+ // layout.payload.ptrs, no SRT
+ case TVAR:
+ case CONSTR:
+ case CONSTR_NOCAF:
+ case PRIM:
+ case MUT_PRIM:
+ case BCO:
+ init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
+ (StgPtr)c->payload);
+ *first_child = find_ptrs(&se.info);
+ if (*first_child == NULL)
+ return; // no child
+ break;
+
+ // StgMutArrPtr.ptrs, no SRT
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
+ init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
+ (StgPtr)(((StgMutArrPtrs *)c)->payload));
+ *first_child = find_ptrs(&se.info);
+ if (*first_child == NULL)
+ return;
+ break;
+
+ // StgMutArrPtr.ptrs, no SRT
+ case SMALL_MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_DIRTY:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+ init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
+ (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
+ *first_child = find_ptrs(&se.info);
+ if (*first_child == NULL)
+ return;
+ break;
+
+ // layout.payload.ptrs, SRT
+ case FUN_STATIC:
+ case FUN: // *c is a heap object.
+ case FUN_2_0:
+ init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
+ *first_child = find_ptrs(&se.info);
+ if (*first_child == NULL)
+ // no child from ptrs, so check SRT
+ goto fun_srt_only;
+ break;
+
+ case THUNK:
+ case THUNK_2_0:
+ init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
+ (StgPtr)((StgThunk *)c)->payload);
+ *first_child = find_ptrs(&se.info);
+ if (*first_child == NULL)
+ // no child from ptrs, so check SRT
+ goto thunk_srt_only;
+ break;
+
+ // 1 fixed child, SRT
+ case FUN_1_0:
+ case FUN_1_1:
+ *first_child = c->payload[0];
+ ASSERT(*first_child != NULL);
+ init_srt_fun(&se.info, get_fun_itbl(c));
+ break;
+
+ case THUNK_1_0:
+ case THUNK_1_1:
+ *first_child = ((StgThunk *)c)->payload[0];
+ ASSERT(*first_child != NULL);
+ init_srt_thunk(&se.info, get_thunk_itbl(c));
+ break;
+
+ case FUN_0_1: // *c is a heap object.
+ case FUN_0_2:
+ fun_srt_only:
+ init_srt_fun(&se.info, get_fun_itbl(c));
+ *first_child = find_srt(&se.info);
+ if (*first_child == NULL)
+ return; // no child
+ break;
+
+ // SRT only
+ case THUNK_STATIC:
+ ASSERT(get_itbl(c)->srt != 0);
+ /* fall-thru */
+ case THUNK_0_1:
+ case THUNK_0_2:
+ thunk_srt_only:
+ init_srt_thunk(&se.info, get_thunk_itbl(c));
+ *first_child = find_srt(&se.info);
+ if (*first_child == NULL)
+ return; // no child
+ break;
+
+ case TREC_CHUNK:
+ *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
+ se.info.type = posTypeStep;
+ se.info.next.step = 0; // entry no.
+ break;
+
+ // cannot appear
+ case PAP:
+ case AP:
+ case AP_STACK:
+ case TSO:
+ case STACK:
+ case IND_STATIC:
+ // stack objects
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case UNDERFLOW_FRAME:
+ case STOP_FRAME:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_BIG:
+ // invalid objects
+ case IND:
+ case INVALID_OBJECT:
+ default:
+ barf("Invalid object *c in push(): %d", get_itbl(c)->type);
+ return;
+ }
+
+ // se.cp has to be initialized when type==posTypeFresh. We don't do that
+ // here though. So type must be !=posTypeFresh.
+ ASSERT(se.info.type != posTypeFresh);
+
+ pushStackElement(ts, &se);
+}
+
+/**
+ * popStackElement(): Remove a depleted stackElement from the top of the
+ * traversal work-stack.
+ *
+ * Invariants:
+ * stackTop cannot be equal to stackLimit unless the whole stack is
+ * empty, in which case popStackElement() is not allowed.
+ */
+static void
+popStackElement(traverseState *ts) {
+ debug("popStackElement(): stackTop = 0x%x\n", ts->stackTop);
+
+ ASSERT(ts->stackTop != ts->stackLimit);
+ ASSERT(!isEmptyWorkStack(ts));
+
+ // <= (instead of <) is wrong!
+ if (ts->stackTop + 1 < ts->stackLimit) {
+ ts->stackTop++;
+
+ ts->stackSize--;
+ if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
+ ASSERT(ts->stackSize >= 0);
+ debug("stackSize = (--) %d\n", ts->stackSize);
+
+ return;
+ }
+
+ bdescr *pbd; // Previous Block Descriptor
+
+ debug("popStackElement() to the previous stack.\n");
+
+ ASSERT(ts->stackTop + 1 == ts->stackLimit);
+ ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start);
+
+ if (ts->firstStack == ts->currentStack) {
+ // The stack is completely empty.
+ ts->stackTop++;
+ ASSERT(ts->stackTop == ts->stackLimit);
+
+ ts->stackSize--;
+ if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
+ ASSERT(ts->stackSize >= 0);
+ debug("stackSize = %d\n", ts->stackSize);
+
+ return;
+ }
+
+ // currentStack->free is updated when the active stack is switched back
+ // to the previous stack.
+ ts->currentStack->free = (StgPtr)ts->stackLimit;
+
+ // find the previous block descriptor
+ pbd = ts->currentStack->u.back;
+ ASSERT(pbd != NULL);
+
+ returnToOldStack(ts, pbd);
+
+ ts->stackSize--;
+ if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
+ ASSERT(ts->stackSize >= 0);
+ debug("stackSize = %d\n", ts->stackSize);
+}
+
+/**
+ * Finds the next object to be considered for retainer profiling and store
+ * its pointer to *c.
+ *
+ * If the unprocessed object was stored in the stack (posTypeFresh), the
+ * this object is returned as-is. Otherwise Test if the topmost stack
+ * element indicates that more objects are left,
+ * and if so, retrieve the first object and store its pointer to *c. Also,
+ * set *cp and *data appropriately, both of which are stored in the stack
+ * element. The topmost stack element then is overwritten so as for it to now
+ * denote the next object.
+ *
+ * If the topmost stack element indicates no more objects are left, pop
+ * off the stack element until either an object can be retrieved or
+ * the work-stack becomes empty, indicated by true returned by
+ * isEmptyWorkStack(), in which case *c is set to NULL.
+ *
+ * Note:
+ *
+ * It is okay to call this function even when the work-stack is empty.
+ */
+STATIC_INLINE void
+traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data)
+{
+ stackElement *se;
+
+ debug("traversePop(): stackTop = 0x%x\n", ts->stackTop);
+
+ // Is this the last internal element? If so instead of modifying the current
+ // stackElement in place we actually remove it from the stack.
+ bool last = false;
+
+ do {
+ if (isEmptyWorkStack(ts)) {
+ *c = NULL;
+ return;
+ }
+
+ // Note: Below every `break`, where the loop condition is true, must be
+ // accompanied by a popStackElement() otherwise this is an infinite
+ // loop.
+ se = ts->stackTop;
+
+ // If this is a top-level element, you should pop that out.
+ if (se->info.type == posTypeFresh) {
+ *cp = se->cp;
+ *c = se->c;
+ *data = se->data;
+ popStackElement(ts);
+ return;
+ }
+
+ // Note: The first ptr of all of these was already returned as
+ // *fist_child in push(), so we always start with the second field.
+ switch (get_itbl(se->c)->type) {
+ // two children (fixed), no SRT
+ // nothing in se.info
+ case CONSTR_2_0:
+ *c = se->c->payload[1];
+ last = true;
+ goto out;
+
+ // three children (fixed), no SRT
+ // need to push a stackElement
+ case MVAR_CLEAN:
+ case MVAR_DIRTY:
+ if (se->info.next.step == 2) {
+ *c = (StgClosure *)((StgMVar *)se->c)->tail;
+ se->info.next.step++; // move to the next step
+ // no popStackElement
+ } else {
+ *c = ((StgMVar *)se->c)->value;
+ last = true;
+ }
+ goto out;
+
+ // three children (fixed), no SRT
+ case WEAK:
+ if (se->info.next.step == 2) {
+ *c = ((StgWeak *)se->c)->value;
+ se->info.next.step++;
+ // no popStackElement
+ } else {
+ *c = ((StgWeak *)se->c)->finalizer;
+ last = true;
+ }
+ goto out;
+
+ case TREC_CHUNK: {
+ // These are pretty complicated: we have N entries, each
+ // of which contains 3 fields that we want to follow. So
+ // we divide the step counter: the 2 low bits indicate
+ // which field, and the rest of the bits indicate the
+ // entry number (starting from zero).
+ TRecEntry *entry;
+ uint32_t entry_no = se->info.next.step >> 2;
+ uint32_t field_no = se->info.next.step & 3;
+ if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
+ *c = NULL;
+ popStackElement(ts);
+ break; // this breaks out of the switch not the loop
+ }
+ entry = &((StgTRecChunk *)se->c)->entries[entry_no];
+ if (field_no == 0) {
+ *c = (StgClosure *)entry->tvar;
+ } else if (field_no == 1) {
+ *c = entry->expected_value;
+ } else {
+ *c = entry->new_value;
+ }
+ se->info.next.step++;
+ goto out;
+ }
+
+ case TVAR:
+ case CONSTR:
+ case PRIM:
+ case MUT_PRIM:
+ case BCO:
+ // StgMutArrPtr.ptrs, no SRT
+ case MUT_ARR_PTRS_CLEAN:
+ case MUT_ARR_PTRS_DIRTY:
+ case MUT_ARR_PTRS_FROZEN_CLEAN:
+ case MUT_ARR_PTRS_FROZEN_DIRTY:
+ case SMALL_MUT_ARR_PTRS_CLEAN:
+ case SMALL_MUT_ARR_PTRS_DIRTY:
+ case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
+ case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
+ *c = find_ptrs(&se->info);
+ if (*c == NULL) {
+ popStackElement(ts);
+ break; // this breaks out of the switch not the loop
+ }
+ goto out;
+
+ // layout.payload.ptrs, SRT
+ case FUN: // always a heap object
+ case FUN_STATIC:
+ case FUN_2_0:
+ if (se->info.type == posTypePtrs) {
+ *c = find_ptrs(&se->info);
+ if (*c != NULL) {
+ goto out;
+ }
+ init_srt_fun(&se->info, get_fun_itbl(se->c));
+ }
+ goto do_srt;
+
+ case THUNK:
+ case THUNK_2_0:
+ if (se->info.type == posTypePtrs) {
+ *c = find_ptrs(&se->info);
+ if (*c != NULL) {
+ goto out;
+ }
+ init_srt_thunk(&se->info, get_thunk_itbl(se->c));
+ }
+ goto do_srt;
+
+ // SRT
+ do_srt:
+ case THUNK_STATIC:
+ case FUN_0_1:
+ case FUN_0_2:
+ case THUNK_0_1:
+ case THUNK_0_2:
+ case FUN_1_0:
+ case FUN_1_1:
+ case THUNK_1_0:
+ case THUNK_1_1:
+ *c = find_srt(&se->info);
+ if(*c == NULL) {
+ popStackElement(ts);
+ break; // this breaks out of the switch not the loop
+ }
+ goto out;
+
+ // no child (fixed), no SRT
+ case CONSTR_0_1:
+ case CONSTR_0_2:
+ case ARR_WORDS:
+ // one child (fixed), no SRT
+ case MUT_VAR_CLEAN:
+ case MUT_VAR_DIRTY:
+ case THUNK_SELECTOR:
+ case CONSTR_1_1:
+ // cannot appear
+ case PAP:
+ case AP:
+ case AP_STACK:
+ case TSO:
+ case STACK:
+ case IND_STATIC:
+ case CONSTR_NOCAF:
+ // stack objects
+ case UPDATE_FRAME:
+ case CATCH_FRAME:
+ case UNDERFLOW_FRAME:
+ case STOP_FRAME:
+ case RET_BCO:
+ case RET_SMALL:
+ case RET_BIG:
+ // invalid objects
+ case IND:
+ case INVALID_OBJECT:
+ default:
+ barf("Invalid object *c in traversePop(): %d", get_itbl(se->c)->type);
+ return;
+ }
+ } while (*c == NULL);
+
+out:
+
+ ASSERT(*c != NULL);
+
+ *cp = se->c;
+ *data = se->data;
+
+ if(last)
+ popStackElement(ts);
+
+ return;
+
+}
+
+/**
+ * Make sure a closure's profiling data is initialized to zero if it does not
+ * conform to the current value of the flip bit, returns true in this case.
+ *
+ * See Note [Profiling heap traversal visited bit].
+ */
+bool
+traverseMaybeInitClosureData(StgClosure *c)
+{
+ if (!isTravDataValid(c)) {
+ setTravDataToZero(c);
+ return true;
+ }
+ return false;
+}
+
+/* -----------------------------------------------------------------------------
+ Call traversePushClosure for each of the closures covered by a large bitmap.
+ -------------------------------------------------------------------------- */
+
+static void
+traverseLargeBitmap (traverseState *ts, StgPtr p, StgLargeBitmap *large_bitmap,
+ uint32_t size, StgClosure *c, stackData data)
+{
+ uint32_t i, b;
+ StgWord bitmap;
+
+ b = 0;
+ bitmap = large_bitmap->bitmap[b];
+ for (i = 0; i < size; ) {
+ if ((bitmap & 1) == 0) {
+ traversePushClosure(ts, (StgClosure *)*p, c, data);
+ }
+ i++;
+ p++;
+ if (i % BITS_IN(W_) == 0) {
+ b++;
+ bitmap = large_bitmap->bitmap[b];
+ } else {
+ bitmap = bitmap >> 1;
+ }
+ }
+}
+
+STATIC_INLINE StgPtr
+traverseSmallBitmap (traverseState *ts, StgPtr p, uint32_t size, StgWord bitmap,
+ StgClosure *c, stackData data)
+{
+ while (size > 0) {
+ if ((bitmap & 1) == 0) {
+ traversePushClosure(ts, (StgClosure *)*p, c, data);
+ }
+ p++;
+ bitmap = bitmap >> 1;
+ size--;
+ }
+ return p;
+}
+
+/**
+ * traversePushStack(ts, cp, data, stackStart, stackEnd) pushes all the objects
+ * in the STG stack-chunk from stackStart to stackEnd onto the traversal
+ * work-stack with 'c' and 'data' being their parent and associated data,
+ * respectively.
+ *
+ * Invariants:
+ *
+ * *cp is one of the following: TSO, AP_STACK.
+ *
+ * stackStart < stackEnd.
+ *
+ * If *c is TSO, its state is not ThreadComplete,or ThreadKilled,
+ * which means that its stack is ready to process.
+ *
+ * Note:
+ *
+ * This code was almost plagiarzied from GC.c! For each pointer,
+ * traversePushClosure() is invoked instead of evacuate().
+ */
+static void
+traversePushStack(traverseState *ts, StgClosure *cp, stackData data,
+ StgPtr stackStart, StgPtr stackEnd)
+{
+ StgPtr p;
+ const StgRetInfoTable *info;
+ StgWord bitmap;
+ uint32_t size;
+
+ ASSERT(get_itbl(cp)->type == STACK);
+
+ p = stackStart;
+ while (p < stackEnd) {
+ info = get_ret_itbl((StgClosure *)p);
+
+ switch(info->i.type) {
+
+ case UPDATE_FRAME:
+ traversePushClosure(ts, ((StgUpdateFrame *)p)->updatee, cp, data);
+ p += sizeofW(StgUpdateFrame);
+ continue;
+
+ case UNDERFLOW_FRAME:
+ case STOP_FRAME:
+ case CATCH_FRAME:
+ case CATCH_STM_FRAME:
+ case CATCH_RETRY_FRAME:
+ case ATOMICALLY_FRAME:
+ case RET_SMALL:
+ bitmap = BITMAP_BITS(info->i.layout.bitmap);
+ size = BITMAP_SIZE(info->i.layout.bitmap);
+ p++;
+ p = traverseSmallBitmap(ts, p, size, bitmap, cp, data);
+
+ follow_srt:
+ if (info->i.srt) {
+ traversePushClosure(ts, GET_SRT(info), cp, data);
+ }
+ continue;
+
+ case RET_BCO: {
+ StgBCO *bco;
+
+ p++;
+ traversePushClosure(ts, (StgClosure*)*p, cp, data);
+ bco = (StgBCO *)*p;
+ p++;
+ size = BCO_BITMAP_SIZE(bco);
+ traverseLargeBitmap(ts, p, BCO_BITMAP(bco), size, cp, data);
+ p += size;
+ continue;
+ }
+
+ // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
+ case RET_BIG:
+ size = GET_LARGE_BITMAP(&info->i)->size;
+ p++;
+ traverseLargeBitmap(ts, p, GET_LARGE_BITMAP(&info->i),
+ size, cp, data);
+ p += size;
+ // and don't forget to follow the SRT
+ goto follow_srt;
+
+ case RET_FUN: {
+ StgRetFun *ret_fun = (StgRetFun *)p;
+ const StgFunInfoTable *fun_info;
+
+ traversePushClosure(ts, ret_fun->fun, cp, data);
+ fun_info = get_fun_itbl(UNTAG_CONST_CLOSURE(ret_fun->fun));
+
+ p = (P_)&ret_fun->payload;
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+ size = BITMAP_SIZE(fun_info->f.b.bitmap);
+ p = traverseSmallBitmap(ts, p, size, bitmap, cp, data);
+ break;
+ case ARG_GEN_BIG:
+ size = GET_FUN_LARGE_BITMAP(fun_info)->size;
+ traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info),
+ size, cp, data);
+ p += size;
+ break;
+ default:
+ bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
+ p = traverseSmallBitmap(ts, p, size, bitmap, cp, data);
+ break;
+ }
+ goto follow_srt;
+ }
+
+ default:
+ barf("Invalid object found in traversePushStack(): %d",
+ (int)(info->i.type));
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------------
+ * Call traversePushClosure for each of the children of a PAP/AP
+ * ------------------------------------------------------------------------- */
+
+STATIC_INLINE StgPtr
+traversePAP (traverseState *ts,
+ StgClosure *pap, /* NOT tagged */
+ stackData data,
+ StgClosure *fun, /* tagged */
+ StgClosure** payload, StgWord n_args)
+{
+ StgPtr p;
+ StgWord bitmap;
+ const StgFunInfoTable *fun_info;
+
+ traversePushClosure(ts, fun, pap, data);
+ fun = UNTAG_CLOSURE(fun);
+ fun_info = get_fun_itbl(fun);
+ ASSERT(fun_info->i.type != PAP);
+
+ p = (StgPtr)payload;
+
+ switch (fun_info->f.fun_type) {
+ case ARG_GEN:
+ bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
+ p = traverseSmallBitmap(ts, p, n_args, bitmap,
+ pap, data);
+ break;
+ case ARG_GEN_BIG:
+ traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info),
+ n_args, pap, data);
+ p += n_args;
+ break;
+ case ARG_BCO:
+ traverseLargeBitmap(ts, (StgPtr)payload, BCO_BITMAP(fun),
+ n_args, pap, data);
+ p += n_args;
+ break;
+ default:
+ bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
+ p = traverseSmallBitmap(ts, p, n_args, bitmap, pap, data);
+ break;
+ }
+ return p;
+}
+
+static void
+resetMutableObjects(void)
+{
+ uint32_t g, n;
+ bdescr *bd;
+ StgPtr ml;
+
+ // The following code resets the 'trav' field of each unvisited mutable
+ // object.
+ for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
+ // NOT true: even G0 has a block on its mutable list
+ // ASSERT(g != 0 || (generations[g].mut_list == NULL));
+
+ // Traversing through mut_list is necessary
+ // because we can find MUT_VAR objects which have not been
+ // visited during heap traversal.
+ for (n = 0; n < n_capabilities; n++) {
+ for (bd = capabilities[n]->mut_lists[g]; bd != NULL; bd = bd->link) {
+ for (ml = bd->start; ml < bd->free; ml++) {
+
+ traverseMaybeInitClosureData((StgClosure *)*ml);
+ }
+ }
+ }
+ }
+}
+
+/**
+ * Traverse all closures on the traversal work-stack, calling 'visit_cb' on each
+ * closure. See 'visitClosure_cb' for details. This function flips the 'flip'
+ * bit and hence every closure's profiling data will be reset to zero upon
+ * visiting. See Note [Profiling heap traversal visited bit].
+ */
+void
+traverseWorkStack(traverseState *ts, visitClosure_cb visit_cb)
+{
+ // first_child = first child of c
+ StgClosure *c, *cp, *first_child;
+ stackData data, child_data;
+ StgWord typeOfc;
+
+ // Now we flip the flip bit.
+ flip = flip ^ 1;
+
+ // c = Current closure (possibly tagged)
+ // cp = Current closure's Parent (NOT tagged)
+ // data = current closures' associated data (NOT tagged)
+ // data_out = data to associate with current closure's children
+
+loop:
+ traversePop(ts, &c, &cp, &data);
+
+ if (c == NULL) {
+ debug("maxStackSize= %d\n", ts->maxStackSize);
+ resetMutableObjects();
+ return;
+ }
+inner_loop:
+ c = UNTAG_CLOSURE(c);
+
+ typeOfc = get_itbl(c)->type;
+
+ // special cases
+ switch (typeOfc) {
+ case TSO:
+ if (((StgTSO *)c)->what_next == ThreadComplete ||
+ ((StgTSO *)c)->what_next == ThreadKilled) {
+ debug("ThreadComplete or ThreadKilled encountered in traverseWorkStack()\n");
+ goto loop;
+ }
+ break;
+
+ case IND_STATIC:
+ // We just skip IND_STATIC, so it's never visited.
+ c = ((StgIndStatic *)c)->indirectee;
+ goto inner_loop;
+
+ case CONSTR_NOCAF:
+ // static objects with no pointers out, so goto loop.
+
+ // It is not just enough not to visit *c; it is
+ // mandatory because CONSTR_NOCAF are not reachable from
+ // scavenged_static_objects, the list from which is assumed to traverse
+ // all static objects after major garbage collections.
+ goto loop;
+
+ case THUNK_STATIC:
+ if (get_itbl(c)->srt == 0) {
+ // No need to visit *c; no dynamic objects are reachable from it.
+ //
+ // Static objects: if we traverse all the live closures,
+ // including static closures, during each heap census then
+ // we will observe that some static closures appear and
+ // disappear. eg. a closure may contain a pointer to a
+ // static function 'f' which is not otherwise reachable
+ // (it doesn't indirectly point to any CAFs, so it doesn't
+ // appear in any SRTs), so we would find 'f' during
+ // traversal. However on the next sweep there may be no
+ // closures pointing to 'f'.
+ //
+ // We must therefore ignore static closures whose SRT is
+ // empty, because these are exactly the closures that may
+ // "appear". A closure with a non-empty SRT, and which is
+ // still required, will always be reachable.
+ //
+ // But what about CONSTR? Surely these may be able
+ // to appear, and they don't have SRTs, so we can't
+ // check. So for now, we're calling
+ // resetStaticObjectForProfiling() from the
+ // garbage collector to reset the retainer sets in all the
+ // reachable static objects.
+ goto loop;
+ }
+ /* fall-thru */
+
+ case FUN_STATIC: {
+ const StgInfoTable *info = get_itbl(c);
+ if (info->srt == 0 && info->layout.payload.ptrs == 0) {
+ goto loop;
+ } else {
+ break;
+ }
+ }
+
+ default:
+ break;
+ }
+
+ // If this is the first visit to c, initialize its data.
+ bool first_visit = traverseMaybeInitClosureData(c);
+ bool traverse_children
+ = visit_cb(c, cp, data, first_visit, (stackData*)&child_data);
+ if(!traverse_children)
+ goto loop;
+
+ // process child
+
+ // Special case closures: we process these all in one go rather
+ // than attempting to save the current position, because doing so
+ // would be hard.
+ switch (typeOfc) {
+ case STACK:
+ traversePushStack(ts, c, child_data,
+ ((StgStack *)c)->sp,
+ ((StgStack *)c)->stack + ((StgStack *)c)->stack_size);
+ goto loop;
+
+ case TSO:
+ {
+ StgTSO *tso = (StgTSO *)c;
+
+ traversePushClosure(ts, (StgClosure *) tso->stackobj, c, child_data);
+ traversePushClosure(ts, (StgClosure *) tso->blocked_exceptions, c, child_data);
+ traversePushClosure(ts, (StgClosure *) tso->bq, c, child_data);
+ traversePushClosure(ts, (StgClosure *) tso->trec, c, child_data);
+ if ( tso->why_blocked == BlockedOnMVar
+ || tso->why_blocked == BlockedOnMVarRead
+ || tso->why_blocked == BlockedOnBlackHole
+ || tso->why_blocked == BlockedOnMsgThrowTo
+ ) {
+ traversePushClosure(ts, tso->block_info.closure, c, child_data);
+ }
+ goto loop;
+ }
+
+ case BLOCKING_QUEUE:
+ {
+ StgBlockingQueue *bq = (StgBlockingQueue *)c;
+ traversePushClosure(ts, (StgClosure *) bq->link, c, child_data);
+ traversePushClosure(ts, (StgClosure *) bq->bh, c, child_data);
+ traversePushClosure(ts, (StgClosure *) bq->owner, c, child_data);
+ goto loop;
+ }
+
+ case PAP:
+ {
+ StgPAP *pap = (StgPAP *)c;
+ traversePAP(ts, c, child_data, pap->fun, pap->payload, pap->n_args);
+ goto loop;
+ }
+
+ case AP:
+ {
+ StgAP *ap = (StgAP *)c;
+ traversePAP(ts, c, child_data, ap->fun, ap->payload, ap->n_args);
+ goto loop;
+ }
+
+ case AP_STACK:
+ traversePushClosure(ts, ((StgAP_STACK *)c)->fun, c, child_data);
+ traversePushStack(ts, c, child_data,
+ (StgPtr)((StgAP_STACK *)c)->payload,
+ (StgPtr)((StgAP_STACK *)c)->payload +
+ ((StgAP_STACK *)c)->size);
+ goto loop;
+ }
+
+ traversePushChildren(ts, c, child_data, &first_child);
+
+ // If first_child is null, c has no child.
+ // If first_child is not null, the top stack element points to the next
+ // object. traversePushChildren() may or may not push a stackElement on the
+ // stack.
+ if (first_child == NULL)
+ goto loop;
+
+ // (c, cp, data) = (first_child, c, child_data)
+ data = child_data;
+ cp = c;
+ c = first_child;
+ goto inner_loop;
+}
+
+/* -----------------------------------------------------------------------------
+ * Traverse all static objects for which we compute retainer sets,
+ * and reset their rs fields to NULL, which is accomplished by
+ * invoking traverseMaybeInitClosureData(). This function must be called
+ * before zeroing all objects reachable from scavenged_static_objects
+ * in the case of major garbage collections. See GarbageCollect() in
+ * GC.c.
+ * Note:
+ * The mut_once_list of the oldest generation must also be traversed?
+ * Why? Because if the evacuation of an object pointed to by a static
+ * indirection object fails, it is put back to the mut_once_list of
+ * the oldest generation.
+ * However, this is not necessary because any static indirection objects
+ * are just traversed through to reach dynamic objects. In other words,
+ * they are not taken into consideration in computing retainer sets.
+ *
+ * SDM (20/7/2011): I don't think this is doing anything sensible,
+ * because it happens before retainerProfile() and at the beginning of
+ * retainerProfil() we change the sense of 'flip'. So all of the
+ * calls to traverseMaybeInitClosureData() here are initialising retainer sets
+ * with the wrong flip. Also, I don't see why this is necessary. I
+ * added a traverseMaybeInitClosureData() call to retainRoot(), and that seems
+ * to have fixed the assertion failure in retainerSetOf() I was
+ * encountering.
+ * -------------------------------------------------------------------------- */
+void
+resetStaticObjectForProfiling( StgClosure *static_objects )
+{
+ uint32_t count = 0;
+ StgClosure *p;
+
+ p = static_objects;
+ while (p != END_OF_STATIC_OBJECT_LIST) {
+ p = UNTAG_STATIC_LIST_PTR(p);
+ count++;
+
+ switch (get_itbl(p)->type) {
+ case IND_STATIC:
+ // Since we do not compute the retainer set of any
+ // IND_STATIC object, we don't have to reset its retainer
+ // field.
+ p = (StgClosure*)*IND_STATIC_LINK(p);
+ break;
+ case THUNK_STATIC:
+ traverseMaybeInitClosureData(p);
+ p = (StgClosure*)*THUNK_STATIC_LINK(p);
+ break;
+ case FUN_STATIC:
+ case CONSTR:
+ case CONSTR_1_0:
+ case CONSTR_2_0:
+ case CONSTR_1_1:
+ case CONSTR_NOCAF:
+ traverseMaybeInitClosureData(p);
+ p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
+ break;
+ default:
+ barf("resetStaticObjectForProfiling: %p (%lu)",
+ p, (unsigned long)get_itbl(p)->type);
+ break;
+ }
+ }
+
+ debug("count in scavenged_static_objects = %d\n", count);
+}
+
+#endif /* PROFILING */
diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in
index 7ce3c7f5aa..674566c0ad 100644
--- a/rts/rts.cabal.in
+++ b/rts/rts.cabal.in
@@ -430,6 +430,7 @@ library
Timer.c
TopHandler.c
Trace.c
+ TraverseHeap.c
WSDeque.c
Weak.c
eventlog/EventLog.c