diff options
author | Daniel Gröber <dxld@darkboxed.org> | 2019-07-04 05:11:09 +0200 |
---|---|---|
committer | Daniel Gröber <dxld@darkboxed.org> | 2019-09-22 15:18:10 +0200 |
commit | 383f9089eea7f9228513260ad0f7215938cd4b31 (patch) | |
tree | 16e0579f6f7f82fef9131c5be74f0e4895b60283 /rts | |
parent | eb29735e321e244c161a40cceadd41fcab820f84 (diff) | |
download | haskell-383f9089eea7f9228513260ad0f7215938cd4b31.tar.gz |
rts: Split heap traversal from retainer profiler
This finally moves the newly generalised heap traversal code from the
retainer profiler into it's own file.
Diffstat (limited to 'rts')
-rw-r--r-- | rts/RetainerProfile.c | 1353 | ||||
-rw-r--r-- | rts/TraverseHeap.c | 1371 | ||||
-rw-r--r-- | rts/rts.cabal.in | 1 |
3 files changed, 1372 insertions, 1353 deletions
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c index 301f712e59..6f053c09c4 100644 --- a/rts/RetainerProfile.c +++ b/rts/RetainerProfile.c @@ -66,41 +66,6 @@ static uint32_t numObjectVisited; // total number of objects visited static uint32_t timesAnyObjectVisited; // number of times any objects are // visited -/** Note [Profiling heap traversal visited bit] - * - * If the RTS is compiled with profiling enabled StgProfHeader can be used by - * profiling code to store per-heap object information. - * - * The generic heap traversal code reserves the least significant bit of the - * largest members of the 'trav' union to decide whether we've already visited a - * given closure in the current pass or not. The rest of the field is free to be - * used by the calling profiler. - * - * By doing things this way we implicitly assume that the LSB of the largest - * field in the 'trav' union is insignificant. This is true at least for the - * word aligned pointers which the retainer profiler currently stores there and - * should be maintained by new users of the 'trav' union for example by shifting - * the real data up by one bit. - * - * Since we don't want to have to scan the entire heap a second time just to - * reset the per-object visitied bit before/after the real traversal we make the - * interpretation of this bit dependent on the value of a global variable, - * 'flip'. - * - * When the 'trav' bit is equal to the value of 'flip' the closure data is - * valid otherwise not (see isTravDataValid). We then invert the value of 'flip' - * on each heap traversal (see traverseWorkStack), in effect marking all - * closure's data as invalid at once. - * - * There are some complications with this approach, namely: static objects and - * mutable data. There we do just go over all existing objects to reset the bit - * manually. See 'resetStaticObjectForProfiling' and 'computeRetainerSet'. - */ -StgWord flip = 0; - -#define setTravDataToZero(c) \ - (c)->header.prof.hp.trav.lsb = flip - /* ----------------------------------------------------------------------------- * Retainer stack - header * Note: @@ -111,181 +76,8 @@ StgWord flip = 0; * all. * -------------------------------------------------------------------------- */ -typedef enum { - // Object with fixed layout. Keeps an information about that - // element was processed. (stackPos.next.step) - posTypeStep, - // Description of the pointers-first heap object. Keeps information - // about layout. (stackPos.next.ptrs) - posTypePtrs, - // Keeps SRT bitmap (stackPos.next.srt) - posTypeSRT, - // Keeps a new object that was not inspected yet. Keeps a parent - // element (stackPos.next.parent) - posTypeFresh -} nextPosType; - -typedef union { - // fixed layout or layout specified by a field in the closure - StgWord step; - - // layout.payload - struct { - // See StgClosureInfo in InfoTables.h - StgHalfWord pos; - StgHalfWord ptrs; - StgPtr payload; - } ptrs; - - // SRT - struct { - StgClosure *srt; - } srt; -} nextPos; - -/** - * Position pointer into a closure. Determines what the next element to return - * for a stackElement is. - */ -typedef struct { - nextPosType type; - nextPos next; -} stackPos; - -/** - * An element of the traversal work-stack. Besides the closure itself this also - * stores it's parent and associated data. - * - * When 'info.type == posTypeFresh' a 'stackElement' represents just one - * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an - * offset into the children of 'c'. This is to support returning a closure's - * children one-by-one without pushing one element per child onto the stack. See - * traversePushChildren() and traversePop(). - * - */ -typedef struct stackElement_ { - stackPos info; - StgClosure *c; - StgClosure *cp; // parent of 'c'. Only used when info.type == posTypeFresh. - stackData data; -} stackElement; - traverseState g_retainerTraverseState; - -#if defined(DEBUG) -unsigned int g_traversalDebugLevel = 0; -static inline void debug(const char *s, ...) -{ - va_list ap; - - if(g_traversalDebugLevel == 0) - return; - - va_start(ap,s); - vdebugBelch(s, ap); - va_end(ap); -} -#else -#define debug(...) -#endif - -// number of blocks allocated for one stack -#define BLOCKS_IN_STACK 1 - -/* ----------------------------------------------------------------------------- - * Add a new block group to the stack. - * Invariants: - * currentStack->link == s. - * -------------------------------------------------------------------------- */ -STATIC_INLINE void -newStackBlock( traverseState *ts, bdescr *bd ) -{ - ts->currentStack = bd; - ts->stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks); - ts->stackBottom = (stackElement *)bd->start; - ts->stackLimit = (stackElement *)ts->stackTop; - bd->free = (StgPtr)ts->stackLimit; -} - -/* ----------------------------------------------------------------------------- - * Return to the previous block group. - * Invariants: - * s->link == currentStack. - * -------------------------------------------------------------------------- */ -STATIC_INLINE void -returnToOldStack( traverseState *ts, bdescr *bd ) -{ - ts->currentStack = bd; - ts->stackTop = (stackElement *)bd->free; - ts->stackBottom = (stackElement *)bd->start; - ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks); - bd->free = (StgPtr)ts->stackLimit; -} - -/** - * Initializes the traversal work-stack. - */ -void -initializeTraverseStack( traverseState *ts ) -{ - if (ts->firstStack != NULL) { - freeChain(ts->firstStack); - } - - ts->firstStack = allocGroup(BLOCKS_IN_STACK); - ts->firstStack->link = NULL; - ts->firstStack->u.back = NULL; - - ts->stackSize = 0; - ts->maxStackSize = 0; - - newStackBlock(ts, ts->firstStack); -} - -/** - * Frees all the block groups in the traversal works-stack. - * - * Invariants: - * firstStack != NULL - */ -void -closeTraverseStack( traverseState *ts ) -{ - freeChain(ts->firstStack); - ts->firstStack = NULL; -} - -int -getTraverseStackMaxSize(traverseState *ts) -{ - return ts->maxStackSize; -} - -/* ----------------------------------------------------------------------------- - * Returns true if the whole stack is empty. - * -------------------------------------------------------------------------- */ -STATIC_INLINE bool -isEmptyWorkStack( traverseState *ts ) -{ - return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit; -} - -/* ----------------------------------------------------------------------------- - * Returns size of stack - * -------------------------------------------------------------------------- */ -W_ -traverseWorkStackBlocks(traverseState *ts) -{ - bdescr* bd; - W_ res = 0; - - for (bd = ts->firstStack; bd != NULL; bd = bd->link) - res += bd->blocks; - - return res; -} - W_ retainerStackBlocks(void) { @@ -293,648 +85,6 @@ retainerStackBlocks(void) } /* ----------------------------------------------------------------------------- - * Initializes *info from ptrs and payload. - * Invariants: - * payload[] begins with ptrs pointers followed by non-pointers. - * -------------------------------------------------------------------------- */ -STATIC_INLINE void -init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload ) -{ - info->type = posTypePtrs; - info->next.ptrs.pos = 0; - info->next.ptrs.ptrs = ptrs; - info->next.ptrs.payload = payload; -} - -/* ----------------------------------------------------------------------------- - * Find the next object from *info. - * -------------------------------------------------------------------------- */ -STATIC_INLINE StgClosure * -find_ptrs( stackPos *info ) -{ - if (info->next.ptrs.pos < info->next.ptrs.ptrs) { - return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++]; - } else { - return NULL; - } -} - -/* ----------------------------------------------------------------------------- - * Initializes *info from SRT information stored in *infoTable. - * -------------------------------------------------------------------------- */ -STATIC_INLINE void -init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable ) -{ - info->type = posTypeSRT; - if (infoTable->i.srt) { - info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable); - } else { - info->next.srt.srt = NULL; - } -} - -STATIC_INLINE void -init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable ) -{ - info->type = posTypeSRT; - if (infoTable->i.srt) { - info->next.srt.srt = (StgClosure*)GET_SRT(infoTable); - } else { - info->next.srt.srt = NULL; - } -} - -/* ----------------------------------------------------------------------------- - * Find the next object from *info. - * -------------------------------------------------------------------------- */ -STATIC_INLINE StgClosure * -find_srt( stackPos *info ) -{ - StgClosure *c; - if (info->type == posTypeSRT) { - c = info->next.srt.srt; - info->next.srt.srt = NULL; - return c; - } - - return NULL; -} - -/** - * Push a set of closures, represented by a single 'stackElement', onto the - * traversal work-stack. - */ -static void -pushStackElement(traverseState *ts, stackElement *se) -{ - bdescr *nbd; // Next Block Descriptor - if (ts->stackTop - 1 < ts->stackBottom) { - debug("pushStackElement() to the next stack.\n"); - - // currentStack->free is updated when the active stack is switched - // to the next stack. - ts->currentStack->free = (StgPtr)ts->stackTop; - - if (ts->currentStack->link == NULL) { - nbd = allocGroup(BLOCKS_IN_STACK); - nbd->link = NULL; - nbd->u.back = ts->currentStack; - ts->currentStack->link = nbd; - } else - nbd = ts->currentStack->link; - - newStackBlock(ts, nbd); - } - - // adjust stackTop (acutal push) - ts->stackTop--; - // If the size of stackElement was huge, we would better replace the - // following statement by either a memcpy() call or a switch statement - // on the type of the element. Currently, the size of stackElement is - // small enough (5 words) that this direct assignment seems to be enough. - *ts->stackTop = *se; - - ts->stackSize++; - if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize; - ASSERT(ts->stackSize >= 0); - debug("stackSize = %d\n", ts->stackSize); -} - -/** - * Push a single closure onto the traversal work-stack. - * - * cp - object's parent - * c - closure - * data - data associated with closure. - */ -inline void -traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackData data) { - stackElement se; - - se.c = c; - se.cp = cp; - se.data = data; - se.info.type = posTypeFresh; - - pushStackElement(ts, &se); -}; - -/** - * traversePushChildren() extracts the first child of 'c' in 'first_child' and - * conceptually pushes all remaining children of 'c' onto the traversal stack - * while associating 'data' with the pushed elements to be returned upon poping. - * - * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed - * onto the stack. - * - * If 'c' has only one child, 'first_child' is set to that child and nothing is - * pushed onto the stack. - * - * Invariants: - * - * - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot - * be any stack objects. - * - * Note: SRTs are considered to be children as well. - * - * Note: When pushing onto the stack we only really push one 'stackElement' - * representing all children onto the stack. See traversePop() - */ -STATIC_INLINE void -traversePushChildren(traverseState *ts, StgClosure *c, stackData data, StgClosure **first_child) -{ - stackElement se; - - debug("traversePushChildren(): stackTop = 0x%x\n", ts->stackTop); - - ASSERT(get_itbl(c)->type != TSO); - ASSERT(get_itbl(c)->type != AP_STACK); - - // - // fill in se - // - - se.c = c; - se.data = data; - // Note: se.cp ommitted on purpose, only traversePushClosure uses that. - - // fill in se.info - switch (get_itbl(c)->type) { - // no child, no SRT - case CONSTR_0_1: - case CONSTR_0_2: - case ARR_WORDS: - case COMPACT_NFDATA: - *first_child = NULL; - return; - - // one child (fixed), no SRT - case MUT_VAR_CLEAN: - case MUT_VAR_DIRTY: - *first_child = ((StgMutVar *)c)->var; - return; - case THUNK_SELECTOR: - *first_child = ((StgSelector *)c)->selectee; - return; - case BLACKHOLE: - *first_child = ((StgInd *)c)->indirectee; - return; - case CONSTR_1_0: - case CONSTR_1_1: - *first_child = c->payload[0]; - return; - - // For CONSTR_2_0 and MVAR, we use se.info.step to record the position - // of the next child. We do not write a separate initialization code. - // Also we do not have to initialize info.type; - - // two children (fixed), no SRT - // need to push a stackElement, but nothing to store in se.info - case CONSTR_2_0: - *first_child = c->payload[0]; // return the first pointer - se.info.type = posTypeStep; - se.info.next.step = 2; // 2 = second - break; - - // three children (fixed), no SRT - // need to push a stackElement - case MVAR_CLEAN: - case MVAR_DIRTY: - // head must be TSO and the head of a linked list of TSOs. - // Shoule it be a child? Seems to be yes. - *first_child = (StgClosure *)((StgMVar *)c)->head; - se.info.type = posTypeStep; - se.info.next.step = 2; // 2 = second - break; - - // three children (fixed), no SRT - case WEAK: - *first_child = ((StgWeak *)c)->key; - se.info.type = posTypeStep; - se.info.next.step = 2; - break; - - // layout.payload.ptrs, no SRT - case TVAR: - case CONSTR: - case CONSTR_NOCAF: - case PRIM: - case MUT_PRIM: - case BCO: - init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, - (StgPtr)c->payload); - *first_child = find_ptrs(&se.info); - if (*first_child == NULL) - return; // no child - break; - - // StgMutArrPtr.ptrs, no SRT - case MUT_ARR_PTRS_CLEAN: - case MUT_ARR_PTRS_DIRTY: - case MUT_ARR_PTRS_FROZEN_CLEAN: - case MUT_ARR_PTRS_FROZEN_DIRTY: - init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs, - (StgPtr)(((StgMutArrPtrs *)c)->payload)); - *first_child = find_ptrs(&se.info); - if (*first_child == NULL) - return; - break; - - // StgMutArrPtr.ptrs, no SRT - case SMALL_MUT_ARR_PTRS_CLEAN: - case SMALL_MUT_ARR_PTRS_DIRTY: - case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN: - case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY: - init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs, - (StgPtr)(((StgSmallMutArrPtrs *)c)->payload)); - *first_child = find_ptrs(&se.info); - if (*first_child == NULL) - return; - break; - - // layout.payload.ptrs, SRT - case FUN_STATIC: - case FUN: // *c is a heap object. - case FUN_2_0: - init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload); - *first_child = find_ptrs(&se.info); - if (*first_child == NULL) - // no child from ptrs, so check SRT - goto fun_srt_only; - break; - - case THUNK: - case THUNK_2_0: - init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, - (StgPtr)((StgThunk *)c)->payload); - *first_child = find_ptrs(&se.info); - if (*first_child == NULL) - // no child from ptrs, so check SRT - goto thunk_srt_only; - break; - - // 1 fixed child, SRT - case FUN_1_0: - case FUN_1_1: - *first_child = c->payload[0]; - ASSERT(*first_child != NULL); - init_srt_fun(&se.info, get_fun_itbl(c)); - break; - - case THUNK_1_0: - case THUNK_1_1: - *first_child = ((StgThunk *)c)->payload[0]; - ASSERT(*first_child != NULL); - init_srt_thunk(&se.info, get_thunk_itbl(c)); - break; - - case FUN_0_1: // *c is a heap object. - case FUN_0_2: - fun_srt_only: - init_srt_fun(&se.info, get_fun_itbl(c)); - *first_child = find_srt(&se.info); - if (*first_child == NULL) - return; // no child - break; - - // SRT only - case THUNK_STATIC: - ASSERT(get_itbl(c)->srt != 0); - /* fall-thru */ - case THUNK_0_1: - case THUNK_0_2: - thunk_srt_only: - init_srt_thunk(&se.info, get_thunk_itbl(c)); - *first_child = find_srt(&se.info); - if (*first_child == NULL) - return; // no child - break; - - case TREC_CHUNK: - *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk; - se.info.type = posTypeStep; - se.info.next.step = 0; // entry no. - break; - - // cannot appear - case PAP: - case AP: - case AP_STACK: - case TSO: - case STACK: - case IND_STATIC: - // stack objects - case UPDATE_FRAME: - case CATCH_FRAME: - case UNDERFLOW_FRAME: - case STOP_FRAME: - case RET_BCO: - case RET_SMALL: - case RET_BIG: - // invalid objects - case IND: - case INVALID_OBJECT: - default: - barf("Invalid object *c in push(): %d", get_itbl(c)->type); - return; - } - - // se.cp has to be initialized when type==posTypeFresh. We don't do that - // here though. So type must be !=posTypeFresh. - ASSERT(se.info.type != posTypeFresh); - - pushStackElement(ts, &se); -} - -/** - * popStackElement(): Remove a depleted stackElement from the top of the - * traversal work-stack. - * - * Invariants: - * stackTop cannot be equal to stackLimit unless the whole stack is - * empty, in which case popStackElement() is not allowed. - */ -static void -popStackElement(traverseState *ts) { - debug("popStackElement(): stackTop = 0x%x\n", ts->stackTop); - - ASSERT(ts->stackTop != ts->stackLimit); - ASSERT(!isEmptyWorkStack(ts)); - - // <= (instead of <) is wrong! - if (ts->stackTop + 1 < ts->stackLimit) { - ts->stackTop++; - - ts->stackSize--; - if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize; - ASSERT(ts->stackSize >= 0); - debug("stackSize = (--) %d\n", ts->stackSize); - - return; - } - - bdescr *pbd; // Previous Block Descriptor - - debug("popStackElement() to the previous stack.\n"); - - ASSERT(ts->stackTop + 1 == ts->stackLimit); - ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start); - - if (ts->firstStack == ts->currentStack) { - // The stack is completely empty. - ts->stackTop++; - ASSERT(ts->stackTop == ts->stackLimit); - - ts->stackSize--; - if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize; - ASSERT(ts->stackSize >= 0); - debug("stackSize = %d\n", ts->stackSize); - - return; - } - - // currentStack->free is updated when the active stack is switched back - // to the previous stack. - ts->currentStack->free = (StgPtr)ts->stackLimit; - - // find the previous block descriptor - pbd = ts->currentStack->u.back; - ASSERT(pbd != NULL); - - returnToOldStack(ts, pbd); - - ts->stackSize--; - if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize; - ASSERT(ts->stackSize >= 0); - debug("stackSize = %d\n", ts->stackSize); -} - -/** - * Finds the next object to be considered for retainer profiling and store - * its pointer to *c. - * - * If the unprocessed object was stored in the stack (posTypeFresh), the - * this object is returned as-is. Otherwise Test if the topmost stack - * element indicates that more objects are left, - * and if so, retrieve the first object and store its pointer to *c. Also, - * set *cp and *data appropriately, both of which are stored in the stack - * element. The topmost stack element then is overwritten so as for it to now - * denote the next object. - * - * If the topmost stack element indicates no more objects are left, pop - * off the stack element until either an object can be retrieved or - * the work-stack becomes empty, indicated by true returned by - * isEmptyWorkStack(), in which case *c is set to NULL. - * - * Note: - * - * It is okay to call this function even when the work-stack is empty. - */ -STATIC_INLINE void -traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data) -{ - stackElement *se; - - debug("traversePop(): stackTop = 0x%x\n", ts->stackTop); - - // Is this the last internal element? If so instead of modifying the current - // stackElement in place we actually remove it from the stack. - bool last = false; - - do { - if (isEmptyWorkStack(ts)) { - *c = NULL; - return; - } - - // Note: Below every `break`, where the loop condition is true, must be - // accompanied by a popStackElement() otherwise this is an infinite - // loop. - se = ts->stackTop; - - // If this is a top-level element, you should pop that out. - if (se->info.type == posTypeFresh) { - *cp = se->cp; - *c = se->c; - *data = se->data; - popStackElement(ts); - return; - } - - // Note: The first ptr of all of these was already returned as - // *fist_child in push(), so we always start with the second field. - switch (get_itbl(se->c)->type) { - // two children (fixed), no SRT - // nothing in se.info - case CONSTR_2_0: - *c = se->c->payload[1]; - last = true; - goto out; - - // three children (fixed), no SRT - // need to push a stackElement - case MVAR_CLEAN: - case MVAR_DIRTY: - if (se->info.next.step == 2) { - *c = (StgClosure *)((StgMVar *)se->c)->tail; - se->info.next.step++; // move to the next step - // no popStackElement - } else { - *c = ((StgMVar *)se->c)->value; - last = true; - } - goto out; - - // three children (fixed), no SRT - case WEAK: - if (se->info.next.step == 2) { - *c = ((StgWeak *)se->c)->value; - se->info.next.step++; - // no popStackElement - } else { - *c = ((StgWeak *)se->c)->finalizer; - last = true; - } - goto out; - - case TREC_CHUNK: { - // These are pretty complicated: we have N entries, each - // of which contains 3 fields that we want to follow. So - // we divide the step counter: the 2 low bits indicate - // which field, and the rest of the bits indicate the - // entry number (starting from zero). - TRecEntry *entry; - uint32_t entry_no = se->info.next.step >> 2; - uint32_t field_no = se->info.next.step & 3; - if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) { - *c = NULL; - popStackElement(ts); - break; // this breaks out of the switch not the loop - } - entry = &((StgTRecChunk *)se->c)->entries[entry_no]; - if (field_no == 0) { - *c = (StgClosure *)entry->tvar; - } else if (field_no == 1) { - *c = entry->expected_value; - } else { - *c = entry->new_value; - } - se->info.next.step++; - goto out; - } - - case TVAR: - case CONSTR: - case PRIM: - case MUT_PRIM: - case BCO: - // StgMutArrPtr.ptrs, no SRT - case MUT_ARR_PTRS_CLEAN: - case MUT_ARR_PTRS_DIRTY: - case MUT_ARR_PTRS_FROZEN_CLEAN: - case MUT_ARR_PTRS_FROZEN_DIRTY: - case SMALL_MUT_ARR_PTRS_CLEAN: - case SMALL_MUT_ARR_PTRS_DIRTY: - case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN: - case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY: - *c = find_ptrs(&se->info); - if (*c == NULL) { - popStackElement(ts); - break; // this breaks out of the switch not the loop - } - goto out; - - // layout.payload.ptrs, SRT - case FUN: // always a heap object - case FUN_STATIC: - case FUN_2_0: - if (se->info.type == posTypePtrs) { - *c = find_ptrs(&se->info); - if (*c != NULL) { - goto out; - } - init_srt_fun(&se->info, get_fun_itbl(se->c)); - } - goto do_srt; - - case THUNK: - case THUNK_2_0: - if (se->info.type == posTypePtrs) { - *c = find_ptrs(&se->info); - if (*c != NULL) { - goto out; - } - init_srt_thunk(&se->info, get_thunk_itbl(se->c)); - } - goto do_srt; - - // SRT - do_srt: - case THUNK_STATIC: - case FUN_0_1: - case FUN_0_2: - case THUNK_0_1: - case THUNK_0_2: - case FUN_1_0: - case FUN_1_1: - case THUNK_1_0: - case THUNK_1_1: - *c = find_srt(&se->info); - if(*c == NULL) { - popStackElement(ts); - break; // this breaks out of the switch not the loop - } - goto out; - - // no child (fixed), no SRT - case CONSTR_0_1: - case CONSTR_0_2: - case ARR_WORDS: - // one child (fixed), no SRT - case MUT_VAR_CLEAN: - case MUT_VAR_DIRTY: - case THUNK_SELECTOR: - case CONSTR_1_1: - // cannot appear - case PAP: - case AP: - case AP_STACK: - case TSO: - case STACK: - case IND_STATIC: - case CONSTR_NOCAF: - // stack objects - case UPDATE_FRAME: - case CATCH_FRAME: - case UNDERFLOW_FRAME: - case STOP_FRAME: - case RET_BCO: - case RET_SMALL: - case RET_BIG: - // invalid objects - case IND: - case INVALID_OBJECT: - default: - barf("Invalid object *c in traversePop(): %d", get_itbl(se->c)->type); - return; - } - } while (*c == NULL); - -out: - - ASSERT(*c != NULL); - - *cp = se->c; - *data = se->data; - - if(last) - popStackElement(ts); - - return; - -} - -/* ----------------------------------------------------------------------------- * RETAINER PROFILING ENGINE * -------------------------------------------------------------------------- */ @@ -954,22 +104,6 @@ endRetainerProfiling( void ) outputAllRetainerSet(prof_file); } -/** - * Make sure a closure's profiling data is initialized to zero if it does not - * conform to the current value of the flip bit, returns true in this case. - * - * See Note [Profiling heap traversal visited bit]. - */ -bool -traverseMaybeInitClosureData(StgClosure *c) -{ - if (!isTravDataValid(c)) { - setTravDataToZero(c); - return true; - } - return false; -} - /* ----------------------------------------------------------------------------- * Returns true if *c is a retainer. * In general the retainers are the objects that may be the roots of the @@ -1120,214 +254,6 @@ associate( StgClosure *c, RetainerSet *s ) RSET(c) = (RetainerSet *)((StgWord)s | flip); } -/* ----------------------------------------------------------------------------- - Call traversePushClosure for each of the closures covered by a large bitmap. - -------------------------------------------------------------------------- */ - -static void -traverseLargeBitmap (traverseState *ts, StgPtr p, StgLargeBitmap *large_bitmap, - uint32_t size, StgClosure *c, stackData data) -{ - uint32_t i, b; - StgWord bitmap; - - b = 0; - bitmap = large_bitmap->bitmap[b]; - for (i = 0; i < size; ) { - if ((bitmap & 1) == 0) { - traversePushClosure(ts, (StgClosure *)*p, c, data); - } - i++; - p++; - if (i % BITS_IN(W_) == 0) { - b++; - bitmap = large_bitmap->bitmap[b]; - } else { - bitmap = bitmap >> 1; - } - } -} - -STATIC_INLINE StgPtr -traverseSmallBitmap (traverseState *ts, StgPtr p, uint32_t size, StgWord bitmap, - StgClosure *c, stackData data) -{ - while (size > 0) { - if ((bitmap & 1) == 0) { - traversePushClosure(ts, (StgClosure *)*p, c, data); - } - p++; - bitmap = bitmap >> 1; - size--; - } - return p; -} - -/** - * traversePushStack(ts, cp, data, stackStart, stackEnd) pushes all the objects - * in the STG stack-chunk from stackStart to stackEnd onto the traversal - * work-stack with 'c' and 'data' being their parent and associated data, - * respectively. - * - * Invariants: - * - * *cp is one of the following: TSO, AP_STACK. - * - * stackStart < stackEnd. - * - * If *c is TSO, its state is not ThreadComplete,or ThreadKilled, - * which means that its stack is ready to process. - * - * Note: - * - * This code was almost plagiarzied from GC.c! For each pointer, - * traversePushClosure() is invoked instead of evacuate(). - */ -static void -traversePushStack(traverseState *ts, StgClosure *cp, stackData data, - StgPtr stackStart, StgPtr stackEnd) -{ - StgPtr p; - const StgRetInfoTable *info; - StgWord bitmap; - uint32_t size; - - ASSERT(get_itbl(cp)->type == STACK); - - p = stackStart; - while (p < stackEnd) { - info = get_ret_itbl((StgClosure *)p); - - switch(info->i.type) { - - case UPDATE_FRAME: - traversePushClosure(ts, ((StgUpdateFrame *)p)->updatee, cp, data); - p += sizeofW(StgUpdateFrame); - continue; - - case UNDERFLOW_FRAME: - case STOP_FRAME: - case CATCH_FRAME: - case CATCH_STM_FRAME: - case CATCH_RETRY_FRAME: - case ATOMICALLY_FRAME: - case RET_SMALL: - bitmap = BITMAP_BITS(info->i.layout.bitmap); - size = BITMAP_SIZE(info->i.layout.bitmap); - p++; - p = traverseSmallBitmap(ts, p, size, bitmap, cp, data); - - follow_srt: - if (info->i.srt) { - traversePushClosure(ts, GET_SRT(info), cp, data); - } - continue; - - case RET_BCO: { - StgBCO *bco; - - p++; - traversePushClosure(ts, (StgClosure*)*p, cp, data); - bco = (StgBCO *)*p; - p++; - size = BCO_BITMAP_SIZE(bco); - traverseLargeBitmap(ts, p, BCO_BITMAP(bco), size, cp, data); - p += size; - continue; - } - - // large bitmap (> 32 entries, or > 64 on a 64-bit machine) - case RET_BIG: - size = GET_LARGE_BITMAP(&info->i)->size; - p++; - traverseLargeBitmap(ts, p, GET_LARGE_BITMAP(&info->i), - size, cp, data); - p += size; - // and don't forget to follow the SRT - goto follow_srt; - - case RET_FUN: { - StgRetFun *ret_fun = (StgRetFun *)p; - const StgFunInfoTable *fun_info; - - traversePushClosure(ts, ret_fun->fun, cp, data); - fun_info = get_fun_itbl(UNTAG_CONST_CLOSURE(ret_fun->fun)); - - p = (P_)&ret_fun->payload; - switch (fun_info->f.fun_type) { - case ARG_GEN: - bitmap = BITMAP_BITS(fun_info->f.b.bitmap); - size = BITMAP_SIZE(fun_info->f.b.bitmap); - p = traverseSmallBitmap(ts, p, size, bitmap, cp, data); - break; - case ARG_GEN_BIG: - size = GET_FUN_LARGE_BITMAP(fun_info)->size; - traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info), - size, cp, data); - p += size; - break; - default: - bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); - size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]); - p = traverseSmallBitmap(ts, p, size, bitmap, cp, data); - break; - } - goto follow_srt; - } - - default: - barf("Invalid object found in traversePushStack(): %d", - (int)(info->i.type)); - } - } -} - -/* ---------------------------------------------------------------------------- - * Call traversePushClosure for each of the children of a PAP/AP - * ------------------------------------------------------------------------- */ - -STATIC_INLINE StgPtr -traversePAP (traverseState *ts, - StgClosure *pap, /* NOT tagged */ - stackData data, - StgClosure *fun, /* tagged */ - StgClosure** payload, StgWord n_args) -{ - StgPtr p; - StgWord bitmap; - const StgFunInfoTable *fun_info; - - traversePushClosure(ts, fun, pap, data); - fun = UNTAG_CLOSURE(fun); - fun_info = get_fun_itbl(fun); - ASSERT(fun_info->i.type != PAP); - - p = (StgPtr)payload; - - switch (fun_info->f.fun_type) { - case ARG_GEN: - bitmap = BITMAP_BITS(fun_info->f.b.bitmap); - p = traverseSmallBitmap(ts, p, n_args, bitmap, - pap, data); - break; - case ARG_GEN_BIG: - traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info), - n_args, pap, data); - p += n_args; - break; - case ARG_BCO: - traverseLargeBitmap(ts, (StgPtr)payload, BCO_BITMAP(fun), - n_args, pap, data); - p += n_args; - break; - default: - bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); - p = traverseSmallBitmap(ts, p, n_args, bitmap, pap, data); - break; - } - return p; -} - static bool retainVisitClosure( StgClosure *c, const StgClosure *cp, const stackData data, const bool first_visit, stackData *out_data ) { @@ -1408,219 +334,6 @@ retainVisitClosure( StgClosure *c, const StgClosure *cp, const stackData data, c return 1; } -static void -resetMutableObjects(void) -{ - uint32_t g, n; - bdescr *bd; - StgPtr ml; - - // The following code resets the 'trav' field of each unvisited mutable - // object. - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - // NOT true: even G0 has a block on its mutable list - // ASSERT(g != 0 || (generations[g].mut_list == NULL)); - - // Traversing through mut_list is necessary - // because we can find MUT_VAR objects which have not been - // visited during heap traversal. - for (n = 0; n < n_capabilities; n++) { - for (bd = capabilities[n]->mut_lists[g]; bd != NULL; bd = bd->link) { - for (ml = bd->start; ml < bd->free; ml++) { - - traverseMaybeInitClosureData((StgClosure *)*ml); - } - } - } - } -} - -/** - * Traverse all closures on the traversal work-stack, calling 'visit_cb' on each - * closure. See 'visitClosure_cb' for details. This function flips the 'flip' - * bit and hence every closure's profiling data will be reset to zero upon - * visiting. See Note [Profiling heap traversal visited bit]. - */ -void -traverseWorkStack(traverseState *ts, visitClosure_cb visit_cb) -{ - // first_child = first child of c - StgClosure *c, *cp, *first_child; - stackData data, child_data; - StgWord typeOfc; - - // Now we flip the flip bit. - flip = flip ^ 1; - - // c = Current closure (possibly tagged) - // cp = Current closure's Parent (NOT tagged) - // data = current closures' associated data (NOT tagged) - // data_out = data to associate with current closure's children - -loop: - traversePop(ts, &c, &cp, &data); - - if (c == NULL) { - debug("maxStackSize= %d\n", ts->maxStackSize); - resetMutableObjects(); - return; - } -inner_loop: - c = UNTAG_CLOSURE(c); - - typeOfc = get_itbl(c)->type; - - // special cases - switch (typeOfc) { - case TSO: - if (((StgTSO *)c)->what_next == ThreadComplete || - ((StgTSO *)c)->what_next == ThreadKilled) { - debug("ThreadComplete or ThreadKilled encountered in traverseWorkStack()\n"); - goto loop; - } - break; - - case IND_STATIC: - // We just skip IND_STATIC, so it's never visited. - c = ((StgIndStatic *)c)->indirectee; - goto inner_loop; - - case CONSTR_NOCAF: - // static objects with no pointers out, so goto loop. - - // It is not just enough not to visit *c; it is - // mandatory because CONSTR_NOCAF are not reachable from - // scavenged_static_objects, the list from which is assumed to traverse - // all static objects after major garbage collections. - goto loop; - - case THUNK_STATIC: - if (get_itbl(c)->srt == 0) { - // No need to visit *c; no dynamic objects are reachable from it. - // - // Static objects: if we traverse all the live closures, - // including static closures, during each heap census then - // we will observe that some static closures appear and - // disappear. eg. a closure may contain a pointer to a - // static function 'f' which is not otherwise reachable - // (it doesn't indirectly point to any CAFs, so it doesn't - // appear in any SRTs), so we would find 'f' during - // traversal. However on the next sweep there may be no - // closures pointing to 'f'. - // - // We must therefore ignore static closures whose SRT is - // empty, because these are exactly the closures that may - // "appear". A closure with a non-empty SRT, and which is - // still required, will always be reachable. - // - // But what about CONSTR? Surely these may be able - // to appear, and they don't have SRTs, so we can't - // check. So for now, we're calling - // resetStaticObjectForProfiling() from the - // garbage collector to reset the retainer sets in all the - // reachable static objects. - goto loop; - } - /* fall-thru */ - - case FUN_STATIC: { - const StgInfoTable *info = get_itbl(c); - if (info->srt == 0 && info->layout.payload.ptrs == 0) { - goto loop; - } else { - break; - } - } - - default: - break; - } - - // If this is the first visit to c, initialize its data. - bool first_visit = traverseMaybeInitClosureData(c); - bool traverse_children - = visit_cb(c, cp, data, first_visit, (stackData*)&child_data); - if(!traverse_children) - goto loop; - - // process child - - // Special case closures: we process these all in one go rather - // than attempting to save the current position, because doing so - // would be hard. - switch (typeOfc) { - case STACK: - traversePushStack(ts, c, child_data, - ((StgStack *)c)->sp, - ((StgStack *)c)->stack + ((StgStack *)c)->stack_size); - goto loop; - - case TSO: - { - StgTSO *tso = (StgTSO *)c; - - traversePushClosure(ts, (StgClosure *) tso->stackobj, c, child_data); - traversePushClosure(ts, (StgClosure *) tso->blocked_exceptions, c, child_data); - traversePushClosure(ts, (StgClosure *) tso->bq, c, child_data); - traversePushClosure(ts, (StgClosure *) tso->trec, c, child_data); - if ( tso->why_blocked == BlockedOnMVar - || tso->why_blocked == BlockedOnMVarRead - || tso->why_blocked == BlockedOnBlackHole - || tso->why_blocked == BlockedOnMsgThrowTo - ) { - traversePushClosure(ts, tso->block_info.closure, c, child_data); - } - goto loop; - } - - case BLOCKING_QUEUE: - { - StgBlockingQueue *bq = (StgBlockingQueue *)c; - traversePushClosure(ts, (StgClosure *) bq->link, c, child_data); - traversePushClosure(ts, (StgClosure *) bq->bh, c, child_data); - traversePushClosure(ts, (StgClosure *) bq->owner, c, child_data); - goto loop; - } - - case PAP: - { - StgPAP *pap = (StgPAP *)c; - traversePAP(ts, c, child_data, pap->fun, pap->payload, pap->n_args); - goto loop; - } - - case AP: - { - StgAP *ap = (StgAP *)c; - traversePAP(ts, c, child_data, ap->fun, ap->payload, ap->n_args); - goto loop; - } - - case AP_STACK: - traversePushClosure(ts, ((StgAP_STACK *)c)->fun, c, child_data); - traversePushStack(ts, c, child_data, - (StgPtr)((StgAP_STACK *)c)->payload, - (StgPtr)((StgAP_STACK *)c)->payload + - ((StgAP_STACK *)c)->size); - goto loop; - } - - traversePushChildren(ts, c, child_data, &first_child); - - // If first_child is null, c has no child. - // If first_child is not null, the top stack element points to the next - // object. traversePushChildren() may or may not push a stackElement on the - // stack. - if (first_child == NULL) - goto loop; - - // (c, cp, data) = (first_child, c, child_data) - data = child_data; - cp = c; - c = first_child; - goto inner_loop; -} - /** * Push every object reachable from *tl onto the traversal work stack. */ @@ -1684,72 +397,6 @@ computeRetainerSet( traverseState *ts ) } /* ----------------------------------------------------------------------------- - * Traverse all static objects for which we compute retainer sets, - * and reset their rs fields to NULL, which is accomplished by - * invoking traverseMaybeInitClosureData(). This function must be called - * before zeroing all objects reachable from scavenged_static_objects - * in the case of major garbage collections. See GarbageCollect() in - * GC.c. - * Note: - * The mut_once_list of the oldest generation must also be traversed? - * Why? Because if the evacuation of an object pointed to by a static - * indirection object fails, it is put back to the mut_once_list of - * the oldest generation. - * However, this is not necessary because any static indirection objects - * are just traversed through to reach dynamic objects. In other words, - * they are not taken into consideration in computing retainer sets. - * - * SDM (20/7/2011): I don't think this is doing anything sensible, - * because it happens before retainerProfile() and at the beginning of - * retainerProfil() we change the sense of 'flip'. So all of the - * calls to traverseMaybeInitClosureData() here are initialising retainer sets - * with the wrong flip. Also, I don't see why this is necessary. I - * added a traverseMaybeInitClosureData() call to retainRoot(), and that seems - * to have fixed the assertion failure in retainerSetOf() I was - * encountering. - * -------------------------------------------------------------------------- */ -void -resetStaticObjectForProfiling( StgClosure *static_objects ) -{ - uint32_t count = 0; - StgClosure *p; - - p = static_objects; - while (p != END_OF_STATIC_OBJECT_LIST) { - p = UNTAG_STATIC_LIST_PTR(p); - count++; - - switch (get_itbl(p)->type) { - case IND_STATIC: - // Since we do not compute the retainer set of any - // IND_STATIC object, we don't have to reset its retainer - // field. - p = (StgClosure*)*IND_STATIC_LINK(p); - break; - case THUNK_STATIC: - traverseMaybeInitClosureData(p); - p = (StgClosure*)*THUNK_STATIC_LINK(p); - break; - case FUN_STATIC: - case CONSTR: - case CONSTR_1_0: - case CONSTR_2_0: - case CONSTR_1_1: - case CONSTR_NOCAF: - traverseMaybeInitClosureData(p); - p = (StgClosure*)*STATIC_LINK(get_itbl(p), p); - break; - default: - barf("resetStaticObjectForProfiling: %p (%lu)", - p, (unsigned long)get_itbl(p)->type); - break; - } - } - - debug("count in scavenged_static_objects = %d\n", count); -} - -/* ----------------------------------------------------------------------------- * Perform retainer profiling. * N is the oldest generation being profilied, where the generations are * numbered starting at 0. diff --git a/rts/TraverseHeap.c b/rts/TraverseHeap.c new file mode 100644 index 0000000000..bf2584cef4 --- /dev/null +++ b/rts/TraverseHeap.c @@ -0,0 +1,1371 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2001,2019 + * Author: Sungwoo Park, Daniel Gröber + * + * Generalised profiling heap traversal. + * + * ---------------------------------------------------------------------------*/ + +#if defined(PROFILING) + +#include "PosixSource.h" +#include "Rts.h" +#include "sm/Storage.h" + +#include "TraverseHeap.h" + +/** Note [Profiling heap traversal visited bit] + * + * If the RTS is compiled with profiling enabled StgProfHeader can be used by + * profiling code to store per-heap object information. + * + * The generic heap traversal code reserves the least significant bit of the + * largest members of the 'trav' union to decide whether we've already visited a + * given closure in the current pass or not. The rest of the field is free to be + * used by the calling profiler. + * + * By doing things this way we implicitly assume that the LSB of the largest + * field in the 'trav' union is insignificant. This is true at least for the + * word aligned pointers which the retainer profiler currently stores there and + * should be maintained by new users of the 'trav' union for example by shifting + * the real data up by one bit. + * + * Since we don't want to have to scan the entire heap a second time just to + * reset the per-object visitied bit before/after the real traversal we make the + * interpretation of this bit dependent on the value of a global variable, + * 'flip'. + * + * When the 'trav' bit is equal to the value of 'flip' the closure data is + * valid otherwise not (see isTravDataValid). We then invert the value of 'flip' + * on each heap traversal (see traverseWorkStack), in effect marking all + * closure's data as invalid at once. + * + * There are some complications with this approach, namely: static objects and + * mutable data. There we do just go over all existing objects to reset the bit + * manually. See 'resetStaticObjectForProfiling' and 'resetMutableObjects'. + */ +StgWord flip = 0; + +#define setTravDataToZero(c) \ + (c)->header.prof.hp.trav.lsb = flip + +typedef enum { + // Object with fixed layout. Keeps an information about that + // element was processed. (stackPos.next.step) + posTypeStep, + // Description of the pointers-first heap object. Keeps information + // about layout. (stackPos.next.ptrs) + posTypePtrs, + // Keeps SRT bitmap (stackPos.next.srt) + posTypeSRT, + // Keeps a new object that was not inspected yet. Keeps a parent + // element (stackPos.next.parent) + posTypeFresh +} nextPosType; + +typedef union { + // fixed layout or layout specified by a field in the closure + StgWord step; + + // layout.payload + struct { + // See StgClosureInfo in InfoTables.h + StgHalfWord pos; + StgHalfWord ptrs; + StgPtr payload; + } ptrs; + + // SRT + struct { + StgClosure *srt; + } srt; +} nextPos; + +/** + * Position pointer into a closure. Determines what the next element to return + * for a stackElement is. + */ +typedef struct { + nextPosType type; + nextPos next; +} stackPos; + +/** + * An element of the traversal work-stack. Besides the closure itself this also + * stores it's parent and associated data. + * + * When 'info.type == posTypeFresh' a 'stackElement' represents just one + * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an + * offset into the children of 'c'. This is to support returning a closure's + * children one-by-one without pushing one element per child onto the stack. See + * traversePushChildren() and traversePop(). + * + */ +typedef struct stackElement_ { + stackPos info; + StgClosure *c; + StgClosure *cp; // parent of 'c'. Only used when info.type == posTypeFresh. + stackData data; +} stackElement; + + +#if defined(DEBUG) +unsigned int g_traversalDebugLevel = 0; +static inline void debug(const char *s, ...) +{ + va_list ap; + + if(g_traversalDebugLevel == 0) + return; + + va_start(ap,s); + vdebugBelch(s, ap); + va_end(ap); +} +#else +#define debug(...) +#endif + +// number of blocks allocated for one stack +#define BLOCKS_IN_STACK 1 + +/* ----------------------------------------------------------------------------- + * Add a new block group to the stack. + * Invariants: + * currentStack->link == s. + * -------------------------------------------------------------------------- */ +STATIC_INLINE void +newStackBlock( traverseState *ts, bdescr *bd ) +{ + ts->currentStack = bd; + ts->stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks); + ts->stackBottom = (stackElement *)bd->start; + ts->stackLimit = (stackElement *)ts->stackTop; + bd->free = (StgPtr)ts->stackLimit; +} + +/* ----------------------------------------------------------------------------- + * Return to the previous block group. + * Invariants: + * s->link == currentStack. + * -------------------------------------------------------------------------- */ +STATIC_INLINE void +returnToOldStack( traverseState *ts, bdescr *bd ) +{ + ts->currentStack = bd; + ts->stackTop = (stackElement *)bd->free; + ts->stackBottom = (stackElement *)bd->start; + ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks); + bd->free = (StgPtr)ts->stackLimit; +} + +/** + * Initializes the traversal work-stack. + */ +void +initializeTraverseStack( traverseState *ts ) +{ + if (ts->firstStack != NULL) { + freeChain(ts->firstStack); + } + + ts->firstStack = allocGroup(BLOCKS_IN_STACK); + ts->firstStack->link = NULL; + ts->firstStack->u.back = NULL; + + ts->stackSize = 0; + ts->maxStackSize = 0; + + newStackBlock(ts, ts->firstStack); +} + +/** + * Frees all the block groups in the traversal works-stack. + * + * Invariants: + * firstStack != NULL + */ +void +closeTraverseStack( traverseState *ts ) +{ + freeChain(ts->firstStack); + ts->firstStack = NULL; +} + +int +getTraverseStackMaxSize(traverseState *ts) +{ + return ts->maxStackSize; +} + +/* ----------------------------------------------------------------------------- + * Returns true if the whole stack is empty. + * -------------------------------------------------------------------------- */ +STATIC_INLINE bool +isEmptyWorkStack( traverseState *ts ) +{ + return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit; +} + +/* ----------------------------------------------------------------------------- + * Returns size of stack + * -------------------------------------------------------------------------- */ +W_ +traverseWorkStackBlocks(traverseState *ts) +{ + bdescr* bd; + W_ res = 0; + + for (bd = ts->firstStack; bd != NULL; bd = bd->link) + res += bd->blocks; + + return res; +} + +/* ----------------------------------------------------------------------------- + * Initializes *info from ptrs and payload. + * Invariants: + * payload[] begins with ptrs pointers followed by non-pointers. + * -------------------------------------------------------------------------- */ +STATIC_INLINE void +init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload ) +{ + info->type = posTypePtrs; + info->next.ptrs.pos = 0; + info->next.ptrs.ptrs = ptrs; + info->next.ptrs.payload = payload; +} + +/* ----------------------------------------------------------------------------- + * Find the next object from *info. + * -------------------------------------------------------------------------- */ +STATIC_INLINE StgClosure * +find_ptrs( stackPos *info ) +{ + if (info->next.ptrs.pos < info->next.ptrs.ptrs) { + return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++]; + } else { + return NULL; + } +} + +/* ----------------------------------------------------------------------------- + * Initializes *info from SRT information stored in *infoTable. + * -------------------------------------------------------------------------- */ +STATIC_INLINE void +init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable ) +{ + info->type = posTypeSRT; + if (infoTable->i.srt) { + info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable); + } else { + info->next.srt.srt = NULL; + } +} + +STATIC_INLINE void +init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable ) +{ + info->type = posTypeSRT; + if (infoTable->i.srt) { + info->next.srt.srt = (StgClosure*)GET_SRT(infoTable); + } else { + info->next.srt.srt = NULL; + } +} + +/* ----------------------------------------------------------------------------- + * Find the next object from *info. + * -------------------------------------------------------------------------- */ +STATIC_INLINE StgClosure * +find_srt( stackPos *info ) +{ + StgClosure *c; + if (info->type == posTypeSRT) { + c = info->next.srt.srt; + info->next.srt.srt = NULL; + return c; + } + + return NULL; +} + +/** + * Push a set of closures, represented by a single 'stackElement', onto the + * traversal work-stack. + */ +static void +pushStackElement(traverseState *ts, stackElement *se) +{ + bdescr *nbd; // Next Block Descriptor + if (ts->stackTop - 1 < ts->stackBottom) { + debug("pushStackElement() to the next stack.\n"); + + // currentStack->free is updated when the active stack is switched + // to the next stack. + ts->currentStack->free = (StgPtr)ts->stackTop; + + if (ts->currentStack->link == NULL) { + nbd = allocGroup(BLOCKS_IN_STACK); + nbd->link = NULL; + nbd->u.back = ts->currentStack; + ts->currentStack->link = nbd; + } else + nbd = ts->currentStack->link; + + newStackBlock(ts, nbd); + } + + // adjust stackTop (acutal push) + ts->stackTop--; + // If the size of stackElement was huge, we would better replace the + // following statement by either a memcpy() call or a switch statement + // on the type of the element. Currently, the size of stackElement is + // small enough (5 words) that this direct assignment seems to be enough. + *ts->stackTop = *se; + + ts->stackSize++; + if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize; + ASSERT(ts->stackSize >= 0); + debug("stackSize = %d\n", ts->stackSize); +} + +/** + * Push a single closure onto the traversal work-stack. + * + * cp - object's parent + * c - closure + * data - data associated with closure. + */ +inline void +traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackData data) { + stackElement se; + + se.c = c; + se.cp = cp; + se.data = data; + se.info.type = posTypeFresh; + + pushStackElement(ts, &se); +}; + +/** + * traversePushChildren() extracts the first child of 'c' in 'first_child' and + * conceptually pushes all remaining children of 'c' onto the traversal stack + * while associating 'data' with the pushed elements to be returned upon poping. + * + * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed + * onto the stack. + * + * If 'c' has only one child, 'first_child' is set to that child and nothing is + * pushed onto the stack. + * + * Invariants: + * + * - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot + * be any stack objects. + * + * Note: SRTs are considered to be children as well. + * + * Note: When pushing onto the stack we only really push one 'stackElement' + * representing all children onto the stack. See traversePop() + */ +STATIC_INLINE void +traversePushChildren(traverseState *ts, StgClosure *c, stackData data, StgClosure **first_child) +{ + stackElement se; + + debug("traversePushChildren(): stackTop = 0x%x\n", ts->stackTop); + + ASSERT(get_itbl(c)->type != TSO); + ASSERT(get_itbl(c)->type != AP_STACK); + + // + // fill in se + // + + se.c = c; + se.data = data; + // Note: se.cp ommitted on purpose, only traversePushClosure uses that. + + // fill in se.info + switch (get_itbl(c)->type) { + // no child, no SRT + case CONSTR_0_1: + case CONSTR_0_2: + case ARR_WORDS: + case COMPACT_NFDATA: + *first_child = NULL; + return; + + // one child (fixed), no SRT + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: + *first_child = ((StgMutVar *)c)->var; + return; + case THUNK_SELECTOR: + *first_child = ((StgSelector *)c)->selectee; + return; + case BLACKHOLE: + *first_child = ((StgInd *)c)->indirectee; + return; + case CONSTR_1_0: + case CONSTR_1_1: + *first_child = c->payload[0]; + return; + + // For CONSTR_2_0 and MVAR, we use se.info.step to record the position + // of the next child. We do not write a separate initialization code. + // Also we do not have to initialize info.type; + + // two children (fixed), no SRT + // need to push a stackElement, but nothing to store in se.info + case CONSTR_2_0: + *first_child = c->payload[0]; // return the first pointer + se.info.type = posTypeStep; + se.info.next.step = 2; // 2 = second + break; + + // three children (fixed), no SRT + // need to push a stackElement + case MVAR_CLEAN: + case MVAR_DIRTY: + // head must be TSO and the head of a linked list of TSOs. + // Shoule it be a child? Seems to be yes. + *first_child = (StgClosure *)((StgMVar *)c)->head; + se.info.type = posTypeStep; + se.info.next.step = 2; // 2 = second + break; + + // three children (fixed), no SRT + case WEAK: + *first_child = ((StgWeak *)c)->key; + se.info.type = posTypeStep; + se.info.next.step = 2; + break; + + // layout.payload.ptrs, no SRT + case TVAR: + case CONSTR: + case CONSTR_NOCAF: + case PRIM: + case MUT_PRIM: + case BCO: + init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, + (StgPtr)c->payload); + *first_child = find_ptrs(&se.info); + if (*first_child == NULL) + return; // no child + break; + + // StgMutArrPtr.ptrs, no SRT + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN_CLEAN: + case MUT_ARR_PTRS_FROZEN_DIRTY: + init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs, + (StgPtr)(((StgMutArrPtrs *)c)->payload)); + *first_child = find_ptrs(&se.info); + if (*first_child == NULL) + return; + break; + + // StgMutArrPtr.ptrs, no SRT + case SMALL_MUT_ARR_PTRS_CLEAN: + case SMALL_MUT_ARR_PTRS_DIRTY: + case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN: + case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY: + init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs, + (StgPtr)(((StgSmallMutArrPtrs *)c)->payload)); + *first_child = find_ptrs(&se.info); + if (*first_child == NULL) + return; + break; + + // layout.payload.ptrs, SRT + case FUN_STATIC: + case FUN: // *c is a heap object. + case FUN_2_0: + init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload); + *first_child = find_ptrs(&se.info); + if (*first_child == NULL) + // no child from ptrs, so check SRT + goto fun_srt_only; + break; + + case THUNK: + case THUNK_2_0: + init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, + (StgPtr)((StgThunk *)c)->payload); + *first_child = find_ptrs(&se.info); + if (*first_child == NULL) + // no child from ptrs, so check SRT + goto thunk_srt_only; + break; + + // 1 fixed child, SRT + case FUN_1_0: + case FUN_1_1: + *first_child = c->payload[0]; + ASSERT(*first_child != NULL); + init_srt_fun(&se.info, get_fun_itbl(c)); + break; + + case THUNK_1_0: + case THUNK_1_1: + *first_child = ((StgThunk *)c)->payload[0]; + ASSERT(*first_child != NULL); + init_srt_thunk(&se.info, get_thunk_itbl(c)); + break; + + case FUN_0_1: // *c is a heap object. + case FUN_0_2: + fun_srt_only: + init_srt_fun(&se.info, get_fun_itbl(c)); + *first_child = find_srt(&se.info); + if (*first_child == NULL) + return; // no child + break; + + // SRT only + case THUNK_STATIC: + ASSERT(get_itbl(c)->srt != 0); + /* fall-thru */ + case THUNK_0_1: + case THUNK_0_2: + thunk_srt_only: + init_srt_thunk(&se.info, get_thunk_itbl(c)); + *first_child = find_srt(&se.info); + if (*first_child == NULL) + return; // no child + break; + + case TREC_CHUNK: + *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk; + se.info.type = posTypeStep; + se.info.next.step = 0; // entry no. + break; + + // cannot appear + case PAP: + case AP: + case AP_STACK: + case TSO: + case STACK: + case IND_STATIC: + // stack objects + case UPDATE_FRAME: + case CATCH_FRAME: + case UNDERFLOW_FRAME: + case STOP_FRAME: + case RET_BCO: + case RET_SMALL: + case RET_BIG: + // invalid objects + case IND: + case INVALID_OBJECT: + default: + barf("Invalid object *c in push(): %d", get_itbl(c)->type); + return; + } + + // se.cp has to be initialized when type==posTypeFresh. We don't do that + // here though. So type must be !=posTypeFresh. + ASSERT(se.info.type != posTypeFresh); + + pushStackElement(ts, &se); +} + +/** + * popStackElement(): Remove a depleted stackElement from the top of the + * traversal work-stack. + * + * Invariants: + * stackTop cannot be equal to stackLimit unless the whole stack is + * empty, in which case popStackElement() is not allowed. + */ +static void +popStackElement(traverseState *ts) { + debug("popStackElement(): stackTop = 0x%x\n", ts->stackTop); + + ASSERT(ts->stackTop != ts->stackLimit); + ASSERT(!isEmptyWorkStack(ts)); + + // <= (instead of <) is wrong! + if (ts->stackTop + 1 < ts->stackLimit) { + ts->stackTop++; + + ts->stackSize--; + if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize; + ASSERT(ts->stackSize >= 0); + debug("stackSize = (--) %d\n", ts->stackSize); + + return; + } + + bdescr *pbd; // Previous Block Descriptor + + debug("popStackElement() to the previous stack.\n"); + + ASSERT(ts->stackTop + 1 == ts->stackLimit); + ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start); + + if (ts->firstStack == ts->currentStack) { + // The stack is completely empty. + ts->stackTop++; + ASSERT(ts->stackTop == ts->stackLimit); + + ts->stackSize--; + if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize; + ASSERT(ts->stackSize >= 0); + debug("stackSize = %d\n", ts->stackSize); + + return; + } + + // currentStack->free is updated when the active stack is switched back + // to the previous stack. + ts->currentStack->free = (StgPtr)ts->stackLimit; + + // find the previous block descriptor + pbd = ts->currentStack->u.back; + ASSERT(pbd != NULL); + + returnToOldStack(ts, pbd); + + ts->stackSize--; + if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize; + ASSERT(ts->stackSize >= 0); + debug("stackSize = %d\n", ts->stackSize); +} + +/** + * Finds the next object to be considered for retainer profiling and store + * its pointer to *c. + * + * If the unprocessed object was stored in the stack (posTypeFresh), the + * this object is returned as-is. Otherwise Test if the topmost stack + * element indicates that more objects are left, + * and if so, retrieve the first object and store its pointer to *c. Also, + * set *cp and *data appropriately, both of which are stored in the stack + * element. The topmost stack element then is overwritten so as for it to now + * denote the next object. + * + * If the topmost stack element indicates no more objects are left, pop + * off the stack element until either an object can be retrieved or + * the work-stack becomes empty, indicated by true returned by + * isEmptyWorkStack(), in which case *c is set to NULL. + * + * Note: + * + * It is okay to call this function even when the work-stack is empty. + */ +STATIC_INLINE void +traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data) +{ + stackElement *se; + + debug("traversePop(): stackTop = 0x%x\n", ts->stackTop); + + // Is this the last internal element? If so instead of modifying the current + // stackElement in place we actually remove it from the stack. + bool last = false; + + do { + if (isEmptyWorkStack(ts)) { + *c = NULL; + return; + } + + // Note: Below every `break`, where the loop condition is true, must be + // accompanied by a popStackElement() otherwise this is an infinite + // loop. + se = ts->stackTop; + + // If this is a top-level element, you should pop that out. + if (se->info.type == posTypeFresh) { + *cp = se->cp; + *c = se->c; + *data = se->data; + popStackElement(ts); + return; + } + + // Note: The first ptr of all of these was already returned as + // *fist_child in push(), so we always start with the second field. + switch (get_itbl(se->c)->type) { + // two children (fixed), no SRT + // nothing in se.info + case CONSTR_2_0: + *c = se->c->payload[1]; + last = true; + goto out; + + // three children (fixed), no SRT + // need to push a stackElement + case MVAR_CLEAN: + case MVAR_DIRTY: + if (se->info.next.step == 2) { + *c = (StgClosure *)((StgMVar *)se->c)->tail; + se->info.next.step++; // move to the next step + // no popStackElement + } else { + *c = ((StgMVar *)se->c)->value; + last = true; + } + goto out; + + // three children (fixed), no SRT + case WEAK: + if (se->info.next.step == 2) { + *c = ((StgWeak *)se->c)->value; + se->info.next.step++; + // no popStackElement + } else { + *c = ((StgWeak *)se->c)->finalizer; + last = true; + } + goto out; + + case TREC_CHUNK: { + // These are pretty complicated: we have N entries, each + // of which contains 3 fields that we want to follow. So + // we divide the step counter: the 2 low bits indicate + // which field, and the rest of the bits indicate the + // entry number (starting from zero). + TRecEntry *entry; + uint32_t entry_no = se->info.next.step >> 2; + uint32_t field_no = se->info.next.step & 3; + if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) { + *c = NULL; + popStackElement(ts); + break; // this breaks out of the switch not the loop + } + entry = &((StgTRecChunk *)se->c)->entries[entry_no]; + if (field_no == 0) { + *c = (StgClosure *)entry->tvar; + } else if (field_no == 1) { + *c = entry->expected_value; + } else { + *c = entry->new_value; + } + se->info.next.step++; + goto out; + } + + case TVAR: + case CONSTR: + case PRIM: + case MUT_PRIM: + case BCO: + // StgMutArrPtr.ptrs, no SRT + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN_CLEAN: + case MUT_ARR_PTRS_FROZEN_DIRTY: + case SMALL_MUT_ARR_PTRS_CLEAN: + case SMALL_MUT_ARR_PTRS_DIRTY: + case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN: + case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY: + *c = find_ptrs(&se->info); + if (*c == NULL) { + popStackElement(ts); + break; // this breaks out of the switch not the loop + } + goto out; + + // layout.payload.ptrs, SRT + case FUN: // always a heap object + case FUN_STATIC: + case FUN_2_0: + if (se->info.type == posTypePtrs) { + *c = find_ptrs(&se->info); + if (*c != NULL) { + goto out; + } + init_srt_fun(&se->info, get_fun_itbl(se->c)); + } + goto do_srt; + + case THUNK: + case THUNK_2_0: + if (se->info.type == posTypePtrs) { + *c = find_ptrs(&se->info); + if (*c != NULL) { + goto out; + } + init_srt_thunk(&se->info, get_thunk_itbl(se->c)); + } + goto do_srt; + + // SRT + do_srt: + case THUNK_STATIC: + case FUN_0_1: + case FUN_0_2: + case THUNK_0_1: + case THUNK_0_2: + case FUN_1_0: + case FUN_1_1: + case THUNK_1_0: + case THUNK_1_1: + *c = find_srt(&se->info); + if(*c == NULL) { + popStackElement(ts); + break; // this breaks out of the switch not the loop + } + goto out; + + // no child (fixed), no SRT + case CONSTR_0_1: + case CONSTR_0_2: + case ARR_WORDS: + // one child (fixed), no SRT + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: + case THUNK_SELECTOR: + case CONSTR_1_1: + // cannot appear + case PAP: + case AP: + case AP_STACK: + case TSO: + case STACK: + case IND_STATIC: + case CONSTR_NOCAF: + // stack objects + case UPDATE_FRAME: + case CATCH_FRAME: + case UNDERFLOW_FRAME: + case STOP_FRAME: + case RET_BCO: + case RET_SMALL: + case RET_BIG: + // invalid objects + case IND: + case INVALID_OBJECT: + default: + barf("Invalid object *c in traversePop(): %d", get_itbl(se->c)->type); + return; + } + } while (*c == NULL); + +out: + + ASSERT(*c != NULL); + + *cp = se->c; + *data = se->data; + + if(last) + popStackElement(ts); + + return; + +} + +/** + * Make sure a closure's profiling data is initialized to zero if it does not + * conform to the current value of the flip bit, returns true in this case. + * + * See Note [Profiling heap traversal visited bit]. + */ +bool +traverseMaybeInitClosureData(StgClosure *c) +{ + if (!isTravDataValid(c)) { + setTravDataToZero(c); + return true; + } + return false; +} + +/* ----------------------------------------------------------------------------- + Call traversePushClosure for each of the closures covered by a large bitmap. + -------------------------------------------------------------------------- */ + +static void +traverseLargeBitmap (traverseState *ts, StgPtr p, StgLargeBitmap *large_bitmap, + uint32_t size, StgClosure *c, stackData data) +{ + uint32_t i, b; + StgWord bitmap; + + b = 0; + bitmap = large_bitmap->bitmap[b]; + for (i = 0; i < size; ) { + if ((bitmap & 1) == 0) { + traversePushClosure(ts, (StgClosure *)*p, c, data); + } + i++; + p++; + if (i % BITS_IN(W_) == 0) { + b++; + bitmap = large_bitmap->bitmap[b]; + } else { + bitmap = bitmap >> 1; + } + } +} + +STATIC_INLINE StgPtr +traverseSmallBitmap (traverseState *ts, StgPtr p, uint32_t size, StgWord bitmap, + StgClosure *c, stackData data) +{ + while (size > 0) { + if ((bitmap & 1) == 0) { + traversePushClosure(ts, (StgClosure *)*p, c, data); + } + p++; + bitmap = bitmap >> 1; + size--; + } + return p; +} + +/** + * traversePushStack(ts, cp, data, stackStart, stackEnd) pushes all the objects + * in the STG stack-chunk from stackStart to stackEnd onto the traversal + * work-stack with 'c' and 'data' being their parent and associated data, + * respectively. + * + * Invariants: + * + * *cp is one of the following: TSO, AP_STACK. + * + * stackStart < stackEnd. + * + * If *c is TSO, its state is not ThreadComplete,or ThreadKilled, + * which means that its stack is ready to process. + * + * Note: + * + * This code was almost plagiarzied from GC.c! For each pointer, + * traversePushClosure() is invoked instead of evacuate(). + */ +static void +traversePushStack(traverseState *ts, StgClosure *cp, stackData data, + StgPtr stackStart, StgPtr stackEnd) +{ + StgPtr p; + const StgRetInfoTable *info; + StgWord bitmap; + uint32_t size; + + ASSERT(get_itbl(cp)->type == STACK); + + p = stackStart; + while (p < stackEnd) { + info = get_ret_itbl((StgClosure *)p); + + switch(info->i.type) { + + case UPDATE_FRAME: + traversePushClosure(ts, ((StgUpdateFrame *)p)->updatee, cp, data); + p += sizeofW(StgUpdateFrame); + continue; + + case UNDERFLOW_FRAME: + case STOP_FRAME: + case CATCH_FRAME: + case CATCH_STM_FRAME: + case CATCH_RETRY_FRAME: + case ATOMICALLY_FRAME: + case RET_SMALL: + bitmap = BITMAP_BITS(info->i.layout.bitmap); + size = BITMAP_SIZE(info->i.layout.bitmap); + p++; + p = traverseSmallBitmap(ts, p, size, bitmap, cp, data); + + follow_srt: + if (info->i.srt) { + traversePushClosure(ts, GET_SRT(info), cp, data); + } + continue; + + case RET_BCO: { + StgBCO *bco; + + p++; + traversePushClosure(ts, (StgClosure*)*p, cp, data); + bco = (StgBCO *)*p; + p++; + size = BCO_BITMAP_SIZE(bco); + traverseLargeBitmap(ts, p, BCO_BITMAP(bco), size, cp, data); + p += size; + continue; + } + + // large bitmap (> 32 entries, or > 64 on a 64-bit machine) + case RET_BIG: + size = GET_LARGE_BITMAP(&info->i)->size; + p++; + traverseLargeBitmap(ts, p, GET_LARGE_BITMAP(&info->i), + size, cp, data); + p += size; + // and don't forget to follow the SRT + goto follow_srt; + + case RET_FUN: { + StgRetFun *ret_fun = (StgRetFun *)p; + const StgFunInfoTable *fun_info; + + traversePushClosure(ts, ret_fun->fun, cp, data); + fun_info = get_fun_itbl(UNTAG_CONST_CLOSURE(ret_fun->fun)); + + p = (P_)&ret_fun->payload; + switch (fun_info->f.fun_type) { + case ARG_GEN: + bitmap = BITMAP_BITS(fun_info->f.b.bitmap); + size = BITMAP_SIZE(fun_info->f.b.bitmap); + p = traverseSmallBitmap(ts, p, size, bitmap, cp, data); + break; + case ARG_GEN_BIG: + size = GET_FUN_LARGE_BITMAP(fun_info)->size; + traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info), + size, cp, data); + p += size; + break; + default: + bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); + size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]); + p = traverseSmallBitmap(ts, p, size, bitmap, cp, data); + break; + } + goto follow_srt; + } + + default: + barf("Invalid object found in traversePushStack(): %d", + (int)(info->i.type)); + } + } +} + +/* ---------------------------------------------------------------------------- + * Call traversePushClosure for each of the children of a PAP/AP + * ------------------------------------------------------------------------- */ + +STATIC_INLINE StgPtr +traversePAP (traverseState *ts, + StgClosure *pap, /* NOT tagged */ + stackData data, + StgClosure *fun, /* tagged */ + StgClosure** payload, StgWord n_args) +{ + StgPtr p; + StgWord bitmap; + const StgFunInfoTable *fun_info; + + traversePushClosure(ts, fun, pap, data); + fun = UNTAG_CLOSURE(fun); + fun_info = get_fun_itbl(fun); + ASSERT(fun_info->i.type != PAP); + + p = (StgPtr)payload; + + switch (fun_info->f.fun_type) { + case ARG_GEN: + bitmap = BITMAP_BITS(fun_info->f.b.bitmap); + p = traverseSmallBitmap(ts, p, n_args, bitmap, + pap, data); + break; + case ARG_GEN_BIG: + traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info), + n_args, pap, data); + p += n_args; + break; + case ARG_BCO: + traverseLargeBitmap(ts, (StgPtr)payload, BCO_BITMAP(fun), + n_args, pap, data); + p += n_args; + break; + default: + bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); + p = traverseSmallBitmap(ts, p, n_args, bitmap, pap, data); + break; + } + return p; +} + +static void +resetMutableObjects(void) +{ + uint32_t g, n; + bdescr *bd; + StgPtr ml; + + // The following code resets the 'trav' field of each unvisited mutable + // object. + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + // NOT true: even G0 has a block on its mutable list + // ASSERT(g != 0 || (generations[g].mut_list == NULL)); + + // Traversing through mut_list is necessary + // because we can find MUT_VAR objects which have not been + // visited during heap traversal. + for (n = 0; n < n_capabilities; n++) { + for (bd = capabilities[n]->mut_lists[g]; bd != NULL; bd = bd->link) { + for (ml = bd->start; ml < bd->free; ml++) { + + traverseMaybeInitClosureData((StgClosure *)*ml); + } + } + } + } +} + +/** + * Traverse all closures on the traversal work-stack, calling 'visit_cb' on each + * closure. See 'visitClosure_cb' for details. This function flips the 'flip' + * bit and hence every closure's profiling data will be reset to zero upon + * visiting. See Note [Profiling heap traversal visited bit]. + */ +void +traverseWorkStack(traverseState *ts, visitClosure_cb visit_cb) +{ + // first_child = first child of c + StgClosure *c, *cp, *first_child; + stackData data, child_data; + StgWord typeOfc; + + // Now we flip the flip bit. + flip = flip ^ 1; + + // c = Current closure (possibly tagged) + // cp = Current closure's Parent (NOT tagged) + // data = current closures' associated data (NOT tagged) + // data_out = data to associate with current closure's children + +loop: + traversePop(ts, &c, &cp, &data); + + if (c == NULL) { + debug("maxStackSize= %d\n", ts->maxStackSize); + resetMutableObjects(); + return; + } +inner_loop: + c = UNTAG_CLOSURE(c); + + typeOfc = get_itbl(c)->type; + + // special cases + switch (typeOfc) { + case TSO: + if (((StgTSO *)c)->what_next == ThreadComplete || + ((StgTSO *)c)->what_next == ThreadKilled) { + debug("ThreadComplete or ThreadKilled encountered in traverseWorkStack()\n"); + goto loop; + } + break; + + case IND_STATIC: + // We just skip IND_STATIC, so it's never visited. + c = ((StgIndStatic *)c)->indirectee; + goto inner_loop; + + case CONSTR_NOCAF: + // static objects with no pointers out, so goto loop. + + // It is not just enough not to visit *c; it is + // mandatory because CONSTR_NOCAF are not reachable from + // scavenged_static_objects, the list from which is assumed to traverse + // all static objects after major garbage collections. + goto loop; + + case THUNK_STATIC: + if (get_itbl(c)->srt == 0) { + // No need to visit *c; no dynamic objects are reachable from it. + // + // Static objects: if we traverse all the live closures, + // including static closures, during each heap census then + // we will observe that some static closures appear and + // disappear. eg. a closure may contain a pointer to a + // static function 'f' which is not otherwise reachable + // (it doesn't indirectly point to any CAFs, so it doesn't + // appear in any SRTs), so we would find 'f' during + // traversal. However on the next sweep there may be no + // closures pointing to 'f'. + // + // We must therefore ignore static closures whose SRT is + // empty, because these are exactly the closures that may + // "appear". A closure with a non-empty SRT, and which is + // still required, will always be reachable. + // + // But what about CONSTR? Surely these may be able + // to appear, and they don't have SRTs, so we can't + // check. So for now, we're calling + // resetStaticObjectForProfiling() from the + // garbage collector to reset the retainer sets in all the + // reachable static objects. + goto loop; + } + /* fall-thru */ + + case FUN_STATIC: { + const StgInfoTable *info = get_itbl(c); + if (info->srt == 0 && info->layout.payload.ptrs == 0) { + goto loop; + } else { + break; + } + } + + default: + break; + } + + // If this is the first visit to c, initialize its data. + bool first_visit = traverseMaybeInitClosureData(c); + bool traverse_children + = visit_cb(c, cp, data, first_visit, (stackData*)&child_data); + if(!traverse_children) + goto loop; + + // process child + + // Special case closures: we process these all in one go rather + // than attempting to save the current position, because doing so + // would be hard. + switch (typeOfc) { + case STACK: + traversePushStack(ts, c, child_data, + ((StgStack *)c)->sp, + ((StgStack *)c)->stack + ((StgStack *)c)->stack_size); + goto loop; + + case TSO: + { + StgTSO *tso = (StgTSO *)c; + + traversePushClosure(ts, (StgClosure *) tso->stackobj, c, child_data); + traversePushClosure(ts, (StgClosure *) tso->blocked_exceptions, c, child_data); + traversePushClosure(ts, (StgClosure *) tso->bq, c, child_data); + traversePushClosure(ts, (StgClosure *) tso->trec, c, child_data); + if ( tso->why_blocked == BlockedOnMVar + || tso->why_blocked == BlockedOnMVarRead + || tso->why_blocked == BlockedOnBlackHole + || tso->why_blocked == BlockedOnMsgThrowTo + ) { + traversePushClosure(ts, tso->block_info.closure, c, child_data); + } + goto loop; + } + + case BLOCKING_QUEUE: + { + StgBlockingQueue *bq = (StgBlockingQueue *)c; + traversePushClosure(ts, (StgClosure *) bq->link, c, child_data); + traversePushClosure(ts, (StgClosure *) bq->bh, c, child_data); + traversePushClosure(ts, (StgClosure *) bq->owner, c, child_data); + goto loop; + } + + case PAP: + { + StgPAP *pap = (StgPAP *)c; + traversePAP(ts, c, child_data, pap->fun, pap->payload, pap->n_args); + goto loop; + } + + case AP: + { + StgAP *ap = (StgAP *)c; + traversePAP(ts, c, child_data, ap->fun, ap->payload, ap->n_args); + goto loop; + } + + case AP_STACK: + traversePushClosure(ts, ((StgAP_STACK *)c)->fun, c, child_data); + traversePushStack(ts, c, child_data, + (StgPtr)((StgAP_STACK *)c)->payload, + (StgPtr)((StgAP_STACK *)c)->payload + + ((StgAP_STACK *)c)->size); + goto loop; + } + + traversePushChildren(ts, c, child_data, &first_child); + + // If first_child is null, c has no child. + // If first_child is not null, the top stack element points to the next + // object. traversePushChildren() may or may not push a stackElement on the + // stack. + if (first_child == NULL) + goto loop; + + // (c, cp, data) = (first_child, c, child_data) + data = child_data; + cp = c; + c = first_child; + goto inner_loop; +} + +/* ----------------------------------------------------------------------------- + * Traverse all static objects for which we compute retainer sets, + * and reset their rs fields to NULL, which is accomplished by + * invoking traverseMaybeInitClosureData(). This function must be called + * before zeroing all objects reachable from scavenged_static_objects + * in the case of major garbage collections. See GarbageCollect() in + * GC.c. + * Note: + * The mut_once_list of the oldest generation must also be traversed? + * Why? Because if the evacuation of an object pointed to by a static + * indirection object fails, it is put back to the mut_once_list of + * the oldest generation. + * However, this is not necessary because any static indirection objects + * are just traversed through to reach dynamic objects. In other words, + * they are not taken into consideration in computing retainer sets. + * + * SDM (20/7/2011): I don't think this is doing anything sensible, + * because it happens before retainerProfile() and at the beginning of + * retainerProfil() we change the sense of 'flip'. So all of the + * calls to traverseMaybeInitClosureData() here are initialising retainer sets + * with the wrong flip. Also, I don't see why this is necessary. I + * added a traverseMaybeInitClosureData() call to retainRoot(), and that seems + * to have fixed the assertion failure in retainerSetOf() I was + * encountering. + * -------------------------------------------------------------------------- */ +void +resetStaticObjectForProfiling( StgClosure *static_objects ) +{ + uint32_t count = 0; + StgClosure *p; + + p = static_objects; + while (p != END_OF_STATIC_OBJECT_LIST) { + p = UNTAG_STATIC_LIST_PTR(p); + count++; + + switch (get_itbl(p)->type) { + case IND_STATIC: + // Since we do not compute the retainer set of any + // IND_STATIC object, we don't have to reset its retainer + // field. + p = (StgClosure*)*IND_STATIC_LINK(p); + break; + case THUNK_STATIC: + traverseMaybeInitClosureData(p); + p = (StgClosure*)*THUNK_STATIC_LINK(p); + break; + case FUN_STATIC: + case CONSTR: + case CONSTR_1_0: + case CONSTR_2_0: + case CONSTR_1_1: + case CONSTR_NOCAF: + traverseMaybeInitClosureData(p); + p = (StgClosure*)*STATIC_LINK(get_itbl(p), p); + break; + default: + barf("resetStaticObjectForProfiling: %p (%lu)", + p, (unsigned long)get_itbl(p)->type); + break; + } + } + + debug("count in scavenged_static_objects = %d\n", count); +} + +#endif /* PROFILING */ diff --git a/rts/rts.cabal.in b/rts/rts.cabal.in index 7ce3c7f5aa..674566c0ad 100644 --- a/rts/rts.cabal.in +++ b/rts/rts.cabal.in @@ -430,6 +430,7 @@ library Timer.c TopHandler.c Trace.c + TraverseHeap.c WSDeque.c Weak.c eventlog/EventLog.c |