From 383f9089eea7f9228513260ad0f7215938cd4b31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gr=C3=B6ber?= Date: Thu, 4 Jul 2019 05:11:09 +0200 Subject: rts: Split heap traversal from retainer profiler This finally moves the newly generalised heap traversal code from the retainer profiler into it's own file. --- rts/RetainerProfile.c | 1353 ------------------------------------------------- 1 file changed, 1353 deletions(-) (limited to 'rts/RetainerProfile.c') diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c index 301f712e59..6f053c09c4 100644 --- a/rts/RetainerProfile.c +++ b/rts/RetainerProfile.c @@ -66,41 +66,6 @@ static uint32_t numObjectVisited; // total number of objects visited static uint32_t timesAnyObjectVisited; // number of times any objects are // visited -/** Note [Profiling heap traversal visited bit] - * - * If the RTS is compiled with profiling enabled StgProfHeader can be used by - * profiling code to store per-heap object information. - * - * The generic heap traversal code reserves the least significant bit of the - * largest members of the 'trav' union to decide whether we've already visited a - * given closure in the current pass or not. The rest of the field is free to be - * used by the calling profiler. - * - * By doing things this way we implicitly assume that the LSB of the largest - * field in the 'trav' union is insignificant. This is true at least for the - * word aligned pointers which the retainer profiler currently stores there and - * should be maintained by new users of the 'trav' union for example by shifting - * the real data up by one bit. - * - * Since we don't want to have to scan the entire heap a second time just to - * reset the per-object visitied bit before/after the real traversal we make the - * interpretation of this bit dependent on the value of a global variable, - * 'flip'. - * - * When the 'trav' bit is equal to the value of 'flip' the closure data is - * valid otherwise not (see isTravDataValid). We then invert the value of 'flip' - * on each heap traversal (see traverseWorkStack), in effect marking all - * closure's data as invalid at once. - * - * There are some complications with this approach, namely: static objects and - * mutable data. There we do just go over all existing objects to reset the bit - * manually. See 'resetStaticObjectForProfiling' and 'computeRetainerSet'. - */ -StgWord flip = 0; - -#define setTravDataToZero(c) \ - (c)->header.prof.hp.trav.lsb = flip - /* ----------------------------------------------------------------------------- * Retainer stack - header * Note: @@ -111,829 +76,14 @@ StgWord flip = 0; * all. * -------------------------------------------------------------------------- */ -typedef enum { - // Object with fixed layout. Keeps an information about that - // element was processed. (stackPos.next.step) - posTypeStep, - // Description of the pointers-first heap object. Keeps information - // about layout. (stackPos.next.ptrs) - posTypePtrs, - // Keeps SRT bitmap (stackPos.next.srt) - posTypeSRT, - // Keeps a new object that was not inspected yet. Keeps a parent - // element (stackPos.next.parent) - posTypeFresh -} nextPosType; - -typedef union { - // fixed layout or layout specified by a field in the closure - StgWord step; - - // layout.payload - struct { - // See StgClosureInfo in InfoTables.h - StgHalfWord pos; - StgHalfWord ptrs; - StgPtr payload; - } ptrs; - - // SRT - struct { - StgClosure *srt; - } srt; -} nextPos; - -/** - * Position pointer into a closure. Determines what the next element to return - * for a stackElement is. - */ -typedef struct { - nextPosType type; - nextPos next; -} stackPos; - -/** - * An element of the traversal work-stack. Besides the closure itself this also - * stores it's parent and associated data. - * - * When 'info.type == posTypeFresh' a 'stackElement' represents just one - * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an - * offset into the children of 'c'. This is to support returning a closure's - * children one-by-one without pushing one element per child onto the stack. See - * traversePushChildren() and traversePop(). - * - */ -typedef struct stackElement_ { - stackPos info; - StgClosure *c; - StgClosure *cp; // parent of 'c'. Only used when info.type == posTypeFresh. - stackData data; -} stackElement; - traverseState g_retainerTraverseState; - -#if defined(DEBUG) -unsigned int g_traversalDebugLevel = 0; -static inline void debug(const char *s, ...) -{ - va_list ap; - - if(g_traversalDebugLevel == 0) - return; - - va_start(ap,s); - vdebugBelch(s, ap); - va_end(ap); -} -#else -#define debug(...) -#endif - -// number of blocks allocated for one stack -#define BLOCKS_IN_STACK 1 - -/* ----------------------------------------------------------------------------- - * Add a new block group to the stack. - * Invariants: - * currentStack->link == s. - * -------------------------------------------------------------------------- */ -STATIC_INLINE void -newStackBlock( traverseState *ts, bdescr *bd ) -{ - ts->currentStack = bd; - ts->stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks); - ts->stackBottom = (stackElement *)bd->start; - ts->stackLimit = (stackElement *)ts->stackTop; - bd->free = (StgPtr)ts->stackLimit; -} - -/* ----------------------------------------------------------------------------- - * Return to the previous block group. - * Invariants: - * s->link == currentStack. - * -------------------------------------------------------------------------- */ -STATIC_INLINE void -returnToOldStack( traverseState *ts, bdescr *bd ) -{ - ts->currentStack = bd; - ts->stackTop = (stackElement *)bd->free; - ts->stackBottom = (stackElement *)bd->start; - ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks); - bd->free = (StgPtr)ts->stackLimit; -} - -/** - * Initializes the traversal work-stack. - */ -void -initializeTraverseStack( traverseState *ts ) -{ - if (ts->firstStack != NULL) { - freeChain(ts->firstStack); - } - - ts->firstStack = allocGroup(BLOCKS_IN_STACK); - ts->firstStack->link = NULL; - ts->firstStack->u.back = NULL; - - ts->stackSize = 0; - ts->maxStackSize = 0; - - newStackBlock(ts, ts->firstStack); -} - -/** - * Frees all the block groups in the traversal works-stack. - * - * Invariants: - * firstStack != NULL - */ -void -closeTraverseStack( traverseState *ts ) -{ - freeChain(ts->firstStack); - ts->firstStack = NULL; -} - -int -getTraverseStackMaxSize(traverseState *ts) -{ - return ts->maxStackSize; -} - -/* ----------------------------------------------------------------------------- - * Returns true if the whole stack is empty. - * -------------------------------------------------------------------------- */ -STATIC_INLINE bool -isEmptyWorkStack( traverseState *ts ) -{ - return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit; -} - -/* ----------------------------------------------------------------------------- - * Returns size of stack - * -------------------------------------------------------------------------- */ -W_ -traverseWorkStackBlocks(traverseState *ts) -{ - bdescr* bd; - W_ res = 0; - - for (bd = ts->firstStack; bd != NULL; bd = bd->link) - res += bd->blocks; - - return res; -} - W_ retainerStackBlocks(void) { return traverseWorkStackBlocks(&g_retainerTraverseState); } -/* ----------------------------------------------------------------------------- - * Initializes *info from ptrs and payload. - * Invariants: - * payload[] begins with ptrs pointers followed by non-pointers. - * -------------------------------------------------------------------------- */ -STATIC_INLINE void -init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload ) -{ - info->type = posTypePtrs; - info->next.ptrs.pos = 0; - info->next.ptrs.ptrs = ptrs; - info->next.ptrs.payload = payload; -} - -/* ----------------------------------------------------------------------------- - * Find the next object from *info. - * -------------------------------------------------------------------------- */ -STATIC_INLINE StgClosure * -find_ptrs( stackPos *info ) -{ - if (info->next.ptrs.pos < info->next.ptrs.ptrs) { - return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++]; - } else { - return NULL; - } -} - -/* ----------------------------------------------------------------------------- - * Initializes *info from SRT information stored in *infoTable. - * -------------------------------------------------------------------------- */ -STATIC_INLINE void -init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable ) -{ - info->type = posTypeSRT; - if (infoTable->i.srt) { - info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable); - } else { - info->next.srt.srt = NULL; - } -} - -STATIC_INLINE void -init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable ) -{ - info->type = posTypeSRT; - if (infoTable->i.srt) { - info->next.srt.srt = (StgClosure*)GET_SRT(infoTable); - } else { - info->next.srt.srt = NULL; - } -} - -/* ----------------------------------------------------------------------------- - * Find the next object from *info. - * -------------------------------------------------------------------------- */ -STATIC_INLINE StgClosure * -find_srt( stackPos *info ) -{ - StgClosure *c; - if (info->type == posTypeSRT) { - c = info->next.srt.srt; - info->next.srt.srt = NULL; - return c; - } - - return NULL; -} - -/** - * Push a set of closures, represented by a single 'stackElement', onto the - * traversal work-stack. - */ -static void -pushStackElement(traverseState *ts, stackElement *se) -{ - bdescr *nbd; // Next Block Descriptor - if (ts->stackTop - 1 < ts->stackBottom) { - debug("pushStackElement() to the next stack.\n"); - - // currentStack->free is updated when the active stack is switched - // to the next stack. - ts->currentStack->free = (StgPtr)ts->stackTop; - - if (ts->currentStack->link == NULL) { - nbd = allocGroup(BLOCKS_IN_STACK); - nbd->link = NULL; - nbd->u.back = ts->currentStack; - ts->currentStack->link = nbd; - } else - nbd = ts->currentStack->link; - - newStackBlock(ts, nbd); - } - - // adjust stackTop (acutal push) - ts->stackTop--; - // If the size of stackElement was huge, we would better replace the - // following statement by either a memcpy() call or a switch statement - // on the type of the element. Currently, the size of stackElement is - // small enough (5 words) that this direct assignment seems to be enough. - *ts->stackTop = *se; - - ts->stackSize++; - if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize; - ASSERT(ts->stackSize >= 0); - debug("stackSize = %d\n", ts->stackSize); -} - -/** - * Push a single closure onto the traversal work-stack. - * - * cp - object's parent - * c - closure - * data - data associated with closure. - */ -inline void -traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackData data) { - stackElement se; - - se.c = c; - se.cp = cp; - se.data = data; - se.info.type = posTypeFresh; - - pushStackElement(ts, &se); -}; - -/** - * traversePushChildren() extracts the first child of 'c' in 'first_child' and - * conceptually pushes all remaining children of 'c' onto the traversal stack - * while associating 'data' with the pushed elements to be returned upon poping. - * - * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed - * onto the stack. - * - * If 'c' has only one child, 'first_child' is set to that child and nothing is - * pushed onto the stack. - * - * Invariants: - * - * - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot - * be any stack objects. - * - * Note: SRTs are considered to be children as well. - * - * Note: When pushing onto the stack we only really push one 'stackElement' - * representing all children onto the stack. See traversePop() - */ -STATIC_INLINE void -traversePushChildren(traverseState *ts, StgClosure *c, stackData data, StgClosure **first_child) -{ - stackElement se; - - debug("traversePushChildren(): stackTop = 0x%x\n", ts->stackTop); - - ASSERT(get_itbl(c)->type != TSO); - ASSERT(get_itbl(c)->type != AP_STACK); - - // - // fill in se - // - - se.c = c; - se.data = data; - // Note: se.cp ommitted on purpose, only traversePushClosure uses that. - - // fill in se.info - switch (get_itbl(c)->type) { - // no child, no SRT - case CONSTR_0_1: - case CONSTR_0_2: - case ARR_WORDS: - case COMPACT_NFDATA: - *first_child = NULL; - return; - - // one child (fixed), no SRT - case MUT_VAR_CLEAN: - case MUT_VAR_DIRTY: - *first_child = ((StgMutVar *)c)->var; - return; - case THUNK_SELECTOR: - *first_child = ((StgSelector *)c)->selectee; - return; - case BLACKHOLE: - *first_child = ((StgInd *)c)->indirectee; - return; - case CONSTR_1_0: - case CONSTR_1_1: - *first_child = c->payload[0]; - return; - - // For CONSTR_2_0 and MVAR, we use se.info.step to record the position - // of the next child. We do not write a separate initialization code. - // Also we do not have to initialize info.type; - - // two children (fixed), no SRT - // need to push a stackElement, but nothing to store in se.info - case CONSTR_2_0: - *first_child = c->payload[0]; // return the first pointer - se.info.type = posTypeStep; - se.info.next.step = 2; // 2 = second - break; - - // three children (fixed), no SRT - // need to push a stackElement - case MVAR_CLEAN: - case MVAR_DIRTY: - // head must be TSO and the head of a linked list of TSOs. - // Shoule it be a child? Seems to be yes. - *first_child = (StgClosure *)((StgMVar *)c)->head; - se.info.type = posTypeStep; - se.info.next.step = 2; // 2 = second - break; - - // three children (fixed), no SRT - case WEAK: - *first_child = ((StgWeak *)c)->key; - se.info.type = posTypeStep; - se.info.next.step = 2; - break; - - // layout.payload.ptrs, no SRT - case TVAR: - case CONSTR: - case CONSTR_NOCAF: - case PRIM: - case MUT_PRIM: - case BCO: - init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, - (StgPtr)c->payload); - *first_child = find_ptrs(&se.info); - if (*first_child == NULL) - return; // no child - break; - - // StgMutArrPtr.ptrs, no SRT - case MUT_ARR_PTRS_CLEAN: - case MUT_ARR_PTRS_DIRTY: - case MUT_ARR_PTRS_FROZEN_CLEAN: - case MUT_ARR_PTRS_FROZEN_DIRTY: - init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs, - (StgPtr)(((StgMutArrPtrs *)c)->payload)); - *first_child = find_ptrs(&se.info); - if (*first_child == NULL) - return; - break; - - // StgMutArrPtr.ptrs, no SRT - case SMALL_MUT_ARR_PTRS_CLEAN: - case SMALL_MUT_ARR_PTRS_DIRTY: - case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN: - case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY: - init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs, - (StgPtr)(((StgSmallMutArrPtrs *)c)->payload)); - *first_child = find_ptrs(&se.info); - if (*first_child == NULL) - return; - break; - - // layout.payload.ptrs, SRT - case FUN_STATIC: - case FUN: // *c is a heap object. - case FUN_2_0: - init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload); - *first_child = find_ptrs(&se.info); - if (*first_child == NULL) - // no child from ptrs, so check SRT - goto fun_srt_only; - break; - - case THUNK: - case THUNK_2_0: - init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, - (StgPtr)((StgThunk *)c)->payload); - *first_child = find_ptrs(&se.info); - if (*first_child == NULL) - // no child from ptrs, so check SRT - goto thunk_srt_only; - break; - - // 1 fixed child, SRT - case FUN_1_0: - case FUN_1_1: - *first_child = c->payload[0]; - ASSERT(*first_child != NULL); - init_srt_fun(&se.info, get_fun_itbl(c)); - break; - - case THUNK_1_0: - case THUNK_1_1: - *first_child = ((StgThunk *)c)->payload[0]; - ASSERT(*first_child != NULL); - init_srt_thunk(&se.info, get_thunk_itbl(c)); - break; - - case FUN_0_1: // *c is a heap object. - case FUN_0_2: - fun_srt_only: - init_srt_fun(&se.info, get_fun_itbl(c)); - *first_child = find_srt(&se.info); - if (*first_child == NULL) - return; // no child - break; - - // SRT only - case THUNK_STATIC: - ASSERT(get_itbl(c)->srt != 0); - /* fall-thru */ - case THUNK_0_1: - case THUNK_0_2: - thunk_srt_only: - init_srt_thunk(&se.info, get_thunk_itbl(c)); - *first_child = find_srt(&se.info); - if (*first_child == NULL) - return; // no child - break; - - case TREC_CHUNK: - *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk; - se.info.type = posTypeStep; - se.info.next.step = 0; // entry no. - break; - - // cannot appear - case PAP: - case AP: - case AP_STACK: - case TSO: - case STACK: - case IND_STATIC: - // stack objects - case UPDATE_FRAME: - case CATCH_FRAME: - case UNDERFLOW_FRAME: - case STOP_FRAME: - case RET_BCO: - case RET_SMALL: - case RET_BIG: - // invalid objects - case IND: - case INVALID_OBJECT: - default: - barf("Invalid object *c in push(): %d", get_itbl(c)->type); - return; - } - - // se.cp has to be initialized when type==posTypeFresh. We don't do that - // here though. So type must be !=posTypeFresh. - ASSERT(se.info.type != posTypeFresh); - - pushStackElement(ts, &se); -} - -/** - * popStackElement(): Remove a depleted stackElement from the top of the - * traversal work-stack. - * - * Invariants: - * stackTop cannot be equal to stackLimit unless the whole stack is - * empty, in which case popStackElement() is not allowed. - */ -static void -popStackElement(traverseState *ts) { - debug("popStackElement(): stackTop = 0x%x\n", ts->stackTop); - - ASSERT(ts->stackTop != ts->stackLimit); - ASSERT(!isEmptyWorkStack(ts)); - - // <= (instead of <) is wrong! - if (ts->stackTop + 1 < ts->stackLimit) { - ts->stackTop++; - - ts->stackSize--; - if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize; - ASSERT(ts->stackSize >= 0); - debug("stackSize = (--) %d\n", ts->stackSize); - - return; - } - - bdescr *pbd; // Previous Block Descriptor - - debug("popStackElement() to the previous stack.\n"); - - ASSERT(ts->stackTop + 1 == ts->stackLimit); - ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start); - - if (ts->firstStack == ts->currentStack) { - // The stack is completely empty. - ts->stackTop++; - ASSERT(ts->stackTop == ts->stackLimit); - - ts->stackSize--; - if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize; - ASSERT(ts->stackSize >= 0); - debug("stackSize = %d\n", ts->stackSize); - - return; - } - - // currentStack->free is updated when the active stack is switched back - // to the previous stack. - ts->currentStack->free = (StgPtr)ts->stackLimit; - - // find the previous block descriptor - pbd = ts->currentStack->u.back; - ASSERT(pbd != NULL); - - returnToOldStack(ts, pbd); - - ts->stackSize--; - if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize; - ASSERT(ts->stackSize >= 0); - debug("stackSize = %d\n", ts->stackSize); -} - -/** - * Finds the next object to be considered for retainer profiling and store - * its pointer to *c. - * - * If the unprocessed object was stored in the stack (posTypeFresh), the - * this object is returned as-is. Otherwise Test if the topmost stack - * element indicates that more objects are left, - * and if so, retrieve the first object and store its pointer to *c. Also, - * set *cp and *data appropriately, both of which are stored in the stack - * element. The topmost stack element then is overwritten so as for it to now - * denote the next object. - * - * If the topmost stack element indicates no more objects are left, pop - * off the stack element until either an object can be retrieved or - * the work-stack becomes empty, indicated by true returned by - * isEmptyWorkStack(), in which case *c is set to NULL. - * - * Note: - * - * It is okay to call this function even when the work-stack is empty. - */ -STATIC_INLINE void -traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data) -{ - stackElement *se; - - debug("traversePop(): stackTop = 0x%x\n", ts->stackTop); - - // Is this the last internal element? If so instead of modifying the current - // stackElement in place we actually remove it from the stack. - bool last = false; - - do { - if (isEmptyWorkStack(ts)) { - *c = NULL; - return; - } - - // Note: Below every `break`, where the loop condition is true, must be - // accompanied by a popStackElement() otherwise this is an infinite - // loop. - se = ts->stackTop; - - // If this is a top-level element, you should pop that out. - if (se->info.type == posTypeFresh) { - *cp = se->cp; - *c = se->c; - *data = se->data; - popStackElement(ts); - return; - } - - // Note: The first ptr of all of these was already returned as - // *fist_child in push(), so we always start with the second field. - switch (get_itbl(se->c)->type) { - // two children (fixed), no SRT - // nothing in se.info - case CONSTR_2_0: - *c = se->c->payload[1]; - last = true; - goto out; - - // three children (fixed), no SRT - // need to push a stackElement - case MVAR_CLEAN: - case MVAR_DIRTY: - if (se->info.next.step == 2) { - *c = (StgClosure *)((StgMVar *)se->c)->tail; - se->info.next.step++; // move to the next step - // no popStackElement - } else { - *c = ((StgMVar *)se->c)->value; - last = true; - } - goto out; - - // three children (fixed), no SRT - case WEAK: - if (se->info.next.step == 2) { - *c = ((StgWeak *)se->c)->value; - se->info.next.step++; - // no popStackElement - } else { - *c = ((StgWeak *)se->c)->finalizer; - last = true; - } - goto out; - - case TREC_CHUNK: { - // These are pretty complicated: we have N entries, each - // of which contains 3 fields that we want to follow. So - // we divide the step counter: the 2 low bits indicate - // which field, and the rest of the bits indicate the - // entry number (starting from zero). - TRecEntry *entry; - uint32_t entry_no = se->info.next.step >> 2; - uint32_t field_no = se->info.next.step & 3; - if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) { - *c = NULL; - popStackElement(ts); - break; // this breaks out of the switch not the loop - } - entry = &((StgTRecChunk *)se->c)->entries[entry_no]; - if (field_no == 0) { - *c = (StgClosure *)entry->tvar; - } else if (field_no == 1) { - *c = entry->expected_value; - } else { - *c = entry->new_value; - } - se->info.next.step++; - goto out; - } - - case TVAR: - case CONSTR: - case PRIM: - case MUT_PRIM: - case BCO: - // StgMutArrPtr.ptrs, no SRT - case MUT_ARR_PTRS_CLEAN: - case MUT_ARR_PTRS_DIRTY: - case MUT_ARR_PTRS_FROZEN_CLEAN: - case MUT_ARR_PTRS_FROZEN_DIRTY: - case SMALL_MUT_ARR_PTRS_CLEAN: - case SMALL_MUT_ARR_PTRS_DIRTY: - case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN: - case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY: - *c = find_ptrs(&se->info); - if (*c == NULL) { - popStackElement(ts); - break; // this breaks out of the switch not the loop - } - goto out; - - // layout.payload.ptrs, SRT - case FUN: // always a heap object - case FUN_STATIC: - case FUN_2_0: - if (se->info.type == posTypePtrs) { - *c = find_ptrs(&se->info); - if (*c != NULL) { - goto out; - } - init_srt_fun(&se->info, get_fun_itbl(se->c)); - } - goto do_srt; - - case THUNK: - case THUNK_2_0: - if (se->info.type == posTypePtrs) { - *c = find_ptrs(&se->info); - if (*c != NULL) { - goto out; - } - init_srt_thunk(&se->info, get_thunk_itbl(se->c)); - } - goto do_srt; - - // SRT - do_srt: - case THUNK_STATIC: - case FUN_0_1: - case FUN_0_2: - case THUNK_0_1: - case THUNK_0_2: - case FUN_1_0: - case FUN_1_1: - case THUNK_1_0: - case THUNK_1_1: - *c = find_srt(&se->info); - if(*c == NULL) { - popStackElement(ts); - break; // this breaks out of the switch not the loop - } - goto out; - - // no child (fixed), no SRT - case CONSTR_0_1: - case CONSTR_0_2: - case ARR_WORDS: - // one child (fixed), no SRT - case MUT_VAR_CLEAN: - case MUT_VAR_DIRTY: - case THUNK_SELECTOR: - case CONSTR_1_1: - // cannot appear - case PAP: - case AP: - case AP_STACK: - case TSO: - case STACK: - case IND_STATIC: - case CONSTR_NOCAF: - // stack objects - case UPDATE_FRAME: - case CATCH_FRAME: - case UNDERFLOW_FRAME: - case STOP_FRAME: - case RET_BCO: - case RET_SMALL: - case RET_BIG: - // invalid objects - case IND: - case INVALID_OBJECT: - default: - barf("Invalid object *c in traversePop(): %d", get_itbl(se->c)->type); - return; - } - } while (*c == NULL); - -out: - - ASSERT(*c != NULL); - - *cp = se->c; - *data = se->data; - - if(last) - popStackElement(ts); - - return; - -} - /* ----------------------------------------------------------------------------- * RETAINER PROFILING ENGINE * -------------------------------------------------------------------------- */ @@ -954,22 +104,6 @@ endRetainerProfiling( void ) outputAllRetainerSet(prof_file); } -/** - * Make sure a closure's profiling data is initialized to zero if it does not - * conform to the current value of the flip bit, returns true in this case. - * - * See Note [Profiling heap traversal visited bit]. - */ -bool -traverseMaybeInitClosureData(StgClosure *c) -{ - if (!isTravDataValid(c)) { - setTravDataToZero(c); - return true; - } - return false; -} - /* ----------------------------------------------------------------------------- * Returns true if *c is a retainer. * In general the retainers are the objects that may be the roots of the @@ -1120,214 +254,6 @@ associate( StgClosure *c, RetainerSet *s ) RSET(c) = (RetainerSet *)((StgWord)s | flip); } -/* ----------------------------------------------------------------------------- - Call traversePushClosure for each of the closures covered by a large bitmap. - -------------------------------------------------------------------------- */ - -static void -traverseLargeBitmap (traverseState *ts, StgPtr p, StgLargeBitmap *large_bitmap, - uint32_t size, StgClosure *c, stackData data) -{ - uint32_t i, b; - StgWord bitmap; - - b = 0; - bitmap = large_bitmap->bitmap[b]; - for (i = 0; i < size; ) { - if ((bitmap & 1) == 0) { - traversePushClosure(ts, (StgClosure *)*p, c, data); - } - i++; - p++; - if (i % BITS_IN(W_) == 0) { - b++; - bitmap = large_bitmap->bitmap[b]; - } else { - bitmap = bitmap >> 1; - } - } -} - -STATIC_INLINE StgPtr -traverseSmallBitmap (traverseState *ts, StgPtr p, uint32_t size, StgWord bitmap, - StgClosure *c, stackData data) -{ - while (size > 0) { - if ((bitmap & 1) == 0) { - traversePushClosure(ts, (StgClosure *)*p, c, data); - } - p++; - bitmap = bitmap >> 1; - size--; - } - return p; -} - -/** - * traversePushStack(ts, cp, data, stackStart, stackEnd) pushes all the objects - * in the STG stack-chunk from stackStart to stackEnd onto the traversal - * work-stack with 'c' and 'data' being their parent and associated data, - * respectively. - * - * Invariants: - * - * *cp is one of the following: TSO, AP_STACK. - * - * stackStart < stackEnd. - * - * If *c is TSO, its state is not ThreadComplete,or ThreadKilled, - * which means that its stack is ready to process. - * - * Note: - * - * This code was almost plagiarzied from GC.c! For each pointer, - * traversePushClosure() is invoked instead of evacuate(). - */ -static void -traversePushStack(traverseState *ts, StgClosure *cp, stackData data, - StgPtr stackStart, StgPtr stackEnd) -{ - StgPtr p; - const StgRetInfoTable *info; - StgWord bitmap; - uint32_t size; - - ASSERT(get_itbl(cp)->type == STACK); - - p = stackStart; - while (p < stackEnd) { - info = get_ret_itbl((StgClosure *)p); - - switch(info->i.type) { - - case UPDATE_FRAME: - traversePushClosure(ts, ((StgUpdateFrame *)p)->updatee, cp, data); - p += sizeofW(StgUpdateFrame); - continue; - - case UNDERFLOW_FRAME: - case STOP_FRAME: - case CATCH_FRAME: - case CATCH_STM_FRAME: - case CATCH_RETRY_FRAME: - case ATOMICALLY_FRAME: - case RET_SMALL: - bitmap = BITMAP_BITS(info->i.layout.bitmap); - size = BITMAP_SIZE(info->i.layout.bitmap); - p++; - p = traverseSmallBitmap(ts, p, size, bitmap, cp, data); - - follow_srt: - if (info->i.srt) { - traversePushClosure(ts, GET_SRT(info), cp, data); - } - continue; - - case RET_BCO: { - StgBCO *bco; - - p++; - traversePushClosure(ts, (StgClosure*)*p, cp, data); - bco = (StgBCO *)*p; - p++; - size = BCO_BITMAP_SIZE(bco); - traverseLargeBitmap(ts, p, BCO_BITMAP(bco), size, cp, data); - p += size; - continue; - } - - // large bitmap (> 32 entries, or > 64 on a 64-bit machine) - case RET_BIG: - size = GET_LARGE_BITMAP(&info->i)->size; - p++; - traverseLargeBitmap(ts, p, GET_LARGE_BITMAP(&info->i), - size, cp, data); - p += size; - // and don't forget to follow the SRT - goto follow_srt; - - case RET_FUN: { - StgRetFun *ret_fun = (StgRetFun *)p; - const StgFunInfoTable *fun_info; - - traversePushClosure(ts, ret_fun->fun, cp, data); - fun_info = get_fun_itbl(UNTAG_CONST_CLOSURE(ret_fun->fun)); - - p = (P_)&ret_fun->payload; - switch (fun_info->f.fun_type) { - case ARG_GEN: - bitmap = BITMAP_BITS(fun_info->f.b.bitmap); - size = BITMAP_SIZE(fun_info->f.b.bitmap); - p = traverseSmallBitmap(ts, p, size, bitmap, cp, data); - break; - case ARG_GEN_BIG: - size = GET_FUN_LARGE_BITMAP(fun_info)->size; - traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info), - size, cp, data); - p += size; - break; - default: - bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); - size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]); - p = traverseSmallBitmap(ts, p, size, bitmap, cp, data); - break; - } - goto follow_srt; - } - - default: - barf("Invalid object found in traversePushStack(): %d", - (int)(info->i.type)); - } - } -} - -/* ---------------------------------------------------------------------------- - * Call traversePushClosure for each of the children of a PAP/AP - * ------------------------------------------------------------------------- */ - -STATIC_INLINE StgPtr -traversePAP (traverseState *ts, - StgClosure *pap, /* NOT tagged */ - stackData data, - StgClosure *fun, /* tagged */ - StgClosure** payload, StgWord n_args) -{ - StgPtr p; - StgWord bitmap; - const StgFunInfoTable *fun_info; - - traversePushClosure(ts, fun, pap, data); - fun = UNTAG_CLOSURE(fun); - fun_info = get_fun_itbl(fun); - ASSERT(fun_info->i.type != PAP); - - p = (StgPtr)payload; - - switch (fun_info->f.fun_type) { - case ARG_GEN: - bitmap = BITMAP_BITS(fun_info->f.b.bitmap); - p = traverseSmallBitmap(ts, p, n_args, bitmap, - pap, data); - break; - case ARG_GEN_BIG: - traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info), - n_args, pap, data); - p += n_args; - break; - case ARG_BCO: - traverseLargeBitmap(ts, (StgPtr)payload, BCO_BITMAP(fun), - n_args, pap, data); - p += n_args; - break; - default: - bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); - p = traverseSmallBitmap(ts, p, n_args, bitmap, pap, data); - break; - } - return p; -} - static bool retainVisitClosure( StgClosure *c, const StgClosure *cp, const stackData data, const bool first_visit, stackData *out_data ) { @@ -1408,219 +334,6 @@ retainVisitClosure( StgClosure *c, const StgClosure *cp, const stackData data, c return 1; } -static void -resetMutableObjects(void) -{ - uint32_t g, n; - bdescr *bd; - StgPtr ml; - - // The following code resets the 'trav' field of each unvisited mutable - // object. - for (g = 0; g < RtsFlags.GcFlags.generations; g++) { - // NOT true: even G0 has a block on its mutable list - // ASSERT(g != 0 || (generations[g].mut_list == NULL)); - - // Traversing through mut_list is necessary - // because we can find MUT_VAR objects which have not been - // visited during heap traversal. - for (n = 0; n < n_capabilities; n++) { - for (bd = capabilities[n]->mut_lists[g]; bd != NULL; bd = bd->link) { - for (ml = bd->start; ml < bd->free; ml++) { - - traverseMaybeInitClosureData((StgClosure *)*ml); - } - } - } - } -} - -/** - * Traverse all closures on the traversal work-stack, calling 'visit_cb' on each - * closure. See 'visitClosure_cb' for details. This function flips the 'flip' - * bit and hence every closure's profiling data will be reset to zero upon - * visiting. See Note [Profiling heap traversal visited bit]. - */ -void -traverseWorkStack(traverseState *ts, visitClosure_cb visit_cb) -{ - // first_child = first child of c - StgClosure *c, *cp, *first_child; - stackData data, child_data; - StgWord typeOfc; - - // Now we flip the flip bit. - flip = flip ^ 1; - - // c = Current closure (possibly tagged) - // cp = Current closure's Parent (NOT tagged) - // data = current closures' associated data (NOT tagged) - // data_out = data to associate with current closure's children - -loop: - traversePop(ts, &c, &cp, &data); - - if (c == NULL) { - debug("maxStackSize= %d\n", ts->maxStackSize); - resetMutableObjects(); - return; - } -inner_loop: - c = UNTAG_CLOSURE(c); - - typeOfc = get_itbl(c)->type; - - // special cases - switch (typeOfc) { - case TSO: - if (((StgTSO *)c)->what_next == ThreadComplete || - ((StgTSO *)c)->what_next == ThreadKilled) { - debug("ThreadComplete or ThreadKilled encountered in traverseWorkStack()\n"); - goto loop; - } - break; - - case IND_STATIC: - // We just skip IND_STATIC, so it's never visited. - c = ((StgIndStatic *)c)->indirectee; - goto inner_loop; - - case CONSTR_NOCAF: - // static objects with no pointers out, so goto loop. - - // It is not just enough not to visit *c; it is - // mandatory because CONSTR_NOCAF are not reachable from - // scavenged_static_objects, the list from which is assumed to traverse - // all static objects after major garbage collections. - goto loop; - - case THUNK_STATIC: - if (get_itbl(c)->srt == 0) { - // No need to visit *c; no dynamic objects are reachable from it. - // - // Static objects: if we traverse all the live closures, - // including static closures, during each heap census then - // we will observe that some static closures appear and - // disappear. eg. a closure may contain a pointer to a - // static function 'f' which is not otherwise reachable - // (it doesn't indirectly point to any CAFs, so it doesn't - // appear in any SRTs), so we would find 'f' during - // traversal. However on the next sweep there may be no - // closures pointing to 'f'. - // - // We must therefore ignore static closures whose SRT is - // empty, because these are exactly the closures that may - // "appear". A closure with a non-empty SRT, and which is - // still required, will always be reachable. - // - // But what about CONSTR? Surely these may be able - // to appear, and they don't have SRTs, so we can't - // check. So for now, we're calling - // resetStaticObjectForProfiling() from the - // garbage collector to reset the retainer sets in all the - // reachable static objects. - goto loop; - } - /* fall-thru */ - - case FUN_STATIC: { - const StgInfoTable *info = get_itbl(c); - if (info->srt == 0 && info->layout.payload.ptrs == 0) { - goto loop; - } else { - break; - } - } - - default: - break; - } - - // If this is the first visit to c, initialize its data. - bool first_visit = traverseMaybeInitClosureData(c); - bool traverse_children - = visit_cb(c, cp, data, first_visit, (stackData*)&child_data); - if(!traverse_children) - goto loop; - - // process child - - // Special case closures: we process these all in one go rather - // than attempting to save the current position, because doing so - // would be hard. - switch (typeOfc) { - case STACK: - traversePushStack(ts, c, child_data, - ((StgStack *)c)->sp, - ((StgStack *)c)->stack + ((StgStack *)c)->stack_size); - goto loop; - - case TSO: - { - StgTSO *tso = (StgTSO *)c; - - traversePushClosure(ts, (StgClosure *) tso->stackobj, c, child_data); - traversePushClosure(ts, (StgClosure *) tso->blocked_exceptions, c, child_data); - traversePushClosure(ts, (StgClosure *) tso->bq, c, child_data); - traversePushClosure(ts, (StgClosure *) tso->trec, c, child_data); - if ( tso->why_blocked == BlockedOnMVar - || tso->why_blocked == BlockedOnMVarRead - || tso->why_blocked == BlockedOnBlackHole - || tso->why_blocked == BlockedOnMsgThrowTo - ) { - traversePushClosure(ts, tso->block_info.closure, c, child_data); - } - goto loop; - } - - case BLOCKING_QUEUE: - { - StgBlockingQueue *bq = (StgBlockingQueue *)c; - traversePushClosure(ts, (StgClosure *) bq->link, c, child_data); - traversePushClosure(ts, (StgClosure *) bq->bh, c, child_data); - traversePushClosure(ts, (StgClosure *) bq->owner, c, child_data); - goto loop; - } - - case PAP: - { - StgPAP *pap = (StgPAP *)c; - traversePAP(ts, c, child_data, pap->fun, pap->payload, pap->n_args); - goto loop; - } - - case AP: - { - StgAP *ap = (StgAP *)c; - traversePAP(ts, c, child_data, ap->fun, ap->payload, ap->n_args); - goto loop; - } - - case AP_STACK: - traversePushClosure(ts, ((StgAP_STACK *)c)->fun, c, child_data); - traversePushStack(ts, c, child_data, - (StgPtr)((StgAP_STACK *)c)->payload, - (StgPtr)((StgAP_STACK *)c)->payload + - ((StgAP_STACK *)c)->size); - goto loop; - } - - traversePushChildren(ts, c, child_data, &first_child); - - // If first_child is null, c has no child. - // If first_child is not null, the top stack element points to the next - // object. traversePushChildren() may or may not push a stackElement on the - // stack. - if (first_child == NULL) - goto loop; - - // (c, cp, data) = (first_child, c, child_data) - data = child_data; - cp = c; - c = first_child; - goto inner_loop; -} - /** * Push every object reachable from *tl onto the traversal work stack. */ @@ -1683,72 +396,6 @@ computeRetainerSet( traverseState *ts ) traverseWorkStack(ts, &retainVisitClosure); } -/* ----------------------------------------------------------------------------- - * Traverse all static objects for which we compute retainer sets, - * and reset their rs fields to NULL, which is accomplished by - * invoking traverseMaybeInitClosureData(). This function must be called - * before zeroing all objects reachable from scavenged_static_objects - * in the case of major garbage collections. See GarbageCollect() in - * GC.c. - * Note: - * The mut_once_list of the oldest generation must also be traversed? - * Why? Because if the evacuation of an object pointed to by a static - * indirection object fails, it is put back to the mut_once_list of - * the oldest generation. - * However, this is not necessary because any static indirection objects - * are just traversed through to reach dynamic objects. In other words, - * they are not taken into consideration in computing retainer sets. - * - * SDM (20/7/2011): I don't think this is doing anything sensible, - * because it happens before retainerProfile() and at the beginning of - * retainerProfil() we change the sense of 'flip'. So all of the - * calls to traverseMaybeInitClosureData() here are initialising retainer sets - * with the wrong flip. Also, I don't see why this is necessary. I - * added a traverseMaybeInitClosureData() call to retainRoot(), and that seems - * to have fixed the assertion failure in retainerSetOf() I was - * encountering. - * -------------------------------------------------------------------------- */ -void -resetStaticObjectForProfiling( StgClosure *static_objects ) -{ - uint32_t count = 0; - StgClosure *p; - - p = static_objects; - while (p != END_OF_STATIC_OBJECT_LIST) { - p = UNTAG_STATIC_LIST_PTR(p); - count++; - - switch (get_itbl(p)->type) { - case IND_STATIC: - // Since we do not compute the retainer set of any - // IND_STATIC object, we don't have to reset its retainer - // field. - p = (StgClosure*)*IND_STATIC_LINK(p); - break; - case THUNK_STATIC: - traverseMaybeInitClosureData(p); - p = (StgClosure*)*THUNK_STATIC_LINK(p); - break; - case FUN_STATIC: - case CONSTR: - case CONSTR_1_0: - case CONSTR_2_0: - case CONSTR_1_1: - case CONSTR_NOCAF: - traverseMaybeInitClosureData(p); - p = (StgClosure*)*STATIC_LINK(get_itbl(p), p); - break; - default: - barf("resetStaticObjectForProfiling: %p (%lu)", - p, (unsigned long)get_itbl(p)->type); - break; - } - } - - debug("count in scavenged_static_objects = %d\n", count); -} - /* ----------------------------------------------------------------------------- * Perform retainer profiling. * N is the oldest generation being profilied, where the generations are -- cgit v1.2.1