diff options
Diffstat (limited to 'rts/RetainerProfile.c')
-rw-r--r-- | rts/RetainerProfile.c | 2338 |
1 file changed, 2338 insertions, 0 deletions
diff --git a/rts/RetainerProfile.c b/rts/RetainerProfile.c new file mode 100644 index 0000000000..c5c3de5314 --- /dev/null +++ b/rts/RetainerProfile.c @@ -0,0 +1,2338 @@ +/* ----------------------------------------------------------------------------- + * + * (c) The GHC Team, 2001 + * Author: Sungwoo Park + * + * Retainer profiling. + * + * ---------------------------------------------------------------------------*/ + +#ifdef PROFILING + +// Turn off inlining when debugging - it obfuscates things +#ifdef DEBUG +#define INLINE +#else +#define INLINE inline +#endif + +#include "Rts.h" +#include "RtsUtils.h" +#include "RetainerProfile.h" +#include "RetainerSet.h" +#include "Schedule.h" +#include "Printer.h" +#include "Storage.h" +#include "RtsFlags.h" +#include "Weak.h" +#include "Sanity.h" +#include "Profiling.h" +#include "Stats.h" +#include "BlockAlloc.h" +#include "ProfHeap.h" +#include "Apply.h" + +/* + Note: what to change in order to plug-in a new retainer profiling scheme? + (1) type retainer in ../includes/StgRetainerProf.h + (2) retainer function R(), i.e., getRetainerFrom() + (3) the two hashing functions, hashKeySingleton() and hashKeyAddElement(), + in RetainerSet.h, if needed. + (4) printRetainer() and printRetainerSetShort() in RetainerSet.c. + */ + +/* ----------------------------------------------------------------------------- + * Declarations... + * -------------------------------------------------------------------------- */ + +static nat retainerGeneration; // generation + +static nat numObjectVisited; // total number of objects visited +static nat timesAnyObjectVisited; // number of times any objects are visited + +/* + The rs field in the profile header of any object points to its retainer + set in an indirect way: if flip is 0, it points to the retainer set; + if flip is 1, it points to the next byte after the retainer set (even + for NULL pointers). Therefore, with flip 1, (rs ^ 1) is the actual + pointer. See retainerSetOf(). 
+ */ + +StgWord flip = 0; // flip bit + // must be 0 if DEBUG_RETAINER is on (for static closures) + +#define setRetainerSetToNull(c) \ + (c)->header.prof.hp.rs = (RetainerSet *)((StgWord)NULL | flip) + +static void retainStack(StgClosure *, retainer, StgPtr, StgPtr); +static void retainClosure(StgClosure *, StgClosure *, retainer); +#ifdef DEBUG_RETAINER +static void belongToHeap(StgPtr p); +#endif + +#ifdef DEBUG_RETAINER +/* + cStackSize records how many times retainStack() has been invoked recursively, + that is, the number of activation records for retainStack() on the C stack. + maxCStackSize records its max value. + Invariants: + cStackSize <= maxCStackSize + */ +static nat cStackSize, maxCStackSize; + +static nat sumOfNewCost; // sum of the cost of each object, computed + // when the object is first visited +static nat sumOfNewCostExtra; // for those objects not visited during + // retainer profiling, e.g., MUT_VAR +static nat costArray[N_CLOSURE_TYPES]; + +nat sumOfCostLinear; // sum of the costs of all object, computed + // when linearly traversing the heap after + // retainer profiling +nat costArrayLinear[N_CLOSURE_TYPES]; +#endif + +/* ----------------------------------------------------------------------------- + * Retainer stack - header + * Note: + * Although the retainer stack implementation could be separated * + * from the retainer profiling engine, there does not seem to be + * any advantage in doing that; retainer stack is an integral part + * of retainer profiling engine and cannot be use elsewhere at + * all. 
+ * -------------------------------------------------------------------------- */ + +typedef enum { + posTypeStep, + posTypePtrs, + posTypeSRT, + posTypeLargeSRT, +} nextPosType; + +typedef union { + // fixed layout or layout specified by a field in the closure + StgWord step; + + // layout.payload + struct { + // See StgClosureInfo in InfoTables.h +#if SIZEOF_VOID_P == 8 + StgWord32 pos; + StgWord32 ptrs; +#else + StgWord16 pos; + StgWord16 ptrs; +#endif + StgPtr payload; + } ptrs; + + // SRT + struct { + StgClosure **srt; + StgWord srt_bitmap; + } srt; + + // Large SRT + struct { + StgLargeSRT *srt; + StgWord offset; + } large_srt; + +} nextPos; + +typedef struct { + nextPosType type; + nextPos next; +} stackPos; + +typedef struct { + StgClosure *c; + retainer c_child_r; + stackPos info; +} stackElement; + +/* + Invariants: + firstStack points to the first block group. + currentStack points to the block group currently being used. + currentStack->free == stackLimit. + stackTop points to the topmost byte in the stack of currentStack. + Unless the whole stack is empty, stackTop must point to the topmost + object (or byte) in the whole stack. Thus, it is only when the whole stack + is empty that stackTop == stackLimit (not during the execution of push() + and pop()). + stackBottom == currentStack->start. + stackLimit == currentStack->start + BLOCK_SIZE_W * currentStack->blocks. + Note: + When a current stack becomes empty, stackTop is set to point to + the topmost element on the previous block group so as to satisfy + the invariants described above. + */ +static bdescr *firstStack = NULL; +static bdescr *currentStack; +static stackElement *stackBottom, *stackTop, *stackLimit; + +/* + currentStackBoundary is used to mark the current stack chunk. + If stackTop == currentStackBoundary, it means that the current stack chunk + is empty. It is the responsibility of the user to keep currentStackBoundary + valid all the time if it is to be employed. 
+ */ +static stackElement *currentStackBoundary; + +/* + stackSize records the current size of the stack. + maxStackSize records its high water mark. + Invariants: + stackSize <= maxStackSize + Note: + stackSize is just an estimate measure of the depth of the graph. The reason + is that some heap objects have only a single child and may not result + in a new element being pushed onto the stack. Therefore, at the end of + retainer profiling, maxStackSize + maxCStackSize is some value no greater + than the actual depth of the graph. + */ +#ifdef DEBUG_RETAINER +static int stackSize, maxStackSize; +#endif + +// number of blocks allocated for one stack +#define BLOCKS_IN_STACK 1 + +/* ----------------------------------------------------------------------------- + * Add a new block group to the stack. + * Invariants: + * currentStack->link == s. + * -------------------------------------------------------------------------- */ +static INLINE void +newStackBlock( bdescr *bd ) +{ + currentStack = bd; + stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks); + stackBottom = (stackElement *)bd->start; + stackLimit = (stackElement *)stackTop; + bd->free = (StgPtr)stackLimit; +} + +/* ----------------------------------------------------------------------------- + * Return to the previous block group. + * Invariants: + * s->link == currentStack. + * -------------------------------------------------------------------------- */ +static INLINE void +returnToOldStack( bdescr *bd ) +{ + currentStack = bd; + stackTop = (stackElement *)bd->free; + stackBottom = (stackElement *)bd->start; + stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks); + bd->free = (StgPtr)stackLimit; +} + +/* ----------------------------------------------------------------------------- + * Initializes the traverse stack. 
+ * -------------------------------------------------------------------------- */ +static void +initializeTraverseStack( void ) +{ + if (firstStack != NULL) { + freeChain(firstStack); + } + + firstStack = allocGroup(BLOCKS_IN_STACK); + firstStack->link = NULL; + firstStack->u.back = NULL; + + newStackBlock(firstStack); +} + +/* ----------------------------------------------------------------------------- + * Frees all the block groups in the traverse stack. + * Invariants: + * firstStack != NULL + * -------------------------------------------------------------------------- */ +static void +closeTraverseStack( void ) +{ + freeChain(firstStack); + firstStack = NULL; +} + +/* ----------------------------------------------------------------------------- + * Returns rtsTrue if the whole stack is empty. + * -------------------------------------------------------------------------- */ +static INLINE rtsBool +isEmptyRetainerStack( void ) +{ + return (firstStack == currentStack) && stackTop == stackLimit; +} + +/* ----------------------------------------------------------------------------- + * Returns size of stack + * -------------------------------------------------------------------------- */ +#ifdef DEBUG +lnat +retainerStackBlocks( void ) +{ + bdescr* bd; + lnat res = 0; + + for (bd = firstStack; bd != NULL; bd = bd->link) + res += bd->blocks; + + return res; +} +#endif + +/* ----------------------------------------------------------------------------- + * Returns rtsTrue if stackTop is at the stack boundary of the current stack, + * i.e., if the current stack chunk is empty. + * -------------------------------------------------------------------------- */ +static INLINE rtsBool +isOnBoundary( void ) +{ + return stackTop == currentStackBoundary; +} + +/* ----------------------------------------------------------------------------- + * Initializes *info from ptrs and payload. + * Invariants: + * payload[] begins with ptrs pointers followed by non-pointers. 
+ * -------------------------------------------------------------------------- */ +static INLINE void +init_ptrs( stackPos *info, nat ptrs, StgPtr payload ) +{ + info->type = posTypePtrs; + info->next.ptrs.pos = 0; + info->next.ptrs.ptrs = ptrs; + info->next.ptrs.payload = payload; +} + +/* ----------------------------------------------------------------------------- + * Find the next object from *info. + * -------------------------------------------------------------------------- */ +static INLINE StgClosure * +find_ptrs( stackPos *info ) +{ + if (info->next.ptrs.pos < info->next.ptrs.ptrs) { + return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++]; + } else { + return NULL; + } +} + +/* ----------------------------------------------------------------------------- + * Initializes *info from SRT information stored in *infoTable. + * -------------------------------------------------------------------------- */ +static INLINE void +init_srt_fun( stackPos *info, StgFunInfoTable *infoTable ) +{ + if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) { + info->type = posTypeLargeSRT; + info->next.large_srt.srt = (StgLargeSRT *)GET_FUN_SRT(infoTable); + info->next.large_srt.offset = 0; + } else { + info->type = posTypeSRT; + info->next.srt.srt = (StgClosure **)GET_FUN_SRT(infoTable); + info->next.srt.srt_bitmap = infoTable->i.srt_bitmap; + } +} + +static INLINE void +init_srt_thunk( stackPos *info, StgThunkInfoTable *infoTable ) +{ + if (infoTable->i.srt_bitmap == (StgHalfWord)(-1)) { + info->type = posTypeLargeSRT; + info->next.large_srt.srt = (StgLargeSRT *)GET_SRT(infoTable); + info->next.large_srt.offset = 0; + } else { + info->type = posTypeSRT; + info->next.srt.srt = (StgClosure **)GET_SRT(infoTable); + info->next.srt.srt_bitmap = infoTable->i.srt_bitmap; + } +} + +/* ----------------------------------------------------------------------------- + * Find the next object from *info. 
+ * -------------------------------------------------------------------------- */ +static INLINE StgClosure * +find_srt( stackPos *info ) +{ + StgClosure *c; + StgWord bitmap; + + if (info->type == posTypeSRT) { + // Small SRT bitmap + bitmap = info->next.srt.srt_bitmap; + while (bitmap != 0) { + if ((bitmap & 1) != 0) { +#ifdef ENABLE_WIN32_DLL_SUPPORT + + if ((unsigned long)(*(info->next.srt.srt)) & 0x1) + c = (* (StgClosure **)((unsigned long)*(info->next.srt.srt)) & ~0x1); + else + c = *(info->next.srt.srt); +#else + c = *(info->next.srt.srt); +#endif + bitmap = bitmap >> 1; + info->next.srt.srt++; + info->next.srt.srt_bitmap = bitmap; + return c; + } + bitmap = bitmap >> 1; + info->next.srt.srt++; + } + // bitmap is now zero... + return NULL; + } + else { + // Large SRT bitmap + nat i = info->next.large_srt.offset; + StgWord bitmap; + + // Follow the pattern from GC.c:scavenge_large_srt_bitmap(). + bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)]; + bitmap = bitmap >> (i % BITS_IN(StgWord)); + while (i < info->next.large_srt.srt->l.size) { + if ((bitmap & 1) != 0) { + c = ((StgClosure **)info->next.large_srt.srt->srt)[i]; + i++; + info->next.large_srt.offset = i; + return c; + } + i++; + if (i % BITS_IN(W_) == 0) { + bitmap = info->next.large_srt.srt->l.bitmap[i / BITS_IN(W_)]; + } else { + bitmap = bitmap >> 1; + } + } + // reached the end of this bitmap. + info->next.large_srt.offset = i; + return NULL; + } +} + +/* ----------------------------------------------------------------------------- + * push() pushes a stackElement representing the next child of *c + * onto the traverse stack. If *c has no child, *first_child is set + * to NULL and nothing is pushed onto the stack. If *c has only one + * child, *c_chlid is set to that child and nothing is pushed onto + * the stack. If *c has more than two children, *first_child is set + * to the first child and a stackElement representing the second + * child is pushed onto the stack. 
+ + * Invariants: + * *c_child_r is the most recent retainer of *c's children. + * *c is not any of TSO, AP, PAP, AP_STACK, which means that + * there cannot be any stack objects. + * Note: SRTs are considered to be children as well. + * -------------------------------------------------------------------------- */ +static INLINE void +push( StgClosure *c, retainer c_child_r, StgClosure **first_child ) +{ + stackElement se; + bdescr *nbd; // Next Block Descriptor + +#ifdef DEBUG_RETAINER + // debugBelch("push(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary); +#endif + + ASSERT(get_itbl(c)->type != TSO); + ASSERT(get_itbl(c)->type != AP_STACK); + + // + // fill in se + // + + se.c = c; + se.c_child_r = c_child_r; + + // fill in se.info + switch (get_itbl(c)->type) { + // no child, no SRT + case CONSTR_0_1: + case CONSTR_0_2: + case CAF_BLACKHOLE: + case BLACKHOLE: + case SE_BLACKHOLE: + case SE_CAF_BLACKHOLE: + case ARR_WORDS: + *first_child = NULL; + return; + + // one child (fixed), no SRT + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: + *first_child = ((StgMutVar *)c)->var; + return; + case THUNK_SELECTOR: + *first_child = ((StgSelector *)c)->selectee; + return; + case IND_PERM: + case IND_OLDGEN_PERM: + case IND_OLDGEN: + *first_child = ((StgInd *)c)->indirectee; + return; + case CONSTR_1_0: + case CONSTR_1_1: + *first_child = c->payload[0]; + return; + + // For CONSTR_2_0 and MVAR, we use se.info.step to record the position + // of the next child. We do not write a separate initialization code. 
+ // Also we do not have to initialize info.type; + + // two children (fixed), no SRT + // need to push a stackElement, but nothing to store in se.info + case CONSTR_2_0: + *first_child = c->payload[0]; // return the first pointer + // se.info.type = posTypeStep; + // se.info.next.step = 2; // 2 = second + break; + + // three children (fixed), no SRT + // need to push a stackElement + case MVAR: + // head must be TSO and the head of a linked list of TSOs. + // Shoule it be a child? Seems to be yes. + *first_child = (StgClosure *)((StgMVar *)c)->head; + // se.info.type = posTypeStep; + se.info.next.step = 2; // 2 = second + break; + + // three children (fixed), no SRT + case WEAK: + *first_child = ((StgWeak *)c)->key; + // se.info.type = posTypeStep; + se.info.next.step = 2; + break; + + // layout.payload.ptrs, no SRT + case CONSTR: + case STABLE_NAME: + case BCO: + case CONSTR_STATIC: + init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, + (StgPtr)c->payload); + *first_child = find_ptrs(&se.info); + if (*first_child == NULL) + return; // no child + break; + + // StgMutArrPtr.ptrs, no SRT + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs, + (StgPtr)(((StgMutArrPtrs *)c)->payload)); + *first_child = find_ptrs(&se.info); + if (*first_child == NULL) + return; + break; + + // layout.payload.ptrs, SRT + case FUN: // *c is a heap object. 
+ case FUN_2_0: + init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload); + *first_child = find_ptrs(&se.info); + if (*first_child == NULL) + // no child from ptrs, so check SRT + goto fun_srt_only; + break; + + case THUNK: + case THUNK_2_0: + init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, + (StgPtr)((StgThunk *)c)->payload); + *first_child = find_ptrs(&se.info); + if (*first_child == NULL) + // no child from ptrs, so check SRT + goto thunk_srt_only; + break; + + // 1 fixed child, SRT + case FUN_1_0: + case FUN_1_1: + *first_child = c->payload[0]; + ASSERT(*first_child != NULL); + init_srt_fun(&se.info, get_fun_itbl(c)); + break; + + case THUNK_1_0: + case THUNK_1_1: + *first_child = ((StgThunk *)c)->payload[0]; + ASSERT(*first_child != NULL); + init_srt_thunk(&se.info, get_thunk_itbl(c)); + break; + + case FUN_STATIC: // *c is a heap object. + ASSERT(get_itbl(c)->srt_bitmap != 0); + case FUN_0_1: + case FUN_0_2: + fun_srt_only: + init_srt_fun(&se.info, get_fun_itbl(c)); + *first_child = find_srt(&se.info); + if (*first_child == NULL) + return; // no child + break; + + // SRT only + case THUNK_STATIC: + ASSERT(get_itbl(c)->srt_bitmap != 0); + case THUNK_0_1: + case THUNK_0_2: + thunk_srt_only: + init_srt_thunk(&se.info, get_thunk_itbl(c)); + *first_child = find_srt(&se.info); + if (*first_child == NULL) + return; // no child + break; + + case TVAR_WAIT_QUEUE: + *first_child = (StgClosure *)((StgTVarWaitQueue *)c)->waiting_tso; + se.info.next.step = 2; // 2 = second + break; + case TVAR: + *first_child = (StgClosure *)((StgTVar *)c)->current_value; + break; + case TREC_HEADER: + *first_child = (StgClosure *)((StgTRecHeader *)c)->enclosing_trec; + break; + case TREC_CHUNK: + *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk; + se.info.next.step = 0; // entry no. 
+ break; + + // cannot appear + case PAP: + case AP: + case AP_STACK: + case TSO: + case IND_STATIC: + case CONSTR_INTLIKE: + case CONSTR_CHARLIKE: + case CONSTR_NOCAF_STATIC: + // stack objects + case UPDATE_FRAME: + case CATCH_FRAME: + case STOP_FRAME: + case RET_DYN: + case RET_BCO: + case RET_SMALL: + case RET_VEC_SMALL: + case RET_BIG: + case RET_VEC_BIG: + // invalid objects + case IND: + case BLOCKED_FETCH: + case FETCH_ME: + case FETCH_ME_BQ: + case RBH: + case REMOTE_REF: + case EVACUATED: + case INVALID_OBJECT: + default: + barf("Invalid object *c in push()"); + return; + } + + if (stackTop - 1 < stackBottom) { +#ifdef DEBUG_RETAINER + // debugBelch("push() to the next stack.\n"); +#endif + // currentStack->free is updated when the active stack is switched + // to the next stack. + currentStack->free = (StgPtr)stackTop; + + if (currentStack->link == NULL) { + nbd = allocGroup(BLOCKS_IN_STACK); + nbd->link = NULL; + nbd->u.back = currentStack; + currentStack->link = nbd; + } else + nbd = currentStack->link; + + newStackBlock(nbd); + } + + // adjust stackTop (acutal push) + stackTop--; + // If the size of stackElement was huge, we would better replace the + // following statement by either a memcpy() call or a switch statement + // on the type of the element. Currently, the size of stackElement is + // small enough (5 words) that this direct assignment seems to be enough. + *stackTop = se; + +#ifdef DEBUG_RETAINER + stackSize++; + if (stackSize > maxStackSize) maxStackSize = stackSize; + // ASSERT(stackSize >= 0); + // debugBelch("stackSize = %d\n", stackSize); +#endif +} + +/* ----------------------------------------------------------------------------- + * popOff() and popOffReal(): Pop a stackElement off the traverse stack. + * Invariants: + * stackTop cannot be equal to stackLimit unless the whole stack is + * empty, in which case popOff() is not allowed. 
+ * Note: + * You can think of popOffReal() as a part of popOff() which is + * executed at the end of popOff() in necessary. Since popOff() is + * likely to be executed quite often while popOffReal() is not, we + * separate popOffReal() from popOff(), which is declared as an + * INLINE function (for the sake of execution speed). popOffReal() + * is called only within popOff() and nowhere else. + * -------------------------------------------------------------------------- */ +static void +popOffReal(void) +{ + bdescr *pbd; // Previous Block Descriptor + +#ifdef DEBUG_RETAINER + // debugBelch("pop() to the previous stack.\n"); +#endif + + ASSERT(stackTop + 1 == stackLimit); + ASSERT(stackBottom == (stackElement *)currentStack->start); + + if (firstStack == currentStack) { + // The stack is completely empty. + stackTop++; + ASSERT(stackTop == stackLimit); +#ifdef DEBUG_RETAINER + stackSize--; + if (stackSize > maxStackSize) maxStackSize = stackSize; + /* + ASSERT(stackSize >= 0); + debugBelch("stackSize = %d\n", stackSize); + */ +#endif + return; + } + + // currentStack->free is updated when the active stack is switched back + // to the previous stack. + currentStack->free = (StgPtr)stackLimit; + + // find the previous block descriptor + pbd = currentStack->u.back; + ASSERT(pbd != NULL); + + returnToOldStack(pbd); + +#ifdef DEBUG_RETAINER + stackSize--; + if (stackSize > maxStackSize) maxStackSize = stackSize; + /* + ASSERT(stackSize >= 0); + debugBelch("stackSize = %d\n", stackSize); + */ +#endif +} + +static INLINE void +popOff(void) { +#ifdef DEBUG_RETAINER + // debugBelch("\tpopOff(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary); +#endif + + ASSERT(stackTop != stackLimit); + ASSERT(!isEmptyRetainerStack()); + + // <= (instead of <) is wrong! 
+ if (stackTop + 1 < stackLimit) { + stackTop++; +#ifdef DEBUG_RETAINER + stackSize--; + if (stackSize > maxStackSize) maxStackSize = stackSize; + /* + ASSERT(stackSize >= 0); + debugBelch("stackSize = %d\n", stackSize); + */ +#endif + return; + } + + popOffReal(); +} + +/* ----------------------------------------------------------------------------- + * Finds the next object to be considered for retainer profiling and store + * its pointer to *c. + * Test if the topmost stack element indicates that more objects are left, + * and if so, retrieve the first object and store its pointer to *c. Also, + * set *cp and *r appropriately, both of which are stored in the stack element. + * The topmost stack element then is overwritten so as for it to now denote + * the next object. + * If the topmost stack element indicates no more objects are left, pop + * off the stack element until either an object can be retrieved or + * the current stack chunk becomes empty, indicated by rtsTrue returned by + * isOnBoundary(), in which case *c is set to NULL. + * Note: + * It is okay to call this function even when the current stack chunk + * is empty. 
+ * -------------------------------------------------------------------------- */ +static INLINE void +pop( StgClosure **c, StgClosure **cp, retainer *r ) +{ + stackElement *se; + +#ifdef DEBUG_RETAINER + // debugBelch("pop(): stackTop = 0x%x, currentStackBoundary = 0x%x\n", stackTop, currentStackBoundary); +#endif + + do { + if (isOnBoundary()) { // if the current stack chunk is depleted + *c = NULL; + return; + } + + se = stackTop; + + switch (get_itbl(se->c)->type) { + // two children (fixed), no SRT + // nothing in se.info + case CONSTR_2_0: + *c = se->c->payload[1]; + *cp = se->c; + *r = se->c_child_r; + popOff(); + return; + + // three children (fixed), no SRT + // need to push a stackElement + case MVAR: + if (se->info.next.step == 2) { + *c = (StgClosure *)((StgMVar *)se->c)->tail; + se->info.next.step++; // move to the next step + // no popOff + } else { + *c = ((StgMVar *)se->c)->value; + popOff(); + } + *cp = se->c; + *r = se->c_child_r; + return; + + // three children (fixed), no SRT + case WEAK: + if (se->info.next.step == 2) { + *c = ((StgWeak *)se->c)->value; + se->info.next.step++; + // no popOff + } else { + *c = ((StgWeak *)se->c)->finalizer; + popOff(); + } + *cp = se->c; + *r = se->c_child_r; + return; + + case TVAR_WAIT_QUEUE: + if (se->info.next.step == 2) { + *c = (StgClosure *)((StgTVarWaitQueue *)se->c)->next_queue_entry; + se->info.next.step++; // move to the next step + // no popOff + } else { + *c = (StgClosure *)((StgTVarWaitQueue *)se->c)->prev_queue_entry; + popOff(); + } + *cp = se->c; + *r = se->c_child_r; + return; + + case TVAR: + *c = (StgClosure *)((StgTVar *)se->c)->first_wait_queue_entry; + *cp = se->c; + *r = se->c_child_r; + popOff(); + return; + + case TREC_HEADER: + *c = (StgClosure *)((StgTRecHeader *)se->c)->current_chunk; + *cp = se->c; + *r = se->c_child_r; + popOff(); + return; + + case TREC_CHUNK: { + // These are pretty complicated: we have N entries, each + // of which contains 3 fields that we want to follow. 
So + // we divide the step counter: the 2 low bits indicate + // which field, and the rest of the bits indicate the + // entry number (starting from zero). + nat entry_no = se->info.next.step >> 2; + nat field_no = se->info.next.step & 3; + if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) { + *c = NULL; + popOff(); + return; + } + TRecEntry *entry = &((StgTRecChunk *)se->c)->entries[entry_no]; + if (field_no == 0) { + *c = (StgClosure *)entry->tvar; + } else if (field_no == 1) { + *c = entry->expected_value; + } else { + *c = entry->new_value; + } + *cp = se->c; + *r = se->c_child_r; + se->info.next.step++; + return; + } + + case CONSTR: + case STABLE_NAME: + case BCO: + case CONSTR_STATIC: + // StgMutArrPtr.ptrs, no SRT + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + *c = find_ptrs(&se->info); + if (*c == NULL) { + popOff(); + break; + } + *cp = se->c; + *r = se->c_child_r; + return; + + // layout.payload.ptrs, SRT + case FUN: // always a heap object + case FUN_2_0: + if (se->info.type == posTypePtrs) { + *c = find_ptrs(&se->info); + if (*c != NULL) { + *cp = se->c; + *r = se->c_child_r; + return; + } + init_srt_fun(&se->info, get_fun_itbl(se->c)); + } + goto do_srt; + + case THUNK: + case THUNK_2_0: + if (se->info.type == posTypePtrs) { + *c = find_ptrs(&se->info); + if (*c != NULL) { + *cp = se->c; + *r = se->c_child_r; + return; + } + init_srt_thunk(&se->info, get_thunk_itbl(se->c)); + } + goto do_srt; + + // SRT + do_srt: + case THUNK_STATIC: + case FUN_STATIC: + case FUN_0_1: + case FUN_0_2: + case THUNK_0_1: + case THUNK_0_2: + case FUN_1_0: + case FUN_1_1: + case THUNK_1_0: + case THUNK_1_1: + *c = find_srt(&se->info); + if (*c != NULL) { + *cp = se->c; + *r = se->c_child_r; + return; + } + popOff(); + break; + + // no child (fixed), no SRT + case CONSTR_0_1: + case CONSTR_0_2: + case CAF_BLACKHOLE: + case BLACKHOLE: + case SE_BLACKHOLE: + case SE_CAF_BLACKHOLE: + case ARR_WORDS: + // 
one child (fixed), no SRT + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: + case THUNK_SELECTOR: + case IND_PERM: + case IND_OLDGEN_PERM: + case IND_OLDGEN: + case CONSTR_1_1: + // cannot appear + case PAP: + case AP: + case AP_STACK: + case TSO: + case IND_STATIC: + case CONSTR_INTLIKE: + case CONSTR_CHARLIKE: + case CONSTR_NOCAF_STATIC: + // stack objects + case RET_DYN: + case UPDATE_FRAME: + case CATCH_FRAME: + case STOP_FRAME: + case RET_BCO: + case RET_SMALL: + case RET_VEC_SMALL: + case RET_BIG: + case RET_VEC_BIG: + // invalid objects + case IND: + case BLOCKED_FETCH: + case FETCH_ME: + case FETCH_ME_BQ: + case RBH: + case REMOTE_REF: + case EVACUATED: + case INVALID_OBJECT: + default: + barf("Invalid object *c in pop()"); + return; + } + } while (rtsTrue); +} + +/* ----------------------------------------------------------------------------- + * RETAINER PROFILING ENGINE + * -------------------------------------------------------------------------- */ + +void +initRetainerProfiling( void ) +{ + initializeAllRetainerSet(); + retainerGeneration = 0; +} + +/* ----------------------------------------------------------------------------- + * This function must be called before f-closing prof_file. + * -------------------------------------------------------------------------- */ +void +endRetainerProfiling( void ) +{ +#ifdef SECOND_APPROACH + outputAllRetainerSet(prof_file); +#endif +} + +/* ----------------------------------------------------------------------------- + * Returns the actual pointer to the retainer set of the closure *c. + * It may adjust RSET(c) subject to flip. + * Side effects: + * RSET(c) is initialized to NULL if its current value does not + * conform to flip. + * Note: + * Even though this function has side effects, they CAN be ignored because + * subsequent calls to retainerSetOf() always result in the same return value + * and retainerSetOf() is the only way to retrieve retainerSet of a given + * closure. 
+ * We have to perform an XOR (^) operation each time a closure is examined. + * The reason is that we do not know when a closure is visited last. + * -------------------------------------------------------------------------- */ +static INLINE void +maybeInitRetainerSet( StgClosure *c ) +{ + if (!isRetainerSetFieldValid(c)) { + setRetainerSetToNull(c); + } +} + +/* ----------------------------------------------------------------------------- + * Returns rtsTrue if *c is a retainer. + * -------------------------------------------------------------------------- */ +static INLINE rtsBool +isRetainer( StgClosure *c ) +{ + switch (get_itbl(c)->type) { + // + // True case + // + // TSOs MUST be retainers: they constitute the set of roots. + case TSO: + + // mutable objects + case MVAR: + case MUT_VAR_CLEAN: + case MUT_VAR_DIRTY: + case MUT_ARR_PTRS_CLEAN: + case MUT_ARR_PTRS_DIRTY: + case MUT_ARR_PTRS_FROZEN: + case MUT_ARR_PTRS_FROZEN0: + + // thunks are retainers. + case THUNK: + case THUNK_1_0: + case THUNK_0_1: + case THUNK_2_0: + case THUNK_1_1: + case THUNK_0_2: + case THUNK_SELECTOR: + case AP: + case AP_STACK: + + // Static thunks, or CAFS, are obviously retainers. + case THUNK_STATIC: + + // WEAK objects are roots; there is separate code in which traversing + // begins from WEAK objects. 
+ case WEAK: + + // Since the other mutvar-type things are retainers, seems + // like the right thing to do: + case TVAR: + return rtsTrue; + + // + // False case + // + + // constructors + case CONSTR: + case CONSTR_1_0: + case CONSTR_0_1: + case CONSTR_2_0: + case CONSTR_1_1: + case CONSTR_0_2: + // functions + case FUN: + case FUN_1_0: + case FUN_0_1: + case FUN_2_0: + case FUN_1_1: + case FUN_0_2: + // partial applications + case PAP: + // blackholes + case CAF_BLACKHOLE: + case BLACKHOLE: + case SE_BLACKHOLE: + case SE_CAF_BLACKHOLE: + // indirection + case IND_PERM: + case IND_OLDGEN_PERM: + case IND_OLDGEN: + // static objects + case CONSTR_STATIC: + case FUN_STATIC: + // misc + case STABLE_NAME: + case BCO: + case ARR_WORDS: + // STM + case TVAR_WAIT_QUEUE: + case TREC_HEADER: + case TREC_CHUNK: + return rtsFalse; + + // + // Error case + // + // IND_STATIC cannot be *c, *cp, *r in the retainer profiling loop. + case IND_STATIC: + // CONSTR_INTLIKE, CONSTR_CHARLIKE, and CONSTR_NOCAF_STATIC + // cannot be *c, *cp, *r in the retainer profiling loop. + case CONSTR_INTLIKE: + case CONSTR_CHARLIKE: + case CONSTR_NOCAF_STATIC: + // Stack objects are invalid because they are never treated as + // legal objects during retainer profiling. + case UPDATE_FRAME: + case CATCH_FRAME: + case STOP_FRAME: + case RET_DYN: + case RET_BCO: + case RET_SMALL: + case RET_VEC_SMALL: + case RET_BIG: + case RET_VEC_BIG: + // other cases + case IND: + case BLOCKED_FETCH: + case FETCH_ME: + case FETCH_ME_BQ: + case RBH: + case REMOTE_REF: + case EVACUATED: + case INVALID_OBJECT: + default: + barf("Invalid object in isRetainer(): %d", get_itbl(c)->type); + return rtsFalse; + } +} + +/* ----------------------------------------------------------------------------- + * Returns the retainer function value for the closure *c, i.e., R(*c). + * This function does NOT return the retainer(s) of *c. + * Invariants: + * *c must be a retainer. 
+ * Note: + * Depending on the definition of this function, the maintenance of retainer + * sets can be made easier. If most retainer sets are likely to be created + * again across garbage collections, refreshAllRetainerSet() in + * RetainerSet.c can simply do nothing. + * If this is not the case, we can free all the retainer sets and + * re-initialize the hash table. + * See refreshAllRetainerSet() in RetainerSet.c. + * -------------------------------------------------------------------------- */ +static INLINE retainer +getRetainerFrom( StgClosure *c ) +{ + ASSERT(isRetainer(c)); + +#if defined(RETAINER_SCHEME_INFO) + // Retainer scheme 1: retainer = info table + return get_itbl(c); +#elif defined(RETAINER_SCHEME_CCS) + // Retainer scheme 2: retainer = cost centre stack + return c->header.prof.ccs; +#elif defined(RETAINER_SCHEME_CC) + // Retainer scheme 3: retainer = cost centre + return c->header.prof.ccs->cc; +#endif +} + +/* ----------------------------------------------------------------------------- + * Associates the retainer set *s with the closure *c, that is, *s becomes + * the retainer set of *c. + * Invariants: + * c != NULL + * s != NULL + * -------------------------------------------------------------------------- */ +static INLINE void +associate( StgClosure *c, RetainerSet *s ) +{ + // StgWord has the same size as pointers, so the following type + // casting is okay. + RSET(c) = (RetainerSet *)((StgWord)s | flip); +} + +/* ----------------------------------------------------------------------------- + Call retainClosure for each of the closures covered by a large bitmap. 
+ -------------------------------------------------------------------------- */ + +static void +retain_large_bitmap (StgPtr p, StgLargeBitmap *large_bitmap, nat size, + StgClosure *c, retainer c_child_r) +{ + nat i, b; + StgWord bitmap; + + b = 0; + bitmap = large_bitmap->bitmap[b]; + for (i = 0; i < size; ) { + if ((bitmap & 1) == 0) { + retainClosure((StgClosure *)*p, c, c_child_r); + } + i++; + p++; + if (i % BITS_IN(W_) == 0) { + b++; + bitmap = large_bitmap->bitmap[b]; + } else { + bitmap = bitmap >> 1; + } + } +} + +static INLINE StgPtr +retain_small_bitmap (StgPtr p, nat size, StgWord bitmap, + StgClosure *c, retainer c_child_r) +{ + while (size > 0) { + if ((bitmap & 1) == 0) { + retainClosure((StgClosure *)*p, c, c_child_r); + } + p++; + bitmap = bitmap >> 1; + size--; + } + return p; +} + +/* ----------------------------------------------------------------------------- + * Call retainClosure for each of the closures in an SRT. + * ------------------------------------------------------------------------- */ + +static void +retain_large_srt_bitmap (StgLargeSRT *srt, StgClosure *c, retainer c_child_r) +{ + nat i, b, size; + StgWord bitmap; + StgClosure **p; + + b = 0; + p = (StgClosure **)srt->srt; + size = srt->l.size; + bitmap = srt->l.bitmap[b]; + for (i = 0; i < size; ) { + if ((bitmap & 1) != 0) { + retainClosure((StgClosure *)*p, c, c_child_r); + } + i++; + p++; + if (i % BITS_IN(W_) == 0) { + b++; + bitmap = srt->l.bitmap[b]; + } else { + bitmap = bitmap >> 1; + } + } +} + +static INLINE void +retainSRT (StgClosure **srt, nat srt_bitmap, StgClosure *c, retainer c_child_r) +{ + nat bitmap; + StgClosure **p; + + bitmap = srt_bitmap; + p = srt; + + if (bitmap == (StgHalfWord)(-1)) { + retain_large_srt_bitmap( (StgLargeSRT *)srt, c, c_child_r ); + return; + } + + while (bitmap != 0) { + if ((bitmap & 1) != 0) { +#ifdef ENABLE_WIN32_DLL_SUPPORT + if ( (unsigned long)(*srt) & 0x1 ) { + retainClosure(*stgCast(StgClosure**,(stgCast(unsigned long, *srt) & 
~0x1)), + c, c_child_r); + } else { + retainClosure(*srt,c,c_child_r); + } +#else + retainClosure(*srt,c,c_child_r); +#endif + } + p++; + bitmap = bitmap >> 1; + } +} + +/* ----------------------------------------------------------------------------- + * Process all the objects in the stack chunk from stackStart to stackEnd + * with *c and *c_child_r being their parent and their most recent retainer, + * respectively. Treat stackOptionalFun as another child of *c if it is + * not NULL. + * Invariants: + * *c is one of the following: TSO, AP_STACK. + * If *c is TSO, c == c_child_r. + * stackStart < stackEnd. + * RSET(c) and RSET(c_child_r) are valid, i.e., their + * interpretation conforms to the current value of flip (even when they + * are interpreted to be NULL). + * If *c is TSO, its state is not any of ThreadRelocated, ThreadComplete, + * or ThreadKilled, which means that its stack is ready to process. + * Note: + * This code was almost plagiarzied from GC.c! For each pointer, + * retainClosure() is invoked instead of evacuate(). + * -------------------------------------------------------------------------- */ +static void +retainStack( StgClosure *c, retainer c_child_r, + StgPtr stackStart, StgPtr stackEnd ) +{ + stackElement *oldStackBoundary; + StgPtr p; + StgRetInfoTable *info; + StgWord32 bitmap; + nat size; + +#ifdef DEBUG_RETAINER + cStackSize++; + if (cStackSize > maxCStackSize) maxCStackSize = cStackSize; +#endif + + /* + Each invocation of retainStack() creates a new virtual + stack. Since all such stacks share a single common stack, we + record the current currentStackBoundary, which will be restored + at the exit. 
+ */ + oldStackBoundary = currentStackBoundary; + currentStackBoundary = stackTop; + +#ifdef DEBUG_RETAINER + // debugBelch("retainStack() called: oldStackBoundary = 0x%x, currentStackBoundary = 0x%x\n", oldStackBoundary, currentStackBoundary); +#endif + + ASSERT(get_itbl(c)->type != TSO || + (((StgTSO *)c)->what_next != ThreadRelocated && + ((StgTSO *)c)->what_next != ThreadComplete && + ((StgTSO *)c)->what_next != ThreadKilled)); + + p = stackStart; + while (p < stackEnd) { + info = get_ret_itbl((StgClosure *)p); + + switch(info->i.type) { + + case UPDATE_FRAME: + retainClosure(((StgUpdateFrame *)p)->updatee, c, c_child_r); + p += sizeofW(StgUpdateFrame); + continue; + + case STOP_FRAME: + case CATCH_FRAME: + case CATCH_STM_FRAME: + case CATCH_RETRY_FRAME: + case ATOMICALLY_FRAME: + case RET_SMALL: + case RET_VEC_SMALL: + bitmap = BITMAP_BITS(info->i.layout.bitmap); + size = BITMAP_SIZE(info->i.layout.bitmap); + p++; + p = retain_small_bitmap(p, size, bitmap, c, c_child_r); + + follow_srt: + retainSRT((StgClosure **)GET_SRT(info), info->i.srt_bitmap, c, c_child_r); + continue; + + case RET_BCO: { + StgBCO *bco; + + p++; + retainClosure((StgClosure *)*p, c, c_child_r); + bco = (StgBCO *)*p; + p++; + size = BCO_BITMAP_SIZE(bco); + retain_large_bitmap(p, BCO_BITMAP(bco), size, c, c_child_r); + p += size; + continue; + } + + // large bitmap (> 32 entries, or > 64 on a 64-bit machine) + case RET_BIG: + case RET_VEC_BIG: + size = GET_LARGE_BITMAP(&info->i)->size; + p++; + retain_large_bitmap(p, GET_LARGE_BITMAP(&info->i), + size, c, c_child_r); + p += size; + // and don't forget to follow the SRT + goto follow_srt; + + // Dynamic bitmap: the mask is stored on the stack + case RET_DYN: { + StgWord dyn; + dyn = ((StgRetDyn *)p)->liveness; + + // traverse the bitmap first + bitmap = RET_DYN_LIVENESS(dyn); + p = (P_)&((StgRetDyn *)p)->payload[0]; + size = RET_DYN_BITMAP_SIZE; + p = retain_small_bitmap(p, size, bitmap, c, c_child_r); + + // skip over the non-ptr words + p 
+= RET_DYN_NONPTRS(dyn) + RET_DYN_NONPTR_REGS_SIZE; + + // follow the ptr words + for (size = RET_DYN_PTRS(dyn); size > 0; size--) { + retainClosure((StgClosure *)*p, c, c_child_r); + p++; + } + continue; + } + + case RET_FUN: { + StgRetFun *ret_fun = (StgRetFun *)p; + StgFunInfoTable *fun_info; + + retainClosure(ret_fun->fun, c, c_child_r); + fun_info = get_fun_itbl(ret_fun->fun); + + p = (P_)&ret_fun->payload; + switch (fun_info->f.fun_type) { + case ARG_GEN: + bitmap = BITMAP_BITS(fun_info->f.b.bitmap); + size = BITMAP_SIZE(fun_info->f.b.bitmap); + p = retain_small_bitmap(p, size, bitmap, c, c_child_r); + break; + case ARG_GEN_BIG: + size = GET_FUN_LARGE_BITMAP(fun_info)->size; + retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), + size, c, c_child_r); + p += size; + break; + default: + bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); + size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]); + p = retain_small_bitmap(p, size, bitmap, c, c_child_r); + break; + } + goto follow_srt; + } + + default: + barf("Invalid object found in retainStack(): %d", + (int)(info->i.type)); + } + } + + // restore currentStackBoundary + currentStackBoundary = oldStackBoundary; +#ifdef DEBUG_RETAINER + // debugBelch("retainStack() finished: currentStackBoundary = 0x%x\n", currentStackBoundary); +#endif + +#ifdef DEBUG_RETAINER + cStackSize--; +#endif +} + +/* ---------------------------------------------------------------------------- + * Call retainClosure for each of the children of a PAP/AP + * ------------------------------------------------------------------------- */ + +static INLINE StgPtr +retain_PAP_payload (StgClosure *pap, retainer c_child_r, StgClosure *fun, + StgClosure** payload, StgWord n_args) +{ + StgPtr p; + StgWord bitmap; + StgFunInfoTable *fun_info; + + retainClosure(fun, pap, c_child_r); + fun_info = get_fun_itbl(fun); + ASSERT(fun_info->i.type != PAP); + + p = (StgPtr)payload; + + switch (fun_info->f.fun_type) { + case ARG_GEN: + bitmap = 
BITMAP_BITS(fun_info->f.b.bitmap); + p = retain_small_bitmap(p, n_args, bitmap, + pap, c_child_r); + break; + case ARG_GEN_BIG: + retain_large_bitmap(p, GET_FUN_LARGE_BITMAP(fun_info), + n_args, pap, c_child_r); + p += n_args; + break; + case ARG_BCO: + retain_large_bitmap((StgPtr)payload, BCO_BITMAP(fun), + n_args, pap, c_child_r); + p += n_args; + break; + default: + bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]); + p = retain_small_bitmap(p, n_args, bitmap, pap, c_child_r); + break; + } + return p; +} + +/* ----------------------------------------------------------------------------- + * Compute the retainer set of *c0 and all its desecents by traversing. + * *cp0 is the parent of *c0, and *r0 is the most recent retainer of *c0. + * Invariants: + * c0 = cp0 = r0 holds only for root objects. + * RSET(cp0) and RSET(r0) are valid, i.e., their + * interpretation conforms to the current value of flip (even when they + * are interpreted to be NULL). + * However, RSET(c0) may be corrupt, i.e., it may not conform to + * the current value of flip. If it does not, during the execution + * of this function, RSET(c0) must be initialized as well as all + * its descendants. + * Note: + * stackTop must be the same at the beginning and the exit of this function. + * *c0 can be TSO (as well as AP_STACK). 
 * -------------------------------------------------------------------------- */
// Iterative depth-first traversal over the heap graph.  The explicit
// traverse stack (push/pop) replaces recursion; the goto-based structure
// is deliberate and order-sensitive — do not restructure.
static void
retainClosure( StgClosure *c0, StgClosure *cp0, retainer r0 )
{
    // c = Current closure
    // cp = Current closure's Parent
    // r = current closures' most recent Retainer
    // c_child_r = current closure's children's most recent retainer
    // first_child = first child of c
    StgClosure *c, *cp, *first_child;
    RetainerSet *s, *retainerSetOfc;
    retainer r, c_child_r;
    StgWord typeOfc;

#ifdef DEBUG_RETAINER
    // StgPtr oldStackTop;
#endif

#ifdef DEBUG_RETAINER
    // oldStackTop = stackTop;
    // debugBelch("retainClosure() called: c0 = 0x%x, cp0 = 0x%x, r0 = 0x%x\n", c0, cp0, r0);
#endif

    // (c, cp, r) = (c0, cp0, r0)
    c = c0;
    cp = cp0;
    r = r0;
    goto inner_loop;

loop:
    //debugBelch("loop");
    // pop to (c, cp, r);
    pop(&c, &cp, &r);

    // pop() signals an empty traverse stack with c == NULL: traversal done.
    if (c == NULL) {
#ifdef DEBUG_RETAINER
	// debugBelch("retainClosure() ends: oldStackTop = 0x%x, stackTop = 0x%x\n", oldStackTop, stackTop);
#endif
	return;
    }

    //debugBelch("inner_loop");

inner_loop:
    // c = current closure under consideration,
    // cp = current closure's parent,
    // r = current closure's most recent retainer
    //
    // Loop invariants (on the meaning of c, cp, r, and their retainer sets):
    //   RSET(cp) and RSET(r) are valid.
    //   RSET(c) is valid only if c has been visited before.
    //
    // Loop invariants (on the relation between c, cp, and r)
    //   if cp is not a retainer, r belongs to RSET(cp).
    //   if cp is a retainer, r == cp.

    typeOfc = get_itbl(c)->type;

#ifdef DEBUG_RETAINER
    switch (typeOfc) {
    case IND_STATIC:
    case CONSTR_INTLIKE:
    case CONSTR_CHARLIKE:
    case CONSTR_NOCAF_STATIC:
    case CONSTR_STATIC:
    case THUNK_STATIC:
    case FUN_STATIC:
	break;
    default:
	if (retainerSetOf(c) == NULL) {   // first visit?
	    costArray[typeOfc] += cost(c);
	    sumOfNewCost += cost(c);
	}
	break;
    }
#endif

    // special cases
    switch (typeOfc) {
    case TSO:
	// Dead or relocated threads are not traversed as themselves.
	if (((StgTSO *)c)->what_next == ThreadComplete ||
	    ((StgTSO *)c)->what_next == ThreadKilled) {
#ifdef DEBUG_RETAINER
	    debugBelch("ThreadComplete or ThreadKilled encountered in retainClosure()\n");
#endif
	    goto loop;
	}
	if (((StgTSO *)c)->what_next == ThreadRelocated) {
#ifdef DEBUG_RETAINER
	    debugBelch("ThreadRelocated encountered in retainClosure()\n");
#endif
	    // follow the relocation link and reconsider
	    c = (StgClosure *)((StgTSO *)c)->link;
	    goto inner_loop;
	}
	break;

    case IND_STATIC:
	// We just skip IND_STATIC, so its retainer set is never computed.
	c = ((StgIndStatic *)c)->indirectee;
	goto inner_loop;
    case CONSTR_INTLIKE:
    case CONSTR_CHARLIKE:
	// static objects with no pointers out, so goto loop.
    case CONSTR_NOCAF_STATIC:
	// It is not just enough not to compute the retainer set for *c; it is
	// mandatory because CONSTR_NOCAF_STATIC are not reachable from
	// scavenged_static_objects, the list from which is assumed to traverse
	// all static objects after major garbage collections.
	goto loop;
    case THUNK_STATIC:
    case FUN_STATIC:
	if (get_itbl(c)->srt_bitmap == 0) {
	    // No need to compute the retainer set; no dynamic objects
	    // are reachable from *c.
	    //
	    // Static objects: if we traverse all the live closures,
	    // including static closures, during each heap census then
	    // we will observe that some static closures appear and
	    // disappear.  eg. a closure may contain a pointer to a
	    // static function 'f' which is not otherwise reachable
	    // (it doesn't indirectly point to any CAFs, so it doesn't
	    // appear in any SRTs), so we would find 'f' during
	    // traversal.  However on the next sweep there may be no
	    // closures pointing to 'f'.
	    //
	    // We must therefore ignore static closures whose SRT is
	    // empty, because these are exactly the closures that may
	    // "appear".  A closure with a non-empty SRT, and which is
	    // still required, will always be reachable.
	    //
	    // But what about CONSTR_STATIC?  Surely these may be able
	    // to appear, and they don't have SRTs, so we can't
	    // check.  So for now, we're calling
	    // resetStaticObjectForRetainerProfiling() from the
	    // garbage collector to reset the retainer sets in all the
	    // reachable static objects.
	    goto loop;
	}
	// note: intentional fall-through to default when srt_bitmap != 0
    default:
	break;
    }

    // The above objects are ignored in computing the average number of times
    // an object is visited.
    timesAnyObjectVisited++;

    // If this is the first visit to c, initialize its retainer set.
    maybeInitRetainerSet(c);
    retainerSetOfc = retainerSetOf(c);

    // Now compute s:
    //    isRetainer(cp) == rtsTrue => s == NULL
    //    isRetainer(cp) == rtsFalse => s == cp.retainer
    if (isRetainer(cp))
	s = NULL;
    else
	s = retainerSetOf(cp);

    // (c, cp, r, s) is available.

    // (c, cp, r, s, R_r) is available, so compute the retainer set for *c.
    if (retainerSetOfc == NULL) {
	// This is the first visit to *c.
	numObjectVisited++;

	if (s == NULL)
	    associate(c, singleton(r));
	else
	    // s is actually the retainer set of *c!
	    associate(c, s);

	// compute c_child_r
	c_child_r = isRetainer(c) ? getRetainerFrom(c) : r;
    } else {
	// This is not the first visit to *c.
	if (isMember(r, retainerSetOfc))
	    goto loop;          // no need to process child

	if (s == NULL)
	    associate(c, addElement(r, retainerSetOfc));
	else {
	    // s is not NULL and cp is not a retainer. This means that
	    // each time *cp is visited, so is *c. Thus, if s has
	    // exactly one more element in its retainer set than c, s
	    // is also the new retainer set for *c.
	    if (s->num == retainerSetOfc->num + 1) {
		associate(c, s);
	    }
	    // Otherwise, just add R_r to the current retainer set of *c.
	    else {
		associate(c, addElement(r, retainerSetOfc));
	    }
	}

	if (isRetainer(c))
	    goto loop;          // no need to process child

	// compute c_child_r
	c_child_r = r;
    }

    // now, RSET() of all of *c, *cp, and *r is valid.
    // (c, c_child_r) are available.

    // process child

    // Special case closures: we process these all in one go rather
    // than attempting to save the current position, because doing so
    // would be hard.
    switch (typeOfc) {
    case TSO:
	retainStack(c, c_child_r,
		    ((StgTSO *)c)->sp,
		    ((StgTSO *)c)->stack + ((StgTSO *)c)->stack_size);
	goto loop;

    case PAP:
    {
	StgPAP *pap = (StgPAP *)c;
	retain_PAP_payload(c, c_child_r, pap->fun, pap->payload, pap->n_args);
	goto loop;
    }

    case AP:
    {
	StgAP *ap = (StgAP *)c;
	retain_PAP_payload(c, c_child_r, ap->fun, ap->payload, ap->n_args);
	goto loop;
    }

    case AP_STACK:
	retainClosure(((StgAP_STACK *)c)->fun, c, c_child_r);
	retainStack(c, c_child_r,
		    (StgPtr)((StgAP_STACK *)c)->payload,
		    (StgPtr)((StgAP_STACK *)c)->payload +
			     ((StgAP_STACK *)c)->size);
	goto loop;
    }

    push(c, c_child_r, &first_child);

    // If first_child is null, c has no child.
    // If first_child is not null, the top stack element points to the next
    // object. push() may or may not push a stackElement on the stack.
    if (first_child == NULL)
	goto loop;

    // (c, cp, r) = (first_child, c, c_child_r)
    r = c_child_r;
    cp = c;
    c = first_child;
    goto inner_loop;
}

/* -----------------------------------------------------------------------------
 * Compute the retainer set for every object reachable from *tl.
 * -------------------------------------------------------------------------- */
static void
retainRoot( StgClosure **tl )
{
    // We no longer assume that only TSOs and WEAKs are roots; any closure can
    // be a root.

    ASSERT(isEmptyRetainerStack());
    currentStackBoundary = stackTop;

    if (*tl != &stg_END_TSO_QUEUE_closure && isRetainer(*tl)) {
	retainClosure(*tl, *tl, getRetainerFrom(*tl));
    } else {
	retainClosure(*tl, *tl, CCS_SYSTEM);
    }

    // NOT TRUE: ASSERT(isMember(getRetainerFrom(*tl), retainerSetOf(*tl)));
    // *tl might be a TSO which is ThreadComplete, in which
    // case we ignore it for the purposes of retainer profiling.
}

/* -----------------------------------------------------------------------------
 * Compute the retainer set for each of the objects in the heap.
 * -------------------------------------------------------------------------- */
static void
computeRetainerSet( void )
{
    StgWeak *weak;
    RetainerSet *rtl;
    nat g;
    StgPtr ml;
    bdescr *bd;
#ifdef DEBUG_RETAINER
    RetainerSet tmpRetainerSet;
#endif

    GetRoots(retainRoot);	// for scheduler roots

    // This function is called after a major GC, when key, value, and finalizer
    // all are guaranteed to be valid, or reachable.
    //
    // The following code assumes that WEAK objects are considered to be roots
    // for retainer profilng.
    for (weak = weak_ptr_list; weak != NULL; weak = weak->link)
	// retainRoot((StgClosure *)weak);
	// NOTE(review): this passes the address of the local loop variable;
	// retainRoot() only reads *tl, so this works, but it is subtle.
	retainRoot((StgClosure **)&weak);

    // Consider roots from the stable ptr table.
    markStablePtrTable(retainRoot);

    // The following code resets the rs field of each unvisited mutable
    // object (computing sumOfNewCostExtra and updating costArray[] when
    // debugging retainer profiler).
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
	// NOT TRUE: even G0 has a block on its mutable list
	// ASSERT(g != 0 || (generations[g].mut_list == NULL));

	// Traversing through mut_list is necessary
	// because we can find MUT_VAR objects which have not been
	// visited during retainer profiling.
	for (bd = generations[g].mut_list; bd != NULL; bd = bd->link) {
	    for (ml = bd->start; ml < bd->free; ml++) {

		maybeInitRetainerSet((StgClosure *)*ml);
		rtl = retainerSetOf((StgClosure *)*ml);

#ifdef DEBUG_RETAINER
		if (rtl == NULL) {
		    // first visit to *ml
		    // This is a violation of the interface rule!
		    RSET(ml) = (RetainerSet *)((StgWord)(&tmpRetainerSet) | flip);

		    switch (get_itbl((StgClosure *)ml)->type) {
		    case IND_STATIC:
			// no cost involved
			break;
		    case CONSTR_INTLIKE:
		    case CONSTR_CHARLIKE:
		    case CONSTR_NOCAF_STATIC:
		    case CONSTR_STATIC:
		    case THUNK_STATIC:
		    case FUN_STATIC:
			barf("Invalid object in computeRetainerSet(): %d", get_itbl((StgClosure*)ml)->type);
			break;
		    default:
			// dynamic objects
			costArray[get_itbl((StgClosure *)ml)->type] += cost((StgClosure *)ml);
			sumOfNewCostExtra += cost((StgClosure *)ml);
			break;
		    }
		}
#endif
	    }
	}
    }
}

/* -----------------------------------------------------------------------------
 * Traverse all static objects for which we compute retainer sets,
 * and reset their rs fields to NULL, which is accomplished by
 * invoking maybeInitRetainerSet(). This function must be called
 * before zeroing all objects reachable from scavenged_static_objects
 * in the case of major gabage collections. See GarbageCollect() in
 * GC.c.
 * Note:
 *	The mut_once_list of the oldest generation must also be traversed?
 *	Why? Because if the evacuation of an object pointed to by a static
 *	indirection object fails, it is put back to the mut_once_list of
 *	the oldest generation.
 *	However, this is not necessary because any static indirection objects
 *	are just traversed through to reach dynamic objects. In other words,
 *	they are not taken into consideration in computing retainer sets.
+ * -------------------------------------------------------------------------- */ +void +resetStaticObjectForRetainerProfiling( void ) +{ +#ifdef DEBUG_RETAINER + nat count; +#endif + StgClosure *p; + +#ifdef DEBUG_RETAINER + count = 0; +#endif + p = scavenged_static_objects; + while (p != END_OF_STATIC_LIST) { +#ifdef DEBUG_RETAINER + count++; +#endif + switch (get_itbl(p)->type) { + case IND_STATIC: + // Since we do not compute the retainer set of any + // IND_STATIC object, we don't have to reset its retainer + // field. + p = (StgClosure*)*IND_STATIC_LINK(p); + break; + case THUNK_STATIC: + maybeInitRetainerSet(p); + p = (StgClosure*)*THUNK_STATIC_LINK(p); + break; + case FUN_STATIC: + maybeInitRetainerSet(p); + p = (StgClosure*)*FUN_STATIC_LINK(p); + break; + case CONSTR_STATIC: + maybeInitRetainerSet(p); + p = (StgClosure*)*STATIC_LINK(get_itbl(p), p); + break; + default: + barf("resetStaticObjectForRetainerProfiling: %p (%s)", + p, get_itbl(p)->type); + break; + } + } +#ifdef DEBUG_RETAINER + // debugBelch("count in scavenged_static_objects = %d\n", count); +#endif +} + +/* ----------------------------------------------------------------------------- + * Perform retainer profiling. + * N is the oldest generation being profilied, where the generations are + * numbered starting at 0. + * Invariants: + * Note: + * This function should be called only immediately after major garbage + * collection. + * ------------------------------------------------------------------------- */ +void +retainerProfile(void) +{ +#ifdef DEBUG_RETAINER + nat i; + nat totalHeapSize; // total raw heap size (computed by linear scanning) +#endif + +#ifdef DEBUG_RETAINER + debugBelch(" < retainerProfile() invoked : %d>\n", retainerGeneration); +#endif + + stat_startRP(); + + // We haven't flipped the bit yet. 
+#ifdef DEBUG_RETAINER + debugBelch("Before traversing:\n"); + sumOfCostLinear = 0; + for (i = 0;i < N_CLOSURE_TYPES; i++) + costArrayLinear[i] = 0; + totalHeapSize = checkHeapSanityForRetainerProfiling(); + + debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize); + /* + debugBelch("costArrayLinear[] = "); + for (i = 0;i < N_CLOSURE_TYPES; i++) + debugBelch("[%u:%u] ", i, costArrayLinear[i]); + debugBelch("\n"); + */ + + ASSERT(sumOfCostLinear == totalHeapSize); + +/* +#define pcostArrayLinear(index) \ + if (costArrayLinear[index] > 0) \ + debugBelch("costArrayLinear[" #index "] = %u\n", costArrayLinear[index]) + pcostArrayLinear(THUNK_STATIC); + pcostArrayLinear(FUN_STATIC); + pcostArrayLinear(CONSTR_STATIC); + pcostArrayLinear(CONSTR_NOCAF_STATIC); + pcostArrayLinear(CONSTR_INTLIKE); + pcostArrayLinear(CONSTR_CHARLIKE); +*/ +#endif + + // Now we flips flip. + flip = flip ^ 1; + +#ifdef DEBUG_RETAINER + stackSize = 0; + maxStackSize = 0; + cStackSize = 0; + maxCStackSize = 0; +#endif + numObjectVisited = 0; + timesAnyObjectVisited = 0; + +#ifdef DEBUG_RETAINER + debugBelch("During traversing:\n"); + sumOfNewCost = 0; + sumOfNewCostExtra = 0; + for (i = 0;i < N_CLOSURE_TYPES; i++) + costArray[i] = 0; +#endif + + /* + We initialize the traverse stack each time the retainer profiling is + performed (because the traverse stack size varies on each retainer profiling + and this operation is not costly anyhow). However, we just refresh the + retainer sets. 
+ */ + initializeTraverseStack(); +#ifdef DEBUG_RETAINER + initializeAllRetainerSet(); +#else + refreshAllRetainerSet(); +#endif + computeRetainerSet(); + +#ifdef DEBUG_RETAINER + debugBelch("After traversing:\n"); + sumOfCostLinear = 0; + for (i = 0;i < N_CLOSURE_TYPES; i++) + costArrayLinear[i] = 0; + totalHeapSize = checkHeapSanityForRetainerProfiling(); + + debugBelch("\tsumOfCostLinear = %d, totalHeapSize = %d\n", sumOfCostLinear, totalHeapSize); + ASSERT(sumOfCostLinear == totalHeapSize); + + // now, compare the two results + /* + Note: + costArray[] must be exactly the same as costArrayLinear[]. + Known exceptions: + 1) Dead weak pointers, whose type is CONSTR. These objects are not + reachable from any roots. + */ + debugBelch("Comparison:\n"); + debugBelch("\tcostArrayLinear[] (must be empty) = "); + for (i = 0;i < N_CLOSURE_TYPES; i++) + if (costArray[i] != costArrayLinear[i]) + // nothing should be printed except MUT_VAR after major GCs + debugBelch("[%u:%u] ", i, costArrayLinear[i]); + debugBelch("\n"); + + debugBelch("\tsumOfNewCost = %u\n", sumOfNewCost); + debugBelch("\tsumOfNewCostExtra = %u\n", sumOfNewCostExtra); + debugBelch("\tcostArray[] (must be empty) = "); + for (i = 0;i < N_CLOSURE_TYPES; i++) + if (costArray[i] != costArrayLinear[i]) + // nothing should be printed except MUT_VAR after major GCs + debugBelch("[%u:%u] ", i, costArray[i]); + debugBelch("\n"); + + // only for major garbage collection + ASSERT(sumOfNewCost + sumOfNewCostExtra == sumOfCostLinear); +#endif + + // post-processing + closeTraverseStack(); +#ifdef DEBUG_RETAINER + closeAllRetainerSet(); +#else + // Note that there is no post-processing for the retainer sets. +#endif + retainerGeneration++; + + stat_endRP( + retainerGeneration - 1, // retainerGeneration has just been incremented! 
+#ifdef DEBUG_RETAINER + maxCStackSize, maxStackSize, +#endif + (double)timesAnyObjectVisited / numObjectVisited); +} + +/* ----------------------------------------------------------------------------- + * DEBUGGING CODE + * -------------------------------------------------------------------------- */ + +#ifdef DEBUG_RETAINER + +#define LOOKS_LIKE_PTR(r) ((LOOKS_LIKE_STATIC_CLOSURE(r) || \ + ((HEAP_ALLOCED(r) && ((Bdescr((P_)r)->flags & BF_FREE) == 0)))) && \ + ((StgWord)(*(StgPtr)r)!=0xaaaaaaaa)) + +static nat +sanityCheckHeapClosure( StgClosure *c ) +{ + StgInfoTable *info; + + ASSERT(LOOKS_LIKE_GHC_INFO(c->header.info)); + ASSERT(!closure_STATIC(c)); + ASSERT(LOOKS_LIKE_PTR(c)); + + if ((((StgWord)RSET(c) & 1) ^ flip) != 0) { + if (get_itbl(c)->type == CONSTR && + !strcmp(get_itbl(c)->prof.closure_type, "DEAD_WEAK") && + !strcmp(get_itbl(c)->prof.closure_desc, "DEAD_WEAK")) { + debugBelch("\tUnvisited dead weak pointer object found: c = %p\n", c); + costArray[get_itbl(c)->type] += cost(c); + sumOfNewCost += cost(c); + } else + debugBelch( + "Unvisited object: flip = %d, c = %p(%d, %s, %s), rs = %p\n", + flip, c, get_itbl(c)->type, + get_itbl(c)->prof.closure_type, get_itbl(c)->prof.closure_desc, + RSET(c)); + } else { + // debugBelch("sanityCheckHeapClosure) S: flip = %d, c = %p(%d), rs = %p\n", flip, c, get_itbl(c)->type, RSET(c)); + } + + return closure_sizeW(c); +} + +static nat +heapCheck( bdescr *bd ) +{ + StgPtr p; + static nat costSum, size; + + costSum = 0; + while (bd != NULL) { + p = bd->start; + while (p < bd->free) { + size = sanityCheckHeapClosure((StgClosure *)p); + sumOfCostLinear += size; + costArrayLinear[get_itbl((StgClosure *)p)->type] += size; + p += size; + // no need for slop check; I think slops are not used currently. 
+ } + ASSERT(p == bd->free); + costSum += bd->free - bd->start; + bd = bd->link; + } + + return costSum; +} + +static nat +smallObjectPoolCheck(void) +{ + bdescr *bd; + StgPtr p; + static nat costSum, size; + + bd = small_alloc_list; + costSum = 0; + + // first block + if (bd == NULL) + return costSum; + + p = bd->start; + while (p < alloc_Hp) { + size = sanityCheckHeapClosure((StgClosure *)p); + sumOfCostLinear += size; + costArrayLinear[get_itbl((StgClosure *)p)->type] += size; + p += size; + } + ASSERT(p == alloc_Hp); + costSum += alloc_Hp - bd->start; + + bd = bd->link; + while (bd != NULL) { + p = bd->start; + while (p < bd->free) { + size = sanityCheckHeapClosure((StgClosure *)p); + sumOfCostLinear += size; + costArrayLinear[get_itbl((StgClosure *)p)->type] += size; + p += size; + } + ASSERT(p == bd->free); + costSum += bd->free - bd->start; + bd = bd->link; + } + + return costSum; +} + +static nat +chainCheck(bdescr *bd) +{ + nat costSum, size; + + costSum = 0; + while (bd != NULL) { + // bd->free - bd->start is not an accurate measurement of the + // object size. Actually it is always zero, so we compute its + // size explicitly. 
+ size = sanityCheckHeapClosure((StgClosure *)bd->start); + sumOfCostLinear += size; + costArrayLinear[get_itbl((StgClosure *)bd->start)->type] += size; + costSum += size; + bd = bd->link; + } + + return costSum; +} + +static nat +checkHeapSanityForRetainerProfiling( void ) +{ + nat costSum, g, s; + + costSum = 0; + debugBelch("START: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + if (RtsFlags.GcFlags.generations == 1) { + costSum += heapCheck(g0s0->to_blocks); + debugBelch("heapCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + costSum += chainCheck(g0s0->large_objects); + debugBelch("chainCheck: sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + } else { + for (g = 0; g < RtsFlags.GcFlags.generations; g++) + for (s = 0; s < generations[g].n_steps; s++) { + /* + After all live objects have been scavenged, the garbage + collector may create some objects in + scheduleFinalizers(). These objects are created throught + allocate(), so the small object pool or the large object + pool of the g0s0 may not be empty. 
+ */ + if (g == 0 && s == 0) { + costSum += smallObjectPoolCheck(); + debugBelch("smallObjectPoolCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + costSum += chainCheck(generations[g].steps[s].large_objects); + debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + } else { + costSum += heapCheck(generations[g].steps[s].blocks); + debugBelch("heapCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + costSum += chainCheck(generations[g].steps[s].large_objects); + debugBelch("chainCheck(): sumOfCostLinear = %d, costSum = %d\n", sumOfCostLinear, costSum); + } + } + } + + return costSum; +} + +void +findPointer(StgPtr p) +{ + StgPtr q, r, e; + bdescr *bd; + nat g, s; + + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (s = 0; s < generations[g].n_steps; s++) { + // if (g == 0 && s == 0) continue; + bd = generations[g].steps[s].blocks; + for (; bd; bd = bd->link) { + for (q = bd->start; q < bd->free; q++) { + if (*q == (StgWord)p) { + r = q; + while (!LOOKS_LIKE_GHC_INFO(*r)) r--; + debugBelch("Found in gen[%d], step[%d]: q = %p, r = %p\n", g, s, q, r); + // return; + } + } + } + bd = generations[g].steps[s].large_objects; + for (; bd; bd = bd->link) { + e = bd->start + cost((StgClosure *)bd->start); + for (q = bd->start; q < e; q++) { + if (*q == (StgWord)p) { + r = q; + while (*r == 0 || !LOOKS_LIKE_GHC_INFO(*r)) r--; + debugBelch("Found in gen[%d], large_objects: %p\n", g, r); + // return; + } + } + } + } + } +} + +static void +belongToHeap(StgPtr p) +{ + bdescr *bd; + nat g, s; + + for (g = 0; g < RtsFlags.GcFlags.generations; g++) { + for (s = 0; s < generations[g].n_steps; s++) { + // if (g == 0 && s == 0) continue; + bd = generations[g].steps[s].blocks; + for (; bd; bd = bd->link) { + if (bd->start <= p && p < bd->free) { + debugBelch("Belongs to gen[%d], step[%d]", g, s); + return; + } + } + bd = generations[g].steps[s].large_objects; + for (; bd; bd = bd->link) { 
+ if (bd->start <= p && p < bd->start + getHeapClosureSize((StgClosure *)bd->start)) { + debugBelch("Found in gen[%d], large_objects: %p\n", g, bd->start); + return; + } + } + } + } +} +#endif /* DEBUG_RETAINER */ + +#endif /* PROFILING */ |