diff options
Diffstat (limited to 'rts')
-rw-r--r-- | rts/Apply.cmm | 25 | ||||
-rw-r--r-- | rts/AutoApply.h | 11 | ||||
-rw-r--r-- | rts/Exception.cmm | 4 | ||||
-rw-r--r-- | rts/PrimOps.cmm | 4 | ||||
-rw-r--r-- | rts/ProfHeap.c | 4 | ||||
-rw-r--r-- | rts/Profiling.c | 1200 | ||||
-rw-r--r-- | rts/Profiling.h | 3 | ||||
-rw-r--r-- | rts/Proftimer.c | 2 | ||||
-rw-r--r-- | rts/RaiseAsync.c | 2 | ||||
-rw-r--r-- | rts/RtsFlags.c | 7 | ||||
-rw-r--r-- | rts/StgMiscClosures.cmm | 16 | ||||
-rw-r--r-- | rts/Updates.cmm | 2 | ||||
-rw-r--r-- | rts/sm/Storage.c | 6 |
13 files changed, 677 insertions, 609 deletions
diff --git a/rts/Apply.cmm b/rts/Apply.cmm index f9ac3b353c..5397fc55df 100644 --- a/rts/Apply.cmm +++ b/rts/Apply.cmm @@ -85,8 +85,9 @@ stg_PAP_apply // profiling TICK_ENT_PAP(); LDV_ENTER(pap); - // Enter PAP cost centre - ENTER_CCS_PAP_CL(pap); +#ifdef PROFILING + foreign "C" enterFunCCS(StgHeader_ccs(pap)); +#endif // Reload the stack W_ i; @@ -175,11 +176,9 @@ INFO_TABLE(stg_AP,/*special layout*/0,0,AP,"AP","AP") TICK_ENT_AP(); LDV_ENTER(ap); + ENTER_CCS_THUNK(ap); - // Enter PAP cost centre - ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL - - // Reload the stack + // Reload the stack W_ i; W_ p; p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload; @@ -245,11 +244,9 @@ INFO_TABLE(stg_AP_NOUPD,/*special layout*/0,0,AP,"AP_NOUPD","AP_NOUPD") TICK_ENT_AP(); LDV_ENTER(ap); + ENTER_CCS_THUNK(ap); - // Enter PAP cost centre - ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL - - // Reload the stack + // Reload the stack W_ i; W_ p; p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload; @@ -326,9 +323,7 @@ INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK") TICK_ENT_AP(); LDV_ENTER(ap); - - // Enter PAP cost centre - ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL + ENTER_CCS_THUNK(ap); // Reload the stack W_ i; @@ -379,9 +374,7 @@ INFO_TABLE(stg_AP_STACK_NOUPD,/*special layout*/0,0,AP_STACK, TICK_ENT_AP(); LDV_ENTER(ap); - - // Enter PAP cost centre - ENTER_CCS_PAP_CL(ap); // ToDo: ENTER_CC_AP_CL + ENTER_CCS_THUNK(ap); // Reload the stack W_ i; diff --git a/rts/AutoApply.h b/rts/AutoApply.h index bbec1224ff..547c5d2f28 100644 --- a/rts/AutoApply.h +++ b/rts/AutoApply.h @@ -76,5 +76,16 @@ Sp_adj(n+1); \ jump %ENTRY_CODE(Sp(0)); +// Jump to target, saving CCCS and restoring it on return +#if defined(PROFILING) +#define jump_SAVE_CCCS(target) \ + Sp(-1) = W_[CCCS]; \ + Sp(-2) = stg_restore_cccs_info; \ + Sp_adj(-2); \ + jump (target) +#else +#define jump_SAVE_CCCS(target) jump (target) +#endif + #endif /* APPLY_H */ diff --git a/rts/Exception.cmm b/rts/Exception.cmm index 591fa7ab9b..9f48f5d8f5 100644 --- a/rts/Exception.cmm +++ b/rts/Exception.cmm @@ -426,7 +426,9 @@ stg_raisezh * the info was only displayed for an *uncaught* exception. */ if (RtsFlags_ProfFlags_showCCSOnException(RtsFlags) != 0::I32) { - foreign "C" fprintCCS_stderr(W_[CCCS] "ptr") []; + SAVE_THREAD_STATE(); + foreign "C" fprintCCS_stderr(W_[CCCS] "ptr", CurrentTSO "ptr") []; + LOAD_THREAD_STATE(); } #endif diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm index bb4f73bbab..c96e459975 100644 --- a/rts/PrimOps.cmm +++ b/rts/PrimOps.cmm @@ -1204,6 +1204,8 @@ stg_takeMVarzh // into the heap check generated by the code generator, so we // have to do it in stg_gc_gen (see HeapStackCheck.cmm). HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR, stg_takeMVarzh); + TICK_ALLOC_PRIM(SIZEOF_StgMVarTSOQueue, 0, 0); + CCCS_ALLOC(SIZEOF_StgMVarTSOQueue); q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); @@ -1369,6 +1371,8 @@ stg_putMVarzh // see Note [mvar-heap-check] above HP_CHK_GEN_TICKY(SIZEOF_StgMVarTSOQueue, R1_PTR & R2_PTR, stg_putMVarzh); + TICK_ALLOC_PRIM(SIZEOF_StgMVarTSOQueue, 0, 0); + CCCS_ALLOC(SIZEOF_StgMVarTSOQueue); q = Hp - SIZEOF_StgMVarTSOQueue + WDS(1); diff --git a/rts/ProfHeap.c b/rts/ProfHeap.c index 9d95b4ccc0..56c44519fb 100644 --- a/rts/ProfHeap.c +++ b/rts/ProfHeap.c @@ -919,12 +919,12 @@ heapCensusChain( Census *census, bdescr *bd ) for (; bd != NULL; bd = bd->link) { // HACK: pretend a pinned block is just one big ARR_WORDS - // owned by CCS_SYSTEM. These blocks can be full of holes due + // owned by CCS_PINNED. These blocks can be full of holes due // to alignment constraints so we can't traverse the memory // and do a proper census. if (bd->flags & BF_PINNED) { StgClosure arr; - SET_HDR(&arr, &stg_ARR_WORDS_info, CCS_SYSTEM); + SET_HDR(&arr, &stg_ARR_WORDS_info, CCS_PINNED); heapProfObject(census, &arr, bd->blocks * BLOCK_SIZE_W, rtsTrue); continue; } diff --git a/rts/Profiling.c b/rts/Profiling.c index 5648f31e00..55495cdf94 100644 --- a/rts/Profiling.c +++ b/rts/Profiling.c @@ -36,12 +36,11 @@ Arena *prof_arena; unsigned int CC_ID = 1; unsigned int CCS_ID = 1; -unsigned int HP_ID = 1; /* figures for the profiling report. */ static StgWord64 total_alloc; -static lnat total_prof_ticks; +static lnat total_prof_ticks; /* Globals for opening the profiling log file(s) */ @@ -55,7 +54,7 @@ FILE *hp_file; */ CostCentreStack *CCCS; -/* Linked lists to keep track of cc's and ccs's that haven't +/* Linked lists to keep track of CCs and CCSs that haven't * been declared in the log file yet */ CostCentre *CC_LIST = NULL; @@ -78,67 +77,59 @@ CostCentreStack *CCS_LIST = NULL; * itself. These are costs that would not be incurred * during non-profiled execution of the program. * - * SUBSUMED is the one-and-only CCS placed on top-level functions. - * It indicates that all costs are to be attributed to the - * enclosing cost centre stack. SUBSUMED never accumulates - * any costs. The is_caf flag is set on the subsumed cost - * centre. - * * DONT_CARE is a placeholder cost-centre we assign to static * constructors. It should *never* accumulate any costs. + * + * PINNED accumulates memory allocated to pinned objects, which + * cannot be profiled separately because we cannot reliably + * traverse pinned memory. */ -CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_IS_BORING, ); -CC_DECLARE(CC_SYSTEM, "SYSTEM", "MAIN", CC_IS_BORING, ); -CC_DECLARE(CC_GC, "GC", "GC", CC_IS_BORING, ); -CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_IS_CAF, ); -CC_DECLARE(CC_SUBSUMED, "SUBSUMED", "MAIN", CC_IS_CAF, ); -CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_IS_BORING, ); +CC_DECLARE(CC_MAIN, "MAIN", "MAIN", CC_NOT_CAF, ); +CC_DECLARE(CC_SYSTEM, "SYSTEM", "SYSTEM", CC_NOT_CAF, ); +CC_DECLARE(CC_GC, "GC", "GC", CC_NOT_CAF, ); +CC_DECLARE(CC_OVERHEAD, "OVERHEAD_of", "PROFILING", CC_NOT_CAF, ); +CC_DECLARE(CC_DONT_CARE, "DONT_CARE", "MAIN", CC_NOT_CAF, ); +CC_DECLARE(CC_PINNED, "PINNED", "SYSTEM", CC_NOT_CAF, ); CCS_DECLARE(CCS_MAIN, CC_MAIN, ); CCS_DECLARE(CCS_SYSTEM, CC_SYSTEM, ); CCS_DECLARE(CCS_GC, CC_GC, ); CCS_DECLARE(CCS_OVERHEAD, CC_OVERHEAD, ); -CCS_DECLARE(CCS_SUBSUMED, CC_SUBSUMED, ); -CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, ); +CCS_DECLARE(CCS_DONT_CARE, CC_DONT_CARE, ); +CCS_DECLARE(CCS_PINNED, CC_PINNED, ); -/* - * Uniques for the XML log-file format - */ -#define CC_UQ 1 -#define CCS_UQ 2 -#define TC_UQ 3 -#define HEAP_OBJ_UQ 4 -#define TIME_UPD_UQ 5 -#define HEAP_UPD_UQ 6 - -/* +/* * Static Functions */ -static CostCentreStack * ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, - CostCentreStack *new_ccs ); -static rtsBool ccs_to_ignore ( CostCentreStack *ccs ); -static void count_ticks ( CostCentreStack *ccs ); -static void inherit_costs ( CostCentreStack *ccs ); -static void findCCSMaxLens ( CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len ); -static void logCCS ( CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len ); +static CostCentreStack * appendCCS ( CostCentreStack *ccs1, + CostCentreStack *ccs2 ); +static CostCentreStack * actualPush_ ( CostCentreStack *ccs, CostCentre *cc, + CostCentreStack *new_ccs ); +static rtsBool ignoreCCS ( CostCentreStack *ccs ); +static void countTickss ( CostCentreStack *ccs ); +static void inheritCosts ( CostCentreStack *ccs ); +static void findCCSMaxLens ( CostCentreStack *ccs, + nat indent, + nat *max_label_len, + nat *max_module_len ); +static void logCCS ( CostCentreStack *ccs, + nat indent, + nat max_label_len, + nat max_module_len ); static void reportCCS ( CostCentreStack *ccs ); -static void DecCCS ( CostCentreStack *ccs ); -static void DecBackEdge ( CostCentreStack *ccs, - CostCentreStack *oldccs ); -static CostCentreStack * CheckLoop ( CostCentreStack *ccs, CostCentre *cc ); +static CostCentreStack * checkLoop ( CostCentreStack *ccs, + CostCentre *cc ); static CostCentreStack * pruneCCSTree ( CostCentreStack *ccs ); -static CostCentreStack * ActualPush ( CostCentreStack *, CostCentre * ); -static CostCentreStack * IsInIndexTable ( IndexTable *, CostCentre * ); -static IndexTable * AddToIndexTable ( IndexTable *, CostCentreStack *, +static CostCentreStack * actualPush ( CostCentreStack *, CostCentre * ); +static CostCentreStack * isInIndexTable ( IndexTable *, CostCentre * ); +static IndexTable * addToIndexTable ( IndexTable *, CostCentreStack *, CostCentre *, unsigned int ); static void ccsSetSelected ( CostCentreStack *ccs ); -static void initTimeProfiling ( void ); -static void initProfilingLogFile( void ); - -static void reportCCS_XML ( CostCentreStack *ccs ); +static void initTimeProfiling ( void ); +static void initProfilingLogFile ( void ); /* ----------------------------------------------------------------------------- Initialise the profiling environment @@ -147,11 +138,11 @@ static void reportCCS_XML ( CostCentreStack *ccs ); void initProfiling1 (void) { - // initialise our arena - prof_arena = newArena(); + // initialise our arena + prof_arena = newArena(); - /* for the benefit of allocate()... */ - CCCS = CCS_SYSTEM; + /* for the benefit of allocate()... */ + CCCS = CCS_SYSTEM; } void @@ -163,93 +154,57 @@ freeProfiling (void) void initProfiling2 (void) { - CostCentreStack *ccs, *next; - - CCCS = CCS_SYSTEM; - - /* Set up the log file, and dump the header and cost centre - * information into it. */ - initProfilingLogFile(); - - /* Register all the cost centres / stacks in the program - * CC_MAIN gets link = 0, all others have non-zero link. - */ - REGISTER_CC(CC_MAIN); - REGISTER_CC(CC_SYSTEM); - REGISTER_CC(CC_GC); - REGISTER_CC(CC_OVERHEAD); - REGISTER_CC(CC_SUBSUMED); - REGISTER_CC(CC_DONT_CARE); - - REGISTER_CCS(CCS_SYSTEM); - REGISTER_CCS(CCS_GC); - REGISTER_CCS(CCS_OVERHEAD); - REGISTER_CCS(CCS_SUBSUMED); - REGISTER_CCS(CCS_DONT_CARE); - REGISTER_CCS(CCS_MAIN); - - /* find all the "special" cost centre stacks, and make them children - * of CCS_MAIN. - */ - ASSERT(CCS_LIST == CCS_MAIN); - CCS_LIST = CCS_LIST->prevStack; - CCS_MAIN->prevStack = NULL; - CCS_MAIN->root = CC_MAIN; - ccsSetSelected(CCS_MAIN); - DecCCS(CCS_MAIN); - - for (ccs = CCS_LIST; ccs != NULL; ) { - next = ccs->prevStack; - ccs->prevStack = NULL; - ActualPush_(CCS_MAIN,ccs->cc,ccs); - ccs->root = ccs->cc; - ccs = next; - } - - if (RtsFlags.CcFlags.doCostCentres) { - initTimeProfiling(); - } - - if (RtsFlags.ProfFlags.doHeapProfile) { - initHeapProfiling(); - } -} - -// Decide whether closures with this CCS should contribute to the heap -// profile. -static void -ccsSetSelected( CostCentreStack *ccs ) -{ - if (RtsFlags.ProfFlags.modSelector) { - if (! strMatchesSelector( ccs->cc->module, - RtsFlags.ProfFlags.modSelector ) ) { - ccs->selected = 0; - return; - } + CostCentreStack *ccs, *next; + + CCCS = CCS_SYSTEM; + + /* Set up the log file, and dump the header and cost centre + * information into it. + */ + initProfilingLogFile(); + + /* Register all the cost centres / stacks in the program + * CC_MAIN gets link = 0, all others have non-zero link. + */ + REGISTER_CC(CC_MAIN); + REGISTER_CC(CC_SYSTEM); + REGISTER_CC(CC_GC); + REGISTER_CC(CC_OVERHEAD); + REGISTER_CC(CC_DONT_CARE); + REGISTER_CC(CC_PINNED); + + REGISTER_CCS(CCS_SYSTEM); + REGISTER_CCS(CCS_GC); + REGISTER_CCS(CCS_OVERHEAD); + REGISTER_CCS(CCS_DONT_CARE); + REGISTER_CCS(CCS_PINNED); + REGISTER_CCS(CCS_MAIN); + + /* find all the registered cost centre stacks, and make them + * children of CCS_MAIN. + */ + ASSERT(CCS_LIST == CCS_MAIN); + CCS_LIST = CCS_LIST->prevStack; + CCS_MAIN->prevStack = NULL; + CCS_MAIN->root = CCS_MAIN; + ccsSetSelected(CCS_MAIN); + + // make CCS_MAIN the parent of all the pre-defined CCSs. + for (ccs = CCS_LIST; ccs != NULL; ) { + next = ccs->prevStack; + ccs->prevStack = NULL; + actualPush_(CCS_MAIN,ccs->cc,ccs); + ccs->root = ccs; + ccs = next; } - if (RtsFlags.ProfFlags.ccSelector) { - if (! strMatchesSelector( ccs->cc->label, - RtsFlags.ProfFlags.ccSelector ) ) { - ccs->selected = 0; - return; - } - } - if (RtsFlags.ProfFlags.ccsSelector) { - CostCentreStack *c; - for (c = ccs; c != NULL; c = c->prevStack) { - if ( strMatchesSelector( c->cc->label, - RtsFlags.ProfFlags.ccsSelector )) { - break; - } - } - if (c == NULL) { - ccs->selected = 0; - return; - } + + if (RtsFlags.CcFlags.doCostCentres) { + initTimeProfiling(); } - ccs->selected = 1; - return; + if (RtsFlags.ProfFlags.doHeapProfile) { + initHeapProfiling(); + } } @@ -294,21 +249,6 @@ initProfilingLogFile(void) RtsFlags.ProfFlags.doHeapProfile = 0; return; } - - if (RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) { - /* dump the time, and the profiling interval */ - fprintf(prof_file, "\"%s\"\n", time_str()); - fprintf(prof_file, "\"%d ms\"\n", RtsFlags.MiscFlags.tickInterval); - - /* declare all the cost centres */ - { - CostCentre *cc; - for (cc = CC_LIST; cc != NULL; cc = cc->link) { - fprintf(prof_file, "%d %ld \"%s\" \"%s\"\n", - CC_UQ, cc->ccID, cc->label, cc->module); - } - } - } } if (RtsFlags.ProfFlags.doHeapProfile) { @@ -329,40 +269,151 @@ initProfilingLogFile(void) void initTimeProfiling(void) { - /* Start ticking */ - startProfTimer(); + /* Start ticking */ + startProfTimer(); }; void endProfiling ( void ) { - if (RtsFlags.CcFlags.doCostCentres) { - stopProfTimer(); - } - if (RtsFlags.ProfFlags.doHeapProfile) { - endHeapProfiling(); - } + if (RtsFlags.CcFlags.doCostCentres) { + stopProfTimer(); + } + if (RtsFlags.ProfFlags.doHeapProfile) { + endHeapProfiling(); + } } /* ----------------------------------------------------------------------------- - Set cost centre stack when entering a function. + Set CCCS when entering a function. + + The algorithm is as follows. + + ccs ++> ccsfn = ccs ++ dropCommonPrefix ccs ccsfn + + where + + dropCommonPrefix A B + -- returns the suffix of B after removing any prefix common + -- to both A and B. + + e.g. + + <a,b,c> ++> <> = <a,b,c> + <a,b,c> ++> <d> = <a,b,c,d> + <a,b,c> ++> <a,b> = <a,b,c> + <a,b> ++> <a,b,c> = <a,b,c> + <a,b,c> ++> <a,b,d> = <a,b,c,d> + -------------------------------------------------------------------------- */ -rtsBool entering_PAP; -void -EnterFunCCS ( CostCentreStack *ccsfn ) +// implements c1 ++> c2, where c1 and c2 are equal depth +// +static void enterFunEqualStacks (CostCentreStack *ccs, CostCentreStack *ccsfn) { - /* PAP_entry has already set CCCS for us */ - if (entering_PAP) { - entering_PAP = rtsFalse; - return; - } + ASSERT(ccs->depth == ccsfn->depth); + if (ccs == ccsfn) return; + enterFunEqualStacks(ccs->prevStack, ccsfn->prevStack); + CCCS = pushCostCentre(CCCS, ccsfn->cc); +} + +// implements c1 ++> c2, where c2 is deeper than c1. +// Drop elements of c2 until we have equal stacks, call +// enterFunEqualStacks(), and then push on the elements that we +// dropped in reverse order. +// +static void enterFunCurShorter (CostCentreStack *ccsfn, StgWord n) +{ + if (n == 0) { + ASSERT(ccsfn->depth == CCCS->depth); + enterFunEqualStacks(CCCS,ccsfn); + return; + } + enterFunCurShorter(ccsfn->prevStack, n-1); + CCCS = pushCostCentre(CCCS, ccsfn->cc); +} + +void enterFunCCS ( CostCentreStack *ccsfn ) +{ + // common case 1: both stacks are the same + if (ccsfn == CCCS) { + return; + } + + // common case 2: the function stack is empty, or just CAF + if (ccsfn->prevStack == CCS_MAIN) { + return; + } + + // common case 3: the stacks are completely different (e.g. one is a + // descendent of MAIN and the other of a CAF): we append the whole + // of the function stack to the current CCS. + if (ccsfn->root != CCCS->root) { + CCCS = appendCCS(CCCS,ccsfn); + return; + } + + // uncommon case 4: CCCS is deeper than ccsfn + if (CCCS->depth > ccsfn->depth) { + nat i, n; + CostCentreStack *tmp = CCCS; + n = CCCS->depth - ccsfn->depth; + for (i = 0; i < n; i++) { + tmp = tmp->prevStack; + } + enterFunEqualStacks(tmp,ccsfn); + return; + } + + // uncommon case 5: ccsfn is deeper than CCCS + if (ccsfn->depth > CCCS->depth) { + enterFunCurShorter(ccsfn, ccsfn->depth - CCCS->depth); + return; + } - if (ccsfn->root->is_caf == CC_IS_CAF) { - CCCS = AppendCCS(CCCS,ccsfn); - } else { - CCCS = ccsfn; - } + // uncommon case 6: stacks are equal depth, but different + enterFunEqualStacks(CCCS,ccsfn); +} + +/* ----------------------------------------------------------------------------- + Decide whether closures with this CCS should contribute to the heap + profile. + -------------------------------------------------------------------------- */ + +static void +ccsSetSelected (CostCentreStack *ccs) +{ + if (RtsFlags.ProfFlags.modSelector) { + if (! strMatchesSelector (ccs->cc->module, + RtsFlags.ProfFlags.modSelector) ) { + ccs->selected = 0; + return; + } + } + if (RtsFlags.ProfFlags.ccSelector) { + if (! strMatchesSelector (ccs->cc->label, + RtsFlags.ProfFlags.ccSelector) ) { + ccs->selected = 0; + return; + } + } + if (RtsFlags.ProfFlags.ccsSelector) { + CostCentreStack *c; + for (c = ccs; c != NULL; c = c->prevStack) + { + if ( strMatchesSelector (c->cc->label, + RtsFlags.ProfFlags.ccsSelector) ) { + break; + } + } + if (c == NULL) { + ccs->selected = 0; + return; + } + } + + ccs->selected = 1; + return; } /* ----------------------------------------------------------------------------- @@ -370,211 +421,192 @@ EnterFunCCS ( CostCentreStack *ccsfn ) -------------------------------------------------------------------------- */ #ifdef DEBUG -CostCentreStack * _PushCostCentre ( CostCentreStack *ccs, CostCentre *cc ); +CostCentreStack * _pushCostCentre ( CostCentreStack *ccs, CostCentre *cc ); CostCentreStack * -PushCostCentre ( CostCentreStack *ccs, CostCentre *cc ) -#define PushCostCentre _PushCostCentre +pushCostCentre ( CostCentreStack *ccs, CostCentre *cc ) +#define pushCostCentre _pushCostCentre { IF_DEBUG(prof, traceBegin("pushing %s on ", cc->label); debugCCS(ccs); traceEnd();); - return PushCostCentre(ccs,cc); + return pushCostCentre(ccs,cc); } #endif -CostCentreStack * -PushCostCentre ( CostCentreStack *ccs, CostCentre *cc ) -{ - CostCentreStack *temp_ccs; - - if (ccs == EMPTY_STACK) - return ActualPush(ccs,cc); - else { - if (ccs->cc == cc) - return ccs; - else { - /* check if we've already memoized this stack */ - temp_ccs = IsInIndexTable(ccs->indexTable,cc); - - if (temp_ccs != EMPTY_STACK) - return temp_ccs; - else { - temp_ccs = CheckLoop(ccs,cc); - if (temp_ccs != NULL) { - /* we have recursed to an older CCS. Mark this in - * the index table, and emit a "back edge" into the - * log file. - */ - ccs->indexTable = AddToIndexTable(ccs->indexTable,temp_ccs,cc,1); - DecBackEdge(temp_ccs,ccs); - return temp_ccs; - } else { - return ActualPush(ccs,cc); - } - } - } - } -} - -static CostCentreStack * -CheckLoop ( CostCentreStack *ccs, CostCentre *cc ) -{ - while (ccs != EMPTY_STACK) { - if (ccs->cc == cc) - return ccs; - ccs = ccs->prevStack; - } - return NULL; -} - /* Append ccs1 to ccs2 (ignoring any CAF cost centre at the root of ccs1 */ #ifdef DEBUG -CostCentreStack *_AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ); +CostCentreStack *_appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ); CostCentreStack * -AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ) -#define AppendCCS _AppendCCS -{ - IF_DEBUG(prof, - if (ccs1 != ccs2) { - debugBelch("Appending "); - debugCCS(ccs1); - debugBelch(" to "); - debugCCS(ccs2); - debugBelch("\n");}); - return AppendCCS(ccs1,ccs2); +appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ) +#define appendCCS _appendCCS +{ + IF_DEBUG(prof, + if (ccs1 != ccs2) { + debugBelch("Appending "); + debugCCS(ccs1); + debugBelch(" to "); + debugCCS(ccs2); + debugBelch("\n");}); + return appendCCS(ccs1,ccs2); } #endif CostCentreStack * -AppendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ) +appendCCS ( CostCentreStack *ccs1, CostCentreStack *ccs2 ) { - CostCentreStack *ccs = NULL; + if (ccs1 == ccs2) { + return ccs1; + } + + if (ccs2 == CCS_MAIN || ccs2->cc->is_caf == CC_IS_CAF) { + // stop at a CAF element + return ccs1; + } - if (ccs1 == ccs2) { - return ccs1; - } + return pushCostCentre(appendCCS(ccs1, ccs2->prevStack), ccs2->cc); +} - if (ccs2->cc->is_caf == CC_IS_CAF) { - return ccs1; - } - - if (ccs2->prevStack != NULL) { - ccs = AppendCCS(ccs1, ccs2->prevStack); - } +// Pick one: +// #define RECURSION_DROPS +#define RECURSION_TRUNCATES - return PushCostCentre(ccs,ccs2->cc); +CostCentreStack * +pushCostCentre (CostCentreStack *ccs, CostCentre *cc) +{ + CostCentreStack *temp_ccs; + + if (ccs == EMPTY_STACK) + return actualPush(ccs,cc); + else { + if (ccs->cc == cc) + return ccs; + else { + // check if we've already memoized this stack + temp_ccs = isInIndexTable(ccs->indexTable,cc); + + if (temp_ccs != EMPTY_STACK) + return temp_ccs; + else { + temp_ccs = checkLoop(ccs,cc); + if (temp_ccs != NULL) { + // This CC is already in the stack somewhere. + // This could be recursion, or just calling + // another function with the same CC. + // A number of policies are possible at this + // point, we implement two here: + // - truncate the stack to the previous instance + // of this CC + // - ignore this push, return the same stack. + // + CostCentreStack *new_ccs; +#if defined(RECURSION_TRUNCATES) + new_ccs = temp_ccs; +#else // defined(RECURSION_DROPS) + new_ccs = ccs; +#endif + ccs->indexTable = addToIndexTable (ccs->indexTable, + new_ccs, cc, 1); + return new_ccs; + } else { + return actualPush (ccs,cc); + } + } + } + } } static CostCentreStack * -ActualPush ( CostCentreStack *ccs, CostCentre *cc ) +checkLoop (CostCentreStack *ccs, CostCentre *cc) { - CostCentreStack *new_ccs; - - /* allocate space for a new CostCentreStack */ - new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack)); - - return ActualPush_(ccs, cc, new_ccs); + while (ccs != EMPTY_STACK) { + if (ccs->cc == cc) + return ccs; + ccs = ccs->prevStack; + } + return NULL; } static CostCentreStack * -ActualPush_ ( CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs ) +actualPush (CostCentreStack *ccs, CostCentre *cc) { - /* assign values to each member of the structure */ - new_ccs->ccsID = CCS_ID++; - new_ccs->cc = cc; - new_ccs->prevStack = ccs; - - new_ccs->indexTable = EMPTY_TABLE; - - /* Initialise the various _scc_ counters to zero - */ - new_ccs->scc_count = 0; - - /* Initialize all other stats here. There should be a quick way - * that's easily used elsewhere too - */ - new_ccs->time_ticks = 0; - new_ccs->mem_alloc = 0; - new_ccs->inherited_ticks = 0; - new_ccs->inherited_alloc = 0; - - new_ccs->root = ccs->root; + CostCentreStack *new_ccs; - // Set the selected field. - ccsSetSelected(new_ccs); + // allocate space for a new CostCentreStack + new_ccs = (CostCentreStack *) arenaAlloc(prof_arena, sizeof(CostCentreStack)); - /* update the memoization table for the parent stack */ - if (ccs != EMPTY_STACK) - ccs->indexTable = AddToIndexTable(ccs->indexTable, new_ccs, cc, - 0/*not a back edge*/); - - /* make sure this CC is declared at the next heap/time sample */ - DecCCS(new_ccs); - - /* return a pointer to the new stack */ - return new_ccs; + return actualPush_(ccs, cc, new_ccs); } - static CostCentreStack * -IsInIndexTable(IndexTable *it, CostCentre *cc) +actualPush_ (CostCentreStack *ccs, CostCentre *cc, CostCentreStack *new_ccs) { - while (it!=EMPTY_TABLE) - { - if (it->cc==cc) - return it->ccs; - else - it = it->next; + /* assign values to each member of the structure */ + new_ccs->ccsID = CCS_ID++; + new_ccs->cc = cc; + new_ccs->prevStack = ccs; + new_ccs->root = ccs->root; + new_ccs->depth = ccs->depth + 1; + + new_ccs->indexTable = EMPTY_TABLE; + + /* Initialise the various _scc_ counters to zero + */ + new_ccs->scc_count = 0; + + /* Initialize all other stats here. There should be a quick way + * that's easily used elsewhere too + */ + new_ccs->time_ticks = 0; + new_ccs->mem_alloc = 0; + new_ccs->inherited_ticks = 0; + new_ccs->inherited_alloc = 0; + + // Set the selected field. + ccsSetSelected(new_ccs); + + /* update the memoization table for the parent stack */ + if (ccs != EMPTY_STACK) { + ccs->indexTable = addToIndexTable(ccs->indexTable, new_ccs, cc, + 0/*not a back edge*/); } - - /* otherwise we never found it so return EMPTY_TABLE */ - return EMPTY_TABLE; + + /* return a pointer to the new stack */ + return new_ccs; } -static IndexTable * -AddToIndexTable(IndexTable *it, CostCentreStack *new_ccs, - CostCentre *cc, unsigned int back_edge) +static CostCentreStack * +isInIndexTable(IndexTable *it, CostCentre *cc) { - IndexTable *new_it; - - new_it = arenaAlloc(prof_arena, sizeof(IndexTable)); + while (it!=EMPTY_TABLE) + { + if (it->cc == cc) + return it->ccs; + else + it = it->next; + } - new_it->cc = cc; - new_it->ccs = new_ccs; - new_it->next = it; - new_it->back_edge = back_edge; - return new_it; + /* otherwise we never found it so return EMPTY_TABLE */ + return EMPTY_TABLE; } -static void -DecCCS(CostCentreStack *ccs) +static IndexTable * +addToIndexTable (IndexTable *it, CostCentreStack *new_ccs, + CostCentre *cc, unsigned int back_edge) { - if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) { - if (ccs->prevStack == EMPTY_STACK) - fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ, - ccs->ccsID, ccs->cc->ccID); - else - fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ, - ccs->ccsID, ccs->cc->ccID, ccs->prevStack->ccsID); - } -} + IndexTable *new_it; -static void -DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs ) -{ - if (prof_file && RtsFlags.CcFlags.doCostCentres == COST_CENTRES_XML) { - if (ccs->prevStack == EMPTY_STACK) - fprintf(prof_file, "%d %ld 1 %ld\n", CCS_UQ, - ccs->ccsID, ccs->cc->ccID); - else - fprintf(prof_file, "%d %ld 2 %ld %ld\n", CCS_UQ, - ccs->ccsID, ccs->cc->ccID, oldccs->ccsID); - } + new_it = arenaAlloc(prof_arena, sizeof(IndexTable)); + + new_it->cc = cc; + new_it->ccs = new_ccs; + new_it->next = it; + new_it->back_edge = back_edge; + return new_it; } /* ----------------------------------------------------------------------------- @@ -585,12 +617,13 @@ DecBackEdge( CostCentreStack *ccs, CostCentreStack *oldccs ) * reports, so as not to cause confusion. */ static rtsBool -cc_to_ignore (CostCentre *cc) +ignoreCC (CostCentre *cc) { - if ( cc == CC_OVERHEAD + if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL && + ( cc == CC_OVERHEAD || cc == CC_DONT_CARE || cc == CC_GC - || cc == CC_SYSTEM) { + || cc == CC_SYSTEM)) { return rtsTrue; } else { return rtsFalse; @@ -598,13 +631,14 @@ cc_to_ignore (CostCentre *cc) } static rtsBool -ccs_to_ignore (CostCentreStack *ccs) +ignoreCCS (CostCentreStack *ccs) { - if ( ccs == CCS_OVERHEAD - || ccs == CCS_DONT_CARE - || ccs == CCS_GC - || ccs == CCS_SYSTEM) { - return rtsTrue; + if (RtsFlags.CcFlags.doCostCentres < COST_CENTRES_ALL && + ( ccs == CCS_OVERHEAD + || ccs == CCS_DONT_CARE + || ccs == CCS_GC + || ccs == CCS_SYSTEM)) { + return rtsTrue; } else { return rtsFalse; } @@ -617,88 +651,89 @@ ccs_to_ignore (CostCentreStack *ccs) static CostCentre *sorted_cc_list; static void -aggregate_cc_costs( CostCentreStack *ccs ) +aggregateCCCosts( CostCentreStack *ccs ) { - IndexTable *i; + IndexTable *i; - ccs->cc->mem_alloc += ccs->mem_alloc; - ccs->cc->time_ticks += ccs->time_ticks; + ccs->cc->mem_alloc += ccs->mem_alloc; + ccs->cc->time_ticks += ccs->time_ticks; - for (i = ccs->indexTable; i != 0; i = i->next) { - if (!i->back_edge) { - aggregate_cc_costs(i->ccs); + for (i = ccs->indexTable; i != 0; i = i->next) { + if (!i->back_edge) { + aggregateCCCosts(i->ccs); + } } - } } static void -insert_cc_in_sorted_list( CostCentre *new_cc ) +insertCCInSortedList( CostCentre *new_cc ) { - CostCentre **prev, *cc; + CostCentre **prev, *cc; - prev = &sorted_cc_list; - for (cc = sorted_cc_list; cc != NULL; cc = cc->link) { - if (new_cc->time_ticks > cc->time_ticks) { - new_cc->link = cc; - *prev = new_cc; - return; - } else { - prev = &(cc->link); + prev = &sorted_cc_list; + for (cc = sorted_cc_list; cc != NULL; cc = cc->link) { + if (new_cc->time_ticks > cc->time_ticks) { + new_cc->link = cc; + *prev = new_cc; + return; + } else { + prev = &(cc->link); + } } - } - new_cc->link = NULL; - *prev = new_cc; + new_cc->link = NULL; + *prev = new_cc; } static void -report_per_cc_costs( void ) +reportPerCCCosts( void ) { - CostCentre *cc, *next; - nat max_label_len, max_module_len; + CostCentre *cc, *next; + nat max_label_len, max_module_len; - aggregate_cc_costs(CCS_MAIN); - sorted_cc_list = NULL; + aggregateCCCosts(CCS_MAIN); + sorted_cc_list = NULL; - max_label_len = max_module_len = 0; + max_label_len = 11; // no shorter than the "COST CENTRE" header + max_module_len = 7; // no shorter than the "MODULE" header - for (cc = CC_LIST; cc != NULL; cc = next) { - next = cc->link; - if (cc->time_ticks > total_prof_ticks/100 - || cc->mem_alloc > total_alloc/100 - || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) { - insert_cc_in_sorted_list(cc); - - max_label_len = stg_max(strlen(cc->label), max_label_len); - max_module_len = stg_max(strlen(cc->module), max_module_len); + for (cc = CC_LIST; cc != NULL; cc = next) { + next = cc->link; + if (cc->time_ticks > total_prof_ticks/100 + || cc->mem_alloc > total_alloc/100 + || RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL) { + insertCCInSortedList(cc); + + max_label_len = stg_max(strlen(cc->label), max_label_len); + max_module_len = stg_max(strlen(cc->module), max_module_len); + } } - } - - fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE"); - fprintf(prof_file, "%6s %6s", "%time", "%alloc"); - if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { - fprintf(prof_file, " %5s %9s", "ticks", "bytes"); - } - fprintf(prof_file, "\n\n"); - - for (cc = sorted_cc_list; cc != NULL; cc = cc->link) { - if (cc_to_ignore(cc)) { - continue; - } - fprintf(prof_file, "%-*s %-*s", max_label_len, cc->label, max_module_len, cc->module); - fprintf(prof_file, "%6.1f %6.1f", - total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100), - total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat) - total_alloc * 100) - ); - - if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { - fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64, - (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_)); - } - fprintf(prof_file, "\n"); - } - fprintf(prof_file,"\n\n"); + fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE"); + fprintf(prof_file, "%6s %6s", "%time", "%alloc"); + if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { + fprintf(prof_file, " %5s %9s", "ticks", "bytes"); + } + fprintf(prof_file, "\n\n"); + + for (cc = sorted_cc_list; cc != NULL; cc = cc->link) { + if (ignoreCC(cc)) { + continue; + } + fprintf(prof_file, "%-*s %-*s", max_label_len, cc->label, max_module_len, cc->module); + fprintf(prof_file, "%6.1f %6.1f", + total_prof_ticks == 0 ? 0.0 : (cc->time_ticks / (StgFloat) total_prof_ticks * 100), + total_alloc == 0 ? 0.0 : (cc->mem_alloc / (StgFloat) + total_alloc * 100) + ); + + if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { + fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64, + (StgWord64)(cc->time_ticks), cc->mem_alloc*sizeof(W_)); + } + fprintf(prof_file, "\n"); + } + + fprintf(prof_file,"\n\n"); } /* ----------------------------------------------------------------------------- @@ -706,22 +741,18 @@ report_per_cc_costs( void ) -------------------------------------------------------------------------- */ static void -fprint_header( nat max_label_len, nat max_module_len ) +fprintHeader( nat max_label_len, nat max_module_len ) { - fprintf(prof_file, "%-24s %-10s individual inherited\n", "", ""); + fprintf(prof_file, "%-*s %-*s%6s %11s %11s %11s\n", max_label_len, "", max_module_len, "", "", "", "individual", "inherited"); - fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE"); - fprintf(prof_file, "%6s %10s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc"); + fprintf(prof_file, "%-*s %-*s", max_label_len, "COST CENTRE", max_module_len, "MODULE"); + fprintf(prof_file, "%6s %11s %5s %5s %5s %5s", "no.", "entries", "%time", "%alloc", "%time", "%alloc"); - if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { - fprintf(prof_file, " %5s %9s", "ticks", "bytes"); -#if defined(PROFILING_DETAIL_COUNTS) - fprintf(prof_file, " %8s %8s %8s %8s %8s %8s %8s", - "closures", "thunks", "funcs", "PAPs", "subfuns", "subcafs", "cafssub"); -#endif - } + if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { + fprintf(prof_file, " %5s %9s", "ticks", "bytes"); + } - fprintf(prof_file, "\n\n"); + fprintf(prof_file, "\n\n"); } void @@ -734,17 +765,9 @@ reportCCSProfiling( void ) total_prof_ticks = 0; total_alloc = 0; - count_ticks(CCS_MAIN); + countTickss(CCS_MAIN); - switch (RtsFlags.CcFlags.doCostCentres) { - case 0: - return; - case COST_CENTRES_XML: - gen_XML_logfile(); - return; - default: - break; - } + if (RtsFlags.CcFlags.doCostCentres == 0) return; fprintf(prof_file, "\t%s Time and Allocation Profiling Report (%s)\n", time_str(), "Final"); @@ -769,92 +792,83 @@ reportCCSProfiling( void ) showStgWord64(total_alloc * sizeof(W_), temp, rtsTrue/*commas*/)); -#if defined(PROFILING_DETAIL_COUNTS) - fprintf(prof_file, " (%lu closures)", total_allocs); -#endif fprintf(prof_file, " (excludes profiling overheads)\n\n"); - report_per_cc_costs(); + reportPerCCCosts(); - inherit_costs(CCS_MAIN); + inheritCosts(CCS_MAIN); reportCCS(pruneCCSTree(CCS_MAIN)); } static void findCCSMaxLens(CostCentreStack *ccs, nat indent, nat *max_label_len, nat *max_module_len) { - CostCentre *cc; - IndexTable *i; - - cc = ccs->cc; - - *max_label_len = stg_max(*max_label_len, indent + strlen(cc->label)); - *max_module_len = stg_max(*max_module_len, strlen(cc->module)); - - for (i = ccs->indexTable; i != 0; i = i->next) { - if (!i->back_edge) { - findCCSMaxLens(i->ccs, indent+1, max_label_len, max_module_len); + CostCentre *cc; + IndexTable *i; + + cc = ccs->cc; + + *max_label_len = stg_max(*max_label_len, indent + strlen(cc->label)); + *max_module_len = stg_max(*max_module_len, strlen(cc->module)); + + for (i = ccs->indexTable; i != 0; i = i->next) { + if (!i->back_edge) { + findCCSMaxLens(i->ccs, indent+1, max_label_len, max_module_len); + } } - } } static void logCCS(CostCentreStack *ccs, nat indent, nat max_label_len, nat max_module_len) { - CostCentre *cc; - IndexTable *i; + CostCentre *cc; + IndexTable *i; - cc = ccs->cc; - - /* Only print cost centres with non 0 data ! */ - - if ( RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL || - ! ccs_to_ignore(ccs)) - /* force printing of *all* cost centres if -P -P */ + cc = ccs->cc; + + /* Only print cost centres with non 0 data ! */ + + if (!ignoreCCS(ccs)) + /* force printing of *all* cost centres if -Pa */ { - fprintf(prof_file, "%-*s%-*s %-*s", - indent, "", max_label_len-indent, cc->label, max_module_len, cc->module); + fprintf(prof_file, "%-*s%-*s %-*s", + indent, "", max_label_len-indent, cc->label, max_module_len, cc->module); - fprintf(prof_file, "%6ld %11.0f %5.1f %5.1f %5.1f %5.1f", - ccs->ccsID, (double) ccs->scc_count, - total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0), - total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0), - total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0), - total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0) + fprintf(prof_file, "%6ld %11" FMT_Word64 " %5.1f %5.1f %5.1f %5.1f", + ccs->ccsID, ccs->scc_count, + total_prof_ticks == 0 ? 0.0 : ((double)ccs->time_ticks / (double)total_prof_ticks * 100.0), + total_alloc == 0 ? 0.0 : ((double)ccs->mem_alloc / (double)total_alloc * 100.0), + total_prof_ticks == 0 ? 0.0 : ((double)ccs->inherited_ticks / (double)total_prof_ticks * 100.0), + total_alloc == 0 ? 0.0 : ((double)ccs->inherited_alloc / (double)total_alloc * 100.0) ); - if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { - fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64, - (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_)); -#if defined(PROFILING_DETAIL_COUNTS) - fprintf(prof_file, " %8ld %8ld %8ld %8ld %8ld %8ld %8ld", - ccs->mem_allocs, ccs->thunk_count, - ccs->function_count, ccs->pap_count, - ccs->subsumed_fun_count, ccs->subsumed_caf_count, - ccs->caffun_subsumed); -#endif + if (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_VERBOSE) { + fprintf(prof_file, " %5" FMT_Word64 " %9" FMT_Word64, + (StgWord64)(ccs->time_ticks), ccs->mem_alloc*sizeof(W_)); + } + fprintf(prof_file, "\n"); } - fprintf(prof_file, "\n"); - } - for (i = ccs->indexTable; i != 0; i = i->next) { - if (!i->back_edge) { - logCCS(i->ccs, indent+1, max_label_len, max_module_len); + for (i = ccs->indexTable; i != 0; i = i->next) { + if (!i->back_edge) { + logCCS(i->ccs, indent+1, max_label_len, max_module_len); + } } - } } static void reportCCS(CostCentreStack *ccs) { - nat max_label_len, max_module_len; - max_label_len = max_module_len = 0; - - findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len); - - fprint_header(max_label_len, max_module_len); - logCCS(ccs, 0, max_label_len, max_module_len); + nat max_label_len, max_module_len; + + max_label_len = 11; // no shorter than "COST CENTRE" header + max_module_len = 7; // no shorter than "MODULE" header + + findCCSMaxLens(ccs, 0, &max_label_len, &max_module_len); + + fprintHeader(max_label_len, max_module_len); + logCCS(ccs, 0, max_label_len, max_module_len); } @@ -862,138 +876,176 @@ reportCCS(CostCentreStack *ccs) * ticks/allocations. */ static void -count_ticks(CostCentreStack *ccs) +countTickss(CostCentreStack *ccs) { - IndexTable *i; - - if (!ccs_to_ignore(ccs)) { - total_alloc += ccs->mem_alloc; - total_prof_ticks += ccs->time_ticks; - } - for (i = ccs->indexTable; i != NULL; i = i->next) - if (!i->back_edge) { - count_ticks(i->ccs); + IndexTable *i; + + if (!ignoreCCS(ccs)) { + total_alloc += ccs->mem_alloc; + total_prof_ticks += ccs->time_ticks; } + for (i = ccs->indexTable; i != NULL; i = i->next) + if (!i->back_edge) { + countTickss(i->ccs); + } } /* Traverse the cost centre stack tree and inherit ticks & allocs. */ static void -inherit_costs(CostCentreStack *ccs) +inheritCosts(CostCentreStack *ccs) { - IndexTable *i; + IndexTable *i; - if (ccs_to_ignore(ccs)) { return; } + if (ignoreCCS(ccs)) { return; } - ccs->inherited_ticks += ccs->time_ticks; - ccs->inherited_alloc += ccs->mem_alloc; + ccs->inherited_ticks += ccs->time_ticks; + ccs->inherited_alloc += ccs->mem_alloc; - for (i = ccs->indexTable; i != NULL; i = i->next) - if (!i->back_edge) { - inherit_costs(i->ccs); - ccs->inherited_ticks += i->ccs->inherited_ticks; - ccs->inherited_alloc += i->ccs->inherited_alloc; - } - - return; + for (i = ccs->indexTable; i != NULL; i = i->next) + if (!i->back_edge) { + inheritCosts(i->ccs); + ccs->inherited_ticks += i->ccs->inherited_ticks; + ccs->inherited_alloc += i->ccs->inherited_alloc; + } + + return; } +// +// Prune CCSs with zero entries, zero ticks or zero allocation from +// the tree, unless COST_CENTRES_ALL is on. +// static CostCentreStack * -pruneCCSTree( CostCentreStack *ccs ) +pruneCCSTree (CostCentreStack *ccs) { - CostCentreStack *ccs1; - IndexTable *i, **prev; - - prev = &ccs->indexTable; - for (i = ccs->indexTable; i != 0; i = i->next) { - if (i->back_edge) { continue; } + CostCentreStack *ccs1; + IndexTable *i, **prev; + + prev = &ccs->indexTable; + for (i = ccs->indexTable; i != 0; i = i->next) { + if (i->back_edge) { continue; } + + ccs1 = pruneCCSTree(i->ccs); + if (ccs1 == NULL) { + *prev = i->next; + } else { + prev = &(i->next); + } + } + + if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL + /* force printing of *all* cost centres if -P -P */ ) - ccs1 = pruneCCSTree(i->ccs); - if (ccs1 == NULL) { - *prev = i->next; + || ( ccs->indexTable != 0 ) + || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc ) + ) { + return ccs; } else { - prev = &(i->next); + return NULL; } - } - - if ( (RtsFlags.CcFlags.doCostCentres >= COST_CENTRES_ALL - /* force printing of *all* cost centres if -P -P */ ) - - || ( ccs->indexTable != 0 ) - || ( ccs->scc_count || ccs->time_ticks || ccs->mem_alloc ) - ) { - return ccs; - } else { - return NULL; - } } -/* ----------------------------------------------------------------------------- - Generate the XML time/allocation profile - -------------------------------------------------------------------------- */ - void -gen_XML_logfile( void ) +fprintCCS( FILE *f, CostCentreStack *ccs ) { - fprintf(prof_file, "%d %lu", TIME_UPD_UQ, total_prof_ticks); - - reportCCS_XML(pruneCCSTree(CCS_MAIN)); - - fprintf(prof_file, " 0\n"); + fprintf(f,"<"); + for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) { + fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label); + if (ccs->prevStack && ccs->prevStack != CCS_MAIN) { + fprintf(f,","); + } + } + fprintf(f,">"); } -static void -reportCCS_XML(CostCentreStack *ccs) +// Returns: True if the call stack ended with CAF +static rtsBool fprintCallStack (CostCentreStack *ccs) { - CostCentre *cc; - IndexTable *i; - - if (ccs_to_ignore(ccs)) { return; } - - cc = ccs->cc; - - fprintf(prof_file, " 1 %ld %" FMT_Word64 " %" FMT_Word64 " %" FMT_Word64, - ccs->ccsID, ccs->scc_count, (StgWord64)(ccs->time_ticks), ccs->mem_alloc); - - for (i = ccs->indexTable; i != 0; i = i->next) { - if (!i->back_edge) { - reportCCS_XML(i->ccs); + CostCentreStack *prev; + + fprintf(stderr,"%s.%s", ccs->cc->module, ccs->cc->label); + prev = ccs->prevStack; + while (prev && prev != CCS_MAIN) { + ccs = prev; + fprintf(stderr, ",\n called from %s.%s", + ccs->cc->module, ccs->cc->label); + prev = ccs->prevStack; } - } -} + fprintf(stderr, "\n"); -void -fprintCCS( FILE *f, CostCentreStack *ccs ) -{ - fprintf(f,"<"); - for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) { - fprintf(f,"%s.%s", ccs->cc->module, ccs->cc->label); - if (ccs->prevStack && ccs->prevStack != CCS_MAIN) { - fprintf(f,","); - } - } - fprintf(f,">"); + return (!strncmp(ccs->cc->label, "CAF", 3)); } /* For calling from .cmm code, where we can't reliably refer to stderr */ void -fprintCCS_stderr( CostCentreStack *ccs ) +fprintCCS_stderr (CostCentreStack *ccs, StgTSO *tso) { - fprintCCS(stderr, ccs); + rtsBool is_caf; + StgPtr frame; + StgStack *stack; + CostCentreStack *prev_ccs; + nat depth = 0; + const nat MAX_DEPTH = 10; // don't print gigantic chains of stacks + + fprintf(stderr, "*** Exception raised (reporting due to +RTS -xc), stack trace:\n "); + is_caf = fprintCallStack(ccs); + + // traverse the stack down to the enclosing update frame to + // find out where this CCS was evaluated from... + + stack = tso->stackobj; + frame = stack->sp; + prev_ccs = ccs; + + for (; is_caf && depth < MAX_DEPTH; depth++) + { + switch (get_itbl((StgClosure*)frame)->type) + { + case UPDATE_FRAME: + ccs = ((StgUpdateFrame*)frame)->header.prof.ccs; + frame += sizeofW(StgUpdateFrame); + if (ccs == CCS_MAIN) { + goto done; + } + if (ccs == prev_ccs) { + // ignore if this is the same as the previous stack, + // we're probably in library code and haven't + // accumulated any more interesting stack items + // since the last update frame. + break; + } + prev_ccs = ccs; + fprintf(stderr, " --> evaluated by: "); + is_caf = fprintCallStack(ccs); + break; + case UNDERFLOW_FRAME: + stack = ((StgUnderflowFrame*)frame)->next_chunk; + frame = stack->sp; + break; + case STOP_FRAME: + goto done; + default: + frame += stack_frame_sizeW((StgClosure*)frame); + break; + } + } +done: + return; } #ifdef DEBUG void debugCCS( CostCentreStack *ccs ) { - debugBelch("<"); - for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) { - debugBelch("%s.%s", ccs->cc->module, ccs->cc->label); - if (ccs->prevStack && ccs->prevStack != CCS_MAIN) { - debugBelch(","); - } - } - debugBelch(">"); + debugBelch("<"); + for (; ccs && ccs != CCS_MAIN; ccs = ccs->prevStack ) { + debugBelch("%s.%s", ccs->cc->module, ccs->cc->label); + if (ccs->prevStack && ccs->prevStack != CCS_MAIN) { + debugBelch(","); + } + } + debugBelch(">"); } #endif /* DEBUG */ diff --git a/rts/Profiling.h b/rts/Profiling.h index 3e365fe536..2ee3311c81 100644 --- a/rts/Profiling.h +++ b/rts/Profiling.h @@ -30,13 +30,12 @@ extern FILE *hp_file; #ifdef PROFILING -void gen_XML_logfile ( void ); void reportCCSProfiling ( void ); void PrintNewStackDecls ( void ); void fprintCCS( FILE *f, CostCentreStack *ccs ); -void fprintCCS_stderr( CostCentreStack *ccs ); +void fprintCCS_stderr (CostCentreStack *ccs, StgTSO *tso); #ifdef DEBUG void debugCCS( CostCentreStack *ccs ); diff --git a/rts/Proftimer.c b/rts/Proftimer.c index dfcc709625..82838184b7 100644 --- a/rts/Proftimer.c +++ b/rts/Proftimer.c @@ -65,11 +65,13 @@ initProfTimer( void ) startHeapProfTimer(); } +nat total_ticks = 0; void handleProfTick(void) { #ifdef PROFILING + total_ticks++; if (do_prof_ticks) { CCCS->time_ticks++; } diff --git a/rts/RaiseAsync.c b/rts/RaiseAsync.c index 775505f887..acc87b1938 100644 --- a/rts/RaiseAsync.c +++ b/rts/RaiseAsync.c @@ -739,7 +739,7 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, */ if (RtsFlags.ProfFlags.showCCSOnException) { - fprintCCS_stderr(tso->prof.CCCS); + fprintCCS_stderr(tso->prof.CCCS,tso); } #endif // ASSUMES: the thread is not already complete or dead diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c index 7009ea23a6..d2b4945c19 100644 --- a/rts/RtsFlags.c +++ b/rts/RtsFlags.c @@ -258,14 +258,12 @@ usage_text[] = { " -B Sound the bell at the start of each garbage collection", #if defined(PROFILING) "", -" -px Time/allocation profile (XML) (output file <program>.prof)", " -p Time/allocation profile (output file <program>.prof)", " -P More detailed Time/Allocation profile", " -Pa Give information about *all* cost centres", # if defined(PROFILING) "", -" -hx Heap residency profile (XML) (output file <program>.prof)", " -h<break-down> Heap residency profile (hp2ps) (output file <program>.hp)", " break-down: c = cost centre stack (default)", " m = module", @@ -936,10 +934,7 @@ error = rtsTrue; OPTION_SAFE; PROFILING_BUILD_ONLY( switch (rts_argv[arg][2]) { - case 'x': - RtsFlags.CcFlags.doCostCentres = COST_CENTRES_XML; - break; - case 'a': + case 'a': RtsFlags.CcFlags.doCostCentres = COST_CENTRES_ALL; break; default: diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm index b4a037d5d6..26f24f6f39 100644 --- a/rts/StgMiscClosures.cmm +++ b/rts/StgMiscClosures.cmm @@ -36,6 +36,19 @@ INFO_TABLE_RET (stg_stack_underflow_frame, UNDERFLOW_FRAME, P_ unused) } /* ---------------------------------------------------------------------------- + Restore a saved cost centre + ------------------------------------------------------------------------- */ + +INFO_TABLE_RET (stg_restore_cccs, RET_SMALL, W_ cccs) +{ +#if defined(PROFILING) + W_[CCCS] = Sp(1); +#endif + Sp_adj(2); + jump %ENTRY_CODE(Sp(0)); +} + +/* ---------------------------------------------------------------------------- Support for the bytecode interpreter. ------------------------------------------------------------------------- */ @@ -226,9 +239,6 @@ INFO_TABLE(stg_IND_PERM,1,0,IND_PERM,"IND_PERM","IND_PERM") LDV_ENTER(R1); - /* Enter PAP cost centre */ - ENTER_CCS_PAP_CL(R1); - /* For ticky-ticky, change the perm_ind to a normal ind on first * entry, so the number of ent_perm_inds is the number of *thunks* * entered again, not the number of subsequent entries. diff --git a/rts/Updates.cmm b/rts/Updates.cmm index 789bdd5e53..0b43b9cdf1 100644 --- a/rts/Updates.cmm +++ b/rts/Updates.cmm @@ -41,7 +41,7 @@ INFO_TABLE_RET( stg_upd_frame, UPDATE_FRAME, UPD_FRAME_PARAMS) W_ updatee; updatee = StgUpdateFrame_updatee(Sp); - + /* remove the update frame from the stack */ Sp = Sp + SIZEOF_StgUpdateFrame; diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c index 82e89a5470..1dad6c8df0 100644 --- a/rts/sm/Storage.c +++ b/rts/sm/Storage.c @@ -607,6 +607,9 @@ allocate (Capability *cap, lnat n) bdescr *bd; StgPtr p; + TICK_ALLOC_HEAP_NOCTR(n); + CCS_ALLOC(CCCS,n); + if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; @@ -638,9 +641,6 @@ allocate (Capability *cap, lnat n) /* small allocation (<LARGE_OBJECT_THRESHOLD) */ - TICK_ALLOC_HEAP_NOCTR(n); - CCS_ALLOC(CCCS,n); - bd = cap->r.rCurrentAlloc; if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { |