summaryrefslogtreecommitdiff
path: root/rts/Apply.cmm
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2016-12-15 11:17:19 -0500
committerBen Gamari <ben@smart-cactus.org>2016-12-15 11:17:29 -0500
commit394231b301efb6b56654b0a480ab794fe3b7e4db (patch)
treeaae8fd1ee76ed8a06b9c87beb5dd2cb1aa8187e7 /rts/Apply.cmm
parent5c76f834b5b7f2ee9712d0888a8b1b186b77dee5 (diff)
downloadhaskell-394231b301efb6b56654b0a480ab794fe3b7e4db.tar.gz
Fix cost-centre-stacks bug (#5654)
This fixes some cases of wrong stacks being generated by the profiler. For background and details on the fix see `Note [Evaluating functions with profiling]` in `rts/Apply.cmm`. This does have an impact on allocations for some programs when profiling. nofib results: ``` k-nucleotide +0.0% +8.8% +11.0% +11.0% 0.0% puzzle +0.0% +12.5% 0.244 0.246 0.0% typecheck 0.0% +8.7% +16.1% +16.2% 0.0% ------------------------------------------------------------------------ -------- Min -0.0% -0.0% -34.4% -35.5% -25.0% Max +0.0% +12.5% +48.9% +49.4% +10.6% Geometric Mean +0.0% +0.6% +2.0% +1.8% -0.3% ``` But runtimes don't seem to be affected much, and the examples I looked at were completely legitimate. For example, in puzzle we have this: ``` position :: ItemType -> StateType -> BankType position Bono = bonoPos position Edge = edgePos position Larry = larryPos position Adam = adamPos ``` where the identifiers on the rhs are all record selectors. Previously the profiler gave a stack that looked like ``` position bonoPos ... ``` i.e. `bonoPos` was at the same level of the call stack as `position`, but now it looks like ``` position bonoPos ... ``` I used the normaliser from the testsuite to diff the profiling output from other nofib programs and they all looked better. Test Plan: * the broken test passes * validate * compiled and ran all of nofib, measured perf, diff'd several .prof files Reviewers: niteria, erikd, austin, scpmw, bgamari Reviewed By: bgamari Subscribers: thomie Differential Revision: https://phabricator.haskell.org/D2804 GHC Trac Issues: #5654, #10007
Diffstat (limited to 'rts/Apply.cmm')
-rw-r--r--rts/Apply.cmm107
1 files changed, 107 insertions, 0 deletions
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index 149a320f25..3a73ce09a4 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -30,7 +30,114 @@ stg_ap_0_fast ( P_ fun )
IF_DEBUG(sanity,
ccall checkStackFrame(Sp "ptr"));
+#if !defined(PROFILING)
+
ENTER(fun);
+
+#else
+
+/*
+ Note [Evaluating functions with profiling]
+
+ If we evaluate something like
+
+ let f = {-# SCC "f" #-} g
+
+ where g is a function, then updating the thunk for f to point to g
+ would be incorrect: we've lost the SCC annotation. In general, when
+ we evaluate a function and the current CCS is different from the one
+ stored in the function, we need to return a function with the
+ correct CCS in it.
+
+ The mechanism we use to wrap the function is to create a
+ zero-argument PAP as a proxy object to hold the new CCS, and return
+ that.
+*/
+
+again:
+ W_ info;
+ W_ untaggedfun;
+ untaggedfun = UNTAG(fun);
+ info = %INFO_PTR(untaggedfun);
+ switch [INVALID_OBJECT .. N_CLOSURE_TYPES]
+ (TO_W_( %INFO_TYPE(%STD_INFO(info)) )) {
+ case
+ IND,
+ IND_STATIC:
+ {
+ fun = StgInd_indirectee(fun);
+ goto again;
+ }
+ case
+ FUN,
+ FUN_1_0,
+ FUN_0_1,
+ FUN_2_0,
+ FUN_1_1,
+ FUN_0_2,
+ FUN_STATIC,
+ BCO:
+ {
+ if (CCCS == StgHeader_ccs(untaggedfun)) {
+ return (fun);
+ } else {
+ // We're going to build a new PAP, with zero extra
+ // arguments and therefore the same arity as the
+ // original function. In other words, we're using a
+ // zero-argument PAP as an indirection to the
+ // function, so that we can attach a different CCS to
+ // it.
+ HP_CHK_GEN(SIZEOF_StgPAP);
+ TICK_ALLOC_PAP(SIZEOF_StgPAP, 0);
+ // attribute this allocation to the "overhead of profiling"
+ CCS_ALLOC(BYTES_TO_WDS(SIZEOF_StgPAP), CCS_OVERHEAD);
+ P_ pap;
+ W_ arity;
+ pap = Hp - SIZEOF_StgPAP + WDS(1);
+ SET_HDR(pap, stg_PAP_info, CCCS);
+ arity = TO_W_(StgFunInfoExtra_arity(%FUN_INFO(info)));
+ StgPAP_arity(pap) = arity;
+ StgPAP_fun(pap) = fun;
+ StgPAP_n_args(pap) = 0;
+ return (pap);
+ }
+ }
+ case PAP:
+ {
+ if (CCCS == StgHeader_ccs(untaggedfun)) {
+ return (fun);
+ } else {
+ // We're going to copy this PAP, and put the new CCS in it
+ fun = untaggedfun;
+ W_ size;
+ size = SIZEOF_StgPAP + WDS(TO_W_(StgPAP_n_args(fun)));
+ HP_CHK_GEN(size);
+ TICK_ALLOC_PAP(size, 0);
+ // attribute this allocation to the "overhead of profiling"
+ CCS_ALLOC(BYTES_TO_WDS(SIZEOF_StgPAP), CCS_OVERHEAD);
+ P_ pap;
+ pap = Hp - size + WDS(1);
+ SET_HDR(pap, stg_PAP_info, CCCS);
+ StgPAP_arity(pap) = StgPAP_arity(fun);
+ StgPAP_n_args(pap) = StgPAP_n_args(fun);
+ StgPAP_fun(pap) = StgPAP_fun(fun);
+ W_ i;
+ i = TO_W_(StgPAP_n_args(fun));
+ loop:
+ if (i == 0) {
+ return (pap);
+ }
+ i = i - 1;
+ StgPAP_payload(pap,i) = StgPAP_payload(fun,i);
+ goto loop;
+ }
+ }
+ default:
+ {
+ jump %ENTRY_CODE(info) (UNTAG(fun));
+ }
+ }
+#endif
}
/* -----------------------------------------------------------------------------