summaryrefslogtreecommitdiff
path: root/compiler/codeGen
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2013-11-28 10:55:26 +0000
committerSimon Marlow <marlowsd@gmail.com>2013-11-28 12:52:23 +0000
commit9021737d6cc8f9eb992377dc7f7446017062b79c (patch)
tree6e07576c7c683f9028e82e421c26cde110fda2c0 /compiler/codeGen
parente9b0d3686486b79537a5f9acdf6244afa81e7c78 (diff)
downloadhaskell-9021737d6cc8f9eb992377dc7f7446017062b79c.tar.gz
Comments on slow-call-shortcutting
Diffstat (limited to 'compiler/codeGen')
-rw-r--r--compiler/codeGen/StgCmmLayout.hs36
1 files changed, 36 insertions, 0 deletions
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 4f715683f0..54e2e920f9 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -188,6 +188,7 @@ slowCall fun stg_args
" with pat " ++ unpackFS rts_fun)
return r
+ -- Note [avoid intermediate PAPs]
let n_args = length stg_args
if n_args > arity && optLevel dflags >= 2
then do
@@ -195,6 +196,15 @@ slowCall fun stg_args
fun_iptr <- (CmmReg . CmmLocal) `fmap`
assignTemp (closureInfoPtr dflags (cmmUntag dflags funv))
+ -- ToDo: we could do slightly better here by reusing the
+ -- continuation from the slow call, which we have in r.
+ -- Also we'd like to push the continuation on the stack
+ -- before the branch, so that we only get one copy of the
+ -- code that saves all the live variables across the
+ -- call, but that might need some improvements to the
+ -- special case in the stack layout code to handle this
+ -- (see Note [diamond proc point]).
+
fast_code <- getCode $
emitCall (NativeNodeCall, NativeReturn)
(entryCode dflags fun_iptr)
@@ -224,6 +234,32 @@ slowCall fun stg_args
return r
+-- Note [avoid intermediate PAPs]
+--
+-- A slow call which needs multiple generic apply patterns will be
+-- almost guaranteed to create one or more intermediate PAPs when
+-- applied to a function that takes the correct number of arguments.
+-- We try to avoid this situation by generating code to test whether
+-- we are calling a function with the correct number of arguments
+-- first, i.e.:
+--
+-- if (TAG(f) != 0) { // f is not a thunk
+-- if (f->info.arity == n) {
+-- ... make a fast call to f ...
+-- }
+-- }
+-- ... otherwise make the slow call ...
+--
+-- We *only* do this when the call requires multiple generic apply
+-- functions, which requires pushing extra stack frames and probably
+-- results in intermediate PAPs. (I say probably, because it might be
+-- that we're over-applying a function, but that seems even less
+-- likely).
+--
+-- This very rarely applies, but if it does happen in an inner loop it
+-- can have a severe impact on performance (#6084).
+
+
--------------
direct_call :: String
-> Convention -- e.g. NativeNodeCall or NativeDirectCall