summaryrefslogtreecommitdiff
path: root/compiler/cmm/CmmContFlowOpt.hs
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2012-08-01 10:36:08 +0100
committerSimon Marlow <marlowsd@gmail.com>2012-08-02 11:56:02 +0100
commit7930221e2bb11b46ae32f0dda8f945b2893637c6 (patch)
treea87b72db3025af0822338147494d09a4a856f120 /compiler/cmm/CmmContFlowOpt.hs
parent6ede0067a91a0da7f682c3ea1d6df938616e983a (diff)
downloadhaskell-7930221e2bb11b46ae32f0dda8f945b2893637c6.tar.gz
Don't shortcut call-returns when not splitting proc points
See Note [shortcut call returns]
Diffstat (limited to 'compiler/cmm/CmmContFlowOpt.hs')
-rw-r--r--compiler/cmm/CmmContFlowOpt.hs54
1 files changed, 45 insertions, 9 deletions
diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs
index b2dbef49da..964f9f56da 100644
--- a/compiler/cmm/CmmContFlowOpt.hs
+++ b/compiler/cmm/CmmContFlowOpt.hs
@@ -24,12 +24,12 @@ import Prelude hiding (succ, unzip, zip)
--
-----------------------------------------------------------------------------
-cmmCfgOpts :: CmmGraph -> CmmGraph
-cmmCfgOpts g = fst (blockConcat g)
+cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
+cmmCfgOpts split g = fst (blockConcat split g)
-cmmCfgOptsProc :: CmmDecl -> CmmDecl
-cmmCfgOptsProc (CmmProc info lbl g) = CmmProc info' lbl g'
- where (g', env) = blockConcat g
+cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
+cmmCfgOptsProc split (CmmProc info lbl g) = CmmProc info' lbl g'
+ where (g', env) = blockConcat split g
info' = info{ info_tbls = new_info_tbls }
new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
@@ -44,7 +44,7 @@ cmmCfgOptsProc (CmmProc info lbl g) = CmmProc info' lbl g'
| otherwise
= (k,info)
-cmmCfgOptsProc top = top
+cmmCfgOptsProc _ top = top
-----------------------------------------------------------------------------
@@ -54,6 +54,7 @@ cmmCfgOptsProc top = top
-----------------------------------------------------------------------------
-- This optimisation does three things:
+--
-- - If a block finishes with an unconditional branch, then we may
-- be able to concatenate the block it points to and remove the
-- branch. We do this either if the destination block is small
@@ -63,6 +64,7 @@ cmmCfgOptsProc top = top
-- - If a block finishes in a call whose continuation block is a
-- goto, then we can shortcut the destination, making the
-- continuation block the destination of the goto.
+-- (but see Note [shortcut call returns])
--
-- - removes any unreachable blocks from the graph. This is a side
-- effect of starting with a postorder DFS traversal of the graph
@@ -93,8 +95,8 @@ cmmCfgOptsProc top = top
-- which labels we have renamed and apply the mapping at the end
-- with replaceLabels.
-blockConcat :: CmmGraph -> (CmmGraph, BlockEnv BlockId)
-blockConcat g@CmmGraph { g_entry = entry_id }
+blockConcat :: Bool -> CmmGraph -> (CmmGraph, BlockEnv BlockId)
+blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
= (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map)
where
-- we might be able to shortcut the entry BlockId itself
@@ -125,7 +127,8 @@ blockConcat g@CmmGraph { g_entry = entry_id }
-- calls: if we can shortcut the continuation label, then
-- we must *also* remember to substitute for the label in the
-- code, because we will push it somewhere.
- | Just b' <- callContinuation_maybe last
+ | splitting_procs -- Note [shortcut call returns]
+ , Just b' <- callContinuation_maybe last
, Just blk' <- mapLookup b' blocks
, Just dest <- canShortcut blk'
= (blocks, mapInsert b' dest shortcut_map)
@@ -184,6 +187,39 @@ okToDuplicate block
-- has a CmmExpr inside it.
_otherwise -> False
+
+{- Note [shortcut call returns]
+
+Consider this code that you might get from a recursive let-no-escape:
+
+ goto L1
+ L1:
+ if (Hp > HpLim) then L2 else L3
+ L2:
+ call stg_gc_noregs returns to L4
+ L4:
+ goto L1
+ L3:
+ ...
+ goto L1
+
+Then the control-flow optimiser shortcuts L4. But that turns L1
+into the call-return proc point, and every iteration of the loop
+has to shuffle variables to and from the stack. So we must *not*
+shortcut L4.
+
+Moreover not shortcutting call returns is probably fine. If L4 can
+concat with its branch target then it will still do so. And we
+save some compile time because we don't have to traverse all the
+code in replaceLabels.
+
+However, we probably do want to do this if we are splitting proc
+points, because L1 will be a proc-point anyway, so merging it with L4
+reduces the number of proc points. Unfortunately recursive
+let-no-escapes won't generate very good code with proc-point
+splitting on - we should probably
+-}
+
------------------------------------------------------------------------
-- Map over the CmmGraph, replacing each label with its mapping in the
-- supplied BlockEnv.