diff options
author | Andreas Klebinger <klebinger.andreas@gmx.at> | 2019-07-12 15:17:02 +0200 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2019-07-13 16:36:57 -0400 |
commit | 348cc8ebf1508b057a85551e5f6e699bb4cd8ade (patch) | |
tree | 4939e2ff1a39fe642f75de4762946005a8fbc041 /compiler | |
parent | 688a1b89584327d6ba0d3ec9558a3cd8a111c655 (diff) | |
download | haskell-348cc8ebf1508b057a85551e5f6e699bb4cd8ade.tar.gz |
Add two CmmSwitch optimizations.
Move switch expressions into a local variable when generating switches.
This avoids duplicating the expression if we translate the switch
to a tree search. This fixes #16933.
Further we now check if all branches of a switch have the same
destination, replacing the switch with a direct branch if that
is the case.
Both of these patterns appear in the ENTER macro used by the RTS
but are unlikely to occur in intermediate Cmm generated by GHC.
Nofib result summary:
--------------------------------------------------------------------------------
Program Size Allocs Runtime Elapsed TotalMem
--------------------------------------------------------------------------------
Min -0.0% -0.0% -15.7% -15.6% 0.0%
Max -0.0% 0.0% +5.4% +5.5% 0.0%
Geometric Mean -0.0% -0.0% -1.0% -1.0% -0.0%
Compiler allocations go up slightly: +0.2%
Example output before and after the change taken from RTS code below.
All but one of the memory loads `I32[_c3::I64 - 8]` are eliminated.
Instead the data is loaded once from memory in block c6.
Also the switch in block `ud` in the original code has been
eliminated completely.
Cmm without this commit:
```
stg_ap_0_fast() { // [R1]
{ []
}
{offset
ca: _c1::P64 = R1; // CmmAssign
goto c2; // CmmBranch
c2: if (_c1::P64 & 7 != 0) goto c4; else goto c6;
c6: _c3::I64 = I64[_c1::P64];
if (I32[_c3::I64 - 8] < 26 :: W32) goto ub; else goto ug;
ub: if (I32[_c3::I64 - 8] < 15 :: W32) goto uc; else goto ue;
uc: if (I32[_c3::I64 - 8] < 8 :: W32) goto c7; else goto ud;
ud: switch [8 .. 14] (%MO_SS_Conv_W32_W64(I32[_c3::I64 - 8])) {
case 8, 9, 10, 11, 12, 13, 14 : goto c4;
}
ue: if (I32[_c3::I64 - 8] >= 25 :: W32) goto c4; else goto uf;
uf: if (%MO_SS_Conv_W32_W64(I32[_c3::I64 - 8]) != 23) goto c7; else goto c4;
c4: R1 = _c1::P64;
call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
ug: if (I32[_c3::I64 - 8] < 28 :: W32) goto uh; else goto ui;
uh: if (I32[_c3::I64 - 8] < 27 :: W32) goto c7; else goto c8;
ui: if (I32[_c3::I64 - 8] < 29 :: W32) goto c8; else goto c7;
c8: _c1::P64 = P64[_c1::P64 + 8];
goto c2;
c7: R1 = _c1::P64;
call (_c3::I64)(R1) args: 8, res: 0, upd: 8;
}
}
```
Cmm with this commit:
```
stg_ap_0_fast() { // [R1]
{ []
}
{offset
ca: _c1::P64 = R1;
goto c2;
c2: if (_c1::P64 & 7 != 0) goto c4; else goto c6;
c6: _c3::I64 = I64[_c1::P64];
_ub::I64 = %MO_SS_Conv_W32_W64(I32[_c3::I64 - 8]);
if (_ub::I64 < 26) goto uc; else goto uh;
uc: if (_ub::I64 < 15) goto ud; else goto uf;
ud: if (_ub::I64 < 8) goto c7; else goto c4;
uf: if (_ub::I64 >= 25) goto c4; else goto ug;
ug: if (_ub::I64 != 23) goto c7; else goto c4;
c4: R1 = _c1::P64;
call (P64[Sp])(R1) args: 8, res: 0, upd: 8;
uh: if (_ub::I64 < 28) goto ui; else goto uj;
ui: if (_ub::I64 < 27) goto c7; else goto c8;
uj: if (_ub::I64 < 29) goto c8; else goto c7;
c8: _c1::P64 = P64[_c1::P64 + 8];
goto c2;
c7: R1 = _c1::P64;
call (_c3::I64)(R1) args: 8, res: 0, upd: 8;
}
}
```
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/cmm/CmmContFlowOpt.hs | 9 | ||||
-rw-r--r-- | compiler/cmm/CmmImplementSwitchPlans.hs | 30 | ||||
-rw-r--r-- | compiler/cmm/CmmPipeline.hs | 1 | ||||
-rw-r--r-- | compiler/cmm/CmmUtils.hs | 11 |
4 files changed, 47 insertions, 4 deletions
diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index 92dd7abba5..606da02969 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -18,7 +18,7 @@ import Hoopl.Label import BlockId import Cmm import CmmUtils -import CmmSwitch (mapSwitchTargets) +import CmmSwitch (mapSwitchTargets, switchTargetsToList) import Maybes import Panic import Util @@ -295,6 +295,13 @@ blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } , Just cond' <- maybeInvertCmmExpr cond = CmmCondBranch cond' f t (invertLikeliness l) + -- If all jump destinations of a switch go to the + -- same target eliminate the switch. + | CmmSwitch _expr targets <- shortcut_last + , (t:ts) <- switchTargetsToList targets + , all (== t) ts + = CmmBranch t + | otherwise = shortcut_last diff --git a/compiler/cmm/CmmImplementSwitchPlans.hs b/compiler/cmm/CmmImplementSwitchPlans.hs index 2e2da5d305..84ff007bef 100644 --- a/compiler/cmm/CmmImplementSwitchPlans.hs +++ b/compiler/cmm/CmmImplementSwitchPlans.hs @@ -32,6 +32,7 @@ import DynFlags -- code generation. cmmImplementSwitchPlans :: DynFlags -> CmmGraph -> UniqSM CmmGraph cmmImplementSwitchPlans dflags g + -- Switch generation done by backend (LLVM/C) | targetSupportsSwitch (hscTarget dflags) = return g | otherwise = do blocks' <- concat `fmap` mapM (visitSwitches dflags) (toBlockList g) @@ -39,19 +40,42 @@ cmmImplementSwitchPlans dflags g visitSwitches :: DynFlags -> CmmBlock -> UniqSM [CmmBlock] visitSwitches dflags block - | (entry@(CmmEntry _ scope), middle, CmmSwitch expr ids) <- blockSplit block + | (entry@(CmmEntry _ scope), middle, CmmSwitch vanillaExpr ids) <- blockSplit block = do let plan = createSwitchPlan ids + -- See Note [Floating switch expressions] + (assignSimple, simpleExpr) <- floatSwitchExpr dflags vanillaExpr - (newTail, newBlocks) <- implementSwitchPlan dflags scope expr plan + (newTail, newBlocks) <- implementSwitchPlan dflags scope simpleExpr plan - let block' = entry `blockJoinHead` middle `blockAppend` newTail + let block' = entry `blockJoinHead` middle `blockAppend` assignSimple `blockAppend` newTail return $ block' : newBlocks | otherwise = return [block] +-- Note [Floating switch expressions] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +-- When we translate a sparse switch into a search tree we would like +-- to compute the value we compare against only once. + +-- For this purpose we assign the switch expression to a local register +-- and then use this register when constructing the actual binary tree. + +-- This is important as the expression could contain expensive code like +-- memory loads or divisions which we REALLY don't want to duplicate. + +-- This happend in parts of the handwritten RTS Cmm code. See also #16933 + +-- See Note [Floating switch expressions] +floatSwitchExpr :: DynFlags -> CmmExpr -> UniqSM (Block CmmNode O O, CmmExpr) +floatSwitchExpr _ reg@(CmmReg {}) = return (emptyBlock, reg) +floatSwitchExpr dflags expr = do + (assign, expr') <- cmmMkAssign dflags expr <$> getUniqueM + return (BMiddle assign, expr') + -- Implementing a switch plan (returning a tail block) implementSwitchPlan :: DynFlags -> CmmTickScope -> CmmExpr -> SwitchPlan -> UniqSM (Block CmmNode O C, [CmmBlock]) diff --git a/compiler/cmm/CmmPipeline.hs b/compiler/cmm/CmmPipeline.hs index 2f481c272a..b8ae2b57ab 100644 --- a/compiler/cmm/CmmPipeline.hs +++ b/compiler/cmm/CmmPipeline.hs @@ -75,6 +75,7 @@ cpsTop hsc_env proc = -- Any work storing block Labels must be performed _after_ -- elimCommonBlocks + ----------- Implement switches ------------------------------------------ g <- {-# SCC "createSwitchPlans" #-} runUniqSM $ cmmImplementSwitchPlans dflags g dump Opt_D_dump_cmm_switch "Post switch plan" g diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index 8a3b857ed9..c6e647f75e 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -1,4 +1,5 @@ {-# LANGUAGE GADTs, RankNTypes #-} +{-# LANGUAGE BangPatterns #-} ----------------------------------------------------------------------------- -- @@ -35,6 +36,8 @@ module CmmUtils( cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord, cmmToWord, + cmmMkAssign, + isTrivialCmmExpr, hasNoGlobalRegs, isLit, isComparisonExpr, baseExpr, spExpr, hpExpr, spLimExpr, hpLimExpr, @@ -76,6 +79,7 @@ import BlockId import CLabel import Outputable import DynFlags +import Unique import CodeGen.Platform import Data.ByteString (ByteString) @@ -372,6 +376,13 @@ cmmToWord dflags e w = cmmExprWidth dflags e word = wordWidth dflags +cmmMkAssign :: DynFlags -> CmmExpr -> Unique -> (CmmNode O O, CmmExpr) +cmmMkAssign dflags expr uq = + let !ty = cmmExprType dflags expr + reg = (CmmLocal (LocalReg uq ty)) + in (CmmAssign reg expr, CmmReg reg) + + --------------------------------------------------- -- -- CmmExpr predicates |