summaryrefslogtreecommitdiff
path: root/compiler/codeGen/StgCmmUtils.hs
diff options
context:
space:
mode:
authorJoachim Breitner <mail@joachim-breitner.de>2015-03-30 10:20:14 +0200
committerJoachim Breitner <mail@joachim-breitner.de>2015-03-30 10:22:27 +0200
commitde1160be047790afde4ec76de0a81ba3be0c73fa (patch)
tree269bbb98b8451d2cf1ccf1a86dfaae69f2acb50e /compiler/codeGen/StgCmmUtils.hs
parente24f638158f96f80e476000cd7ce8555987d84f2 (diff)
downloadhaskell-de1160be047790afde4ec76de0a81ba3be0c73fa.tar.gz
Refactor the story around switches (#10137)
This re-implements the code generation for case expressions at the Stg → Cmm level, both for data type cases as well as for integral literal cases. (Cases on float are still treated as before). The goal is to allow for fancier strategies in implementing them, for a cleaner separation of the strategy from the gritty details of Cmm, and to run this later than the Common Block Optimization, allowing for one way to attack #10124. The new module CmmSwitch contains a number of notes explaining this changes. For example, it creates larger consecutive jump tables than the previous code, if possible. nofib shows little significant overall improvement of runtime. The rather large wobbling comes from changes in the code block order (see #8082, not much we can do about it). But the decrease in code size alone makes this worthwhile. ``` Program Size Allocs Runtime Elapsed TotalMem Min -1.8% 0.0% -6.1% -6.1% -2.9% Max -0.7% +0.0% +5.6% +5.7% +7.8% Geometric Mean -1.4% -0.0% -0.3% -0.3% +0.0% ``` Compilation time increases slightly: ``` -1 s.d. ----- -2.0% +1 s.d. ----- +2.5% Average ----- +0.3% ``` The test case T783 regresses a lot, but it is the only one exhibiting any regression. The cause is the changed order of branches in an if-then-else tree, which makes the hoople data flow analysis traverse the blocks in a suboptimal order. Reverting that gets rid of this regression, but has a consistent, if only very small (+0.2%), negative effect on runtime. So I conclude that this test is an extreme outlier and no reason to change the code. Differential Revision: https://phabricator.haskell.org/D720
Diffstat (limited to 'compiler/codeGen/StgCmmUtils.hs')
-rw-r--r--compiler/codeGen/StgCmmUtils.hs240
1 files changed, 59 insertions, 181 deletions
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index 98295c9836..9e056582f3 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -51,6 +51,7 @@ import MkGraph
import CodeGen.Platform
import CLabel
import CmmUtils
+import CmmSwitch
import ForeignCall
import IdInfo
@@ -60,7 +61,6 @@ import SMRep
import Module
import Literal
import Digraph
-import ListSetOps
import Util
import Unique
import DynFlags
@@ -68,11 +68,11 @@ import FastString
import Outputable
import qualified Data.ByteString as BS
+import qualified Data.Map as M
import Data.Char
import Data.List
import Data.Ord
import Data.Word
-import Data.Maybe
-------------------------------------------------------------------------
@@ -87,14 +87,6 @@ cgLit (MachStr s) = newByteStringCLit (BS.unpack s)
cgLit other_lit = do dflags <- getDynFlags
return (mkSimpleLit dflags other_lit)
-mkLtOp :: DynFlags -> Literal -> MachOp
--- On signed literals we must do a signed comparison
-mkLtOp dflags (MachInt _) = MO_S_Lt (wordWidth dflags)
-mkLtOp _ (MachFloat _) = MO_F_Lt W32
-mkLtOp _ (MachDouble _) = MO_F_Lt W64
-mkLtOp dflags lit = MO_U_Lt (typeWidth (cmmLitType dflags (mkSimpleLit dflags lit)))
- -- ToDo: seems terribly indirect!
-
mkSimpleLit :: DynFlags -> Literal -> CmmLit
mkSimpleLit dflags (MachChar c) = CmmInt (fromIntegral (ord c)) (wordWidth dflags)
mkSimpleLit dflags MachNullAddr = zeroCLit dflags
@@ -460,174 +452,52 @@ emitSwitch :: CmmExpr -- Tag to switch on
-- behaviour outside this range is
-- undefined
-> FCode ()
-emitSwitch tag_expr branches mb_deflt lo_tag hi_tag
- = do { dflags <- getDynFlags
- ; mkCmmSwitch (via_C dflags) tag_expr branches mb_deflt lo_tag hi_tag }
- where
- via_C dflags | HscC <- hscTarget dflags = True
- | otherwise = False
-
-
-mkCmmSwitch :: Bool -- True <=> never generate a
- -- conditional tree
- -> CmmExpr -- Tag to switch on
- -> [(ConTagZ, CmmAGraphScoped)] -- Tagged branches
- -> Maybe CmmAGraphScoped -- Default branch (if any)
- -> ConTagZ -> ConTagZ -- Min and Max possible values;
- -- behaviour outside this range is
- -- undefined
- -> FCode ()
-- First, two rather common cases in which there is no work to do
-mkCmmSwitch _ _ [] (Just code) _ _ = emit (fst code)
-mkCmmSwitch _ _ [(_,code)] Nothing _ _ = emit (fst code)
+emitSwitch _ [] (Just code) _ _ = emit (fst code)
+emitSwitch _ [(_,code)] Nothing _ _ = emit (fst code)
-- Right, off we go
-mkCmmSwitch via_C tag_expr branches mb_deflt lo_tag hi_tag = do
+emitSwitch tag_expr branches mb_deflt lo_tag hi_tag = do
join_lbl <- newLabelC
mb_deflt_lbl <- label_default join_lbl mb_deflt
branches_lbls <- label_branches join_lbl branches
tag_expr' <- assignTemp' tag_expr
- emit =<< mk_switch tag_expr' (sortBy (comparing fst) branches_lbls)
- mb_deflt_lbl lo_tag hi_tag via_C
+ -- Sort the branches before calling mk_discrete_switch
+ let branches_lbls' = [ (fromIntegral i, l) | (i,l) <- sortBy (comparing fst) branches_lbls ]
+ let range = (fromIntegral lo_tag, fromIntegral hi_tag)
- -- Sort the branches before calling mk_switch
+ emit $ mk_discrete_switch False tag_expr' branches_lbls' mb_deflt_lbl range
emitLabel join_lbl
-mk_switch :: CmmExpr -> [(ConTagZ, BlockId)]
+mk_discrete_switch :: Bool -- ^ Use signed comparisons
+ -> CmmExpr
+ -> [(Integer, BlockId)]
-> Maybe BlockId
- -> ConTagZ -> ConTagZ -> Bool
- -> FCode CmmAGraph
+ -> (Integer, Integer)
+ -> CmmAGraph
-- SINGLETON TAG RANGE: no case analysis to do
-mk_switch _tag_expr [(tag, lbl)] _ lo_tag hi_tag _via_C
+mk_discrete_switch _ _tag_expr [(tag, lbl)] _ (lo_tag, hi_tag)
| lo_tag == hi_tag
= ASSERT( tag == lo_tag )
- return (mkBranch lbl)
+ mkBranch lbl
-- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
-mk_switch _tag_expr [(_tag,lbl)] Nothing _ _ _
- = return (mkBranch lbl)
+mk_discrete_switch _ _tag_expr [(_tag,lbl)] Nothing _
+ = mkBranch lbl
-- The simplifier might have eliminated a case
-- so we may have e.g. case xs of
-- [] -> e
-- In that situation we can be sure the (:) case
-- can't happen, so no need to test
--- SINGLETON BRANCH: one equality check to do
-mk_switch tag_expr [(tag,lbl)] (Just deflt) _ _ _
- = do dflags <- getDynFlags
- let cond = cmmNeWord dflags tag_expr (mkIntExpr dflags tag)
- -- We have lo_tag < hi_tag, but there's only one branch,
- -- so there must be a default
- return (mkCbranch cond deflt lbl)
-
--- ToDo: we might want to check for the two branch case, where one of
--- the branches is the tag 0, because comparing '== 0' is likely to be
--- more efficient than other kinds of comparison.
-
--- DENSE TAG RANGE: use a switch statment.
---
--- We also use a switch uncoditionally when compiling via C, because
--- this will get emitted as a C switch statement and the C compiler
--- should do a good job of optimising it. Also, older GCC versions
--- (2.95 in particular) have problems compiling the complicated
--- if-trees generated by this code, so compiling to a switch every
--- time works around that problem.
---
-mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
- | use_switch -- Use a switch
- = do let
- find_branch :: ConTagZ -> Maybe BlockId
- find_branch i = case (assocMaybe branches i) of
- Just lbl -> Just lbl
- Nothing -> mb_deflt
-
- -- NB. we have eliminated impossible branches at
- -- either end of the range (see below), so the first
- -- tag of a real branch is real_lo_tag (not lo_tag).
- arms :: [Maybe BlockId]
- arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]]
- dflags <- getDynFlags
- return (mkSwitch (cmmOffset dflags tag_expr (- real_lo_tag)) arms)
-
- -- if we can knock off a bunch of default cases with one if, then do so
- | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches
- = do dflags <- getDynFlags
- stmts <- mk_switch tag_expr branches mb_deflt
- lowest_branch hi_tag via_C
- mkCmmIfThenElse
- (cmmULtWord dflags tag_expr (mkIntExpr dflags lowest_branch))
- (mkBranch deflt)
- stmts
-
- | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches
- = do dflags <- getDynFlags
- stmts <- mk_switch tag_expr branches mb_deflt
- lo_tag highest_branch via_C
- mkCmmIfThenElse
- (cmmUGtWord dflags tag_expr (mkIntExpr dflags highest_branch))
- (mkBranch deflt)
- stmts
-
- | otherwise -- Use an if-tree
- = do dflags <- getDynFlags
- lo_stmts <- mk_switch tag_expr lo_branches mb_deflt
- lo_tag (mid_tag-1) via_C
- hi_stmts <- mk_switch tag_expr hi_branches mb_deflt
- mid_tag hi_tag via_C
- mkCmmIfThenElse
- (cmmUGeWord dflags tag_expr (mkIntExpr dflags mid_tag))
- hi_stmts
- lo_stmts
- -- we test (e >= mid_tag) rather than (e < mid_tag), because
- -- the former works better when e is a comparison, and there
- -- are two tags 0 & 1 (mid_tag == 1). In this case, the code
- -- generator can reduce the condition to e itself without
- -- having to reverse the sense of the comparison: comparisons
- -- can't always be easily reversed (eg. floating
- -- pt. comparisons).
- where
- use_switch = {- pprTrace "mk_switch" (
- ppr tag_expr <+> text "n_tags:" <+> int n_tags <+>
- text "branches:" <+> ppr (map fst branches) <+>
- text "n_branches:" <+> int n_branches <+>
- text "lo_tag:" <+> int lo_tag <+>
- text "hi_tag:" <+> int hi_tag <+>
- text "real_lo_tag:" <+> int real_lo_tag <+>
- text "real_hi_tag:" <+> int real_hi_tag) $ -}
- ASSERT( n_branches > 1 && n_tags > 1 )
- n_tags > 2 && (via_C || (dense && big_enough))
- -- up to 4 branches we use a decision tree, otherwise
- -- a switch (== jump table in the NCG). This seems to be
- -- optimal, and corresponds with what gcc does.
- big_enough = n_branches > 4
- dense = n_branches > (n_tags `div` 2)
- n_branches = length branches
-
- -- ignore default slots at each end of the range if there's
- -- no default branch defined.
- lowest_branch = fst (head branches)
- highest_branch = fst (last branches)
-
- real_lo_tag
- | isNothing mb_deflt = lowest_branch
- | otherwise = lo_tag
-
- real_hi_tag
- | isNothing mb_deflt = highest_branch
- | otherwise = hi_tag
-
- n_tags = real_hi_tag - real_lo_tag + 1
-
- -- INVARIANT: Provided hi_tag > lo_tag (which is true)
- -- lo_tag <= mid_tag < hi_tag
- -- lo_branches have tags < mid_tag
- -- hi_branches have tags >= mid_tag
- (lo_branches, mid_tag, hi_branches) = divideBranches branches
-
+-- SOMETHING MORE COMPLICATED: defer to CmmImplementSwitchPlans
+-- See Note [Cmm Switches, the general plan] in CmmSwitch
+mk_discrete_switch signed tag_expr branches mb_deflt range
+ = mkSwitch tag_expr $ mkSwitchTargets signed range mb_deflt (M.fromList branches)
divideBranches :: Ord a => [(a,b)] -> ([(a,b)], a, [(a,b)])
divideBranches branches = (lo_branches, mid, hi_branches)
@@ -644,20 +514,34 @@ emitCmmLitSwitch :: CmmExpr -- Tag to switch on
-> [(Literal, CmmAGraphScoped)] -- Tagged branches
-> CmmAGraphScoped -- Default branch (always)
-> FCode () -- Emit the code
--- Used for general literals, whose size might not be a word,
--- where there is always a default case, and where we don't know
--- the range of values for certain. For simplicity we always generate a tree.
---
--- ToDo: for integers we could do better here, perhaps by generalising
--- mk_switch and using that. --SDM 15/09/2004
emitCmmLitSwitch _scrut [] deflt = emit $ fst deflt
emitCmmLitSwitch scrut branches deflt = do
scrut' <- assignTemp' scrut
join_lbl <- newLabelC
deflt_lbl <- label_code join_lbl deflt
branches_lbls <- label_branches join_lbl branches
- emit =<< mk_lit_switch scrut' deflt_lbl noBound
- (sortBy (comparing fst) branches_lbls)
+
+ dflags <- getDynFlags
+ let cmm_ty = cmmExprType dflags scrut
+ rep = typeWidth cmm_ty
+
+ -- We find the necessary type information in the literals in the branches
+ let signed = case head branches of
+ (MachInt _, _) -> True
+ (MachInt64 _, _) -> True
+ _ -> False
+
+ let range | signed = (tARGET_MIN_INT dflags, tARGET_MAX_INT dflags)
+ | otherwise = (0, tARGET_MAX_WORD dflags)
+
+ if isFloatType cmm_ty
+ then emit =<< mk_float_switch rep scrut' deflt_lbl noBound branches_lbls
+ else emit $ mk_discrete_switch
+ signed
+ scrut'
+ [(litValue lit,l) | (lit,l) <- branches_lbls]
+ (Just deflt_lbl)
+ range
emitLabel join_lbl
-- | lower bound (inclusive), upper bound (exclusive)
@@ -666,31 +550,23 @@ type LitBound = (Maybe Literal, Maybe Literal)
noBound :: LitBound
noBound = (Nothing, Nothing)
-mk_lit_switch :: CmmExpr -> BlockId
+mk_float_switch :: Width -> CmmExpr -> BlockId
-> LitBound
-> [(Literal,BlockId)]
-> FCode CmmAGraph
-mk_lit_switch scrut deflt bounds [(lit,blk)]
- = do
- dflags <- getDynFlags
- let
- cmm_lit = mkSimpleLit dflags lit
- cmm_ty = cmmLitType dflags cmm_lit
- rep = typeWidth cmm_ty
- ne = if isFloatType cmm_ty then MO_F_Ne rep else MO_Ne rep
-
- return $ if lit `onlyWithinBounds'` bounds
- then mkBranch blk
- else mkCbranch (CmmMachOp ne [scrut, CmmLit cmm_lit]) deflt blk
+mk_float_switch rep scrut deflt _bounds [(lit,blk)]
+ = do dflags <- getDynFlags
+ return $ mkCbranch (cond dflags) deflt blk
where
- -- If the bounds already imply scrut == lit, then we can skip the final check (#10129)
- l `onlyWithinBounds'` (Just lo, Just hi) = l `onlyWithinBounds` (lo, hi)
- _ `onlyWithinBounds'` _ = False
+ cond dflags = CmmMachOp ne [scrut, CmmLit cmm_lit]
+ where
+ cmm_lit = mkSimpleLit dflags lit
+ ne = MO_F_Ne rep
-mk_lit_switch scrut deflt_blk_id (lo_bound, hi_bound) branches
+mk_float_switch rep scrut deflt_blk_id (lo_bound, hi_bound) branches
= do dflags <- getDynFlags
- lo_blk <- mk_lit_switch scrut deflt_blk_id bounds_lo lo_branches
- hi_blk <- mk_lit_switch scrut deflt_blk_id bounds_hi hi_branches
+ lo_blk <- mk_float_switch rep scrut deflt_blk_id bounds_lo lo_branches
+ hi_blk <- mk_float_switch rep scrut deflt_blk_id bounds_hi hi_branches
mkCmmIfThenElse (cond dflags) lo_blk hi_blk
where
(lo_branches, mid_lit, hi_branches) = divideBranches branches
@@ -698,8 +574,10 @@ mk_lit_switch scrut deflt_blk_id (lo_bound, hi_bound) branches
bounds_lo = (lo_bound, Just mid_lit)
bounds_hi = (Just mid_lit, hi_bound)
- cond dflags = CmmMachOp (mkLtOp dflags mid_lit)
- [scrut, CmmLit (mkSimpleLit dflags mid_lit)]
+ cond dflags = CmmMachOp lt [scrut, CmmLit cmm_lit]
+ where
+ cmm_lit = mkSimpleLit dflags mid_lit
+ lt = MO_F_Lt rep
--------------