summaryrefslogtreecommitdiff
path: root/compiler/GHC/Cmm/Switch.hs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/GHC/Cmm/Switch.hs')
-rw-r--r--compiler/GHC/Cmm/Switch.hs502
1 files changed, 502 insertions, 0 deletions
diff --git a/compiler/GHC/Cmm/Switch.hs b/compiler/GHC/Cmm/Switch.hs
new file mode 100644
index 0000000000..e89fadfd2e
--- /dev/null
+++ b/compiler/GHC/Cmm/Switch.hs
@@ -0,0 +1,502 @@
+{-# LANGUAGE GADTs #-}
+module GHC.Cmm.Switch (
+ SwitchTargets,
+ mkSwitchTargets,
+ switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned,
+ mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough,
+ switchTargetsToList, eqSwitchTargetWith,
+
+ SwitchPlan(..),
+ targetSupportsSwitch,
+ createSwitchPlan,
+ ) where
+
+import GhcPrelude
+
+import Outputable
+import DynFlags
+import GHC.Cmm.Dataflow.Label (Label)
+
+import Data.Maybe
+import Data.List (groupBy)
+import Data.Function (on)
+import qualified Data.Map as M
+
+-- Note [Cmm Switches, the general plan]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Compiling a high-level switch statement, as it comes out of a STG case
+-- expression, for example, allows for a surprising amount of design decisions.
+-- Therefore, we cleanly separated this from the Stg → Cmm transformation, as
+-- well as from the actual code generation.
+--
+-- The overall plan is:
+-- * The Stg → Cmm transformation creates a single `SwitchTargets` in
+-- emitSwitch and emitCmmLitSwitch in GHC.StgToCmm.Utils.
+-- At this stage, they are unsuitable for code generation.
+-- * A dedicated Cmm transformation (GHC.Cmm.Switch.Implement) replaces these
+-- switch statements with code that is suitable for code generation, i.e.
+-- a nice balanced tree of decisions with dense jump tables in the leafs.
+-- The actual planning of this tree is performed in pure code in createSwitchPlan
+-- in this module. See Note [createSwitchPlan].
+-- * The actual code generation will not do any further processing and
+-- implement each CmmSwitch with a jump tables.
+--
+-- When compiling to LLVM or C, GHC.Cmm.Switch.Implement leaves the switch
+-- statements alone, as we can turn a SwitchTargets value into a nice
+-- switch-statement in LLVM resp. C, and leave the rest to the compiler.
+--
+-- See Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] why the two module are
+-- separated.
+
+-----------------------------------------------------------------------------
+-- Note [Magic Constants in GHC.Cmm.Switch]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- There are a lot of heuristics here that depend on magic values where it is
+-- hard to determine the "best" value (for whatever that means). These are the
+-- magic values:
+
+-- | Number of consecutive default values allowed in a jump table. If there are
+-- more of them, the jump tables are split.
+--
+-- Currently 7, as it costs 7 words of additional code when a jump table is
+-- split (at least on x64, determined experimentally).
+maxJumpTableHole :: Integer
+maxJumpTableHole = 7
+
+-- | Minimum size of a jump table. If the number is smaller, the switch is
+-- implemented using conditionals.
+-- Currently 5, because an if-then-else tree of 4 values is nice and compact.
+minJumpTableSize :: Int
+minJumpTableSize = 5
+
+-- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset].
+minJumpTableOffset :: Integer
+minJumpTableOffset = 2
+
+
+-----------------------------------------------------------------------------
+-- Switch Targets
+
+-- Note [SwitchTargets]
+-- ~~~~~~~~~~~~~~~~~~~~
+--
+-- The branches of a switch are stored in a SwitchTargets, which consists of an
+-- (optional) default jump target, and a map from values to jump targets.
+--
+-- If the default jump target is absent, the behaviour of the switch outside the
+-- values of the map is undefined.
+--
+-- We use an Integer for the keys the map so that it can be used in switches on
+-- unsigned as well as signed integers.
+--
+-- The map may be empty (we prune out-of-range branches here, so it could be us
+-- emptying it).
+--
+-- Before code generation, the table needs to be brought into a form where all
+-- entries are non-negative, so that it can be compiled into a jump table.
+-- See switchTargetsToTable.
+
+
+-- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch'
+-- value, and knows whether the value is signed, the possible range, an
+-- optional default value and a map from values to jump labels.
+data SwitchTargets =
+ SwitchTargets
+ Bool -- Signed values
+ (Integer, Integer) -- Range
+ (Maybe Label) -- Default value
+ (M.Map Integer Label) -- The branches
+ deriving (Show, Eq)
+
+-- | The smart constructor mkSwitchTargets normalises the map a bit:
+-- * No entries outside the range
+-- * No entries equal to the default
+-- * No default if all elements have explicit values
+mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets
+mkSwitchTargets signed range@(lo,hi) mbdef ids
+ = SwitchTargets signed range mbdef' ids'
+ where
+ ids' = dropDefault $ restrict ids
+ mbdef' | defaultNeeded = mbdef
+ | otherwise = Nothing
+
+ -- Drop entries outside the range, if there is a range
+ restrict = restrictMap (lo,hi)
+
+ -- Drop entries that equal the default, if there is a default
+ dropDefault | Just l <- mbdef = M.filter (/= l)
+ | otherwise = id
+
+ -- Check if the default is still needed
+ defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1
+
+
+-- | Changes all labels mentioned in the SwitchTargets value
+mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets
+mapSwitchTargets f (SwitchTargets signed range mbdef branches)
+ = SwitchTargets signed range (fmap f mbdef) (fmap f branches)
+
+-- | Returns the list of non-default branches of the SwitchTargets value
+switchTargetsCases :: SwitchTargets -> [(Integer, Label)]
+switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches
+
+-- | Return the default label of the SwitchTargets value
+switchTargetsDefault :: SwitchTargets -> Maybe Label
+switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef
+
+-- | Return the range of the SwitchTargets value
+switchTargetsRange :: SwitchTargets -> (Integer, Integer)
+switchTargetsRange (SwitchTargets _ range _ _) = range
+
+-- | Return whether this is used for a signed value
+switchTargetsSigned :: SwitchTargets -> Bool
+switchTargetsSigned (SwitchTargets signed _ _ _) = signed
+
+-- | switchTargetsToTable creates a dense jump table, usable for code generation.
+--
+-- Also returns an offset to add to the value; the list is 0-based on the
+-- result of that addition.
+--
+-- The conversion from Integer to Int is a bit of a wart, as the actual
+-- scrutinee might be an unsigned word, but it just works, due to wrap-around
+-- arithmetic (as verified by the CmmSwitchTest test case).
+switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label])
+switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches)
+ = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ])
+ where
+ labelFor i = case M.lookup i branches of Just l -> Just l
+ Nothing -> mbdef
+ start | lo >= 0 && lo < minJumpTableOffset = 0 -- See Note [Jump Table Offset]
+ | otherwise = lo
+
+-- Note [Jump Table Offset]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Usually, the code for a jump table starting at x will first subtract x from
+-- the value, to avoid a large amount of empty entries. But if x is very small,
+-- the extra entries are no worse than the subtraction in terms of code size, and
+-- not having to do the subtraction is quicker.
+--
+-- I.e. instead of
+-- _u20N:
+-- leaq -1(%r14),%rax
+-- jmp *_n20R(,%rax,8)
+-- _n20R:
+-- .quad _c20p
+-- .quad _c20q
+-- do
+-- _u20N:
+-- jmp *_n20Q(,%r14,8)
+--
+-- _n20Q:
+-- .quad 0
+-- .quad _c20p
+-- .quad _c20q
+-- .quad _c20r
+
+-- | The list of all labels occurring in the SwitchTargets value.
+switchTargetsToList :: SwitchTargets -> [Label]
+switchTargetsToList (SwitchTargets _ _ mbdef branches)
+ = maybeToList mbdef ++ M.elems branches
+
+-- | Groups cases with equal targets, suitable for pretty-printing to a
+-- c-like switch statement with fall-through semantics.
+switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label)
+switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef)
+ where
+ groups = map (\xs -> (map fst xs, snd (head xs))) $
+ groupBy ((==) `on` snd) $
+ M.toList branches
+
+-- | Custom equality helper, needed for "GHC.Cmm.CommonBlockElim"
+eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool
+eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) =
+ signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2)
+ where
+ goMB Nothing Nothing = True
+ goMB (Just l1) (Just l2) = l1 `eq` l2
+ goMB _ _ = False
+ goList [] [] = True
+ goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2
+ goList _ _ = False
+
+-----------------------------------------------------------------------------
+-- Code generation for Switches
+
+
+-- | A SwitchPlan abstractly describes how a Switch statement ought to be
+-- implemented. See Note [createSwitchPlan]
+data SwitchPlan
+ = Unconditionally Label
+ | IfEqual Integer Label SwitchPlan
+ | IfLT Bool Integer SwitchPlan SwitchPlan
+ | JumpTable SwitchTargets
+ deriving Show
+--
+-- Note [createSwitchPlan]
+-- ~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- A SwitchPlan describes how a Switch statement is to be broken down into
+-- smaller pieces suitable for code generation.
+--
+-- createSwitchPlan creates such a switch plan, in these steps:
+-- 1. It splits the switch statement at segments of non-default values that
+-- are too large. See splitAtHoles and Note [Magic Constants in GHC.Cmm.Switch]
+-- 2. Too small jump tables should be avoided, so we break up smaller pieces
+-- in breakTooSmall.
+-- 3. We fill in the segments between those pieces with a jump to the default
+-- label (if there is one), returning a SeparatedList in mkFlatSwitchPlan
+-- 4. We find and replace two less-than branches by a single equal-to-test in
+-- findSingleValues
+-- 5. The thus collected pieces are assembled to a balanced binary tree.
+
+{-
+ Note [Two alts + default]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Discussion and a bit more info at #14644
+
+When dealing with a switch of the form:
+switch(e) {
+ case 1: goto l1;
+ case 3000: goto l2;
+ default: goto ldef;
+}
+
+If we treat it as a sparse jump table we would generate:
+
+if (e > 3000) //Check if value is outside of the jump table.
+ goto ldef;
+else {
+ if (e < 3000) { //Compare to upper value
+ if(e != 1) //Compare to remaining value
+ goto ldef;
+ else
+ goto l2;
+ }
+ else
+ goto l1;
+}
+
+Instead we special case this to :
+
+if (e==1) goto l1;
+else if (e==3000) goto l2;
+else goto l3;
+
+This means we have:
+* Less comparisons for: 1,<3000
+* Unchanged for 3000
+* One more for >3000
+
+This improves code in a few ways:
+* One comparison less means smaller code which helps with cache.
+* It exchanges a taken jump for two jumps no taken in the >range case.
+ Jumps not taken are cheaper (See Agner guides) making this about as fast.
+* For all other cases the first range check is removed making it faster.
+
+The end result is that the change is not measurably slower for the case
+>3000 and faster for the other cases.
+
+This makes running this kind of match in an inner loop cheaper by 10-20%
+depending on the data.
+In nofib this improves wheel-sieve1 by 4-9% depending on problem
+size.
+
+We could also add a second conditional jump after the comparison to
+keep the range check like this:
+ cmp 3000, rArgument
+ jg <default>
+ je <branch 2>
+While this is fairly cheap it made no big difference for the >3000 case
+and slowed down all other cases making it not worthwhile.
+-}
+
+
+-- | Does the target support switch out of the box? Then leave this to the
+-- target!
+targetSupportsSwitch :: HscTarget -> Bool
+targetSupportsSwitch HscC = True
+targetSupportsSwitch HscLlvm = True
+targetSupportsSwitch _ = False
+
+-- | This function creates a SwitchPlan from a SwitchTargets value, breaking it
+-- down into smaller pieces suitable for code generation.
+createSwitchPlan :: SwitchTargets -> SwitchPlan
+-- Lets do the common case of a singleton map quickly and efficiently (#10677)
+createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
+ | [(x, l)] <- M.toList m
+ = IfEqual x l (Unconditionally defLabel)
+-- And another common case, matching "booleans"
+createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m)
+ | [(x1, l1), (_x2,l2)] <- M.toAscList m
+ --Checking If |range| = 2 is enough if we have two unique literals
+ , hi - lo == 1
+ = IfEqual x1 l1 (Unconditionally l2)
+-- See Note [Two alts + default]
+createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
+ | [(x1, l1), (x2,l2)] <- M.toAscList m
+ = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel))
+createSwitchPlan (SwitchTargets signed range mbdef m) =
+ -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $
+ plan
+ where
+ pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m
+ flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces
+ plan = buildTree signed $ flatPlan
+
+
+---
+--- Step 1: Splitting at large holes
+---
+splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a]
+splitAtHoles _ m | M.null m = []
+splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles
+ where
+ holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m))
+ nonHoles = reassocTuples lo holes hi
+
+ (lo,_) = M.findMin m
+ (hi,_) = M.findMax m
+
+---
+--- Step 2: Avoid small jump tables
+---
+-- We do not want jump tables below a certain size. This breaks them up
+-- (into singleton maps, for now).
+breakTooSmall :: M.Map Integer a -> [M.Map Integer a]
+breakTooSmall m
+ | M.size m > minJumpTableSize = [m]
+ | otherwise = [M.singleton k v | (k,v) <- M.toList m]
+
+---
+--- Step 3: Fill in the blanks
+---
+
+-- | A FlatSwitchPlan is a list of SwitchPlans, with an integer inbetween every
+-- two entries, dividing the range.
+-- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if
+-- the expression is < n, and plan2 otherwise.
+
+type FlatSwitchPlan = SeparatedList Integer SwitchPlan
+
+mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan
+
+-- If we have no default (i.e. undefined where there is no entry), we can
+-- branch at the minimum of each map
+mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty
+mkFlatSwitchPlan signed Nothing _ (m:ms)
+ = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ])
+
+-- If we have a default, we have to interleave segments that jump
+-- to the default between the maps
+mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps)
+ where
+ go (lo,hi) []
+ | lo > hi = []
+ | otherwise = [(lo, Unconditionally l)]
+ go (lo,hi) (m:ms)
+ | lo < min
+ = (lo, Unconditionally l) : go (min,hi) (m:ms)
+ | lo == min
+ = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms
+ | otherwise
+ = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min)
+ where
+ min = fst (M.findMin m)
+ max = fst (M.findMax m)
+
+
+mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan
+mkLeafPlan signed mbdef m
+ | [(_,l)] <- M.toList m -- singleton map
+ = Unconditionally l
+ | otherwise
+ = JumpTable $ mkSwitchTargets signed (min,max) mbdef m
+ where
+ min = fst (M.findMin m)
+ max = fst (M.findMax m)
+
+---
+--- Step 4: Reduce the number of branches using ==
+---
+
+-- A sequence of three unconditional jumps, with the outer two pointing to the
+-- same value and the bounds off by exactly one can be improved
+findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan
+findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs)
+ | l == l3 && i + 1 == i'
+ = findSingleValues (IfEqual i l2 (Unconditionally l), xs)
+findSingleValues (p, (i,p'):xs)
+ = (p,i) `consSL` findSingleValues (p', xs)
+findSingleValues (p, [])
+ = (p, [])
+
+---
+--- Step 5: Actually build the tree
+---
+
+-- Build a balanced tree from a separated list
+buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan
+buildTree _ (p,[]) = p
+buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2)
+ where
+ (sl1, m, sl2) = divideSL sl
+
+
+
+--
+-- Utility data type: Non-empty lists with extra markers in between each
+-- element:
+--
+
+type SeparatedList b a = (a, [(b,a)])
+
+consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a
+consSL (a, b) (a', xs) = (a, (b,a'):xs)
+
+divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a)
+divideSL (_,[]) = error "divideSL: Singleton SeparatedList"
+divideSL (p,xs) = ((p, xs1), m, (p', xs2))
+ where
+ (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs
+
+--
+-- Other Utilities
+--
+
+restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b
+restrictMap (lo,hi) m = mid
+ where (_, mid_hi) = M.split (lo-1) m
+ (mid, _) = M.split (hi+1) mid_hi
+
+-- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)]
+reassocTuples :: a -> [(a,a)] -> a -> [(a,a)]
+reassocTuples initial [] last
+ = [(initial,last)]
+reassocTuples initial ((a,b):tuples) last
+ = (initial,a) : reassocTuples b tuples last
+
+-- Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- I (Joachim) separated the two somewhat closely related modules
+--
+-- - GHC.Cmm.Switch, which provides the CmmSwitchTargets type and contains the strategy
+-- for implementing a Cmm switch (createSwitchPlan), and
+-- - GHC.Cmm.Switch.Implement, which contains the actual Cmm graph modification,
+--
+-- for these reasons:
+--
+-- * GHC.Cmm.Switch is very low in the dependency tree, i.e. does not depend on any
+-- GHC specific modules at all (with the exception of Output and
+-- GHC.Cmm.Dataflow (Literal)).
+-- * GHC.Cmm.Switch.Implement is the Cmm transformation and hence very high in
+-- the dependency tree.
+-- * GHC.Cmm.Switch provides the CmmSwitchTargets data type, which is abstract, but
+-- used in GHC.Cmm.Node.
+-- * Because GHC.Cmm.Switch is low in the dependency tree, the separation allows
+-- for more parallelism when building GHC.
+-- * The interaction between the modules is very explicit and easy to
+-- understand, due to the small and simple interface.