Diffstat (limited to 'compiler/cmm/CmmOpt.hs')
-rw-r--r--  compiler/cmm/CmmOpt.hs  |  507 +
1 file changed, 507 insertions(+), 0 deletions(-)
diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs
new file mode 100644
index 0000000000..c454ff4c6a
--- /dev/null
+++ b/compiler/cmm/CmmOpt.hs
@@ -0,0 +1,507 @@
+-----------------------------------------------------------------------------
+--
+-- Cmm optimisation
+--
+-- (c) The University of Glasgow 2006
+--
+-----------------------------------------------------------------------------
+
+module CmmOpt (
+ cmmMiniInline,
+ cmmMachOpFold,
+ cmmLoopifyForC,
+ ) where
+
+#include "HsVersions.h"
+
+import Cmm
+import CmmUtils ( hasNoGlobalRegs )
+import CLabel ( entryLblToInfoLbl )
+import MachOp
+import SMRep ( tablesNextToCode )
+
+import UniqFM
+import Unique ( Unique )
+import Panic ( panic )
+
+import Outputable
+
+import Bits
+import Word
+import Int
+import GLAEXTS
+
+
+-- -----------------------------------------------------------------------------
+-- The mini-inliner
+
+{-
+This pass inlines assignments to temporaries that are used just
+once. It works as follows:
+
+ - count uses of each temporary
+ - for each temporary that occurs just once:
+ - attempt to push it forward to the statement that uses it
+ - only push forward past assignments to other temporaries
+ (assumes that temporaries are single-assignment)
+ - if we reach the statement that uses it, inline the rhs
+ and delete the original assignment.
+
+Possible generalisations: here is an example from factorial
+
+Fac_zdwfac_entry:
+ cmG:
+ _smi = R2;
+ if (_smi != 0) goto cmK;
+ R1 = R3;
+ jump I64[Sp];
+ cmK:
+ _smn = _smi * R3;
+ R2 = _smi + (-1);
+ R3 = _smn;
+ jump Fac_zdwfac_info;
+
+We want to inline _smi and _smn. To inline _smn:
+
+ - we must be able to push forward past assignments to global regs.
+ We can do this if the rhs of the assignment we are pushing
+ forward doesn't refer to the global reg being assigned to; easy
+ to test.
+
+To inline _smi:
+
+ - It is a trivial replacement, reg for reg, but it occurs more than
+ once.
+ - We can inline trivial assignments even if the temporary occurs
+ more than once, as long as we don't eliminate the original assignment
+ (this doesn't help much on its own).
+ - We need to be able to propagate the assignment forward through jumps;
+ if we did this, we would find that it can be inlined safely in all
+ its occurrences.
+-}
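+
+{-
+A minimal worked example of the pass as implemented (the temporary
+name _t1 is hypothetical).  Given
+
+        _t1 = R1 + 8;
+        I64[Sp] = _t1;
+
+_t1 is used exactly once, so its assignment is pushed forward, the
+rhs is inlined at the use site, and the original assignment is
+deleted, leaving
+
+        I64[Sp] = R1 + 8;
+-}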
+
+cmmMiniInline :: [CmmBasicBlock] -> [CmmBasicBlock]
+cmmMiniInline blocks = map do_inline blocks
+ where
+ blockUses (BasicBlock _ stmts)
+ = foldr (plusUFM_C (+)) emptyUFM (map getStmtUses stmts)
+
+ uses = foldr (plusUFM_C (+)) emptyUFM (map blockUses blocks)
+
+ do_inline (BasicBlock id stmts)
+ = BasicBlock id (cmmMiniInlineStmts uses stmts)
+
+
+cmmMiniInlineStmts :: UniqFM Int -> [CmmStmt] -> [CmmStmt]
+cmmMiniInlineStmts uses [] = []
+cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts)
+ | Just 1 <- lookupUFM uses u,
+ Just stmts' <- lookForInline u expr stmts
+ =
+#ifdef NCG_DEBUG
+ trace ("nativeGen: inlining " ++ showSDoc (pprStmt stmt)) $
+#endif
+ cmmMiniInlineStmts uses stmts'
+
+cmmMiniInlineStmts uses (stmt:stmts)
+ = stmt : cmmMiniInlineStmts uses stmts
+
+
+-- Try to inline a temporary assignment. We can skip over assignments to
+-- other temporaries, because we know that expressions aren't side-effecting
+-- and temporaries are single-assignment.
+lookForInline :: Unique -> CmmExpr -> [CmmStmt] -> Maybe [CmmStmt]
+lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest)
+ | u /= u'
+ = case lookupUFM (getExprUses rhs) u of
+ Just 1 -> Just (inlineStmt u expr stmt : rest)
+ _other -> case lookForInline u expr rest of
+ Nothing -> Nothing
+ Just stmts -> Just (stmt:stmts)
+
+lookForInline u expr (CmmNop : rest)
+ = lookForInline u expr rest
+
+lookForInline u expr (stmt:stmts)
+ = case lookupUFM (getStmtUses stmt) u of
+ Just 1 | ok_to_inline -> Just (inlineStmt u expr stmt : stmts)
+ _other -> Nothing
+ where
+ -- We don't inline into a CmmCall if the expression refers to global
+ -- registers.  This is a HACK to avoid global registers clashing with
+ -- C argument-passing registers; really the back-end ought to handle
+ -- it properly, but currently neither PprC nor the NCG can do so.
+ -- See also CgForeignCall:load_args_into_temps.
+ ok_to_inline = case stmt of
+ CmmCall{} -> hasNoGlobalRegs expr
+ _ -> True
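+        -- For example, with stmt a CmmCall and expr mentioning the
+        -- global register R1 (a hypothetical instance), ok_to_inline
+        -- is False and the temporary is left alone.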
+
+-- -----------------------------------------------------------------------------
+-- Boring Cmm traversals for collecting usage info and substitutions.
+
+getStmtUses :: CmmStmt -> UniqFM Int
+getStmtUses (CmmAssign _ e) = getExprUses e
+getStmtUses (CmmStore e1 e2) = plusUFM_C (+) (getExprUses e1) (getExprUses e2)
+getStmtUses (CmmCall target _ es _)
+ = plusUFM_C (+) (uses target) (getExprsUses (map fst es))
+ where uses (CmmForeignCall e _) = getExprUses e
+ uses _ = emptyUFM
+getStmtUses (CmmCondBranch e _) = getExprUses e
+getStmtUses (CmmSwitch e _) = getExprUses e
+getStmtUses (CmmJump e _) = getExprUses e
+getStmtUses _ = emptyUFM
+
+getExprUses :: CmmExpr -> UniqFM Int
+getExprUses (CmmReg (CmmLocal (LocalReg u _))) = unitUFM u 1
+getExprUses (CmmRegOff (CmmLocal (LocalReg u _)) _) = unitUFM u 1
+getExprUses (CmmLoad e _) = getExprUses e
+getExprUses (CmmMachOp _ es) = getExprsUses es
+getExprUses _other = emptyUFM
+
+getExprsUses :: [CmmExpr] -> UniqFM Int
+getExprsUses es = foldr (plusUFM_C (+)) emptyUFM (map getExprUses es)
+
+inlineStmt :: Unique -> CmmExpr -> CmmStmt -> CmmStmt
+inlineStmt u a (CmmAssign r e) = CmmAssign r (inlineExpr u a e)
+inlineStmt u a (CmmStore e1 e2) = CmmStore (inlineExpr u a e1) (inlineExpr u a e2)
+inlineStmt u a (CmmCall target regs es vols)
+ = CmmCall (infn target) regs es' vols
+ where -- substitute into the call target too: getStmtUses counts
+ -- uses there, so a once-used temporary may occur only in fn
+ infn (CmmForeignCall fn cconv) = CmmForeignCall (inlineExpr u a fn) cconv
+ infn (CmmPrim p) = CmmPrim p
+ es' = [ (inlineExpr u a e, hint) | (e,hint) <- es ]
+inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d
+inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d
+inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d
+inlineStmt u a other_stmt = other_stmt
+
+inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr
+inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _)))
+ | u == u' = a
+ | otherwise = e
+inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off)
+ | u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)]
+ | otherwise = e
+inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep
+inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es)
+inlineExpr u a other_expr = other_expr
+
+-- -----------------------------------------------------------------------------
+-- MachOp constant folder
+
+-- Now, try to constant-fold the MachOps. The arguments have already
+-- been optimized and folded.
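+--
+-- For example (reps illustrative):
+--   cmmMachOpFold (MO_Add I32) [CmmLit (CmmInt 2 I32), CmmLit (CmmInt 3 I32)]
+--     ==> CmmLit (CmmInt 5 I32)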
+
+cmmMachOpFold
+ :: MachOp -- The operation from an CmmMachOp
+ -> [CmmExpr] -- The optimized arguments
+ -> CmmExpr
+
+cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
+ = case op of
+ MO_S_Neg r -> CmmLit (CmmInt (-x) rep)
+ MO_Not r -> CmmLit (CmmInt (complement x) rep)
+
+ -- these are interesting: we must first narrow to the
+ -- "from" type, in order to truncate to the correct size.
+ -- The final narrow/widen to the destination type
+ -- is implicit in the CmmLit.
+ MO_S_Conv from to
+ | isFloatingRep to -> CmmLit (CmmFloat (fromInteger x) to)
+ | otherwise -> CmmLit (CmmInt (narrowS from x) to)
+ MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
+
+ _ -> panic "cmmMachOpFold: unknown unary op"
+
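+-- For example, widening an I8 literal whose stored Integer is out of
+-- range (the values are illustrative):
+--   cmmMachOpFold (MO_U_Conv I8 I32) [CmmLit (CmmInt 300 I8)]
+--     ==> CmmLit (CmmInt 44 I32)     -- narrowU I8 300 == 44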
+
+-- Eliminate conversion NOPs
+cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x
+cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x
+
+-- Eliminate nested conversions where possible
+cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]]
+ | Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
+ Just (_, rep3,signed2) <- isIntConversion conv_outer
+ = case () of
+ -- widen then narrow to the same size is a nop
+ _ | rep1 < rep2 && rep1 == rep3 -> x
+ -- Widen then narrow to different size: collapse to single conversion
+ -- but remember to use the signedness from the widening, just in case
+ -- the final conversion is a widen.
+ | rep1 < rep2 && rep2 > rep3 ->
+ cmmMachOpFold (intconv signed1 rep1 rep3) [x]
+ -- Nested widenings: collapse if the signedness is the same
+ | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
+ cmmMachOpFold (intconv signed1 rep1 rep3) [x]
+ -- Nested narrowings: collapse
+ | rep1 > rep2 && rep2 > rep3 ->
+ cmmMachOpFold (MO_U_Conv rep1 rep3) [x]
+ | otherwise ->
+ CmmMachOp conv_outer args
+ where
+ isIntConversion (MO_U_Conv rep1 rep2)
+ | not (isFloatingRep rep1) && not (isFloatingRep rep2)
+ = Just (rep1,rep2,False)
+ isIntConversion (MO_S_Conv rep1 rep2)
+ | not (isFloatingRep rep1) && not (isFloatingRep rep2)
+ = Just (rep1,rep2,True)
+ isIntConversion _ = Nothing
+
+ intconv True = MO_S_Conv
+ intconv False = MO_U_Conv
+
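+-- For example, a signed I8->I32 widening followed by an I32->I16
+-- narrowing collapses to a single conversion that keeps the
+-- widening's signedness (reps illustrative):
+--   cmmMachOpFold (MO_U_Conv I32 I16) [CmmMachOp (MO_S_Conv I8 I32) [x]]
+--     ==> cmmMachOpFold (MO_S_Conv I8 I16) [x]
+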
+-- ToDo: a narrow of a load can be collapsed into a narrow load, right?
+-- but what if the architecture only supports word-sized loads, should
+-- we do the transformation anyway?
+
+cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
+ = case mop of
+ -- for comparisons: don't forget to narrow the arguments before
+ -- comparing, since they might be out of range.
+ MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep)
+ MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep)
+
+ MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordRep)
+ MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep)
+ MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordRep)
+ MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep)
+
+ MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordRep)
+ MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep)
+ MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordRep)
+ MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep)
+
+ MO_Add r -> CmmLit (CmmInt (x + y) r)
+ MO_Sub r -> CmmLit (CmmInt (x - y) r)
+ MO_Mul r -> CmmLit (CmmInt (x * y) r)
+ MO_S_Quot r | y_s /= 0 -> CmmLit (CmmInt (x_s `quot` y_s) r)
+ MO_S_Rem r | y_s /= 0 -> CmmLit (CmmInt (x_s `rem` y_s) r)
+
+ MO_And r -> CmmLit (CmmInt (x .&. y) r)
+ MO_Or r -> CmmLit (CmmInt (x .|. y) r)
+ MO_Xor r -> CmmLit (CmmInt (x `xor` y) r)
+
+ MO_Shl r -> CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
+ MO_U_Shr r -> CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
+ MO_S_Shr r -> CmmLit (CmmInt (x_s `shiftR` fromIntegral y) r)
+
+ other -> CmmMachOp mop args
+
+ where
+ x_u = narrowU xrep x
+ y_u = narrowU xrep y
+ x_s = narrowS xrep x
+ y_s = narrowS xrep y
+
+
+-- When possible, shift the constants to the right-hand side, so that we
+-- can match for strength reductions. Note that the code generator will
+-- also assume that constants have been shifted to the right when
+-- possible.
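+--
+-- e.g.  1 + x  ==>  x + 1,  so the clauses below need only look for
+-- literals in the second argument.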
+
+cmmMachOpFold op [x@(CmmLit _), y]
+ | not (isLit y) && isCommutableMachOp op
+ = cmmMachOpFold op [y, x]
+
+-- Turn (a+b)+c into a+(b+c) where possible. Because literals are
+-- moved to the right, it is more likely that we will find
+-- opportunities for constant folding when the expression is
+-- right-associated.
+--
+-- ToDo: this appears to introduce a quadratic behaviour due to the
+-- nested cmmMachOpFold. Can we fix this?
+--
+-- Why do we check isLit arg1? If arg1 is a lit, it means that arg2
+-- is also a lit (otherwise arg1 would be on the right). If we
+-- put arg1 on the left of the rearranged expression, we'll get into a
+-- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ...
+--
+cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
+ | mop1 == mop2 && isAssociativeMachOp mop1 && not (isLit arg1)
+ = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg2,arg3]]
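+-- e.g.  (x + 1) + 2  ==>  x + (1 + 2)  ==>  x + 3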
+
+-- Make a RegOff if we can
+cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
+ = CmmRegOff reg (fromIntegral (narrowS rep n))
+cmmMachOpFold (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
+ = CmmRegOff reg (off + fromIntegral (narrowS rep n))
+cmmMachOpFold (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
+ = CmmRegOff reg (- fromIntegral (narrowS rep n))
+cmmMachOpFold (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
+ = CmmRegOff reg (off - fromIntegral (narrowS rep n))
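+-- e.g.  Sp + 8        ==>  CmmRegOff Sp 8
+--       (Sp + 8) - 4  ==>  CmmRegOff Sp 4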
+
+-- Fold label(+/-)offset into a CmmLit where possible
+
+cmmMachOpFold (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
+ = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
+cmmMachOpFold (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)]
+ = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
+cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
+ = CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i))))
+
+-- We can often do something with constants of 0 and 1 ...
+
+cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))]
+ = case mop of
+ MO_Add r -> x
+ MO_Sub r -> x
+ MO_Mul r -> y
+ MO_And r -> y
+ MO_Or r -> x
+ MO_Xor r -> x
+ MO_Shl r -> x
+ MO_S_Shr r -> x
+ MO_U_Shr r -> x
+ MO_Ne r | isComparisonExpr x -> x
+ MO_Eq r | Just x' <- maybeInvertConditionalExpr x -> x'
+ MO_U_Gt r | isComparisonExpr x -> x
+ MO_S_Gt r | isComparisonExpr x -> x
+ MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
+ MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
+ MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
+ MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
+ MO_U_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
+ MO_S_Le r | Just x' <- maybeInvertConditionalExpr x -> x'
+ other -> CmmMachOp mop args
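+-- e.g.  x + 0 ==> x,   x * 0 ==> 0,   and for a comparison c:
+--       c != 0 ==> c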
+
+cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))]
+ = case mop of
+ MO_Mul r -> x
+ MO_S_Quot r -> x
+ MO_U_Quot r -> x
+ MO_S_Rem r -> CmmLit (CmmInt 0 rep)
+ MO_U_Rem r -> CmmLit (CmmInt 0 rep)
+ MO_Ne r | Just x' <- maybeInvertConditionalExpr x -> x'
+ MO_Eq r | isComparisonExpr x -> x
+ MO_U_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
+ MO_S_Lt r | Just x' <- maybeInvertConditionalExpr x -> x'
+ MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
+ MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep)
+ MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
+ MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep)
+ MO_U_Ge r | isComparisonExpr x -> x
+ MO_S_Ge r | isComparisonExpr x -> x
+ other -> CmmMachOp mop args
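+-- e.g.  x * 1 ==> x,   x `rem` 1 ==> 0,   and for a comparison c:
+--       c == 1 ==> c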
+
+-- Now look for multiplication/division by powers of 2 (integers).
+
+cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))]
+ = case mop of
+ MO_Mul rep
+ -> case exactLog2 n of
+ Nothing -> unchanged
+ Just p -> CmmMachOp (MO_Shl rep) [x, CmmLit (CmmInt p rep)]
+ MO_S_Quot rep
+ -> case exactLog2 n of
+ Nothing -> unchanged
+ Just p -> CmmMachOp (MO_S_Shr rep) [x, CmmLit (CmmInt p rep)]
+ other
+ -> unchanged
+ where
+ unchanged = CmmMachOp mop args
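+-- e.g.  x * 8  ==>  x << 3,  and  x `quot` 8  ==>  x >> 3 (arithmetic).
+-- ToDo: for negative x, quot rounds towards zero but an arithmetic
+-- right shift rounds towards negative infinity, so the MO_S_Quot
+-- rewrite is only exact when x is known to be non-negative.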
+
+-- Anything else is just too hard.
+
+cmmMachOpFold mop args = CmmMachOp mop args
+
+-- -----------------------------------------------------------------------------
+-- exactLog2
+
+-- This algorithm for determining the log2 of exact powers of 2 comes
+-- from GCC. It requires bit manipulation primitives, and we use GHC
+-- extensions. Tough.
+--
+-- Used to be in MachInstrs --SDM.
+-- ToDo: remove use of unboxery --SDM.
+
+w2i x = word2Int# x
+i2w x = int2Word# x
+
+exactLog2 :: Integer -> Maybe Integer
+exactLog2 x
+ = if (x <= 0 || x >= 2147483648) then
+ Nothing
+ else
+ case fromInteger x of { I# x# ->
+ if (w2i ((i2w x#) `and#` (i2w (0# -# x#))) /=# x#) then
+ Nothing
+ else
+ Just (toInteger (I# (pow2 x#)))
+ }
+ where
+ pow2 x# | x# ==# 1# = 0#
+ | otherwise = 1# +# pow2 (w2i (i2w x# `shiftRL#` 1#))
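+
+{-
+A boxed sketch of the same test using Data.Bits, along the lines the
+ToDo above suggests (exactLog2Boxed is a hypothetical name; this
+version is not used here):
+
+    import Data.Bits ((.&.), shiftR)
+
+    exactLog2Boxed :: Integer -> Maybe Integer
+    exactLog2Boxed x
+      | x <= 0 || x >= 2147483648 = Nothing
+      | x .&. negate x /= x       = Nothing   -- more than one bit set
+      | otherwise                 = Just (go x)
+      where go 1 = 0
+            go n = 1 + go (n `shiftR` 1)
+
+e.g.  exactLog2 8 == Just 3,  exactLog2 12 == Nothing
+-}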
+
+
+-- -----------------------------------------------------------------------------
+-- widening / narrowing
+
+narrowU :: MachRep -> Integer -> Integer
+narrowU I8 x = fromIntegral (fromIntegral x :: Word8)
+narrowU I16 x = fromIntegral (fromIntegral x :: Word16)
+narrowU I32 x = fromIntegral (fromIntegral x :: Word32)
+narrowU I64 x = fromIntegral (fromIntegral x :: Word64)
+narrowU _ _ = panic "narrowU"
+
+narrowS :: MachRep -> Integer -> Integer
+narrowS I8 x = fromIntegral (fromIntegral x :: Int8)
+narrowS I16 x = fromIntegral (fromIntegral x :: Int16)
+narrowS I32 x = fromIntegral (fromIntegral x :: Int32)
+narrowS I64 x = fromIntegral (fromIntegral x :: Int64)
+narrowS _ _ = panic "narrowS"
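+
+-- e.g.  narrowU I8 300 == 44     (300 mod 256)
+--       narrowS I8 200 == -56    (200 - 256)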
+
+-- -----------------------------------------------------------------------------
+-- Loopify for C
+
+{-
+ This is a simple pass that replaces tail-recursive functions like this:
+
+ fac() {
+ ...
+ jump fac();
+ }
+
+ with this:
+
+ fac() {
+ L:
+ ...
+ goto L;
+ }
+
+ the latter generates better C code, because the C compiler treats it
+ like a loop, and brings full loop optimisation to bear.
+
+ In my measurements this makes little or no difference to anything
+ except factorial, but what the hell.
+-}
+
+cmmLoopifyForC :: CmmTop -> CmmTop
+cmmLoopifyForC p@(CmmProc info entry_lbl [] blocks@(BasicBlock top_id _ : _))
+ | null info = p -- loopify only procs that have an info table;
+                 -- case alternatives are left alone
+ | otherwise =
+-- pprTrace "jump_lbl" (ppr jump_lbl <+> ppr entry_lbl) $
+ CmmProc info entry_lbl [] blocks'
+ where blocks' = [ BasicBlock id (map do_stmt stmts)
+ | BasicBlock id stmts <- blocks ]
+
+ do_stmt (CmmJump (CmmLit (CmmLabel lbl)) _) | lbl == jump_lbl
+ = CmmBranch top_id
+ do_stmt stmt = stmt
+
+ jump_lbl | tablesNextToCode = entryLblToInfoLbl entry_lbl
+ | otherwise = entry_lbl
+
+cmmLoopifyForC top = top
+
+-- -----------------------------------------------------------------------------
+-- Utils
+
+isLit :: CmmExpr -> Bool
+isLit (CmmLit _) = True
+isLit _ = False
+
+isComparisonExpr :: CmmExpr -> Bool
+isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
+isComparisonExpr _other = False
+
+maybeInvertConditionalExpr :: CmmExpr -> Maybe CmmExpr
+maybeInvertConditionalExpr (CmmMachOp op args)
+ | Just op' <- maybeInvertComparison op = Just (CmmMachOp op' args)
+maybeInvertConditionalExpr _ = Nothing