Diffstat (limited to 'compiler/cmm')
-rw-r--r--  compiler/cmm/CmmCallConv.hs                 29
-rw-r--r--  compiler/cmm/CmmCvt.hs                       2
-rw-r--r--  compiler/cmm/CmmDecl.hs                      4
-rw-r--r--  compiler/cmm/CmmExpr.hs                      5
-rw-r--r--  compiler/cmm/CmmLive.hs                     49
-rw-r--r--  compiler/cmm/CmmNode.hs                     58
-rw-r--r--  compiler/cmm/CmmOpt.hs                     366
-rw-r--r--  compiler/cmm/CmmPipeline.hs (renamed from compiler/cmm/CmmCPS.hs)  33
-rw-r--r--  compiler/cmm/CmmProcPoint.hs                 2
-rw-r--r--  compiler/cmm/CmmRewriteAssignments.hs      628
-rw-r--r--  compiler/cmm/CmmSpillReload.hs             631
-rw-r--r--  compiler/cmm/MkGraph.hs                     36
-rw-r--r--  compiler/cmm/OldCmm.hs                      14
-rw-r--r--  compiler/cmm/OldCmmUtils.hs                  4
-rw-r--r--  compiler/cmm/PprC.hs                         4
-rw-r--r--  compiler/cmm/cmm-notes                      36
16 files changed, 1041 insertions, 860 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index 830c879112..c81b868167 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -1,9 +1,6 @@
module CmmCallConv (
ParamLocation(..),
- ArgumentFormat,
- assignArguments,
- assignArgumentsPos,
- argumentsSize,
+ assignArgumentsPos
) where
#include "HsVersions.h"
@@ -21,25 +18,19 @@ import Outputable
-- Calculate the 'GlobalReg' or stack locations for function call
-- parameters as used by the Cmm calling convention.
-data ParamLocation a
+data ParamLocation
= RegisterParam GlobalReg
- | StackParam a
+ | StackParam ByteOff
-instance (Outputable a) => Outputable (ParamLocation a) where
+instance Outputable ParamLocation where
ppr (RegisterParam g) = ppr g
ppr (StackParam p) = ppr p
-type ArgumentFormat a b = [(a, ParamLocation b)]
-
-assignArguments :: (a -> CmmType) -> [a] -> ArgumentFormat a WordOff
--- Stack parameters are returned as word offsets.
-assignArguments _ _ = panic "assignArguments only used in dead codegen" -- assignments
-
-- | JD: For the new stack story, I want arguments passed on the stack to manifest as
-- positive offsets in a CallArea, not negative offsets from the stack pointer.
-- Also, I want byte offsets, not word offsets.
-assignArgumentsPos :: (Outputable a) => Convention -> (a -> CmmType) -> [a] ->
- ArgumentFormat a ByteOff
+assignArgumentsPos :: Convention -> (a -> CmmType) -> [a] ->
+ [(a, ParamLocation)]
-- Given a list of arguments, and a function that tells their types,
-- return a list showing where each argument is passed
assignArgumentsPos conv arg_ty reps = assignments
@@ -96,14 +87,6 @@ assignArgumentsPos conv arg_ty reps = assignments
where w = typeWidth (arg_ty r)
size = (((widthInBytes w - 1) `div` wORD_SIZE) + 1) * wORD_SIZE
off' = offset + size
-
-
-argumentsSize :: (a -> CmmType) -> [a] -> WordOff
-argumentsSize f reps = maximum (0 : map arg_top args)
- where
- args = assignArguments f reps
- arg_top (_, StackParam offset) = -offset
- arg_top (_, RegisterParam _) = 0
-----------------------------------------------------------------------------
-- Local information about the registers available
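The size computation in the hunk above rounds every argument up to a whole number of machine words before advancing the byte offset. A minimal standalone sketch of that rounding, with the word size passed as a parameter in place of wORD_SIZE:

    -- Round a byte width up to a multiple of the word size.
    -- Mirrors: size = (((widthInBytes w - 1) `div` wORD_SIZE) + 1) * wORD_SIZE
    roundUpToWords :: Int -> Int -> Int
    roundUpToWords wordSize n = (((n - 1) `div` wordSize) + 1) * wordSize

    -- roundUpToWords 8 1  == 8   -- a sub-word argument still takes a full slot
    -- roundUpToWords 8 8  == 8
    -- roundUpToWords 8 12 == 16  -- an over-wide value spans two words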
diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs
index 9382d8d1ed..83d72b8f6e 100644
--- a/compiler/cmm/CmmCvt.hs
+++ b/compiler/cmm/CmmCvt.hs
@@ -83,7 +83,7 @@ toZgraph fun_name g@(Old.ListGraph (Old.BasicBlock id ss : other_blocks)) =
strip_hints :: [Old.CmmHinted a] -> [a]
strip_hints = map Old.hintlessCmm
-convert_target :: Old.CmmCallTarget -> Old.HintedCmmFormals -> Old.HintedCmmActuals -> ForeignTarget
+convert_target :: Old.CmmCallTarget -> [Old.HintedCmmFormal] -> [Old.HintedCmmActual] -> ForeignTarget
convert_target (Old.CmmCallee e cc) ress args = ForeignTarget e (ForeignConvention cc (map Old.cmmHint args) (map Old.cmmHint ress))
convert_target (Old.CmmPrim op) _ress _args = PrimTarget op
diff --git a/compiler/cmm/CmmDecl.hs b/compiler/cmm/CmmDecl.hs
index e2da59beac..38eda2d1ac 100644
--- a/compiler/cmm/CmmDecl.hs
+++ b/compiler/cmm/CmmDecl.hs
@@ -10,7 +10,7 @@ module CmmDecl (
GenCmm(..), GenCmmTop(..),
CmmInfoTable(..), HasStaticClosure, ClosureTypeInfo(..), ConstrDescription,
ProfilingInfo(..), ClosureTypeTag,
- CmmActual, CmmActuals, CmmFormal, CmmFormals, ForeignHint(..),
+ CmmActual, CmmFormal, ForeignHint(..),
CmmStatic(..), Section(..),
) where
@@ -114,8 +114,6 @@ type SelectorOffset = StgWord
type CmmActual = CmmExpr
type CmmFormal = LocalReg
-type CmmActuals = [CmmActual]
-type CmmFormals = [CmmFormal]
data ForeignHint
= NoHint | AddrHint | SignedHint
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 869bc1b4ac..b8cd3280e8 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -10,7 +10,7 @@ module CmmExpr
, DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed
, RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet
, plusRegSet, minusRegSet, timesRegSet
- , regUsedIn
+ , regUsedIn, regSlot
, Area(..), AreaId(..), SubArea, SubAreaSet, AreaMap, isStackSlotOf
, module CmmMachOp
, module CmmType
@@ -267,6 +267,9 @@ isStackSlotOf :: CmmExpr -> LocalReg -> Bool
isStackSlotOf (CmmStackSlot (RegSlot r) _) r' = r == r'
isStackSlotOf _ _ = False
+regSlot :: LocalReg -> CmmExpr
+regSlot r = CmmStackSlot (RegSlot r) (widthInBytes $ typeWidth $ localRegType r)
+
-----------------------------------------------------------------------------
-- Stack slot use information for expressions and other types
-----------------------------------------------------------------------------
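The new regSlot helper builds the canonical stack slot for a local register, sized to the register's own type. A hedged usage sketch (spill and reload are illustrative names, not part of this patch):

    -- regSlot r == CmmStackSlot (RegSlot r) (widthInBytes (typeWidth (localRegType r)))
    spill, reload :: LocalReg -> CmmNode O O
    spill  r = CmmStore (regSlot r) (CmmReg (CmmLocal r))                -- save r to its slot
    reload r = CmmAssign (CmmLocal r) (CmmLoad (regSlot r) (localRegType r))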
diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs
index c87a3a9b33..ca3ab095ed 100644
--- a/compiler/cmm/CmmLive.hs
+++ b/compiler/cmm/CmmLive.hs
@@ -1,11 +1,13 @@
{-# LANGUAGE GADTs #-}
+
{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
module CmmLive
( CmmLive
, cmmLiveness
, liveLattice
- , noLiveOnEntry, xferLive
+ , noLiveOnEntry, xferLive, gen, kill, gen_kill
+ , removeDeadAssignments
)
where
@@ -47,9 +49,6 @@ cmmLiveness graph =
where entry = g_entry graph
check facts = noLiveOnEntry entry (expectJust "check" $ mapLookup entry facts) facts
-gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a) => a -> CmmLive -> CmmLive
-gen_kill a = gen a . kill a
-
-- | On entry to the procedure, there had better not be any LocalReg's live-in.
noLiveOnEntry :: BlockId -> CmmLive -> a -> a
noLiveOnEntry bid in_fact x =
@@ -57,19 +56,47 @@ noLiveOnEntry bid in_fact x =
else pprPanic "LocalReg's live-in to graph" (ppr bid <+> ppr in_fact)
-- | The transfer equations use the traditional 'gen' and 'kill'
--- notations, which should be familiar from the dragon book.
-gen :: UserOfLocalRegs a => a -> RegSet -> RegSet
-gen a live = foldRegsUsed extendRegSet live a
+-- notations, which should be familiar from the Dragon Book.
+gen :: UserOfLocalRegs a => a -> RegSet -> RegSet
+gen a live = foldRegsUsed extendRegSet live a
kill :: DefinerOfLocalRegs a => a -> RegSet -> RegSet
kill a live = foldRegsDefd delOneFromUniqSet live a
--- Testing!
+gen_kill :: (DefinerOfLocalRegs a, UserOfLocalRegs a) => a -> CmmLive -> CmmLive
+gen_kill a = gen a . kill a
+
+-- | The transfer function
+-- EZY: Bits of this analysis are duplicated in CmmSpillReload, though
+-- it's not really easy to efficiently reuse all of this. Keep in mind
+-- if you need to update this analysis.
xferLive :: BwdTransfer CmmNode CmmLive
xferLive = mkBTransfer3 fst mid lst
where fst _ f = f
mid :: CmmNode O O -> CmmLive -> CmmLive
mid n f = gen_kill n f
lst :: CmmNode O C -> FactBase CmmLive -> CmmLive
- lst n f = gen_kill n $ case n of CmmCall {} -> emptyRegSet
- CmmForeignCall {} -> emptyRegSet
- _ -> joinOutFacts liveLattice n f
+ -- slightly inefficient: kill is unnecessary for emptyRegSet
+ lst n f = gen_kill n
+ $ case n of CmmCall{} -> emptyRegSet
+ CmmForeignCall{} -> emptyRegSet
+ _ -> joinOutFacts liveLattice n f
+
+-----------------------------------------------------------------------------
+-- Removing assignments to dead variables
+-----------------------------------------------------------------------------
+
+removeDeadAssignments :: CmmGraph -> FuelUniqSM CmmGraph
+removeDeadAssignments g =
+ liftM fst $ dataflowPassBwd g [] $ analRewBwd liveLattice xferLive rewrites
+ where rewrites = deepBwdRw3 nothing middle nothing
+ -- Beware: deepBwdRw with one polymorphic function seems more reasonable here,
+ -- but GHC panics while compiling, see bug #4045.
+ middle :: CmmNode O O -> Fact O CmmLive -> CmmReplGraph O O
+ middle (CmmAssign (CmmLocal reg') _) live | not (reg' `elemRegSet` live) = return $ Just emptyGraph
+ -- XXX maybe this should be somewhere else...
+ middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs = return $ Just emptyGraph
+ middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs = return $ Just emptyGraph
+ middle _ _ = return Nothing
+
+ nothing :: CmmNode e x -> Fact x CmmLive -> CmmReplGraph e x
+ nothing _ _ = return Nothing
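Since gen, kill and gen_kill are now exported, other passes can reuse the Dragon Book transfer directly. A concrete instance of how gen_kill behaves on a middle node (register names made up):

    -- For the assignment x := y + 1 with {x} live after the node:
    --   kill removes the definition => {}
    --   gen  adds the uses          => {y}
    -- so gen_kill n {x} == {y}: y is live before the node, x is not.
    liveBefore :: CmmNode O O -> CmmLive -> CmmLive
    liveBefore = gen_kill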
diff --git a/compiler/cmm/CmmNode.hs b/compiler/cmm/CmmNode.hs
index 7d50d9ae72..f7950423fe 100644
--- a/compiler/cmm/CmmNode.hs
+++ b/compiler/cmm/CmmNode.hs
@@ -11,6 +11,7 @@ module CmmNode
( CmmNode(..)
, UpdFrameOffset, Convention(..), ForeignConvention(..), ForeignTarget(..)
, mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf
+ , mapExpM, mapExpDeepM, wrapRecExpM
)
where
@@ -22,6 +23,7 @@ import SMRep
import Compiler.Hoopl
import Data.Maybe
+import Data.List (tails)
import Prelude hiding (succ)
@@ -42,8 +44,8 @@ data CmmNode e x where
-- Like a "fat machine instruction"; can occur
-- in the middle of a block
ForeignTarget -> -- call target
- CmmFormals -> -- zero or more results
- CmmActuals -> -- zero or more arguments
+ [CmmFormal] -> -- zero or more results
+ [CmmActual] -> -- zero or more arguments
CmmNode O O
-- Semantics: kills only result regs; all other regs (both GlobalReg
-- and LocalReg) are preserved. But there is a current
@@ -105,8 +107,8 @@ data CmmNode e x where
CmmForeignCall :: { -- A safe foreign call; see Note [Foreign calls]
-- Always the last node of a block
tgt :: ForeignTarget, -- call target and convention
- res :: CmmFormals, -- zero or more results
- args :: CmmActuals, -- zero or more arguments; see Note [Register parameter passing]
+ res :: [CmmFormal], -- zero or more results
+ args :: [CmmActual], -- zero or more arguments; see Note [Register parameter passing]
succ :: Label, -- Label of continuation
updfr :: UpdFrameOffset, -- where the update frame is (for building infotable)
intrbl:: Bool -- whether or not the call is interruptible
@@ -323,6 +325,54 @@ mapExp f (CmmForeignCall tgt fs as succ updfr intrbl) = CmmForeignCall (mapFor
mapExpDeep :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
mapExpDeep f = mapExp $ wrapRecExp f
+------------------------------------------------------------------------
+-- mapping Expr in CmmNode, but without allocating when nothing changes
+
+mapForeignTargetM :: (CmmExpr -> Maybe CmmExpr) -> ForeignTarget -> Maybe ForeignTarget
+mapForeignTargetM f (ForeignTarget e c) = (\x -> ForeignTarget x c) `fmap` f e
+mapForeignTargetM _ (PrimTarget _) = Nothing
+
+wrapRecExpM :: (CmmExpr -> Maybe CmmExpr) -> (CmmExpr -> Maybe CmmExpr)
+wrapRecExpM f n@(CmmMachOp op es) = maybe (f n) (f . CmmMachOp op) (mapListM (wrapRecExpM f) es)
+wrapRecExpM f n@(CmmLoad addr ty) = maybe (f n) (f . flip CmmLoad ty) (wrapRecExpM f addr)
+wrapRecExpM f e = f e
+
+mapExpM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
+mapExpM _ (CmmEntry _) = Nothing
+mapExpM _ (CmmComment _) = Nothing
+mapExpM f (CmmAssign r e) = CmmAssign r `fmap` f e
+mapExpM f (CmmStore addr e) = (\[addr', e'] -> CmmStore addr' e') `fmap` mapListM f [addr, e]
+mapExpM _ (CmmBranch _) = Nothing
+mapExpM f (CmmCondBranch e ti fi) = (\x -> CmmCondBranch x ti fi) `fmap` f e
+mapExpM f (CmmSwitch e tbl) = (\x -> CmmSwitch x tbl) `fmap` f e
+mapExpM f (CmmCall tgt mb_id o i s) = (\x -> CmmCall x mb_id o i s) `fmap` f tgt
+mapExpM f (CmmUnsafeForeignCall tgt fs as)
+ = case mapForeignTargetM f tgt of
+ Just tgt' -> Just (CmmUnsafeForeignCall tgt' fs (mapListJ f as))
+ Nothing -> (\xs -> CmmUnsafeForeignCall tgt fs xs) `fmap` mapListM f as
+mapExpM f (CmmForeignCall tgt fs as succ updfr intrbl)
+ = case mapForeignTargetM f tgt of
+ Just tgt' -> Just (CmmForeignCall tgt' fs (mapListJ f as) succ updfr intrbl)
+ Nothing -> (\xs -> CmmForeignCall tgt fs xs succ updfr intrbl) `fmap` mapListM f as
+
+-- share as much as possible
+mapListM :: (a -> Maybe a) -> [a] -> Maybe [a]
+mapListM f xs = let (b, r) = mapListT f xs
+ in if b then Just r else Nothing
+
+mapListJ :: (a -> Maybe a) -> [a] -> [a]
+mapListJ f xs = snd (mapListT f xs)
+
+mapListT :: (a -> Maybe a) -> [a] -> (Bool, [a])
+mapListT f xs = foldr g (False, []) (zip3 (tails xs) xs (map f xs))
+ where g (_, y, Nothing) (True, ys) = (True, y:ys)
+ g (_, _, Just y) (True, ys) = (True, y:ys)
+ g (ys', _, Nothing) (False, _) = (False, ys')
+ g (_, _, Just y) (False, ys) = (True, y:ys)
+
+mapExpDeepM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x)
+mapExpDeepM f = mapExpM $ wrapRecExpM f
+
-----------------------------------
-- folding Expr in CmmNode
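The point of the mapListM/mapListT machinery above is sharing: tails xs supplies the original remainder of the list at every position, and the fold stays in "no change" mode, returning that original suffix, until some element actually changes. A sketch of the observable behaviour:

    -- Nothing means no element changed, so callers keep the original node:
    --   mapListM (\x -> if x == 2 then Just 20 else Nothing) [1,2,3]
    --     == Just [1,20,3]   -- the suffix [3] is shared with the input
    --   mapListM (const Nothing) [1,2,3]
    --     == Nothing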
diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs
index 69df4fbff1..dab866e186 100644
--- a/compiler/cmm/CmmOpt.hs
+++ b/compiler/cmm/CmmOpt.hs
@@ -14,10 +14,11 @@
-----------------------------------------------------------------------------
module CmmOpt (
- cmmEliminateDeadBlocks,
- cmmMiniInline,
- cmmMachOpFold,
- cmmLoopifyForC,
+ cmmEliminateDeadBlocks,
+ cmmMiniInline,
+ cmmMachOpFold,
+ cmmMachOpFoldM,
+ cmmLoopifyForC,
) where
#include "HsVersions.h"
@@ -302,114 +303,123 @@ inlineExpr u a other_expr = other_expr
-- been optimized and folded.
cmmMachOpFold
- :: MachOp -- The operation from an CmmMachOp
- -> [CmmExpr] -- The optimized arguments
+ :: MachOp -- The operation from an CmmMachOp
+ -> [CmmExpr] -- The optimized arguments
-> CmmExpr
-cmmMachOpFold op arg@[CmmLit (CmmInt x rep)]
- = case op of
+cmmMachOpFold op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM op args)
+
+-- Returns Nothing if there were no changes; useful for Hoopl, and it
+-- also reduces allocation!
+cmmMachOpFoldM
+ :: MachOp
+ -> [CmmExpr]
+ -> Maybe CmmExpr
+
+cmmMachOpFoldM op arg@[CmmLit (CmmInt x rep)]
+ = Just $ case op of
MO_S_Neg r -> CmmLit (CmmInt (-x) rep)
MO_Not r -> CmmLit (CmmInt (complement x) rep)
- -- these are interesting: we must first narrow to the
- -- "from" type, in order to truncate to the correct size.
- -- The final narrow/widen to the destination type
- -- is implicit in the CmmLit.
+ -- these are interesting: we must first narrow to the
+ -- "from" type, in order to truncate to the correct size.
+ -- The final narrow/widen to the destination type
+ -- is implicit in the CmmLit.
MO_SF_Conv from to -> CmmLit (CmmFloat (fromInteger x) to)
MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to)
MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to)
- _ -> panic "cmmMachOpFold: unknown unary op"
+ _ -> panic "cmmMachOpFoldM: unknown unary op"
-- Eliminate conversion NOPs
-cmmMachOpFold (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = x
-cmmMachOpFold (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = x
+cmmMachOpFoldM (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
+cmmMachOpFoldM (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x
-- Eliminate nested conversions where possible
-cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]]
+cmmMachOpFoldM conv_outer args@[CmmMachOp conv_inner [x]]
| Just (rep1,rep2,signed1) <- isIntConversion conv_inner,
Just (_, rep3,signed2) <- isIntConversion conv_outer
= case () of
- -- widen then narrow to the same size is a nop
- _ | rep1 < rep2 && rep1 == rep3 -> x
- -- Widen then narrow to different size: collapse to single conversion
- -- but remember to use the signedness from the widening, just in case
- -- the final conversion is a widen.
- | rep1 < rep2 && rep2 > rep3 ->
- cmmMachOpFold (intconv signed1 rep1 rep3) [x]
- -- Nested widenings: collapse if the signedness is the same
- | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
- cmmMachOpFold (intconv signed1 rep1 rep3) [x]
- -- Nested narrowings: collapse
- | rep1 > rep2 && rep2 > rep3 ->
- cmmMachOpFold (MO_UU_Conv rep1 rep3) [x]
- | otherwise ->
- CmmMachOp conv_outer args
+ -- widen then narrow to the same size is a nop
+ _ | rep1 < rep2 && rep1 == rep3 -> Just x
+ -- Widen then narrow to different size: collapse to single conversion
+ -- but remember to use the signedness from the widening, just in case
+ -- the final conversion is a widen.
+ | rep1 < rep2 && rep2 > rep3 ->
+ Just $ cmmMachOpFold (intconv signed1 rep1 rep3) [x]
+ -- Nested widenings: collapse if the signedness is the same
+ | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 ->
+ Just $ cmmMachOpFold (intconv signed1 rep1 rep3) [x]
+ -- Nested narrowings: collapse
+ | rep1 > rep2 && rep2 > rep3 ->
+ Just $ cmmMachOpFold (MO_UU_Conv rep1 rep3) [x]
+ | otherwise ->
+ Nothing
where
- isIntConversion (MO_UU_Conv rep1 rep2)
- = Just (rep1,rep2,False)
- isIntConversion (MO_SS_Conv rep1 rep2)
- = Just (rep1,rep2,True)
- isIntConversion _ = Nothing
+ isIntConversion (MO_UU_Conv rep1 rep2)
+ = Just (rep1,rep2,False)
+ isIntConversion (MO_SS_Conv rep1 rep2)
+ = Just (rep1,rep2,True)
+ isIntConversion _ = Nothing
- intconv True = MO_SS_Conv
- intconv False = MO_UU_Conv
+ intconv True = MO_SS_Conv
+ intconv False = MO_UU_Conv
-- ToDo: a narrow of a load can be collapsed into a narrow load, right?
-- but what if the architecture only supports word-sized loads, should
-- we do the transformation anyway?
-cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
+cmmMachOpFoldM mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)]
= case mop of
- -- for comparisons: don't forget to narrow the arguments before
- -- comparing, since they might be out of range.
- MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordWidth)
- MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordWidth)
-
- MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordWidth)
- MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordWidth)
- MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordWidth)
- MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordWidth)
-
- MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordWidth)
- MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordWidth)
- MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordWidth)
- MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordWidth)
-
- MO_Add r -> CmmLit (CmmInt (x + y) r)
- MO_Sub r -> CmmLit (CmmInt (x - y) r)
- MO_Mul r -> CmmLit (CmmInt (x * y) r)
- MO_U_Quot r | y /= 0 -> CmmLit (CmmInt (x_u `quot` y_u) r)
- MO_U_Rem r | y /= 0 -> CmmLit (CmmInt (x_u `rem` y_u) r)
- MO_S_Quot r | y /= 0 -> CmmLit (CmmInt (x `quot` y) r)
- MO_S_Rem r | y /= 0 -> CmmLit (CmmInt (x `rem` y) r)
-
- MO_And r -> CmmLit (CmmInt (x .&. y) r)
- MO_Or r -> CmmLit (CmmInt (x .|. y) r)
- MO_Xor r -> CmmLit (CmmInt (x `xor` y) r)
-
- MO_Shl r -> CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
- MO_U_Shr r -> CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
- MO_S_Shr r -> CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
-
- other -> CmmMachOp mop args
+ -- for comparisons: don't forget to narrow the arguments before
+ -- comparing, since they might be out of range.
+ MO_Eq r -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordWidth)
+ MO_Ne r -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordWidth)
+
+ MO_U_Gt r -> Just $ CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordWidth)
+ MO_U_Ge r -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordWidth)
+ MO_U_Lt r -> Just $ CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordWidth)
+ MO_U_Le r -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordWidth)
+
+ MO_S_Gt r -> Just $ CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordWidth)
+ MO_S_Ge r -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordWidth)
+ MO_S_Lt r -> Just $ CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordWidth)
+ MO_S_Le r -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordWidth)
+
+ MO_Add r -> Just $ CmmLit (CmmInt (x + y) r)
+ MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r)
+ MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r)
+ MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r)
+ MO_U_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem` y_u) r)
+ MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r)
+ MO_S_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r)
+
+ MO_And r -> Just $ CmmLit (CmmInt (x .&. y) r)
+ MO_Or r -> Just $ CmmLit (CmmInt (x .|. y) r)
+ MO_Xor r -> Just $ CmmLit (CmmInt (x `xor` y) r)
+
+ MO_Shl r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r)
+ MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r)
+ MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r)
+
+ other -> Nothing
where
- x_u = narrowU xrep x
- y_u = narrowU xrep y
- x_s = narrowS xrep x
- y_s = narrowS xrep y
-
+ x_u = narrowU xrep x
+ y_u = narrowU xrep y
+ x_s = narrowS xrep x
+ y_s = narrowS xrep y
+
-- When possible, shift the constants to the right-hand side, so that we
-- can match for strength reductions. Note that the code generator will
-- also assume that constants have been shifted to the right when
-- possible.
-cmmMachOpFold op [x@(CmmLit _), y]
- | not (isLit y) && isCommutableMachOp op
- = cmmMachOpFold op [y, x]
+cmmMachOpFoldM op [x@(CmmLit _), y]
+ | not (isLit y) && isCommutableMachOp op
+ = Just (cmmMachOpFold op [y, x])
-- Turn (a+b)+c into a+(b+c) where possible. Because literals are
-- moved to the right, it is more likely that we will find
@@ -427,38 +437,38 @@ cmmMachOpFold op [x@(CmmLit _), y]
-- Also don't do it if arg1 is PicBaseReg, so that we don't separate the
-- PicBaseReg from the corresponding label (or label difference).
--
-cmmMachOpFold mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
+cmmMachOpFoldM mop1 [CmmMachOp mop2 [arg1,arg2], arg3]
| mop2 `associates_with` mop1
&& not (isLit arg1) && not (isPicReg arg1)
- = cmmMachOpFold mop2 [arg1, cmmMachOpFold mop1 [arg2,arg3]]
+ = Just (cmmMachOpFold mop2 [arg1, cmmMachOpFold mop1 [arg2,arg3]])
where
MO_Add{} `associates_with` MO_Sub{} = True
mop1 `associates_with` mop2 =
mop1 == mop2 && isAssociativeMachOp mop1
-- special case: (a - b) + c ==> a + (c - b)
-cmmMachOpFold mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3]
+cmmMachOpFoldM mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3]
| not (isLit arg1) && not (isPicReg arg1)
- = cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg3,arg2]]
+ = Just (cmmMachOpFold mop1 [arg1, cmmMachOpFold mop2 [arg3,arg2]])
-- Make a RegOff if we can
-cmmMachOpFold (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
- = CmmRegOff reg (fromIntegral (narrowS rep n))
-cmmMachOpFold (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
- = CmmRegOff reg (off + fromIntegral (narrowS rep n))
-cmmMachOpFold (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
- = CmmRegOff reg (- fromIntegral (narrowS rep n))
-cmmMachOpFold (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
- = CmmRegOff reg (off - fromIntegral (narrowS rep n))
+cmmMachOpFoldM (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)]
+ = Just $ CmmRegOff reg (fromIntegral (narrowS rep n))
+cmmMachOpFoldM (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
+ = Just $ CmmRegOff reg (off + fromIntegral (narrowS rep n))
+cmmMachOpFoldM (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)]
+ = Just $ CmmRegOff reg (- fromIntegral (narrowS rep n))
+cmmMachOpFoldM (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)]
+ = Just $ CmmRegOff reg (off - fromIntegral (narrowS rep n))
-- Fold label(+/-)offset into a CmmLit where possible
-cmmMachOpFold (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
- = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
-cmmMachOpFold (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)]
- = CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
-cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
- = CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i))))
+cmmMachOpFoldM (MO_Add _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
+ = Just $ CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
+cmmMachOpFoldM (MO_Add _) [CmmLit (CmmInt i rep), CmmLit (CmmLabel lbl)]
+ = Just $ CmmLit (CmmLabelOff lbl (fromIntegral (narrowU rep i)))
+cmmMachOpFoldM (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
+ = Just $ CmmLit (CmmLabelOff lbl (fromIntegral (negate (narrowU rep i))))
-- Comparison of literal with widened operand: perform the comparison
@@ -471,7 +481,7 @@ cmmMachOpFold (MO_Sub _) [CmmLit (CmmLabel lbl), CmmLit (CmmInt i rep)]
#if i386_TARGET_ARCH || x86_64_TARGET_ARCH
-- powerPC NCG has a TODO for I8/I16 comparisons, so don't try
-cmmMachOpFold cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
+cmmMachOpFoldM cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
| -- if the operand is widened:
Just (rep, signed, narrow_fn) <- maybe_conversion conv,
-- and this is a comparison operation:
@@ -479,7 +489,7 @@ cmmMachOpFold cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
-- and the literal fits in the smaller size:
i == narrow_fn rep i
-- then we can do the comparison at the smaller size
- = cmmMachOpFold narrow_cmp [x, CmmLit (CmmInt i rep)]
+ = Just (cmmMachOpFold narrow_cmp [x, CmmLit (CmmInt i rep)])
where
maybe_conversion (MO_UU_Conv from to)
| to > from
@@ -491,7 +501,7 @@ cmmMachOpFold cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
-- don't attempt to apply this optimisation when the source
-- is a float; see #1916
maybe_conversion _ = Nothing
-
+
-- careful (#2080): if the original comparison was signed, but
-- we were doing an unsigned widen, then we must do an
-- unsigned comparison at the smaller size.
@@ -514,94 +524,92 @@ cmmMachOpFold cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
-- We can often do something with constants of 0 and 1 ...
-cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))]
+cmmMachOpFoldM mop args@[x, y@(CmmLit (CmmInt 0 _))]
= case mop of
- MO_Add r -> x
- MO_Sub r -> x
- MO_Mul r -> y
- MO_And r -> y
- MO_Or r -> x
- MO_Xor r -> x
- MO_Shl r -> x
- MO_S_Shr r -> x
- MO_U_Shr r -> x
- MO_Ne r | isComparisonExpr x -> x
- MO_Eq r | Just x' <- maybeInvertCmmExpr x -> x'
- MO_U_Gt r | isComparisonExpr x -> x
- MO_S_Gt r | isComparisonExpr x -> x
- MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth)
- MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth)
- MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth)
- MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth)
- MO_U_Le r | Just x' <- maybeInvertCmmExpr x -> x'
- MO_S_Le r | Just x' <- maybeInvertCmmExpr x -> x'
- other -> CmmMachOp mop args
-
-cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))]
+ MO_Add r -> Just x
+ MO_Sub r -> Just x
+ MO_Mul r -> Just y
+ MO_And r -> Just y
+ MO_Or r -> Just x
+ MO_Xor r -> Just x
+ MO_Shl r -> Just x
+ MO_S_Shr r -> Just x
+ MO_U_Shr r -> Just x
+ MO_Ne r | isComparisonExpr x -> Just x
+ MO_Eq r | Just x' <- maybeInvertCmmExpr x -> Just x'
+ MO_U_Gt r | isComparisonExpr x -> Just x
+ MO_S_Gt r | isComparisonExpr x -> Just x
+ MO_U_Lt r | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 wordWidth)
+ MO_S_Lt r | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 wordWidth)
+ MO_U_Ge r | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 wordWidth)
+ MO_S_Ge r | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 wordWidth)
+ MO_U_Le r | Just x' <- maybeInvertCmmExpr x -> Just x'
+ MO_S_Le r | Just x' <- maybeInvertCmmExpr x -> Just x'
+ other -> Nothing
+
+cmmMachOpFoldM mop args@[x, y@(CmmLit (CmmInt 1 rep))]
= case mop of
- MO_Mul r -> x
- MO_S_Quot r -> x
- MO_U_Quot r -> x
- MO_S_Rem r -> CmmLit (CmmInt 0 rep)
- MO_U_Rem r -> CmmLit (CmmInt 0 rep)
- MO_Ne r | Just x' <- maybeInvertCmmExpr x -> x'
- MO_Eq r | isComparisonExpr x -> x
- MO_U_Lt r | Just x' <- maybeInvertCmmExpr x -> x'
- MO_S_Lt r | Just x' <- maybeInvertCmmExpr x -> x'
- MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth)
- MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth)
- MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth)
- MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth)
- MO_U_Ge r | isComparisonExpr x -> x
- MO_S_Ge r | isComparisonExpr x -> x
- other -> CmmMachOp mop args
+ MO_Mul r -> Just x
+ MO_S_Quot r -> Just x
+ MO_U_Quot r -> Just x
+ MO_S_Rem r -> Just $ CmmLit (CmmInt 0 rep)
+ MO_U_Rem r -> Just $ CmmLit (CmmInt 0 rep)
+ MO_Ne r | Just x' <- maybeInvertCmmExpr x -> Just x'
+ MO_Eq r | isComparisonExpr x -> Just x
+ MO_U_Lt r | Just x' <- maybeInvertCmmExpr x -> Just x'
+ MO_S_Lt r | Just x' <- maybeInvertCmmExpr x -> Just x'
+ MO_U_Gt r | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 wordWidth)
+ MO_S_Gt r | isComparisonExpr x -> Just $ CmmLit (CmmInt 0 wordWidth)
+ MO_U_Le r | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 wordWidth)
+ MO_S_Le r | isComparisonExpr x -> Just $ CmmLit (CmmInt 1 wordWidth)
+ MO_U_Ge r | isComparisonExpr x -> Just x
+ MO_S_Ge r | isComparisonExpr x -> Just x
+ other -> Nothing
-- Now look for multiplication/division by powers of 2 (integers).
-cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))]
+cmmMachOpFoldM mop args@[x, y@(CmmLit (CmmInt n _))]
= case mop of
- MO_Mul rep
- | Just p <- exactLog2 n ->
- cmmMachOpFold (MO_Shl rep) [x, CmmLit (CmmInt p rep)]
- MO_U_Quot rep
- | Just p <- exactLog2 n ->
- cmmMachOpFold (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)]
- MO_S_Quot rep
- | Just p <- exactLog2 n,
- CmmReg _ <- x -> -- We duplicate x below, hence require
- -- it is a reg. FIXME: remove this restriction.
- -- shift right is not the same as quot, because it rounds
- -- to minus infinity, whereasq quot rounds toward zero.
- -- To fix this up, we add one less than the divisor to the
- -- dividend if it is a negative number.
- --
- -- to avoid a test/jump, we use the following sequence:
- -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve)
- -- x2 = y & (divisor-1)
- -- result = (x+x2) >>= log2(divisor)
- -- this could be done a bit more simply using conditional moves,
- -- but we're processor independent here.
- --
- -- we optimise the divide by 2 case slightly, generating
- -- x1 = x >> word_size-1 (unsigned)
- -- return = (x + x1) >>= log2(divisor)
- let
- bits = fromIntegral (widthInBits rep) - 1
- shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
- x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
- x2 = if p == 1 then x1 else
- CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
- x3 = CmmMachOp (MO_Add rep) [x, x2]
- in
- cmmMachOpFold (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)]
- other
- -> unchanged
- where
- unchanged = CmmMachOp mop args
+ MO_Mul rep
+ | Just p <- exactLog2 n ->
+ Just (cmmMachOpFold (MO_Shl rep) [x, CmmLit (CmmInt p rep)])
+ MO_U_Quot rep
+ | Just p <- exactLog2 n ->
+ Just (cmmMachOpFold (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
+ MO_S_Quot rep
+ | Just p <- exactLog2 n,
+ CmmReg _ <- x -> -- We duplicate x below, hence require
+ -- it is a reg. FIXME: remove this restriction.
+ -- shift right is not the same as quot, because it rounds
+ -- to minus infinity, whereas quot rounds toward zero.
+ -- To fix this up, we add one less than the divisor to the
+ -- dividend if it is a negative number.
+ --
+ -- to avoid a test/jump, we use the following sequence:
+ -- x1 = x >> word_size-1 (all 1s if -ve, all 0s if +ve)
+ -- x2 = y & (divisor-1)
+ -- result = (x+x2) >>= log2(divisor)
+ -- this could be done a bit more simply using conditional moves,
+ -- but we're processor independent here.
+ --
+ -- we optimise the divide by 2 case slightly, generating
+ -- x1 = x >> word_size-1 (unsigned)
+ -- return = (x + x1) >>= log2(divisor)
+ let
+ bits = fromIntegral (widthInBits rep) - 1
+ shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
+ x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
+ x2 = if p == 1 then x1 else
+ CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
+ x3 = CmmMachOp (MO_Add rep) [x, x2]
+ in
+ Just (cmmMachOpFold (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)])
+ other
+ -> Nothing
-- Anything else is just too hard.
-cmmMachOpFold mop args = CmmMachOp mop args
+cmmMachOpFoldM _ _ = Nothing
-- -----------------------------------------------------------------------------
-- exactLog2
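cmmMachOpFoldM composes naturally with the new mapExpDeepM from CmmNode: both use Nothing to mean "unchanged", so a Hoopl rewrite can decline to rebuild a node. A minimal sketch (foldNodeExprs is a hypothetical name; this is roughly what a constant-folding rewrite would do):

    -- Fold MachOps everywhere inside one node; Nothing keeps the original.
    foldNodeExprs :: CmmNode e x -> Maybe (CmmNode e x)
    foldNodeExprs = mapExpDeepM f
      where f (CmmMachOp op args) = cmmMachOpFoldM op args
            f _                   = Nothing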
diff --git a/compiler/cmm/CmmCPS.hs b/compiler/cmm/CmmPipeline.hs
index 35eabb3317..1e4809d2b2 100644
--- a/compiler/cmm/CmmCPS.hs
+++ b/compiler/cmm/CmmPipeline.hs
@@ -2,21 +2,24 @@
-- Norman likes local bindings
-- If this module lives on I'd like to get rid of this flag in due course
-module CmmCPS (
- -- | Converts C-- with full proceedures and parameters
- -- to a CPS transformed C-- with the stack made manifest.
- -- Well, sort of.
- protoCmmCPS
+module CmmPipeline (
+ -- | Converts C-- with an implicit stack and native C-- calls into
+ -- optimized, CPS converted and native-call-less C--. The latter
+ -- C-- can be used to generate assembly.
+ cmmPipeline
) where
import CLabel
import Cmm
import CmmDecl
+import CmmLive
import CmmBuildInfoTables
import CmmCommonBlockElim
import CmmProcPoint
import CmmSpillReload
+import CmmRewriteAssignments
import CmmStackLayout
+import CmmContFlowOpt
import OptimizationFuel
import DynFlags
@@ -30,7 +33,7 @@ import Outputable
import StaticFlags
-----------------------------------------------------------------------------
--- |Top level driver for the CPS pass
+-- | Top level driver for C-- pipeline
-----------------------------------------------------------------------------
-- There are two complications here:
-- 1. We need to compile the procedures in two stages because we need
@@ -45,20 +48,27 @@ import StaticFlags
-- 2. We need to thread the module's SRT around when the SRT tables
-- are computed for each procedure.
-- The SRT needs to be threaded because it is grown lazily.
-protoCmmCPS :: HscEnv -- Compilation env including
+-- 3. We run control flow optimizations twice, once before any pipeline
+-- work is done, and once again at the very end on all of the
+-- resulting C-- blocks. EZY: It's unclear whether we actually
+-- need to do the initial pass.
+cmmPipeline :: HscEnv -- Compilation env including
-- dynamic flags: -dcmm-lint -ddump-cps-cmm
-> (TopSRT, [Cmm]) -- SRT table and accumulating list of compiled procs
-> Cmm -- Input C-- with Procedures
-> IO (TopSRT, [Cmm]) -- Output CPS transformed C--
-protoCmmCPS hsc_env (topSRT, rst) (Cmm tops) =
+cmmPipeline hsc_env (topSRT, rst) prog =
do let dflags = hsc_dflags hsc_env
+ (Cmm tops) = runCmmContFlowOpts prog
showPass dflags "CPSZ"
(cafEnvs, tops) <- liftM unzip $ mapM (cpsTop hsc_env) tops
let topCAFEnv = mkTopCAFInfo (concat cafEnvs)
(topSRT, tops) <- foldM (toTops hsc_env topCAFEnv) (topSRT, []) tops
let cmms = Cmm (reverse (concat tops))
dumpIfSet_dyn dflags Opt_D_dump_cps_cmm "Post CPS Cmm" (ppr cmms)
- return (topSRT, cmms : rst)
+ -- SRT is not affected by control flow optimization pass
+ let prog' = map runCmmContFlowOpts (cmms : rst)
+ return (topSRT, prog')
{- [Note global fuel]
~~~~~~~~~~~~~~~~~~~~~
@@ -98,9 +108,8 @@ cpsTop hsc_env (CmmProc h@(TopInfo {stack_info=StackInfo {arg_space=entry_off}})
dump Opt_D_dump_cmmz_rewrite "Post rewrite assignments" g
----------- Eliminate dead assignments -------------------
- -- Remove redundant reloads (and any other redundant asst)
- g <- runOptimization $ removeDeadAssignmentsAndReloads procPoints g
- dump Opt_D_dump_cmmz_dead "Post Dead Assignment Elimination" g
+ g <- runOptimization $ removeDeadAssignments g
+ dump Opt_D_dump_cmmz_dead "Post remove dead assignments" g
----------- Zero dead stack slots (Debug only) ---------------
-- Debugging: stubbing slots on death can cause crashes early
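Because cmmPipeline threads the module's TopSRT through every compiled group (complication 2 above), the natural caller is a monadic fold. A hedged sketch of such a driver (compileAll is a hypothetical name; assumes Control.Monad.foldM and an initial SRT in scope):

    -- Compile a module's C-- groups in order, threading the lazily grown
    -- SRT; the accumulated list of compiled procs comes back in reverse.
    compileAll :: HscEnv -> TopSRT -> [Cmm] -> IO (TopSRT, [Cmm])
    compileAll hsc_env srt0 groups = foldM (cmmPipeline hsc_env) (srt0, []) groups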
diff --git a/compiler/cmm/CmmProcPoint.hs b/compiler/cmm/CmmProcPoint.hs
index fbe979b9ab..0527b6eea0 100644
--- a/compiler/cmm/CmmProcPoint.hs
+++ b/compiler/cmm/CmmProcPoint.hs
@@ -234,7 +234,7 @@ algorithm would be just as good, so that's what we do.
-}
-data Protocol = Protocol Convention CmmFormals Area
+data Protocol = Protocol Convention [CmmFormal] Area
deriving Eq
instance Outputable Protocol where
ppr (Protocol c fs a) = text "Protocol" <+> ppr c <+> ppr fs <+> ppr a
diff --git a/compiler/cmm/CmmRewriteAssignments.hs b/compiler/cmm/CmmRewriteAssignments.hs
new file mode 100644
index 0000000000..c0b7510349
--- /dev/null
+++ b/compiler/cmm/CmmRewriteAssignments.hs
@@ -0,0 +1,628 @@
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE FlexibleContexts #-}
+
+{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
+
+-- TODO: Get rid of this flag:
+{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
+
+-- This module implements generalized code motion for assignments to
+-- local registers, inlining and sinking when possible. It also does
+-- some amount of rewriting for stores to register slots, which are
+-- effectively equivalent to local registers.
+module CmmRewriteAssignments
+ ( rewriteAssignments
+ ) where
+
+import Cmm
+import CmmExpr
+import CmmOpt
+import OptimizationFuel
+import StgCmmUtils
+
+import Control.Monad
+import UniqFM
+import Unique
+import BlockId
+
+import Compiler.Hoopl hiding (Unique)
+import Data.Maybe
+import Prelude hiding (succ, zip)
+
+----------------------------------------------------------------
+--- Main function
+
+rewriteAssignments :: CmmGraph -> FuelUniqSM CmmGraph
+rewriteAssignments g = do
+ -- Because we need to act on forwards and backwards information, we
+ -- first perform usage analysis and bake this information into the
+ -- graph (backwards transform), and then do a forwards transform
+ -- to actually perform inlining and sinking.
+ g' <- annotateUsage g
+ g'' <- liftM fst $ dataflowPassFwd g' [(g_entry g, fact_bot assignmentLattice)] $
+ analRewFwd assignmentLattice
+ assignmentTransfer
+ (assignmentRewrite `thenFwdRw` machOpFoldRewrite)
+ return (modifyGraph eraseRegUsage g'')
+
+----------------------------------------------------------------
+--- Usage information
+
+-- We decorate all register assignments with approximate usage
+-- information, that is, the maximum number of times the register is
+-- referenced while it is live along all outgoing control paths.
+-- This analysis provides a precise upper bound for usage, so if a
+-- register is never referenced, we can remove it, as that assignment is
+-- dead.
+--
+-- This analysis is very similar to liveness analysis; we just keep a
+-- little extra info. (Maybe we should move it to CmmLive, and subsume
+-- the old liveness analysis.)
+--
+-- There are a few subtleties here:
+--
+-- - If a register goes dead, and then becomes live again, the usages
+-- of the disjoint live range don't count towards the original range.
+--
+-- a = 1; // used once
+-- b = a;
+-- a = 2; // used once
+-- c = a;
+--
+-- - A register may be used multiple times, but these all reside in
+-- different control paths, such that any given execution only uses
+-- it once. In that case, the usage count may still be 1.
+--
+-- a = 1; // used once
+-- if (b) {
+-- c = a + 3;
+-- } else {
+-- c = a + 1;
+-- }
+--
+-- This policy corresponds to an inlining strategy that does not
+-- duplicate computation but may increase binary size.
+--
+-- - If we naively implement a usage count, we have a counting to
+-- infinity problem across joins. Furthermore, knowing that
+-- something is used 2 or more times in one runtime execution isn't
+-- particularly useful for optimizations (inlining may be beneficial,
+-- but there's no way of knowing that without register pressure
+-- information.)
+--
+-- while (...) {
+-- // first iteration, b used once
+-- // second iteration, b used twice
+-- // third iteration ...
+-- a = b;
+-- }
+-- // b used zero times
+--
+-- There is an orthogonal question, which is that for every runtime
+-- execution, the register may be used only once, but if we inline it
+-- in every conditional path, the binary size might increase a lot.
+-- But tracking this information would be tricky, because it violates
+-- the finite lattice restriction Hoopl requires for termination;
+-- we'd thus need to supply an alternate proof, which is probably
+-- something we should defer until we actually have an optimization
+-- that would take advantage of this. (This might also interact
+-- strangely with liveness information.)
+--
+-- a = ...;
+-- // a is used one time, but in X different paths
+-- case (b) of
+-- 1 -> ... a ...
+-- 2 -> ... a ...
+-- 3 -> ... a ...
+-- ...
+--
+-- - Memory stores to local register slots (CmmStore (CmmStackSlot
+-- (LocalReg _) 0) _) have similar behavior to local registers,
+-- in that these locations are all disjoint from each other. Thus,
+-- we attempt to inline them too. Note that because these are only
+-- generated as part of the spilling process, most of the time this
+-- will refer to a local register and the assignment will immediately
+-- die on the subsequent call. However, if we manage to replace that
+-- local register with a memory location, it means that we've managed
+-- to preserve a value on the stack without having to move it to
+-- another memory location again! We collect usage information just
+-- to be safe in case extra computation is involved.
+
+data RegUsage = SingleUse | ManyUse
+ deriving (Ord, Eq, Show)
+-- Absence in map = ZeroUse
+
+{-
+-- minBound is bottom, maxBound is top, least-upper-bound is max
+-- ToDo: Put this in Hoopl. Note that this isn't as useful as I
+-- originally hoped, because you usually want to leave out the bottom
+-- element when you have things like this put in maps. Maybe f is
+-- useful on its own as a combining function.
+boundedOrdLattice :: (Bounded a, Ord a) => String -> DataflowLattice a
+boundedOrdLattice n = DataflowLattice n minBound f
+ where f _ (OldFact x) (NewFact y)
+ | x >= y = (NoChange, x)
+ | otherwise = (SomeChange, y)
+-}
+
+-- Custom node type we'll rewrite to. CmmAssign nodes to local
+-- registers are replaced with AssignLocal nodes.
+data WithRegUsage n e x where
+ -- Plain will not contain CmmAssign nodes immediately after
+ -- transformation, but as we rewrite assignments, we may have
+ -- assignments here: these are assignments that should not be
+ -- rewritten!
+ Plain :: n e x -> WithRegUsage n e x
+ AssignLocal :: LocalReg -> CmmExpr -> RegUsage -> WithRegUsage n O O
+
+instance UserOfLocalRegs (n e x) => UserOfLocalRegs (WithRegUsage n e x) where
+ foldRegsUsed f z (Plain n) = foldRegsUsed f z n
+ foldRegsUsed f z (AssignLocal _ e _) = foldRegsUsed f z e
+
+instance DefinerOfLocalRegs (n e x) => DefinerOfLocalRegs (WithRegUsage n e x) where
+ foldRegsDefd f z (Plain n) = foldRegsDefd f z n
+ foldRegsDefd f z (AssignLocal r _ _) = foldRegsDefd f z r
+
+instance NonLocal n => NonLocal (WithRegUsage n) where
+ entryLabel (Plain n) = entryLabel n
+ successors (Plain n) = successors n
+
+liftRegUsage :: Graph n e x -> Graph (WithRegUsage n) e x
+liftRegUsage = mapGraph Plain
+
+eraseRegUsage :: Graph (WithRegUsage CmmNode) e x -> Graph CmmNode e x
+eraseRegUsage = mapGraph f
+ where f :: WithRegUsage CmmNode e x -> CmmNode e x
+ f (AssignLocal l e _) = CmmAssign (CmmLocal l) e
+ f (Plain n) = n
+
+type UsageMap = UniqFM RegUsage
+
+usageLattice :: DataflowLattice UsageMap
+usageLattice = DataflowLattice "usage counts for registers" emptyUFM (joinUFM f)
+ where f _ (OldFact x) (NewFact y)
+ | x >= y = (NoChange, x)
+ | otherwise = (SomeChange, y)
+
+-- We reuse the names 'gen' and 'kill', although we're doing something
+-- slightly different from the Dragon Book
+usageTransfer :: BwdTransfer (WithRegUsage CmmNode) UsageMap
+usageTransfer = mkBTransfer3 first middle last
+ where first _ f = f
+ middle :: WithRegUsage CmmNode O O -> UsageMap -> UsageMap
+ middle n f = gen_kill n f
+ last :: WithRegUsage CmmNode O C -> FactBase UsageMap -> UsageMap
+ -- Checking for CmmCall/CmmForeignCall is unnecessary, because
+ -- spills/reloads have already occurred by the time we do this
+ -- analysis.
+ -- XXX Deprecated warning is puzzling: what label are we
+ -- supposed to use?
+ -- ToDo: With a bit more cleverness here, we can avoid
+ -- disappointment and heartbreak associated with the inability
+ -- to inline into CmmCall and CmmForeignCall by
+ -- over-estimating the usage to be ManyUse.
+ last n f = gen_kill n (joinOutFacts usageLattice n f)
+ gen_kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
+ gen_kill a = gen a . kill a
+ gen :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
+ gen a f = foldRegsUsed increaseUsage f a
+ kill :: WithRegUsage CmmNode e x -> UsageMap -> UsageMap
+ kill a f = foldRegsDefd delFromUFM f a
+ increaseUsage f r = addToUFM_C combine f r SingleUse
+ where combine _ _ = ManyUse
+
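The count saturates rather than growing without bound: the first reference enters the map as SingleUse, and addToUFM_C's combining function collapses every later reference into ManyUse. A standalone copy of that idea (bumpUsage is a hypothetical name):

    -- First use of r:   key absent    -> inserted as SingleUse
    -- Second use of r:  combine fires -> ManyUse
    -- Further uses:     combine again -> ManyUse (saturated)
    bumpUsage :: UniqFM RegUsage -> LocalReg -> UniqFM RegUsage
    bumpUsage m r = addToUFM_C (\_old _new -> ManyUse) m r SingleUse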
+usageRewrite :: BwdRewrite FuelUniqSM (WithRegUsage CmmNode) UsageMap
+usageRewrite = mkBRewrite3 first middle last
+ where first _ _ = return Nothing
+ middle :: Monad m => WithRegUsage CmmNode O O -> UsageMap -> m (Maybe (Graph (WithRegUsage CmmNode) O O))
+ middle (Plain (CmmAssign (CmmLocal l) e)) f
+ = return . Just
+ $ case lookupUFM f l of
+ Nothing -> emptyGraph
+ Just usage -> mkMiddle (AssignLocal l e usage)
+ middle _ _ = return Nothing
+ last _ _ = return Nothing
+
+type CmmGraphWithRegUsage = GenCmmGraph (WithRegUsage CmmNode)
+annotateUsage :: CmmGraph -> FuelUniqSM (CmmGraphWithRegUsage)
+annotateUsage vanilla_g =
+ let g = modifyGraph liftRegUsage vanilla_g
+ in liftM fst $ dataflowPassBwd g [(g_entry g, fact_bot usageLattice)] $
+ analRewBwd usageLattice usageTransfer usageRewrite
+
+----------------------------------------------------------------
+--- Assignment tracking
+
+-- The idea is to maintain a map of local registers to expressions,
+-- such that the value of that register is the same as the value of that
+-- expression at any given time. We can then do several things,
+-- as described by Assignment.
+
+-- Assignment describes the various optimizations that are valid
+-- at a given point in the program.
+data Assignment =
+-- This assignment can always be inlined. It is cheap or single-use.
+ AlwaysInline CmmExpr
+-- This assignment should be sunk down to its first use. (This will
+-- increase code size if the register is used in multiple control flow
+-- paths, but won't increase execution time, and the reduction of
+-- register pressure is worth it, I think.)
+ | AlwaysSink CmmExpr
+-- We cannot safely optimize occurrences of this local register. (This
+-- corresponds to top in the lattice structure.)
+ | NeverOptimize
+
+-- Extract the expression that is being assigned to
+xassign :: Assignment -> Maybe CmmExpr
+xassign (AlwaysInline e) = Just e
+xassign (AlwaysSink e) = Just e
+xassign NeverOptimize = Nothing
+
+-- Extracts the expression, but only if they're the same constructor
+xassign2 :: (Assignment, Assignment) -> Maybe (CmmExpr, CmmExpr)
+xassign2 (AlwaysInline e, AlwaysInline e') = Just (e, e')
+xassign2 (AlwaysSink e, AlwaysSink e') = Just (e, e')
+xassign2 _ = Nothing
+
+-- Note: We'd like to make decisions about "not optimizing" as soon as
+-- possible, because this will make running the transfer function more
+-- efficient.
+type AssignmentMap = UniqFM Assignment
+
+assignmentLattice :: DataflowLattice AssignmentMap
+assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add)
+ where add _ (OldFact old) (NewFact new)
+ = case (old, new) of
+ (NeverOptimize, _) -> (NoChange, NeverOptimize)
+ (_, NeverOptimize) -> (SomeChange, NeverOptimize)
+ (xassign2 -> Just (e, e'))
+ | e == e' -> (NoChange, old)
+ | otherwise -> (SomeChange, NeverOptimize)
+ _ -> (SomeChange, NeverOptimize)
+
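Spelled out, the join keeps an assignment only when both sides agree exactly; NeverOptimize is sticky on either side, and a constructor or expression mismatch also falls to NeverOptimize. A flag-free restatement of add (joinAssign is hypothetical; uses the module's ViewPatterns):

    joinAssign :: Assignment -> Assignment -> Assignment
    joinAssign old new = case (old, new) of
      (NeverOptimize, _)                   -> NeverOptimize
      (_, NeverOptimize)                   -> NeverOptimize
      (xassign2 -> Just (e, e')) | e == e' -> old           -- identical: keep
      _                                    -> NeverOptimize -- any disagreement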
+-- Deletes sinks from the assignment map, because /this/ node is the
+-- place where they will be sunk to.
+deleteSinks :: UserOfLocalRegs n => n -> AssignmentMap -> AssignmentMap
+deleteSinks n m = foldRegsUsed (adjustUFM f) m n
+ where f (AlwaysSink _) = NeverOptimize
+ f old = old
+
+-- Invalidates any expressions that use a register.
+invalidateUsersOf :: CmmReg -> AssignmentMap -> AssignmentMap
+-- foldUFM_Directly :: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a
+invalidateUsersOf reg m = foldUFM_Directly f m m -- [foldUFM performance]
+ where f u (xassign -> Just e) m | reg `regUsedIn` e = addToUFM_Directly m u NeverOptimize
+ f _ _ m = m
+{- This requires the entire spine of the map to be continually rebuilt,
+ - which causes crazy memory usage!
+invalidateUsersOf reg = mapUFM (invalidateUsers' reg)
+ where invalidateUsers' reg (xassign -> Just e) | reg `regUsedIn` e = NeverOptimize
+ invalidateUsers' _ old = old
+-}
+
+-- Note [foldUFM performance]
+-- These calls to fold UFM no longer leak memory, but they do cause
+-- pretty killer amounts of allocation. So they'll be something to
+-- optimize; we need an algorithmic change to prevent us from having to
+-- traverse the /entire/ map continually.
+
+middleAssignment :: WithRegUsage CmmNode O O -> AssignmentMap -> AssignmentMap
+
+-- Algorithm for annotated assignments:
+-- 1. Delete any sinking assignments that were used by this instruction
+-- 2. Add the assignment to our list of valid local assignments with
+-- the correct optimization policy.
+-- 3. Look for all assignments that reference that register and
+-- invalidate them.
+middleAssignment n@(AssignLocal r e usage) assign
+ = invalidateUsersOf (CmmLocal r) . add . deleteSinks n $ assign
+ where add m = addToUFM m r
+ $ case usage of
+ SingleUse -> AlwaysInline e
+ ManyUse -> decide e
+ decide CmmLit{} = AlwaysInline e
+ decide CmmReg{} = AlwaysInline e
+ decide CmmLoad{} = AlwaysSink e
+ decide CmmStackSlot{} = AlwaysSink e
+ decide CmmMachOp{} = AlwaysSink e
+ -- We'll always inline simple operations on the global
+ -- registers, to reduce register pressure: Sp - 4 or Hp - 8
+ -- EZY: Justify this optimization more carefully.
+ decide CmmRegOff{} = AlwaysInline e
+
+-- Algorithm for unannotated assignments of global registers:
+-- 1. Delete any sinking assignments that were used by this instruction
+-- 2. Look for all assignments that reference this register and
+-- invalidate them.
+middleAssignment (Plain n@(CmmAssign reg@(CmmGlobal _) _)) assign
+ = invalidateUsersOf reg . deleteSinks n $ assign
+
+-- Algorithm for unannotated assignments of *local* registers: do
+-- nothing (it's a reload, so no state should have changed)
+middleAssignment (Plain (CmmAssign (CmmLocal _) _)) assign = assign
+
+-- Algorithm for stores:
+-- 1. Delete any sinking assignments that were used by this instruction
+-- 2. Look for all assignments that load from memory locations that
+-- were clobbered by this store and invalidate them.
+middleAssignment (Plain n@(CmmStore lhs rhs)) assign
+ = let m = deleteSinks n assign
+ in foldUFM_Directly f m m -- [foldUFM performance]
+ where f u (xassign -> Just x) m | (lhs, rhs) `clobbers` (u, x) = addToUFM_Directly m u NeverOptimize
+ f _ _ m = m
+{- Also leaky
+ = mapUFM_Directly p . deleteSinks n $ assign
+ -- ToDo: There's a missed opportunity here: even if a memory
+ -- access we're attempting to sink gets clobbered at some
+ -- location, it's still /better/ to sink it to right before the
+ -- point where it gets clobbered. How might we do this?
+ -- Unfortunately, it's too late to change the assignment...
+ where p r (xassign -> Just x) | (lhs, rhs) `clobbers` (r, x) = NeverOptimize
+ p _ old = old
+-}
+
+-- Assumption: Unsafe foreign calls don't clobber memory
+-- Since foreign calls clobber caller-saved registers, we need to
+-- invalidate any assignments that reference those global registers.
+-- This is kind of expensive. (One way to optimize this might be to
+-- store extra information about expressions that allow this and other
+-- checks to be done cheaply.)
+middleAssignment (Plain n@(CmmUnsafeForeignCall{})) assign
+ = deleteCallerSaves (foldRegsDefd (\m r -> addToUFM m r NeverOptimize) (deleteSinks n assign) n)
+ where deleteCallerSaves m = foldUFM_Directly f m m
+ f u (xassign -> Just x) m | wrapRecExpf g x False = addToUFM_Directly m u NeverOptimize
+ f _ _ m = m
+ g (CmmReg (CmmGlobal r)) _ | callerSaves r = True
+ g (CmmRegOff (CmmGlobal r) _) _ | callerSaves r = True
+ g _ b = b
+
+middleAssignment (Plain (CmmComment {})) assign
+ = assign
+
+-- Assumptions:
+-- * Writes using Hp do not overlap with any other memory locations
+-- (An important invariant being relied on here is that we only ever
+-- use Hp to allocate values on the heap, which appears to be the
+-- case given hpReg usage, and that our heap writing code doesn't
+-- do anything stupid like overlapping writes.)
+-- * Stack slots do not overlap with any other memory locations
+-- * Stack slots for different areas do not overlap
+-- * Stack slots within the same area and different offsets may
+-- overlap; we need to do a size check (see 'overlaps').
+-- * Register slots only overlap with themselves. (But this shouldn't
+-- happen in practice, because we'll fail to inline a reload across
+-- the next spill.)
+-- * Non stack-slot stores always conflict with each other. (This is
+-- not always the case; we could probably do something special for Hp)
+clobbers :: (CmmExpr, CmmExpr) -- (lhs, rhs) of clobbering CmmStore
+ -> (Unique, CmmExpr) -- (register, expression) that may be clobbered
+ -> Bool
+clobbers (CmmRegOff (CmmGlobal Hp) _, _) (_, _) = False
+clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False
+-- ToDo: Also catch MachOp case
+clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _)
+ | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?)
+clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
+ where f (CmmLoad (CmmStackSlot (CallArea a') o') t)
+ = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
+ f (CmmLoad e _) = containsStackSlot e
+ f (CmmMachOp _ es) = or (map f es)
+ f _ = False
+ -- Maybe there's an invariant broken if this actually ever
+ -- returns True
+ containsStackSlot (CmmLoad{}) = True -- load of a load, all bets off
+ containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
+ containsStackSlot (CmmStackSlot{}) = True
+ containsStackSlot _ = False
+clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr
+ where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l'
+ f _ = False
+clobbers _ (_, e) = f e
+ where f (CmmLoad (CmmStackSlot _ _) _) = False
+ f (CmmLoad{}) = True -- conservative
+ f (CmmMachOp _ es) = or (map f es)
+ f _ = False
+
+-- Check for memory overlapping.
+-- Diagram:
+--        4            8           12
+--        s     -w-    o
+--        [    I32    ]
+--        [         F64           ]
+--        s'        -w'-          o'
+type CallSubArea = (AreaId, Int, Int) -- area, offset, width
+overlaps :: CallSubArea -> CallSubArea -> Bool
+overlaps (a, _, _) (a', _, _) | a /= a' = False
+overlaps (_, o, w) (_, o', w') =
+ let s = o - w
+ s' = o' - w'
+ in (s' < o) && (s < o') -- Not LTE, because [ I32 ][ I32 ] is OK
+
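Reading the diagram: o is the high end of a sub-area and o - w its low end, so two slots in the same area collide exactly when each low end lies below the other's high end. Worked instances (offsets hypothetical):

    -- overlaps (a, 8, 4) (a, 12, 8):
    --   s = 8 - 4 = 4, s' = 12 - 8 = 4
    --   s' < o (4 < 8) and s < o' (4 < 12)  => True, they share [4,8)
    -- overlaps (a, 8, 4) (a, 12, 4):
    --   s = 4, s' = 8; s' < o is 8 < 8      => False: adjacent slots, not LTE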
+lastAssignment :: WithRegUsage CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)]
+lastAssignment (Plain (CmmCall _ (Just k) _ _ _)) assign = [(k, invalidateVolatile k assign)]
+lastAssignment (Plain (CmmForeignCall {succ=k})) assign = [(k, invalidateVolatile k assign)]
+lastAssignment l assign = map (\id -> (id, deleteSinks l assign)) $ successors l
+
+-- Invalidates any expressions that have volatile contents: essentially,
+-- all terminals volatile except for literals and loads of stack slots
+-- that do not correspond to the call area for 'k' (the current call
+-- area is volatile because overflow return parameters may be written
+-- there.)
+-- Note: mapUFM could be expensive, but hopefully block boundaries
+-- aren't too common. If it is a problem, replace with something more
+-- clever.
+invalidateVolatile :: BlockId -> AssignmentMap -> AssignmentMap
+invalidateVolatile k m = mapUFM p m
+ where p (AlwaysInline e) = if exp e then AlwaysInline e else NeverOptimize
+ where exp CmmLit{} = True
+ exp (CmmLoad (CmmStackSlot (CallArea (Young k')) _) _)
+ | k' == k = False
+ exp (CmmLoad (CmmStackSlot _ _) _) = True
+ exp (CmmMachOp _ es) = and (map exp es)
+ exp _ = False
+ p _ = NeverOptimize -- probably shouldn't happen with AlwaysSink
+
+assignmentTransfer :: FwdTransfer (WithRegUsage CmmNode) AssignmentMap
+assignmentTransfer = mkFTransfer3 (flip const) middleAssignment ((mkFactBase assignmentLattice .) . lastAssignment)
+
+-- Note [Soundness of inlining]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- In the Hoopl paper, the soundness condition on rewrite functions is
+-- described as follows:
+--
+-- "If it replaces a node n by a replacement graph g, then g must
+-- be observationally equivalent to n under the assumptions
+-- expressed by the incoming dataflow fact f. Moreover, analysis of
+-- g must produce output fact(s) that are at least as informative
+-- as the fact(s) produced by applying the transfer function to n."
+--
+-- We consider the second condition in more detail here. It says that,
+-- given the rewrite R(n, f) = g, for any incoming fact f' consistent
+-- with f (f' >= f), running the transfer function gives T(f', n) <= T(f', g).
+-- For inlining, this is not necessarily the case:
+--
+-- n = "x = a + 2"
+-- f = f' = {a = y}
+-- g = "x = y + 2"
+-- T(f', n) = {x = a + 2, a = y}
+-- T(f', g) = {x = y + 2, a = y}
+--
+-- y + 2 and a + 2 are not obviously comparable, and a naive
+-- implementation of the lattice would say they are incomparable.
+-- At best, this means we may be over-conservative, at worst, it means
+-- we may not terminate.
+--
+-- However, in the original Lerner-Grove-Chambers paper, soundness and
+-- termination are separated, and only equivalence of facts is required
+-- for soundness. Monotonicity of the transfer function is not required
+-- for termination (as the calculation of the least upper bound prevents
+-- this from being a problem), but it means we won't necessarily find
+-- the least fixed point.
+
+-- Note [Coherency of annotations]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Is it possible for our usage annotations to become invalid after we
+-- start performing transformations? As the usage info only provides
+-- an upper bound, we only need to consider cases where the usage of
+-- a register may increase due to transformations: e.g. a reference
+-- to a local register in an AlwaysInline or AlwaysSink instruction whose
+-- originating assignment was single-use (we don't care about the
+-- many-use case, because it is the top of the lattice). But such a
+-- case is not possible, because we always inline any single-use
+-- register. QED.
+--
+-- TODO: A useful lint option would be to check the invariant that
+-- there is never a local register in the assignment map that is
+-- single-use.
+
+-- Note [Soundness of store rewriting]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- The soundness of this rewrite depends on the invariant that no
+-- assignment is made to the local register before its store is accessed.
+-- This is clearly true with unoptimized spill-reload code, and as the
+-- store will always be rewritten first (if possible), there is no chance
+-- of it being propagated down before getting written (possibly with
+-- incorrect values from the assignment map, due to reassignment of the
+-- local register). Note that this argument relies on global structure,
+-- so the rewrite is probably not locally sound.
+
+assignmentRewrite :: FwdRewrite FuelUniqSM (WithRegUsage CmmNode) AssignmentMap
+assignmentRewrite = mkFRewrite3 first middle last
+ where
+ first _ _ = return Nothing
+ middle :: WithRegUsage CmmNode O O -> AssignmentMap -> GenCmmReplGraph (WithRegUsage CmmNode) O O
+ middle (Plain m) assign = return $ rewrite assign (precompute assign m) mkMiddle m
+ middle (AssignLocal l e u) assign = return $ rewriteLocal assign (precompute assign (CmmAssign (CmmLocal l) e)) l e u
+ last (Plain l) assign = return $ rewrite assign (precompute assign l) mkLast l
+ -- Tuple is (inline?, reloads for sinks)
+ precompute :: AssignmentMap -> CmmNode O x -> (Bool, [WithRegUsage CmmNode O O])
+ precompute assign n = foldRegsUsed f (False, []) n -- duplicates are harmless
+ where f (i, l) r = case lookupUFM assign r of
+ Just (AlwaysSink e) -> (i, (Plain (CmmAssign (CmmLocal r) e)):l)
+ Just (AlwaysInline _) -> (True, l)
+ Just NeverOptimize -> (i, l)
+ -- This case can show up when we have
+ -- limited optimization fuel.
+ Nothing -> (i, l)
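+          -- For example (hypothetical facts): if n uses registers a and b
+          -- with {a |-> AlwaysSink e, b |-> AlwaysInline e'}, precompute
+          -- yields (True, [Plain (CmmAssign (CmmLocal a) e)]): we materialize
+          -- the sunk assignment for a here and inline e' for b.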
+ rewrite :: AssignmentMap
+ -> (Bool, [WithRegUsage CmmNode O O])
+ -> (WithRegUsage CmmNode O x -> Graph (WithRegUsage CmmNode) O x)
+ -> CmmNode O x
+ -> Maybe (Graph (WithRegUsage CmmNode) O x)
+ rewrite _ (False, []) _ _ = Nothing
+          -- Note [CmmCall Inline Hack]
+          -- Conservative hack: don't do any inlining on what will
+          -- be translated into an OldCmm CmmCall, since the code
+          -- produced here tends to be unproblematic as-is, and I need
+          -- to write lint passes to ensure that we don't put anything
+          -- in the arguments that could be construed as a global
+          -- register by some later translation pass. (For example,
+          -- slots will turn into dereferences of Sp.) See [Register
+          -- parameter passing].
+          -- ToDo: Fix this up to only bail out if all inlines were for
+          -- CmmExprs with global registers (we can't use the
+          -- straightforward mapExpDeep call in this case.) ToDo: We miss
+          -- an opportunity here: all possible inlinings should
+          -- instead be sunk.
+ rewrite _ (True, []) _ n | not (inlinable n) = Nothing -- see [CmmCall Inline Hack]
+ rewrite assign (i, xs) mk n = Just $ mkMiddles xs <*> mk (Plain (inline i assign n))
+
+ rewriteLocal :: AssignmentMap
+ -> (Bool, [WithRegUsage CmmNode O O])
+ -> LocalReg -> CmmExpr -> RegUsage
+ -> Maybe (Graph (WithRegUsage CmmNode) O O)
+ rewriteLocal _ (False, []) _ _ _ = Nothing
+ rewriteLocal assign (i, xs) l e u = Just $ mkMiddles xs <*> mkMiddle n'
+ where n' = AssignLocal l e' u
+ e' = if i then wrapRecExp (inlineExp assign) e else e
+ -- inlinable check omitted, since we can always inline into
+ -- assignments.
+
+ inline :: Bool -> AssignmentMap -> CmmNode e x -> CmmNode e x
+ inline False _ n = n
+ inline True _ n | not (inlinable n) = n -- see [CmmCall Inline Hack]
+ inline True assign n = mapExpDeep (inlineExp assign) n
+
+ inlineExp assign old@(CmmReg (CmmLocal r))
+ = case lookupUFM assign r of
+ Just (AlwaysInline x) -> x
+ _ -> old
+ inlineExp assign old@(CmmRegOff (CmmLocal r) i)
+ = case lookupUFM assign r of
+ Just (AlwaysInline x) ->
+ case x of
+ (CmmRegOff r' i') -> CmmRegOff r' (i + i')
+ _ -> CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
+ where rep = typeWidth (localRegType r)
+ _ -> old
+ -- See Note [Soundness of store rewriting]
+ inlineExp assign old@(CmmLoad (CmmStackSlot (RegSlot r) _) _)
+ = case lookupUFM assign r of
+ Just (AlwaysInline x) -> x
+ _ -> old
+ inlineExp _ old = old
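+     -- For example (hypothetical fact): given
+     --   {r |-> AlwaysInline (CmmRegOff (CmmGlobal Sp) 8)},
+     -- inlineExp rewrites CmmRegOff (CmmLocal r) 4 to
+     -- CmmRegOff (CmmGlobal Sp) 12, collapsing the two offsets.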
+
+ inlinable :: CmmNode e x -> Bool
+ inlinable (CmmCall{}) = False
+ inlinable (CmmForeignCall{}) = False
+ inlinable (CmmUnsafeForeignCall{}) = False
+ inlinable _ = True
+
+-- Need to interleave this with inlining, because MachOp folding results
+-- in literals, which we can inline more aggressively, and inlining
+-- gives us opportunities for more folding. However, we don't need any
+-- facts to do MachOp folding.
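+-- For example (hypothetical fact): inlining {x |-> 2} into (x + 3)
+-- yields (2 + 3), which cmmMachOpFoldM folds to 5, a literal that a
+-- subsequent inlining pass can propagate further.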
+machOpFoldRewrite :: FwdRewrite FuelUniqSM (WithRegUsage CmmNode) a
+machOpFoldRewrite = mkFRewrite3 first middle last
+ where first _ _ = return Nothing
+ middle :: WithRegUsage CmmNode O O -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O O
+ middle (Plain m) _ = return (fmap (mkMiddle . Plain) (foldNode m))
+ middle (AssignLocal l e r) _ = return (fmap f (wrapRecExpM foldExp e))
+ where f e' = mkMiddle (AssignLocal l e' r)
+ last :: WithRegUsage CmmNode O C -> a -> GenCmmReplGraph (WithRegUsage CmmNode) O C
+ last (Plain l) _ = return (fmap (mkLast . Plain) (foldNode l))
+ foldNode :: CmmNode e x -> Maybe (CmmNode e x)
+ foldNode n = mapExpDeepM foldExp n
+ foldExp (CmmMachOp op args) = cmmMachOpFoldM op args
+ foldExp _ = Nothing
+
+-- ToDo: Outputable instance for UsageMap and AssignmentMap
diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs
index 2dcfb027a3..3033e7b421 100644
--- a/compiler/cmm/CmmSpillReload.hs
+++ b/compiler/cmm/CmmSpillReload.hs
@@ -1,22 +1,14 @@
-{-# LANGUAGE GADTs, NoMonoLocalBinds, FlexibleContexts, ViewPatterns #-}
+{-# LANGUAGE GADTs, NoMonoLocalBinds, FlexibleContexts #-}
-- Norman likes local bindings
-- If this module lives on I'd like to get rid of this flag in due course
{-# OPTIONS_GHC -fno-warn-warnings-deprecations #-}
+
+-- TODO: Get rid of this flag:
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
-#if __GLASGOW_HASKELL__ >= 701
--- GHC 7.0.1 improved incomplete pattern warnings with GADTs
-{-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
-#endif
module CmmSpillReload
- ( DualLive(..)
- , dualLiveLattice, dualLiveTransfers, dualLiveness
- --, insertSpillsAndReloads --- XXX todo check live-in at entry against formals
- , dualLivenessWithInsertion
-
- , rewriteAssignments
- , removeDeadAssignmentsAndReloads
+ ( dualLivenessWithInsertion
)
where
@@ -25,14 +17,11 @@ import Cmm
import CmmExpr
import CmmLive
import OptimizationFuel
-import StgCmmUtils
import Control.Monad
import Outputable hiding (empty)
import qualified Outputable as PP
import UniqSet
-import UniqFM
-import Unique
import Compiler.Hoopl hiding (Unique)
import Data.Maybe
@@ -40,38 +29,36 @@ import Prelude hiding (succ, zip)
{- Note [Overview of spill/reload]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The point of this module is to insert spills and reloads to
-establish the invariant that at a call (or at any proc point with
-an established protocol) all live variables not expected in
-registers are sitting on the stack. We use a backward analysis to
-insert spills and reloads. It should be followed by a
-forward transformation to sink reloads as deeply as possible, so as
-to reduce register pressure.
+The point of this module is to insert spills and reloads to establish
+the invariant that, at a call (or any proc point with an established
+protocol), all live variables not expected in registers are sitting on
+the stack. We use a backward dual liveness analysis (both traditional
+register liveness and register-slot liveness on the stack) to insert
+spills and reloads. It should be followed by a forward transformation
+to sink reloads as deeply as possible, so as to reduce register
+pressure: this transformation is performed by CmmRewriteAssignments.
A variable can be expected to be live in a register, live on the
stack, or both. This analysis ensures that spills and reloads are
inserted as needed to make sure that every live variable needed
-after a call is available on the stack. Spills are pushed back to
-their reaching definitions, but reloads are dropped wherever needed
-and will have to be sunk by a later forward transformation.
+after a call is available on the stack. Spills are placed immediately
+after their reaching definitions, but reloads are placed immediately
+after a return from a call (i.e. at the entry point of the continuation).
+
+Note that we offer no guarantees about the consistency of the value
+in memory and the value in the register, except that they are
+equal across calls/procpoints. If the variable is changed, this
+mapping breaks; but as the original value of the register may still
+be useful in a different context, the memory location is not updated.
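+
+Schematically (not literal Cmm), for a variable x live across a call:
+
+    before:  x = e; call f() returns to L; L: ... x ...
+    after:   x = e; I32[SLOT(x)] = x;        -- spill after the definition
+             call f() returns to L;
+             L: x = I32[SLOT(x)]; ... x ...  -- reload at the continuation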
-}
data DualLive = DualLive { on_stack :: RegSet, in_regs :: RegSet }
-dualUnion :: DualLive -> DualLive -> DualLive
-dualUnion (DualLive s r) (DualLive s' r') =
- DualLive (s `unionUniqSets` s') (r `unionUniqSets` r')
-
-dualUnionList :: [DualLive] -> DualLive
-dualUnionList ls = DualLive ss rs
- where ss = unionManyUniqSets $ map on_stack ls
- rs = unionManyUniqSets $ map in_regs ls
-
changeStack, changeRegs :: (RegSet -> RegSet) -> DualLive -> DualLive
changeStack f live = live { on_stack = f (on_stack live) }
changeRegs f live = live { in_regs = f (in_regs live) }
-
dualLiveLattice :: DataflowLattice DualLive
dualLiveLattice = DataflowLattice "variables live in registers and on stack" empty add
where empty = DualLive emptyRegSet emptyRegSet
@@ -85,21 +72,24 @@ dualLivenessWithInsertion :: BlockSet -> CmmGraph -> FuelUniqSM CmmGraph
dualLivenessWithInsertion procPoints g =
liftM fst $ dataflowPassBwd g [] $ analRewBwd dualLiveLattice
(dualLiveTransfers (g_entry g) procPoints)
- (insertSpillAndReloadRewrites g procPoints)
-
-dualLiveness :: BlockSet -> CmmGraph -> FuelUniqSM (BlockEnv DualLive)
-dualLiveness procPoints g =
- liftM snd $ dataflowPassBwd g [] $ analBwd dualLiveLattice $ dualLiveTransfers (g_entry g) procPoints
+ (insertSpillsAndReloads g procPoints)
+
+-- Note [Live registers on entry to procpoints]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Remember that the transfer function is only ever run on the rewritten
+-- version of a graph, and the rewrite function for spills and reloads
+-- enforces the invariant that no local registers are live on entry to
+-- a procpoint. Accordingly, we check for this invariant here. An old
+-- version of this code incorrectly claimed that any live registers were
+-- live on the stack before entering the function: this is wrong, but
+-- didn't cause bugs because it was never actually invoked.
dualLiveTransfers :: BlockId -> BlockSet -> (BwdTransfer CmmNode DualLive)
dualLiveTransfers entry procPoints = mkBTransfer3 first middle last
where first :: CmmNode C O -> DualLive -> DualLive
- first (CmmEntry id) live = check live id $ -- live at procPoint => spill
- if id /= entry && setMember id procPoints
- then DualLive { on_stack = on_stack live `plusRegSet` in_regs live
- , in_regs = emptyRegSet }
- else live
- where check live id x = if id == entry then noLiveOnEntry id (in_regs live) x else x
+ first (CmmEntry id) live -- See Note [Live registers on entry to procpoints]
+ | id == entry || setMember id procPoints = noLiveOnEntry id (in_regs live) live
+ | otherwise = live
middle :: CmmNode O O -> DualLive -> DualLive
middle m = changeStack updSlots
@@ -112,548 +102,52 @@ dualLiveTransfers entry procPoints = mkBTransfer3 first middle last
spill live _ = live
reload live s@(RegSlot r, _, _) = check s $ extendRegSet live r
reload live _ = live
+ -- Ensure the assignment refers to the entirety of the
+ -- register slot (and not just a slice).
check (RegSlot (LocalReg _ ty), o, w) x
| o == w && w == widthInBytes (typeWidth ty) = x
- check _ _ = panic "middleDualLiveness unsupported: slices"
+ check _ _ = panic "dualLiveTransfers: slices unsupported"
+
+ -- Register analysis is identical to liveness analysis from CmmLive.
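+          -- Across a call, only the continuation's stack-liveness is kept;
+          -- its register-liveness is dropped and regenerated from the call
+          -- node's own uses (see keep_stack_only below).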
last :: CmmNode O C -> FactBase DualLive -> DualLive
- last l fb = case l of
- CmmBranch id -> lkp id
- l@(CmmCall {cml_cont=Nothing}) -> changeRegs (gen l . kill l) empty
- l@(CmmCall {cml_cont=Just k}) -> call l k
- l@(CmmForeignCall {succ=k}) -> call l k
- l@(CmmCondBranch _ t f) -> changeRegs (gen l . kill l) $ dualUnion (lkp t) (lkp f)
- l@(CmmSwitch _ tbl) -> changeRegs (gen l . kill l) $ dualUnionList $ map lkp (catMaybes tbl)
+ last l fb = changeRegs (gen_kill l) $ case l of
+ CmmCall {cml_cont=Nothing} -> empty
+ CmmCall {cml_cont=Just k} -> keep_stack_only k
+ CmmForeignCall {succ=k} -> keep_stack_only k
+ _ -> joinOutFacts dualLiveLattice l fb
where empty = fact_bot dualLiveLattice
- lkp id = empty `fromMaybe` lookupFact id fb
- call l k = DualLive (on_stack (lkp k)) (gen l emptyRegSet)
-
-gen :: UserOfLocalRegs a => a -> RegSet -> RegSet
-gen a live = foldRegsUsed extendRegSet live a
-kill :: DefinerOfLocalRegs a => a -> RegSet -> RegSet
-kill a live = foldRegsDefd deleteFromRegSet live a
+ lkp k = fromMaybe empty (lookupFact k fb)
+ keep_stack_only k = DualLive (on_stack (lkp k)) emptyRegSet
-insertSpillAndReloadRewrites :: CmmGraph -> BlockSet -> CmmBwdRewrite DualLive
-insertSpillAndReloadRewrites graph procPoints = deepBwdRw3 first middle nothing
+insertSpillsAndReloads :: CmmGraph -> BlockSet -> CmmBwdRewrite DualLive
+insertSpillsAndReloads graph procPoints = deepBwdRw3 first middle nothing
-- Beware: deepBwdRw with one polymorphic function seems more reasonable here,
-- but GHC miscompiles it, see bug #4044.
where first :: CmmNode C O -> Fact O DualLive -> CmmReplGraph C O
first e@(CmmEntry id) live = return $
if id /= (g_entry graph) && setMember id procPoints then
- case map reload (uniqSetToList spill_regs) of
+ case map reload (uniqSetToList (in_regs live)) of
[] -> Nothing
is -> Just $ mkFirst e <*> mkMiddles is
else Nothing
- where
- -- If we are splitting procedures, we need the LastForeignCall
- -- to spill its results to the stack because they will only
- -- be used by a separate procedure (so they can't stay in LocalRegs).
- splitting = True
- spill_regs = if splitting then in_regs live
- else in_regs live `minusRegSet` defs
- defs = case mapLookup id firstDefs of
- Just defs -> defs
- Nothing -> emptyRegSet
- -- A LastForeignCall may contain some definitions, which take place
- -- on return from the function call. Therefore, we build a map (firstDefs)
- -- from BlockId to the set of variables defined on return to the BlockId.
- firstDefs = mapFold addLive emptyBlockMap (toBlockMap graph)
- addLive :: CmmBlock -> BlockEnv RegSet -> BlockEnv RegSet
- addLive b env = case lastNode b of
- CmmForeignCall {succ=k, res=defs} -> add k (mkRegSet defs) env
- _ -> env
- add bid defs env = mapInsert bid defs'' env
- where defs'' = case mapLookup bid env of
- Just defs' -> timesRegSet defs defs'
- Nothing -> defs
+ -- EZY: There was some dead code for handling the case where
+ -- we were not splitting procedures. Check Git history if
+ -- you're interested (circa e26ea0f41).
middle :: CmmNode O O -> Fact O DualLive -> CmmReplGraph O O
+ -- Don't add spills next to reloads.
middle (CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg') _) _)) _ | reg == reg' = return Nothing
- middle m@(CmmAssign (CmmLocal reg) _) live = return $
- if reg `elemRegSet` on_stack live then -- must spill
- my_trace "Spilling" (f4sep [text "spill" <+> ppr reg,
- text "after"{-, ppr m-}]) $
- Just $ mkMiddles $ [m, spill reg]
- else Nothing
+ -- Spill if register is live on stack.
+ middle m@(CmmAssign (CmmLocal reg) _) live
+ | reg `elemRegSet` on_stack live = return (Just (mkMiddles [m, spill reg]))
middle _ _ = return Nothing
nothing _ _ = return Nothing
-regSlot :: LocalReg -> CmmExpr
-regSlot r = CmmStackSlot (RegSlot r) (widthInBytes $ typeWidth $ localRegType r)
-
spill, reload :: LocalReg -> CmmNode O O
spill r = CmmStore (regSlot r) (CmmReg $ CmmLocal r)
reload r = CmmAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r)
-removeDeadAssignmentsAndReloads :: BlockSet -> CmmGraph -> FuelUniqSM CmmGraph
-removeDeadAssignmentsAndReloads procPoints g =
- liftM fst $ dataflowPassBwd g [] $ analRewBwd dualLiveLattice
- (dualLiveTransfers (g_entry g) procPoints)
- rewrites
- where rewrites = deepBwdRw3 nothing middle nothing
- -- Beware: deepBwdRw with one polymorphic function seems more reasonable here,
- -- but GHC panics while compiling, see bug #4045.
- middle :: CmmNode O O -> Fact O DualLive -> CmmReplGraph O O
- middle (CmmAssign (CmmLocal reg') _) live | not (reg' `elemRegSet` in_regs live) = return $ Just emptyGraph
- -- XXX maybe this should be somewhere else...
- middle (CmmAssign lhs (CmmReg rhs)) _ | lhs == rhs = return $ Just emptyGraph
- middle (CmmStore lhs (CmmLoad rhs _)) _ | lhs == rhs = return $ Just emptyGraph
- middle _ _ = return Nothing
-
- nothing _ _ = return Nothing
-
-----------------------------------------------------------------
---- Usage information
-
--- We decorate all register assignments with usage information,
--- that is, the maximum number of times the register is referenced
--- while it is live along all outgoing control paths. There are a few
--- subtleties here:
---
--- - If a register goes dead, and then becomes live again, the usages
--- of the disjoint live range don't count towards the original range.
---
--- a = 1; // used once
--- b = a;
--- a = 2; // used once
--- c = a;
---
--- - A register may be used multiple times, but these all reside in
--- different control paths, such that any given execution only uses
--- it once. In that case, the usage count may still be 1.
---
--- a = 1; // used once
--- if (b) {
--- c = a + 3;
--- } else {
--- c = a + 1;
--- }
---
--- This policy corresponds to an inlining strategy that does not
--- duplicate computation but may increase binary size.
---
--- - If we naively implement a usage count, we have a counting to
--- infinity problem across joins. Furthermore, knowing that
--- something is used 2 or more times in one runtime execution isn't
--- particularly useful for optimizations (inlining may be beneficial,
--- but there's no way of knowing that without register pressure
--- information.)
---
--- while (...) {
--- // first iteration, b used once
--- // second iteration, b used twice
--- // third iteration ...
--- a = b;
--- }
--- // b used zero times
---
--- There is an orthogonal question, which is that for every runtime
--- execution, the register may be used only once, but if we inline it
--- in every conditional path, the binary size might increase a lot.
--- But tracking this information would be tricky, because it violates
--- the finite lattice restriction Hoopl requires for termination;
--- we'd thus need to supply an alternate proof, which is probably
--- something we should defer until we actually have an optimization
--- that would take advantage of this. (This might also interact
--- strangely with liveness information.)
---
--- a = ...;
--- // a is used one time, but in X different paths
--- case (b) of
--- 1 -> ... a ...
--- 2 -> ... a ...
--- 3 -> ... a ...
--- ...
---
--- This analysis is very similar to liveness analysis; we just keep a
--- little extra info. (Maybe we should move it to CmmLive, and subsume
--- the old liveness analysis.)
-
-data RegUsage = SingleUse | ManyUse
- deriving (Ord, Eq, Show)
--- Absence in map = ZeroUse
-
-{-
--- minBound is bottom, maxBound is top, least-upper-bound is max
--- ToDo: Put this in Hoopl. Note that this isn't as useful as I
--- originally hoped, because you usually want to leave out the bottom
--- element when you have things like this put in maps. Maybe f is
--- useful on its own as a combining function.
-boundedOrdLattice :: (Bounded a, Ord a) => String -> DataflowLattice a
-boundedOrdLattice n = DataflowLattice n minBound f
- where f _ (OldFact x) (NewFact y)
- | x >= y = (NoChange, x)
- | otherwise = (SomeChange, y)
--}
-
--- Custom node type we'll rewrite to. CmmAssign nodes to local
--- registers are replaced with AssignLocal nodes.
-data WithRegUsage n e x where
- Plain :: n e x -> WithRegUsage n e x
- AssignLocal :: LocalReg -> CmmExpr -> RegUsage -> WithRegUsage n O O
-
-instance UserOfLocalRegs (n e x) => UserOfLocalRegs (WithRegUsage n e x) where
- foldRegsUsed f z (Plain n) = foldRegsUsed f z n
- foldRegsUsed f z (AssignLocal _ e _) = foldRegsUsed f z e
-
-instance DefinerOfLocalRegs (n e x) => DefinerOfLocalRegs (WithRegUsage n e x) where
- foldRegsDefd f z (Plain n) = foldRegsDefd f z n
- foldRegsDefd f z (AssignLocal r _ _) = foldRegsDefd f z r
-
-instance NonLocal n => NonLocal (WithRegUsage n) where
- entryLabel (Plain n) = entryLabel n
- successors (Plain n) = successors n
-
-liftRegUsage :: Graph n e x -> Graph (WithRegUsage n) e x
-liftRegUsage = mapGraph Plain
-
-eraseRegUsage :: Graph (WithRegUsage CmmNode) e x -> Graph CmmNode e x
-eraseRegUsage = mapGraph f
- where f :: WithRegUsage CmmNode e x -> CmmNode e x
- f (AssignLocal l e _) = CmmAssign (CmmLocal l) e
- f (Plain n) = n
-
-type UsageMap = UniqFM RegUsage
-
-usageLattice :: DataflowLattice UsageMap
-usageLattice = DataflowLattice "usage counts for registers" emptyUFM (joinUFM f)
- where f _ (OldFact x) (NewFact y)
- | x >= y = (NoChange, x)
- | otherwise = (SomeChange, y)
-
--- We reuse the names 'gen' and 'kill', although we're doing something
--- slightly different from the Dragon Book
-usageTransfer :: BwdTransfer (WithRegUsage CmmNode) UsageMap
-usageTransfer = mkBTransfer3 first middle last
- where first _ f = f
- middle :: WithRegUsage CmmNode O O -> UsageMap -> UsageMap
- middle n f = gen_kill n f
- last :: WithRegUsage CmmNode O C -> FactBase UsageMap -> UsageMap
- -- Checking for CmmCall/CmmForeignCall is unnecessary, because
- -- spills/reloads have already occurred by the time we do this
- -- analysis.
- -- XXX Deprecated warning is puzzling: what label are we
- -- supposed to use?
- -- ToDo: With a bit more cleverness here, we can avoid
- -- disappointment and heartbreak associated with the inability
- -- to inline into CmmCall and CmmForeignCall by
- -- over-estimating the usage to be ManyUse.
- last n f = gen_kill n (joinOutFacts usageLattice n f)
- gen_kill a = gen a . kill a
- gen a f = foldRegsUsed increaseUsage f a
- kill a f = foldRegsDefd delFromUFM f a
- increaseUsage f r = addToUFM_C combine f r SingleUse
- where combine _ _ = ManyUse
-
-usageRewrite :: BwdRewrite FuelUniqSM (WithRegUsage CmmNode) UsageMap
-usageRewrite = mkBRewrite3 first middle last
- where first _ _ = return Nothing
- middle :: Monad m => WithRegUsage CmmNode O O -> UsageMap -> m (Maybe (Graph (WithRegUsage CmmNode) O O))
- middle (Plain (CmmAssign (CmmLocal l) e)) f
- = return . Just
- $ case lookupUFM f l of
- Nothing -> emptyGraph
- Just usage -> mkMiddle (AssignLocal l e usage)
- middle _ _ = return Nothing
- last _ _ = return Nothing
-
-type CmmGraphWithRegUsage = GenCmmGraph (WithRegUsage CmmNode)
-annotateUsage :: CmmGraph -> FuelUniqSM (CmmGraphWithRegUsage)
-annotateUsage vanilla_g =
- let g = modifyGraph liftRegUsage vanilla_g
- in liftM fst $ dataflowPassBwd g [(g_entry g, fact_bot usageLattice)] $
- analRewBwd usageLattice usageTransfer usageRewrite
-
-----------------------------------------------------------------
---- Assignment tracking
-
--- The idea is to maintain a map of local registers do expressions,
--- such that the value of that register is the same as the value of that
--- expression at any given time. We can then do several things,
--- as described by Assignment.
-
--- Assignment describes the various optimizations that are valid
--- at a given point in the program.
-data Assignment =
--- This assignment can always be inlined. It is cheap or single-use.
- AlwaysInline CmmExpr
--- This assignment should be sunk down to its first use. (This will
--- increase code size if the register is used in multiple control flow
--- paths, but won't increase execution time, and the reduction of
--- register pressure is worth it.)
- | AlwaysSink CmmExpr
--- We cannot safely optimize occurrences of this local register. (This
--- corresponds to top in the lattice structure.)
- | NeverOptimize
-
--- Extract the expression that is being assigned to
-xassign :: Assignment -> Maybe CmmExpr
-xassign (AlwaysInline e) = Just e
-xassign (AlwaysSink e) = Just e
-xassign NeverOptimize = Nothing
-
--- Extracts the expression, but only if they're the same constructor
-xassign2 :: (Assignment, Assignment) -> Maybe (CmmExpr, CmmExpr)
-xassign2 (AlwaysInline e, AlwaysInline e') = Just (e, e')
-xassign2 (AlwaysSink e, AlwaysSink e') = Just (e, e')
-xassign2 _ = Nothing
-
--- Note: We'd like to make decisions about "not optimizing" as soon as
--- possible, because this will make running the transfer function more
--- efficient.
-type AssignmentMap = UniqFM Assignment
-
-assignmentLattice :: DataflowLattice AssignmentMap
-assignmentLattice = DataflowLattice "assignments for registers" emptyUFM (joinUFM add)
- where add _ (OldFact old) (NewFact new)
- = case (old, new) of
- (NeverOptimize, _) -> (NoChange, NeverOptimize)
- (_, NeverOptimize) -> (SomeChange, NeverOptimize)
- (xassign2 -> Just (e, e'))
- | e == e' -> (NoChange, old)
- | otherwise -> (SomeChange, NeverOptimize)
- _ -> (SomeChange, NeverOptimize)
-
--- Deletes sinks from assignment map, because /this/ is the place
--- where it will be sunk to.
-deleteSinks :: UserOfLocalRegs n => n -> AssignmentMap -> AssignmentMap
-deleteSinks n m = foldRegsUsed (adjustUFM f) m n
- where f (AlwaysSink _) = NeverOptimize
- f old = old
-
--- Invalidates any expressions that use a register.
-invalidateUsersOf :: CmmReg -> AssignmentMap -> AssignmentMap
--- foldUFM_Directly :: (Unique -> elt -> a -> a) -> a -> UniqFM elt -> a
-invalidateUsersOf reg m = foldUFM_Directly f m m -- [foldUFM performance]
- where f u (xassign -> Just e) m | reg `regUsedIn` e = addToUFM_Directly m u NeverOptimize
- f _ _ m = m
-{- This requires the entire spine of the map to be continually rebuilt,
- - which causes crazy memory usage!
-invalidateUsersOf reg = mapUFM (invalidateUsers' reg)
- where invalidateUsers' reg (xassign -> Just e) | reg `regUsedIn` e = NeverOptimize
- invalidateUsers' _ old = old
--}
-
--- Note [foldUFM performance]
--- These calls to fold UFM no longer leak memory, but they do cause
--- pretty killer amounts of allocation. So they'll be something to
--- optimize; we need an algorithmic change to prevent us from having to
--- traverse the /entire/ map continually.
-
-middleAssignment :: WithRegUsage CmmNode O O -> AssignmentMap -> AssignmentMap
-
--- Algorithm for annotated assignments:
--- 1. Delete any sinking assignments that were used by this instruction
--- 2. Add the assignment to our list of valid local assignments with
--- the correct optimization policy.
--- 3. Look for all assignments that reference that register and
--- invalidate them.
-middleAssignment n@(AssignLocal r e usage) assign
- = invalidateUsersOf (CmmLocal r) . add . deleteSinks n $ assign
- where add m = addToUFM m r
- $ case usage of
- SingleUse -> AlwaysInline e
- ManyUse -> decide e
- decide CmmLit{} = AlwaysInline e
- decide CmmReg{} = AlwaysInline e
- decide CmmLoad{} = AlwaysSink e
- decide CmmStackSlot{} = AlwaysSink e
- decide CmmMachOp{} = AlwaysSink e
- -- We'll always inline simple operations on the global
- -- registers, to reduce register pressure: Sp - 4 or Hp - 8
- -- EZY: Justify this optimization more carefully.
- decide CmmRegOff{} = AlwaysInline e
-
--- Algorithm for unannotated assignments of global registers:
--- 1. Delete any sinking assignments that were used by this instruction
--- 2. Look for all assignments that reference this register and
--- invalidate them.
-middleAssignment (Plain n@(CmmAssign reg@(CmmGlobal _) _)) assign
- = invalidateUsersOf reg . deleteSinks n $ assign
-
--- Algorithm for unannotated assignments of *local* registers: do
--- nothing (it's a reload, so no state should have changed)
-middleAssignment (Plain (CmmAssign (CmmLocal _) _)) assign = assign
-
--- Algorithm for stores:
--- 1. Delete any sinking assignments that were used by this instruction
--- 2. Look for all assignments that load from memory locations that
--- were clobbered by this store and invalidate them.
-middleAssignment (Plain n@(CmmStore lhs rhs)) assign
- = let m = deleteSinks n assign
- in foldUFM_Directly f m m -- [foldUFM performance]
- where f u (xassign -> Just x) m | (lhs, rhs) `clobbers` (u, x) = addToUFM_Directly m u NeverOptimize
- f _ _ m = m
-{- Also leaky
- = mapUFM_Directly p . deleteSinks n $ assign
- -- ToDo: There's a missed opportunity here: even if a memory
- -- access we're attempting to sink gets clobbered at some
- -- location, it's still /better/ to sink it to right before the
- -- point where it gets clobbered. How might we do this?
- -- Unfortunately, it's too late to change the assignment...
- where p r (xassign -> Just x) | (lhs, rhs) `clobbers` (r, x) = NeverOptimize
- p _ old = old
--}
-
--- Assumption: Unsafe foreign calls don't clobber memory
--- Since foreign calls clobber caller saved registers, we need
--- invalidate any assignments that reference those global registers.
--- This is kind of expensive. (One way to optimize this might be to
--- store extra information about expressions that allow this and other
--- checks to be done cheaply.)
-middleAssignment (Plain n@(CmmUnsafeForeignCall{})) assign
- = deleteCallerSaves (foldRegsDefd (\m r -> addToUFM m r NeverOptimize) (deleteSinks n assign) n)
- where deleteCallerSaves m = foldUFM_Directly f m m
- f u (xassign -> Just x) m | wrapRecExpf g x False = addToUFM_Directly m u NeverOptimize
- f _ _ m = m
- g (CmmReg (CmmGlobal r)) _ | callerSaves r = True
- g (CmmRegOff (CmmGlobal r) _) _ | callerSaves r = True
- g _ b = b
-
-middleAssignment (Plain (CmmComment {})) assign
- = assign
-
--- Assumptions:
--- * Writes using Hp do not overlap with any other memory locations
--- (An important invariant being relied on here is that we only ever
--- use Hp to allocate values on the heap, which appears to be the
--- case given hpReg usage, and that our heap writing code doesn't
--- do anything stupid like overlapping writes.)
--- * Stack slots do not overlap with any other memory locations
--- * Stack slots for different areas do not overlap
--- * Stack slots within the same area and different offsets may
--- overlap; we need to do a size check (see 'overlaps').
--- * Register slots only overlap with themselves. (But this shouldn't
--- happen in practice, because we'll fail to inline a reload across
--- the next spill.)
--- * Non stack-slot stores always conflict with each other. (This is
--- not always the case; we could probably do something special for Hp)
-clobbers :: (CmmExpr, CmmExpr) -- (lhs, rhs) of clobbering CmmStore
- -> (Unique, CmmExpr) -- (register, expression) that may be clobbered
- -> Bool
-clobbers (CmmRegOff (CmmGlobal Hp) _, _) (_, _) = False
-clobbers (CmmReg (CmmGlobal Hp), _) (_, _) = False
--- ToDo: Also catch MachOp case
-clobbers (ss@CmmStackSlot{}, CmmReg (CmmLocal r)) (u, CmmLoad (ss'@CmmStackSlot{}) _)
- | getUnique r == u, ss == ss' = False -- No-op on the stack slot (XXX: Do we need this special case?)
-clobbers (CmmStackSlot (CallArea a) o, rhs) (_, expr) = f expr
- where f (CmmLoad (CmmStackSlot (CallArea a') o') t)
- = (a, o, widthInBytes (cmmExprWidth rhs)) `overlaps` (a', o', widthInBytes (typeWidth t))
- f (CmmLoad e _) = containsStackSlot e
- f (CmmMachOp _ es) = or (map f es)
- f _ = False
- -- Maybe there's an invariant broken if this actually ever
- -- returns True
- containsStackSlot (CmmLoad{}) = True -- load of a load, all bets off
- containsStackSlot (CmmMachOp _ es) = or (map containsStackSlot es)
- containsStackSlot (CmmStackSlot{}) = True
- containsStackSlot _ = False
-clobbers (CmmStackSlot (RegSlot l) _, _) (_, expr) = f expr
- where f (CmmLoad (CmmStackSlot (RegSlot l') _) _) = l == l'
- f _ = False
-clobbers _ (_, e) = f e
- where f (CmmLoad (CmmStackSlot _ _) _) = False
- f (CmmLoad{}) = True -- conservative
- f (CmmMachOp _ es) = or (map f es)
- f _ = False
-
--- Check for memory overlapping.
--- Diagram:
--- 4 8 12
--- s -w- o
--- [ I32 ]
--- [ F64 ]
--- s' -w'- o'
-type CallSubArea = (AreaId, Int, Int) -- area, offset, width
-overlaps :: CallSubArea -> CallSubArea -> Bool
-overlaps (a, _, _) (a', _, _) | a /= a' = False
-overlaps (_, o, w) (_, o', w') =
- let s = o - w
- s' = o' - w'
- in (s' < o) && (s < o) -- Not LTE, because [ I32 ][ I32 ] is OK
-
-lastAssignment :: WithRegUsage CmmNode O C -> AssignmentMap -> [(Label, AssignmentMap)]
--- Variables are dead across calls, so invalidating all mappings is justified
-lastAssignment (Plain (CmmCall _ (Just k) _ _ _)) assign = [(k, mapUFM (const NeverOptimize) assign)]
-lastAssignment (Plain (CmmForeignCall {succ=k})) assign = [(k, mapUFM (const NeverOptimize) assign)]
-lastAssignment l assign = map (\id -> (id, deleteSinks l assign)) $ successors l
-
-assignmentTransfer :: FwdTransfer (WithRegUsage CmmNode) AssignmentMap
-assignmentTransfer = mkFTransfer3 (flip const) middleAssignment ((mkFactBase assignmentLattice .) . lastAssignment)
-
-assignmentRewrite :: FwdRewrite FuelUniqSM (WithRegUsage CmmNode) AssignmentMap
-assignmentRewrite = mkFRewrite3 first middle last
- where
- first _ _ = return Nothing
- middle :: WithRegUsage CmmNode O O -> AssignmentMap -> GenCmmReplGraph (WithRegUsage CmmNode) O O
- middle (Plain m) assign = return $ rewrite assign (precompute assign m) mkMiddle m
- middle (AssignLocal l e u) assign = return $ rewriteLocal assign (precompute assign (CmmAssign (CmmLocal l) e)) mkMiddle l e u
- last (Plain l) assign = return $ rewrite assign (precompute assign l) mkLast l
- -- Tuple is (inline?, reloads)
- precompute assign n = foldRegsUsed f (False, []) n -- duplicates are harmless
- where f (i, l) r = case lookupUFM assign r of
- Just (AlwaysSink e) -> (i, (Plain (CmmAssign (CmmLocal r) e)):l)
- Just (AlwaysInline _) -> (True, l)
- Just NeverOptimize -> (i, l)
- -- This case can show up when we have
- -- limited optimization fuel.
- Nothing -> (i, l)
- rewrite _ (False, []) _ _ = Nothing
- -- Note [CmmCall Inline Hack]
- -- Conservative hack: don't do any inlining on what will
- -- be translated into an OldCmm CmmCalls, since the code
- -- produced here tends to be unproblematic and I need to write
- -- lint passes to ensure that we don't put anything in the
- -- arguments that could be construed as a global register by
- -- some later translation pass. (For example, slots will turn
- -- into dereferences of Sp). See [Register parameter passing].
- -- ToDo: Fix this up to only bug out if all inlines were for
- -- CmmExprs with global registers (we can't use the
- -- straightforward mapExpDeep call, in this case.) ToDo: We miss
- -- an opportunity here, where all possible inlinings should
- -- instead be sunk.
- rewrite _ (True, []) _ n | not (inlinable n) = Nothing -- see [CmmCall Inline Hack]
- rewrite assign (i, xs) mk n = Just $ mkMiddles xs <*> mk (Plain (inline i assign n))
-
- rewriteLocal _ (False, []) _ _ _ _ = Nothing
- rewriteLocal assign (i, xs) mk l e u = Just $ mkMiddles xs <*> mk n'
- where n' = AssignLocal l e' u
- e' = if i then wrapRecExp (inlineExp assign) e else e
- -- inlinable check omitted, since we can always inline into
- -- assignments.
-
- inline :: Bool -> AssignmentMap -> CmmNode e x -> CmmNode e x
- inline False _ n = n
- inline True _ n | not (inlinable n) = n -- see [CmmCall Inline Hack]
- inline True assign n = mapExpDeep (inlineExp assign) n
-
- inlineExp assign old@(CmmReg (CmmLocal r))
- = case lookupUFM assign r of
- Just (AlwaysInline x) -> x
- _ -> old
- inlineExp assign old@(CmmRegOff (CmmLocal r) i)
- = case lookupUFM assign r of
- Just (AlwaysInline x) ->
- case x of
- (CmmRegOff r' i') -> CmmRegOff r' (i + i')
- _ -> CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
- where rep = typeWidth (localRegType r)
- _ -> old
- inlineExp _ old = old
-
- inlinable :: CmmNode e x -> Bool
- inlinable (CmmCall{}) = False
- inlinable (CmmForeignCall{}) = False
- inlinable (CmmUnsafeForeignCall{}) = False
- inlinable _ = True
-
-rewriteAssignments :: CmmGraph -> FuelUniqSM CmmGraph
-rewriteAssignments g = do
- g' <- annotateUsage g
- g'' <- liftM fst $ dataflowPassFwd g' [(g_entry g, fact_bot assignmentLattice)] $
- analRewFwd assignmentLattice assignmentTransfer assignmentRewrite
- return (modifyGraph eraseRegUsage g'')
-
---------------------
-- prettyprinting
@@ -670,12 +164,3 @@ instance Outputable DualLive where
else (ppr_regs "live in regs =" regs),
if isEmptyUniqSet stack then PP.empty
else (ppr_regs "live on stack =" stack)]
-
--- ToDo: Outputable instance for UsageMap and AssignmentMap
-
-my_trace :: String -> SDoc -> a -> a
-my_trace = if False then pprTrace else \_ _ a -> a
-
-f4sep :: [SDoc] -> SDoc
-f4sep [] = fsep []
-f4sep (d:ds) = fsep (d : map (nest 4) ds)
diff --git a/compiler/cmm/MkGraph.hs b/compiler/cmm/MkGraph.hs
index 1e3f17b5a8..d1ac5712ab 100644
--- a/compiler/cmm/MkGraph.hs
+++ b/compiler/cmm/MkGraph.hs
@@ -119,25 +119,25 @@ mkAssign :: CmmReg -> CmmExpr -> CmmAGraph
mkStore :: CmmExpr -> CmmExpr -> CmmAGraph
---------- Calls
-mkCall :: CmmExpr -> (Convention, Convention) -> CmmFormals -> CmmActuals ->
+mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual] ->
UpdFrameOffset -> CmmAGraph
-mkCmmCall :: CmmExpr -> CmmFormals -> CmmActuals ->
+mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] ->
UpdFrameOffset -> CmmAGraph
-- Native C-- calling convention
-mkSafeCall :: ForeignTarget -> CmmFormals -> CmmActuals -> UpdFrameOffset -> Bool -> CmmAGraph
-mkUnsafeCall :: ForeignTarget -> CmmFormals -> CmmActuals -> CmmAGraph
-mkFinalCall :: CmmExpr -> CCallConv -> CmmActuals -> UpdFrameOffset -> CmmAGraph
+mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset -> Bool -> CmmAGraph
+mkUnsafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph
+mkFinalCall :: CmmExpr -> CCallConv -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
-- Never returns; like exit() or barf()
---------- Control transfer
-mkJump :: CmmExpr -> CmmActuals -> UpdFrameOffset -> CmmAGraph
-mkDirectJump :: CmmExpr -> CmmActuals -> UpdFrameOffset -> CmmAGraph
-mkJumpGC :: CmmExpr -> CmmActuals -> UpdFrameOffset -> CmmAGraph
-mkForeignJump :: Convention -> CmmExpr -> CmmActuals -> UpdFrameOffset -> CmmAGraph
+mkJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkDirectJump :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkJumpGC :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkForeignJump :: Convention -> CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph
mkSwitch :: CmmExpr -> [Maybe BlockId] -> CmmAGraph
-mkReturn :: CmmExpr -> CmmActuals -> UpdFrameOffset -> CmmAGraph
-mkReturnSimple :: CmmActuals -> UpdFrameOffset -> CmmAGraph
+mkReturn :: CmmExpr -> [CmmActual] -> UpdFrameOffset -> CmmAGraph
+mkReturnSimple :: [CmmActual] -> UpdFrameOffset -> CmmAGraph
mkBranch :: BlockId -> CmmAGraph
mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> CmmAGraph
@@ -288,8 +288,8 @@ stackStubExpr w = CmmLit (CmmInt 0 w)
-- the variables in their spill slots.
-- Therefore, for copying arguments and results, we provide different
-- functions to pass the arguments in an overflow area and to pass them in spill slots.
-copyInOflow :: Convention -> Area -> CmmFormals -> (Int, CmmAGraph)
-copyInSlot :: Convention -> CmmFormals -> [CmmNode O O]
+copyInOflow :: Convention -> Area -> [CmmFormal] -> (Int, CmmAGraph)
+copyInSlot :: Convention -> [CmmFormal] -> [CmmNode O O]
copyOutSlot :: Convention -> [LocalReg] -> [CmmNode O O]
copyInOflow conv area formals = (offset, catAGraphs $ map mkMiddle nodes)
@@ -298,7 +298,7 @@ copyInSlot c f = snd $ copyIn oneCopySlotI c (panic "no area for copying to slot
type SlotCopier = Area -> (LocalReg, ByteOff) -> (ByteOff, [CmmNode O O]) ->
(ByteOff, [CmmNode O O])
-type CopyIn = SlotCopier -> Convention -> Area -> CmmFormals -> (ByteOff, [CmmNode O O])
+type CopyIn = SlotCopier -> Convention -> Area -> [CmmFormal] -> (ByteOff, [CmmNode O O])
-- Return the number of bytes used for copying arguments, as well as the
-- instructions to copy the arguments.
@@ -331,7 +331,7 @@ oneCopySlotI _ (reg, _) (n, ms) =
-- Factoring out the common parts of the copyout functions yielded something
-- more complicated:
-copyOutOflow :: Convention -> Transfer -> Area -> CmmActuals -> UpdFrameOffset ->
+copyOutOflow :: Convention -> Transfer -> Area -> [CmmActual] -> UpdFrameOffset ->
(Int, CmmAGraph)
-- Generate code to move the actual parameters into the locations
-- required by the calling convention. This includes a store for the return address.
@@ -355,7 +355,7 @@ copyOutOflow conv transfer area@(CallArea a) actuals updfr_off
else ([], 0)
Old -> ([], updfr_off)
- args :: [(CmmExpr, ParamLocation ByteOff)] -- The argument and where to put it
+ args :: [(CmmExpr, ParamLocation)] -- The argument and where to put it
args = assignArgumentsPos conv cmmExprType actuals
args' = foldl adjust setRA args
@@ -372,10 +372,10 @@ copyOutSlot conv actuals = foldr co [] args
toExp r = CmmReg (CmmLocal r)
args = assignArgumentsPos conv localRegType actuals
-mkCallEntry :: Convention -> CmmFormals -> (Int, CmmAGraph)
+mkCallEntry :: Convention -> [CmmFormal] -> (Int, CmmAGraph)
mkCallEntry conv formals = copyInOflow conv (CallArea Old) formals
-lastWithArgs :: Transfer -> Area -> Convention -> CmmActuals -> UpdFrameOffset ->
+lastWithArgs :: Transfer -> Area -> Convention -> [CmmActual] -> UpdFrameOffset ->
(ByteOff -> CmmAGraph) -> CmmAGraph
lastWithArgs transfer area conv actuals updfr_off last =
let (outArgs, copies) = copyOutOflow conv transfer area actuals updfr_off in
diff --git a/compiler/cmm/OldCmm.hs b/compiler/cmm/OldCmm.hs
index f5c08172d7..de1a8e0dcb 100644
--- a/compiler/cmm/OldCmm.hs
+++ b/compiler/cmm/OldCmm.hs
@@ -14,7 +14,7 @@ module OldCmm (
cmmMapGraphM, cmmTopMapGraphM,
GenBasicBlock(..), CmmBasicBlock, blockId, blockStmts, mapBlockStmts,
CmmStmt(..), CmmReturnInfo(..), CmmHinted(..),
- HintedCmmFormal, HintedCmmFormals, HintedCmmActual, HintedCmmActuals,
+ HintedCmmFormal, HintedCmmActual,
CmmSafety(..), CmmCallTarget(..),
module CmmDecl,
module CmmExpr,
@@ -146,8 +146,8 @@ data CmmStmt -- Old-style
| CmmCall -- A call (foreign, native or primitive), with
CmmCallTarget
- HintedCmmFormals -- zero or more results
- HintedCmmActuals -- zero or more arguments
+ [HintedCmmFormal] -- zero or more results
+ [HintedCmmActual] -- zero or more arguments
CmmSafety -- whether to build a continuation
CmmReturnInfo
-- Some care is necessary when handling the arguments of these, see
@@ -164,22 +164,20 @@ data CmmStmt -- Old-style
-- Undefined outside range, and when there's a Nothing
| CmmJump CmmExpr -- Jump to another C-- function,
- HintedCmmActuals -- with these parameters. (parameters never used)
+ [HintedCmmActual] -- with these parameters. (parameters never used)
| CmmReturn -- Return from a native C-- function,
- HintedCmmActuals -- with these return values. (parameters never used)
+ [HintedCmmActual] -- with these return values. (parameters never used)
data CmmHinted a = CmmHinted { hintlessCmm :: a, cmmHint :: ForeignHint }
deriving( Eq )
-type HintedCmmActuals = [HintedCmmActual]
-type HintedCmmFormals = [HintedCmmFormal]
type HintedCmmFormal = CmmHinted CmmFormal
type HintedCmmActual = CmmHinted CmmActual
data CmmSafety = CmmUnsafe | CmmSafe C_SRT | CmmInterruptible
--- | enable us to fold used registers over 'CmmActuals' and 'CmmFormals'
+-- | enable us to fold used registers over '[CmmActual]' and '[CmmFormal]'
instance UserOfLocalRegs CmmStmt where
foldRegsUsed f (set::b) s = stmt s set
where
diff --git a/compiler/cmm/OldCmmUtils.hs b/compiler/cmm/OldCmmUtils.hs
index ea9ef8a54a..14a17d7946 100644
--- a/compiler/cmm/OldCmmUtils.hs
+++ b/compiler/cmm/OldCmmUtils.hs
@@ -78,8 +78,8 @@ cheapEqReg _ _ = False
---------------------------------------------------
loadArgsIntoTemps :: [Unique]
- -> HintedCmmActuals
- -> ([Unique], [CmmStmt], HintedCmmActuals)
+ -> [HintedCmmActual]
+ -> ([Unique], [CmmStmt], [HintedCmmActual])
loadArgsIntoTemps uniques [] = (uniques, [], [])
loadArgsIntoTemps uniques ((CmmHinted e hint):args) =
(uniques'',
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index aa7d914253..1e11c0c55b 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -266,7 +266,7 @@ pprStmt stmt = case stmt of
CmmJump lbl _params -> mkJMP_(pprExpr lbl) <> semi
CmmSwitch arg ids -> pprSwitch arg ids
-pprCFunType :: SDoc -> CCallConv -> HintedCmmFormals -> HintedCmmActuals -> SDoc
+pprCFunType :: SDoc -> CCallConv -> [HintedCmmFormal] -> [HintedCmmActual] -> SDoc
pprCFunType ppr_fn cconv ress args
= res_type ress <+>
parens (text (ccallConvAttribute cconv) <> ppr_fn) <>
@@ -807,7 +807,7 @@ pprLocalReg (LocalReg uniq _) = char '_' <> ppr uniq
-- -----------------------------------------------------------------------------
-- Foreign Calls
-pprCall :: SDoc -> CCallConv -> HintedCmmFormals -> HintedCmmActuals -> CmmSafety
+pprCall :: SDoc -> CCallConv -> [HintedCmmFormal] -> [HintedCmmActual] -> CmmSafety
-> SDoc
pprCall ppr_fn cconv results args _
diff --git a/compiler/cmm/cmm-notes b/compiler/cmm/cmm-notes
index 98c2e83699..f35e72d36c 100644
--- a/compiler/cmm/cmm-notes
+++ b/compiler/cmm/cmm-notes
@@ -1,9 +1,5 @@
More notes (June 11)
~~~~~~~~~~~~~~~~~~~~
-* Kill dead code assignArguments, argumentsSize in CmmCallConv.
- Bake in ByteOff to ParamLocation and ArgumentFormat
- CmmActuals -> [CmmActual] similary CmmFormals
-
* Possible refactoring: Nuke AGraph in favour of
mkIfThenElse :: Expr -> Graph -> Graph -> FCode Graph
or even
@@ -12,16 +8,10 @@ More notes (June 11)
or parameterise FCode over its envt; the CgState part seem useful for both
-* Move top and tail calls to runCmmContFlowOpts from HscMain to CmmCps.cpsTop
- (and rename the latter!)
-
* "Remove redundant reloads" in CmmSpillReload should be redundant; since
insertLateReloads is now gone, every reload is reloading a live variable.
Test and nuke.
-* Sink and inline S(RegSlot(x)) = e in precisely the same way that we
- sink and inline x = e
-
* Stack layout is very like register assignment: find non-conflicting assigments.
In particular we can use colouring or linear scan (etc).
@@ -110,6 +100,8 @@ Things to do:
dichotomy. Mostly this means global replace, but we also need to make
Label an instance of Outputable (probably in the Outputable module).
+ EZY: We should use Label, since that's the terminology Hoopl uses.
+
- NB that CmmProcPoint line 283 has a hack that works around a GADT-related
bug in 6.10.
@@ -255,7 +247,7 @@ CmmCvt.hs Conversion between old and new Cmm reps
CmmOpt.hs Hopefully-redundant optimiser
-------- Stuff to keep ------------
-CmmCPS.hs Driver for new pipeline
+CmmPipeline.hs Driver for new pipeline
CmmLive.hs Liveness analysis, dead code elim
CmmProcPoint.hs Identifying and splitting out proc-points
@@ -302,24 +294,24 @@ BlockId.hs BlockId, BlockEnv, BlockSet
type RawCmm = GenCmm CmmStatic [CmmStatic] (ListGraph CmmStmt)
* HscMain.tryNewCodeGen
- - STG->Cmm: StgCmm.codeGen (new codegen)
- - Optimise: CmmContFlowOpt (simple optimisations, very self contained)
- - Cps convert: CmmCPS.protoCmmCPS
- - Optimise: CmmContFlowOpt again
- - Convert: CmmCvt.cmmOfZgraph (convert to old rep) very self contained
+ - STG->Cmm: StgCmm.codeGen (new codegen)
+ - Optimize and CPS: CmmPipeline.cmmPipeline
+ - Convert: CmmCvt.cmmOfZgraph (convert to old rep) very self contained
* StgCmm.hs The new STG -> Cmm conversion code generator
Lots of modules StgCmmXXX
----------------------------------------------------
- CmmCPS.protoCmmCPS The new pipeline
+ CmmPipeline.cmmPipeline The new pipeline
----------------------------------------------------
-CmmCPS.protoCmmCPS:
- 1. Do cpsTop for each procedures separately
- 2. Build SRT representation; this spans multiple procedures
- (unless split-objs)
+CmmPipeline.cmmPipeline:
+ 1. Do control flow optimization
+  2. Do cpsTop for each procedure separately
+ 3. Build SRT representation; this spans multiple procedures
+ (unless split-objs)
+ 4. Do control flow optimization on all resulting procedures
cpsTop:
* CmmCommonBlockElim.elimCommonBlocks:
@@ -457,7 +449,7 @@ a dominator analysis, using the Dataflow Engine.
f's keep-alive refs to include h1.
* The SRT info is the C_SRT field of Cmm.ClosureTypeInfo in a
- CmmInfoTable attached to each CmmProc. CmmCPS.toTops actually does
+ CmmInfoTable attached to each CmmProc. CmmPipeline.toTops actually does
the attaching, right at the end of the pipeline. The C_SRT part
gives offsets within a single, shared table of closure pointers.