summaryrefslogtreecommitdiff
path: root/compiler/GHC/CmmToAsm
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/GHC/CmmToAsm')
-rw-r--r--compiler/GHC/CmmToAsm/BlockLayout.hs18
-rw-r--r--compiler/GHC/CmmToAsm/CFG.hs39
-rw-r--r--compiler/GHC/CmmToAsm/Instr.hs5
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear.hs79
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/Base.hs5
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs5
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs4
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/State.hs1
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/X86.hs4
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs4
10 files changed, 121 insertions, 43 deletions
diff --git a/compiler/GHC/CmmToAsm/BlockLayout.hs b/compiler/GHC/CmmToAsm/BlockLayout.hs
index 07faa91473..67eff764e1 100644
--- a/compiler/GHC/CmmToAsm/BlockLayout.hs
+++ b/compiler/GHC/CmmToAsm/BlockLayout.hs
@@ -636,22 +636,8 @@ sequenceChain :: forall a i. (Instruction i, Outputable i)
-> [GenBasicBlock i] -- ^ Blocks placed in sequence.
sequenceChain _info _weights [] = []
sequenceChain _info _weights [x] = [x]
-sequenceChain info weights' blocks@((BasicBlock entry _):_) =
- let weights :: CFG
- weights = --pprTrace "cfg'" (pprEdgeWeights cfg')
- cfg'
- where
- (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-} mkGlobalWeights entry weights'
- cfg' = {-# SCC rewriteEdges #-}
- mapFoldlWithKey
- (\cfg from m ->
- mapFoldlWithKey
- (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to )
- cfg m )
- weights'
- globalEdgeWeights
-
- directEdges :: [CfgEdge]
+sequenceChain info weights blocks@((BasicBlock entry _):_) =
+ let directEdges :: [CfgEdge]
directEdges = sortBy (flip compare) $ catMaybes . map relevantWeight $ (infoEdgeList weights)
where
relevantWeight :: CfgEdge -> Maybe CfgEdge
diff --git a/compiler/GHC/CmmToAsm/CFG.hs b/compiler/GHC/CmmToAsm/CFG.hs
index ad3a3cdae7..5c68e77fd1 100644
--- a/compiler/GHC/CmmToAsm/CFG.hs
+++ b/compiler/GHC/CmmToAsm/CFG.hs
@@ -670,11 +670,21 @@ findBackEdges root cfg =
typedEdges =
classifyEdges root getSuccs edges :: [((BlockId,BlockId),EdgeType)]
-
-optimizeCFG :: D.CfgWeights -> RawCmmDecl -> CFG -> CFG
-optimizeCFG _ (CmmData {}) cfg = cfg
-optimizeCFG weights (CmmProc info _lab _live graph) cfg =
- {-# SCC optimizeCFG #-}
+optimizeCFG :: Bool -> D.CfgWeights -> RawCmmDecl -> CFG -> CFG
+optimizeCFG _ _ (CmmData {}) cfg = cfg
+optimizeCFG doStaticPred weights proc@(CmmProc _info _lab _live graph) cfg =
+ (if doStaticPred then staticPredCfg (g_entry graph) else id) $
+ optHsPatterns weights proc $ cfg
+
+-- | Modify branch weights based on educated guess on
+-- patterns GHC tends to produce and how they affect
+-- performance.
+--
+-- Most importantly we penalize jumps across info tables.
+optHsPatterns :: D.CfgWeights -> RawCmmDecl -> CFG -> CFG
+optHsPatterns _ (CmmData {}) cfg = cfg
+optHsPatterns weights (CmmProc info _lab _live graph) cfg =
+ {-# SCC optHsPatterns #-}
-- pprTrace "Initial:" (pprEdgeWeights cfg) $
-- pprTrace "Initial:" (ppr $ mkGlobalWeights (g_entry graph) cfg) $
@@ -749,6 +759,21 @@ optimizeCFG weights (CmmProc info _lab _live graph) cfg =
| CmmSource { trans_cmmNode = CmmCondBranch {} } <- source = True
| otherwise = False
+-- | Convert block-local branch weights to global weights.
+staticPredCfg :: BlockId -> CFG -> CFG
+staticPredCfg entry cfg = cfg'
+ where
+ (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-}
+ mkGlobalWeights entry cfg
+ cfg' = {-# SCC rewriteEdges #-}
+ mapFoldlWithKey
+ (\cfg from m ->
+ mapFoldlWithKey
+ (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to )
+ cfg m )
+ cfg
+ globalEdgeWeights
+
-- | Determine loop membership of blocks based on SCC analysis
-- This is faster but only gives yes/no answers.
loopMembers :: HasDebugCallStack => CFG -> LabelMap Bool
@@ -922,6 +947,10 @@ revPostorderFrom cfg root =
-- reverse post order. Which is required for diamond control flow to work probably.
--
-- We also apply a few prediction heuristics (based on the same paper)
+--
+-- The returned result represents frequences.
+-- For blocks it's the expected number of executions and
+-- for edges is the number of traversals.
{-# NOINLINE mkGlobalWeights #-}
{-# SCC mkGlobalWeights #-}
diff --git a/compiler/GHC/CmmToAsm/Instr.hs b/compiler/GHC/CmmToAsm/Instr.hs
index 833a72a74a..869c5eb238 100644
--- a/compiler/GHC/CmmToAsm/Instr.hs
+++ b/compiler/GHC/CmmToAsm/Instr.hs
@@ -37,7 +37,10 @@ import GHC.CmmToAsm.Config
-- (for allocation purposes, anyway).
--
data RegUsage
- = RU [Reg] [Reg]
+ = RU {
+ reads :: [Reg],
+ writes :: [Reg]
+ }
-- | No regs read or written to.
noUsage :: RegUsage
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear.hs b/compiler/GHC/CmmToAsm/Reg/Linear.hs
index 00b4915d7b..55cb73af1a 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-}
+{-# LANGUAGE ConstraintKinds #-}
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
@@ -137,6 +138,7 @@ import GHC.Platform
import Data.Maybe
import Data.List
import Control.Monad
+import Control.Applicative
-- -----------------------------------------------------------------------------
-- Top level of the register allocator
@@ -229,8 +231,13 @@ linearRegAlloc config entry_ids block_live sccs
go f = linearRegAlloc' config f entry_ids block_live sccs
platform = ncgPlatform config
+-- | Constraints on the instruction instances used by the
+-- linear allocator.
+type OutputableRegConstraint freeRegs instr =
+ (FR freeRegs, Outputable freeRegs, Outputable instr, Instruction instr)
+
linearRegAlloc'
- :: (FR freeRegs, Outputable instr, Instruction instr)
+ :: OutputableRegConstraint freeRegs instr
=> NCGConfig
-> freeRegs
-> [BlockId] -- ^ entry points
@@ -246,7 +253,7 @@ linearRegAlloc' config initFreeRegs entry_ids block_live sccs
return (blocks, stats, getStackUse stack)
-linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr)
+linearRA_SCCs :: OutputableRegConstraint freeRegs instr
=> [BlockId]
-> BlockMap RegSet
-> [NatBasicBlock instr]
@@ -281,7 +288,7 @@ linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs)
more sanity checking to guard against this eventuality.
-}
-process :: (FR freeRegs, Instruction instr, Outputable instr)
+process :: OutputableRegConstraint freeRegs instr
=> [BlockId]
-> BlockMap RegSet
-> [GenBasicBlock (LiveInstr instr)]
@@ -325,15 +332,18 @@ process entry_ids block_live (b@(BasicBlock id _) : blocks)
-- | Do register allocation on this basic block
--
processBlock
- :: (FR freeRegs, Outputable instr, Instruction instr)
+ :: OutputableRegConstraint freeRegs instr
=> BlockMap RegSet -- ^ live regs on entry to each basic block
-> LiveBasicBlock instr -- ^ block to do register allocation on
-> RegM freeRegs [NatBasicBlock instr] -- ^ block with registers allocated
processBlock block_live (BasicBlock id instrs)
- = do initBlock id block_live
+ = do -- pprTraceM "processBlock" $ text "" $$ ppr (BasicBlock id instrs)
+ initBlock id block_live
+
(instrs', fixups)
<- linearRA block_live [] [] id instrs
+ -- pprTraceM "blockResult" $ ppr (instrs', fixups)
return $ BasicBlock id instrs' : fixups
@@ -369,7 +379,7 @@ initBlock id block_live
-- | Do allocation for a sequence of instructions.
linearRA
- :: (FR freeRegs, Outputable instr, Instruction instr)
+ :: OutputableRegConstraint freeRegs instr
=> BlockMap RegSet -- ^ map of what vregs are live on entry to each block.
-> [instr] -- ^ accumulator for instructions already processed.
-> [NatBasicBlock instr] -- ^ accumulator for blocks of fixup code.
@@ -396,7 +406,7 @@ linearRA block_live accInstr accFixups id (instr:instrs)
-- | Do allocation for a single instruction.
raInsn
- :: (FR freeRegs, Outputable instr, Instruction instr)
+ :: OutputableRegConstraint freeRegs instr
=> BlockMap RegSet -- ^ map of what vregs are love on entry to each block.
-> [instr] -- ^ accumulator for instructions already processed.
-> BlockId -- ^ the id of the current block, for debugging
@@ -476,7 +486,7 @@ isInReg src assig | Just (InReg _) <- lookupUFM assig src = True
| otherwise = False
-genRaInsn :: (FR freeRegs, Instruction instr, Outputable instr)
+genRaInsn :: OutputableRegConstraint freeRegs instr
=> BlockMap RegSet
-> [instr]
-> BlockId
@@ -486,6 +496,7 @@ genRaInsn :: (FR freeRegs, Instruction instr, Outputable instr)
-> RegM freeRegs ([instr], [NatBasicBlock instr])
genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
+-- pprTraceM "genRaInsn" $ ppr (block_id, instr)
platform <- getPlatform
case regUsageOfInstr platform instr of { RU read written ->
do
@@ -525,6 +536,8 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
(fixup_blocks, adjusted_instr)
<- joinToTargets block_live block_id instr
+-- when (not $ null fixup_blocks) $ pprTraceM "genRA:FixBlocks" $ ppr fixup_blocks
+
-- Debugging - show places where the reg alloc inserted
-- assignment fixup blocks.
-- when (not $ null fixup_blocks) $
@@ -737,7 +750,7 @@ data SpillLoc = ReadMem StackSlot -- reading from register only in memory
-- the list of free registers and free stack slots.
allocateRegsAndSpill
- :: (FR freeRegs, Outputable instr, Instruction instr)
+ :: forall freeRegs instr. (FR freeRegs, Outputable instr, Instruction instr)
=> Bool -- True <=> reading (load up spilled regs)
-> [VirtualReg] -- don't push these out
-> [instr] -- spill insns
@@ -749,7 +762,8 @@ allocateRegsAndSpill _ _ spills alloc []
= return (spills, reverse alloc)
allocateRegsAndSpill reading keep spills alloc (r:rs)
- = do assig <- getAssigR
+ = do assig <- getAssigR :: RegM freeRegs (RegMap Loc)
+ -- pprTraceM "allocateRegsAndSpill:assig" (ppr (r:rs) $$ ppr assig)
let doSpill = allocRegsAndSpill_spill reading keep spills alloc r rs assig
case lookupUFM assig r of
-- case (1a): already in a register
@@ -779,6 +793,26 @@ allocateRegsAndSpill reading keep spills alloc (r:rs)
| otherwise -> doSpill WriteNew
+-- | Given a virtual reg find a preferred real register.
+-- The preferred register is simply the first one the variable
+-- was assigned to (if any). This way when we allocate for a loop
+-- variables are likely to end up in the same registers at the
+-- end and start of the loop, avoiding redundant reg-reg moves.
+-- Note: I tried returning a list of past assignments, but that
+-- turned out to barely matter but added a few tenths of
+-- a percent to compile time.
+findPrefRealReg :: forall freeRegs u. Uniquable u
+ => u -> RegM freeRegs (Maybe RealReg)
+findPrefRealReg vreg = do
+ bassig <- getBlockAssigR :: RegM freeRegs (BlockMap (freeRegs,RegMap Loc))
+ return $ foldr (findVirtRegAssig) Nothing bassig
+ where
+ findVirtRegAssig :: (freeRegs,RegMap Loc) -> Maybe RealReg -> Maybe RealReg
+ findVirtRegAssig assig z =
+ z <|> case lookupUFM (snd assig) vreg of
+ Just (InReg real_reg) -> Just real_reg
+ Just (InBoth real_reg _) -> Just real_reg
+ _ -> z
-- reading is redundant with reason, but we keep it around because it's
-- convenient and it maintains the recursive structure of the allocator. -- EZY
@@ -795,18 +829,26 @@ allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr, Outputable instr)
allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
= do platform <- getPlatform
freeRegs <- getFreeRegsR
- let freeRegs_thisClass = frGetFreeRegs platform (classOfVirtualReg r) freeRegs
+ let freeRegs_thisClass = frGetFreeRegs platform (classOfVirtualReg r) freeRegs :: [RealReg]
- case freeRegs_thisClass of
+ -- Can we put the variable into a register it already was?
+ pref_reg <- findPrefRealReg r
+ case freeRegs_thisClass of
-- case (2): we have a free register
- (my_reg : _) ->
- do spills' <- loadTemp r spill_loc my_reg spills
+ (first_free : _) ->
+ do let final_reg
+ | Just reg <- pref_reg
+ , reg `elem` freeRegs_thisClass
+ = reg
+ | otherwise
+ = first_free
+ spills' <- loadTemp r spill_loc final_reg spills
- setAssigR (addToUFM assig r $! newLocation spill_loc my_reg)
- setFreeRegsR $ frAllocateReg platform my_reg freeRegs
+ setAssigR (addToUFM assig r $! newLocation spill_loc final_reg)
+ setFreeRegsR $ frAllocateReg platform final_reg freeRegs
- allocateRegsAndSpill reading keep spills' (my_reg : alloc) rs
+ allocateRegsAndSpill reading keep spills' (final_reg : alloc) rs
-- case (3): we need to push something out to free up a register
@@ -814,7 +856,8 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
do let inRegOrBoth (InReg _) = True
inRegOrBoth (InBoth _ _) = True
inRegOrBoth _ = False
- let candidates' =
+ let candidates' :: UniqFM Loc
+ candidates' =
flip delListFromUFM keep $
filterUFM inRegOrBoth $
assig
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs b/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
index 5784660e3f..6a110f0a48 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
@@ -30,6 +30,7 @@ import GHC.Types.Unique.FM
import GHC.Types.Unique.Supply
import GHC.Cmm.BlockId
+data ReadingOrWriting = Reading | Writing deriving (Eq,Ord)
-- | Used to store the register assignment on entry to a basic block.
-- We use this to handle join points, where multiple branch instructions
@@ -138,6 +139,8 @@ data RA_State freeRegs
, ra_config :: !NCGConfig
-- | (from,fixup,to) : We inserted fixup code between from and to
- , ra_fixups :: [(BlockId,BlockId,BlockId)] }
+ , ra_fixups :: [(BlockId,BlockId,BlockId)]
+
+ }
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs b/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
index fe19164357..fd0719c656 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
-- | Free regs map for PowerPC
module GHC.CmmToAsm.Reg.Linear.PPC where
@@ -27,6 +29,9 @@ import Data.Bits
data FreeRegs = FreeRegs !Word32 !Word32
deriving( Show ) -- The Show is used in an ASSERT
+instance Outputable FreeRegs where
+ ppr = text . show
+
noFreeRegs :: FreeRegs
noFreeRegs = FreeRegs 0 0
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs b/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs
index ac7dc85366..063a8836b3 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE CPP #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-- | Free regs map for SPARC
module GHC.CmmToAsm.Reg.Linear.SPARC where
@@ -38,6 +39,9 @@ data FreeRegs
instance Show FreeRegs where
show = showFreeRegs
+instance Outputable FreeRegs where
+ ppr = text . showFreeRegs
+
-- | A reg map where no regs are free to be allocated.
noFreeRegs :: FreeRegs
noFreeRegs = FreeRegs 0 0 0
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs
index f96cc71239..ab05ab632a 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE CPP, PatternSynonyms, DeriveFunctor #-}
+{-# LANGUAGE ScopedTypeVariables #-}
#if !defined(GHC_LOADED_INTO_GHCI)
{-# LANGUAGE UnboxedTuples #-}
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
index ae37b0f9d1..42f63b5752 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-- | Free regs map for i386
module GHC.CmmToAsm.Reg.Linear.X86 where
@@ -9,12 +10,13 @@ import GHC.Platform.Reg.Class
import GHC.Platform.Reg
import GHC.Utils.Panic
import GHC.Platform
+import GHC.Utils.Outputable
import Data.Word
import Data.Bits
newtype FreeRegs = FreeRegs Word32
- deriving Show
+ deriving (Show,Outputable)
noFreeRegs :: FreeRegs
noFreeRegs = FreeRegs 0
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
index 325e033e85..44eea342a4 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-- | Free regs map for x86_64
module GHC.CmmToAsm.Reg.Linear.X86_64 where
@@ -9,12 +10,13 @@ import GHC.Platform.Reg.Class
import GHC.Platform.Reg
import GHC.Utils.Panic
import GHC.Platform
+import GHC.Utils.Outputable
import Data.Word
import Data.Bits
newtype FreeRegs = FreeRegs Word64
- deriving Show
+ deriving (Show,Outputable)
noFreeRegs :: FreeRegs
noFreeRegs = FreeRegs 0