summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/GHC/CmmToAsm.hs2
-rw-r--r--compiler/GHC/CmmToAsm/BlockLayout.hs18
-rw-r--r--compiler/GHC/CmmToAsm/CFG.hs39
-rw-r--r--compiler/GHC/CmmToAsm/Instr.hs5
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear.hs79
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/Base.hs5
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs5
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs4
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/State.hs1
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/X86.hs4
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs4
-rw-r--r--compiler/GHC/Driver/Flags.hs1
-rw-r--r--compiler/GHC/Driver/Session.hs2
-rw-r--r--compiler/GHC/Utils/Outputable.hs3
-rw-r--r--docs/users_guide/8.12.1-notes.rst15
-rw-r--r--docs/users_guide/using-optimisation.rst13
16 files changed, 152 insertions, 48 deletions
diff --git a/compiler/GHC/CmmToAsm.hs b/compiler/GHC/CmmToAsm.hs
index 544edc801e..f771d79254 100644
--- a/compiler/GHC/CmmToAsm.hs
+++ b/compiler/GHC/CmmToAsm.hs
@@ -728,7 +728,7 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
let optimizedCFG :: Maybe CFG
optimizedCFG =
- optimizeCFG (cfgWeightInfo dflags) cmm <$!> postShortCFG
+ optimizeCFG (gopt Opt_CmmStaticPred dflags) (cfgWeightInfo dflags) cmm <$!> postShortCFG
maybeDumpCfg dflags optimizedCFG "CFG Weights - Final" proc_name
diff --git a/compiler/GHC/CmmToAsm/BlockLayout.hs b/compiler/GHC/CmmToAsm/BlockLayout.hs
index 07faa91473..67eff764e1 100644
--- a/compiler/GHC/CmmToAsm/BlockLayout.hs
+++ b/compiler/GHC/CmmToAsm/BlockLayout.hs
@@ -636,22 +636,8 @@ sequenceChain :: forall a i. (Instruction i, Outputable i)
-> [GenBasicBlock i] -- ^ Blocks placed in sequence.
sequenceChain _info _weights [] = []
sequenceChain _info _weights [x] = [x]
-sequenceChain info weights' blocks@((BasicBlock entry _):_) =
- let weights :: CFG
- weights = --pprTrace "cfg'" (pprEdgeWeights cfg')
- cfg'
- where
- (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-} mkGlobalWeights entry weights'
- cfg' = {-# SCC rewriteEdges #-}
- mapFoldlWithKey
- (\cfg from m ->
- mapFoldlWithKey
- (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to )
- cfg m )
- weights'
- globalEdgeWeights
-
- directEdges :: [CfgEdge]
+sequenceChain info weights blocks@((BasicBlock entry _):_) =
+ let directEdges :: [CfgEdge]
directEdges = sortBy (flip compare) $ catMaybes . map relevantWeight $ (infoEdgeList weights)
where
relevantWeight :: CfgEdge -> Maybe CfgEdge
diff --git a/compiler/GHC/CmmToAsm/CFG.hs b/compiler/GHC/CmmToAsm/CFG.hs
index ad3a3cdae7..5c68e77fd1 100644
--- a/compiler/GHC/CmmToAsm/CFG.hs
+++ b/compiler/GHC/CmmToAsm/CFG.hs
@@ -670,11 +670,21 @@ findBackEdges root cfg =
typedEdges =
classifyEdges root getSuccs edges :: [((BlockId,BlockId),EdgeType)]
-
-optimizeCFG :: D.CfgWeights -> RawCmmDecl -> CFG -> CFG
-optimizeCFG _ (CmmData {}) cfg = cfg
-optimizeCFG weights (CmmProc info _lab _live graph) cfg =
- {-# SCC optimizeCFG #-}
+optimizeCFG :: Bool -> D.CfgWeights -> RawCmmDecl -> CFG -> CFG
+optimizeCFG _ _ (CmmData {}) cfg = cfg
+optimizeCFG doStaticPred weights proc@(CmmProc _info _lab _live graph) cfg =
+ (if doStaticPred then staticPredCfg (g_entry graph) else id) $
+ optHsPatterns weights proc $ cfg
+
+-- | Modify branch weights based on educated guess on
+-- patterns GHC tends to produce and how they affect
+-- performance.
+--
+-- Most importantly we penalize jumps across info tables.
+optHsPatterns :: D.CfgWeights -> RawCmmDecl -> CFG -> CFG
+optHsPatterns _ (CmmData {}) cfg = cfg
+optHsPatterns weights (CmmProc info _lab _live graph) cfg =
+ {-# SCC optHsPatterns #-}
-- pprTrace "Initial:" (pprEdgeWeights cfg) $
-- pprTrace "Initial:" (ppr $ mkGlobalWeights (g_entry graph) cfg) $
@@ -749,6 +759,21 @@ optimizeCFG weights (CmmProc info _lab _live graph) cfg =
| CmmSource { trans_cmmNode = CmmCondBranch {} } <- source = True
| otherwise = False
+-- | Convert block-local branch weights to global weights.
+staticPredCfg :: BlockId -> CFG -> CFG
+staticPredCfg entry cfg = cfg'
+ where
+ (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-}
+ mkGlobalWeights entry cfg
+ cfg' = {-# SCC rewriteEdges #-}
+ mapFoldlWithKey
+ (\cfg from m ->
+ mapFoldlWithKey
+ (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to )
+ cfg m )
+ cfg
+ globalEdgeWeights
+
-- | Determine loop membership of blocks based on SCC analysis
-- This is faster but only gives yes/no answers.
loopMembers :: HasDebugCallStack => CFG -> LabelMap Bool
@@ -922,6 +947,10 @@ revPostorderFrom cfg root =
-- reverse post order. Which is required for diamond control flow to work probably.
--
-- We also apply a few prediction heuristics (based on the same paper)
+--
+-- The returned result represents frequences.
+-- For blocks it's the expected number of executions and
+-- for edges is the number of traversals.
{-# NOINLINE mkGlobalWeights #-}
{-# SCC mkGlobalWeights #-}
diff --git a/compiler/GHC/CmmToAsm/Instr.hs b/compiler/GHC/CmmToAsm/Instr.hs
index 833a72a74a..869c5eb238 100644
--- a/compiler/GHC/CmmToAsm/Instr.hs
+++ b/compiler/GHC/CmmToAsm/Instr.hs
@@ -37,7 +37,10 @@ import GHC.CmmToAsm.Config
-- (for allocation purposes, anyway).
--
data RegUsage
- = RU [Reg] [Reg]
+ = RU {
+ reads :: [Reg],
+ writes :: [Reg]
+ }
-- | No regs read or written to.
noUsage :: RegUsage
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear.hs b/compiler/GHC/CmmToAsm/Reg/Linear.hs
index 00b4915d7b..55cb73af1a 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-}
+{-# LANGUAGE ConstraintKinds #-}
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
@@ -137,6 +138,7 @@ import GHC.Platform
import Data.Maybe
import Data.List
import Control.Monad
+import Control.Applicative
-- -----------------------------------------------------------------------------
-- Top level of the register allocator
@@ -229,8 +231,13 @@ linearRegAlloc config entry_ids block_live sccs
go f = linearRegAlloc' config f entry_ids block_live sccs
platform = ncgPlatform config
+-- | Constraints on the instruction instances used by the
+-- linear allocator.
+type OutputableRegConstraint freeRegs instr =
+ (FR freeRegs, Outputable freeRegs, Outputable instr, Instruction instr)
+
linearRegAlloc'
- :: (FR freeRegs, Outputable instr, Instruction instr)
+ :: OutputableRegConstraint freeRegs instr
=> NCGConfig
-> freeRegs
-> [BlockId] -- ^ entry points
@@ -246,7 +253,7 @@ linearRegAlloc' config initFreeRegs entry_ids block_live sccs
return (blocks, stats, getStackUse stack)
-linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr)
+linearRA_SCCs :: OutputableRegConstraint freeRegs instr
=> [BlockId]
-> BlockMap RegSet
-> [NatBasicBlock instr]
@@ -281,7 +288,7 @@ linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs)
more sanity checking to guard against this eventuality.
-}
-process :: (FR freeRegs, Instruction instr, Outputable instr)
+process :: OutputableRegConstraint freeRegs instr
=> [BlockId]
-> BlockMap RegSet
-> [GenBasicBlock (LiveInstr instr)]
@@ -325,15 +332,18 @@ process entry_ids block_live (b@(BasicBlock id _) : blocks)
-- | Do register allocation on this basic block
--
processBlock
- :: (FR freeRegs, Outputable instr, Instruction instr)
+ :: OutputableRegConstraint freeRegs instr
=> BlockMap RegSet -- ^ live regs on entry to each basic block
-> LiveBasicBlock instr -- ^ block to do register allocation on
-> RegM freeRegs [NatBasicBlock instr] -- ^ block with registers allocated
processBlock block_live (BasicBlock id instrs)
- = do initBlock id block_live
+ = do -- pprTraceM "processBlock" $ text "" $$ ppr (BasicBlock id instrs)
+ initBlock id block_live
+
(instrs', fixups)
<- linearRA block_live [] [] id instrs
+ -- pprTraceM "blockResult" $ ppr (instrs', fixups)
return $ BasicBlock id instrs' : fixups
@@ -369,7 +379,7 @@ initBlock id block_live
-- | Do allocation for a sequence of instructions.
linearRA
- :: (FR freeRegs, Outputable instr, Instruction instr)
+ :: OutputableRegConstraint freeRegs instr
=> BlockMap RegSet -- ^ map of what vregs are live on entry to each block.
-> [instr] -- ^ accumulator for instructions already processed.
-> [NatBasicBlock instr] -- ^ accumulator for blocks of fixup code.
@@ -396,7 +406,7 @@ linearRA block_live accInstr accFixups id (instr:instrs)
-- | Do allocation for a single instruction.
raInsn
- :: (FR freeRegs, Outputable instr, Instruction instr)
+ :: OutputableRegConstraint freeRegs instr
=> BlockMap RegSet -- ^ map of what vregs are love on entry to each block.
-> [instr] -- ^ accumulator for instructions already processed.
-> BlockId -- ^ the id of the current block, for debugging
@@ -476,7 +486,7 @@ isInReg src assig | Just (InReg _) <- lookupUFM assig src = True
| otherwise = False
-genRaInsn :: (FR freeRegs, Instruction instr, Outputable instr)
+genRaInsn :: OutputableRegConstraint freeRegs instr
=> BlockMap RegSet
-> [instr]
-> BlockId
@@ -486,6 +496,7 @@ genRaInsn :: (FR freeRegs, Instruction instr, Outputable instr)
-> RegM freeRegs ([instr], [NatBasicBlock instr])
genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
+-- pprTraceM "genRaInsn" $ ppr (block_id, instr)
platform <- getPlatform
case regUsageOfInstr platform instr of { RU read written ->
do
@@ -525,6 +536,8 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
(fixup_blocks, adjusted_instr)
<- joinToTargets block_live block_id instr
+-- when (not $ null fixup_blocks) $ pprTraceM "genRA:FixBlocks" $ ppr fixup_blocks
+
-- Debugging - show places where the reg alloc inserted
-- assignment fixup blocks.
-- when (not $ null fixup_blocks) $
@@ -737,7 +750,7 @@ data SpillLoc = ReadMem StackSlot -- reading from register only in memory
-- the list of free registers and free stack slots.
allocateRegsAndSpill
- :: (FR freeRegs, Outputable instr, Instruction instr)
+ :: forall freeRegs instr. (FR freeRegs, Outputable instr, Instruction instr)
=> Bool -- True <=> reading (load up spilled regs)
-> [VirtualReg] -- don't push these out
-> [instr] -- spill insns
@@ -749,7 +762,8 @@ allocateRegsAndSpill _ _ spills alloc []
= return (spills, reverse alloc)
allocateRegsAndSpill reading keep spills alloc (r:rs)
- = do assig <- getAssigR
+ = do assig <- getAssigR :: RegM freeRegs (RegMap Loc)
+ -- pprTraceM "allocateRegsAndSpill:assig" (ppr (r:rs) $$ ppr assig)
let doSpill = allocRegsAndSpill_spill reading keep spills alloc r rs assig
case lookupUFM assig r of
-- case (1a): already in a register
@@ -779,6 +793,26 @@ allocateRegsAndSpill reading keep spills alloc (r:rs)
| otherwise -> doSpill WriteNew
+-- | Given a virtual reg find a preferred real register.
+-- The preferred register is simply the first one the variable
+-- was assigned to (if any). This way when we allocate for a loop
+-- variables are likely to end up in the same registers at the
+-- end and start of the loop, avoiding redundant reg-reg moves.
+-- Note: I tried returning a list of past assignments, but that
+-- turned out to barely matter but added a few tenths of
+-- a percent to compile time.
+findPrefRealReg :: forall freeRegs u. Uniquable u
+ => u -> RegM freeRegs (Maybe RealReg)
+findPrefRealReg vreg = do
+ bassig <- getBlockAssigR :: RegM freeRegs (BlockMap (freeRegs,RegMap Loc))
+ return $ foldr (findVirtRegAssig) Nothing bassig
+ where
+ findVirtRegAssig :: (freeRegs,RegMap Loc) -> Maybe RealReg -> Maybe RealReg
+ findVirtRegAssig assig z =
+ z <|> case lookupUFM (snd assig) vreg of
+ Just (InReg real_reg) -> Just real_reg
+ Just (InBoth real_reg _) -> Just real_reg
+ _ -> z
-- reading is redundant with reason, but we keep it around because it's
-- convenient and it maintains the recursive structure of the allocator. -- EZY
@@ -795,18 +829,26 @@ allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr, Outputable instr)
allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
= do platform <- getPlatform
freeRegs <- getFreeRegsR
- let freeRegs_thisClass = frGetFreeRegs platform (classOfVirtualReg r) freeRegs
+ let freeRegs_thisClass = frGetFreeRegs platform (classOfVirtualReg r) freeRegs :: [RealReg]
- case freeRegs_thisClass of
+ -- Can we put the variable into a register it already was?
+ pref_reg <- findPrefRealReg r
+ case freeRegs_thisClass of
-- case (2): we have a free register
- (my_reg : _) ->
- do spills' <- loadTemp r spill_loc my_reg spills
+ (first_free : _) ->
+ do let final_reg
+ | Just reg <- pref_reg
+ , reg `elem` freeRegs_thisClass
+ = reg
+ | otherwise
+ = first_free
+ spills' <- loadTemp r spill_loc final_reg spills
- setAssigR (addToUFM assig r $! newLocation spill_loc my_reg)
- setFreeRegsR $ frAllocateReg platform my_reg freeRegs
+ setAssigR (addToUFM assig r $! newLocation spill_loc final_reg)
+ setFreeRegsR $ frAllocateReg platform final_reg freeRegs
- allocateRegsAndSpill reading keep spills' (my_reg : alloc) rs
+ allocateRegsAndSpill reading keep spills' (final_reg : alloc) rs
-- case (3): we need to push something out to free up a register
@@ -814,7 +856,8 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
do let inRegOrBoth (InReg _) = True
inRegOrBoth (InBoth _ _) = True
inRegOrBoth _ = False
- let candidates' =
+ let candidates' :: UniqFM Loc
+ candidates' =
flip delListFromUFM keep $
filterUFM inRegOrBoth $
assig
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs b/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
index 5784660e3f..6a110f0a48 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
@@ -30,6 +30,7 @@ import GHC.Types.Unique.FM
import GHC.Types.Unique.Supply
import GHC.Cmm.BlockId
+data ReadingOrWriting = Reading | Writing deriving (Eq,Ord)
-- | Used to store the register assignment on entry to a basic block.
-- We use this to handle join points, where multiple branch instructions
@@ -138,6 +139,8 @@ data RA_State freeRegs
, ra_config :: !NCGConfig
-- | (from,fixup,to) : We inserted fixup code between from and to
- , ra_fixups :: [(BlockId,BlockId,BlockId)] }
+ , ra_fixups :: [(BlockId,BlockId,BlockId)]
+
+ }
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs b/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
index fe19164357..fd0719c656 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/PPC.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+
-- | Free regs map for PowerPC
module GHC.CmmToAsm.Reg.Linear.PPC where
@@ -27,6 +29,9 @@ import Data.Bits
data FreeRegs = FreeRegs !Word32 !Word32
deriving( Show ) -- The Show is used in an ASSERT
+instance Outputable FreeRegs where
+ ppr = text . show
+
noFreeRegs :: FreeRegs
noFreeRegs = FreeRegs 0 0
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs b/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs
index ac7dc85366..063a8836b3 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/SPARC.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE CPP #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-- | Free regs map for SPARC
module GHC.CmmToAsm.Reg.Linear.SPARC where
@@ -38,6 +39,9 @@ data FreeRegs
instance Show FreeRegs where
show = showFreeRegs
+instance Outputable FreeRegs where
+ ppr = text . showFreeRegs
+
-- | A reg map where no regs are free to be allocated.
noFreeRegs :: FreeRegs
noFreeRegs = FreeRegs 0 0 0
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs
index f96cc71239..ab05ab632a 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE CPP, PatternSynonyms, DeriveFunctor #-}
+{-# LANGUAGE ScopedTypeVariables #-}
#if !defined(GHC_LOADED_INTO_GHCI)
{-# LANGUAGE UnboxedTuples #-}
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
index ae37b0f9d1..42f63b5752 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-- | Free regs map for i386
module GHC.CmmToAsm.Reg.Linear.X86 where
@@ -9,12 +10,13 @@ import GHC.Platform.Reg.Class
import GHC.Platform.Reg
import GHC.Utils.Panic
import GHC.Platform
+import GHC.Utils.Outputable
import Data.Word
import Data.Bits
newtype FreeRegs = FreeRegs Word32
- deriving Show
+ deriving (Show,Outputable)
noFreeRegs :: FreeRegs
noFreeRegs = FreeRegs 0
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
index 325e033e85..44eea342a4 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-- | Free regs map for x86_64
module GHC.CmmToAsm.Reg.Linear.X86_64 where
@@ -9,12 +10,13 @@ import GHC.Platform.Reg.Class
import GHC.Platform.Reg
import GHC.Utils.Panic
import GHC.Platform
+import GHC.Utils.Outputable
import Data.Word
import Data.Bits
newtype FreeRegs = FreeRegs Word64
- deriving Show
+ deriving (Show,Outputable)
noFreeRegs :: FreeRegs
noFreeRegs = FreeRegs 0
diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs
index b0be5f4bce..d0b04713fb 100644
--- a/compiler/GHC/Driver/Flags.hs
+++ b/compiler/GHC/Driver/Flags.hs
@@ -181,6 +181,7 @@ data GeneralFlag
| Opt_LlvmFillUndefWithGarbage -- Testing for undef bugs (hidden flag)
| Opt_IrrefutableTuples
| Opt_CmmSink
+ | Opt_CmmStaticPred
| Opt_CmmElimCommonBlocks
| Opt_AsmShortcutting
| Opt_OmitYields
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index ac099a61dd..87b13a8e99 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -3514,6 +3514,7 @@ fFlagsDeps = [
flagSpec "case-folding" Opt_CaseFolding,
flagSpec "cmm-elim-common-blocks" Opt_CmmElimCommonBlocks,
flagSpec "cmm-sink" Opt_CmmSink,
+ flagSpec "cmm-static-pred" Opt_CmmStaticPred,
flagSpec "cse" Opt_CSE,
flagSpec "stg-cse" Opt_StgCSE,
flagSpec "stg-lift-lams" Opt_StgLiftLams,
@@ -4065,6 +4066,7 @@ optLevelFlags -- see Note [Documenting optimisation flags]
, ([1,2], Opt_CmmElimCommonBlocks)
, ([2], Opt_AsmShortcutting)
, ([1,2], Opt_CmmSink)
+ , ([1,2], Opt_CmmStaticPred)
, ([1,2], Opt_CSE)
, ([1,2], Opt_StgCSE)
, ([2], Opt_StgLiftLams)
diff --git a/compiler/GHC/Utils/Outputable.hs b/compiler/GHC/Utils/Outputable.hs
index ba843cef30..149713773d 100644
--- a/compiler/GHC/Utils/Outputable.hs
+++ b/compiler/GHC/Utils/Outputable.hs
@@ -837,6 +837,9 @@ instance Outputable Word16 where
instance Outputable Word32 where
ppr n = integer $ fromIntegral n
+instance Outputable Word64 where
+ ppr n = integer $ fromIntegral n
+
instance Outputable Word where
ppr n = integer $ fromIntegral n
diff --git a/docs/users_guide/8.12.1-notes.rst b/docs/users_guide/8.12.1-notes.rst
index 5c478b8fa4..c15869df75 100644
--- a/docs/users_guide/8.12.1-notes.rst
+++ b/docs/users_guide/8.12.1-notes.rst
@@ -10,7 +10,14 @@ following sections.
Highlights
----------
-- TODO
+* NCG
+
+ - The linear register allocator saw improvements reducing the number
+ of redundant move instructions. Rare edge cases can see double
+ digit improvements in runtime for inner loops.
+
+ In the mean this improved runtime by about 0.8%. For details
+ see ticket #17823.
Full details
------------
@@ -148,11 +155,11 @@ Arrow notation
``hsGroupTopLevelFixitySigs`` function, which collects all top-level fixity
signatures, including those for class methods defined inside classes.
-- The ``Exception`` module was boiled down acknowledging the existence of
+- The ``Exception`` module was boiled down acknowledging the existence of
the ``exceptions`` dependency. In particular, the ``ExceptionMonad``
class is not a proper class anymore, but a mere synonym for ``MonadThrow``,
- ``MonadCatch``, ``MonadMask`` (all from ``exceptions``) and ``MonadIO``.
- All of ``g*``-functions from the module (``gtry``, ``gcatch``, etc.) are
+ ``MonadCatch``, ``MonadMask`` (all from ``exceptions``) and ``MonadIO``.
+ All of ``g*``-functions from the module (``gtry``, ``gcatch``, etc.) are
erased, and their ``exceptions``-alternatives are meant to be used in the
GHC code instead.
diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst
index 4ca47524a7..8ec19cb147 100644
--- a/docs/users_guide/using-optimisation.rst
+++ b/docs/users_guide/using-optimisation.rst
@@ -214,6 +214,19 @@ by saying ``-fno-wombat``.
to their usage sites. It also inlines simple expressions like
literals or registers.
+.. ghc-flag:: -fcmm-static-pred
+ :shortdesc: Enable static control flow prediction. Implied by :ghc-flag:`-O`.
+ :type: dynamic
+ :reverse: -fno-cmm-static-pred
+ :category:
+
+ :default: off but enabled with :ghc-flag:`-O`.
+
+ This enables static control flow prediction on the final Cmm
+ code. If enabled GHC will apply certain heuristics to identify
+ loops and hot code paths. This information is then used by the
+ register allocation and code layout passes.
+
.. ghc-flag:: -fasm-shortcutting
:shortdesc: Enable shortcutting on assembly. Implied by :ghc-flag:`-O2`.
:type: dynamic