summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2021-01-07 23:39:47 -0500
committerAndreas Klebinger <klebinger.andreas@gmx.at>2021-03-30 12:22:28 +0200
commit9201bf638fe4d88460c0ac683a22412d2cdc73db (patch)
tree27deeb3d1507735cd3fbdfdd3bc871713a2302f6
parent6604409594f71d2ed5963bb3897bc4ee772cc5c0 (diff)
downloadhaskell-wip/ncg-perf-2.tar.gz
Specialize the linear register allocatorwip/ncg-perf-2
We also combine the two FR instances for X86 into one parametrised one. ------------------------- Metric Decrease: T12707 T13379 T3294 T4801 T783 T18478 T17096 T11374 ------------------------- Co-Author: Andreas Klebinger
-rw-r--r--compiler/GHC/CmmToAsm.hs7
-rw-r--r--compiler/GHC/CmmToAsm/Monad.hs12
-rw-r--r--compiler/GHC/CmmToAsm/PPC.hs6
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear.hs95
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs26
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs12
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/State.hs2
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/X86Base.hs (renamed from compiler/GHC/CmmToAsm/Reg/Linear/X86.hs)28
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs55
-rw-r--r--compiler/GHC/CmmToAsm/Reg/Liveness.hs7
-rw-r--r--compiler/GHC/CmmToAsm/SPARC.hs6
-rw-r--r--compiler/GHC/CmmToAsm/X86.hs52
-rw-r--r--compiler/GHC/CmmToAsm/X86/Ppr.hs1
-rw-r--r--compiler/ghc.cabal.in3
14 files changed, 194 insertions, 118 deletions
diff --git a/compiler/GHC/CmmToAsm.hs b/compiler/GHC/CmmToAsm.hs
index 6712db8d05..19309c7ec1 100644
--- a/compiler/GHC/CmmToAsm.hs
+++ b/compiler/GHC/CmmToAsm.hs
@@ -582,9 +582,10 @@ cmmNativeGen logger dflags modLoc ncgImpl us fileIds dbgMap cmm count
else do
-- do linear register allocation
let reg_alloc proc = do
- (alloced, maybe_more_stack, ra_stats) <-
- Linear.regAlloc config proc
- case maybe_more_stack of
+ let regAlloc = (linearRegAlloc ncgImpl)
+ (alloced, maybe_more_stack, ra_stats) <-
+ regAlloc config proc
+ case maybe_more_stack of
Nothing -> return ( alloced, ra_stats, [] )
Just amount -> do
(alloced',stack_updt_blks) <-
diff --git a/compiler/GHC/CmmToAsm/Monad.hs b/compiler/GHC/CmmToAsm/Monad.hs
index 8fb834ec7a..c5b2358e6f 100644
--- a/compiler/GHC/CmmToAsm/Monad.hs
+++ b/compiler/GHC/CmmToAsm/Monad.hs
@@ -71,6 +71,8 @@ import GHC.Utils.Outputable (SDoc, ppr)
import GHC.Utils.Panic (pprPanic)
import GHC.CmmToAsm.CFG
import GHC.CmmToAsm.CFG.Weight
+import GHC.CmmToAsm.Reg.Linear.Base
+import GHC.CmmToAsm.Reg.Liveness
data NcgImpl statics instr jumpDest = NcgImpl {
ncgConfig :: !NCGConfig,
@@ -97,9 +99,17 @@ data NcgImpl statics instr jumpDest = NcgImpl {
-- See Note [What is this unwinding business?] in "GHC.Cmm.DebugBlock"
-- and Note [Unwinding information in the NCG] in this module.
invertCondBranches :: Maybe CFG -> LabelMap RawCmmStatics -> [NatBasicBlock instr]
- -> [NatBasicBlock instr]
+ -> [NatBasicBlock instr],
-- ^ Turn the sequence of @jcc l1; jmp l2@ into @jncc l2; \<block_l1>@
-- when possible.
+ linearRegAlloc :: NCGConfig
+ -> LiveCmmDecl statics instr
+ -> UniqSM ( NatCmmDecl statics instr
+ , Maybe Int -- number of extra stack slots required,
+ -- beyond maxSpillSlots
+ , Maybe RegAllocStats
+ )
+ -- ^ Specialized version of the linear register allocator.
}
data NatM_State
diff --git a/compiler/GHC/CmmToAsm/PPC.hs b/compiler/GHC/CmmToAsm/PPC.hs
index 148fd1b4b2..6b0e250ddf 100644
--- a/compiler/GHC/CmmToAsm/PPC.hs
+++ b/compiler/GHC/CmmToAsm/PPC.hs
@@ -12,13 +12,17 @@ import GHC.CmmToAsm.Instr
import GHC.CmmToAsm.Monad
import GHC.CmmToAsm.Config
import GHC.CmmToAsm.Types
+import qualified GHC.CmmToAsm.Reg.Linear as Linear
+import qualified GHC.CmmToAsm.Reg.Linear.FreeRegs as FR
+import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC
import qualified GHC.CmmToAsm.PPC.Instr as PPC
import qualified GHC.CmmToAsm.PPC.Ppr as PPC
import qualified GHC.CmmToAsm.PPC.CodeGen as PPC
import qualified GHC.CmmToAsm.PPC.Regs as PPC
import qualified GHC.CmmToAsm.PPC.RegInfo as PPC
+
ncgPPC :: NCGConfig -> NcgImpl RawCmmStatics PPC.Instr PPC.JumpDest
ncgPPC config = NcgImpl
{ ncgConfig = config
@@ -36,6 +40,8 @@ ncgPPC config = NcgImpl
, ncgMakeFarBranches = PPC.makeFarBranches
, extractUnwindPoints = const []
, invertCondBranches = \_ _ -> id
+ , linearRegAlloc = Linear.regAlloc (FR.frInitFreeRegs platform :: PPC.FreeRegs)
+
}
where
platform = ncgPlatform config
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear.hs b/compiler/GHC/CmmToAsm/Reg/Linear.hs
index 31bcbd1d68..e13f0f4845 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-}
{-# LANGUAGE ConstraintKinds #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
@@ -113,10 +114,6 @@ import GHC.CmmToAsm.Reg.Linear.StackMap
import GHC.CmmToAsm.Reg.Linear.FreeRegs
import GHC.CmmToAsm.Reg.Linear.Stats
import GHC.CmmToAsm.Reg.Linear.JoinToTargets
-import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC
-import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC
-import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86
-import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64
import GHC.CmmToAsm.Reg.Target
import GHC.CmmToAsm.Reg.Liveness
import GHC.CmmToAsm.Reg.Utils
@@ -146,10 +143,42 @@ import Control.Applicative
-- -----------------------------------------------------------------------------
-- Top level of the register allocator
+{- Note [Specializing the register allocator]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The register allocator is quite overloaded. Most importantly over the instruction
+set (instr) and the bitmap to keep track of free registers (freeRegs).
+
+I (AndreasK) added a fair few number of INLINEABLE pragmas which now allows us to
+specialize the allocator in the modules defining the NcgImpl for the varius backends.
+
+Each NcgImpl will then carry around a function that is the (usually specialized)
+allocator for that particular backend.
+
+When compiling with -Wall-missed-specializations sadly it becomes obvious that quite
+often there are spurious errors. For the details see #19592.
+The way to check that we really fully specialize the allocator is then:
+* Compile with -Wall-missed-specialisations.
+* Look at the list of "failed" specialisations.
+* Ignore constraint tuple selectors "$p2(%,,%)" and the like.
+* For all other reported functions make sure they either don't appear in the
+ final core (because of inlining) or are replaced by their monomorphic variant.
+
+I did this when making the simplifier specializable in early 2021. If you perform
+major refactorings on it it would be wise to do so again.
+
+NB. Another reasonable design would be the have a register allocator class, with instances
+for the various backends. Maybe worth changing over if we apply this to the graph
+allocator as well.
+
+-}
+
-- Allocate registers
+{-# INLINEABLE regAlloc #-}
regAlloc
- :: Instruction instr
- => NCGConfig
+ :: (Outputable freeRegs, FR freeRegs, Instruction instr)
+ => freeRegs
+ -> NCGConfig
-> LiveCmmDecl statics instr
-> UniqSM ( NatCmmDecl statics instr
, Maybe Int -- number of extra stack slots required,
@@ -157,23 +186,23 @@ regAlloc
, Maybe RegAllocStats
)
-regAlloc _ (CmmData sec d)
+regAlloc _fr _ (CmmData sec d)
= return
( CmmData sec d
, Nothing
, Nothing )
-regAlloc _ (CmmProc (LiveInfo info _ _ _) lbl live [])
+regAlloc _fr _ (CmmProc (LiveInfo info _ _ _) lbl live [])
= return ( CmmProc info lbl live (ListGraph [])
, Nothing
, Nothing )
-regAlloc config (CmmProc static lbl live sccs)
+regAlloc fr config (CmmProc static lbl live sccs)
| LiveInfo info entry_ids@(first_id:_) block_live _ <- static
= do
-- do register allocation on each component.
!(!final_blocks, !stats, !stack_use)
- <- linearRegAlloc config entry_ids block_live sccs
+ <- linearRegAlloc fr config entry_ids block_live sccs
-- make sure the block that was first in the input list
-- stays at the front of the output
@@ -192,21 +221,22 @@ regAlloc config (CmmProc static lbl live sccs)
, Just stats)
-- bogus. to make non-exhaustive match warning go away.
-regAlloc _ (CmmProc _ _ _ _)
+regAlloc _fr _ (CmmProc _ _ _ _)
= panic "RegAllocLinear.regAlloc: no match"
-- -----------------------------------------------------------------------------
-- Linear sweep to allocate registers
-
-- | Do register allocation on some basic blocks.
-- But be careful to allocate a block in an SCC only if it has
-- an entry in the block map or it is the first block.
--
+{-# INLINEABLE linearRegAlloc #-}
linearRegAlloc
- :: forall instr. Instruction instr
- => NCGConfig
+ :: forall instr freeRegs. (Outputable freeRegs, FR freeRegs, Instruction instr)
+ => freeRegs
+ -> NCGConfig
-> [BlockId] -- ^ entry points
-> BlockMap RegSet
-- ^ live regs on entry to each basic block
@@ -214,17 +244,17 @@ linearRegAlloc
-- ^ instructions annotated with "deaths"
-> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
-linearRegAlloc config entry_ids block_live sccs
+linearRegAlloc initFreeRegs config entry_ids block_live sccs
= case platformArch platform of
- ArchX86 -> go $ (frInitFreeRegs platform :: X86.FreeRegs)
- ArchX86_64 -> go $ (frInitFreeRegs platform :: X86_64.FreeRegs)
+ ArchX86 -> go
+ ArchX86_64 -> go
+ ArchSPARC -> go --go $ (frInitFreeRegs platform :: SPARC.FreeRegs)
+ ArchPPC -> go --go $ (frInitFreeRegs platform :: PPC.FreeRegs)
+ ArchPPC_64 _ -> go --go $ (frInitFreeRegs platform :: PPC.FreeRegs)
ArchS390X -> panic "linearRegAlloc ArchS390X"
- ArchSPARC -> go $ (frInitFreeRegs platform :: SPARC.FreeRegs)
ArchSPARC64 -> panic "linearRegAlloc ArchSPARC64"
- ArchPPC -> go $ (frInitFreeRegs platform :: PPC.FreeRegs)
ArchARM _ _ _ -> panic "linearRegAlloc ArchARM"
ArchAArch64 -> panic "linearRegAlloc ArchAArch64"
- ArchPPC_64 _ -> go $ (frInitFreeRegs platform :: PPC.FreeRegs)
ArchAlpha -> panic "linearRegAlloc ArchAlpha"
ArchMipseb -> panic "linearRegAlloc ArchMipseb"
ArchMipsel -> panic "linearRegAlloc ArchMipsel"
@@ -232,9 +262,8 @@ linearRegAlloc config entry_ids block_live sccs
ArchJavaScript -> panic "linearRegAlloc ArchJavaScript"
ArchUnknown -> panic "linearRegAlloc ArchUnknown"
where
- go :: (FR regs, Outputable regs)
- => regs -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
- go f = linearRegAlloc' config f entry_ids block_live sccs
+ go :: UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
+ go = linearRegAlloc' config initFreeRegs entry_ids block_live sccs
platform = ncgPlatform config
-- | Constraints on the instruction instances used by the
@@ -242,6 +271,7 @@ linearRegAlloc config entry_ids block_live sccs
type OutputableRegConstraint freeRegs instr =
(FR freeRegs, Outputable freeRegs, Instruction instr)
+{-# INLINEABLE linearRegAlloc' #-}
linearRegAlloc'
:: OutputableRegConstraint freeRegs instr
=> NCGConfig
@@ -259,6 +289,7 @@ linearRegAlloc' config initFreeRegs entry_ids block_live sccs
return (blocks, stats, getStackUse stack)
+{-# INLINEABLE linearRA_SCCs #-}
linearRA_SCCs :: OutputableRegConstraint freeRegs instr
=> [BlockId]
-> BlockMap RegSet
@@ -293,7 +324,7 @@ linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs)
some reason then this function will loop. We should probably do some
more sanity checking to guard against this eventuality.
-}
-
+{-# INLINEABLE process #-}
process :: forall freeRegs instr. (OutputableRegConstraint freeRegs instr)
=> [BlockId]
-> BlockMap RegSet
@@ -330,7 +361,7 @@ process entry_ids block_live =
else do go blocks (b : next_round) accum madeProgress
-
+{-# INLINEABLE processBlock #-}
-- | Do register allocation on this basic block
--
processBlock
@@ -348,7 +379,7 @@ processBlock block_live (BasicBlock id instrs)
-- pprTraceM "blockResult" $ ppr (instrs', fixups)
return $ BasicBlock id instrs' : fixups
-
+{-# INLINEABLE initBlock #-}
-- | Load the freeregs and current reg assignment into the RegM state
-- for the basic block with this BlockId.
initBlock :: FR freeRegs
@@ -378,7 +409,7 @@ initBlock id block_live
-> do setFreeRegsR freeregs
setAssigR assig
-
+{-# INLINEABLE linearRA #-}
-- | Do allocation for a sequence of instructions.
linearRA
:: forall freeRegs instr. (OutputableRegConstraint freeRegs instr)
@@ -468,7 +499,7 @@ raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
raInsn _ _ _ instr
= do
platform <- getPlatform
- let instr' = fmap (pprInstr platform) instr
+ let instr' = fmap (pprInstr platform) instr :: LiveInstr SDoc
pprPanic "raInsn" (text "no match for:" <> ppr instr')
-- ToDo: what can we do about
@@ -488,7 +519,7 @@ isInReg :: Reg -> RegMap Loc -> Bool
isInReg src assig | Just (InReg _) <- lookupUFM assig src = True
| otherwise = False
-
+{-# INLINEABLE genRaInsn #-}
genRaInsn :: forall freeRegs instr.
OutputableRegConstraint freeRegs instr
=> BlockMap RegSet
@@ -607,6 +638,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
-- -----------------------------------------------------------------------------
-- releaseRegs
+{-# INLINEABLE releaseRegs #-}
releaseRegs :: FR freeRegs => [Reg] -> RegM freeRegs ()
releaseRegs regs = do
platform <- getPlatform
@@ -638,6 +670,7 @@ releaseRegs regs = do
-- - clobbered regs are not allocatable.
--
+{-# INLINEABLE saveClobberedTemps #-}
saveClobberedTemps
:: forall instr freeRegs.
(Instruction instr, FR freeRegs)
@@ -706,7 +739,7 @@ saveClobberedTemps clobbered dying
clobber new_assign (spill : instrs) rest
-
+{-# INLINEABLE clobberRegs #-}
-- | Mark all these real regs as allocated,
-- and kick out their vreg assignments.
--
@@ -763,6 +796,7 @@ data SpillLoc = ReadMem StackSlot -- reading from register only in memory
-- We also update the register assignment in the process, and
-- the list of free registers and free stack slots.
+{-# INLINEABLE allocateRegsAndSpill #-}
allocateRegsAndSpill
:: forall freeRegs instr. (FR freeRegs, Instruction instr)
=> Bool -- True <=> reading (load up spilled regs)
@@ -828,6 +862,7 @@ findPrefRealReg vreg = do
Just (InBoth real_reg _) -> Just real_reg
_ -> z
+{-# INLINEABLE allocRegsAndSpill_spill #-}
-- reading is redundant with reason, but we keep it around because it's
-- convenient and it maintains the recursive structure of the allocator. -- EZY
allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr)
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
index 1768422f5c..c0442a8282 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE CPP #-}
+{-# LANGUAGE FlexibleInstances #-}
module GHC.CmmToAsm.Reg.Linear.FreeRegs (
FR(..),
@@ -32,8 +33,7 @@ import GHC.Platform
import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC
import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC
-import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86
-import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64
+import qualified GHC.CmmToAsm.Reg.Linear.X86Base as X86Base
import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr
import qualified GHC.CmmToAsm.SPARC.Instr as SPARC.Instr
@@ -45,17 +45,19 @@ class Show freeRegs => FR freeRegs where
frInitFreeRegs :: Platform -> freeRegs
frReleaseReg :: Platform -> RealReg -> freeRegs -> freeRegs
-instance FR X86.FreeRegs where
- frAllocateReg = \_ -> X86.allocateReg
- frGetFreeRegs = X86.getFreeRegs
- frInitFreeRegs = X86.initFreeRegs
- frReleaseReg = \_ -> X86.releaseReg
+instance FR (X86Base.FreeRegsX86) where
+ {-# INLINEABLE frAllocateReg #-}
+ frAllocateReg = \_ -> X86Base.allocateReg
+ frGetFreeRegs = X86Base.getFreeRegs
+ frInitFreeRegs = X86Base.initFreeRegs
+ frReleaseReg = \_ -> X86Base.releaseReg
-instance FR X86_64.FreeRegs where
- frAllocateReg = \_ -> X86_64.allocateReg
- frGetFreeRegs = X86_64.getFreeRegs
- frInitFreeRegs = X86_64.initFreeRegs
- frReleaseReg = \_ -> X86_64.releaseReg
+instance FR (X86Base.FreeRegsX86_64) where
+ {-# INLINEABLE frAllocateReg #-}
+ frAllocateReg = \_ -> X86Base.allocateReg
+ frGetFreeRegs = X86Base.getFreeRegs
+ frInitFreeRegs = X86Base.initFreeRegs
+ frReleaseReg = \_ -> X86Base.releaseReg
instance FR PPC.FreeRegs where
frAllocateReg = \_ -> PPC.allocateReg
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs b/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
index d0330a4f6a..bef8ef6306 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
@@ -33,6 +33,7 @@ import GHC.Types.Unique.Set
-- | For a jump instruction at the end of a block, generate fixup code so its
-- vregs are in the correct regs for its destination.
--
+{-# INLINEABLE joinToTargets #-}
joinToTargets
:: (FR freeRegs, Instruction instr)
=> BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
@@ -57,6 +58,7 @@ joinToTargets block_live id instr
= joinToTargets' block_live [] id instr (jumpDestsOfInstr instr)
-----
+{-# INLINEABLE joinToTargets' #-}
joinToTargets'
:: (FR freeRegs, Instruction instr)
=> BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
@@ -112,6 +114,7 @@ joinToTargets' block_live new_blocks block_id instr (dest:dests)
-- this is the first time we jumped to this block.
+{-# INLINEABLE joinToTargets_first #-}
joinToTargets_first :: (FR freeRegs, Instruction instr)
=> BlockMap RegSet
-> [NatBasicBlock instr]
@@ -141,6 +144,7 @@ joinToTargets_first block_live new_blocks block_id instr dest dests
-- we've jumped to this block before
+{-# INLINEABLE joinToTargets_again #-}
joinToTargets_again :: (Instruction instr, FR freeRegs)
=> BlockMap RegSet
-> [NatBasicBlock instr]
@@ -296,6 +300,7 @@ expandNode vreg src dst
-- destinations. We have eliminated any possibility of single-node
-- cycles in expandNode above.
--
+{-# INLINEABLE handleComponent #-}
handleComponent
:: Instruction instr
=> Int -> instr -> SCC (Node Loc Unique)
@@ -348,6 +353,7 @@ handleComponent _ _ (CyclicSCC _)
-- | Move a vreg between these two locations.
--
+{-# INLINEABLE makeMove #-}
makeMove
:: Instruction instr
=> Int -- ^ current C stack delta.
@@ -363,13 +369,13 @@ makeMove delta vreg src dst
case (src, dst) of
(InReg s, InReg d) ->
do recordSpill (SpillJoinRR vreg)
- return $ mkRegRegMoveInstr platform (RegReal s) (RegReal d)
+ return $! mkRegRegMoveInstr platform (RegReal s) (RegReal d)
(InMem s, InReg d) ->
do recordSpill (SpillJoinRM vreg)
- return $ mkLoadInstr config (RegReal d) delta s
+ return $! mkLoadInstr config (RegReal d) delta s
(InReg s, InMem d) ->
do recordSpill (SpillJoinRM vreg)
- return $ mkSpillInstr config (RegReal s) delta d
+ return $! mkSpillInstr config (RegReal s) delta d
_ ->
-- we don't handle memory to memory moves.
-- they shouldn't happen because we don't share
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs
index 4fdc5c96cf..8e53c5799b 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs
@@ -119,7 +119,7 @@ makeRAStats state
{ ra_spillInstrs = binSpillReasons (ra_spills state)
, ra_fixupList = ra_fixups state }
-
+{-# INLINEABLE spillR #-}
spillR :: Instruction instr
=> Reg -> Unique -> RegM freeRegs (instr, Int)
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86Base.hs
index 42f63b5752..fbc592369e 100644
--- a/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Linear/X86Base.hs
@@ -1,7 +1,8 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE ConstraintKinds #-}
-- | Free regs map for i386
-module GHC.CmmToAsm.Reg.Linear.X86 where
+module GHC.CmmToAsm.Reg.Linear.X86Base where
import GHC.Prelude
@@ -15,28 +16,40 @@ import GHC.Utils.Outputable
import Data.Word
import Data.Bits
-newtype FreeRegs = FreeRegs Word32
+-- Instead of being parametric we could also just use Word64
+-- at all times. Obviously bad for 32bit ghc performance. But
+-- not sure if anyone still cares about that.
+type FreeRegsX86 = FreeRegs Word32
+type FreeRegsX86_64 = FreeRegs Word64
+
+newtype FreeRegs w = FreeRegs w
deriving (Show,Outputable)
-noFreeRegs :: FreeRegs
+type FreeRegConstraints w = (Bits w, Num w)
+
+noFreeRegs :: FreeRegConstraints w => FreeRegs w
noFreeRegs = FreeRegs 0
-releaseReg :: RealReg -> FreeRegs -> FreeRegs
+{-# INLINEABLE releaseReg #-}
+releaseReg :: FreeRegConstraints w => RealReg -> FreeRegs w -> FreeRegs w
releaseReg (RealRegSingle n) (FreeRegs f)
= FreeRegs (f .|. (1 `shiftL` n))
releaseReg _ _
= panic "RegAlloc.Linear.X86.FreeRegs.releaseReg: no reg"
-initFreeRegs :: Platform -> FreeRegs
+{-# INLINEABLE initFreeRegs #-}
+initFreeRegs :: FreeRegConstraints w => Platform -> FreeRegs w
initFreeRegs platform
= foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
-getFreeRegs :: Platform -> RegClass -> FreeRegs -> [RealReg] -- lazily
+{-# INLINEABLE getFreeRegs #-}
+getFreeRegs :: FreeRegConstraints w => Platform -> RegClass -> FreeRegs w -> [RealReg] -- lazily
getFreeRegs platform cls (FreeRegs f) = go f 0
where go 0 _ = []
go n m
+ -- Register is free && has the right class
| n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls
= RealRegSingle m : (go (n `shiftR` 1) $! (m+1))
@@ -45,7 +58,8 @@ getFreeRegs platform cls (FreeRegs f) = go f 0
-- ToDo: there's no point looking through all the integer registers
-- in order to find a floating-point one.
-allocateReg :: RealReg -> FreeRegs -> FreeRegs
+{-# INLINEABLE allocateReg #-}
+allocateReg :: FreeRegConstraints w => RealReg -> FreeRegs w -> FreeRegs w
allocateReg (RealRegSingle r) (FreeRegs f)
= FreeRegs (f .&. complement (1 `shiftL` r))
diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
deleted file mode 100644
index 44eea342a4..0000000000
--- a/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs
+++ /dev/null
@@ -1,55 +0,0 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-
--- | Free regs map for x86_64
-module GHC.CmmToAsm.Reg.Linear.X86_64 where
-
-import GHC.Prelude
-
-import GHC.CmmToAsm.X86.Regs
-import GHC.Platform.Reg.Class
-import GHC.Platform.Reg
-import GHC.Utils.Panic
-import GHC.Platform
-import GHC.Utils.Outputable
-
-import Data.Word
-import Data.Bits
-
-newtype FreeRegs = FreeRegs Word64
- deriving (Show,Outputable)
-
-noFreeRegs :: FreeRegs
-noFreeRegs = FreeRegs 0
-
-releaseReg :: RealReg -> FreeRegs -> FreeRegs
-releaseReg (RealRegSingle n) (FreeRegs f)
- = FreeRegs (f .|. (1 `shiftL` n))
-
-releaseReg _ _
- = panic "RegAlloc.Linear.X86_64.FreeRegs.releaseReg: no reg"
-
-initFreeRegs :: Platform -> FreeRegs
-initFreeRegs platform
- = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
-
-getFreeRegs :: Platform -> RegClass -> FreeRegs -> [RealReg] -- lazily
-getFreeRegs platform cls (FreeRegs f) = go f 0
-
- where go 0 _ = []
- go n m
- | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls
- = RealRegSingle m : (go (n `shiftR` 1) $! (m+1))
-
- | otherwise
- = go (n `shiftR` 1) $! (m+1)
- -- ToDo: there's no point looking through all the integer registers
- -- in order to find a floating-point one.
-
-allocateReg :: RealReg -> FreeRegs -> FreeRegs
-allocateReg (RealRegSingle r) (FreeRegs f)
- = FreeRegs (f .&. complement (1 `shiftL` r))
-
-allocateReg _ _
- = panic "RegAlloc.Linear.X86_64.FreeRegs.allocateReg: no reg"
-
-
diff --git a/compiler/GHC/CmmToAsm/Reg/Liveness.hs b/compiler/GHC/CmmToAsm/Reg/Liveness.hs
index bf53ecf421..1e69c50bd6 100644
--- a/compiler/GHC/CmmToAsm/Reg/Liveness.hs
+++ b/compiler/GHC/CmmToAsm/Reg/Liveness.hs
@@ -172,6 +172,7 @@ instance Instruction instr => Instruction (InstrSR instr) where
mkStackDeallocInstr platform amount =
Instr <$> mkStackDeallocInstr platform amount
+ {-# INLINEABLE pprInstr #-}
pprInstr platform i = ppr (fmap (pprInstr platform) i)
@@ -570,11 +571,13 @@ stripLiveBlock config (BasicBlock i lis)
spillNat acc (LiveInstr (SPILL reg slot) _ : instrs)
= do delta <- get
- spillNat (mkSpillInstr config reg delta slot : acc) instrs
+ let !ins = mkSpillInstr config reg delta slot
+ spillNat (ins : acc) instrs
spillNat acc (LiveInstr (RELOAD slot reg) _ : instrs)
= do delta <- get
- spillNat (mkLoadInstr config reg delta slot : acc) instrs
+ let !ins = mkLoadInstr config reg delta slot
+ spillNat (ins : acc) instrs
spillNat acc (LiveInstr (Instr instr) _ : instrs)
| Just i <- takeDeltaInstr instr
diff --git a/compiler/GHC/CmmToAsm/SPARC.hs b/compiler/GHC/CmmToAsm/SPARC.hs
index 7d9a671932..0272ec9493 100644
--- a/compiler/GHC/CmmToAsm/SPARC.hs
+++ b/compiler/GHC/CmmToAsm/SPARC.hs
@@ -14,6 +14,11 @@ import GHC.CmmToAsm.Config
import GHC.CmmToAsm.Types
import GHC.CmmToAsm.Instr
+import qualified GHC.CmmToAsm.Reg.Linear as Linear
+import qualified GHC.CmmToAsm.Reg.Linear.FreeRegs as FR
+
+import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC
+
import qualified GHC.CmmToAsm.SPARC.Instr as SPARC
import qualified GHC.CmmToAsm.SPARC.Ppr as SPARC
import qualified GHC.CmmToAsm.SPARC.CodeGen as SPARC
@@ -41,6 +46,7 @@ ncgSPARC config = NcgImpl
-- Allocating more stack space for spilling isn't currently supported for the
-- linear register allocator on SPARC, hence the panic below.
, ncgAllocMoreStack = noAllocMoreStack
+ , linearRegAlloc = Linear.regAlloc (FR.frInitFreeRegs platform :: SPARC.FreeRegs)
}
where
platform = ncgPlatform config
diff --git a/compiler/GHC/CmmToAsm/X86.hs b/compiler/GHC/CmmToAsm/X86.hs
index dbeeddc184..863cb044a4 100644
--- a/compiler/GHC/CmmToAsm/X86.hs
+++ b/compiler/GHC/CmmToAsm/X86.hs
@@ -9,6 +9,8 @@ where
import GHC.Prelude
+import GHC.Types.Unique.Supply
+
import GHC.CmmToAsm.Instr
import GHC.CmmToAsm.Monad
import GHC.CmmToAsm.Config
@@ -19,10 +21,49 @@ import qualified GHC.CmmToAsm.X86.Instr as X86
import qualified GHC.CmmToAsm.X86.Ppr as X86
import qualified GHC.CmmToAsm.X86.CodeGen as X86
import qualified GHC.CmmToAsm.X86.Regs as X86
+import qualified GHC.CmmToAsm.Reg.Linear.X86Base as X86Base
+-- import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64
-ncgX86 :: NCGConfig -> NcgImpl (Alignment, RawCmmStatics) X86.Instr X86.JumpDest
-ncgX86 = ncgX86_64
+import qualified GHC.CmmToAsm.Reg.Linear as Linear
+import qualified GHC.CmmToAsm.Reg.Linear.FreeRegs as Linear
+import GHC.CmmToAsm.Reg.Liveness (LiveCmmDecl)
+
+{-# SPECIALIZE Linear.regAlloc :: X86Base.FreeRegsX86 -> NCGConfig
+ -> LiveCmmDecl statics X86.Instr
+ -> UniqSM
+ (NatCmmDecl statics X86.Instr,
+ Maybe Int,
+ Maybe Linear.RegAllocStats) #-}
+
+{-# SPECIALIZE Linear.regAlloc :: X86Base.FreeRegsX86_64 -> NCGConfig
+ -> LiveCmmDecl statics X86.Instr
+ -> UniqSM
+ (NatCmmDecl statics X86.Instr,
+ Maybe Int,
+ Maybe Linear.RegAllocStats) #-}
+linearRegAllocX86 :: NCGConfig
+ -> LiveCmmDecl statics X86.Instr
+ -> UniqSM
+ (NatCmmDecl statics X86.Instr,
+ Maybe Int,
+ Maybe Linear.RegAllocStats)
+linearRegAllocX86 config = Linear.regAlloc
+ (Linear.frInitFreeRegs (ncgPlatform config) :: X86Base.FreeRegsX86)
+ config
+
+linearRegAllocX86_64 :: NCGConfig
+ -> LiveCmmDecl statics X86.Instr
+ -> UniqSM
+ (NatCmmDecl statics X86.Instr,
+ Maybe Int,
+ Maybe Linear.RegAllocStats)
+linearRegAllocX86_64 config = Linear.regAlloc
+ (Linear.frInitFreeRegs (ncgPlatform config) :: X86Base.FreeRegsX86_64)
+ config
+
+ncgX86 :: NCGConfig -> NcgImpl (Alignment, RawCmmStatics) X86.Instr X86.JumpDest
+ncgX86 config = (ncgX86_64 config) { linearRegAlloc = linearRegAllocX86 }
ncgX86_64 :: NCGConfig -> NcgImpl (Alignment, RawCmmStatics) X86.Instr X86.JumpDest
ncgX86_64 config = NcgImpl
@@ -41,6 +82,7 @@ ncgX86_64 config = NcgImpl
, ncgMakeFarBranches = const id
, extractUnwindPoints = X86.extractUnwindPoints
, invertCondBranches = X86.invertCondBranches
+ , linearRegAlloc = linearRegAllocX86_64
}
where
platform = ncgPlatform config
@@ -54,12 +96,18 @@ instance Instruction X86.Instr where
patchJumpInstr = X86.patchJumpInstr
mkSpillInstr = X86.mkSpillInstr
mkLoadInstr = X86.mkLoadInstr
+ {-# INLINEABLE takeDeltaInstr #-}
takeDeltaInstr = X86.takeDeltaInstr
isMetaInstr = X86.isMetaInstr
mkRegRegMoveInstr = X86.mkRegRegMoveInstr
+ {-# INLINEABLE takeRegRegMoveInstr #-}
takeRegRegMoveInstr = X86.takeRegRegMoveInstr
+ {-# INLINEABLE mkJumpInstr #-}
mkJumpInstr = X86.mkJumpInstr
+ {-# INLINEABLE mkStackAllocInstr #-}
mkStackAllocInstr = X86.mkStackAllocInstr
+ {-# INLINEABLE mkStackDeallocInstr #-}
mkStackDeallocInstr = X86.mkStackDeallocInstr
+ {-# INLINEABLE pprInstr #-}
pprInstr = X86.pprInstr
diff --git a/compiler/GHC/CmmToAsm/X86/Ppr.hs b/compiler/GHC/CmmToAsm/X86/Ppr.hs
index 97abf78006..478c61aa49 100644
--- a/compiler/GHC/CmmToAsm/X86/Ppr.hs
+++ b/compiler/GHC/CmmToAsm/X86/Ppr.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
-----------------------------------------------------------------------------
--
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 29137a146f..97ab9621c3 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -244,8 +244,7 @@ Library
GHC.CmmToAsm.Reg.Linear.StackMap
GHC.CmmToAsm.Reg.Linear.State
GHC.CmmToAsm.Reg.Linear.Stats
- GHC.CmmToAsm.Reg.Linear.X86
- GHC.CmmToAsm.Reg.Linear.X86_64
+ GHC.CmmToAsm.Reg.Linear.X86Base
GHC.CmmToAsm.Reg.Liveness
GHC.CmmToAsm.Reg.Target
GHC.CmmToAsm.Reg.Utils