diff options
author | Ben Gamari <ben@smart-cactus.org> | 2021-01-07 23:39:47 -0500 |
---|---|---|
committer | Andreas Klebinger <klebinger.andreas@gmx.at> | 2021-03-30 12:22:28 +0200 |
commit | 9201bf638fe4d88460c0ac683a22412d2cdc73db (patch) | |
tree | 27deeb3d1507735cd3fbdfdd3bc871713a2302f6 | |
parent | 6604409594f71d2ed5963bb3897bc4ee772cc5c0 (diff) | |
download | haskell-wip/ncg-perf-2.tar.gz |
Specialize the linear register allocatorwip/ncg-perf-2
We also combine the two FR instances for X86 into one parametrised one.
-------------------------
Metric Decrease:
T12707
T13379
T3294
T4801
T783
T18478
T17096
T11374
-------------------------
Co-Author: Andreas Klebinger
-rw-r--r-- | compiler/GHC/CmmToAsm.hs | 7 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Monad.hs | 12 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/PPC.hs | 6 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear.hs | 95 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs | 26 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs | 12 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/State.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/X86Base.hs (renamed from compiler/GHC/CmmToAsm/Reg/Linear/X86.hs) | 28 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs | 55 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Liveness.hs | 7 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/SPARC.hs | 6 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/X86.hs | 52 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/X86/Ppr.hs | 1 | ||||
-rw-r--r-- | compiler/ghc.cabal.in | 3 |
14 files changed, 194 insertions, 118 deletions
diff --git a/compiler/GHC/CmmToAsm.hs b/compiler/GHC/CmmToAsm.hs index 6712db8d05..19309c7ec1 100644 --- a/compiler/GHC/CmmToAsm.hs +++ b/compiler/GHC/CmmToAsm.hs @@ -582,9 +582,10 @@ cmmNativeGen logger dflags modLoc ncgImpl us fileIds dbgMap cmm count else do -- do linear register allocation let reg_alloc proc = do - (alloced, maybe_more_stack, ra_stats) <- - Linear.regAlloc config proc - case maybe_more_stack of + let regAlloc = (linearRegAlloc ncgImpl) + (alloced, maybe_more_stack, ra_stats) <- + regAlloc config proc + case maybe_more_stack of Nothing -> return ( alloced, ra_stats, [] ) Just amount -> do (alloced',stack_updt_blks) <- diff --git a/compiler/GHC/CmmToAsm/Monad.hs b/compiler/GHC/CmmToAsm/Monad.hs index 8fb834ec7a..c5b2358e6f 100644 --- a/compiler/GHC/CmmToAsm/Monad.hs +++ b/compiler/GHC/CmmToAsm/Monad.hs @@ -71,6 +71,8 @@ import GHC.Utils.Outputable (SDoc, ppr) import GHC.Utils.Panic (pprPanic) import GHC.CmmToAsm.CFG import GHC.CmmToAsm.CFG.Weight +import GHC.CmmToAsm.Reg.Linear.Base +import GHC.CmmToAsm.Reg.Liveness data NcgImpl statics instr jumpDest = NcgImpl { ncgConfig :: !NCGConfig, @@ -97,9 +99,17 @@ data NcgImpl statics instr jumpDest = NcgImpl { -- See Note [What is this unwinding business?] in "GHC.Cmm.DebugBlock" -- and Note [Unwinding information in the NCG] in this module. invertCondBranches :: Maybe CFG -> LabelMap RawCmmStatics -> [NatBasicBlock instr] - -> [NatBasicBlock instr] + -> [NatBasicBlock instr], -- ^ Turn the sequence of @jcc l1; jmp l2@ into @jncc l2; \<block_l1>@ -- when possible. + linearRegAlloc :: NCGConfig + -> LiveCmmDecl statics instr + -> UniqSM ( NatCmmDecl statics instr + , Maybe Int -- number of extra stack slots required, + -- beyond maxSpillSlots + , Maybe RegAllocStats + ) + -- ^ Specialized version of the linear register allocator. } data NatM_State diff --git a/compiler/GHC/CmmToAsm/PPC.hs b/compiler/GHC/CmmToAsm/PPC.hs index 148fd1b4b2..6b0e250ddf 100644 --- a/compiler/GHC/CmmToAsm/PPC.hs +++ b/compiler/GHC/CmmToAsm/PPC.hs @@ -12,13 +12,17 @@ import GHC.CmmToAsm.Instr import GHC.CmmToAsm.Monad import GHC.CmmToAsm.Config import GHC.CmmToAsm.Types +import qualified GHC.CmmToAsm.Reg.Linear as Linear +import qualified GHC.CmmToAsm.Reg.Linear.FreeRegs as FR +import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC import qualified GHC.CmmToAsm.PPC.Instr as PPC import qualified GHC.CmmToAsm.PPC.Ppr as PPC import qualified GHC.CmmToAsm.PPC.CodeGen as PPC import qualified GHC.CmmToAsm.PPC.Regs as PPC import qualified GHC.CmmToAsm.PPC.RegInfo as PPC + ncgPPC :: NCGConfig -> NcgImpl RawCmmStatics PPC.Instr PPC.JumpDest ncgPPC config = NcgImpl { ncgConfig = config @@ -36,6 +40,8 @@ ncgPPC config = NcgImpl , ncgMakeFarBranches = PPC.makeFarBranches , extractUnwindPoints = const [] , invertCondBranches = \_ _ -> id + , linearRegAlloc = Linear.regAlloc (FR.frInitFreeRegs platform :: PPC.FreeRegs) + } where platform = ncgPlatform config diff --git a/compiler/GHC/CmmToAsm/Reg/Linear.hs b/compiler/GHC/CmmToAsm/Reg/Linear.hs index 31bcbd1d68..e13f0f4845 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear.hs @@ -1,5 +1,6 @@ {-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-} {-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE FlexibleContexts #-} {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} @@ -113,10 +114,6 @@ import GHC.CmmToAsm.Reg.Linear.StackMap import GHC.CmmToAsm.Reg.Linear.FreeRegs import GHC.CmmToAsm.Reg.Linear.Stats import GHC.CmmToAsm.Reg.Linear.JoinToTargets -import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC -import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC -import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 -import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 import GHC.CmmToAsm.Reg.Target import GHC.CmmToAsm.Reg.Liveness import GHC.CmmToAsm.Reg.Utils @@ -146,10 +143,42 @@ import Control.Applicative -- ----------------------------------------------------------------------------- -- Top level of the register allocator +{- Note [Specializing the register allocator] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The register allocator is quite overloaded. Most importantly over the instruction +set (instr) and the bitmap to keep track of free registers (freeRegs). + +I (AndreasK) added a fair few number of INLINEABLE pragmas which now allows us to +specialize the allocator in the modules defining the NcgImpl for the varius backends. + +Each NcgImpl will then carry around a function that is the (usually specialized) +allocator for that particular backend. + +When compiling with -Wall-missed-specializations sadly it becomes obvious that quite +often there are spurious errors. For the details see #19592. +The way to check that we really fully specialize the allocator is then: +* Compile with -Wall-missed-specialisations. +* Look at the list of "failed" specialisations. +* Ignore constraint tuple selectors "$p2(%,,%)" and the like. +* For all other reported functions make sure they either don't appear in the + final core (because of inlining) or are replaced by their monomorphic variant. + +I did this when making the simplifier specializable in early 2021. If you perform +major refactorings on it it would be wise to do so again. + +NB. Another reasonable design would be the have a register allocator class, with instances +for the various backends. Maybe worth changing over if we apply this to the graph +allocator as well. + +-} + -- Allocate registers +{-# INLINEABLE regAlloc #-} regAlloc - :: Instruction instr - => NCGConfig + :: (Outputable freeRegs, FR freeRegs, Instruction instr) + => freeRegs + -> NCGConfig -> LiveCmmDecl statics instr -> UniqSM ( NatCmmDecl statics instr , Maybe Int -- number of extra stack slots required, @@ -157,23 +186,23 @@ regAlloc , Maybe RegAllocStats ) -regAlloc _ (CmmData sec d) +regAlloc _fr _ (CmmData sec d) = return ( CmmData sec d , Nothing , Nothing ) -regAlloc _ (CmmProc (LiveInfo info _ _ _) lbl live []) +regAlloc _fr _ (CmmProc (LiveInfo info _ _ _) lbl live []) = return ( CmmProc info lbl live (ListGraph []) , Nothing , Nothing ) -regAlloc config (CmmProc static lbl live sccs) +regAlloc fr config (CmmProc static lbl live sccs) | LiveInfo info entry_ids@(first_id:_) block_live _ <- static = do -- do register allocation on each component. !(!final_blocks, !stats, !stack_use) - <- linearRegAlloc config entry_ids block_live sccs + <- linearRegAlloc fr config entry_ids block_live sccs -- make sure the block that was first in the input list -- stays at the front of the output @@ -192,21 +221,22 @@ regAlloc config (CmmProc static lbl live sccs) , Just stats) -- bogus. to make non-exhaustive match warning go away. -regAlloc _ (CmmProc _ _ _ _) +regAlloc _fr _ (CmmProc _ _ _ _) = panic "RegAllocLinear.regAlloc: no match" -- ----------------------------------------------------------------------------- -- Linear sweep to allocate registers - -- | Do register allocation on some basic blocks. -- But be careful to allocate a block in an SCC only if it has -- an entry in the block map or it is the first block. -- +{-# INLINEABLE linearRegAlloc #-} linearRegAlloc - :: forall instr. Instruction instr - => NCGConfig + :: forall instr freeRegs. (Outputable freeRegs, FR freeRegs, Instruction instr) + => freeRegs + -> NCGConfig -> [BlockId] -- ^ entry points -> BlockMap RegSet -- ^ live regs on entry to each basic block @@ -214,17 +244,17 @@ linearRegAlloc -- ^ instructions annotated with "deaths" -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int) -linearRegAlloc config entry_ids block_live sccs +linearRegAlloc initFreeRegs config entry_ids block_live sccs = case platformArch platform of - ArchX86 -> go $ (frInitFreeRegs platform :: X86.FreeRegs) - ArchX86_64 -> go $ (frInitFreeRegs platform :: X86_64.FreeRegs) + ArchX86 -> go + ArchX86_64 -> go + ArchSPARC -> go --go $ (frInitFreeRegs platform :: SPARC.FreeRegs) + ArchPPC -> go --go $ (frInitFreeRegs platform :: PPC.FreeRegs) + ArchPPC_64 _ -> go --go $ (frInitFreeRegs platform :: PPC.FreeRegs) ArchS390X -> panic "linearRegAlloc ArchS390X" - ArchSPARC -> go $ (frInitFreeRegs platform :: SPARC.FreeRegs) ArchSPARC64 -> panic "linearRegAlloc ArchSPARC64" - ArchPPC -> go $ (frInitFreeRegs platform :: PPC.FreeRegs) ArchARM _ _ _ -> panic "linearRegAlloc ArchARM" ArchAArch64 -> panic "linearRegAlloc ArchAArch64" - ArchPPC_64 _ -> go $ (frInitFreeRegs platform :: PPC.FreeRegs) ArchAlpha -> panic "linearRegAlloc ArchAlpha" ArchMipseb -> panic "linearRegAlloc ArchMipseb" ArchMipsel -> panic "linearRegAlloc ArchMipsel" @@ -232,9 +262,8 @@ linearRegAlloc config entry_ids block_live sccs ArchJavaScript -> panic "linearRegAlloc ArchJavaScript" ArchUnknown -> panic "linearRegAlloc ArchUnknown" where - go :: (FR regs, Outputable regs) - => regs -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int) - go f = linearRegAlloc' config f entry_ids block_live sccs + go :: UniqSM ([NatBasicBlock instr], RegAllocStats, Int) + go = linearRegAlloc' config initFreeRegs entry_ids block_live sccs platform = ncgPlatform config -- | Constraints on the instruction instances used by the @@ -242,6 +271,7 @@ linearRegAlloc config entry_ids block_live sccs type OutputableRegConstraint freeRegs instr = (FR freeRegs, Outputable freeRegs, Instruction instr) +{-# INLINEABLE linearRegAlloc' #-} linearRegAlloc' :: OutputableRegConstraint freeRegs instr => NCGConfig @@ -259,6 +289,7 @@ linearRegAlloc' config initFreeRegs entry_ids block_live sccs return (blocks, stats, getStackUse stack) +{-# INLINEABLE linearRA_SCCs #-} linearRA_SCCs :: OutputableRegConstraint freeRegs instr => [BlockId] -> BlockMap RegSet @@ -293,7 +324,7 @@ linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs) some reason then this function will loop. We should probably do some more sanity checking to guard against this eventuality. -} - +{-# INLINEABLE process #-} process :: forall freeRegs instr. (OutputableRegConstraint freeRegs instr) => [BlockId] -> BlockMap RegSet @@ -330,7 +361,7 @@ process entry_ids block_live = else do go blocks (b : next_round) accum madeProgress - +{-# INLINEABLE processBlock #-} -- | Do register allocation on this basic block -- processBlock @@ -348,7 +379,7 @@ processBlock block_live (BasicBlock id instrs) -- pprTraceM "blockResult" $ ppr (instrs', fixups) return $ BasicBlock id instrs' : fixups - +{-# INLINEABLE initBlock #-} -- | Load the freeregs and current reg assignment into the RegM state -- for the basic block with this BlockId. initBlock :: FR freeRegs @@ -378,7 +409,7 @@ initBlock id block_live -> do setFreeRegsR freeregs setAssigR assig - +{-# INLINEABLE linearRA #-} -- | Do allocation for a sequence of instructions. linearRA :: forall freeRegs instr. (OutputableRegConstraint freeRegs instr) @@ -468,7 +499,7 @@ raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live)) raInsn _ _ _ instr = do platform <- getPlatform - let instr' = fmap (pprInstr platform) instr + let instr' = fmap (pprInstr platform) instr :: LiveInstr SDoc pprPanic "raInsn" (text "no match for:" <> ppr instr') -- ToDo: what can we do about @@ -488,7 +519,7 @@ isInReg :: Reg -> RegMap Loc -> Bool isInReg src assig | Just (InReg _) <- lookupUFM assig src = True | otherwise = False - +{-# INLINEABLE genRaInsn #-} genRaInsn :: forall freeRegs instr. OutputableRegConstraint freeRegs instr => BlockMap RegSet @@ -607,6 +638,7 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do -- ----------------------------------------------------------------------------- -- releaseRegs +{-# INLINEABLE releaseRegs #-} releaseRegs :: FR freeRegs => [Reg] -> RegM freeRegs () releaseRegs regs = do platform <- getPlatform @@ -638,6 +670,7 @@ releaseRegs regs = do -- - clobbered regs are not allocatable. -- +{-# INLINEABLE saveClobberedTemps #-} saveClobberedTemps :: forall instr freeRegs. (Instruction instr, FR freeRegs) @@ -706,7 +739,7 @@ saveClobberedTemps clobbered dying clobber new_assign (spill : instrs) rest - +{-# INLINEABLE clobberRegs #-} -- | Mark all these real regs as allocated, -- and kick out their vreg assignments. -- @@ -763,6 +796,7 @@ data SpillLoc = ReadMem StackSlot -- reading from register only in memory -- We also update the register assignment in the process, and -- the list of free registers and free stack slots. +{-# INLINEABLE allocateRegsAndSpill #-} allocateRegsAndSpill :: forall freeRegs instr. (FR freeRegs, Instruction instr) => Bool -- True <=> reading (load up spilled regs) @@ -828,6 +862,7 @@ findPrefRealReg vreg = do Just (InBoth real_reg _) -> Just real_reg _ -> z +{-# INLINEABLE allocRegsAndSpill_spill #-} -- reading is redundant with reason, but we keep it around because it's -- convenient and it maintains the recursive structure of the allocator. -- EZY allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr) diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs index 1768422f5c..c0442a8282 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleInstances #-} module GHC.CmmToAsm.Reg.Linear.FreeRegs ( FR(..), @@ -32,8 +33,7 @@ import GHC.Platform import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC -import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 -import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 +import qualified GHC.CmmToAsm.Reg.Linear.X86Base as X86Base import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr import qualified GHC.CmmToAsm.SPARC.Instr as SPARC.Instr @@ -45,17 +45,19 @@ class Show freeRegs => FR freeRegs where frInitFreeRegs :: Platform -> freeRegs frReleaseReg :: Platform -> RealReg -> freeRegs -> freeRegs -instance FR X86.FreeRegs where - frAllocateReg = \_ -> X86.allocateReg - frGetFreeRegs = X86.getFreeRegs - frInitFreeRegs = X86.initFreeRegs - frReleaseReg = \_ -> X86.releaseReg +instance FR (X86Base.FreeRegsX86) where + {-# INLINEABLE frAllocateReg #-} + frAllocateReg = \_ -> X86Base.allocateReg + frGetFreeRegs = X86Base.getFreeRegs + frInitFreeRegs = X86Base.initFreeRegs + frReleaseReg = \_ -> X86Base.releaseReg -instance FR X86_64.FreeRegs where - frAllocateReg = \_ -> X86_64.allocateReg - frGetFreeRegs = X86_64.getFreeRegs - frInitFreeRegs = X86_64.initFreeRegs - frReleaseReg = \_ -> X86_64.releaseReg +instance FR (X86Base.FreeRegsX86_64) where + {-# INLINEABLE frAllocateReg #-} + frAllocateReg = \_ -> X86Base.allocateReg + frGetFreeRegs = X86Base.getFreeRegs + frInitFreeRegs = X86Base.initFreeRegs + frReleaseReg = \_ -> X86Base.releaseReg instance FR PPC.FreeRegs where frAllocateReg = \_ -> PPC.allocateReg diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs b/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs index d0330a4f6a..bef8ef6306 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs @@ -33,6 +33,7 @@ import GHC.Types.Unique.Set -- | For a jump instruction at the end of a block, generate fixup code so its -- vregs are in the correct regs for its destination. -- +{-# INLINEABLE joinToTargets #-} joinToTargets :: (FR freeRegs, Instruction instr) => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs @@ -57,6 +58,7 @@ joinToTargets block_live id instr = joinToTargets' block_live [] id instr (jumpDestsOfInstr instr) ----- +{-# INLINEABLE joinToTargets' #-} joinToTargets' :: (FR freeRegs, Instruction instr) => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs @@ -112,6 +114,7 @@ joinToTargets' block_live new_blocks block_id instr (dest:dests) -- this is the first time we jumped to this block. +{-# INLINEABLE joinToTargets_first #-} joinToTargets_first :: (FR freeRegs, Instruction instr) => BlockMap RegSet -> [NatBasicBlock instr] @@ -141,6 +144,7 @@ joinToTargets_first block_live new_blocks block_id instr dest dests -- we've jumped to this block before +{-# INLINEABLE joinToTargets_again #-} joinToTargets_again :: (Instruction instr, FR freeRegs) => BlockMap RegSet -> [NatBasicBlock instr] @@ -296,6 +300,7 @@ expandNode vreg src dst -- destinations. We have eliminated any possibility of single-node -- cycles in expandNode above. -- +{-# INLINEABLE handleComponent #-} handleComponent :: Instruction instr => Int -> instr -> SCC (Node Loc Unique) @@ -348,6 +353,7 @@ handleComponent _ _ (CyclicSCC _) -- | Move a vreg between these two locations. -- +{-# INLINEABLE makeMove #-} makeMove :: Instruction instr => Int -- ^ current C stack delta. @@ -363,13 +369,13 @@ makeMove delta vreg src dst case (src, dst) of (InReg s, InReg d) -> do recordSpill (SpillJoinRR vreg) - return $ mkRegRegMoveInstr platform (RegReal s) (RegReal d) + return $! mkRegRegMoveInstr platform (RegReal s) (RegReal d) (InMem s, InReg d) -> do recordSpill (SpillJoinRM vreg) - return $ mkLoadInstr config (RegReal d) delta s + return $! mkLoadInstr config (RegReal d) delta s (InReg s, InMem d) -> do recordSpill (SpillJoinRM vreg) - return $ mkSpillInstr config (RegReal s) delta d + return $! mkSpillInstr config (RegReal s) delta d _ -> -- we don't handle memory to memory moves. -- they shouldn't happen because we don't share diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs index 4fdc5c96cf..8e53c5799b 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs @@ -119,7 +119,7 @@ makeRAStats state { ra_spillInstrs = binSpillReasons (ra_spills state) , ra_fixupList = ra_fixups state } - +{-# INLINEABLE spillR #-} spillR :: Instruction instr => Reg -> Unique -> RegM freeRegs (instr, Int) diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86Base.hs index 42f63b5752..fbc592369e 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/X86.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/X86Base.hs @@ -1,7 +1,8 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE ConstraintKinds #-} -- | Free regs map for i386 -module GHC.CmmToAsm.Reg.Linear.X86 where +module GHC.CmmToAsm.Reg.Linear.X86Base where import GHC.Prelude @@ -15,28 +16,40 @@ import GHC.Utils.Outputable import Data.Word import Data.Bits -newtype FreeRegs = FreeRegs Word32 +-- Instead of being parametric we could also just use Word64 +-- at all times. Obviously bad for 32bit ghc performance. But +-- not sure if anyone still cares about that. +type FreeRegsX86 = FreeRegs Word32 +type FreeRegsX86_64 = FreeRegs Word64 + +newtype FreeRegs w = FreeRegs w deriving (Show,Outputable) -noFreeRegs :: FreeRegs +type FreeRegConstraints w = (Bits w, Num w) + +noFreeRegs :: FreeRegConstraints w => FreeRegs w noFreeRegs = FreeRegs 0 -releaseReg :: RealReg -> FreeRegs -> FreeRegs +{-# INLINEABLE releaseReg #-} +releaseReg :: FreeRegConstraints w => RealReg -> FreeRegs w -> FreeRegs w releaseReg (RealRegSingle n) (FreeRegs f) = FreeRegs (f .|. (1 `shiftL` n)) releaseReg _ _ = panic "RegAlloc.Linear.X86.FreeRegs.releaseReg: no reg" -initFreeRegs :: Platform -> FreeRegs +{-# INLINEABLE initFreeRegs #-} +initFreeRegs :: FreeRegConstraints w => Platform -> FreeRegs w initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform) -getFreeRegs :: Platform -> RegClass -> FreeRegs -> [RealReg] -- lazily +{-# INLINEABLE getFreeRegs #-} +getFreeRegs :: FreeRegConstraints w => Platform -> RegClass -> FreeRegs w -> [RealReg] -- lazily getFreeRegs platform cls (FreeRegs f) = go f 0 where go 0 _ = [] go n m + -- Register is free && has the right class | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls = RealRegSingle m : (go (n `shiftR` 1) $! (m+1)) @@ -45,7 +58,8 @@ getFreeRegs platform cls (FreeRegs f) = go f 0 -- ToDo: there's no point looking through all the integer registers -- in order to find a floating-point one. -allocateReg :: RealReg -> FreeRegs -> FreeRegs +{-# INLINEABLE allocateReg #-} +allocateReg :: FreeRegConstraints w => RealReg -> FreeRegs w -> FreeRegs w allocateReg (RealRegSingle r) (FreeRegs f) = FreeRegs (f .&. complement (1 `shiftL` r)) diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs b/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs deleted file mode 100644 index 44eea342a4..0000000000 --- a/compiler/GHC/CmmToAsm/Reg/Linear/X86_64.hs +++ /dev/null @@ -1,55 +0,0 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} - --- | Free regs map for x86_64 -module GHC.CmmToAsm.Reg.Linear.X86_64 where - -import GHC.Prelude - -import GHC.CmmToAsm.X86.Regs -import GHC.Platform.Reg.Class -import GHC.Platform.Reg -import GHC.Utils.Panic -import GHC.Platform -import GHC.Utils.Outputable - -import Data.Word -import Data.Bits - -newtype FreeRegs = FreeRegs Word64 - deriving (Show,Outputable) - -noFreeRegs :: FreeRegs -noFreeRegs = FreeRegs 0 - -releaseReg :: RealReg -> FreeRegs -> FreeRegs -releaseReg (RealRegSingle n) (FreeRegs f) - = FreeRegs (f .|. (1 `shiftL` n)) - -releaseReg _ _ - = panic "RegAlloc.Linear.X86_64.FreeRegs.releaseReg: no reg" - -initFreeRegs :: Platform -> FreeRegs -initFreeRegs platform - = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform) - -getFreeRegs :: Platform -> RegClass -> FreeRegs -> [RealReg] -- lazily -getFreeRegs platform cls (FreeRegs f) = go f 0 - - where go 0 _ = [] - go n m - | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls - = RealRegSingle m : (go (n `shiftR` 1) $! (m+1)) - - | otherwise - = go (n `shiftR` 1) $! (m+1) - -- ToDo: there's no point looking through all the integer registers - -- in order to find a floating-point one. - -allocateReg :: RealReg -> FreeRegs -> FreeRegs -allocateReg (RealRegSingle r) (FreeRegs f) - = FreeRegs (f .&. complement (1 `shiftL` r)) - -allocateReg _ _ - = panic "RegAlloc.Linear.X86_64.FreeRegs.allocateReg: no reg" - - diff --git a/compiler/GHC/CmmToAsm/Reg/Liveness.hs b/compiler/GHC/CmmToAsm/Reg/Liveness.hs index bf53ecf421..1e69c50bd6 100644 --- a/compiler/GHC/CmmToAsm/Reg/Liveness.hs +++ b/compiler/GHC/CmmToAsm/Reg/Liveness.hs @@ -172,6 +172,7 @@ instance Instruction instr => Instruction (InstrSR instr) where mkStackDeallocInstr platform amount = Instr <$> mkStackDeallocInstr platform amount + {-# INLINEABLE pprInstr #-} pprInstr platform i = ppr (fmap (pprInstr platform) i) @@ -570,11 +571,13 @@ stripLiveBlock config (BasicBlock i lis) spillNat acc (LiveInstr (SPILL reg slot) _ : instrs) = do delta <- get - spillNat (mkSpillInstr config reg delta slot : acc) instrs + let !ins = mkSpillInstr config reg delta slot + spillNat (ins : acc) instrs spillNat acc (LiveInstr (RELOAD slot reg) _ : instrs) = do delta <- get - spillNat (mkLoadInstr config reg delta slot : acc) instrs + let !ins = mkLoadInstr config reg delta slot + spillNat (ins : acc) instrs spillNat acc (LiveInstr (Instr instr) _ : instrs) | Just i <- takeDeltaInstr instr diff --git a/compiler/GHC/CmmToAsm/SPARC.hs b/compiler/GHC/CmmToAsm/SPARC.hs index 7d9a671932..0272ec9493 100644 --- a/compiler/GHC/CmmToAsm/SPARC.hs +++ b/compiler/GHC/CmmToAsm/SPARC.hs @@ -14,6 +14,11 @@ import GHC.CmmToAsm.Config import GHC.CmmToAsm.Types import GHC.CmmToAsm.Instr +import qualified GHC.CmmToAsm.Reg.Linear as Linear +import qualified GHC.CmmToAsm.Reg.Linear.FreeRegs as FR + +import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC + import qualified GHC.CmmToAsm.SPARC.Instr as SPARC import qualified GHC.CmmToAsm.SPARC.Ppr as SPARC import qualified GHC.CmmToAsm.SPARC.CodeGen as SPARC @@ -41,6 +46,7 @@ ncgSPARC config = NcgImpl -- Allocating more stack space for spilling isn't currently supported for the -- linear register allocator on SPARC, hence the panic below. , ncgAllocMoreStack = noAllocMoreStack + , linearRegAlloc = Linear.regAlloc (FR.frInitFreeRegs platform :: SPARC.FreeRegs) } where platform = ncgPlatform config diff --git a/compiler/GHC/CmmToAsm/X86.hs b/compiler/GHC/CmmToAsm/X86.hs index dbeeddc184..863cb044a4 100644 --- a/compiler/GHC/CmmToAsm/X86.hs +++ b/compiler/GHC/CmmToAsm/X86.hs @@ -9,6 +9,8 @@ where import GHC.Prelude +import GHC.Types.Unique.Supply + import GHC.CmmToAsm.Instr import GHC.CmmToAsm.Monad import GHC.CmmToAsm.Config @@ -19,10 +21,49 @@ import qualified GHC.CmmToAsm.X86.Instr as X86 import qualified GHC.CmmToAsm.X86.Ppr as X86 import qualified GHC.CmmToAsm.X86.CodeGen as X86 import qualified GHC.CmmToAsm.X86.Regs as X86 +import qualified GHC.CmmToAsm.Reg.Linear.X86Base as X86Base +-- import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 -ncgX86 :: NCGConfig -> NcgImpl (Alignment, RawCmmStatics) X86.Instr X86.JumpDest -ncgX86 = ncgX86_64 +import qualified GHC.CmmToAsm.Reg.Linear as Linear +import qualified GHC.CmmToAsm.Reg.Linear.FreeRegs as Linear +import GHC.CmmToAsm.Reg.Liveness (LiveCmmDecl) + +{-# SPECIALIZE Linear.regAlloc :: X86Base.FreeRegsX86 -> NCGConfig + -> LiveCmmDecl statics X86.Instr + -> UniqSM + (NatCmmDecl statics X86.Instr, + Maybe Int, + Maybe Linear.RegAllocStats) #-} + +{-# SPECIALIZE Linear.regAlloc :: X86Base.FreeRegsX86_64 -> NCGConfig + -> LiveCmmDecl statics X86.Instr + -> UniqSM + (NatCmmDecl statics X86.Instr, + Maybe Int, + Maybe Linear.RegAllocStats) #-} +linearRegAllocX86 :: NCGConfig + -> LiveCmmDecl statics X86.Instr + -> UniqSM + (NatCmmDecl statics X86.Instr, + Maybe Int, + Maybe Linear.RegAllocStats) +linearRegAllocX86 config = Linear.regAlloc + (Linear.frInitFreeRegs (ncgPlatform config) :: X86Base.FreeRegsX86) + config + +linearRegAllocX86_64 :: NCGConfig + -> LiveCmmDecl statics X86.Instr + -> UniqSM + (NatCmmDecl statics X86.Instr, + Maybe Int, + Maybe Linear.RegAllocStats) +linearRegAllocX86_64 config = Linear.regAlloc + (Linear.frInitFreeRegs (ncgPlatform config) :: X86Base.FreeRegsX86_64) + config + +ncgX86 :: NCGConfig -> NcgImpl (Alignment, RawCmmStatics) X86.Instr X86.JumpDest +ncgX86 config = (ncgX86_64 config) { linearRegAlloc = linearRegAllocX86 } ncgX86_64 :: NCGConfig -> NcgImpl (Alignment, RawCmmStatics) X86.Instr X86.JumpDest ncgX86_64 config = NcgImpl @@ -41,6 +82,7 @@ ncgX86_64 config = NcgImpl , ncgMakeFarBranches = const id , extractUnwindPoints = X86.extractUnwindPoints , invertCondBranches = X86.invertCondBranches + , linearRegAlloc = linearRegAllocX86_64 } where platform = ncgPlatform config @@ -54,12 +96,18 @@ instance Instruction X86.Instr where patchJumpInstr = X86.patchJumpInstr mkSpillInstr = X86.mkSpillInstr mkLoadInstr = X86.mkLoadInstr + {-# INLINEABLE takeDeltaInstr #-} takeDeltaInstr = X86.takeDeltaInstr isMetaInstr = X86.isMetaInstr mkRegRegMoveInstr = X86.mkRegRegMoveInstr + {-# INLINEABLE takeRegRegMoveInstr #-} takeRegRegMoveInstr = X86.takeRegRegMoveInstr + {-# INLINEABLE mkJumpInstr #-} mkJumpInstr = X86.mkJumpInstr + {-# INLINEABLE mkStackAllocInstr #-} mkStackAllocInstr = X86.mkStackAllocInstr + {-# INLINEABLE mkStackDeallocInstr #-} mkStackDeallocInstr = X86.mkStackDeallocInstr + {-# INLINEABLE pprInstr #-} pprInstr = X86.pprInstr diff --git a/compiler/GHC/CmmToAsm/X86/Ppr.hs b/compiler/GHC/CmmToAsm/X86/Ppr.hs index 97abf78006..478c61aa49 100644 --- a/compiler/GHC/CmmToAsm/X86/Ppr.hs +++ b/compiler/GHC/CmmToAsm/X86/Ppr.hs @@ -1,5 +1,6 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MultiParamTypeClasses #-} ----------------------------------------------------------------------------- -- diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 29137a146f..97ab9621c3 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -244,8 +244,7 @@ Library GHC.CmmToAsm.Reg.Linear.StackMap GHC.CmmToAsm.Reg.Linear.State GHC.CmmToAsm.Reg.Linear.Stats - GHC.CmmToAsm.Reg.Linear.X86 - GHC.CmmToAsm.Reg.Linear.X86_64 + GHC.CmmToAsm.Reg.Linear.X86Base GHC.CmmToAsm.Reg.Liveness GHC.CmmToAsm.Reg.Target GHC.CmmToAsm.Reg.Utils |