diff options
Diffstat (limited to 'compiler/GHC/CmmToAsm/Reg')
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs | 12 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear.hs | 100 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs | 137 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs | 24 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs | 10 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Linear/State.hs | 4 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Liveness.hs | 10 | ||||
-rw-r--r-- | compiler/GHC/CmmToAsm/Reg/Target.hs | 12 |
8 files changed, 252 insertions, 57 deletions
diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs b/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs index 1050fbaa96..83f581cac4 100644 --- a/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs +++ b/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs @@ -111,7 +111,10 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts excl ArchSPARC64 -> panic "trivColorable ArchSPARC64" ArchPPC_64 _ -> 15 ArchARM _ _ _ -> panic "trivColorable ArchARM" - ArchAArch64 -> panic "trivColorable ArchAArch64" + -- We should be able to allocate *a lot* more in princple. + -- essentially all 32 - SP, so 31, we'd trash the link reg + -- as well as the platform and all others though. + ArchAArch64 -> 18 ArchAlpha -> panic "trivColorable ArchAlpha" ArchMipseb -> panic "trivColorable ArchMipseb" ArchMipsel -> panic "trivColorable ArchMipsel" @@ -143,7 +146,10 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclus ArchSPARC64 -> panic "trivColorable ArchSPARC64" ArchPPC_64 _ -> 0 ArchARM _ _ _ -> panic "trivColorable ArchARM" - ArchAArch64 -> panic "trivColorable ArchAArch64" + -- we can in princple address all the float regs as + -- segments. So we could have 64 Float regs. Or + -- 128 Half regs, or even 256 Byte regs. + ArchAArch64 -> 0 ArchAlpha -> panic "trivColorable ArchAlpha" ArchMipseb -> panic "trivColorable ArchMipseb" ArchMipsel -> panic "trivColorable ArchMipsel" @@ -177,7 +183,7 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclu ArchSPARC64 -> panic "trivColorable ArchSPARC64" ArchPPC_64 _ -> 20 ArchARM _ _ _ -> panic "trivColorable ArchARM" - ArchAArch64 -> panic "trivColorable ArchAArch64" + ArchAArch64 -> 32 ArchAlpha -> panic "trivColorable ArchAlpha" ArchMipseb -> panic "trivColorable ArchMipseb" ArchMipsel -> panic "trivColorable ArchMipsel" diff --git a/compiler/GHC/CmmToAsm/Reg/Linear.hs b/compiler/GHC/CmmToAsm/Reg/Linear.hs index 88fdcd6bce..a9a4545f62 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear.hs @@ -110,10 +110,11 @@ import GHC.CmmToAsm.Reg.Linear.StackMap import GHC.CmmToAsm.Reg.Linear.FreeRegs import GHC.CmmToAsm.Reg.Linear.Stats import GHC.CmmToAsm.Reg.Linear.JoinToTargets -import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC -import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC -import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 -import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 +import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC +import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC +import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 +import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 +import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64 import GHC.CmmToAsm.Reg.Target import GHC.CmmToAsm.Reg.Liveness import GHC.CmmToAsm.Reg.Utils @@ -121,6 +122,7 @@ import GHC.CmmToAsm.Instr import GHC.CmmToAsm.Config import GHC.CmmToAsm.Types import GHC.Platform.Reg +import GHC.Platform.Reg.Class (RegClass(..)) import GHC.Cmm.BlockId import GHC.Cmm.Dataflow.Collections @@ -202,7 +204,7 @@ regAlloc _ (CmmProc _ _ _ _) -- an entry in the block map or it is the first block. -- linearRegAlloc - :: forall instr. Instruction instr + :: forall instr. (Instruction instr) => NCGConfig -> [BlockId] -- ^ entry points -> BlockMap RegSet @@ -220,7 +222,7 @@ linearRegAlloc config entry_ids block_live sccs ArchSPARC64 -> panic "linearRegAlloc ArchSPARC64" ArchPPC -> go $ (frInitFreeRegs platform :: PPC.FreeRegs) ArchARM _ _ _ -> panic "linearRegAlloc ArchARM" - ArchAArch64 -> panic "linearRegAlloc ArchAArch64" + ArchAArch64 -> go $ (frInitFreeRegs platform :: AArch64.FreeRegs) ArchPPC_64 _ -> go $ (frInitFreeRegs platform :: PPC.FreeRegs) ArchAlpha -> panic "linearRegAlloc ArchAlpha" ArchMipseb -> panic "linearRegAlloc ArchMipseb" @@ -487,7 +489,7 @@ isInReg src assig | Just (InReg _) <- lookupUFM assig src = True genRaInsn :: forall freeRegs instr. - OutputableRegConstraint freeRegs instr + (OutputableRegConstraint freeRegs instr) => BlockMap RegSet -> [instr] -> BlockId @@ -497,7 +499,7 @@ genRaInsn :: forall freeRegs instr. -> RegM freeRegs ([instr], [NatBasicBlock instr]) genRaInsn block_live new_instrs block_id instr r_dying w_dying = do --- pprTraceM "genRaInsn" $ ppr (block_id, instr) +-- pprTraceM "genRaInsn" $ ppr (block_id, instr) platform <- getPlatform case regUsageOfInstr platform instr of { RU read written -> do @@ -509,19 +511,20 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do -- so using nub isn't a problem). let virt_read = nub [ vr | (RegVirtual vr) <- read ] :: [VirtualReg] - -- debugging -{- freeregs <- getFreeRegsR - assig <- getAssigR - pprDebugAndThen (defaultDynFlags Settings{ sTargetPlatform=platform } undefined) trace "genRaInsn" - (ppr instr - $$ text "r_dying = " <+> ppr r_dying - $$ text "w_dying = " <+> ppr w_dying - $$ text "virt_read = " <+> ppr virt_read - $$ text "virt_written = " <+> ppr virt_written - $$ text "freeregs = " <+> text (show freeregs) - $$ text "assig = " <+> ppr assig) - $ do --} +-- do +-- let real_read = nub [ rr | (RegReal rr) <- read] +-- freeregs <- getFreeRegsR +-- assig <- getAssigR + +-- pprTraceM "genRaInsn" +-- ( text "block = " <+> ppr block_id +-- $$ text "instruction = " <+> ppr instr +-- $$ text "r_dying = " <+> ppr r_dying +-- $$ text "w_dying = " <+> ppr w_dying +-- $$ text "read = " <+> ppr real_read <+> ppr virt_read +-- $$ text "written = " <+> ppr real_written <+> ppr virt_written +-- $$ text "freeregs = " <+> ppr freeregs +-- $$ text "assign = " <+> ppr assig) -- (a), (b) allocate real regs for all regs read by this instruction. (r_spills, r_allocd) <- @@ -580,7 +583,6 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do Nothing -> x Just y -> y - -- (j) free up stack slots for dead spilled regs -- TODO (can't be bothered right now) @@ -592,7 +594,32 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do | src == dst -> [] _ -> [patched_instr] - let code = concat [ squashed_instr, w_spills, reverse r_spills, clobber_saves, new_instrs ] + -- On the use of @reverse@ below. + -- Since we can have spills and reloads produce multiple instructions + -- we need to ensure they are emitted in the correct order. We used to only + -- emit single instructions in mkSpill/mkReload/mkRegRegMove. + -- As such order of spills and reloads didn't matter. However, with + -- mutliple instructions potentially issued by those functions we need to be + -- careful to not break execution order. Reversing the spills (clobber will + -- also spill), will ensure they are emitted in the right order. + -- + -- See also Ticket 19910 for changing the return type from [] to OrdList. + + -- For debugging, uncomment the follow line and the mkComment lines. + -- u <- getUniqueR + let code = concat [ -- mkComment (text "<genRaInsn(" <> ppr u <> text ")>") + -- ,mkComment (text "<genRaInsn(" <> ppr u <> text "):squashed>")] + squashed_instr + -- ,mkComment (text "<genRaInsn(" <> ppr u <> text "):w_spills>") + , reverse w_spills + -- ,mkComment (text "<genRaInsn(" <> ppr u <> text "):r_spills>") + , reverse r_spills + -- ,mkComment (text "<genRaInsn(" <> ppr u <> text "):clobber_saves>") + , reverse clobber_saves + -- ,mkComment (text "<genRaInsn(" <> ppr u <> text "):new_instrs>") + , new_instrs + -- ,mkComment (text "</genRaInsn(" <> ppr u <> text ")>") + ] -- pprTrace "patched-code" ((vcat $ map (docToSDoc . pprInstr) code)) $ do -- pprTrace "pached-fixup" ((ppr fixup_blocks)) $ do @@ -609,6 +636,7 @@ releaseRegs regs = do platform <- getPlatform assig <- getAssigR free <- getFreeRegsR + let loop assig !free [] = do setAssigR assig; setFreeRegsR free; return () loop assig !free (RegReal rr : rs) = loop assig (frReleaseReg platform rr free) rs loop assig !free (r:rs) = @@ -662,8 +690,9 @@ saveClobberedTemps clobbered dying (instrs,assig') <- clobber assig [] to_spill setAssigR assig' - return instrs - + return $ -- mkComment (text "<saveClobberedTemps>") ++ + instrs +-- ++ mkComment (text "</saveClobberedTemps>") where -- See Note [UniqFM and the register allocator] clobber :: RegMap Loc -> [instr] -> [(Unique,RealReg)] -> RegM freeRegs ([instr], RegMap Loc) @@ -700,7 +729,7 @@ saveClobberedTemps clobbered dying let new_assign = addToUFM_Directly assig temp (InBoth reg slot) - clobber new_assign (spill : instrs) rest + clobber new_assign (spill ++ instrs) rest @@ -714,7 +743,17 @@ clobberRegs [] clobberRegs clobbered = do platform <- getPlatform freeregs <- getFreeRegsR - setFreeRegsR $! foldl' (flip $ frAllocateReg platform) freeregs clobbered + + let gpRegs = frGetFreeRegs platform RcInteger freeregs :: [RealReg] + fltRegs = frGetFreeRegs platform RcFloat freeregs :: [RealReg] + dblRegs = frGetFreeRegs platform RcDouble freeregs :: [RealReg] + + let extra_clobbered = [ r | r <- clobbered + , r `elem` (gpRegs ++ fltRegs ++ dblRegs) ] + + setFreeRegsR $! foldl' (flip $ frAllocateReg platform) freeregs extra_clobbered + + -- setFreeRegsR $! foldl' (flip $ frAllocateReg platform) freeregs clobbered assig <- getAssigR setAssigR $! clobber assig (nonDetUFMToList assig) @@ -909,10 +948,7 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc | (temp_to_push_out, (my_reg :: RealReg)) : _ <- candidates_inReg = do - (spill_insn, slot) <- spillR (RegReal my_reg) temp_to_push_out - let spill_store = (if reading then id else reverse) - [ -- COMMENT (fsLit "spill alloc") - spill_insn ] + (spill_store, slot) <- spillR (RegReal my_reg) temp_to_push_out -- record that this temp was spilled recordSpill (SpillAlloc temp_to_push_out) @@ -962,7 +998,7 @@ loadTemp vreg (ReadMem slot) hreg spills = do insn <- loadR (RegReal hreg) slot recordSpill (SpillLoad $ getUnique vreg) - return $ {- COMMENT (fsLit "spill load") : -} insn : spills + return $ {- mkComment (text "spill load") : -} insn ++ spills loadTemp _ _ _ spills = return spills diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs b/compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs new file mode 100644 index 0000000000..50299c4e74 --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs @@ -0,0 +1,137 @@ +module GHC.CmmToAsm.Reg.Linear.AArch64 where + +import GHC.Prelude + +import GHC.CmmToAsm.AArch64.Regs +import GHC.Platform.Reg.Class +import GHC.Platform.Reg + +import GHC.Utils.Outputable +import GHC.Utils.Panic +import GHC.Platform + +import Data.Word + +import GHC.Stack +-- AArch64 has 32 64bit general purpose register r0..r30, and zr/sp +-- AArch64 has 32 128bit floating point registers v0..v31 as part of the NEON +-- extension in Armv8-A. +-- +-- Armv8-A is a fundamental change to the Arm architecture. It supports the +-- 64-bit Execution state called “AArch64”, and a new 64-bit instruction set +-- “A64”. To provide compatibility with the Armv7-A (32-bit architecture) +-- instruction set, a 32-bit variant of Armv8-A “AArch32” is provided. Most of +-- existing Armv7-A code can be run in the AArch32 execution state of Armv8-A. +-- +-- these can be addresses as q/d/s/h/b 0..31, or v.f<size>[idx] +-- where size is 64, 32, 16, 8, ... and the index i allows us +-- to access the given part. +-- +-- History of Arm Adv SIMD +-- .---------------------------------------------------------------------------. +-- | Armv6 | Armv7-A | Armv8-A AArch64 | +-- | SIMD extension | NEON | NEON | +-- |===========================================================================| +-- | - Operates on 32-bit | - Separate reg. bank, | - Separate reg. bank, | +-- | GP ARM registers | 32x64-bit NEON regs | 32x128-bit NEON regs | +-- | - 8-bit/16-bit integer | - 8/16/32/64-bit int | - 8/16/32/64-bit int | +-- | | - Single percision fp | - Single percision fp | +-- | | | - Double precision fp | +-- | | | - Single/Double fp are | +-- | | | IEEE compliant | +-- | - 2x16-bit/4x8-bit ops | - Up to 16x8-bit ops | - Up to 16x8-bit ops | +-- | per instruction | per instruction | per instruction | +-- '---------------------------------------------------------------------------' + +data FreeRegs = FreeRegs !Word32 !Word32 + +instance Show FreeRegs where + show (FreeRegs g f) = "FreeRegs: " ++ showBits g ++ "; " ++ showBits f + +instance Outputable FreeRegs where + ppr (FreeRegs g f) = text " " <+> foldr (\i x -> pad_int i <+> x) (text "") [0..31] + $$ text "GPR" <+> foldr (\i x -> show_bit g i <+> x) (text "") [0..31] + $$ text "FPR" <+> foldr (\i x -> show_bit f i <+> x) (text "") [0..31] + where pad_int i | i < 10 = char ' ' <> int i + pad_int i = int i + -- remember bit = 1 means it's available. + show_bit bits bit | testBit bits bit = text " " + show_bit _ _ = text " x" + +noFreeRegs :: FreeRegs +noFreeRegs = FreeRegs 0 0 + +showBits :: Word32 -> String +showBits w = map (\i -> if testBit w i then '1' else '0') [0..31] + +-- FR instance implementation (See Linear.FreeRegs) +allocateReg :: HasCallStack => RealReg -> FreeRegs -> FreeRegs +allocateReg (RealRegSingle r) (FreeRegs g f) + | r > 31 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) + | r < 32 && testBit g r = FreeRegs (clearBit g r) f + | r > 31 = panic $ "Linear.AArch64.allocReg: double allocation of float reg v" ++ show (r - 32) ++ "; " ++ showBits f + | otherwise = pprPanic "Linear.AArch64.allocReg" $ text ("double allocation of gp reg x" ++ show r ++ "; " ++ showBits g) +allocateReg _ _ = panic "Linear.AArch64.allocReg: bad reg" + +-- we start from 28 downwards... the logic is similar to the ppc logic. +-- 31 is Stack Pointer +-- 30 is Link Register +-- 29 is Stack Frame (by convention) +-- 19-28 are callee save +-- the lower ones are all caller save + +-- For this reason someone decided to give aarch64 only 6 regs for +-- STG: +-- 19: Base +-- 20: Sp +-- 21: Hp +-- 22-27: R1-R6 +-- 28: SpLim + +-- For LLVM code gen interop: +-- See https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20150119/253722.html +-- and the current ghccc implementation here: +-- https://github.com/llvm/llvm-project/blob/161ae1f39816edf667aaa190bce702a86879c7bd/llvm/lib/Target/AArch64/AArch64CallingConvention.td#L324-L363 +-- and https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/compiler/generated-code +-- for the STG discussion. +{- For reference the ghcc from the link above: +let Entry = 1 in +def CC_AArch64_GHC : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType<i64>>, + + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>, + CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>, + + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim + CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> +]>; +-} + +getFreeRegs :: RegClass -> FreeRegs -> [RealReg] +getFreeRegs cls (FreeRegs g f) + | RcFloat <- cls = [] -- For now we only support double and integer registers, floats will need to be promoted. + | RcDouble <- cls = go 32 f 31 + | RcInteger <- cls = go 0 g 18 + where + go _ _ i | i < 0 = [] + go off x i | testBit x i = RealRegSingle (off + i) : (go off x $! i - 1) + | otherwise = go off x $! i - 1 + +initFreeRegs :: Platform -> FreeRegs +initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform) + +releaseReg :: HasCallStack => RealReg -> FreeRegs -> FreeRegs +releaseReg (RealRegSingle r) (FreeRegs g f) + | r > 31 && testBit f (r - 32) = pprPanic "Linear.AArch64.releaseReg" (text "can't release non-allocated reg v" <> int (r - 32)) + | r < 32 && testBit g r = pprPanic "Linear.AArch64.releaseReg" (text "can't release non-allocated reg x" <> int r) + | r > 31 = FreeRegs g (setBit f (r - 32)) + | otherwise = FreeRegs (setBit g r) f +releaseReg _ _ = pprPanic "Linear.AArch64.releaseReg" (text "bad reg") diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs index d501718c4a..3ae0fa140d 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs @@ -25,14 +25,16 @@ import GHC.Platform -- getFreeRegs cls f = filter ( (==cls) . regClass . RealReg ) f -- allocateReg f r = filter (/= r) f -import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC -import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC -import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 -import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 +import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC +import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC +import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 +import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 +import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64 -import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr -import qualified GHC.CmmToAsm.SPARC.Instr as SPARC.Instr -import qualified GHC.CmmToAsm.X86.Instr as X86.Instr +import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr +import qualified GHC.CmmToAsm.SPARC.Instr as SPARC.Instr +import qualified GHC.CmmToAsm.X86.Instr as X86.Instr +import qualified GHC.CmmToAsm.AArch64.Instr as AArch64.Instr class Show freeRegs => FR freeRegs where frAllocateReg :: Platform -> RealReg -> freeRegs -> freeRegs @@ -58,6 +60,12 @@ instance FR PPC.FreeRegs where frInitFreeRegs = PPC.initFreeRegs frReleaseReg = \_ -> PPC.releaseReg +instance FR AArch64.FreeRegs where + frAllocateReg = \_ -> AArch64.allocateReg + frGetFreeRegs = \_ -> AArch64.getFreeRegs + frInitFreeRegs = AArch64.initFreeRegs + frReleaseReg = \_ -> AArch64.releaseReg + instance FR SPARC.FreeRegs where frAllocateReg = SPARC.allocateReg frGetFreeRegs = \_ -> SPARC.getFreeRegs @@ -73,7 +81,7 @@ maxSpillSlots config = case platformArch (ncgPlatform config) of ArchSPARC -> SPARC.Instr.maxSpillSlots config ArchSPARC64 -> panic "maxSpillSlots ArchSPARC64" ArchARM _ _ _ -> panic "maxSpillSlots ArchARM" - ArchAArch64 -> panic "maxSpillSlots ArchAArch64" + ArchAArch64 -> AArch64.Instr.maxSpillSlots config ArchPPC_64 _ -> PPC.Instr.maxSpillSlots config ArchAlpha -> panic "maxSpillSlots ArchAlpha" ArchMipseb -> panic "maxSpillSlots ArchMipseb" diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs b/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs index d0330a4f6a..cbdf5d030b 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs @@ -26,6 +26,7 @@ import GHC.Cmm.BlockId import GHC.Cmm.Dataflow.Collections import GHC.Data.Graph.Directed import GHC.Utils.Panic +import GHC.Utils.Monad (concatMapM) import GHC.Types.Unique import GHC.Types.Unique.FM import GHC.Types.Unique.Set @@ -306,7 +307,7 @@ handleComponent -- go via a spill slot. -- handleComponent delta _ (AcyclicSCC (DigraphNode vreg src dsts)) - = mapM (makeMove delta vreg src) dsts + = concatMapM (makeMove delta vreg src) dsts -- Handle some cyclic moves. @@ -340,7 +341,7 @@ handleComponent delta instr -- make sure to do all the reloads after all the spills, -- so we don't end up clobbering the source values. - return ([instrSpill] ++ concat remainingFixUps ++ [instrLoad]) + return (instrSpill ++ concat remainingFixUps ++ instrLoad) handleComponent _ _ (CyclicSCC _) = panic "Register Allocator: handleComponent cyclic" @@ -354,7 +355,7 @@ makeMove -> Unique -- ^ unique of the vreg that we're moving. -> Loc -- ^ source location. -> Loc -- ^ destination location. - -> RegM freeRegs instr -- ^ move instruction. + -> RegM freeRegs [instr] -- ^ move instruction. makeMove delta vreg src dst = do config <- getConfig @@ -363,7 +364,7 @@ makeMove delta vreg src dst case (src, dst) of (InReg s, InReg d) -> do recordSpill (SpillJoinRR vreg) - return $ mkRegRegMoveInstr platform (RegReal s) (RegReal d) + return $ [mkRegRegMoveInstr platform (RegReal s) (RegReal d)] (InMem s, InReg d) -> do recordSpill (SpillJoinRM vreg) return $ mkLoadInstr config (RegReal d) delta s @@ -377,4 +378,3 @@ makeMove delta vreg src dst panic ("makeMove " ++ show vreg ++ " (" ++ show src ++ ") (" ++ show dst ++ ")" ++ " we don't handle mem->mem moves.") - diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs index 24a75121b8..ec1cd517ea 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs @@ -121,7 +121,7 @@ makeRAStats state spillR :: Instruction instr - => Reg -> Unique -> RegM freeRegs (instr, Int) + => Reg -> Unique -> RegM freeRegs ([instr], Int) spillR reg temp = mkRegM $ \s -> let (stack1,slot) = getStackSlotFor (ra_stack s) temp @@ -131,7 +131,7 @@ spillR reg temp = mkRegM $ \s -> loadR :: Instruction instr - => Reg -> Int -> RegM freeRegs instr + => Reg -> Int -> RegM freeRegs [instr] loadR reg slot = mkRegM $ \s -> RA_Result s (mkLoadInstr (ra_config s) reg (ra_delta s) slot) diff --git a/compiler/GHC/CmmToAsm/Reg/Liveness.hs b/compiler/GHC/CmmToAsm/Reg/Liveness.hs index 4d70533624..ad8190270f 100644 --- a/compiler/GHC/CmmToAsm/Reg/Liveness.hs +++ b/compiler/GHC/CmmToAsm/Reg/Liveness.hs @@ -174,6 +174,8 @@ instance Instruction instr => Instruction (InstrSR instr) where pprInstr platform i = ppr (fmap (pprInstr platform) i) + mkComment = fmap Instr . mkComment + -- | An instruction with liveness information. data LiveInstr instr @@ -565,16 +567,20 @@ stripLiveBlock config (BasicBlock i lis) where (instrs', _) = runState (spillNat [] lis) 0 + -- spillNat :: [instr] -> [LiveInstr instr] -> State Int [instr] + spillNat :: Instruction instr => [instr] -> [LiveInstr instr] -> State Int [instr] spillNat acc [] = return (reverse acc) + -- The SPILL/RELOAD cases do not appear to be exercised by our codegens + -- spillNat acc (LiveInstr (SPILL reg slot) _ : instrs) = do delta <- get - spillNat (mkSpillInstr config reg delta slot : acc) instrs + spillNat (mkSpillInstr config reg delta slot ++ acc) instrs spillNat acc (LiveInstr (RELOAD slot reg) _ : instrs) = do delta <- get - spillNat (mkLoadInstr config reg delta slot : acc) instrs + spillNat (mkLoadInstr config reg delta slot ++ acc) instrs spillNat acc (LiveInstr (Instr instr) _ : instrs) | Just i <- takeDeltaInstr instr diff --git a/compiler/GHC/CmmToAsm/Reg/Target.hs b/compiler/GHC/CmmToAsm/Reg/Target.hs index 4611c3a8e8..22b22e21cc 100644 --- a/compiler/GHC/CmmToAsm/Reg/Target.hs +++ b/compiler/GHC/CmmToAsm/Reg/Target.hs @@ -34,6 +34,8 @@ import qualified GHC.CmmToAsm.X86.Regs as X86 import qualified GHC.CmmToAsm.X86.RegInfo as X86 import qualified GHC.CmmToAsm.PPC.Regs as PPC import qualified GHC.CmmToAsm.SPARC.Regs as SPARC +import qualified GHC.CmmToAsm.AArch64.Regs as AArch64 + targetVirtualRegSqueeze :: Platform -> RegClass -> VirtualReg -> Int targetVirtualRegSqueeze platform @@ -46,7 +48,7 @@ targetVirtualRegSqueeze platform ArchSPARC64 -> panic "targetVirtualRegSqueeze ArchSPARC64" ArchPPC_64 _ -> PPC.virtualRegSqueeze ArchARM _ _ _ -> panic "targetVirtualRegSqueeze ArchARM" - ArchAArch64 -> panic "targetVirtualRegSqueeze ArchAArch64" + ArchAArch64 -> AArch64.virtualRegSqueeze ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha" ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb" ArchMipsel -> panic "targetVirtualRegSqueeze ArchMipsel" @@ -66,7 +68,7 @@ targetRealRegSqueeze platform ArchSPARC64 -> panic "targetRealRegSqueeze ArchSPARC64" ArchPPC_64 _ -> PPC.realRegSqueeze ArchARM _ _ _ -> panic "targetRealRegSqueeze ArchARM" - ArchAArch64 -> panic "targetRealRegSqueeze ArchAArch64" + ArchAArch64 -> AArch64.realRegSqueeze ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha" ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb" ArchMipsel -> panic "targetRealRegSqueeze ArchMipsel" @@ -85,7 +87,7 @@ targetClassOfRealReg platform ArchSPARC64 -> panic "targetClassOfRealReg ArchSPARC64" ArchPPC_64 _ -> PPC.classOfRealReg ArchARM _ _ _ -> panic "targetClassOfRealReg ArchARM" - ArchAArch64 -> panic "targetClassOfRealReg ArchAArch64" + ArchAArch64 -> AArch64.classOfRealReg ArchAlpha -> panic "targetClassOfRealReg ArchAlpha" ArchMipseb -> panic "targetClassOfRealReg ArchMipseb" ArchMipsel -> panic "targetClassOfRealReg ArchMipsel" @@ -104,7 +106,7 @@ targetMkVirtualReg platform ArchSPARC64 -> panic "targetMkVirtualReg ArchSPARC64" ArchPPC_64 _ -> PPC.mkVirtualReg ArchARM _ _ _ -> panic "targetMkVirtualReg ArchARM" - ArchAArch64 -> panic "targetMkVirtualReg ArchAArch64" + ArchAArch64 -> AArch64.mkVirtualReg ArchAlpha -> panic "targetMkVirtualReg ArchAlpha" ArchMipseb -> panic "targetMkVirtualReg ArchMipseb" ArchMipsel -> panic "targetMkVirtualReg ArchMipsel" @@ -123,7 +125,7 @@ targetRegDotColor platform ArchSPARC64 -> panic "targetRegDotColor ArchSPARC64" ArchPPC_64 _ -> PPC.regDotColor ArchARM _ _ _ -> panic "targetRegDotColor ArchARM" - ArchAArch64 -> panic "targetRegDotColor ArchAArch64" + ArchAArch64 -> AArch64.regDotColor ArchAlpha -> panic "targetRegDotColor ArchAlpha" ArchMipseb -> panic "targetRegDotColor ArchMipseb" ArchMipsel -> panic "targetRegDotColor ArchMipsel" |