summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Trommler <ptrommler@acm.org>2018-12-11 16:43:49 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2019-03-15 10:30:08 -0400
commit83e09d3c2b2e84b85fa25e271eff2747fc783f09 (patch)
treef7969d07d5a1cccd58893fd47aa133b0c7464211
parent97032ed9b2594c8939cab776ff871051d6dba30a (diff)
downloadhaskell-83e09d3c2b2e84b85fa25e271eff2747fc783f09.tar.gz
PPC NCG: Use liveness information in CmmCall
We make liveness information for global registers available on `JMP` and `BCTR`, which were the last instructions missing. With complete liveness information we do not need to reserve global registers in `freeReg` anymore. Moreover we assign R9 and R10 to callee saves registers. Cleanup by removing `Reg_Su`, which was unused, from `freeReg` and removing unused register definitions. The calculation of the number of floating point registers is too conservative. Just follow X86 and specify the constants directly. Overall on PowerPC this results in 0.3 % smaller code size in nofib while runtime is slightly better in some tests.
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs40
-rw-r--r--compiler/nativeGen/PPC/Instr.hs35
-rw-r--r--compiler/nativeGen/PPC/Ppr.hs4
-rw-r--r--compiler/nativeGen/PPC/Regs.hs12
-rw-r--r--includes/CodeGen.Platform.hs72
-rw-r--r--includes/stg/MachRegs.h13
6 files changed, 57 insertions, 119 deletions
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 516a49aee3..c640ba115f 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -180,10 +180,16 @@ stmtToInstrs stmt = do
return (b1 `appOL` b2)
CmmSwitch arg ids -> do dflags <- getDynFlags
genSwitch dflags arg ids
- CmmCall { cml_target = arg } -> genJump arg
+ CmmCall { cml_target = arg
+ , cml_args_regs = gregs } -> do
+ dflags <- getDynFlags
+ genJump arg (jumpRegs dflags gregs)
_ ->
panic "stmtToInstrs: statement should have been cps'd away"
+jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
+jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
+ where platform = targetPlatform dflags
--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
@@ -1042,19 +1048,19 @@ assignReg_FltCode = assignReg_IntCode
-genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
+genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
-genJump (CmmLit (CmmLabel lbl))
- = return (unitOL $ JMP lbl)
+genJump (CmmLit (CmmLabel lbl)) regs
+ = return (unitOL $ JMP lbl regs)
-genJump tree
+genJump tree gregs
= do
dflags <- getDynFlags
- genJump' tree (platformToGCP (targetPlatform dflags))
+ genJump' tree (platformToGCP (targetPlatform dflags)) gregs
-genJump' :: CmmExpr -> GenCCallPlatform -> NatM InstrBlock
+genJump' :: CmmExpr -> GenCCallPlatform -> [Reg] -> NatM InstrBlock
-genJump' tree (GCP64ELF 1)
+genJump' tree (GCP64ELF 1) regs
= do
(target,code) <- getSomeReg tree
return (code
@@ -1062,20 +1068,20 @@ genJump' tree (GCP64ELF 1)
`snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
`snocOL` MTCTR r11
`snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
- `snocOL` BCTR [] Nothing)
+ `snocOL` BCTR [] Nothing regs)
-genJump' tree (GCP64ELF 2)
+genJump' tree (GCP64ELF 2) regs
= do
(target,code) <- getSomeReg tree
return (code
`snocOL` MR r12 target
`snocOL` MTCTR r12
- `snocOL` BCTR [] Nothing)
+ `snocOL` BCTR [] Nothing regs)
-genJump' tree _
+genJump' tree _ regs
= do
(target,code) <- getSomeReg tree
- return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing)
+ return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing regs)
-- -----------------------------------------------------------------------------
-- Unconditional branches
@@ -2044,7 +2050,7 @@ genSwitch dflags expr targets
SL fmt tmp reg (RIImm (ImmInt sha)),
LD fmt tmp (AddrRegReg tableReg tmp),
MTCTR tmp,
- BCTR ids (Just lbl)
+ BCTR ids (Just lbl) []
]
return code
@@ -2062,7 +2068,7 @@ genSwitch dflags expr targets
LD fmt tmp (AddrRegReg tableReg tmp),
ADD tmp tmp (RIReg tableReg),
MTCTR tmp,
- BCTR ids (Just lbl)
+ BCTR ids (Just lbl) []
]
return code
| otherwise
@@ -2077,14 +2083,14 @@ genSwitch dflags expr targets
ADDIS tmp tmp (HA (ImmCLbl lbl)),
LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
MTCTR tmp,
- BCTR ids (Just lbl)
+ BCTR ids (Just lbl) []
]
return code
where (offset, ids) = switchTargetsToTable targets
generateJumpTableForInstr :: DynFlags -> Instr
-> Maybe (NatCmmDecl CmmStatics Instr)
-generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
+generateJumpTableForInstr dflags (BCTR ids (Just lbl) _) =
let jumpTable
| (positionIndependent dflags)
|| (not $ target32Bit $ targetPlatform dflags)
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index e618e189b8..79cc7aeefb 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -151,12 +151,12 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
-- "labeled-goto" we use JMP, and for "computed-goto" we
-- use MTCTR followed by BCTR. See 'PPC.CodeGen.genJump'.
= case insn of
- JMP _ -> dealloc ++ (insn : r)
- BCTR [] Nothing -> dealloc ++ (insn : r)
- BCTR ids label -> BCTR (map (fmap retarget) ids) label : r
- BCCFAR cond b p -> BCCFAR cond (retarget b) p : r
- BCC cond b p -> BCC cond (retarget b) p : r
- _ -> insn : r
+ JMP _ _ -> dealloc ++ (insn : r)
+ BCTR [] Nothing _ -> dealloc ++ (insn : r)
+ BCTR ids label rs -> BCTR (map (fmap retarget) ids) label rs : r
+ BCCFAR cond b p -> BCCFAR cond (retarget b) p : r
+ BCC cond b p -> BCC cond (retarget b) p : r
+ _ -> insn : r
-- BL and BCTRL are call-like instructions rather than
-- jumps, and are used only for C calls.
@@ -223,10 +223,13 @@ data Instr
-- Just True: branch likely taken
-- Just False: branch likely not taken
-- Nothing: no hint
- | JMP CLabel -- same as branch,
+ | JMP CLabel [Reg] -- same as branch,
-- but with CLabel instead of block ID
+ -- and live global registers
| MTCTR Reg
- | BCTR [Maybe BlockId] (Maybe CLabel) -- with list of local destinations, and jump table location if necessary
+ | BCTR [Maybe BlockId] (Maybe CLabel) [Reg]
+ -- with list of local destinations, and
+ -- jump table location if necessary
| BL CLabel [Reg] -- with list of argument regs
| BCTRL [Reg]
@@ -324,8 +327,9 @@ ppc_regUsageOfInstr platform instr
CMPL _ reg ri -> usage (reg : regRI ri,[])
BCC _ _ _ -> noUsage
BCCFAR _ _ _ -> noUsage
+ JMP _ regs -> usage (regs, [])
MTCTR reg -> usage ([reg],[])
- BCTR _ _ -> noUsage
+ BCTR _ _ regs -> usage (regs, [])
BL _ params -> usage (params, callClobberedRegs platform)
BCTRL params -> usage (params, callClobberedRegs platform)
@@ -416,8 +420,9 @@ ppc_patchRegsOfInstr instr env
CMPL fmt reg ri -> CMPL fmt (env reg) (fixRI ri)
BCC cond lbl p -> BCC cond lbl p
BCCFAR cond lbl p -> BCCFAR cond lbl p
+ JMP l regs -> JMP l regs -- global regs will not be remapped
MTCTR reg -> MTCTR (env reg)
- BCTR targets lbl -> BCTR targets lbl
+ BCTR targets lbl rs -> BCTR targets lbl rs
BL imm argRegs -> BL imm argRegs -- argument regs
BCTRL argRegs -> BCTRL argRegs -- cannot be remapped
ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri)
@@ -506,10 +511,10 @@ ppc_isJumpishInstr instr
ppc_jumpDestsOfInstr :: Instr -> [BlockId]
ppc_jumpDestsOfInstr insn
= case insn of
- BCC _ id _ -> [id]
- BCCFAR _ id _ -> [id]
- BCTR targets _ -> [id | Just id <- targets]
- _ -> []
+ BCC _ id _ -> [id]
+ BCCFAR _ id _ -> [id]
+ BCTR targets _ _ -> [id | Just id <- targets]
+ _ -> []
-- | Change the destination of this jump instruction.
@@ -520,7 +525,7 @@ ppc_patchJumpInstr insn patchF
= case insn of
BCC cc id p -> BCC cc (patchF id) p
BCCFAR cc id p -> BCCFAR cc (patchF id) p
- BCTR ids lbl -> BCTR (map (fmap patchF) ids) lbl
+ BCTR ids lbl rs -> BCTR (map (fmap patchF) ids) lbl rs
_ -> insn
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs
index c4eb0811bd..23525ffb5a 100644
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ b/compiler/nativeGen/PPC/Ppr.hs
@@ -548,7 +548,7 @@ pprInstr (BCCFAR cond blockid prediction) = vcat [
Just True -> char '-'
Just False -> char '+'
-pprInstr (JMP lbl)
+pprInstr (JMP lbl _)
-- We never jump to ForeignLabels; if we ever do, c.f. handling for "BL"
| isForeignLabel lbl = panic "PPC.Ppr.pprInstr: JMP to ForeignLabel"
| otherwise =
@@ -565,7 +565,7 @@ pprInstr (MTCTR reg) = hcat [
char '\t',
pprReg reg
]
-pprInstr (BCTR _ _) = hcat [
+pprInstr (BCTR _ _ _) = hcat [
char '\t',
text "bctr"
]
diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs
index f0b9914270..7b16f217d9 100644
--- a/compiler/nativeGen/PPC/Regs.hs
+++ b/compiler/nativeGen/PPC/Regs.hs
@@ -37,9 +37,9 @@ module PPC.Regs (
fits16Bits,
makeImmediate,
fReg,
- r0, sp, toc, r3, r4, r11, r12, r27, r28, r30,
+ r0, sp, toc, r3, r4, r11, r12, r30,
tmpReg,
- f1, f20, f21,
+ f1,
allocatableRegs
@@ -306,7 +306,7 @@ point registers.
fReg :: Int -> RegNo
fReg x = (32 + x)
-r0, sp, toc, r3, r4, r11, r12, r27, r28, r30, f1, f20, f21 :: Reg
+r0, sp, toc, r3, r4, r11, r12, r30, f1 :: Reg
r0 = regSingle 0
sp = regSingle 1
toc = regSingle 2
@@ -314,12 +314,8 @@ r3 = regSingle 3
r4 = regSingle 4
r11 = regSingle 11
r12 = regSingle 12
-r27 = regSingle 27
-r28 = regSingle 28
r30 = regSingle 30
f1 = regSingle $ fReg 1
-f20 = regSingle $ fReg 20
-f21 = regSingle $ fReg 21
-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
-- i.e., these are the regs for which we are prepared to allow the
@@ -335,4 +331,4 @@ tmpReg platform =
case platformArch platform of
ArchPPC -> regSingle 13
ArchPPC_64 _ -> regSingle 30
- _ -> panic "PPC.Regs.tmpReg: unknowm arch"
+ _ -> panic "PPC.Regs.tmpReg: unknown arch"
diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs
index 6c00597764..2a373283d4 100644
--- a/includes/CodeGen.Platform.hs
+++ b/includes/CodeGen.Platform.hs
@@ -904,78 +904,9 @@ freeReg 30 = False
# if defined(REG_Base)
freeReg REG_Base = False
# endif
-# if defined(REG_R1)
-freeReg REG_R1 = False
-# endif
-# if defined(REG_R2)
-freeReg REG_R2 = False
-# endif
-# if defined(REG_R3)
-freeReg REG_R3 = False
-# endif
-# if defined(REG_R4)
-freeReg REG_R4 = False
-# endif
-# if defined(REG_R5)
-freeReg REG_R5 = False
-# endif
-# if defined(REG_R6)
-freeReg REG_R6 = False
-# endif
-# if defined(REG_R7)
-freeReg REG_R7 = False
-# endif
-# if defined(REG_R8)
-freeReg REG_R8 = False
-# endif
-# if defined(REG_R9)
-freeReg REG_R9 = False
-# endif
-# if defined(REG_R10)
-freeReg REG_R10 = False
-# endif
-# if defined(REG_F1)
-freeReg REG_F1 = False
-# endif
-# if defined(REG_F2)
-freeReg REG_F2 = False
-# endif
-# if defined(REG_F3)
-freeReg REG_F3 = False
-# endif
-# if defined(REG_F4)
-freeReg REG_F4 = False
-# endif
-# if defined(REG_F5)
-freeReg REG_F5 = False
-# endif
-# if defined(REG_F6)
-freeReg REG_F6 = False
-# endif
-# if defined(REG_D1)
-freeReg REG_D1 = False
-# endif
-# if defined(REG_D2)
-freeReg REG_D2 = False
-# endif
-# if defined(REG_D3)
-freeReg REG_D3 = False
-# endif
-# if defined(REG_D4)
-freeReg REG_D4 = False
-# endif
-# if defined(REG_D5)
-freeReg REG_D5 = False
-# endif
-# if defined(REG_D6)
-freeReg REG_D6 = False
-# endif
# if defined(REG_Sp)
freeReg REG_Sp = False
# endif
-# if defined(REG_Su)
-freeReg REG_Su = False
-# endif
# if defined(REG_SpLim)
freeReg REG_SpLim = False
# endif
@@ -1119,9 +1050,6 @@ freeReg REG_D6_2 = False
# if defined(REG_Sp)
freeReg REG_Sp = False
# endif
-# if defined(REG_Su)
-freeReg REG_Su = False
-# endif
# if defined(REG_SpLim)
freeReg REG_SpLim = False
# endif
diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h
index 3300580977..8095887c2c 100644
--- a/includes/stg/MachRegs.h
+++ b/includes/stg/MachRegs.h
@@ -312,6 +312,8 @@ the stack. See Note [Overlapping global registers] for implications.
#define REG_R6 r19
#define REG_R7 r20
#define REG_R8 r21
+#define REG_R9 r22
+#define REG_R10 r23
#define REG_F1 fr14
#define REG_F2 fr15
@@ -327,13 +329,14 @@ the stack. See Note [Overlapping global registers] for implications.
#define REG_D5 fr24
#define REG_D6 fr25
-#define REG_Sp r22
-#define REG_SpLim r24
-
-#define REG_Hp r25
-
+#define REG_Sp r24
+#define REG_SpLim r25
+#define REG_Hp r26
#define REG_Base r27
+#define MAX_REAL_FLOAT_REG 6
+#define MAX_REAL_DOUBLE_REG 6
+
/* -----------------------------------------------------------------------------
The Sun SPARC register mapping