summaryrefslogtreecommitdiff
path: root/compiler/nativeGen/PPC
diff options
context:
space:
mode:
authorPeter Trommler <ptrommler@acm.org>2016-04-29 17:45:10 +0000
committerAustin Seipp <austin@well-typed.com>2016-04-29 17:45:31 +0000
commitb725fe0a8d2a2ee3e6d95bb0ec345ee532381ee2 (patch)
tree0295b03a07ba9cc7b0fc43d153f3097ad566c20b /compiler/nativeGen/PPC
parent00053ee6f450c4503c25ed9ba33089d991e2a04b (diff)
downloadhaskell-b725fe0a8d2a2ee3e6d95bb0ec345ee532381ee2.tar.gz
PPC NCG: Improve pointer de-tagging code
Generate a clrr[wd]i instruction to clear the tag bits in a pointer. This saves one instruction and one temporary register. Optimize signed comparison with zero after an andi. operation. This saves one instruction when comparing a pointer tag with zero. This reduces code size by 0.6 % in all nofib benchmarks. Test Plan: validate on AIX and 32-bit Linux Reviewed By: erikd, hvr Differential Revision: https://phabricator.haskell.org/D2093
Diffstat (limited to 'compiler/nativeGen/PPC')
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs27
-rw-r--r--compiler/nativeGen/PPC/Instr.hs3
-rw-r--r--compiler/nativeGen/PPC/Ppr.hs11
3 files changed, 36 insertions, 5 deletions
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 7b2f79b9db..12d5d88f53 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -574,7 +574,7 @@ getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
-- (needed for PIC)
MO_Add W32 ->
case y of
- CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True (-imm)
+ CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm
-> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
CmmLit lit
-> do
@@ -626,7 +626,16 @@ getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
| otherwise -> remainderCode rep DIVDU (extendSExpr dflags rep x)
(extendSExpr dflags rep y)
- MO_And rep -> trivialCode rep False AND x y
+ MO_And rep -> case y of
+ (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
+ -> do
+ (src, srcCode) <- getSomeReg x
+ let clear_mask = if imm == -4 then 2 else 3
+ fmt = intFormat rep
+ code dst = srcCode
+ `appOL` unitOL (CLRRI fmt dst src clear_mask)
+ return (Any fmt code)
+ _ -> trivialCode rep False AND x y
MO_Or rep -> trivialCode rep False OR x y
MO_Xor rep -> trivialCode rep False XOR x y
@@ -912,14 +921,22 @@ getCondCode (CmmMachOp mop [x, y])
getCondCode _ = panic "getCondCode(2)(powerpc)"
-
-- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
-- passed back up the tree.
condIntCode, condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
--- ###FIXME: I16 and I8!
--- TODO: Is this still an issue? All arguments are extend?Expr'd.
+-- optimize pointer tag checks. Operation andi. sets condition register
+-- so cmpi ..., 0 is redundant.
+condIntCode cond (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)])
+ (CmmLit (CmmInt 0 _))
+ | not $ condUnsigned cond,
+ Just src2 <- makeImmediate rep False imm
+ = do
+ (src1, code) <- getSomeReg x
+ let code' = code `snocOL` AND r0 src1 (RIImm src2)
+ return (CondCode False cond code')
+
condIntCode cond x (CmmLit (CmmInt y rep))
| Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
= do
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index b5c26ed906..23d8b6b2e6 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -251,6 +251,7 @@ data Instr
| SRA Format Reg Reg RI -- shift right arithmetic
| RLWINM Reg Reg Int Int Int -- Rotate Left Word Immediate then AND with Mask
+ | CLRRI Format Reg Reg Int -- clear right immediate (extended mnemonic)
| FADD Format Reg Reg Reg
| FSUB Format Reg Reg Reg
@@ -340,6 +341,7 @@ ppc_regUsageOfInstr platform instr
SR _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
SRA _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
RLWINM reg1 reg2 _ _ _ -> usage ([reg2], [reg1])
+ CLRRI _ reg1 reg2 _ -> usage ([reg2], [reg1])
FADD _ r1 r2 r3 -> usage ([r2,r3], [r1])
FSUB _ r1 r2 r3 -> usage ([r2,r3], [r1])
@@ -430,6 +432,7 @@ ppc_patchRegsOfInstr instr env
-> SRA fmt (env reg1) (env reg2) (fixRI ri)
RLWINM reg1 reg2 sh mb me
-> RLWINM (env reg1) (env reg2) sh mb me
+ CLRRI fmt reg1 reg2 n -> CLRRI fmt (env reg1) (env reg2) n
FADD fmt r1 r2 r3 -> FADD fmt (env r1) (env r2) (env r3)
FSUB fmt r1 r2 r3 -> FSUB fmt (env r1) (env r2) (env r3)
FMUL fmt r1 r2 r3 -> FMUL fmt (env r1) (env r2) (env r3)
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs
index 59b0ad80d1..3e1fd078f3 100644
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ b/compiler/nativeGen/PPC/Ppr.hs
@@ -791,6 +791,17 @@ pprInstr (RLWINM reg1 reg2 sh mb me) = hcat [
int me
]
+pprInstr (CLRRI fmt reg1 reg2 n) = hcat [
+ text "\tclrr",
+ pprFormat fmt,
+ text "i ",
+ pprReg reg1,
+ text ", ",
+ pprReg reg2,
+ text ", ",
+ int n
+ ]
+
pprInstr (FADD fmt reg1 reg2 reg3) = pprBinaryF (sLit "fadd") fmt reg1 reg2 reg3
pprInstr (FSUB fmt reg1 reg2 reg3) = pprBinaryF (sLit "fsub") fmt reg1 reg2 reg3
pprInstr (FMUL fmt reg1 reg2 reg3) = pprBinaryF (sLit "fmul") fmt reg1 reg2 reg3