summaryrefslogtreecommitdiff
path: root/compiler/nativeGen/PPC/CodeGen.hs
diff options
context:
space:
mode:
authorPeter Trommler <ptrommler@acm.org>2017-11-02 11:57:05 -0400
committerBen Gamari <ben@smart-cactus.org>2017-11-02 12:43:30 -0400
commit1130c67bbb6dc06f513e5c8705a488a591fabadb (patch)
treea5cb5b4a5154e9e51cc7cbce71bf453d3dcb4f4f /compiler/nativeGen/PPC/CodeGen.hs
parent1317ba625d40fbd51cb0538b3fde28f412f30c01 (diff)
downloadhaskell-1130c67bbb6dc06f513e5c8705a488a591fabadb.tar.gz
PPC NCG: Impl branch prediction, atomic ops.
Implement AtomicRMW ops, atomic read, atomic write in PowerPC native code generator. Also implement branch prediction because we need it in atomic ops anyway. This patch improves the issue in #12537 a bit but does not fix it entirely. The fallback operations for atomicread and atomicwrite in libraries/ghc-prim/cbits/atomic.c are incorrect. This patch avoids those functions by implementing the operations directly in the native code generator. This is also what the x86/amd64 NCG and the LLVM backend do. Test Plan: validate on AIX and PowerPC (32-bit) Linux Reviewers: erikd, hvr, austin, bgamari, simonmar Reviewed By: hvr, bgamari Subscribers: rwbarton, thomie GHC Trac Issues: #12537 Differential Revision: https://phabricator.haskell.org/D3984
Diffstat (limited to 'compiler/nativeGen/PPC/CodeGen.hs')
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs149
1 files changed, 117 insertions, 32 deletions
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index b5f1a62d22..d37f385a18 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -162,8 +162,8 @@ stmtToInstrs stmt = do
-> genCCall target result_regs args
CmmBranch id -> genBranch id
- CmmCondBranch arg true false _ -> do
- b1 <- genCondJump true arg
+ CmmCondBranch arg true false prediction -> do
+ b1 <- genCondJump true arg prediction
b2 <- genBranch false
return (b1 `appOL` b2)
CmmSwitch arg ids -> do dflags <- getDynFlags
@@ -1071,11 +1071,12 @@ comparison to do.
genCondJump
:: BlockId -- the branch target
-> CmmExpr -- the condition on which to branch
+ -> Maybe Bool
-> NatM InstrBlock
-genCondJump id bool = do
+genCondJump id bool prediction = do
CondCode _ cond code <- getCondCode bool
- return (code `snocOL` BCC cond id)
+ return (code `snocOL` BCC cond id prediction)
@@ -1099,6 +1100,90 @@ genCCall (PrimTarget MO_Touch) _ _
genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
= return $ nilOL
+genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n]
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ fmt = intFormat width
+ reg_dst = getRegisterReg platform (CmmLocal dst)
+ (instr, n_code) <- case amop of
+ AMO_Add -> getSomeRegOrImm ADD True reg_dst
+ AMO_Sub -> case n of
+ CmmLit (CmmInt i _)
+ | Just imm <- makeImmediate width True (-i)
+ -> return (ADD reg_dst reg_dst (RIImm imm), nilOL)
+ _
+ -> do
+ (n_reg, n_code) <- getSomeReg n
+ return (SUBF reg_dst n_reg reg_dst, n_code)
+ AMO_And -> getSomeRegOrImm AND False reg_dst
+ AMO_Nand -> do (n_reg, n_code) <- getSomeReg n
+ return (NAND reg_dst reg_dst n_reg, n_code)
+ AMO_Or -> getSomeRegOrImm OR False reg_dst
+ AMO_Xor -> getSomeRegOrImm XOR False reg_dst
+ Amode addr_reg addr_code <- getAmodeIndex addr
+ lbl_retry <- getBlockIdNat
+ return $ n_code `appOL` addr_code
+ `appOL` toOL [ HWSYNC
+ , BCC ALWAYS lbl_retry Nothing
+
+ , NEWBLOCK lbl_retry
+ , LDR fmt reg_dst addr_reg
+ , instr
+ , STC fmt reg_dst addr_reg
+ , BCC NE lbl_retry (Just False)
+ , ISYNC
+ ]
+ where
+ getAmodeIndex (CmmMachOp (MO_Add _) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
+ getAmodeIndex other
+ = do
+ (reg, code) <- getSomeReg other
+ return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here!
+ getSomeRegOrImm op sign dst
+ = case n of
+ CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i
+ -> return (op dst dst (RIImm imm), nilOL)
+ _
+ -> do
+ (n_reg, n_code) <- getSomeReg n
+ return (op dst dst (RIReg n_reg), n_code)
+
+genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ fmt = intFormat width
+ reg_dst = getRegisterReg platform (CmmLocal dst)
+ form = if widthInBits width == 64 then DS else D
+ Amode addr_reg addr_code <- getAmode form addr
+ lbl_end <- getBlockIdNat
+ return $ addr_code `appOL` toOL [ HWSYNC
+ , LD fmt reg_dst addr_reg
+ , CMP fmt reg_dst (RIReg reg_dst)
+ , BCC NE lbl_end (Just False)
+ , BCC ALWAYS lbl_end Nothing
+ -- See Note [Seemingly useless cmp and bne]
+ , NEWBLOCK lbl_end
+ , ISYNC
+ ]
+
+-- Note [Seemingly useless cmp and bne]
+-- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction
+-- the second paragraph says that isync may complete before storage accesses
+-- "associated" with a preceding instruction have been performed. The cmp
+-- operation and the following bne introduce a data and control dependency
+-- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe
+-- Fetch).
+-- This is also what gcc does.
+
+
+genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
+ code <- assignMem_IntCode (intFormat width) addr val
+ return $ unitOL(HWSYNC) `appOL` code
+
genCCall (PrimTarget (MO_Clz width)) [dst] [src]
= do dflags <- getDynFlags
let platform = targetPlatform dflags
@@ -1111,17 +1196,17 @@ genCCall (PrimTarget (MO_Clz width)) [dst] [src]
lbl3 <- getBlockIdNat
let vr_hi = getHiVRegFromLo vr_lo
cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
- , BCC NE lbl2
- , BCC ALWAYS lbl1
+ , BCC NE lbl2 Nothing
+ , BCC ALWAYS lbl1 Nothing
, NEWBLOCK lbl1
, CNTLZ II32 reg_dst vr_lo
, ADD reg_dst reg_dst (RIImm (ImmInt 32))
- , BCC ALWAYS lbl3
+ , BCC ALWAYS lbl3 Nothing
, NEWBLOCK lbl2
, CNTLZ II32 reg_dst vr_hi
- , BCC ALWAYS lbl3
+ , BCC ALWAYS lbl3 Nothing
, NEWBLOCK lbl3
]
@@ -1168,8 +1253,8 @@ genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
cnttzlo <- cnttz format reg_dst vr_lo
let vr_hi = getHiVRegFromLo vr_lo
cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
- , BCC NE lbl2
- , BCC ALWAYS lbl1
+ , BCC NE lbl2 Nothing
+ , BCC ALWAYS lbl1 Nothing
, NEWBLOCK lbl1
, ADD x' vr_hi (RIImm (ImmInt (-1)))
@@ -1177,12 +1262,12 @@ genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
, CNTLZ format r' x''
-- 32 + (32 - clz(x''))
, SUBFC reg_dst r' (RIImm (ImmInt 64))
- , BCC ALWAYS lbl3
+ , BCC ALWAYS lbl3 Nothing
, NEWBLOCK lbl2
]
`appOL` cnttzlo `appOL`
- toOL [ BCC ALWAYS lbl3
+ toOL [ BCC ALWAYS lbl3 Nothing
, NEWBLOCK lbl3
]
@@ -1316,21 +1401,21 @@ genCCall target dest_regs argsAndHints
-- rhat = un32 - q1*vn1
, MULL fmt tmp q1 (RIReg vn1)
, SUBF rhat tmp un32
- , BCC ALWAYS again1
+ , BCC ALWAYS again1 Nothing
, NEWBLOCK again1
-- if (q1 >= b || q1*vn0 > b*rhat + un1)
, CMPL fmt q1 (RIReg b)
- , BCC GEU then1
- , BCC ALWAYS no1
+ , BCC GEU then1 Nothing
+ , BCC ALWAYS no1 Nothing
, NEWBLOCK no1
, MULL fmt tmp q1 (RIReg vn0)
, SL fmt tmp1 rhat (RIImm (ImmInt half))
, ADD tmp1 tmp1 (RIReg un1)
, CMPL fmt tmp (RIReg tmp1)
- , BCC LEU endif1
- , BCC ALWAYS then1
+ , BCC LEU endif1 Nothing
+ , BCC ALWAYS then1 Nothing
, NEWBLOCK then1
-- q1 = q1 - 1
@@ -1339,8 +1424,8 @@ genCCall target dest_regs argsAndHints
, ADD rhat rhat (RIReg vn1)
-- if (rhat < b) goto again1
, CMPL fmt rhat (RIReg b)
- , BCC LTT again1
- , BCC ALWAYS endif1
+ , BCC LTT again1 Nothing
+ , BCC ALWAYS endif1 Nothing
, NEWBLOCK endif1
-- un21 = un32*b + un1 - q1*v
@@ -1354,21 +1439,21 @@ genCCall target dest_regs argsAndHints
-- rhat = un21- q0*vn1
, MULL fmt tmp q0 (RIReg vn1)
, SUBF rhat tmp un21
- , BCC ALWAYS again2
+ , BCC ALWAYS again2 Nothing
, NEWBLOCK again2
-- if (q0>b || q0*vn0 > b*rhat + un0)
, CMPL fmt q0 (RIReg b)
- , BCC GEU then2
- , BCC ALWAYS no2
+ , BCC GEU then2 Nothing
+ , BCC ALWAYS no2 Nothing
, NEWBLOCK no2
, MULL fmt tmp q0 (RIReg vn0)
, SL fmt tmp1 rhat (RIImm (ImmInt half))
, ADD tmp1 tmp1 (RIReg un0)
, CMPL fmt tmp (RIReg tmp1)
- , BCC LEU endif2
- , BCC ALWAYS then2
+ , BCC LEU endif2 Nothing
+ , BCC ALWAYS then2 Nothing
, NEWBLOCK then2
-- q0 = q0 - 1
@@ -1377,8 +1462,8 @@ genCCall target dest_regs argsAndHints
, ADD rhat rhat (RIReg vn1)
-- if (rhat<b) goto again2
, CMPL fmt rhat (RIReg b)
- , BCC LTT again2
- , BCC ALWAYS endif2
+ , BCC LTT again2 Nothing
+ , BCC ALWAYS endif2 Nothing
, NEWBLOCK endif2
-- compute remainder
@@ -1909,12 +1994,12 @@ genCCall' dflags gcp target dest_regs args
MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
- MO_Clz w -> (fsLit $ clzLabel w, False)
- MO_Ctz w -> (fsLit $ ctzLabel w, False)
- MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False)
+ MO_Clz _ -> unsupported
+ MO_Ctz _ -> unsupported
+ MO_AtomicRMW {} -> unsupported
MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
- MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False)
- MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False)
+ MO_AtomicRead _ -> unsupported
+ MO_AtomicWrite _ -> unsupported
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
@@ -1926,7 +2011,7 @@ genCCall' dflags gcp target dest_regs args
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
- (MO_Prefetch_Data _ ) -> unsupported
+ MO_Prefetch_Data _ -> unsupported
unsupported = panic ("outOfLineCmmOp: " ++ show mop
++ " not supported")