diff options
author | Peter Trommler <ptrommler@acm.org> | 2017-11-02 11:57:05 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2017-11-02 12:43:30 -0400 |
commit | 1130c67bbb6dc06f513e5c8705a488a591fabadb (patch) | |
tree | a5cb5b4a5154e9e51cc7cbce71bf453d3dcb4f4f /compiler/nativeGen | |
parent | 1317ba625d40fbd51cb0538b3fde28f412f30c01 (diff) | |
download | haskell-1130c67bbb6dc06f513e5c8705a488a591fabadb.tar.gz |
PPC NCG: Impl branch prediction, atomic ops.
Implement AtomicRMW ops, atomic read, and atomic write
in the PowerPC native code generator. Also implement
branch prediction hints because we need them in atomic ops
anyway.
This patch mitigates the issue in #12537 but
does not fix it entirely.
The fallback operations for atomicread and atomicwrite
in libraries/ghc-prim/cbits/atomic.c are incorrect.
This patch avoids those functions by implementing the
operations directly in the native code generator. This
is also what the x86/amd64 NCG and the LLVM backend
do.
Test Plan: validate on AIX and PowerPC (32-bit) Linux
Reviewers: erikd, hvr, austin, bgamari, simonmar
Reviewed By: hvr, bgamari
Subscribers: rwbarton, thomie
GHC Trac Issues: #12537
Differential Revision: https://phabricator.haskell.org/D3984
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 149 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Instr.hs | 51 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Ppr.hs | 62 |
3 files changed, 201 insertions, 61 deletions
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index b5f1a62d22..d37f385a18 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -162,8 +162,8 @@ stmtToInstrs stmt = do -> genCCall target result_regs args CmmBranch id -> genBranch id - CmmCondBranch arg true false _ -> do - b1 <- genCondJump true arg + CmmCondBranch arg true false prediction -> do + b1 <- genCondJump true arg prediction b2 <- genBranch false return (b1 `appOL` b2) CmmSwitch arg ids -> do dflags <- getDynFlags @@ -1071,11 +1071,12 @@ comparison to do. genCondJump :: BlockId -- the branch target -> CmmExpr -- the condition on which to branch + -> Maybe Bool -> NatM InstrBlock -genCondJump id bool = do +genCondJump id bool prediction = do CondCode _ cond code <- getCondCode bool - return (code `snocOL` BCC cond id) + return (code `snocOL` BCC cond id prediction) @@ -1099,6 +1100,90 @@ genCCall (PrimTarget MO_Touch) _ _ genCCall (PrimTarget (MO_Prefetch_Data _)) _ _ = return $ nilOL +genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] + = do dflags <- getDynFlags + let platform = targetPlatform dflags + fmt = intFormat width + reg_dst = getRegisterReg platform (CmmLocal dst) + (instr, n_code) <- case amop of + AMO_Add -> getSomeRegOrImm ADD True reg_dst + AMO_Sub -> case n of + CmmLit (CmmInt i _) + | Just imm <- makeImmediate width True (-i) + -> return (ADD reg_dst reg_dst (RIImm imm), nilOL) + _ + -> do + (n_reg, n_code) <- getSomeReg n + return (SUBF reg_dst n_reg reg_dst, n_code) + AMO_And -> getSomeRegOrImm AND False reg_dst + AMO_Nand -> do (n_reg, n_code) <- getSomeReg n + return (NAND reg_dst reg_dst n_reg, n_code) + AMO_Or -> getSomeRegOrImm OR False reg_dst + AMO_Xor -> getSomeRegOrImm XOR False reg_dst + Amode addr_reg addr_code <- getAmodeIndex addr + lbl_retry <- getBlockIdNat + return $ n_code `appOL` addr_code + `appOL` toOL [ HWSYNC + , BCC ALWAYS lbl_retry Nothing + + , NEWBLOCK lbl_retry + , LDR fmt 
reg_dst addr_reg + , instr + , STC fmt reg_dst addr_reg + , BCC NE lbl_retry (Just False) + , ISYNC + ] + where + getAmodeIndex (CmmMachOp (MO_Add _) [x, y]) + = do + (regX, codeX) <- getSomeReg x + (regY, codeY) <- getSomeReg y + return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) + getAmodeIndex other + = do + (reg, code) <- getSomeReg other + return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here! + getSomeRegOrImm op sign dst + = case n of + CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i + -> return (op dst dst (RIImm imm), nilOL) + _ + -> do + (n_reg, n_code) <- getSomeReg n + return (op dst dst (RIReg n_reg), n_code) + +genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr] + = do dflags <- getDynFlags + let platform = targetPlatform dflags + fmt = intFormat width + reg_dst = getRegisterReg platform (CmmLocal dst) + form = if widthInBits width == 64 then DS else D + Amode addr_reg addr_code <- getAmode form addr + lbl_end <- getBlockIdNat + return $ addr_code `appOL` toOL [ HWSYNC + , LD fmt reg_dst addr_reg + , CMP fmt reg_dst (RIReg reg_dst) + , BCC NE lbl_end (Just False) + , BCC ALWAYS lbl_end Nothing + -- See Note [Seemingly useless cmp and bne] + , NEWBLOCK lbl_end + , ISYNC + ] + +-- Note [Seemingly useless cmp and bne] +-- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction +-- the second paragraph says that isync may complete before storage accesses +-- "associated" with a preceding instruction have been performed. The cmp +-- operation and the following bne introduce a data and control dependency +-- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe +-- Fetch). +-- This is also what gcc does. 
+ + +genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do + code <- assignMem_IntCode (intFormat width) addr val + return $ unitOL(HWSYNC) `appOL` code + genCCall (PrimTarget (MO_Clz width)) [dst] [src] = do dflags <- getDynFlags let platform = targetPlatform dflags @@ -1111,17 +1196,17 @@ genCCall (PrimTarget (MO_Clz width)) [dst] [src] lbl3 <- getBlockIdNat let vr_hi = getHiVRegFromLo vr_lo cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0)) - , BCC NE lbl2 - , BCC ALWAYS lbl1 + , BCC NE lbl2 Nothing + , BCC ALWAYS lbl1 Nothing , NEWBLOCK lbl1 , CNTLZ II32 reg_dst vr_lo , ADD reg_dst reg_dst (RIImm (ImmInt 32)) - , BCC ALWAYS lbl3 + , BCC ALWAYS lbl3 Nothing , NEWBLOCK lbl2 , CNTLZ II32 reg_dst vr_hi - , BCC ALWAYS lbl3 + , BCC ALWAYS lbl3 Nothing , NEWBLOCK lbl3 ] @@ -1168,8 +1253,8 @@ genCCall (PrimTarget (MO_Ctz width)) [dst] [src] cnttzlo <- cnttz format reg_dst vr_lo let vr_hi = getHiVRegFromLo vr_lo cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0)) - , BCC NE lbl2 - , BCC ALWAYS lbl1 + , BCC NE lbl2 Nothing + , BCC ALWAYS lbl1 Nothing , NEWBLOCK lbl1 , ADD x' vr_hi (RIImm (ImmInt (-1))) @@ -1177,12 +1262,12 @@ genCCall (PrimTarget (MO_Ctz width)) [dst] [src] , CNTLZ format r' x'' -- 32 + (32 - clz(x'')) , SUBFC reg_dst r' (RIImm (ImmInt 64)) - , BCC ALWAYS lbl3 + , BCC ALWAYS lbl3 Nothing , NEWBLOCK lbl2 ] `appOL` cnttzlo `appOL` - toOL [ BCC ALWAYS lbl3 + toOL [ BCC ALWAYS lbl3 Nothing , NEWBLOCK lbl3 ] @@ -1316,21 +1401,21 @@ genCCall target dest_regs argsAndHints -- rhat = un32 - q1*vn1 , MULL fmt tmp q1 (RIReg vn1) , SUBF rhat tmp un32 - , BCC ALWAYS again1 + , BCC ALWAYS again1 Nothing , NEWBLOCK again1 -- if (q1 >= b || q1*vn0 > b*rhat + un1) , CMPL fmt q1 (RIReg b) - , BCC GEU then1 - , BCC ALWAYS no1 + , BCC GEU then1 Nothing + , BCC ALWAYS no1 Nothing , NEWBLOCK no1 , MULL fmt tmp q1 (RIReg vn0) , SL fmt tmp1 rhat (RIImm (ImmInt half)) , ADD tmp1 tmp1 (RIReg un1) , CMPL fmt tmp (RIReg tmp1) - , BCC LEU endif1 - , BCC ALWAYS then1 + , 
BCC LEU endif1 Nothing + , BCC ALWAYS then1 Nothing , NEWBLOCK then1 -- q1 = q1 - 1 @@ -1339,8 +1424,8 @@ genCCall target dest_regs argsAndHints , ADD rhat rhat (RIReg vn1) -- if (rhat < b) goto again1 , CMPL fmt rhat (RIReg b) - , BCC LTT again1 - , BCC ALWAYS endif1 + , BCC LTT again1 Nothing + , BCC ALWAYS endif1 Nothing , NEWBLOCK endif1 -- un21 = un32*b + un1 - q1*v @@ -1354,21 +1439,21 @@ genCCall target dest_regs argsAndHints -- rhat = un21- q0*vn1 , MULL fmt tmp q0 (RIReg vn1) , SUBF rhat tmp un21 - , BCC ALWAYS again2 + , BCC ALWAYS again2 Nothing , NEWBLOCK again2 -- if (q0>b || q0*vn0 > b*rhat + un0) , CMPL fmt q0 (RIReg b) - , BCC GEU then2 - , BCC ALWAYS no2 + , BCC GEU then2 Nothing + , BCC ALWAYS no2 Nothing , NEWBLOCK no2 , MULL fmt tmp q0 (RIReg vn0) , SL fmt tmp1 rhat (RIImm (ImmInt half)) , ADD tmp1 tmp1 (RIReg un0) , CMPL fmt tmp (RIReg tmp1) - , BCC LEU endif2 - , BCC ALWAYS then2 + , BCC LEU endif2 Nothing + , BCC ALWAYS then2 Nothing , NEWBLOCK then2 -- q0 = q0 - 1 @@ -1377,8 +1462,8 @@ genCCall target dest_regs argsAndHints , ADD rhat rhat (RIReg vn1) -- if (rhat<b) goto again2 , CMPL fmt rhat (RIReg b) - , BCC LTT again2 - , BCC ALWAYS endif2 + , BCC LTT again2 Nothing + , BCC ALWAYS endif2 Nothing , NEWBLOCK endif2 -- compute remainder @@ -1909,12 +1994,12 @@ genCCall' dflags gcp target dest_regs args MO_BSwap w -> (fsLit $ bSwapLabel w, False) MO_PopCnt w -> (fsLit $ popCntLabel w, False) - MO_Clz w -> (fsLit $ clzLabel w, False) - MO_Ctz w -> (fsLit $ ctzLabel w, False) - MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False) + MO_Clz _ -> unsupported + MO_Ctz _ -> unsupported + MO_AtomicRMW {} -> unsupported MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False) - MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False) - MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False) + MO_AtomicRead _ -> unsupported + MO_AtomicWrite _ -> unsupported MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported @@ -1926,7 +2011,7 @@ genCCall' 
dflags gcp target dest_regs args MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported - (MO_Prefetch_Data _ ) -> unsupported + MO_Prefetch_Data _ -> unsupported unsupported = panic ("outOfLineCmmOp: " ++ show mop ++ " not supported") diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs index d39f0bcff5..cef3eb7cd8 100644 --- a/compiler/nativeGen/PPC/Instr.hs +++ b/compiler/nativeGen/PPC/Instr.hs @@ -126,7 +126,7 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do insert_stack_insns (BasicBlock id insns) | Just new_blockid <- mapLookup id new_blockmap - = [ BasicBlock id [alloc, BCC ALWAYS new_blockid] + = [ BasicBlock id [alloc, BCC ALWAYS new_blockid Nothing] , BasicBlock new_blockid block' ] | otherwise @@ -142,8 +142,8 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do JMP _ -> dealloc : insn : r BCTR [] Nothing -> dealloc : insn : r BCTR ids label -> BCTR (map (fmap retarget) ids) label : r - BCCFAR cond b -> BCCFAR cond (retarget b) : r - BCC cond b -> BCC cond (retarget b) : r + BCCFAR cond b p -> BCCFAR cond (retarget b) p : r + BCC cond b p -> BCC cond (retarget b) p : r _ -> insn : r -- BL and BCTRL are call-like instructions rather than -- jumps, and are used only for C calls. @@ -192,10 +192,12 @@ data Instr -- Loads and stores. 
| LD Format Reg AddrMode -- Load format, dst, src | LDFAR Format Reg AddrMode -- Load format, dst, src 32 bit offset + | LDR Format Reg AddrMode -- Load and reserve format, dst, src | LA Format Reg AddrMode -- Load arithmetic format, dst, src | ST Format Reg AddrMode -- Store format, src, dst | STFAR Format Reg AddrMode -- Store format, src, dst 32 bit offset | STU Format Reg AddrMode -- Store with Update format, src, dst + | STC Format Reg AddrMode -- Store conditional format, src, dst | LIS Reg Imm -- Load Immediate Shifted dst, src | LI Reg Imm -- Load Immediate dst, src | MR Reg Reg -- Move Register dst, src -- also for fmr @@ -203,8 +205,12 @@ data Instr | CMP Format Reg RI -- format, src1, src2 | CMPL Format Reg RI -- format, src1, src2 - | BCC Cond BlockId - | BCCFAR Cond BlockId + | BCC Cond BlockId (Maybe Bool) -- cond, block, hint + | BCCFAR Cond BlockId (Maybe Bool) -- cond, block, hint + -- hint: + -- Just True: branch likely taken + -- Just False: branch likely not taken + -- Nothing: no hint | JMP CLabel -- same as branch, -- but with CLabel instead of block ID | MTCTR Reg @@ -234,6 +240,7 @@ data Instr | DIV Format Bool Reg Reg Reg | AND Reg Reg RI -- dst, src1, src2 | ANDC Reg Reg Reg -- AND with complement, dst = src1 & ~ src2 + | NAND Reg Reg Reg -- dst, src1, src2 | OR Reg Reg RI -- dst, src1, src2 | ORIS Reg Reg Imm -- OR Immediate Shifted dst, src1, src2 | XOR Reg Reg RI -- dst, src1, src2 @@ -274,6 +281,8 @@ data Instr | MFLR Reg -- move from link register | FETCHPC Reg -- pseudo-instruction: -- bcl to next insn, mflr reg + | HWSYNC -- heavy weight sync + | ISYNC -- instruction synchronize | LWSYNC -- memory barrier | NOP -- no operation, PowerPC 64 bit -- needs this as place holder to @@ -292,17 +301,19 @@ ppc_regUsageOfInstr platform instr = case instr of LD _ reg addr -> usage (regAddr addr, [reg]) LDFAR _ reg addr -> usage (regAddr addr, [reg]) + LDR _ reg addr -> usage (regAddr addr, [reg]) LA _ reg addr -> usage (regAddr addr, [reg]) ST 
_ reg addr -> usage (reg : regAddr addr, []) STFAR _ reg addr -> usage (reg : regAddr addr, []) STU _ reg addr -> usage (reg : regAddr addr, []) + STC _ reg addr -> usage (reg : regAddr addr, []) LIS reg _ -> usage ([], [reg]) LI reg _ -> usage ([], [reg]) MR reg1 reg2 -> usage ([reg2], [reg1]) CMP _ reg ri -> usage (reg : regRI ri,[]) CMPL _ reg ri -> usage (reg : regRI ri,[]) - BCC _ _ -> noUsage - BCCFAR _ _ -> noUsage + BCC _ _ _ -> noUsage + BCCFAR _ _ _ -> noUsage MTCTR reg -> usage ([reg],[]) BCTR _ _ -> noUsage BL _ params -> usage (params, callClobberedRegs platform) @@ -327,6 +338,7 @@ ppc_regUsageOfInstr platform instr AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) ANDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + NAND reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) ORIS reg1 reg2 _ -> usage ([reg2], [reg1]) XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) @@ -382,17 +394,19 @@ ppc_patchRegsOfInstr instr env = case instr of LD fmt reg addr -> LD fmt (env reg) (fixAddr addr) LDFAR fmt reg addr -> LDFAR fmt (env reg) (fixAddr addr) + LDR fmt reg addr -> LDR fmt (env reg) (fixAddr addr) LA fmt reg addr -> LA fmt (env reg) (fixAddr addr) ST fmt reg addr -> ST fmt (env reg) (fixAddr addr) STFAR fmt reg addr -> STFAR fmt (env reg) (fixAddr addr) STU fmt reg addr -> STU fmt (env reg) (fixAddr addr) + STC fmt reg addr -> STC fmt (env reg) (fixAddr addr) LIS reg imm -> LIS (env reg) imm LI reg imm -> LI (env reg) imm MR reg1 reg2 -> MR (env reg1) (env reg2) CMP fmt reg ri -> CMP fmt (env reg) (fixRI ri) CMPL fmt reg ri -> CMPL fmt (env reg) (fixRI ri) - BCC cond lbl -> BCC cond lbl - BCCFAR cond lbl -> BCCFAR cond lbl + BCC cond lbl p -> BCC cond lbl p + BCCFAR cond lbl p -> BCCFAR cond lbl p MTCTR reg -> MTCTR (env reg) BCTR targets lbl -> BCTR targets lbl BL imm argRegs -> BL imm argRegs -- argument regs @@ -419,6 +433,7 @@ ppc_patchRegsOfInstr instr env AND reg1 reg2 ri -> AND (env reg1) (env reg2) 
(fixRI ri) ANDC reg1 reg2 reg3 -> ANDC (env reg1) (env reg2) (env reg3) + NAND reg1 reg2 reg3 -> NAND (env reg1) (env reg2) (env reg3) OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri) ORIS reg1 reg2 imm -> ORIS (env reg1) (env reg2) imm XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri) @@ -482,8 +497,8 @@ ppc_isJumpishInstr instr ppc_jumpDestsOfInstr :: Instr -> [BlockId] ppc_jumpDestsOfInstr insn = case insn of - BCC _ id -> [id] - BCCFAR _ id -> [id] + BCC _ id _ -> [id] + BCCFAR _ id _ -> [id] BCTR targets _ -> [id | Just id <- targets] _ -> [] @@ -494,8 +509,8 @@ ppc_jumpDestsOfInstr insn ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr ppc_patchJumpInstr insn patchF = case insn of - BCC cc id -> BCC cc (patchF id) - BCCFAR cc id -> BCCFAR cc (patchF id) + BCC cc id p -> BCC cc (patchF id) p + BCCFAR cc id p -> BCCFAR cc (patchF id) p BCTR ids lbl -> BCTR (map (fmap patchF) ids) lbl _ -> insn @@ -642,7 +657,7 @@ ppc_mkJumpInstr -> [Instr] ppc_mkJumpInstr id - = [BCC ALWAYS id] + = [BCC ALWAYS id Nothing] -- | Take the source and destination from this reg -> reg move instruction @@ -671,12 +686,12 @@ makeFarBranches info_env blocks handleBlock addr (BasicBlock id instrs) = BasicBlock id (zipWith makeFar [addr..] 
instrs) - makeFar _ (BCC ALWAYS tgt) = BCC ALWAYS tgt - makeFar addr (BCC cond tgt) + makeFar _ (BCC ALWAYS tgt _) = BCC ALWAYS tgt Nothing + makeFar addr (BCC cond tgt p) | abs (addr - targetAddr) >= nearLimit - = BCCFAR cond tgt + = BCCFAR cond tgt p | otherwise - = BCC cond tgt + = BCC cond tgt p where Just targetAddr = lookupUFM blockAddressMap tgt makeFar _ other = other diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs index 7f8f407bd8..70735f9f85 100644 --- a/compiler/nativeGen/PPC/Ppr.hs +++ b/compiler/nativeGen/PPC/Ppr.hs @@ -28,7 +28,7 @@ import Hoopl.Label import BlockId import CLabel -import Unique ( pprUniqueAlways ) +import Unique ( pprUniqueAlways, getUnique ) import Platform import FastString import Outputable @@ -313,11 +313,13 @@ pprImm (HIGHESTA i) pprAddr :: AddrMode -> SDoc pprAddr (AddrRegReg r1 r2) - = pprReg r1 <+> text ", " <+> pprReg r2 - -pprAddr (AddrRegImm r1 (ImmInt i)) = hcat [ int i, char '(', pprReg r1, char ')' ] -pprAddr (AddrRegImm r1 (ImmInteger i)) = hcat [ integer i, char '(', pprReg r1, char ')' ] -pprAddr (AddrRegImm r1 imm) = hcat [ pprImm imm, char '(', pprReg r1, char ')' ] + = pprReg r1 <> char ',' <+> pprReg r2 +pprAddr (AddrRegImm r1 (ImmInt i)) + = hcat [ int i, char '(', pprReg r1, char ')' ] +pprAddr (AddrRegImm r1 (ImmInteger i)) + = hcat [ integer i, char '(', pprReg r1, char ')' ] +pprAddr (AddrRegImm r1 imm) + = hcat [ pprImm imm, char '(', pprReg r1, char ')' ] pprSectionAlign :: Section -> SDoc @@ -453,15 +455,27 @@ pprInstr (LD fmt reg addr) = hcat [ text ", ", pprAddr addr ] + pprInstr (LDFAR fmt reg (AddrRegImm source off)) = sdocWithPlatform $ \platform -> vcat [ pprInstr (ADDIS (tmpReg platform) source (HA off)), pprInstr (LD fmt reg (AddrRegImm (tmpReg platform) (LO off))) ] - pprInstr (LDFAR _ _ _) = panic "PPC.Ppr.pprInstr LDFAR: no match" +pprInstr (LDR fmt reg1 addr) = hcat [ + text "\tl", + case fmt of + II32 -> char 'w' + II64 -> char 'd' + _ -> panic "PPC.Ppr.Instr LDR: no 
match", + text "arx\t", + pprReg reg1, + text ", ", + pprAddr addr + ] + pprInstr (LA fmt reg addr) = hcat [ char '\t', text "l", @@ -511,6 +525,17 @@ pprInstr (STU fmt reg addr) = hcat [ text ", ", pprAddr addr ] +pprInstr (STC fmt reg1 addr) = hcat [ + text "\tst", + case fmt of + II32 -> char 'w' + II64 -> char 'd' + _ -> panic "PPC.Ppr.Instr STC: no match", + text "cx.\t", + pprReg reg1, + text ", ", + pprAddr addr + ] pprInstr (LIS reg imm) = hcat [ char '\t', text "lis", @@ -572,19 +597,25 @@ pprInstr (CMPL fmt reg ri) = hcat [ RIReg _ -> empty RIImm _ -> char 'i' ] -pprInstr (BCC cond blockid) = hcat [ +pprInstr (BCC cond blockid prediction) = hcat [ char '\t', text "b", pprCond cond, + pprPrediction prediction, char '\t', ppr lbl ] - where lbl = blockLbl blockid + where lbl = mkAsmTempLabel (getUnique blockid) + pprPrediction p = case p of + Nothing -> empty + Just True -> char '+' + Just False -> char '-' -pprInstr (BCCFAR cond blockid) = vcat [ +pprInstr (BCCFAR cond blockid prediction) = vcat [ hcat [ text "\tb", pprCond (condNegate cond), + neg_prediction, text "\t$+8" ], hcat [ @@ -592,7 +623,11 @@ pprInstr (BCCFAR cond blockid) = vcat [ ppr lbl ] ] - where lbl = blockLbl blockid + where lbl = mkAsmTempLabel (getUnique blockid) + neg_prediction = case prediction of + Nothing -> empty + Just True -> char '-' + Just False -> char '+' pprInstr (JMP lbl) -- We never jump to ForeignLabels; if we ever do, c.f. 
handling for "BL" @@ -744,6 +779,7 @@ pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [ ] pprInstr (AND reg1 reg2 ri) = pprLogic (sLit "and") reg1 reg2 ri pprInstr (ANDC reg1 reg2 reg3) = pprLogic (sLit "andc") reg1 reg2 (RIReg reg3) +pprInstr (NAND reg1 reg2 reg3) = pprLogic (sLit "nand") reg1 reg2 (RIReg reg3) pprInstr (OR reg1 reg2 ri) = pprLogic (sLit "or") reg1 reg2 ri pprInstr (XOR reg1 reg2 ri) = pprLogic (sLit "xor") reg1 reg2 ri @@ -925,6 +961,10 @@ pprInstr (FETCHPC reg) = vcat [ hcat [ text "1:\tmflr\t", pprReg reg ] ] +pprInstr HWSYNC = text "\tsync" + +pprInstr ISYNC = text "\tisync" + pprInstr LWSYNC = text "\tlwsync" pprInstr NOP = text "\tnop" |