summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Trommler <ptrommler@acm.org>2017-04-25 18:37:16 -0400
committerBen Gamari <ben@smart-cactus.org>2017-04-25 18:39:50 -0400
commit89a3241f708502e8fbcfaddbbe634790ad9cd02a (patch)
tree9264c8bb53a229ea2a6a55debd0088bb88354bd2
parent9373994acaf1b73fe0e7cf8e03594c63cec8d235 (diff)
downloadhaskell-89a3241f708502e8fbcfaddbbe634790ad9cd02a.tar.gz
PPC NCG: Implement callish prim ops
Provide PowerPC optimised implementations of callish prim ops. MO_?_QuotRem The generic implementation of quotient remainder prim ops uses a division and a remainder operation. There is no remainder on PowerPC and so we need to implement remainder "by hand" which results in a duplication of the divide operation when using the generic code. Avoid this duplication by implementing the prim op in the native code generator. MO_U_Mul2 Use PowerPC's instructions for long multiplication. Addition and subtraction Use PowerPC add/subtract with carry/overflow instructions MO_Clz and MO_Ctz Use PowerPC's CNTLZ instruction and implement count trailing zeros using count leading zeros MO_QuotRem2 Implement an algorithm given by Henry Warren in "Hacker's Delight" using PowerPC divide instruction. TODO: Use long division instructions when available (POWER7 and later). Test Plan: validate on AIX and 32-bit Linux Reviewers: simonmar, erikd, hvr, austin, bgamari Reviewed By: erikd, hvr, bgamari Subscribers: trofi, kgardas, thomie Differential Revision: https://phabricator.haskell.org/D2973
-rw-r--r--compiler/codeGen/StgCmmPrim.hs28
-rw-r--r--compiler/nativeGen/PIC.hs5
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs464
-rw-r--r--compiler/nativeGen/PPC/Instr.hs102
-rw-r--r--compiler/nativeGen/PPC/Ppr.hs196
5 files changed, 611 insertions, 184 deletions
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 0edde0659f..235109ffe9 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -815,33 +815,41 @@ type GenericOp = [CmmFormal] -> [CmmActual] -> FCode ()
callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp
callishPrimOpSupported dflags op
= case op of
- IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags))
+ IntQuotRemOp | ncg && (x86ish
+ || ppc) -> Left (MO_S_QuotRem (wordWidth dflags))
| otherwise -> Right (genericIntQuotRemOp dflags)
- WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags))
+ WordQuotRemOp | ncg && (x86ish
+ || ppc) -> Left (MO_U_QuotRem (wordWidth dflags))
| otherwise -> Right (genericWordQuotRemOp dflags)
- WordQuotRem2Op | (ncg && x86ish)
+ WordQuotRem2Op | (ncg && (x86ish
+ || ppc))
|| llvm -> Left (MO_U_QuotRem2 (wordWidth dflags))
| otherwise -> Right (genericWordQuotRem2Op dflags)
- WordAdd2Op | (ncg && x86ish)
+ WordAdd2Op | (ncg && (x86ish
+ || ppc))
|| llvm -> Left (MO_Add2 (wordWidth dflags))
| otherwise -> Right genericWordAdd2Op
- WordSubCOp | (ncg && x86ish)
+ WordSubCOp | (ncg && (x86ish
+ || ppc))
|| llvm -> Left (MO_SubWordC (wordWidth dflags))
| otherwise -> Right genericWordSubCOp
- IntAddCOp | (ncg && x86ish)
+ IntAddCOp | (ncg && (x86ish
+ || ppc))
|| llvm -> Left (MO_AddIntC (wordWidth dflags))
| otherwise -> Right genericIntAddCOp
- IntSubCOp | (ncg && x86ish)
+ IntSubCOp | (ncg && (x86ish
+ || ppc))
|| llvm -> Left (MO_SubIntC (wordWidth dflags))
| otherwise -> Right genericIntSubCOp
- WordMul2Op | ncg && x86ish
+ WordMul2Op | ncg && (x86ish
+ || ppc)
|| llvm -> Left (MO_U_Mul2 (wordWidth dflags))
| otherwise -> Right genericWordMul2Op
FloatFabsOp | (ncg && x86ish)
@@ -863,6 +871,10 @@ callishPrimOpSupported dflags op
ArchX86 -> True
ArchX86_64 -> True
_ -> False
+ ppc = case platformArch (targetPlatform dflags) of
+ ArchPPC -> True
+ ArchPPC_64 _ -> True
+ _ -> False
genericIntQuotRemOp :: DynFlags -> GenericOp
genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y]
diff --git a/compiler/nativeGen/PIC.hs b/compiler/nativeGen/PIC.hs
index babceac4f0..d6005745b3 100644
--- a/compiler/nativeGen/PIC.hs
+++ b/compiler/nativeGen/PIC.hs
@@ -521,7 +521,7 @@ pprGotDeclaration _ _ OSAIX
]
--- PPC 64 ELF v1needs a Table Of Contents (TOC) on Linux
+-- PPC 64 ELF v1 needs a Table Of Contents (TOC) on Linux
pprGotDeclaration _ (ArchPPC_64 ELF_V1) OSLinux
= text ".section \".toc\",\"aw\""
-- In ELF v2 we also need to tell the assembler that we want ABI
@@ -814,7 +814,8 @@ initializePicBase_ppc ArchPPC os picReg
fetchPC (BasicBlock bID insns) =
BasicBlock bID (PPC.FETCHPC picReg
: PPC.ADDIS picReg picReg (PPC.HA gotOffset)
- : PPC.ADDI picReg picReg (PPC.LO gotOffset)
+ : PPC.ADD picReg picReg
+ (PPC.RIImm (PPC.LO gotOffset))
: PPC.MR PPC.r30 picReg
: insns)
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 1f06c7bd3e..1467267842 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -359,7 +359,7 @@ iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
r2hi = getHiVRegFromLo r2lo
code = code1 `appOL`
code2 `appOL`
- toOL [ SUBFC rlo r2lo r1lo,
+ toOL [ SUBFC rlo r2lo (RIReg r1lo),
SUBFE rhi r2hi r1hi ]
return (ChildCode64 code rlo)
@@ -589,42 +589,37 @@ getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
MO_Add rep -> trivialCode rep True ADD x y
MO_Sub rep ->
- case y of -- subfi ('substract from' with immediate) doesn't exist
+ case y of
CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
-> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
- _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
-
- MO_Mul rep
- | arch32 -> trivialCode rep True MULLW x y
- | otherwise -> trivialCode rep True MULLD x y
-
- MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
- MO_S_MulMayOflo W64 -> trivialCodeNoImm' II64 MULLD_MayOflo x y
-
- MO_S_MulMayOflo _ -> panic "S_MulMayOflo: (II8/16) not implemented"
- MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented"
+ _ -> case x of
+ CmmLit (CmmInt imm _)
+ | Just _ <- makeImmediate rep True imm
+ -- subfi ('substract from' with immediate) doesn't exist
+ -> trivialCode rep True SUBFC y x
+ _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
+
+ MO_Mul rep -> shiftMulCode rep True MULL x y
+ MO_S_MulMayOflo rep -> do
+ (src1, code1) <- getSomeReg x
+ (src2, code2) <- getSomeReg y
+ let
+ format = intFormat rep
+ code dst = code1 `appOL` code2
+ `appOL` toOL [ MULLO format dst src1 src2
+ , MFOV format dst
+ ]
+ return (Any format code)
- MO_S_Quot rep
- | arch32 -> trivialCodeNoImm' (intFormat rep) DIVW
- (extendSExpr dflags rep x) (extendSExpr dflags rep y)
- | otherwise -> trivialCodeNoImm' (intFormat rep) DIVD
+ MO_S_Quot rep -> trivialCodeNoImmSign (intFormat rep) True DIV
(extendSExpr dflags rep x) (extendSExpr dflags rep y)
- MO_U_Quot rep
- | arch32 -> trivialCodeNoImm' (intFormat rep) DIVWU
- (extendUExpr dflags rep x) (extendUExpr dflags rep y)
- | otherwise -> trivialCodeNoImm' (intFormat rep) DIVDU
+ MO_U_Quot rep -> trivialCodeNoImmSign (intFormat rep) False DIV
(extendUExpr dflags rep x) (extendUExpr dflags rep y)
- MO_S_Rem rep
- | arch32 -> remainderCode rep DIVW (extendSExpr dflags rep x)
- (extendSExpr dflags rep y)
- | otherwise -> remainderCode rep DIVD (extendSExpr dflags rep x)
+ MO_S_Rem rep -> remainderCode rep True (extendSExpr dflags rep x)
(extendSExpr dflags rep y)
- MO_U_Rem rep
- | arch32 -> remainderCode rep DIVWU (extendSExpr dflags rep x)
- (extendSExpr dflags rep y)
- | otherwise -> remainderCode rep DIVDU (extendSExpr dflags rep x)
- (extendSExpr dflags rep y)
+ MO_U_Rem rep -> remainderCode rep False (extendUExpr dflags rep x)
+ (extendUExpr dflags rep y)
MO_And rep -> case y of
(CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
@@ -639,17 +634,15 @@ getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
MO_Or rep -> trivialCode rep False OR x y
MO_Xor rep -> trivialCode rep False XOR x y
- MO_Shl rep -> shiftCode rep SL x y
- MO_S_Shr rep -> shiftCode rep SRA (extendSExpr dflags rep x) y
- MO_U_Shr rep -> shiftCode rep SR (extendUExpr dflags rep x) y
+ MO_Shl rep -> shiftMulCode rep False SL x y
+ MO_S_Shr rep -> shiftMulCode rep False SRA (extendSExpr dflags rep x) y
+ MO_U_Shr rep -> shiftMulCode rep False SR (extendUExpr dflags rep x) y
_ -> panic "PPC.CodeGen.getRegister: no match"
where
triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
- arch32 = target32Bit $ targetPlatform dflags
-
getRegister' _ (CmmLit (CmmInt i rep))
| Just imm <- makeImmediate rep True i
= let
@@ -1090,22 +1083,370 @@ genCondJump id bool = do
-- Now the biggest nightmare---calls. Most of the nastiness is buried in
-- @get_arg@, which moves the arguments to the correct registers/stack
-- locations. Apart from that, the code is easy.
---
--- (If applicable) Do not fill the delay slots here; you will confuse the
--- register allocator.
-genCCall :: ForeignTarget -- function to call
+genCCall :: ForeignTarget -- function to call
-> [CmmFormal] -- where to put the result
-> [CmmActual] -- arguments (of mixed type)
-> NatM InstrBlock
+genCCall (PrimTarget MO_WriteBarrier) _ _
+ = return $ unitOL LWSYNC
+
+genCCall (PrimTarget MO_Touch) _ _
+ = return $ nilOL
+
+genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
+ = return $ nilOL
+
+genCCall (PrimTarget (MO_Clz width)) [dst] [src]
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ reg_dst = getRegisterReg platform (CmmLocal dst)
+ if target32Bit platform && width == W64
+ then do
+ ChildCode64 code vr_lo <- iselExpr64 src
+ lbl1 <- getBlockIdNat
+ lbl2 <- getBlockIdNat
+ lbl3 <- getBlockIdNat
+ let vr_hi = getHiVRegFromLo vr_lo
+ cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
+ , BCC NE lbl2
+ , BCC ALWAYS lbl1
+
+ , NEWBLOCK lbl1
+ , CNTLZ II32 reg_dst vr_lo
+ , ADD reg_dst reg_dst (RIImm (ImmInt 32))
+ , BCC ALWAYS lbl3
+
+ , NEWBLOCK lbl2
+ , CNTLZ II32 reg_dst vr_hi
+ , BCC ALWAYS lbl3
+
+ , NEWBLOCK lbl3
+ ]
+ return $ code `appOL` cntlz
+ else do
+ let format = if width == W64 then II64 else II32
+ (s_reg, s_code) <- getSomeReg src
+ (pre, reg , post) <-
+ case width of
+ W64 -> return (nilOL, s_reg, nilOL)
+ W32 -> return (nilOL, s_reg, nilOL)
+ W16 -> do
+ reg_tmp <- getNewRegNat format
+ return
+ ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535))
+ , reg_tmp
+ , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16)))
+ )
+ W8 -> do
+ reg_tmp <- getNewRegNat format
+ return
+ ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255))
+ , reg_tmp
+ , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24)))
+ )
+ _ -> panic "genCall: Clz wrong format"
+ let cntlz = unitOL (CNTLZ format reg_dst reg)
+ return $ s_code `appOL` pre `appOL` cntlz `appOL` post
+
+genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ reg_dst = getRegisterReg platform (CmmLocal dst)
+ if target32Bit platform && width == W64
+ then do
+ let format = II32
+ ChildCode64 code vr_lo <- iselExpr64 src
+ lbl1 <- getBlockIdNat
+ lbl2 <- getBlockIdNat
+ lbl3 <- getBlockIdNat
+ x' <- getNewRegNat format
+ x'' <- getNewRegNat format
+ r' <- getNewRegNat format
+ cnttzlo <- cnttz format reg_dst vr_lo
+ let vr_hi = getHiVRegFromLo vr_lo
+ cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
+ , BCC NE lbl2
+ , BCC ALWAYS lbl1
+
+ , NEWBLOCK lbl1
+ , ADD x' vr_hi (RIImm (ImmInt (-1)))
+ , ANDC x'' x' vr_hi
+ , CNTLZ format r' x''
+ -- 32 + (32 - clz(x''))
+ , SUBFC reg_dst r' (RIImm (ImmInt 64))
+ , BCC ALWAYS lbl3
+
+ , NEWBLOCK lbl2
+ ]
+ `appOL` cnttzlo `appOL`
+ toOL [ BCC ALWAYS lbl3
+
+ , NEWBLOCK lbl3
+ ]
+ return $ code `appOL` cnttz64
+ else do
+ let format = if width == W64 then II64 else II32
+ (s_reg, s_code) <- getSomeReg src
+ (reg_ctz, pre_code) <-
+ case width of
+ W64 -> return (s_reg, nilOL)
+ W32 -> return (s_reg, nilOL)
+ W16 -> do
+ reg_tmp <- getNewRegNat format
+ return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1))
+ W8 -> do
+ reg_tmp <- getNewRegNat format
+ return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256)))
+ _ -> panic "genCall: Ctz wrong format"
+ ctz_code <- cnttz format reg_dst reg_ctz
+ return $ s_code `appOL` pre_code `appOL` ctz_code
+ where
+ -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1))
+ -- see Henry S. Warren, Hacker's Delight, p 107
+ cnttz format dst src = do
+ let format_bits = 8 * formatInBytes format
+ x' <- getNewRegNat format
+ x'' <- getNewRegNat format
+ r' <- getNewRegNat format
+ return $ toOL [ ADD x' src (RIImm (ImmInt (-1)))
+ , ANDC x'' x' src
+ , CNTLZ format r' x''
+ , SUBFC dst r' (RIImm (ImmInt (format_bits)))
+ ]
+
genCCall target dest_regs argsAndHints
= do dflags <- getDynFlags
- genCCall' dflags (platformToGCP (targetPlatform dflags))
- target dest_regs argsAndHints
+ let platform = targetPlatform dflags
+ case target of
+ PrimTarget (MO_S_QuotRem width) -> divOp1 platform True width
+ dest_regs argsAndHints
+ PrimTarget (MO_U_QuotRem width) -> divOp1 platform False width
+ dest_regs argsAndHints
+ PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs
+ argsAndHints
+ PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
+ argsAndHints
+ PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
+ PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
+ PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
+ dest_regs argsAndHints
+ PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width
+ dest_regs argsAndHints
+ _ -> genCCall' dflags (platformToGCP platform)
+ target dest_regs argsAndHints
+ where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y]
+ = do let reg_q = getRegisterReg platform (CmmLocal res_q)
+ reg_r = getRegisterReg platform (CmmLocal res_r)
+ fmt = intFormat width
+ (x_reg, x_code) <- getSomeReg arg_x
+ (y_reg, y_code) <- getSomeReg arg_y
+ return $ y_code `appOL` x_code
+ `appOL` toOL [ DIV fmt signed reg_q x_reg y_reg
+ , MULL fmt reg_r reg_q (RIReg y_reg)
+ , SUBF reg_r reg_r x_reg
+ ]
+
+ divOp1 _ _ _ _ _
+ = panic "genCCall: Wrong number of arguments for divOp1"
+ divOp2 platform width [res_q, res_r]
+ [arg_x_high, arg_x_low, arg_y]
+ = do let reg_q = getRegisterReg platform (CmmLocal res_q)
+ reg_r = getRegisterReg platform (CmmLocal res_r)
+ fmt = intFormat width
+ half = 4 * (formatInBytes fmt)
+ (xh_reg, xh_code) <- getSomeReg arg_x_high
+ (xl_reg, xl_code) <- getSomeReg arg_x_low
+ (y_reg, y_code) <- getSomeReg arg_y
+ s <- getNewRegNat fmt
+ b <- getNewRegNat fmt
+ v <- getNewRegNat fmt
+ vn1 <- getNewRegNat fmt
+ vn0 <- getNewRegNat fmt
+ un32 <- getNewRegNat fmt
+ tmp <- getNewRegNat fmt
+ un10 <- getNewRegNat fmt
+ un1 <- getNewRegNat fmt
+ un0 <- getNewRegNat fmt
+ q1 <- getNewRegNat fmt
+ rhat <- getNewRegNat fmt
+ tmp1 <- getNewRegNat fmt
+ q0 <- getNewRegNat fmt
+ un21 <- getNewRegNat fmt
+ again1 <- getBlockIdNat
+ no1 <- getBlockIdNat
+ then1 <- getBlockIdNat
+ endif1 <- getBlockIdNat
+ again2 <- getBlockIdNat
+ no2 <- getBlockIdNat
+ then2 <- getBlockIdNat
+ endif2 <- getBlockIdNat
+ return $ y_code `appOL` xl_code `appOL` xh_code `appOL`
+ -- see Hacker's Delight p 196 Figure 9-3
+ toOL [ -- b = 2 ^ (bits_in_word / 2)
+ LI b (ImmInt 1)
+ , SL fmt b b (RIImm (ImmInt half))
+ -- s = clz(y)
+ , CNTLZ fmt s y_reg
+ -- v = y << s
+ , SL fmt v y_reg (RIReg s)
+ -- vn1 = upper half of v
+ , SR fmt vn1 v (RIImm (ImmInt half))
+ -- vn0 = lower half of v
+ , CLRLI fmt vn0 v half
+ -- un32 = (u1 << s)
+ -- | (u0 >> (bits_in_word - s))
+ , SL fmt un32 xh_reg (RIReg s)
+ , SUBFC tmp s
+ (RIImm (ImmInt (8 * formatInBytes fmt)))
+ , SR fmt tmp xl_reg (RIReg tmp)
+ , OR un32 un32 (RIReg tmp)
+ -- un10 = u0 << s
+ , SL fmt un10 xl_reg (RIReg s)
+ -- un1 = upper half of un10
+ , SR fmt un1 un10 (RIImm (ImmInt half))
+ -- un0 = lower half of un10
+ , CLRLI fmt un0 un10 half
+ -- q1 = un32/vn1
+ , DIV fmt False q1 un32 vn1
+ -- rhat = un32 - q1*vn1
+ , MULL fmt tmp q1 (RIReg vn1)
+ , SUBF rhat tmp un32
+ , BCC ALWAYS again1
+
+ , NEWBLOCK again1
+ -- if (q1 >= b || q1*vn0 > b*rhat + un1)
+ , CMPL fmt q1 (RIReg b)
+ , BCC GEU then1
+ , BCC ALWAYS no1
+
+ , NEWBLOCK no1
+ , MULL fmt tmp q1 (RIReg vn0)
+ , SL fmt tmp1 rhat (RIImm (ImmInt half))
+ , ADD tmp1 tmp1 (RIReg un1)
+ , CMPL fmt tmp (RIReg tmp1)
+ , BCC LEU endif1
+ , BCC ALWAYS then1
+
+ , NEWBLOCK then1
+ -- q1 = q1 - 1
+ , ADD q1 q1 (RIImm (ImmInt (-1)))
+ -- rhat = rhat + vn1
+ , ADD rhat rhat (RIReg vn1)
+ -- if (rhat < b) goto again1
+ , CMPL fmt rhat (RIReg b)
+ , BCC LTT again1
+ , BCC ALWAYS endif1
+
+ , NEWBLOCK endif1
+ -- un21 = un32*b + un1 - q1*v
+ , SL fmt un21 un32 (RIImm (ImmInt half))
+ , ADD un21 un21 (RIReg un1)
+ , MULL fmt tmp q1 (RIReg v)
+ , SUBF un21 tmp un21
+ -- compute second quotient digit
+ -- q0 = un21/vn1
+ , DIV fmt False q0 un21 vn1
+ -- rhat = un21- q0*vn1
+ , MULL fmt tmp q0 (RIReg vn1)
+ , SUBF rhat tmp un21
+ , BCC ALWAYS again2
+
+ , NEWBLOCK again2
+ -- if (q0>b || q0*vn0 > b*rhat + un0)
+ , CMPL fmt q0 (RIReg b)
+ , BCC GEU then2
+ , BCC ALWAYS no2
+
+ , NEWBLOCK no2
+ , MULL fmt tmp q0 (RIReg vn0)
+ , SL fmt tmp1 rhat (RIImm (ImmInt half))
+ , ADD tmp1 tmp1 (RIReg un0)
+ , CMPL fmt tmp (RIReg tmp1)
+ , BCC LEU endif2
+ , BCC ALWAYS then2
+
+ , NEWBLOCK then2
+ -- q0 = q0 - 1
+ , ADD q0 q0 (RIImm (ImmInt (-1)))
+ -- rhat = rhat + vn1
+ , ADD rhat rhat (RIReg vn1)
+ -- if (rhat<b) goto again2
+ , CMPL fmt rhat (RIReg b)
+ , BCC LTT again2
+ , BCC ALWAYS endif2
+
+ , NEWBLOCK endif2
+ -- compute remainder
+ -- r = (un21*b + un0 - q0*v) >> s
+ , SL fmt reg_r un21 (RIImm (ImmInt half))
+ , ADD reg_r reg_r (RIReg un0)
+ , MULL fmt tmp q0 (RIReg v)
+ , SUBF reg_r tmp reg_r
+ , SR fmt reg_r reg_r (RIReg s)
+ -- compute quotient
+ -- q = q1*b + q0
+ , SL fmt reg_q q1 (RIImm (ImmInt half))
+ , ADD reg_q reg_q (RIReg q0)
+ ]
+ divOp2 _ _ _ _
+ = panic "genCCall: Wrong number of arguments for divOp2"
+ multOp2 platform width [res_h, res_l] [arg_x, arg_y]
+ = do let reg_h = getRegisterReg platform (CmmLocal res_h)
+ reg_l = getRegisterReg platform (CmmLocal res_l)
+ fmt = intFormat width
+ (x_reg, x_code) <- getSomeReg arg_x
+ (y_reg, y_code) <- getSomeReg arg_y
+ return $ y_code `appOL` x_code
+ `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg)
+ , MULHU fmt reg_h x_reg y_reg
+ ]
+ multOp2 _ _ _ _
+ = panic "genCall: Wrong number of arguments for multOp2"
+ add2Op platform [res_h, res_l] [arg_x, arg_y]
+ = do let reg_h = getRegisterReg platform (CmmLocal res_h)
+ reg_l = getRegisterReg platform (CmmLocal res_l)
+ (x_reg, x_code) <- getSomeReg arg_x
+ (y_reg, y_code) <- getSomeReg arg_y
+ return $ y_code `appOL` x_code
+ `appOL` toOL [ LI reg_h (ImmInt 0)
+ , ADDC reg_l x_reg y_reg
+ , ADDZE reg_h reg_h
+ ]
+ add2Op _ _ _
+ = panic "genCCall: Wrong number of arguments/results for add2"
+
+ -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1,
+ -- which is 0 for borrow and 1 otherwise. We need 1 and 0
+ -- so xor with 1.
+ subcOp platform [res_r, res_c] [arg_x, arg_y]
+ = do let reg_r = getRegisterReg platform (CmmLocal res_r)
+ reg_c = getRegisterReg platform (CmmLocal res_c)
+ (x_reg, x_code) <- getSomeReg arg_x
+ (y_reg, y_code) <- getSomeReg arg_y
+ return $ y_code `appOL` x_code
+ `appOL` toOL [ LI reg_c (ImmInt 0)
+ , SUBFC reg_r y_reg (RIReg x_reg)
+ , ADDZE reg_c reg_c
+ , XOR reg_c reg_c (RIImm (ImmInt 1))
+ ]
+ subcOp _ _ _
+ = panic "genCCall: Wrong number of arguments/results for subc"
+ addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y]
+ = do let reg_r = getRegisterReg platform (CmmLocal res_r)
+ reg_c = getRegisterReg platform (CmmLocal res_c)
+ (x_reg, x_code) <- getSomeReg arg_x
+ (y_reg, y_code) <- getSomeReg arg_y
+ return $ y_code `appOL` x_code
+ `appOL` toOL [ instr reg_r y_reg x_reg,
+ -- SUBFO argument order reversed!
+ MFOV (intFormat width) reg_c
+ ]
+ addSubCOp _ _ _ _ _
+ = panic "genCall: Wrong number of arguments/results for addC"
-- TODO: replace 'Int' by an enum such as 'PPC_64ABI'
data GenCCallPlatform = GCPLinux | GCPDarwin | GCPLinux64ELF !Int | GCPAIX
- deriving Eq
platformToGCP :: Platform -> GenCCallPlatform
platformToGCP platform = case platformOS platform of
@@ -1175,15 +1516,6 @@ genCCall'
-}
-genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _
- = return $ unitOL LWSYNC
-
-genCCall' _ _ (PrimTarget MO_Touch) _ _
- = return $ nilOL
-
-genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _
- = return $ nilOL
-
genCCall' dflags gcp target dest_regs args
= ASSERT(not $ any (`elem` [II16]) $ map cmmTypeFormat argReps)
-- we rely on argument promotion in the codeGen
@@ -1767,21 +2099,22 @@ trivialCode rep _ instr x y = do
let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
return (Any (intFormat rep) code)
-shiftCode
+shiftMulCode
:: Width
+ -> Bool
-> (Format-> Reg -> Reg -> RI -> Instr)
-> CmmExpr
-> CmmExpr
-> NatM Register
-shiftCode width instr x (CmmLit (CmmInt y _))
- | Just imm <- makeImmediate width False y
+shiftMulCode width sign instr x (CmmLit (CmmInt y _))
+ | Just imm <- makeImmediate width sign y
= do
(src1, code1) <- getSomeReg x
let format = intFormat width
let code dst = code1 `snocOL` instr format dst src1 (RIImm imm)
return (Any format code)
-shiftCode width instr x y = do
+shiftMulCode width _ instr x y = do
(src1, code1) <- getSomeReg x
(src2, code2) <- getSomeReg y
let format = intFormat width
@@ -1800,6 +2133,12 @@ trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
-> CmmExpr -> CmmExpr -> NatM Register
trivialCodeNoImm format instr x y = trivialCodeNoImm' format (instr format) x y
+trivialCodeNoImmSign :: Format -> Bool
+ -> (Format -> Bool -> Reg -> Reg -> Reg -> Instr)
+ -> CmmExpr -> CmmExpr -> NatM Register
+trivialCodeNoImmSign format sgn instr x y
+ = trivialCodeNoImm' format (instr format sgn) x y
+
trivialUCode
:: Format
@@ -1813,19 +2152,16 @@ trivialUCode rep instr x = do
-- There is no "remainder" instruction on the PPC, so we have to do
-- it the hard way.
--- The "div" parameter is the division instruction to use (DIVW or DIVWU)
+-- The "sgn" parameter is the signedness for the division instruction
-remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-remainderCode rep div x y = do
- dflags <- getDynFlags
- let mull_instr = if target32Bit $ targetPlatform dflags then MULLW
- else MULLD
+remainderCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
+remainderCode rep sgn x y = do
+ let fmt = intFormat rep
(src1, code1) <- getSomeReg x
(src2, code2) <- getSomeReg y
let code dst = code1 `appOL` code2 `appOL` toOL [
- div dst src1 src2,
- mull_instr dst dst (RIReg src2),
+ DIV fmt sgn dst src1 src2,
+ MULL fmt dst dst (RIReg src2),
SUBF dst dst src1
]
return (Any (intFormat rep) code)
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index ae7d6bf260..e395b388e9 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -210,40 +210,34 @@ data Instr
| BCTRL [Reg]
| ADD Reg Reg RI -- dst, src1, src2
+ | ADDO Reg Reg Reg -- add and set overflow
| ADDC Reg Reg Reg -- (carrying) dst, src1, src2
- | ADDE Reg Reg Reg -- (extend) dst, src1, src2
- | ADDI Reg Reg Imm -- Add Immediate dst, src1, src2
+ | ADDE Reg Reg Reg -- (extended) dst, src1, src2
+ | ADDZE Reg Reg -- (to zero extended) dst, src
| ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
| SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
- | SUBFC Reg Reg Reg -- (carrying) dst, src1, src2 ; dst = src2 - src1
- | SUBFE Reg Reg Reg -- (extend) dst, src1, src2 ; dst = src2 - src1
- | MULLD Reg Reg RI
- | MULLW Reg Reg RI
- | DIVW Reg Reg Reg
- | DIVD Reg Reg Reg
- | DIVWU Reg Reg Reg
- | DIVDU Reg Reg Reg
-
- | MULLW_MayOflo Reg Reg Reg
- -- dst = 1 if src1 * src2 overflows
- -- pseudo-instruction; pretty-printed as:
- -- mullwo. dst, src1, src2
+ | SUBFO Reg Reg Reg -- subtract from and set overflow
+ | SUBFC Reg Reg RI -- (carrying) dst, src1, src2 ;
+ -- dst = src2 - src1
+ | SUBFE Reg Reg Reg -- (extended) dst, src1, src2 ;
+ -- dst = src2 - src1
+ | MULL Format Reg Reg RI
+ | MULLO Format Reg Reg Reg -- multiply and set overflow
+ | MFOV Format Reg -- move overflow bit (1|33) to register
+ -- pseudo-instruction; pretty printed as
-- mfxer dst
- -- rlwinm dst, dst, 2, 31,31
- | MULLD_MayOflo Reg Reg Reg
- -- dst = 1 if src1 * src2 overflows
- -- pseudo-instruction; pretty-printed as:
- -- mulldo. dst, src1, src2
- -- mfxer dst
- -- rlwinm dst, dst, 2, 31,31
-
+ -- extr[w|d]i dst, dst, 1, [1|33]
+ | MULHU Format Reg Reg Reg
+ | DIV Format Bool Reg Reg Reg
| AND Reg Reg RI -- dst, src1, src2
+ | ANDC Reg Reg Reg -- AND with complement, dst = src1 & ~ src2
| OR Reg Reg RI -- dst, src1, src2
| ORIS Reg Reg Imm -- OR Immediate Shifted dst, src1, src2
| XOR Reg Reg RI -- dst, src1, src2
| XORIS Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
| EXTS Format Reg Reg
+ | CNTLZ Format Reg Reg
| NEG Reg Reg
| NOT Reg Reg
@@ -253,6 +247,7 @@ data Instr
| SRA Format Reg Reg RI -- shift right arithmetic
| RLWINM Reg Reg Int Int Int -- Rotate Left Word Immediate then AND with Mask
+ | CLRLI Format Reg Reg Int -- clear left immediate (extended mnemonic)
| CLRRI Format Reg Reg Int -- clear right immediate (extended mnemonic)
| FADD Format Reg Reg Reg
@@ -275,9 +270,6 @@ data Instr
| MFLR Reg -- move from link register
| FETCHPC Reg -- pseudo-instruction:
-- bcl to next insn, mflr reg
- | FETCHTOC Reg CLabel -- pseudo-instruction
- -- add TOC offset to address in r12
- -- print .localentry for label
| LWSYNC -- memory barrier
| NOP -- no operation, PowerPC 64 bit
-- needs this as place holder to
@@ -313,36 +305,37 @@ ppc_regUsageOfInstr platform instr
BCTRL params -> usage (params, callClobberedRegs platform)
ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ ADDO reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
ADDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
ADDE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- ADDI reg1 reg2 _ -> usage ([reg2], [reg1])
+ ADDZE reg1 reg2 -> usage ([reg2], [reg1])
ADDIS reg1 reg2 _ -> usage ([reg2], [reg1])
SUBF reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- SUBFC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
+ SUBFO reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
+ SUBFC reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
SUBFE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- MULLD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- DIVW reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- DIVD reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- DIVWU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- DIVDU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
-
- MULLW_MayOflo reg1 reg2 reg3
- -> usage ([reg2,reg3], [reg1])
- MULLD_MayOflo reg1 reg2 reg3
- -> usage ([reg2,reg3], [reg1])
+ MULL _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ MULLO _ reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
+ MFOV _ reg -> usage ([], [reg])
+ MULHU _ reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
+ DIV _ _ reg1 reg2 reg3
+ -> usage ([reg2,reg3], [reg1])
+
AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
+ ANDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
ORIS reg1 reg2 _ -> usage ([reg2], [reg1])
XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
XORIS reg1 reg2 _ -> usage ([reg2], [reg1])
EXTS _ reg1 reg2 -> usage ([reg2], [reg1])
+ CNTLZ _ reg1 reg2 -> usage ([reg2], [reg1])
NEG reg1 reg2 -> usage ([reg2], [reg1])
NOT reg1 reg2 -> usage ([reg2], [reg1])
SL _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
SR _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
SRA _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
RLWINM reg1 reg2 _ _ _ -> usage ([reg2], [reg1])
+ CLRLI _ reg1 reg2 _ -> usage ([reg2], [reg1])
CLRRI _ reg1 reg2 _ -> usage ([reg2], [reg1])
FADD _ r1 r2 r3 -> usage ([r2,r3], [r1])
@@ -358,7 +351,6 @@ ppc_regUsageOfInstr platform instr
MFCR reg -> usage ([], [reg])
MFLR reg -> usage ([], [reg])
FETCHPC reg -> usage ([], [reg])
- FETCHTOC reg _ -> usage ([], [reg])
UPDATE_SP _ _ -> usage ([], [sp])
_ -> noUsage
where
@@ -401,29 +393,33 @@ ppc_patchRegsOfInstr instr env
BL imm argRegs -> BL imm argRegs -- argument regs
BCTRL argRegs -> BCTRL argRegs -- cannot be remapped
ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri)
+ ADDO reg1 reg2 reg3 -> ADDO (env reg1) (env reg2) (env reg3)
ADDC reg1 reg2 reg3 -> ADDC (env reg1) (env reg2) (env reg3)
ADDE reg1 reg2 reg3 -> ADDE (env reg1) (env reg2) (env reg3)
- ADDI reg1 reg2 imm -> ADDI (env reg1) (env reg2) imm
+ ADDZE reg1 reg2 -> ADDZE (env reg1) (env reg2)
ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm
SUBF reg1 reg2 reg3 -> SUBF (env reg1) (env reg2) (env reg3)
- SUBFC reg1 reg2 reg3 -> SUBFC (env reg1) (env reg2) (env reg3)
+ SUBFO reg1 reg2 reg3 -> SUBFO (env reg1) (env reg2) (env reg3)
+ SUBFC reg1 reg2 ri -> SUBFC (env reg1) (env reg2) (fixRI ri)
SUBFE reg1 reg2 reg3 -> SUBFE (env reg1) (env reg2) (env reg3)
- MULLD reg1 reg2 ri -> MULLD (env reg1) (env reg2) (fixRI ri)
- MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri)
- DIVW reg1 reg2 reg3 -> DIVW (env reg1) (env reg2) (env reg3)
- DIVD reg1 reg2 reg3 -> DIVD (env reg1) (env reg2) (env reg3)
- DIVWU reg1 reg2 reg3 -> DIVWU (env reg1) (env reg2) (env reg3)
- DIVDU reg1 reg2 reg3 -> DIVDU (env reg1) (env reg2) (env reg3)
- MULLW_MayOflo reg1 reg2 reg3
- -> MULLW_MayOflo (env reg1) (env reg2) (env reg3)
- MULLD_MayOflo reg1 reg2 reg3
- -> MULLD_MayOflo (env reg1) (env reg2) (env reg3)
+ MULL fmt reg1 reg2 ri
+ -> MULL fmt (env reg1) (env reg2) (fixRI ri)
+ MULLO fmt reg1 reg2 reg3
+ -> MULLO fmt (env reg1) (env reg2) (env reg3)
+ MFOV fmt reg -> MFOV fmt (env reg)
+ MULHU fmt reg1 reg2 reg3
+ -> MULHU fmt (env reg1) (env reg2) (env reg3)
+ DIV fmt sgn reg1 reg2 reg3
+ -> DIV fmt sgn (env reg1) (env reg2) (env reg3)
+
AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri)
+ ANDC reg1 reg2 reg3 -> ANDC (env reg1) (env reg2) (env reg3)
OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri)
ORIS reg1 reg2 imm -> ORIS (env reg1) (env reg2) imm
XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri)
XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm
EXTS fmt reg1 reg2 -> EXTS fmt (env reg1) (env reg2)
+ CNTLZ fmt reg1 reg2 -> CNTLZ fmt (env reg1) (env reg2)
NEG reg1 reg2 -> NEG (env reg1) (env reg2)
NOT reg1 reg2 -> NOT (env reg1) (env reg2)
SL fmt reg1 reg2 ri
@@ -434,6 +430,7 @@ ppc_patchRegsOfInstr instr env
-> SRA fmt (env reg1) (env reg2) (fixRI ri)
RLWINM reg1 reg2 sh mb me
-> RLWINM (env reg1) (env reg2) sh mb me
+ CLRLI fmt reg1 reg2 n -> CLRLI fmt (env reg1) (env reg2) n
CLRRI fmt reg1 reg2 n -> CLRRI fmt (env reg1) (env reg2) n
FADD fmt r1 r2 r3 -> FADD fmt (env r1) (env r2) (env r3)
FSUB fmt r1 r2 r3 -> FSUB fmt (env r1) (env r2) (env r3)
@@ -448,7 +445,6 @@ ppc_patchRegsOfInstr instr env
MFCR reg -> MFCR (env reg)
MFLR reg -> MFLR (env reg)
FETCHPC reg -> FETCHPC (env reg)
- FETCHTOC reg lab -> FETCHTOC (env reg) lab
_ -> instr
where
fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs
index 0a1657ddf1..025dfaf244 100644
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ b/compiler/nativeGen/PPC/Ppr.hs
@@ -229,20 +229,20 @@ pprReg r
pprFormat :: Format -> SDoc
pprFormat x
= ptext (case x of
- II8 -> sLit "b"
- II16 -> sLit "h"
- II32 -> sLit "w"
- II64 -> sLit "d"
- FF32 -> sLit "fs"
- FF64 -> sLit "fd"
- _ -> panic "PPC.Ppr.pprFormat: no match")
+ II8 -> sLit "b"
+ II16 -> sLit "h"
+ II32 -> sLit "w"
+ II64 -> sLit "d"
+ FF32 -> sLit "fs"
+ FF64 -> sLit "fd"
+ _ -> panic "PPC.Ppr.pprFormat: no match")
pprCond :: Cond -> SDoc
pprCond c
= ptext (case c of {
ALWAYS -> sLit "";
- EQQ -> sLit "eq"; NE -> sLit "ne";
+ EQQ -> sLit "eq"; NE -> sLit "ne";
LTT -> sLit "lt"; GE -> sLit "ge";
GTT -> sLit "gt"; LE -> sLit "le";
LU -> sLit "lt"; GEU -> sLit "ge";
@@ -493,7 +493,6 @@ pprInstr (STFAR fmt reg (AddrRegImm source off)) =
pprInstr (ADDIS (tmpReg platform) source (HA off)),
pprInstr (ST fmt reg (AddrRegImm (tmpReg platform) (LO off)))
]
-
pprInstr (STFAR _ _ _) =
panic "PPC.Ppr.pprInstr STFAR: no match"
pprInstr (STU fmt reg addr) = hcat [
@@ -638,9 +637,9 @@ pprInstr (BCTRL _) = hcat [
text "bctrl"
]
pprInstr (ADD reg1 reg2 ri) = pprLogic (sLit "add") reg1 reg2 ri
-pprInstr (ADDI reg1 reg2 imm) = hcat [
+pprInstr (ADDIS reg1 reg2 imm) = hcat [
char '\t',
- text "addi",
+ text "addis",
char '\t',
pprReg reg1,
text ", ",
@@ -648,50 +647,85 @@ pprInstr (ADDI reg1 reg2 imm) = hcat [
text ", ",
pprImm imm
]
-pprInstr (ADDIS reg1 reg2 imm) = hcat [
+
+pprInstr (ADDO reg1 reg2 reg3) = pprLogic (sLit "addo") reg1 reg2 (RIReg reg3)
+pprInstr (ADDC reg1 reg2 reg3) = pprLogic (sLit "addc") reg1 reg2 (RIReg reg3)
+pprInstr (ADDE reg1 reg2 reg3) = pprLogic (sLit "adde") reg1 reg2 (RIReg reg3)
+pprInstr (ADDZE reg1 reg2) = pprUnary (sLit "addze") reg1 reg2
+pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3)
+pprInstr (SUBFO reg1 reg2 reg3) = pprLogic (sLit "subfo") reg1 reg2 (RIReg reg3)
+pprInstr (SUBFC reg1 reg2 ri) = hcat [
char '\t',
- text "addis",
+ text "subf",
+ case ri of
+ RIReg _ -> empty
+ RIImm _ -> char 'i',
+ text "c\t",
+ pprReg reg1,
+ text ", ",
+ pprReg reg2,
+ text ", ",
+ pprRI ri
+ ]
+pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3)
+pprInstr (MULL fmt reg1 reg2 ri) = pprMul fmt reg1 reg2 ri
+pprInstr (MULLO fmt reg1 reg2 reg3) = hcat [
char '\t',
+ text "mull",
+ case fmt of
+ II32 -> char 'w'
+ II64 -> char 'd'
+ _ -> panic "PPC: illegal format",
+ text "o\t",
pprReg reg1,
text ", ",
pprReg reg2,
text ", ",
- pprImm imm
+ pprReg reg3
]
+pprInstr (MFOV fmt reg) = vcat [
+ hcat [
+ char '\t',
+ text "mfxer",
+ char '\t',
+ pprReg reg
+ ],
+ hcat [
+ char '\t',
+ text "extr",
+ case fmt of
+ II32 -> char 'w'
+ II64 -> char 'd'
+ _ -> panic "PPC: illegal format",
+ text "i\t",
+ pprReg reg,
+ text ", ",
+ pprReg reg,
+ text ", 1, ",
+ case fmt of
+ II32 -> text "1"
+ II64 -> text "33"
+ _ -> panic "PPC: illegal format"
+ ]
+ ]
-pprInstr (ADDC reg1 reg2 reg3) = pprLogic (sLit "addc") reg1 reg2 (RIReg reg3)
-pprInstr (ADDE reg1 reg2 reg3) = pprLogic (sLit "adde") reg1 reg2 (RIReg reg3)
-pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3)
-pprInstr (SUBFC reg1 reg2 reg3) = pprLogic (sLit "subfc") reg1 reg2 (RIReg reg3)
-pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3)
-pprInstr (MULLD reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mulld") reg1 reg2 ri
-pprInstr (MULLW reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mullw") reg1 reg2 ri
-pprInstr (MULLD reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri
-pprInstr (MULLW reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri
-pprInstr (DIVW reg1 reg2 reg3) = pprLogic (sLit "divw") reg1 reg2 (RIReg reg3)
-pprInstr (DIVD reg1 reg2 reg3) = pprLogic (sLit "divd") reg1 reg2 (RIReg reg3)
-pprInstr (DIVWU reg1 reg2 reg3) = pprLogic (sLit "divwu") reg1 reg2 (RIReg reg3)
-pprInstr (DIVDU reg1 reg2 reg3) = pprLogic (sLit "divdu") reg1 reg2 (RIReg reg3)
-
-pprInstr (MULLW_MayOflo reg1 reg2 reg3) = vcat [
- hcat [ text "\tmullwo\t", pprReg reg1, ptext (sLit ", "),
- pprReg reg2, text ", ",
- pprReg reg3 ],
- hcat [ text "\tmfxer\t", pprReg reg1 ],
- hcat [ text "\trlwinm\t", pprReg reg1, ptext (sLit ", "),
- pprReg reg1, text ", ",
- text "2, 31, 31" ]
- ]
-pprInstr (MULLD_MayOflo reg1 reg2 reg3) = vcat [
- hcat [ text "\tmulldo\t", pprReg reg1, ptext (sLit ", "),
- pprReg reg2, text ", ",
- pprReg reg3 ],
- hcat [ text "\tmfxer\t", pprReg reg1 ],
- hcat [ text "\trlwinm\t", pprReg reg1, ptext (sLit ", "),
- pprReg reg1, text ", ",
- text "2, 31, 31" ]
+pprInstr (MULHU fmt reg1 reg2 reg3) = hcat [
+ char '\t',
+ text "mulh",
+ case fmt of
+ II32 -> char 'w'
+ II64 -> char 'd'
+ _ -> panic "PPC: illegal format",
+ text "u\t",
+ pprReg reg1,
+ text ", ",
+ pprReg reg2,
+ text ", ",
+ pprReg reg3
]
+pprInstr (DIV fmt sgn reg1 reg2 reg3) = pprDiv fmt sgn reg1 reg2 reg3
+
-- for some reason, "andi" doesn't exist.
-- we'll use "andi." instead.
pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [
@@ -705,6 +739,7 @@ pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [
pprImm imm
]
pprInstr (AND reg1 reg2 ri) = pprLogic (sLit "and") reg1 reg2 ri
+pprInstr (ANDC reg1 reg2 reg3) = pprLogic (sLit "andc") reg1 reg2 (RIReg reg3)
pprInstr (OR reg1 reg2 ri) = pprLogic (sLit "or") reg1 reg2 ri
pprInstr (XOR reg1 reg2 ri) = pprLogic (sLit "xor") reg1 reg2 ri
@@ -740,6 +775,18 @@ pprInstr (EXTS fmt reg1 reg2) = hcat [
text ", ",
pprReg reg2
]
+pprInstr (CNTLZ fmt reg1 reg2) = hcat [
+ char '\t',
+ text "cntlz",
+ case fmt of
+ II32 -> char 'w'
+ II64 -> char 'd'
+ _ -> panic "PPC: illegal format",
+ char '\t',
+ pprReg reg1,
+ text ", ",
+ pprReg reg2
+ ]
pprInstr (NEG reg1 reg2) = pprUnary (sLit "neg") reg1 reg2
pprInstr (NOT reg1 reg2) = pprUnary (sLit "not") reg1 reg2
@@ -798,6 +845,16 @@ pprInstr (RLWINM reg1 reg2 sh mb me) = hcat [
int me
]
+pprInstr (CLRLI fmt reg1 reg2 n) = hcat [
+ text "\tclrl",
+ pprFormat fmt,
+ text "i ",
+ pprReg reg1,
+ text ", ",
+ pprReg reg2,
+ text ", ",
+ int n
+ ]
pprInstr (CLRRI fmt reg1 reg2 n) = hcat [
text "\tclrr",
pprFormat fmt,
@@ -863,18 +920,6 @@ pprInstr (FETCHPC reg) = vcat [
hcat [ text "1:\tmflr\t", pprReg reg ]
]
-pprInstr (FETCHTOC reg lab) = vcat [
- hcat [ text "0:\taddis\t", pprReg reg,
- text ",12,.TOC.-0b@ha" ],
- hcat [ text "\taddi\t", pprReg reg,
- char ',', pprReg reg,
- text ",.TOC.-0b@l" ],
- hcat [ text "\t.localentry\t",
- ppr lab,
- text ",.-",
- ppr lab]
- ]
-
pprInstr LWSYNC = text "\tlwsync"
pprInstr NOP = text "\tnop"
@@ -914,6 +959,43 @@ pprLogic op reg1 reg2 ri = hcat [
]
+pprMul :: Format -> Reg -> Reg -> RI -> SDoc
+pprMul fmt reg1 reg2 ri = hcat [
+ char '\t',
+ text "mull",
+ case ri of
+ RIReg _ -> case fmt of
+ II32 -> char 'w'
+ II64 -> char 'd'
+ _ -> panic "PPC: illegal format"
+ RIImm _ -> char 'i',
+ char '\t',
+ pprReg reg1,
+ text ", ",
+ pprReg reg2,
+ text ", ",
+ pprRI ri
+ ]
+
+
+pprDiv :: Format -> Bool -> Reg -> Reg -> Reg -> SDoc
+pprDiv fmt sgn reg1 reg2 reg3 = hcat [
+ char '\t',
+ text "div",
+ case fmt of
+ II32 -> char 'w'
+ II64 -> char 'd'
+ _ -> panic "PPC: illegal format",
+ if sgn then empty else char 'u',
+ char '\t',
+ pprReg reg1,
+ text ", ",
+ pprReg reg2,
+ text ", ",
+ pprReg reg3
+ ]
+
+
pprUnary :: LitString -> Reg -> Reg -> SDoc
pprUnary op reg1 reg2 = hcat [
char '\t',