diff options
author | Peter Trommler <ptrommler@acm.org> | 2017-04-25 18:37:16 -0400 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2017-04-25 18:39:50 -0400 |
commit | 89a3241f708502e8fbcfaddbbe634790ad9cd02a (patch) | |
tree | 9264c8bb53a229ea2a6a55debd0088bb88354bd2 | |
parent | 9373994acaf1b73fe0e7cf8e03594c63cec8d235 (diff) | |
download | haskell-89a3241f708502e8fbcfaddbbe634790ad9cd02a.tar.gz |
PPC NCG: Implement callish prim ops
Provide PowerPC optimised implementations of callish prim ops.
MO_?_QuotRem
The generic implementation of quotient remainder prim ops uses
a division and a remainder operation. There is no remainder on
PowerPC and so we need to implement remainder "by hand" which
results in a duplication of the divide operation when using the
generic code.
Avoid this duplication by implementing the prim op in the native
code generator.
MO_U_Mul2
Use PowerPC's instructions for long multiplication.
Addition and subtraction
Use PowerPC add/subtract with carry/overflow instructions
MO_Clz and MO_Ctz
Use PowerPC's CNTLZ instruction and implement count trailing
zeros using count leading zeros
MO_QuotRem2
Implement an algorithm given by Henry Warren in "Hacker's Delight"
using PowerPC divide instruction. TODO: Use long division instructions
when available (POWER7 and later).
Test Plan: validate on AIX and 32-bit Linux
Reviewers: simonmar, erikd, hvr, austin, bgamari
Reviewed By: erikd, hvr, bgamari
Subscribers: trofi, kgardas, thomie
Differential Revision: https://phabricator.haskell.org/D2973
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 28 | ||||
-rw-r--r-- | compiler/nativeGen/PIC.hs | 5 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 464 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Instr.hs | 102 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Ppr.hs | 196 |
5 files changed, 611 insertions, 184 deletions
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 0edde0659f..235109ffe9 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -815,33 +815,41 @@ type GenericOp = [CmmFormal] -> [CmmActual] -> FCode () callishPrimOpSupported :: DynFlags -> PrimOp -> Either CallishMachOp GenericOp callishPrimOpSupported dflags op = case op of - IntQuotRemOp | ncg && x86ish -> Left (MO_S_QuotRem (wordWidth dflags)) + IntQuotRemOp | ncg && (x86ish + || ppc) -> Left (MO_S_QuotRem (wordWidth dflags)) | otherwise -> Right (genericIntQuotRemOp dflags) - WordQuotRemOp | ncg && x86ish -> Left (MO_U_QuotRem (wordWidth dflags)) + WordQuotRemOp | ncg && (x86ish + || ppc) -> Left (MO_U_QuotRem (wordWidth dflags)) | otherwise -> Right (genericWordQuotRemOp dflags) - WordQuotRem2Op | (ncg && x86ish) + WordQuotRem2Op | (ncg && (x86ish + || ppc)) || llvm -> Left (MO_U_QuotRem2 (wordWidth dflags)) | otherwise -> Right (genericWordQuotRem2Op dflags) - WordAdd2Op | (ncg && x86ish) + WordAdd2Op | (ncg && (x86ish + || ppc)) || llvm -> Left (MO_Add2 (wordWidth dflags)) | otherwise -> Right genericWordAdd2Op - WordSubCOp | (ncg && x86ish) + WordSubCOp | (ncg && (x86ish + || ppc)) || llvm -> Left (MO_SubWordC (wordWidth dflags)) | otherwise -> Right genericWordSubCOp - IntAddCOp | (ncg && x86ish) + IntAddCOp | (ncg && (x86ish + || ppc)) || llvm -> Left (MO_AddIntC (wordWidth dflags)) | otherwise -> Right genericIntAddCOp - IntSubCOp | (ncg && x86ish) + IntSubCOp | (ncg && (x86ish + || ppc)) || llvm -> Left (MO_SubIntC (wordWidth dflags)) | otherwise -> Right genericIntSubCOp - WordMul2Op | ncg && x86ish + WordMul2Op | ncg && (x86ish + || ppc) || llvm -> Left (MO_U_Mul2 (wordWidth dflags)) | otherwise -> Right genericWordMul2Op FloatFabsOp | (ncg && x86ish) @@ -863,6 +871,10 @@ callishPrimOpSupported dflags op ArchX86 -> True ArchX86_64 -> True _ -> False + ppc = case platformArch (targetPlatform dflags) of + ArchPPC -> True + ArchPPC_64 _ -> True + _ -> False genericIntQuotRemOp :: DynFlags -> GenericOp genericIntQuotRemOp dflags [res_q, res_r] [arg_x, arg_y] diff --git a/compiler/nativeGen/PIC.hs b/compiler/nativeGen/PIC.hs index babceac4f0..d6005745b3 100644 --- a/compiler/nativeGen/PIC.hs +++ b/compiler/nativeGen/PIC.hs @@ -521,7 +521,7 @@ pprGotDeclaration _ _ OSAIX ] --- PPC 64 ELF v1needs a Table Of Contents (TOC) on Linux +-- PPC 64 ELF v1 needs a Table Of Contents (TOC) on Linux pprGotDeclaration _ (ArchPPC_64 ELF_V1) OSLinux = text ".section \".toc\",\"aw\"" -- In ELF v2 we also need to tell the assembler that we want ABI @@ -814,7 +814,8 @@ initializePicBase_ppc ArchPPC os picReg fetchPC (BasicBlock bID insns) = BasicBlock bID (PPC.FETCHPC picReg : PPC.ADDIS picReg picReg (PPC.HA gotOffset) - : PPC.ADDI picReg picReg (PPC.LO gotOffset) + : PPC.ADD picReg picReg + (PPC.RIImm (PPC.LO gotOffset)) : PPC.MR PPC.r30 picReg : insns) diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 1f06c7bd3e..1467267842 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -359,7 +359,7 @@ iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do r2hi = getHiVRegFromLo r2lo code = code1 `appOL` code2 `appOL` - toOL [ SUBFC rlo r2lo r1lo, + toOL [ SUBFC rlo r2lo (RIReg r1lo), SUBFE rhi r2hi r1hi ] return (ChildCode64 code rlo) @@ -589,42 +589,37 @@ getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps MO_Add rep -> trivialCode rep True ADD x y MO_Sub rep -> - case y of -- subfi ('substract from' with immediate) doesn't exist + case y of CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm) -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep) - _ -> trivialCodeNoImm' (intFormat rep) SUBF y x - - MO_Mul rep - | arch32 -> trivialCode rep True MULLW x y - | otherwise -> trivialCode rep True MULLD x y - - MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y - MO_S_MulMayOflo W64 -> trivialCodeNoImm' II64 MULLD_MayOflo x y - - MO_S_MulMayOflo _ -> panic "S_MulMayOflo: (II8/16) not implemented" - MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented" + _ -> case x of + CmmLit (CmmInt imm _) + | Just _ <- makeImmediate rep True imm + -- subfi ('substract from' with immediate) doesn't exist + -> trivialCode rep True SUBFC y x + _ -> trivialCodeNoImm' (intFormat rep) SUBF y x + + MO_Mul rep -> shiftMulCode rep True MULL x y + MO_S_MulMayOflo rep -> do + (src1, code1) <- getSomeReg x + (src2, code2) <- getSomeReg y + let + format = intFormat rep + code dst = code1 `appOL` code2 + `appOL` toOL [ MULLO format dst src1 src2 + , MFOV format dst + ] + return (Any format code) - MO_S_Quot rep - | arch32 -> trivialCodeNoImm' (intFormat rep) DIVW - (extendSExpr dflags rep x) (extendSExpr dflags rep y) - | otherwise -> trivialCodeNoImm' (intFormat rep) DIVD + MO_S_Quot rep -> trivialCodeNoImmSign (intFormat rep) True DIV (extendSExpr dflags rep x) (extendSExpr dflags rep y) - MO_U_Quot rep - | arch32 -> trivialCodeNoImm' (intFormat rep) DIVWU - (extendUExpr dflags rep x) (extendUExpr dflags rep y) - | otherwise -> trivialCodeNoImm' (intFormat rep) DIVDU + MO_U_Quot rep -> trivialCodeNoImmSign (intFormat rep) False DIV (extendUExpr dflags rep x) (extendUExpr dflags rep y) - MO_S_Rem rep - | arch32 -> remainderCode rep DIVW (extendSExpr dflags rep x) - (extendSExpr dflags rep y) - | otherwise -> remainderCode rep DIVD (extendSExpr dflags rep x) + MO_S_Rem rep -> remainderCode rep True (extendSExpr dflags rep x) (extendSExpr dflags rep y) - MO_U_Rem rep - | arch32 -> remainderCode rep DIVWU (extendSExpr dflags rep x) - (extendSExpr dflags rep y) - | otherwise -> remainderCode rep DIVDU (extendSExpr dflags rep x) - (extendSExpr dflags rep y) + MO_U_Rem rep -> remainderCode rep False (extendUExpr dflags rep x) + (extendUExpr dflags rep y) MO_And rep -> case y of (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4 @@ -639,17 +634,15 @@ getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps MO_Or rep -> trivialCode rep False OR x y MO_Xor rep -> trivialCode rep False XOR x y - MO_Shl rep -> shiftCode rep SL x y - MO_S_Shr rep -> shiftCode rep SRA (extendSExpr dflags rep x) y - MO_U_Shr rep -> shiftCode rep SR (extendUExpr dflags rep x) y + MO_Shl rep -> shiftMulCode rep False SL x y + MO_S_Shr rep -> shiftMulCode rep False SRA (extendSExpr dflags rep x) y + MO_U_Shr rep -> shiftMulCode rep False SR (extendUExpr dflags rep x) y _ -> panic "PPC.CodeGen.getRegister: no match" where triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y - arch32 = target32Bit $ targetPlatform dflags - getRegister' _ (CmmLit (CmmInt i rep)) | Just imm <- makeImmediate rep True i = let @@ -1090,22 +1083,370 @@ genCondJump id bool = do -- Now the biggest nightmare---calls. Most of the nastiness is buried in -- @get_arg@, which moves the arguments to the correct registers/stack -- locations. Apart from that, the code is easy. --- --- (If applicable) Do not fill the delay slots here; you will confuse the --- register allocator. -genCCall :: ForeignTarget -- function to call +genCCall :: ForeignTarget -- function to call -> [CmmFormal] -- where to put the result -> [CmmActual] -- arguments (of mixed type) -> NatM InstrBlock +genCCall (PrimTarget MO_WriteBarrier) _ _ + = return $ unitOL LWSYNC + +genCCall (PrimTarget MO_Touch) _ _ + = return $ nilOL + +genCCall (PrimTarget (MO_Prefetch_Data _)) _ _ + = return $ nilOL + +genCCall (PrimTarget (MO_Clz width)) [dst] [src] + = do dflags <- getDynFlags + let platform = targetPlatform dflags + reg_dst = getRegisterReg platform (CmmLocal dst) + if target32Bit platform && width == W64 + then do + ChildCode64 code vr_lo <- iselExpr64 src + lbl1 <- getBlockIdNat + lbl2 <- getBlockIdNat + lbl3 <- getBlockIdNat + let vr_hi = getHiVRegFromLo vr_lo + cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0)) + , BCC NE lbl2 + , BCC ALWAYS lbl1 + + , NEWBLOCK lbl1 + , CNTLZ II32 reg_dst vr_lo + , ADD reg_dst reg_dst (RIImm (ImmInt 32)) + , BCC ALWAYS lbl3 + + , NEWBLOCK lbl2 + , CNTLZ II32 reg_dst vr_hi + , BCC ALWAYS lbl3 + + , NEWBLOCK lbl3 + ] + return $ code `appOL` cntlz + else do + let format = if width == W64 then II64 else II32 + (s_reg, s_code) <- getSomeReg src + (pre, reg , post) <- + case width of + W64 -> return (nilOL, s_reg, nilOL) + W32 -> return (nilOL, s_reg, nilOL) + W16 -> do + reg_tmp <- getNewRegNat format + return + ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535)) + , reg_tmp + , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16))) + ) + W8 -> do + reg_tmp <- getNewRegNat format + return + ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255)) + , reg_tmp + , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24))) + ) + _ -> panic "genCall: Clz wrong format" + let cntlz = unitOL (CNTLZ format reg_dst reg) + return $ s_code `appOL` pre `appOL` cntlz `appOL` post + +genCCall (PrimTarget (MO_Ctz width)) [dst] [src] + = do dflags <- getDynFlags + let platform = targetPlatform dflags + reg_dst = getRegisterReg platform (CmmLocal dst) + if target32Bit platform && width == W64 + then do + let format = II32 + ChildCode64 code vr_lo <- iselExpr64 src + lbl1 <- getBlockIdNat + lbl2 <- getBlockIdNat + lbl3 <- getBlockIdNat + x' <- getNewRegNat format + x'' <- getNewRegNat format + r' <- getNewRegNat format + cnttzlo <- cnttz format reg_dst vr_lo + let vr_hi = getHiVRegFromLo vr_lo + cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0)) + , BCC NE lbl2 + , BCC ALWAYS lbl1 + + , NEWBLOCK lbl1 + , ADD x' vr_hi (RIImm (ImmInt (-1))) + , ANDC x'' x' vr_hi + , CNTLZ format r' x'' + -- 32 + (32 - clz(x'')) + , SUBFC reg_dst r' (RIImm (ImmInt 64)) + , BCC ALWAYS lbl3 + + , NEWBLOCK lbl2 + ] + `appOL` cnttzlo `appOL` + toOL [ BCC ALWAYS lbl3 + + , NEWBLOCK lbl3 + ] + return $ code `appOL` cnttz64 + else do + let format = if width == W64 then II64 else II32 + (s_reg, s_code) <- getSomeReg src + (reg_ctz, pre_code) <- + case width of + W64 -> return (s_reg, nilOL) + W32 -> return (s_reg, nilOL) + W16 -> do + reg_tmp <- getNewRegNat format + return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1)) + W8 -> do + reg_tmp <- getNewRegNat format + return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256))) + _ -> panic "genCall: Ctz wrong format" + ctz_code <- cnttz format reg_dst reg_ctz + return $ s_code `appOL` pre_code `appOL` ctz_code + where + -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1)) + -- see Henry S. Warren, Hacker's Delight, p 107 + cnttz format dst src = do + let format_bits = 8 * formatInBytes format + x' <- getNewRegNat format + x'' <- getNewRegNat format + r' <- getNewRegNat format + return $ toOL [ ADD x' src (RIImm (ImmInt (-1))) + , ANDC x'' x' src + , CNTLZ format r' x'' + , SUBFC dst r' (RIImm (ImmInt (format_bits))) + ] + genCCall target dest_regs argsAndHints = do dflags <- getDynFlags - genCCall' dflags (platformToGCP (targetPlatform dflags)) - target dest_regs argsAndHints + let platform = targetPlatform dflags + case target of + PrimTarget (MO_S_QuotRem width) -> divOp1 platform True width + dest_regs argsAndHints + PrimTarget (MO_U_QuotRem width) -> divOp1 platform False width + dest_regs argsAndHints + PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs + argsAndHints + PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs + argsAndHints + PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints + PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints + PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width + dest_regs argsAndHints + PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width + dest_regs argsAndHints + _ -> genCCall' dflags (platformToGCP platform) + target dest_regs argsAndHints + where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y] + = do let reg_q = getRegisterReg platform (CmmLocal res_q) + reg_r = getRegisterReg platform (CmmLocal res_r) + fmt = intFormat width + (x_reg, x_code) <- getSomeReg arg_x + (y_reg, y_code) <- getSomeReg arg_y + return $ y_code `appOL` x_code + `appOL` toOL [ DIV fmt signed reg_q x_reg y_reg + , MULL fmt reg_r reg_q (RIReg y_reg) + , SUBF reg_r reg_r x_reg + ] + + divOp1 _ _ _ _ _ + = panic "genCCall: Wrong number of arguments for divOp1" + divOp2 platform width [res_q, res_r] + [arg_x_high, arg_x_low, arg_y] + = do let reg_q = getRegisterReg platform (CmmLocal res_q) + reg_r = getRegisterReg platform (CmmLocal res_r) + fmt = intFormat width + half = 4 * (formatInBytes fmt) + (xh_reg, xh_code) <- getSomeReg arg_x_high + (xl_reg, xl_code) <- getSomeReg arg_x_low + (y_reg, y_code) <- getSomeReg arg_y + s <- getNewRegNat fmt + b <- getNewRegNat fmt + v <- getNewRegNat fmt + vn1 <- getNewRegNat fmt + vn0 <- getNewRegNat fmt + un32 <- getNewRegNat fmt + tmp <- getNewRegNat fmt + un10 <- getNewRegNat fmt + un1 <- getNewRegNat fmt + un0 <- getNewRegNat fmt + q1 <- getNewRegNat fmt + rhat <- getNewRegNat fmt + tmp1 <- getNewRegNat fmt + q0 <- getNewRegNat fmt + un21 <- getNewRegNat fmt + again1 <- getBlockIdNat + no1 <- getBlockIdNat + then1 <- getBlockIdNat + endif1 <- getBlockIdNat + again2 <- getBlockIdNat + no2 <- getBlockIdNat + then2 <- getBlockIdNat + endif2 <- getBlockIdNat + return $ y_code `appOL` xl_code `appOL` xh_code `appOL` + -- see Hacker's Delight p 196 Figure 9-3 + toOL [ -- b = 2 ^ (bits_in_word / 2) + LI b (ImmInt 1) + , SL fmt b b (RIImm (ImmInt half)) + -- s = clz(y) + , CNTLZ fmt s y_reg + -- v = y << s + , SL fmt v y_reg (RIReg s) + -- vn1 = upper half of v + , SR fmt vn1 v (RIImm (ImmInt half)) + -- vn0 = lower half of v + , CLRLI fmt vn0 v half + -- un32 = (u1 << s) + -- | (u0 >> (bits_in_word - s)) + , SL fmt un32 xh_reg (RIReg s) + , SUBFC tmp s + (RIImm (ImmInt (8 * formatInBytes fmt))) + , SR fmt tmp xl_reg (RIReg tmp) + , OR un32 un32 (RIReg tmp) + -- un10 = u0 << s + , SL fmt un10 xl_reg (RIReg s) + -- un1 = upper half of un10 + , SR fmt un1 un10 (RIImm (ImmInt half)) + -- un0 = lower half of un10 + , CLRLI fmt un0 un10 half + -- q1 = un32/vn1 + , DIV fmt False q1 un32 vn1 + -- rhat = un32 - q1*vn1 + , MULL fmt tmp q1 (RIReg vn1) + , SUBF rhat tmp un32 + , BCC ALWAYS again1 + + , NEWBLOCK again1 + -- if (q1 >= b || q1*vn0 > b*rhat + un1) + , CMPL fmt q1 (RIReg b) + , BCC GEU then1 + , BCC ALWAYS no1 + + , NEWBLOCK no1 + , MULL fmt tmp q1 (RIReg vn0) + , SL fmt tmp1 rhat (RIImm (ImmInt half)) + , ADD tmp1 tmp1 (RIReg un1) + , CMPL fmt tmp (RIReg tmp1) + , BCC LEU endif1 + , BCC ALWAYS then1 + + , NEWBLOCK then1 + -- q1 = q1 - 1 + , ADD q1 q1 (RIImm (ImmInt (-1))) + -- rhat = rhat + vn1 + , ADD rhat rhat (RIReg vn1) + -- if (rhat < b) goto again1 + , CMPL fmt rhat (RIReg b) + , BCC LTT again1 + , BCC ALWAYS endif1 + + , NEWBLOCK endif1 + -- un21 = un32*b + un1 - q1*v + , SL fmt un21 un32 (RIImm (ImmInt half)) + , ADD un21 un21 (RIReg un1) + , MULL fmt tmp q1 (RIReg v) + , SUBF un21 tmp un21 + -- compute second quotient digit + -- q0 = un21/vn1 + , DIV fmt False q0 un21 vn1 + -- rhat = un21- q0*vn1 + , MULL fmt tmp q0 (RIReg vn1) + , SUBF rhat tmp un21 + , BCC ALWAYS again2 + + , NEWBLOCK again2 + -- if (q0>b || q0*vn0 > b*rhat + un0) + , CMPL fmt q0 (RIReg b) + , BCC GEU then2 + , BCC ALWAYS no2 + + , NEWBLOCK no2 + , MULL fmt tmp q0 (RIReg vn0) + , SL fmt tmp1 rhat (RIImm (ImmInt half)) + , ADD tmp1 tmp1 (RIReg un0) + , CMPL fmt tmp (RIReg tmp1) + , BCC LEU endif2 + , BCC ALWAYS then2 + + , NEWBLOCK then2 + -- q0 = q0 - 1 + , ADD q0 q0 (RIImm (ImmInt (-1))) + -- rhat = rhat + vn1 + , ADD rhat rhat (RIReg vn1) + -- if (rhat<b) goto again2 + , CMPL fmt rhat (RIReg b) + , BCC LTT again2 + , BCC ALWAYS endif2 + + , NEWBLOCK endif2 + -- compute remainder + -- r = (un21*b + un0 - q0*v) >> s + , SL fmt reg_r un21 (RIImm (ImmInt half)) + , ADD reg_r reg_r (RIReg un0) + , MULL fmt tmp q0 (RIReg v) + , SUBF reg_r tmp reg_r + , SR fmt reg_r reg_r (RIReg s) + -- compute quotient + -- q = q1*b + q0 + , SL fmt reg_q q1 (RIImm (ImmInt half)) + , ADD reg_q reg_q (RIReg q0) + ] + divOp2 _ _ _ _ + = panic "genCCall: Wrong number of arguments for divOp2" + multOp2 platform width [res_h, res_l] [arg_x, arg_y] + = do let reg_h = getRegisterReg platform (CmmLocal res_h) + reg_l = getRegisterReg platform (CmmLocal res_l) + fmt = intFormat width + (x_reg, x_code) <- getSomeReg arg_x + (y_reg, y_code) <- getSomeReg arg_y + return $ y_code `appOL` x_code + `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg) + , MULHU fmt reg_h x_reg y_reg + ] + multOp2 _ _ _ _ + = panic "genCall: Wrong number of arguments for multOp2" + add2Op platform [res_h, res_l] [arg_x, arg_y] + = do let reg_h = getRegisterReg platform (CmmLocal res_h) + reg_l = getRegisterReg platform (CmmLocal res_l) + (x_reg, x_code) <- getSomeReg arg_x + (y_reg, y_code) <- getSomeReg arg_y + return $ y_code `appOL` x_code + `appOL` toOL [ LI reg_h (ImmInt 0) + , ADDC reg_l x_reg y_reg + , ADDZE reg_h reg_h + ] + add2Op _ _ _ + = panic "genCCall: Wrong number of arguments/results for add2" + + -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1, + -- which is 0 for borrow and 1 otherwise. We need 1 and 0 + -- so xor with 1. + subcOp platform [res_r, res_c] [arg_x, arg_y] + = do let reg_r = getRegisterReg platform (CmmLocal res_r) + reg_c = getRegisterReg platform (CmmLocal res_c) + (x_reg, x_code) <- getSomeReg arg_x + (y_reg, y_code) <- getSomeReg arg_y + return $ y_code `appOL` x_code + `appOL` toOL [ LI reg_c (ImmInt 0) + , SUBFC reg_r y_reg (RIReg x_reg) + , ADDZE reg_c reg_c + , XOR reg_c reg_c (RIImm (ImmInt 1)) + ] + subcOp _ _ _ + = panic "genCCall: Wrong number of arguments/results for subc" + addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y] + = do let reg_r = getRegisterReg platform (CmmLocal res_r) + reg_c = getRegisterReg platform (CmmLocal res_c) + (x_reg, x_code) <- getSomeReg arg_x + (y_reg, y_code) <- getSomeReg arg_y + return $ y_code `appOL` x_code + `appOL` toOL [ instr reg_r y_reg x_reg, + -- SUBFO argument order reversed! + MFOV (intFormat width) reg_c + ] + addSubCOp _ _ _ _ _ + = panic "genCall: Wrong number of arguments/results for addC" -- TODO: replace 'Int' by an enum such as 'PPC_64ABI' data GenCCallPlatform = GCPLinux | GCPDarwin | GCPLinux64ELF !Int | GCPAIX - deriving Eq platformToGCP :: Platform -> GenCCallPlatform platformToGCP platform = case platformOS platform of @@ -1175,15 +1516,6 @@ genCCall' -} -genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _ - = return $ unitOL LWSYNC - -genCCall' _ _ (PrimTarget MO_Touch) _ _ - = return $ nilOL - -genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _ - = return $ nilOL - genCCall' dflags gcp target dest_regs args = ASSERT(not $ any (`elem` [II16]) $ map cmmTypeFormat argReps) -- we rely on argument promotion in the codeGen @@ -1767,21 +2099,22 @@ trivialCode rep _ instr x y = do let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2) return (Any (intFormat rep) code) -shiftCode +shiftMulCode :: Width + -> Bool -> (Format-> Reg -> Reg -> RI -> Instr) -> CmmExpr -> CmmExpr -> NatM Register -shiftCode width instr x (CmmLit (CmmInt y _)) - | Just imm <- makeImmediate width False y +shiftMulCode width sign instr x (CmmLit (CmmInt y _)) + | Just imm <- makeImmediate width sign y = do (src1, code1) <- getSomeReg x let format = intFormat width let code dst = code1 `snocOL` instr format dst src1 (RIImm imm) return (Any format code) -shiftCode width instr x y = do +shiftMulCode width _ instr x y = do (src1, code1) <- getSomeReg x (src2, code2) <- getSomeReg y let format = intFormat width @@ -1800,6 +2133,12 @@ trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr) -> CmmExpr -> CmmExpr -> NatM Register trivialCodeNoImm format instr x y = trivialCodeNoImm' format (instr format) x y +trivialCodeNoImmSign :: Format -> Bool + -> (Format -> Bool -> Reg -> Reg -> Reg -> Instr) + -> CmmExpr -> CmmExpr -> NatM Register +trivialCodeNoImmSign format sgn instr x y + = trivialCodeNoImm' format (instr format sgn) x y + trivialUCode :: Format @@ -1813,19 +2152,16 @@ trivialUCode rep instr x = do -- There is no "remainder" instruction on the PPC, so we have to do -- it the hard way. --- The "div" parameter is the division instruction to use (DIVW or DIVWU) +-- The "sgn" parameter is the signedness for the division instruction -remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -remainderCode rep div x y = do - dflags <- getDynFlags - let mull_instr = if target32Bit $ targetPlatform dflags then MULLW - else MULLD +remainderCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register +remainderCode rep sgn x y = do + let fmt = intFormat rep (src1, code1) <- getSomeReg x (src2, code2) <- getSomeReg y let code dst = code1 `appOL` code2 `appOL` toOL [ - div dst src1 src2, - mull_instr dst dst (RIReg src2), + DIV fmt sgn dst src1 src2, + MULL fmt dst dst (RIReg src2), SUBF dst dst src1 ] return (Any (intFormat rep) code) diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs index ae7d6bf260..e395b388e9 100644 --- a/compiler/nativeGen/PPC/Instr.hs +++ b/compiler/nativeGen/PPC/Instr.hs @@ -210,40 +210,34 @@ data Instr | BCTRL [Reg] | ADD Reg Reg RI -- dst, src1, src2 + | ADDO Reg Reg Reg -- add and set overflow | ADDC Reg Reg Reg -- (carrying) dst, src1, src2 - | ADDE Reg Reg Reg -- (extend) dst, src1, src2 - | ADDI Reg Reg Imm -- Add Immediate dst, src1, src2 + | ADDE Reg Reg Reg -- (extended) dst, src1, src2 + | ADDZE Reg Reg -- (to zero extended) dst, src | ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2 | SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1 - | SUBFC Reg Reg Reg -- (carrying) dst, src1, src2 ; dst = src2 - src1 - | SUBFE Reg Reg Reg -- (extend) dst, src1, src2 ; dst = src2 - src1 - | MULLD Reg Reg RI - | MULLW Reg Reg RI - | DIVW Reg Reg Reg - | DIVD Reg Reg Reg - | DIVWU Reg Reg Reg - | DIVDU Reg Reg Reg - - | MULLW_MayOflo Reg Reg Reg - -- dst = 1 if src1 * src2 overflows - -- pseudo-instruction; pretty-printed as: - -- mullwo. dst, src1, src2 + | SUBFO Reg Reg Reg -- subtract from and set overflow + | SUBFC Reg Reg RI -- (carrying) dst, src1, src2 ; + -- dst = src2 - src1 + | SUBFE Reg Reg Reg -- (extended) dst, src1, src2 ; + -- dst = src2 - src1 + | MULL Format Reg Reg RI + | MULLO Format Reg Reg Reg -- multiply and set overflow + | MFOV Format Reg -- move overflow bit (1|33) to register + -- pseudo-instruction; pretty printed as -- mfxer dst - -- rlwinm dst, dst, 2, 31,31 - | MULLD_MayOflo Reg Reg Reg - -- dst = 1 if src1 * src2 overflows - -- pseudo-instruction; pretty-printed as: - -- mulldo. dst, src1, src2 - -- mfxer dst - -- rlwinm dst, dst, 2, 31,31 - + -- extr[w|d]i dst, dst, 1, [1|33] + | MULHU Format Reg Reg Reg + | DIV Format Bool Reg Reg Reg | AND Reg Reg RI -- dst, src1, src2 + | ANDC Reg Reg Reg -- AND with complement, dst = src1 & ~ src2 | OR Reg Reg RI -- dst, src1, src2 | ORIS Reg Reg Imm -- OR Immediate Shifted dst, src1, src2 | XOR Reg Reg RI -- dst, src1, src2 | XORIS Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2 | EXTS Format Reg Reg + | CNTLZ Format Reg Reg | NEG Reg Reg | NOT Reg Reg @@ -253,6 +247,7 @@ data Instr | SRA Format Reg Reg RI -- shift right arithmetic | RLWINM Reg Reg Int Int Int -- Rotate Left Word Immediate then AND with Mask + | CLRLI Format Reg Reg Int -- clear left immediate (extended mnemonic) | CLRRI Format Reg Reg Int -- clear right immediate (extended mnemonic) | FADD Format Reg Reg Reg @@ -275,9 +270,6 @@ data Instr | MFLR Reg -- move from link register | FETCHPC Reg -- pseudo-instruction: -- bcl to next insn, mflr reg - | FETCHTOC Reg CLabel -- pseudo-instruction - -- add TOC offset to address in r12 - -- print .localentry for label | LWSYNC -- memory barrier | NOP -- no operation, PowerPC 64 bit -- needs this as place holder to @@ -313,36 +305,37 @@ ppc_regUsageOfInstr platform instr BCTRL params -> usage (params, callClobberedRegs platform) ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + ADDO reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) ADDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) ADDE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - ADDI reg1 reg2 _ -> usage ([reg2], [reg1]) + ADDZE reg1 reg2 -> usage ([reg2], [reg1]) ADDIS reg1 reg2 _ -> usage ([reg2], [reg1]) SUBF reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - SUBFC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + SUBFO reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + SUBFC reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) SUBFE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - MULLD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - MULLW reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - DIVW reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - DIVD reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - DIVWU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - DIVDU reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - - MULLW_MayOflo reg1 reg2 reg3 - -> usage ([reg2,reg3], [reg1]) - MULLD_MayOflo reg1 reg2 reg3 - -> usage ([reg2,reg3], [reg1]) + MULL _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + MULLO _ reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + MFOV _ reg -> usage ([], [reg]) + MULHU _ reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + DIV _ _ reg1 reg2 reg3 + -> usage ([reg2,reg3], [reg1]) + AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) + ANDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) ORIS reg1 reg2 _ -> usage ([reg2], [reg1]) XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) XORIS reg1 reg2 _ -> usage ([reg2], [reg1]) EXTS _ reg1 reg2 -> usage ([reg2], [reg1]) + CNTLZ _ reg1 reg2 -> usage ([reg2], [reg1]) NEG reg1 reg2 -> usage ([reg2], [reg1]) NOT reg1 reg2 -> usage ([reg2], [reg1]) SL _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) SR _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) SRA _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) RLWINM reg1 reg2 _ _ _ -> usage ([reg2], [reg1]) + CLRLI _ reg1 reg2 _ -> usage ([reg2], [reg1]) CLRRI _ reg1 reg2 _ -> usage ([reg2], [reg1]) FADD _ r1 r2 r3 -> usage ([r2,r3], [r1]) @@ -358,7 +351,6 @@ ppc_regUsageOfInstr platform instr MFCR reg -> usage ([], [reg]) MFLR reg -> usage ([], [reg]) FETCHPC reg -> usage ([], [reg]) - FETCHTOC reg _ -> usage ([], [reg]) UPDATE_SP _ _ -> usage ([], [sp]) _ -> noUsage where @@ -401,29 +393,33 @@ ppc_patchRegsOfInstr instr env BL imm argRegs -> BL imm argRegs -- argument regs BCTRL argRegs -> BCTRL argRegs -- cannot be remapped ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri) + ADDO reg1 reg2 reg3 -> ADDO (env reg1) (env reg2) (env reg3) ADDC reg1 reg2 reg3 -> ADDC (env reg1) (env reg2) (env reg3) ADDE reg1 reg2 reg3 -> ADDE (env reg1) (env reg2) (env reg3) - ADDI reg1 reg2 imm -> ADDI (env reg1) (env reg2) imm + ADDZE reg1 reg2 -> ADDZE (env reg1) (env reg2) ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm SUBF reg1 reg2 reg3 -> SUBF (env reg1) (env reg2) (env reg3) - SUBFC reg1 reg2 reg3 -> SUBFC (env reg1) (env reg2) (env reg3) + SUBFO reg1 reg2 reg3 -> SUBFO (env reg1) (env reg2) (env reg3) + SUBFC reg1 reg2 ri -> SUBFC (env reg1) (env reg2) (fixRI ri) SUBFE reg1 reg2 reg3 -> SUBFE (env reg1) (env reg2) (env reg3) - MULLD reg1 reg2 ri -> MULLD (env reg1) (env reg2) (fixRI ri) - MULLW reg1 reg2 ri -> MULLW (env reg1) (env reg2) (fixRI ri) - DIVW reg1 reg2 reg3 -> DIVW (env reg1) (env reg2) (env reg3) - DIVD reg1 reg2 reg3 -> DIVD (env reg1) (env reg2) (env reg3) - DIVWU reg1 reg2 reg3 -> DIVWU (env reg1) (env reg2) (env reg3) - DIVDU reg1 reg2 reg3 -> DIVDU (env reg1) (env reg2) (env reg3) - MULLW_MayOflo reg1 reg2 reg3 - -> MULLW_MayOflo (env reg1) (env reg2) (env reg3) - MULLD_MayOflo reg1 reg2 reg3 - -> MULLD_MayOflo (env reg1) (env reg2) (env reg3) + MULL fmt reg1 reg2 ri + -> MULL fmt (env reg1) (env reg2) (fixRI ri) + MULLO fmt reg1 reg2 reg3 + -> MULLO fmt (env reg1) (env reg2) (env reg3) + MFOV fmt reg -> MFOV fmt (env reg) + MULHU fmt reg1 reg2 reg3 + -> MULHU fmt (env reg1) (env reg2) (env reg3) + DIV fmt sgn reg1 reg2 reg3 + -> DIV fmt sgn (env reg1) (env reg2) (env reg3) + AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri) + ANDC reg1 reg2 reg3 -> ANDC (env reg1) (env reg2) (env reg3) OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri) ORIS reg1 reg2 imm -> ORIS (env reg1) (env reg2) imm XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri) XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm EXTS fmt reg1 reg2 -> EXTS fmt (env reg1) (env reg2) + CNTLZ fmt reg1 reg2 -> CNTLZ fmt (env reg1) (env reg2) NEG reg1 reg2 -> NEG (env reg1) (env reg2) NOT reg1 reg2 -> NOT (env reg1) (env reg2) SL fmt reg1 reg2 ri @@ -434,6 +430,7 @@ ppc_patchRegsOfInstr instr env -> SRA fmt (env reg1) (env reg2) (fixRI ri) RLWINM reg1 reg2 sh mb me -> RLWINM (env reg1) (env reg2) sh mb me + CLRLI fmt reg1 reg2 n -> CLRLI fmt (env reg1) (env reg2) n CLRRI fmt reg1 reg2 n -> CLRRI fmt (env reg1) (env reg2) n FADD fmt r1 r2 r3 -> FADD fmt (env r1) (env r2) (env r3) FSUB fmt r1 r2 r3 -> FSUB fmt (env r1) (env r2) (env r3) @@ -448,7 +445,6 @@ ppc_patchRegsOfInstr instr env MFCR reg -> MFCR (env reg) MFLR reg -> MFLR (env reg) FETCHPC reg -> FETCHPC (env reg) - FETCHTOC reg lab -> FETCHTOC (env reg) lab _ -> instr where fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2) diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs index 0a1657ddf1..025dfaf244 100644 --- a/compiler/nativeGen/PPC/Ppr.hs +++ b/compiler/nativeGen/PPC/Ppr.hs @@ -229,20 +229,20 @@ pprReg r pprFormat :: Format -> SDoc pprFormat x = ptext (case x of - II8 -> sLit "b" - II16 -> sLit "h" - II32 -> sLit "w" - II64 -> sLit "d" - FF32 -> sLit "fs" - FF64 -> sLit "fd" - _ -> panic "PPC.Ppr.pprFormat: no match") + II8 -> sLit "b" + II16 -> sLit "h" + II32 -> sLit "w" + II64 -> sLit "d" + FF32 -> sLit "fs" + FF64 -> sLit "fd" + _ -> panic "PPC.Ppr.pprFormat: no match") pprCond :: Cond -> SDoc pprCond c = ptext (case c of { ALWAYS -> sLit ""; - EQQ -> sLit "eq"; NE -> sLit "ne"; + EQQ -> sLit "eq"; NE -> sLit "ne"; LTT -> sLit "lt"; GE -> sLit "ge"; GTT -> sLit "gt"; LE -> sLit "le"; LU -> sLit "lt"; GEU -> sLit "ge"; @@ -493,7 +493,6 @@ pprInstr (STFAR fmt reg (AddrRegImm source off)) = pprInstr (ADDIS (tmpReg platform) source (HA off)), pprInstr (ST fmt reg (AddrRegImm (tmpReg platform) (LO off))) ] - pprInstr (STFAR _ _ _) = panic "PPC.Ppr.pprInstr STFAR: no match" pprInstr (STU fmt reg addr) = hcat [ @@ -638,9 +637,9 @@ pprInstr (BCTRL _) = hcat [ text "bctrl" ] pprInstr (ADD reg1 reg2 ri) = pprLogic (sLit "add") reg1 reg2 ri -pprInstr (ADDI reg1 reg2 imm) = hcat [ +pprInstr (ADDIS reg1 reg2 imm) = hcat [ char '\t', - text "addi", + text "addis", char '\t', pprReg reg1, text ", ", @@ -648,50 +647,85 @@ pprInstr (ADDI reg1 reg2 imm) = hcat [ text ", ", pprImm imm ] -pprInstr (ADDIS reg1 reg2 imm) = hcat [ + +pprInstr (ADDO reg1 reg2 reg3) = pprLogic (sLit "addo") reg1 reg2 (RIReg reg3) +pprInstr (ADDC reg1 reg2 reg3) = pprLogic (sLit "addc") reg1 reg2 (RIReg reg3) +pprInstr (ADDE reg1 reg2 reg3) = pprLogic (sLit "adde") reg1 reg2 (RIReg reg3) +pprInstr (ADDZE reg1 reg2) = pprUnary (sLit "addze") reg1 reg2 +pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3) +pprInstr (SUBFO reg1 reg2 reg3) = pprLogic (sLit "subfo") reg1 reg2 (RIReg reg3) +pprInstr (SUBFC reg1 reg2 ri) = hcat [ char '\t', - text "addis", + text "subf", + case ri of + RIReg _ -> empty + RIImm _ -> char 'i', + text "c\t", + pprReg reg1, + text ", ", + pprReg reg2, + text ", ", + pprRI ri + ] +pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3) +pprInstr (MULL fmt reg1 reg2 ri) = pprMul fmt reg1 reg2 ri +pprInstr (MULLO fmt reg1 reg2 reg3) = hcat [ char '\t', + text "mull", + case fmt of + II32 -> char 'w' + II64 -> char 'd' + _ -> panic "PPC: illegal format", + text "o\t", pprReg reg1, text ", ", pprReg reg2, text ", ", - pprImm imm + pprReg reg3 ] +pprInstr (MFOV fmt reg) = vcat [ + hcat [ + char '\t', + text "mfxer", + char '\t', + pprReg reg + ], + hcat [ + char '\t', + text "extr", + case fmt of + II32 -> char 'w' + II64 -> char 'd' + _ -> panic "PPC: illegal format", + text "i\t", + pprReg reg, + text ", ", + pprReg reg, + text ", 1, ", + case fmt of + II32 -> text "1" + II64 -> text "33" + _ -> panic "PPC: illegal format" + ] + ] -pprInstr (ADDC reg1 reg2 reg3) = pprLogic (sLit "addc") reg1 reg2 (RIReg reg3) -pprInstr (ADDE reg1 reg2 reg3) = pprLogic (sLit "adde") reg1 reg2 (RIReg reg3) -pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3) -pprInstr (SUBFC reg1 reg2 reg3) = pprLogic (sLit "subfc") reg1 reg2 (RIReg reg3) -pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3) -pprInstr (MULLD reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mulld") reg1 reg2 ri -pprInstr (MULLW reg1 reg2 ri@(RIReg _)) = pprLogic (sLit "mullw") reg1 reg2 ri -pprInstr (MULLD reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri -pprInstr (MULLW reg1 reg2 ri@(RIImm _)) = pprLogic (sLit "mull") reg1 reg2 ri -pprInstr (DIVW reg1 reg2 reg3) = pprLogic (sLit "divw") reg1 reg2 (RIReg reg3) -pprInstr (DIVD reg1 reg2 reg3) = pprLogic (sLit "divd") reg1 reg2 (RIReg reg3) -pprInstr (DIVWU reg1 reg2 reg3) = pprLogic (sLit "divwu") reg1 reg2 (RIReg reg3) -pprInstr (DIVDU reg1 reg2 reg3) = pprLogic (sLit "divdu") reg1 reg2 (RIReg reg3) - -pprInstr (MULLW_MayOflo reg1 reg2 reg3) = vcat [ - hcat [ text "\tmullwo\t", pprReg reg1, ptext (sLit ", "), - pprReg reg2, text ", ", - pprReg reg3 ], - hcat [ text "\tmfxer\t", pprReg reg1 ], - hcat [ text "\trlwinm\t", pprReg reg1, ptext (sLit ", "), - pprReg reg1, text ", ", - text "2, 31, 31" ] - ] -pprInstr (MULLD_MayOflo reg1 reg2 reg3) = vcat [ - hcat [ text "\tmulldo\t", pprReg reg1, ptext (sLit ", "), - pprReg reg2, text ", ", - pprReg reg3 ], - hcat [ text "\tmfxer\t", pprReg reg1 ], - hcat [ text "\trlwinm\t", pprReg reg1, ptext (sLit ", "), - pprReg reg1, text ", ", - text "2, 31, 31" ] +pprInstr (MULHU fmt reg1 reg2 reg3) = hcat [ + char '\t', + text "mulh", + case fmt of + II32 -> char 'w' + II64 -> char 'd' + _ -> panic "PPC: illegal format", + text "u\t", + pprReg reg1, + text ", ", + pprReg reg2, + text ", ", + pprReg reg3 ] +pprInstr (DIV fmt sgn reg1 reg2 reg3) = pprDiv fmt sgn reg1 reg2 reg3 + -- for some reason, "andi" doesn't exist. -- we'll use "andi." instead. pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [ @@ -705,6 +739,7 @@ pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [ pprImm imm ] pprInstr (AND reg1 reg2 ri) = pprLogic (sLit "and") reg1 reg2 ri +pprInstr (ANDC reg1 reg2 reg3) = pprLogic (sLit "andc") reg1 reg2 (RIReg reg3) pprInstr (OR reg1 reg2 ri) = pprLogic (sLit "or") reg1 reg2 ri pprInstr (XOR reg1 reg2 ri) = pprLogic (sLit "xor") reg1 reg2 ri @@ -740,6 +775,18 @@ pprInstr (EXTS fmt reg1 reg2) = hcat [ text ", ", pprReg reg2 ] +pprInstr (CNTLZ fmt reg1 reg2) = hcat [ + char '\t', + text "cntlz", + case fmt of + II32 -> char 'w' + II64 -> char 'd' + _ -> panic "PPC: illegal format", + char '\t', + pprReg reg1, + text ", ", + pprReg reg2 + ] pprInstr (NEG reg1 reg2) = pprUnary (sLit "neg") reg1 reg2 pprInstr (NOT reg1 reg2) = pprUnary (sLit "not") reg1 reg2 @@ -798,6 +845,16 @@ pprInstr (RLWINM reg1 reg2 sh mb me) = hcat [ int me ] +pprInstr (CLRLI fmt reg1 reg2 n) = hcat [ + text "\tclrl", + pprFormat fmt, + text "i ", + pprReg reg1, + text ", ", + pprReg reg2, + text ", ", + int n + ] pprInstr (CLRRI fmt reg1 reg2 n) = hcat [ text "\tclrr", pprFormat fmt, @@ -863,18 +920,6 @@ pprInstr (FETCHPC reg) = vcat [ hcat [ text "1:\tmflr\t", pprReg reg ] ] -pprInstr (FETCHTOC reg lab) = vcat [ - hcat [ text "0:\taddis\t", pprReg reg, - text ",12,.TOC.-0b@ha" ], - hcat [ text "\taddi\t", pprReg reg, - char ',', pprReg reg, - text ",.TOC.-0b@l" ], - hcat [ text "\t.localentry\t", - ppr lab, - text ",.-", - ppr lab] - ] - pprInstr LWSYNC = text "\tlwsync" pprInstr NOP = text "\tnop" @@ -914,6 +959,43 @@ pprLogic op reg1 reg2 ri = hcat [ ] +pprMul :: Format -> Reg -> Reg -> RI -> SDoc +pprMul fmt reg1 reg2 ri = hcat [ + char '\t', + text "mull", + case ri of + RIReg _ -> case fmt of + II32 -> char 'w' + II64 -> char 'd' + _ -> panic "PPC: illegal format" + RIImm _ -> char 'i', + char '\t', + pprReg reg1, + text ", ", + pprReg reg2, + text ", ", + pprRI ri + ] + + +pprDiv :: Format -> Bool -> Reg -> Reg -> Reg -> SDoc +pprDiv fmt sgn reg1 reg2 reg3 = hcat [ + char '\t', + text "div", + case fmt of + II32 -> char 'w' + II64 -> char 'd' + _ -> panic "PPC: illegal format", + if sgn then empty else char 'u', + char '\t', + pprReg reg1, + text ", ", + pprReg reg2, + text ", ", + pprReg reg3 + ] + + pprUnary :: LitString -> Reg -> Reg -> SDoc pprUnary op reg1 reg2 = hcat [ char '\t', |