PPC NCG: Implement callish prim ops

Provide PowerPC optimised implementations of callish prim ops. MO_?_QuotRem The generic implementation of quotient remainder prim ops uses a division and a remainder operation. There is no remainder on PowerPC and so we need to implement remainder "by hand" which results in a duplication of the divide operation when using the generic code. Avoid this duplication by implementing the prim op in the native code generator. MO_U_Mul2 Use PowerPC's instructions for long multiplication. Addition and subtraction Use PowerPC add/subtract with carry/overflow instructions MO_Clz and MO_Ctz Use PowerPC's CNTLZ instruction and implement count trailing zeros using count leading zeros MO_QuotRem2 Implement an algorithm given by Henry Warren in "Hacker's Delight" using PowerPC divide instruction. TODO: Use long division instructions when available (POWER7 and later). Test Plan: validate on AIX and 32-bit Linux Reviewers: simonmar, erikd, hvr, austin, bgamari Reviewed By: erikd, hvr, bgamari Subscribers: trofi, kgardas, thomie Differential Revision: https://phabricator.haskell.org/D2973
author: Peter Trommler <ptrommler@acm.org> 2017-04-25 18:37:16 -0400
committer: Ben Gamari <ben@smart-cactus.org> 2017-04-25 18:39:50 -0400
commit: 89a3241f708502e8fbcfaddbbe634790ad9cd02a (patch)
tree: 9264c8bb53a229ea2a6a55debd0088bb88354bd2 /compiler/nativeGen/PPC/CodeGen.hs
parent: 9373994acaf1b73fe0e7cf8e03594c63cec8d235 (diff)
download: haskell-89a3241f708502e8fbcfaddbbe634790ad9cd02a.tar.gz
1 files changed, 400 insertions, 64 deletions
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 1f06c7bd3e..1467267842 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -359,7 +359,7 @@ iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
         r2hi = getHiVRegFromLo r2lo
         code =  code1 `appOL`
                 code2 `appOL`
-                toOL [ SUBFC rlo r2lo r1lo,
+                toOL [ SUBFC rlo r2lo (RIReg r1lo),
                        SUBFE rhi r2hi r1hi ]
    return (ChildCode64 code rlo)
 
@@ -589,42 +589,37 @@ getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
 
       MO_Add rep -> trivialCode rep True ADD x y
       MO_Sub rep ->
-        case y of    -- subfi ('substract from' with immediate) doesn't exist
+        case y of
           CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
             -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
-          _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
-
-      MO_Mul rep
-       | arch32    -> trivialCode rep True MULLW x y
-       | otherwise -> trivialCode rep True MULLD x y
-
-      MO_S_MulMayOflo W32 -> trivialCodeNoImm' II32 MULLW_MayOflo x y
-      MO_S_MulMayOflo W64 -> trivialCodeNoImm' II64 MULLD_MayOflo x y
-
-      MO_S_MulMayOflo _ -> panic "S_MulMayOflo: (II8/16) not implemented"
-      MO_U_MulMayOflo _ -> panic "U_MulMayOflo: not implemented"
+          _ -> case x of
+                 CmmLit (CmmInt imm _)
+                   | Just _ <- makeImmediate rep True imm
+                   -- subfi ('substract from' with immediate) doesn't exist
+                   -> trivialCode rep True SUBFC y x
+                 _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
+
+      MO_Mul rep -> shiftMulCode rep True MULL x y
+      MO_S_MulMayOflo rep -> do
+        (src1, code1) <- getSomeReg x
+        (src2, code2) <- getSomeReg y
+        let
+          format = intFormat rep
+          code dst = code1 `appOL` code2
+                       `appOL` toOL [ MULLO format dst src1 src2
+                                    , MFOV  format dst
+                                    ]
+        return (Any format code)
 
-      MO_S_Quot rep
-       | arch32     -> trivialCodeNoImm' (intFormat rep) DIVW
-                (extendSExpr dflags rep x) (extendSExpr dflags rep y)
-       | otherwise  -> trivialCodeNoImm' (intFormat rep) DIVD
+      MO_S_Quot rep -> trivialCodeNoImmSign (intFormat rep) True DIV
                 (extendSExpr dflags rep x) (extendSExpr dflags rep y)
-      MO_U_Quot rep
-       | arch32     -> trivialCodeNoImm' (intFormat rep) DIVWU
-                (extendUExpr dflags rep x) (extendUExpr dflags rep y)
-       | otherwise  -> trivialCodeNoImm' (intFormat rep) DIVDU
+      MO_U_Quot rep -> trivialCodeNoImmSign (intFormat rep) False DIV
                 (extendUExpr dflags rep x) (extendUExpr dflags rep y)
 
-      MO_S_Rem rep
-       | arch32    -> remainderCode rep DIVW (extendSExpr dflags rep x)
-                                             (extendSExpr dflags rep y)
-       | otherwise -> remainderCode rep DIVD (extendSExpr dflags rep x)
+      MO_S_Rem rep -> remainderCode rep True (extendSExpr dflags rep x)
                                              (extendSExpr dflags rep y)
-      MO_U_Rem rep
-       | arch32    -> remainderCode rep DIVWU (extendSExpr dflags rep x)
-                                              (extendSExpr dflags rep y)
-       | otherwise -> remainderCode rep DIVDU (extendSExpr dflags rep x)
-                                              (extendSExpr dflags rep y)
+      MO_U_Rem rep -> remainderCode rep False (extendUExpr dflags rep x)
+                                              (extendUExpr dflags rep y)
 
       MO_And rep   -> case y of
         (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
@@ -639,17 +634,15 @@ getRegister' dflags (CmmMachOp mop [x, y]) -- dyadic PrimOps
       MO_Or rep    -> trivialCode rep False OR x y
       MO_Xor rep   -> trivialCode rep False XOR x y
 
-      MO_Shl rep   -> shiftCode rep SL x y
-      MO_S_Shr rep -> shiftCode rep SRA (extendSExpr dflags rep x) y
-      MO_U_Shr rep -> shiftCode rep SR (extendUExpr dflags rep x) y
+      MO_Shl rep   -> shiftMulCode rep False SL x y
+      MO_S_Shr rep -> shiftMulCode rep False SRA (extendSExpr dflags rep x) y
+      MO_U_Shr rep -> shiftMulCode rep False SR (extendUExpr dflags rep x) y
       _         -> panic "PPC.CodeGen.getRegister: no match"
 
   where
     triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
     triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
 
-    arch32 = target32Bit $ targetPlatform dflags
-
 getRegister' _ (CmmLit (CmmInt i rep))
   | Just imm <- makeImmediate rep True i
   = let
@@ -1090,22 +1083,370 @@ genCondJump id bool = do
 -- Now the biggest nightmare---calls.  Most of the nastiness is buried in
 -- @get_arg@, which moves the arguments to the correct registers/stack
 -- locations.  Apart from that, the code is easy.
---
--- (If applicable) Do not fill the delay slots here; you will confuse the
--- register allocator.
 
-genCCall :: ForeignTarget            -- function to call
+genCCall :: ForeignTarget      -- function to call
          -> [CmmFormal]        -- where to put the result
          -> [CmmActual]        -- arguments (of mixed type)
          -> NatM InstrBlock
+genCCall (PrimTarget MO_WriteBarrier) _ _
+ = return $ unitOL LWSYNC
+
+genCCall (PrimTarget MO_Touch) _ _
+ = return $ nilOL
+
+genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
+ = return $ nilOL
+
+genCCall (PrimTarget (MO_Clz width)) [dst] [src]
+ = do dflags <- getDynFlags
+      let platform = targetPlatform dflags
+          reg_dst = getRegisterReg platform (CmmLocal dst)
+      if target32Bit platform && width == W64
+        then do
+          ChildCode64 code vr_lo <- iselExpr64 src
+          lbl1 <- getBlockIdNat
+          lbl2 <- getBlockIdNat
+          lbl3 <- getBlockIdNat
+          let vr_hi = getHiVRegFromLo vr_lo
+              cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
+                           , BCC NE lbl2
+                           , BCC ALWAYS lbl1
+
+                           , NEWBLOCK lbl1
+                           , CNTLZ II32 reg_dst vr_lo
+                           , ADD reg_dst reg_dst (RIImm (ImmInt 32))
+                           , BCC ALWAYS lbl3
+
+                           , NEWBLOCK lbl2
+                           , CNTLZ II32 reg_dst vr_hi
+                           , BCC ALWAYS lbl3
+
+                           , NEWBLOCK lbl3
+                           ]
+          return $ code `appOL` cntlz
+        else do
+          let format = if width == W64 then II64 else II32
+          (s_reg, s_code) <- getSomeReg src
+          (pre, reg , post) <-
+            case width of
+              W64 -> return (nilOL, s_reg, nilOL)
+              W32 -> return (nilOL, s_reg, nilOL)
+              W16 -> do
+                reg_tmp <- getNewRegNat format
+                return
+                  ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535))
+                  , reg_tmp
+                  , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16)))
+                  )
+              W8  -> do
+                reg_tmp <- getNewRegNat format
+                return
+                  ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255))
+                  , reg_tmp
+                  , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24)))
+                  )
+              _   -> panic "genCall: Clz wrong format"
+          let cntlz = unitOL (CNTLZ format reg_dst reg)
+          return $ s_code `appOL` pre `appOL` cntlz `appOL` post
+
+genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
+ = do dflags <- getDynFlags
+      let platform = targetPlatform dflags
+          reg_dst = getRegisterReg platform (CmmLocal dst)
+      if target32Bit platform && width == W64
+        then do
+          let format = II32
+          ChildCode64 code vr_lo <- iselExpr64 src
+          lbl1 <- getBlockIdNat
+          lbl2 <- getBlockIdNat
+          lbl3 <- getBlockIdNat
+          x' <- getNewRegNat format
+          x'' <- getNewRegNat format
+          r' <- getNewRegNat format
+          cnttzlo <- cnttz format reg_dst vr_lo
+          let vr_hi = getHiVRegFromLo vr_lo
+              cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
+                             , BCC NE lbl2
+                             , BCC ALWAYS lbl1
+
+                             , NEWBLOCK lbl1
+                             , ADD x' vr_hi (RIImm (ImmInt (-1)))
+                             , ANDC x'' x' vr_hi
+                             , CNTLZ format r' x''
+                               -- 32 + (32 - clz(x''))
+                             , SUBFC reg_dst r' (RIImm (ImmInt 64))
+                             , BCC ALWAYS lbl3
+
+                             , NEWBLOCK lbl2
+                             ]
+                        `appOL` cnttzlo `appOL`
+                        toOL [ BCC ALWAYS lbl3
+
+                             , NEWBLOCK lbl3
+                             ]
+          return $ code `appOL` cnttz64
+        else do
+          let format = if width == W64 then II64 else II32
+          (s_reg, s_code) <- getSomeReg src
+          (reg_ctz, pre_code) <-
+            case width of
+              W64 -> return (s_reg, nilOL)
+              W32 -> return (s_reg, nilOL)
+              W16 -> do
+                reg_tmp <- getNewRegNat format
+                return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1))
+              W8  -> do
+                reg_tmp <- getNewRegNat format
+                return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256)))
+              _   -> panic "genCall: Ctz wrong format"
+          ctz_code <- cnttz format reg_dst reg_ctz
+          return $ s_code `appOL` pre_code `appOL` ctz_code
+        where
+          -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1))
+          -- see Henry S. Warren, Hacker's Delight, p 107
+          cnttz format dst src = do
+            let format_bits = 8 * formatInBytes format
+            x' <- getNewRegNat format
+            x'' <- getNewRegNat format
+            r' <- getNewRegNat format
+            return $ toOL [ ADD x' src (RIImm (ImmInt (-1)))
+                          , ANDC x'' x' src
+                          , CNTLZ format r' x''
+                          , SUBFC dst r' (RIImm (ImmInt (format_bits)))
+                          ]
+
 genCCall target dest_regs argsAndHints
  = do dflags <- getDynFlags
-      genCCall' dflags (platformToGCP (targetPlatform dflags))
-                target dest_regs argsAndHints
+      let platform = targetPlatform dflags
+      case target of
+        PrimTarget (MO_S_QuotRem  width) -> divOp1 platform True  width
+                                                   dest_regs argsAndHints
+        PrimTarget (MO_U_QuotRem  width) -> divOp1 platform False width
+                                                   dest_regs argsAndHints
+        PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs
+                                                   argsAndHints
+        PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
+                                                argsAndHints
+        PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
+        PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
+        PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
+                                                   dest_regs argsAndHints
+        PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width
+                                                   dest_regs argsAndHints
+        _ -> genCCall' dflags (platformToGCP platform)
+                       target dest_regs argsAndHints
+        where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y]
+                = do let reg_q = getRegisterReg platform (CmmLocal res_q)
+                         reg_r = getRegisterReg platform (CmmLocal res_r)
+                         fmt   = intFormat width
+                     (x_reg, x_code) <- getSomeReg arg_x
+                     (y_reg, y_code) <- getSomeReg arg_y
+                     return $       y_code `appOL` x_code
+                            `appOL` toOL [ DIV fmt signed reg_q x_reg y_reg
+                                         , MULL fmt reg_r reg_q (RIReg y_reg)
+                                         , SUBF reg_r reg_r x_reg
+                                         ]
+
+              divOp1 _ _ _ _ _
+                = panic "genCCall: Wrong number of arguments for divOp1"
+              divOp2 platform width [res_q, res_r]
+                                    [arg_x_high, arg_x_low, arg_y]
+                = do let reg_q = getRegisterReg platform (CmmLocal res_q)
+                         reg_r = getRegisterReg platform (CmmLocal res_r)
+                         fmt   = intFormat width
+                         half  = 4 * (formatInBytes fmt)
+                     (xh_reg, xh_code) <- getSomeReg arg_x_high
+                     (xl_reg, xl_code) <- getSomeReg arg_x_low
+                     (y_reg, y_code) <- getSomeReg arg_y
+                     s <- getNewRegNat fmt
+                     b <- getNewRegNat fmt
+                     v <- getNewRegNat fmt
+                     vn1 <- getNewRegNat fmt
+                     vn0 <- getNewRegNat fmt
+                     un32 <- getNewRegNat fmt
+                     tmp  <- getNewRegNat fmt
+                     un10 <- getNewRegNat fmt
+                     un1 <- getNewRegNat fmt
+                     un0 <- getNewRegNat fmt
+                     q1 <- getNewRegNat fmt
+                     rhat <- getNewRegNat fmt
+                     tmp1 <- getNewRegNat fmt
+                     q0 <- getNewRegNat fmt
+                     un21 <- getNewRegNat fmt
+                     again1 <- getBlockIdNat
+                     no1 <- getBlockIdNat
+                     then1 <- getBlockIdNat
+                     endif1 <- getBlockIdNat
+                     again2 <- getBlockIdNat
+                     no2 <- getBlockIdNat
+                     then2 <- getBlockIdNat
+                     endif2 <- getBlockIdNat
+                     return $ y_code `appOL` xl_code `appOL` xh_code `appOL`
+                              -- see Hacker's Delight p 196 Figure 9-3
+                              toOL [ -- b = 2 ^ (bits_in_word / 2)
+                                     LI b (ImmInt 1)
+                                   , SL fmt b b (RIImm (ImmInt half))
+                                     -- s = clz(y)
+                                   , CNTLZ fmt s y_reg
+                                     -- v = y << s
+                                   , SL fmt v y_reg (RIReg s)
+                                     -- vn1 = upper half of v
+                                   , SR fmt vn1 v (RIImm (ImmInt half))
+                                     -- vn0 = lower half of v
+                                   , CLRLI fmt vn0 v half
+                                     -- un32 = (u1 << s)
+                                     --      | (u0 >> (bits_in_word - s))
+                                   , SL fmt un32 xh_reg (RIReg s)
+                                   , SUBFC tmp s
+                                        (RIImm (ImmInt (8 * formatInBytes fmt)))
+                                   , SR fmt tmp xl_reg (RIReg tmp)
+                                   , OR un32 un32 (RIReg tmp)
+                                     -- un10 = u0 << s
+                                   , SL fmt un10 xl_reg (RIReg s)
+                                     -- un1 = upper half of un10
+                                   , SR fmt un1 un10 (RIImm (ImmInt half))
+                                     -- un0 = lower half of un10
+                                   , CLRLI fmt un0 un10 half
+                                     -- q1 = un32/vn1
+                                   , DIV fmt False q1 un32 vn1
+                                     -- rhat = un32 - q1*vn1
+                                   , MULL fmt tmp q1 (RIReg vn1)
+                                   , SUBF rhat tmp un32
+                                   , BCC ALWAYS again1
+
+                                   , NEWBLOCK again1
+                                     -- if (q1 >= b || q1*vn0 > b*rhat + un1)
+                                   , CMPL fmt q1 (RIReg b)
+                                   , BCC GEU then1
+                                   , BCC ALWAYS no1
+
+                                   , NEWBLOCK no1
+                                   , MULL fmt tmp q1 (RIReg vn0)
+                                   , SL fmt tmp1 rhat (RIImm (ImmInt half))
+                                   , ADD tmp1 tmp1 (RIReg un1)
+                                   , CMPL fmt tmp (RIReg tmp1)
+                                   , BCC LEU endif1
+                                   , BCC ALWAYS then1
+
+                                   , NEWBLOCK then1
+                                     -- q1 = q1 - 1
+                                   , ADD q1 q1 (RIImm (ImmInt (-1)))
+                                     -- rhat = rhat + vn1
+                                   , ADD rhat rhat (RIReg vn1)
+                                     -- if (rhat < b) goto again1
+                                   , CMPL fmt rhat (RIReg b)
+                                   , BCC LTT again1
+                                   , BCC ALWAYS endif1
+
+                                   , NEWBLOCK endif1
+                                     -- un21 = un32*b + un1 - q1*v
+                                   , SL fmt un21 un32 (RIImm (ImmInt half))
+                                   , ADD un21 un21 (RIReg un1)
+                                   , MULL fmt tmp q1 (RIReg v)
+                                   , SUBF un21 tmp un21
+                                     -- compute second quotient digit
+                                     -- q0 = un21/vn1
+                                   , DIV fmt False q0 un21 vn1
+                                     -- rhat = un21- q0*vn1
+                                   , MULL fmt tmp q0 (RIReg vn1)
+                                   , SUBF rhat tmp un21
+                                   , BCC ALWAYS again2
+
+                                   , NEWBLOCK again2
+                                     -- if (q0>b || q0*vn0 > b*rhat + un0)
+                                   , CMPL fmt q0 (RIReg b)
+                                   , BCC GEU then2
+                                   , BCC ALWAYS no2
+
+                                   , NEWBLOCK no2
+                                   , MULL fmt tmp q0 (RIReg vn0)
+                                   , SL fmt tmp1 rhat (RIImm (ImmInt half))
+                                   , ADD tmp1 tmp1 (RIReg un0)
+                                   , CMPL fmt tmp (RIReg tmp1)
+                                   , BCC LEU endif2
+                                   , BCC ALWAYS then2
+
+                                   , NEWBLOCK then2
+                                     -- q0 = q0 - 1
+                                   , ADD q0 q0 (RIImm (ImmInt (-1)))
+                                     -- rhat = rhat + vn1
+                                   , ADD rhat rhat (RIReg vn1)
+                                     -- if (rhat<b) goto again2
+                                   , CMPL fmt rhat (RIReg b)
+                                   , BCC LTT again2
+                                   , BCC ALWAYS endif2
+
+                                   , NEWBLOCK endif2
+                                     -- compute remainder
+                                     -- r = (un21*b + un0 - q0*v) >> s
+                                   , SL fmt reg_r un21 (RIImm (ImmInt half))
+                                   , ADD reg_r reg_r (RIReg un0)
+                                   , MULL fmt tmp q0 (RIReg v)
+                                   , SUBF reg_r tmp reg_r
+                                   , SR fmt reg_r reg_r (RIReg s)
+                                     -- compute quotient
+                                     -- q = q1*b + q0
+                                   , SL fmt reg_q q1 (RIImm (ImmInt half))
+                                   , ADD reg_q reg_q (RIReg q0)
+                                   ]
+              divOp2 _ _ _ _
+                = panic "genCCall: Wrong number of arguments for divOp2"
+              multOp2 platform width [res_h, res_l] [arg_x, arg_y]
+                = do let reg_h = getRegisterReg platform (CmmLocal res_h)
+                         reg_l = getRegisterReg platform (CmmLocal res_l)
+                         fmt = intFormat width
+                     (x_reg, x_code) <- getSomeReg arg_x
+                     (y_reg, y_code) <- getSomeReg arg_y
+                     return $ y_code `appOL` x_code
+                            `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg)
+                                         , MULHU fmt reg_h x_reg y_reg
+                                         ]
+              multOp2 _ _ _ _
+                = panic "genCall: Wrong number of arguments for multOp2"
+              add2Op platform [res_h, res_l] [arg_x, arg_y]
+                = do let reg_h = getRegisterReg platform (CmmLocal res_h)
+                         reg_l = getRegisterReg platform (CmmLocal res_l)
+                     (x_reg, x_code) <- getSomeReg arg_x
+                     (y_reg, y_code) <- getSomeReg arg_y
+                     return $ y_code `appOL` x_code
+                            `appOL` toOL [ LI reg_h (ImmInt 0)
+                                         , ADDC reg_l x_reg y_reg
+                                         , ADDZE reg_h reg_h
+                                         ]
+              add2Op _ _ _
+                = panic "genCCall: Wrong number of arguments/results for add2"
+
+              -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1,
+              -- which is 0 for borrow and 1 otherwise. We need 1 and 0
+              -- so xor with 1.
+              subcOp platform [res_r, res_c] [arg_x, arg_y]
+                = do let reg_r = getRegisterReg platform (CmmLocal res_r)
+                         reg_c = getRegisterReg platform (CmmLocal res_c)
+                     (x_reg, x_code) <- getSomeReg arg_x
+                     (y_reg, y_code) <- getSomeReg arg_y
+                     return $ y_code `appOL` x_code
+                            `appOL` toOL [ LI reg_c (ImmInt 0)
+                                         , SUBFC reg_r y_reg (RIReg x_reg)
+                                         , ADDZE reg_c reg_c
+                                         , XOR reg_c reg_c (RIImm (ImmInt 1))
+                                         ]
+              subcOp _ _ _
+                = panic "genCCall: Wrong number of arguments/results for subc"
+              addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y]
+                = do let reg_r = getRegisterReg platform (CmmLocal res_r)
+                         reg_c = getRegisterReg platform (CmmLocal res_c)
+                     (x_reg, x_code) <- getSomeReg arg_x
+                     (y_reg, y_code) <- getSomeReg arg_y
+                     return $ y_code `appOL` x_code
+                            `appOL` toOL [ instr reg_r y_reg x_reg,
+                                           -- SUBFO argument order reversed!
+                                           MFOV (intFormat width) reg_c
+                                         ]
+              addSubCOp _ _ _ _ _
+                = panic "genCall: Wrong number of arguments/results for addC"
 
 -- TODO: replace 'Int' by an enum such as 'PPC_64ABI'
 data GenCCallPlatform = GCPLinux | GCPDarwin | GCPLinux64ELF !Int | GCPAIX
-                      deriving Eq
 
 platformToGCP :: Platform -> GenCCallPlatform
 platformToGCP platform = case platformOS platform of
@@ -1175,15 +1516,6 @@ genCCall'
 -}
 
 
-genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _
- = return $ unitOL LWSYNC
-
-genCCall' _ _ (PrimTarget MO_Touch) _ _
- = return $ nilOL
-
-genCCall' _ _ (PrimTarget (MO_Prefetch_Data _)) _ _
- = return $ nilOL
-
 genCCall' dflags gcp target dest_regs args
   = ASSERT(not $ any (`elem` [II16]) $ map cmmTypeFormat argReps)
         -- we rely on argument promotion in the codeGen
@@ -1767,21 +2099,22 @@ trivialCode rep _ instr x y = do
     let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
     return (Any (intFormat rep) code)
 
-shiftCode
+shiftMulCode
         :: Width
+        -> Bool
         -> (Format-> Reg -> Reg -> RI -> Instr)
         -> CmmExpr
         -> CmmExpr
         -> NatM Register
-shiftCode width instr x (CmmLit (CmmInt y _))
-    | Just imm <- makeImmediate width False y
+shiftMulCode width sign instr x (CmmLit (CmmInt y _))
+    | Just imm <- makeImmediate width sign y
     = do
         (src1, code1) <- getSomeReg x
         let format = intFormat width
         let code dst = code1 `snocOL` instr format dst src1 (RIImm imm)
         return (Any format code)
 
-shiftCode width instr x y = do
+shiftMulCode width _ instr x y = do
     (src1, code1) <- getSomeReg x
     (src2, code2) <- getSomeReg y
     let format = intFormat width
@@ -1800,6 +2133,12 @@ trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
                  -> CmmExpr -> CmmExpr -> NatM Register
 trivialCodeNoImm format instr x y = trivialCodeNoImm' format (instr format) x y
 
+trivialCodeNoImmSign :: Format -> Bool
+                     -> (Format -> Bool -> Reg -> Reg -> Reg -> Instr)
+                     -> CmmExpr -> CmmExpr -> NatM Register
+trivialCodeNoImmSign  format sgn instr x y
+  = trivialCodeNoImm' format (instr format sgn) x y
+
 
 trivialUCode
         :: Format
@@ -1813,19 +2152,16 @@ trivialUCode rep instr x = do
 
 -- There is no "remainder" instruction on the PPC, so we have to do
 -- it the hard way.
--- The "div" parameter is the division instruction to use (DIVW or DIVWU)
+-- The "sgn" parameter is the signedness for the division instruction
 
-remainderCode :: Width -> (Reg -> Reg -> Reg -> Instr)
-    -> CmmExpr -> CmmExpr -> NatM Register
-remainderCode rep div x y = do
-    dflags <- getDynFlags
-    let mull_instr = if target32Bit $ targetPlatform dflags then MULLW
-                                                            else MULLD
+remainderCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
+remainderCode rep sgn x y = do
+    let fmt = intFormat rep
     (src1, code1) <- getSomeReg x
     (src2, code2) <- getSomeReg y
     let code dst = code1 `appOL` code2 `appOL` toOL [
-                div dst src1 src2,
-                mull_instr dst dst (RIReg src2),
+                DIV fmt sgn dst src1 src2,
+                MULL fmt dst dst (RIReg src2),
                 SUBF dst dst src1
             ]
     return (Any (intFormat rep) code)
author	Peter Trommler <ptrommler@acm.org>	2017-04-25 18:37:16 -0400
committer	Ben Gamari <ben@smart-cactus.org>	2017-04-25 18:39:50 -0400
commit	89a3241f708502e8fbcfaddbbe634790ad9cd02a (patch)
tree	9264c8bb53a229ea2a6a55debd0088bb88354bd2 /compiler/nativeGen/PPC/CodeGen.hs
parent	9373994acaf1b73fe0e7cf8e03594c63cec8d235 (diff)
download	haskell-89a3241f708502e8fbcfaddbbe634790ad9cd02a.tar.gz