1 files changed, 42 insertions, 38 deletions
diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs
index 3cb28217f2..e837d29783 100644
--- a/compiler/cmm/CmmOpt.hs
+++ b/compiler/cmm/CmmOpt.hs
@@ -1,10 +1,6 @@
-{-# LANGUAGE CPP #-}
-
 -- The default iteration limit is a bit too low for the definitions
 -- in this module.
-#if __GLASGOW_HASKELL__ >= 800
 {-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}
-#endif
 
 -----------------------------------------------------------------------------
 --
@@ -21,7 +17,7 @@ module CmmOpt (
         cmmMachOpFoldM
  ) where
 
-#include "HsVersions.h"
+import GhcPrelude
 
 import CmmUtils
 import Cmm
@@ -357,35 +353,51 @@ cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))]
         MO_U_Quot rep
            | Just p <- exactLog2 n ->
                  Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)])
+        MO_U_Rem rep
+           | Just _ <- exactLog2 n ->
+                 Just (cmmMachOpFold dflags (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)])
         MO_S_Quot rep
            | Just p <- exactLog2 n,
-             CmmReg _ <- x ->   -- We duplicate x below, hence require
+             CmmReg _ <- x ->   -- We duplicate x in signedQuotRemHelper, hence require
+                                -- it is a reg.  FIXME: remove this restriction.
+                Just (cmmMachOpFold dflags (MO_S_Shr rep)
+                  [signedQuotRemHelper rep p, CmmLit (CmmInt p rep)])
+        MO_S_Rem rep
+           | Just p <- exactLog2 n,
+             CmmReg _ <- x ->   -- We duplicate x in signedQuotRemHelper, hence require
                                 -- it is a reg.  FIXME: remove this restriction.
-                -- shift right is not the same as quot, because it rounds
-                -- to minus infinity, whereasq quot rounds toward zero.
-                -- To fix this up, we add one less than the divisor to the
-                -- dividend if it is a negative number.
-                --
-                -- to avoid a test/jump, we use the following sequence:
-                --      x1 = x >> word_size-1  (all 1s if -ve, all 0s if +ve)
-                --      x2 = y & (divisor-1)
-                --      result = (x+x2) >>= log2(divisor)
-                -- this could be done a bit more simply using conditional moves,
-                -- but we're processor independent here.
-                --
-                -- we optimise the divide by 2 case slightly, generating
-                --      x1 = x >> word_size-1  (unsigned)
-                --      return = (x + x1) >>= log2(divisor)
-                let
-                    bits = fromIntegral (widthInBits rep) - 1
-                    shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
-                    x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
-                    x2 = if p == 1 then x1 else
-                         CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
-                    x3 = CmmMachOp (MO_Add rep) [x, x2]
-                in
-                Just (cmmMachOpFold dflags (MO_S_Shr rep) [x3, CmmLit (CmmInt p rep)])
+                -- We replace (x `rem` 2^p) by (x - (x `quot` 2^p) * 2^p).
+                -- Moreover, we fuse MO_S_Shr (last operation of MO_S_Quot)
+                -- and MO_S_Shl (multiplication by 2^p) into a single MO_And operation.
+                Just (cmmMachOpFold dflags (MO_Sub rep)
+                    [x, cmmMachOpFold dflags (MO_And rep)
+                      [signedQuotRemHelper rep p, CmmLit (CmmInt (- n) rep)]])
         _ -> Nothing
+  where
+    -- In contrast with unsigned integers, for signed ones
+    -- shift right is not the same as quot, because it rounds
+    -- to minus infinity, whereas quot rounds toward zero.
+    -- To fix this up, we add one less than the divisor to the
+    -- dividend if it is a negative number.
+    --
+    -- to avoid a test/jump, we use the following sequence:
+    --      x1 = x >> word_size-1  (all 1s if -ve, all 0s if +ve)
+    --      x2 = y & (divisor-1)
+    --      result = x + x2
+    -- this could be done a bit more simply using conditional moves,
+    -- but we're processor independent here.
+    --
+    -- we optimise the divide by 2 case slightly, generating
+    --      x1 = x >> word_size-1  (unsigned)
+    --      return = x + x1
+    signedQuotRemHelper :: Width -> Integer -> CmmExpr
+    signedQuotRemHelper rep p = CmmMachOp (MO_Add rep) [x, x2]
+      where
+        bits = fromIntegral (widthInBits rep) - 1
+        shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
+        x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
+        x2 = if p == 1 then x1 else
+             CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
 
 -- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x
 -- Unfortunately this needs a unique supply because x might not be a
@@ -410,14 +422,6 @@ That's what the constant-folding operations on comparison operators do above.
 -- -----------------------------------------------------------------------------
 -- Utils
 
-isLit :: CmmExpr -> Bool
-isLit (CmmLit _) = True
-isLit _          = False
-
-isComparisonExpr :: CmmExpr -> Bool
-isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
-isComparisonExpr _                  = False
-
 isPicReg :: CmmExpr -> Bool
 isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
 isPicReg _ = False