From f0bac29baea62e7e1cd1a2659107cc7c029c9165 Mon Sep 17 00:00:00 2001
From: Simon Peyton Jones <simonpj@microsoft.com>
Date: Fri, 19 Nov 2021 18:34:11 +0000
Subject: Make INLINE/NOINLINE pragmas a bgi less constraining

We can inline a bit earlier than the previous pragmas said.  I think
they dated from an era in which the InitialPhase did no inlining.

I don't think this patch will have much effect, but it's
a bit cleaner.
---
 libraries/base/Data/List.hs |  5 ++-
 libraries/base/GHC/Arr.hs   |  3 +-
 libraries/base/GHC/Base.hs  |  7 ++--
 libraries/base/GHC/Enum.hs  | 12 +++---
 libraries/base/GHC/Float.hs |  3 +-
 libraries/base/GHC/Real.hs  | 98 ++++++++++++++++++++++++++-------------------
 6 files changed, 75 insertions(+), 53 deletions(-)

(limited to 'libraries')

diff --git a/libraries/base/Data/List.hs b/libraries/base/Data/List.hs
index 2c6b4a7363..e7ce57aa75 100644
--- a/libraries/base/Data/List.hs
+++ b/libraries/base/Data/List.hs
@@ -762,7 +762,10 @@ minimumBy cmp = fromMaybe (errorWithoutStackTrace "minimumBy: empty structure")
 -- >>> genericLength [1..200] :: Int8
 -- -56
 genericLength           :: (Num i) => [a] -> i
-{-# NOINLINE [1] genericLength #-}
+{-# NOINLINE [2] genericLength #-}
+    -- Give time for the RULEs for (++) to fire in InitialPhase
+    -- It's recursive, so won't inline anyway,
+    -- but saying so is more explicit
 genericLength []        =  0
 genericLength (_:l)     =  1 + genericLength l
 
diff --git a/libraries/base/GHC/Arr.hs b/libraries/base/GHC/Arr.hs
index 06951d3851..63d79237c5 100644
--- a/libraries/base/GHC/Arr.hs
+++ b/libraries/base/GHC/Arr.hs
@@ -497,7 +497,8 @@ eqArray arr1@(Array l1 u1 n1 _) arr2@(Array l2 u2 n2 _) =
     l1 == l2 && u1 == u2 &&
     and [unsafeAt arr1 i == unsafeAt arr2 i | i <- [0 .. n1 - 1]]
 
-{-# INLINE [1] cmpArray #-}
+{-# INLINE [2] cmpArray #-}
+-- See Note [Allow time for type-specialisation rules to fire] in GHC.Real
 cmpArray :: (Ix i, Ord e) => Array i e -> Array i e -> Ordering
 cmpArray arr1 arr2 = compare (assocs arr1) (assocs arr2)
 
diff --git a/libraries/base/GHC/Base.hs b/libraries/base/GHC/Base.hs
index 14d4f1ab51..356b692898 100644
--- a/libraries/base/GHC/Base.hs
+++ b/libraries/base/GHC/Base.hs
@@ -1384,9 +1384,10 @@ The rules for map work like this.
 -- first list.
 
 (++) :: [a] -> [a] -> [a]
-{-# NOINLINE [1] (++) #-}    -- We want the RULE to fire first.
-                             -- It's recursive, so won't inline anyway,
-                             -- but saying so is more explicit
+{-# NOINLINE [2] (++) #-}
+  -- Give time for the RULEs for (++) to fire in InitialPhase
+  -- It's recursive, so won't inline anyway,
+  -- but saying so is more explicit
 (++) []     ys = ys
 (++) (x:xs) ys = x : xs ++ ys
 
diff --git a/libraries/base/GHC/Enum.hs b/libraries/base/GHC/Enum.hs
index 4292592f44..5c184256f4 100644
--- a/libraries/base/GHC/Enum.hs
+++ b/libraries/base/GHC/Enum.hs
@@ -422,7 +422,7 @@ eftCharFB c n x0 y = go x0
                     go x | isTrue# (x ># y) = n
                          | otherwise        = C# (chr# x) `c` go (x +# 1#)
 
-{-# NOINLINE [1] eftChar #-}
+{-# NOINLINE [1] eftChar #-} -- Inline after rule "eftChar" is inactive
 eftChar :: Int# -> Int# -> String
 eftChar x y | isTrue# (x ># y ) = []
             | otherwise         = C# (chr# x) : eftChar (x +# 1#) y
@@ -437,7 +437,7 @@ efdCharFB c n x1 x2
   where
     !delta = x2 -# x1
 
-{-# NOINLINE [1] efdChar #-}
+{-# NOINLINE [1] efdChar #-} -- Inline after rule "efdChar" is inactive
 efdChar :: Int# -> Int# -> String
 efdChar x1 x2
   | isTrue# (delta >=# 0#) = go_up_char_list x1 delta 0x10FFFF#
@@ -453,7 +453,7 @@ efdtCharFB c n x1 x2 lim
   where
     !delta = x2 -# x1
 
-{-# NOINLINE [1] efdtChar #-}
+{-# NOINLINE [1] efdtChar #-} -- Inline after rule "efdtChar" is inactive
 efdtChar :: Int# -> Int# -> Int# -> String
 efdtChar x1 x2 lim
   | isTrue# (delta >=# 0#) = go_up_char_list x1 delta lim
@@ -910,7 +910,7 @@ enumDeltaIntegerFB :: (Integer -> b -> b) -> Integer -> Integer -> b
 enumDeltaIntegerFB c x0 d = go x0
   where go x = x `seq` (x `c` go (x+d))
 
-{-# NOINLINE [1] enumDeltaInteger #-}
+{-# NOINLINE [1] enumDeltaInteger #-} -- Inline after rule "enumDeltaInteger" is inactive
 enumDeltaInteger :: Integer -> Integer -> [Integer]
 enumDeltaInteger x d = x `seq` (x : enumDeltaInteger (x+d) d)
 -- strict accumulator, so
@@ -936,13 +936,13 @@ enumDeltaToInteger1FB c n x0 lim = go (x0 :: Integer)
                         go x | x > lim   = n
                              | otherwise = x `c` go (x+1)
 
-{-# NOINLINE [1] enumDeltaToInteger #-}
+{-# NOINLINE [1] enumDeltaToInteger #-} -- Inline after rule "efdtInteger" is inactive
 enumDeltaToInteger :: Integer -> Integer -> Integer -> [Integer]
 enumDeltaToInteger x delta lim
   | delta >= 0 = up_list x delta lim
   | otherwise  = dn_list x delta lim
 
-{-# NOINLINE [1] enumDeltaToInteger1 #-}
+{-# NOINLINE [1] enumDeltaToInteger1 #-} -- Inline after rule "efdtInteger1" is inactive
 enumDeltaToInteger1 :: Integer -> Integer -> [Integer]
 -- Special case for Delta = 1
 enumDeltaToInteger1 x0 lim = go (x0 :: Integer)
diff --git a/libraries/base/GHC/Float.hs b/libraries/base/GHC/Float.hs
index 6ec4e84ceb..f614410a6b 100644
--- a/libraries/base/GHC/Float.hs
+++ b/libraries/base/GHC/Float.hs
@@ -1125,7 +1125,8 @@ Now, here's Lennart's code (which works):
 "fromRat/Double"    fromRat = (fromRational :: Rational -> Double)
   #-}
 
-{-# NOINLINE [1] fromRat #-}
+{-# NOINLINE [2] fromRat #-}
+-- See Note [Allow time for type-specialisation rules to fire] in GHC.Real
 fromRat :: (RealFloat a) => Rational -> a
 
 -- Deal with special cases first, delegating the real work to fromRat'
diff --git a/libraries/base/GHC/Real.hs b/libraries/base/GHC/Real.hs
index a4d97b0497..d970a3e1ed 100644
--- a/libraries/base/GHC/Real.hs
+++ b/libraries/base/GHC/Real.hs
@@ -40,6 +40,17 @@ infixl 7  %
 default ()              -- Double isn't available yet,
                         -- and we shouldn't be using defaults anyway
 
+{- Note [Allow time for type-specialisation rules to fire]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+  lcm = ...
+  {-# RULES "lcm/Integer->Integer->Integer" lcm = integerLcm  #-}
+
+We want to delay inlining `lcm` until the rule (which is a form of manual
+type specialisation) has had a chance to fire.  It can fire in InitialPhase,
+so INLINE[2] seems sufficient.  c.f. #20709
+-}
+
 ------------------------------------------------------------------------
 -- Divide by zero and arithmetic overflow
 ------------------------------------------------------------------------
@@ -617,6 +628,8 @@ fromIntegral = fromInteger . toInteger
 -- > NaN
 realToFrac :: (Real a, Fractional b) => a -> b
 {-# NOINLINE [1] realToFrac #-}
+-- See Note [Allow time for type-specialisation rules to fire]
+-- These rule actually appear in other modules, e.g. GHC.Float
 realToFrac = fromRational . toRational
 
 --------------------------------------------------------------
@@ -665,47 +678,48 @@ x0 ^ y0 | y0 < 0    = errorWithoutStackTrace "Negative exponent"
 x ^^ n          =  if n >= 0 then x^n else recip (x^(negate n))
 
 {- Note [Half of y - 1]
-   ~~~~~~~~~~~~~~~~~~~~~
-   Since y is guaranteed to be odd and positive here,
-   half of y - 1 can be computed as y `quot` 2, optimising subtraction away.
--}
-
-{- Note [Inlining (^)
-   ~~~~~~~~~~~~~~~~~~~~~
-   The INLINABLE pragma allows (^) to be specialised at its call sites.
-   If it is called repeatedly at the same type, that can make a huge
-   difference, because of those constants which can be repeatedly
-   calculated.
-
-   Currently the fromInteger calls are not floated because we get
-             \d1 d2 x y -> blah
-   after the gentle round of simplification. -}
-
-{- Rules for powers with known small exponent
-    see #5237
-    For small exponents, (^) is inefficient compared to manually
-    expanding the multiplication tree.
-    Here, rules for the most common exponent types are given.
-    The range of exponents for which rules are given is quite
-    arbitrary and kept small to not unduly increase the number of rules.
-    0 and 1 are excluded based on the assumption that nobody would
-    write x^0 or x^1 in code and the cases where an exponent could
-    be statically resolved to 0 or 1 are rare.
-
-    It might be desirable to have corresponding rules also for
-    exponents of other types (e. g., Word), but it's doubtful they
-    would fire, since the exponents of other types tend to get
-    floated out before the rule has a chance to fire.
-
-    Also desirable would be rules for (^^), but I haven't managed
-    to get those to fire.
-
-    Note: Trying to save multiplications by sharing the square for
-    exponents 4 and 5 does not save time, indeed, for Double, it is
-    up to twice slower, so the rules contain flat sequences of
-    multiplications.
+~~~~~~~~~~~~~~~~~~~~~~~~
+Since y is guaranteed to be odd and positive here,
+half of y - 1 can be computed as y `quot` 2, optimising subtraction away.
+
+Note [Inlining (^)
+~~~~~~~~~~~~~~~~~~
+The INLINABLE pragma allows (^) to be specialised at its call sites.
+If it is called repeatedly at the same type, that can make a huge
+difference, because of those constants which can be repeatedly
+calculated.
+
+Currently the fromInteger calls are not floated because we get
+          \d1 d2 x y -> blah
+after the gentle round of simplification.
+
+Note [Powers with small exponent]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For small exponents, (^) is inefficient compared to manually
+expanding the multiplication tree (see #5237).
+
+Here, rules for the most common exponent types are given.
+The range of exponents for which rules are given is quite
+arbitrary and kept small to not unduly increase the number of rules.
+0 and 1 are excluded based on the assumption that nobody would
+write x^0 or x^1 in code and the cases where an exponent could
+be statically resolved to 0 or 1 are rare.
+
+It might be desirable to have corresponding rules also for
+exponents of other types (e. g., Word), but it's doubtful they
+would fire, since the exponents of other types tend to get
+floated out before the rule has a chance to fire.
+
+Also desirable would be rules for (^^), but I haven't managed
+to get those to fire.
+
+Note: Trying to save multiplications by sharing the square for
+exponents 4 and 5 does not save time, indeed, for Double, it is
+up to twice slower, so the rules contain flat sequences of
+multiplications.
 -}
 
+-- See Note [Powers with small exponent]
 {-# RULES
 "^2/Int"        forall x. x ^ (2 :: Int) = let u = x in u*u
 "^3/Int"        forall x. x ^ (3 :: Int) = let u = x in u*u*u
@@ -787,7 +801,9 @@ x ^^ n          =  if n >= 0 then x^n else recip (x^(negate n))
 -- the result may be negative if one of the arguments is @'minBound'@ (and
 -- necessarily is if the other is @0@ or @'minBound'@) for such types.
 gcd             :: (Integral a) => a -> a -> a
-{-# NOINLINE [1] gcd #-}
+{-# SPECIALISE gcd :: Int -> Int -> Int #-}
+{-# SPECIALISE gcd :: Word -> Word -> Word #-}
+{-# NOINLINE [2] gcd #-} -- See Note [Allow time for type-specialisation rules to fire]
 gcd x y         =  gcd' (abs x) (abs y)
                    where gcd' a 0  =  a
                          gcd' a b  =  gcd' b (a `rem` b)
@@ -796,7 +812,7 @@ gcd x y         =  gcd' (abs x) (abs y)
 lcm             :: (Integral a) => a -> a -> a
 {-# SPECIALISE lcm :: Int -> Int -> Int #-}
 {-# SPECIALISE lcm :: Word -> Word -> Word #-}
-{-# NOINLINE [1] lcm #-}
+{-# NOINLINE [2] lcm #-} -- See Note [Allow time for type-specialisation rules to fire]
 lcm _ 0         =  0
 lcm 0 _         =  0
 lcm x y         =  abs ((x `quot` (gcd x y)) * y)
-- 
cgit v1.2.1