summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Graf <sebastian.graf@kit.edu>2018-09-17 21:11:09 +0200
committerKrzysztof Gogolewski <krz.gogolewski@gmail.com>2018-09-17 21:11:10 +0200
commite655aac18c5b240f27fcaf26317ad87be5ce8b96 (patch)
treee496319636b98443874749e07791028e671ddd87
parent43967c0c7d2d0110cfc5f9d64a7dab3a3dda8953 (diff)
downloadhaskell-e655aac18c5b240f27fcaf26317ad87be5ce8b96.tar.gz
Make sure forM_ and related functions fuse cleanly
Summary: It was revealed in #8763 that it's hard to come up with a list fusion helper for `efdtIntFB` that doesn't duplicated occurrences of `c`, which is crucial in guaranteeing that it is inlined. Not inlining `c` led to spoiled join points, in turn leading to unnecessary heap allocation. This patch tackles the problem from a different angle: Fixing all consumers instead of the less often used producer `efdtIntFB` by inserting an INLINE pragma in the appropriate places. See https://ghc.haskell.org/trac/ghc/ticket/8763#comment:76 and the new Note [List fusion and continuations in 'c']. A quick run of NoFib revealed no regression or improvements whatsoever. Reviewers: hvr, bgamari, simonpj Reviewed By: simonpj Subscribers: simonpj, rwbarton, carter GHC Trac Issues: #8763 Differential Revision: https://phabricator.haskell.org/D5131
-rw-r--r--libraries/base/Data/Foldable.hs110
-rw-r--r--testsuite/tests/perf/compiler/T4007.stdout4
-rw-r--r--testsuite/tests/perf/should_run/T8763.hs41
-rw-r--r--testsuite/tests/perf/should_run/all.T7
4 files changed, 153 insertions, 9 deletions
diff --git a/libraries/base/Data/Foldable.hs b/libraries/base/Data/Foldable.hs
index 441a9be21c..f5f3112138 100644
--- a/libraries/base/Data/Foldable.hs
+++ b/libraries/base/Data/Foldable.hs
@@ -512,20 +512,27 @@ deriving instance Foldable Down
-- | Monadic fold over the elements of a structure,
-- associating to the right, i.e. from right to left.
foldrM :: (Foldable t, Monad m) => (a -> b -> m b) -> b -> t a -> m b
-foldrM f z0 xs = foldl f' return xs z0
- where f' k x z = f x z >>= k
+foldrM f z0 xs = foldl c return xs z0
+ -- See Note [List fusion and continuations in 'c']
+ where c k x z = f x z >>= k
+ {-# INLINE c #-}
-- | Monadic fold over the elements of a structure,
-- associating to the left, i.e. from left to right.
foldlM :: (Foldable t, Monad m) => (b -> a -> m b) -> b -> t a -> m b
-foldlM f z0 xs = foldr f' return xs z0
- where f' x k z = f z x >>= k
+foldlM f z0 xs = foldr c return xs z0
+ -- See Note [List fusion and continuations in 'c']
+ where c x k z = f z x >>= k
+ {-# INLINE c #-}
-- | Map each element of a structure to an action, evaluate these
-- actions from left to right, and ignore the results. For a version
-- that doesn't ignore the results see 'Data.Traversable.traverse'.
traverse_ :: (Foldable t, Applicative f) => (a -> f b) -> t a -> f ()
-traverse_ f = foldr ((*>) . f) (pure ())
+traverse_ f = foldr c (pure ())
+ -- See Note [List fusion and continuations in 'c']
+ where c x k = f x *> k
+ {-# INLINE c #-}
-- | 'for_' is 'traverse_' with its arguments flipped. For a version
-- that doesn't ignore the results see 'Data.Traversable.for'.
@@ -547,7 +554,10 @@ for_ = flip traverse_
-- As of base 4.8.0.0, 'mapM_' is just 'traverse_', specialized to
-- 'Monad'.
mapM_ :: (Foldable t, Monad m) => (a -> m b) -> t a -> m ()
-mapM_ f= foldr ((>>) . f) (return ())
+mapM_ f = foldr c (return ())
+ -- See Note [List fusion and continuations in 'c']
+ where c x k = f x >> k
+ {-# INLINE c #-}
-- | 'forM_' is 'mapM_' with its arguments flipped. For a version that
-- doesn't ignore the results see 'Data.Traversable.forM'.
@@ -561,7 +571,10 @@ forM_ = flip mapM_
-- ignore the results. For a version that doesn't ignore the results
-- see 'Data.Traversable.sequenceA'.
sequenceA_ :: (Foldable t, Applicative f) => t (f a) -> f ()
-sequenceA_ = foldr (*>) (pure ())
+sequenceA_ = foldr c (pure ())
+ -- See Note [List fusion and continuations in 'c']
+ where c m k = m *> k
+ {-# INLINE c #-}
-- | Evaluate each monadic action in the structure from left to right,
-- and ignore the results. For a version that doesn't ignore the
@@ -570,7 +583,10 @@ sequenceA_ = foldr (*>) (pure ())
-- As of base 4.8.0.0, 'sequence_' is just 'sequenceA_', specialized
-- to 'Monad'.
sequence_ :: (Foldable t, Monad m) => t (m a) -> m ()
-sequence_ = foldr (>>) (return ())
+sequence_ = foldr c (return ())
+ -- See Note [List fusion and continuations in 'c']
+ where c m k = m >> k
+ {-# INLINE c #-}
-- | The sum of a collection of actions, generalizing 'concat'.
--
@@ -650,6 +666,84 @@ find :: Foldable t => (a -> Bool) -> t a -> Maybe a
find p = getFirst . foldMap (\ x -> First (if p x then Just x else Nothing))
{-
+Note [List fusion and continuations in 'c']
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose we define
+ mapM_ f = foldr ((>>) . f) (return ())
+(this is the way it used to be).
+
+Now suppose we want to optimise the call
+
+ mapM_ <big> (build g)
+ where
+ g c n = ...(c x1 y1)...(c x2 y2)....n...
+
+GHC used to proceed like this:
+
+ mapM_ <big> (build g)
+
+ = { Defintion of mapM_ }
+ foldr ((>>) . <big>) (return ()) (build g)
+
+ = { foldr/build rule }
+ g ((>>) . <big>) (return ())
+
+ = { Inline g }
+ let c = (>>) . <big>
+ n = return ()
+ in ...(c x1 y1)...(c x2 y2)....n...
+
+The trouble is that `c`, being big, will not be inlined. And that can
+be absolutely terrible for performance, as we saw in Trac #8763.
+
+It's much better to define
+
+ mapM_ f = foldr c (return ())
+ where
+ c x k = f x >> k
+ {-# INLINE c #-}
+
+Now we get
+ mapM_ <big> (build g)
+
+ = { inline mapM_ }
+ foldr c (return ()) (build g)
+ where c x k = f x >> k
+ {-# INLINE c #-}
+ f = <big>
+
+Notice that `f` does not inline into the RHS of `c`,
+because the INLINE pragma stops it; see
+Note [Simplifying inside stable unfoldings] in SimplUtils.
+Continuing:
+
+ = { foldr/build rule }
+ g c (return ())
+ where ...
+ c x k = f x >> k
+ {-# INLINE c #-}
+ f = <big>
+
+ = { inline g }
+ ...(c x1 y1)...(c x2 y2)....n...
+ where c x k = f x >> k
+ {-# INLINE c #-}
+ f = <big>
+ n = return ()
+
+ Now, crucially, `c` does inline
+
+ = { inline c }
+ ...(f x1 >> y1)...(f x2 >> y2)....n...
+ where f = <big>
+ n = return ()
+
+And all is well! The key thing is that the fragment
+`(f x1 >> y1)` is inlined into the body of the builder
+`g`.
+-}
+
+{-
Note [maximumBy/minimumBy space usage]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When the type signatures of maximumBy and minimumBy were generalized to work
diff --git a/testsuite/tests/perf/compiler/T4007.stdout b/testsuite/tests/perf/compiler/T4007.stdout
index 9b23359663..14e7bf82e3 100644
--- a/testsuite/tests/perf/compiler/T4007.stdout
+++ b/testsuite/tests/perf/compiler/T4007.stdout
@@ -1,8 +1,10 @@
Rule fired: Class op foldr (BUILTIN)
-Rule fired: Class op >> (BUILTIN)
Rule fired: Class op return (BUILTIN)
Rule fired: unpack (GHC.Base)
Rule fired: fold/build (GHC.Base)
+Rule fired: Class op >> (BUILTIN)
+Rule fired: Class op >> (BUILTIN)
+Rule fired: SPEC/T4007 sequence__c @ IO _ _ (T4007)
Rule fired: <# (BUILTIN)
Rule fired: tagToEnum# (BUILTIN)
Rule fired: unpack-list (GHC.Base)
diff --git a/testsuite/tests/perf/should_run/T8763.hs b/testsuite/tests/perf/should_run/T8763.hs
new file mode 100644
index 0000000000..90c4436ce9
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T8763.hs
@@ -0,0 +1,41 @@
+-- | The fusion helper for @enumFromThenTo \@Int@ had multiple
+-- occurences of @c@, which made the simplifier refuse to inline it.
+-- The new implementation for @efdtInt{Up,Dn}FB@ only have a single
+-- occurence of @c@ which the simplifier inlines unconditionally.
+module Main (main) where
+
+import Control.Monad (when, forM_)
+import GHC.ST
+
+nop :: Monad m => a -> m ()
+nop _ = return ()
+{-# NOINLINE nop #-}
+
+-- This is the baseline, using @enumFromTo@ which already had only a
+-- single occurence of @c@.
+f :: Int -> ST s ()
+f n =
+ do
+ forM_ [2..n] $ \p -> do
+ let isPrime = p == (p - 1)
+ when isPrime $
+ forM_ [p + p, p + p + p .. n] $ \k -> do
+ nop k
+{-# NOINLINE f #-}
+
+g :: Int -> ST s ()
+g n =
+ do
+ forM_ [2,3..n] $ \p -> do
+ -- This do block should be too big to get inlined multiple times.
+ -- Pad with @nop@s as necessary if this doesn't reproduce anymore.
+ let isPrime = p == (p - 1)
+ when isPrime $
+ forM_ [p + p, p + p + p .. n] $ \k -> do
+ nop k
+{-# NOINLINE g #-}
+
+main :: IO ()
+main = do
+ -- runST (f 40000000) `seq` return ()
+ runST (g 40000000) `seq` return ()
diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T
index 1a85e7073d..37ce0a454f 100644
--- a/testsuite/tests/perf/should_run/all.T
+++ b/testsuite/tests/perf/should_run/all.T
@@ -518,6 +518,13 @@ test('T13001',
compile_and_run,
['-O2'])
+test('T8763',
+ [stats_num_field('bytes allocated',
+ [ (wordsize(64), 41056, 20) ]),
+ only_ways(['normal'])],
+ compile_and_run,
+ ['-O2'])
+
test('T12990',
[stats_num_field('bytes allocated',
[ (wordsize(64), 20040936, 5) ]),