Make `drop` and `dropWhile` fuse (#18964)wip/T18964

I copied the fusion framework we have in place for `take`. T18964 asserts that we regress neither when fusion fires nor when it doesn't. Fixes #18964.
author: Sebastian Graf <sebastian.graf@kit.edu> 2022-12-02 16:46:45 +0100
committer: Sebastian Graf <sebastian.graf@kit.edu> 2022-12-06 16:23:53 +0100
commit: 4bb308efc01bf290269d776c74deb4ede7b24902 (patch)
tree: b7b00e163463a26cfb5d088eb9d877f75b215ed2
parent: a9d9b8c0458e838f331ead62dca272665ecbf20d (diff)
download: haskell-wip/T18964.tar.gz
4 files changed, 65 insertions, 25 deletions
diff --git a/libraries/base/GHC/List.hs b/libraries/base/GHC/List.hs
index b96c16cfab..658dabe302 100644
--- a/libraries/base/GHC/List.hs
+++ b/libraries/base/GHC/List.hs
@@ -517,9 +517,9 @@ scanl' = scanlGo'
 -- See Note [scanl rewrite rules]
 {-# RULES
 "scanl'"  [~1] forall f a bs . scanl' f a bs =
-  build (\c n -> a `c` foldr (scanlFB' f c) (flipSeqScanl' n) bs a)
+  build (\c n -> a `c` foldr (scanlFB' f c) (flipSeq n) bs a)
 "scanlList'" [1] forall f a bs .
-    foldr (scanlFB' f (:)) (flipSeqScanl' []) bs a = tail (scanl' f a bs)
+    foldr (scanlFB' f (:)) (flipSeq []) bs a = tail (scanl' f a bs)
  #-}
 
 {-# INLINE [0] scanlFB' #-} -- See Note [Inline FB functions]
@@ -527,10 +527,6 @@ scanlFB' :: (b -> a -> b) -> (b -> c -> c) -> a -> (b -> c) -> b -> c
 scanlFB' f c = \b g -> oneShot (\x -> let !b' = f x b in b' `c` g b')
   -- See Note [Left folds via right fold]
 
-{-# INLINE [0] flipSeqScanl' #-}
-flipSeqScanl' :: a -> b -> a
-flipSeqScanl' a !_b = a
-
 {-
 Note [scanl rewrite rules]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -890,12 +886,23 @@ takeWhileFB p c n = \x r -> if p x then x `c` r else n
 -- []
 -- >>> dropWhile (< 0) [1,2,3]
 -- [1,2,3]
+{-# NOINLINE [1] dropWhile #-}
 dropWhile               :: (a -> Bool) -> [a] -> [a]
 dropWhile _ []          =  []
 dropWhile p xs@(x:xs')
             | p x       =  dropWhile p xs'
             | otherwise =  xs
 
+{-# INLINE [0] dropWhileFB #-} -- See Note [Inline FB functions]
+dropWhileFB :: (a -> Bool) -> (a -> b -> b) -> b -> a -> (Bool -> b) -> Bool -> b
+dropWhileFB p c _n x xs = \drp -> if drp && p x then xs True else x `c` xs False
+
+{-# RULES
+"dropWhile"     [~1] forall p xs. dropWhile p xs =
+                                build (\c n -> foldr (dropWhileFB p c n) (flipSeq n) xs True)
+"dropWhileList" [1]  forall p xs. foldr (dropWhileFB p (:) []) (flipSeq []) xs True = dropWhile p xs
+ #-}
+
 -- | 'take' @n@, applied to a list @xs@, returns the prefix of @xs@
 -- of length @n@, or @xs@ itself if @n >= 'length' xs@.
 --
@@ -932,7 +939,7 @@ take n xs | 0 < n     = unsafeTake n xs
 
 -- A version of take that takes the whole list if it's given an argument less
 -- than 1.
-{-# NOINLINE [1] unsafeTake #-}
+{-# NOINLINE [0] unsafeTake #-} -- See Note [Inline FB functions]
 unsafeTake :: Int -> [a] -> [a]
 unsafeTake !_  []     = []
 unsafeTake 1   (x: _) = [x]
@@ -941,20 +948,18 @@ unsafeTake m   (x:xs) = x : unsafeTake (m - 1) xs
 {-# RULES
 "take"     [~1] forall n xs . take n xs =
   build (\c nil -> if 0 < n
-                   then foldr (takeFB c nil) (flipSeqTake nil) xs n
+                   then foldr (takeFB c nil) (flipSeq nil) xs n
                    else nil)
-"unsafeTakeList"  [1] forall n xs . foldr (takeFB (:) []) (flipSeqTake []) xs n
+"unsafeTakeList"  [1] forall n xs . foldr (takeFB (:) []) (flipSeq []) xs n
                                         = unsafeTake n xs
  #-}
 
-{-# INLINE [0] flipSeqTake #-}
--- Just flip seq, specialized to Int, but not inlined too early.
--- It's important to force the numeric argument here, even though
--- it's not used. Otherwise, take n [] doesn't force n. This is
--- bad for strictness analysis and unboxing, and leads to increased
--- allocation in T7257.
-flipSeqTake :: a -> Int -> a
-flipSeqTake x !_n = x
+{-# INLINE [0] flipSeq #-}
+-- Just flip seq, but not inlined too early.
+-- It's important to force the argument here, even though it's not used.
+-- Otherwise, take n [] can't unbox n, leading to increased allocation in T7257.
+flipSeq :: a -> b -> a
+flipSeq x !_n = x
 
 {-# INLINE [0] takeFB #-} -- See Note [Inline FB functions]
 takeFB :: (a -> b -> b) -> b -> a -> (Int -> b) -> Int -> b
@@ -993,17 +998,31 @@ drop n xs     | n <= 0 =  xs
 drop _ []              =  []
 drop n (_:xs)          =  drop (n-1) xs
 #else /* hack away */
-{-# INLINE drop #-}
+{-# INLINE[1] drop #-} -- Why [1]? See justification on take! => RULES
 drop n ls
   | n <= 0     = ls
   | otherwise  = unsafeDrop n ls
-  where
-    -- A version of drop that drops the whole list if given an argument
-    -- less than 1
-    unsafeDrop :: Int -> [a] -> [a]
-    unsafeDrop !_ []     = []
-    unsafeDrop 1  (_:xs) = xs
-    unsafeDrop m  (_:xs) = unsafeDrop (m - 1) xs
+
+-- A version of drop that drops the whole list if given an argument
+-- less than 1
+{-# NOINLINE [0] unsafeDrop #-} -- See Note [Inline FB functions]
+unsafeDrop :: Int -> [a] -> [a]
+unsafeDrop !_ []     = []
+unsafeDrop 1  (_:xs) = xs
+unsafeDrop m  (_:xs) = unsafeDrop (m - 1) xs
+
+{-# RULES
+"drop"     [~1] forall n xs . drop n xs =
+  build (\c nil -> if n <= 0
+                   then foldr c nil xs
+                   else foldr (dropFB c nil) (flipSeq nil) xs n)
+"unsafeDropList"  [1] forall n xs . foldr (dropFB (:) []) (flipSeq []) xs n
+                                        = unsafeDrop n xs
+ #-}
+
+{-# INLINE [0] dropFB #-} -- See Note [Inline FB functions]
+dropFB :: (a -> b -> b) -> b -> a -> (Int -> b) -> Int -> b
+dropFB c _n x xs = \ m -> if m <= 0 then x `c` xs m else xs (m-1)
 #endif
 
 -- | 'splitAt' @n xs@ returns a tuple where first element is @xs@ prefix of
diff --git a/testsuite/tests/perf/should_run/T18964.hs b/testsuite/tests/perf/should_run/T18964.hs
new file mode 100644
index 0000000000..e9c7b915e7
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T18964.hs
@@ -0,0 +1,15 @@
+import GHC.Exts
+import Data.Int
+
+main :: IO ()
+main = do
+  -- drop should fuse away and the program should consume O(1) space
+  -- If fusion fails, this allocates about 640MB.
+  print $ sum $ drop 10 [0..10000000::Int64]
+  -- Here, drop can't fuse. This asserts that we don't regress in allocations in that case either
+  -- If we don't do a good job here, we'll see more than 6.4MB of allocs.
+  print $ lazy $ sum $ lazy $ drop 10 $ lazy [0..100000::Int64]
+
+  -- and once more with dropWhile
+  print $ sum $ dropWhile (< 10) [0..10000000::Int64]
+  print $ lazy $ sum $ lazy $ dropWhile (< 10) $ lazy [0..100000::Int64]
diff --git a/testsuite/tests/perf/should_run/T18964.stdout b/testsuite/tests/perf/should_run/T18964.stdout
new file mode 100644
index 0000000000..b1c0d38f19
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T18964.stdout
@@ -0,0 +1,4 @@
+50000004999955
+5000049955
+50000004999955
+5000049955
diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T
index 59ad878a4c..ba75906c7d 100644
--- a/testsuite/tests/perf/should_run/all.T
+++ b/testsuite/tests/perf/should_run/all.T
@@ -408,3 +408,5 @@ test('T21839r',
         only_ways(['normal'])],
     compile_and_run,
     ['-O'])
+
+test('T18964', [collect_stats('bytes allocated', 1), only_ways(['normal'])], compile_and_run, ['-O'])
author	Sebastian Graf <sebastian.graf@kit.edu>	2022-12-02 16:46:45 +0100
committer	Sebastian Graf <sebastian.graf@kit.edu>	2022-12-06 16:23:53 +0100
commit	4bb308efc01bf290269d776c74deb4ede7b24902 (patch)
tree	b7b00e163463a26cfb5d088eb9d877f75b215ed2
parent	a9d9b8c0458e838f331ead62dca272665ecbf20d (diff)
download	haskell-wip/T18964.tar.gz