summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTakano Akio <tak@anoak.io>2017-01-10 14:36:00 -0500
committerBen Gamari <ben@smart-cactus.org>2017-01-10 14:36:38 -0500
commit09bce7accd330e99b1667f8b4eda7def722d6f0c (patch)
treecfc8fc62ab052aa70c8c3750eb3452ce723da0d0
parent266a9dc4cd34008f1162eb276032c85ef8371842 (diff)
downloadhaskell-09bce7accd330e99b1667f8b4eda7def722d6f0c.tar.gz
Mark *FB functions INLINE[0] (Fixes #13001)
When fusion rules successfully fire, we are left with calls to *FB functions. They are higher-order functions, and therefore they often benefit from inlining. This is particularly important when then final consumer is a strict fold (foldl', length, etc.), because not inlining these functions means allocating a function closure for each element in the list, which often is more costly than what fusion eliminates. Nofib shows a slight increase in the binary size: ------------------------------------------------------------------------ Program Size Allocs Runtime Elapsed TotalMem ------------------------------------------------------------------------ gen_regexps -0.3% 0.0% 0.000 0.000 0.0% puzzle +0.8% 0.0% 0.089 0.090 0.0% reptile +0.8% -0.0% 0.008 0.008 0.0% ------------------------------------------------------------------------ Min -0.3% -0.0% -7.3% -7.1% 0.0% Max +0.8% +0.0% +7.8% +7.7% +1.8% Geometric Mean +0.0% -0.0% +0.2% +0.2% +0.0% ------------------------------------------------------------------------ Reviewers: simonpj, austin, hvr, bgamari Reviewed By: simonpj Subscribers: simonpj, thomie Differential Revision: https://phabricator.haskell.org/D2951 GHC Trac Issues: #13001
-rw-r--r--libraries/base/Data/Maybe.hs2
-rw-r--r--libraries/base/Data/OldList.hs2
-rw-r--r--libraries/base/GHC/Base.hs2
-rw-r--r--libraries/base/GHC/Enum.hs27
-rwxr-xr-xlibraries/base/GHC/Exts.hs1
-rw-r--r--libraries/base/GHC/List.hs42
-rw-r--r--testsuite/tests/perf/should_run/T13001.hs7
-rw-r--r--testsuite/tests/perf/should_run/T13001.stdout1
-rw-r--r--testsuite/tests/perf/should_run/all.T8
9 files changed, 69 insertions, 23 deletions
diff --git a/libraries/base/Data/Maybe.hs b/libraries/base/Data/Maybe.hs
index e81cdf7a4c..d8aad53b9e 100644
--- a/libraries/base/Data/Maybe.hs
+++ b/libraries/base/Data/Maybe.hs
@@ -293,7 +293,7 @@ mapMaybe f (x:xs) =
"mapMaybeList" [1] forall f. foldr (mapMaybeFB (:) f) [] = mapMaybe f
#-}
-{-# NOINLINE [0] mapMaybeFB #-}
+{-# INLINE [0] mapMaybeFB #-} -- See Note [Inline FB functions] in GHC.List
mapMaybeFB :: (b -> r -> r) -> (a -> Maybe b) -> a -> r -> r
mapMaybeFB cons f x next = case f x of
Nothing -> next
diff --git a/libraries/base/Data/OldList.hs b/libraries/base/Data/OldList.hs
index 1846182c95..c3618c42a9 100644
--- a/libraries/base/Data/OldList.hs
+++ b/libraries/base/Data/OldList.hs
@@ -1098,7 +1098,7 @@ words s = case dropWhile {-partain:Char.-}isSpace s of
"wordsList" [1] wordsFB (:) [] = words
#-}
wordsFB :: ([Char] -> b -> b) -> b -> String -> b
-{-# NOINLINE [0] wordsFB #-}
+{-# INLINE [0] wordsFB #-} -- See Note [Inline FB functions] in GHC.List
wordsFB c n = go
where
go s = case dropWhile isSpace s of
diff --git a/libraries/base/GHC/Base.hs b/libraries/base/GHC/Base.hs
index 490596e869..25c78b2f22 100644
--- a/libraries/base/GHC/Base.hs
+++ b/libraries/base/GHC/Base.hs
@@ -902,7 +902,7 @@ map f (x:xs) = f x : map f xs
-- Note eta expanded
mapFB :: (elt -> lst -> lst) -> (a -> elt) -> a -> lst -> lst
-{-# INLINE [0] mapFB #-}
+{-# INLINE [0] mapFB #-} -- See Note [Inline FB functions] in GHC.List
mapFB c f = \x ys -> c (f x) ys
-- The rules for map work like this.
diff --git a/libraries/base/GHC/Enum.hs b/libraries/base/GHC/Enum.hs
index a8b6600c33..50ca4a0b2b 100644
--- a/libraries/base/GHC/Enum.hs
+++ b/libraries/base/GHC/Enum.hs
@@ -331,7 +331,7 @@ instance Enum Char where
-- We can do better than for Ints because we don't
-- have hassles about arithmetic overflow at maxBound
-{-# INLINE [0] eftCharFB #-}
+{-# INLINE [0] eftCharFB #-} -- See Note [Inline FB functions] in GHC.List
eftCharFB :: (Char -> a -> a) -> a -> Int# -> Int# -> a
eftCharFB c n x0 y = go x0
where
@@ -345,7 +345,7 @@ eftChar x y | isTrue# (x ># y ) = []
-- For enumFromThenTo we give up on inlining
-{-# NOINLINE [0] efdCharFB #-}
+{-# INLINE [0] efdCharFB #-} -- See Note [Inline FB functions] in GHC.List
efdCharFB :: (Char -> a -> a) -> a -> Int# -> Int# -> a
efdCharFB c n x1 x2
| isTrue# (delta >=# 0#) = go_up_char_fb c n x1 delta 0x10FFFF#
@@ -361,7 +361,7 @@ efdChar x1 x2
where
!delta = x2 -# x1
-{-# NOINLINE [0] efdtCharFB #-}
+{-# INLINE [0] efdtCharFB #-} -- See Note [Inline FB functions] in GHC.List
efdtCharFB :: (Char -> a -> a) -> a -> Int# -> Int# -> Int# -> a
efdtCharFB c n x1 x2 lim
| isTrue# (delta >=# 0#) = go_up_char_fb c n x1 delta lim
@@ -476,7 +476,7 @@ eftInt x0 y | isTrue# (x0 ># y) = []
then []
else go (x +# 1#)
-{-# INLINE [0] eftIntFB #-}
+{-# INLINE [0] eftIntFB #-} -- See Note [Inline FB functions] in GHC.List
eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
eftIntFB c n x0 y | isTrue# (x0 ># y) = n
| otherwise = go x0
@@ -514,7 +514,7 @@ efdtInt x1 x2 y
| isTrue# (x2 >=# x1) = efdtIntUp x1 x2 y
| otherwise = efdtIntDn x1 x2 y
-{-# INLINE [0] efdtIntFB #-}
+{-# INLINE [0] efdtIntFB #-} -- See Note [Inline FB functions] in GHC.List
efdtIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> Int# -> r
efdtIntFB c n x1 x2 y
| isTrue# (x2 >=# x1) = efdtIntUpFB c n x1 x2 y
@@ -536,6 +536,7 @@ efdtIntUp x1 x2 y -- Be careful about overflow!
in I# x1 : go_up x2
-- Requires x2 >= x1
+{-# INLINE [0] efdtIntUpFB #-} -- See Note [Inline FB functions] in GHC.List
efdtIntUpFB :: (Int -> r -> r) -> r -> Int# -> Int# -> Int# -> r
efdtIntUpFB c n x1 x2 y -- Be careful about overflow!
| isTrue# (y <# x2) = if isTrue# (y <# x1) then n else I# x1 `c` n
@@ -566,6 +567,7 @@ efdtIntDn x1 x2 y -- Be careful about underflow!
in I# x1 : go_dn x2
-- Requires x2 <= x1
+{-# INLINE [0] efdtIntDnFB #-} -- See Note [Inline FB functions] in GHC.List
efdtIntDnFB :: (Int -> r -> r) -> r -> Int# -> Int# -> Int# -> r
efdtIntDnFB c n x1 x2 y -- Be careful about underflow!
| isTrue# (y ># x2) = if isTrue# (y ># x1) then n else I# x1 `c` n
@@ -655,7 +657,7 @@ eftWord x0 y | isTrue# (x0 `gtWord#` y) = []
then []
else go (x `plusWord#` 1##)
-{-# INLINE [0] eftWordFB #-}
+{-# INLINE [0] eftWordFB #-} -- See Note [Inline FB functions] in GHC.List
eftWordFB :: (Word -> r -> r) -> r -> Word# -> Word# -> r
eftWordFB c n x0 y | isTrue# (x0 `gtWord#` y) = n
| otherwise = go x0
@@ -693,7 +695,7 @@ efdtWord x1 x2 y
| isTrue# (x2 `geWord#` x1) = efdtWordUp x1 x2 y
| otherwise = efdtWordDn x1 x2 y
-{-# INLINE [0] efdtWordFB #-}
+{-# INLINE [0] efdtWordFB #-} -- See Note [Inline FB functions] in GHC.List
efdtWordFB :: (Word -> r -> r) -> r -> Word# -> Word# -> Word# -> r
efdtWordFB c n x1 x2 y
| isTrue# (x2 `geWord#` x1) = efdtWordUpFB c n x1 x2 y
@@ -715,6 +717,7 @@ efdtWordUp x1 x2 y -- Be careful about overflow!
in W# x1 : go_up x2
-- Requires x2 >= x1
+{-# INLINE [0] efdtWordUpFB #-} -- See Note [Inline FB functions] in GHC.List
efdtWordUpFB :: (Word -> r -> r) -> r -> Word# -> Word# -> Word# -> r
efdtWordUpFB c n x1 x2 y -- Be careful about overflow!
| isTrue# (y `ltWord#` x2) = if isTrue# (y `ltWord#` x1) then n else W# x1 `c` n
@@ -745,6 +748,7 @@ efdtWordDn x1 x2 y -- Be careful about underflow!
in W# x1 : go_dn x2
-- Requires x2 <= x1
+{-# INLINE [0] efdtWordDnFB #-} -- See Note [Inline FB functions] in GHC.List
efdtWordDnFB :: (Word -> r -> r) -> r -> Word# -> Word# -> Word# -> r
efdtWordDnFB c n x1 x2 y -- Be careful about underflow!
| isTrue# (y `gtWord#` x2) = if isTrue# (y `gtWord#` x1) then n else W# x1 `c` n
@@ -805,7 +809,8 @@ We do not do it for Int this way because hand-tuned code already exists, and
the special case varies more from the general case, due to the issue of overflows.
-}
-{-# NOINLINE [0] enumDeltaIntegerFB #-}
+{-# INLINE [0] enumDeltaIntegerFB #-}
+-- See Note [Inline FB functions] in GHC.List
enumDeltaIntegerFB :: (Integer -> b -> b) -> Integer -> Integer -> b
enumDeltaIntegerFB c x0 d = go x0
where go x = x `seq` (x `c` go (x+d))
@@ -817,7 +822,8 @@ enumDeltaInteger x d = x `seq` (x : enumDeltaInteger (x+d) d)
-- head (drop 1000000 [1 .. ]
-- works
-{-# NOINLINE [0] enumDeltaToIntegerFB #-}
+{-# INLINE [0] enumDeltaToIntegerFB #-}
+-- See Note [Inline FB functions] in GHC.List
-- Don't inline this until RULE "enumDeltaToInteger" has had a chance to fire
enumDeltaToIntegerFB :: (Integer -> a -> a) -> a
-> Integer -> Integer -> Integer -> a
@@ -825,7 +831,8 @@ enumDeltaToIntegerFB c n x delta lim
| delta >= 0 = up_fb c n x delta lim
| otherwise = dn_fb c n x delta lim
-{-# NOINLINE [0] enumDeltaToInteger1FB #-}
+{-# INLINE [0] enumDeltaToInteger1FB #-}
+-- See Note [Inline FB functions] in GHC.List
-- Don't inline this until RULE "enumDeltaToInteger" has had a chance to fire
enumDeltaToInteger1FB :: (Integer -> a -> a) -> a
-> Integer -> Integer -> a
diff --git a/libraries/base/GHC/Exts.hs b/libraries/base/GHC/Exts.hs
index 2e047e306b..f6204aabd4 100755
--- a/libraries/base/GHC/Exts.hs
+++ b/libraries/base/GHC/Exts.hs
@@ -124,6 +124,7 @@ sortWith f = sortBy (\x y -> compare (f x) (f y))
groupWith :: Ord b => (a -> b) -> [a] -> [[a]]
groupWith f xs = build (\c n -> groupByFB c n (\x y -> f x == f y) (sortWith f xs))
+{-# INLINE [0] groupByFB #-} -- See Note [Inline FB functions] in GHC.List
groupByFB :: ([a] -> lst -> lst) -> lst -> (a -> a -> Bool) -> [a] -> lst
groupByFB c n eq xs0 = groupByFBCore xs0
where groupByFBCore [] = n
diff --git a/libraries/base/GHC/List.hs b/libraries/base/GHC/List.hs
index e1903c34d6..3eab407b1b 100644
--- a/libraries/base/GHC/List.hs
+++ b/libraries/base/GHC/List.hs
@@ -153,7 +153,7 @@ filter pred (x:xs)
| pred x = x : filter pred xs
| otherwise = filter pred xs
-{-# NOINLINE [0] filterFB #-}
+{-# INLINE [0] filterFB #-} -- See Note [Inline FB functions]
filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
filterFB c p x r | p x = x `c` r
| otherwise = r
@@ -206,6 +206,28 @@ The oneShot annotations used in this module are correct, as we only use them in
argumets to foldr, where we know how the arguments are called.
-}
+{-
+Note [Inline FB functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+After fusion rules successfully fire, we are usually left with one or more calls
+to list-producing functions abstracted over cons and nil. Here we call them
+FB functions because their names usually end with 'FB'. It's a good idea to
+inline FB functions because:
+
+* They are higher-order functions and therefore benefits from inlining.
+
+* When the final consumer is a left fold, inlining the FB functions is the only
+ way to make arity expansion to happen. See Note [Left fold via right fold].
+
+For this reason we mark all FB functions INLINE [0]. The [0] phase-specifier
+ensures that calls to FB functions can be written back to the original form
+when no fusion happens.
+
+Without these inline pragmas, the loop in perf/should_run/T13001 won't be
+allocation-free. Also see Trac #13001.
+-}
+
-- ----------------------------------------------------------------------------
-- | A strict version of 'foldl'.
@@ -267,7 +289,7 @@ scanl = scanlGo
foldr (scanlFB f (:)) (constScanl []) bs a = tail (scanl f a bs)
#-}
-{-# INLINE [0] scanlFB #-}
+{-# INLINE [0] scanlFB #-} -- See Note [Inline FB functions]
scanlFB :: (b -> a -> b) -> (b -> c -> c) -> a -> (b -> c) -> b -> c
scanlFB f c = \b g -> oneShot (\x -> let b' = f x b in b' `c` g b')
-- See Note [Left folds via right fold]
@@ -305,7 +327,7 @@ scanl' = scanlGo'
foldr (scanlFB' f (:)) (flipSeqScanl' []) bs a = tail (scanl' f a bs)
#-}
-{-# INLINE [0] scanlFB' #-}
+{-# INLINE [0] scanlFB' #-} -- See Note [Inline FB functions]
scanlFB' :: (b -> a -> b) -> (b -> c -> c) -> a -> (b -> c) -> b -> c
scanlFB' f c = \b g -> oneShot (\x -> let !b' = f x b in b' `c` g b')
-- See Note [Left folds via right fold]
@@ -372,7 +394,7 @@ strictUncurryScanr :: (a -> b -> c) -> (a, b) -> c
strictUncurryScanr f pair = case pair of
(x, y) -> f x y
-{-# INLINE [0] scanrFB #-}
+{-# INLINE [0] scanrFB #-} -- See Note [Inline FB functions]
scanrFB :: (a -> b -> b) -> (b -> c -> c) -> a -> (b, c) -> (b, c)
scanrFB f c = \x (r, est) -> (f x r, r `c` est)
@@ -428,7 +450,7 @@ minimum xs = foldl1 min xs
iterate :: (a -> a) -> a -> [a]
iterate f x = x : iterate f (f x)
-{-# NOINLINE [0] iterateFB #-}
+{-# INLINE [0] iterateFB #-} -- See Note [Inline FB functions]
iterateFB :: (a -> b -> b) -> (a -> a) -> a -> b
iterateFB c f x0 = go x0
where go x = x `c` go (f x)
@@ -445,7 +467,7 @@ repeat :: a -> [a]
-- The pragma just gives the rules more chance to fire
repeat x = xs where xs = x : xs
-{-# INLINE [0] repeatFB #-} -- ditto
+{-# INLINE [0] repeatFB #-} -- ditto -- See Note [Inline FB functions]
repeatFB :: (a -> b -> b) -> a -> b
repeatFB c x = xs where xs = x `c` xs
@@ -486,7 +508,7 @@ takeWhile p (x:xs)
| p x = x : takeWhile p xs
| otherwise = []
-{-# INLINE [0] takeWhileFB #-}
+{-# INLINE [0] takeWhileFB #-} -- See Note [Inline FB functions]
takeWhileFB :: (a -> Bool) -> (a -> b -> b) -> b -> a -> b -> b
takeWhileFB p c n = \x r -> if p x then x `c` r else n
@@ -572,7 +594,7 @@ unsafeTake m (x:xs) = x : unsafeTake (m - 1) xs
flipSeqTake :: a -> Int -> a
flipSeqTake x !_n = x
-{-# INLINE [0] takeFB #-}
+{-# INLINE [0] takeFB #-} -- See Note [Inline FB functions]
takeFB :: (a -> b -> b) -> b -> a -> (Int -> b) -> Int -> b
-- The \m accounts for the fact that takeFB is used in a higher-order
-- way by takeFoldr, so it's better to inline. A good example is
@@ -914,7 +936,7 @@ zip [] _bs = []
zip _as [] = []
zip (a:as) (b:bs) = (a,b) : zip as bs
-{-# INLINE [0] zipFB #-}
+{-# INLINE [0] zipFB #-} -- See Note [Inline FB functions]
zipFB :: ((a, b) -> c -> d) -> a -> b -> c -> d
zipFB c = \x y r -> (x,y) `c` r
@@ -953,7 +975,7 @@ zipWith f (a:as) (b:bs) = f a b : zipWith f as bs
-- zipWithFB must have arity 2 since it gets two arguments in the "zipWith"
-- rule; it might not get inlined otherwise
-{-# INLINE [0] zipWithFB #-}
+{-# INLINE [0] zipWithFB #-} -- See Note [Inline FB functions]
zipWithFB :: (a -> b -> c) -> (d -> e -> a) -> d -> e -> b -> c
zipWithFB c f = \x y r -> (x `f` y) `c` r
diff --git a/testsuite/tests/perf/should_run/T13001.hs b/testsuite/tests/perf/should_run/T13001.hs
new file mode 100644
index 0000000000..ac6d110385
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T13001.hs
@@ -0,0 +1,7 @@
+import Data.IORef
+
+main :: IO ()
+main = do
+ ref <- newIORef 10000
+ n <- readIORef ref
+ print $ length $ [0::Int, 2 .. n]
diff --git a/testsuite/tests/perf/should_run/T13001.stdout b/testsuite/tests/perf/should_run/T13001.stdout
new file mode 100644
index 0000000000..4f0a7c2636
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T13001.stdout
@@ -0,0 +1 @@
+5001
diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T
index 424bdcb04f..89ae3ecc70 100644
--- a/testsuite/tests/perf/should_run/all.T
+++ b/testsuite/tests/perf/should_run/all.T
@@ -452,3 +452,11 @@ test('T12996',
only_ways(['normal'])],
compile_and_run,
['-O2'])
+
+test('T13001',
+ [stats_num_field('bytes allocated',
+ [ (wordsize(32), 46728, 20)
+ , (wordsize(64), 50600, 20) ]),
+ only_ways(['normal'])],
+ compile_and_run,
+ ['-O2'])