diff options
author | Hécate <hecate+gitlab@glitchbra.in> | 2020-10-11 11:20:56 +0200 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-11-01 08:52:08 -0500 |
commit | ce1bb9959e2465db1c3880f3c532ae7e1be39b41 (patch) | |
tree | fac7bff20989102e49f38be23c9703ceabcfbfae | |
parent | dfd27445308d1ed2df8826c2a045130e918e8192 (diff) | |
download | haskell-ce1bb9959e2465db1c3880f3c532ae7e1be39b41.tar.gz |
Fix a leak in `transpose`
This patch was authored by David Feuer <david.feuer@gmail.com>
-rw-r--r-- | libraries/base/Data/OldList.hs | 50 | ||||
-rw-r--r-- | libraries/base/tests/T18642.hs | 27 | ||||
-rw-r--r-- | libraries/base/tests/T18642.stdout | 3 | ||||
-rw-r--r-- | libraries/base/tests/all.T | 1 |
4 files changed, 75 insertions, 6 deletions
diff --git a/libraries/base/Data/OldList.hs b/libraries/base/Data/OldList.hs index 9331113d70..f4ef222673 100644 --- a/libraries/base/Data/OldList.hs +++ b/libraries/base/Data/OldList.hs @@ -547,19 +547,57 @@ intercalate xs xss = concat (intersperse xs xss) -- -- >>> transpose [[10,11],[20],[],[30,31,32]] -- [[10,20,30],[11,31],[32]] -transpose :: [[a]] -> [[a]] -transpose [] = [] -transpose ([] : xss) = transpose xss -transpose ((x:xs) : xss) = (x : hds) : transpose (xs : tls) +transpose :: [[a]] -> [[a]] +transpose [] = [] +transpose ([] : xss) = transpose xss +transpose ((x : xs) : xss) = combine x hds xs tls where -- We tie the calculations of heads and tails together -- to prevent heads from leaking into tails and vice versa. -- unzip makes the selector thunk arrangements we need to -- ensure everything gets cleaned up properly. - (hds, tls) = unzip [(hd, tl) | (hd:tl) <- xss] + (hds, tls) = unzip [(hd, tl) | hd : tl <- xss] + combine y h ys t = (y:h) : transpose (ys:t) + {-# NOINLINE combine #-} + {- Implementation note: + If the bottom part of the function was written as such: + + ``` + transpose ((x : xs) : xss) = (x:hds) : transpose (xs:tls) + where + (hds,tls) = hdstls + hdstls = unzip [(hd, tl) | hd : tl <- xss] + {-# NOINLINE hdstls #-} + ``` + Here are the steps that would take place: + + 1. We allocate a thunk, `hdstls`, representing the result of unzipping. + 2. We allocate selector thunks, `hds` and `tls`, that deconstruct `hdstls`. + 3. Install `hds` as the tail of the result head and pass `xs:tls` to + the recursive call in the result tail. + + Once optimised, this code would amount to: + + ``` + transpose ((x : xs) : xss) = (x:hds) : (let tls = snd hdstls in transpose (xs:tls)) + where + hds = fst hdstls + hdstls = unzip [(hd, tl) | hd : tl <- xss] + {-# NOINLINE hdstls #-} + ``` + + In particular, GHC does not produce the `tls` selector thunk immediately; + rather, it waits to do so until the tail of the result is actually demanded. + So when `hds` is demanded, that does not resolve `snd hdstls`; the tail of the + result keeps `hdstls` alive. + + By writing `combine` and making it NOINLINE, we prevent GHC from delaying + the selector thunk allocation, requiring that `hds` and `tls` are actually + allocated to be passed to `combine`. + -} --- | The 'partition' function takes a predicate a list and returns +-- | The 'partition' function takes a predicate and a list, and returns -- the pair of lists of elements which do and do not satisfy the -- predicate, respectively; i.e., -- diff --git a/libraries/base/tests/T18642.hs b/libraries/base/tests/T18642.hs new file mode 100644 index 0000000000..0645eb38d0 --- /dev/null +++ b/libraries/base/tests/T18642.hs @@ -0,0 +1,27 @@ +{-# LANGUAGE NumericUnderscores #-} +module Main where + +import Data.List (transpose, foldl') +import GHC.Stats +import System.Exit + +thingy :: [[[Int]]] +thingy = [ [[1],[2]], [[1..10^7], [3]]] + +thingy2 :: [[[Int]]] +thingy2 = [ [[1],[2]], [[3], [2..10^7+1]]] + +main = do + htr : ttr <- pure $ transpose thingy + print $ even $ foldl' (+) 0 . head . tail $ htr + + htr2 : ttr2 <- pure $ transpose thingy2 + print $ even $ foldl' (+) 0 . head . tail . head $ ttr2 + + maxLiveBytes <- max_live_bytes <$> getRTSStats + if (maxLiveBytes) < 200_000 + then putStrLn "Test is running in the expected residency limit" + else do + putStrLn $ "Test is running with " <> show maxLiveBytes <> " bytes of residency!" + exitFailure + diff --git a/libraries/base/tests/T18642.stdout b/libraries/base/tests/T18642.stdout new file mode 100644 index 0000000000..9342107f3f --- /dev/null +++ b/libraries/base/tests/T18642.stdout @@ -0,0 +1,3 @@ +True +True +Test is running in the expected residency limit diff --git a/libraries/base/tests/all.T b/libraries/base/tests/all.T index 29146204f2..ac65224ef0 100644 --- a/libraries/base/tests/all.T +++ b/libraries/base/tests/all.T @@ -260,3 +260,4 @@ test('T16943b', normal, compile_and_run, ['']) test('T17499', [collect_stats('bytes allocated',5)], compile_and_run, ['-O -w']) test('T16643', normal, compile_and_run, ['']) test('clamp', normal, compile_and_run, ['']) +test('T18642', extra_run_opts('+RTS -T -RTS'), compile_and_run, ['-O2']) |