From 914342c1ed282ea2d9bdd54d69a455372557d846 Mon Sep 17 00:00:00 2001 From: David Feuer Date: Wed, 1 Oct 2014 15:02:33 +0200 Subject: Make foldr2 a bit more strict in order to make its RULES semantics preserving. This fixes #9495. --- docs/users_guide/bugs.xml | 9 ++++++++- libraries/base/GHC/List.lhs | 43 +++++++++++++++++++++++++++++-------------- libraries/base/changelog.md | 4 ++++ 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/docs/users_guide/bugs.xml b/docs/users_guide/bugs.xml index dba0d86630..f9dfaae485 100644 --- a/docs/users_guide/bugs.xml +++ b/docs/users_guide/bugs.xml @@ -291,7 +291,14 @@ checking for duplicates. The reason for this is efficiency, pure and simple. if you get stuck on it. - + + zip and zipWith semantics + zip and zipWith can give + less defined results than the Report specifies in certain cases. This deviation + is needed to allow more opportunities for list fusion. In particular, + termination of the left list cannot be used to avoid hitting bottom in the + right list. See the documentation for details. + Reading integers diff --git a/libraries/base/GHC/List.lhs b/libraries/base/GHC/List.lhs index ffcc8ab56f..8c8e4bb050 100644 --- a/libraries/base/GHC/List.lhs +++ b/libraries/base/GHC/List.lhs @@ -646,7 +646,7 @@ xs !! (I# n0) | isTrue# (n0 <# 0#) = error "Prelude.(!!): negative index\n" foldr2 :: (a -> b -> c -> c) -> c -> [a] -> [b] -> c foldr2 k z = go where - go [] _ys = z + go [] ys = ys `seq` z -- see #9495 for the seq go _xs [] = z go (x:xs) (y:ys) = k x y (go xs ys) {-# INLINE [0] foldr2 #-} @@ -670,16 +670,6 @@ foldr2_right k _z y r (x:xs) = k x y (r xs) #-} \end{code} -The foldr2/right rule isn't exactly right, because it changes -the strictness of foldr2 (and thereby zip) - -E.g. main = print (null (zip nonobviousNil (build undefined))) - where nonobviousNil = f 3 - f n = if n == 0 then [] else f (n-1) - -I'm going to leave it though. - - Zips for larger tuples are in the List module. \begin{code} @@ -687,10 +677,22 @@ Zips for larger tuples are in the List module. -- | 'zip' takes two lists and returns a list of corresponding pairs. -- If one input list is short, excess elements of the longer list are -- discarded. +-- +-- NOTE: GHC's implementation of @zip@ deviates slightly from the +-- standard. In particular, Haskell 98 and Haskell 2010 require that +-- @zip [x1,x2,...,xn] (y1:y2:...:yn:_|_) = [(x1,y1),(x2,y2),...,(xn,yn)]@ +-- In GHC, however, +-- @zip [x1,x2,...,xn] (y1:y2:...:yn:_|_) = (x1,y1):(x2,y2):...:(xn,yn):_|_@ +-- That is, you cannot use termination of the left list to avoid hitting +-- bottom in the right list. + +-- This deviation is necessary to make fusion with 'build' in the right +-- list preserve semantics. {-# NOINLINE [1] zip #-} zip :: [a] -> [b] -> [(a,b)] +zip [] bs = bs `seq` [] -- see #9495 for the seq +zip _as [] = [] zip (a:as) (b:bs) = (a,b) : zip as bs -zip _ _ = [] {-# INLINE [0] zipFB #-} zipFB :: ((a, b) -> c -> d) -> a -> b -> c -> d @@ -723,10 +725,23 @@ zip3 _ _ _ = [] -- as the first argument, instead of a tupling function. -- For example, @'zipWith' (+)@ is applied to two lists to produce the -- list of corresponding sums. +-- +-- NOTE: GHC's implementation of @zipWith@ deviates slightly from the +-- standard. In particular, Haskell 98 and Haskell 2010 require that +-- @zipWith (,) [x1,x2,...,xn] (y1:y2:...:yn:_|_) = [(x1,y1),(x2,y2),...,(xn,yn)]@ +-- In GHC, however, +-- @zipWith (,) [x1,x2,...,xn] (y1:y2:...:yn:_|_) = (x1,y1):(x2,y2):...:(xn,yn):_|_@ +-- That is, you cannot use termination of the left list to avoid hitting +-- bottom in the right list. + +-- This deviation is necessary to make fusion with 'build' in the right +-- list preserve semantics. + {-# NOINLINE [1] zipWith #-} zipWith :: (a->b->c) -> [a]->[b]->[c] -zipWith f (a:as) (b:bs) = f a b : zipWith f as bs -zipWith _ _ _ = [] +zipWith _f [] bs = bs `seq` [] -- see #9495 for the seq +zipWith _f _as [] = [] +zipWith f (a:as) (b:bs) = f a b : zipWith f as bs -- zipWithFB must have arity 2 since it gets two arguments in the "zipWith" -- rule; it might not get inlined otherwise diff --git a/libraries/base/changelog.md b/libraries/base/changelog.md index 7b168fec5c..7529782492 100644 --- a/libraries/base/changelog.md +++ b/libraries/base/changelog.md @@ -73,6 +73,10 @@ the functions from `Data.List` (in other words, `Data.OldList` corresponds to `base-4.7.0.1`'s `Data.List`) + * `foldr2` (together with `zip` and `zipWith`) is made a bit stricter in the + second argument, so that the fusion RULES for it do not change the + semantics. (#9596) + ## 4.7.0.1 *Jul 2014* * Bundled with GHC 7.8.3 -- cgit v1.2.1