From 0670f387b5946b777f032fcb9b1b4fc4fd661bc0 Mon Sep 17 00:00:00 2001 From: Viktor Dukhovni Date: Sun, 27 Dec 2020 22:37:16 -0500 Subject: New overview of Foldable class Also updated stale external URL in Traversable --- libraries/base/Data/Foldable.hs | 778 ++++++++++++++++++++++++++++++++++--- libraries/base/Data/Traversable.hs | 25 +- 2 files changed, 750 insertions(+), 53 deletions(-) diff --git a/libraries/base/Data/Foldable.hs b/libraries/base/Data/Foldable.hs index acd06c1382..48acbe2d14 100644 --- a/libraries/base/Data/Foldable.hs +++ b/libraries/base/Data/Foldable.hs @@ -48,6 +48,51 @@ module Data.Foldable ( -- * Searches notElem, find + + -- * Overview + -- $overview + + -- ** Recursive and corecursive reduction + -- $reduction + + -- *** Strict recursive folds + -- $strict + + -- **** List of strict functions + -- $strictlist + + -- *** Lazy corecursive folds + -- $lazy + + -- **** List of lazy functions + -- $lazylist + + -- *** Short-circuit folds + -- $shortcircuit + + -- **** List of short-circuit functions + -- $shortlist + + -- *** Hybrid folds + -- $hybrid + + -- ** Avoiding multi-pass algorithms + -- $multipass + + -- * Defining instances + -- $instances + + -- *** Being strict by being lazy + -- $strictlazy + + -- * Laws + -- $laws + + -- * Notes + -- $notes + + -- * See also + -- $also ) where import Data.Bool @@ -70,52 +115,21 @@ import GHC.Num ( Num(..) ) infix 4 `elem`, `notElem` --- | Data structures that can be folded. --- --- For example, given a data type --- --- > data Tree a = Empty | Leaf a | Node (Tree a) a (Tree a) --- --- a suitable instance would be --- --- > instance Foldable Tree where --- > foldMap f Empty = mempty --- > foldMap f (Leaf x) = f x --- > foldMap f (Node l k r) = foldMap f l `mappend` f k `mappend` foldMap f r --- --- This is suitable even for abstract types, as the monoid is assumed --- to satisfy the monoid laws. Alternatively, one could define @foldr@: --- --- > instance Foldable Tree where --- > foldr f z Empty = z --- > foldr f z (Leaf x) = f x z --- > foldr f z (Node l k r) = foldr f (f k (foldr f z r)) l --- --- @Foldable@ instances are expected to satisfy the following laws: --- --- > foldr f z t = appEndo (foldMap (Endo . f) t ) z --- --- > foldl f z t = appEndo (getDual (foldMap (Dual . Endo . flip f) t)) z --- --- > fold = foldMap id --- --- > length = getSum . foldMap (Sum . const 1) --- --- @sum@, @product@, @maximum@, and @minimum@ should all be essentially --- equivalent to @foldMap@ forms, such as --- --- > sum = getSum . foldMap Sum --- --- but may be less defined. --- --- If the type is also a 'Functor' instance, it should satisfy --- --- > foldMap f = fold . fmap f +-- XXX: Missing haddock feature. Links to anchors in other modules +-- don't have a sensible way to name the link within the module itself. +-- Thus, the below "Data.Foldable#overview" works well when shown as +-- @Data.Foldable@ from other modules, but in the home module it should +-- be possible to specify alternative link text. :-( + +-- | The Foldable class represents data structures that can be reduced to a +-- summary value one element at a time. Strict left-associative folds are a +-- good fit for space-efficient reduction, while lazy right-associative folds +-- are good fit for corecursive iteration or for folds that short-circuit after +-- processing an initial subsequence of the structure's elements. -- --- which implies that +-- A more detailed description can be found in the overview section of +-- "Data.Foldable#overview". -- --- > foldMap f . fmap g = foldMap (f . g) - class Foldable t where {-# MINIMAL foldMap | foldr #-} @@ -250,11 +264,11 @@ class Foldable t where -- > foldl f z [x1, x2, ..., xn] == (...((z `f` x1) `f` x2) `f`...) `f` xn -- -- Note that to produce the outermost application of the operator the - -- entire input list must be traversed. This means that 'foldl'' will + -- entire input list must be traversed. This means that 'foldl' will -- diverge if given an infinite list. -- -- Also note that if you want an efficient left-fold, you probably want to - -- use 'foldl'' instead of 'foldl'. The reason for this is that latter does + -- use `foldl'` instead of 'foldl'. The reason for this is that latter does -- not force the "inner" results (e.g. @z \`f\` x1@ in the above example) -- before applying them to the operator (e.g. to @(\`f\` x2)@). This results -- in a thunk chain \(\mathcal{O}(n)\) elements long, which then must be @@ -1283,5 +1297,675 @@ this could be particularly bad (see #10830). For the common case of lists, switching the implementations of maximumBy and minimumBy to foldl1 solves the issue, assuming GHC's strictness analysis can then -make these functions only use O(1) stack space. As of base 4.16, we have switched to employing foldl' over foldl1, not relying on GHC's optimiser. See https://gitlab.haskell.org/ghc/ghc/-/issues/17867 for more context. +make these functions only use O(1) stack space. As of base 4.16, we have +switched to employing foldl' over foldl1, not relying on GHC's optimiser. See +https://gitlab.haskell.org/ghc/ghc/-/issues/17867 for more context. -} + +-------------- + +-- In order to avoid having actual Unicode glyphs in the module source, +-- the below numeric HTML entity codes are used: +-- +-- * ellipsis = … + +-- $overview +-- +-- #overview# +-- Foldable structures are reduced to a summary value by accumulating +-- contributions to the result one element at a time. +-- +-- == Left and right folds +-- +-- #leftright# +-- Merging the contribution of the current element with an accumulator value +-- from a partial result is performed by an /update function/, either +-- explicitly provided by the caller as in `foldr`, implicit as in `length`, or +-- partly implicit as in `foldMap` (where each element is mapped into a monoid, +-- and the Monoid's `mappend` performs the merge). +-- +-- A key distinction is between left-associative and right-associative +-- folds: +-- +-- * In left-associative folds the accumulator is a partial fold over the +-- elements that __precede__ the current element, and is passed to the update +-- function as its first (left) argument. The outer-most application of the +-- update function merges the contribution of the last element of the +-- structure with the contributions of all its predecessors. +-- +-- * In right-associative folds the accumulator is a partial fold over the +-- elements that __follow__ the current element, and is passed to the update +-- function as its second (right) argument. The outer-most application of +-- the update function merges the contribution of the first element of the +-- structure with the contributions of all its successors. +-- +-- These two types of folds are typified by the left-associative strict +-- `foldl'` and the right-associative lazy `foldr`. +-- +-- @ +-- `foldl'` :: Foldable t => (b -> a -> b) -> b -> t a -> b +-- `foldr` :: Foldable t => (a -> b -> b) -> b -> t a -> b +-- @ +-- +-- Example usage: +-- +-- >>> foldl' (+) 0 [1..100] +-- 5050 +-- >>> foldr (&&) True (repeat False) +-- False +-- +-- The first argument of both is an explicit /update function/ that merges the +-- contribution of an element of the structure with a partial fold over, +-- respectively, either the preceding or following elements of the structure. +-- +-- The second argument of both is an initial accumulator value @z@ of type +-- @b@. This is the result of the fold when the structure is empty. +-- When the structure is non-empty, this is the accumulator value merged with +-- the first element in left-associative folds, or with the last element in +-- right-associative folds. +-- +-- The third and final argument is a @Foldable@ structure containing elements +-- @(a, b, c, …)@. +-- +-- * __`foldl'`__ takes an update function of the form: +-- +-- @ +-- f :: b -- accumulated fold of the initial elements +-- -> a -- current element +-- -> b -- updated fold, inclusive of current element +-- @ +-- +-- If the structure's last element is @y@, the result of the fold is: +-- +-- @ +-- g y . … . g c . g b . g a $ z +-- where g element !acc = f acc element +-- @ +-- +-- Since `foldl'` is strict in the accumulator, this is always +-- a [strict](#strict) reduction with no opportunity for early return or +-- intermediate results. The structure must be finite, since no result is +-- returned until the last element is processed. The advantage of +-- strictness is space efficiency: the final result can be computed without +-- storing a potentially deep stack of lazy intermediate results. +-- +-- * __`foldr`__ takes an update function of the form: +-- +-- @ +-- f :: a -- current element +-- -> b -- accumulated fold of the remaining elements +-- -> b -- updated fold, inclusive of current element +-- @ +-- +-- the result of the fold is: +-- +-- @ +-- f a . f b . f c . … $ z +-- @ +-- +-- If each call of @f@ on the current element @e@, (referenced as @(f e)@ +-- below) returns a structure in which its second argument is captured in a +-- lazily-evaluated component, then the fold of the remaining elements is +-- available to the caller of `foldr` as a pending computation (thunk) that +-- is computed only when that component is evaluated. +-- +-- Alternatively, if any of the @(f e)@ ignore their second argument, the +-- fold stops there, with the remaining elements unused. As a result, +-- `foldr` is well suited to define both [corecursive](#corec) +-- and [short-circuit](#short) reductions. +-- +-- When the update function is always strict in the second argument, +-- `foldl'` is generally a better choice than `foldr`. When `foldr` is +-- called with a strict update function, evaluation cannot begin until the +-- last element is reached, by which point a deep stack of pending function +-- applications may have been built up in memory. +-- +-- In finite structures for which right-to-left sequencing no less efficient as +-- left-to-right sequencing, there is no inherent performance distinction +-- between left-associative and right-associative folds. If the structure's +-- @Foldable@ instance takes advantage of this symmetry to also make strict +-- right folds space-efficient and lazy left folds corecursive, one need only +-- take care to choose either a strict or lazy method for the task at hand. + +-------------- + +-- $reduction +-- +-- As observed in the [above description](#leftright) of left and right folds, +-- there are three general ways in which a structure can be reduced to a +-- summary value: +-- +-- * __Recursive__ reduction, which is strict in all the elements of the +-- structure. This produces a single final result only after processing the +-- entire input structure, and so the input must be finite. +-- +-- * __Corecursion__, which yields intermediate results as it encounters +-- additional input elements. Lazy processing of the remaining elements +-- makes the intermediate results available even before the rest of the +-- input is processed. The input may be unbounded, and the caller can +-- stop processing intermediate results early. +-- +-- * __Short-circuit__ reduction, which examines some initial sequence of the +-- input elements, but stops once a termination condition is met, returning a +-- final result based only on the elements considered up to that point. The +-- remaining elements are not considered. The input should generally be +-- finite, because the termination condition might otherwise never be met. +-- +-- Whether a fold is recursive, corecursive or short-circuiting can depend on +-- both the method chosen to perform the fold and on the update function passed +-- to that method (which may be implicit, as with the `mappend` method of a +-- monoid instance). +-- +-- There are also hybrid cases, where the method and/or the update function are +-- not well suited to the task at hand, resulting in a fold that fails to yield +-- incremental results until the entire input is processed, or fails to +-- strictly evaluate results as it goes, deferring all the work to the +-- evaluation of a large final thunk. Such cases should be avoided, either by +-- selecting a more appropriate @Foldable@ method, or by tailoring the update +-- function to the chosen method. +-- +-- The distinction between these types of folds is critical, both in deciding +-- which @Foldable@ method to use to perform the reduction efficiently, and in +-- writing @Foldable@ instances for new structures. Below is a more detailed +-- overview of each type. + +-------------- + +-- $strict +-- #strict# +-- +-- Common examples of strict recursive reduction are the various /aggregate/ +-- functions, like 'sum', 'product', 'length', as well as more complex +-- summaries such as frequency counts. These functions return only a single +-- value after processing the entire input structure. In such cases, lazy +-- processing of the tail of the input structure is generally not only +-- unnecessary, but also inefficient. Thus, these and similar folds should be +-- implemented in terms of strict left-associative @Foldable@ methods (typically +-- `foldl'`) to perform an efficient reduction in constant space. +-- +-- Conversely, an implementation of @Foldable@ for a new structure should +-- ensure that `foldl'` actually performs a strict left-associative reduction. +-- +-- The `foldMap'` method is a special case of `foldl'`, in which the initial +-- accumulator is `mempty` and the update function is @mappend . f@, where +-- @f@ maps each input element into the 'Monoid' in question. Therefore, +-- `foldMap'` is an appropriate choice under essentially the same conditions +-- as `foldl'`, and its implementation for a given @Foldable@ structure should +-- also be a strict left-associative reduction. +-- +-- While the examples below are not necessarily the most optimal definitions of +-- the intended functions, they are all cases in which `foldMap'` is far more +-- appropriate (as well as more efficient) than the lazy `foldMap`. +-- +-- > length = getSum . foldMap' (const (Sum 1)) +-- > sum = getSum . foldMap' Sum +-- > product = getProduct . foldMap' Product +-- +-- [ The actual default definitions employ coercions to optimise out +-- 'getSum' and 'getProduct'. ] + +-------------- + +-- $strictlist +-- +-- The full list of strict recursive functions in this module is: +-- +-- * Provided the update function is strict in its left argument: +-- +-- @ +-- `foldl'` :: Foldable t => (b -> a -> b) -> b -> t a -> b +-- @ +-- +-- * Provided `mappend` is strict in its left argument: +-- +-- @ +-- `foldMap'` :: (Foldable t, Monoid m) => (a -> m) -> t a -> m +-- @ +-- +-- * Provided the instance is correctly defined: +-- +-- @ +-- `length` :: Foldable t => t a -> Int +-- `sum` :: (Foldable t, Num a) => t a -> a +-- `product` :: (Foldable t, Num a) => t a -> a +-- `maximum` :: (Foldable t, Ord a) => t a -> a +-- `minimum` :: (Foldable t, Ord a) => t a -> a +-- `maximumBy` :: Foldable t => (a -> a -> Ordering) -> t a -> a +-- `minimumBy` :: Foldable t => (a -> a -> Ordering) -> t a -> a +-- @ + +-------------- + +-- $lazy +-- +-- #corec# +-- Common examples of lazy corecursive reduction are functions that map and +-- flatten a structure to a lazy stream of result values, i.e. an iterator +-- over the transformed input elements. In such cases, it is important to +-- choose a @Foldable@ method that is lazy in the tail of the structure, such +-- as `foldr` (or `foldMap`, if the result @Monoid@ has a lazy `mappend` as +-- with e.g. ByteString Builders). +-- +-- Conversely, an implementation of `foldr` for a structure that can +-- accommodate a large (and possibly unbounded) number of elements is expected +-- to be lazy in the tail of the input, allowing update functions that are lazy +-- in the accumulator to yield intermediate results incrementally. Such folds +-- are right-associative, with the tail of the stream returned as a lazily +-- evaluated component of the result (an element of a tuple or some other +-- non-strict constructor, e.g. the @(:)@ constructor for lists). +-- +-- The @toList@ function below lazily transforms a @Foldable@ structure to a +-- List. Note that this transformation may be lossy, e.g. for a keyed +-- container (@Map@, @HashMap@, …) the output stream holds only the +-- values, not the keys. Lossless transformations to\/from lists of @(key, +-- value)@ pairs are typically available in the modules for the specific +-- container types. +-- +-- > toList = foldr (:) [] + +-------------- + +-- $lazylist +-- +-- The full list of lazy corecursive functions in this module is: +-- +-- * Provided the reduction function is lazy in its second argument, +-- (otherwise best to use a strict recursive reduction): +-- +-- @ +-- `foldr` :: Foldable t => (a -> b -> b) -> b -> t a -> b +-- `foldr1` :: Foldable t => (a -> a -> a) -> t a -> a +-- @ +-- +-- * Provided the 'Monoid' `mappend` is lazy in its second argument +-- (otherwise best to use a strict recursive reduction): +-- +-- @ +-- `fold` :: Foldable t => Monoid m => t m -> m +-- `foldMap` :: Foldable t => Monoid m => (a -> m) -> t a -> m +-- @ +-- +-- * Provided the instance is correctly defined: +-- +-- @ +-- `toList` :: Foldable t => t a -> [a] +-- `concat` :: Foldable t => t [a] -> [a] +-- `concatMap` :: Foldable t => (a -> [b]) -> t a -> [b] +-- @ + +-------------- + +-- $shortcircuit +-- +-- #short# +-- Examples of short-cicuit reduction include various boolean predicates +-- that test whether some or all the elements of a structure satisfy a +-- given condition. Because these don't necessarily consume the entire +-- list, they typically employ `foldr` with an update function that +-- is conditionally strict in its second argument. Once the termination +-- condition is met the second argument (tail of the input structure) is +-- ignored. No result is returned until that happens. +-- +-- The key distinguishing feature of these folds is /conditional/ strictness +-- in the second argument, it is sometimes evaluated and sometimes not. +-- +-- The simplest (degenerate case) of these is 'null', which determines whether +-- a structure is empty or not. This only needs to look at the first element, +-- and only to the extent of whether it exists or not, and not its value. In +-- this case termination is guaranteed, and infinite input structures are fine. +-- Its default definition is of course in terms of the lazy 'foldr': +-- +-- > null = foldr (\_ _ -> False) True +-- +-- A more general example is `any`, which applies a predicate to each input +-- element in turn until it finds the first one for which the predicate is +-- true, at which point it returns success. If, in an infinite input stream +-- the predicate is false for all the elements, `any` will not terminate, +-- but since it runs in constant space, it typically won't run out of memory, +-- it'll just loop forever. + +-------------- + +-- $shortlist +-- +-- The full list of short-circuit folds in this module is: +-- +-- * Boolean predicate folds. +-- These functions examine elements strictly until a condition is met, +-- but then return a result ignoring the rest (lazy in the tail). These +-- may loop forever given an unbounded input where no elements satisy the +-- termination condition. +-- +-- @ +-- `null` :: Foldable t => t a -> Bool +-- `elem` :: Foldable t => Eq a => a -> t a -> Bool +-- `notElem` :: (Foldable t, Eq a) => a -> t a -> Bool +-- `and` :: Foldable t => t Bool -> Bool +-- `or` :: Foldable t => t Bool -> Bool +-- `find` :: Foldable t => (a -> Bool) -> t a -> Maybe a +-- `any` :: Foldable t => (a -> Bool) -> t a -> Bool +-- `all` :: Foldable t => (a -> Bool) -> t a -> Bool +-- @ +-- +-- * Many instances of '<|>' (e.g. the 'Maybe' instance) are conditionally +-- lazy, and use or don't use their second argument depending on the value +-- of the first. These are used with the folds below, which terminate as +-- early as possible, but otherwise generally keep going. Some instances +-- (e.g. for List) are always strict, but the result is lazy in the tail +-- of the output, so that `asum` for a list of lists is in fact corecursive. +-- These folds are defined in terms of `foldr`. +-- +-- @ +-- `asum` :: (Foldable t, Alternative f) => t (f a) -> f a +-- `msum` :: (Foldable t, MonadPlus m) => t (m a) -> m a +-- @ +-- +-- * Likewise, the '*>' operator in some `Applicative` functors, and '>>' +-- in some monads are conditionally lazy and can /short-circuit/ a chain of +-- computations. The below folds will terminate as early as possible, but +-- even infinite loops can be productive here, when evaluated solely for +-- their stream of IO side-effects. See "Data.Traversable#validation" +-- for some additional discussion. +-- +-- @ +-- `traverse_` :: (Foldable t, Applicative f) => (a -> f b) -> t a -> f () +-- `for_` :: (Foldable t, Applicative f) => t a -> (a -> f b) -> f () +-- `sequenceA_` :: (Foldable t, Applicative f) => t (f a) -> f () +-- `mapM_` :: (Foldable t, Monad m) => (a -> m b) -> t a -> m () +-- `forM_` :: (Foldable t, Monad m) => t a -> (a -> m b) -> m () +-- `sequence_` :: (Foldable t, Monad m) => t (m a) -> m () +-- @ +-- +-- * Finally, there's one more special case, which can short-circuit when the +-- monad @m@ is a 'MonadPlus', and the update function conditionally calls +-- 'mzero'. The monadic result is a strict left fold of the inputs when the +-- monad's bind operator is strict in its first argument. And yet the +-- monadic actions of this ostensibly left fold are sequenced via the lazy +-- `foldr`, which allows it to short-circuit early. The monadic side-effects +-- are evaluated in left-to-right order. +-- +-- @ +-- `foldlM` :: (Foldable t, Monad m) => (b -> a -> m b) -> b -> t a -> m b +-- @ +-- +-- For a structure @xs@ with elements @(a, b, c, …)@, the default +-- definition of @(foldlM f xs z)@ expands (via `foldr`) to: +-- +-- @ +-- return z >>= (g a >>= (g b >>= (g c >>= ...))) +-- where g element acc = f acc element -- i.e. g = flip f +-- @ +-- +-- The fold is not strict in the accumulator, unless @f@ is. But even when +-- @f@ is strict, if the Monad's bind operator is not strict in its right +-- argument the chain of monadic actions can short-circuit: +-- +-- >>> :set -XBangPatterns +-- >>> import Control.Monad +-- >>> import Control.Monad.Trans +-- >>> import Control.Monad.Trans.Maybe +-- >>> import Data.Foldable +-- >>> let f !_ e = when (e > 3) mzero >> lift (print e) +-- >>> runMaybeT $ foldlM f () [0..] +-- 0 +-- 1 +-- 2 +-- 3 +-- Nothing +-- +-- Contrast this with `foldrM`, which uses `foldl` to sequence the effects, +-- and therefore diverges (running out of space) when given an unbounded +-- input structure. The short-circuit condition is never reached +-- +-- >>> let f e _ = when (e > 3) mzero >> lift (print e) +-- >>> runMaybeT $ foldrM f () [0..] +-- ...hangs... +-- +-- If the update function is changed to short-circuit on the initial +-- elements and the structure is finite, `foldrM` will run effects and +-- produce results in reverse order: +-- +-- >>> let f e _ = when (e < 3) mzero >> lift (print e) +-- >>> runMaybeT $ foldrM f () [0..5] +-- 5 +-- 4 +-- 3 +-- Nothing + +-------------- + +-- $hybrid +-- +-- The below folds, are neither strict reductions that produce a final answer +-- in constant space, nor lazy corecursions, and so have limited applicability. +-- They do have specialised uses, but are best avoided when in doubt. +-- +-- @ +-- `foldr'` :: (a -> b -> b) -> b -> t a -> b +-- `foldl` :: (b -> a -> b) -> b -> t a -> b +-- `foldl1` :: (a -> a -> a) -> t a -> a +-- `foldrM` :: (Foldable t, Monad m) => (a -> b -> m b) -> b -> t a -> m b +-- @ + +-------------- + +-- $instances +-- +-- For many structures reasonably efficient @Foldable@ instances can be derived +-- automatically, by enabling the @DeriveFoldable@ GHC extension. When this +-- works, it is generally not necessary to define a custom instance by hand. +-- Though in some cases one may be able to get slightly faster hand-tuned code, +-- care is required to avoid producing slower code, or code that is not +-- sufficiently lazy, strict or /lawful/. +-- +-- The hand-crafted intances can get away with only defining one of 'foldr' or +-- 'foldMap'. All the other methods have default definitions in terms of one +-- of these. The default definitions have the expected strictness and the +-- expected asymptotic runtime and space costs, modulo small constant factors. +-- If you choose to hand-tune, benchmarking is advised to see whether you're +-- doing better than the default derived implementations, plus careful tests to +-- ensure that the custom methods are correct. +-- +-- For example, given a data type +-- +-- > data Tree a = Empty | Leaf a | Node (Tree a) a (Tree a) +-- +-- a suitable instance would be: +-- +-- > instance Foldable Tree where +-- > foldr f z Empty = z +-- > foldr f z (Leaf x) = f x z +-- > foldr f z (Node l k r) = foldr f (f k (foldr f z r)) l +-- +-- The 'Node' case is a right fold of the left subtree whose initial +-- value is a right fold of the rest of the tree. +-- +-- For example, when `f` is '(:)', all three cases return an immediate +-- value, respectively @z@ or a /cons cell/ holding @x@ or @l@, with the +-- remainder the structure, if any, encapsulated in a lazy thunk. +-- This meets the expected efficient [corecursive](#corec) behaviour +-- of 'foldr'. +-- +-- Alternatively, one could define @foldMap@: +-- +-- > instance Foldable Tree where +-- > foldMap f Empty = mempty +-- > foldMap f (Leaf x) = f x +-- > foldMap f (Node l k r) = foldMap f l <> f k <> foldMap f r +-- +-- And indeed some efficiency may be gained by directly defining both, +-- avoiding some indirection in the default definitions that express +-- one in terms of the other. If you implement just one, likely 'foldr' +-- is the better choice. +-- +-- The fact that `foldl'` can be reasonably efficiently defined in terms +-- of 'foldr' is one of the more surprising features of @Foldable@. It +-- may be instructive to take a look at how this works. + +-------------- + +-- $strictlazy +-- +-- #strictlazy# +-- +-- The left fold: +-- +-- @ +-- foldl' f z [a, b, …, x, y] +-- @ +-- +-- can be expanded as: +-- +-- @ +-- id . g y . g x . … . g b . g a $ z +-- \ \ where g = flip f +-- @ +-- +-- In which to maintain the expected strictness we need to perform function +-- application eagerly, and composition lazily. To that end we introduce a new +-- function @f'@ which maps each element @x@ to an eager application of @g x@ +-- to its argument, followed by an application of a lazily computed composition +-- (@k@) of the @g e@ functions for the remaining elements @e@: +-- +-- > f' x k z = k $! (g x) z = k $! f z x +-- +-- We see that a lazy 'foldr' of the @g e@ endomorphisms, with @f'@ as as the +-- update function, in fact yields a strict left fold, that avoids building a +-- deep chain of intermediate thunks: +-- +-- > foldl' f z0 xs = foldr f' id xs z0 +-- > where f' x k z = k $! f z x +-- +-- The function applied to @z0@ is built corecursively, and its terms are +-- applied eagerly to the accumulator before further terms are applied to +-- the result. So, as promised, this will run in constant space, and GHC +-- is able to optimise this to an efficient loop. + +-------------- + +-- $multipass +-- +-- In applications where you want to compute a composite function of a +-- structure, which requires more than one aggregate as an input, it is +-- generally best to compute all the aggregates in a single pass, rather +-- than to traverse the same structure repeatedly. +-- +-- The [@foldl@](http://hackage.haskell.org/package/foldl) package implements a +-- robust general framework for dealing with this situation. If you choose to +-- to do it yourself, with a bit of care, the simplest cases are not difficult +-- to handle directly. You just need to accumulate the individual aggregates +-- as __strict__ components of a single data type, and then apply a final +-- transformation to it to extract the composite result. For example, +-- computing an average requires computing both the 'sum' and the 'length' of a +-- (non-empty) structure and dividing the sum by the length: +-- +-- > import Data.Foldable (foldl') +-- > +-- > data PairS a b = P !a !b -- strict pair +-- > +-- > -- | Compute sum and length in a single pass, then reduce to the average. +-- > average :: (Foldable f, Fractional a) => f a -> a +-- > average = pairToFrac . foldl' f z +-- > where +-- > z = P 0 (0 :: Int) +-- > f (P s l) a = P (s+a) (l+1) +-- > pairToFrac (P s l) = s / fromIntegral l +-- +-- The above example is somewhat contrived, some structures keep track of +-- their length internally, and can return it in @O(1)@ time, so this +-- particular recipe for averages is not always the most efficient. +-- In general, composite aggregate functions of large structures benefit +-- from single-pass reduction. + +-------------- + +-- $laws +-- +-- @Foldable@ instances are expected to satisfy the following laws: +-- +-- > foldr f z t = appEndo (foldMap (Endo . f) t ) z +-- +-- > foldl f z t = appEndo (getDual (foldMap (Dual . Endo . flip f) t)) z +-- +-- > fold = foldMap id +-- +-- > length = getSum . foldMap (Sum . const 1) +-- +-- @sum@, @product@, @maximum@, and @minimum@ should all be essentially +-- equivalent to @foldMap@ forms, such as +-- +-- > sum = getSum . foldMap' Sum +-- > product = getProduct . foldMap' Product +-- +-- but are generally more efficient when defined more directly as: +-- +-- > sum = foldl' (+) 0 +-- > sum = foldl' (*) 1 +-- +-- If the type is also a 'Functor' instance, it should satisfy +-- +-- > foldMap f = fold . fmap f +-- +-- which implies that +-- +-- > foldMap f . fmap g = foldMap (f . g) +-- + +-------------- + +-- $notes +-- +-- #notes# +-- The absence of a 'Functor' superclass allows +-- @Foldable@ structures to impose constraints on their element types. Thus, +-- Sets are @Foldable@, even though @Set@ imposes an 'Ord' constraint on its +-- elements (this precludes defining a @Functor@ instance for @Set@). +-- +-- The @Foldable@ class makes it possible to use idioms familiar from the List +-- type with container structures that are better suited to the task at hand. +-- This allows a user to substitute more appropriate @Foldable@ data types +-- for Lists without requiring new idioms (see [[1\]](#uselistsnot) for when +-- not to use lists). +-- +-- The more general methods of the @Foldable@ class are now exported by the +-- "Prelude" in place of the original List-specific methods (see the +-- [FTP Proposal](https://wiki.haskell.org/Foldable_Traversable_In_Prelude)). +-- The List-specific variants are for now still available in "GHC.OldList", but +-- that module is intended only as a transitional aid, and may be removed in +-- the future. +-- +-- Surprises can arise from the @Foldable@ instance of the 2-tuple @(a,)@ which +-- now behaves as a 1-element @Foldable@ container in its second slot. In +-- contexts where a specific monomorphic type is expected, and you want to be +-- able to rely on type errors to guide refactoring, it may make sense to +-- define and use less-polymorphic variants of some of the @Foldable@ methods. +-- +-- Below are two examples showing a definition of a reusable less-polymorphic +-- 'sum' and a one-off in-line specialisation of 'length': +-- +-- > {-# LANGUAGE TypeApplications #-} +-- > +-- > mySum :: Num a => [a] -> a +-- > mySum = sum +-- > +-- > type SlowVector a = [a] +-- > slowLength :: SlowVector -> Int +-- > slowLength v = length @[] v +-- +-- In both cases, if the data type to which the function is applied changes +-- to something other than a list, the call-site will no longer compile until +-- appropriate changes are made. + +-------------- + +-- $also +-- +-- * [1] #uselistsnot# \"When You Should Use Lists in Haskell (Mostly, You Should Not)\", +-- by Johannes Waldmann, +-- in arxiv.org, Programming Languages (cs.PL), at +-- . +-- +-- * [2] \"The Essence of the Iterator Pattern\", +-- by Jeremy Gibbons and Bruno Oliveira, +-- in /Mathematically-Structured Functional Programming/, 2006, online at +-- . diff --git a/libraries/base/Data/Traversable.hs b/libraries/base/Data/Traversable.hs index b2cd20f0ac..152ddc31cc 100644 --- a/libraries/base/Data/Traversable.hs +++ b/libraries/base/Data/Traversable.hs @@ -82,8 +82,17 @@ import qualified GHC.List as List ( foldr ) -- $setup -- >>> import Prelude +-- XXX: Missing haddock feature. Links to anchors in other modules +-- don't have a sensible way to name the link within the module itself. +-- Thus, the below "Data.Traversable#overview" works well when shown as +-- @Data.Traversable@ from other modules, but in the home module it should +-- be possible to specify alternative link text. :-( + -- | Functors representing data structures that can be traversed from --- left to right. +-- left to right, performing an action on each element. +-- +-- A more detailed description can be found in the overview section of +-- "Data.Traversable#overview". -- class (Functor t, Foldable t) => Traversable t where {-# MINIMAL traverse | sequenceA #-} @@ -461,7 +470,9 @@ foldMapDefault = coerce (traverse :: (a -> Const m ()) -> t a -> Const m (t ())) ------------------ -- $overview --- @Traversable@ functors can be thought of as polymorphic containers that +-- +-- #overview# +-- Traversable functors can be thought of as polymorphic containers that -- support mapping of applicative (or monadic) effects over the container -- (element-wise) to create a new container of __the same shape__, with the -- effects sequenced in a natural order for the container type in question. @@ -549,6 +560,8 @@ foldMapDefault = coerce (traverse :: (a -> Const m ()) -> t a -> Const m (t ())) ------------------ -- $validation +-- +-- #validation# -- A hypothetical application of the above is to validate a structure: -- -- >>> validate :: Int -> Either (String, Int) Int @@ -581,9 +594,9 @@ foldMapDefault = coerce (traverse :: (a -> Const m ()) -> t a -> Const m (t ())) -- The @Foldable@ instance should be defined in a manner that avoids -- construction of an unnecesary copy of the container. -- --- Perhaps the most widely used @Foldable@ methods are 'mapM_' and its flipped --- version 'forM_'. Often, to sequence IO actions (that return no useful --- results) over all the elements of a @Traversable@ container. One special +-- The @Foldable@ method 'mapM_' and its flipped version 'forM_' can be used +-- to sequence IO actions over all the elements of a @Traversable@ container +-- (just for their side-effects, ignoring any results) . One special -- case is a 'Maybe' container that optionally holds a value. Given: -- -- > action :: a -> IO () @@ -838,7 +851,7 @@ foldMapDefault = coerce (traverse :: (a -> Const m ()) -> t a -> Const m (t ())) -- * [1] \"The Essence of the Iterator Pattern\", -- by Jeremy Gibbons and Bruno Oliveira, -- in /Mathematically-Structured Functional Programming/, 2006, online at --- . +-- . -- -- * \"Applicative Programming with Effects\", -- by Conor McBride and Ross Paterson, -- cgit v1.2.1