summaryrefslogtreecommitdiff
path: root/libraries
diff options
context:
space:
mode:
authorBodigrim <andrew.lelechenko@gmail.com>2022-10-09 13:45:31 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2022-10-11 18:02:59 -0400
commitda679f2eda0f920afa522ce9c4b9ad50cdec7d74 (patch)
treee602e88d1d81566934b4beb19b602b50b9962411 /libraries
parentdce9f320ce7275fa97f49abef604abbc3b0f9a9c (diff)
downloadhaskell-da679f2eda0f920afa522ce9c4b9ad50cdec7d74.tar.gz
Extend documentation for Data.List, mostly wrt infinite lists
Diffstat (limited to 'libraries')
-rw-r--r--libraries/base/Data/List.hs13
-rw-r--r--libraries/base/Data/OldList.hs242
-rw-r--r--libraries/base/GHC/List.hs14
3 files changed, 216 insertions, 53 deletions
diff --git a/libraries/base/Data/List.hs b/libraries/base/Data/List.hs
index 4474e51268..d7a5922031 100644
--- a/libraries/base/Data/List.hs
+++ b/libraries/base/Data/List.hs
@@ -229,14 +229,23 @@ import GHC.Base ( Bool(..), Eq((==)), otherwise )
--
-- @since 4.8.0.0
--
--- ==== __Examples__
---
-- >>> isSubsequenceOf "GHC" "The Glorious Haskell Compiler"
-- True
-- >>> isSubsequenceOf ['a','d'..'z'] ['a'..'z']
-- True
-- >>> isSubsequenceOf [1..10] [10,9..0]
-- False
+--
+-- For the result to be 'True', the first list must be finite;
+-- for the result to be 'False', the second list must be finite:
+--
+-- >>> [0,2..10] `isSubsequenceOf` [0..]
+-- True
+-- >>> [0..] `isSubsequenceOf` [0,2..10]
+-- False
+-- >>> [0,2..] `isSubsequenceOf` [0..]
+-- * Hangs forever*
+--
isSubsequenceOf :: (Eq a) => [a] -> [a] -> Bool
isSubsequenceOf [] _ = True
isSubsequenceOf _ [] = False
diff --git a/libraries/base/Data/OldList.hs b/libraries/base/Data/OldList.hs
index 4859cac7ac..4ba0a8e4df 100644
--- a/libraries/base/Data/OldList.hs
+++ b/libraries/base/Data/OldList.hs
@@ -266,6 +266,7 @@ stripPrefix _ _ = Nothing
-- | The 'elemIndex' function returns the index of the first element
-- in the given list which is equal (by '==') to the query element,
-- or 'Nothing' if there is no such element.
+-- For the result to be 'Nothing', the list must be finite.
--
-- >>> elemIndex 4 [0..]
-- Just 4
@@ -283,6 +284,7 @@ elemIndices x xs = findIndices (x==) xs -- arity 2 so that we don't get a PAP; #
-- | The 'find' function takes a predicate and a list and returns the
-- first element in the list matching the predicate, or 'Nothing' if
-- there is no such element.
+-- For the result to be 'Nothing', the list must be finite.
--
-- >>> find (> 4) [1..]
-- Just 5
@@ -295,6 +297,7 @@ find p = listToMaybe . filter p
-- | The 'findIndex' function takes a predicate and a list and returns
-- the index of the first element in the list satisfying the predicate,
-- or 'Nothing' if there is no such element.
+-- For the result to be 'Nothing', the list must be finite.
--
-- >>> findIndex isSpace "Hello World!"
-- Just 5
@@ -325,23 +328,41 @@ findIndices p ls = build $ \c n ->
--
-- >>> "Hello" `isPrefixOf` "Hello World!"
-- True
---
-- >>> "Hello" `isPrefixOf` "Wello Horld!"
-- False
+--
+-- For the result to be 'True', the first list must be finite;
+-- 'False', however, results from any mismatch:
+--
+-- >>> [0..] `isPrefixOf` [1..]
+-- False
+-- >>> [0..] `isPrefixOf` [0..99]
+-- False
+-- >>> [0..99] `isPrefixOf` [0..]
+-- True
+-- >>> [0..] `isPrefixOf` [0..]
+-- * Hangs forever *
+--
isPrefixOf :: (Eq a) => [a] -> [a] -> Bool
isPrefixOf [] _ = True
isPrefixOf _ [] = False
isPrefixOf (x:xs) (y:ys)= x == y && isPrefixOf xs ys
-- | The 'isSuffixOf' function takes two lists and returns 'True' iff
--- the first list is a suffix of the second. The second list must be
--- finite.
+-- the first list is a suffix of the second.
--
-- >>> "ld!" `isSuffixOf` "Hello World!"
-- True
---
-- >>> "World" `isSuffixOf` "Hello World!"
-- False
+--
+-- The second list must be finite; however the first list may be infinite:
+--
+-- >>> [0..] `isSuffixOf` [0..99]
+-- False
+-- >>> [0..99] `isSuffixOf` [0..]
+-- * Hangs forever *
+--
isSuffixOf :: (Eq a) => [a] -> [a] -> Bool
ns `isSuffixOf` hs = maybe False id $ do
delta <- dropLengthMaybe ns hs
@@ -383,9 +404,19 @@ dropLengthMaybe (_:x') (_:y') = dropLengthMaybe x' y'
--
-- >>> isInfixOf "Haskell" "I really like Haskell."
-- True
---
-- >>> isInfixOf "Ial" "I really like Haskell."
-- False
+--
+-- For the result to be 'True', the first list must be finite;
+-- for the result to be 'False', the second list must be finite:
+--
+-- >>> [20..50] `isInfixOf` [0..]
+-- True
+-- >>> [0..] `isInfixOf` [20..50]
+-- False
+-- >>> [0..] `isInfixOf` [0..]
+-- * Hangs forever *
+--
isInfixOf :: (Eq a) => [a] -> [a] -> Bool
isInfixOf needle haystack = any (isPrefixOf needle) (tails haystack)
@@ -396,6 +427,12 @@ isInfixOf needle haystack = any (isPrefixOf needle) (tails haystack)
--
-- >>> nub [1,2,3,4,3,2,1,2,4,3,5]
-- [1,2,3,4,5]
+--
+-- If the order of outputs does not matter and there exists @instance Ord a@,
+-- it's faster to use
+-- 'map' @Data.List.NonEmpty.@'Data.List.NonEmpty.head' . @Data.List.NonEmpty.@'Data.List.NonEmpty.group' . 'sort',
+-- which takes only \(\mathcal{O}(n \log n)\) time.
+--
nub :: (Eq a) => [a] -> [a]
nub = nubBy (==)
@@ -454,56 +491,103 @@ deleteBy eq x (y:ys) = if x `eq` y then ys else y : deleteBy eq x ys
-- | The '\\' function is list difference (non-associative).
-- In the result of @xs@ '\\' @ys@, the first occurrence of each element of
-- @ys@ in turn (if any) has been removed from @xs@. Thus
---
--- > (xs ++ ys) \\ xs == ys.
+-- @(xs ++ ys) \\\\ xs == ys@.
--
-- >>> "Hello World!" \\ "ell W"
-- "Hoorld!"
--
-- It is a special case of 'deleteFirstsBy', which allows the programmer
-- to supply their own equality test.
-
+--
+-- The second list must be finite, but the first may be infinite.
+--
+-- >>> take 5 ([0..] \\ [2..4])
+-- [0,1,5,6,7]
+-- >>> take 5 ([0..] \\ [2..])
+-- * Hangs forever *
+--
(\\) :: (Eq a) => [a] -> [a] -> [a]
(\\) = foldl (flip delete)
-- | The 'union' function returns the list union of the two lists.
+-- It is a special case of 'unionBy', which allows the programmer to supply
+-- their own equality test.
-- For example,
--
-- >>> "dog" `union` "cow"
-- "dogcw"
--
--- Duplicates, and elements of the first list, are removed from the
--- the second list, but if the first list contains duplicates, so will
--- the result.
--- It is a special case of 'unionBy', which allows the programmer to supply
--- their own equality test.
-
+-- If equal elements are present in both lists, an element from the first list
+-- will be used. If the second list contains equal elements, only the first one
+-- will be retained:
+--
+-- >>> import Data.Semigroup
+-- >>> union [Arg () "dog"] [Arg () "cow"]
+-- [Arg () "dog"]
+-- >>> union [] [Arg () "dog", Arg () "cow"]
+-- [Arg () "dog"]
+--
+-- However if the first list contains duplicates, so will
+-- the result:
+--
+-- >>> "coot" `union` "duck"
+-- "cootduk"
+-- >>> "duck" `union` "coot"
+-- "duckot"
+--
+-- 'union' is productive even if both arguments are infinite.
+--
union :: (Eq a) => [a] -> [a] -> [a]
union = unionBy (==)
-- | The 'unionBy' function is the non-overloaded version of 'union'.
+-- Both arguments may be infinite.
+--
unionBy :: (a -> a -> Bool) -> [a] -> [a] -> [a]
unionBy eq xs ys = xs ++ foldl (flip (deleteBy eq)) (nubBy eq ys) xs
-- | The 'intersect' function takes the list intersection of two lists.
+-- It is a special case of 'intersectBy', which allows the programmer to
+-- supply their own equality test.
-- For example,
--
-- >>> [1,2,3,4] `intersect` [2,4,6,8]
-- [2,4]
--
--- If the first list contains duplicates, so will the result.
+-- If equal elements are present in both lists, an element from the first list
+-- will be used, and all duplicates from the second list quashed:
--
--- >>> [1,2,2,3,4] `intersect` [6,4,4,2]
--- [2,2,4]
+-- >>> import Data.Semigroup
+-- >>> intersect [Arg () "dog"] [Arg () "cow", Arg () "cat"]
+-- [Arg () "dog"]
+--
+-- However if the first list contains duplicates, so will the result.
+--
+-- >>> "coot" `intersect` "heron"
+-- "oo"
+-- >>> "heron" `intersect` "coot"
+-- "o"
+--
+-- If the second list is infinite, 'intersect' either hangs
+-- or returns its first argument in full. Otherwise if the first list
+-- is infinite, 'intersect' might be productive:
+--
+-- >>> intersect [100..] [0..]
+-- [100,101,102,103...
+-- >>> intersect [0] [1..]
+-- * Hangs forever *
+-- >>> intersect [1..] [0]
+-- * Hangs forever *
+-- >>> intersect (cycle [1..3]) [2]
+-- [2,2,2,2...
--
--- It is a special case of 'intersectBy', which allows the programmer to
--- supply their own equality test. If the element is found in both the first
--- and the second list, the element from the first list will be used.
-
intersect :: (Eq a) => [a] -> [a] -> [a]
intersect = intersectBy (==)
-- | The 'intersectBy' function is the non-overloaded version of 'intersect'.
+-- It is productive for infinite arguments only if the first one
+-- is a subset of the second.
+--
intersectBy :: (a -> a -> Bool) -> [a] -> [a] -> [a]
intersectBy _ [] _ = []
intersectBy _ _ [] = []
@@ -547,6 +631,12 @@ intercalate xs xss = concat (intersperse xs xss)
--
-- >>> transpose [[10,11],[20],[],[30,31,32]]
-- [[10,20,30],[11,31],[32]]
+--
+-- For this reason the outer list must be finite; otherwise 'transpose' hangs:
+--
+-- >>> transpose (repeat [])
+-- * Hangs forever *
+--
transpose :: [[a]] -> [[a]]
transpose [] = []
transpose ([] : xss) = transpose xss
@@ -681,7 +771,8 @@ insertBy cmp x ys@(y:ys')
GT -> y : insertBy cmp x ys'
_ -> x : ys
--- | The 'maximumBy' function takes a comparison function and a list
+-- | The 'maximumBy' function is the non-overloaded version of 'maximum',
+-- which takes a comparison function and a list
-- and returns the greatest element of the list by the comparison function.
-- The list must be finite and non-empty.
--
@@ -697,7 +788,8 @@ maximumBy cmp xs = foldl1 maxBy xs
GT -> x
_ -> y
--- | The 'minimumBy' function takes a comparison function and a list
+-- | The 'minimumBy' function is the non-overloaded version of 'minimum',
+-- which takes a comparison function and a list
-- and returns the least element of the list by the comparison function.
-- The list must be finite and non-empty.
--
@@ -1045,23 +1137,42 @@ unzip7 = foldr (\(a,b,c,d,e,f,g) ~(as,bs,cs,ds,es,fs,gs) ->
-- | The 'deleteFirstsBy' function takes a predicate and two lists and
-- returns the first list with the first occurrence of each element of
--- the second list removed.
+-- the second list removed. This is the non-overloaded version of '(\\)'.
+--
+-- The second list must be finite, but the first may be infinite.
+--
deleteFirstsBy :: (a -> a -> Bool) -> [a] -> [a] -> [a]
deleteFirstsBy eq = foldl (flip (deleteBy eq))
-- | The 'group' function takes a list and returns a list of lists such
-- that the concatenation of the result is equal to the argument. Moreover,
--- each sublist in the result contains only equal elements. For example,
+-- each sublist in the result is non-empty and all elements are equal
+-- to the first one. For example,
--
-- >>> group "Mississippi"
-- ["M","i","ss","i","ss","i","pp","i"]
--
--- It is a special case of 'groupBy', which allows the programmer to supply
+-- 'group' is a special case of 'groupBy', which allows the programmer to supply
-- their own equality test.
+--
+-- It's often preferable to use @Data.List.NonEmpty.@'Data.List.NonEmpty.group',
+-- which provides type-level guarantees of non-emptiness of inner lists.
+--
group :: Eq a => [a] -> [[a]]
group = groupBy (==)
-- | The 'groupBy' function is the non-overloaded version of 'group'.
+--
+-- When a supplied relation is not transitive, it is important
+-- to remember that equality is checked against the first element in the group,
+-- not against the nearest neighbour:
+--
+-- >>> groupBy (\a b -> b - a < 5) [0..19]
+-- [[0,1,2,3,4],[5,6,7,8,9],[10,11,12,13,14],[15,16,17,18,19]]
+--
+-- It's often preferable to use @Data.List.NonEmpty.@'Data.List.NonEmpty.groupBy',
+-- which provides type-level guarantees of non-emptiness of inner lists.
+--
groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
groupBy _ [] = []
groupBy eq (x:xs) = (x:ys) : groupBy eq zs
@@ -1078,6 +1189,10 @@ groupBy eq (x:xs) = (x:ys) : groupBy eq zs
--
-- In particular,
-- @inits _|_ = [] : _|_@
+--
+-- 'inits' is semantically equivalent to @'map' 'reverse' . 'scanl' ('flip' (:)) []@,
+-- but under the hood uses a queue to amortize costs of 'reverse'.
+--
inits :: [a] -> [[a]]
inits = map toListSB . scanl' snocSB emptySB
{-# NOINLINE inits #-}
@@ -1106,6 +1221,12 @@ tails lst = build (\c n ->
--
-- >>> subsequences "abc"
-- ["","a","b","ab","c","ac","bc","abc"]
+--
+-- This function is productive on infinite inputs:
+--
+-- >>> take 8 $ subsequences ['a'..]
+-- ["","a","b","ab","c","ac","bc","abc"]
+--
subsequences :: [a] -> [[a]]
subsequences xs = [] : nonEmptySubsequences xs
@@ -1124,7 +1245,23 @@ nonEmptySubsequences (x:xs) = [x] : foldr f [] (nonEmptySubsequences xs)
--
-- >>> permutations "abc"
-- ["abc","bac","cba","bca","cab","acb"]
+--
+-- This function is productive on infinite inputs:
+--
+-- >>> take 6 $ map (take 3) $ permutations ['a'..]
+-- ["abc","bac","cba","bca","cab","acb"]
+--
+-- Note that the order of permutations is not lexicographic.
+-- It satisfies the following property:
+--
+-- > map (take n) (take (product [1..n]) (permutations ([1..n] ++ undefined))) == permutations [1..n]
+--
permutations :: [a] -> [[a]]
+-- See https://stackoverflow.com/questions/24484348/what-does-this-list-permutations-implementation-in-haskell-exactly-do/24564307#24564307
+-- for the analysis of this rather cryptic implementation.
+-- Related discussions:
+-- * https://mail.haskell.org/pipermail/haskell-cafe/2021-December/134920.html
+-- * https://mail.haskell.org/pipermail/libraries/2007-December/008788.html
permutations xs0 = xs0 : perms xs0 []
where
perms [] _ = []
@@ -1147,12 +1284,22 @@ permutations xs0 = xs0 : perms xs0 []
--
-- >>> sort [1,6,4,3,2,5]
-- [1,2,3,4,5,6]
+--
+-- The argument must be finite.
+--
sort :: (Ord a) => [a] -> [a]
-- | The 'sortBy' function is the non-overloaded version of 'sort'.
+-- The argument must be finite.
--
-- >>> sortBy (\(a,_) (b,_) -> compare a b) [(2, "world"), (4, "!"), (1, "Hello")]
-- [(1,"Hello"),(2,"world"),(4,"!")]
+--
+-- The supplied comparison relation is supposed to be reflexive and antisymmetric,
+-- otherwise, e. g., for @\_ _ -> GT@, the ordered list simply does not exist.
+-- The relation is also expected to be transitive: if it is not then 'sortBy'
+-- might fail to find an ordered permutation, even if it exists.
+--
sortBy :: (a -> a -> Ordering) -> [a] -> [a]
#if defined(USE_REPORT_PRELUDE)
@@ -1310,10 +1457,10 @@ rqpart cmp x (y:ys) rle rgt r =
#endif /* USE_REPORT_PRELUDE */
-- | Sort a list by comparing the results of a key function applied to each
--- element. @sortOn f@ is equivalent to @sortBy (comparing f)@, but has the
+-- element. @'sortOn' f@ is equivalent to @'sortBy' ('comparing' f)@, but has the
-- performance advantage of only evaluating @f@ once for each element in the
-- input list. This is called the decorate-sort-undecorate paradigm, or
--- Schwartzian transform.
+-- <https://en.wikipedia.org/wiki/Schwartzian_transform Schwartzian transform>.
--
-- Elements are arranged from lowest to highest, keeping duplicates in
-- the order they appeared in the input.
@@ -1321,6 +1468,8 @@ rqpart cmp x (y:ys) rle rgt r =
-- >>> sortOn fst [(2, "world"), (4, "!"), (1, "Hello")]
-- [(1,"Hello"),(2,"world"),(4,"!")]
--
+-- The argument must be finite.
+--
-- @since 4.8.0.0
sortOn :: Ord b => (a -> b) -> [a] -> [a]
sortOn f =
@@ -1397,35 +1546,29 @@ unfoldr f b0 = build (\c n ->
-- Functions on strings
-- | Splits the argument into a list of /lines/ stripped of their terminating
--- @\n@ characters. The @\n@ terminator is optional in a final non-empty
+-- @\\n@ characters. The @\\n@ terminator is optional in a final non-empty
-- line of the argument string.
--
-- For example:
--
-- >>> lines "" -- empty input contains no lines
-- []
---
-- >>> lines "\n" -- single empty line
-- [""]
---
-- >>> lines "one" -- single unterminated line
-- ["one"]
---
-- >>> lines "one\n" -- single non-empty line
-- ["one"]
---
-- >>> lines "one\n\n" -- second line is empty
-- ["one",""]
---
-- >>> lines "one\ntwo" -- second line is unterminated
-- ["one","two"]
---
-- >>> lines "one\ntwo\n" -- two non-empty lines
-- ["one","two"]
--
--- When the argument string is empty, or ends in a @\n@ character, it can be
+-- When the argument string is empty, or ends in a @\\n@ character, it can be
-- recovered by passing the result of 'lines' to the 'unlines' function.
--- Otherwise, 'unlines' appends the missing terminating @\n@. This makes
+-- Otherwise, 'unlines' appends the missing terminating @\\n@. This makes
-- @unlines . lines@ /idempotent/:
--
-- > (unlines . lines) . (unlines . lines) = (unlines . lines)
@@ -1443,15 +1586,13 @@ lines s = cons (case break (== '\n') s of
where
cons ~(h, t) = h : t
--- | Appends a @\n@ character to each input string, then concatenates the
--- results. Equivalent to @'foldMap' (\s -> s '++' "\n")@.
+-- | Appends a @\\n@ character to each input string, then concatenates the
+-- results. Equivalent to @'foldMap' (\s -> s '++' "\\n")@.
--
-- >>> unlines ["Hello", "World", "!"]
-- "Hello\nWorld\n!\n"
--
--- = Note
---
--- @'unlines' '.' 'lines' '/=' 'id'@ when the input is not @\n@-terminated:
+-- Note that @'unlines' '.' 'lines' '/=' 'id'@ when the input is not @\\n@-terminated:
--
-- >>> unlines . lines $ "foo\nbar"
-- "foo\nbar\n"
@@ -1466,10 +1607,14 @@ unlines (l:ls) = l ++ '\n' : unlines ls
#endif
-- | 'words' breaks a string up into a list of words, which were delimited
--- by white space.
+-- by white space (as defined by 'isSpace'). This function trims any white spaces
+-- at the beginning and at the end.
--
-- >>> words "Lorem ipsum\ndolor"
-- ["Lorem","ipsum","dolor"]
+-- >>> words " foo bar "
+-- ["foo","bar"]
+--
words :: String -> [String]
{-# NOINLINE [1] words #-}
words s = case dropWhile {-partain:Char.-}isSpace s of
@@ -1491,11 +1636,18 @@ wordsFB c n = go
s' -> w `c` go s''
where (w, s'') = break isSpace s'
--- | 'unwords' is an inverse operation to 'words'.
--- It joins words with separating spaces.
+-- | 'unwords' joins words with separating spaces (U+0020 SPACE).
--
-- >>> unwords ["Lorem", "ipsum", "dolor"]
-- "Lorem ipsum dolor"
+--
+-- 'unwords' is neither left nor right inverse of 'words':
+--
+-- >>> words (unwords [" "])
+-- []
+-- >>> unwords (words "foo\nbar")
+-- "foo bar"
+--
unwords :: [String] -> String
#if defined(USE_REPORT_PRELUDE)
unwords [] = ""
diff --git a/libraries/base/GHC/List.hs b/libraries/base/GHC/List.hs
index 6b1a94fafe..b96c16cfab 100644
--- a/libraries/base/GHC/List.hs
+++ b/libraries/base/GHC/List.hs
@@ -801,8 +801,8 @@ iterate'FB c f x0 = go x0
-- | 'repeat' @x@ is an infinite list, with @x@ the value of every element.
--
--- >>> take 20 $ repeat 17
---[17,17,17,17,17,17,17,17,17...
+-- >>> repeat 17
+-- [17,17,17,17,17,17,17,17,17...
repeat :: a -> [a]
{-# INLINE [0] repeat #-}
-- The pragma just gives the rules more chance to fire
@@ -839,9 +839,9 @@ replicate n x = take n (repeat x)
--
-- >>> cycle []
-- *** Exception: Prelude.cycle: empty list
--- >>> take 20 $ cycle [42]
+-- >>> cycle [42]
-- [42,42,42,42,42,42,42,42,42,42...
--- >>> take 20 $ cycle [2, 5, 7]
+-- >>> cycle [2, 5, 7]
-- [2,5,7,2,5,7,2,5,7,2,5,7...
cycle :: HasCallStack => [a] -> [a]
cycle [] = errorEmptyList "cycle"
@@ -1284,6 +1284,7 @@ notElem x (y:ys)= x /= y && notElem x ys
-- | \(\mathcal{O}(n)\). 'lookup' @key assocs@ looks up a key in an association
-- list.
+-- For the result to be 'Nothing', the list must be finite.
--
-- >>> lookup 2 []
-- Nothing
@@ -1291,6 +1292,7 @@ notElem x (y:ys)= x /= y && notElem x ys
-- Nothing
-- >>> lookup 2 [(1, "first"), (2, "second"), (3, "third")]
-- Just "second"
+--
lookup :: (Eq a) => a -> [(a,b)] -> Maybe b
lookup _key [] = Nothing
lookup key ((x,y):xys)
@@ -1349,8 +1351,8 @@ concat = foldr (++) []
-- >>> ['a', 'b', 'c'] !! (-1)
-- *** Exception: Prelude.!!: negative index
--
--- WARNING: This function is partial. You can use <'atMay'
--- https://hackage.haskell.org/package/safe-0.3.19/docs/Safe.html#v:atMay>
+-- WARNING: This function is partial. You can use
+-- <https://hackage.haskell.org/package/safe/docs/Safe.html#v:atMay atMay>
-- instead.
#if defined(USE_REPORT_PRELUDE)
(!!) :: [a] -> Int -> a