summaryrefslogtreecommitdiff
path: root/testsuite/tests/programs/seward-space-leak/Main.lhs
diff options
context:
space:
mode:
Diffstat (limited to 'testsuite/tests/programs/seward-space-leak/Main.lhs')
-rw-r--r--testsuite/tests/programs/seward-space-leak/Main.lhs650
1 files changed, 650 insertions, 0 deletions
diff --git a/testsuite/tests/programs/seward-space-leak/Main.lhs b/testsuite/tests/programs/seward-space-leak/Main.lhs
new file mode 100644
index 0000000000..327118d5e3
--- /dev/null
+++ b/testsuite/tests/programs/seward-space-leak/Main.lhs
@@ -0,0 +1,650 @@
+{-
+
+This test runs for a Long Time (10mins for the registerised version)
+and allocates 3.4Gbytes. It also hammers the GC; with -H16M it spend
+40% of the time in the GC.
+
+
+
+Date: Sun, 25 Oct 92 16:38:12 GMT
+From: Julian Seward (DRL PhD) <sewardj@uk.ac.man.cs>
+Message-Id: <9210251638.AA21153@r6b.cs.man.ac.uk>
+To: partain@uk.ac.glasgow.dcs
+Subject: Space consumption in 0.09 produced binary
+Cc: sewardj@uk.ac.man.cs, simonpj@uk.ac.glasgow.dcs
+
+Folks,
+
+At the risk of wasting even more of your valuable time, here is
+a small problem I ran into:
+
+The program (XXXX.lhs) listed below runs in constant space (about 4k)
+in both Gofer and hbc 0.998.5. When compiled with 0.09, it runs out
+of heap in seconds (4 meg heap).
+
+The program builds a gigantic list of things (CDSs, in fact), I believe
+at least 100,000 long, and searches to find out if a particular CDS is
+present. The CDS list is generated lazily, and should be thrown away
+as it goes, until apply_cds is found (see the bottom of the listing).
+Gofer and hbc behave as expected, but I suspect ghc is holding onto
+the complete list unnecessarily.
+
+I include XXXX.stat as supporting evidence.
+
+Jules
+
+(compiled hence:
+ghc9 -v -O -gc-ap -flet-float -Rgc-stats -Rmax-heapsize 14M -o XXXX XXXX.lhs
+)
+
+-----------------------------------------------------------------------
+
+XXXX +RTS -S
+
+Collector: APPEL HeapSize: 4,194,304 (bytes)
+
+ Alloc Live Live Astk Bstk OldGen GC GC TOT TOT Page Flts Collec Resid
+ bytes bytes % bytes bytes roots user elap user elap GC TOT tion %heap
+2097108 1119672 53.4 52 132 1119616 0.33 0.35 1.01 1.15 0 0 Minor
+1537300 918200 59.7 48 128 918188 0.26 0.31 1.76 1.95 0 0 Minor
+1078216 654212 60.7 56 160 652612 0.19 0.18 2.29 2.46 0 0 Minor
+ 751108 442140 58.9 52 108 442140 0.12 0.12 2.64 2.84 0 0 Minor
+3134224 2935044 93.6 52 108 1.49 1.50 4.13 4.34 0 0 *MAJOR* 70.0%
+ 629612 376848 59.9 52 132 376836 0.11 0.11 4.44 4.64 0 0 Minor
+ 441184 265100 60.1 96 200 264416 0.08 0.07 4.66 4.86 0 0 Minor
+ 308640 204072 66.1 56 160 199476 0.06 0.05 4.81 5.01 0 0 Minor
+3781064 3687092 97.5 56 160 1.81 1.85 6.62 6.86 0 0 *MAJOR* 87.9%
+ 253600 160584 63.3 52 108 160584 0.05 0.04 6.75 6.98 0 0 Minor
+ 173312 112344 64.8 56 160 110304 0.03 0.03 6.83 7.07 0 0 Minor
+ 117128 77260 66.0 36 140 74112 0.01 0.02 6.88 7.13 0 0 Minor
+4037280 3985284 98.7 36 140 1.96 1.98 8.85 9.11 0 0 *MAJOR* 95.0%
+
+-------------------------------------------------------------------------
+-}
+
+> module Main where
+
+%============================================================
+%============================================================
+
+\section{A CDS interpreter}
+
+\subsection{Declarations}
+
+Second attempt at a CDS interpreter. Should do
+loop detection correctly in the presence of higher order functions.
+
+The types allowed are very restrictive at the mo.
+
+> data Type = Two
+> | Fn [Type]
+
+Now, we also have to define CDSs and selectors.
+\begin{itemize}
+\item
+@Empty@ is a non-legitimate CDS, denoting no value at all. We use
+it as an argument in calls to other CDSs denoting that
+the particular argument is not really supplied.
+\item
+@Par@ is similarly a non-legit CDS, but useful for constructing
+selectors. It simply denotes the parameter specified (note
+parameter numbering starts at 1).
+\item
+@Zero@ and @One@ are constant valued CDSs.
+\item
+@Call@.
+Calls to other functions are done with @Call@, which expects
+the callee to return @Zero@ or @One@, and selects the relevant
+branch. The @Tag@s identify calls in the dependancy list.
+Although a @Call@ is a glorified @Case@ statement, the only allowed
+return values are @Zero@ and @One@. Hence the @CDS CDS@ continuations
+rather than the more comprehensive @(AList Return CDS)@.
+We require arguments to be fully disassembled.
+\item @Case@
+Case selectors can only be of the following form:
+\begin{itemize}
+\item
+ @[Par n]@ if the n'th parameter is not a function space.
+\item
+ @[Par n, v1 ... vn]@ if the n'th parameter is a function space of
+ arity n. The v's may be only @Empty@, @Zero@,
+ @One@, or @Par n@.
+\end{itemize}
+\end{itemize}
+We also have a @Magic@ CDS which is a load of mumbo-jumbo for use
+in enumeration of and compilation to CDSs. Of no significance
+whatever here.
+
+> data CDS = Empty
+> | Par Int
+> | Zero
+> | One
+> | Case [CDS] (AList Return CDS)
+> | Call String Tag [CDS] CDS CDS
+> | Magic
+>
+> type AList a b = [(a, b)]
+>
+> type Tag = Int
+
+> instance Eq CDS where
+> (Par n1) == (Par n2) = n1 == n2
+> Zero == Zero = True
+> One == One = True
+> (Case sels1 rets1) == (Case sels2 rets2) = sels1 == sels2 &&
+> rets1 == rets2
+> (Call f1 t1 sels1 a1 b1) == (Call f2 t2 sels2 a2 b2)
+> = f1 == f2 && t1 == t2 && sels1 == sels2 && a1 == a2 && b1 == b2
+> Magic == Magic = True
+> _ == _ = False
+
+
+A @Return@ is a temporary thing used to decide which way to go at
+a @Case@ statement.
+
+> data Return = RZero
+> | ROne
+> | RP Int
+
+> instance Eq Return where
+> RZero == RZero = True
+> ROne == ROne = True
+> (RP p1) == (RP p2) = p1 == p2
+> _ == _ = False
+
+We need a code store, which gives out a fresh instance of a CDS
+as necessary. ToDo: Need to rename call sites? I don't think so.
+
+> type Code = AList String CDS
+
+%============================================================
+%============================================================
+
+\subsection{The evaluator}
+Main CDS evaluator takes
+\begin{itemize}
+\item the code store
+\item the dependancy list, a list of @Tag@s of calls which are
+ currently in progress
+\item the current arguments
+\item the CDS fragment currently being worked on
+\end{itemize}
+
+> type Depends = [Tag]
+>
+> eval :: Code -> Depends -> [CDS] -> CDS -> CDS
+
+Evaluating a constant valued CDS is trivial. There may be arguments
+present -- this is not a mistake.
+
+> eval co de args Zero = Zero
+> eval co de args One = One
+
+Making a call is also pretty simple, because we assume
+that all non-functional arguments are presented as literals,
+and all functional values have already been dismantled (unless
+they are being passed unchanged in the same position in a recursive call
+to the same function, something for the compiler to detect).
+
+Two other issues are at work here. Guided by the selectors,
+we copy the args to make a set of args for the call. However, if an
+copied arg is Empty, the call cannot proceed, so we return the CDS as-is.
+Note that an Empty *selector* is not allowed in a Call (although it is
+in a Case).
+
+The second issue arises if the call can go ahead. We need to check the
+tag on the call just about to be made with the tags of calls already in
+progress (in de) to see if we are looping. If the tag has already been
+encountered, the result of the call is Zero, so the Zero alternative is
+immediately selected.
+
+> eval co de args cds@(Call fname tag params alt0 alt1)
+> = let (copied_an_empty, callee_args) = copy_args args params
+> augmented_de = tag : de
+> callee_code = lkup co fname
+> callee_result = eval co augmented_de callee_args callee_code
+> been_here_before = tag `elem` de
+> in
+> if copied_an_empty
+> then cds
+> else
+> if been_here_before
+> then eval co augmented_de args alt0
+> else case callee_result of
+> Zero -> eval co de args alt0
+> One -> eval co de args alt1
+> _ -> error "Bad callee result"
+
+Case really means "evaluate".
+
+ - make sure first selector is non-Empty. If so, return CDS as-is.
+
+ - Copy other args. If Empty is *copied*, return CDS as-is.
+ Otherwise, call evaluator and switch on head of result.
+
+Note about switching on the head of the result. We expect to see
+*only* the following as results:
+
+ Zero
+ One
+ Case [Param m, rest]
+
+in which case switching is performed on
+
+ Zero
+ One
+ Case (Param m)
+
+ToDo: what happens if a Call turns up ???
+
+> eval co de args cds@(Case ((Par n):ps) alts)
+> = let (copied_an_empty, new_args) = copy_args args ps
+> functional_param = args !! (n-1)
+> in if functional_param == Empty ||
+> copied_an_empty
+> then cds
+> else eval co de args
+> (lkup alts (get_head
+> (eval co de new_args functional_param)))
+
+Auxiliary for evaluating Case expressions.
+
+> get_head Zero = RZero
+> get_head One = ROne
+> get_head (Case ((Par n):_) _) = RP n
+
+Copy args based on directions in a list of selectors.
+Also returns a boolean which is True if an Empty has been
+*copied*. An Empty *selector* simply produces Empty in the
+corresponding output position.
+
+> copy_args :: [CDS] -> [CDS] -> (Bool, [CDS])
+>
+> copy_args args params
+> = case cax False params [] of
+> (empty_copied, res) -> (empty_copied, reverse res)
+> where
+> cax empty [] res = (empty, res)
+> cax empty (Zero:ps) res = cax empty ps (Zero:res)
+> cax empty (One:ps) res = cax empty ps (One:res)
+> cax empty (Empty:ps) res = cax empty ps (Empty:res)
+> cax empty ((Par n):ps) res
+> = case args !! (n-1) of
+> Empty -> cax True ps (Empty:res)
+> other -> cax empty ps (other:res)
+
+> lkup env k = head ( [v | (kk,v) <- env, kk == k] ++
+> [error ( "Can't look up " ) ] )
+
+%============================================================
+%============================================================
+
+%============================================================
+%============================================================
+
+Something to make running tests easier ...
+
+> eval0 fname args = eval test [] args (lkup test fname)
+>
+> two = [Zero, One]
+
+Now for some test data ...
+
+> test
+> =
+> [
+> ("add", add_cds),
+> ("apply", apply_cds),
+> ("k0", k0_cds),
+> ("id", id_cds),
+> ("k1", k1_cds),
+> ("kkkr", kkkr_cds),
+> ("kkkl", kkkl_cds),
+> ("apply2", apply2_cds)
+> ]
+>
+
+Constant Zero function.
+
+> k0_cds
+> = Case [Par 1]
+> [(RZero, Zero),
+> (ROne, Zero)]
+>
+
+Identity.
+
+> id_cds
+> = Case [Par 1]
+> [(RZero, Zero),
+> (ROne, One)]
+
+Constant One function.
+
+> k1_cds
+> = Case [Par 1]
+> [(RZero, One),
+> (ROne, One)]
+
+Strict in both of two arguments, for example (+).
+
+> add_cds
+> = Case [Par 1]
+> [(RZero, Case [Par 2]
+> [(RZero, Zero),
+> (ROne, Zero)
+> ]),
+> (ROne, Case [Par 2]
+> [(RZero, Zero),
+> (ROne, One)
+> ])
+> ]
+
+The (in)famous apply function.
+
+> apply_cds
+> = Case [Par 1, Empty]
+> [(RZero, Zero),
+> (ROne, One),
+> (RP 1, Case [Par 2]
+> [(RZero, Case [Par 1, Zero]
+> [(RZero, Zero),
+> (ROne, One)]),
+> (ROne, Case [Par 1, One]
+> [(RZero, Zero),
+> (ROne, One)])
+> ])
+> ]
+
+The inverse K-combinator: K x y = y
+
+> kkkr_cds
+> = Case [Par 2]
+> [(RZero, Zero),
+> (ROne, One)
+> ]
+
+The standard K-combinator, defined thus: K x y = K-inverse y x.
+Purpose of this is to test function calling.
+
+> kkkl_cds
+> = Case [Par 1]
+> [(RZero, Case [Par 2]
+> [(RZero, Call "kkkr" 101 [Zero, Zero] Zero One),
+> (ROne, Call "kkkr" 102 [One, Zero] Zero One)
+> ]),
+> (ROne, Case [Par 2]
+> [(RZero, Call "kkkr" 103 [Zero, One] Zero One),
+> (ROne, Call "kkkr" 104 [One, One] Zero One)
+> ])
+> ]
+
+Apply a 2-argument function (apply2 f x y = f x y).
+
+> apply2_cds
+> = Case [Par 1, Empty, Empty]
+> [(RZero, Zero),
+> (ROne, One),
+> (RP 1, Case [Par 2]
+> [(RZero, Case [Par 1, Zero, Empty]
+> [(RZero, Zero),
+> (ROne, One),
+> (RP 2, Case [Par 3]
+> [(RZero, Case [Par 1, Zero, Zero]
+> [(RZero, Zero),
+> (ROne, One)]),
+> (ROne, Case [Par 1, Zero, One]
+> [(RZero, Zero),
+> (ROne, One)])
+> ])
+> ]),
+> (ROne, Case [Par 1, One, Empty]
+> [(RZero, Zero),
+> (ROne, One),
+> (RP 2, Case [Par 3]
+> [(RZero, Case [Par 1, One, Zero]
+> [(RZero, Zero),
+> (ROne, One)]),
+> (ROne, Case [Par 1, One, One]
+> [(RZero, Zero),
+> (ROne, One)])
+> ])
+> ])
+> ]),
+> (RP 2, Case [Par 3]
+> [(RZero, Case [Par 1, Empty, Zero]
+> [(RZero, Zero),
+> (ROne, One),
+> (RP 1, Case [Par 2]
+> [(RZero, Case [Par 1, Zero, Zero]
+> [(RZero, Zero),
+> (ROne, One)]),
+> (ROne, Case [Par 1, One, Zero]
+> [(RZero, Zero),
+> (ROne, One)])
+> ])
+> ]),
+> (ROne, Case [Par 1, Empty, One]
+> [(RZero, Zero),
+> (ROne, One),
+> (RP 1, Case [Par 2]
+> [(RZero, Case [Par 1, Zero, One]
+> [(RZero, Zero),
+> (ROne, One)]),
+> (ROne, Case [Par 1, One, One]
+> [(RZero, Zero),
+> (ROne, One)])
+> ])
+> ])
+> ])
+> ]
+
+Simple, isn't it!
+
+%============================================================
+%============================================================
+
+%============================================================
+%============================================================
+
+Enumeration of all CDSs of a given type.
+
+Define n-ary branched trees. These are used to hold the
+possible prefixes of function arguments, something essential
+when enumerating higher-order CDSs. ToDo: translate to English
+
+> data NTree a = NLeaf
+> | NBranch a [NTree a]
+
+The enumeration enterprise involves some mutual recursion
+when it comes to higher-order functions. We define the
+top-level enumerator function, for trivial cases, hence:
+
+> enumerate :: Type -> [CDS]
+>
+> enumerate Two = [Zero, One]
+> enumerate (Fn ats) =
+> expand_templates (traverse (length ats) (gen_pfx_trees ats))
+
+Enumerating a function space is tricky. In summary:
+
+ - Generate the prefix trees for each argument.
+ For non-function arguments this trivial, but for
+ function-valued arguments this means a call to the
+ enumerator to get all the possible values of the
+ (argument) function space.
+
+ - Traverse the prefix trees, generating a series of
+ "templates" for functions.
+
+ - Expand each template thus generated into a genuine CDS.
+ Each template denotes a group of CDSs, all of
+ the same "shape" and differing only in the constants
+ they return. The Magic and RMagic constructors are
+ used for these purposes.
+
+Generating prefix trees. For a Two-argument, is easy:
+
+> gen_pfx_trees :: [Type] -> [NTree [CDS]]
+>
+> gen_pfx_trees ts = zipWith gen_pfx_tree ts [1 .. length ts]
+>
+> gen_pfx_tree :: Type -> Int -> NTree [CDS]
+>
+> gen_pfx_tree Two n = NBranch [Par n] []
+
+Note all prefixes are missing the initial (Par n) selector ...
+
+For a function arg
+
+ - enumerate each of the *function's* args
+
+ - starting with a selector [Empty, ...., Empty],
+ make a tree wherein at each level, branching is
+ achieved by filling in every Empty with every value
+ of that argument type. ToDo: fix this
+
+> gen_pfx_tree (Fn arg_types) n
+> = let number_args = length arg_types
+> enumed_args = map enumerate arg_types
+> initial_sel = take number_args (repeat Empty)
+> init_tree = NBranch ((Par n):initial_sel) []
+> in
+> expand_pfx_tree number_args number_args n enumed_args init_tree
+
+@expand_pfx_tree@ expands a tree until there are no Emptys
+at the leaves. Its first parameter is the number of Emptys
+in the tree it has been given; when zero, expansion is complete.
+The second parameter is the number of Emptys in the original
+tree (equal to the arity of the function being enumerated).
+Third number is the argument number in the top-level function,
+needed to make the initial "Par n" selector.
+Also needs to carry around the enumeration of the function's
+arguments.
+
+> expand_pfx_tree :: Int -> Int -> Int -> [[CDS]] -> NTree [CDS] -> NTree [CDS]
+>
+> expand_pfx_tree 0 w i enums tree = tree
+>
+> expand_pfx_tree n w i enums (NBranch sels [])
+> = let indices = [0 .. w - 1]
+> n_minus_1 = n - 1
+> new_sels = concat (map expand_sel indices)
+> expand_sel n
+> = case sels !! (n+1) of
+> Empty -> map (upd (n+1) sels) (enums !! n)
+> other -> []
+> mk_trivial_tree sel = NBranch sel []
+> in
+> NBranch sels (map (expand_pfx_tree n_minus_1 w i enums . mk_trivial_tree)
+> new_sels)
+
+> upd :: Int -> [a] -> a -> [a]
+> upd 0 (y:ys) x = x:ys
+> upd n (y:ys) x = y:upd (n-1) ys x
+
+In the second phase, the prefix trees are traversed to generate
+CDS templates (full of Magic, but no Zero or One).
+The first arg is the number of arguments, and the
+second the prefix trees for each argument.
+
+> traverse :: Int -> [NTree [CDS]] -> [CDS]
+
+Each pfxtree denotes a selector, one for each argument, plus a load
+of more specific selectors. So for each argument, one manufactures
+all possible sub-cds using the sub-selectors as the set Z.
+You then take this arg's selector, and manufacture a load of CDSs
+like this:
+\begin{verbatim}
+ Case this_selector
+ 0 -> z | z <- Z
+ 1 -> z | z <- Z
+ Par n -> z | z <- Z for each n in [1 .. length this_selector]
+ satisfying this_selector !! n == Empty
+\end{verbatim}
+
+
+> traverse n pfxtrees
+> = Magic : concat (map doOne [0 .. n - 1])
+> where
+> doOne i = traverse_arg n i pfxtrees (pfxtrees !! i)
+
+@traverse_arg@ makes the CDSs corresponding to descending a
+particular argument, the number of which is given as its second
+parameter. It also gets the complete set of pfxtrees and the one
+to descend. Note that having descended in the given argument, we
+check its sub-selectors. If none, (an empty list), this replaced
+by [NLeaf] to make everything work out. A NLeaf selector
+is a dummy which generates no CDSs.
+
+> traverse_arg n i pfxtrees NLeaf
+> = []
+
+> traverse_arg n i pfxtrees (NBranch this_selector subsidiary_selectors_init)
+> = let subsidiary_selectors
+> = case subsidiary_selectors_init of
+> [] -> [NLeaf]; (_:_) -> subsidiary_selectors_init
+> subsidiary_pfxtrees = map (upd i pfxtrees) subsidiary_selectors
+> par_requests = preq 1 [] this_selector
+> preq n acc [] = acc
+> preq n acc (Empty:rest) = preq (n+1) ((RP n):acc) rest
+> preq n acc (other:rest) = preq (n+1) acc rest
+> subsidiary_cdss = concat (map (traverse n) subsidiary_pfxtrees)
+> all_poss_rhss = splat (2 + length par_requests) subsidiary_cdss
+> all_poss_returns = [RZero, ROne] ++ par_requests
+> in
+> [Case this_selector (zip all_poss_returns rhs)
+> | rhs <- all_poss_rhss]
+>
+> splat :: Int -> [a] -> [[a]]
+> splat 0 set = [[]]
+> splat n set = [x:xs | x <- set, xs <- splat (n-1) set]
+
+The final stage in the game is to fill in all the @Magic@s
+with constants. A template with $n$ @Magic@s presently generates
+@2^n@ CDSs, obtained by all possible combinations of
+filling each @Magic@ in with @Zero@ or @One@. To do this we
+first need to count the @Magic@s.
+
+> count_magic :: CDS -> Int
+>
+> count_magic Magic = 1
+> count_magic (Case sels alts) = sum (map (count_magic.snd) alts)
+
+We don't expect to see anything else at this stage.
+Now make $2^n$ lists, each of length $n$, each with a different
+sequence of @Zero@s and @One@s. Use these to label the
+@Magic@s in the template.
+
+> label_cds :: CDS -> [CDS] -> ([CDS], CDS)
+>
+> label_cds Magic (l:ls) = (ls, l)
+> label_cds (Case sels alts) ls
+> = case f ls alts of (l9, alts_done) -> (l9, Case sels alts_done)
+> where
+> f l0 [] = (l0, [])
+> f l0 (a:as) = let (l1, a_done) = lalt l0 a
+> (l2, as_done) = f l1 as
+> in (l2, a_done:as_done)
+> lalt l0 (ret, cds) = case label_cds cds l0 of
+> (l1, cds_done) -> (l1, (ret, cds_done))
+
+Finally:
+
+> expand_templates :: [CDS] -> [CDS]
+>
+> expand_templates ts
+> = concat (map f ts)
+> where
+> f tem = map (snd . label_cds tem)
+> (splat (count_magic tem) [Zero, One])
+
+--> testq tt = (layn . map show' . nub) (enumerate tt)
+
+> main = putStrLn (show (apply_cds `myElem` (enumerate (Fn [Fn [Two], Two]))))
+>
+> i `myElem` [] = False
+> i `myElem` (x:xs) = if i == x then True else i `myElem` xs
+
+%============================================================
+%============================================================