From 4b16ff6d5d89ba7054daad312acf32de4140488e Mon Sep 17 00:00:00 2001 From: Herbert Valerio Riedel Date: Mon, 1 Dec 2014 08:45:16 +0100 Subject: unlit compiler/stranal/ modules Reviewed By: austin Differential Revision: https://phabricator.haskell.org/D541 --- compiler/stranal/DmdAnal.hs | 1186 ++++++++++++++++++++++++++++++++++++++++ compiler/stranal/DmdAnal.lhs | 1187 ----------------------------------------- compiler/stranal/WorkWrap.hs | 476 +++++++++++++++++ compiler/stranal/WorkWrap.lhs | 477 ----------------- compiler/stranal/WwLib.hs | 770 ++++++++++++++++++++++++++ compiler/stranal/WwLib.lhs | 776 --------------------------- 6 files changed, 2432 insertions(+), 2440 deletions(-) create mode 100644 compiler/stranal/DmdAnal.hs delete mode 100644 compiler/stranal/DmdAnal.lhs create mode 100644 compiler/stranal/WorkWrap.hs delete mode 100644 compiler/stranal/WorkWrap.lhs create mode 100644 compiler/stranal/WwLib.hs delete mode 100644 compiler/stranal/WwLib.lhs diff --git a/compiler/stranal/DmdAnal.hs b/compiler/stranal/DmdAnal.hs new file mode 100644 index 0000000000..9d9af64a7e --- /dev/null +++ b/compiler/stranal/DmdAnal.hs @@ -0,0 +1,1186 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1993-1998 + + + ----------------- + A demand analysis + ----------------- +-} + +{-# LANGUAGE CPP #-} + +module DmdAnal ( dmdAnalProgram ) where + +#include "HsVersions.h" + +import DynFlags +import WwLib ( findTypeShape, deepSplitProductType_maybe ) +import Demand -- All of it +import CoreSyn +import Outputable +import VarEnv +import BasicTypes +import FastString +import Data.List +import DataCon +import Id +import CoreUtils ( exprIsHNF, exprType, exprIsTrivial ) +import TyCon +import Type +import FamInstEnv +import Util +import Maybes ( isJust ) +import TysWiredIn ( unboxedPairDataCon ) +import TysPrim ( realWorldStatePrimTy ) +import ErrUtils ( dumpIfSet_dyn ) +import Name ( getName, stableNameCmp ) +import Data.Function ( on ) + +{- +************************************************************************ +* * +\subsection{Top level stuff} +* * +************************************************************************ +-} + +dmdAnalProgram :: DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram +dmdAnalProgram dflags fam_envs binds + = do { + let { binds_plus_dmds = do_prog binds } ; + dumpIfSet_dyn dflags Opt_D_dump_strsigs "Strictness signatures" $ + dumpStrSig binds_plus_dmds ; + return binds_plus_dmds + } + where + do_prog :: CoreProgram -> CoreProgram + do_prog binds = snd $ mapAccumL dmdAnalTopBind (emptyAnalEnv dflags fam_envs) binds + +-- Analyse a (group of) top-level binding(s) +dmdAnalTopBind :: AnalEnv + -> CoreBind + -> (AnalEnv, CoreBind) +dmdAnalTopBind sigs (NonRec id rhs) + = (extendAnalEnv TopLevel sigs id sig, NonRec id2 rhs2) + where + ( _, _, _, rhs1) = dmdAnalRhs TopLevel Nothing sigs id rhs + (sig, _, id2, rhs2) = dmdAnalRhs TopLevel Nothing (nonVirgin sigs) id rhs1 + -- Do two passes to improve CPR information + -- See comments with ignore_cpr_info in mk_sig_ty + -- and with extendSigsWithLam + +dmdAnalTopBind sigs (Rec pairs) + = (sigs', Rec pairs') + where + (sigs', _, pairs') = dmdFix TopLevel sigs pairs + -- We get two iterations automatically + -- c.f. the NonRec case above + +{- +************************************************************************ +* * +\subsection{The analyser itself} +* * +************************************************************************ + +Note [Ensure demand is strict] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's important not to analyse e with a lazy demand because +a) When we encounter case s of (a,b) -> + we demand s with U(d1d2)... but if the overall demand is lazy + that is wrong, and we'd need to reduce the demand on s, + which is inconvenient +b) More important, consider + f (let x = R in x+x), where f is lazy + We still want to mark x as demanded, because it will be when we + enter the let. If we analyse f's arg with a Lazy demand, we'll + just mark x as Lazy +c) The application rule wouldn't be right either + Evaluating (f x) in a L demand does *not* cause + evaluation of f in a C(L) demand! +-} + +-- If e is complicated enough to become a thunk, its contents will be evaluated +-- at most once, so oneify it. +dmdTransformThunkDmd :: CoreExpr -> Demand -> Demand +dmdTransformThunkDmd e + | exprIsTrivial e = id + | otherwise = oneifyDmd + +-- Do not process absent demands +-- Otherwise act like in a normal demand analysis +-- See |-* relation in the companion paper +dmdAnalStar :: AnalEnv + -> Demand -- This one takes a *Demand* + -> CoreExpr -> (BothDmdArg, CoreExpr) +dmdAnalStar env dmd e + | (cd, defer_and_use) <- toCleanDmd dmd (exprType e) + , (dmd_ty, e') <- dmdAnal env cd e + = (postProcessDmdTypeM defer_and_use dmd_ty, e') + +-- Main Demand Analsysis machinery +dmdAnal, dmdAnal' :: AnalEnv + -> CleanDemand -- The main one takes a *CleanDemand* + -> CoreExpr -> (DmdType, CoreExpr) + +-- The CleanDemand is always strict and not absent +-- See Note [Ensure demand is strict] + +dmdAnal env d e = -- pprTrace "dmdAnal" (ppr d <+> ppr e) $ + dmdAnal' env d e + +dmdAnal' _ _ (Lit lit) = (nopDmdType, Lit lit) +dmdAnal' _ _ (Type ty) = (nopDmdType, Type ty) -- Doesn't happen, in fact +dmdAnal' _ _ (Coercion co) = (nopDmdType, Coercion co) + +dmdAnal' env dmd (Var var) + = (dmdTransform env var dmd, Var var) + +dmdAnal' env dmd (Cast e co) + = (dmd_ty, Cast e' co) + where + (dmd_ty, e') = dmdAnal env dmd e + +{- ----- I don't get this, so commenting out ------- + to_co = pSnd (coercionKind co) + dmd' + | Just tc <- tyConAppTyCon_maybe to_co + , isRecursiveTyCon tc = cleanEvalDmd + | otherwise = dmd + -- This coerce usually arises from a recursive + -- newtype, and we don't want to look inside them + -- for exactly the same reason that we don't look + -- inside recursive products -- we might not reach + -- a fixpoint. So revert to a vanilla Eval demand +-} + +dmdAnal' env dmd (Tick t e) + = (dmd_ty, Tick t e') + where + (dmd_ty, e') = dmdAnal env dmd e + +dmdAnal' env dmd (App fun (Type ty)) + = (fun_ty, App fun' (Type ty)) + where + (fun_ty, fun') = dmdAnal env dmd fun + +dmdAnal' sigs dmd (App fun (Coercion co)) + = (fun_ty, App fun' (Coercion co)) + where + (fun_ty, fun') = dmdAnal sigs dmd fun + +-- Lots of the other code is there to make this +-- beautiful, compositional, application rule :-) +dmdAnal' env dmd (App fun arg) -- Non-type arguments + = let -- [Type arg handled above] + call_dmd = mkCallDmd dmd + (fun_ty, fun') = dmdAnal env call_dmd fun + (arg_dmd, res_ty) = splitDmdTy fun_ty + (arg_ty, arg') = dmdAnalStar env (dmdTransformThunkDmd arg arg_dmd) arg + in +-- pprTrace "dmdAnal:app" (vcat +-- [ text "dmd =" <+> ppr dmd +-- , text "expr =" <+> ppr (App fun arg) +-- , text "fun dmd_ty =" <+> ppr fun_ty +-- , text "arg dmd =" <+> ppr arg_dmd +-- , text "arg dmd_ty =" <+> ppr arg_ty +-- , text "res dmd_ty =" <+> ppr res_ty +-- , text "overall res dmd_ty =" <+> ppr (res_ty `bothDmdType` arg_ty) ]) + (res_ty `bothDmdType` arg_ty, App fun' arg') + +-- this is an anonymous lambda, since @dmdAnalRhs@ uses @collectBinders@ +dmdAnal' env dmd (Lam var body) + | isTyVar var + = let + (body_ty, body') = dmdAnal env dmd body + in + (body_ty, Lam var body') + + | otherwise + = let (body_dmd, defer_and_use@(_,one_shot)) = peelCallDmd dmd + -- body_dmd - a demand to analyze the body + -- one_shot - one-shotness of the lambda + -- hence, cardinality of its free vars + + env' = extendSigsWithLam env var + (body_ty, body') = dmdAnal env' body_dmd body + (lam_ty, var') = annotateLamIdBndr env notArgOfDfun body_ty one_shot var + in + (postProcessUnsat defer_and_use lam_ty, Lam var' body') + +dmdAnal' env dmd (Case scrut case_bndr ty [alt@(DataAlt dc, _, _)]) + -- Only one alternative with a product constructor + | let tycon = dataConTyCon dc + , isProductTyCon tycon + , Just rec_tc' <- checkRecTc (ae_rec_tc env) tycon + = let + env_w_tc = env { ae_rec_tc = rec_tc' } + env_alt = extendAnalEnv NotTopLevel env_w_tc case_bndr case_bndr_sig + (alt_ty, alt') = dmdAnalAlt env_alt dmd alt + (alt_ty1, case_bndr') = annotateBndr env alt_ty case_bndr + (_, bndrs', _) = alt' + case_bndr_sig = cprProdSig (dataConRepArity dc) + -- Inside the alternative, the case binder has the CPR property. + -- Meaning that a case on it will successfully cancel. + -- Example: + -- f True x = case x of y { I# x' -> if x' ==# 3 then y else I# 8 } + -- f False x = I# 3 + -- + -- We want f to have the CPR property: + -- f b x = case fw b x of { r -> I# r } + -- fw True x = case x of y { I# x' -> if x' ==# 3 then x' else 8 } + -- fw False x = 3 + + -- Figure out whether the demand on the case binder is used, and use + -- that to set the scrut_dmd. This is utterly essential. + -- Consider f x = case x of y { (a,b) -> k y a } + -- If we just take scrut_demand = U(L,A), then we won't pass x to the + -- worker, so the worker will rebuild + -- x = (a, absent-error) + -- and that'll crash. + -- So at one stage I had: + -- dead_case_bndr = isAbsDmd (idDemandInfo case_bndr') + -- keepity | dead_case_bndr = Drop + -- | otherwise = Keep + -- + -- But then consider + -- case x of y { (a,b) -> h y + a } + -- where h : U(LL) -> T + -- The above code would compute a Keep for x, since y is not Abs, which is silly + -- The insight is, of course, that a demand on y is a demand on the + -- scrutinee, so we need to `both` it with the scrut demand + + scrut_dmd1 = mkProdDmd [idDemandInfo b | b <- bndrs', isId b] + scrut_dmd2 = strictenDmd (idDemandInfo case_bndr') + scrut_dmd = scrut_dmd1 `bothCleanDmd` scrut_dmd2 + + (scrut_ty, scrut') = dmdAnal env scrut_dmd scrut + res_ty = alt_ty1 `bothDmdType` toBothDmdArg scrut_ty + in +-- pprTrace "dmdAnal:Case1" (vcat [ text "scrut" <+> ppr scrut +-- , text "dmd" <+> ppr dmd +-- , text "case_bndr_dmd" <+> ppr (idDemandInfo case_bndr') +-- , text "scrut_dmd" <+> ppr scrut_dmd +-- , text "scrut_ty" <+> ppr scrut_ty +-- , text "alt_ty" <+> ppr alt_ty1 +-- , text "res_ty" <+> ppr res_ty ]) $ + (res_ty, Case scrut' case_bndr' ty [alt']) + +dmdAnal' env dmd (Case scrut case_bndr ty alts) + = let -- Case expression with multiple alternatives + (alt_tys, alts') = mapAndUnzip (dmdAnalAlt env dmd) alts + (scrut_ty, scrut') = dmdAnal env cleanEvalDmd scrut + (alt_ty, case_bndr') = annotateBndr env (foldr lubDmdType botDmdType alt_tys) case_bndr + res_ty = alt_ty `bothDmdType` toBothDmdArg scrut_ty + in +-- pprTrace "dmdAnal:Case2" (vcat [ text "scrut" <+> ppr scrut +-- , text "scrut_ty" <+> ppr scrut_ty +-- , text "alt_tys" <+> ppr alt_tys +-- , text "alt_ty" <+> ppr alt_ty +-- , text "res_ty" <+> ppr res_ty ]) $ + (res_ty, Case scrut' case_bndr' ty alts') + +dmdAnal' env dmd (Let (NonRec id rhs) body) + = (body_ty2, Let (NonRec id2 annotated_rhs) body') + where + (sig, lazy_fv, id1, rhs') = dmdAnalRhs NotTopLevel Nothing env id rhs + (body_ty, body') = dmdAnal (extendAnalEnv NotTopLevel env id sig) dmd body + (body_ty1, id2) = annotateBndr env body_ty id1 + body_ty2 = addLazyFVs body_ty1 lazy_fv + + -- Annotate top-level lambdas at RHS basing on the aggregated demand info + -- See Note [Annotating lambdas at right-hand side] + annotated_rhs = annLamWithShotness (idDemandInfo id2) rhs' + + -- If the actual demand is better than the vanilla call + -- demand, you might think that we might do better to re-analyse + -- the RHS with the stronger demand. + -- But (a) That seldom happens, because it means that *every* path in + -- the body of the let has to use that stronger demand + -- (b) It often happens temporarily in when fixpointing, because + -- the recursive function at first seems to place a massive demand. + -- But we don't want to go to extra work when the function will + -- probably iterate to something less demanding. + -- In practice, all the times the actual demand on id2 is more than + -- the vanilla call demand seem to be due to (b). So we don't + -- bother to re-analyse the RHS. + +dmdAnal' env dmd (Let (Rec pairs) body) + = let + (env', lazy_fv, pairs') = dmdFix NotTopLevel env pairs + (body_ty, body') = dmdAnal env' dmd body + body_ty1 = deleteFVs body_ty (map fst pairs) + body_ty2 = addLazyFVs body_ty1 lazy_fv + in + body_ty2 `seq` + (body_ty2, Let (Rec pairs') body') + +annLamWithShotness :: Demand -> CoreExpr -> CoreExpr +annLamWithShotness d e + | Just u <- cleanUseDmd_maybe d + = go u e + | otherwise = e + where + go u e + | Just (c, u') <- peelUseCall u + , Lam bndr body <- e + = if isTyVar bndr + then Lam bndr (go u body) + else Lam (setOneShotness c bndr) (go u' body) + | otherwise + = e + +setOneShotness :: Count -> Id -> Id +setOneShotness One bndr = setOneShotLambda bndr +setOneShotness Many bndr = bndr + +dmdAnalAlt :: AnalEnv -> CleanDemand -> Alt Var -> (DmdType, Alt Var) +dmdAnalAlt env dmd (con,bndrs,rhs) + = let + (rhs_ty, rhs') = dmdAnal env dmd rhs + rhs_ty' = addDataConPatDmds con bndrs rhs_ty + (alt_ty, bndrs') = annotateBndrs env rhs_ty' bndrs + final_alt_ty | io_hack_reqd = deferAfterIO alt_ty + | otherwise = alt_ty + + -- Note [IO hack in the demand analyser] + -- + -- There's a hack here for I/O operations. Consider + -- case foo x s of { (# s, r #) -> y } + -- Is this strict in 'y'. Normally yes, but what if 'foo' is an I/O + -- operation that simply terminates the program (not in an erroneous way)? + -- In that case we should not evaluate y before the call to 'foo'. + -- Hackish solution: spot the IO-like situation and add a virtual branch, + -- as if we had + -- case foo x s of + -- (# s, r #) -> y + -- other -> return () + -- So the 'y' isn't necessarily going to be evaluated + -- + -- A more complete example (Trac #148, #1592) where this shows up is: + -- do { let len = ; + -- ; when (...) (exitWith ExitSuccess) + -- ; print len } + + io_hack_reqd = con == DataAlt unboxedPairDataCon && + idType (head bndrs) `eqType` realWorldStatePrimTy + in + (final_alt_ty, (con, bndrs', rhs')) + +{- +Note [Aggregated demand for cardinality] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We use different strategies for strictness and usage/cardinality to +"unleash" demands captured on free variables by bindings. Let us +consider the example: + +f1 y = let {-# NOINLINE h #-} + h = y + in (h, h) + +We are interested in obtaining cardinality demand U1 on |y|, as it is +used only in a thunk, and, therefore, is not going to be updated any +more. Therefore, the demand on |y|, captured and unleashed by usage of +|h| is U1. However, if we unleash this demand every time |h| is used, +and then sum up the effects, the ultimate demand on |y| will be U1 + +U1 = U. In order to avoid it, we *first* collect the aggregate demand +on |h| in the body of let-expression, and only then apply the demand +transformer: + +transf[x](U) = {y |-> U1} + +so the resulting demand on |y| is U1. + +The situation is, however, different for strictness, where this +aggregating approach exhibits worse results because of the nature of +|both| operation for strictness. Consider the example: + +f y c = + let h x = y |seq| x + in case of + True -> h True + False -> y + +It is clear that |f| is strict in |y|, however, the suggested analysis +will infer from the body of |let| that |h| is used lazily (as it is +used in one branch only), therefore lazy demand will be put on its +free variable |y|. Conversely, if the demand on |h| is unleashed right +on the spot, we will get the desired result, namely, that |f| is +strict in |y|. + +Note [Annotating lambdas at right-hand side] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Let us take a look at the following example: + +g f = let x = 100 + h = \y -> f x y + in h 5 + +One can see that |h| is called just once, therefore the RHS of h can +be annotated as a one-shot lambda. This is done by the function +annLamWithShotness *a posteriori*, i.e., basing on the aggregated +usage demand on |h| from the body of |let|-expression, which is C1(U) +in this case. + +In other words, for locally-bound lambdas we can infer +one-shotness. +-} + +addDataConPatDmds :: AltCon -> [Var] -> DmdType -> DmdType +-- See Note [Add demands for strict constructors] +addDataConPatDmds DEFAULT _ dmd_ty = dmd_ty +addDataConPatDmds (LitAlt _) _ dmd_ty = dmd_ty +addDataConPatDmds (DataAlt con) bndrs dmd_ty + = foldr add dmd_ty str_bndrs + where + add bndr dmd_ty = addVarDmd dmd_ty bndr seqDmd + str_bndrs = [ b | (b,s) <- zipEqual "addDataConPatBndrs" + (filter isId bndrs) + (dataConRepStrictness con) + , isMarkedStrict s ] + +{- +Note [Add demands for strict constructors] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this program (due to Roman): + + data X a = X !a + + foo :: X Int -> Int -> Int + foo (X a) n = go 0 + where + go i | i < n = a + go (i+1) + | otherwise = 0 + +We want the worker for 'foo' too look like this: + + $wfoo :: Int# -> Int# -> Int# + +with the first argument unboxed, so that it is not eval'd each time +around the loop (which would otherwise happen, since 'foo' is not +strict in 'a'. It is sound for the wrapper to pass an unboxed arg +because X is strict, so its argument must be evaluated. And if we +*don't* pass an unboxed argument, we can't even repair it by adding a +`seq` thus: + + foo (X a) n = a `seq` go 0 + +because the seq is discarded (very early) since X is strict! + +There is the usual danger of reboxing, which as usual we ignore. But +if X is monomorphic, and has an UNPACK pragma, then this optimisation +is even more important. We don't want the wrapper to rebox an unboxed +argument, and pass an Int to $wfoo! + +************************************************************************ +* * + Demand transformer +* * +************************************************************************ +-} + +dmdTransform :: AnalEnv -- The strictness environment + -> Id -- The function + -> CleanDemand -- The demand on the function + -> DmdType -- The demand type of the function in this context + -- Returned DmdEnv includes the demand on + -- this function plus demand on its free variables + +dmdTransform env var dmd + | isDataConWorkId var -- Data constructor + = dmdTransformDataConSig (idArity var) (idStrictness var) dmd + + | gopt Opt_DmdTxDictSel (ae_dflags env), + Just _ <- isClassOpId_maybe var -- Dictionary component selector + = dmdTransformDictSelSig (idStrictness var) dmd + + | isGlobalId var -- Imported function + = let res = dmdTransformSig (idStrictness var) dmd in +-- pprTrace "dmdTransform" (vcat [ppr var, ppr (idStrictness var), ppr dmd, ppr res]) + res + + | Just (sig, top_lvl) <- lookupSigEnv env var -- Local letrec bound thing + , let fn_ty = dmdTransformSig sig dmd + = -- pprTrace "dmdTransform" (vcat [ppr var, ppr sig, ppr dmd, ppr fn_ty]) $ + if isTopLevel top_lvl + then fn_ty -- Don't record top level things + else addVarDmd fn_ty var (mkOnceUsedDmd dmd) + + | otherwise -- Local non-letrec-bound thing + = unitVarDmd var (mkOnceUsedDmd dmd) + +{- +************************************************************************ +* * +\subsection{Bindings} +* * +************************************************************************ +-} + +-- Recursive bindings +dmdFix :: TopLevelFlag + -> AnalEnv -- Does not include bindings for this binding + -> [(Id,CoreExpr)] + -> (AnalEnv, DmdEnv, + [(Id,CoreExpr)]) -- Binders annotated with stricness info + +dmdFix top_lvl env orig_pairs + = (updSigEnv env (sigEnv final_env), lazy_fv, pairs') + -- Return to original virgin state, keeping new signatures + where + bndrs = map fst orig_pairs + initial_env = addInitialSigs top_lvl env bndrs + (final_env, lazy_fv, pairs') = loop 1 initial_env orig_pairs + + loop :: Int + -> AnalEnv -- Already contains the current sigs + -> [(Id,CoreExpr)] + -> (AnalEnv, DmdEnv, [(Id,CoreExpr)]) + loop n env pairs + = -- pprTrace "dmd loop" (ppr n <+> ppr bndrs $$ ppr env) $ + loop' n env pairs + + loop' n env pairs + | found_fixpoint + = (env', lazy_fv, pairs') + -- Note: return pairs', not pairs. pairs' is the result of + -- processing the RHSs with sigs (= sigs'), whereas pairs + -- is the result of processing the RHSs with the *previous* + -- iteration of sigs. + + | n >= 10 + = -- pprTrace "dmdFix loop" (ppr n <+> (vcat + -- [ text "Sigs:" <+> ppr [ (id,lookupVarEnv (sigEnv env) id, + -- lookupVarEnv (sigEnv env') id) + -- | (id,_) <- pairs], + -- text "env:" <+> ppr env, + -- text "binds:" <+> pprCoreBinding (Rec pairs)])) + (env, lazy_fv, orig_pairs) -- Safe output + -- The lazy_fv part is really important! orig_pairs has no strictness + -- info, including nothing about free vars. But if we have + -- letrec f = ....y..... in ...f... + -- where 'y' is free in f, we must record that y is mentioned, + -- otherwise y will get recorded as absent altogether + + | otherwise + = loop (n+1) (nonVirgin env') pairs' + where + found_fixpoint = all (same_sig (sigEnv env) (sigEnv env')) bndrs + + ((env',lazy_fv), pairs') = mapAccumL my_downRhs (env, emptyDmdEnv) pairs + -- mapAccumL: Use the new signature to do the next pair + -- The occurrence analyser has arranged them in a good order + -- so this can significantly reduce the number of iterations needed + + my_downRhs (env, lazy_fv) (id,rhs) + = ((env', lazy_fv'), (id', rhs')) + where + (sig, lazy_fv1, id', rhs') = dmdAnalRhs top_lvl (Just bndrs) env id rhs + lazy_fv' = plusVarEnv_C bothDmd lazy_fv lazy_fv1 + env' = extendAnalEnv top_lvl env id sig + + same_sig sigs sigs' var = lookup sigs var == lookup sigs' var + lookup sigs var = case lookupVarEnv sigs var of + Just (sig,_) -> sig + Nothing -> pprPanic "dmdFix" (ppr var) + +-- Non-recursive bindings +dmdAnalRhs :: TopLevelFlag + -> Maybe [Id] -- Just bs <=> recursive, Nothing <=> non-recursive + -> AnalEnv -> Id -> CoreExpr + -> (StrictSig, DmdEnv, Id, CoreExpr) +-- Process the RHS of the binding, add the strictness signature +-- to the Id, and augment the environment with the signature as well. +dmdAnalRhs top_lvl rec_flag env id rhs + | Just fn <- unpackTrivial rhs -- See Note [Demand analysis for trivial right-hand sides] + , let fn_str = getStrictness env fn + fn_fv | isLocalId fn = unitVarEnv fn topDmd + | otherwise = emptyDmdEnv + -- Note [Remember to demand the function itself] + -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + -- fn_fv: don't forget to produce a demand for fn itself + -- Lacking this caused Trac #9128 + -- The demand is very conservative (topDmd), but that doesn't + -- matter; trivial bindings are usually inlined, so it only + -- kicks in for top-level bindings and NOINLINE bindings + = (fn_str, fn_fv, set_idStrictness env id fn_str, rhs) + + | otherwise + = (sig_ty, lazy_fv, id', mkLams bndrs' body') + where + (bndrs, body) = collectBinders rhs + env_body = foldl extendSigsWithLam env bndrs + (body_ty, body') = dmdAnal env_body body_dmd body + body_ty' = removeDmdTyArgs body_ty -- zap possible deep CPR info + (DmdType rhs_fv rhs_dmds rhs_res, bndrs') + = annotateLamBndrs env (isDFunId id) body_ty' bndrs + sig_ty = mkStrictSig (mkDmdType sig_fv rhs_dmds rhs_res') + id' = set_idStrictness env id sig_ty + -- See Note [NOINLINE and strictness] + + -- See Note [Product demands for function body] + body_dmd = case deepSplitProductType_maybe (ae_fam_envs env) (exprType body) of + Nothing -> cleanEvalDmd + Just (dc, _, _, _) -> cleanEvalProdDmd (dataConRepArity dc) + + -- See Note [Lazy and unleashable free variables] + -- See Note [Aggregated demand for cardinality] + rhs_fv1 = case rec_flag of + Just bs -> reuseEnv (delVarEnvList rhs_fv bs) + Nothing -> rhs_fv + + (lazy_fv, sig_fv) = splitFVs is_thunk rhs_fv1 + + rhs_res' = trimCPRInfo trim_all trim_sums rhs_res + trim_all = is_thunk && not_strict + trim_sums = not (isTopLevel top_lvl) -- See Note [CPR for sum types] + + -- See Note [CPR for thunks] + is_thunk = not (exprIsHNF rhs) + not_strict + = isTopLevel top_lvl -- Top level and recursive things don't + || isJust rec_flag -- get their demandInfo set at all + || not (isStrictDmd (idDemandInfo id) || ae_virgin env) + -- See Note [Optimistic CPR in the "virgin" case] + +unpackTrivial :: CoreExpr -> Maybe Id +-- Returns (Just v) if the arg is really equal to v, modulo +-- casts, type applications etc +-- See Note [Demand analysis for trivial right-hand sides] +unpackTrivial (Var v) = Just v +unpackTrivial (Cast e _) = unpackTrivial e +unpackTrivial (Lam v e) | isTyVar v = unpackTrivial e +unpackTrivial (App e a) | isTypeArg a = unpackTrivial e +unpackTrivial _ = Nothing + +{- +Note [Demand analysis for trivial right-hand sides] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + foo = plusInt |> co +where plusInt is an arity-2 function with known strictness. Clearly +we want plusInt's strictness to propagate to foo! But because it has +no manifest lambdas, it won't do so automatically, and indeed 'co' might +have type (Int->Int->Int) ~ T, so we *can't* eta-expand. So we have a +special case for right-hand sides that are "trivial", namely variables, +casts, type applications, and the like. + +Note that this can mean that 'foo' has an arity that is smaller than that +indicated by its demand info. e.g. if co :: (Int->Int->Int) ~ T, then +foo's arity will be zero (see Note [exprArity invariant] in CoreArity), +but its demand signature will be that of plusInt. A small example is the +test case of Trac #8963. + + +Note [Product demands for function body] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +This example comes from shootout/binary_trees: + + Main.check' = \ b z ds. case z of z' { I# ip -> + case ds_d13s of + Main.Nil -> z' + Main.Node s14k s14l s14m -> + Main.check' (not b) + (Main.check' b + (case b { + False -> I# (-# s14h s14k); + True -> I# (+# s14h s14k) + }) + s14l) + s14m } } } + +Here we *really* want to unbox z, even though it appears to be used boxed in +the Nil case. Partly the Nil case is not a hot path. But more specifically, +the whole function gets the CPR property if we do. + +So for the demand on the body of a RHS we use a product demand if it's +a product type. + +************************************************************************ +* * +\subsection{Strictness signatures and types} +* * +************************************************************************ +-} + +unitVarDmd :: Var -> Demand -> DmdType +unitVarDmd var dmd + = DmdType (unitVarEnv var dmd) [] topRes + +addVarDmd :: DmdType -> Var -> Demand -> DmdType +addVarDmd (DmdType fv ds res) var dmd + = DmdType (extendVarEnv_C bothDmd fv var dmd) ds res + +addLazyFVs :: DmdType -> DmdEnv -> DmdType +addLazyFVs dmd_ty lazy_fvs + = dmd_ty `bothDmdType` mkBothDmdArg lazy_fvs + -- Using bothDmdType (rather than just both'ing the envs) + -- is vital. Consider + -- let f = \x -> (x,y) + -- in error (f 3) + -- Here, y is treated as a lazy-fv of f, but we must `bothDmd` that L + -- demand with the bottom coming up from 'error' + -- + -- I got a loop in the fixpointer without this, due to an interaction + -- with the lazy_fv filtering in dmdAnalRhs. Roughly, it was + -- letrec f n x + -- = letrec g y = x `fatbar` + -- letrec h z = z + ...g... + -- in h (f (n-1) x) + -- in ... + -- In the initial iteration for f, f=Bot + -- Suppose h is found to be strict in z, but the occurrence of g in its RHS + -- is lazy. Now consider the fixpoint iteration for g, esp the demands it + -- places on its free variables. Suppose it places none. Then the + -- x `fatbar` ...call to h... + -- will give a x->V demand for x. That turns into a L demand for x, + -- which floats out of the defn for h. Without the modifyEnv, that + -- L demand doesn't get both'd with the Bot coming up from the inner + -- call to f. So we just get an L demand for x for g. + +{- +Note [Do not strictify the argument dictionaries of a dfun] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The typechecker can tie recursive knots involving dfuns, so we do the +conservative thing and refrain from strictifying a dfun's argument +dictionaries. +-} + +annotateBndr :: AnalEnv -> DmdType -> Var -> (DmdType, Var) +-- The returned env has the var deleted +-- The returned var is annotated with demand info +-- according to the result demand of the provided demand type +-- No effect on the argument demands +annotateBndr env dmd_ty var + | isId var = (dmd_ty', setIdDemandInfo var dmd) + | otherwise = (dmd_ty, var) + where + (dmd_ty', dmd) = findBndrDmd env False dmd_ty var + +annotateBndrs :: AnalEnv -> DmdType -> [Var] -> (DmdType, [Var]) +annotateBndrs env = mapAccumR (annotateBndr env) + +annotateLamBndrs :: AnalEnv -> DFunFlag -> DmdType -> [Var] -> (DmdType, [Var]) +annotateLamBndrs env args_of_dfun ty bndrs = mapAccumR annotate ty bndrs + where + annotate dmd_ty bndr + | isId bndr = annotateLamIdBndr env args_of_dfun dmd_ty Many bndr + | otherwise = (dmd_ty, bndr) + +annotateLamIdBndr :: AnalEnv + -> DFunFlag -- is this lambda at the top of the RHS of a dfun? + -> DmdType -- Demand type of body + -> Count -- One-shot-ness of the lambda + -> Id -- Lambda binder + -> (DmdType, -- Demand type of lambda + Id) -- and binder annotated with demand + +annotateLamIdBndr env arg_of_dfun dmd_ty one_shot id +-- For lambdas we add the demand to the argument demands +-- Only called for Ids + = ASSERT( isId id ) + -- pprTrace "annLamBndr" (vcat [ppr id, ppr _dmd_ty]) $ + (final_ty, setOneShotness one_shot (setIdDemandInfo id dmd)) + where + -- Watch out! See note [Lambda-bound unfoldings] + final_ty = case maybeUnfoldingTemplate (idUnfolding id) of + Nothing -> main_ty + Just unf -> main_ty `bothDmdType` unf_ty + where + (unf_ty, _) = dmdAnalStar env dmd unf + + main_ty = addDemand dmd dmd_ty' + (dmd_ty', dmd) = findBndrDmd env arg_of_dfun dmd_ty id + +deleteFVs :: DmdType -> [Var] -> DmdType +deleteFVs (DmdType fvs dmds res) bndrs + = DmdType (delVarEnvList fvs bndrs) dmds res + +{- +Note [CPR for sum types] +~~~~~~~~~~~~~~~~~~~~~~~~ +At the moment we do not do CPR for let-bindings that + * non-top level + * bind a sum type +Reason: I found that in some benchmarks we were losing let-no-escapes, +which messed it all up. Example + let j = \x. .... + in case y of + True -> j False + False -> j True +If we w/w this we get + let j' = \x. .... + in case y of + True -> case j' False of { (# a #) -> Just a } + False -> case j' True of { (# a #) -> Just a } +Notice that j' is not a let-no-escape any more. + +However this means in turn that the *enclosing* function +may be CPR'd (via the returned Justs). But in the case of +sums, there may be Nothing alternatives; and that messes +up the sum-type CPR. + +Conclusion: only do this for products. It's still not +guaranteed OK for products, but sums definitely lose sometimes. + +Note [CPR for thunks] +~~~~~~~~~~~~~~~~~~~~~ +If the rhs is a thunk, we usually forget the CPR info, because +it is presumably shared (else it would have been inlined, and +so we'd lose sharing if w/w'd it into a function). E.g. + + let r = case expensive of + (a,b) -> (b,a) + in ... + +If we marked r as having the CPR property, then we'd w/w into + + let $wr = \() -> case expensive of + (a,b) -> (# b, a #) + r = case $wr () of + (# b,a #) -> (b,a) + in ... + +But now r is a thunk, which won't be inlined, so we are no further ahead. +But consider + + f x = let r = case expensive of (a,b) -> (b,a) + in if foo r then r else (x,x) + +Does f have the CPR property? Well, no. + +However, if the strictness analyser has figured out (in a previous +iteration) that it's strict, then we DON'T need to forget the CPR info. +Instead we can retain the CPR info and do the thunk-splitting transform +(see WorkWrap.splitThunk). + +This made a big difference to PrelBase.modInt, which had something like + modInt = \ x -> let r = ... -> I# v in + ...body strict in r... +r's RHS isn't a value yet; but modInt returns r in various branches, so +if r doesn't have the CPR property then neither does modInt +Another case I found in practice (in Complex.magnitude), looks like this: + let k = if ... then I# a else I# b + in ... body strict in k .... +(For this example, it doesn't matter whether k is returned as part of +the overall result; but it does matter that k's RHS has the CPR property.) +Left to itself, the simplifier will make a join point thus: + let $j k = ...body strict in k... + if ... then $j (I# a) else $j (I# b) +With thunk-splitting, we get instead + let $j x = let k = I#x in ...body strict in k... + in if ... then $j a else $j b +This is much better; there's a good chance the I# won't get allocated. + +The difficulty with this is that we need the strictness type to +look at the body... but we now need the body to calculate the demand +on the variable, so we can decide whether its strictness type should +have a CPR in it or not. Simple solution: + a) use strictness info from the previous iteration + b) make sure we do at least 2 iterations, by doing a second + round for top-level non-recs. Top level recs will get at + least 2 iterations except for totally-bottom functions + which aren't very interesting anyway. + +NB: strictly_demanded is never true of a top-level Id, or of a recursive Id. + +Note [Optimistic CPR in the "virgin" case] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Demand and strictness info are initialized by top elements. However, +this prevents from inferring a CPR property in the first pass of the +analyser, so we keep an explicit flag ae_virgin in the AnalEnv +datatype. + +We can't start with 'not-demanded' (i.e., top) because then consider + f x = let + t = ... I# x + in + if ... then t else I# y else f x' + +In the first iteration we'd have no demand info for x, so assume +not-demanded; then we'd get TopRes for f's CPR info. Next iteration +we'd see that t was demanded, and so give it the CPR property, but by +now f has TopRes, so it will stay TopRes. Instead, by checking the +ae_virgin flag at the first time round, we say 'yes t is demanded' the +first time. + +However, this does mean that for non-recursive bindings we must +iterate twice to be sure of not getting over-optimistic CPR info, +in the case where t turns out to be not-demanded. This is handled +by dmdAnalTopBind. + + +Note [NOINLINE and strictness] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The strictness analyser used to have a HACK which ensured that NOINLNE +things were not strictness-analysed. The reason was unsafePerformIO. +Left to itself, the strictness analyser would discover this strictness +for unsafePerformIO: + unsafePerformIO: C(U(AV)) +But then consider this sub-expression + unsafePerformIO (\s -> let r = f x in + case writeIORef v r s of (# s1, _ #) -> + (# s1, r #) +The strictness analyser will now find that r is sure to be eval'd, +and may then hoist it out. This makes tests/lib/should_run/memo002 +deadlock. + +Solving this by making all NOINLINE things have no strictness info is overkill. +In particular, it's overkill for runST, which is perfectly respectable. +Consider + f x = runST (return x) +This should be strict in x. + +So the new plan is to define unsafePerformIO using the 'lazy' combinator: + + unsafePerformIO (IO m) = lazy (case m realWorld# of (# _, r #) -> r) + +Remember, 'lazy' is a wired-in identity-function Id, of type a->a, which is +magically NON-STRICT, and is inlined after strictness analysis. So +unsafePerformIO will look non-strict, and that's what we want. + +Now we don't need the hack in the strictness analyser. HOWEVER, this +decision does mean that even a NOINLINE function is not entirely +opaque: some aspect of its implementation leaks out, notably its +strictness. For example, if you have a function implemented by an +error stub, but which has RULES, you may want it not to be eliminated +in favour of error! + +Note [Lazy and unleasheable free variables] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We put the strict and once-used FVs in the DmdType of the Id, so +that at its call sites we unleash demands on its strict fvs. +An example is 'roll' in imaginary/wheel-sieve2 +Something like this: + roll x = letrec + go y = if ... then roll (x-1) else x+1 + in + go ms +We want to see that roll is strict in x, which is because +go is called. So we put the DmdEnv for x in go's DmdType. + +Another example: + + f :: Int -> Int -> Int + f x y = let t = x+1 + h z = if z==0 then t else + if z==1 then x+1 else + x + h (z-1) + in h y + +Calling h does indeed evaluate x, but we can only see +that if we unleash a demand on x at the call site for t. + +Incidentally, here's a place where lambda-lifting h would +lose the cigar --- we couldn't see the joint strictness in t/x + + ON THE OTHER HAND +We don't want to put *all* the fv's from the RHS into the +DmdType, because that makes fixpointing very slow --- the +DmdType gets full of lazy demands that are slow to converge. + + +Note [Lamba-bound unfoldings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We allow a lambda-bound variable to carry an unfolding, a facility that is used +exclusively for join points; see Note [Case binders and join points]. If so, +we must be careful to demand-analyse the RHS of the unfolding! Example + \x. \y{=Just x}. +Then if uses 'y', then transitively it uses 'x', and we must not +forget that fact, otherwise we might make 'x' absent when it isn't. + + +************************************************************************ +* * +\subsection{Strictness signatures} +* * +************************************************************************ +-} + +type DFunFlag = Bool -- indicates if the lambda being considered is in the + -- sequence of lambdas at the top of the RHS of a dfun +notArgOfDfun :: DFunFlag +notArgOfDfun = False + +data AnalEnv + = AE { ae_dflags :: DynFlags + , ae_sigs :: SigEnv + , ae_virgin :: Bool -- True on first iteration only + -- See Note [Initialising strictness] + , ae_rec_tc :: RecTcChecker + , ae_fam_envs :: FamInstEnvs + } + + -- We use the se_env to tell us whether to + -- record info about a variable in the DmdEnv + -- We do so if it's a LocalId, but not top-level + -- + -- The DmdEnv gives the demand on the free vars of the function + -- when it is given enough args to satisfy the strictness signature + +type SigEnv = VarEnv (StrictSig, TopLevelFlag) + +instance Outputable AnalEnv where + ppr (AE { ae_sigs = env, ae_virgin = virgin }) + = ptext (sLit "AE") <+> braces (vcat + [ ptext (sLit "ae_virgin =") <+> ppr virgin + , ptext (sLit "ae_sigs =") <+> ppr env ]) + +emptyAnalEnv :: DynFlags -> FamInstEnvs -> AnalEnv +emptyAnalEnv dflags fam_envs + = AE { ae_dflags = dflags + , ae_sigs = emptySigEnv + , ae_virgin = True + , ae_rec_tc = initRecTc + , ae_fam_envs = fam_envs + } + +emptySigEnv :: SigEnv +emptySigEnv = emptyVarEnv + +sigEnv :: AnalEnv -> SigEnv +sigEnv = ae_sigs + +updSigEnv :: AnalEnv -> SigEnv -> AnalEnv +updSigEnv env sigs = env { ae_sigs = sigs } + +extendAnalEnv :: TopLevelFlag -> AnalEnv -> Id -> StrictSig -> AnalEnv +extendAnalEnv top_lvl env var sig + = env { ae_sigs = extendSigEnv top_lvl (ae_sigs env) var sig } + +extendSigEnv :: TopLevelFlag -> SigEnv -> Id -> StrictSig -> SigEnv +extendSigEnv top_lvl sigs var sig = extendVarEnv sigs var (sig, top_lvl) + +lookupSigEnv :: AnalEnv -> Id -> Maybe (StrictSig, TopLevelFlag) +lookupSigEnv env id = lookupVarEnv (ae_sigs env) id + +getStrictness :: AnalEnv -> Id -> StrictSig +getStrictness env fn + | isGlobalId fn = idStrictness fn + | Just (sig, _) <- lookupSigEnv env fn = sig + | otherwise = nopSig + +addInitialSigs :: TopLevelFlag -> AnalEnv -> [Id] -> AnalEnv +-- See Note [Initialising strictness] +addInitialSigs top_lvl env@(AE { ae_sigs = sigs, ae_virgin = virgin }) ids + = env { ae_sigs = extendVarEnvList sigs [ (id, (init_sig id, top_lvl)) + | id <- ids ] } + where + init_sig | virgin = \_ -> botSig + | otherwise = idStrictness + +nonVirgin :: AnalEnv -> AnalEnv +nonVirgin env = env { ae_virgin = False } + +extendSigsWithLam :: AnalEnv -> Id -> AnalEnv +-- Extend the AnalEnv when we meet a lambda binder +extendSigsWithLam env id + | isId id + , isStrictDmd (idDemandInfo id) || ae_virgin env + -- See Note [Optimistic CPR in the "virgin" case] + -- See Note [Initial CPR for strict binders] + , Just (dc,_,_,_) <- deepSplitProductType_maybe (ae_fam_envs env) $ idType id + = extendAnalEnv NotTopLevel env id (cprProdSig (dataConRepArity dc)) + + | otherwise + = env + +findBndrDmd :: AnalEnv -> Bool -> DmdType -> Id -> (DmdType, Demand) +-- See Note [Trimming a demand to a type] in Demand.lhs +findBndrDmd env arg_of_dfun dmd_ty id + = (dmd_ty', dmd') + where + dmd' = zapDemand (ae_dflags env) $ + strictify $ + trimToType starting_dmd (findTypeShape fam_envs id_ty) + + (dmd_ty', starting_dmd) = peelFV dmd_ty id + + id_ty = idType id + + strictify dmd + | gopt Opt_DictsStrict (ae_dflags env) + -- We never want to strictify a recursive let. At the moment + -- annotateBndr is only call for non-recursive lets; if that + -- changes, we need a RecFlag parameter and another guard here. + , not arg_of_dfun -- See Note [Do not strictify the argument dictionaries of a dfun] + = strictifyDictDmd id_ty dmd + | otherwise + = dmd + + fam_envs = ae_fam_envs env + +set_idStrictness :: AnalEnv -> Id -> StrictSig -> Id +set_idStrictness env id sig + = setIdStrictness id (zapStrictSig (ae_dflags env) sig) + +dumpStrSig :: CoreProgram -> SDoc +dumpStrSig binds = vcat (map printId ids) + where + ids = sortBy (stableNameCmp `on` getName) (concatMap getIds binds) + getIds (NonRec i _) = [ i ] + getIds (Rec bs) = map fst bs + printId id | isExportedId id = ppr id <> colon <+> pprIfaceStrictSig (idStrictness id) + | otherwise = empty + +{- +Note [Initial CPR for strict binders] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +CPR is initialized for a lambda binder in an optimistic manner, i.e, +if the binder is used strictly and at least some of its components as +a product are used, which is checked by the value of the absence +demand. + +If the binder is marked demanded with a strict demand, then give it a +CPR signature, because in the likely event that this is a lambda on a +fn defn [we only use this when the lambda is being consumed with a +call demand], it'll be w/w'd and so it will be CPR-ish. E.g. + + f = \x::(Int,Int). if ...strict in x... then + x + else + (a,b) +We want f to have the CPR property because x does, by the time f has been w/w'd + +Also note that we only want to do this for something that definitely +has product type, else we may get over-optimistic CPR results +(e.g. from \x -> x!). + + +Note [Initialising strictness] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +See section 9.2 (Finding fixpoints) of the paper. + +Our basic plan is to initialise the strictness of each Id in a +recursive group to "bottom", and find a fixpoint from there. However, +this group B might be inside an *enclosing* recursiveb group A, in +which case we'll do the entire fixpoint shebang on for each iteration +of A. This can be illustrated by the following example: + +Example: + + f [] = [] + f (x:xs) = let g [] = f xs + g (y:ys) = y+1 : g ys + in g (h x) + +At each iteration of the fixpoint for f, the analyser has to find a +fixpoint for the enclosed function g. In the meantime, the demand +values for g at each iteration for f are *greater* than those we +encountered in the previous iteration for f. Therefore, we can begin +the fixpoint for g not with the bottom value but rather with the +result of the previous analysis. I.e., when beginning the fixpoint +process for g, we can start from the demand signature computed for g +previously and attached to the binding occurrence of g. + +To speed things up, we initialise each iteration of A (the enclosing +one) from the result of the last one, which is neatly recorded in each +binder. That way we make use of earlier iterations of the fixpoint +algorithm. (Cunning plan.) + +But on the *first* iteration we want to *ignore* the current strictness +of the Id, and start from "bottom". Nowadays the Id can have a current +strictness, because interface files record strictness for nested bindings. +To know when we are in the first iteration, we look at the ae_virgin +field of the AnalEnv. +-} diff --git a/compiler/stranal/DmdAnal.lhs b/compiler/stranal/DmdAnal.lhs deleted file mode 100644 index 5cb2655afd..0000000000 --- a/compiler/stranal/DmdAnal.lhs +++ /dev/null @@ -1,1187 +0,0 @@ -% -% (c) The GRASP/AQUA Project, Glasgow University, 1993-1998 -% - - ----------------- - A demand analysis - ----------------- - -\begin{code} -{-# LANGUAGE CPP #-} - -module DmdAnal ( dmdAnalProgram ) where - -#include "HsVersions.h" - -import DynFlags -import WwLib ( findTypeShape, deepSplitProductType_maybe ) -import Demand -- All of it -import CoreSyn -import Outputable -import VarEnv -import BasicTypes -import FastString -import Data.List -import DataCon -import Id -import CoreUtils ( exprIsHNF, exprType, exprIsTrivial ) -import TyCon -import Type -import FamInstEnv -import Util -import Maybes ( isJust ) -import TysWiredIn ( unboxedPairDataCon ) -import TysPrim ( realWorldStatePrimTy ) -import ErrUtils ( dumpIfSet_dyn ) -import Name ( getName, stableNameCmp ) -import Data.Function ( on ) -\end{code} - -%************************************************************************ -%* * -\subsection{Top level stuff} -%* * -%************************************************************************ - -\begin{code} -dmdAnalProgram :: DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram -dmdAnalProgram dflags fam_envs binds - = do { - let { binds_plus_dmds = do_prog binds } ; - dumpIfSet_dyn dflags Opt_D_dump_strsigs "Strictness signatures" $ - dumpStrSig binds_plus_dmds ; - return binds_plus_dmds - } - where - do_prog :: CoreProgram -> CoreProgram - do_prog binds = snd $ mapAccumL dmdAnalTopBind (emptyAnalEnv dflags fam_envs) binds - --- Analyse a (group of) top-level binding(s) -dmdAnalTopBind :: AnalEnv - -> CoreBind - -> (AnalEnv, CoreBind) -dmdAnalTopBind sigs (NonRec id rhs) - = (extendAnalEnv TopLevel sigs id sig, NonRec id2 rhs2) - where - ( _, _, _, rhs1) = dmdAnalRhs TopLevel Nothing sigs id rhs - (sig, _, id2, rhs2) = dmdAnalRhs TopLevel Nothing (nonVirgin sigs) id rhs1 - -- Do two passes to improve CPR information - -- See comments with ignore_cpr_info in mk_sig_ty - -- and with extendSigsWithLam - -dmdAnalTopBind sigs (Rec pairs) - = (sigs', Rec pairs') - where - (sigs', _, pairs') = dmdFix TopLevel sigs pairs - -- We get two iterations automatically - -- c.f. the NonRec case above -\end{code} - -%************************************************************************ -%* * -\subsection{The analyser itself} -%* * -%************************************************************************ - -Note [Ensure demand is strict] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -It's important not to analyse e with a lazy demand because -a) When we encounter case s of (a,b) -> - we demand s with U(d1d2)... but if the overall demand is lazy - that is wrong, and we'd need to reduce the demand on s, - which is inconvenient -b) More important, consider - f (let x = R in x+x), where f is lazy - We still want to mark x as demanded, because it will be when we - enter the let. If we analyse f's arg with a Lazy demand, we'll - just mark x as Lazy -c) The application rule wouldn't be right either - Evaluating (f x) in a L demand does *not* cause - evaluation of f in a C(L) demand! - -\begin{code} --- If e is complicated enough to become a thunk, its contents will be evaluated --- at most once, so oneify it. -dmdTransformThunkDmd :: CoreExpr -> Demand -> Demand -dmdTransformThunkDmd e - | exprIsTrivial e = id - | otherwise = oneifyDmd - --- Do not process absent demands --- Otherwise act like in a normal demand analysis --- See |-* relation in the companion paper -dmdAnalStar :: AnalEnv - -> Demand -- This one takes a *Demand* - -> CoreExpr -> (BothDmdArg, CoreExpr) -dmdAnalStar env dmd e - | (cd, defer_and_use) <- toCleanDmd dmd (exprType e) - , (dmd_ty, e') <- dmdAnal env cd e - = (postProcessDmdTypeM defer_and_use dmd_ty, e') - --- Main Demand Analsysis machinery -dmdAnal, dmdAnal' :: AnalEnv - -> CleanDemand -- The main one takes a *CleanDemand* - -> CoreExpr -> (DmdType, CoreExpr) - --- The CleanDemand is always strict and not absent --- See Note [Ensure demand is strict] - -dmdAnal env d e = -- pprTrace "dmdAnal" (ppr d <+> ppr e) $ - dmdAnal' env d e - -dmdAnal' _ _ (Lit lit) = (nopDmdType, Lit lit) -dmdAnal' _ _ (Type ty) = (nopDmdType, Type ty) -- Doesn't happen, in fact -dmdAnal' _ _ (Coercion co) = (nopDmdType, Coercion co) - -dmdAnal' env dmd (Var var) - = (dmdTransform env var dmd, Var var) - -dmdAnal' env dmd (Cast e co) - = (dmd_ty, Cast e' co) - where - (dmd_ty, e') = dmdAnal env dmd e - -{- ----- I don't get this, so commenting out ------- - to_co = pSnd (coercionKind co) - dmd' - | Just tc <- tyConAppTyCon_maybe to_co - , isRecursiveTyCon tc = cleanEvalDmd - | otherwise = dmd - -- This coerce usually arises from a recursive - -- newtype, and we don't want to look inside them - -- for exactly the same reason that we don't look - -- inside recursive products -- we might not reach - -- a fixpoint. So revert to a vanilla Eval demand --} - -dmdAnal' env dmd (Tick t e) - = (dmd_ty, Tick t e') - where - (dmd_ty, e') = dmdAnal env dmd e - -dmdAnal' env dmd (App fun (Type ty)) - = (fun_ty, App fun' (Type ty)) - where - (fun_ty, fun') = dmdAnal env dmd fun - -dmdAnal' sigs dmd (App fun (Coercion co)) - = (fun_ty, App fun' (Coercion co)) - where - (fun_ty, fun') = dmdAnal sigs dmd fun - --- Lots of the other code is there to make this --- beautiful, compositional, application rule :-) -dmdAnal' env dmd (App fun arg) -- Non-type arguments - = let -- [Type arg handled above] - call_dmd = mkCallDmd dmd - (fun_ty, fun') = dmdAnal env call_dmd fun - (arg_dmd, res_ty) = splitDmdTy fun_ty - (arg_ty, arg') = dmdAnalStar env (dmdTransformThunkDmd arg arg_dmd) arg - in --- pprTrace "dmdAnal:app" (vcat --- [ text "dmd =" <+> ppr dmd --- , text "expr =" <+> ppr (App fun arg) --- , text "fun dmd_ty =" <+> ppr fun_ty --- , text "arg dmd =" <+> ppr arg_dmd --- , text "arg dmd_ty =" <+> ppr arg_ty --- , text "res dmd_ty =" <+> ppr res_ty --- , text "overall res dmd_ty =" <+> ppr (res_ty `bothDmdType` arg_ty) ]) - (res_ty `bothDmdType` arg_ty, App fun' arg') - --- this is an anonymous lambda, since @dmdAnalRhs@ uses @collectBinders@ -dmdAnal' env dmd (Lam var body) - | isTyVar var - = let - (body_ty, body') = dmdAnal env dmd body - in - (body_ty, Lam var body') - - | otherwise - = let (body_dmd, defer_and_use@(_,one_shot)) = peelCallDmd dmd - -- body_dmd - a demand to analyze the body - -- one_shot - one-shotness of the lambda - -- hence, cardinality of its free vars - - env' = extendSigsWithLam env var - (body_ty, body') = dmdAnal env' body_dmd body - (lam_ty, var') = annotateLamIdBndr env notArgOfDfun body_ty one_shot var - in - (postProcessUnsat defer_and_use lam_ty, Lam var' body') - -dmdAnal' env dmd (Case scrut case_bndr ty [alt@(DataAlt dc, _, _)]) - -- Only one alternative with a product constructor - | let tycon = dataConTyCon dc - , isProductTyCon tycon - , Just rec_tc' <- checkRecTc (ae_rec_tc env) tycon - = let - env_w_tc = env { ae_rec_tc = rec_tc' } - env_alt = extendAnalEnv NotTopLevel env_w_tc case_bndr case_bndr_sig - (alt_ty, alt') = dmdAnalAlt env_alt dmd alt - (alt_ty1, case_bndr') = annotateBndr env alt_ty case_bndr - (_, bndrs', _) = alt' - case_bndr_sig = cprProdSig (dataConRepArity dc) - -- Inside the alternative, the case binder has the CPR property. - -- Meaning that a case on it will successfully cancel. - -- Example: - -- f True x = case x of y { I# x' -> if x' ==# 3 then y else I# 8 } - -- f False x = I# 3 - -- - -- We want f to have the CPR property: - -- f b x = case fw b x of { r -> I# r } - -- fw True x = case x of y { I# x' -> if x' ==# 3 then x' else 8 } - -- fw False x = 3 - - -- Figure out whether the demand on the case binder is used, and use - -- that to set the scrut_dmd. This is utterly essential. - -- Consider f x = case x of y { (a,b) -> k y a } - -- If we just take scrut_demand = U(L,A), then we won't pass x to the - -- worker, so the worker will rebuild - -- x = (a, absent-error) - -- and that'll crash. - -- So at one stage I had: - -- dead_case_bndr = isAbsDmd (idDemandInfo case_bndr') - -- keepity | dead_case_bndr = Drop - -- | otherwise = Keep - -- - -- But then consider - -- case x of y { (a,b) -> h y + a } - -- where h : U(LL) -> T - -- The above code would compute a Keep for x, since y is not Abs, which is silly - -- The insight is, of course, that a demand on y is a demand on the - -- scrutinee, so we need to `both` it with the scrut demand - - scrut_dmd1 = mkProdDmd [idDemandInfo b | b <- bndrs', isId b] - scrut_dmd2 = strictenDmd (idDemandInfo case_bndr') - scrut_dmd = scrut_dmd1 `bothCleanDmd` scrut_dmd2 - - (scrut_ty, scrut') = dmdAnal env scrut_dmd scrut - res_ty = alt_ty1 `bothDmdType` toBothDmdArg scrut_ty - in --- pprTrace "dmdAnal:Case1" (vcat [ text "scrut" <+> ppr scrut --- , text "dmd" <+> ppr dmd --- , text "case_bndr_dmd" <+> ppr (idDemandInfo case_bndr') --- , text "scrut_dmd" <+> ppr scrut_dmd --- , text "scrut_ty" <+> ppr scrut_ty --- , text "alt_ty" <+> ppr alt_ty1 --- , text "res_ty" <+> ppr res_ty ]) $ - (res_ty, Case scrut' case_bndr' ty [alt']) - -dmdAnal' env dmd (Case scrut case_bndr ty alts) - = let -- Case expression with multiple alternatives - (alt_tys, alts') = mapAndUnzip (dmdAnalAlt env dmd) alts - (scrut_ty, scrut') = dmdAnal env cleanEvalDmd scrut - (alt_ty, case_bndr') = annotateBndr env (foldr lubDmdType botDmdType alt_tys) case_bndr - res_ty = alt_ty `bothDmdType` toBothDmdArg scrut_ty - in --- pprTrace "dmdAnal:Case2" (vcat [ text "scrut" <+> ppr scrut --- , text "scrut_ty" <+> ppr scrut_ty --- , text "alt_tys" <+> ppr alt_tys --- , text "alt_ty" <+> ppr alt_ty --- , text "res_ty" <+> ppr res_ty ]) $ - (res_ty, Case scrut' case_bndr' ty alts') - -dmdAnal' env dmd (Let (NonRec id rhs) body) - = (body_ty2, Let (NonRec id2 annotated_rhs) body') - where - (sig, lazy_fv, id1, rhs') = dmdAnalRhs NotTopLevel Nothing env id rhs - (body_ty, body') = dmdAnal (extendAnalEnv NotTopLevel env id sig) dmd body - (body_ty1, id2) = annotateBndr env body_ty id1 - body_ty2 = addLazyFVs body_ty1 lazy_fv - - -- Annotate top-level lambdas at RHS basing on the aggregated demand info - -- See Note [Annotating lambdas at right-hand side] - annotated_rhs = annLamWithShotness (idDemandInfo id2) rhs' - - -- If the actual demand is better than the vanilla call - -- demand, you might think that we might do better to re-analyse - -- the RHS with the stronger demand. - -- But (a) That seldom happens, because it means that *every* path in - -- the body of the let has to use that stronger demand - -- (b) It often happens temporarily in when fixpointing, because - -- the recursive function at first seems to place a massive demand. - -- But we don't want to go to extra work when the function will - -- probably iterate to something less demanding. - -- In practice, all the times the actual demand on id2 is more than - -- the vanilla call demand seem to be due to (b). So we don't - -- bother to re-analyse the RHS. - -dmdAnal' env dmd (Let (Rec pairs) body) - = let - (env', lazy_fv, pairs') = dmdFix NotTopLevel env pairs - (body_ty, body') = dmdAnal env' dmd body - body_ty1 = deleteFVs body_ty (map fst pairs) - body_ty2 = addLazyFVs body_ty1 lazy_fv - in - body_ty2 `seq` - (body_ty2, Let (Rec pairs') body') - -annLamWithShotness :: Demand -> CoreExpr -> CoreExpr -annLamWithShotness d e - | Just u <- cleanUseDmd_maybe d - = go u e - | otherwise = e - where - go u e - | Just (c, u') <- peelUseCall u - , Lam bndr body <- e - = if isTyVar bndr - then Lam bndr (go u body) - else Lam (setOneShotness c bndr) (go u' body) - | otherwise - = e - -setOneShotness :: Count -> Id -> Id -setOneShotness One bndr = setOneShotLambda bndr -setOneShotness Many bndr = bndr - -dmdAnalAlt :: AnalEnv -> CleanDemand -> Alt Var -> (DmdType, Alt Var) -dmdAnalAlt env dmd (con,bndrs,rhs) - = let - (rhs_ty, rhs') = dmdAnal env dmd rhs - rhs_ty' = addDataConPatDmds con bndrs rhs_ty - (alt_ty, bndrs') = annotateBndrs env rhs_ty' bndrs - final_alt_ty | io_hack_reqd = deferAfterIO alt_ty - | otherwise = alt_ty - - -- Note [IO hack in the demand analyser] - -- - -- There's a hack here for I/O operations. Consider - -- case foo x s of { (# s, r #) -> y } - -- Is this strict in 'y'. Normally yes, but what if 'foo' is an I/O - -- operation that simply terminates the program (not in an erroneous way)? - -- In that case we should not evaluate y before the call to 'foo'. - -- Hackish solution: spot the IO-like situation and add a virtual branch, - -- as if we had - -- case foo x s of - -- (# s, r #) -> y - -- other -> return () - -- So the 'y' isn't necessarily going to be evaluated - -- - -- A more complete example (Trac #148, #1592) where this shows up is: - -- do { let len = ; - -- ; when (...) (exitWith ExitSuccess) - -- ; print len } - - io_hack_reqd = con == DataAlt unboxedPairDataCon && - idType (head bndrs) `eqType` realWorldStatePrimTy - in - (final_alt_ty, (con, bndrs', rhs')) -\end{code} - -Note [Aggregated demand for cardinality] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We use different strategies for strictness and usage/cardinality to -"unleash" demands captured on free variables by bindings. Let us -consider the example: - -f1 y = let {-# NOINLINE h #-} - h = y - in (h, h) - -We are interested in obtaining cardinality demand U1 on |y|, as it is -used only in a thunk, and, therefore, is not going to be updated any -more. Therefore, the demand on |y|, captured and unleashed by usage of -|h| is U1. However, if we unleash this demand every time |h| is used, -and then sum up the effects, the ultimate demand on |y| will be U1 + -U1 = U. In order to avoid it, we *first* collect the aggregate demand -on |h| in the body of let-expression, and only then apply the demand -transformer: - -transf[x](U) = {y |-> U1} - -so the resulting demand on |y| is U1. - -The situation is, however, different for strictness, where this -aggregating approach exhibits worse results because of the nature of -|both| operation for strictness. Consider the example: - -f y c = - let h x = y |seq| x - in case of - True -> h True - False -> y - -It is clear that |f| is strict in |y|, however, the suggested analysis -will infer from the body of |let| that |h| is used lazily (as it is -used in one branch only), therefore lazy demand will be put on its -free variable |y|. Conversely, if the demand on |h| is unleashed right -on the spot, we will get the desired result, namely, that |f| is -strict in |y|. - -Note [Annotating lambdas at right-hand side] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Let us take a look at the following example: - -g f = let x = 100 - h = \y -> f x y - in h 5 - -One can see that |h| is called just once, therefore the RHS of h can -be annotated as a one-shot lambda. This is done by the function -annLamWithShotness *a posteriori*, i.e., basing on the aggregated -usage demand on |h| from the body of |let|-expression, which is C1(U) -in this case. - -In other words, for locally-bound lambdas we can infer -one-shotness. - -\begin{code} -addDataConPatDmds :: AltCon -> [Var] -> DmdType -> DmdType --- See Note [Add demands for strict constructors] -addDataConPatDmds DEFAULT _ dmd_ty = dmd_ty -addDataConPatDmds (LitAlt _) _ dmd_ty = dmd_ty -addDataConPatDmds (DataAlt con) bndrs dmd_ty - = foldr add dmd_ty str_bndrs - where - add bndr dmd_ty = addVarDmd dmd_ty bndr seqDmd - str_bndrs = [ b | (b,s) <- zipEqual "addDataConPatBndrs" - (filter isId bndrs) - (dataConRepStrictness con) - , isMarkedStrict s ] -\end{code} - -Note [Add demands for strict constructors] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Consider this program (due to Roman): - - data X a = X !a - - foo :: X Int -> Int -> Int - foo (X a) n = go 0 - where - go i | i < n = a + go (i+1) - | otherwise = 0 - -We want the worker for 'foo' too look like this: - - $wfoo :: Int# -> Int# -> Int# - -with the first argument unboxed, so that it is not eval'd each time -around the loop (which would otherwise happen, since 'foo' is not -strict in 'a'. It is sound for the wrapper to pass an unboxed arg -because X is strict, so its argument must be evaluated. And if we -*don't* pass an unboxed argument, we can't even repair it by adding a -`seq` thus: - - foo (X a) n = a `seq` go 0 - -because the seq is discarded (very early) since X is strict! - -There is the usual danger of reboxing, which as usual we ignore. But -if X is monomorphic, and has an UNPACK pragma, then this optimisation -is even more important. We don't want the wrapper to rebox an unboxed -argument, and pass an Int to $wfoo! - -%************************************************************************ -%* * - Demand transformer -%* * -%************************************************************************ - -\begin{code} -dmdTransform :: AnalEnv -- The strictness environment - -> Id -- The function - -> CleanDemand -- The demand on the function - -> DmdType -- The demand type of the function in this context - -- Returned DmdEnv includes the demand on - -- this function plus demand on its free variables - -dmdTransform env var dmd - | isDataConWorkId var -- Data constructor - = dmdTransformDataConSig (idArity var) (idStrictness var) dmd - - | gopt Opt_DmdTxDictSel (ae_dflags env), - Just _ <- isClassOpId_maybe var -- Dictionary component selector - = dmdTransformDictSelSig (idStrictness var) dmd - - | isGlobalId var -- Imported function - = let res = dmdTransformSig (idStrictness var) dmd in --- pprTrace "dmdTransform" (vcat [ppr var, ppr (idStrictness var), ppr dmd, ppr res]) - res - - | Just (sig, top_lvl) <- lookupSigEnv env var -- Local letrec bound thing - , let fn_ty = dmdTransformSig sig dmd - = -- pprTrace "dmdTransform" (vcat [ppr var, ppr sig, ppr dmd, ppr fn_ty]) $ - if isTopLevel top_lvl - then fn_ty -- Don't record top level things - else addVarDmd fn_ty var (mkOnceUsedDmd dmd) - - | otherwise -- Local non-letrec-bound thing - = unitVarDmd var (mkOnceUsedDmd dmd) -\end{code} - -%************************************************************************ -%* * -\subsection{Bindings} -%* * -%************************************************************************ - -\begin{code} - --- Recursive bindings -dmdFix :: TopLevelFlag - -> AnalEnv -- Does not include bindings for this binding - -> [(Id,CoreExpr)] - -> (AnalEnv, DmdEnv, - [(Id,CoreExpr)]) -- Binders annotated with stricness info - -dmdFix top_lvl env orig_pairs - = (updSigEnv env (sigEnv final_env), lazy_fv, pairs') - -- Return to original virgin state, keeping new signatures - where - bndrs = map fst orig_pairs - initial_env = addInitialSigs top_lvl env bndrs - (final_env, lazy_fv, pairs') = loop 1 initial_env orig_pairs - - loop :: Int - -> AnalEnv -- Already contains the current sigs - -> [(Id,CoreExpr)] - -> (AnalEnv, DmdEnv, [(Id,CoreExpr)]) - loop n env pairs - = -- pprTrace "dmd loop" (ppr n <+> ppr bndrs $$ ppr env) $ - loop' n env pairs - - loop' n env pairs - | found_fixpoint - = (env', lazy_fv, pairs') - -- Note: return pairs', not pairs. pairs' is the result of - -- processing the RHSs with sigs (= sigs'), whereas pairs - -- is the result of processing the RHSs with the *previous* - -- iteration of sigs. - - | n >= 10 - = -- pprTrace "dmdFix loop" (ppr n <+> (vcat - -- [ text "Sigs:" <+> ppr [ (id,lookupVarEnv (sigEnv env) id, - -- lookupVarEnv (sigEnv env') id) - -- | (id,_) <- pairs], - -- text "env:" <+> ppr env, - -- text "binds:" <+> pprCoreBinding (Rec pairs)])) - (env, lazy_fv, orig_pairs) -- Safe output - -- The lazy_fv part is really important! orig_pairs has no strictness - -- info, including nothing about free vars. But if we have - -- letrec f = ....y..... in ...f... - -- where 'y' is free in f, we must record that y is mentioned, - -- otherwise y will get recorded as absent altogether - - | otherwise - = loop (n+1) (nonVirgin env') pairs' - where - found_fixpoint = all (same_sig (sigEnv env) (sigEnv env')) bndrs - - ((env',lazy_fv), pairs') = mapAccumL my_downRhs (env, emptyDmdEnv) pairs - -- mapAccumL: Use the new signature to do the next pair - -- The occurrence analyser has arranged them in a good order - -- so this can significantly reduce the number of iterations needed - - my_downRhs (env, lazy_fv) (id,rhs) - = ((env', lazy_fv'), (id', rhs')) - where - (sig, lazy_fv1, id', rhs') = dmdAnalRhs top_lvl (Just bndrs) env id rhs - lazy_fv' = plusVarEnv_C bothDmd lazy_fv lazy_fv1 - env' = extendAnalEnv top_lvl env id sig - - same_sig sigs sigs' var = lookup sigs var == lookup sigs' var - lookup sigs var = case lookupVarEnv sigs var of - Just (sig,_) -> sig - Nothing -> pprPanic "dmdFix" (ppr var) - --- Non-recursive bindings -dmdAnalRhs :: TopLevelFlag - -> Maybe [Id] -- Just bs <=> recursive, Nothing <=> non-recursive - -> AnalEnv -> Id -> CoreExpr - -> (StrictSig, DmdEnv, Id, CoreExpr) --- Process the RHS of the binding, add the strictness signature --- to the Id, and augment the environment with the signature as well. -dmdAnalRhs top_lvl rec_flag env id rhs - | Just fn <- unpackTrivial rhs -- See Note [Demand analysis for trivial right-hand sides] - , let fn_str = getStrictness env fn - fn_fv | isLocalId fn = unitVarEnv fn topDmd - | otherwise = emptyDmdEnv - -- Note [Remember to demand the function itself] - -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -- fn_fv: don't forget to produce a demand for fn itself - -- Lacking this caused Trac #9128 - -- The demand is very conservative (topDmd), but that doesn't - -- matter; trivial bindings are usually inlined, so it only - -- kicks in for top-level bindings and NOINLINE bindings - = (fn_str, fn_fv, set_idStrictness env id fn_str, rhs) - - | otherwise - = (sig_ty, lazy_fv, id', mkLams bndrs' body') - where - (bndrs, body) = collectBinders rhs - env_body = foldl extendSigsWithLam env bndrs - (body_ty, body') = dmdAnal env_body body_dmd body - body_ty' = removeDmdTyArgs body_ty -- zap possible deep CPR info - (DmdType rhs_fv rhs_dmds rhs_res, bndrs') - = annotateLamBndrs env (isDFunId id) body_ty' bndrs - sig_ty = mkStrictSig (mkDmdType sig_fv rhs_dmds rhs_res') - id' = set_idStrictness env id sig_ty - -- See Note [NOINLINE and strictness] - - -- See Note [Product demands for function body] - body_dmd = case deepSplitProductType_maybe (ae_fam_envs env) (exprType body) of - Nothing -> cleanEvalDmd - Just (dc, _, _, _) -> cleanEvalProdDmd (dataConRepArity dc) - - -- See Note [Lazy and unleashable free variables] - -- See Note [Aggregated demand for cardinality] - rhs_fv1 = case rec_flag of - Just bs -> reuseEnv (delVarEnvList rhs_fv bs) - Nothing -> rhs_fv - - (lazy_fv, sig_fv) = splitFVs is_thunk rhs_fv1 - - rhs_res' = trimCPRInfo trim_all trim_sums rhs_res - trim_all = is_thunk && not_strict - trim_sums = not (isTopLevel top_lvl) -- See Note [CPR for sum types] - - -- See Note [CPR for thunks] - is_thunk = not (exprIsHNF rhs) - not_strict - = isTopLevel top_lvl -- Top level and recursive things don't - || isJust rec_flag -- get their demandInfo set at all - || not (isStrictDmd (idDemandInfo id) || ae_virgin env) - -- See Note [Optimistic CPR in the "virgin" case] - -unpackTrivial :: CoreExpr -> Maybe Id --- Returns (Just v) if the arg is really equal to v, modulo --- casts, type applications etc --- See Note [Demand analysis for trivial right-hand sides] -unpackTrivial (Var v) = Just v -unpackTrivial (Cast e _) = unpackTrivial e -unpackTrivial (Lam v e) | isTyVar v = unpackTrivial e -unpackTrivial (App e a) | isTypeArg a = unpackTrivial e -unpackTrivial _ = Nothing -\end{code} - -Note [Demand analysis for trivial right-hand sides] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Consider - foo = plusInt |> co -where plusInt is an arity-2 function with known strictness. Clearly -we want plusInt's strictness to propagate to foo! But because it has -no manifest lambdas, it won't do so automatically, and indeed 'co' might -have type (Int->Int->Int) ~ T, so we *can't* eta-expand. So we have a -special case for right-hand sides that are "trivial", namely variables, -casts, type applications, and the like. - -Note that this can mean that 'foo' has an arity that is smaller than that -indicated by its demand info. e.g. if co :: (Int->Int->Int) ~ T, then -foo's arity will be zero (see Note [exprArity invariant] in CoreArity), -but its demand signature will be that of plusInt. A small example is the -test case of Trac #8963. - - -Note [Product demands for function body] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This example comes from shootout/binary_trees: - - Main.check' = \ b z ds. case z of z' { I# ip -> - case ds_d13s of - Main.Nil -> z' - Main.Node s14k s14l s14m -> - Main.check' (not b) - (Main.check' b - (case b { - False -> I# (-# s14h s14k); - True -> I# (+# s14h s14k) - }) - s14l) - s14m } } } - -Here we *really* want to unbox z, even though it appears to be used boxed in -the Nil case. Partly the Nil case is not a hot path. But more specifically, -the whole function gets the CPR property if we do. - -So for the demand on the body of a RHS we use a product demand if it's -a product type. - -%************************************************************************ -%* * -\subsection{Strictness signatures and types} -%* * -%************************************************************************ - -\begin{code} -unitVarDmd :: Var -> Demand -> DmdType -unitVarDmd var dmd - = DmdType (unitVarEnv var dmd) [] topRes - -addVarDmd :: DmdType -> Var -> Demand -> DmdType -addVarDmd (DmdType fv ds res) var dmd - = DmdType (extendVarEnv_C bothDmd fv var dmd) ds res - -addLazyFVs :: DmdType -> DmdEnv -> DmdType -addLazyFVs dmd_ty lazy_fvs - = dmd_ty `bothDmdType` mkBothDmdArg lazy_fvs - -- Using bothDmdType (rather than just both'ing the envs) - -- is vital. Consider - -- let f = \x -> (x,y) - -- in error (f 3) - -- Here, y is treated as a lazy-fv of f, but we must `bothDmd` that L - -- demand with the bottom coming up from 'error' - -- - -- I got a loop in the fixpointer without this, due to an interaction - -- with the lazy_fv filtering in dmdAnalRhs. Roughly, it was - -- letrec f n x - -- = letrec g y = x `fatbar` - -- letrec h z = z + ...g... - -- in h (f (n-1) x) - -- in ... - -- In the initial iteration for f, f=Bot - -- Suppose h is found to be strict in z, but the occurrence of g in its RHS - -- is lazy. Now consider the fixpoint iteration for g, esp the demands it - -- places on its free variables. Suppose it places none. Then the - -- x `fatbar` ...call to h... - -- will give a x->V demand for x. That turns into a L demand for x, - -- which floats out of the defn for h. Without the modifyEnv, that - -- L demand doesn't get both'd with the Bot coming up from the inner - -- call to f. So we just get an L demand for x for g. -\end{code} - -Note [Do not strictify the argument dictionaries of a dfun] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The typechecker can tie recursive knots involving dfuns, so we do the -conservative thing and refrain from strictifying a dfun's argument -dictionaries. - -\begin{code} -annotateBndr :: AnalEnv -> DmdType -> Var -> (DmdType, Var) --- The returned env has the var deleted --- The returned var is annotated with demand info --- according to the result demand of the provided demand type --- No effect on the argument demands -annotateBndr env dmd_ty var - | isId var = (dmd_ty', setIdDemandInfo var dmd) - | otherwise = (dmd_ty, var) - where - (dmd_ty', dmd) = findBndrDmd env False dmd_ty var - -annotateBndrs :: AnalEnv -> DmdType -> [Var] -> (DmdType, [Var]) -annotateBndrs env = mapAccumR (annotateBndr env) - -annotateLamBndrs :: AnalEnv -> DFunFlag -> DmdType -> [Var] -> (DmdType, [Var]) -annotateLamBndrs env args_of_dfun ty bndrs = mapAccumR annotate ty bndrs - where - annotate dmd_ty bndr - | isId bndr = annotateLamIdBndr env args_of_dfun dmd_ty Many bndr - | otherwise = (dmd_ty, bndr) - -annotateLamIdBndr :: AnalEnv - -> DFunFlag -- is this lambda at the top of the RHS of a dfun? - -> DmdType -- Demand type of body - -> Count -- One-shot-ness of the lambda - -> Id -- Lambda binder - -> (DmdType, -- Demand type of lambda - Id) -- and binder annotated with demand - -annotateLamIdBndr env arg_of_dfun dmd_ty one_shot id --- For lambdas we add the demand to the argument demands --- Only called for Ids - = ASSERT( isId id ) - -- pprTrace "annLamBndr" (vcat [ppr id, ppr _dmd_ty]) $ - (final_ty, setOneShotness one_shot (setIdDemandInfo id dmd)) - where - -- Watch out! See note [Lambda-bound unfoldings] - final_ty = case maybeUnfoldingTemplate (idUnfolding id) of - Nothing -> main_ty - Just unf -> main_ty `bothDmdType` unf_ty - where - (unf_ty, _) = dmdAnalStar env dmd unf - - main_ty = addDemand dmd dmd_ty' - (dmd_ty', dmd) = findBndrDmd env arg_of_dfun dmd_ty id - -deleteFVs :: DmdType -> [Var] -> DmdType -deleteFVs (DmdType fvs dmds res) bndrs - = DmdType (delVarEnvList fvs bndrs) dmds res -\end{code} - -Note [CPR for sum types] -~~~~~~~~~~~~~~~~~~~~~~~~ -At the moment we do not do CPR for let-bindings that - * non-top level - * bind a sum type -Reason: I found that in some benchmarks we were losing let-no-escapes, -which messed it all up. Example - let j = \x. .... - in case y of - True -> j False - False -> j True -If we w/w this we get - let j' = \x. .... - in case y of - True -> case j' False of { (# a #) -> Just a } - False -> case j' True of { (# a #) -> Just a } -Notice that j' is not a let-no-escape any more. - -However this means in turn that the *enclosing* function -may be CPR'd (via the returned Justs). But in the case of -sums, there may be Nothing alternatives; and that messes -up the sum-type CPR. - -Conclusion: only do this for products. It's still not -guaranteed OK for products, but sums definitely lose sometimes. - -Note [CPR for thunks] -~~~~~~~~~~~~~~~~~~~~~ -If the rhs is a thunk, we usually forget the CPR info, because -it is presumably shared (else it would have been inlined, and -so we'd lose sharing if w/w'd it into a function). E.g. - - let r = case expensive of - (a,b) -> (b,a) - in ... - -If we marked r as having the CPR property, then we'd w/w into - - let $wr = \() -> case expensive of - (a,b) -> (# b, a #) - r = case $wr () of - (# b,a #) -> (b,a) - in ... - -But now r is a thunk, which won't be inlined, so we are no further ahead. -But consider - - f x = let r = case expensive of (a,b) -> (b,a) - in if foo r then r else (x,x) - -Does f have the CPR property? Well, no. - -However, if the strictness analyser has figured out (in a previous -iteration) that it's strict, then we DON'T need to forget the CPR info. -Instead we can retain the CPR info and do the thunk-splitting transform -(see WorkWrap.splitThunk). - -This made a big difference to PrelBase.modInt, which had something like - modInt = \ x -> let r = ... -> I# v in - ...body strict in r... -r's RHS isn't a value yet; but modInt returns r in various branches, so -if r doesn't have the CPR property then neither does modInt -Another case I found in practice (in Complex.magnitude), looks like this: - let k = if ... then I# a else I# b - in ... body strict in k .... -(For this example, it doesn't matter whether k is returned as part of -the overall result; but it does matter that k's RHS has the CPR property.) -Left to itself, the simplifier will make a join point thus: - let $j k = ...body strict in k... - if ... then $j (I# a) else $j (I# b) -With thunk-splitting, we get instead - let $j x = let k = I#x in ...body strict in k... - in if ... then $j a else $j b -This is much better; there's a good chance the I# won't get allocated. - -The difficulty with this is that we need the strictness type to -look at the body... but we now need the body to calculate the demand -on the variable, so we can decide whether its strictness type should -have a CPR in it or not. Simple solution: - a) use strictness info from the previous iteration - b) make sure we do at least 2 iterations, by doing a second - round for top-level non-recs. Top level recs will get at - least 2 iterations except for totally-bottom functions - which aren't very interesting anyway. - -NB: strictly_demanded is never true of a top-level Id, or of a recursive Id. - -Note [Optimistic CPR in the "virgin" case] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Demand and strictness info are initialized by top elements. However, -this prevents from inferring a CPR property in the first pass of the -analyser, so we keep an explicit flag ae_virgin in the AnalEnv -datatype. - -We can't start with 'not-demanded' (i.e., top) because then consider - f x = let - t = ... I# x - in - if ... then t else I# y else f x' - -In the first iteration we'd have no demand info for x, so assume -not-demanded; then we'd get TopRes for f's CPR info. Next iteration -we'd see that t was demanded, and so give it the CPR property, but by -now f has TopRes, so it will stay TopRes. Instead, by checking the -ae_virgin flag at the first time round, we say 'yes t is demanded' the -first time. - -However, this does mean that for non-recursive bindings we must -iterate twice to be sure of not getting over-optimistic CPR info, -in the case where t turns out to be not-demanded. This is handled -by dmdAnalTopBind. - - -Note [NOINLINE and strictness] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The strictness analyser used to have a HACK which ensured that NOINLNE -things were not strictness-analysed. The reason was unsafePerformIO. -Left to itself, the strictness analyser would discover this strictness -for unsafePerformIO: - unsafePerformIO: C(U(AV)) -But then consider this sub-expression - unsafePerformIO (\s -> let r = f x in - case writeIORef v r s of (# s1, _ #) -> - (# s1, r #) -The strictness analyser will now find that r is sure to be eval'd, -and may then hoist it out. This makes tests/lib/should_run/memo002 -deadlock. - -Solving this by making all NOINLINE things have no strictness info is overkill. -In particular, it's overkill for runST, which is perfectly respectable. -Consider - f x = runST (return x) -This should be strict in x. - -So the new plan is to define unsafePerformIO using the 'lazy' combinator: - - unsafePerformIO (IO m) = lazy (case m realWorld# of (# _, r #) -> r) - -Remember, 'lazy' is a wired-in identity-function Id, of type a->a, which is -magically NON-STRICT, and is inlined after strictness analysis. So -unsafePerformIO will look non-strict, and that's what we want. - -Now we don't need the hack in the strictness analyser. HOWEVER, this -decision does mean that even a NOINLINE function is not entirely -opaque: some aspect of its implementation leaks out, notably its -strictness. For example, if you have a function implemented by an -error stub, but which has RULES, you may want it not to be eliminated -in favour of error! - -Note [Lazy and unleasheable free variables] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We put the strict and once-used FVs in the DmdType of the Id, so -that at its call sites we unleash demands on its strict fvs. -An example is 'roll' in imaginary/wheel-sieve2 -Something like this: - roll x = letrec - go y = if ... then roll (x-1) else x+1 - in - go ms -We want to see that roll is strict in x, which is because -go is called. So we put the DmdEnv for x in go's DmdType. - -Another example: - - f :: Int -> Int -> Int - f x y = let t = x+1 - h z = if z==0 then t else - if z==1 then x+1 else - x + h (z-1) - in h y - -Calling h does indeed evaluate x, but we can only see -that if we unleash a demand on x at the call site for t. - -Incidentally, here's a place where lambda-lifting h would -lose the cigar --- we couldn't see the joint strictness in t/x - - ON THE OTHER HAND -We don't want to put *all* the fv's from the RHS into the -DmdType, because that makes fixpointing very slow --- the -DmdType gets full of lazy demands that are slow to converge. - - -Note [Lamba-bound unfoldings] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We allow a lambda-bound variable to carry an unfolding, a facility that is used -exclusively for join points; see Note [Case binders and join points]. If so, -we must be careful to demand-analyse the RHS of the unfolding! Example - \x. \y{=Just x}. -Then if uses 'y', then transitively it uses 'x', and we must not -forget that fact, otherwise we might make 'x' absent when it isn't. - - -%************************************************************************ -%* * -\subsection{Strictness signatures} -%* * -%************************************************************************ - -\begin{code} -type DFunFlag = Bool -- indicates if the lambda being considered is in the - -- sequence of lambdas at the top of the RHS of a dfun -notArgOfDfun :: DFunFlag -notArgOfDfun = False - -data AnalEnv - = AE { ae_dflags :: DynFlags - , ae_sigs :: SigEnv - , ae_virgin :: Bool -- True on first iteration only - -- See Note [Initialising strictness] - , ae_rec_tc :: RecTcChecker - , ae_fam_envs :: FamInstEnvs - } - - -- We use the se_env to tell us whether to - -- record info about a variable in the DmdEnv - -- We do so if it's a LocalId, but not top-level - -- - -- The DmdEnv gives the demand on the free vars of the function - -- when it is given enough args to satisfy the strictness signature - -type SigEnv = VarEnv (StrictSig, TopLevelFlag) - -instance Outputable AnalEnv where - ppr (AE { ae_sigs = env, ae_virgin = virgin }) - = ptext (sLit "AE") <+> braces (vcat - [ ptext (sLit "ae_virgin =") <+> ppr virgin - , ptext (sLit "ae_sigs =") <+> ppr env ]) - -emptyAnalEnv :: DynFlags -> FamInstEnvs -> AnalEnv -emptyAnalEnv dflags fam_envs - = AE { ae_dflags = dflags - , ae_sigs = emptySigEnv - , ae_virgin = True - , ae_rec_tc = initRecTc - , ae_fam_envs = fam_envs - } - -emptySigEnv :: SigEnv -emptySigEnv = emptyVarEnv - -sigEnv :: AnalEnv -> SigEnv -sigEnv = ae_sigs - -updSigEnv :: AnalEnv -> SigEnv -> AnalEnv -updSigEnv env sigs = env { ae_sigs = sigs } - -extendAnalEnv :: TopLevelFlag -> AnalEnv -> Id -> StrictSig -> AnalEnv -extendAnalEnv top_lvl env var sig - = env { ae_sigs = extendSigEnv top_lvl (ae_sigs env) var sig } - -extendSigEnv :: TopLevelFlag -> SigEnv -> Id -> StrictSig -> SigEnv -extendSigEnv top_lvl sigs var sig = extendVarEnv sigs var (sig, top_lvl) - -lookupSigEnv :: AnalEnv -> Id -> Maybe (StrictSig, TopLevelFlag) -lookupSigEnv env id = lookupVarEnv (ae_sigs env) id - -getStrictness :: AnalEnv -> Id -> StrictSig -getStrictness env fn - | isGlobalId fn = idStrictness fn - | Just (sig, _) <- lookupSigEnv env fn = sig - | otherwise = nopSig - -addInitialSigs :: TopLevelFlag -> AnalEnv -> [Id] -> AnalEnv --- See Note [Initialising strictness] -addInitialSigs top_lvl env@(AE { ae_sigs = sigs, ae_virgin = virgin }) ids - = env { ae_sigs = extendVarEnvList sigs [ (id, (init_sig id, top_lvl)) - | id <- ids ] } - where - init_sig | virgin = \_ -> botSig - | otherwise = idStrictness - -nonVirgin :: AnalEnv -> AnalEnv -nonVirgin env = env { ae_virgin = False } - -extendSigsWithLam :: AnalEnv -> Id -> AnalEnv --- Extend the AnalEnv when we meet a lambda binder -extendSigsWithLam env id - | isId id - , isStrictDmd (idDemandInfo id) || ae_virgin env - -- See Note [Optimistic CPR in the "virgin" case] - -- See Note [Initial CPR for strict binders] - , Just (dc,_,_,_) <- deepSplitProductType_maybe (ae_fam_envs env) $ idType id - = extendAnalEnv NotTopLevel env id (cprProdSig (dataConRepArity dc)) - - | otherwise - = env - -findBndrDmd :: AnalEnv -> Bool -> DmdType -> Id -> (DmdType, Demand) --- See Note [Trimming a demand to a type] in Demand.lhs -findBndrDmd env arg_of_dfun dmd_ty id - = (dmd_ty', dmd') - where - dmd' = zapDemand (ae_dflags env) $ - strictify $ - trimToType starting_dmd (findTypeShape fam_envs id_ty) - - (dmd_ty', starting_dmd) = peelFV dmd_ty id - - id_ty = idType id - - strictify dmd - | gopt Opt_DictsStrict (ae_dflags env) - -- We never want to strictify a recursive let. At the moment - -- annotateBndr is only call for non-recursive lets; if that - -- changes, we need a RecFlag parameter and another guard here. - , not arg_of_dfun -- See Note [Do not strictify the argument dictionaries of a dfun] - = strictifyDictDmd id_ty dmd - | otherwise - = dmd - - fam_envs = ae_fam_envs env - -set_idStrictness :: AnalEnv -> Id -> StrictSig -> Id -set_idStrictness env id sig - = setIdStrictness id (zapStrictSig (ae_dflags env) sig) - -dumpStrSig :: CoreProgram -> SDoc -dumpStrSig binds = vcat (map printId ids) - where - ids = sortBy (stableNameCmp `on` getName) (concatMap getIds binds) - getIds (NonRec i _) = [ i ] - getIds (Rec bs) = map fst bs - printId id | isExportedId id = ppr id <> colon <+> pprIfaceStrictSig (idStrictness id) - | otherwise = empty - -\end{code} - -Note [Initial CPR for strict binders] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -CPR is initialized for a lambda binder in an optimistic manner, i.e, -if the binder is used strictly and at least some of its components as -a product are used, which is checked by the value of the absence -demand. - -If the binder is marked demanded with a strict demand, then give it a -CPR signature, because in the likely event that this is a lambda on a -fn defn [we only use this when the lambda is being consumed with a -call demand], it'll be w/w'd and so it will be CPR-ish. E.g. - - f = \x::(Int,Int). if ...strict in x... then - x - else - (a,b) -We want f to have the CPR property because x does, by the time f has been w/w'd - -Also note that we only want to do this for something that definitely -has product type, else we may get over-optimistic CPR results -(e.g. from \x -> x!). - - -Note [Initialising strictness] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -See section 9.2 (Finding fixpoints) of the paper. - -Our basic plan is to initialise the strictness of each Id in a -recursive group to "bottom", and find a fixpoint from there. However, -this group B might be inside an *enclosing* recursiveb group A, in -which case we'll do the entire fixpoint shebang on for each iteration -of A. This can be illustrated by the following example: - -Example: - - f [] = [] - f (x:xs) = let g [] = f xs - g (y:ys) = y+1 : g ys - in g (h x) - -At each iteration of the fixpoint for f, the analyser has to find a -fixpoint for the enclosed function g. In the meantime, the demand -values for g at each iteration for f are *greater* than those we -encountered in the previous iteration for f. Therefore, we can begin -the fixpoint for g not with the bottom value but rather with the -result of the previous analysis. I.e., when beginning the fixpoint -process for g, we can start from the demand signature computed for g -previously and attached to the binding occurrence of g. - -To speed things up, we initialise each iteration of A (the enclosing -one) from the result of the last one, which is neatly recorded in each -binder. That way we make use of earlier iterations of the fixpoint -algorithm. (Cunning plan.) - -But on the *first* iteration we want to *ignore* the current strictness -of the Id, and start from "bottom". Nowadays the Id can have a current -strictness, because interface files record strictness for nested bindings. -To know when we are in the first iteration, we look at the ae_virgin -field of the AnalEnv. diff --git a/compiler/stranal/WorkWrap.hs b/compiler/stranal/WorkWrap.hs new file mode 100644 index 0000000000..eedababb43 --- /dev/null +++ b/compiler/stranal/WorkWrap.hs @@ -0,0 +1,476 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1993-1998 + +\section[WorkWrap]{Worker/wrapper-generating back-end of strictness analyser} +-} + +{-# LANGUAGE CPP #-} +module WorkWrap ( wwTopBinds ) where + +import CoreSyn +import CoreUnfold ( certainlyWillInline, mkWwInlineRule, mkWorkerUnfolding ) +import CoreUtils ( exprType, exprIsHNF ) +import CoreArity ( exprArity ) +import Var +import Id +import IdInfo +import UniqSupply +import BasicTypes +import DynFlags +import VarEnv ( isEmptyVarEnv ) +import Demand +import WwLib +import Util +import Outputable +import FamInstEnv +import MonadUtils + +#include "HsVersions.h" + +{- +We take Core bindings whose binders have: + +\begin{enumerate} + +\item Strictness attached (by the front-end of the strictness +analyser), and / or + +\item Constructed Product Result information attached by the CPR +analysis pass. + +\end{enumerate} + +and we return some ``plain'' bindings which have been +worker/wrapper-ified, meaning: + +\begin{enumerate} + +\item Functions have been split into workers and wrappers where +appropriate. If a function has both strictness and CPR properties +then only one worker/wrapper doing both transformations is produced; + +\item Binders' @IdInfos@ have been updated to reflect the existence of +these workers/wrappers (this is where we get STRICTNESS and CPR pragma +info for exported values). +\end{enumerate} +-} + +wwTopBinds :: DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram + +wwTopBinds dflags fam_envs us top_binds + = initUs_ us $ do + top_binds' <- mapM (wwBind dflags fam_envs) top_binds + return (concat top_binds') + +{- +************************************************************************ +* * +\subsection[wwBind-wwExpr]{@wwBind@ and @wwExpr@} +* * +************************************************************************ + +@wwBind@ works on a binding, trying each \tr{(binder, expr)} pair in +turn. Non-recursive case first, then recursive... +-} + +wwBind :: DynFlags + -> FamInstEnvs + -> CoreBind + -> UniqSM [CoreBind] -- returns a WwBinding intermediate form; + -- the caller will convert to Expr/Binding, + -- as appropriate. + +wwBind dflags fam_envs (NonRec binder rhs) = do + new_rhs <- wwExpr dflags fam_envs rhs + new_pairs <- tryWW dflags fam_envs NonRecursive binder new_rhs + return [NonRec b e | (b,e) <- new_pairs] + -- Generated bindings must be non-recursive + -- because the original binding was. + +wwBind dflags fam_envs (Rec pairs) + = return . Rec <$> concatMapM do_one pairs + where + do_one (binder, rhs) = do new_rhs <- wwExpr dflags fam_envs rhs + tryWW dflags fam_envs Recursive binder new_rhs + +{- +@wwExpr@ basically just walks the tree, looking for appropriate +annotations that can be used. Remember it is @wwBind@ that does the +matching by looking for strict arguments of the correct type. +@wwExpr@ is a version that just returns the ``Plain'' Tree. +-} + +wwExpr :: DynFlags -> FamInstEnvs -> CoreExpr -> UniqSM CoreExpr + +wwExpr _ _ e@(Type {}) = return e +wwExpr _ _ e@(Coercion {}) = return e +wwExpr _ _ e@(Lit {}) = return e +wwExpr _ _ e@(Var {}) = return e + +wwExpr dflags fam_envs (Lam binder expr) + = Lam binder <$> wwExpr dflags fam_envs expr + +wwExpr dflags fam_envs (App f a) + = App <$> wwExpr dflags fam_envs f <*> wwExpr dflags fam_envs a + +wwExpr dflags fam_envs (Tick note expr) + = Tick note <$> wwExpr dflags fam_envs expr + +wwExpr dflags fam_envs (Cast expr co) = do + new_expr <- wwExpr dflags fam_envs expr + return (Cast new_expr co) + +wwExpr dflags fam_envs (Let bind expr) + = mkLets <$> wwBind dflags fam_envs bind <*> wwExpr dflags fam_envs expr + +wwExpr dflags fam_envs (Case expr binder ty alts) = do + new_expr <- wwExpr dflags fam_envs expr + new_alts <- mapM ww_alt alts + return (Case new_expr binder ty new_alts) + where + ww_alt (con, binders, rhs) = do + new_rhs <- wwExpr dflags fam_envs rhs + return (con, binders, new_rhs) + +{- +************************************************************************ +* * +\subsection[tryWW]{@tryWW@: attempt a worker/wrapper pair} +* * +************************************************************************ + +@tryWW@ just accumulates arguments, converts strictness info from the +front-end into the proper form, then calls @mkWwBodies@ to do +the business. + +The only reason this is monadised is for the unique supply. + +Note [Don't w/w INLINE things] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It's very important to refrain from w/w-ing an INLINE function (ie one +with a stable unfolding) because the wrapper will then overwrite the +old stable unfolding with the wrapper code. + +Furthermore, if the programmer has marked something as INLINE, +we may lose by w/w'ing it. + +If the strictness analyser is run twice, this test also prevents +wrappers (which are INLINEd) from being re-done. (You can end up with +several liked-named Ids bouncing around at the same time---absolute +mischief.) + +Notice that we refrain from w/w'ing an INLINE function even if it is +in a recursive group. It might not be the loop breaker. (We could +test for loop-breaker-hood, but I'm not sure that ever matters.) + +Note [Worker-wrapper for INLINABLE functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have + {-# INLINABLE f #-} + f :: Ord a => [a] -> Int -> a + f x y = ....f.... + +where f is strict in y, we might get a more efficient loop by w/w'ing +f. But that would make a new unfolding which would overwrite the old +one! So the function would no longer be ININABLE, and in particular +will not be specialised at call sites in other modules. + +This comes in practice (Trac #6056). + +Solution: do the w/w for strictness analysis, but transfer the Stable +unfolding to the *worker*. So we will get something like this: + + {-# INLINE[0] f #-} + f :: Ord a => [a] -> Int -> a + f d x y = case y of I# y' -> fw d x y' + + {-# INLINABLE[0] fw #-} + fw :: Ord a => [a] -> Int# -> a + fw d x y' = let y = I# y' in ...f... + +How do we "transfer the unfolding"? Easy: by using the old one, wrapped +in work_fn! See CoreUnfold.mkWorkerUnfolding. + +Note [Activation for INLINABLE worker] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Follows on from Note [Worker-wrapper for INLINABLE functions] +It is *vital* that if the worker gets an INLINABLE pragma (from the +original function), then the worker has the same phase activation as +the wrapper (or later). That is necessary to allow the wrapper to +inline into the worker's unfolding: see SimplUtils +Note [Simplifying inside stable unfoldings]. + +Notihng is lost by giving the worker the same activation as the +worker, because the worker won't have any chance of inlining until the +wrapper does; there's no point in giving it an earlier activation. + +Note [Don't w/w inline small non-loop-breaker things] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In general, we refrain from w/w-ing *small* functions, which are not +loop breakers, because they'll inline anyway. But we must take care: +it may look small now, but get to be big later after other inlining +has happened. So we take the precaution of adding an INLINE pragma to +any such functions. + +I made this change when I observed a big function at the end of +compilation with a useful strictness signature but no w-w. (It was +small during demand analysis, we refrained from w/w, and then got big +when something was inlined in its rhs.) When I measured it on nofib, +it didn't make much difference; just a few percent improved allocation +on one benchmark (bspt/Euclid.space). But nothing got worse. + +There is an infelicity though. We may get something like + f = g val +==> + g x = case gw x of r -> I# r + + f {- InlineStable, Template = g val -} + f = case gw x of r -> I# r + +The code for f duplicates that for g, without any real benefit. It +won't really be executed, because calls to f will go via the inlining. + +Note [Wrapper activation] +~~~~~~~~~~~~~~~~~~~~~~~~~ +When should the wrapper inlining be active? It must not be active +earlier than the current Activation of the Id (eg it might have a +NOINLINE pragma). But in fact strictness analysis happens fairly +late in the pipeline, and we want to prioritise specialisations over +strictness. Eg if we have + module Foo where + f :: Num a => a -> Int -> a + f n 0 = n -- Strict in the Int, hence wrapper + f n x = f (n+n) (x-1) + + g :: Int -> Int + g x = f x x -- Provokes a specialisation for f + + module Bar where + import Foo + + h :: Int -> Int + h x = f 3 x + +Then we want the specialisation for 'f' to kick in before the wrapper does. + +Now in fact the 'gentle' simplification pass encourages this, by +having rules on, but inlinings off. But that's kind of lucky. It seems +more robust to give the wrapper an Activation of (ActiveAfter 0), +so that it becomes active in an importing module at the same time that +it appears in the first place in the defining module. + +At one stage I tried making the wrapper inlining always-active, and +that had a very bad effect on nofib/imaginary/x2n1; a wrapper was +inlined before the specialisation fired. +-} + +tryWW :: DynFlags + -> FamInstEnvs + -> RecFlag + -> Id -- The fn binder + -> CoreExpr -- The bound rhs; its innards + -- are already ww'd + -> UniqSM [(Id, CoreExpr)] -- either *one* or *two* pairs; + -- if one, then no worker (only + -- the orig "wrapper" lives on); + -- if two, then a worker and a + -- wrapper. +tryWW dflags fam_envs is_rec fn_id rhs + | isNeverActive inline_act + -- No point in worker/wrappering if the thing is never inlined! + -- Because the no-inline prag will prevent the wrapper ever + -- being inlined at a call site. + -- + -- Furthermore, don't even expose strictness info + = return [ (fn_id, rhs) ] + + | not loop_breaker + , Just stable_unf <- certainlyWillInline dflags fn_unf + = return [ (fn_id `setIdUnfolding` stable_unf, rhs) ] + -- Note [Don't w/w inline small non-loop-breaker, or INLINE, things] + -- NB: use idUnfolding because we don't want to apply + -- this criterion to a loop breaker! + + | is_fun + = splitFun dflags fam_envs new_fn_id fn_info wrap_dmds res_info rhs + + | is_thunk -- See Note [Thunk splitting] + = splitThunk dflags fam_envs is_rec new_fn_id rhs + + | otherwise + = return [ (new_fn_id, rhs) ] + + where + loop_breaker = isStrongLoopBreaker (occInfo fn_info) + fn_info = idInfo fn_id + inline_act = inlinePragmaActivation (inlinePragInfo fn_info) + fn_unf = unfoldingInfo fn_info + + -- In practice it always will have a strictness + -- signature, even if it's a uninformative one + strict_sig = strictnessInfo fn_info + StrictSig (DmdType env wrap_dmds res_info) = strict_sig + + -- new_fn_id has the DmdEnv zapped. + -- (a) it is never used again + -- (b) it wastes space + -- (c) it becomes incorrect as things are cloned, because + -- we don't push the substitution into it + new_fn_id | isEmptyVarEnv env = fn_id + | otherwise = fn_id `setIdStrictness` + mkClosedStrictSig wrap_dmds res_info + + is_fun = notNull wrap_dmds + is_thunk = not is_fun && not (exprIsHNF rhs) + + +--------------------- +splitFun :: DynFlags -> FamInstEnvs -> Id -> IdInfo -> [Demand] -> DmdResult -> CoreExpr + -> UniqSM [(Id, CoreExpr)] +splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs + = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) ) do + -- The arity should match the signature + stuff <- mkWwBodies dflags fam_envs fun_ty wrap_dmds res_info one_shots + case stuff of + Just (work_demands, wrap_fn, work_fn) -> do + work_uniq <- getUniqueM + let work_rhs = work_fn rhs + work_prag = InlinePragma { inl_inline = inl_inline inl_prag + , inl_sat = Nothing + , inl_act = wrap_act + , inl_rule = FunLike } + -- idl_inline: copy from fn_id; see Note [Worker-wrapper for INLINABLE functions] + -- idl_act: see Note [Activation for INLINABLE workers] + -- inl_rule: it does not make sense for workers to be constructorlike. + + work_id = mkWorkerId work_uniq fn_id (exprType work_rhs) + `setIdOccInfo` occInfo fn_info + -- Copy over occurrence info from parent + -- Notably whether it's a loop breaker + -- Doesn't matter much, since we will simplify next, but + -- seems right-er to do so + + `setInlinePragma` work_prag + + `setIdUnfolding` mkWorkerUnfolding dflags work_fn (unfoldingInfo fn_info) + -- See Note [Worker-wrapper for INLINABLE functions] + + `setIdStrictness` mkClosedStrictSig work_demands work_res_info + -- Even though we may not be at top level, + -- it's ok to give it an empty DmdEnv + + `setIdArity` exprArity work_rhs + -- Set the arity so that the Core Lint check that the + -- arity is consistent with the demand type goes through + + wrap_act = ActiveAfter 0 + wrap_rhs = wrap_fn work_id + wrap_prag = InlinePragma { inl_inline = Inline + , inl_sat = Nothing + , inl_act = wrap_act + , inl_rule = rule_match_info } + -- See Note [Wrapper activation] + -- The RuleMatchInfo is (and must be) unaffected + + wrap_id = fn_id `setIdUnfolding` mkWwInlineRule wrap_rhs arity + `setInlinePragma` wrap_prag + `setIdOccInfo` NoOccInfo + -- Zap any loop-breaker-ness, to avoid bleating from Lint + -- about a loop breaker with an INLINE rule + + return $ [(work_id, work_rhs), (wrap_id, wrap_rhs)] + -- Worker first, because wrapper mentions it + + Nothing -> return [(fn_id, rhs)] + where + fun_ty = idType fn_id + inl_prag = inlinePragInfo fn_info + rule_match_info = inlinePragmaRuleMatchInfo inl_prag + arity = arityInfo fn_info + -- The arity is set by the simplifier using exprEtaExpandArity + -- So it may be more than the number of top-level-visible lambdas + + work_res_info | isBotRes res_info = botRes -- Cpr stuff done by wrapper + | otherwise = topRes + + one_shots = get_one_shots rhs + +-- If the original function has one-shot arguments, it is important to +-- make the wrapper and worker have corresponding one-shot arguments too. +-- Otherwise we spuriously float stuff out of case-expression join points, +-- which is very annoying. +get_one_shots :: Expr Var -> [OneShotInfo] +get_one_shots (Lam b e) + | isId b = idOneShotInfo b : get_one_shots e + | otherwise = get_one_shots e +get_one_shots (Tick _ e) = get_one_shots e +get_one_shots _ = [] + +{- +Note [Do not split void functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider this rather common form of binding: + $j = \x:Void# -> ...no use of x... + +Since x is not used it'll be marked as absent. But there is no point +in w/w-ing because we'll simply add (\y:Void#), see WwLib.mkWorerArgs. + +If x has a more interesting type (eg Int, or Int#), there *is* a point +in w/w so that we don't pass the argument at all. + +Note [Thunk splitting] +~~~~~~~~~~~~~~~~~~~~~~ +Suppose x is used strictly (never mind whether it has the CPR +property). + + let + x* = x-rhs + in body + +splitThunk transforms like this: + + let + x* = case x-rhs of { I# a -> I# a } + in body + +Now simplifier will transform to + + case x-rhs of + I# a -> let x* = I# a + in body + +which is what we want. Now suppose x-rhs is itself a case: + + x-rhs = case e of { T -> I# a; F -> I# b } + +The join point will abstract over a, rather than over (which is +what would have happened before) which is fine. + +Notice that x certainly has the CPR property now! + +In fact, splitThunk uses the function argument w/w splitting +function, so that if x's demand is deeper (say U(U(L,L),L)) +then the splitting will go deeper too. +-} + +-- See Note [Thunk splitting] +-- splitThunk converts the *non-recursive* binding +-- x = e +-- into +-- x = let x = e +-- in case x of +-- I# y -> let x = I# y in x } +-- See comments above. Is it not beautifully short? +-- Moreover, it works just as well when there are +-- several binders, and if the binders are lifted +-- E.g. x = e +-- --> x = let x = e in +-- case x of (a,b) -> let x = (a,b) in x + +splitThunk :: DynFlags -> FamInstEnvs -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)] +splitThunk dflags fam_envs is_rec fn_id rhs + = do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs [fn_id] + ; let res = [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ] + ; if useful then ASSERT2( isNonRec is_rec, ppr fn_id ) -- The thunk must be non-recursive + return res + else return [(fn_id, rhs)] } diff --git a/compiler/stranal/WorkWrap.lhs b/compiler/stranal/WorkWrap.lhs deleted file mode 100644 index d2c7b3da1d..0000000000 --- a/compiler/stranal/WorkWrap.lhs +++ /dev/null @@ -1,477 +0,0 @@ -% -% (c) The GRASP/AQUA Project, Glasgow University, 1993-1998 -% -\section[WorkWrap]{Worker/wrapper-generating back-end of strictness analyser} - -\begin{code} -{-# LANGUAGE CPP #-} -module WorkWrap ( wwTopBinds ) where - -import CoreSyn -import CoreUnfold ( certainlyWillInline, mkWwInlineRule, mkWorkerUnfolding ) -import CoreUtils ( exprType, exprIsHNF ) -import CoreArity ( exprArity ) -import Var -import Id -import IdInfo -import UniqSupply -import BasicTypes -import DynFlags -import VarEnv ( isEmptyVarEnv ) -import Demand -import WwLib -import Util -import Outputable -import FamInstEnv -import MonadUtils - -#include "HsVersions.h" -\end{code} - -We take Core bindings whose binders have: - -\begin{enumerate} - -\item Strictness attached (by the front-end of the strictness -analyser), and / or - -\item Constructed Product Result information attached by the CPR -analysis pass. - -\end{enumerate} - -and we return some ``plain'' bindings which have been -worker/wrapper-ified, meaning: - -\begin{enumerate} - -\item Functions have been split into workers and wrappers where -appropriate. If a function has both strictness and CPR properties -then only one worker/wrapper doing both transformations is produced; - -\item Binders' @IdInfos@ have been updated to reflect the existence of -these workers/wrappers (this is where we get STRICTNESS and CPR pragma -info for exported values). -\end{enumerate} - -\begin{code} -wwTopBinds :: DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram - -wwTopBinds dflags fam_envs us top_binds - = initUs_ us $ do - top_binds' <- mapM (wwBind dflags fam_envs) top_binds - return (concat top_binds') -\end{code} - -%************************************************************************ -%* * -\subsection[wwBind-wwExpr]{@wwBind@ and @wwExpr@} -%* * -%************************************************************************ - -@wwBind@ works on a binding, trying each \tr{(binder, expr)} pair in -turn. Non-recursive case first, then recursive... - -\begin{code} -wwBind :: DynFlags - -> FamInstEnvs - -> CoreBind - -> UniqSM [CoreBind] -- returns a WwBinding intermediate form; - -- the caller will convert to Expr/Binding, - -- as appropriate. - -wwBind dflags fam_envs (NonRec binder rhs) = do - new_rhs <- wwExpr dflags fam_envs rhs - new_pairs <- tryWW dflags fam_envs NonRecursive binder new_rhs - return [NonRec b e | (b,e) <- new_pairs] - -- Generated bindings must be non-recursive - -- because the original binding was. - -wwBind dflags fam_envs (Rec pairs) - = return . Rec <$> concatMapM do_one pairs - where - do_one (binder, rhs) = do new_rhs <- wwExpr dflags fam_envs rhs - tryWW dflags fam_envs Recursive binder new_rhs -\end{code} - -@wwExpr@ basically just walks the tree, looking for appropriate -annotations that can be used. Remember it is @wwBind@ that does the -matching by looking for strict arguments of the correct type. -@wwExpr@ is a version that just returns the ``Plain'' Tree. - -\begin{code} -wwExpr :: DynFlags -> FamInstEnvs -> CoreExpr -> UniqSM CoreExpr - -wwExpr _ _ e@(Type {}) = return e -wwExpr _ _ e@(Coercion {}) = return e -wwExpr _ _ e@(Lit {}) = return e -wwExpr _ _ e@(Var {}) = return e - -wwExpr dflags fam_envs (Lam binder expr) - = Lam binder <$> wwExpr dflags fam_envs expr - -wwExpr dflags fam_envs (App f a) - = App <$> wwExpr dflags fam_envs f <*> wwExpr dflags fam_envs a - -wwExpr dflags fam_envs (Tick note expr) - = Tick note <$> wwExpr dflags fam_envs expr - -wwExpr dflags fam_envs (Cast expr co) = do - new_expr <- wwExpr dflags fam_envs expr - return (Cast new_expr co) - -wwExpr dflags fam_envs (Let bind expr) - = mkLets <$> wwBind dflags fam_envs bind <*> wwExpr dflags fam_envs expr - -wwExpr dflags fam_envs (Case expr binder ty alts) = do - new_expr <- wwExpr dflags fam_envs expr - new_alts <- mapM ww_alt alts - return (Case new_expr binder ty new_alts) - where - ww_alt (con, binders, rhs) = do - new_rhs <- wwExpr dflags fam_envs rhs - return (con, binders, new_rhs) -\end{code} - -%************************************************************************ -%* * -\subsection[tryWW]{@tryWW@: attempt a worker/wrapper pair} -%* * -%************************************************************************ - -@tryWW@ just accumulates arguments, converts strictness info from the -front-end into the proper form, then calls @mkWwBodies@ to do -the business. - -The only reason this is monadised is for the unique supply. - -Note [Don't w/w INLINE things] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -It's very important to refrain from w/w-ing an INLINE function (ie one -with a stable unfolding) because the wrapper will then overwrite the -old stable unfolding with the wrapper code. - -Furthermore, if the programmer has marked something as INLINE, -we may lose by w/w'ing it. - -If the strictness analyser is run twice, this test also prevents -wrappers (which are INLINEd) from being re-done. (You can end up with -several liked-named Ids bouncing around at the same time---absolute -mischief.) - -Notice that we refrain from w/w'ing an INLINE function even if it is -in a recursive group. It might not be the loop breaker. (We could -test for loop-breaker-hood, but I'm not sure that ever matters.) - -Note [Worker-wrapper for INLINABLE functions] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If we have - {-# INLINABLE f #-} - f :: Ord a => [a] -> Int -> a - f x y = ....f.... - -where f is strict in y, we might get a more efficient loop by w/w'ing -f. But that would make a new unfolding which would overwrite the old -one! So the function would no longer be ININABLE, and in particular -will not be specialised at call sites in other modules. - -This comes in practice (Trac #6056). - -Solution: do the w/w for strictness analysis, but transfer the Stable -unfolding to the *worker*. So we will get something like this: - - {-# INLINE[0] f #-} - f :: Ord a => [a] -> Int -> a - f d x y = case y of I# y' -> fw d x y' - - {-# INLINABLE[0] fw #-} - fw :: Ord a => [a] -> Int# -> a - fw d x y' = let y = I# y' in ...f... - -How do we "transfer the unfolding"? Easy: by using the old one, wrapped -in work_fn! See CoreUnfold.mkWorkerUnfolding. - -Note [Activation for INLINABLE worker] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Follows on from Note [Worker-wrapper for INLINABLE functions] -It is *vital* that if the worker gets an INLINABLE pragma (from the -original function), then the worker has the same phase activation as -the wrapper (or later). That is necessary to allow the wrapper to -inline into the worker's unfolding: see SimplUtils -Note [Simplifying inside stable unfoldings]. - -Notihng is lost by giving the worker the same activation as the -worker, because the worker won't have any chance of inlining until the -wrapper does; there's no point in giving it an earlier activation. - -Note [Don't w/w inline small non-loop-breaker things] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In general, we refrain from w/w-ing *small* functions, which are not -loop breakers, because they'll inline anyway. But we must take care: -it may look small now, but get to be big later after other inlining -has happened. So we take the precaution of adding an INLINE pragma to -any such functions. - -I made this change when I observed a big function at the end of -compilation with a useful strictness signature but no w-w. (It was -small during demand analysis, we refrained from w/w, and then got big -when something was inlined in its rhs.) When I measured it on nofib, -it didn't make much difference; just a few percent improved allocation -on one benchmark (bspt/Euclid.space). But nothing got worse. - -There is an infelicity though. We may get something like - f = g val -==> - g x = case gw x of r -> I# r - - f {- InlineStable, Template = g val -} - f = case gw x of r -> I# r - -The code for f duplicates that for g, without any real benefit. It -won't really be executed, because calls to f will go via the inlining. - -Note [Wrapper activation] -~~~~~~~~~~~~~~~~~~~~~~~~~ -When should the wrapper inlining be active? It must not be active -earlier than the current Activation of the Id (eg it might have a -NOINLINE pragma). But in fact strictness analysis happens fairly -late in the pipeline, and we want to prioritise specialisations over -strictness. Eg if we have - module Foo where - f :: Num a => a -> Int -> a - f n 0 = n -- Strict in the Int, hence wrapper - f n x = f (n+n) (x-1) - - g :: Int -> Int - g x = f x x -- Provokes a specialisation for f - - module Bar where - import Foo - - h :: Int -> Int - h x = f 3 x - -Then we want the specialisation for 'f' to kick in before the wrapper does. - -Now in fact the 'gentle' simplification pass encourages this, by -having rules on, but inlinings off. But that's kind of lucky. It seems -more robust to give the wrapper an Activation of (ActiveAfter 0), -so that it becomes active in an importing module at the same time that -it appears in the first place in the defining module. - -At one stage I tried making the wrapper inlining always-active, and -that had a very bad effect on nofib/imaginary/x2n1; a wrapper was -inlined before the specialisation fired. - -\begin{code} -tryWW :: DynFlags - -> FamInstEnvs - -> RecFlag - -> Id -- The fn binder - -> CoreExpr -- The bound rhs; its innards - -- are already ww'd - -> UniqSM [(Id, CoreExpr)] -- either *one* or *two* pairs; - -- if one, then no worker (only - -- the orig "wrapper" lives on); - -- if two, then a worker and a - -- wrapper. -tryWW dflags fam_envs is_rec fn_id rhs - | isNeverActive inline_act - -- No point in worker/wrappering if the thing is never inlined! - -- Because the no-inline prag will prevent the wrapper ever - -- being inlined at a call site. - -- - -- Furthermore, don't even expose strictness info - = return [ (fn_id, rhs) ] - - | not loop_breaker - , Just stable_unf <- certainlyWillInline dflags fn_unf - = return [ (fn_id `setIdUnfolding` stable_unf, rhs) ] - -- Note [Don't w/w inline small non-loop-breaker, or INLINE, things] - -- NB: use idUnfolding because we don't want to apply - -- this criterion to a loop breaker! - - | is_fun - = splitFun dflags fam_envs new_fn_id fn_info wrap_dmds res_info rhs - - | is_thunk -- See Note [Thunk splitting] - = splitThunk dflags fam_envs is_rec new_fn_id rhs - - | otherwise - = return [ (new_fn_id, rhs) ] - - where - loop_breaker = isStrongLoopBreaker (occInfo fn_info) - fn_info = idInfo fn_id - inline_act = inlinePragmaActivation (inlinePragInfo fn_info) - fn_unf = unfoldingInfo fn_info - - -- In practice it always will have a strictness - -- signature, even if it's a uninformative one - strict_sig = strictnessInfo fn_info - StrictSig (DmdType env wrap_dmds res_info) = strict_sig - - -- new_fn_id has the DmdEnv zapped. - -- (a) it is never used again - -- (b) it wastes space - -- (c) it becomes incorrect as things are cloned, because - -- we don't push the substitution into it - new_fn_id | isEmptyVarEnv env = fn_id - | otherwise = fn_id `setIdStrictness` - mkClosedStrictSig wrap_dmds res_info - - is_fun = notNull wrap_dmds - is_thunk = not is_fun && not (exprIsHNF rhs) - - ---------------------- -splitFun :: DynFlags -> FamInstEnvs -> Id -> IdInfo -> [Demand] -> DmdResult -> CoreExpr - -> UniqSM [(Id, CoreExpr)] -splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs - = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) ) do - -- The arity should match the signature - stuff <- mkWwBodies dflags fam_envs fun_ty wrap_dmds res_info one_shots - case stuff of - Just (work_demands, wrap_fn, work_fn) -> do - work_uniq <- getUniqueM - let work_rhs = work_fn rhs - work_prag = InlinePragma { inl_inline = inl_inline inl_prag - , inl_sat = Nothing - , inl_act = wrap_act - , inl_rule = FunLike } - -- idl_inline: copy from fn_id; see Note [Worker-wrapper for INLINABLE functions] - -- idl_act: see Note [Activation for INLINABLE workers] - -- inl_rule: it does not make sense for workers to be constructorlike. - - work_id = mkWorkerId work_uniq fn_id (exprType work_rhs) - `setIdOccInfo` occInfo fn_info - -- Copy over occurrence info from parent - -- Notably whether it's a loop breaker - -- Doesn't matter much, since we will simplify next, but - -- seems right-er to do so - - `setInlinePragma` work_prag - - `setIdUnfolding` mkWorkerUnfolding dflags work_fn (unfoldingInfo fn_info) - -- See Note [Worker-wrapper for INLINABLE functions] - - `setIdStrictness` mkClosedStrictSig work_demands work_res_info - -- Even though we may not be at top level, - -- it's ok to give it an empty DmdEnv - - `setIdArity` exprArity work_rhs - -- Set the arity so that the Core Lint check that the - -- arity is consistent with the demand type goes through - - wrap_act = ActiveAfter 0 - wrap_rhs = wrap_fn work_id - wrap_prag = InlinePragma { inl_inline = Inline - , inl_sat = Nothing - , inl_act = wrap_act - , inl_rule = rule_match_info } - -- See Note [Wrapper activation] - -- The RuleMatchInfo is (and must be) unaffected - - wrap_id = fn_id `setIdUnfolding` mkWwInlineRule wrap_rhs arity - `setInlinePragma` wrap_prag - `setIdOccInfo` NoOccInfo - -- Zap any loop-breaker-ness, to avoid bleating from Lint - -- about a loop breaker with an INLINE rule - - return $ [(work_id, work_rhs), (wrap_id, wrap_rhs)] - -- Worker first, because wrapper mentions it - - Nothing -> return [(fn_id, rhs)] - where - fun_ty = idType fn_id - inl_prag = inlinePragInfo fn_info - rule_match_info = inlinePragmaRuleMatchInfo inl_prag - arity = arityInfo fn_info - -- The arity is set by the simplifier using exprEtaExpandArity - -- So it may be more than the number of top-level-visible lambdas - - work_res_info | isBotRes res_info = botRes -- Cpr stuff done by wrapper - | otherwise = topRes - - one_shots = get_one_shots rhs - --- If the original function has one-shot arguments, it is important to --- make the wrapper and worker have corresponding one-shot arguments too. --- Otherwise we spuriously float stuff out of case-expression join points, --- which is very annoying. -get_one_shots :: Expr Var -> [OneShotInfo] -get_one_shots (Lam b e) - | isId b = idOneShotInfo b : get_one_shots e - | otherwise = get_one_shots e -get_one_shots (Tick _ e) = get_one_shots e -get_one_shots _ = [] -\end{code} - -Note [Do not split void functions] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Consider this rather common form of binding: - $j = \x:Void# -> ...no use of x... - -Since x is not used it'll be marked as absent. But there is no point -in w/w-ing because we'll simply add (\y:Void#), see WwLib.mkWorerArgs. - -If x has a more interesting type (eg Int, or Int#), there *is* a point -in w/w so that we don't pass the argument at all. - -Note [Thunk splitting] -~~~~~~~~~~~~~~~~~~~~~~ -Suppose x is used strictly (never mind whether it has the CPR -property). - - let - x* = x-rhs - in body - -splitThunk transforms like this: - - let - x* = case x-rhs of { I# a -> I# a } - in body - -Now simplifier will transform to - - case x-rhs of - I# a -> let x* = I# a - in body - -which is what we want. Now suppose x-rhs is itself a case: - - x-rhs = case e of { T -> I# a; F -> I# b } - -The join point will abstract over a, rather than over (which is -what would have happened before) which is fine. - -Notice that x certainly has the CPR property now! - -In fact, splitThunk uses the function argument w/w splitting -function, so that if x's demand is deeper (say U(U(L,L),L)) -then the splitting will go deeper too. - -\begin{code} --- See Note [Thunk splitting] --- splitThunk converts the *non-recursive* binding --- x = e --- into --- x = let x = e --- in case x of --- I# y -> let x = I# y in x } --- See comments above. Is it not beautifully short? --- Moreover, it works just as well when there are --- several binders, and if the binders are lifted --- E.g. x = e --- --> x = let x = e in --- case x of (a,b) -> let x = (a,b) in x - -splitThunk :: DynFlags -> FamInstEnvs -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)] -splitThunk dflags fam_envs is_rec fn_id rhs - = do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs [fn_id] - ; let res = [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ] - ; if useful then ASSERT2( isNonRec is_rec, ppr fn_id ) -- The thunk must be non-recursive - return res - else return [(fn_id, rhs)] } -\end{code} diff --git a/compiler/stranal/WwLib.hs b/compiler/stranal/WwLib.hs new file mode 100644 index 0000000000..8c96afadd6 --- /dev/null +++ b/compiler/stranal/WwLib.hs @@ -0,0 +1,770 @@ +{- +(c) The GRASP/AQUA Project, Glasgow University, 1993-1998 + +\section[WwLib]{A library for the ``worker\/wrapper'' back-end to the strictness analyser} +-} + +{-# LANGUAGE CPP #-} + +module WwLib ( mkWwBodies, mkWWstr, mkWorkerArgs + , deepSplitProductType_maybe, findTypeShape + ) where + +#include "HsVersions.h" + +import CoreSyn +import CoreUtils ( exprType, mkCast ) +import Id ( Id, idType, mkSysLocal, idDemandInfo, setIdDemandInfo, + setIdUnfolding, + setIdInfo, idOneShotInfo, setIdOneShotInfo + ) +import IdInfo ( vanillaIdInfo ) +import DataCon +import Demand +import MkCore ( mkRuntimeErrorApp, aBSENT_ERROR_ID ) +import MkId ( voidArgId, voidPrimId ) +import TysPrim ( voidPrimTy ) +import TysWiredIn ( tupleCon ) +import Type +import Coercion hiding ( substTy, substTyVarBndr ) +import FamInstEnv +import BasicTypes ( TupleSort(..), OneShotInfo(..), worstOneShot ) +import Literal ( absentLiteralOf ) +import TyCon +import UniqSupply +import Unique +import Maybes +import Util +import Outputable +import DynFlags +import FastString + +{- +************************************************************************ +* * +\subsection[mkWrapperAndWorker]{@mkWrapperAndWorker@} +* * +************************************************************************ + +Here's an example. The original function is: + +\begin{verbatim} +g :: forall a . Int -> [a] -> a + +g = \/\ a -> \ x ys -> + case x of + 0 -> head ys + _ -> head (tail ys) +\end{verbatim} + +From this, we want to produce: +\begin{verbatim} +-- wrapper (an unfolding) +g :: forall a . Int -> [a] -> a + +g = \/\ a -> \ x ys -> + case x of + I# x# -> $wg a x# ys + -- call the worker; don't forget the type args! + +-- worker +$wg :: forall a . Int# -> [a] -> a + +$wg = \/\ a -> \ x# ys -> + let + x = I# x# + in + case x of -- note: body of g moved intact + 0 -> head ys + _ -> head (tail ys) +\end{verbatim} + +Something we have to be careful about: Here's an example: + +\begin{verbatim} +-- "f" strictness: U(P)U(P) +f (I# a) (I# b) = a +# b + +g = f -- "g" strictness same as "f" +\end{verbatim} + +\tr{f} will get a worker all nice and friendly-like; that's good. +{\em But we don't want a worker for \tr{g}}, even though it has the +same strictness as \tr{f}. Doing so could break laziness, at best. + +Consequently, we insist that the number of strictness-info items is +exactly the same as the number of lambda-bound arguments. (This is +probably slightly paranoid, but OK in practice.) If it isn't the +same, we ``revise'' the strictness info, so that we won't propagate +the unusable strictness-info into the interfaces. + + +************************************************************************ +* * +\subsection{The worker wrapper core} +* * +************************************************************************ + +@mkWwBodies@ is called when doing the worker\/wrapper split inside a module. +-} + +mkWwBodies :: DynFlags + -> FamInstEnvs + -> Type -- Type of original function + -> [Demand] -- Strictness of original function + -> DmdResult -- Info about function result + -> [OneShotInfo] -- One-shot-ness of the function, value args only + -> UniqSM (Maybe ([Demand], -- Demands for worker (value) args + Id -> CoreExpr, -- Wrapper body, lacking only the worker Id + CoreExpr -> CoreExpr)) -- Worker body, lacking the original function rhs + +-- wrap_fn_args E = \x y -> E +-- work_fn_args E = E x y + +-- wrap_fn_str E = case x of { (a,b) -> +-- case a of { (a1,a2) -> +-- E a1 a2 b y }} +-- work_fn_str E = \a2 a2 b y -> +-- let a = (a1,a2) in +-- let x = (a,b) in +-- E + +mkWwBodies dflags fam_envs fun_ty demands res_info one_shots + = do { let arg_info = demands `zip` (one_shots ++ repeat NoOneShotInfo) + all_one_shots = foldr (worstOneShot . snd) OneShotLam arg_info + ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) <- mkWWargs emptyTvSubst fun_ty arg_info + ; (useful1, work_args, wrap_fn_str, work_fn_str) <- mkWWstr dflags fam_envs wrap_args + + -- Do CPR w/w. See Note [Always do CPR w/w] + ; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty) <- mkWWcpr fam_envs res_ty res_info + + ; let (work_lam_args, work_call_args) = mkWorkerArgs dflags work_args all_one_shots cpr_res_ty + worker_args_dmds = [idDemandInfo v | v <- work_call_args, isId v] + wrapper_body = wrap_fn_args . wrap_fn_cpr . wrap_fn_str . applyToVars work_call_args . Var + worker_body = mkLams work_lam_args. work_fn_str . work_fn_cpr . work_fn_args + + ; if useful1 && not (only_one_void_argument) || useful2 + then return (Just (worker_args_dmds, wrapper_body, worker_body)) + else return Nothing + } + -- We use an INLINE unconditionally, even if the wrapper turns out to be + -- something trivial like + -- fw = ... + -- f = __inline__ (coerce T fw) + -- The point is to propagate the coerce to f's call sites, so even though + -- f's RHS is now trivial (size 1) we still want the __inline__ to prevent + -- fw from being inlined into f's RHS + where + -- Note [Do not split void functions] + only_one_void_argument + | [d] <- demands + , Just (arg_ty1, _) <- splitFunTy_maybe fun_ty + , isAbsDmd d && isVoidTy arg_ty1 + = True + | otherwise + = False + +{- +Note [Always do CPR w/w] +~~~~~~~~~~~~~~~~~~~~~~~~ +At one time we refrained from doing CPR w/w for thunks, on the grounds that +we might duplicate work. But that is already handled by the demand analyser, +which doesn't give the CPR proprety if w/w might waste work: see +Note [CPR for thunks] in DmdAnal. + +And if something *has* been given the CPR property and we don't w/w, it's +a disaster, because then the enclosing function might say it has the CPR +property, but now doesn't and there a cascade of disaster. A good example +is Trac #5920. + + +************************************************************************ +* * +\subsection{Making wrapper args} +* * +************************************************************************ + +During worker-wrapper stuff we may end up with an unlifted thing +which we want to let-bind without losing laziness. So we +add a void argument. E.g. + + f = /\a -> \x y z -> E::Int# -- E does not mention x,y,z +==> + fw = /\ a -> \void -> E + f = /\ a -> \x y z -> fw realworld + +We use the state-token type which generates no code. +-} + +mkWorkerArgs :: DynFlags -> [Var] + -> OneShotInfo -- Whether all arguments are one-shot + -> Type -- Type of body + -> ([Var], -- Lambda bound args + [Var]) -- Args at call site +mkWorkerArgs dflags args all_one_shot res_ty + | any isId args || not needsAValueLambda + = (args, args) + | otherwise + = (args ++ [newArg], args ++ [voidPrimId]) + where + needsAValueLambda = + isUnLiftedType res_ty + || not (gopt Opt_FunToThunk dflags) + -- see Note [Protecting the last value argument] + + -- see Note [All One-Shot Arguments of a Worker] + newArg = setIdOneShotInfo voidArgId all_one_shot + +{- +Note [Protecting the last value argument] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If the user writes (\_ -> E), they might be intentionally disallowing +the sharing of E. Since absence analysis and worker-wrapper are keen +to remove such unused arguments, we add in a void argument to prevent +the function from becoming a thunk. + +The user can avoid adding the void argument with the -ffun-to-thunk +flag. However, this can create sharing, which may be bad in two ways. 1) It can +create a space leak. 2) It can prevent inlining *under a lambda*. If w/w +removes the last argument from a function f, then f now looks like a thunk, and +so f can't be inlined *under a lambda*. + +Note [All One-Shot Arguments of a Worker] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Sometimes, derived join-points are just lambda-lifted thunks, whose +only argument is of the unit type and is never used. This might +interfere with the absence analysis, basing on which results these +never-used arguments are eliminated in the worker. The additional +argument `all_one_shot` of `mkWorkerArgs` is to prevent this. + +Example. Suppose we have + foo = \p(one-shot) q(one-shot). y + 3 +Then we drop the unused args to give + foo = \pq. $wfoo void# + $wfoo = \void(one-shot). y + 3 + +But suppse foo didn't have all one-shot args: + foo = \p(not-one-shot) q(one-shot). expensive y + 3 +Then we drop the unused args to give + foo = \pq. $wfoo void# + $wfoo = \void(not-one-shot). y + 3 + +If we made the void-arg one-shot we might inline an expensive +computation for y, which would be terrible! + + +************************************************************************ +* * +\subsection{Coercion stuff} +* * +************************************************************************ + +We really want to "look through" coerces. +Reason: I've seen this situation: + + let f = coerce T (\s -> E) + in \x -> case x of + p -> coerce T' f + q -> \s -> E2 + r -> coerce T' f + +If only we w/w'd f, we'd get + let f = coerce T (\s -> fw s) + fw = \s -> E + in ... + +Now we'll inline f to get + + let fw = \s -> E + in \x -> case x of + p -> fw + q -> \s -> E2 + r -> fw + +Now we'll see that fw has arity 1, and will arity expand +the \x to get what we want. +-} + +-- mkWWargs just does eta expansion +-- is driven off the function type and arity. +-- It chomps bites off foralls, arrows, newtypes +-- and keeps repeating that until it's satisfied the supplied arity + +mkWWargs :: TvSubst -- Freshening substitution to apply to the type + -- See Note [Freshen type variables] + -> Type -- The type of the function + -> [(Demand,OneShotInfo)] -- Demands and one-shot info for value arguments + -> UniqSM ([Var], -- Wrapper args + CoreExpr -> CoreExpr, -- Wrapper fn + CoreExpr -> CoreExpr, -- Worker fn + Type) -- Type of wrapper body + +mkWWargs subst fun_ty arg_info + | null arg_info + = return ([], id, id, substTy subst fun_ty) + + | ((dmd,one_shot):arg_info') <- arg_info + , Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty + = do { uniq <- getUniqueM + ; let arg_ty' = substTy subst arg_ty + id = mk_wrap_arg uniq arg_ty' dmd one_shot + ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) + <- mkWWargs subst fun_ty' arg_info' + ; return (id : wrap_args, + Lam id . wrap_fn_args, + work_fn_args . (`App` varToCoreExpr id), + res_ty) } + + | Just (tv, fun_ty') <- splitForAllTy_maybe fun_ty + = do { let (subst', tv') = substTyVarBndr subst tv + -- This substTyVarBndr clones the type variable when necy + -- See Note [Freshen type variables] + ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) + <- mkWWargs subst' fun_ty' arg_info + ; return (tv' : wrap_args, + Lam tv' . wrap_fn_args, + work_fn_args . (`App` Type (mkTyVarTy tv')), + res_ty) } + + | Just (co, rep_ty) <- topNormaliseNewType_maybe fun_ty + -- The newtype case is for when the function has + -- a newtype after the arrow (rare) + -- + -- It's also important when we have a function returning (say) a pair + -- wrapped in a newtype, at least if CPR analysis can look + -- through such newtypes, which it probably can since they are + -- simply coerces. + + = do { (wrap_args, wrap_fn_args, work_fn_args, res_ty) + <- mkWWargs subst rep_ty arg_info + ; return (wrap_args, + \e -> Cast (wrap_fn_args e) (mkSymCo co), + \e -> work_fn_args (Cast e co), + res_ty) } + + | otherwise + = WARN( True, ppr fun_ty ) -- Should not happen: if there is a demand + return ([], id, id, substTy subst fun_ty) -- then there should be a function arrow + +applyToVars :: [Var] -> CoreExpr -> CoreExpr +applyToVars vars fn = mkVarApps fn vars + +mk_wrap_arg :: Unique -> Type -> Demand -> OneShotInfo -> Id +mk_wrap_arg uniq ty dmd one_shot + = mkSysLocal (fsLit "w") uniq ty + `setIdDemandInfo` dmd + `setIdOneShotInfo` one_shot + +{- +Note [Freshen type variables] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Wen we do a worker/wrapper split, we must not use shadowed names, +else we'll get + f = /\ a /\a. fw a a +which is obviously wrong. Type variables can can in principle shadow, +within a type (e.g. forall a. a -> forall a. a->a). But type +variables *are* mentioned in , so we must substitute. + +That's why we carry the TvSubst through mkWWargs + +************************************************************************ +* * +\subsection{Strictness stuff} +* * +************************************************************************ +-} + +mkWWstr :: DynFlags + -> FamInstEnvs + -> [Var] -- Wrapper args; have their demand info on them + -- *Includes type variables* + -> UniqSM (Bool, -- Is this useful + [Var], -- Worker args + CoreExpr -> CoreExpr, -- Wrapper body, lacking the worker call + -- and without its lambdas + -- This fn adds the unboxing + + CoreExpr -> CoreExpr) -- Worker body, lacking the original body of the function, + -- and lacking its lambdas. + -- This fn does the reboxing +mkWWstr _ _ [] + = return (False, [], nop_fn, nop_fn) + +mkWWstr dflags fam_envs (arg : args) = do + (useful1, args1, wrap_fn1, work_fn1) <- mkWWstr_one dflags fam_envs arg + (useful2, args2, wrap_fn2, work_fn2) <- mkWWstr dflags fam_envs args + return (useful1 || useful2, args1 ++ args2, wrap_fn1 . wrap_fn2, work_fn1 . work_fn2) + +{- +Note [Unpacking arguments with product and polymorphic demands] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The argument is unpacked in a case if it has a product type and has a +strict *and* used demand put on it. I.e., arguments, with demands such +as the following ones: + + + + +will be unpacked, but + + or + +will not, because the pieces aren't used. This is quite important otherwise +we end up unpacking massive tuples passed to the bottoming function. Example: + + f :: ((Int,Int) -> String) -> (Int,Int) -> a + f g pr = error (g pr) + + main = print (f fst (1, error "no")) + +Does 'main' print "error 1" or "error no"? We don't really want 'f' +to unbox its second argument. This actually happened in GHC's onwn +source code, in Packages.applyPackageFlag, which ended up un-boxing +the enormous DynFlags tuple, and being strict in the +as-yet-un-filled-in pkgState files. +-} + +---------------------- +-- mkWWstr_one wrap_arg = (useful, work_args, wrap_fn, work_fn) +-- * wrap_fn assumes wrap_arg is in scope, +-- brings into scope work_args (via cases) +-- * work_fn assumes work_args are in scope, a +-- brings into scope wrap_arg (via lets) +mkWWstr_one :: DynFlags -> FamInstEnvs -> Var + -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr) +mkWWstr_one dflags fam_envs arg + | isTyVar arg + = return (False, [arg], nop_fn, nop_fn) + + -- See Note [Worker-wrapper for bottoming functions] + | isAbsDmd dmd + , Just work_fn <- mk_absent_let dflags arg + -- Absent case. We can't always handle absence for arbitrary + -- unlifted types, so we need to choose just the cases we can + --- (that's what mk_absent_let does) + = return (True, [], nop_fn, work_fn) + + -- See Note [Worthy functions for Worker-Wrapper split] + | isSeqDmd dmd -- `seq` demand; evaluate in wrapper in the hope + -- of dropping seqs in the worker + = let arg_w_unf = arg `setIdUnfolding` evaldUnfolding + -- Tell the worker arg that it's sure to be evaluated + -- so that internal seqs can be dropped + in return (True, [arg_w_unf], mk_seq_case arg, nop_fn) + -- Pass the arg, anyway, even if it is in theory discarded + -- Consider + -- f x y = x `seq` y + -- x gets a (Eval (Poly Abs)) demand, but if we fail to pass it to the worker + -- we ABSOLUTELY MUST record that x is evaluated in the wrapper. + -- Something like: + -- f x y = x `seq` fw y + -- fw y = let x{Evald} = error "oops" in (x `seq` y) + -- If we don't pin on the "Evald" flag, the seq doesn't disappear, and + -- we end up evaluating the absent thunk. + -- But the Evald flag is pretty weird, and I worry that it might disappear + -- during simplification, so for now I've just nuked this whole case + + | isStrictDmd dmd + , Just cs <- splitProdDmd_maybe dmd + -- See Note [Unpacking arguments with product and polymorphic demands] + , Just (data_con, inst_tys, inst_con_arg_tys, co) + <- deepSplitProductType_maybe fam_envs (idType arg) + , cs `equalLength` inst_con_arg_tys + -- See Note [mkWWstr and unsafeCoerce] + = do { (uniq1:uniqs) <- getUniquesM + ; let unpk_args = zipWith mk_ww_local uniqs inst_con_arg_tys + unpk_args_w_ds = zipWithEqual "mkWWstr" set_worker_arg_info unpk_args cs + unbox_fn = mkUnpackCase (Var arg) co uniq1 + data_con unpk_args + rebox_fn = Let (NonRec arg con_app) + con_app = mkConApp2 data_con inst_tys unpk_args `mkCast` mkSymCo co + ; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs unpk_args_w_ds + ; return (True, worker_args, unbox_fn . wrap_fn, work_fn . rebox_fn) } + -- Don't pass the arg, rebox instead + + | otherwise -- Other cases + = return (False, [arg], nop_fn, nop_fn) + + where + dmd = idDemandInfo arg + one_shot = idOneShotInfo arg + -- If the wrapper argument is a one-shot lambda, then + -- so should (all) the corresponding worker arguments be + -- This bites when we do w/w on a case join point + set_worker_arg_info worker_arg demand + = worker_arg `setIdDemandInfo` demand + `setIdOneShotInfo` one_shot + +---------------------- +nop_fn :: CoreExpr -> CoreExpr +nop_fn body = body + +{- +Note [mkWWstr and unsafeCoerce] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +By using unsafeCoerce, it is possible to make the number of demands fail to +match the number of constructor arguments; this happened in Trac #8037. +If so, the worker/wrapper split doesn't work right and we get a Core Lint +bug. The fix here is simply to decline to do w/w if that happens. + +************************************************************************ +* * + Type scrutiny that is specfic to demand analysis +* * +************************************************************************ + +Note [Do not unpack class dictionaries] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have + f :: Ord a => [a] -> Int -> a + {-# INLINABLE f #-} +and we worker/wrapper f, we'll get a worker with an INLINALBE pragma +(see Note [Worker-wrapper for INLINABLE functions] in WorkWrap), which +can still be specialised by the type-class specialiser, something like + fw :: Ord a => [a] -> Int# -> a + +BUT if f is strict in the Ord dictionary, we might unpack it, to get + fw :: (a->a->Bool) -> [a] -> Int# -> a +and the type-class specialiser can't specialise that. An example is +Trac #6056. + +Moreover, dictinoaries can have a lot of fields, so unpacking them can +increase closure sizes. + +Conclusion: don't unpack dictionaries. +-} + +deepSplitProductType_maybe :: FamInstEnvs -> Type -> Maybe (DataCon, [Type], [Type], Coercion) +-- If deepSplitProductType_maybe ty = Just (dc, tys, arg_tys, co) +-- then dc @ tys (args::arg_tys) :: rep_ty +-- co :: ty ~ rep_ty +deepSplitProductType_maybe fam_envs ty + | let (co, ty1) = topNormaliseType_maybe fam_envs ty + `orElse` (mkReflCo Representational ty, ty) + , Just (tc, tc_args) <- splitTyConApp_maybe ty1 + , Just con <- isDataProductTyCon_maybe tc + , not (isClassTyCon tc) -- See Note [Do not unpack class dictionaries] + = Just (con, tc_args, dataConInstArgTys con tc_args, co) +deepSplitProductType_maybe _ _ = Nothing + +deepSplitCprType_maybe :: FamInstEnvs -> ConTag -> Type -> Maybe (DataCon, [Type], [Type], Coercion) +-- If deepSplitCprType_maybe n ty = Just (dc, tys, arg_tys, co) +-- then dc @ tys (args::arg_tys) :: rep_ty +-- co :: ty ~ rep_ty +deepSplitCprType_maybe fam_envs con_tag ty + | let (co, ty1) = topNormaliseType_maybe fam_envs ty + `orElse` (mkReflCo Representational ty, ty) + , Just (tc, tc_args) <- splitTyConApp_maybe ty1 + , isDataTyCon tc + , let cons = tyConDataCons tc + , cons `lengthAtLeast` con_tag -- This might not be true if we import the + -- type constructor via a .hs-bool file (#8743) + , let con = cons !! (con_tag - fIRST_TAG) + = Just (con, tc_args, dataConInstArgTys con tc_args, co) +deepSplitCprType_maybe _ _ _ = Nothing + +findTypeShape :: FamInstEnvs -> Type -> TypeShape +-- Uncover the arrow and product shape of a type +-- The data type TypeShape is defined in Demand +-- See Note [Trimming a demand to a type] in Demand +findTypeShape fam_envs ty + | Just (_, ty') <- splitForAllTy_maybe ty + = findTypeShape fam_envs ty' + + | Just (tc, tc_args) <- splitTyConApp_maybe ty + , Just con <- isDataProductTyCon_maybe tc + = TsProd (map (findTypeShape fam_envs) $ dataConInstArgTys con tc_args) + + | Just (_, res) <- splitFunTy_maybe ty + = TsFun (findTypeShape fam_envs res) + + | Just (_, ty') <- topNormaliseType_maybe fam_envs ty + = findTypeShape fam_envs ty' + + | otherwise + = TsUnk + +{- +************************************************************************ +* * +\subsection{CPR stuff} +* * +************************************************************************ + + +@mkWWcpr@ takes the worker/wrapper pair produced from the strictness +info and adds in the CPR transformation. The worker returns an +unboxed tuple containing non-CPR components. The wrapper takes this +tuple and re-produces the correct structured output. + +The non-CPR results appear ordered in the unboxed tuple as if by a +left-to-right traversal of the result structure. +-} + +mkWWcpr :: FamInstEnvs + -> Type -- function body type + -> DmdResult -- CPR analysis results + -> UniqSM (Bool, -- Is w/w'ing useful? + CoreExpr -> CoreExpr, -- New wrapper + CoreExpr -> CoreExpr, -- New worker + Type) -- Type of worker's body + +mkWWcpr fam_envs body_ty res + = case returnsCPR_maybe res of + Nothing -> return (False, id, id, body_ty) -- No CPR info + Just con_tag | Just stuff <- deepSplitCprType_maybe fam_envs con_tag body_ty + -> mkWWcpr_help stuff + | otherwise + -- See Note [non-algebraic or open body type warning] + -> WARN( True, text "mkWWcpr: non-algebraic or open body type" <+> ppr body_ty ) + return (False, id, id, body_ty) + +mkWWcpr_help :: (DataCon, [Type], [Type], Coercion) + -> UniqSM (Bool, CoreExpr -> CoreExpr, CoreExpr -> CoreExpr, Type) + +mkWWcpr_help (data_con, inst_tys, arg_tys, co) + | [arg_ty1] <- arg_tys + , isUnLiftedType arg_ty1 + -- Special case when there is a single result of unlifted type + -- + -- Wrapper: case (..call worker..) of x -> C x + -- Worker: case ( ..body.. ) of C x -> x + = do { (work_uniq : arg_uniq : _) <- getUniquesM + ; let arg = mk_ww_local arg_uniq arg_ty1 + con_app = mkConApp2 data_con inst_tys [arg] `mkCast` mkSymCo co + + ; return ( True + , \ wkr_call -> Case wkr_call arg (exprType con_app) [(DEFAULT, [], con_app)] + , \ body -> mkUnpackCase body co work_uniq data_con [arg] (Var arg) + , arg_ty1 ) } + + | otherwise -- The general case + -- Wrapper: case (..call worker..) of (# a, b #) -> C a b + -- Worker: case ( ...body... ) of C a b -> (# a, b #) + = do { (work_uniq : uniqs) <- getUniquesM + ; let (wrap_wild : args) = zipWith mk_ww_local uniqs (ubx_tup_ty : arg_tys) + ubx_tup_con = tupleCon UnboxedTuple (length arg_tys) + ubx_tup_ty = exprType ubx_tup_app + ubx_tup_app = mkConApp2 ubx_tup_con arg_tys args + con_app = mkConApp2 data_con inst_tys args `mkCast` mkSymCo co + + ; return (True + , \ wkr_call -> Case wkr_call wrap_wild (exprType con_app) [(DataAlt ubx_tup_con, args, con_app)] + , \ body -> mkUnpackCase body co work_uniq data_con args ubx_tup_app + , ubx_tup_ty ) } + +mkUnpackCase :: CoreExpr -> Coercion -> Unique -> DataCon -> [Id] -> CoreExpr -> CoreExpr +-- (mkUnpackCase e co uniq Con args body) +-- returns +-- case e |> co of bndr { Con args -> body } + +mkUnpackCase (Tick tickish e) co uniq con args body -- See Note [Profiling and unpacking] + = Tick tickish (mkUnpackCase e co uniq con args body) +mkUnpackCase scrut co uniq boxing_con unpk_args body + = Case casted_scrut bndr (exprType body) + [(DataAlt boxing_con, unpk_args, body)] + where + casted_scrut = scrut `mkCast` co + bndr = mk_ww_local uniq (exprType casted_scrut) + +{- +Note [non-algebraic or open body type warning] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are a few cases where the W/W transformation is told that something +returns a constructor, but the type at hand doesn't really match this. One +real-world example involves unsafeCoerce: + foo = IO a + foo = unsafeCoerce c_exit + foreign import ccall "c_exit" c_exit :: IO () +Here CPR will tell you that `foo` returns a () constructor for sure, but trying +to create a worker/wrapper for type `a` obviously fails. +(This was a real example until ee8e792 in libraries/base.) + +It does not seem feasible to avoid all such cases already in the analyser (and +after all, the analysis is not really wrong), so we simply do nothing here in +mkWWcpr. But we still want to emit warning with -DDEBUG, to hopefully catch +other cases where something went avoidably wrong. + + +Note [Profiling and unpacking] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If the original function looked like + f = \ x -> {-# SCC "foo" #-} E + +then we want the CPR'd worker to look like + \ x -> {-# SCC "foo" #-} (case E of I# x -> x) +and definitely not + \ x -> case ({-# SCC "foo" #-} E) of I# x -> x) + +This transform doesn't move work or allocation +from one cost centre to another. + +Later [SDM]: presumably this is because we want the simplifier to +eliminate the case, and the scc would get in the way? I'm ok with +including the case itself in the cost centre, since it is morally +part of the function (post transformation) anyway. + + +************************************************************************ +* * +\subsection{Utilities} +* * +************************************************************************ + +Note [Absent errors] +~~~~~~~~~~~~~~~~~~~~ +We make a new binding for Ids that are marked absent, thus + let x = absentError "x :: Int" +The idea is that this binding will never be used; but if it +buggily is used we'll get a runtime error message. + +Coping with absence for *unlifted* types is important; see, for +example, Trac #4306. For these we find a suitable literal, +using Literal.absentLiteralOf. We don't have literals for +every primitive type, so the function is partial. + + [I did try the experiment of using an error thunk for unlifted + things too, relying on the simplifier to drop it as dead code, + by making absentError + (a) *not* be a bottoming Id, + (b) be "ok for speculation" + But that relies on the simplifier finding that it really + is dead code, which is fragile, and indeed failed when + profiling is on, which disables various optimisations. So + using a literal will do.] +-} + +mk_absent_let :: DynFlags -> Id -> Maybe (CoreExpr -> CoreExpr) +mk_absent_let dflags arg + | not (isUnLiftedType arg_ty) + = Just (Let (NonRec arg abs_rhs)) + | Just tc <- tyConAppTyCon_maybe arg_ty + , Just lit <- absentLiteralOf tc + = Just (Let (NonRec arg (Lit lit))) + | arg_ty `eqType` voidPrimTy + = Just (Let (NonRec arg (Var voidPrimId))) + | otherwise + = WARN( True, ptext (sLit "No absent value for") <+> ppr arg_ty ) + Nothing + where + arg_ty = idType arg + abs_rhs = mkRuntimeErrorApp aBSENT_ERROR_ID arg_ty msg + msg = showSDoc dflags (ppr arg <+> ppr (idType arg)) + +mk_seq_case :: Id -> CoreExpr -> CoreExpr +mk_seq_case arg body = Case (Var arg) (sanitiseCaseBndr arg) (exprType body) [(DEFAULT, [], body)] + +sanitiseCaseBndr :: Id -> Id +-- The argument we are scrutinising has the right type to be +-- a case binder, so it's convenient to re-use it for that purpose. +-- But we *must* throw away all its IdInfo. In particular, the argument +-- will have demand info on it, and that demand info may be incorrect for +-- the case binder. e.g. case ww_arg of ww_arg { I# x -> ... } +-- Quite likely ww_arg isn't used in '...'. The case may get discarded +-- if the case binder says "I'm demanded". This happened in a situation +-- like (x+y) `seq` .... +sanitiseCaseBndr id = id `setIdInfo` vanillaIdInfo + +mk_ww_local :: Unique -> Type -> Id +mk_ww_local uniq ty = mkSysLocal (fsLit "ww") uniq ty diff --git a/compiler/stranal/WwLib.lhs b/compiler/stranal/WwLib.lhs deleted file mode 100644 index 1f1fbdf745..0000000000 --- a/compiler/stranal/WwLib.lhs +++ /dev/null @@ -1,776 +0,0 @@ -% -% (c) The GRASP/AQUA Project, Glasgow University, 1993-1998 -% -\section[WwLib]{A library for the ``worker\/wrapper'' back-end to the strictness analyser} - -\begin{code} -{-# LANGUAGE CPP #-} - -module WwLib ( mkWwBodies, mkWWstr, mkWorkerArgs - , deepSplitProductType_maybe, findTypeShape - ) where - -#include "HsVersions.h" - -import CoreSyn -import CoreUtils ( exprType, mkCast ) -import Id ( Id, idType, mkSysLocal, idDemandInfo, setIdDemandInfo, - setIdUnfolding, - setIdInfo, idOneShotInfo, setIdOneShotInfo - ) -import IdInfo ( vanillaIdInfo ) -import DataCon -import Demand -import MkCore ( mkRuntimeErrorApp, aBSENT_ERROR_ID ) -import MkId ( voidArgId, voidPrimId ) -import TysPrim ( voidPrimTy ) -import TysWiredIn ( tupleCon ) -import Type -import Coercion hiding ( substTy, substTyVarBndr ) -import FamInstEnv -import BasicTypes ( TupleSort(..), OneShotInfo(..), worstOneShot ) -import Literal ( absentLiteralOf ) -import TyCon -import UniqSupply -import Unique -import Maybes -import Util -import Outputable -import DynFlags -import FastString -\end{code} - - -%************************************************************************ -%* * -\subsection[mkWrapperAndWorker]{@mkWrapperAndWorker@} -%* * -%************************************************************************ - -Here's an example. The original function is: - -\begin{verbatim} -g :: forall a . Int -> [a] -> a - -g = \/\ a -> \ x ys -> - case x of - 0 -> head ys - _ -> head (tail ys) -\end{verbatim} - -From this, we want to produce: -\begin{verbatim} --- wrapper (an unfolding) -g :: forall a . Int -> [a] -> a - -g = \/\ a -> \ x ys -> - case x of - I# x# -> $wg a x# ys - -- call the worker; don't forget the type args! - --- worker -$wg :: forall a . Int# -> [a] -> a - -$wg = \/\ a -> \ x# ys -> - let - x = I# x# - in - case x of -- note: body of g moved intact - 0 -> head ys - _ -> head (tail ys) -\end{verbatim} - -Something we have to be careful about: Here's an example: - -\begin{verbatim} --- "f" strictness: U(P)U(P) -f (I# a) (I# b) = a +# b - -g = f -- "g" strictness same as "f" -\end{verbatim} - -\tr{f} will get a worker all nice and friendly-like; that's good. -{\em But we don't want a worker for \tr{g}}, even though it has the -same strictness as \tr{f}. Doing so could break laziness, at best. - -Consequently, we insist that the number of strictness-info items is -exactly the same as the number of lambda-bound arguments. (This is -probably slightly paranoid, but OK in practice.) If it isn't the -same, we ``revise'' the strictness info, so that we won't propagate -the unusable strictness-info into the interfaces. - - -%************************************************************************ -%* * -\subsection{The worker wrapper core} -%* * -%************************************************************************ - -@mkWwBodies@ is called when doing the worker\/wrapper split inside a module. - -\begin{code} -mkWwBodies :: DynFlags - -> FamInstEnvs - -> Type -- Type of original function - -> [Demand] -- Strictness of original function - -> DmdResult -- Info about function result - -> [OneShotInfo] -- One-shot-ness of the function, value args only - -> UniqSM (Maybe ([Demand], -- Demands for worker (value) args - Id -> CoreExpr, -- Wrapper body, lacking only the worker Id - CoreExpr -> CoreExpr)) -- Worker body, lacking the original function rhs - --- wrap_fn_args E = \x y -> E --- work_fn_args E = E x y - --- wrap_fn_str E = case x of { (a,b) -> --- case a of { (a1,a2) -> --- E a1 a2 b y }} --- work_fn_str E = \a2 a2 b y -> --- let a = (a1,a2) in --- let x = (a,b) in --- E - -mkWwBodies dflags fam_envs fun_ty demands res_info one_shots - = do { let arg_info = demands `zip` (one_shots ++ repeat NoOneShotInfo) - all_one_shots = foldr (worstOneShot . snd) OneShotLam arg_info - ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) <- mkWWargs emptyTvSubst fun_ty arg_info - ; (useful1, work_args, wrap_fn_str, work_fn_str) <- mkWWstr dflags fam_envs wrap_args - - -- Do CPR w/w. See Note [Always do CPR w/w] - ; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty) <- mkWWcpr fam_envs res_ty res_info - - ; let (work_lam_args, work_call_args) = mkWorkerArgs dflags work_args all_one_shots cpr_res_ty - worker_args_dmds = [idDemandInfo v | v <- work_call_args, isId v] - wrapper_body = wrap_fn_args . wrap_fn_cpr . wrap_fn_str . applyToVars work_call_args . Var - worker_body = mkLams work_lam_args. work_fn_str . work_fn_cpr . work_fn_args - - ; if useful1 && not (only_one_void_argument) || useful2 - then return (Just (worker_args_dmds, wrapper_body, worker_body)) - else return Nothing - } - -- We use an INLINE unconditionally, even if the wrapper turns out to be - -- something trivial like - -- fw = ... - -- f = __inline__ (coerce T fw) - -- The point is to propagate the coerce to f's call sites, so even though - -- f's RHS is now trivial (size 1) we still want the __inline__ to prevent - -- fw from being inlined into f's RHS - where - -- Note [Do not split void functions] - only_one_void_argument - | [d] <- demands - , Just (arg_ty1, _) <- splitFunTy_maybe fun_ty - , isAbsDmd d && isVoidTy arg_ty1 - = True - | otherwise - = False - -\end{code} - -Note [Always do CPR w/w] -~~~~~~~~~~~~~~~~~~~~~~~~ -At one time we refrained from doing CPR w/w for thunks, on the grounds that -we might duplicate work. But that is already handled by the demand analyser, -which doesn't give the CPR proprety if w/w might waste work: see -Note [CPR for thunks] in DmdAnal. - -And if something *has* been given the CPR property and we don't w/w, it's -a disaster, because then the enclosing function might say it has the CPR -property, but now doesn't and there a cascade of disaster. A good example -is Trac #5920. - - -%************************************************************************ -%* * -\subsection{Making wrapper args} -%* * -%************************************************************************ - -During worker-wrapper stuff we may end up with an unlifted thing -which we want to let-bind without losing laziness. So we -add a void argument. E.g. - - f = /\a -> \x y z -> E::Int# -- E does not mention x,y,z -==> - fw = /\ a -> \void -> E - f = /\ a -> \x y z -> fw realworld - -We use the state-token type which generates no code. - -\begin{code} -mkWorkerArgs :: DynFlags -> [Var] - -> OneShotInfo -- Whether all arguments are one-shot - -> Type -- Type of body - -> ([Var], -- Lambda bound args - [Var]) -- Args at call site -mkWorkerArgs dflags args all_one_shot res_ty - | any isId args || not needsAValueLambda - = (args, args) - | otherwise - = (args ++ [newArg], args ++ [voidPrimId]) - where - needsAValueLambda = - isUnLiftedType res_ty - || not (gopt Opt_FunToThunk dflags) - -- see Note [Protecting the last value argument] - - -- see Note [All One-Shot Arguments of a Worker] - newArg = setIdOneShotInfo voidArgId all_one_shot -\end{code} - -Note [Protecting the last value argument] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If the user writes (\_ -> E), they might be intentionally disallowing -the sharing of E. Since absence analysis and worker-wrapper are keen -to remove such unused arguments, we add in a void argument to prevent -the function from becoming a thunk. - -The user can avoid adding the void argument with the -ffun-to-thunk -flag. However, this can create sharing, which may be bad in two ways. 1) It can -create a space leak. 2) It can prevent inlining *under a lambda*. If w/w -removes the last argument from a function f, then f now looks like a thunk, and -so f can't be inlined *under a lambda*. - -Note [All One-Shot Arguments of a Worker] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Sometimes, derived join-points are just lambda-lifted thunks, whose -only argument is of the unit type and is never used. This might -interfere with the absence analysis, basing on which results these -never-used arguments are eliminated in the worker. The additional -argument `all_one_shot` of `mkWorkerArgs` is to prevent this. - -Example. Suppose we have - foo = \p(one-shot) q(one-shot). y + 3 -Then we drop the unused args to give - foo = \pq. $wfoo void# - $wfoo = \void(one-shot). y + 3 - -But suppse foo didn't have all one-shot args: - foo = \p(not-one-shot) q(one-shot). expensive y + 3 -Then we drop the unused args to give - foo = \pq. $wfoo void# - $wfoo = \void(not-one-shot). y + 3 - -If we made the void-arg one-shot we might inline an expensive -computation for y, which would be terrible! - - -%************************************************************************ -%* * -\subsection{Coercion stuff} -%* * -%************************************************************************ - -We really want to "look through" coerces. -Reason: I've seen this situation: - - let f = coerce T (\s -> E) - in \x -> case x of - p -> coerce T' f - q -> \s -> E2 - r -> coerce T' f - -If only we w/w'd f, we'd get - let f = coerce T (\s -> fw s) - fw = \s -> E - in ... - -Now we'll inline f to get - - let fw = \s -> E - in \x -> case x of - p -> fw - q -> \s -> E2 - r -> fw - -Now we'll see that fw has arity 1, and will arity expand -the \x to get what we want. - -\begin{code} --- mkWWargs just does eta expansion --- is driven off the function type and arity. --- It chomps bites off foralls, arrows, newtypes --- and keeps repeating that until it's satisfied the supplied arity - -mkWWargs :: TvSubst -- Freshening substitution to apply to the type - -- See Note [Freshen type variables] - -> Type -- The type of the function - -> [(Demand,OneShotInfo)] -- Demands and one-shot info for value arguments - -> UniqSM ([Var], -- Wrapper args - CoreExpr -> CoreExpr, -- Wrapper fn - CoreExpr -> CoreExpr, -- Worker fn - Type) -- Type of wrapper body - -mkWWargs subst fun_ty arg_info - | null arg_info - = return ([], id, id, substTy subst fun_ty) - - | ((dmd,one_shot):arg_info') <- arg_info - , Just (arg_ty, fun_ty') <- splitFunTy_maybe fun_ty - = do { uniq <- getUniqueM - ; let arg_ty' = substTy subst arg_ty - id = mk_wrap_arg uniq arg_ty' dmd one_shot - ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) - <- mkWWargs subst fun_ty' arg_info' - ; return (id : wrap_args, - Lam id . wrap_fn_args, - work_fn_args . (`App` varToCoreExpr id), - res_ty) } - - | Just (tv, fun_ty') <- splitForAllTy_maybe fun_ty - = do { let (subst', tv') = substTyVarBndr subst tv - -- This substTyVarBndr clones the type variable when necy - -- See Note [Freshen type variables] - ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) - <- mkWWargs subst' fun_ty' arg_info - ; return (tv' : wrap_args, - Lam tv' . wrap_fn_args, - work_fn_args . (`App` Type (mkTyVarTy tv')), - res_ty) } - - | Just (co, rep_ty) <- topNormaliseNewType_maybe fun_ty - -- The newtype case is for when the function has - -- a newtype after the arrow (rare) - -- - -- It's also important when we have a function returning (say) a pair - -- wrapped in a newtype, at least if CPR analysis can look - -- through such newtypes, which it probably can since they are - -- simply coerces. - - = do { (wrap_args, wrap_fn_args, work_fn_args, res_ty) - <- mkWWargs subst rep_ty arg_info - ; return (wrap_args, - \e -> Cast (wrap_fn_args e) (mkSymCo co), - \e -> work_fn_args (Cast e co), - res_ty) } - - | otherwise - = WARN( True, ppr fun_ty ) -- Should not happen: if there is a demand - return ([], id, id, substTy subst fun_ty) -- then there should be a function arrow - -applyToVars :: [Var] -> CoreExpr -> CoreExpr -applyToVars vars fn = mkVarApps fn vars - -mk_wrap_arg :: Unique -> Type -> Demand -> OneShotInfo -> Id -mk_wrap_arg uniq ty dmd one_shot - = mkSysLocal (fsLit "w") uniq ty - `setIdDemandInfo` dmd - `setIdOneShotInfo` one_shot -\end{code} - -Note [Freshen type variables] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Wen we do a worker/wrapper split, we must not use shadowed names, -else we'll get - f = /\ a /\a. fw a a -which is obviously wrong. Type variables can can in principle shadow, -within a type (e.g. forall a. a -> forall a. a->a). But type -variables *are* mentioned in , so we must substitute. - -That's why we carry the TvSubst through mkWWargs - -%************************************************************************ -%* * -\subsection{Strictness stuff} -%* * -%************************************************************************ - -\begin{code} -mkWWstr :: DynFlags - -> FamInstEnvs - -> [Var] -- Wrapper args; have their demand info on them - -- *Includes type variables* - -> UniqSM (Bool, -- Is this useful - [Var], -- Worker args - CoreExpr -> CoreExpr, -- Wrapper body, lacking the worker call - -- and without its lambdas - -- This fn adds the unboxing - - CoreExpr -> CoreExpr) -- Worker body, lacking the original body of the function, - -- and lacking its lambdas. - -- This fn does the reboxing -mkWWstr _ _ [] - = return (False, [], nop_fn, nop_fn) - -mkWWstr dflags fam_envs (arg : args) = do - (useful1, args1, wrap_fn1, work_fn1) <- mkWWstr_one dflags fam_envs arg - (useful2, args2, wrap_fn2, work_fn2) <- mkWWstr dflags fam_envs args - return (useful1 || useful2, args1 ++ args2, wrap_fn1 . wrap_fn2, work_fn1 . work_fn2) - -\end{code} - -Note [Unpacking arguments with product and polymorphic demands] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The argument is unpacked in a case if it has a product type and has a -strict *and* used demand put on it. I.e., arguments, with demands such -as the following ones: - - - - -will be unpacked, but - - or - -will not, because the pieces aren't used. This is quite important otherwise -we end up unpacking massive tuples passed to the bottoming function. Example: - - f :: ((Int,Int) -> String) -> (Int,Int) -> a - f g pr = error (g pr) - - main = print (f fst (1, error "no")) - -Does 'main' print "error 1" or "error no"? We don't really want 'f' -to unbox its second argument. This actually happened in GHC's onwn -source code, in Packages.applyPackageFlag, which ended up un-boxing -the enormous DynFlags tuple, and being strict in the -as-yet-un-filled-in pkgState files. - -\begin{code} ----------------------- --- mkWWstr_one wrap_arg = (useful, work_args, wrap_fn, work_fn) --- * wrap_fn assumes wrap_arg is in scope, --- brings into scope work_args (via cases) --- * work_fn assumes work_args are in scope, a --- brings into scope wrap_arg (via lets) -mkWWstr_one :: DynFlags -> FamInstEnvs -> Var - -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr) -mkWWstr_one dflags fam_envs arg - | isTyVar arg - = return (False, [arg], nop_fn, nop_fn) - - -- See Note [Worker-wrapper for bottoming functions] - | isAbsDmd dmd - , Just work_fn <- mk_absent_let dflags arg - -- Absent case. We can't always handle absence for arbitrary - -- unlifted types, so we need to choose just the cases we can - --- (that's what mk_absent_let does) - = return (True, [], nop_fn, work_fn) - - -- See Note [Worthy functions for Worker-Wrapper split] - | isSeqDmd dmd -- `seq` demand; evaluate in wrapper in the hope - -- of dropping seqs in the worker - = let arg_w_unf = arg `setIdUnfolding` evaldUnfolding - -- Tell the worker arg that it's sure to be evaluated - -- so that internal seqs can be dropped - in return (True, [arg_w_unf], mk_seq_case arg, nop_fn) - -- Pass the arg, anyway, even if it is in theory discarded - -- Consider - -- f x y = x `seq` y - -- x gets a (Eval (Poly Abs)) demand, but if we fail to pass it to the worker - -- we ABSOLUTELY MUST record that x is evaluated in the wrapper. - -- Something like: - -- f x y = x `seq` fw y - -- fw y = let x{Evald} = error "oops" in (x `seq` y) - -- If we don't pin on the "Evald" flag, the seq doesn't disappear, and - -- we end up evaluating the absent thunk. - -- But the Evald flag is pretty weird, and I worry that it might disappear - -- during simplification, so for now I've just nuked this whole case - - | isStrictDmd dmd - , Just cs <- splitProdDmd_maybe dmd - -- See Note [Unpacking arguments with product and polymorphic demands] - , Just (data_con, inst_tys, inst_con_arg_tys, co) - <- deepSplitProductType_maybe fam_envs (idType arg) - , cs `equalLength` inst_con_arg_tys - -- See Note [mkWWstr and unsafeCoerce] - = do { (uniq1:uniqs) <- getUniquesM - ; let unpk_args = zipWith mk_ww_local uniqs inst_con_arg_tys - unpk_args_w_ds = zipWithEqual "mkWWstr" set_worker_arg_info unpk_args cs - unbox_fn = mkUnpackCase (Var arg) co uniq1 - data_con unpk_args - rebox_fn = Let (NonRec arg con_app) - con_app = mkConApp2 data_con inst_tys unpk_args `mkCast` mkSymCo co - ; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs unpk_args_w_ds - ; return (True, worker_args, unbox_fn . wrap_fn, work_fn . rebox_fn) } - -- Don't pass the arg, rebox instead - - | otherwise -- Other cases - = return (False, [arg], nop_fn, nop_fn) - - where - dmd = idDemandInfo arg - one_shot = idOneShotInfo arg - -- If the wrapper argument is a one-shot lambda, then - -- so should (all) the corresponding worker arguments be - -- This bites when we do w/w on a case join point - set_worker_arg_info worker_arg demand - = worker_arg `setIdDemandInfo` demand - `setIdOneShotInfo` one_shot - ----------------------- -nop_fn :: CoreExpr -> CoreExpr -nop_fn body = body -\end{code} - -Note [mkWWstr and unsafeCoerce] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -By using unsafeCoerce, it is possible to make the number of demands fail to -match the number of constructor arguments; this happened in Trac #8037. -If so, the worker/wrapper split doesn't work right and we get a Core Lint -bug. The fix here is simply to decline to do w/w if that happens. - -%************************************************************************ -%* * - Type scrutiny that is specfic to demand analysis -%* * -%************************************************************************ - -Note [Do not unpack class dictionaries] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If we have - f :: Ord a => [a] -> Int -> a - {-# INLINABLE f #-} -and we worker/wrapper f, we'll get a worker with an INLINALBE pragma -(see Note [Worker-wrapper for INLINABLE functions] in WorkWrap), which -can still be specialised by the type-class specialiser, something like - fw :: Ord a => [a] -> Int# -> a - -BUT if f is strict in the Ord dictionary, we might unpack it, to get - fw :: (a->a->Bool) -> [a] -> Int# -> a -and the type-class specialiser can't specialise that. An example is -Trac #6056. - -Moreover, dictinoaries can have a lot of fields, so unpacking them can -increase closure sizes. - -Conclusion: don't unpack dictionaries. - -\begin{code} -deepSplitProductType_maybe :: FamInstEnvs -> Type -> Maybe (DataCon, [Type], [Type], Coercion) --- If deepSplitProductType_maybe ty = Just (dc, tys, arg_tys, co) --- then dc @ tys (args::arg_tys) :: rep_ty --- co :: ty ~ rep_ty -deepSplitProductType_maybe fam_envs ty - | let (co, ty1) = topNormaliseType_maybe fam_envs ty - `orElse` (mkReflCo Representational ty, ty) - , Just (tc, tc_args) <- splitTyConApp_maybe ty1 - , Just con <- isDataProductTyCon_maybe tc - , not (isClassTyCon tc) -- See Note [Do not unpack class dictionaries] - = Just (con, tc_args, dataConInstArgTys con tc_args, co) -deepSplitProductType_maybe _ _ = Nothing - -deepSplitCprType_maybe :: FamInstEnvs -> ConTag -> Type -> Maybe (DataCon, [Type], [Type], Coercion) --- If deepSplitCprType_maybe n ty = Just (dc, tys, arg_tys, co) --- then dc @ tys (args::arg_tys) :: rep_ty --- co :: ty ~ rep_ty -deepSplitCprType_maybe fam_envs con_tag ty - | let (co, ty1) = topNormaliseType_maybe fam_envs ty - `orElse` (mkReflCo Representational ty, ty) - , Just (tc, tc_args) <- splitTyConApp_maybe ty1 - , isDataTyCon tc - , let cons = tyConDataCons tc - , cons `lengthAtLeast` con_tag -- This might not be true if we import the - -- type constructor via a .hs-bool file (#8743) - , let con = cons !! (con_tag - fIRST_TAG) - = Just (con, tc_args, dataConInstArgTys con tc_args, co) -deepSplitCprType_maybe _ _ _ = Nothing - -findTypeShape :: FamInstEnvs -> Type -> TypeShape --- Uncover the arrow and product shape of a type --- The data type TypeShape is defined in Demand --- See Note [Trimming a demand to a type] in Demand -findTypeShape fam_envs ty - | Just (_, ty') <- splitForAllTy_maybe ty - = findTypeShape fam_envs ty' - - | Just (tc, tc_args) <- splitTyConApp_maybe ty - , Just con <- isDataProductTyCon_maybe tc - = TsProd (map (findTypeShape fam_envs) $ dataConInstArgTys con tc_args) - - | Just (_, res) <- splitFunTy_maybe ty - = TsFun (findTypeShape fam_envs res) - - | Just (_, ty') <- topNormaliseType_maybe fam_envs ty - = findTypeShape fam_envs ty' - - | otherwise - = TsUnk -\end{code} - - -%************************************************************************ -%* * -\subsection{CPR stuff} -%* * -%************************************************************************ - - -@mkWWcpr@ takes the worker/wrapper pair produced from the strictness -info and adds in the CPR transformation. The worker returns an -unboxed tuple containing non-CPR components. The wrapper takes this -tuple and re-produces the correct structured output. - -The non-CPR results appear ordered in the unboxed tuple as if by a -left-to-right traversal of the result structure. - - -\begin{code} -mkWWcpr :: FamInstEnvs - -> Type -- function body type - -> DmdResult -- CPR analysis results - -> UniqSM (Bool, -- Is w/w'ing useful? - CoreExpr -> CoreExpr, -- New wrapper - CoreExpr -> CoreExpr, -- New worker - Type) -- Type of worker's body - -mkWWcpr fam_envs body_ty res - = case returnsCPR_maybe res of - Nothing -> return (False, id, id, body_ty) -- No CPR info - Just con_tag | Just stuff <- deepSplitCprType_maybe fam_envs con_tag body_ty - -> mkWWcpr_help stuff - | otherwise - -- See Note [non-algebraic or open body type warning] - -> WARN( True, text "mkWWcpr: non-algebraic or open body type" <+> ppr body_ty ) - return (False, id, id, body_ty) - -mkWWcpr_help :: (DataCon, [Type], [Type], Coercion) - -> UniqSM (Bool, CoreExpr -> CoreExpr, CoreExpr -> CoreExpr, Type) - -mkWWcpr_help (data_con, inst_tys, arg_tys, co) - | [arg_ty1] <- arg_tys - , isUnLiftedType arg_ty1 - -- Special case when there is a single result of unlifted type - -- - -- Wrapper: case (..call worker..) of x -> C x - -- Worker: case ( ..body.. ) of C x -> x - = do { (work_uniq : arg_uniq : _) <- getUniquesM - ; let arg = mk_ww_local arg_uniq arg_ty1 - con_app = mkConApp2 data_con inst_tys [arg] `mkCast` mkSymCo co - - ; return ( True - , \ wkr_call -> Case wkr_call arg (exprType con_app) [(DEFAULT, [], con_app)] - , \ body -> mkUnpackCase body co work_uniq data_con [arg] (Var arg) - , arg_ty1 ) } - - | otherwise -- The general case - -- Wrapper: case (..call worker..) of (# a, b #) -> C a b - -- Worker: case ( ...body... ) of C a b -> (# a, b #) - = do { (work_uniq : uniqs) <- getUniquesM - ; let (wrap_wild : args) = zipWith mk_ww_local uniqs (ubx_tup_ty : arg_tys) - ubx_tup_con = tupleCon UnboxedTuple (length arg_tys) - ubx_tup_ty = exprType ubx_tup_app - ubx_tup_app = mkConApp2 ubx_tup_con arg_tys args - con_app = mkConApp2 data_con inst_tys args `mkCast` mkSymCo co - - ; return (True - , \ wkr_call -> Case wkr_call wrap_wild (exprType con_app) [(DataAlt ubx_tup_con, args, con_app)] - , \ body -> mkUnpackCase body co work_uniq data_con args ubx_tup_app - , ubx_tup_ty ) } - -mkUnpackCase :: CoreExpr -> Coercion -> Unique -> DataCon -> [Id] -> CoreExpr -> CoreExpr --- (mkUnpackCase e co uniq Con args body) --- returns --- case e |> co of bndr { Con args -> body } - -mkUnpackCase (Tick tickish e) co uniq con args body -- See Note [Profiling and unpacking] - = Tick tickish (mkUnpackCase e co uniq con args body) -mkUnpackCase scrut co uniq boxing_con unpk_args body - = Case casted_scrut bndr (exprType body) - [(DataAlt boxing_con, unpk_args, body)] - where - casted_scrut = scrut `mkCast` co - bndr = mk_ww_local uniq (exprType casted_scrut) -\end{code} - -Note [non-algebraic or open body type warning] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -There are a few cases where the W/W transformation is told that something -returns a constructor, but the type at hand doesn't really match this. One -real-world example involves unsafeCoerce: - foo = IO a - foo = unsafeCoerce c_exit - foreign import ccall "c_exit" c_exit :: IO () -Here CPR will tell you that `foo` returns a () constructor for sure, but trying -to create a worker/wrapper for type `a` obviously fails. -(This was a real example until ee8e792 in libraries/base.) - -It does not seem feasible to avoid all such cases already in the analyser (and -after all, the analysis is not really wrong), so we simply do nothing here in -mkWWcpr. But we still want to emit warning with -DDEBUG, to hopefully catch -other cases where something went avoidably wrong. - - -Note [Profiling and unpacking] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If the original function looked like - f = \ x -> {-# SCC "foo" #-} E - -then we want the CPR'd worker to look like - \ x -> {-# SCC "foo" #-} (case E of I# x -> x) -and definitely not - \ x -> case ({-# SCC "foo" #-} E) of I# x -> x) - -This transform doesn't move work or allocation -from one cost centre to another. - -Later [SDM]: presumably this is because we want the simplifier to -eliminate the case, and the scc would get in the way? I'm ok with -including the case itself in the cost centre, since it is morally -part of the function (post transformation) anyway. - - -%************************************************************************ -%* * -\subsection{Utilities} -%* * -%************************************************************************ - -Note [Absent errors] -~~~~~~~~~~~~~~~~~~~~ -We make a new binding for Ids that are marked absent, thus - let x = absentError "x :: Int" -The idea is that this binding will never be used; but if it -buggily is used we'll get a runtime error message. - -Coping with absence for *unlifted* types is important; see, for -example, Trac #4306. For these we find a suitable literal, -using Literal.absentLiteralOf. We don't have literals for -every primitive type, so the function is partial. - - [I did try the experiment of using an error thunk for unlifted - things too, relying on the simplifier to drop it as dead code, - by making absentError - (a) *not* be a bottoming Id, - (b) be "ok for speculation" - But that relies on the simplifier finding that it really - is dead code, which is fragile, and indeed failed when - profiling is on, which disables various optimisations. So - using a literal will do.] - -\begin{code} -mk_absent_let :: DynFlags -> Id -> Maybe (CoreExpr -> CoreExpr) -mk_absent_let dflags arg - | not (isUnLiftedType arg_ty) - = Just (Let (NonRec arg abs_rhs)) - | Just tc <- tyConAppTyCon_maybe arg_ty - , Just lit <- absentLiteralOf tc - = Just (Let (NonRec arg (Lit lit))) - | arg_ty `eqType` voidPrimTy - = Just (Let (NonRec arg (Var voidPrimId))) - | otherwise - = WARN( True, ptext (sLit "No absent value for") <+> ppr arg_ty ) - Nothing - where - arg_ty = idType arg - abs_rhs = mkRuntimeErrorApp aBSENT_ERROR_ID arg_ty msg - msg = showSDoc dflags (ppr arg <+> ppr (idType arg)) - -mk_seq_case :: Id -> CoreExpr -> CoreExpr -mk_seq_case arg body = Case (Var arg) (sanitiseCaseBndr arg) (exprType body) [(DEFAULT, [], body)] - -sanitiseCaseBndr :: Id -> Id --- The argument we are scrutinising has the right type to be --- a case binder, so it's convenient to re-use it for that purpose. --- But we *must* throw away all its IdInfo. In particular, the argument --- will have demand info on it, and that demand info may be incorrect for --- the case binder. e.g. case ww_arg of ww_arg { I# x -> ... } --- Quite likely ww_arg isn't used in '...'. The case may get discarded --- if the case binder says "I'm demanded". This happened in a situation --- like (x+y) `seq` .... -sanitiseCaseBndr id = id `setIdInfo` vanillaIdInfo - -mk_ww_local :: Unique -> Type -> Id -mk_ww_local uniq ty = mkSysLocal (fsLit "ww") uniq ty -\end{code} -- cgit v1.2.1