diff options
Diffstat (limited to 'compiler/GHC/Core/Opt')
-rw-r--r-- | compiler/GHC/Core/Opt/Arity.hs | 545 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Pipeline.hs | 52 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify.hs | 38 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Simplify/Env.hs | 2 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Specialise.hs | 279 |
5 files changed, 679 insertions, 237 deletions
diff --git a/compiler/GHC/Core/Opt/Arity.hs b/compiler/GHC/Core/Opt/Arity.hs index b55d91767e..2471470814 100644 --- a/compiler/GHC/Core/Opt/Arity.hs +++ b/compiler/GHC/Core/Opt/Arity.hs @@ -15,10 +15,18 @@ module GHC.Core.Opt.Arity ( manifestArity, joinRhsArity, exprArity, typeArity , exprEtaExpandArity, findRhsArity , etaExpand, etaExpandAT - , etaExpandToJoinPoint, etaExpandToJoinPointRule , exprBotStrictness_maybe + + -- ** ArityType , ArityType(..), expandableArityType, arityTypeArity , maxWithArity, isBotArityType, idArityType + + -- ** Join points + , etaExpandToJoinPoint, etaExpandToJoinPointRule + + -- ** Coercions and casts + , pushCoArg, pushCoArgs, pushCoValArg, pushCoTyArg + , pushCoercionIntoLambda, pushCoDataCon, collectBindersPushingCo ) where @@ -31,15 +39,21 @@ import GHC.Driver.Ppr import GHC.Core import GHC.Core.FVs import GHC.Core.Utils -import GHC.Core.Subst import GHC.Types.Demand import GHC.Types.Var import GHC.Types.Var.Env import GHC.Types.Id -import GHC.Core.Type as Type -import GHC.Core.TyCon ( initRecTc, checkRecTc ) + +-- We have two sorts of substitution: +-- GHC.Core.Subst.Subst, and GHC.Core.TyCo.TCvSubst +-- Both have substTy, substCo Hence need for qualification +import GHC.Core.Subst as Core +import GHC.Core.Type as Type +import GHC.Core.Coercion as Type + +import GHC.Core.DataCon +import GHC.Core.TyCon ( initRecTc, checkRecTc, tyConArity ) import GHC.Core.Predicate ( isDictTy ) -import GHC.Core.Coercion as Coercion import GHC.Core.Multiplicity import GHC.Types.Var.Set import GHC.Types.Basic @@ -48,7 +62,8 @@ import GHC.Driver.Session ( DynFlags, GeneralFlag(..), gopt ) import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Data.FastString -import GHC.Utils.Misc ( lengthAtLeast ) +import GHC.Data.Pair +import GHC.Utils.Misc {- ************************************************************************ @@ -1076,12 +1091,11 @@ eta_expand one_shots orig_expr go oss (Cast expr co) = Cast (go oss expr) co go oss expr - = -- pprTrace "ee" (vcat [ppr orig_expr, ppr expr, ppr etas]) $ - retick $ etaInfoAbs etas (etaInfoApp subst' sexpr etas) + = -- pprTrace "ee" (vcat [ppr orig_expr, ppr expr, pprEtaInfos etas]) $ + retick $ etaInfoAbs etas (etaInfoApp in_scope' sexpr etas) where in_scope = mkInScopeSet (exprFreeVars expr) (in_scope', etas) = mkEtaWW oss (ppr orig_expr) in_scope (exprType expr) - subst' = mkEmptySubst in_scope' -- Find ticks behind type apps. -- See Note [Eta expansion and source notes] @@ -1090,76 +1104,197 @@ eta_expand one_shots orig_expr sexpr = foldl' App expr'' args retick expr = foldr mkTick expr ticks - -- Abstraction Application +{- ********************************************************************* +* * + The EtaInfo mechanism + mkEtaWW, etaInfoAbs, etaInfoApp +* * +********************************************************************* -} + +{- Note [The EtaInfo mechanism] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Suppose we have (e :: ty) and we want to eta-expand it to arity N. +This what eta_expand does. We do it in two steps: + +1. mkEtaWW: from 'ty' and 'N' build a [EtaInfo] which describes + the shape of the expansion necessary to expand to arity N. + +2. Build the term + \ v1..vn. e v1 .. vn + where those abstractions and applications are described by + the same [EtaInfo]. Specifically we build the term + + etaInfoAbs etas (etaInfoApp in_scope e etas) + + where etas :: [EtaInfo]# + etaInfoAbs builds the lambdas + etaInfoApp builds the applictions + + Note that the /same/ [EtaInfo] drives both etaInfoAbs and etaInfoApp + +To a first approximation [EtaInfo] is just [Var]. But +casts complicate the question. If we have + newtype N a = MkN (S -> a) +and + ty = N (N Int) +then the eta-expansion must look like + (\x (\y. ((e |> co1) x) |> co2) y) + |> sym co2) + |> sym co1 +where + co1 :: N (N Int) ~ S -> N Int + co2 :: N Int ~ S -> Int + +Blimey! Look at all those casts. Moreover, if the type +is very deeply nested (as happens in #18223), the repetition +of types can make the overall term very large. So there is a big +payoff in cancelling out casts aggressively wherever possible. +(See also Note [No crap in eta-expanded code].) + +This matters a lot in etaEInfoApp, where we +* Do beta-reduction on the fly +* Use getARg_mabye to get a cast out of the way, + so that we can do beta reduction +Together this makes a big difference. Consider when e is + case x of + True -> (\x -> e1) |> c1 + False -> (\p -> e2) |> c2 + +When we eta-expand this to arity 1, say, etaInfoAbs will wrap +a (\eta) around the outside and use etaInfoApp to apply each +alternative to 'eta'. We want to beta-reduce all that junk +away. + +#18223 was a dramtic example in which the intermediate term was +grotesquely huge, even though the next Simplifier iteration squashed +it. Better to kill it at birth. +-} + -------------- -data EtaInfo = EtaVar Var -- /\a. [] [] a - -- \x. [] [] x - | EtaCo Coercion -- [] |> sym co [] |> co +data EtaInfo -- Abstraction Application + = EtaVar Var -- /\a. [] [] a + -- (\x. []) [] x + | EtaCo CoercionR -- [] |> sym co [] |> co instance Outputable EtaInfo where - ppr (EtaVar v) = text "EtaVar" <+> ppr v - ppr (EtaCo co) = text "EtaCo" <+> ppr co + ppr (EtaVar v) = text "EtaVar" <+> ppr v <+> dcolon <+> ppr (idType v) + ppr (EtaCo co) = text "EtaCo" <+> hang (ppr co) 2 (dcolon <+> ppr (coercionType co)) + +-- Used in debug-printing +-- pprEtaInfos :: [EtaInfo] -> SDoc +-- pprEtaInfos eis = brackets $ vcat $ punctuate comma $ map ppr eis pushCoercion :: Coercion -> [EtaInfo] -> [EtaInfo] +-- Puts a EtaCo on the front of a [EtaInfo], but combining +-- with an existing EtaCo if possible +-- A minor improvement pushCoercion co1 (EtaCo co2 : eis) | isReflCo co = eis | otherwise = EtaCo co : eis where co = co1 `mkTransCo` co2 -pushCoercion co eis = EtaCo co : eis +pushCoercion co eis + = EtaCo co : eis + +getArg_maybe :: [EtaInfo] -> Maybe (CoreArg, [EtaInfo]) +-- Get an argument to the front of the [EtaInfo], if possible, +-- by pushing any EtaCo through the argument +getArg_maybe eis = go MRefl eis + where + go :: MCoercion -> [EtaInfo] -> Maybe (CoreArg, [EtaInfo]) + go _ [] = Nothing + go mco (EtaCo co2 : eis) = go (mkTransMCoL mco co2) eis + go MRefl (EtaVar v : eis) = Just (varToCoreExpr v, eis) + go (MCo co) (EtaVar v : eis) + | Just (arg, mco) <- pushCoArg co (varToCoreExpr v) + = case mco of + MRefl -> Just (arg, eis) + MCo co -> Just (arg, pushCoercion co eis) + | otherwise + = Nothing + +mkCastMCo :: CoreExpr -> MCoercionR -> CoreExpr +mkCastMCo e MRefl = e +mkCastMCo e (MCo co) = Cast e co + -- We are careful to use (MCo co) only when co is not reflexive + -- Hence (Cast e co) rather than (mkCast e co) + +mkPiMCo :: Var -> MCoercionR -> MCoercionR +mkPiMCo _ MRefl = MRefl +mkPiMCo v (MCo co) = MCo (mkPiCo Representational v co) -------------- etaInfoAbs :: [EtaInfo] -> CoreExpr -> CoreExpr -etaInfoAbs [] expr = expr -etaInfoAbs (EtaVar v : eis) expr = Lam v (etaInfoAbs eis expr) -etaInfoAbs (EtaCo co : eis) expr = Cast (etaInfoAbs eis expr) (mkSymCo co) +-- See Note [The EtaInfo mechanism] +etaInfoAbs eis expr + | null eis = expr + | otherwise = case final_mco of + MRefl -> expr' + MCo co -> mkCast expr' co + where + (expr', final_mco) = foldr do_one (split_cast expr) eis + + do_one :: EtaInfo -> (CoreExpr, MCoercion) -> (CoreExpr, MCoercion) + -- Implements the "Abstraction" column in the comments for data EtaInfo + -- In both argument and result the pair (e,mco) denotes (e |> mco) + do_one (EtaVar v) (expr, mco) = (Lam v expr, mkPiMCo v mco) + do_one (EtaCo co) (expr, mco) = (expr, mco `mkTransMCoL` mkSymCo co) + + split_cast :: CoreExpr -> (CoreExpr, MCoercion) + split_cast (Cast e co) = (e, MCo co) + split_cast e = (e, MRefl) + -- We could look in the body of lets, and the branches of a case + -- But then we would have to worry about whether the cast mentioned + -- any of the bound variables, which is tiresome. Later maybe. + -- Result: we may end up with + -- (\(x::Int). case x of { DEFAULT -> e1 |> co }) |> sym (<Int>->co) + -- and fail to optimise it away -------------- -etaInfoApp :: Subst -> CoreExpr -> [EtaInfo] -> CoreExpr +etaInfoApp :: InScopeSet -> CoreExpr -> [EtaInfo] -> CoreExpr -- (etaInfoApp s e eis) returns something equivalent to --- ((substExpr s e) `appliedto` eis) - -etaInfoApp subst (Lam v1 e) (EtaVar v2 : eis) - = etaInfoApp (GHC.Core.Subst.extendSubstWithVar subst v1 v2) e eis - -etaInfoApp subst (Cast e co1) eis - = etaInfoApp subst e (pushCoercion co' eis) - where - co' = GHC.Core.Subst.substCo subst co1 +-- (substExpr s e `appliedto` eis) +-- See Note [The EtaInfo mechanism] -etaInfoApp subst (Case e b ty alts) eis - = Case (subst_expr subst e) b1 ty' alts' +etaInfoApp in_scope expr eis + = go (mkEmptySubst in_scope) expr eis where - (subst1, b1) = substBndr subst b - alts' = map subst_alt alts - ty' = etaInfoAppTy (GHC.Core.Subst.substTy subst ty) eis - subst_alt (con, bs, rhs) = (con, bs', etaInfoApp subst2 rhs eis) - where - (subst2,bs') = substBndrs subst1 bs - -etaInfoApp subst (Let b e) eis - | not (isJoinBind b) - -- See Note [Eta expansion for join points] - = Let b' (etaInfoApp subst' e eis) - where - (subst', b') = substBindSC subst b + go :: Subst -> CoreExpr -> [EtaInfo] -> CoreExpr + -- 'go' pushed down the eta-infos into the branch of a case + -- and the body of a let; and does beta-reduction if possible + go subst (Tick t e) eis + = Tick (substTickish subst t) (go subst e eis) + go subst (Cast e co) eis + = go subst e (pushCoercion (Core.substCo subst co) eis) + go subst (Case e b ty alts) eis + = Case (Core.substExprSC subst e) b1 ty' alts' + where + (subst1, b1) = Core.substBndr subst b + alts' = map subst_alt alts + ty' = etaInfoAppTy (Core.substTy subst ty) eis + subst_alt (con, bs, rhs) = (con, bs', go subst2 rhs eis) + where + (subst2,bs') = Core.substBndrs subst1 bs + go subst (Let b e) eis + | not (isJoinBind b) -- See Note [Eta expansion for join points] + = Let b' (go subst' e eis) + where + (subst', b') = Core.substBindSC subst b -etaInfoApp subst (Tick t e) eis - = Tick (substTickish subst t) (etaInfoApp subst e eis) + -- Beta-reduction if possible, using getArg_maybe to push + -- any intervening casts past the argument + -- See Note [The EtaInfo mechansim] + go subst (Lam v e) eis + | Just (arg, eis') <- getArg_maybe eis + = go (Core.extendSubst subst v arg) e eis' -etaInfoApp subst expr _ - | (Var fun, _) <- collectArgs expr - , Var fun' <- lookupIdSubst subst fun - , isJoinId fun' - = subst_expr subst expr + -- Stop pushing down; just wrap the expression up + go subst e eis = wrap (Core.substExprSC subst e) eis -etaInfoApp subst e eis - = go (subst_expr subst e) eis - where - go e [] = e - go e (EtaVar v : eis) = go (App e (varToCoreExpr v)) eis - go e (EtaCo co : eis) = go (Cast e co) eis + wrap e [] = e + wrap e (EtaVar v : eis) = wrap (App e (varToCoreExpr v)) eis + wrap e (EtaCo co : eis) = wrap (Cast e co) eis -------------- @@ -1235,7 +1370,7 @@ mkEtaWW orig_oss ppr_orig_expr in_scope orig_ty -- We want to get -- coerce T (\x::[T] -> (coerce ([T]->Int) e) x) | Just (co, ty') <- topNormaliseNewType_maybe ty - , let co' = Coercion.substCo subst co + , let co' = Type.substCo subst co -- Remember to apply the substitution to co (#16979) -- (or we could have applied to ty, but then -- we'd have had to zap it for the recursive call) @@ -1253,21 +1388,290 @@ mkEtaWW orig_oss ppr_orig_expr in_scope orig_ty -- with an explicit lambda having a non-function type +{- ********************************************************************* +* * + The "push rules" +* * +************************************************************************ ------------- -subst_expr :: Subst -> CoreExpr -> CoreExpr --- Apply a substitution to an expression. We use substExpr --- not substExprSC (short-cutting substitution) because --- we may be changing the types of join points, so applying --- the in-scope set is necessary. +Here we implement the "push rules" from FC papers: + +* The push-argument rules, where we can move a coercion past an argument. + We have + (fun |> co) arg + and we want to transform it to + (fun arg') |> co' + for some suitable co' and transformed arg'. + +* The PushK rule for data constructors. We have + (K e1 .. en) |> co + and we want to transform to + (K e1' .. en') + by pushing the coercion into the arguments +-} + +pushCoArgs :: CoercionR -> [CoreArg] -> Maybe ([CoreArg], MCoercion) +pushCoArgs co [] = return ([], MCo co) +pushCoArgs co (arg:args) = do { (arg', m_co1) <- pushCoArg co arg + ; case m_co1 of + MCo co1 -> do { (args', m_co2) <- pushCoArgs co1 args + ; return (arg':args', m_co2) } + MRefl -> return (arg':args, MRefl) } + +pushCoArg :: CoercionR -> CoreArg -> Maybe (CoreArg, MCoercion) +-- We have (fun |> co) arg, and we want to transform it to +-- (fun arg) |> co +-- This may fail, e.g. if (fun :: N) where N is a newtype +-- C.f. simplCast in GHC.Core.Opt.Simplify +-- 'co' is always Representational +-- If the returned coercion is Nothing, then it would have been reflexive +pushCoArg co (Type ty) = do { (ty', m_co') <- pushCoTyArg co ty + ; return (Type ty', m_co') } +pushCoArg co val_arg = do { (arg_co, m_co') <- pushCoValArg co + ; return (val_arg `mkCastMCo` arg_co, m_co') } + +pushCoTyArg :: CoercionR -> Type -> Maybe (Type, MCoercionR) +-- We have (fun |> co) @ty +-- Push the coercion through to return +-- (fun @ty') |> co' +-- 'co' is always Representational +-- If the returned coercion is Nothing, then it would have been reflexive; +-- it's faster not to compute it, though. +pushCoTyArg co ty + -- The following is inefficient - don't do `eqType` here, the coercion + -- optimizer will take care of it. See #14737. + -- -- | tyL `eqType` tyR + -- -- = Just (ty, Nothing) + + | isReflCo co + = Just (ty, MRefl) + + | isForAllTy_ty tyL + = ASSERT2( isForAllTy_ty tyR, ppr co $$ ppr ty ) + Just (ty `mkCastTy` co1, MCo co2) + + | otherwise + = Nothing + where + Pair tyL tyR = coercionKind co + -- co :: tyL ~ tyR + -- tyL = forall (a1 :: k1). ty1 + -- tyR = forall (a2 :: k2). ty2 + + co1 = mkSymCo (mkNthCo Nominal 0 co) + -- co1 :: k2 ~N k1 + -- Note that NthCo can extract a Nominal equality between the + -- kinds of the types related by a coercion between forall-types. + -- See the NthCo case in GHC.Core.Lint. + + co2 = mkInstCo co (mkGReflLeftCo Nominal ty co1) + -- co2 :: ty1[ (ty|>co1)/a1 ] ~ ty2[ ty/a2 ] + -- Arg of mkInstCo is always nominal, hence mkNomReflCo + +pushCoValArg :: CoercionR -> Maybe (MCoercionR, MCoercionR) +-- We have (fun |> co) arg +-- Push the coercion through to return +-- (fun (arg |> co_arg)) |> co_res +-- 'co' is always Representational +-- If the second returned Coercion is actually Nothing, then no cast is necessary; +-- the returned coercion would have been reflexive. +pushCoValArg co + -- The following is inefficient - don't do `eqType` here, the coercion + -- optimizer will take care of it. See #14737. + -- -- | tyL `eqType` tyR + -- -- = Just (mkRepReflCo arg, Nothing) + + | isReflCo co + = Just (MRefl, MRefl) + + | isFunTy tyL + , (co_mult, co1, co2) <- decomposeFunCo Representational co + , isReflexiveCo co_mult + -- We can't push the coercion in the case where co_mult isn't reflexivity: + -- it could be an unsafe axiom, and losing this information could yield + -- ill-typed terms. For instance (fun x ::(1) Int -> (fun _ -> () |> co) x) + -- with co :: (Int -> ()) ~ (Int #-> ()), would reduce to (fun x ::(1) Int + -- -> (fun _ ::(Many) Int -> ()) x) which is ill-typed + + -- If co :: (tyL1 -> tyL2) ~ (tyR1 -> tyR2) + -- then co1 :: tyL1 ~ tyR1 + -- co2 :: tyL2 ~ tyR2 + = ASSERT2( isFunTy tyR, ppr co $$ ppr arg ) + Just (coToMCo (mkSymCo co1), coToMCo co2) + -- Critically, coToMCo to checks for ReflCo; the whole coercion may not + -- be reflexive, but either of its components might be + -- We could use isReflexiveCo, but it's not clear if the benefit + -- is worth the cost, and it makes no difference in #18223 + + | otherwise + = Nothing + where + arg = funArgTy tyR + Pair tyL tyR = coercionKind co + +pushCoercionIntoLambda + :: InScopeSet -> Var -> CoreExpr -> CoercionR -> Maybe (Var, CoreExpr) +-- This implements the Push rule from the paper on coercions +-- (\x. e) |> co +-- ===> +-- (\x'. e |> co') +pushCoercionIntoLambda in_scope x e co + | ASSERT(not (isTyVar x) && not (isCoVar x)) True + , Pair s1s2 t1t2 <- coercionKind co + , Just (_, _s1,_s2) <- splitFunTy_maybe s1s2 + , Just (w1, t1,_t2) <- splitFunTy_maybe t1t2 + , (co_mult, co1, co2) <- decomposeFunCo Representational co + , isReflexiveCo co_mult + -- We can't push the coercion in the case where co_mult isn't + -- reflexivity. See pushCoValArg for more details. + = let + -- Should we optimize the coercions here? + -- Otherwise they might not match too well + x' = x `setIdType` t1 `setIdMult` w1 + in_scope' = in_scope `extendInScopeSet` x' + subst = extendIdSubst (mkEmptySubst in_scope') + x + (mkCast (Var x') co1) + in Just (x', substExpr subst e `mkCast` co2) + | otherwise + = pprTrace "exprIsLambda_maybe: Unexpected lambda in case" (ppr (Lam x e)) + Nothing + +pushCoDataCon :: DataCon -> [CoreExpr] -> Coercion + -> Maybe (DataCon + , [Type] -- Universal type args + , [CoreExpr]) -- All other args incl existentials +-- Implement the KPush reduction rule as described in "Down with kinds" +-- The transformation applies iff we have +-- (C e1 ... en) `cast` co +-- where co :: (T t1 .. tn) ~ to_ty +-- The left-hand one must be a T, because exprIsConApp returned True +-- but the right-hand one might not be. (Though it usually will.) +pushCoDataCon dc dc_args co + | isReflCo co || from_ty `eqType` to_ty -- try cheap test first + , let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) dc_args + = Just (dc, map exprToType univ_ty_args, rest_args) + + | Just (to_tc, to_tc_arg_tys) <- splitTyConApp_maybe to_ty + , to_tc == dataConTyCon dc + -- These two tests can fail; we might see + -- (C x y) `cast` (g :: T a ~ S [a]), + -- where S is a type function. In fact, exprIsConApp + -- will probably not be called in such circumstances, + -- but there's nothing wrong with it + + = let + tc_arity = tyConArity to_tc + dc_univ_tyvars = dataConUnivTyVars dc + dc_ex_tcvars = dataConExTyCoVars dc + arg_tys = dataConRepArgTys dc + + non_univ_args = dropList dc_univ_tyvars dc_args + (ex_args, val_args) = splitAtList dc_ex_tcvars non_univ_args + + -- Make the "Psi" from the paper + omegas = decomposeCo tc_arity co (tyConRolesRepresentational to_tc) + (psi_subst, to_ex_arg_tys) + = liftCoSubstWithEx Representational + dc_univ_tyvars + omegas + dc_ex_tcvars + (map exprToType ex_args) + + -- Cast the value arguments (which include dictionaries) + new_val_args = zipWith cast_arg (map scaledThing arg_tys) val_args + cast_arg arg_ty arg = mkCast arg (psi_subst arg_ty) + + to_ex_args = map Type to_ex_arg_tys + + dump_doc = vcat [ppr dc, ppr dc_univ_tyvars, ppr dc_ex_tcvars, + ppr arg_tys, ppr dc_args, + ppr ex_args, ppr val_args, ppr co, ppr from_ty, ppr to_ty, ppr to_tc + , ppr $ mkTyConApp to_tc (map exprToType $ takeList dc_univ_tyvars dc_args) ] + in + ASSERT2( eqType from_ty (mkTyConApp to_tc (map exprToType $ takeList dc_univ_tyvars dc_args)), dump_doc ) + ASSERT2( equalLength val_args arg_tys, dump_doc ) + Just (dc, to_tc_arg_tys, to_ex_args ++ new_val_args) + + | otherwise + = Nothing + + where + Pair from_ty to_ty = coercionKind co + +collectBindersPushingCo :: CoreExpr -> ([Var], CoreExpr) +-- Collect lambda binders, pushing coercions inside if possible +-- E.g. (\x.e) |> g g :: <Int> -> blah +-- = (\x. e |> Nth 1 g) +-- +-- That is, -- --- ToDo: we could instead check if we actually *are* --- changing any join points' types, and if not use substExprSC. -subst_expr = substExpr +-- collectBindersPushingCo ((\x.e) |> g) === ([x], e |> Nth 1 g) +collectBindersPushingCo e + = go [] e + where + -- Peel off lambdas until we hit a cast. + go :: [Var] -> CoreExpr -> ([Var], CoreExpr) + -- The accumulator is in reverse order + go bs (Lam b e) = go (b:bs) e + go bs (Cast e co) = go_c bs e co + go bs e = (reverse bs, e) + + -- We are in a cast; peel off casts until we hit a lambda. + go_c :: [Var] -> CoreExpr -> CoercionR -> ([Var], CoreExpr) + -- (go_c bs e c) is same as (go bs e (e |> c)) + go_c bs (Cast e co1) co2 = go_c bs e (co1 `mkTransCo` co2) + go_c bs (Lam b e) co = go_lam bs b e co + go_c bs e co = (reverse bs, mkCast e co) + + -- We are in a lambda under a cast; peel off lambdas and build a + -- new coercion for the body. + go_lam :: [Var] -> Var -> CoreExpr -> CoercionR -> ([Var], CoreExpr) + -- (go_lam bs b e c) is same as (go_c bs (\b.e) c) + go_lam bs b e co + | isTyVar b + , let Pair tyL tyR = coercionKind co + , ASSERT( isForAllTy_ty tyL ) + isForAllTy_ty tyR + , isReflCo (mkNthCo Nominal 0 co) -- See Note [collectBindersPushingCo] + = go_c (b:bs) e (mkInstCo co (mkNomReflCo (mkTyVarTy b))) + + | isCoVar b + , let Pair tyL tyR = coercionKind co + , ASSERT( isForAllTy_co tyL ) + isForAllTy_co tyR + , isReflCo (mkNthCo Nominal 0 co) -- See Note [collectBindersPushingCo] + , let cov = mkCoVarCo b + = go_c (b:bs) e (mkInstCo co (mkNomReflCo (mkCoercionTy cov))) + + | isId b + , let Pair tyL tyR = coercionKind co + , ASSERT( isFunTy tyL) isFunTy tyR + , (co_mult, co_arg, co_res) <- decomposeFunCo Representational co + , isReflCo co_mult -- See Note [collectBindersPushingCo] + , isReflCo co_arg -- See Note [collectBindersPushingCo] + = go_c (b:bs) e co_res + + | otherwise = (reverse bs, mkCast (Lam b e) co) +{- --------------- +Note [collectBindersPushingCo] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We just look for coercions of form + <type> # w -> blah +(and similarly for foralls) to keep this function simple. We could do +more elaborate stuff, but it'd involve substitution etc. + +-} + +{- ********************************************************************* +* * + Join points +* * +********************************************************************* -} +------------------- -- | Split an expression into the given number of binders and a body, -- eta-expanding if necessary. Counts value *and* type binders. etaExpandToJoinPoint :: JoinArity -> CoreExpr -> ([CoreBndr], CoreExpr) @@ -1307,7 +1711,7 @@ etaBodyForJoinPoint need_args body = (reverse rev_bs, e) go n ty subst rev_bs e | Just (tv, res_ty) <- splitForAllTy_maybe ty - , let (subst', tv') = Type.substVarBndr subst tv + , let (subst', tv') = substVarBndr subst tv = go (n-1) res_ty subst' (tv' : rev_bs) (e `App` varToCoreExpr tv') | Just (mult, arg_ty, res_ty) <- splitFunTy_maybe ty , let (subst', b) = freshEtaId n subst (Scaled mult arg_ty) @@ -1318,6 +1722,8 @@ etaBodyForJoinPoint need_args body init_subst e = mkEmptyTCvSubst (mkInScopeSet (exprFreeVars e)) + + -------------- freshEtaId :: Int -> TCvSubst -> Scaled Type -> (TCvSubst, Id) -- Make a fresh Id, with specified type (after applying substitution) @@ -1336,3 +1742,4 @@ freshEtaId n subst ty -- "OrCoVar" since this can be used to eta-expand -- coercion abstractions subst' = extendTCvInScope subst eta_id' + diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs index c3f2fc9f85..6d0712e634 100644 --- a/compiler/GHC/Core/Opt/Pipeline.hs +++ b/compiler/GHC/Core/Opt/Pipeline.hs @@ -315,33 +315,38 @@ getCoreToDo dflags runWhen do_float_in CoreDoFloatInwards, + simplify "final", -- Final tidy-up + maybe_rule_check FinalPhase, + -------- After this we have -O2 passes ----------------- + -- None of them run with -O + -- Case-liberation for -O2. This should be after -- strictness analysis and the simplification which follows it. - runWhen liberate_case (CoreDoPasses [ - CoreLiberateCase, - simplify "post-liberate-case" - ]), -- Run the simplifier after LiberateCase to vastly - -- reduce the possibility of shadowing - -- Reason: see Note [Shadowing] in GHC.Core.Opt.SpecConstr + runWhen liberate_case $ CoreDoPasses + [ CoreLiberateCase, simplify "post-liberate-case" ], + -- Run the simplifier after LiberateCase to vastly + -- reduce the possibility of shadowing + -- Reason: see Note [Shadowing] in GHC.Core.Opt.SpecConstr - runWhen spec_constr CoreDoSpecConstr, + runWhen spec_constr $ CoreDoPasses + [ CoreDoSpecConstr, simplify "post-spec-constr"], + -- See Note [Simplify after SpecConstr] maybe_rule_check FinalPhase, - runWhen late_specialise - (CoreDoPasses [ CoreDoSpecialising - , simplify "post-late-spec"]), + runWhen late_specialise $ CoreDoPasses + [ CoreDoSpecialising, simplify "post-late-spec"], -- LiberateCase can yield new CSE opportunities because it peels -- off one layer of a recursive function (concretely, I saw this -- in wheel-sieve1), and I'm guessing that SpecConstr can too -- And CSE is a very cheap pass. So it seems worth doing here. - runWhen ((liberate_case || spec_constr) && cse) CoreCSE, + runWhen ((liberate_case || spec_constr) && cse) $ CoreDoPasses + [ CoreCSE, simplify "post-final-cse" ], - -- Final clean-up simplification: - simplify "final", + --------- End of -O2 passes -------------- runWhen late_dmd_anal $ CoreDoPasses ( dmd_cpr_ww ++ [simplify "post-late-ww"] @@ -410,6 +415,27 @@ or with -O0. Two reasons: But watch out: list fusion can prevent floating. So use phase control to switch off those rules until after floating. +Note [Simplify after SpecConstr] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +We want to run the simplifier after SpecConstr, and before late-Specialise, +for two reasons, both shown up in test perf/compiler/T16473, +with -O2 -flate-specialise + +1. I found that running late-Specialise after SpecConstr, with no + simplification in between meant that the carefullly constructed + SpecConstr rule never got to fire. (It was something like + lvl = f a -- Arity 1 + ....g lvl.... + SpecConstr specialised g for argument lvl; but Specialise then + specialised lvl = f a to lvl = $sf, and inlined. Or something like + that.) + +2. Specialise relies on unfoldings being available for top-level dictionary + bindings; but SpecConstr kills them all! The Simplifer restores them. + +This extra run of the simplifier has a cost, but this is only with -O2. + + ************************************************************************ * * The CoreToDo interpreter diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs index e219a0dba9..6c207766bd 100644 --- a/compiler/GHC/Core/Opt/Simplify.hs +++ b/compiler/GHC/Core/Opt/Simplify.hs @@ -51,9 +51,9 @@ import GHC.Core.Unfold import GHC.Core.Unfold.Make import GHC.Core.Utils import GHC.Core.Opt.Arity ( ArityType(..), arityTypeArity, isBotArityType + , pushCoTyArg, pushCoValArg , idArityType, etaExpandAT ) -import GHC.Core.SimpleOpt ( pushCoTyArg, pushCoValArg - , joinPointBinding_maybe, joinPointBindings_maybe ) +import GHC.Core.SimpleOpt ( joinPointBinding_maybe, joinPointBindings_maybe ) import GHC.Core.FVs ( mkRuleInfo ) import GHC.Core.Rules ( lookupRule, getRules, initRuleOpts ) import GHC.Types.Basic @@ -318,7 +318,7 @@ simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se ; let rhs_cont = mkRhsStop (substTy body_env (exprType body)) ; (body_floats0, body0) <- {-#SCC "simplExprF" #-} simplExprF body_env body rhs_cont - -- Never float join-floats out of a non-join let-binding + -- Never float join-floats out of a non-join let-binding (which this is) -- So wrap the body in the join-floats right now -- Hence: body_floats1 consists only of let-floats ; let (body_floats1, body1) = wrapJoinFloatsX body_floats0 body0 @@ -1414,25 +1414,23 @@ simplCast env body co0 cont0 -- type of the hole changes (#16312) -- (f |> co) e ===> (f (e |> co1)) |> co2 - -- where co :: (s1->s2) ~ (t1~t2) + -- where co :: (s1->s2) ~ (t1->t2) -- co1 :: t1 ~ s1 -- co2 :: s2 ~ t2 addCoerce co cont@(ApplyToVal { sc_arg = arg, sc_env = arg_se , sc_dup = dup, sc_cont = tail }) - | Just (co1, m_co2) <- pushCoValArg co - , let new_ty = coercionRKind co1 - , not (isTypeLevPoly new_ty) -- Without this check, we get a lev-poly arg - -- See Note [Levity polymorphism invariants] in GHC.Core - -- test: typecheck/should_run/EtaExpandLevPoly + | Just (m_co1, m_co2) <- pushCoValArg co + , levity_ok m_co1 = {-#SCC "addCoerce-pushCoValArg" #-} do { tail' <- addCoerceM m_co2 tail - ; if isReflCo co1 - then return (cont { sc_cont = tail' - , sc_hole_ty = coercionLKind co }) + ; case m_co1 of { + MRefl -> return (cont { sc_cont = tail' + , sc_hole_ty = coercionLKind co }) ; -- Avoid simplifying if possible; -- See Note [Avoiding exponential behaviour] - else do - { (dup', arg_se', arg') <- simplArg env dup arg_se arg + + MCo co1 -> + do { (dup', arg_se', arg') <- simplArg env dup arg_se arg -- When we build the ApplyTo we can't mix the OutCoercion -- 'co' with the InExpr 'arg', so we simplify -- to make it all consistent. It's a bit messy. @@ -1442,7 +1440,7 @@ simplCast env body co0 cont0 , sc_env = arg_se' , sc_dup = dup' , sc_cont = tail' - , sc_hole_ty = coercionLKind co }) } } + , sc_hole_ty = coercionLKind co }) } } } addCoerce co cont | isReflexiveCo co = return cont -- Having this at the end makes a huge @@ -1450,6 +1448,13 @@ simplCast env body co0 cont0 -- See Note [Optimising reflexivity] | otherwise = return (CastIt co cont) + levity_ok :: MCoercionR -> Bool + levity_ok MRefl = True + levity_ok (MCo co) = not $ isTypeLevPoly $ coercionRKind co + -- Without this check, we get a lev-poly arg + -- See Note [Levity polymorphism invariants] in GHC.Core + -- test: typecheck/should_run/EtaExpandLevPoly + simplArg :: SimplEnv -> DupFlag -> StaticEnv -> CoreExpr -> SimplM (DupFlag, StaticEnv, OutExpr) simplArg env dup_flag arg_env arg @@ -3114,7 +3119,7 @@ knownCon :: SimplEnv knownCon env scrut dc_floats dc dc_ty_args dc_args bndr bs rhs cont = do { (floats1, env1) <- bind_args env bs dc_args - ; (floats2, env2) <- bind_case_bndr env1 + ; (floats2, env2) <- bind_case_bndr env1 ; (floats3, expr') <- simplExprF env2 rhs cont ; case dc_floats of [] -> @@ -3240,6 +3245,7 @@ altsWouldDup [_] = False altsWouldDup (alt:alts) | is_bot_alt alt = altsWouldDup alts | otherwise = not (all is_bot_alt alts) + -- otherwise case: first alt is non-bot, so all the rest must be bot where is_bot_alt (_,_,rhs) = exprIsDeadEnd rhs diff --git a/compiler/GHC/Core/Opt/Simplify/Env.hs b/compiler/GHC/Core/Opt/Simplify/Env.hs index 4ceaf637ed..7049e3e578 100644 --- a/compiler/GHC/Core/Opt/Simplify/Env.hs +++ b/compiler/GHC/Core/Opt/Simplify/Env.hs @@ -596,7 +596,7 @@ addJoinFlts :: JoinFloats -> JoinFloats -> JoinFloats addJoinFlts = appOL mkRecFloats :: SimplFloats -> SimplFloats --- Flattens the floats from env2 into a single Rec group, +-- Flattens the floats into a single Rec group, -- They must either all be lifted LetFloats or all JoinFloats mkRecFloats floats@(SimplFloats { sfLetFloats = LetFloats bs ff , sfJoinFloats = jbs diff --git a/compiler/GHC/Core/Opt/Specialise.hs b/compiler/GHC/Core/Opt/Specialise.hs index 8e9e35d208..ba5679778b 100644 --- a/compiler/GHC/Core/Opt/Specialise.hs +++ b/compiler/GHC/Core/Opt/Specialise.hs @@ -21,22 +21,21 @@ import GHC.Tc.Utils.TcType hiding( substTy ) import GHC.Core.Type hiding( substTy, extendTvSubstList ) import GHC.Core.Multiplicity import GHC.Core.Predicate -import GHC.Unit.Module( Module, HasModule(..) ) +import GHC.Unit.Module( Module ) import GHC.Core.Coercion( Coercion ) import GHC.Core.Opt.Monad import qualified GHC.Core.Subst as Core -import GHC.Core.Unfold import GHC.Core.Unfold.Make import GHC.Types.Var ( isLocalVar ) import GHC.Types.Var.Set import GHC.Types.Var.Env import GHC.Core import GHC.Core.Rules -import GHC.Core.SimpleOpt ( collectBindersPushingCo ) import GHC.Core.Utils ( exprIsTrivial, getIdFromTrivialExpr_maybe , mkCast, exprType ) import GHC.Core.FVs -import GHC.Core.Opt.Arity ( etaExpandToJoinPointRule ) +import GHC.Core.Opt.Arity ( collectBindersPushingCo + , etaExpandToJoinPointRule ) import GHC.Types.Unique.Supply import GHC.Types.Name import GHC.Types.Id.Make ( voidArgId, voidPrimId ) @@ -53,12 +52,9 @@ import GHC.Utils.Misc import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Data.FastString -import GHC.Utils.Monad.State import GHC.Types.Unique.DFM import GHC.Core.TyCo.Rep (TyCoBinder (..)) -import Control.Monad - {- ************************************************************************ * * @@ -592,28 +588,29 @@ specProgram guts@(ModGuts { mg_module = this_mod , mg_binds = binds }) = do { dflags <- getDynFlags + -- We need to start with a Subst that knows all the things + -- that are in scope, so that the substitution engine doesn't + -- accidentally re-use a unique that's already in use + -- Easiest thing is to do it all at once, as if all the top-level + -- decls were mutually recursive + ; let top_env = SE { se_subst = Core.mkEmptySubst $ mkInScopeSet $ mkVarSet $ + bindersOfBinds binds + , se_interesting = emptyVarSet + , se_module = this_mod + , se_dflags = dflags } + + go [] = return ([], emptyUDs) + go (bind:binds) = do (binds', uds) <- go binds + (bind', uds') <- specBind top_env bind uds + return (bind' ++ binds', uds') + -- Specialise the bindings of this module - ; (binds', uds) <- runSpecM dflags this_mod (go binds) + ; (binds', uds) <- runSpecM (go binds) - ; (spec_rules, spec_binds) <- specImports dflags this_mod top_env - local_rules uds + ; (spec_rules, spec_binds) <- specImports top_env local_rules uds ; return (guts { mg_binds = spec_binds ++ binds' , mg_rules = spec_rules ++ local_rules }) } - where - -- We need to start with a Subst that knows all the things - -- that are in scope, so that the substitution engine doesn't - -- accidentally re-use a unique that's already in use - -- Easiest thing is to do it all at once, as if all the top-level - -- decls were mutually recursive - top_env = SE { se_subst = Core.mkEmptySubst $ mkInScopeSet $ mkVarSet $ - bindersOfBinds binds - , se_interesting = emptyVarSet } - - go [] = return ([], emptyUDs) - go (bind:binds) = do (binds', uds) <- go binds - (bind', uds') <- specBind top_env bind uds - return (bind' ++ binds', uds') {- Note [Wrap bindings returned by specImports] @@ -643,13 +640,13 @@ See #10491 * * ********************************************************************* -} -specImports :: DynFlags -> Module -> SpecEnv +specImports :: SpecEnv -> [CoreRule] -> UsageDetails -> CoreM ([CoreRule], [CoreBind]) -specImports dflags this_mod top_env local_rules +specImports top_env local_rules (MkUD { ud_binds = dict_binds, ud_calls = calls }) - | not $ gopt Opt_CrossModuleSpecialise dflags + | not $ gopt Opt_CrossModuleSpecialise (se_dflags top_env) -- See Note [Disabling cross-module specialisation] = return ([], wrapDictBinds dict_binds []) @@ -657,8 +654,7 @@ specImports dflags this_mod top_env local_rules = do { hpt_rules <- getRuleBase ; let rule_base = extendRuleBaseList hpt_rules local_rules - ; (spec_rules, spec_binds) <- spec_imports dflags this_mod top_env - [] rule_base + ; (spec_rules, spec_binds) <- spec_imports top_env [] rule_base dict_binds calls -- Don't forget to wrap the specialized bindings with @@ -674,9 +670,7 @@ specImports dflags this_mod top_env local_rules } -- | Specialise a set of calls to imported bindings -spec_imports :: DynFlags - -> Module - -> SpecEnv -- Passed in so that all top-level Ids are in scope +spec_imports :: SpecEnv -- Passed in so that all top-level Ids are in scope -> [Id] -- Stack of imported functions being specialised -- See Note [specImport call stack] -> RuleBase -- Rules from this module and the home package @@ -686,8 +680,7 @@ spec_imports :: DynFlags -> CallDetails -- Calls for imported things -> CoreM ( [CoreRule] -- New rules , [CoreBind] ) -- Specialised bindings -spec_imports dflags this_mod top_env - callers rule_base dict_binds calls +spec_imports top_env callers rule_base dict_binds calls = do { let import_calls = dVarEnvElts calls -- ; debugTraceMsg (text "specImports {" <+> -- vcat [ text "calls:" <+> ppr import_calls @@ -701,16 +694,13 @@ spec_imports dflags this_mod top_env go _ [] = return ([], []) go rb (cis : other_calls) = do { -- debugTraceMsg (text "specImport {" <+> ppr cis) - ; (rules1, spec_binds1) <- spec_import dflags this_mod top_env - callers rb dict_binds cis + ; (rules1, spec_binds1) <- spec_import top_env callers rb dict_binds cis -- ; debugTraceMsg (text "specImport }" <+> ppr cis) ; (rules2, spec_binds2) <- go (extendRuleBaseList rb rules1) other_calls ; return (rules1 ++ rules2, spec_binds1 ++ spec_binds2) } -spec_import :: DynFlags - -> Module - -> SpecEnv -- Passed in so that all top-level Ids are in scope +spec_import :: SpecEnv -- Passed in so that all top-level Ids are in scope -> [Id] -- Stack of imported functions being specialised -- See Note [specImport call stack] -> RuleBase -- Rules from this module @@ -719,8 +709,7 @@ spec_import :: DynFlags -> CallInfoSet -- Imported function and calls for it -> CoreM ( [CoreRule] -- New rules , [CoreBind] ) -- Specialised bindings -spec_import dflags this_mod top_env callers - rb dict_binds cis@(CIS fn _) +spec_import top_env callers rb dict_binds cis@(CIS fn _) | isIn "specImport" fn callers = return ([], []) -- No warning. This actually happens all the time -- when specialising a recursive function, because @@ -731,8 +720,7 @@ spec_import dflags this_mod top_env callers = do { -- debugTraceMsg (text "specImport:no valid calls") ; return ([], []) } - | wantSpecImport dflags unfolding - , Just rhs <- maybeUnfoldingTemplate unfolding + | Just rhs <- canSpecImport dflags fn = do { -- Get rules from the external package state -- We keep doing this in case we "page-fault in" -- more rules as we go along @@ -744,8 +732,8 @@ spec_import dflags this_mod top_env callers ; (rules1, spec_pairs, MkUD { ud_binds = dict_binds1, ud_calls = new_calls }) <- do { -- debugTraceMsg (text "specImport1" <+> vcat [ppr fn, ppr good_calls, ppr rhs]) - ; runSpecM dflags this_mod $ - specCalls (Just this_mod) top_env rules_for_fn good_calls fn rhs } + ; runSpecM $ + specCalls True top_env rules_for_fn good_calls fn rhs } ; let spec_binds1 = [NonRec b r | (b,r) <- spec_pairs] -- After the rules kick in we may get recursion, but -- we rely on a global GlomBinds to sort that out later @@ -753,7 +741,7 @@ spec_import dflags this_mod top_env callers -- Now specialise any cascaded calls -- ; debugTraceMsg (text "specImport 2" <+> (ppr fn $$ ppr rules1 $$ ppr spec_binds1)) - ; (rules2, spec_binds2) <- spec_imports dflags this_mod top_env + ; (rules2, spec_binds2) <- spec_imports top_env (fn:callers) (extendRuleBaseList rb rules1) (dict_binds `unionBags` dict_binds1) @@ -769,11 +757,34 @@ spec_import dflags this_mod top_env callers ; return ([], [])} where - unfolding = realIdUnfolding fn -- We want to see the unfolding even for loop breakers + dflags = se_dflags top_env good_calls = filterCalls cis dict_binds -- SUPER IMPORTANT! Drop calls that (directly or indirectly) refer to fn -- See Note [Avoiding loops in specImports] +canSpecImport :: DynFlags -> Id -> Maybe CoreExpr +-- See Note [Specialise imported INLINABLE things] +canSpecImport dflags fn + | CoreUnfolding { uf_src = src, uf_tmpl = rhs } <- unf + , isStableSource src + = Just rhs -- By default, specialise only imported things that have a stable + -- unfolding; that is, have an INLINE or INLINABLE pragma + -- Specialise even INLINE things; it hasn't inlined yet, + -- so perhaps it never will. Moreover it may have calls + -- inside it that we want to specialise + + -- CoreUnfolding case does /not/ include DFunUnfoldings; + -- We only specialise DFunUnfoldings with -fspecialise-aggressively + -- See Note [Do not specialise imported DFuns] + + | gopt Opt_SpecialiseAggressively dflags + = maybeUnfoldingTemplate unf -- With -fspecialise-aggressively, specialise anything + -- with an unfolding, stable or not, DFun or not + + | otherwise = Nothing + where + unf = realIdUnfolding fn -- We want to see the unfolding even for loop breakers + -- | Returns whether or not to show a missed-spec warning. -- If -Wall-missed-specializations is on, show the warning. -- Otherwise, if -Wmissed-specializations is on, only show a warning @@ -798,24 +809,47 @@ tryWarnMissingSpecs dflags callers fn calls_for_fn , whenPprDebug (text "calls:" <+> vcat (map (pprCallInfo fn) calls_for_fn)) , text "Probable fix: add INLINABLE pragma on" <+> quotes (ppr fn) ]) -wantSpecImport :: DynFlags -> Unfolding -> Bool --- See Note [Specialise imported INLINABLE things] -wantSpecImport dflags unf - = case unf of - NoUnfolding -> False - BootUnfolding -> False - OtherCon {} -> False - DFunUnfolding {} -> True - CoreUnfolding { uf_src = src, uf_guidance = _guidance } - | gopt Opt_SpecialiseAggressively dflags -> True - | isStableSource src -> True - -- Specialise even INLINE things; it hasn't inlined yet, - -- so perhaps it never will. Moreover it may have calls - -- inside it that we want to specialise - | otherwise -> False -- Stable, not INLINE, hence INLINABLE -{- Note [Avoiding loops in specImports] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +{- Note [Do not specialise imported DFuns] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Ticket #18223 shows that specialising calls of DFuns is can cause a huge +and entirely unnecessary blowup in program size. Consider a call to + f @[[[[[[[[T]]]]]]]] d1 x +where df :: C a => C [a] + d1 :: C [[[[[[[[T]]]]]]]] = dfC[] @[[[[[[[T]]]]]]] d1 + d2 :: C [[[[[[[T]]]]]]] = dfC[] @[[[[[[T]]]]]] d3 + ... +Now we'll specialise f's RHS, which may give rise to calls to 'g', +also overloaded, which we will specialise, and so on. However, if +we specialise the calls to dfC[], we'll generate specialised copies of +all methods of C, at all types; and the same for C's superclasses. + +And many of these specialised functions will never be called. We are +going to call the specialised 'f', and the specialised 'g', but DFuns +group functions into a tuple, many of whose elements may never be used. + +With deeply-nested types this can lead to a simply overwhelming number +of specialisations: see #18223 for a simple example (from the wild). +I measured the number of specialisations for various numbers of calls +of `flip evalStateT ()`, and got this + + Size after one simplification + #calls #SPEC rules Terms Types + 5 56 3100 10600 + 9 108 13660 77206 + +The real tests case has 60+ calls, which blew GHC out of the water. + +Solution: don't specialise DFuns. The downside is that if we end +up with (h (dfun d)), /and/ we don't specialise 'h', then we won't +pass to 'h' a tuple of specialised functions. + +However, the flag -fspecialise-aggressively (experimental, off by default) +allows DFuns to specialise as well. + +Note [Avoiding loops in specImports] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We must take great care when specialising instance declarations (functions like $fOrdList) lest we accidentally build a recursive dictionary. See Note [Avoiding loops]. @@ -1003,6 +1037,9 @@ data SpecEnv -- Dict Ids that we know something about -- and hence may be worth specialising against -- See Note [Interesting dictionary arguments] + + , se_module :: Module + , se_dflags :: DynFlags } instance Outputable SpecEnv where @@ -1310,7 +1347,7 @@ specDefn :: SpecEnv specDefn env body_uds fn rhs = do { let (body_uds_without_me, calls_for_me) = callsForMe fn body_uds rules_for_me = idCoreRules fn - ; (rules, spec_defns, spec_uds) <- specCalls Nothing env rules_for_me + ; (rules, spec_defns, spec_uds) <- specCalls False env rules_for_me calls_for_me fn rhs ; return ( fn `addIdSpecialisations` rules , spec_defns @@ -1323,8 +1360,8 @@ specDefn env body_uds fn rhs -- body_uds_without_me --------------------------- -specCalls :: Maybe Module -- Just this_mod => specialising imported fn - -- Nothing => specialising local fn +specCalls :: Bool -- True => specialising imported fn + -- False => specialising local fn -> SpecEnv -> [CoreRule] -- Existing RULES for the fn -> [CallInfo] @@ -1339,7 +1376,7 @@ type SpecInfo = ( [CoreRule] -- Specialisation rules , [(Id,CoreExpr)] -- Specialised definition , UsageDetails ) -- Usage details from specialised RHSs -specCalls mb_mod env existing_rules calls_for_me fn rhs +specCalls spec_imp env existing_rules calls_for_me fn rhs -- The first case is the interesting one | notNull calls_for_me -- And there are some calls to specialise && not (isNeverActive (idInlineActivation fn)) @@ -1370,7 +1407,9 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs inl_act = inlinePragmaActivation inl_prag is_local = isLocalId fn is_dfun = isDFunId fn - + dflags = se_dflags env + ropts = initRuleOpts dflags + this_mod = se_module env -- Figure out whether the function has an INLINE pragma -- See Note [Inline specialisations] @@ -1412,8 +1451,6 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs -- , ppr dx_binds ]) $ -- return () - ; dflags <- getDynFlags - ; let ropts = initRuleOpts dflags ; if not useful -- No useful specialisation || already_covered ropts rules_acc rule_lhs_args then return spec_acc @@ -1441,17 +1478,15 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs = Nothing ; spec_fn <- newSpecIdSM fn spec_fn_ty spec_join_arity - ; this_mod <- getModule ; let -- The rule to put in the function's specialisation is: -- forall x @b d1' d2'. -- f x @T1 @b @T2 d1' d2' = f1 x @b -- See Note [Specialising Calls] - herald = case mb_mod of - Nothing -- Specialising local fn - -> text "SPEC" - Just this_mod -- Specialising imported fn - -> text "SPEC/" <> ppr this_mod + herald | spec_imp = -- Specialising imported fn + text "SPEC/" <> ppr this_mod + | otherwise = -- Specialising local fn + text "SPEC" rule_name = mkFastString $ showSDoc dflags $ herald <+> ftext (occNameFS (getOccName fn)) @@ -2480,15 +2515,15 @@ mkCallUDs env f args res = mkCallUDs' env f args mkCallUDs' env f args - | not (want_calls_for f) -- Imported from elsewhere - || null ci_key -- No useful specialisation - -- See also Note [Specialisations already covered] + | wantCallsFor env f -- We want it, and... + , not (null ci_key) -- this call site has a useful specialisation + = -- pprTrace "mkCallUDs: keeping" _trace_doc + singleCall f ci_key + + | otherwise -- See also Note [Specialisations already covered] = -- pprTrace "mkCallUDs: discarding" _trace_doc emptyUDs - | otherwise - = -- pprTrace "mkCallUDs: keeping" _trace_doc - singleCall f ci_key where _trace_doc = vcat [ppr f, ppr args, ppr ci_key] pis = fst $ splitPiTys $ idType f @@ -2525,12 +2560,23 @@ mkCallUDs' env f args mk_spec_arg _ (Anon VisArg _) = UnspecArg - want_calls_for f = isLocalId f || isJust (maybeUnfoldingTemplate (realIdUnfolding f)) - -- For imported things, we gather call instances if - -- there is an unfolding that we could in principle specialise - -- We might still decide not to use it (consulting dflags) - -- in specImports - -- Use 'realIdUnfolding' to ignore the loop-breaker flag! +wantCallsFor :: SpecEnv -> Id -> Bool +wantCallsFor _env _f = True + -- We could reduce the size of the UsageDetails by being less eager + -- about collecting calls for LocalIds: there is no point for + -- ones that are lambda-bound. We can't decide this by looking at + -- the (absence of an) unfolding, because unfoldings for local + -- functions are discarded by cloneBindSM, so no local binder will + -- have an unfolding at this stage. We'd have to keep a candidate + -- set of let-binders. + -- + -- Not many lambda-bound variables have dictionary arguments, so + -- this would make little difference anyway. + -- + -- For imported Ids we could check for an unfolding, but we have to + -- do so anyway in canSpecImport, and it seems better to have it + -- all in one place. So we simply collect usage info for imported + -- overloaded functions. {- Note [Type determines value] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2780,55 +2826,12 @@ deleteCallsFor bs calls = delDVarEnvList calls bs ************************************************************************ -} -newtype SpecM a = SpecM (State SpecState a) deriving (Functor) - -data SpecState = SpecState { - spec_uniq_supply :: UniqSupply, - spec_module :: Module, - spec_dflags :: DynFlags - } - -instance Applicative SpecM where - pure x = SpecM $ return x - (<*>) = ap - -instance Monad SpecM where - SpecM x >>= f = SpecM $ do y <- x - case f y of - SpecM z -> - z - -instance MonadFail SpecM where - fail str = SpecM $ error str - -instance MonadUnique SpecM where - getUniqueSupplyM - = SpecM $ do st <- get - let (us1, us2) = splitUniqSupply $ spec_uniq_supply st - put $ st { spec_uniq_supply = us2 } - return us1 - - getUniqueM - = SpecM $ do st <- get - let (u,us') = takeUniqFromSupply $ spec_uniq_supply st - put $ st { spec_uniq_supply = us' } - return u - -instance HasDynFlags SpecM where - getDynFlags = SpecM $ liftM spec_dflags get - -instance HasModule SpecM where - getModule = SpecM $ liftM spec_module get - -runSpecM :: DynFlags -> Module -> SpecM a -> CoreM a -runSpecM dflags this_mod (SpecM spec) - = do us <- getUniqueSupplyM - let initialState = SpecState { - spec_uniq_supply = us, - spec_module = this_mod, - spec_dflags = dflags - } - return $ evalState spec initialState +type SpecM a = UniqSM a + +runSpecM :: SpecM a -> CoreM a +runSpecM thing_inside + = do { us <- getUniqueSupplyM + ; return (initUs_ us thing_inside) } mapAndCombineSM :: (a -> SpecM (b, UsageDetails)) -> [a] -> SpecM ([b], UsageDetails) mapAndCombineSM _ [] = return ([], emptyUDs) |