summaryrefslogtreecommitdiff
path: root/compiler/GHC/Core/Opt
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/GHC/Core/Opt')
-rw-r--r--compiler/GHC/Core/Opt/Arity.hs545
-rw-r--r--compiler/GHC/Core/Opt/Pipeline.hs52
-rw-r--r--compiler/GHC/Core/Opt/Simplify.hs38
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Env.hs2
-rw-r--r--compiler/GHC/Core/Opt/Specialise.hs279
5 files changed, 679 insertions, 237 deletions
diff --git a/compiler/GHC/Core/Opt/Arity.hs b/compiler/GHC/Core/Opt/Arity.hs
index b55d91767e..2471470814 100644
--- a/compiler/GHC/Core/Opt/Arity.hs
+++ b/compiler/GHC/Core/Opt/Arity.hs
@@ -15,10 +15,18 @@ module GHC.Core.Opt.Arity
( manifestArity, joinRhsArity, exprArity, typeArity
, exprEtaExpandArity, findRhsArity
, etaExpand, etaExpandAT
- , etaExpandToJoinPoint, etaExpandToJoinPointRule
, exprBotStrictness_maybe
+
+ -- ** ArityType
, ArityType(..), expandableArityType, arityTypeArity
, maxWithArity, isBotArityType, idArityType
+
+ -- ** Join points
+ , etaExpandToJoinPoint, etaExpandToJoinPointRule
+
+ -- ** Coercions and casts
+ , pushCoArg, pushCoArgs, pushCoValArg, pushCoTyArg
+ , pushCoercionIntoLambda, pushCoDataCon, collectBindersPushingCo
)
where
@@ -31,15 +39,21 @@ import GHC.Driver.Ppr
import GHC.Core
import GHC.Core.FVs
import GHC.Core.Utils
-import GHC.Core.Subst
import GHC.Types.Demand
import GHC.Types.Var
import GHC.Types.Var.Env
import GHC.Types.Id
-import GHC.Core.Type as Type
-import GHC.Core.TyCon ( initRecTc, checkRecTc )
+
+-- We have two sorts of substitution:
+-- GHC.Core.Subst.Subst, and GHC.Core.TyCo.TCvSubst
+-- Both have substTy, substCo Hence need for qualification
+import GHC.Core.Subst as Core
+import GHC.Core.Type as Type
+import GHC.Core.Coercion as Type
+
+import GHC.Core.DataCon
+import GHC.Core.TyCon ( initRecTc, checkRecTc, tyConArity )
import GHC.Core.Predicate ( isDictTy )
-import GHC.Core.Coercion as Coercion
import GHC.Core.Multiplicity
import GHC.Types.Var.Set
import GHC.Types.Basic
@@ -48,7 +62,8 @@ import GHC.Driver.Session ( DynFlags, GeneralFlag(..), gopt )
import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Data.FastString
-import GHC.Utils.Misc ( lengthAtLeast )
+import GHC.Data.Pair
+import GHC.Utils.Misc
{-
************************************************************************
@@ -1076,12 +1091,11 @@ eta_expand one_shots orig_expr
go oss (Cast expr co) = Cast (go oss expr) co
go oss expr
- = -- pprTrace "ee" (vcat [ppr orig_expr, ppr expr, ppr etas]) $
- retick $ etaInfoAbs etas (etaInfoApp subst' sexpr etas)
+ = -- pprTrace "ee" (vcat [ppr orig_expr, ppr expr, pprEtaInfos etas]) $
+ retick $ etaInfoAbs etas (etaInfoApp in_scope' sexpr etas)
where
in_scope = mkInScopeSet (exprFreeVars expr)
(in_scope', etas) = mkEtaWW oss (ppr orig_expr) in_scope (exprType expr)
- subst' = mkEmptySubst in_scope'
-- Find ticks behind type apps.
-- See Note [Eta expansion and source notes]
@@ -1090,76 +1104,197 @@ eta_expand one_shots orig_expr
sexpr = foldl' App expr'' args
retick expr = foldr mkTick expr ticks
- -- Abstraction Application
+{- *********************************************************************
+* *
+ The EtaInfo mechanism
+ mkEtaWW, etaInfoAbs, etaInfoApp
+* *
+********************************************************************* -}
+
+{- Note [The EtaInfo mechanism]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Suppose we have (e :: ty) and we want to eta-expand it to arity N.
+This what eta_expand does. We do it in two steps:
+
+1. mkEtaWW: from 'ty' and 'N' build a [EtaInfo] which describes
+ the shape of the expansion necessary to expand to arity N.
+
+2. Build the term
+ \ v1..vn. e v1 .. vn
+ where those abstractions and applications are described by
+ the same [EtaInfo]. Specifically we build the term
+
+ etaInfoAbs etas (etaInfoApp in_scope e etas)
+
+ where etas :: [EtaInfo]#
+ etaInfoAbs builds the lambdas
+ etaInfoApp builds the applictions
+
+ Note that the /same/ [EtaInfo] drives both etaInfoAbs and etaInfoApp
+
+To a first approximation [EtaInfo] is just [Var]. But
+casts complicate the question. If we have
+ newtype N a = MkN (S -> a)
+and
+ ty = N (N Int)
+then the eta-expansion must look like
+ (\x (\y. ((e |> co1) x) |> co2) y)
+ |> sym co2)
+ |> sym co1
+where
+ co1 :: N (N Int) ~ S -> N Int
+ co2 :: N Int ~ S -> Int
+
+Blimey! Look at all those casts. Moreover, if the type
+is very deeply nested (as happens in #18223), the repetition
+of types can make the overall term very large. So there is a big
+payoff in cancelling out casts aggressively wherever possible.
+(See also Note [No crap in eta-expanded code].)
+
+This matters a lot in etaEInfoApp, where we
+* Do beta-reduction on the fly
+* Use getARg_mabye to get a cast out of the way,
+ so that we can do beta reduction
+Together this makes a big difference. Consider when e is
+ case x of
+ True -> (\x -> e1) |> c1
+ False -> (\p -> e2) |> c2
+
+When we eta-expand this to arity 1, say, etaInfoAbs will wrap
+a (\eta) around the outside and use etaInfoApp to apply each
+alternative to 'eta'. We want to beta-reduce all that junk
+away.
+
+#18223 was a dramtic example in which the intermediate term was
+grotesquely huge, even though the next Simplifier iteration squashed
+it. Better to kill it at birth.
+-}
+
--------------
-data EtaInfo = EtaVar Var -- /\a. [] [] a
- -- \x. [] [] x
- | EtaCo Coercion -- [] |> sym co [] |> co
+data EtaInfo -- Abstraction Application
+ = EtaVar Var -- /\a. [] [] a
+ -- (\x. []) [] x
+ | EtaCo CoercionR -- [] |> sym co [] |> co
instance Outputable EtaInfo where
- ppr (EtaVar v) = text "EtaVar" <+> ppr v
- ppr (EtaCo co) = text "EtaCo" <+> ppr co
+ ppr (EtaVar v) = text "EtaVar" <+> ppr v <+> dcolon <+> ppr (idType v)
+ ppr (EtaCo co) = text "EtaCo" <+> hang (ppr co) 2 (dcolon <+> ppr (coercionType co))
+
+-- Used in debug-printing
+-- pprEtaInfos :: [EtaInfo] -> SDoc
+-- pprEtaInfos eis = brackets $ vcat $ punctuate comma $ map ppr eis
pushCoercion :: Coercion -> [EtaInfo] -> [EtaInfo]
+-- Puts a EtaCo on the front of a [EtaInfo], but combining
+-- with an existing EtaCo if possible
+-- A minor improvement
pushCoercion co1 (EtaCo co2 : eis)
| isReflCo co = eis
| otherwise = EtaCo co : eis
where
co = co1 `mkTransCo` co2
-pushCoercion co eis = EtaCo co : eis
+pushCoercion co eis
+ = EtaCo co : eis
+
+getArg_maybe :: [EtaInfo] -> Maybe (CoreArg, [EtaInfo])
+-- Get an argument to the front of the [EtaInfo], if possible,
+-- by pushing any EtaCo through the argument
+getArg_maybe eis = go MRefl eis
+ where
+ go :: MCoercion -> [EtaInfo] -> Maybe (CoreArg, [EtaInfo])
+ go _ [] = Nothing
+ go mco (EtaCo co2 : eis) = go (mkTransMCoL mco co2) eis
+ go MRefl (EtaVar v : eis) = Just (varToCoreExpr v, eis)
+ go (MCo co) (EtaVar v : eis)
+ | Just (arg, mco) <- pushCoArg co (varToCoreExpr v)
+ = case mco of
+ MRefl -> Just (arg, eis)
+ MCo co -> Just (arg, pushCoercion co eis)
+ | otherwise
+ = Nothing
+
+mkCastMCo :: CoreExpr -> MCoercionR -> CoreExpr
+mkCastMCo e MRefl = e
+mkCastMCo e (MCo co) = Cast e co
+ -- We are careful to use (MCo co) only when co is not reflexive
+ -- Hence (Cast e co) rather than (mkCast e co)
+
+mkPiMCo :: Var -> MCoercionR -> MCoercionR
+mkPiMCo _ MRefl = MRefl
+mkPiMCo v (MCo co) = MCo (mkPiCo Representational v co)
--------------
etaInfoAbs :: [EtaInfo] -> CoreExpr -> CoreExpr
-etaInfoAbs [] expr = expr
-etaInfoAbs (EtaVar v : eis) expr = Lam v (etaInfoAbs eis expr)
-etaInfoAbs (EtaCo co : eis) expr = Cast (etaInfoAbs eis expr) (mkSymCo co)
+-- See Note [The EtaInfo mechanism]
+etaInfoAbs eis expr
+ | null eis = expr
+ | otherwise = case final_mco of
+ MRefl -> expr'
+ MCo co -> mkCast expr' co
+ where
+ (expr', final_mco) = foldr do_one (split_cast expr) eis
+
+ do_one :: EtaInfo -> (CoreExpr, MCoercion) -> (CoreExpr, MCoercion)
+ -- Implements the "Abstraction" column in the comments for data EtaInfo
+ -- In both argument and result the pair (e,mco) denotes (e |> mco)
+ do_one (EtaVar v) (expr, mco) = (Lam v expr, mkPiMCo v mco)
+ do_one (EtaCo co) (expr, mco) = (expr, mco `mkTransMCoL` mkSymCo co)
+
+ split_cast :: CoreExpr -> (CoreExpr, MCoercion)
+ split_cast (Cast e co) = (e, MCo co)
+ split_cast e = (e, MRefl)
+ -- We could look in the body of lets, and the branches of a case
+ -- But then we would have to worry about whether the cast mentioned
+ -- any of the bound variables, which is tiresome. Later maybe.
+ -- Result: we may end up with
+ -- (\(x::Int). case x of { DEFAULT -> e1 |> co }) |> sym (<Int>->co)
+ -- and fail to optimise it away
--------------
-etaInfoApp :: Subst -> CoreExpr -> [EtaInfo] -> CoreExpr
+etaInfoApp :: InScopeSet -> CoreExpr -> [EtaInfo] -> CoreExpr
-- (etaInfoApp s e eis) returns something equivalent to
--- ((substExpr s e) `appliedto` eis)
-
-etaInfoApp subst (Lam v1 e) (EtaVar v2 : eis)
- = etaInfoApp (GHC.Core.Subst.extendSubstWithVar subst v1 v2) e eis
-
-etaInfoApp subst (Cast e co1) eis
- = etaInfoApp subst e (pushCoercion co' eis)
- where
- co' = GHC.Core.Subst.substCo subst co1
+-- (substExpr s e `appliedto` eis)
+-- See Note [The EtaInfo mechanism]
-etaInfoApp subst (Case e b ty alts) eis
- = Case (subst_expr subst e) b1 ty' alts'
+etaInfoApp in_scope expr eis
+ = go (mkEmptySubst in_scope) expr eis
where
- (subst1, b1) = substBndr subst b
- alts' = map subst_alt alts
- ty' = etaInfoAppTy (GHC.Core.Subst.substTy subst ty) eis
- subst_alt (con, bs, rhs) = (con, bs', etaInfoApp subst2 rhs eis)
- where
- (subst2,bs') = substBndrs subst1 bs
-
-etaInfoApp subst (Let b e) eis
- | not (isJoinBind b)
- -- See Note [Eta expansion for join points]
- = Let b' (etaInfoApp subst' e eis)
- where
- (subst', b') = substBindSC subst b
+ go :: Subst -> CoreExpr -> [EtaInfo] -> CoreExpr
+ -- 'go' pushed down the eta-infos into the branch of a case
+ -- and the body of a let; and does beta-reduction if possible
+ go subst (Tick t e) eis
+ = Tick (substTickish subst t) (go subst e eis)
+ go subst (Cast e co) eis
+ = go subst e (pushCoercion (Core.substCo subst co) eis)
+ go subst (Case e b ty alts) eis
+ = Case (Core.substExprSC subst e) b1 ty' alts'
+ where
+ (subst1, b1) = Core.substBndr subst b
+ alts' = map subst_alt alts
+ ty' = etaInfoAppTy (Core.substTy subst ty) eis
+ subst_alt (con, bs, rhs) = (con, bs', go subst2 rhs eis)
+ where
+ (subst2,bs') = Core.substBndrs subst1 bs
+ go subst (Let b e) eis
+ | not (isJoinBind b) -- See Note [Eta expansion for join points]
+ = Let b' (go subst' e eis)
+ where
+ (subst', b') = Core.substBindSC subst b
-etaInfoApp subst (Tick t e) eis
- = Tick (substTickish subst t) (etaInfoApp subst e eis)
+ -- Beta-reduction if possible, using getArg_maybe to push
+ -- any intervening casts past the argument
+ -- See Note [The EtaInfo mechansim]
+ go subst (Lam v e) eis
+ | Just (arg, eis') <- getArg_maybe eis
+ = go (Core.extendSubst subst v arg) e eis'
-etaInfoApp subst expr _
- | (Var fun, _) <- collectArgs expr
- , Var fun' <- lookupIdSubst subst fun
- , isJoinId fun'
- = subst_expr subst expr
+ -- Stop pushing down; just wrap the expression up
+ go subst e eis = wrap (Core.substExprSC subst e) eis
-etaInfoApp subst e eis
- = go (subst_expr subst e) eis
- where
- go e [] = e
- go e (EtaVar v : eis) = go (App e (varToCoreExpr v)) eis
- go e (EtaCo co : eis) = go (Cast e co) eis
+ wrap e [] = e
+ wrap e (EtaVar v : eis) = wrap (App e (varToCoreExpr v)) eis
+ wrap e (EtaCo co : eis) = wrap (Cast e co) eis
--------------
@@ -1235,7 +1370,7 @@ mkEtaWW orig_oss ppr_orig_expr in_scope orig_ty
-- We want to get
-- coerce T (\x::[T] -> (coerce ([T]->Int) e) x)
| Just (co, ty') <- topNormaliseNewType_maybe ty
- , let co' = Coercion.substCo subst co
+ , let co' = Type.substCo subst co
-- Remember to apply the substitution to co (#16979)
-- (or we could have applied to ty, but then
-- we'd have had to zap it for the recursive call)
@@ -1253,21 +1388,290 @@ mkEtaWW orig_oss ppr_orig_expr in_scope orig_ty
-- with an explicit lambda having a non-function type
+{- *********************************************************************
+* *
+ The "push rules"
+* *
+************************************************************************
-------------
-subst_expr :: Subst -> CoreExpr -> CoreExpr
--- Apply a substitution to an expression. We use substExpr
--- not substExprSC (short-cutting substitution) because
--- we may be changing the types of join points, so applying
--- the in-scope set is necessary.
+Here we implement the "push rules" from FC papers:
+
+* The push-argument rules, where we can move a coercion past an argument.
+ We have
+ (fun |> co) arg
+ and we want to transform it to
+ (fun arg') |> co'
+ for some suitable co' and transformed arg'.
+
+* The PushK rule for data constructors. We have
+ (K e1 .. en) |> co
+ and we want to transform to
+ (K e1' .. en')
+ by pushing the coercion into the arguments
+-}
+
+pushCoArgs :: CoercionR -> [CoreArg] -> Maybe ([CoreArg], MCoercion)
+pushCoArgs co [] = return ([], MCo co)
+pushCoArgs co (arg:args) = do { (arg', m_co1) <- pushCoArg co arg
+ ; case m_co1 of
+ MCo co1 -> do { (args', m_co2) <- pushCoArgs co1 args
+ ; return (arg':args', m_co2) }
+ MRefl -> return (arg':args, MRefl) }
+
+pushCoArg :: CoercionR -> CoreArg -> Maybe (CoreArg, MCoercion)
+-- We have (fun |> co) arg, and we want to transform it to
+-- (fun arg) |> co
+-- This may fail, e.g. if (fun :: N) where N is a newtype
+-- C.f. simplCast in GHC.Core.Opt.Simplify
+-- 'co' is always Representational
+-- If the returned coercion is Nothing, then it would have been reflexive
+pushCoArg co (Type ty) = do { (ty', m_co') <- pushCoTyArg co ty
+ ; return (Type ty', m_co') }
+pushCoArg co val_arg = do { (arg_co, m_co') <- pushCoValArg co
+ ; return (val_arg `mkCastMCo` arg_co, m_co') }
+
+pushCoTyArg :: CoercionR -> Type -> Maybe (Type, MCoercionR)
+-- We have (fun |> co) @ty
+-- Push the coercion through to return
+-- (fun @ty') |> co'
+-- 'co' is always Representational
+-- If the returned coercion is Nothing, then it would have been reflexive;
+-- it's faster not to compute it, though.
+pushCoTyArg co ty
+ -- The following is inefficient - don't do `eqType` here, the coercion
+ -- optimizer will take care of it. See #14737.
+ -- -- | tyL `eqType` tyR
+ -- -- = Just (ty, Nothing)
+
+ | isReflCo co
+ = Just (ty, MRefl)
+
+ | isForAllTy_ty tyL
+ = ASSERT2( isForAllTy_ty tyR, ppr co $$ ppr ty )
+ Just (ty `mkCastTy` co1, MCo co2)
+
+ | otherwise
+ = Nothing
+ where
+ Pair tyL tyR = coercionKind co
+ -- co :: tyL ~ tyR
+ -- tyL = forall (a1 :: k1). ty1
+ -- tyR = forall (a2 :: k2). ty2
+
+ co1 = mkSymCo (mkNthCo Nominal 0 co)
+ -- co1 :: k2 ~N k1
+ -- Note that NthCo can extract a Nominal equality between the
+ -- kinds of the types related by a coercion between forall-types.
+ -- See the NthCo case in GHC.Core.Lint.
+
+ co2 = mkInstCo co (mkGReflLeftCo Nominal ty co1)
+ -- co2 :: ty1[ (ty|>co1)/a1 ] ~ ty2[ ty/a2 ]
+ -- Arg of mkInstCo is always nominal, hence mkNomReflCo
+
+pushCoValArg :: CoercionR -> Maybe (MCoercionR, MCoercionR)
+-- We have (fun |> co) arg
+-- Push the coercion through to return
+-- (fun (arg |> co_arg)) |> co_res
+-- 'co' is always Representational
+-- If the second returned Coercion is actually Nothing, then no cast is necessary;
+-- the returned coercion would have been reflexive.
+pushCoValArg co
+ -- The following is inefficient - don't do `eqType` here, the coercion
+ -- optimizer will take care of it. See #14737.
+ -- -- | tyL `eqType` tyR
+ -- -- = Just (mkRepReflCo arg, Nothing)
+
+ | isReflCo co
+ = Just (MRefl, MRefl)
+
+ | isFunTy tyL
+ , (co_mult, co1, co2) <- decomposeFunCo Representational co
+ , isReflexiveCo co_mult
+ -- We can't push the coercion in the case where co_mult isn't reflexivity:
+ -- it could be an unsafe axiom, and losing this information could yield
+ -- ill-typed terms. For instance (fun x ::(1) Int -> (fun _ -> () |> co) x)
+ -- with co :: (Int -> ()) ~ (Int #-> ()), would reduce to (fun x ::(1) Int
+ -- -> (fun _ ::(Many) Int -> ()) x) which is ill-typed
+
+ -- If co :: (tyL1 -> tyL2) ~ (tyR1 -> tyR2)
+ -- then co1 :: tyL1 ~ tyR1
+ -- co2 :: tyL2 ~ tyR2
+ = ASSERT2( isFunTy tyR, ppr co $$ ppr arg )
+ Just (coToMCo (mkSymCo co1), coToMCo co2)
+ -- Critically, coToMCo to checks for ReflCo; the whole coercion may not
+ -- be reflexive, but either of its components might be
+ -- We could use isReflexiveCo, but it's not clear if the benefit
+ -- is worth the cost, and it makes no difference in #18223
+
+ | otherwise
+ = Nothing
+ where
+ arg = funArgTy tyR
+ Pair tyL tyR = coercionKind co
+
+pushCoercionIntoLambda
+ :: InScopeSet -> Var -> CoreExpr -> CoercionR -> Maybe (Var, CoreExpr)
+-- This implements the Push rule from the paper on coercions
+-- (\x. e) |> co
+-- ===>
+-- (\x'. e |> co')
+pushCoercionIntoLambda in_scope x e co
+ | ASSERT(not (isTyVar x) && not (isCoVar x)) True
+ , Pair s1s2 t1t2 <- coercionKind co
+ , Just (_, _s1,_s2) <- splitFunTy_maybe s1s2
+ , Just (w1, t1,_t2) <- splitFunTy_maybe t1t2
+ , (co_mult, co1, co2) <- decomposeFunCo Representational co
+ , isReflexiveCo co_mult
+ -- We can't push the coercion in the case where co_mult isn't
+ -- reflexivity. See pushCoValArg for more details.
+ = let
+ -- Should we optimize the coercions here?
+ -- Otherwise they might not match too well
+ x' = x `setIdType` t1 `setIdMult` w1
+ in_scope' = in_scope `extendInScopeSet` x'
+ subst = extendIdSubst (mkEmptySubst in_scope')
+ x
+ (mkCast (Var x') co1)
+ in Just (x', substExpr subst e `mkCast` co2)
+ | otherwise
+ = pprTrace "exprIsLambda_maybe: Unexpected lambda in case" (ppr (Lam x e))
+ Nothing
+
+pushCoDataCon :: DataCon -> [CoreExpr] -> Coercion
+ -> Maybe (DataCon
+ , [Type] -- Universal type args
+ , [CoreExpr]) -- All other args incl existentials
+-- Implement the KPush reduction rule as described in "Down with kinds"
+-- The transformation applies iff we have
+-- (C e1 ... en) `cast` co
+-- where co :: (T t1 .. tn) ~ to_ty
+-- The left-hand one must be a T, because exprIsConApp returned True
+-- but the right-hand one might not be. (Though it usually will.)
+pushCoDataCon dc dc_args co
+ | isReflCo co || from_ty `eqType` to_ty -- try cheap test first
+ , let (univ_ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) dc_args
+ = Just (dc, map exprToType univ_ty_args, rest_args)
+
+ | Just (to_tc, to_tc_arg_tys) <- splitTyConApp_maybe to_ty
+ , to_tc == dataConTyCon dc
+ -- These two tests can fail; we might see
+ -- (C x y) `cast` (g :: T a ~ S [a]),
+ -- where S is a type function. In fact, exprIsConApp
+ -- will probably not be called in such circumstances,
+ -- but there's nothing wrong with it
+
+ = let
+ tc_arity = tyConArity to_tc
+ dc_univ_tyvars = dataConUnivTyVars dc
+ dc_ex_tcvars = dataConExTyCoVars dc
+ arg_tys = dataConRepArgTys dc
+
+ non_univ_args = dropList dc_univ_tyvars dc_args
+ (ex_args, val_args) = splitAtList dc_ex_tcvars non_univ_args
+
+ -- Make the "Psi" from the paper
+ omegas = decomposeCo tc_arity co (tyConRolesRepresentational to_tc)
+ (psi_subst, to_ex_arg_tys)
+ = liftCoSubstWithEx Representational
+ dc_univ_tyvars
+ omegas
+ dc_ex_tcvars
+ (map exprToType ex_args)
+
+ -- Cast the value arguments (which include dictionaries)
+ new_val_args = zipWith cast_arg (map scaledThing arg_tys) val_args
+ cast_arg arg_ty arg = mkCast arg (psi_subst arg_ty)
+
+ to_ex_args = map Type to_ex_arg_tys
+
+ dump_doc = vcat [ppr dc, ppr dc_univ_tyvars, ppr dc_ex_tcvars,
+ ppr arg_tys, ppr dc_args,
+ ppr ex_args, ppr val_args, ppr co, ppr from_ty, ppr to_ty, ppr to_tc
+ , ppr $ mkTyConApp to_tc (map exprToType $ takeList dc_univ_tyvars dc_args) ]
+ in
+ ASSERT2( eqType from_ty (mkTyConApp to_tc (map exprToType $ takeList dc_univ_tyvars dc_args)), dump_doc )
+ ASSERT2( equalLength val_args arg_tys, dump_doc )
+ Just (dc, to_tc_arg_tys, to_ex_args ++ new_val_args)
+
+ | otherwise
+ = Nothing
+
+ where
+ Pair from_ty to_ty = coercionKind co
+
+collectBindersPushingCo :: CoreExpr -> ([Var], CoreExpr)
+-- Collect lambda binders, pushing coercions inside if possible
+-- E.g. (\x.e) |> g g :: <Int> -> blah
+-- = (\x. e |> Nth 1 g)
+--
+-- That is,
--
--- ToDo: we could instead check if we actually *are*
--- changing any join points' types, and if not use substExprSC.
-subst_expr = substExpr
+-- collectBindersPushingCo ((\x.e) |> g) === ([x], e |> Nth 1 g)
+collectBindersPushingCo e
+ = go [] e
+ where
+ -- Peel off lambdas until we hit a cast.
+ go :: [Var] -> CoreExpr -> ([Var], CoreExpr)
+ -- The accumulator is in reverse order
+ go bs (Lam b e) = go (b:bs) e
+ go bs (Cast e co) = go_c bs e co
+ go bs e = (reverse bs, e)
+
+ -- We are in a cast; peel off casts until we hit a lambda.
+ go_c :: [Var] -> CoreExpr -> CoercionR -> ([Var], CoreExpr)
+ -- (go_c bs e c) is same as (go bs e (e |> c))
+ go_c bs (Cast e co1) co2 = go_c bs e (co1 `mkTransCo` co2)
+ go_c bs (Lam b e) co = go_lam bs b e co
+ go_c bs e co = (reverse bs, mkCast e co)
+
+ -- We are in a lambda under a cast; peel off lambdas and build a
+ -- new coercion for the body.
+ go_lam :: [Var] -> Var -> CoreExpr -> CoercionR -> ([Var], CoreExpr)
+ -- (go_lam bs b e c) is same as (go_c bs (\b.e) c)
+ go_lam bs b e co
+ | isTyVar b
+ , let Pair tyL tyR = coercionKind co
+ , ASSERT( isForAllTy_ty tyL )
+ isForAllTy_ty tyR
+ , isReflCo (mkNthCo Nominal 0 co) -- See Note [collectBindersPushingCo]
+ = go_c (b:bs) e (mkInstCo co (mkNomReflCo (mkTyVarTy b)))
+
+ | isCoVar b
+ , let Pair tyL tyR = coercionKind co
+ , ASSERT( isForAllTy_co tyL )
+ isForAllTy_co tyR
+ , isReflCo (mkNthCo Nominal 0 co) -- See Note [collectBindersPushingCo]
+ , let cov = mkCoVarCo b
+ = go_c (b:bs) e (mkInstCo co (mkNomReflCo (mkCoercionTy cov)))
+
+ | isId b
+ , let Pair tyL tyR = coercionKind co
+ , ASSERT( isFunTy tyL) isFunTy tyR
+ , (co_mult, co_arg, co_res) <- decomposeFunCo Representational co
+ , isReflCo co_mult -- See Note [collectBindersPushingCo]
+ , isReflCo co_arg -- See Note [collectBindersPushingCo]
+ = go_c (b:bs) e co_res
+
+ | otherwise = (reverse bs, mkCast (Lam b e) co)
+{-
---------------
+Note [collectBindersPushingCo]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We just look for coercions of form
+ <type> # w -> blah
+(and similarly for foralls) to keep this function simple. We could do
+more elaborate stuff, but it'd involve substitution etc.
+
+-}
+
+{- *********************************************************************
+* *
+ Join points
+* *
+********************************************************************* -}
+-------------------
-- | Split an expression into the given number of binders and a body,
-- eta-expanding if necessary. Counts value *and* type binders.
etaExpandToJoinPoint :: JoinArity -> CoreExpr -> ([CoreBndr], CoreExpr)
@@ -1307,7 +1711,7 @@ etaBodyForJoinPoint need_args body
= (reverse rev_bs, e)
go n ty subst rev_bs e
| Just (tv, res_ty) <- splitForAllTy_maybe ty
- , let (subst', tv') = Type.substVarBndr subst tv
+ , let (subst', tv') = substVarBndr subst tv
= go (n-1) res_ty subst' (tv' : rev_bs) (e `App` varToCoreExpr tv')
| Just (mult, arg_ty, res_ty) <- splitFunTy_maybe ty
, let (subst', b) = freshEtaId n subst (Scaled mult arg_ty)
@@ -1318,6 +1722,8 @@ etaBodyForJoinPoint need_args body
init_subst e = mkEmptyTCvSubst (mkInScopeSet (exprFreeVars e))
+
+
--------------
freshEtaId :: Int -> TCvSubst -> Scaled Type -> (TCvSubst, Id)
-- Make a fresh Id, with specified type (after applying substitution)
@@ -1336,3 +1742,4 @@ freshEtaId n subst ty
-- "OrCoVar" since this can be used to eta-expand
-- coercion abstractions
subst' = extendTCvInScope subst eta_id'
+
diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs
index c3f2fc9f85..6d0712e634 100644
--- a/compiler/GHC/Core/Opt/Pipeline.hs
+++ b/compiler/GHC/Core/Opt/Pipeline.hs
@@ -315,33 +315,38 @@ getCoreToDo dflags
runWhen do_float_in CoreDoFloatInwards,
+ simplify "final", -- Final tidy-up
+
maybe_rule_check FinalPhase,
+ -------- After this we have -O2 passes -----------------
+ -- None of them run with -O
+
-- Case-liberation for -O2. This should be after
-- strictness analysis and the simplification which follows it.
- runWhen liberate_case (CoreDoPasses [
- CoreLiberateCase,
- simplify "post-liberate-case"
- ]), -- Run the simplifier after LiberateCase to vastly
- -- reduce the possibility of shadowing
- -- Reason: see Note [Shadowing] in GHC.Core.Opt.SpecConstr
+ runWhen liberate_case $ CoreDoPasses
+ [ CoreLiberateCase, simplify "post-liberate-case" ],
+ -- Run the simplifier after LiberateCase to vastly
+ -- reduce the possibility of shadowing
+ -- Reason: see Note [Shadowing] in GHC.Core.Opt.SpecConstr
- runWhen spec_constr CoreDoSpecConstr,
+ runWhen spec_constr $ CoreDoPasses
+ [ CoreDoSpecConstr, simplify "post-spec-constr"],
+ -- See Note [Simplify after SpecConstr]
maybe_rule_check FinalPhase,
- runWhen late_specialise
- (CoreDoPasses [ CoreDoSpecialising
- , simplify "post-late-spec"]),
+ runWhen late_specialise $ CoreDoPasses
+ [ CoreDoSpecialising, simplify "post-late-spec"],
-- LiberateCase can yield new CSE opportunities because it peels
-- off one layer of a recursive function (concretely, I saw this
-- in wheel-sieve1), and I'm guessing that SpecConstr can too
-- And CSE is a very cheap pass. So it seems worth doing here.
- runWhen ((liberate_case || spec_constr) && cse) CoreCSE,
+ runWhen ((liberate_case || spec_constr) && cse) $ CoreDoPasses
+ [ CoreCSE, simplify "post-final-cse" ],
- -- Final clean-up simplification:
- simplify "final",
+ --------- End of -O2 passes --------------
runWhen late_dmd_anal $ CoreDoPasses (
dmd_cpr_ww ++ [simplify "post-late-ww"]
@@ -410,6 +415,27 @@ or with -O0. Two reasons:
But watch out: list fusion can prevent floating. So use phase control
to switch off those rules until after floating.
+Note [Simplify after SpecConstr]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We want to run the simplifier after SpecConstr, and before late-Specialise,
+for two reasons, both shown up in test perf/compiler/T16473,
+with -O2 -flate-specialise
+
+1. I found that running late-Specialise after SpecConstr, with no
+ simplification in between meant that the carefullly constructed
+ SpecConstr rule never got to fire. (It was something like
+ lvl = f a -- Arity 1
+ ....g lvl....
+ SpecConstr specialised g for argument lvl; but Specialise then
+ specialised lvl = f a to lvl = $sf, and inlined. Or something like
+ that.)
+
+2. Specialise relies on unfoldings being available for top-level dictionary
+ bindings; but SpecConstr kills them all! The Simplifer restores them.
+
+This extra run of the simplifier has a cost, but this is only with -O2.
+
+
************************************************************************
* *
The CoreToDo interpreter
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs
index e219a0dba9..6c207766bd 100644
--- a/compiler/GHC/Core/Opt/Simplify.hs
+++ b/compiler/GHC/Core/Opt/Simplify.hs
@@ -51,9 +51,9 @@ import GHC.Core.Unfold
import GHC.Core.Unfold.Make
import GHC.Core.Utils
import GHC.Core.Opt.Arity ( ArityType(..), arityTypeArity, isBotArityType
+ , pushCoTyArg, pushCoValArg
, idArityType, etaExpandAT )
-import GHC.Core.SimpleOpt ( pushCoTyArg, pushCoValArg
- , joinPointBinding_maybe, joinPointBindings_maybe )
+import GHC.Core.SimpleOpt ( joinPointBinding_maybe, joinPointBindings_maybe )
import GHC.Core.FVs ( mkRuleInfo )
import GHC.Core.Rules ( lookupRule, getRules, initRuleOpts )
import GHC.Types.Basic
@@ -318,7 +318,7 @@ simplLazyBind env top_lvl is_rec bndr bndr1 rhs rhs_se
; let rhs_cont = mkRhsStop (substTy body_env (exprType body))
; (body_floats0, body0) <- {-#SCC "simplExprF" #-} simplExprF body_env body rhs_cont
- -- Never float join-floats out of a non-join let-binding
+ -- Never float join-floats out of a non-join let-binding (which this is)
-- So wrap the body in the join-floats right now
-- Hence: body_floats1 consists only of let-floats
; let (body_floats1, body1) = wrapJoinFloatsX body_floats0 body0
@@ -1414,25 +1414,23 @@ simplCast env body co0 cont0
-- type of the hole changes (#16312)
-- (f |> co) e ===> (f (e |> co1)) |> co2
- -- where co :: (s1->s2) ~ (t1~t2)
+ -- where co :: (s1->s2) ~ (t1->t2)
-- co1 :: t1 ~ s1
-- co2 :: s2 ~ t2
addCoerce co cont@(ApplyToVal { sc_arg = arg, sc_env = arg_se
, sc_dup = dup, sc_cont = tail })
- | Just (co1, m_co2) <- pushCoValArg co
- , let new_ty = coercionRKind co1
- , not (isTypeLevPoly new_ty) -- Without this check, we get a lev-poly arg
- -- See Note [Levity polymorphism invariants] in GHC.Core
- -- test: typecheck/should_run/EtaExpandLevPoly
+ | Just (m_co1, m_co2) <- pushCoValArg co
+ , levity_ok m_co1
= {-#SCC "addCoerce-pushCoValArg" #-}
do { tail' <- addCoerceM m_co2 tail
- ; if isReflCo co1
- then return (cont { sc_cont = tail'
- , sc_hole_ty = coercionLKind co })
+ ; case m_co1 of {
+ MRefl -> return (cont { sc_cont = tail'
+ , sc_hole_ty = coercionLKind co }) ;
-- Avoid simplifying if possible;
-- See Note [Avoiding exponential behaviour]
- else do
- { (dup', arg_se', arg') <- simplArg env dup arg_se arg
+
+ MCo co1 ->
+ do { (dup', arg_se', arg') <- simplArg env dup arg_se arg
-- When we build the ApplyTo we can't mix the OutCoercion
-- 'co' with the InExpr 'arg', so we simplify
-- to make it all consistent. It's a bit messy.
@@ -1442,7 +1440,7 @@ simplCast env body co0 cont0
, sc_env = arg_se'
, sc_dup = dup'
, sc_cont = tail'
- , sc_hole_ty = coercionLKind co }) } }
+ , sc_hole_ty = coercionLKind co }) } } }
addCoerce co cont
| isReflexiveCo co = return cont -- Having this at the end makes a huge
@@ -1450,6 +1448,13 @@ simplCast env body co0 cont0
-- See Note [Optimising reflexivity]
| otherwise = return (CastIt co cont)
+ levity_ok :: MCoercionR -> Bool
+ levity_ok MRefl = True
+ levity_ok (MCo co) = not $ isTypeLevPoly $ coercionRKind co
+ -- Without this check, we get a lev-poly arg
+ -- See Note [Levity polymorphism invariants] in GHC.Core
+ -- test: typecheck/should_run/EtaExpandLevPoly
+
simplArg :: SimplEnv -> DupFlag -> StaticEnv -> CoreExpr
-> SimplM (DupFlag, StaticEnv, OutExpr)
simplArg env dup_flag arg_env arg
@@ -3114,7 +3119,7 @@ knownCon :: SimplEnv
knownCon env scrut dc_floats dc dc_ty_args dc_args bndr bs rhs cont
= do { (floats1, env1) <- bind_args env bs dc_args
- ; (floats2, env2) <- bind_case_bndr env1
+ ; (floats2, env2) <- bind_case_bndr env1
; (floats3, expr') <- simplExprF env2 rhs cont
; case dc_floats of
[] ->
@@ -3240,6 +3245,7 @@ altsWouldDup [_] = False
altsWouldDup (alt:alts)
| is_bot_alt alt = altsWouldDup alts
| otherwise = not (all is_bot_alt alts)
+ -- otherwise case: first alt is non-bot, so all the rest must be bot
where
is_bot_alt (_,_,rhs) = exprIsDeadEnd rhs
diff --git a/compiler/GHC/Core/Opt/Simplify/Env.hs b/compiler/GHC/Core/Opt/Simplify/Env.hs
index 4ceaf637ed..7049e3e578 100644
--- a/compiler/GHC/Core/Opt/Simplify/Env.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Env.hs
@@ -596,7 +596,7 @@ addJoinFlts :: JoinFloats -> JoinFloats -> JoinFloats
addJoinFlts = appOL
mkRecFloats :: SimplFloats -> SimplFloats
--- Flattens the floats from env2 into a single Rec group,
+-- Flattens the floats into a single Rec group,
-- They must either all be lifted LetFloats or all JoinFloats
mkRecFloats floats@(SimplFloats { sfLetFloats = LetFloats bs ff
, sfJoinFloats = jbs
diff --git a/compiler/GHC/Core/Opt/Specialise.hs b/compiler/GHC/Core/Opt/Specialise.hs
index 8e9e35d208..ba5679778b 100644
--- a/compiler/GHC/Core/Opt/Specialise.hs
+++ b/compiler/GHC/Core/Opt/Specialise.hs
@@ -21,22 +21,21 @@ import GHC.Tc.Utils.TcType hiding( substTy )
import GHC.Core.Type hiding( substTy, extendTvSubstList )
import GHC.Core.Multiplicity
import GHC.Core.Predicate
-import GHC.Unit.Module( Module, HasModule(..) )
+import GHC.Unit.Module( Module )
import GHC.Core.Coercion( Coercion )
import GHC.Core.Opt.Monad
import qualified GHC.Core.Subst as Core
-import GHC.Core.Unfold
import GHC.Core.Unfold.Make
import GHC.Types.Var ( isLocalVar )
import GHC.Types.Var.Set
import GHC.Types.Var.Env
import GHC.Core
import GHC.Core.Rules
-import GHC.Core.SimpleOpt ( collectBindersPushingCo )
import GHC.Core.Utils ( exprIsTrivial, getIdFromTrivialExpr_maybe
, mkCast, exprType )
import GHC.Core.FVs
-import GHC.Core.Opt.Arity ( etaExpandToJoinPointRule )
+import GHC.Core.Opt.Arity ( collectBindersPushingCo
+ , etaExpandToJoinPointRule )
import GHC.Types.Unique.Supply
import GHC.Types.Name
import GHC.Types.Id.Make ( voidArgId, voidPrimId )
@@ -53,12 +52,9 @@ import GHC.Utils.Misc
import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Data.FastString
-import GHC.Utils.Monad.State
import GHC.Types.Unique.DFM
import GHC.Core.TyCo.Rep (TyCoBinder (..))
-import Control.Monad
-
{-
************************************************************************
* *
@@ -592,28 +588,29 @@ specProgram guts@(ModGuts { mg_module = this_mod
, mg_binds = binds })
= do { dflags <- getDynFlags
+ -- We need to start with a Subst that knows all the things
+ -- that are in scope, so that the substitution engine doesn't
+ -- accidentally re-use a unique that's already in use
+ -- Easiest thing is to do it all at once, as if all the top-level
+ -- decls were mutually recursive
+ ; let top_env = SE { se_subst = Core.mkEmptySubst $ mkInScopeSet $ mkVarSet $
+ bindersOfBinds binds
+ , se_interesting = emptyVarSet
+ , se_module = this_mod
+ , se_dflags = dflags }
+
+ go [] = return ([], emptyUDs)
+ go (bind:binds) = do (binds', uds) <- go binds
+ (bind', uds') <- specBind top_env bind uds
+ return (bind' ++ binds', uds')
+
-- Specialise the bindings of this module
- ; (binds', uds) <- runSpecM dflags this_mod (go binds)
+ ; (binds', uds) <- runSpecM (go binds)
- ; (spec_rules, spec_binds) <- specImports dflags this_mod top_env
- local_rules uds
+ ; (spec_rules, spec_binds) <- specImports top_env local_rules uds
; return (guts { mg_binds = spec_binds ++ binds'
, mg_rules = spec_rules ++ local_rules }) }
- where
- -- We need to start with a Subst that knows all the things
- -- that are in scope, so that the substitution engine doesn't
- -- accidentally re-use a unique that's already in use
- -- Easiest thing is to do it all at once, as if all the top-level
- -- decls were mutually recursive
- top_env = SE { se_subst = Core.mkEmptySubst $ mkInScopeSet $ mkVarSet $
- bindersOfBinds binds
- , se_interesting = emptyVarSet }
-
- go [] = return ([], emptyUDs)
- go (bind:binds) = do (binds', uds) <- go binds
- (bind', uds') <- specBind top_env bind uds
- return (bind' ++ binds', uds')
{-
Note [Wrap bindings returned by specImports]
@@ -643,13 +640,13 @@ See #10491
* *
********************************************************************* -}
-specImports :: DynFlags -> Module -> SpecEnv
+specImports :: SpecEnv
-> [CoreRule]
-> UsageDetails
-> CoreM ([CoreRule], [CoreBind])
-specImports dflags this_mod top_env local_rules
+specImports top_env local_rules
(MkUD { ud_binds = dict_binds, ud_calls = calls })
- | not $ gopt Opt_CrossModuleSpecialise dflags
+ | not $ gopt Opt_CrossModuleSpecialise (se_dflags top_env)
-- See Note [Disabling cross-module specialisation]
= return ([], wrapDictBinds dict_binds [])
@@ -657,8 +654,7 @@ specImports dflags this_mod top_env local_rules
= do { hpt_rules <- getRuleBase
; let rule_base = extendRuleBaseList hpt_rules local_rules
- ; (spec_rules, spec_binds) <- spec_imports dflags this_mod top_env
- [] rule_base
+ ; (spec_rules, spec_binds) <- spec_imports top_env [] rule_base
dict_binds calls
-- Don't forget to wrap the specialized bindings with
@@ -674,9 +670,7 @@ specImports dflags this_mod top_env local_rules
}
-- | Specialise a set of calls to imported bindings
-spec_imports :: DynFlags
- -> Module
- -> SpecEnv -- Passed in so that all top-level Ids are in scope
+spec_imports :: SpecEnv -- Passed in so that all top-level Ids are in scope
-> [Id] -- Stack of imported functions being specialised
-- See Note [specImport call stack]
-> RuleBase -- Rules from this module and the home package
@@ -686,8 +680,7 @@ spec_imports :: DynFlags
-> CallDetails -- Calls for imported things
-> CoreM ( [CoreRule] -- New rules
, [CoreBind] ) -- Specialised bindings
-spec_imports dflags this_mod top_env
- callers rule_base dict_binds calls
+spec_imports top_env callers rule_base dict_binds calls
= do { let import_calls = dVarEnvElts calls
-- ; debugTraceMsg (text "specImports {" <+>
-- vcat [ text "calls:" <+> ppr import_calls
@@ -701,16 +694,13 @@ spec_imports dflags this_mod top_env
go _ [] = return ([], [])
go rb (cis : other_calls)
= do { -- debugTraceMsg (text "specImport {" <+> ppr cis)
- ; (rules1, spec_binds1) <- spec_import dflags this_mod top_env
- callers rb dict_binds cis
+ ; (rules1, spec_binds1) <- spec_import top_env callers rb dict_binds cis
-- ; debugTraceMsg (text "specImport }" <+> ppr cis)
; (rules2, spec_binds2) <- go (extendRuleBaseList rb rules1) other_calls
; return (rules1 ++ rules2, spec_binds1 ++ spec_binds2) }
-spec_import :: DynFlags
- -> Module
- -> SpecEnv -- Passed in so that all top-level Ids are in scope
+spec_import :: SpecEnv -- Passed in so that all top-level Ids are in scope
-> [Id] -- Stack of imported functions being specialised
-- See Note [specImport call stack]
-> RuleBase -- Rules from this module
@@ -719,8 +709,7 @@ spec_import :: DynFlags
-> CallInfoSet -- Imported function and calls for it
-> CoreM ( [CoreRule] -- New rules
, [CoreBind] ) -- Specialised bindings
-spec_import dflags this_mod top_env callers
- rb dict_binds cis@(CIS fn _)
+spec_import top_env callers rb dict_binds cis@(CIS fn _)
| isIn "specImport" fn callers
= return ([], []) -- No warning. This actually happens all the time
-- when specialising a recursive function, because
@@ -731,8 +720,7 @@ spec_import dflags this_mod top_env callers
= do { -- debugTraceMsg (text "specImport:no valid calls")
; return ([], []) }
- | wantSpecImport dflags unfolding
- , Just rhs <- maybeUnfoldingTemplate unfolding
+ | Just rhs <- canSpecImport dflags fn
= do { -- Get rules from the external package state
-- We keep doing this in case we "page-fault in"
-- more rules as we go along
@@ -744,8 +732,8 @@ spec_import dflags this_mod top_env callers
; (rules1, spec_pairs, MkUD { ud_binds = dict_binds1, ud_calls = new_calls })
<- do { -- debugTraceMsg (text "specImport1" <+> vcat [ppr fn, ppr good_calls, ppr rhs])
- ; runSpecM dflags this_mod $
- specCalls (Just this_mod) top_env rules_for_fn good_calls fn rhs }
+ ; runSpecM $
+ specCalls True top_env rules_for_fn good_calls fn rhs }
; let spec_binds1 = [NonRec b r | (b,r) <- spec_pairs]
-- After the rules kick in we may get recursion, but
-- we rely on a global GlomBinds to sort that out later
@@ -753,7 +741,7 @@ spec_import dflags this_mod top_env callers
-- Now specialise any cascaded calls
-- ; debugTraceMsg (text "specImport 2" <+> (ppr fn $$ ppr rules1 $$ ppr spec_binds1))
- ; (rules2, spec_binds2) <- spec_imports dflags this_mod top_env
+ ; (rules2, spec_binds2) <- spec_imports top_env
(fn:callers)
(extendRuleBaseList rb rules1)
(dict_binds `unionBags` dict_binds1)
@@ -769,11 +757,34 @@ spec_import dflags this_mod top_env callers
; return ([], [])}
where
- unfolding = realIdUnfolding fn -- We want to see the unfolding even for loop breakers
+ dflags = se_dflags top_env
good_calls = filterCalls cis dict_binds
-- SUPER IMPORTANT! Drop calls that (directly or indirectly) refer to fn
-- See Note [Avoiding loops in specImports]
+canSpecImport :: DynFlags -> Id -> Maybe CoreExpr
+-- See Note [Specialise imported INLINABLE things]
+canSpecImport dflags fn
+ | CoreUnfolding { uf_src = src, uf_tmpl = rhs } <- unf
+ , isStableSource src
+ = Just rhs -- By default, specialise only imported things that have a stable
+ -- unfolding; that is, have an INLINE or INLINABLE pragma
+ -- Specialise even INLINE things; it hasn't inlined yet,
+ -- so perhaps it never will. Moreover it may have calls
+ -- inside it that we want to specialise
+
+ -- CoreUnfolding case does /not/ include DFunUnfoldings;
+ -- We only specialise DFunUnfoldings with -fspecialise-aggressively
+ -- See Note [Do not specialise imported DFuns]
+
+ | gopt Opt_SpecialiseAggressively dflags
+ = maybeUnfoldingTemplate unf -- With -fspecialise-aggressively, specialise anything
+ -- with an unfolding, stable or not, DFun or not
+
+ | otherwise = Nothing
+ where
+ unf = realIdUnfolding fn -- We want to see the unfolding even for loop breakers
+
-- | Returns whether or not to show a missed-spec warning.
-- If -Wall-missed-specializations is on, show the warning.
-- Otherwise, if -Wmissed-specializations is on, only show a warning
@@ -798,24 +809,47 @@ tryWarnMissingSpecs dflags callers fn calls_for_fn
, whenPprDebug (text "calls:" <+> vcat (map (pprCallInfo fn) calls_for_fn))
, text "Probable fix: add INLINABLE pragma on" <+> quotes (ppr fn) ])
-wantSpecImport :: DynFlags -> Unfolding -> Bool
--- See Note [Specialise imported INLINABLE things]
-wantSpecImport dflags unf
- = case unf of
- NoUnfolding -> False
- BootUnfolding -> False
- OtherCon {} -> False
- DFunUnfolding {} -> True
- CoreUnfolding { uf_src = src, uf_guidance = _guidance }
- | gopt Opt_SpecialiseAggressively dflags -> True
- | isStableSource src -> True
- -- Specialise even INLINE things; it hasn't inlined yet,
- -- so perhaps it never will. Moreover it may have calls
- -- inside it that we want to specialise
- | otherwise -> False -- Stable, not INLINE, hence INLINABLE
-{- Note [Avoiding loops in specImports]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+{- Note [Do not specialise imported DFuns]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Ticket #18223 shows that specialising calls of DFuns is can cause a huge
+and entirely unnecessary blowup in program size. Consider a call to
+ f @[[[[[[[[T]]]]]]]] d1 x
+where df :: C a => C [a]
+ d1 :: C [[[[[[[[T]]]]]]]] = dfC[] @[[[[[[[T]]]]]]] d1
+ d2 :: C [[[[[[[T]]]]]]] = dfC[] @[[[[[[T]]]]]] d3
+ ...
+Now we'll specialise f's RHS, which may give rise to calls to 'g',
+also overloaded, which we will specialise, and so on. However, if
+we specialise the calls to dfC[], we'll generate specialised copies of
+all methods of C, at all types; and the same for C's superclasses.
+
+And many of these specialised functions will never be called. We are
+going to call the specialised 'f', and the specialised 'g', but DFuns
+group functions into a tuple, many of whose elements may never be used.
+
+With deeply-nested types this can lead to a simply overwhelming number
+of specialisations: see #18223 for a simple example (from the wild).
+I measured the number of specialisations for various numbers of calls
+of `flip evalStateT ()`, and got this
+
+ Size after one simplification
+ #calls #SPEC rules Terms Types
+ 5 56 3100 10600
+ 9 108 13660 77206
+
+The real tests case has 60+ calls, which blew GHC out of the water.
+
+Solution: don't specialise DFuns. The downside is that if we end
+up with (h (dfun d)), /and/ we don't specialise 'h', then we won't
+pass to 'h' a tuple of specialised functions.
+
+However, the flag -fspecialise-aggressively (experimental, off by default)
+allows DFuns to specialise as well.
+
+Note [Avoiding loops in specImports]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We must take great care when specialising instance declarations
(functions like $fOrdList) lest we accidentally build a recursive
dictionary. See Note [Avoiding loops].
@@ -1003,6 +1037,9 @@ data SpecEnv
-- Dict Ids that we know something about
-- and hence may be worth specialising against
-- See Note [Interesting dictionary arguments]
+
+ , se_module :: Module
+ , se_dflags :: DynFlags
}
instance Outputable SpecEnv where
@@ -1310,7 +1347,7 @@ specDefn :: SpecEnv
specDefn env body_uds fn rhs
= do { let (body_uds_without_me, calls_for_me) = callsForMe fn body_uds
rules_for_me = idCoreRules fn
- ; (rules, spec_defns, spec_uds) <- specCalls Nothing env rules_for_me
+ ; (rules, spec_defns, spec_uds) <- specCalls False env rules_for_me
calls_for_me fn rhs
; return ( fn `addIdSpecialisations` rules
, spec_defns
@@ -1323,8 +1360,8 @@ specDefn env body_uds fn rhs
-- body_uds_without_me
---------------------------
-specCalls :: Maybe Module -- Just this_mod => specialising imported fn
- -- Nothing => specialising local fn
+specCalls :: Bool -- True => specialising imported fn
+ -- False => specialising local fn
-> SpecEnv
-> [CoreRule] -- Existing RULES for the fn
-> [CallInfo]
@@ -1339,7 +1376,7 @@ type SpecInfo = ( [CoreRule] -- Specialisation rules
, [(Id,CoreExpr)] -- Specialised definition
, UsageDetails ) -- Usage details from specialised RHSs
-specCalls mb_mod env existing_rules calls_for_me fn rhs
+specCalls spec_imp env existing_rules calls_for_me fn rhs
-- The first case is the interesting one
| notNull calls_for_me -- And there are some calls to specialise
&& not (isNeverActive (idInlineActivation fn))
@@ -1370,7 +1407,9 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs
inl_act = inlinePragmaActivation inl_prag
is_local = isLocalId fn
is_dfun = isDFunId fn
-
+ dflags = se_dflags env
+ ropts = initRuleOpts dflags
+ this_mod = se_module env
-- Figure out whether the function has an INLINE pragma
-- See Note [Inline specialisations]
@@ -1412,8 +1451,6 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs
-- , ppr dx_binds ]) $
-- return ()
- ; dflags <- getDynFlags
- ; let ropts = initRuleOpts dflags
; if not useful -- No useful specialisation
|| already_covered ropts rules_acc rule_lhs_args
then return spec_acc
@@ -1441,17 +1478,15 @@ specCalls mb_mod env existing_rules calls_for_me fn rhs
= Nothing
; spec_fn <- newSpecIdSM fn spec_fn_ty spec_join_arity
- ; this_mod <- getModule
; let
-- The rule to put in the function's specialisation is:
-- forall x @b d1' d2'.
-- f x @T1 @b @T2 d1' d2' = f1 x @b
-- See Note [Specialising Calls]
- herald = case mb_mod of
- Nothing -- Specialising local fn
- -> text "SPEC"
- Just this_mod -- Specialising imported fn
- -> text "SPEC/" <> ppr this_mod
+ herald | spec_imp = -- Specialising imported fn
+ text "SPEC/" <> ppr this_mod
+ | otherwise = -- Specialising local fn
+ text "SPEC"
rule_name = mkFastString $ showSDoc dflags $
herald <+> ftext (occNameFS (getOccName fn))
@@ -2480,15 +2515,15 @@ mkCallUDs env f args
res = mkCallUDs' env f args
mkCallUDs' env f args
- | not (want_calls_for f) -- Imported from elsewhere
- || null ci_key -- No useful specialisation
- -- See also Note [Specialisations already covered]
+ | wantCallsFor env f -- We want it, and...
+ , not (null ci_key) -- this call site has a useful specialisation
+ = -- pprTrace "mkCallUDs: keeping" _trace_doc
+ singleCall f ci_key
+
+ | otherwise -- See also Note [Specialisations already covered]
= -- pprTrace "mkCallUDs: discarding" _trace_doc
emptyUDs
- | otherwise
- = -- pprTrace "mkCallUDs: keeping" _trace_doc
- singleCall f ci_key
where
_trace_doc = vcat [ppr f, ppr args, ppr ci_key]
pis = fst $ splitPiTys $ idType f
@@ -2525,12 +2560,23 @@ mkCallUDs' env f args
mk_spec_arg _ (Anon VisArg _)
= UnspecArg
- want_calls_for f = isLocalId f || isJust (maybeUnfoldingTemplate (realIdUnfolding f))
- -- For imported things, we gather call instances if
- -- there is an unfolding that we could in principle specialise
- -- We might still decide not to use it (consulting dflags)
- -- in specImports
- -- Use 'realIdUnfolding' to ignore the loop-breaker flag!
+wantCallsFor :: SpecEnv -> Id -> Bool
+wantCallsFor _env _f = True
+ -- We could reduce the size of the UsageDetails by being less eager
+ -- about collecting calls for LocalIds: there is no point for
+ -- ones that are lambda-bound. We can't decide this by looking at
+ -- the (absence of an) unfolding, because unfoldings for local
+ -- functions are discarded by cloneBindSM, so no local binder will
+ -- have an unfolding at this stage. We'd have to keep a candidate
+ -- set of let-binders.
+ --
+ -- Not many lambda-bound variables have dictionary arguments, so
+ -- this would make little difference anyway.
+ --
+ -- For imported Ids we could check for an unfolding, but we have to
+ -- do so anyway in canSpecImport, and it seems better to have it
+ -- all in one place. So we simply collect usage info for imported
+ -- overloaded functions.
{- Note [Type determines value]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2780,55 +2826,12 @@ deleteCallsFor bs calls = delDVarEnvList calls bs
************************************************************************
-}
-newtype SpecM a = SpecM (State SpecState a) deriving (Functor)
-
-data SpecState = SpecState {
- spec_uniq_supply :: UniqSupply,
- spec_module :: Module,
- spec_dflags :: DynFlags
- }
-
-instance Applicative SpecM where
- pure x = SpecM $ return x
- (<*>) = ap
-
-instance Monad SpecM where
- SpecM x >>= f = SpecM $ do y <- x
- case f y of
- SpecM z ->
- z
-
-instance MonadFail SpecM where
- fail str = SpecM $ error str
-
-instance MonadUnique SpecM where
- getUniqueSupplyM
- = SpecM $ do st <- get
- let (us1, us2) = splitUniqSupply $ spec_uniq_supply st
- put $ st { spec_uniq_supply = us2 }
- return us1
-
- getUniqueM
- = SpecM $ do st <- get
- let (u,us') = takeUniqFromSupply $ spec_uniq_supply st
- put $ st { spec_uniq_supply = us' }
- return u
-
-instance HasDynFlags SpecM where
- getDynFlags = SpecM $ liftM spec_dflags get
-
-instance HasModule SpecM where
- getModule = SpecM $ liftM spec_module get
-
-runSpecM :: DynFlags -> Module -> SpecM a -> CoreM a
-runSpecM dflags this_mod (SpecM spec)
- = do us <- getUniqueSupplyM
- let initialState = SpecState {
- spec_uniq_supply = us,
- spec_module = this_mod,
- spec_dflags = dflags
- }
- return $ evalState spec initialState
+type SpecM a = UniqSM a
+
+runSpecM :: SpecM a -> CoreM a
+runSpecM thing_inside
+ = do { us <- getUniqueSupplyM
+ ; return (initUs_ us thing_inside) }
mapAndCombineSM :: (a -> SpecM (b, UsageDetails)) -> [a] -> SpecM ([b], UsageDetails)
mapAndCombineSM _ [] = return ([], emptyUDs)