diff options
author | Simon Peyton Jones <simonpj@microsoft.com> | 2014-05-08 11:21:16 +0100 |
---|---|---|
committer | Simon Peyton Jones <simonpj@microsoft.com> | 2014-08-28 11:14:05 +0100 |
commit | 6e0f6ededff6018a88dd390590a09f79842ccfa5 (patch) | |
tree | c2d4f46cfdcf8b236d9ac751c48f0b0ccced7503 /compiler | |
parent | e9cd1d5e9d6f0e019d6433a3c7dd9585b3f7ae6b (diff) | |
download | haskell-6e0f6ededff6018a88dd390590a09f79842ccfa5.tar.gz |
Refactor unfoldings
There are two main refactorings here
1. Move the uf_arity field
out of CoreUnfolding
into UnfWhen
It's a lot tidier there. If I've got this right, no behaviour
should change.
2. Define specUnfolding and use it in DsBinds and Specialise
a) commons-up some shared code
b) makes sure that Specialise correctly specialises DFun
unfoldings (which it didn't before)
The two got put together because both ended up interacting in the
specialiser.
They cause zero difference to nofib.
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/coreSyn/CoreSubst.lhs | 9 | ||||
-rw-r--r-- | compiler/coreSyn/CoreSyn.lhs | 13 | ||||
-rw-r--r-- | compiler/coreSyn/CoreUnfold.lhs | 327 | ||||
-rw-r--r-- | compiler/coreSyn/PprCore.lhs | 8 | ||||
-rw-r--r-- | compiler/deSugar/DsBinds.lhs | 24 | ||||
-rw-r--r-- | compiler/iface/MkIface.lhs | 10 | ||||
-rw-r--r-- | compiler/iface/TcIface.lhs | 6 | ||||
-rw-r--r-- | compiler/simplCore/Simplify.lhs | 10 | ||||
-rw-r--r-- | compiler/specialise/Specialise.lhs | 99 |
9 files changed, 294 insertions, 212 deletions
diff --git a/compiler/coreSyn/CoreSubst.lhs b/compiler/coreSyn/CoreSubst.lhs index 2544c45117..1951252271 100644 --- a/compiler/coreSyn/CoreSubst.lhs +++ b/compiler/coreSyn/CoreSubst.lhs @@ -1179,8 +1179,8 @@ exprIsConApp_maybe (in_scope, id_unf) expr -- and that is the business of callSiteInline. -- In practice, without this test, most of the "hits" were -- CPR'd workers getting inlined back into their wrappers, - | Just rhs <- expandUnfolding_maybe unfolding - , unfoldingArity unfolding == 0 + | idArity fun == 0 + , Just rhs <- expandUnfolding_maybe unfolding , let in_scope' = extendInScopeSetSet in_scope (exprFreeVars rhs) = go (Left in_scope') rhs cont where @@ -1327,10 +1327,9 @@ exprIsLambda_maybe (in_scope_set, id_unf) (Cast casted_e co) -- Another attempt: See if we find a partial unfolding exprIsLambda_maybe (in_scope_set, id_unf) e | (Var f, as) <- collectArgs e - , let unfolding = id_unf f - , Just rhs <- expandUnfolding_maybe unfolding + , idArity f > length (filter isValArg as) -- Make sure there is hope to get a lambda - , unfoldingArity unfolding > length (filter isValArg as) + , Just rhs <- expandUnfolding_maybe (id_unf f) -- Optimize, for beta-reduction , let e' = simpleOptExprWith (mkEmptySubst in_scope_set) (rhs `mkApps` as) -- Recurse, because of possible casts diff --git a/compiler/coreSyn/CoreSyn.lhs b/compiler/coreSyn/CoreSyn.lhs index 12a60daddd..d107c900fb 100644 --- a/compiler/coreSyn/CoreSyn.lhs +++ b/compiler/coreSyn/CoreSyn.lhs @@ -55,7 +55,7 @@ module CoreSyn ( -- ** Predicates and deconstruction on 'Unfolding' unfoldingTemplate, setUnfoldingTemplate, expandUnfolding_maybe, - maybeUnfoldingTemplate, otherCons, unfoldingArity, + maybeUnfoldingTemplate, otherCons, isValueUnfolding, isEvaldUnfolding, isCheapUnfolding, isExpandableUnfolding, isConLikeUnfolding, isCompulsoryUnfolding, isStableUnfolding, isStableCoreUnfolding_maybe, @@ -686,7 +686,6 @@ data Unfolding uf_tmpl :: CoreExpr, -- Template; occurrence info is correct uf_src :: UnfoldingSource, -- Where the unfolding came from uf_is_top :: Bool, -- True <=> top level binding - uf_arity :: Arity, -- Number of value arguments expected uf_is_value :: Bool, -- exprIsHNF template (cached); it is ok to discard -- a `seq` on this variable uf_is_conlike :: Bool, -- True <=> applicn of constructor or CONLIKE function @@ -752,6 +751,8 @@ data UnfoldingGuidance -- Used (a) for small *and* cheap unfoldings -- (b) for INLINE functions -- See Note [INLINE for small functions] in CoreUnfold + ug_arity :: Arity, -- Number of value arguments expected + ug_unsat_ok :: Bool, -- True <=> ok to inline even if unsaturated ug_boring_ok :: Bool -- True <=> ok to inline even if the context is boring -- So True,True means "always" @@ -846,8 +847,8 @@ seqUnfolding :: Unfolding -> () seqUnfolding (CoreUnfolding { uf_tmpl = e, uf_is_top = top, uf_is_value = b1, uf_is_work_free = b2, uf_expandable = b3, uf_is_conlike = b4, - uf_arity = a, uf_guidance = g}) - = seqExpr e `seq` top `seq` b1 `seq` a `seq` b2 `seq` b3 `seq` b4 `seq` seqGuidance g + uf_guidance = g}) + = seqExpr e `seq` top `seq` b1 `seq` b2 `seq` b3 `seq` b4 `seq` seqGuidance g seqUnfolding _ = () @@ -936,10 +937,6 @@ isStableUnfolding (CoreUnfolding { uf_src = src }) = isStableSource src isStableUnfolding (DFunUnfolding {}) = True isStableUnfolding _ = False -unfoldingArity :: Unfolding -> Arity -unfoldingArity (CoreUnfolding { uf_arity = arity }) = arity -unfoldingArity _ = panic "unfoldingArity" - isClosedUnfolding :: Unfolding -> Bool -- No free variables isClosedUnfolding (CoreUnfolding {}) = False isClosedUnfolding (DFunUnfolding {}) = False diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs index fa9259a005..e1d06ad37c 100644 --- a/compiler/coreSyn/CoreUnfold.lhs +++ b/compiler/coreSyn/CoreUnfold.lhs @@ -31,6 +31,7 @@ module CoreUnfold ( mkTopUnfolding, mkSimpleUnfolding, mkInlineUnfolding, mkInlinableUnfolding, mkWwInlineRule, mkCompulsoryUnfolding, mkDFunUnfolding, + specUnfolding, interestingArg, ArgSummary(..), @@ -108,27 +109,31 @@ mkDFunUnfolding bndrs con ops mkWwInlineRule :: CoreExpr -> Arity -> Unfolding mkWwInlineRule expr arity = mkCoreUnfolding InlineStable True - (simpleOptExpr expr) arity - (UnfWhen unSaturatedOk boringCxtNotOk) + (simpleOptExpr expr) + (UnfWhen { ug_arity = arity, ug_unsat_ok = unSaturatedOk + , ug_boring_ok = boringCxtNotOk }) mkCompulsoryUnfolding :: CoreExpr -> Unfolding mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded = mkCoreUnfolding InlineCompulsory True - (simpleOptExpr expr) 0 -- Arity of unfolding doesn't matter - (UnfWhen unSaturatedOk boringCxtOk) + (simpleOptExpr expr) + (UnfWhen { ug_arity = 0 -- Arity of unfolding doesn't matter + , ug_unsat_ok = unSaturatedOk, ug_boring_ok = boringCxtOk }) mkInlineUnfolding :: Maybe Arity -> CoreExpr -> Unfolding -mkInlineUnfolding mb_arity expr +mkInlineUnfolding mb_arity expr = mkCoreUnfolding InlineStable True -- Note [Top-level flag on inline rules] - expr' arity - (UnfWhen unsat_ok boring_ok) + expr' guide where expr' = simpleOptExpr expr - (unsat_ok, arity) = case mb_arity of - Nothing -> (unSaturatedOk, manifestArity expr') - Just ar -> (needSaturated, ar) - + guide = case mb_arity of + Nothing -> UnfWhen { ug_arity = manifestArity expr' + , ug_unsat_ok = unSaturatedOk + , ug_boring_ok = boring_ok } + Just arity -> UnfWhen { ug_arity = arity + , ug_unsat_ok = needSaturated + , ug_boring_ok = boring_ok } boring_ok = inlineBoringOk expr' mkInlinableUnfolding :: DynFlags -> CoreExpr -> Unfolding @@ -137,19 +142,81 @@ mkInlinableUnfolding dflags expr where expr' = simpleOptExpr expr is_bot = isJust (exprBotStrictness_maybe expr') + +specUnfolding :: DynFlags -> Subst -> [Var] -> [CoreExpr] -> Unfolding -> Unfolding +-- See Note [Specialising unfoldings] +specUnfolding _ subst new_bndrs spec_args + df@(DFunUnfolding { df_bndrs = bndrs, df_con = con , df_args = args }) + = ASSERT2( length bndrs >= length spec_args, ppr df $$ ppr spec_args $$ ppr new_bndrs ) + mkDFunUnfolding (new_bndrs ++ extra_bndrs) con + (map (substExpr spec_doc subst2) args) + where + subst1 = extendSubstList subst (bndrs `zip` spec_args) + (subst2, extra_bndrs) = substBndrs subst1 (dropList spec_args bndrs) + +specUnfolding _dflags subst new_bndrs spec_args + (CoreUnfolding { uf_src = src, uf_tmpl = tmpl + , uf_is_top = top_lvl + , uf_guidance = old_guidance }) + | isStableSource src -- See Note [Specialising unfoldings] + , UnfWhen { ug_arity = old_arity + , ug_unsat_ok = unsat_ok + , ug_boring_ok = boring_ok } <- old_guidance + = let guidance = UnfWhen { ug_arity = old_arity - count isValArg spec_args + + count isId new_bndrs + , ug_unsat_ok = unsat_ok + , ug_boring_ok = boring_ok } + new_tmpl = simpleOptExpr $ mkLams new_bndrs $ + mkApps (substExpr spec_doc subst tmpl) spec_args + -- The beta-redexes created here will be simplified + -- away by simplOptExpr in mkUnfolding + + in mkCoreUnfolding src top_lvl new_tmpl guidance + +specUnfolding _ _ _ _ _ = noUnfolding + +spec_doc :: SDoc +spec_doc = ptext (sLit "specUnfolding") \end{code} -Internal functions +Note [Specialising unfoldings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When we specialise a function for some given type-class arguments, we use +specUnfolding to specialise its unfolding. Some important points: + +* If the original function has a DFunUnfolding, the specialised one + must do so too! Otherwise we lose the magic rules that make it + interact with ClassOps + +* There is a bit of hack for INLINABLE functions: + f :: Ord a => .... + f = <big-rhs> + {- INLINEABLE f #-} + Now if we specialise f, should the specialised version still have + an INLINEABLE pragma? If it does, we'll capture a specialised copy + of <big-rhs> as its unfolding, and that probaby won't inline. But + if we don't, the specialised version of <big-rhs> might be small + enough to inline at a call site. This happens with Control.Monad.liftM3, + and can cause a lot more allocation as a result (nofib n-body shows this). + + Moreover, keeping the INLINEABLE thing isn't much help, because + the specialised function (probaby) isn't overloaded any more. + + Conclusion: drop the INLINEALE pragma. In practice what this means is: + if a stable unfolding has UnfoldingGuidance of UnfWhen, + we keep it (so the specialised thing too will always inline) + if a stable unfolding has UnfoldingGuidance of UnfIfGoodArgs + (which arises from INLINEABLE), we discard it + \begin{code} mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr - -> Arity -> UnfoldingGuidance -> Unfolding + -> UnfoldingGuidance -> Unfolding -- Occurrence-analyses the expression before capturing it -mkCoreUnfolding src top_lvl expr arity guidance +mkCoreUnfolding src top_lvl expr guidance = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr, -- See Note [Occurrrence analysis of unfoldings] uf_src = src, - uf_arity = arity, uf_is_top = top_lvl, uf_is_value = exprIsHNF expr, uf_is_conlike = exprIsConLike expr, @@ -169,7 +236,6 @@ mkUnfolding dflags src top_lvl is_bottoming expr = CoreUnfolding { uf_tmpl = occurAnalyseExpr expr, -- See Note [Occurrrence analysis of unfoldings] uf_src = src, - uf_arity = arity, uf_is_top = top_lvl, uf_is_value = exprIsHNF expr, uf_is_conlike = exprIsConLike expr, @@ -177,7 +243,7 @@ mkUnfolding dflags src top_lvl is_bottoming expr uf_is_work_free = exprIsWorkFree expr, uf_guidance = guidance } where - (arity, guidance) = calcUnfoldingGuidance dflags expr + guidance = calcUnfoldingGuidance dflags expr -- NB: *not* (calcUnfoldingGuidance (occurAnalyseExpr expr))! -- See Note [Calculate unfolding guidance on the non-occ-anal'd expression] \end{code} @@ -256,39 +322,38 @@ inlineBoringOk e calcUnfoldingGuidance :: DynFlags -> CoreExpr -- Expression to look at - -> (Arity, UnfoldingGuidance) + -> UnfoldingGuidance calcUnfoldingGuidance dflags expr - = case collectBinders expr of { (bndrs, body) -> - let - bOMB_OUT_SIZE = ufCreationThreshold dflags - -- Bomb out if size gets bigger than this - val_bndrs = filter isId bndrs - n_val_bndrs = length val_bndrs - - guidance - = case sizeExpr dflags (iUnbox bOMB_OUT_SIZE) val_bndrs body of - TooBig -> UnfNever - SizeIs size cased_bndrs scrut_discount - | uncondInline expr n_val_bndrs (iBox size) - -> UnfWhen unSaturatedOk boringCxtOk -- Note [INLINE for small functions] - | otherwise - -> UnfIfGoodArgs { ug_args = map (discount cased_bndrs) val_bndrs - , ug_size = iBox size - , ug_res = iBox scrut_discount } - - discount :: Bag (Id,Int) -> Id -> Int - discount cbs bndr = foldlBag combine 0 cbs + = case sizeExpr dflags (iUnbox bOMB_OUT_SIZE) val_bndrs body of + TooBig -> UnfNever + SizeIs size cased_bndrs scrut_discount + | uncondInline expr n_val_bndrs (iBox size) + -> UnfWhen { ug_unsat_ok = unSaturatedOk + , ug_boring_ok = boringCxtOk + , ug_arity = n_val_bndrs } -- Note [INLINE for small functions] + | otherwise + -> UnfIfGoodArgs { ug_args = map (mk_discount cased_bndrs) val_bndrs + , ug_size = iBox size + , ug_res = iBox scrut_discount } + + where + (bndrs, body) = collectBinders expr + bOMB_OUT_SIZE = ufCreationThreshold dflags + -- Bomb out if size gets bigger than this + val_bndrs = filter isId bndrs + n_val_bndrs = length val_bndrs + + mk_discount :: Bag (Id,Int) -> Id -> Int + mk_discount cbs bndr = foldlBag combine 0 cbs where - combine acc (bndr', disc) + combine acc (bndr', disc) | bndr == bndr' = acc `plus_disc` disc | otherwise = acc - + plus_disc :: Int -> Int -> Int plus_disc | isFunTy (idType bndr) = max | otherwise = (+) -- See Note [Function and non-function discounts] - in - (n_val_bndrs, guidance) } \end{code} Note [Computing the size of an expression] @@ -365,7 +430,7 @@ Things to note: saturated will give a lambda instead of a PAP, and will be more efficient at runtime. -(3) However, when the function's arity > 0, we do insist that it +(3) However, when the function's arity > 0, we do insist that it has at least one value argument at the call site. (This check is made in the UnfWhen case of callSiteInline.) Otherwise we find this: f = /\a \x:a. x @@ -381,7 +446,7 @@ Things to note: single instruction, but we do not want to unconditionally replace every occurrence of x with (y +# z). So we only do the unconditional-inline thing for *trivial* expressions. - + NB: you might think that PostInlineUnconditionally would do this but it doesn't fire for top-level things; see SimplUtils Note [Top level and postInlineUnconditionally] @@ -847,13 +912,13 @@ smallEnoughToInline _ _ ---------------- certainlyWillInline :: DynFlags -> Unfolding -> Bool -- Sees if the unfolding is pretty certain to inline -certainlyWillInline dflags (CoreUnfolding { uf_arity = n_vals, uf_guidance = guidance }) +certainlyWillInline dflags (CoreUnfolding { uf_guidance = guidance }) = case guidance of UnfNever -> False UnfWhen {} -> True - UnfIfGoodArgs { ug_size = size} - -> n_vals > 0 -- See Note [certainlyWillInline: be caseful of thunks] - && size - (10 * (n_vals +1)) <= ufUseThreshold dflags + UnfIfGoodArgs { ug_size = size, ug_args = args } + -> not (null args) -- See Note [certainlyWillInline: be caseful of thunks] + && size - (10 * (length args +1)) <= ufUseThreshold dflags certainlyWillInline _ _ = False @@ -932,92 +997,101 @@ callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info -- Things with an INLINE pragma may have an unfolding *and* -- be a loop breaker (maybe the knot is not yet untied) CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top - , uf_is_work_free = is_wf, uf_arity = uf_arity + , uf_is_work_free = is_wf , uf_guidance = guidance, uf_expandable = is_exp } | active_unfolding -> tryUnfolding dflags id lone_variable arg_infos cont_info unf_template is_top - is_wf is_exp uf_arity guidance - | dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags - -> pprTrace "Inactive unfolding:" (ppr id) Nothing - | otherwise -> Nothing + is_wf is_exp guidance + | otherwise -> traceInline dflags "Inactive unfolding:" (ppr id) Nothing NoUnfolding -> Nothing OtherCon {} -> Nothing DFunUnfolding {} -> Nothing -- Never unfold a DFun -tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt - -> CoreExpr -> Bool -> Bool -> Bool -> Arity -> UnfoldingGuidance - -> Maybe CoreExpr -tryUnfolding dflags id lone_variable - arg_infos cont_info unf_template is_top - is_wf is_exp uf_arity guidance - -- uf_arity will typically be equal to (idArity id), - -- but may be less for InlineRules +traceInline :: DynFlags -> String -> SDoc -> a -> a +traceInline dflags str doc result | dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags - = pprTrace ("Considering inlining: " ++ showSDocDump dflags (ppr id)) - (vcat [text "arg infos" <+> ppr arg_infos, - text "uf arity" <+> ppr uf_arity, - text "interesting continuation" <+> ppr cont_info, - text "some_benefit" <+> ppr some_benefit, - text "is exp:" <+> ppr is_exp, - text "is work-free:" <+> ppr is_wf, - text "guidance" <+> ppr guidance, - extra_doc, - text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"]) - result - | otherwise = result + = pprTrace str doc result + | otherwise + = result + +tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt + -> CoreExpr -> Bool -> Bool -> Bool -> UnfoldingGuidance + -> Maybe CoreExpr +tryUnfolding dflags id lone_variable + arg_infos cont_info unf_template is_top + is_wf is_exp guidance + = case guidance of + UnfNever -> traceInline dflags str (ptext (sLit "UnfNever")) Nothing + + UnfWhen { ug_arity = uf_arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok } + | enough_args && (boring_ok || some_benefit) + -- See Note [INLINE for small functions (3)] + -> traceInline dflags str (mk_doc some_benefit empty True) (Just unf_template) + | otherwise + -> traceInline dflags str (mk_doc some_benefit empty False) Nothing + where + some_benefit = calc_some_benefit uf_arity + enough_args = (n_val_args >= uf_arity) || (unsat_ok && n_val_args > 0) + + UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size } + | is_wf && some_benefit && small_enough + -> traceInline dflags str (mk_doc some_benefit extra_doc True) (Just unf_template) + | otherwise + -> traceInline dflags str (mk_doc some_benefit extra_doc False) Nothing + where + some_benefit = calc_some_benefit (length arg_discounts) + extra_doc = text "discounted size =" <+> int discounted_size + discounted_size = size - discount + small_enough = discounted_size <= ufUseThreshold dflags + discount = computeDiscount dflags arg_discounts + res_discount arg_infos cont_info where + mk_doc some_benefit extra_doc yes_or_no + = vcat [ text "arg infos" <+> ppr arg_infos + , text "interesting continuation" <+> ppr cont_info + , text "some_benefit" <+> ppr some_benefit + , text "is exp:" <+> ppr is_exp + , text "is work-free:" <+> ppr is_wf + , text "guidance" <+> ppr guidance + , extra_doc + , text "ANSWER =" <+> if yes_or_no then text "YES" else text "NO"] + + str = "Considering inlining: " ++ showSDocDump dflags (ppr id) n_val_args = length arg_infos - saturated = n_val_args >= uf_arity - cont_info' | n_val_args > uf_arity = ValAppCtxt - | otherwise = cont_info - - result | yes_or_no = Just unf_template - | otherwise = Nothing - - interesting_args = any nonTriv arg_infos - -- NB: (any nonTriv arg_infos) looks at the - -- over-saturated args too which is "wrong"; - -- but if over-saturated we inline anyway. -- some_benefit is used when the RHS is small enough -- and the call has enough (or too many) value -- arguments (ie n_val_args >= arity). But there must -- be *something* interesting about some argument, or the -- result context, to make it worth inlining - some_benefit + calc_some_benefit :: Arity -> Bool -- The Arity is the number of args + -- expected by the unfolding + calc_some_benefit uf_arity | not saturated = interesting_args -- Under-saturated -- Note [Unsaturated applications] | otherwise = interesting_args -- Saturated or over-saturated || interesting_call - - interesting_call - = case cont_info' of - CaseCtxt -> not (lone_variable && is_wf) -- Note [Lone variables] - ValAppCtxt -> True -- Note [Cast then apply] - RuleArgCtxt -> uf_arity > 0 -- See Note [Unfold info lazy contexts] - DiscArgCtxt -> uf_arity > 0 -- - RhsCtxt -> uf_arity > 0 -- - _ -> not is_top && uf_arity > 0 -- Note [Nested functions] + where + saturated = n_val_args >= uf_arity + over_saturated = n_val_args > uf_arity + interesting_args = any nonTriv arg_infos + -- NB: (any nonTriv arg_infos) looks at the + -- over-saturated args too which is "wrong"; + -- but if over-saturated we inline anyway. + + interesting_call + | over_saturated + = True + | otherwise + = case cont_info of + CaseCtxt -> not (lone_variable && is_wf) -- Note [Lone variables] + ValAppCtxt -> True -- Note [Cast then apply] + RuleArgCtxt -> uf_arity > 0 -- See Note [Unfold info lazy contexts] + DiscArgCtxt -> uf_arity > 0 -- + RhsCtxt -> uf_arity > 0 -- + _ -> not is_top && uf_arity > 0 -- Note [Nested functions] -- Note [Inlining in ArgCtxt] - - (yes_or_no, extra_doc) - = case guidance of - UnfNever -> (False, empty) - - UnfWhen unsat_ok boring_ok - -> (enough_args && (boring_ok || some_benefit), empty ) - where -- See Note [INLINE for small functions (3)] - enough_args = saturated || (unsat_ok && n_val_args > 0) - - UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size } - -> ( is_wf && some_benefit && small_enough - , (text "discounted size =" <+> int discounted_size) ) - where - discounted_size = size - discount - small_enough = discounted_size <= ufUseThreshold dflags - discount = computeDiscount dflags uf_arity arg_discounts - res_discount arg_infos cont_info' \end{code} Note [Unfold into lazy contexts], Note [RHS of lets] @@ -1213,37 +1287,42 @@ This kind of thing can occur if you have which Roman did. \begin{code} -computeDiscount :: DynFlags -> Arity -> [Int] -> Int -> [ArgSummary] -> CallCtxt +computeDiscount :: DynFlags -> [Int] -> Int -> [ArgSummary] -> CallCtxt -> Int -computeDiscount dflags uf_arity arg_discounts res_discount arg_infos cont_info +computeDiscount dflags arg_discounts res_discount arg_infos cont_info -- We multiple the raw discounts (args_discount and result_discount) -- ty opt_UnfoldingKeenessFactor because the former have to do with -- *size* whereas the discounts imply that there's some extra -- *efficiency* to be gained (e.g. beta reductions, case reductions) -- by inlining. - = 10 -- Discount of 1 because the result replaces the call - -- so we count 1 for the function itself + = 10 -- Discount of 10 because the result replaces the call + -- so we count 10 for the function itself - + 10 * length (take uf_arity arg_infos) - -- Discount of (un-scaled) 1 for each arg supplied, + + 10 * length actual_arg_discounts + -- Discount of 10 for each arg supplied, -- because the result replaces the call + round (ufKeenessFactor dflags * - fromIntegral (arg_discount + res_discount')) + fromIntegral (total_arg_discount + res_discount')) where - arg_discount = sum (zipWith mk_arg_discount arg_discounts arg_infos) + actual_arg_discounts = zipWith mk_arg_discount arg_discounts arg_infos + total_arg_discount = sum actual_arg_discounts - mk_arg_discount _ TrivArg = 0 + mk_arg_discount _ TrivArg = 0 mk_arg_discount _ NonTrivArg = 10 - mk_arg_discount discount ValueArg = discount + mk_arg_discount discount ValueArg = discount - res_discount' = case cont_info of + res_discount' + | LT <- arg_discounts `compareLength` arg_infos + = res_discount -- Over-saturated + | otherwise + = case cont_info of BoringCtxt -> 0 CaseCtxt -> res_discount -- Presumably a constructor ValAppCtxt -> res_discount -- Presumably a function _ -> 40 `min` res_discount - -- ToDo: this 40 `min` res_dicount doesn't seem right + -- ToDo: this 40 `min` res_discount doesn't seem right -- for DiscArgCtxt it shouldn't matter because the function will -- get the arg discount for any non-triv arg -- for RuleArgCtxt we do want to be keener to inline; but not only diff --git a/compiler/coreSyn/PprCore.lhs b/compiler/coreSyn/PprCore.lhs index f86a911ede..593c670cae 100644 --- a/compiler/coreSyn/PprCore.lhs +++ b/compiler/coreSyn/PprCore.lhs @@ -421,9 +421,10 @@ showAttributes stuff \begin{code} instance Outputable UnfoldingGuidance where ppr UnfNever = ptext (sLit "NEVER") - ppr (UnfWhen unsat_ok boring_ok) + ppr (UnfWhen { ug_arity = arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok }) = ptext (sLit "ALWAYS_IF") <> - parens (ptext (sLit "unsat_ok=") <> ppr unsat_ok <> comma <> + parens (ptext (sLit "arity=") <> int arity <> comma <> + ptext (sLit "unsat_ok=") <> ppr unsat_ok <> comma <> ptext (sLit "boring_ok=") <> ppr boring_ok) ppr (UnfIfGoodArgs { ug_args = cs, ug_size = size, ug_res = discount }) = hsep [ ptext (sLit "IF_ARGS"), @@ -446,13 +447,12 @@ instance Outputable Unfolding where ppr (CoreUnfolding { uf_src = src , uf_tmpl=rhs, uf_is_top=top, uf_is_value=hnf , uf_is_conlike=conlike, uf_is_work_free=wf - , uf_expandable=exp, uf_guidance=g, uf_arity=arity}) + , uf_expandable=exp, uf_guidance=g }) = ptext (sLit "Unf") <> braces (pp_info $$ pp_rhs) where pp_info = fsep $ punctuate comma [ ptext (sLit "Src=") <> ppr src , ptext (sLit "TopLvl=") <> ppr top - , ptext (sLit "Arity=") <> int arity , ptext (sLit "Value=") <> ppr hnf , ptext (sLit "ConLike=") <> ppr conlike , ptext (sLit "WorkFree=") <> ppr wf diff --git a/compiler/deSugar/DsBinds.lhs b/compiler/deSugar/DsBinds.lhs index 172d19b9ac..18b6856ec1 100644 --- a/compiler/deSugar/DsBinds.lhs +++ b/compiler/deSugar/DsBinds.lhs @@ -463,8 +463,11 @@ dsSpec mb_poly_rhs (L loc (SpecPrag poly_id spec_co spec_inl)) Right (rule_bndrs, _fn, args) -> do { dflags <- getDynFlags - ; let spec_unf = specUnfolding bndrs args (realIdUnfolding poly_id) - spec_id = mkLocalId spec_name spec_ty + ; let fn_unf = realIdUnfolding poly_id + unf_fvs = stableUnfoldingVars fn_unf `orElse` emptyVarSet + in_scope = mkInScopeSet (unf_fvs `unionVarSet` exprsFreeVars args) + spec_unf = specUnfolding dflags (mkEmptySubst in_scope) bndrs args fn_unf + spec_id = mkLocalId spec_name spec_ty `setInlinePragma` inl_prag `setIdUnfolding` spec_unf rule = mkRule False {- Not auto -} is_local_id @@ -474,11 +477,14 @@ dsSpec mb_poly_rhs (L loc (SpecPrag poly_id spec_co spec_inl)) (mkVarApps (Var spec_id) bndrs) ; spec_rhs <- dsHsWrapper spec_co poly_rhs - ; let spec_pair = makeCorePair dflags spec_id False (dictArity bndrs) spec_rhs ; when (isInlinePragma id_inl && wopt Opt_WarnPointlessPragmas dflags) (warnDs (specOnInline poly_name)) - ; return (Just (unitOL spec_pair, rule)) + + ; return (Just (unitOL (spec_id, spec_rhs), rule)) + -- NB: do *not* use makeCorePair on (spec_id,spec_rhs), because + -- makeCorePair overwrites the unfolding, which we have + -- just created using specUnfolding } } } where is_local_id = isJust mb_poly_rhs @@ -515,16 +521,6 @@ dsSpec mb_poly_rhs (L loc (SpecPrag poly_id spec_co spec_inl)) | otherwise = spec_prag_act -- Specified by user -specUnfolding :: [Var] -> [CoreExpr] -> Unfolding -> Unfolding -specUnfolding new_bndrs new_args df@(DFunUnfolding { df_bndrs = bndrs, df_args = args }) - = ASSERT2( equalLength new_args bndrs, ppr df $$ ppr new_args $$ ppr new_bndrs ) - df { df_bndrs = new_bndrs, df_args = map (substExpr (text "specUnfolding") subst) args } - where - subst = mkOpenSubst (mkInScopeSet fvs) (bndrs `zip` new_args) - fvs = (exprsFreeVars args `delVarSetList` bndrs) `extendVarSetList` new_bndrs - -specUnfolding _ _ _ = noUnfolding - specOnInline :: Name -> MsgDoc specOnInline f = ptext (sLit "SPECIALISE pragma on INLINE function probably won't fire:") <+> quotes (ppr f) diff --git a/compiler/iface/MkIface.lhs b/compiler/iface/MkIface.lhs index 1aba9eee44..cbaed1fc05 100644 --- a/compiler/iface/MkIface.lhs +++ b/compiler/iface/MkIface.lhs @@ -1881,14 +1881,16 @@ toIfaceIdInfo id_info -------------------------- toIfUnfolding :: Bool -> Unfolding -> Maybe IfaceInfoItem -toIfUnfolding lb (CoreUnfolding { uf_tmpl = rhs, uf_arity = arity - , uf_src = src, uf_guidance = guidance }) +toIfUnfolding lb (CoreUnfolding { uf_tmpl = rhs + , uf_src = src + , uf_guidance = guidance }) = Just $ HsUnfold lb $ case src of InlineStable -> case guidance of - UnfWhen unsat_ok boring_ok -> IfInlineRule arity unsat_ok boring_ok if_rhs - _other -> IfCoreUnfold True if_rhs + UnfWhen {ug_arity = arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok } + -> IfInlineRule arity unsat_ok boring_ok if_rhs + _other -> IfCoreUnfold True if_rhs InlineCompulsory -> IfCompulsory if_rhs InlineRhs -> IfCoreUnfold False if_rhs -- Yes, even if guidance is UnfNever, expose the unfolding diff --git a/compiler/iface/TcIface.lhs b/compiler/iface/TcIface.lhs index 68f9e8fd65..37b65b01af 100644 --- a/compiler/iface/TcIface.lhs +++ b/compiler/iface/TcIface.lhs @@ -1306,9 +1306,9 @@ tcUnfolding name _ _ (IfInlineRule arity unsat_ok boring_ok if_expr) = do { mb_expr <- tcPragExpr name if_expr ; return (case mb_expr of Nothing -> NoUnfolding - Just expr -> mkCoreUnfolding InlineStable True expr arity - (UnfWhen unsat_ok boring_ok)) - } + Just expr -> mkCoreUnfolding InlineStable True expr guidance )} + where + guidance = UnfWhen { ug_arity = arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok } tcUnfolding name dfun_ty _ (IfDFunUnfold bs ops) = bindIfaceBndrs bs $ \ bs' -> diff --git a/compiler/simplCore/Simplify.lhs b/compiler/simplCore/Simplify.lhs index cc214f7513..d722f5164c 100644 --- a/compiler/simplCore/Simplify.lhs +++ b/compiler/simplCore/Simplify.lhs @@ -744,19 +744,19 @@ simplUnfolding env top_lvl id new_rhs unf ; args' <- mapM (simplExpr env') args ; return (mkDFunUnfolding bndrs' con args') } - CoreUnfolding { uf_tmpl = expr, uf_arity = arity - , uf_src = src, uf_guidance = guide } + CoreUnfolding { uf_tmpl = expr, uf_src = src, uf_guidance = guide } | isStableSource src -> do { expr' <- simplExpr rule_env expr ; case guide of - UnfWhen sat_ok _ -- Happens for INLINE things - -> let guide' = UnfWhen sat_ok (inlineBoringOk expr') + UnfWhen { ug_arity = arity, ug_unsat_ok = sat_ok } -- Happens for INLINE things + -> let guide' = UnfWhen { ug_arity = arity, ug_unsat_ok = sat_ok + , ug_boring_ok = inlineBoringOk expr' } -- Refresh the boring-ok flag, in case expr' -- has got small. This happens, notably in the inlinings -- for dfuns for single-method classes; see -- Note [Single-method classes] in TcInstDcls. -- A test case is Trac #4138 - in return (mkCoreUnfolding src is_top_lvl expr' arity guide') + in return (mkCoreUnfolding src is_top_lvl expr' guide') -- See Note [Top-level flag on inline rules] in CoreUnfold _other -- Happens for INLINABLE things diff --git a/compiler/specialise/Specialise.lhs b/compiler/specialise/Specialise.lhs index baa5d1971f..8003fa87d1 100644 --- a/compiler/specialise/Specialise.lhs +++ b/compiler/specialise/Specialise.lhs @@ -1072,8 +1072,6 @@ specCalls env rules_for_me calls_for_me fn rhs -- Figure out whether the function has an INLINE pragma -- See Note [Inline specialisations] - spec_arity = unfoldingArity fn_unf - n_dicts -- Arity of the *specialised* inline rule - (rhs_tyvars, rhs_ids, rhs_body) = collectTyAndValBinders rhs rhs_dict_ids = take n_dicts rhs_ids @@ -1123,22 +1121,24 @@ specCalls env rules_for_me calls_for_me fn rhs -- spec_tyvars = [a,c] -- ty_args = [t1,b,t3] spec_tv_binds = [(tv,ty) | (tv, Just ty) <- rhs_tyvars `zip` call_ts] - spec_ty_args = map snd spec_tv_binds env1 = extendTvSubstList env spec_tv_binds (rhs_env, poly_tyvars) = substBndrs env1 [tv | (tv, Nothing) <- rhs_tyvars `zip` call_ts] - ; (rhs_env2, inst_dict_ids, dx_binds) - <- bindAuxiliaryDicts rhs_env (zipEqual "bindAux" rhs_dict_ids call_ds) - ; let ty_args = mk_ty_args call_ts poly_tyvars - inst_args = ty_args ++ map Var inst_dict_ids + -- Clone rhs_dicts, including instantiating their types + ; inst_dict_ids <- mapM (newDictBndr rhs_env) rhs_dict_ids + ; let (rhs_env2, dx_binds, spec_dict_args) + = bindAuxiliaryDicts rhs_env rhs_dict_ids call_ds inst_dict_ids + ty_args = mk_ty_args call_ts poly_tyvars + rule_args = ty_args ++ map Var inst_dict_ids + rule_bndrs = poly_tyvars ++ inst_dict_ids ; dflags <- getDynFlags - ; if already_covered dflags inst_args then + ; if already_covered dflags rule_args then return Nothing else do { -- Figure out the type of the specialised function - let body_ty = applyTypeToArgs rhs fn_type inst_args + let body_ty = applyTypeToArgs rhs fn_type rule_args (lam_args, app_args) -- Add a dummy argument if body_ty is unlifted | isUnLiftedType body_ty -- C.f. WwLib.mkWorkerArgs = (poly_tyvars ++ [voidArgId], poly_tyvars ++ [voidPrimId]) @@ -1150,13 +1150,13 @@ specCalls env rules_for_me calls_for_me fn rhs ; let -- The rule to put in the function's specialisation is: -- forall b, d1',d2'. f t1 b t3 d1' d2' = f1 b - rule_name = mkFastString ("SPEC " ++ showSDocDump dflags (ppr fn <+> ppr spec_ty_args)) + rule_name = mkFastString ("SPEC " ++ showSDocDump dflags (ppr fn <+> hsep (map ppr_call_key_ty call_ts))) spec_env_rule = mkRule True {- Auto generated -} is_local rule_name inl_act -- Note [Auto-specialisation and RULES] (idName fn) - (poly_tyvars ++ inst_dict_ids) - inst_args + rule_bndrs + rule_args (mkVarApps (Var spec_f) app_args) -- Add the { d1' = dx1; d2' = dx2 } usage stuff @@ -1165,20 +1165,18 @@ specCalls env rules_for_me calls_for_me fn rhs -------------------------------------- -- Add a suitable unfolding if the spec_inl_prag says so -- See Note [Inline specialisations] - spec_inl_prag + (spec_inl_prag, spec_unf) | not is_local && isStrongLoopBreaker (idOccInfo fn) - = neverInlinePragma -- See Note [Specialising imported functions] in OccurAnal - | otherwise - = case inl_prag of - InlinePragma { inl_inline = Inlinable } - -> inl_prag { inl_inline = EmptyInlineSpec } - _ -> inl_prag + = (neverInlinePragma, noUnfolding) + -- See Note [Specialising imported functions] in OccurAnal - spec_unf - = case inlinePragmaSpec spec_inl_prag of - Inline -> mkInlineUnfolding (Just spec_arity) spec_rhs - Inlinable -> mkInlinableUnfolding dflags spec_rhs - _ -> NoUnfolding + | InlinePragma { inl_inline = Inlinable } <- inl_prag + = (inl_prag { inl_inline = EmptyInlineSpec }, noUnfolding) + + | otherwise + = (inl_prag, specUnfolding dflags (se_subst env) + poly_tyvars (ty_args ++ spec_dict_args) + fn_unf) -------------------------------------- -- Adding arity information just propagates it a bit faster @@ -1193,34 +1191,35 @@ specCalls env rules_for_me calls_for_me fn rhs bindAuxiliaryDicts :: SpecEnv - -> [(DictId,CoreExpr)] -- (orig_dict, dx) - -> SpecM (SpecEnv, -- Substitute for all orig_dicts - [DictId], -- Cloned dict Ids - [CoreBind]) -- Auxiliary bindings + -> [DictId] -> [CoreExpr] -- Original dict bndrs, and the witnessing expressions + -> [DictId] -- A cloned dict-id for each dict arg + -> (SpecEnv, -- Substitute for all orig_dicts + [CoreBind], -- Auxiliary dict bindings + [CoreExpr]) -- Witnessing expressions (all trivial) -- Bind any dictionary arguments to fresh names, to preserve sharing -bindAuxiliaryDicts env@(SE { se_subst = subst, se_interesting = interesting }) - dict_binds - = do { inst_dict_ids <- mapM (newDictBndr env . fst) dict_binds - -- Clone rhs_dicts, including instantiating their types - ; let triples = inst_dict_ids `zip` dict_binds - (subst', binds) = go subst [] triples - interesting_dicts = mkVarSet [ dx_id | (dx_id, (_, dx)) <- triples - , interestingDict env dx ] +bindAuxiliaryDicts env@(SE { se_subst = subst, se_interesting = interesting }) + orig_dict_ids call_ds inst_dict_ids + = (env', dx_binds, spec_dict_args) + where + (dx_binds, spec_dict_args) = go call_ds inst_dict_ids + env' = env { se_subst = CoreSubst.extendIdSubstList subst (orig_dict_ids `zip` spec_dict_args) + , se_interesting = interesting `unionVarSet` interesting_dicts } + + interesting_dicts = mkVarSet [ dx_id | NonRec dx_id dx <- dx_binds + , interestingDict env dx ] -- See Note [Make the new dictionaries interesting] - env' = env { se_subst = subst' - , se_interesting = interesting `unionVarSet` interesting_dicts } - ; return (env', inst_dict_ids, binds) } - where - go subst binds [] = (subst, binds) - go subst binds ((dx_id, (d, dx)) : triples) - | exprIsTrivial dx = go (CoreSubst.extendIdSubst subst d dx) binds triples - | otherwise = go (CoreSubst.extendIdSubst subst d (Var dx_id)) - (NonRec dx_id dx : binds) triples + go [] _ = ([], []) + go (dx:dxs) (dx_id:dx_ids) + | exprIsTrivial dx = (dx_binds, dx:args) + | otherwise = (NonRec dx_id dx : dx_binds, Var dx_id : args) + where + (dx_binds, args) = go dxs dx_ids -- In the first case extend the substitution but not bindings; -- in the latter extend the bindings but not the substitution. -- For the former, note that we bind the *original* dict in the substitution, -- overriding any d->dx_id binding put there by substBndrs + go _ _ = pprPanic "bindAuxiliaryDicts" (ppr orig_dict_ids $$ ppr call_ds $$ ppr inst_dict_ids) \end{code} Note [Make the new dictionaries interesting] @@ -1550,6 +1549,16 @@ instance Outputable CallInfoSet where ppr (CIS fn map) = hang (ptext (sLit "CIS") <+> ppr fn) 2 (ppr map) +{- +pprCallInfo :: Id -> CallInfo -> SDoc +pprCallInfo fn (CallKey mb_tys, (dxs, _)) + = hang (ppr fn) 2 (sep (map ppr_call_key_ty mb_tys ++ map pprParendExpr dxs)) +-} + +ppr_call_key_ty :: Maybe Type -> SDoc +ppr_call_key_ty Nothing = char '_' +ppr_call_key_ty (Just ty) = char '@' <+> pprParendType ty + instance Outputable CallKey where ppr (CallKey ts) = ppr ts |