diff options
author | Kavon Farvardin <kavon@farvard.in> | 2018-09-23 15:29:37 -0500 |
---|---|---|
committer | Kavon Farvardin <kavon@farvard.in> | 2018-09-23 15:29:37 -0500 |
commit | 84c2ad99582391005b5e873198b15e9e9eb4f78d (patch) | |
tree | caa8c2f2ec7e97fbb4977263c6817c9af5025cf4 /compiler/coreSyn/CoreUnfold.hs | |
parent | 8ddb47cfcf5776e9a3c55fd37947c8a95e00fa12 (diff) | |
parent | e68b439fe5de61b9a2ca51af472185c62ccb8b46 (diff) | |
download | haskell-wip/T13904.tar.gz |
update to current master againwip/T13904
Diffstat (limited to 'compiler/coreSyn/CoreUnfold.hs')
-rw-r--r-- | compiler/coreSyn/CoreUnfold.hs | 177 |
1 files changed, 129 insertions, 48 deletions
diff --git a/compiler/coreSyn/CoreUnfold.hs b/compiler/coreSyn/CoreUnfold.hs index a104cd693f..adb399ea6f 100644 --- a/compiler/coreSyn/CoreUnfold.hs +++ b/compiler/coreSyn/CoreUnfold.hs @@ -42,6 +42,8 @@ module CoreUnfold ( #include "HsVersions.h" +import GhcPrelude + import DynFlags import CoreSyn import PprCore () -- Instances @@ -63,8 +65,10 @@ import Bag import Util import Outputable import ForeignCall +import Name import qualified Data.ByteString as BS +import Data.List {- ************************************************************************ @@ -81,7 +85,7 @@ mkTopUnfolding dflags is_bottoming rhs mkImplicitUnfolding :: DynFlags -> CoreExpr -> Unfolding -- For implicit Ids, do a tiny bit of optimising first mkImplicitUnfolding dflags expr - = mkTopUnfolding dflags False (simpleOptExpr expr) + = mkTopUnfolding dflags False (simpleOptExpr dflags expr) -- Note [Top-level flag on inline rules] -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -100,17 +104,17 @@ mkDFunUnfolding bndrs con ops , df_args = map occurAnalyseExpr ops } -- See Note [Occurrence analysis of unfoldings] -mkWwInlineRule :: CoreExpr -> Arity -> Unfolding -mkWwInlineRule expr arity +mkWwInlineRule :: DynFlags -> CoreExpr -> Arity -> Unfolding +mkWwInlineRule dflags expr arity = mkCoreUnfolding InlineStable True - (simpleOptExpr expr) + (simpleOptExpr dflags expr) (UnfWhen { ug_arity = arity, ug_unsat_ok = unSaturatedOk , ug_boring_ok = boringCxtNotOk }) mkCompulsoryUnfolding :: CoreExpr -> Unfolding mkCompulsoryUnfolding expr -- Used for things that absolutely must be unfolded = mkCoreUnfolding InlineCompulsory True - (simpleOptExpr expr) + (simpleOptExpr unsafeGlobalDynFlags expr) (UnfWhen { ug_arity = 0 -- Arity of unfolding doesn't matter , ug_unsat_ok = unSaturatedOk, ug_boring_ok = boringCxtOk }) @@ -122,7 +126,7 @@ mkWorkerUnfolding dflags work_fn | isStableSource src = mkCoreUnfolding src top_lvl new_tmpl guidance where - new_tmpl = simpleOptExpr (work_fn tmpl) + new_tmpl = simpleOptExpr dflags (work_fn tmpl) guidance = calcUnfoldingGuidance dflags False new_tmpl mkWorkerUnfolding _ _ _ = noUnfolding @@ -137,7 +141,7 @@ mkInlineUnfolding expr True -- Note [Top-level flag on inline rules] expr' guide where - expr' = simpleOptExpr expr + expr' = simpleOptExpr unsafeGlobalDynFlags expr guide = UnfWhen { ug_arity = manifestArity expr' , ug_unsat_ok = unSaturatedOk , ug_boring_ok = boring_ok } @@ -151,24 +155,28 @@ mkInlineUnfoldingWithArity arity expr True -- Note [Top-level flag on inline rules] expr' guide where - expr' = simpleOptExpr expr + expr' = simpleOptExpr unsafeGlobalDynFlags expr guide = UnfWhen { ug_arity = arity , ug_unsat_ok = needSaturated , ug_boring_ok = boring_ok } - boring_ok = inlineBoringOk expr' + -- See Note [INLINE pragmas and boring contexts] as to why we need to look + -- at the arity here. + boring_ok | arity == 0 = True + | otherwise = inlineBoringOk expr' mkInlinableUnfolding :: DynFlags -> CoreExpr -> Unfolding mkInlinableUnfolding dflags expr = mkUnfolding dflags InlineStable False False expr' where - expr' = simpleOptExpr expr + expr' = simpleOptExpr dflags expr -specUnfolding :: [Var] -> (CoreExpr -> CoreExpr) -> Arity -> Unfolding -> Unfolding +specUnfolding :: DynFlags -> [Var] -> (CoreExpr -> CoreExpr) -> Arity + -> Unfolding -> Unfolding -- See Note [Specialising unfoldings] -- specUnfolding spec_bndrs spec_app arity_decrease unf -- = \spec_bndrs. spec_app( unf ) -- -specUnfolding spec_bndrs spec_app arity_decrease +specUnfolding dflags spec_bndrs spec_app arity_decrease df@(DFunUnfolding { df_bndrs = old_bndrs, df_con = con, df_args = args }) = ASSERT2( arity_decrease == count isId old_bndrs - count isId spec_bndrs, ppr df ) mkDFunUnfolding spec_bndrs con (map spec_arg args) @@ -180,11 +188,11 @@ specUnfolding spec_bndrs spec_app arity_decrease -- \new_bndrs. MkD (spec_app(\old_bndrs. <op1>)) ... ditto <opn> -- The ASSERT checks the value part of that where - spec_arg arg = simpleOptExpr (spec_app (mkLams old_bndrs arg)) + spec_arg arg = simpleOptExpr dflags (spec_app (mkLams old_bndrs arg)) -- The beta-redexes created by spec_app will be -- simplified away by simplOptExpr -specUnfolding spec_bndrs spec_app arity_decrease +specUnfolding dflags spec_bndrs spec_app arity_decrease (CoreUnfolding { uf_src = src, uf_tmpl = tmpl , uf_is_top = top_lvl , uf_guidance = old_guidance }) @@ -195,13 +203,13 @@ specUnfolding spec_bndrs spec_app arity_decrease = let guidance = UnfWhen { ug_arity = old_arity - arity_decrease , ug_unsat_ok = unsat_ok , ug_boring_ok = boring_ok } - new_tmpl = simpleOptExpr (mkLams spec_bndrs (spec_app tmpl)) + new_tmpl = simpleOptExpr dflags (mkLams spec_bndrs (spec_app tmpl)) -- The beta-redexes created by spec_app will be -- simplified away by simplOptExpr in mkCoreUnfolding src top_lvl new_tmpl guidance -specUnfolding _ _ _ _ = noUnfolding +specUnfolding _ _ _ _ _ = noUnfolding {- Note [Specialising unfoldings] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -231,6 +239,72 @@ specUnfolding to specialise its unfolding. Some important points: we keep it (so the specialised thing too will always inline) if a stable unfolding has UnfoldingGuidance of UnfIfGoodArgs (which arises from INLINABLE), we discard it + +Note [Honour INLINE on 0-ary bindings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + + x = <expensive> + {-# INLINE x #-} + + f y = ...x... + +The semantics of an INLINE pragma is + + inline x at every call site, provided it is saturated; + that is, applied to at least as many arguments as appear + on the LHS of the Haskell source definition. + +(This soure-code-derived arity is stored in the `ug_arity` field of +the `UnfoldingGuidance`.) + +In the example, x's ug_arity is 0, so we should inline it at every use +site. It's rare to have such an INLINE pragma (usually INLINE Is on +functions), but it's occasionally very important (Trac #15578, #15519). +In #15519 we had something like + x = case (g a b) of I# r -> T r + {-# INLINE x #-} + f y = ...(h x).... + +where h is strict. So we got + f y = ...(case g a b of I# r -> h (T r))... + +and that in turn allowed SpecConstr to ramp up performance. + +How do we deliver on this? By adjusting the ug_boring_ok +flag in mkInlineUnfoldingWithArity; see +Note [INLINE pragmas and boring contexts] + +NB: there is a real risk that full laziness will float it right back +out again. Consider again + x = factorial 200 + {-# INLINE x #-} + f y = ...x... + +After inlining we get + f y = ...(factorial 200)... + +but it's entirely possible that full laziness will do + lvl23 = factorial 200 + f y = ...lvl23... + +That's a problem for another day. + +Note [INLINE pragmas and boring contexts] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +An INLINE pragma uses mkInlineUnfoldingWithArity to build the +unfolding. That sets the ug_boring_ok flag to False if the function +is not tiny (inlineBorkingOK), so that even INLINE functions are not +inlined in an utterly boring context. E.g. + \x y. Just (f y x) +Nothing is gained by inlining f here, even if it has an INLINE +pragma. + +But for 0-ary bindings, we want to inline regardless; see +Note [Honour INLINE on 0-ary bindings]. + +I'm a bit worried that it's possible for the same kind of problem +to arise for non-0-ary functions too, but let's wait and see. -} mkCoreUnfolding :: UnfoldingSource -> Bool -> CoreExpr @@ -696,7 +770,8 @@ sizeExpr dflags bOMB_OUT_SIZE top_args expr -- | Finds a nominal size of a string literal. litSize :: Literal -> Int -- Used by CoreUnfold.sizeExpr -litSize (LitInteger {}) = 100 -- Note [Size of literal integers] +litSize (LitNumber LitNumInteger _ _) = 100 -- Note [Size of literal integers] +litSize (LitNumber LitNumNatural _ _) = 100 litSize (MachStr str) = 10 + 10 * ((BS.length str + 3) `div` 4) -- If size could be 0 then @f "x"@ might be too small -- [Sept03: make literal strings a bit bigger to avoid fruitless @@ -943,7 +1018,7 @@ In a function application (f a b) Code for manipulating sizes -} --- | The size of an candidate expression for unfolding +-- | The size of a candidate expression for unfolding data ExprSize = TooBig | SizeIs { _es_size_is :: {-# UNPACK #-} !Int -- ^ Size found @@ -1147,51 +1222,55 @@ callSiteInline dflags id active_unfolding lone_variable arg_infos cont_info -- idUnfolding checks for loop-breakers, returning NoUnfolding -- Things with an INLINE pragma may have an unfolding *and* -- be a loop breaker (maybe the knot is not yet untied) - CoreUnfolding { uf_tmpl = unf_template, uf_is_top = is_top + CoreUnfolding { uf_tmpl = unf_template , uf_is_work_free = is_wf , uf_guidance = guidance, uf_expandable = is_exp } | active_unfolding -> tryUnfolding dflags id lone_variable - arg_infos cont_info unf_template is_top + arg_infos cont_info unf_template is_wf is_exp guidance - | otherwise -> traceInline dflags "Inactive unfolding:" (ppr id) Nothing + | otherwise -> traceInline dflags id "Inactive unfolding:" (ppr id) Nothing NoUnfolding -> Nothing BootUnfolding -> Nothing OtherCon {} -> Nothing DFunUnfolding {} -> Nothing -- Never unfold a DFun -traceInline :: DynFlags -> String -> SDoc -> a -> a -traceInline dflags str doc result +traceInline :: DynFlags -> Id -> String -> SDoc -> a -> a +traceInline dflags inline_id str doc result + | Just prefix <- inlineCheck dflags + = if prefix `isPrefixOf` occNameString (getOccName inline_id) + then pprTrace str doc result + else result | dopt Opt_D_dump_inlinings dflags && dopt Opt_D_verbose_core2core dflags = pprTrace str doc result | otherwise = result tryUnfolding :: DynFlags -> Id -> Bool -> [ArgSummary] -> CallCtxt - -> CoreExpr -> Bool -> Bool -> Bool -> UnfoldingGuidance + -> CoreExpr -> Bool -> Bool -> UnfoldingGuidance -> Maybe CoreExpr tryUnfolding dflags id lone_variable - arg_infos cont_info unf_template is_top + arg_infos cont_info unf_template is_wf is_exp guidance = case guidance of - UnfNever -> traceInline dflags str (text "UnfNever") Nothing + UnfNever -> traceInline dflags id str (text "UnfNever") Nothing UnfWhen { ug_arity = uf_arity, ug_unsat_ok = unsat_ok, ug_boring_ok = boring_ok } | enough_args && (boring_ok || some_benefit || ufVeryAggressive dflags) -- See Note [INLINE for small functions (3)] - -> traceInline dflags str (mk_doc some_benefit empty True) (Just unf_template) + -> traceInline dflags id str (mk_doc some_benefit empty True) (Just unf_template) | otherwise - -> traceInline dflags str (mk_doc some_benefit empty False) Nothing + -> traceInline dflags id str (mk_doc some_benefit empty False) Nothing where some_benefit = calc_some_benefit uf_arity enough_args = (n_val_args >= uf_arity) || (unsat_ok && n_val_args > 0) UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size } | ufVeryAggressive dflags - -> traceInline dflags str (mk_doc some_benefit extra_doc True) (Just unf_template) + -> traceInline dflags id str (mk_doc some_benefit extra_doc True) (Just unf_template) | is_wf && some_benefit && small_enough - -> traceInline dflags str (mk_doc some_benefit extra_doc True) (Just unf_template) + -> traceInline dflags id str (mk_doc some_benefit extra_doc True) (Just unf_template) | otherwise - -> traceInline dflags str (mk_doc some_benefit extra_doc False) Nothing + -> traceInline dflags id str (mk_doc some_benefit extra_doc False) Nothing where some_benefit = calc_some_benefit (length arg_discounts) extra_doc = text "discounted size =" <+> int discounted_size @@ -1239,13 +1318,13 @@ tryUnfolding dflags id lone_variable = True | otherwise = case cont_info of - CaseCtxt -> not (lone_variable && is_wf) -- Note [Lone variables] - ValAppCtxt -> True -- Note [Cast then apply] + CaseCtxt -> not (lone_variable && is_exp) -- Note [Lone variables] + ValAppCtxt -> True -- Note [Cast then apply] RuleArgCtxt -> uf_arity > 0 -- See Note [Unfold info lazy contexts] - DiscArgCtxt -> uf_arity > 0 -- + DiscArgCtxt -> uf_arity > 0 -- Note [Inlining in ArgCtxt] RhsCtxt -> uf_arity > 0 -- - _ -> not is_top && uf_arity > 0 -- Note [Nested functions] - -- Note [Inlining in ArgCtxt] + _other -> False -- See Note [Nested functions] + {- Note [Unfold into lazy contexts], Note [RHS of lets] @@ -1315,18 +1394,17 @@ However for worker/wrapper it may be worth inlining even if the arity is not satisfied (as we do in the CoreUnfolding case) so we don't require saturation. - Note [Nested functions] ~~~~~~~~~~~~~~~~~~~~~~~ -If a function has a nested defn we also record some-benefit, on the -grounds that we are often able to eliminate the binding, and hence the -allocation, for the function altogether; this is good for join points. -But this only makes sense for *functions*; inlining a constructor -doesn't help allocation unless the result is scrutinised. UNLESS the -constructor occurs just once, albeit possibly in multiple case -branches. Then inlining it doesn't increase allocation, but it does -increase the chance that the constructor won't be allocated at all in -the branches that don't use it. +At one time we treated a call of a non-top-level function as +"interesting" (regardless of how boring the context) in the hope +that inlining it would eliminate the binding, and its allocation. +Specifically, in the default case of interesting_call we had + _other -> not is_top && uf_arity > 0 + +But actually postInlineUnconditionally does some of this and overall +it makes virtually no difference to nofib. So I simplified away this +special case Note [Cast then apply] ~~~~~~~~~~~~~~~~~~~~~~ @@ -1386,9 +1464,10 @@ because the latter is strict. s = "foo" f = \x -> ...(error s)... -Fundamentally such contexts should not encourage inlining because the +Fundamentally such contexts should not encourage inlining because, provided +the RHS is "expandable" (see Note [exprIsExpandable] in CoreUtils) the context can ``see'' the unfolding of the variable (e.g. case or a -RULE) so there's no gain. If the thing is bound to a value. +RULE) so there's no gain. However, watch out: @@ -1439,6 +1518,8 @@ This kind of thing can occur if you have foo = let x = e in (x,x) which Roman did. + + -} computeDiscount :: DynFlags -> [Int] -> Int -> [ArgSummary] -> CallCtxt |