summaryrefslogtreecommitdiff
path: root/compiler/coreSyn/CoreUnfold.lhs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/coreSyn/CoreUnfold.lhs')
-rw-r--r--compiler/coreSyn/CoreUnfold.lhs106
1 files changed, 63 insertions, 43 deletions
diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs
index 4f1dee3da3..930041dea4 100644
--- a/compiler/coreSyn/CoreUnfold.lhs
+++ b/compiler/coreSyn/CoreUnfold.lhs
@@ -174,8 +174,7 @@ mkUnfolding src top_lvl is_bottoming expr
uf_guidance = guidance }
where
is_cheap = exprIsCheap expr
- (arity, guidance) = calcUnfoldingGuidance is_cheap
- opt_UF_CreationThreshold expr
+ (arity, guidance) = calcUnfoldingGuidance expr
-- Sometimes during simplification, there's a large let-bound thing
-- which has been substituted, and so is now dead; so 'expr' contains
-- two copies of the thing while the occurrence-analysed expression doesn't
@@ -217,14 +216,13 @@ inlineBoringOk e
go _ _ = boringCxtNotOk
calcUnfoldingGuidance
- :: Bool -- True <=> the rhs is cheap, or we want to treat it
- -- as cheap (INLINE things)
- -> Int -- Bomb out if size gets bigger than this
- -> CoreExpr -- Expression to look at
+ :: CoreExpr -- Expression to look at
-> (Arity, UnfoldingGuidance)
-calcUnfoldingGuidance expr_is_cheap bOMB_OUT_SIZE expr
+calcUnfoldingGuidance expr
= case collectBinders expr of { (bndrs, body) ->
let
+ bOMB_OUT_SIZE = opt_UF_CreationThreshold
+ -- Bomb out if size gets bigger than this
val_bndrs = filter isId bndrs
n_val_bndrs = length val_bndrs
@@ -232,8 +230,7 @@ calcUnfoldingGuidance expr_is_cheap bOMB_OUT_SIZE expr
= case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of
TooBig -> UnfNever
SizeIs size cased_bndrs scrut_discount
- | uncondInline n_val_bndrs (iBox size)
- , expr_is_cheap
+ | uncondInline expr n_val_bndrs (iBox size)
-> UnfWhen unSaturatedOk boringCxtOk -- Note [INLINE for small functions]
| otherwise
-> UnfIfGoodArgs { ug_args = map (discount cased_bndrs) val_bndrs
@@ -278,9 +275,10 @@ Notice that 'x' counts 0, while (f x) counts 2. That's deliberate: there's
a function call to account for. Notice also that constructor applications
are very cheap, because exposing them to a caller is so valuable.
-[25/5/11] All sizes are now multiplied by 10, except for primops.
-This makes primops look cheap, and seems to be almost unversally
-beneficial. Done partly as a result of #4978.
+[25/5/11] All sizes are now multiplied by 10, except for primops
+(which have sizes like 1 or 4. This makes primops look fantastically
+cheap, and seems to be almost unversally beneficial. Done partly as a
+result of #4978.
Note [Do not inline top-level bottoming functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -289,7 +287,6 @@ and similar friends. See Note [Bottoming floats] in SetLevels.
Do not re-inline them! But we *do* still inline if they are very small
(the uncondInline stuff).
-
Note [INLINE for small functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider {-# INLINE f #-}
@@ -302,43 +299,54 @@ inline unconditionally, regardless of how boring the context is.
Things to note:
- * We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
- than the thing it's replacing. Notice that
+(1) We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
+ than the thing it's replacing. Notice that
(f x) --> (g 3) -- YES, unconditionally
(f x) --> x : [] -- YES, *even though* there are two
-- arguments to the cons
x --> g 3 -- NO
x --> Just v -- NO
- It's very important not to unconditionally replace a variable by
- a non-atomic term.
-
-* We do this even if the thing isn't saturated, else we end up with the
- silly situation that
- f x y = x
- ...map (f 3)...
- doesn't inline. Even in a boring context, inlining without being
- saturated will give a lambda instead of a PAP, and will be more
- efficient at runtime.
-
-* However, when the function's arity > 0, we do insist that it
- has at least one value argument at the call site. Otherwise we find this:
- f = /\a \x:a. x
- d = /\b. MkD (f b)
- If we inline f here we get
- d = /\b. MkD (\x:b. x)
- and then prepareRhs floats out the argument, abstracting the type
- variables, so we end up with the original again!
-
+ It's very important not to unconditionally replace a variable by
+ a non-atomic term.
+
+(2) We do this even if the thing isn't saturated, else we end up with the
+ silly situation that
+ f x y = x
+ ...map (f 3)...
+ doesn't inline. Even in a boring context, inlining without being
+ saturated will give a lambda instead of a PAP, and will be more
+ efficient at runtime.
+
+(3) However, when the function's arity > 0, we do insist that it
+ has at least one value argument at the call site. (This check is
+ made in the UnfWhen case of callSiteInline.) Otherwise we find this:
+ f = /\a \x:a. x
+ d = /\b. MkD (f b)
+ If we inline f here we get
+ d = /\b. MkD (\x:b. x)
+ and then prepareRhs floats out the argument, abstracting the type
+ variables, so we end up with the original again!
+
+(4) We must be much more cautious about arity-zero things. Consider
+ let x = y +# z in ...
+ In *size* terms primops look very small, because the generate a
+ single instruction, but we do not want to unconditionally replace
+ every occurrence of x with (y +# z). So we only do the
+ unconditional-inline thing for *trivial* expressions.
+
+ NB: you might think that PostInlineUnconditionally would do this
+ but it doesn't fire for top-level things; see SimplUtils
+ Note [Top level and postInlineUnconditionally]
\begin{code}
-uncondInline :: Arity -> Int -> Bool
+uncondInline :: CoreExpr -> Arity -> Int -> Bool
-- Inline unconditionally if there no size increase
-- Size of call is arity (+1 for the function)
-- See Note [INLINE for small functions]
-uncondInline arity size
- | arity == 0 = size == 0
- | otherwise = size <= 10 * (arity + 1)
+uncondInline rhs arity size
+ | arity > 0 = size <= 10 * (arity + 1) -- See Note [INLINE for small functions] (1)
+ | otherwise = exprIsTrivial rhs -- See Note [INLINE for small functions] (4)
\end{code}
@@ -747,17 +755,28 @@ smallEnoughToInline _
----------------
certainlyWillInline :: Unfolding -> Bool
-- Sees if the unfolding is pretty certain to inline
-certainlyWillInline (CoreUnfolding { uf_is_cheap = is_cheap, uf_arity = n_vals, uf_guidance = guidance })
+certainlyWillInline (CoreUnfolding { uf_arity = n_vals, uf_guidance = guidance })
= case guidance of
UnfNever -> False
UnfWhen {} -> True
UnfIfGoodArgs { ug_size = size}
- -> is_cheap && size - (10 * (n_vals +1)) <= opt_UF_UseThreshold
+ -> n_vals > 0 -- See Note [certainlyWillInline: be caseful of thunks]
+ && size - (10 * (n_vals +1)) <= opt_UF_UseThreshold
certainlyWillInline _
= False
\end{code}
+Note [certainlyWillInline: be caseful of thunks]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Don't claim that thunks will certainly inline, because that risks work
+duplication. Even if the work duplication is not great (eg is_cheap
+holds), it can make a big difference in an inner loop In Trac #5623 we
+found that the WorkWrap phase thought that
+ y = case x of F# v -> F# (v +# v)
+was certainlyWillInline, so the addition got duplicated.
+
+
%************************************************************************
%* *
\subsection{callSiteInline}
@@ -894,7 +913,7 @@ tryUnfolding dflags id lone_variable
UnfWhen unsat_ok boring_ok
-> (enough_args && (boring_ok || some_benefit), empty )
- where -- See Note [INLINE for small functions]
+ where -- See Note [INLINE for small functions (3)]
enough_args = saturated || (unsat_ok && n_val_args > 0)
UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
@@ -1084,7 +1103,8 @@ to be cheap, and that's good because exprIsConApp_maybe doesn't
think that expression is a constructor application.
I used to test is_value rather than is_cheap, which was utterly
-wrong, because the above expression responds True to exprIsHNF.
+wrong, because the above expression responds True to exprIsHNF,
+which is what sets is_value.
This kind of thing can occur if you have