summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Graf <sebastian.graf@kit.edu>2022-10-17 17:35:57 +0200
committerSebastian Graf <sebastian.graf@kit.edu>2022-10-17 18:48:02 +0200
commit129906ad4d5bcbe40d00a82bc2ff721b7f55e558 (patch)
tree4161e2d748c40d44fde83fff2e2765abb3184e3c
parent0d3ed47f183f17589b5219ec0623ecf09e41501d (diff)
downloadhaskell-wip/T22227.tar.gz
Remove Note [Self-recursive tail calls]wip/T22227
We now do loopification in Core, so there should be no letrec left where all recursive calls are in tail position and the code is effectively dead. Hence we remove it.
-rw-r--r--compiler/GHC/Core/Opt/OccurAnal.hs16
-rw-r--r--compiler/GHC/Driver/Config/StgToCmm.hs1
-rw-r--r--compiler/GHC/Driver/Flags.hs2
-rw-r--r--compiler/GHC/Driver/Session.hs3
-rw-r--r--compiler/GHC/StgToCmm/Bind.hs9
-rw-r--r--compiler/GHC/StgToCmm/Closure.hs31
-rw-r--r--compiler/GHC/StgToCmm/Config.hs1
-rw-r--r--compiler/GHC/StgToCmm/Expr.hs110
-rw-r--r--compiler/GHC/StgToCmm/Heap.hs32
-rw-r--r--compiler/GHC/StgToCmm/Monad.hs17
-rw-r--r--docs/users_guide/using-optimisation.rst9
11 files changed, 30 insertions, 201 deletions
diff --git a/compiler/GHC/Core/Opt/OccurAnal.hs b/compiler/GHC/Core/Opt/OccurAnal.hs
index f296bc77ae..b6e58f0df9 100644
--- a/compiler/GHC/Core/Opt/OccurAnal.hs
+++ b/compiler/GHC/Core/Opt/OccurAnal.hs
@@ -809,12 +809,10 @@ So instead it would be good to *loopify* and transform the letrec `next` to
And now the RHS of the non-recursive `next` is quite cheap to duplicate and
since there is only one syntactic occurrence it is quite likely to inline, so we
-get a tight loop. Besides, if it *doesn't* inline, the code we get is no worse.
+get a tight loop.
-(You might think that the loopified form has unconditionally favorable
-operational properties even if the let is not inlined, because a jump is cheaper
-than a full function call. But we have Note [Self-recursive tail calls] that
-makes sure that the pre-loopified code actually has the same performance.)
+Even if `next` *doesn't* inline, the code we get is much better: the closure for
+`next` is smaller and the recursive call sequence can turn into a jump.
We implement this *transformation* in the Occurrence Analyser because it is has
all the necessary information at hand in `occAnalRec` while the Simplifier
@@ -832,6 +830,14 @@ regression tests T13966, T14287, T22227 (#14068 has no reproducer).
SpecConstr doesn't much like the loopified form, though. Hence it implements
Note [Denesting non-recursive let bindings].
+
+Some historic notes: Loopification was first described in "Low-level code
+optimizations in the Glasgow Haskell Compiler" by Krzysztof Woś, though we use
+different approach. Krzysztof performed his optimization at the Cmm level,
+whereas we perform ours at the Core level (already). Historically, we did
+loopification during StgToCmm (d61c3ac) but doing it in Core with join points
+enables subsequent optimisations such as inlining `next` in the example above
+and makes the old implementation superfluous.
-}
diff --git a/compiler/GHC/Driver/Config/StgToCmm.hs b/compiler/GHC/Driver/Config/StgToCmm.hs
index 38e8f6684d..609c41f213 100644
--- a/compiler/GHC/Driver/Config/StgToCmm.hs
+++ b/compiler/GHC/Driver/Config/StgToCmm.hs
@@ -32,7 +32,6 @@ initStgToCmmConfig dflags mod = StgToCmmConfig
, stgToCmmTickyDynThunk = gopt Opt_Ticky_Dyn_Thunk dflags
, stgToCmmTickyTag = gopt Opt_Ticky_Tag dflags
-- flags
- , stgToCmmLoopification = gopt Opt_Loopification dflags
, stgToCmmAlignCheck = gopt Opt_AlignmentSanitisation dflags
, stgToCmmOptHpc = gopt Opt_Hpc dflags
, stgToCmmFastPAPCalls = gopt Opt_FastPAPCalls dflags
diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs
index 83d87b6898..2a0aebd224 100644
--- a/compiler/GHC/Driver/Flags.hs
+++ b/compiler/GHC/Driver/Flags.hs
@@ -274,7 +274,7 @@ data GeneralFlag
| Opt_DictsStrict -- be strict in argument dictionaries
| Opt_DmdTxDictSel -- ^ deprecated, no effect and behaviour is now default.
-- Allowed switching of a special demand transformer for dictionary selectors
- | Opt_Loopification -- See Note [Self-recursive tail calls]
+ | Opt_Loopification -- Deprecated
| Opt_CfgBlocklayout -- ^ Use the cfg based block layout algorithm.
| Opt_WeightlessBlocklayout -- ^ Layout based on last instruction per block.
| Opt_CprAnal
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index cf6a5da5e3..5cc8bc7a39 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -3436,7 +3436,8 @@ fFlagsDeps = [
flagSpec "liberate-case" Opt_LiberateCase,
flagHiddenSpec "llvm-tbaa" Opt_LlvmTBAA,
flagHiddenSpec "llvm-fill-undef-with-garbage" Opt_LlvmFillUndefWithGarbage,
- flagSpec "loopification" Opt_Loopification,
+ depFlagSpec "loopification"
+ Opt_Loopification "loopification now happens as part of Occurrence Analysis",
flagSpec "block-layout-cfg" Opt_CfgBlocklayout,
flagSpec "block-layout-weightless" Opt_WeightlessBlocklayout,
flagSpec "omit-interface-pragmas" Opt_OmitInterfacePragmas,
diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs
index 177c3f2912..1ddc1675ff 100644
--- a/compiler/GHC/StgToCmm/Bind.hs
+++ b/compiler/GHC/StgToCmm/Bind.hs
@@ -41,7 +41,6 @@ import GHC.StgToCmm.Closure
import GHC.StgToCmm.Foreign (emitPrimCall)
import GHC.Cmm.Graph
-import GHC.Cmm.BlockId
import GHC.Cmm
import GHC.Cmm.Info
import GHC.Cmm.Utils
@@ -511,12 +510,6 @@ closureCodeBody top_lvl bndr cl_info cc args@(arg0:_) body fv_details
; platform <- getPlatform
; let node_points = nodeMustPointToIt profile lf_info
node' = if node_points then Just node else Nothing
- ; loop_header_id <- newBlockId
- -- Extend reader monad with information that
- -- self-recursive tail calls can be optimized into local
- -- jumps. See Note [Self-recursive tail calls] in GHC.StgToCmm.Expr.
- ; withSelfLoop (bndr, loop_header_id, arg_regs) $ do
- {
-- Main payload
; entryHeapCheck cl_info node' arity arg_regs $ do
{ -- emit LDV code when profiling
@@ -533,7 +526,7 @@ closureCodeBody top_lvl bndr cl_info cc args@(arg0:_) body fv_details
; when node_points $ load_fvs node lf_info fv_bindings
; checkFunctionArgTags (text "TagCheck failed - Argument to local function:" <> ppr bndr) bndr args
; void $ cgExpr body
- }}}
+ }}
}
diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs
index fc76664d94..0b2640bb54 100644
--- a/compiler/GHC/StgToCmm/Closure.hs
+++ b/compiler/GHC/StgToCmm/Closure.hs
@@ -77,7 +77,6 @@ import GHC.Runtime.Heap.Layout
import GHC.Cmm
import GHC.Cmm.Utils
import GHC.StgToCmm.Types
-import GHC.StgToCmm.Sequel
import GHC.Types.CostCentre
import GHC.Cmm.BlockId
@@ -95,7 +94,6 @@ import GHC.Types.Basic
import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
-import GHC.Utils.Misc
import Data.Coerce (coerce)
import qualified Data.ByteString.Char8 as BS8
@@ -511,22 +509,9 @@ getCallMethod :: StgToCmmConfig
-- tail calls using the same data constructor,
-- JumpToIt. This saves us one case branch in
-- cgIdApp
- -> Maybe SelfLoopInfo -- can we perform a self-recursive tail-call
-> CallMethod
-getCallMethod cfg _ id _ n_args v_args _cg_loc (Just (self_loop_id, block_id, args))
- | stgToCmmLoopification cfg
- , id == self_loop_id
- , args `lengthIs` (n_args - v_args)
- -- If these patterns match then we know that:
- -- * loopification optimisation is turned on
- -- * function is performing a self-recursive call in a tail position
- -- * number of non-void parameters of the function matches functions arity.
- -- See Note [Self-recursive tail calls] and Note [Void arguments in
- -- self-recursive tail calls] in GHC.StgToCmm.Expr for more details
- = JumpToIt block_id args
-
-getCallMethod cfg name id (LFReEntrant _ arity _ _) n_args _v_args _cg_loc _self_loop_info
+getCallMethod cfg name id (LFReEntrant _ arity _ _) n_args _v_args _cg_loc
| n_args == 0 -- No args at all
&& not (profileIsProfiling (stgToCmmProfile cfg))
-- See Note [Evaluating functions with profiling] in rts/Apply.cmm
@@ -534,16 +519,16 @@ getCallMethod cfg name id (LFReEntrant _ arity _ _) n_args _v_args _cg_loc _self
| n_args < arity = SlowCall -- Not enough args
| otherwise = DirectEntry (enterIdLabel (stgToCmmPlatform cfg) name (idCafInfo id)) arity
-getCallMethod _ _name _ LFUnlifted n_args _v_args _cg_loc _self_loop_info
+getCallMethod _ _name _ LFUnlifted n_args _v_args _cg_loc
= assert (n_args == 0) ReturnIt
-getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc _self_loop_info
+getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc
= assert (n_args == 0) ReturnIt
-- n_args=0 because it'd be ill-typed to apply a saturated
-- constructor application to anything
getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun)
- n_args _v_args _cg_loc _self_loop_info
+ n_args _v_args _cg_loc
| Just sig <- idTagSig_maybe id
, isTaggedSig sig -- Infered to be already evaluated by Tag Inference
@@ -581,7 +566,7 @@ getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun)
updatable) 0
-- Imported(Unknown) Ids
-getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _v_args _cg_locs _self_loop_info
+getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _v_args _cg_locs
| n_args == 0
, Just sig <- idTagSig_maybe id
, isTaggedSig sig -- Infered to be already evaluated by Tag Inference
@@ -598,14 +583,14 @@ getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _v_args _cg_loc
EnterIt -- Not a function
-- TODO: Redundant with above match?
--- getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info
+-- getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc
-- = assertPpr (n_args == 0) (ppr name <+> ppr n_args)
-- EnterIt -- Not a function
-getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs) _self_loop_info
+getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs)
= JumpToIt blk_id lne_regs
-getCallMethod _ _ _ _ _ _ _ _ = panic "Unknown call method"
+getCallMethod _ _ _ _ _ _ _ = panic "Unknown call method"
-----------------------------------------------------------------------------
-- Data types for closure information
diff --git a/compiler/GHC/StgToCmm/Config.hs b/compiler/GHC/StgToCmm/Config.hs
index f2bd349ae7..e5a91b0a7f 100644
--- a/compiler/GHC/StgToCmm/Config.hs
+++ b/compiler/GHC/StgToCmm/Config.hs
@@ -43,7 +43,6 @@ data StgToCmmConfig = StgToCmmConfig
-- dynamic thunks
, stgToCmmTickyTag :: !Bool -- ^ True indicates ticky will count number of avoided tag checks by tag inference.
---------------------------------- Flags --------------------------------------
- , stgToCmmLoopification :: !Bool -- ^ Loopification enabled (cf @-floopification@)
, stgToCmmAlignCheck :: !Bool -- ^ Insert alignment check (cf @-falignment-sanitisation@)
, stgToCmmOptHpc :: !Bool -- ^ perform code generation for code coverage
, stgToCmmFastPAPCalls :: !Bool -- ^
diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs
index fcf91b4509..a7b9677c8e 100644
--- a/compiler/GHC/StgToCmm/Expr.hs
+++ b/compiler/GHC/StgToCmm/Expr.hs
@@ -996,7 +996,6 @@ cgIdApp fun_id args = do
platform <- getPlatform
fun_info <- getCgIdInfo fun_id
cfg <- getStgToCmmConfig
- self_loop <- getSelfLoop
let profile = stgToCmmProfile cfg
fun_arg = StgVarArg fun_id
fun_name = idName fun_id
@@ -1004,7 +1003,7 @@ cgIdApp fun_id args = do
lf_info = cg_lf fun_info
n_args = length args
v_args = length $ filter (isZeroBitTy . stgArgType) args
- case getCallMethod cfg fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop of
+ case getCallMethod cfg fun_name fun_id lf_info n_args v_args (cg_loc fun_info) of
-- A value in WHNF, so we can just return it.
ReturnIt
| isZeroBitTy (idType fun_id) -> emitReturn []
@@ -1052,113 +1051,6 @@ cgIdApp fun_id args = do
; emit (mkBranch blk_id)
; return AssignedDirectly }
--- Note [Self-recursive tail calls]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- Self-recursive tail calls can be optimized into a local jump in the same
--- way as let-no-escape bindings (see Note [What is a non-escaping let] in
--- "GHC.CoreToStg"). Consider this:
---
--- foo.info:
--- a = R1 // calling convention
--- b = R2
--- goto L1
--- L1: ...
--- ...
--- ...
--- L2: R1 = x
--- R2 = y
--- call foo(R1,R2)
---
--- Instead of putting x and y into registers (or other locations required by the
--- calling convention) and performing a call we can put them into local
--- variables a and b and perform jump to L1:
---
--- foo.info:
--- a = R1
--- b = R2
--- goto L1
--- L1: ...
--- ...
--- ...
--- L2: a = x
--- b = y
--- goto L1
---
--- This can be done only when function is calling itself in a tail position
--- and only if the call passes number of parameters equal to function's arity.
--- Note that this cannot be performed if a function calls itself with a
--- continuation.
---
--- This in fact implements optimization known as "loopification". It was
--- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
--- by Krzysztof Woś, though we use different approach. Krzysztof performed his
--- optimization at the Cmm level, whereas we perform ours during code generation
--- (Stg-to-Cmm pass) essentially making sure that optimized Cmm code is
--- generated in the first place.
---
--- Implementation is spread across a couple of places in the code:
---
--- * FCode monad stores additional information in its reader environment
--- (stgToCmmSelfLoop field). This information tells us which function can
--- tail call itself in an optimized way (it is the function currently
--- being compiled), what is the label of a loop header (L1 in example above)
--- and information about local registers in which we should arguments
--- before making a call (this would be a and b in example above).
---
--- * Whenever we are compiling a function, we set that information to reflect
--- the fact that function currently being compiled can be jumped to, instead
--- of called. This is done in closureCodyBody in GHC.StgToCmm.Bind.
---
--- * We also have to emit a label to which we will be jumping. We make sure
--- that the label is placed after a stack check but before the heap
--- check. The reason is that making a recursive tail-call does not increase
--- the stack so we only need to check once. But it may grow the heap, so we
--- have to repeat the heap check in every self-call. This is done in
--- do_checks in GHC.StgToCmm.Heap.
---
--- * When we begin compilation of another closure we remove the additional
--- information from the environment. This is done by forkClosureBody
--- in GHC.StgToCmm.Monad. Other functions that duplicate the environment -
--- forkLneBody, forkAlts, codeOnly - duplicate that information. In other
--- words, we only need to clean the environment of the self-loop information
--- when compiling right hand side of a closure (binding).
---
--- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
--- of call will be generated. getCallMethod decides to generate a self
--- recursive tail call when (a) environment stores information about
--- possible self tail-call; (b) that tail call is to a function currently
--- being compiled; (c) number of passed non-void arguments is equal to
--- function's arity. (d) loopification is turned on via -floopification
--- command-line option.
---
--- * Command line option to turn loopification on and off is implemented in
--- DynFlags, then passed to StgToCmmConfig for this phase.
---
---
--- Note [Void arguments in self-recursive tail calls]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- State# tokens can get in the way of the loopification optimization as seen in
--- #11372. Consider this:
---
--- foo :: [a]
--- -> (a -> State# s -> (# State s, Bool #))
--- -> State# s
--- -> (# State# s, Maybe a #)
--- foo [] f s = (# s, Nothing #)
--- foo (x:xs) f s = case f x s of
--- (# s', b #) -> case b of
--- True -> (# s', Just x #)
--- False -> foo xs f s'
---
--- We would like to compile the call to foo as a local jump instead of a call
--- (see Note [Self-recursive tail calls]). However, the generated function has
--- an arity of 2 while we apply it to 3 arguments, one of them being of void
--- type. Thus, we mustn't count arguments of void type when checking whether
--- we can turn a call into a self-recursive jump.
---
-
emitEnter :: CmmExpr -> FCode ReturnKind
emitEnter fun = do
{ platform <- getPlatform
diff --git a/compiler/GHC/StgToCmm/Heap.hs b/compiler/GHC/StgToCmm/Heap.hs
index a7e7f23e9d..88d48c1897 100644
--- a/compiler/GHC/StgToCmm/Heap.hs
+++ b/compiler/GHC/StgToCmm/Heap.hs
@@ -635,15 +635,6 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
Just stk_hwm -> tickyStackCheck
>> (emit =<< mkCmmIfGoto' (sp_oflo stk_hwm) gc_id (Just False) )
- -- Emit new label that might potentially be a header
- -- of a self-recursive tail call.
- -- See Note [Self-recursive loop header].
- self_loop_info <- getSelfLoop
- case self_loop_info of
- Just (_, loop_header_id, _)
- | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
- _otherwise -> return ()
-
if isJust mb_alloc_lit
then do
tickyHeapCheck
@@ -667,26 +658,3 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
-- stack check succeeds. Otherwise we might end up
-- with slop at the end of the current block, which can
-- confuse the LDV profiler.
-
--- Note [Self-recursive loop header]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- Self-recursive loop header is required by loopification optimization (See
--- Note [Self-recursive tail calls] in GHC.StgToCmm.Expr). We emit it if:
---
--- 1. There is information about self-loop in the FCode environment. We don't
--- check the binder (first component of the self_loop_info) because we are
--- certain that if the self-loop info is present then we are compiling the
--- binder body. Reason: the only possible way to get here with the
--- self_loop_info present is from closureCodeBody.
---
--- 2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
--- to preempt the heap check (see #367 for motivation behind this check). It
--- is True for heap checks placed at the entry to a function and
--- let-no-escape heap checks but false for other heap checks (eg. in case
--- alternatives or created from hand-written high-level Cmm). The second
--- check (isJust mb_stk_hwm) is true for heap checks at the entry to a
--- function and some heap checks created in hand-written Cmm. Otherwise it
--- is Nothing. In other words the only situation when both conditions are
--- true is when compiling stack and heap checks at the entry to a
--- function. This is the only situation when we want to emit a self-loop
--- label.
diff --git a/compiler/GHC/StgToCmm/Monad.hs b/compiler/GHC/StgToCmm/Monad.hs
index 9f9d292937..50043bc01f 100644
--- a/compiler/GHC/StgToCmm/Monad.hs
+++ b/compiler/GHC/StgToCmm/Monad.hs
@@ -54,7 +54,7 @@ module GHC.StgToCmm.Monad (
getModuleName,
-- ideally we wouldn't export these, but some other modules access internal state
- getState, setState, getSelfLoop, withSelfLoop, getStgToCmmConfig,
+ getState, setState, getStgToCmmConfig,
-- more localised access to monad state
CgIdInfo(..),
@@ -296,8 +296,6 @@ data FCodeState =
-- else the RTS will deadlock _and_ also experience a severe
-- performance degradation
, fcs_sequel :: !Sequel -- ^ What to do at end of basic block
- , fcs_selfloop :: Maybe SelfLoopInfo -- ^ Which tail calls can be compiled as local jumps?
- -- See Note [Self-recursive tail calls] in GHC.StgToCmm.Expr
, fcs_ticky :: !CLabel -- ^ Destination for ticky counts
, fcs_tickscope :: !CmmTickScope -- ^ Tick scope for new blocks & ticks
}
@@ -455,7 +453,6 @@ initFCodeState :: Platform -> FCodeState
initFCodeState p =
MkFCodeState { fcs_upframeoffset = platformWordSizeInBytes p
, fcs_sequel = Return
- , fcs_selfloop = Nothing
, fcs_ticky = mkTopTickyCtrLabel
, fcs_tickscope = GlobalScope
}
@@ -467,22 +464,13 @@ getFCodeState = FCode $ \_ fstate state -> (fstate,state)
withFCodeState :: FCode a -> FCodeState -> FCode a
withFCodeState (FCode fcode) fst = FCode $ \cfg _ state -> fcode cfg fst state
-getSelfLoop :: FCode (Maybe SelfLoopInfo)
-getSelfLoop = fcs_selfloop <$> getFCodeState
-
-withSelfLoop :: SelfLoopInfo -> FCode a -> FCode a
-withSelfLoop self_loop code = do
- fstate <- getFCodeState
- withFCodeState code (fstate {fcs_selfloop = Just self_loop})
-
-- ----------------------------------------------------------------------------
-- Get/set the end-of-block info
withSequel :: Sequel -> FCode a -> FCode a
withSequel sequel code
= do { fstate <- getFCodeState
- ; withFCodeState code (fstate { fcs_sequel = sequel
- , fcs_selfloop = Nothing }) }
+ ; withFCodeState code (fstate { fcs_sequel = sequel }) }
getSequel :: FCode Sequel
getSequel = fcs_sequel <$> getFCodeState
@@ -578,7 +566,6 @@ forkClosureBody body_code
; state <- getState
; let fcs = fstate { fcs_sequel = Return
, fcs_upframeoffset = platformWordSizeInBytes platform
- , fcs_selfloop = Nothing
}
fork_state_in = (initCgState us) { cgs_binds = cgs_binds state }
((),fork_state_out) = doFCode body_code cfg fcs fork_state_in
diff --git a/docs/users_guide/using-optimisation.rst b/docs/users_guide/using-optimisation.rst
index d3ca68a1df..817fd2ca3f 100644
--- a/docs/users_guide/using-optimisation.rst
+++ b/docs/users_guide/using-optimisation.rst
@@ -649,17 +649,16 @@ by saying ``-fno-wombat``.
Set the size threshold for the liberate-case transformation.
.. ghc-flag:: -floopification
- :shortdesc: Turn saturated self-recursive tail-calls into local jumps in the
- generated assembly. Implied by :ghc-flag:`-O`.
+ :shortdesc: *(deprecated)* Does nothing
:type: dynamic
:reverse: -fno-loopification
:category:
:default: on
- When this optimisation is enabled the code generator will turn all
- self-recursive saturated tail calls into local jumps rather than
- function calls.
+ This flag has no effect since GHC 9.6 - its behavior is always on.
+ It used to instruct the code generator to turn all self-recursive saturated
+ tail calls into local jumps rather than function calls.
.. ghc-flag:: -fllvm-pass-vectors-in-regs
:shortdesc: *(deprecated)* Does nothing