This patch addresses the exponential blow-up in the simplifier.wip/T13253

Specifically: #13253 exponential inlining #10421 ditto #18140 strict constructors #18282 another nested-function call case This patch makes one really significant changes: change the way that mkDupableCont handles StrictArg. The details are explained in GHC.Core.Opt.Simplify Note [Duplicating StrictArg]. Specific changes * In mkDupableCont, when making auxiliary bindings for the other arguments of a call, add extra plumbing so that we don't forget the demand on them. Otherwise we haev to wait for another round of strictness analysis. But actually all the info is to hand. This change affects: - Make the strictness list in ArgInfo be [Demand] instead of [Bool], and rename it to ai_dmds. - Add as_dmd to ValArg - Simplify.makeTrivial takes a Demand - mkDupableContWithDmds takes a [Demand] There are a number of other small changes 1. For Ids that are used at most once in each branch of a case, make the occurrence analyser record the total number of syntactic occurrences. Previously we recorded just OneBranch or MultipleBranches. I thought this was going to be useful, but I ended up barely using it; see Note [Note [Suppress exponential blowup] in GHC.Core.Opt.Simplify.Utils Actual changes: * See the occ_n_br field of OneOcc. * postInlineUnconditionally 2. I found a small perf buglet in SetLevels; see the new function GHC.Core.Opt.SetLevels.hasFreeJoin 3. Remove the sc_cci field of StrictArg. I found I could get its information from the sc_fun field instead. Less to get wrong! 4. In ArgInfo, arrange that ai_dmds and ai_discs have a simpler invariant: they line up with the value arguments beyond ai_args This allowed a bit of nice refactoring; see isStrictArgInfo, lazyArgcontext, strictArgContext There is virtually no difference in nofib. (The runtime numbers are bogus -- I tried a few manually.) Program Size Allocs Runtime Elapsed TotalMem -------------------------------------------------------------------------------- fft +0.0% -2.0% -48.3% -49.4% 0.0% multiplier +0.0% -2.2% -50.3% -50.9% 0.0% -------------------------------------------------------------------------------- Min -0.4% -2.2% -59.2% -60.4% 0.0% Max +0.0% +0.1% +3.3% +4.9% 0.0% Geometric Mean +0.0% -0.0% -33.2% -34.3% -0.0% Test T18282 is an existing example of these deeply-nested strict calls. We get a big decrease in compile time (-85%) because so much less inlining takes place. Metric Decrease: T18282
author: Simon Peyton Jones <simonpj@microsoft.com> 2020-06-11 15:45:53 +0100
committer: Simon Peyton Jones <simonpj@microsoft.com> 2020-07-27 11:41:45 +0100
commit: 3f65f1e043d9f603dd729026e09bcea7873af1bb (patch)
tree: a0d703526f7378c71ffda4e90ed7286a5faf8fdf
parent: dff1cb3d9c111808fec60190747272b973547c52 (diff)
download: haskell-wip/T13253.tar.gz
33 files changed, 848 insertions, 277 deletions
diff --git a/compiler/GHC/Core/Coercion.hs b/compiler/GHC/Core/Coercion.hs
index c749ed0280..3937c0ce3e 100644
--- a/compiler/GHC/Core/Coercion.hs
+++ b/compiler/GHC/Core/Coercion.hs
@@ -2866,7 +2866,7 @@ simplifyArgsWorker orig_ki_binders orig_inner_ki orig_fvs
          -- The bangs here have been observed to improve performance
          -- significantly in optimized builds.
          let kind_co = mkSymCo $
-               liftCoSubst Nominal lc (tyCoBinderType binder)
+                       liftCoSubst Nominal lc (tyCoBinderType binder)
              !casted_xi = xi `mkCastTy` kind_co
              casted_co =  mkCoherenceLeftCo role xi kind_co co
 
diff --git a/compiler/GHC/Core/Opt/OccurAnal.hs b/compiler/GHC/Core/Opt/OccurAnal.hs
index 500c2bdab6..12ffcbb587 100644
--- a/compiler/GHC/Core/Opt/OccurAnal.hs
+++ b/compiler/GHC/Core/Opt/OccurAnal.hs
@@ -832,7 +832,7 @@ occAnalNonRecBind env lvl imp_rule_edges bndr rhs body_usage
 
     certainly_inline -- See Note [Cascading inlines]
       = case occ of
-          OneOcc { occ_in_lam = NotInsideLam, occ_one_br = InOneBranch }
+          OneOcc { occ_in_lam = NotInsideLam, occ_n_br = 1 }
             -> active && not_stable
           _ -> False
 
@@ -2563,7 +2563,7 @@ mkOneOcc id int_cxt arity
   = emptyDetails
   where
     occ_info = OneOcc { occ_in_lam  = NotInsideLam
-                      , occ_one_br  = InOneBranch
+                      , occ_n_br    = oneBranch
                       , occ_int_cxt = int_cxt
                       , occ_tail    = AlwaysTailCalled arity }
 
@@ -2967,11 +2967,15 @@ addOccInfo a1 a2  = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
 -- (orOccInfo orig new) is used
 -- when combining occurrence info from branches of a case
 
-orOccInfo (OneOcc { occ_in_lam = in_lam1, occ_int_cxt = int_cxt1
-                  , occ_tail   = tail1 })
-          (OneOcc { occ_in_lam = in_lam2, occ_int_cxt = int_cxt2
-                  , occ_tail   = tail2 })
-  = OneOcc { occ_one_br  = MultipleBranches -- because it occurs in both branches
+orOccInfo (OneOcc { occ_in_lam  = in_lam1
+                  , occ_n_br    = nbr1
+                  , occ_int_cxt = int_cxt1
+                  , occ_tail    = tail1 })
+          (OneOcc { occ_in_lam  = in_lam2
+                  , occ_n_br    = nbr2
+                  , occ_int_cxt = int_cxt2
+                  , occ_tail    = tail2 })
+  = OneOcc { occ_n_br    = nbr1 + nbr2
            , occ_in_lam  = in_lam1 `mappend` in_lam2
            , occ_int_cxt = int_cxt1 `mappend` int_cxt2
            , occ_tail    = tail1 `andTailCallInfo` tail2 }
diff --git a/compiler/GHC/Core/Opt/SetLevels.hs b/compiler/GHC/Core/Opt/SetLevels.hs
index 91e9f6ec34..efcf96e6df 100644
--- a/compiler/GHC/Core/Opt/SetLevels.hs
+++ b/compiler/GHC/Core/Opt/SetLevels.hs
@@ -658,8 +658,8 @@ lvlMFE env strict_ctxt e@(_, AnnCase {})
 lvlMFE env strict_ctxt ann_expr
   |  floatTopLvlOnly env && not (isTopLvl dest_lvl)
          -- Only floating to the top level is allowed.
-  || anyDVarSet isJoinId fvs   -- If there is a free join, don't float
-                               -- See Note [Free join points]
+  || hasFreeJoin env fvs   -- If there is a free join, don't float
+                           -- See Note [Free join points]
   || isExprLevPoly expr
          -- We can't let-bind levity polymorphic expressions
          -- See Note [Levity polymorphism invariants] in GHC.Core
@@ -755,6 +755,14 @@ lvlMFE env strict_ctxt ann_expr
                 && floatConsts env
                 && (not strict_ctxt || is_bot || exprIsHNF expr)
 
+hasFreeJoin :: LevelEnv -> DVarSet -> Bool
+-- Has a free join point which is not being floated to top level.
+-- (In the latter case it won't be a join point any more.)
+-- Not treating top-level ones specially had a massive effect
+-- on nofib/minimax/Prog.prog
+hasFreeJoin env fvs
+  = not (maxFvLevel isJoinId env fvs == tOP_LEVEL)
+
 isBottomThunk :: Maybe (Arity, s) -> Bool
 -- See Note [Bottoming floats] (2)
 isBottomThunk (Just (0, _)) = True   -- Zero arity
diff --git a/compiler/GHC/Core/Opt/Simplify.hs b/compiler/GHC/Core/Opt/Simplify.hs
index abfad1940f..e7fc0fbced 100644
--- a/compiler/GHC/Core/Opt/Simplify.hs
+++ b/compiler/GHC/Core/Opt/Simplify.hs
@@ -6,7 +6,7 @@
 
 {-# LANGUAGE CPP #-}
 
-{-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
+{-# OPTIONS_GHC -Wno-incomplete-record-updates -Wno-incomplete-uni-patterns #-}
 module GHC.Core.Opt.Simplify ( simplTopBinds, simplExpr, simplRules ) where
 
 #include "HsVersions.h"
@@ -39,8 +39,8 @@ import GHC.Core.Opt.Monad ( Tick(..), SimplMode(..) )
 import GHC.Core
 import GHC.Builtin.Types.Prim( realWorldStatePrimTy )
 import GHC.Builtin.Names( runRWKey )
-import GHC.Types.Demand ( StrictSig(..), dmdTypeDepth, isStrictDmd
-                        , mkClosedStrictSig, topDmd, botDiv )
+import GHC.Types.Demand ( StrictSig(..), Demand, dmdTypeDepth, isStrictDmd
+                        , mkClosedStrictSig, topDmd, seqDmd, botDiv )
 import GHC.Types.Cpr    ( mkCprSig, botCpr )
 import GHC.Core.Ppr     ( pprCoreExpr )
 import GHC.Types.Unique ( hasKey )
@@ -598,7 +598,7 @@ prepareRhs mode top_lvl occ rhs0
         = do { (is_exp, floats1, fun') <- go (n_val_args+1) fun
              ; case is_exp of
                 False -> return (False, emptyLetFloats, App fun arg)
-                True  -> do { (floats2, arg') <- makeTrivial mode top_lvl occ arg
+                True  -> do { (floats2, arg') <- makeTrivial mode top_lvl topDmd occ arg
                             ; return (True, floats1 `addLetFlts` floats2, App fun' arg') } }
     go n_val_args (Var fun)
         = return (is_exp, emptyLetFloats, Var fun)
@@ -628,32 +628,34 @@ prepareRhs mode top_lvl occ rhs0
         = return (False, emptyLetFloats, other)
 
 makeTrivialArg :: SimplMode -> ArgSpec -> SimplM (LetFloats, ArgSpec)
-makeTrivialArg mode arg@(ValArg { as_arg = e })
-  = do { (floats, e') <- makeTrivial mode NotTopLevel (fsLit "arg") e
+makeTrivialArg mode arg@(ValArg { as_arg = e, as_dmd = dmd })
+  = do { (floats, e') <- makeTrivial mode NotTopLevel dmd (fsLit "arg") e
        ; return (floats, arg { as_arg = e' }) }
 makeTrivialArg _ arg
   = return (emptyLetFloats, arg)  -- CastBy, TyArg
 
-makeTrivial :: SimplMode -> TopLevelFlag
+makeTrivial :: SimplMode -> TopLevelFlag -> Demand
             -> FastString  -- ^ A "friendly name" to build the new binder from
             -> OutExpr     -- ^ This expression satisfies the let/app invariant
             -> SimplM (LetFloats, OutExpr)
 -- Binds the expression to a variable, if it's not trivial, returning the variable
-makeTrivial mode top_lvl occ_fs expr
+-- For the Demand argument, see Note [Keeping demand info in StrictArg Plan A]
+makeTrivial mode top_lvl dmd occ_fs expr
   | exprIsTrivial expr                          -- Already trivial
   || not (bindingOk top_lvl expr expr_ty)       -- Cannot trivialise
                                                 --   See Note [Cannot trivialise]
   = return (emptyLetFloats, expr)
 
   | Cast expr' co <- expr
-  = do { (floats, triv_expr) <- makeTrivial mode top_lvl occ_fs expr'
+  = do { (floats, triv_expr) <- makeTrivial mode top_lvl dmd occ_fs expr'
        ; return (floats, Cast triv_expr co) }
 
   | otherwise
   = do { (floats, new_id) <- makeTrivialBinding mode top_lvl occ_fs
-                                                vanillaIdInfo expr expr_ty
+                                                id_info expr expr_ty
        ; return (floats, Var new_id) }
   where
+    id_info = vanillaIdInfo `setDemandInfo` dmd
     expr_ty = exprType expr
 
 makeTrivialBinding :: SimplMode -> TopLevelFlag
@@ -1010,13 +1012,17 @@ simplExprF1 env (App fun arg) cont
           -- (instead of one-at-a-time). But in practice, we have not
           -- observed the quadratic behavior, so this extra entanglement
           -- seems not worthwhile.
+          --
+          -- But the (exprType fun) is repeated, to push it into two
+          -- separate, rarely used, thunks; rather than always alloating
+          -- a shared thunk.  Makes a small efficiency difference
         let fun_ty = exprType fun
             (m, _, _) = splitFunTy fun_ty
         in
-                simplExprF env fun $
-                 ApplyToVal { sc_arg = arg, sc_env = env
-                            , sc_hole_ty = substTy env (exprType fun)
-                            , sc_dup = NoDup, sc_cont = cont, sc_mult = m }
+        simplExprF env fun $
+        ApplyToVal { sc_arg = arg, sc_env = env
+                   , sc_hole_ty = substTy env (exprType fun)
+                   , sc_dup = NoDup, sc_cont = cont, sc_mult = m }
 
 simplExprF1 env expr@(Lam {}) cont
   = {-#SCC "simplExprF1-Lam" #-}
@@ -1567,7 +1573,7 @@ simplNonRecE env bndr (rhs, rhs_se) (bndrs, body) cont
          simplLam env' bndrs body cont }
 
   -- Deal with strict bindings
-  | isStrictId bndr          -- Includes coercions
+  | isStrictId bndr          -- Includes coercions, and unlifted types
   , sm_case_case (getMode env)
   = simplExprF (rhs_se `setInScopeFromE` env) rhs
                (StrictBind { sc_bndr = bndr, sc_bndrs = bndrs, sc_body = body
@@ -1924,7 +1930,7 @@ rebuildCall :: SimplEnv
 --    - and rebuild
 
 ---------- Bottoming applications --------------
-rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args, ai_strs = [] }) cont
+rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args, ai_dmds = [] }) cont
   -- When we run out of strictness args, it means
   -- that the call is definitely bottom; see GHC.Core.Opt.Simplify.Utils.mkArgInfo
   -- Then we want to discard the entire strict continuation.  E.g.
@@ -1974,9 +1980,9 @@ rebuildCall env info (ApplyToTy { sc_arg_ty = arg_ty, sc_hole_ty = hole_ty, sc_c
 ---------- The runRW# rule. Do this after absorbing all arguments ------
 -- runRW# :: forall (r :: RuntimeRep) (o :: TYPE r). (State# RealWorld -> o) -> o
 -- K[ runRW# rr ty body ]  -->  runRW rr' ty' (\s. K[ body s ])
-rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args })
+rebuildCall env (ArgInfo { ai_fun = fun_id, ai_args = rev_args })
             (ApplyToVal { sc_arg = arg, sc_env = arg_se, sc_cont = cont, sc_mult = m })
-  | fun `hasKey` runRWKey
+  | fun_id `hasKey` runRWKey
   , not (contIsStop cont)  -- Don't fiddle around if the continuation is boring
   , [ TyArg {}, TyArg {} ] <- rev_args
   = do { s <- newId (fsLit "s") Many realWorldStatePrimTy
@@ -1990,25 +1996,24 @@ rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args })
        ; body' <- simplExprC env' arg cont'
        ; let arg'  = Lam s body'
              rr'   = getRuntimeRep ty'
-             call' = mkApps (Var fun) [mkTyArg rr', mkTyArg ty', arg']
+             call' = mkApps (Var fun_id) [mkTyArg rr', mkTyArg ty', arg']
        ; return (emptyFloats env, call') }
 
-rebuildCall env info@(ArgInfo { ai_encl = encl_rules
-                              , ai_strs = str:strs, ai_discs = disc:discs })
+rebuildCall env fun_info
             (ApplyToVal { sc_arg = arg, sc_env = arg_se
                         , sc_dup = dup_flag, sc_hole_ty = fun_ty
                         , sc_cont = cont, sc_mult = m })
   -- Argument is already simplified
   | isSimplified dup_flag     -- See Note [Avoid redundant simplification]
-  = rebuildCall env (addValArgTo info' (m, arg) fun_ty) cont
+  = rebuildCall env (addValArgTo fun_info (m, arg) fun_ty) cont
 
   -- Strict arguments
-  | str
+  | isStrictArgInfo fun_info
   , sm_case_case (getMode env)
   = -- pprTrace "Strict Arg" (ppr arg $$ ppr (seIdSubst env) $$ ppr (seInScope env)) $
     simplExprF (arg_se `setInScopeFromE` env) arg
-               (StrictArg { sc_fun = info', sc_cci = cci_strict
-                          , sc_dup = Simplified, sc_fun_ty = fun_ty
+               (StrictArg { sc_fun = fun_info, sc_fun_ty = fun_ty
+                          , sc_dup = Simplified
                           , sc_cont = cont, sc_mult = m })
                 -- Note [Shadowing]
 
@@ -2019,27 +2024,11 @@ rebuildCall env info@(ArgInfo { ai_encl = encl_rules
         -- have to be very careful about bogus strictness through
         -- floating a demanded let.
   = do  { arg' <- simplExprC (arg_se `setInScopeFromE` env) arg
-                             (mkLazyArgStop arg_ty cci_lazy)
-        ; rebuildCall env (addValArgTo info' (m, arg') fun_ty) cont }
+                             (mkLazyArgStop arg_ty (lazyArgContext fun_info))
+        ; rebuildCall env (addValArgTo fun_info (m, arg') fun_ty) cont }
   where
-    info'  = info { ai_strs = strs, ai_discs = discs }
     arg_ty = funArgTy fun_ty
 
-    -- Use this for lazy arguments
-    cci_lazy | encl_rules = RuleArgCtxt
-             | disc > 0   = DiscArgCtxt  -- Be keener here
-             | otherwise  = BoringCtxt   -- Nothing interesting
-
-    -- ..and this for strict arguments
-    cci_strict | encl_rules = RuleArgCtxt
-               | disc > 0   = DiscArgCtxt
-               | otherwise  = RhsCtxt
-      -- Why RhsCtxt?  if we see f (g x) (h x), and f is strict, we
-      -- want to be a bit more eager to inline g, because it may
-      -- expose an eval (on x perhaps) that can be eliminated or
-      -- shared. I saw this in nofib 'boyer2', RewriteFuns.onewayunify1
-      -- It's worth an 18% improvement in allocation for this
-      -- particular benchmark; 5% on 'mate' and 1.3% on 'multiplier'
 
 ---------- No further useful info, revert to generic rebuild ------------
 rebuildCall env (ArgInfo { ai_fun = fun, ai_args = rev_args }) cont
@@ -2243,6 +2232,7 @@ trySeqRules in_env scrut rhs cont
                 , TyArg { as_arg_ty  = rhs_ty
                         , as_hole_ty = res2_ty }
                 , ValArg { as_arg = no_cast_scrut
+                         , as_dmd = seqDmd
                          , as_hole_ty = res3_ty
                          , as_mult = Many } ]
                 -- The multiplicity of the scrutiny above is Many because the type
@@ -3268,31 +3258,41 @@ altsWouldDup (alt:alts)
     is_bot_alt (_,_,rhs) = exprIsDeadEnd rhs
 
 -------------------------
-mkDupableCont :: SimplEnv -> SimplCont
+mkDupableCont :: SimplEnv
+              -> SimplCont
               -> SimplM ( SimplFloats  -- Incoming SimplEnv augmented with
                                        --   extra let/join-floats and in-scope variables
                         , SimplCont)   -- dup_cont: duplicable continuation
-
 mkDupableCont env cont
+  = mkDupableContWithDmds env (repeat topDmd) cont
+
+mkDupableContWithDmds
+   :: SimplEnv  -> [Demand]  -- Demands on arguments; always infinite
+   -> SimplCont -> SimplM ( SimplFloats, SimplCont)
+
+mkDupableContWithDmds env _ cont
   | contIsDupable cont
   = return (emptyFloats env, cont)
 
-mkDupableCont _ (Stop {}) = panic "mkDupableCont"     -- Handled by previous eqn
+mkDupableContWithDmds _ _ (Stop {}) = panic "mkDupableCont"     -- Handled by previous eqn
 
-mkDupableCont env (CastIt ty cont)
-  = do  { (floats, cont') <- mkDupableCont env cont
+mkDupableContWithDmds env dmds (CastIt ty cont)
+  = do  { (floats, cont') <- mkDupableContWithDmds env dmds cont
         ; return (floats, CastIt ty cont') }
 
 -- Duplicating ticks for now, not sure if this is good or not
-mkDupableCont env (TickIt t cont)
-  = do  { (floats, cont') <- mkDupableCont env cont
+mkDupableContWithDmds env dmds (TickIt t cont)
+  = do  { (floats, cont') <- mkDupableContWithDmds env dmds cont
         ; return (floats, TickIt t cont') }
 
-mkDupableCont env (StrictBind { sc_bndr = bndr, sc_bndrs = bndrs
-                              , sc_body = body, sc_env = se, sc_cont = cont})
-  -- See Note [Duplicating StrictBind]
+mkDupableContWithDmds env _
+     (StrictBind { sc_bndr = bndr, sc_bndrs = bndrs
+                 , sc_body = body, sc_env = se, sc_cont = cont})
+-- See Note [Duplicating StrictBind]
+-- K[ let x = <> in b ]  -->   join j x = K[ b ]
+--                             j <>
   = do { let sb_env = se `setInScopeFromE` env
-       ; (sb_env1, bndr') <- simplBinder sb_env bndr
+       ; (sb_env1, bndr')      <- simplBinder sb_env bndr
        ; (floats1, join_inner) <- simplLam sb_env1 bndrs body cont
           -- No need to use mkDupableCont before simplLam; we
           -- use cont once here, and then share the result if necessary
@@ -3300,56 +3300,66 @@ mkDupableCont env (StrictBind { sc_bndr = bndr, sc_bndrs = bndrs
        ; let join_body = wrapFloats floats1 join_inner
              res_ty    = contResultType cont
 
-       ; (floats2, body2)
-            <- if exprIsDupable (targetPlatform (seDynFlags env)) join_body
-               then return (emptyFloats env, join_body)
-               else do { join_bndr <- newJoinId [bndr'] res_ty
-                       ; let join_call = App (Var join_bndr) (Var bndr')
-                             join_rhs  = Lam (setOneShotLambda bndr') join_body
-                             join_bind = NonRec join_bndr join_rhs
-                             floats    = emptyFloats env `extendFloats` join_bind
-                       ; return (floats, join_call) }
-       ; return ( floats2
-                , StrictBind { sc_bndr = bndr', sc_bndrs = []
-                             , sc_body = body2
-                             , sc_env  = zapSubstEnv se `setInScopeFromF` floats2
-                                         -- See Note [StaticEnv invariant] in GHC.Core.Opt.Simplify.Utils
-                             , sc_dup  = OkToDup
-                             , sc_cont = mkBoringStop res_ty } ) }
-
-mkDupableCont env (StrictArg { sc_fun = info, sc_cci = cci
-                             , sc_cont = cont, sc_fun_ty = fun_ty, sc_mult = m })
-        -- See Note [Duplicating StrictArg]
-        -- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable
-  = do { (floats1, cont') <- mkDupableCont env cont
+       ; mkDupableStrictBind env bndr' join_body res_ty }
+
+mkDupableContWithDmds env _
+    (StrictArg { sc_fun = fun, sc_cont = cont
+               , sc_fun_ty = fun_ty, sc_mult = m })
+  -- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable
+  | thumbsUpPlanA cont
+  = -- Use Plan A of Note [Duplicating StrictArg]
+    do { let (_ : dmds) = ai_dmds fun
+       ; (floats1, cont')  <- mkDupableContWithDmds env dmds cont
+                              -- Use the demands from the function to add the right
+                              -- demand info on any bindings we make for further args
        ; (floats_s, args') <- mapAndUnzipM (makeTrivialArg (getMode env))
-                                           (ai_args info)
+                                           (ai_args fun)
        ; return ( foldl' addLetFloats floats1 floats_s
-                , StrictArg { sc_fun = info { ai_args = args' }
+                , StrictArg { sc_fun = fun { ai_args = args' }
                             , sc_cont = cont'
-                            , sc_cci = cci
                             , sc_fun_ty = fun_ty
                             , sc_mult = m
                             , sc_dup = OkToDup} ) }
 
-mkDupableCont env (ApplyToTy { sc_cont = cont
-                             , sc_arg_ty = arg_ty, sc_hole_ty = hole_ty })
-  = do  { (floats, cont') <- mkDupableCont env cont
+  | otherwise
+  = -- Use Plan B of Note [Duplicating StrictArg]
+    --   K[ f a b <> ]   -->   join j x = K[ f a b x ]
+    --                         j <>
+    do { let arg_ty = funArgTy fun_ty
+             rhs_ty = contResultType cont
+       ; arg_bndr <- newId (fsLit "arg") m arg_ty   -- ToDo: check this linearity argument
+       ; let env' = env `addNewInScopeIds` [arg_bndr]
+       ; (floats, join_rhs) <- rebuildCall env' (addValArgTo fun (m, Var arg_bndr) fun_ty) cont
+       ; mkDupableStrictBind env' arg_bndr (wrapFloats floats join_rhs) rhs_ty }
+  where
+    thumbsUpPlanA (StrictArg {})               = False
+    thumbsUpPlanA (CastIt _ k)                 = thumbsUpPlanA k
+    thumbsUpPlanA (TickIt _ k)                 = thumbsUpPlanA k
+    thumbsUpPlanA (ApplyToVal { sc_cont = k }) = thumbsUpPlanA k
+    thumbsUpPlanA (ApplyToTy  { sc_cont = k }) = thumbsUpPlanA k
+    thumbsUpPlanA (Select {})                  = True
+    thumbsUpPlanA (StrictBind {})              = True
+    thumbsUpPlanA (Stop {})                    = True
+
+mkDupableContWithDmds env dmds
+    (ApplyToTy { sc_cont = cont, sc_arg_ty = arg_ty, sc_hole_ty = hole_ty })
+  = do  { (floats, cont') <- mkDupableContWithDmds env dmds cont
         ; return (floats, ApplyToTy { sc_cont = cont'
                                     , sc_arg_ty = arg_ty, sc_hole_ty = hole_ty }) }
 
-mkDupableCont env (ApplyToVal { sc_arg = arg, sc_dup = dup
-                              , sc_env = se, sc_cont = cont
-                              , sc_hole_ty = hole_ty, sc_mult = mult })
+mkDupableContWithDmds env dmds
+    (ApplyToVal { sc_arg = arg, sc_dup = dup, sc_env = se
+                , sc_cont = cont, sc_hole_ty = hole_ty, sc_mult = mult })
   =     -- e.g.         [...hole...] (...arg...)
         --      ==>
         --              let a = ...arg...
         --              in [...hole...] a
         -- NB: sc_dup /= OkToDup; that is caught earlier by contIsDupable
-    do  { (floats1, cont') <- mkDupableCont env cont
+    do  { let (dmd:_) = dmds   -- Never fails
+        ; (floats1, cont') <- mkDupableContWithDmds env dmds cont
         ; let env' = env `setInScopeFromF` floats1
         ; (_, se', arg') <- simplArg env' dup se arg
-        ; (let_floats2, arg'') <- makeTrivial (getMode env) NotTopLevel (fsLit "karg") arg'
+        ; (let_floats2, arg'') <- makeTrivial (getMode env) NotTopLevel dmd (fsLit "karg") arg'
         ; let all_floats = floats1 `addLetFloats` let_floats2
         ; return ( all_floats
                  , ApplyToVal { sc_arg = arg''
@@ -3361,8 +3371,8 @@ mkDupableCont env (ApplyToVal { sc_arg = arg, sc_dup = dup
                               , sc_dup = OkToDup, sc_cont = cont'
                               , sc_hole_ty = hole_ty, sc_mult = mult }) }
 
-mkDupableCont env (Select { sc_bndr = case_bndr, sc_alts = alts
-                          , sc_env = se, sc_cont = cont })
+mkDupableContWithDmds env _
+    (Select { sc_bndr = case_bndr, sc_alts = alts, sc_env = se, sc_cont = cont })
   =     -- e.g.         (case [...hole...] of { pi -> ei })
         --      ===>
         --              let ji = \xij -> ei
@@ -3404,6 +3414,34 @@ mkDupableCont env (Select { sc_bndr = case_bndr, sc_alts = alts
                                       -- See Note [StaticEnv invariant] in GHC.Core.Opt.Simplify.Utils
                           , sc_cont = mkBoringStop (contResultType cont) } ) }
 
+mkDupableStrictBind :: SimplEnv -> OutId -> OutExpr -> OutType
+                    -> SimplM (SimplFloats, SimplCont)
+mkDupableStrictBind env arg_bndr join_rhs res_ty
+  | exprIsDupable (targetPlatform (seDynFlags env)) join_rhs
+  = return (emptyFloats env
+           , StrictBind { sc_bndr = arg_bndr, sc_bndrs = []
+                        , sc_body = join_rhs
+                        , sc_env  = zapSubstEnv env
+                          -- See Note [StaticEnv invariant] in GHC.Core.Opt.Simplify.Utils
+                        , sc_dup  = OkToDup
+                        , sc_cont = mkBoringStop res_ty } )
+  | otherwise
+  = do { join_bndr <- newJoinId [arg_bndr] res_ty
+       ; let arg_info = ArgInfo { ai_fun   = join_bndr
+                                , ai_rules = Nothing, ai_args  = []
+                                , ai_encl  = False, ai_dmds  = repeat topDmd
+                                , ai_discs = repeat 0 }
+       ; return ( addJoinFloats (emptyFloats env) $
+                  unitJoinFloat                   $
+                  NonRec join_bndr                $
+                  Lam (setOneShotLambda arg_bndr) join_rhs
+                , StrictArg { sc_dup    = OkToDup
+                            , sc_fun    = arg_info
+                            , sc_fun_ty = idType join_bndr
+                            , sc_cont   = mkBoringStop res_ty
+                            , sc_mult   = Many   -- ToDo: check this!
+                            } ) }
+
 mkDupableAlt :: Platform -> OutId
              -> JoinFloats -> OutAlt
              -> SimplM (JoinFloats, OutAlt)
@@ -3577,57 +3615,102 @@ type variables as well as term variables.
 
 Note [Duplicating StrictArg]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We make a StrictArg duplicable simply by making all its
-stored-up arguments (in sc_fun) trivial, by let-binding
-them.  Thus:
-        f E [..hole..]
-        ==>     let a = E
-                in f a [..hole..]
-Now if the thing in the hole is a case expression (which is when
-we'll call mkDupableCont), we'll push the function call into the
-branches, which is what we want.  Now RULES for f may fire, and
-call-pattern specialisation.  Here's an example from #3116
+Dealing with making a StrictArg continuation duplicable has turned out
+to be one of the trickiest corners of the simplifier, giving rise
+to several cases in which the simplier expanded the program's size
+*exponentially*.  They include
+  #13253 exponential inlining
+  #10421 ditto
+  #18140 strict constructors
+  #18282 another nested-function call case
+
+Suppose we have a call
+  f e1 (case x of { True -> r1; False -> r2 }) e3
+and f is strict in its second argument.  Then we end up in
+mkDupableCont with a StrictArg continuation for (f e1 <> e3).
+There are two ways to make it duplicable.
+
+* Plan A: move the entire call inwards, being careful not
+  to duplicate e1 or e3, thus:
+     let a1 = e1
+         a3 = e3
+     in case x of { True  -> f a1 r1 a3
+                  ; False -> f a1 r2 a3 }
+
+* Plan B: make a join point:
+     join $j x = f e1 x e3
+     in case x of { True  -> jump $j r1
+                  ; False -> jump $j r2 }
+  Notice that Plan B is very like the way we handle strict
+  bindings; see Note [Duplicating StrictBind].
+
+Plan A is good. Here's an example from #3116
      go (n+1) (case l of
                  1  -> bs'
                  _  -> Chunk p fpc (o+1) (l-1) bs')
-If we can push the call for 'go' inside the case, we get
+
+If we pushed the entire call for 'go' inside the case, we get
 call-pattern specialisation for 'go', which is *crucial* for
-this program.
+this particular program.
 
-Here is the (&&) example:
-        && E (case x of { T -> F; F -> T })
-  ==>   let a = E in
-        case x of { T -> && a F; F -> && a T }
-Much better!
-
-Notice that
-  * Arguments to f *after* the strict one are handled by
-    the ApplyToVal case of mkDupableCont.  Eg
-        f [..hole..] E
-
-  * We can only do the let-binding of E because the function
-    part of a StrictArg continuation is an explicit syntax
-    tree.  In earlier versions we represented it as a function
-    (CoreExpr -> CoreEpxr) which we couldn't take apart.
-
-Historical aide: previously we did this (where E is a
-big argument:
-        f E [..hole..]
-        ==>     let $j = \a -> f E a
-                in $j [..hole..]
-
-But this is terrible! Here's an example:
+Here is another example.
         && E (case x of { T -> F; F -> T })
-Now, && is strict so we end up simplifying the case with
-an ArgOf continuation.  If we let-bind it, we get
-        let $j = \v -> && E v
-        in simplExpr (case x of { T -> F; F -> T })
-                     (ArgOf (\r -> $j r)
-And after simplifying more we get
-        let $j = \v -> && E v
-        in case x of { T -> $j F; F -> $j T }
-Which is a Very Bad Thing
 
+Pushing the call inward (being careful not to duplicate E)
+        let a = E
+        in case x of { T -> && a F; F -> && a T }
+
+and now the (&& a F) etc can optimise.  Moreover there might
+be a RULE for the function that can fire when it "sees" the
+particular case alterantive.
+
+But Plan A can have terrible, terrible behaviour. Here is a classic
+case:
+  f (f (f (f (f True))))
+
+Suppose f is strict, and has a body that is small enough to inline.
+The innermost call inlines (seeing the True) to give
+  f (f (f (f (case v of { True -> e1; False -> e2 }))))
+
+Now, suppose we naively push the entire continuation into both
+case branches (it doesn't look large, just f.f.f.f). We get
+  case v of
+    True  -> f (f (f (f e1)))
+    False -> f (f (f (f e2)))
+
+And now the process repeats, so we end up with an exponentially large
+number of copies of f. No good!
+
+CONCLUSION: we want Plan A in general, but do Plan B is there a
+danger of this nested call behaviour. The function that decides
+this is called thumbsUpPlanA.
+
+Note [Keeping demand info in StrictArg Plan A]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Following on from Note [Duplicating StrictArg], another common code
+pattern that can go bad is this:
+   f (case x1 of { T -> F; F -> T })
+     (case x2 of { T -> F; F -> T })
+     ...etc...
+when f is strict in all its arguments.  (It might, for example, be a
+strict data constructor whose wrapper has not yet been inlined.)
+
+We use Plan A (because there is no nesting) giving
+  let a2 = case x2 of ...
+      a3 = case x3 of ...
+  in case x1 of { T -> f F a2 a3 ... ; F -> f T a2 a3 ... }
+
+Now we must be careful!  a2 and a3 are small, and the OneOcc code in
+postInlineUnconditionally may inline them both at both sites; see Note
+Note [Inline small things to avoid creating a thunk] in
+Simplify.Utils. But if we do inline them, the entire process will
+repeat -- back to exponential behaviour.
+
+So we are careful to keep the demand-info on a2 and a3.  Then they'll
+be /strict/ let-bindings, which will be dealt with by StrictBind.
+That's why contIsDupableWithDmds is careful to propagage demand
+info to the auxiliary bindings it creates.  See the Demand argument
+to makeTrivial.
 
 Note [Duplicating StrictBind]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -3636,9 +3719,10 @@ that for case expressions.  After all,
    let x* = e in b   is similar to    case e of x -> b
 
 So we potentially make a join-point for the body, thus:
-   let x = [] in b   ==>   join j x = b
-                           in let x = [] in j x
+   let x = <> in b   ==>   join j x = b
+                           in j <>
 
+Just like StrictArg in fact -- and indeed they share code.
 
 Note [Join point abstraction]  Historical note
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/compiler/GHC/Core/Opt/Simplify/Utils.hs b/compiler/GHC/Core/Opt/Simplify/Utils.hs
index e9ee16157f..0d3a577938 100644
--- a/compiler/GHC/Core/Opt/Simplify/Utils.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Utils.hs
@@ -29,6 +29,7 @@ module GHC.Core.Opt.Simplify.Utils (
         ArgInfo(..), ArgSpec(..), mkArgInfo,
         addValArgTo, addCastTo, addTyArgTo,
         argInfoExpr, argInfoAppArgs, pushSimplifiedArgs,
+        isStrictArgInfo, lazyArgContext,
 
         abstractFloats,
 
@@ -153,8 +154,9 @@ data SimplCont
   | StrictArg           -- (StrictArg (f e1 ..en) K)[e] = K[ f e1 .. en e ]
       { sc_dup  :: DupFlag     -- Always Simplified or OkToDup
       , sc_fun  :: ArgInfo     -- Specifies f, e1..en, Whether f has rules, etc
-                               --     plus strictness flags for *further* args
-      , sc_cci  :: CallCtxt    -- Whether *this* argument position is interesting
+                               --     plus demands and discount flags for *this* arg
+                               --          and further args
+                               --     So ai_dmds and ai_discs are never empty
       , sc_fun_ty :: OutType   -- Type of the function (f e1 .. en),
                                -- presumably (arg_ty -> res_ty)
                                -- where res_ty is expected by sc_cont
@@ -269,32 +271,52 @@ data ArgInfo
                                 -- or an enclosing one has rules (recursively)
                                 --      True => be keener to inline in all args
 
-        ai_strs :: [Bool],      -- Strictness of remaining arguments
+        ai_dmds :: [Demand],    -- Demands on remaining value arguments (beyond ai_args)
                                 --   Usually infinite, but if it is finite it guarantees
                                 --   that the function diverges after being given
                                 --   that number of args
-        ai_discs :: [Int]       -- Discounts for remaining arguments; non-zero => be keener to inline
+
+        ai_discs :: [Int]       -- Discounts for remaining value arguments (beyong ai_args)
+                                --   non-zero => be keener to inline
                                 --   Always infinite
     }
 
 data ArgSpec
   = ValArg { as_mult :: Mult
-           , as_arg :: OutExpr        -- Apply to this (coercion or value); c.f. ApplyToVal
+           , as_dmd  :: Demand        -- Demand placed on this argument
+           , as_arg  :: OutExpr       -- Apply to this (coercion or value); c.f. ApplyToVal
            , as_hole_ty :: OutType }  -- Type of the function (presumably t1 -> t2)
+
   | TyArg { as_arg_ty  :: OutType     -- Apply to this type; c.f. ApplyToTy
           , as_hole_ty :: OutType }   -- Type of the function (presumably forall a. blah)
+
   | CastBy OutCoercion                -- Cast by this; c.f. CastIt
 
+instance Outputable ArgInfo where
+  ppr (ArgInfo { ai_fun = fun, ai_args = args, ai_dmds = dmds })
+    = text "ArgInfo" <+> braces
+         (sep [ text "fun =" <+> ppr fun
+              , text "dmds =" <+> ppr dmds
+              , text "args =" <+> ppr args ])
+
 instance Outputable ArgSpec where
   ppr (ValArg { as_mult = mult, as_arg = arg })  = text "ValArg" <+> ppr mult <+> ppr arg
   ppr (TyArg { as_arg_ty = ty }) = text "TyArg" <+> ppr ty
   ppr (CastBy c)                 = text "CastBy" <+> ppr c
 
 addValArgTo :: ArgInfo -> (Mult, OutExpr) -> OutType -> ArgInfo
-addValArgTo ai (w, arg) hole_ty = ai { ai_args = arg_spec : ai_args ai
-                                     , ai_rules = decRules (ai_rules ai) }
-  where
-    arg_spec = ValArg { as_arg = arg, as_hole_ty = hole_ty, as_mult = w }
+addValArgTo ai (w, arg) hole_ty
+  | ArgInfo { ai_dmds = dmd:dmds, ai_discs = _:discs, ai_rules = rules } <- ai
+      -- Pop the top demand and and discounts off
+  , let arg_spec = ValArg { as_arg = arg, as_hole_ty = hole_ty
+                          , as_mult = w, as_dmd = dmd }
+  = ai { ai_args  = arg_spec : ai_args ai
+       , ai_dmds  = dmds
+       , ai_discs = discs
+       , ai_rules = decRules rules }
+  | otherwise
+  = pprPanic "addValArgTo" (ppr ai $$ ppr arg)
+    -- There should always be enough demands and discounts
 
 addTyArgTo :: ArgInfo -> OutType -> OutType -> ArgInfo
 addTyArgTo ai arg_ty hole_ty = ai { ai_args = arg_spec : ai_args ai
@@ -305,6 +327,12 @@ addTyArgTo ai arg_ty hole_ty = ai { ai_args = arg_spec : ai_args ai
 addCastTo :: ArgInfo -> OutCoercion -> ArgInfo
 addCastTo ai co = ai { ai_args = CastBy co : ai_args ai }
 
+isStrictArgInfo :: ArgInfo -> Bool
+-- True if the function is strict in the next argument
+isStrictArgInfo (ArgInfo { ai_dmds = dmds })
+  | dmd:_ <- dmds = isStrictDmd dmd
+  | otherwise     = False
+
 argInfoAppArgs :: [ArgSpec] -> [OutExpr]
 argInfoAppArgs []                              = []
 argInfoAppArgs (CastBy {}                : _)  = []  -- Stop at a cast
@@ -461,8 +489,8 @@ contArgs cont
   | otherwise = go [] cont
   where
     lone (ApplyToTy  {}) = False  -- See Note [Lone variables] in GHC.Core.Unfold
-    lone (ApplyToVal {}) = False
-    lone (CastIt {})     = False
+    lone (ApplyToVal {}) = False  -- NB: even a type application or cast
+    lone (CastIt {})     = False  --     stops it being "lone"
     lone _               = True
 
     go args (ApplyToVal { sc_arg = arg, sc_env = se, sc_cont = k })
@@ -489,17 +517,16 @@ mkArgInfo env fun rules n_val_args call_cont
   = ArgInfo { ai_fun = fun, ai_args = []
             , ai_rules = fun_rules
             , ai_encl = False
-            , ai_strs = vanilla_stricts
+            , ai_dmds = vanilla_dmds
             , ai_discs = vanilla_discounts }
   | otherwise
-  = ArgInfo { ai_fun = fun, ai_args = []
+  = ArgInfo { ai_fun   = fun
+            , ai_args = []
             , ai_rules = fun_rules
             , ai_encl  = interestingArgContext rules call_cont
-            , ai_strs  = arg_stricts
+            , ai_dmds  = add_type_strictness (idType fun) arg_dmds
             , ai_discs = arg_discounts }
   where
-    fun_ty = idType fun
-
     fun_rules = mkFunRules rules
 
     vanilla_discounts, arg_discounts :: [Int]
@@ -509,14 +536,14 @@ mkArgInfo env fun rules n_val_args call_cont
                               -> discounts ++ vanilla_discounts
                         _     -> vanilla_discounts
 
-    vanilla_stricts, arg_stricts :: [Bool]
-    vanilla_stricts  = repeat False
+    vanilla_dmds, arg_dmds :: [Demand]
+    vanilla_dmds  = repeat topDmd
 
-    arg_stricts
+    arg_dmds
       | not (sm_inline (seMode env))
-      = vanilla_stricts -- See Note [Do not expose strictness if sm_inline=False]
+      = vanilla_dmds -- See Note [Do not expose strictness if sm_inline=False]
       | otherwise
-      = add_type_str fun_ty $
+      = -- add_type_str fun_ty $
         case splitStrictSig (idStrictness fun) of
           (demands, result_info)
                 | not (demands `lengthExceeds` n_val_args)
@@ -529,36 +556,40 @@ mkArgInfo env fun rules n_val_args call_cont
                         -- inlining lone variables, so its ok
                         -- (see GHC.Core.Op.Simplify.Utils.analyseCont)
                    if isDeadEndDiv result_info then
-                        map isStrictDmd demands         -- Finite => result is bottom
+                        demands  -- Finite => result is bottom
                    else
-                        map isStrictDmd demands ++ vanilla_stricts
+                        demands ++ vanilla_dmds
                | otherwise
                -> WARN( True, text "More demands than arity" <+> ppr fun <+> ppr (idArity fun)
                                 <+> ppr n_val_args <+> ppr demands )
-                   vanilla_stricts      -- Not enough args, or no strictness
+                  vanilla_dmds      -- Not enough args, or no strictness
 
-    add_type_str :: Type -> [Bool] -> [Bool]
+    add_type_strictness :: Type -> [Demand] -> [Demand]
     -- If the function arg types are strict, record that in the 'strictness bits'
     -- No need to instantiate because unboxed types (which dominate the strict
     --   types) can't instantiate type variables.
-    -- add_type_str is done repeatedly (for each call);
+    -- add_type_strictness is done repeatedly (for each call);
     --   might be better once-for-all in the function
     -- But beware primops/datacons with no strictness
 
-    add_type_str _ [] = []
-    add_type_str fun_ty all_strs@(str:strs)
+    add_type_strictness fun_ty dmds
+      | null dmds = []
+
+      | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty
+      = add_type_strictness fun_ty' dmds     -- Look through foralls
+
       | Just (_, arg_ty, fun_ty') <- splitFunTy_maybe fun_ty        -- Add strict-type info
-      = (str || Just False == isLiftedType_maybe arg_ty)
-        : add_type_str fun_ty' strs
+      , dmd : rest_dmds <- dmds
+      , let dmd' = case isLiftedType_maybe arg_ty of
+                       Just False -> strictenDmd dmd
+                       _          -> dmd
+      = dmd' : add_type_strictness fun_ty' rest_dmds
           -- If the type is levity-polymorphic, we can't know whether it's
           -- strict. isLiftedType_maybe will return Just False only when
           -- we're sure the type is unlifted.
 
-      | Just (_, fun_ty') <- splitForAllTy_maybe fun_ty
-      = add_type_str fun_ty' all_strs     -- Look through foralls
-
       | otherwise
-      = all_strs
+      = dmds
 
 {- Note [Unsaturated functions]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -647,6 +678,26 @@ This made a small compile-time perf improvement in perf/compiler/T6048,
 and it looks plausible to me.
 -}
 
+lazyArgContext :: ArgInfo -> CallCtxt
+-- Use this for lazy arguments
+lazyArgContext (ArgInfo { ai_encl = encl_rules, ai_discs = discs })
+  | encl_rules                = RuleArgCtxt
+  | disc:_ <- discs, disc > 0 = DiscArgCtxt  -- Be keener here
+  | otherwise                 = BoringCtxt   -- Nothing interesting
+
+strictArgContext :: ArgInfo -> CallCtxt
+strictArgContext (ArgInfo { ai_encl = encl_rules, ai_discs = discs })
+-- Use this for strict arguments
+  | encl_rules                = RuleArgCtxt
+  | disc:_ <- discs, disc > 0 = DiscArgCtxt  -- Be keener here
+  | otherwise                 = RhsCtxt
+      -- Why RhsCtxt?  if we see f (g x) (h x), and f is strict, we
+      -- want to be a bit more eager to inline g, because it may
+      -- expose an eval (on x perhaps) that can be eliminated or
+      -- shared. I saw this in nofib 'boyer2', RewriteFuns.onewayunify1
+      -- It's worth an 18% improvement in allocation for this
+      -- particular benchmark; 5% on 'mate' and 1.3% on 'multiplier'
+
 interestingCallContext :: SimplEnv -> SimplCont -> CallCtxt
 -- See Note [Interesting call context]
 interestingCallContext env cont
@@ -663,7 +714,7 @@ interestingCallContext env cont
         -- motivation to inline. See Note [Cast then apply]
         -- in GHC.Core.Unfold
 
-    interesting (StrictArg { sc_cci = cci }) = cci
+    interesting (StrictArg { sc_fun = fun }) = strictArgContext fun
     interesting (StrictBind {})              = BoringCtxt
     interesting (Stop _ cci)                 = cci
     interesting (TickIt _ k)                 = interesting k
@@ -713,16 +764,13 @@ interestingArgContext rules call_cont
     go (Select {})                  = False
     go (ApplyToVal {})              = False  -- Shouldn't really happen
     go (ApplyToTy  {})              = False  -- Ditto
-    go (StrictArg { sc_cci = cci }) = interesting cci
+    go (StrictArg { sc_fun = fun }) = ai_encl fun
     go (StrictBind {})              = False      -- ??
     go (CastIt _ c)                 = go c
-    go (Stop _ cci)                 = interesting cci
+    go (Stop _ RuleArgCtxt)         = True
+    go (Stop _ _)                   = False
     go (TickIt _ c)                 = go c
 
-    interesting RuleArgCtxt = True
-    interesting _           = False
-
-
 {- Note [Interesting arguments]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 An argument is interesting if it deserves a discount for unfoldings
@@ -1201,9 +1249,9 @@ preInlineUnconditionally env top_lvl bndr rhs rhs_env
     extend_subst_with inl_rhs = extendIdSubst env bndr (mkContEx rhs_env inl_rhs)
 
     one_occ IAmDead = True -- Happens in ((\x.1) v)
-    one_occ OneOcc{ occ_one_br = InOneBranch
+    one_occ OneOcc{ occ_n_br   = 1
                   , occ_in_lam = NotInsideLam }   = isNotTopLevel top_lvl || early_phase
-    one_occ OneOcc{ occ_one_br = InOneBranch
+    one_occ OneOcc{ occ_n_br   = 1
                   , occ_in_lam = IsInsideLam
                   , occ_int_cxt = IsInteresting } = canInlineInLam rhs
     one_occ _                                     = False
@@ -1317,24 +1365,15 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs
   | exprIsTrivial rhs           = True
   | otherwise
   = case occ_info of
-        -- The point of examining occ_info here is that for *non-values*
-        -- that occur outside a lambda, the call-site inliner won't have
-        -- a chance (because it doesn't know that the thing
-        -- only occurs once).   The pre-inliner won't have gotten
-        -- it either, if the thing occurs in more than one branch
-        -- So the main target is things like
-        --      let x = f y in
-        --      case v of
-        --         True  -> case x of ...
-        --         False -> case x of ...
-        -- This is very important in practice; e.g. wheel-seive1 doubles
-        -- in allocation if you miss this out
-      OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt }
-               -- OneOcc => no code-duplication issue
-        ->     smallEnoughToInline dflags unfolding     -- Small enough to dup
+      OneOcc { occ_in_lam = in_lam, occ_int_cxt = int_cxt, occ_n_br = n_br }
+        -- See Note [Inline small things to avoid creating a thunk]
+
+        -> n_br < 100  -- See Note [Suppress exponential blowup]
+
+           && smallEnoughToInline dflags unfolding     -- Small enough to dup
                         -- ToDo: consider discount on smallEnoughToInline if int_cxt is true
                         --
-                        -- NB: Do NOT inline arbitrarily big things, even if one_br is True
+                        -- NB: Do NOT inline arbitrarily big things, even if occ_n_br=1
                         -- Reason: doing so risks exponential behaviour.  We simplify a big
                         --         expression, inline it, and simplify it again.  But if the
                         --         very same thing happens in the big expression, we get
@@ -1381,7 +1420,56 @@ postInlineUnconditionally env top_lvl bndr occ_info rhs
     active    = isActive (sm_phase (getMode env)) (idInlineActivation bndr)
         -- See Note [pre/postInlineUnconditionally in gentle mode]
 
-{-
+{- Note [Inline small things to avoid creating a thunk]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The point of examining occ_info here is that for *non-values* that
+occur outside a lambda, the call-site inliner won't have a chance
+(because it doesn't know that the thing only occurs once).  The
+pre-inliner won't have gotten it either, if the thing occurs in more
+than one branch So the main target is things like
+
+     let x = f y in
+     case v of
+        True  -> case x of ...
+        False -> case x of ...
+
+This is very important in practice; e.g. wheel-seive1 doubles
+in allocation if you miss this out.  And bits of GHC itself start
+to allocate more.  An egregious example is test perf/compiler/T14697,
+where GHC.Driver.CmdLine.$wprocessArgs allocated hugely more.
+
+Note [Suppress exponential blowup]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In #13253, and several related tickets, we got an exponential blowup
+in code size from postInlineUnconditionally.  The trouble comes when
+we have
+  let j1a = case f y     of { True -> p;   False -> q }
+      j1b = case f y     of { True -> q;   False -> p }
+      j2a = case f (y+1) of { True -> j1a; False -> j1b }
+      j2b = case f (y+1) of { True -> j1b; False -> j1a }
+      ...
+  in case f (y+10) of { True -> j10a; False -> j10b }
+
+when there are many branches. In pass 1, postInlineUnconditionally
+inlines j10a and j10b (they are both small).  Now we have two calls
+to j9a and two to j9b.  In pass 2, postInlineUnconditionally inlines
+all four of these calls, leaving four calls to j8a and j8b. Etc.
+Yikes!  This is exponential!
+
+A possible plan: stop doing postInlineUnconditionally
+for some fixed, smallish number of branches, say 4. But that turned
+out to be bad: see Note [Inline small things to avoid creating a thunk].
+And, as it happened, the problem with #13253 was solved in a
+different way (Note [Duplicating StrictArg] in Simplify).
+
+So I just set an arbitrary, high limit of 100, to stop any
+totally exponential behaviour.
+
+This still leaves the nasty possiblity that /ordinary/ inlining (not
+postInlineUnconditionally) might inline these join points, each of
+which is individually quiet small.  I'm still not sure what to do
+about this (e.g. see #15488).
+
 Note [Top level and postInlineUnconditionally]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We don't do postInlineUnconditionally for top-level things (even for
diff --git a/compiler/GHC/Core/SimpleOpt.hs b/compiler/GHC/Core/SimpleOpt.hs
index afd915cf86..ab3eed4b60 100644
--- a/compiler/GHC/Core/SimpleOpt.hs
+++ b/compiler/GHC/Core/SimpleOpt.hs
@@ -433,7 +433,7 @@ simple_bind_pair env@(SOE { soe_inl = inl_env, soe_subst = subst })
     safe_to_inline IAmALoopBreaker{}                  = False
     safe_to_inline IAmDead                            = True
     safe_to_inline OneOcc{ occ_in_lam = NotInsideLam
-                         , occ_one_br = InOneBranch } = True
+                         , occ_n_br = 1 }             = True
     safe_to_inline OneOcc{}                           = False
     safe_to_inline ManyOccs{}                         = False
 
diff --git a/compiler/GHC/Types/Basic.hs b/compiler/GHC/Types/Basic.hs
index cf373f76d5..a0693b3f86 100644
--- a/compiler/GHC/Types/Basic.hs
+++ b/compiler/GHC/Types/Basic.hs
@@ -68,7 +68,7 @@ module GHC.Types.Basic (
         isNoOccInfo, strongLoopBreaker, weakLoopBreaker,
 
         InsideLam(..),
-        OneBranch(..),
+        BranchCount, oneBranch,
         InterestingCxt(..),
         TailCallInfo(..), tailCallInfo, zapOccTailCallInfo,
         isAlwaysTailCalled,
@@ -978,7 +978,7 @@ data OccInfo
                         -- lambda and case-bound variables.
 
   | OneOcc          { occ_in_lam  :: !InsideLam
-                    , occ_one_br  :: !OneBranch
+                    , occ_n_br    :: {-# UNPACK #-} !BranchCount
                     , occ_int_cxt :: !InterestingCxt
                     , occ_tail    :: !TailCallInfo }
                         -- ^ Occurs exactly once (per branch), not inside a rule
@@ -992,6 +992,16 @@ data OccInfo
 
 type RulesOnly = Bool
 
+type BranchCount = Int
+  -- For OneOcc, the BranchCount says how many syntactic occurrences there are
+  -- At the moment we really only check for 1 or >1, but in principle
+  --   we could pay attention to how *many* occurences there are
+  --   (notably in postInlineUnconditionally).
+  -- But meanwhile, Ints are very efficiently represented.
+
+oneBranch :: BranchCount
+oneBranch = 1
+
 {-
 Note [LoopBreaker OccInfo]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1058,14 +1068,6 @@ instance Monoid InsideLam where
   mappend = (Semi.<>)
 
 -----------------
-data OneBranch
-  = InOneBranch
-    -- ^ One syntactic occurrence: Occurs in only one case branch
-    -- so no code-duplication issue to worry about
-  | MultipleBranches
-  deriving (Eq)
-
------------------
 data TailCallInfo = AlwaysTailCalled JoinArity -- See Note [TailCallInfo]
                   | NoTailCallInfo
   deriving (Eq)
@@ -1124,12 +1126,10 @@ instance Outputable OccInfo where
           pp_ro | rule_only = char '!'
                 | otherwise = empty
   ppr (OneOcc inside_lam one_branch int_cxt tail_info)
-        = text "Once" <> pp_lam inside_lam <> pp_br one_branch <> pp_args int_cxt <> pp_tail
+        = text "Once" <> pp_lam inside_lam <> ppr one_branch <> pp_args int_cxt <> pp_tail
         where
           pp_lam IsInsideLam     = char 'L'
           pp_lam NotInsideLam    = empty
-          pp_br MultipleBranches = char '*'
-          pp_br InOneBranch      = empty
           pp_args IsInteresting  = char '!'
           pp_args NotInteresting = empty
           pp_tail                = pprShortTailCallInfo tail_info
@@ -1156,7 +1156,7 @@ AlwaysTailCalled.
 
 Note that there is a 'TailCallInfo' on a 'ManyOccs' value. One might expect that
 being tail-called would mean that the variable could only appear once per branch
-(thus getting a `OneOcc { occ_one_br = True }` occurrence info), but a join
+(thus getting a `OneOcc { }` occurrence info), but a join
 point can also be invoked from other join points, not just from case branches:
 
   let j1 x = ...
@@ -1167,7 +1167,7 @@ point can also be invoked from other join points, not just from case branches:
        C -> j2 q
 
 Here both 'j1' and 'j2' will get marked AlwaysTailCalled, but j1 will get
-ManyOccs and j2 will get `OneOcc { occ_one_br = True }`.
+ManyOccs and j2 will get `OneOcc { occ_n_br = 2 }`.
 
 ************************************************************************
 *                                                                      *
diff --git a/compiler/GHC/Types/Demand.hs b/compiler/GHC/Types/Demand.hs
index fd504eda30..51acdf3d8e 100644
--- a/compiler/GHC/Types/Demand.hs
+++ b/compiler/GHC/Types/Demand.hs
@@ -1285,14 +1285,14 @@ splitDmdTy ty@(DmdType _ [] res_ty)       = (defaultArgDmd res_ty, ty)
 deferAfterPreciseException :: DmdType -> DmdType
 deferAfterPreciseException = lubDmdType exnDmdType
 
-strictenDmd :: Demand -> CleanDemand
+strictenDmd :: Demand -> Demand
 strictenDmd (JD { sd = s, ud = u})
   = JD { sd = poke_s s, ud = poke_u u }
   where
-    poke_s Lazy      = HeadStr
-    poke_s (Str s)   = s
-    poke_u Abs       = UHead
-    poke_u (Use _ u) = u
+    poke_s Lazy      = Str HeadStr
+    poke_s s         = s
+    poke_u Abs       = useTop
+    poke_u u         = u
 
 -- Deferring and peeling
 
diff --git a/compiler/GHC/Types/Id/Info.hs b/compiler/GHC/Types/Id/Info.hs
index dfd6ef96ab..f67f581b74 100644
--- a/compiler/GHC/Types/Id/Info.hs
+++ b/compiler/GHC/Types/Id/Info.hs
@@ -58,7 +58,7 @@ module GHC.Types.Id.Info (
         isDeadOcc, isStrongLoopBreaker, isWeakLoopBreaker,
         occInfo, setOccInfo,
 
-        InsideLam(..), OneBranch(..),
+        InsideLam(..), BranchCount,
 
         TailCallInfo(..),
         tailCallInfo, isAlwaysTailCalled,
diff --git a/testsuite/tests/numeric/should_compile/T14465.stdout b/testsuite/tests/numeric/should_compile/T14465.stdout
index 75c05a57ee..5001d5b3a4 100644
--- a/testsuite/tests/numeric/should_compile/T14465.stdout
+++ b/testsuite/tests/numeric/should_compile/T14465.stdout
@@ -82,7 +82,7 @@ plusOne :: Natural -> Natural
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (n [Occ=Once] :: Natural) -> naturalAdd n M.minusOne1}]
+         Tmpl= \ (n [Occ=Once1] :: Natural) -> naturalAdd n M.minusOne1}]
 plusOne = \ (n :: Natural) -> naturalAdd n M.minusOne1
 
 -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
diff --git a/testsuite/tests/numeric/should_compile/T7116.stdout b/testsuite/tests/numeric/should_compile/T7116.stdout
index 9548a7f445..41995d9734 100644
--- a/testsuite/tests/numeric/should_compile/T7116.stdout
+++ b/testsuite/tests/numeric/should_compile/T7116.stdout
@@ -48,7 +48,7 @@ dr :: Double -> Double
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (x [Occ=Once!] :: Double) ->
+         Tmpl= \ (x [Occ=Once1!] :: Double) ->
                  case x of { GHC.Types.D# x1 ->
                  GHC.Types.D# (GHC.Prim.+## x1 x1)
                  }}]
@@ -65,7 +65,7 @@ dl :: Double -> Double
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (x [Occ=Once!] :: Double) ->
+         Tmpl= \ (x [Occ=Once1!] :: Double) ->
                  case x of { GHC.Types.D# y -> GHC.Types.D# (GHC.Prim.+## y y) }}]
 dl = dr
 
@@ -78,7 +78,7 @@ fr :: Float -> Float
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (x [Occ=Once!] :: Float) ->
+         Tmpl= \ (x [Occ=Once1!] :: Float) ->
                  case x of { GHC.Types.F# x1 ->
                  GHC.Types.F# (GHC.Prim.plusFloat# x1 x1)
                  }}]
@@ -97,7 +97,7 @@ fl :: Float -> Float
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (x [Occ=Once!] :: Float) ->
+         Tmpl= \ (x [Occ=Once1!] :: Float) ->
                  case x of { GHC.Types.F# y ->
                  GHC.Types.F# (GHC.Prim.plusFloat# y y)
                  }}]
diff --git a/testsuite/tests/perf/compiler/T10421.hs b/testsuite/tests/perf/compiler/T10421.hs
new file mode 100644
index 0000000000..226cc95fd2
--- /dev/null
+++ b/testsuite/tests/perf/compiler/T10421.hs
@@ -0,0 +1,51 @@
+-- Exponential with GHC 8.10
+
+module RegBig where
+
+import Prelude
+
+import Control.Applicative
+import T10421_Form
+import T10421_Y
+
+data Register
+  = Register String
+             String
+             String
+             String
+             String
+             String
+             String
+             String
+             String
+             String
+             String
+             String
+
+registerForm :: a -> IO (FormResult Register)
+registerForm _  = do
+  (a1, _) <- mreq textField "" Nothing
+  (a2, _) <- mreq textField "" Nothing
+  (a3, _) <- mreq textField "" Nothing
+  (a4, _) <- mreq textField "" Nothing
+  (a5, _) <- mreq textField "" Nothing
+  (a6, _) <- mreq textField "" Nothing
+  (a7, _) <- mreq textField "" Nothing
+  (a8, _) <- mreq textField "" Nothing
+  (a9, _) <- mreq textField "" Nothing
+  (a10, _) <- mreq textField "" Nothing
+  (a11, _) <- mreq textField "" Nothing
+  (a12, _) <- mreq textField "" Nothing
+  return (Register <$> a1
+                   <*> a2
+                   <*> a3
+                   <*> a4
+                   <*> a5
+                   <*> a6
+                   <*> a7
+                   <*> a8
+                   <*> a9
+                   <*> a10
+                   <*> a11
+                   <*> a12
+      )
diff --git a/testsuite/tests/perf/compiler/T10421_Form.hs b/testsuite/tests/perf/compiler/T10421_Form.hs
new file mode 100644
index 0000000000..0abf7ad9d5
--- /dev/null
+++ b/testsuite/tests/perf/compiler/T10421_Form.hs
@@ -0,0 +1,19 @@
+-- Form.hs
+module T10421_Form where
+
+import Control.Applicative
+
+data FormResult a = FormMissing
+                  | FormFailure [String]
+                  | FormSuccess a
+instance Functor FormResult where
+    fmap _ FormMissing = FormMissing
+    fmap _ (FormFailure errs) = FormFailure errs
+    fmap f (FormSuccess a) = FormSuccess $ f a
+instance Applicative FormResult where
+    pure = FormSuccess
+    (FormSuccess f) <*> (FormSuccess g) = FormSuccess $ f g
+    (FormFailure x) <*> (FormFailure y) = FormFailure $ x ++ y
+    (FormFailure x) <*> _ = FormFailure x
+    _ <*> (FormFailure y) = FormFailure y
+    _ <*> _ = FormMissing
diff --git a/testsuite/tests/perf/compiler/T10421_Y.hs b/testsuite/tests/perf/compiler/T10421_Y.hs
new file mode 100644
index 0000000000..de28838e86
--- /dev/null
+++ b/testsuite/tests/perf/compiler/T10421_Y.hs
@@ -0,0 +1,17 @@
+-- Y.hs
+{-# OPTIONS_GHC -fomit-interface-pragmas #-}
+-- Imagine the values defined in this module are complicated
+-- and there is no useful inlining/strictness/etc. information
+
+module T10421_Y where
+
+import T10421_Form
+
+mreq :: a -> b -> c -> IO (FormResult d, ())
+mreq = undefined
+
+mopt :: a -> b -> c -> IO (FormResult d, ())
+mopt = undefined
+
+textField = undefined
+checkBoxField = undefined
diff --git a/testsuite/tests/perf/compiler/T10421a.hs b/testsuite/tests/perf/compiler/T10421a.hs
new file mode 100644
index 0000000000..3a58f6dd62
--- /dev/null
+++ b/testsuite/tests/perf/compiler/T10421a.hs
@@ -0,0 +1,54 @@
+-- Exponential with GHC 8.10
+--
+-- This is a smaller version of T10421, but demonstrates the same blow-up
+
+module RegBig where
+
+import Prelude
+
+import Control.Applicative
+import T10421a_Form
+
+data Register
+  = Register String
+             String
+             String
+             String
+             String
+             String
+             String
+             String
+             String
+             String
+             String
+             String
+
+registerForm :: FormResult String   -- a1
+             -> FormResult String
+             -> FormResult String   -- a3
+             -> FormResult String
+             -> FormResult String
+             -> FormResult String   -- a6
+             -> FormResult String   -- a7
+             -> FormResult String
+             -> FormResult String
+             -> FormResult String
+             -> FormResult String
+             -> FormResult String   -- a12
+             -> IO (FormResult Register)
+
+registerForm a1 a2 a3 a4 a5 a6 a7
+             a8 a9 a10 a11 a12
+ = return (Register <$> a1
+                   <*> a2
+                   <*> a3
+                   <*> a4
+                   <*> a5
+                   <*> a6
+                   <*> a7
+                   <*> a8
+                   <*> a9
+                   <*> a10
+                   <*> a11
+                   <*> a12
+      )
diff --git a/testsuite/tests/perf/compiler/T10421a_Form.hs b/testsuite/tests/perf/compiler/T10421a_Form.hs
new file mode 100644
index 0000000000..165768b6e9
--- /dev/null
+++ b/testsuite/tests/perf/compiler/T10421a_Form.hs
@@ -0,0 +1,19 @@
+-- Form.hs
+module T10421a_Form where
+
+import Control.Applicative
+
+data FormResult a = FormMissing
+                  | FormFailure [String]
+                  | FormSuccess a
+instance Functor FormResult where
+    fmap _ FormMissing = FormMissing
+    fmap _ (FormFailure errs) = FormFailure errs
+    fmap f (FormSuccess a) = FormSuccess $ f a
+instance Applicative FormResult where
+    pure = FormSuccess
+    (FormSuccess f) <*> (FormSuccess g) = FormSuccess $ f g
+    (FormFailure x) <*> (FormFailure y) = FormFailure $ x ++ y
+    (FormFailure x) <*> _ = FormFailure x
+    _ <*> (FormFailure y) = FormFailure y
+    _ <*> _ = FormMissing
diff --git a/testsuite/tests/perf/compiler/T13253-spj.hs b/testsuite/tests/perf/compiler/T13253-spj.hs
new file mode 100644
index 0000000000..9c8af39aca
--- /dev/null
+++ b/testsuite/tests/perf/compiler/T13253-spj.hs
@@ -0,0 +1,20 @@
+-- Exponential with GHC 8.10
+
+module T13253 where
+
+f :: Int -> Bool -> Bool
+{-# INLINE f #-}
+f y x = case x of { True -> y>0 ; False -> y<0 }
+
+foo y x = f (y+1) $
+          f (y+2) $
+          f (y+3) $
+          f (y+4) $
+          f (y+5) $
+          f (y+6) $
+          f (y+7) $
+          f (y+8) $
+          f (y+9) $
+          f (y+10) $
+          f (y+11) $
+          f y x
diff --git a/testsuite/tests/perf/compiler/T13253.hs b/testsuite/tests/perf/compiler/T13253.hs
new file mode 100644
index 0000000000..859bc06ff6
--- /dev/null
+++ b/testsuite/tests/perf/compiler/T13253.hs
@@ -0,0 +1,122 @@
+-- Exponential with GHC 8.10
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module T13253 where
+
+import Control.Monad (liftM)
+import Control.Monad.Trans.RWS.Lazy -- check how strict behaves
+import Control.Monad.Trans.Reader (ReaderT)
+import Control.Monad.IO.Class (MonadIO (..))
+import Control.Monad.Trans.Class (MonadTrans (..))
+import Data.ByteString (ByteString)
+import Data.Monoid (Any (..))
+import Data.Semigroup (Semigroup (..))
+import Data.String (IsString (..))
+import System.Environment (getEnv)
+
+type Handler = ReaderT () IO
+type MForm = RWST (Maybe ([(String, Text)], ()), (), ()) Any [Int]
+type Text = ByteString -- close enough
+
+data HugeStruct = HugeStruct
+  !Text
+  !Text
+  !Text
+  !Text
+  !Text
+  !Text
+  !Text
+  !Text
+  !Text -- 9th
+  !Text
+  !Text
+
+data FormResult a = FormMissing
+                  | FormFailure [Text]
+                  | FormSuccess a
+    deriving Show
+instance Functor FormResult where
+    fmap _ FormMissing = FormMissing
+    fmap _ (FormFailure errs) = FormFailure errs
+    fmap f (FormSuccess a) = FormSuccess $ f a
+instance Applicative FormResult where
+    pure = FormSuccess
+    (FormSuccess f) <*> (FormSuccess g) = FormSuccess $ f g
+    (FormFailure x) <*> (FormFailure y) = FormFailure $ x ++ y
+    (FormFailure x) <*> _ = FormFailure x
+    _ <*> (FormFailure y) = FormFailure y
+    _ <*> _ = FormMissing
+instance Monoid m => Monoid (FormResult m) where
+    mempty = pure mempty
+    mappend = (<>)
+instance Semigroup m => Semigroup (FormResult m) where
+    x <> y = (<>) <$> x <*> y
+
+mreq :: MonadIO m => String -> MForm m (FormResult Text, ())
+-- fast
+--mreq v = pure (FormFailure [], ())
+-- slow
+mreq v = mhelper v (\m l -> FormFailure ["fail"]) FormSuccess
+
+askParams :: Monad m => MForm m (Maybe [(String, Text)])
+askParams = do
+    (x, _, _) <- ask
+    return $ liftM fst x
+
+mhelper
+    :: MonadIO m
+    => String
+    -> (() -> () -> FormResult b) -- on missing
+    -> (Text -> FormResult b)      -- on success
+    -> MForm m (FormResult b, ())
+mhelper v onMissing onFound = do
+    -- without tell, also faster
+    tell (Any True)
+    -- with different "askParams": faster.
+    -- mp <- liftIO $ read <$> readFile v
+    mp <- askParams
+    (res, x) <- case mp of
+        Nothing -> return (FormMissing, ())
+        Just p -> do
+            return $ case lookup v p of
+                Nothing -> (onMissing () (), ())
+                Just t -> (onFound t, ())
+    return (res, x)
+
+-- not inlining, also faster:
+-- {-# NOINLINE mhelper #-}
+
+sampleForm2 :: MForm Handler (FormResult HugeStruct)
+sampleForm2 = do
+    (x01, _) <- mreq "UNUSED"
+    (x02, _) <- mreq "UNUSED"
+    (x03, _) <- mreq "UNUSED"
+    (x04, _) <- mreq "UNUSED"
+    (x05, _) <- mreq "UNUSED"
+    (x06, _) <- mreq "UNUSED"
+    (x07, _) <- mreq "UNUSED"
+    (x08, _) <- mreq "UNUSED"
+    (x09, _) <- mreq "UNUSED"
+    (x10, _) <- mreq "UNUSED"
+    (x11, _) <- mreq "UNUSED"
+
+    let hugeStructRes = HugeStruct
+          <$> x01
+          <*> x02
+          <*> x03
+          <*> x04
+          <*> x05
+          <*> x06
+          <*> x07
+          <*> x08
+          <*> x09
+          <*> x10
+          <*> x11
+
+    pure hugeStructRes
+
+
+main :: IO ()
+main = pure ()
diff --git a/testsuite/tests/perf/compiler/T18140.hs b/testsuite/tests/perf/compiler/T18140.hs
new file mode 100644
index 0000000000..9b75b98054
--- /dev/null
+++ b/testsuite/tests/perf/compiler/T18140.hs
@@ -0,0 +1,57 @@
+-- Exponential with GHC 8.10
+
+{-# LANGUAGE BangPatterns  #-}
+module T18140 where
+
+
+data D = D
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+  !(Maybe Bool)
+
+maMB :: Maybe Bool -> Maybe Bool -> Maybe Bool
+maMB Nothing  y        = y
+maMB x        Nothing  = x
+maMB (Just x) (Just y) = Just (maB x y)
+
+maB :: Bool -> Bool -> Bool
+maB _ y = y
+
+maD :: D -> D -> D
+maD  (D x'1 x'2 x'3 x'4  x'5 x'6 x'7 x'8 x'9 x'10 x'11 x'12 x'13 x'14 x'15 x'16 x'17 x'18)
+     (D y'1 y'2 y'3 y'4  y'5 y'6 y'7 y'8 y'9 y'10 y'11 y'12 y'13 y'14 y'15 y'16 y'17 y'18)
+     = D
+      (maMB x'1 y'1)
+      (maMB x'2 y'2)
+      (maMB x'3 y'3)
+      (maMB x'4 y'4)
+      (maMB x'5 y'5)
+      (maMB x'6 y'6)
+      (maMB x'7 y'7)
+      (maMB x'8 y'8)
+      (maMB x'9 y'9)
+      (maMB x'10 y'10)
+      (maMB x'11 y'11)
+      (maMB x'12 y'12)
+      (maMB x'13 y'13)
+      (maMB x'14 y'14)
+      (maMB x'15 y'15)
+      (maMB x'16 y'16)
+      (maMB x'17 y'17)
+      (maMB x'18 y'18)
+
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index 77549999d7..52cd3e219a 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -388,3 +388,30 @@ test ('T18282',
       ],
       compile,
       ['-v0 -O'])
+test ('T18140',
+      [ collect_compiler_stats('bytes allocated',2)
+      ],
+      compile,
+      ['-v0 -O'])
+test('T10421',
+    [ only_ways(['normal']),
+      collect_compiler_stats('bytes allocated', 1)
+    ],
+    multimod_compile,
+    ['T10421', '-v0 -O'])
+test('T10421a',
+    [ only_ways(['normal']),
+      collect_compiler_stats('bytes allocated', 1)
+    ],
+    multimod_compile,
+    ['T10421a', '-v0 -O'])
+test ('T13253',
+      [ collect_compiler_stats('bytes allocated',2)
+      ],
+      compile,
+      ['-v0 -O'])
+test ('T13253-spj',
+      [ collect_compiler_stats('bytes allocated',2)
+      ],
+      compile,
+      ['-v0 -O'])
diff --git a/testsuite/tests/simplCore/should_compile/T13143.stderr b/testsuite/tests/simplCore/should_compile/T13143.stderr
index 6d069f6cbd..f90459114b 100644
--- a/testsuite/tests/simplCore/should_compile/T13143.stderr
+++ b/testsuite/tests/simplCore/should_compile/T13143.stderr
@@ -94,11 +94,11 @@ g [InlPrag=NOUSERINLINE[2]] :: Bool -> Bool -> Int -> Int
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=3,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (w [Occ=Once] :: Bool)
-                 (w1 [Occ=Once] :: Bool)
-                 (w2 [Occ=Once!] :: Int) ->
-                 case w2 of { GHC.Types.I# ww1 [Occ=Once] ->
-                 case T13143.$wg w w1 ww1 of ww2 [Occ=Once] { __DEFAULT ->
+         Tmpl= \ (w [Occ=Once1] :: Bool)
+                 (w1 [Occ=Once1] :: Bool)
+                 (w2 [Occ=Once1!] :: Int) ->
+                 case w2 of { GHC.Types.I# ww1 [Occ=Once1] ->
+                 case T13143.$wg w w1 ww1 of ww2 [Occ=Once1] { __DEFAULT ->
                  GHC.Types.I# ww2
                  }
                  }}]
diff --git a/testsuite/tests/simplCore/should_compile/T15631.stdout b/testsuite/tests/simplCore/should_compile/T15631.stdout
index b1ada8b039..cce6777d74 100644
--- a/testsuite/tests/simplCore/should_compile/T15631.stdout
+++ b/testsuite/tests/simplCore/should_compile/T15631.stdout
@@ -3,5 +3,5 @@
       case GHC.List.reverse1 @a w (GHC.Types.[] @a) of {
         [] -> case Foo.f1 @a of { GHC.Types.I# v1 -> GHC.Prim.+# ww2 v1 };
           case GHC.List.$wlenAcc
-                 case Foo.$wf @a w of ww [Occ=Once] { __DEFAULT ->
+                 case Foo.$wf @a w of ww [Occ=Once1] { __DEFAULT ->
       case Foo.$wf @a w of ww { __DEFAULT -> GHC.Types.I# ww }
diff --git a/testsuite/tests/simplCore/should_compile/T17901.stdout b/testsuite/tests/simplCore/should_compile/T17901.stdout
index 7a09f6e2df..3017c7a4a6 100644
--- a/testsuite/tests/simplCore/should_compile/T17901.stdout
+++ b/testsuite/tests/simplCore/should_compile/T17901.stdout
@@ -1,14 +1,14 @@
-                 (wombat1 [Occ=Once*!] :: T -> t)
+                 (wombat1 [Occ=Once3!] :: T -> t)
                    A -> wombat1 T17901.A;
                    B -> wombat1 T17901.B;
                    C -> wombat1 T17901.C
   = \ (@t) (wombat1 :: T -> t) (x :: T) ->
       case x of wild { __DEFAULT -> wombat1 wild }
-         Tmpl= \ (@t) (wombat2 [Occ=Once!] :: S -> t) (x [Occ=Once] :: S) ->
-                 case x of wild [Occ=Once] { __DEFAULT -> wombat2 wild }}]
+                 (wombat2 [Occ=Once1!] :: S -> t)
+                 case x of wild [Occ=Once1] { __DEFAULT -> wombat2 wild }}]
   = \ (@t) (wombat2 :: S -> t) (x :: S) ->
       case x of wild { __DEFAULT -> wombat2 wild }
-         Tmpl= \ (@t) (wombat3 [Occ=Once!] :: W -> t) (x [Occ=Once] :: W) ->
-                 case x of wild [Occ=Once] { __DEFAULT -> wombat3 wild }}]
+                 (wombat3 [Occ=Once1!] :: W -> t)
+                 case x of wild [Occ=Once1] { __DEFAULT -> wombat3 wild }}]
   = \ (@t) (wombat3 :: W -> t) (x :: W) ->
       case x of wild { __DEFAULT -> wombat3 wild }
diff --git a/testsuite/tests/simplCore/should_compile/T18355.stderr b/testsuite/tests/simplCore/should_compile/T18355.stderr
index 50efeca4b1..6b7372c5af 100644
--- a/testsuite/tests/simplCore/should_compile/T18355.stderr
+++ b/testsuite/tests/simplCore/should_compile/T18355.stderr
@@ -12,10 +12,10 @@ f :: forall {a}. Num a => a -> Bool -> a -> a
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=4,unsat_ok=True,boring_ok=False)
          Tmpl= \ (@a)
-                 ($dNum [Occ=Once*] :: Num a)
-                 (x [Occ=Once*] :: a)
-                 (b [Occ=Once!] :: Bool)
-                 (eta [Occ=Once*, OS=OneShot] :: a) ->
+                 ($dNum [Occ=Once2] :: Num a)
+                 (x [Occ=Once2] :: a)
+                 (b [Occ=Once1!] :: Bool)
+                 (eta [Occ=Once2, OS=OneShot] :: a) ->
                  case b of {
                    False -> - @a $dNum x eta;
                    True -> + @a $dNum x eta
@@ -41,7 +41,7 @@ T18355.$trModule4 = "main"#
 T18355.$trModule3 :: GHC.Types.TrName
 [GblId,
  Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
-         WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 20}]
+         WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}]
 T18355.$trModule3 = GHC.Types.TrNameS T18355.$trModule4
 
 -- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0}
@@ -55,14 +55,14 @@ T18355.$trModule2 = "T18355"#
 T18355.$trModule1 :: GHC.Types.TrName
 [GblId,
  Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
-         WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 20}]
+         WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}]
 T18355.$trModule1 = GHC.Types.TrNameS T18355.$trModule2
 
 -- RHS size: {terms: 3, types: 0, coercions: 0, joins: 0/0}
 T18355.$trModule :: GHC.Types.Module
 [GblId,
  Unf=Unf{Src=<vanilla>, TopLvl=True, Value=True, ConLike=True,
-         WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 30}]
+         WorkFree=True, Expandable=True, Guidance=IF_ARGS [] 10 10}]
 T18355.$trModule
   = GHC.Types.Module T18355.$trModule3 T18355.$trModule1
 
diff --git a/testsuite/tests/simplCore/should_compile/T3717.stderr b/testsuite/tests/simplCore/should_compile/T3717.stderr
index f2fe900bfd..13fc4e943a 100644
--- a/testsuite/tests/simplCore/should_compile/T3717.stderr
+++ b/testsuite/tests/simplCore/should_compile/T3717.stderr
@@ -61,9 +61,9 @@ foo [InlPrag=NOUSERINLINE[2]] :: Int -> Int
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (w [Occ=Once!] :: Int) ->
-                 case w of { GHC.Types.I# ww1 [Occ=Once] ->
-                 case T3717.$wfoo ww1 of ww2 [Occ=Once] { __DEFAULT ->
+         Tmpl= \ (w [Occ=Once1!] :: Int) ->
+                 case w of { GHC.Types.I# ww1 [Occ=Once1] ->
+                 case T3717.$wfoo ww1 of ww2 [Occ=Once1] { __DEFAULT ->
                  GHC.Types.I# ww2
                  }
                  }}]
diff --git a/testsuite/tests/simplCore/should_compile/T3772.stdout b/testsuite/tests/simplCore/should_compile/T3772.stdout
index 731e7f23a7..dae44e102b 100644
--- a/testsuite/tests/simplCore/should_compile/T3772.stdout
+++ b/testsuite/tests/simplCore/should_compile/T3772.stdout
@@ -69,8 +69,8 @@ foo [InlPrag=NOUSERINLINE[final]] :: Int -> ()
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (w [Occ=Once!] :: Int) ->
-                 case w of { GHC.Types.I# ww1 [Occ=Once] -> T3772.$wfoo ww1 }}]
+         Tmpl= \ (w [Occ=Once1!] :: Int) ->
+                 case w of { GHC.Types.I# ww1 [Occ=Once1] -> T3772.$wfoo ww1 }}]
 foo
   = \ (w :: Int) -> case w of { GHC.Types.I# ww1 -> T3772.$wfoo ww1 }
 
diff --git a/testsuite/tests/simplCore/should_compile/T4908.stderr b/testsuite/tests/simplCore/should_compile/T4908.stderr
index 0074e4b1a0..76e46f98f3 100644
--- a/testsuite/tests/simplCore/should_compile/T4908.stderr
+++ b/testsuite/tests/simplCore/should_compile/T4908.stderr
@@ -85,8 +85,8 @@ f [InlPrag=NOUSERINLINE[2]] :: Int -> (Int, Int) -> Bool
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=2,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (w [Occ=Once!] :: Int) (w1 [Occ=Once] :: (Int, Int)) ->
-                 case w of { I# ww1 [Occ=Once] -> T4908.$wf ww1 w1 }}]
+         Tmpl= \ (w [Occ=Once1!] :: Int) (w1 [Occ=Once1] :: (Int, Int)) ->
+                 case w of { I# ww1 [Occ=Once1] -> T4908.$wf ww1 w1 }}]
 f = \ (w :: Int) (w1 :: (Int, Int)) ->
       case w of { I# ww1 -> T4908.$wf ww1 w1 }
 
diff --git a/testsuite/tests/simplCore/should_compile/T4930.stderr b/testsuite/tests/simplCore/should_compile/T4930.stderr
index 9560d1973c..b58298aedb 100644
--- a/testsuite/tests/simplCore/should_compile/T4930.stderr
+++ b/testsuite/tests/simplCore/should_compile/T4930.stderr
@@ -61,9 +61,9 @@ foo [InlPrag=NOUSERINLINE[2]] :: Int -> Int
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (w [Occ=Once!] :: Int) ->
-                 case w of { GHC.Types.I# ww1 [Occ=Once] ->
-                 case T4930.$wfoo ww1 of ww2 [Occ=Once] { __DEFAULT ->
+         Tmpl= \ (w [Occ=Once1!] :: Int) ->
+                 case w of { GHC.Types.I# ww1 [Occ=Once1] ->
+                 case T4930.$wfoo ww1 of ww2 [Occ=Once1] { __DEFAULT ->
                  GHC.Types.I# ww2
                  }
                  }}]
diff --git a/testsuite/tests/simplCore/should_compile/T5366.stdout b/testsuite/tests/simplCore/should_compile/T5366.stdout
index 735d059fb5..92fed9ddda 100644
--- a/testsuite/tests/simplCore/should_compile/T5366.stdout
+++ b/testsuite/tests/simplCore/should_compile/T5366.stdout
@@ -1,2 +1,2 @@
-                 case ds of { Bar dt [Occ=Once] _ [Occ=Dead] -> GHC.Types.I# dt }}]
+                 case ds of { Bar dt [Occ=Once1] _ [Occ=Dead] -> GHC.Types.I# dt }}]
 f = \ (ds :: Bar) -> case ds of { Bar dt dt1 -> GHC.Types.I# dt }
diff --git a/testsuite/tests/simplCore/should_compile/T7360.stderr b/testsuite/tests/simplCore/should_compile/T7360.stderr
index d8ded3351f..ccf2147977 100644
--- a/testsuite/tests/simplCore/should_compile/T7360.stderr
+++ b/testsuite/tests/simplCore/should_compile/T7360.stderr
@@ -13,11 +13,11 @@ T7360.$WFoo3 [InlPrag=INLINE[final] CONLIKE] :: Int #-> Foo
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (dt [Occ=Once!] :: Int) ->
-                 case dt of { GHC.Types.I# dt [Occ=Once] -> T7360.Foo3 dt }}]
+         Tmpl= \ (dt [Occ=Once1!] :: Int) ->
+                 case dt of { GHC.Types.I# dt [Occ=Once1] -> T7360.Foo3 dt }}]
 T7360.$WFoo3
-  = \ (dt [Occ=Once!] :: Int) ->
-      case dt of { GHC.Types.I# dt [Occ=Once] -> T7360.Foo3 dt }
+  = \ (dt [Occ=Once1!] :: Int) ->
+      case dt of { GHC.Types.I# dt [Occ=Once1] -> T7360.Foo3 dt }
 
 -- RHS size: {terms: 5, types: 2, coercions: 0, joins: 0/0}
 fun1 [InlPrag=NOINLINE] :: Foo -> ()
@@ -40,10 +40,10 @@ fun2 :: forall {a}. [a] -> ((), Int)
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (@a) (x [Occ=Once] :: [a]) ->
+         Tmpl= \ (@a) (x [Occ=Once1] :: [a]) ->
                  (T7360.fun4,
-                  case x of wild [Occ=Once] { __DEFAULT ->
-                  case GHC.List.$wlenAcc @a wild 0# of ww2 [Occ=Once] { __DEFAULT ->
+                  case x of wild [Occ=Once1] { __DEFAULT ->
+                  case GHC.List.$wlenAcc @a wild 0# of ww2 [Occ=Once1] { __DEFAULT ->
                   GHC.Types.I# ww2
                   }
                   })}]
diff --git a/testsuite/tests/simplCore/should_compile/T7865.stdout b/testsuite/tests/simplCore/should_compile/T7865.stdout
index 7c5d779425..76088acdb0 100644
--- a/testsuite/tests/simplCore/should_compile/T7865.stdout
+++ b/testsuite/tests/simplCore/should_compile/T7865.stdout
@@ -1,7 +1,7 @@
 T7865.$wexpensive [InlPrag=NOINLINE]
 T7865.$wexpensive
 expensive [InlPrag=NOUSERINLINE[final]] :: Int -> Int
-                 case T7865.$wexpensive ww1 of ww2 [Occ=Once] { __DEFAULT ->
+                 case T7865.$wexpensive ww1 of ww2 [Occ=Once1] { __DEFAULT ->
 expensive
       case T7865.$wexpensive ww1 of ww2 { __DEFAULT -> GHC.Types.I# ww2 }
             case T7865.$wexpensive ww1 of ww2 { __DEFAULT ->
diff --git a/testsuite/tests/simplCore/should_compile/spec-inline.stderr b/testsuite/tests/simplCore/should_compile/spec-inline.stderr
index bf9cb1fd1c..c91b3ef901 100644
--- a/testsuite/tests/simplCore/should_compile/spec-inline.stderr
+++ b/testsuite/tests/simplCore/should_compile/spec-inline.stderr
@@ -118,8 +118,9 @@ Roman.foo_go [InlPrag=NOUSERINLINE[2]]
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=2,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (w [Occ=Once] :: Maybe Int) (w1 [Occ=Once] :: Maybe Int) ->
-                 case Roman.$wgo w w1 of ww [Occ=Once] { __DEFAULT ->
+         Tmpl= \ (w [Occ=Once1] :: Maybe Int)
+                 (w1 [Occ=Once1] :: Maybe Int) ->
+                 case Roman.$wgo w w1 of ww [Occ=Once1] { __DEFAULT ->
                  GHC.Types.I# ww
                  }}]
 Roman.foo_go
@@ -149,8 +150,8 @@ foo :: Int -> Int
  Unf=Unf{Src=InlineStable, TopLvl=True, Value=True, ConLike=True,
          WorkFree=True, Expandable=True,
          Guidance=ALWAYS_IF(arity=1,unsat_ok=True,boring_ok=False)
-         Tmpl= \ (n [Occ=Once!] :: Int) ->
-                 case n of n1 [Occ=Once] { GHC.Types.I# _ [Occ=Dead] ->
+         Tmpl= \ (n [Occ=Once1!] :: Int) ->
+                 case n of n1 [Occ=Once1] { GHC.Types.I# _ [Occ=Dead] ->
                  Roman.foo_go (GHC.Maybe.Just @Int n1) Roman.foo1
                  }}]
 foo
diff --git a/testsuite/tests/stranal/should_compile/T16029.stdout b/testsuite/tests/stranal/should_compile/T16029.stdout
index 77957255c8..5b3a03a603 100644
--- a/testsuite/tests/stranal/should_compile/T16029.stdout
+++ b/testsuite/tests/stranal/should_compile/T16029.stdout
@@ -1,11 +1,11 @@
 T16029.$WMkT [InlPrag=INLINE[final] CONLIKE] :: Int #-> Int #-> T
-         Tmpl= \ (dt [Occ=Once!] :: Int) (dt [Occ=Once!] :: Int) ->
-  = \ (dt [Occ=Once!] :: Int) (dt [Occ=Once!] :: Int) ->
+         Tmpl= \ (dt [Occ=Once1!] :: Int) (dt [Occ=Once1!] :: Int) ->
+  = \ (dt [Occ=Once1!] :: Int) (dt [Occ=Once1!] :: Int) ->
   :: GHC.Prim.Int# -> GHC.Prim.Int#
   = \ (ww :: GHC.Prim.Int#) ->
 g2 [InlPrag=NOUSERINLINE[2]] :: T -> Int -> Int
-         Tmpl= \ (w [Occ=Once!] :: T) (w1 [Occ=Once!] :: Int) ->
+         Tmpl= \ (w [Occ=Once1!] :: T) (w1 [Occ=Once1!] :: Int) ->
   = \ (w :: T) (w1 :: Int) ->
 g1 [InlPrag=NOUSERINLINE[2]] :: S -> Int -> Int
-         Tmpl= \ (w [Occ=Once!] :: S) (w1 [Occ=Once!] :: Int) ->
+         Tmpl= \ (w [Occ=Once1!] :: S) (w1 [Occ=Once1!] :: Int) ->
   = \ (w :: S) (w1 :: Int) ->
author	Simon Peyton Jones <simonpj@microsoft.com>	2020-06-11 15:45:53 +0100
committer	Simon Peyton Jones <simonpj@microsoft.com>	2020-07-27 11:41:45 +0100
commit	3f65f1e043d9f603dd729026e09bcea7873af1bb (patch)
tree	a0d703526f7378c71ffda4e90ed7286a5faf8fdf
parent	dff1cb3d9c111808fec60190747272b973547c52 (diff)
download	haskell-wip/T13253.tar.gz