Improve the handling of used-once stuffbetter-ho-cardinality

Joachim and I are committing this onto a branch so that we can share it, but we expect to do a bit more work before merging it onto head. Nofib staus: - Most programs, no change - A few improve - A couple get worse (cacheprof, tak, rfib) Investigating the "get worse" set is what's holding up putting this on head. The major issue is this. Consider map (f g) ys where f's demand signature looks like f :: <L,C1(C1(U))> -> <L,U> -> . So 'f' is not saturated. What demand do we place on g? Answer C(C1(U)) That is, the inner C1 should stay, even though f is not saturated. I found that this made a significant difference in the demand signatures inferred in GHC.IO, which uses lots of higher-order exception handlers. I also had to add used-once demand signatures for some of the 'catch' primops, so that we know their handlers are only called once.
author: Simon Peyton Jones <simonpj@microsoft.com> 2013-11-22 17:13:05 +0000
committer: Simon Peyton Jones <simonpj@microsoft.com> 2013-12-02 17:33:59 +0000
commit: 20cc59419b5fae60eea9c81f56020ef15256dc84 (patch)
tree: 70a54aa0f99ceb69374ed7ec036f4381b649e5c3
parent: 51deeb0db3abac9f4369d3f8a3744e1313ecebf4 (diff)
download: haskell-better-ho-cardinality.tar.gz
17 files changed, 540 insertions, 394 deletions
diff --git a/compiler/basicTypes/BasicTypes.lhs b/compiler/basicTypes/BasicTypes.lhs
index 6fd038dcfa..b39e049d76 100644
--- a/compiler/basicTypes/BasicTypes.lhs
+++ b/compiler/basicTypes/BasicTypes.lhs
@@ -47,6 +47,11 @@ module BasicTypes(
         TupleSort(..), tupleSortBoxity, boxityNormalTupleSort,
         tupleParens,
 
+        -- ** The OneShotInfo type
+        OneShotInfo(..),
+        noOneShotInfo, hasNoOneShotInfo, isOneShotInfo,
+        bestOneShot, worstOneShot,
+
         OccInfo(..), seqOccInfo, zapFragileOcc, isOneOcc,
         isDeadOcc, isStrongLoopBreaker, isWeakLoopBreaker, isNoOcc,
         strongLoopBreaker, weakLoopBreaker,
@@ -136,6 +141,56 @@ type Alignment = Int -- align to next N-byte boundary (N must be a power of 2).
 
 %************************************************************************
 %*                                                                      *
+         One-shot information
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+-- | If the 'Id' is a lambda-bound variable then it may have lambda-bound
+-- variable info. Sometimes we know whether the lambda binding this variable
+-- is a \"one-shot\" lambda; that is, whether it is applied at most once.
+--
+-- This information may be useful in optimisation, as computations may
+-- safely be floated inside such a lambda without risk of duplicating
+-- work.
+data OneShotInfo = NoOneShotInfo -- ^ No information
+                 | ProbOneShot   -- ^ The lambda is probably applied at most once
+                 | OneShotLam    -- ^ The lambda is applied at most once.
+
+-- | It is always safe to assume that an 'Id' has no lambda-bound variable information
+noOneShotInfo :: OneShotInfo
+noOneShotInfo = NoOneShotInfo
+
+isOneShotInfo, hasNoOneShotInfo :: OneShotInfo -> Bool
+isOneShotInfo OneShotLam = True
+isOneShotInfo _          = False
+
+hasNoOneShotInfo NoOneShotInfo = True
+hasNoOneShotInfo _             = False
+
+worstOneShot, bestOneShot :: OneShotInfo -> OneShotInfo -> OneShotInfo
+worstOneShot NoOneShotInfo _             = NoOneShotInfo
+worstOneShot ProbOneShot   NoOneShotInfo = NoOneShotInfo
+worstOneShot ProbOneShot   _             = ProbOneShot
+worstOneShot OneShotLam    os            = os
+
+bestOneShot NoOneShotInfo os         = os
+bestOneShot ProbOneShot   OneShotLam = OneShotLam
+bestOneShot ProbOneShot   _          = ProbOneShot
+bestOneShot OneShotLam    _          = OneShotLam
+
+pprOneShotInfo :: OneShotInfo -> SDoc
+pprOneShotInfo NoOneShotInfo = empty
+pprOneShotInfo ProbOneShot   = ptext (sLit "ProbOneShot")
+pprOneShotInfo OneShotLam    = ptext (sLit "OneShot")
+
+instance Outputable OneShotInfo where
+    ppr = pprOneShotInfo
+\end{code}
+
+
+%************************************************************************
+%*                                                                      *
            Swap flag
 %*                                                                      *
 %************************************************************************
diff --git a/compiler/basicTypes/Demand.lhs b/compiler/basicTypes/Demand.lhs
index 796c7cd326..b28e48f0fe 100644
--- a/compiler/basicTypes/Demand.lhs
+++ b/compiler/basicTypes/Demand.lhs
@@ -14,7 +14,7 @@ module Demand (
         mkProdDmd, mkOnceUsedDmd, mkManyUsedDmd, mkHeadStrict, oneifyDmd,
         getUsage, toCleanDmd, 
         absDmd, topDmd, botDmd, seqDmd,
-        lubDmd, bothDmd,
+        lubDmd, bothDmd, apply1Dmd, apply2Dmd, 
         isTopDmd, isBotDmd, isAbsDmd, isSeqDmd, 
         peelUseCall, cleanUseDmd_maybe, strictenDmd, bothCleanDmd,
 
@@ -35,15 +35,15 @@ module Demand (
 
         evalDmd, cleanEvalDmd, cleanEvalProdDmd, isStrictDmd, 
         splitDmdTy, splitFVs,
-        deferDmd, deferType, deferAndUse, deferEnv, modifyEnv,
+        postProcessDmdType, postProcessDmdTypeM,
 
         splitProdDmd, splitProdDmd_maybe, peelCallDmd, mkCallDmd,
         dmdTransformSig, dmdTransformDataConSig, dmdTransformDictSelSig,
         argOneShots, argsOneShots,
 
-        isSingleUsed, useType, useEnv, zapDemand, zapStrictSig,
+        isSingleUsed, useEnv, zapDemand, zapStrictSig,
 
-        worthSplittingFun, worthSplittingThunk,
+        worthSplittingArgDmd, worthSplittingThunkDmd,
 
         strictifyDictDmd
 
@@ -465,6 +465,11 @@ mkJointDmds ss as = zipWithEqual "mkJointDmds" mkJointDmd ss as
 absDmd :: JointDmd
 absDmd = mkJointDmd Lazy Abs
 
+apply1Dmd, apply2Dmd :: Demand
+-- C1(U), C1(C1(U)) respectively
+apply1Dmd = JD { strd = Lazy, absd = Use Many (UCall One Used) }
+apply2Dmd = JD { strd = Lazy, absd = Use Many (UCall One (UCall One Used)) }
+
 topDmd :: JointDmd
 topDmd = mkJointDmd Lazy useTop
 
@@ -506,9 +511,6 @@ seqDemandList :: [JointDmd] -> ()
 seqDemandList [] = ()
 seqDemandList (d:ds) = seqDemand d `seq` seqDemandList ds
 
-deferDmd :: JointDmd -> JointDmd
-deferDmd (JD {absd = a}) = mkJointDmd Lazy a 
-
 isStrictDmd :: Demand -> Bool
 -- See Note [Strict demands]
 isStrictDmd (JD {absd = Abs})  = False
@@ -518,9 +520,6 @@ isStrictDmd _                  = True
 isWeakDmd :: Demand -> Bool
 isWeakDmd (JD {strd = s, absd = a}) = isLazy s && isUsedMU a
 
-useDmd :: JointDmd -> JointDmd
-useDmd (JD {strd=d, absd=a}) = mkJointDmd d (markAsUsedDmd a)
-
 cleanUseDmd_maybe :: JointDmd -> Maybe UseDmd
 cleanUseDmd_maybe (JD { absd = Use _ ud }) = Just ud
 cleanUseDmd_maybe _                        = Nothing
@@ -621,30 +620,6 @@ mkCallDmd :: CleanDemand -> CleanDemand
 mkCallDmd (CD {sd = d, ud = u}) 
   = mkCleanDmd (mkSCall d) (mkUCall One u)
 
--- Returns result demand * strictness flag * one-shotness of the call 
-peelCallDmd :: CleanDemand 
-            -> ( CleanDemand
-               , Bool      -- True <=> had to strengthen from HeadStr
-                           --          hence defer results
-               , Count)    -- Call count
-
--- Exploiting the fact that 
--- on the strictness side      C(B) = B
--- and on the usage side       C(U) = U 
-peelCallDmd (CD {sd = s, ud = u}) 
-  = let (s', b) = peel_s s
-        (u', c) = peel_u u
-    in  (mkCleanDmd s' u', b, c)
-  where
-    peel_s (SCall s)   = (s,        False)
-    peel_s HyperStr    = (HyperStr, False)
-    peel_s _           = (HeadStr,  True)
-
-    peel_u (UCall c u) = (u,       c)
-    peel_u _           = (Used, Many)
-       -- The last case includes UHead which seems a bit wrong
-       -- because the body isn't used at all!
-
 cleanEvalDmd :: CleanDemand
 cleanEvalDmd = mkCleanDmd HeadStr Used
 
@@ -801,45 +776,40 @@ resTypeArgDmd _              = topDmd
 %************************************************************************
 
 \begin{code}
-worthSplittingFun :: [JointDmd] -> DmdResult -> Bool
-                -- True <=> the wrapper would not be an identity function
-worthSplittingFun ds res
-  = any worth_it ds || returnsCPR res
-        -- worthSplitting returns False for an empty list of demands,
-        -- and hence do_strict_ww is False if arity is zero and there is no CPR
+worthSplittingArgDmd :: Demand    -- Demand on a function argument
+                     -> Bool
+worthSplittingArgDmd dmd
+  = go dmd
   where
-    worth_it (JD {absd=Abs})                             = True      -- Absent arg
+    go (JD {absd=Abs}) = True      -- Absent arg
 
     -- See Note [Worker-wrapper for bottoming functions]
-    worth_it (JD {strd=Str HyperStr, absd=Use _ (UProd _)}) = True
-
-    -- See Note [Worthy functions for Worker-Wrapper split]    
-    worth_it (JD {strd=Str (SProd {})})                   = True  -- Product arg to evaluate
-    worth_it (JD {strd=Str HeadStr, absd=Use _ (UProd _)}) = True  -- Strictly used product arg
-    worth_it (JD {strd=Str HeadStr, absd=Use _ UHead})     = True 
-    worth_it _                                            = False
-
-worthSplittingThunk :: JointDmd         -- Demand on the thunk
-                    -> DmdResult        -- CPR info for the thunk
-                    -> Bool
-worthSplittingThunk dmd res
-  = worth_it dmd || returnsCPR res
+    go (JD {strd=Str HyperStr, absd=Use _ (UProd _)}) = True
+
+    -- See Note [Worthy functions for Worker-Wrapper split]
+    go (JD {strd=Str (SProd {})})                    = True  -- Product arg to evaluate
+    go (JD {strd=Str HeadStr, absd=Use _ (UProd _)}) = True  -- Strictly used product arg
+    go (JD {strd=Str HeadStr, absd=Use _ UHead})     = True
+
+    go _ = False
+
+worthSplittingThunkDmd :: Demand         -- Demand on the thunk
+                       -> Bool
+worthSplittingThunkDmd dmd
+  = go dmd
   where
         -- Split if the thing is unpacked
-    worth_it (JD {strd=Str (SProd {}), absd=Use _ a})   = some_comp_used a
-    worth_it (JD {strd=Str HeadStr, absd=Use _ UProd {}}) = True   
-        -- second component points out that at least some of     
-    worth_it _                                      = False
+    go (JD {strd=Str (SProd {}), absd=Use _ a})     = some_comp_used a
+    go (JD {strd=Str HeadStr, absd=Use _ UProd {}}) = True
+    go _                                            = False
 
     some_comp_used Used       = True
     some_comp_used (UProd _ ) = True
     some_comp_used _          = False
-
-
 \end{code}
 
 Note [Worthy functions for Worker-Wrapper split]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 For non-bottoming functions a worker-wrapper transformation takes into
 account several possibilities to decide if the function is worthy for
 splitting:
@@ -866,7 +836,7 @@ usage information: if the function uses its product argument's
 components, the WW split can be beneficial. Example:
 
 g :: Bool -> (Int, Int) -> Int
-g c p = case p of (a,b) -> 
+g c p = case p of (a,b) ->
           if c then a else b
 
 The function g is strict in is argument p and lazy in its
@@ -908,7 +878,7 @@ masssive tuple which is barely used.  Example:
         f g pr = error (g pr)
 
         main = print (f fst (1, error "no"))
-          
+
 Here, f does not take 'pr' apart, and it's stupid to do so.
 Imagine that it had millions of fields. This actually happened
 in GHC itself where the tuple was DynFlags
@@ -923,7 +893,8 @@ in GHC itself where the tuple was DynFlags
 \begin{code}
 type Demand = JointDmd
 
-type DmdEnv = VarEnv Demand
+type DmdEnv = VarEnv Demand   -- If a variable v is not in the domain of the
+                              -- DmdEnv, it implicitly maps to <Lazy,Absent>
 
 data DmdType = DmdType 
                   DmdEnv        -- Demand on explicitly-mentioned 
@@ -1061,30 +1032,6 @@ splitDmdTy :: DmdType -> (Demand, DmdType)
 splitDmdTy (DmdType fv (dmd:dmds) res_ty) = (dmd, DmdType fv dmds res_ty)
 splitDmdTy ty@(DmdType _ [] res_ty)       = (resTypeArgDmd res_ty, ty)
 
-deferAndUse :: Bool    -- Lazify (defer) the type
-            -> Count   -- Many => manify the type
-            -> DmdType -> DmdType
-deferAndUse True  Many ty = deferType (useType ty)
-deferAndUse False Many ty = useType ty
-deferAndUse True  One  ty = deferType ty
-deferAndUse False One  ty = ty
-
-deferType :: DmdType -> DmdType
--- deferType ty1 ==  ty1 `lubType` DT { v -> <L,A> } [] top }
--- Ie it might be used, or not 
-deferType (DmdType fv _ _) = DmdType (deferEnv fv) [] topRes
-
-deferEnv :: DmdEnv -> DmdEnv
-deferEnv fv = mapVarEnv deferDmd fv
-
-useType :: DmdType -> DmdType
--- useType ty1 == ty1 `bothType` ty1
--- NB that bothType is assymetrical, so no-op on argument demands
-useType (DmdType fv ds res_ty) = DmdType (useEnv fv) ds res_ty
-
-useEnv :: DmdEnv -> DmdEnv
-useEnv fv = mapVarEnv useDmd fv
-
 modifyEnv :: Bool                       -- No-op if False
           -> (Demand -> Demand)         -- The zapper
           -> DmdEnv -> DmdEnv           -- Env1 and Env2
@@ -1107,20 +1054,84 @@ strictenDmd (JD {strd = s, absd = u})
     poke_s (Str s)   = s
     poke_u Abs       = UHead
     poke_u (Use _ u) = u
+\end{code}
 
-toCleanDmd :: (CleanDemand -> e -> (DmdType, e))
-           -> Demand
-           -> e -> (DmdType, e)
+Deferring and peeeling
+
+\begin{code}
+type DeferAndUse   -- Describes how to degrade a result type
+   =( Bool        -- Lazify (defer) the type
+    , Count)      -- Many => manify the type
+
+type DeferAndUseM = Maybe DeferAndUse
+  -- Nothing <=> absent-ify the result type; it will never be used
+
+toCleanDmd :: Demand -> (CleanDemand, DeferAndUseM)
 -- See Note [Analyzing with lazy demand and lambdas]
-toCleanDmd anal (JD { strd = s, absd = u }) e
+toCleanDmd (JD { strd = s, absd = u })
   = case (s,u) of
-      (_, Abs) -> mf (const topDmdType) (anal (CD { sd = HeadStr, ud = Used }) e)
-                  --  See Note [Always analyse in virgin pass]
-             
-      (Str s', Use c u') -> mf (deferAndUse False c) (anal (CD { sd = s',      ud = u' }) e)
-      (Lazy,   Use c u') -> mf (deferAndUse True c)  (anal (CD { sd = HeadStr, ud = u' }) e)
+      (Str s', Use c u') -> (CD { sd = s',      ud = u' },   Just (False, c))
+      (Lazy,   Use c u') -> (CD { sd = HeadStr, ud = u' },   Just (True,  c))
+      (_,      Abs)      -> (CD { sd = HeadStr, ud = Used }, Nothing)
+
+postProcessDmdTypeM :: DeferAndUseM -> DmdType -> DmdType
+postProcessDmdTypeM Nothing   _  = topDmdType
+  -- Incoming demand was Absent, so just discard all usage information
+  -- We only processed the thing at all to analyse the body
+  -- See Note [Always analyse in virgin pass]
+postProcessDmdTypeM (Just du) ty = postProcessDmdType du ty
+
+postProcessDmdType :: DeferAndUse -> DmdType -> DmdType
+postProcessDmdType (True,  Many) ty  = deferAndUse ty
+postProcessDmdType (False, Many) ty  = useType ty
+postProcessDmdType (True,  One)  ty = deferType ty
+postProcessDmdType (False, One)  ty = ty
+
+deferType, useType, deferAndUse :: DmdType -> DmdType
+deferType   (DmdType fv ds _)      = DmdType (deferEnv fv)    (map deferDmd ds)    topRes
+useType     (DmdType fv ds res_ty) = DmdType (useEnv fv)      (map useDmd ds)      res_ty
+deferAndUse (DmdType fv ds _)      = DmdType (deferUseEnv fv) (map deferUseDmd ds) topRes
+
+deferEnv, useEnv, deferUseEnv :: DmdEnv -> DmdEnv
+deferEnv    fv = mapVarEnv deferDmd fv
+useEnv      fv = mapVarEnv useDmd fv
+deferUseEnv fv = mapVarEnv deferUseDmd fv
+
+deferDmd, useDmd, deferUseDmd :: JointDmd -> JointDmd
+deferDmd    (JD {strd=_, absd=a}) = mkJointDmd Lazy a
+useDmd      (JD {strd=d, absd=a}) = mkJointDmd d    (markAsUsedDmd a)
+deferUseDmd (JD {strd=_, absd=a}) = mkJointDmd Lazy (markAsUsedDmd a)
+
+peelCallDmd :: CleanDemand -> (CleanDemand, DeferAndUse)
+-- Exploiting the fact that
+-- on the strictness side      C(B) = B
+-- and on the usage side       C(U) = U
+peelCallDmd (CD {sd = s, ud = u})
+  = case (s, u) of
+      (SCall s', UCall c u') -> (CD { sd = s',       ud = u' },   (False, c))
+      (SCall s', _)          -> (CD { sd = s',       ud = Used }, (False, Many))
+      (HyperStr, UCall c u') -> (CD { sd = HyperStr, ud = u' },   (False, c))
+      (HyperStr, _)          -> (CD { sd = HyperStr, ud = Used }, (False, Many))
+      (_,        UCall c u') -> (CD { sd = HeadStr,  ud = u' },   (True,  c))
+      (_,        _)          -> (CD { sd = HeadStr,  ud = Used }, (True,  Many))
+       -- The _ cases for usage includes UHead which seems a bit wrong
+       -- because the body isn't used at all!
+       -- c.f. the Abs case in toCleanDmd
+
+peelManyCalls :: [Demand] -> CleanDemand -> DeferAndUse
+peelManyCalls arg_ds (CD { sd = str, ud = abs })
+  = (go_str arg_ds str, go_abs arg_ds abs)
   where
-    mf f (a,b) = (f a, b)
+    go_str :: [Demand] -> StrDmd -> Bool     -- True <=> unsaturated, defer
+    go_str [] _              = False
+    go_str (_:_)  HyperStr   = False         -- HyperStr = Call(HyperStr)
+    go_str (_:as) (SCall d') = go_str as d'
+    go_str _      _          = True
+
+    go_abs :: [Demand] -> UseDmd -> Count    -- Many <=> unsaturated, or at least
+    go_abs []      _             = One       --          one UCall Many in the demand
+    go_abs (_:as) (UCall One d') = go_abs as d'
+    go_abs _      _              = Many
 \end{code}
 
 Note [Always analyse in virgin pass]
@@ -1235,61 +1246,44 @@ botSig = StrictSig botDmdType
 cprProdSig :: StrictSig
 cprProdSig = StrictSig cprProdDmdType
 
-argsOneShots :: StrictSig -> Arity -> [[Bool]]
+argsOneShots :: StrictSig -> Arity -> [[OneShotInfo]]
 argsOneShots (StrictSig (DmdType _ arg_ds _)) n_val_args
-  | arg_ds `lengthExceeds` n_val_args
-  = []   -- Too few arguments
-  | otherwise
   = go arg_ds
   where
+    good_one_shot
+      | arg_ds `lengthExceeds` n_val_args = ProbOneShot
+      | otherwise                         = OneShotLam
+
     go []               = []
-    go (arg_d : arg_ds) = argOneShots arg_d `cons` go arg_ds
-    
+    go (arg_d : arg_ds) = argOneShots good_one_shot arg_d `cons` go arg_ds
+
     cons [] [] = []
     cons a  as = a:as
 
-argOneShots :: JointDmd -> [Bool]
-argOneShots (JD { absd = usg })
+argOneShots :: OneShotInfo -> JointDmd -> [OneShotInfo]
+argOneShots one_shot_info (JD { absd = usg })
   = case usg of
       Use _ arg_usg -> go arg_usg
       _             -> []
   where
-    go (UCall One  u) = True  : go u
-    go (UCall Many u) = False : go u
+    go (UCall One  u) = one_shot_info : go u
+    go (UCall Many u) = NoOneShotInfo : go u
     go _              = []
 
 dmdTransformSig :: StrictSig -> CleanDemand -> DmdType
 -- (dmdTransformSig fun_sig dmd) considers a call to a function whose
 -- signature is fun_sig, with demand dmd.  We return the demand
 -- that the function places on its context (eg its args)
-dmdTransformSig (StrictSig dmd_ty@(DmdType _ arg_ds _)) 
-                (CD { sd = str, ud = abs })
-  = dmd_ty2
-  where
-    dmd_ty1 | str_sat   = dmd_ty
-            | otherwise = deferType dmd_ty
-    dmd_ty2 | abs_sat   = dmd_ty1
-            | otherwise = useType dmd_ty1
-
-    str_sat = go_str arg_ds str
-    abs_sat = go_abs arg_ds abs
-
-    go_str [] _              = True
-    go_str (_:_)  HyperStr   = True         -- HyperStr = Call(HyperStr)
-    go_str (_:as) (SCall d') = go_str as d'
-    go_str _      _          = False
-
-    go_abs []      _             = True
-    go_abs (_:as) (UCall One d') = go_abs as d'
-    go_abs _      _              = False
-
-    -- NB: it's important to use deferType, and not just return topDmdType
+dmdTransformSig (StrictSig dmd_ty@(DmdType _ arg_ds _)) cd
+  = postProcessDmdType (peelManyCalls arg_ds cd) dmd_ty
+    -- NB: it's important to use postProcessDmdType, and not
+    -- just return topDmdType for unsaturated calls
     -- Consider     let { f x y = p + x } in f 1
-    -- The application isn't saturated, but we must nevertheless propagate 
-    --      a lazy demand for p!  
+    -- The application isn't saturated, but we must nevertheless propagate
+    --      a lazy demand for p!
 
 dmdTransformDataConSig :: Arity -> StrictSig -> CleanDemand -> DmdType
--- Same as dmdTranformSig but for a data constructor (worker), 
+-- Same as dmdTransformSig but for a data constructor (worker), 
 -- which has a special kind of demand transformer.
 -- If the constructor is saturated, we feed the demand on 
 -- the result into the constructor arguments.
@@ -1305,8 +1299,9 @@ dmdTransformDataConSig arity (StrictSig (DmdType _ _ con_res))
   where
     go_str 0 dmd        = Just (splitStrProdDmd arity dmd)
     go_str n (SCall s') = go_str (n-1) s'
+    go_str n HyperStr   = go_str (n-1) HyperStr
     go_str _ _          = Nothing
-   
+
     go_abs 0 dmd            = Just (splitUseProdDmd arity dmd)
     go_abs n (UCall One u') = go_abs (n-1) u'
     go_abs _ _              = Nothing
@@ -1317,15 +1312,15 @@ dmdTransformDictSelSig :: StrictSig -> CleanDemand -> DmdType
 -- argument: the dictionary), we feed the demand on the result into
 -- the indicated dictionary component.
 dmdTransformDictSelSig (StrictSig (DmdType _ [dict_dmd] _)) cd
-   | (cd',defer,_) <- peelCallDmd cd
-   , not defer
+   | (cd',defer_use) <- peelCallDmd cd
    , Just jds <- splitProdDmd_maybe dict_dmd
-   = DmdType emptyDmdEnv [mkManyUsedDmd $ mkProdDmd $ map (enhance cd') jds] topRes
+   = postProcessDmdType defer_use $
+     DmdType emptyDmdEnv [mkOnceUsedDmd $ mkProdDmd $ map (enhance cd') jds] topRes
    | otherwise
    = topDmdType              -- See Note [Demand transformer for a dictionary selector]
   where
     enhance cd old | isAbsDmd old = old
-                   | otherwise    = mkManyUsedDmd cd
+                   | otherwise    = mkOnceUsedDmd cd  -- This is the one!
 
 dmdTransformDictSelSig _ _ = panic "dmdTransformDictSelSig: no args"
 \end{code}
diff --git a/compiler/basicTypes/Id.lhs b/compiler/basicTypes/Id.lhs
index c2e0c2199d..899f6fd91c 100644
--- a/compiler/basicTypes/Id.lhs
+++ b/compiler/basicTypes/Id.lhs
@@ -65,15 +65,17 @@ module Id (
         idInlineActivation, setInlineActivation, idRuleMatchInfo,
 
         -- ** One-shot lambdas
-        isOneShotBndr, isOneShotLambda, isStateHackType,
-        setOneShotLambda, clearOneShotLambda,
+        isOneShotBndr, isOneShotLambda, isProbablyOneShotLambda,
+        setOneShotLambda, clearOneShotLambda, 
+        updOneShotInfo, setIdOneShotInfo,
+        isStateHackType, stateHackOneShot, typeOneShot,
 
         -- ** Reading 'IdInfo' fields
         idArity, 
         idUnfolding, realIdUnfolding,
         idSpecialisation, idCoreRules, idHasRules,
         idCafInfo,
-        idLBVarInfo,
+        idOneShotInfo,
         idOccInfo,
 
         -- ** Writing 'IdInfo' fields
@@ -130,6 +132,7 @@ infixl  1 `setIdUnfoldingLazily`,
           `setIdUnfolding`,
           `setIdArity`,
           `setIdOccInfo`,
+          `setIdOneShotInfo`,
 
           `setIdSpecialisation`,
           `setInlinePragma`,
@@ -236,7 +239,8 @@ mkVanillaGlobalWithInfo = mkGlobalId VanillaId
 
 -- | For an explanation of global vs. local 'Id's, see "Var#globalvslocal"
 mkLocalId :: Name -> Type -> Id
-mkLocalId name ty = mkLocalIdWithInfo name ty vanillaIdInfo
+mkLocalId name ty = mkLocalIdWithInfo name ty
+                         (vanillaIdInfo `setOneShotInfo` typeOneShot ty)
 
 mkLocalIdWithInfo :: Name -> Type -> IdInfo -> Id
 mkLocalIdWithInfo name ty info = Var.mkLocalVar VanillaId name ty info
@@ -587,18 +591,27 @@ isConLikeId id = isDataConWorkId id || isConLike (idRuleMatchInfo id)
         ---------------------------------
         -- ONE-SHOT LAMBDAS
 \begin{code}
-idLBVarInfo :: Id -> LBVarInfo
-idLBVarInfo id = lbvarInfo (idInfo id)
+idOneShotInfo :: Id -> OneShotInfo
+idOneShotInfo id = oneShotInfo (idInfo id)
 
 -- | Returns whether the lambda associated with the 'Id' is certainly applied at most once
--- OR we are applying the \"state hack\" which makes it appear as if theis is the case for
--- lambdas used in @IO@. You should prefer using this over 'isOneShotLambda'
-isOneShotBndr :: Id -> Bool
 -- This one is the "business end", called externally.
+-- It works on type variables as well as Ids, returning True
 -- Its main purpose is to encapsulate the Horrible State Hack
-isOneShotBndr id = isOneShotLambda id || isStateHackType (idType id)
+isOneShotBndr :: Var -> Bool
+isOneShotBndr var
+  | isTyVar var = True
+  | otherwise   = isOneShotLambda var
 
 -- | Should we apply the state hack to values of this 'Type'?
+stateHackOneShot :: OneShotInfo
+stateHackOneShot = OneShotLam         -- Or maybe ProbOneShot?
+
+typeOneShot :: Type -> OneShotInfo
+typeOneShot ty
+   | isStateHackType ty = stateHackOneShot
+   | otherwise          = NoOneShotInfo
+
 isStateHackType :: Type -> Bool
 isStateHackType ty
   | opt_NoStateHack
@@ -629,17 +642,36 @@ isStateHackType ty
 -- | Returns whether the lambda associated with the 'Id' is certainly applied at most once.
 -- You probably want to use 'isOneShotBndr' instead
 isOneShotLambda :: Id -> Bool
-isOneShotLambda id = case idLBVarInfo id of
-                       IsOneShotLambda  -> True
-                       NoLBVarInfo      -> False
+isOneShotLambda id = case idOneShotInfo id of
+                       OneShotLam -> True
+                       _          -> False
+
+isProbablyOneShotLambda :: Id -> Bool
+isProbablyOneShotLambda id = case idOneShotInfo id of
+                               OneShotLam    -> True
+                               ProbOneShot   -> True
+                               NoOneShotInfo -> False
 
 setOneShotLambda :: Id -> Id
-setOneShotLambda id = modifyIdInfo (`setLBVarInfo` IsOneShotLambda) id
+setOneShotLambda id = modifyIdInfo (`setOneShotInfo` OneShotLam) id
 
 clearOneShotLambda :: Id -> Id
-clearOneShotLambda id
-  | isOneShotLambda id = modifyIdInfo (`setLBVarInfo` NoLBVarInfo) id
-  | otherwise          = id
+clearOneShotLambda id = modifyIdInfo (`setOneShotInfo` NoOneShotInfo) id
+
+setIdOneShotInfo :: Id -> OneShotInfo -> Id
+setIdOneShotInfo id one_shot = modifyIdInfo (`setOneShotInfo` one_shot) id
+
+updOneShotInfo :: Id -> OneShotInfo -> Id
+-- Combine the info in the Id with new info
+updOneShotInfo id one_shot
+  | do_upd    = setIdOneShotInfo id one_shot
+  | otherwise = id
+  where
+    do_upd = case (idOneShotInfo id, one_shot) of
+                (NoOneShotInfo, _) -> True
+                (OneShotLam,    _) -> False
+                (_, NoOneShotInfo) -> False
+                _                  -> True
 
 -- The OneShotLambda functions simply fiddle with the IdInfo flag
 -- But watch out: this may change the type of something else
diff --git a/compiler/basicTypes/IdInfo.lhs b/compiler/basicTypes/IdInfo.lhs
index a2bdd5ce54..fa12183bf2 100644
--- a/compiler/basicTypes/IdInfo.lhs
+++ b/compiler/basicTypes/IdInfo.lhs
@@ -24,9 +24,13 @@ module IdInfo (
 	vanillaIdInfo, noCafIdInfo,
 	seqIdInfo, megaSeqIdInfo,
 
+        -- ** The OneShotInfo type
+        OneShotInfo(..),
+        oneShotInfo, noOneShotInfo, hasNoOneShotInfo,
+        setOneShotInfo, 
+
 	-- ** Zapping various forms of Info
 	zapLamInfo, zapFragileInfo,
-
         zapDemandInfo,
 
 	-- ** The ArityInfo type
@@ -52,7 +56,7 @@ module IdInfo (
 
 	InsideLam, OneBranch,
 	insideLam, notInsideLam, oneBranch, notOneBranch,
-	
+
 	-- ** The SpecInfo type
 	SpecInfo(..),
 	emptySpecInfo,
@@ -65,11 +69,6 @@ module IdInfo (
 	ppCafInfo, mayHaveCafRefs,
 	cafInfo, setCafInfo,
 
-        -- ** The LBVarInfo type
-        LBVarInfo(..),
-        noLBVarInfo, hasNoLBVarInfo,
-        lbvarInfo, setLBVarInfo,
-
         -- ** Tick-box Info
         TickBoxOp(..), TickBoxId,
     ) where
@@ -94,7 +93,7 @@ infixl 	1 `setSpecInfo`,
 	  `setArityInfo`,
 	  `setInlinePragInfo`,
 	  `setUnfoldingInfo`,
-	  `setLBVarInfo`,
+	  `setOneShotInfo`,
 	  `setOccInfo`,
 	  `setCafInfo`,
 	  `setStrictnessInfo`,
@@ -191,7 +190,7 @@ pprIdDetails other     = brackets (pp other)
 -- 
 -- The 'IdInfo' gives information about the value, or definition, of the
 -- 'Id'.  It does not contain information about the 'Id''s usage,
--- except for 'demandInfo' and 'lbvarInfo'.
+-- except for 'demandInfo' and 'oneShotInfo'.
 data IdInfo
   = IdInfo {
 	arityInfo 	:: !ArityInfo,		-- ^ 'Id' arity
@@ -199,7 +198,7 @@ data IdInfo
 			   			-- See Note [Specialisations and RULES in IdInfo]
 	unfoldingInfo	:: Unfolding,		-- ^ The 'Id's unfolding
 	cafInfo		:: CafInfo,		-- ^ 'Id' CAF info
-        lbvarInfo	:: LBVarInfo,		-- ^ Info about a lambda-bound variable, if the 'Id' is one
+        oneShotInfo	:: OneShotInfo,		-- ^ Info about a lambda-bound variable, if the 'Id' is one
 	inlinePragInfo	:: InlinePragma,	-- ^ Any inline pragma atached to the 'Id'
 	occInfo		:: OccInfo,		-- ^ How the 'Id' occurs in the program
 
@@ -223,12 +222,14 @@ megaSeqIdInfo info
 -- some unfoldings are not calculated at all
 --    seqUnfolding (unfoldingInfo info)		`seq`
 
-    seqDemandInfo (demandInfo info)         `seq`
-    seqStrictnessInfo (strictnessInfo info) `seq`
-
+    seqDemandInfo (demandInfo info)             `seq`
+    seqStrictnessInfo (strictnessInfo info)     `seq`
     seqCaf (cafInfo info)			`seq`
-    seqLBVar (lbvarInfo info)			`seq`
-    seqOccInfo (occInfo info) 
+    seqOneShot (oneShotInfo info)		`seq`
+    seqOccInfo (occInfo info)
+
+seqOneShot :: OneShotInfo -> ()
+seqOneShot l = l `seq` ()
 
 seqStrictnessInfo :: StrictSig -> ()
 seqStrictnessInfo ty = seqStrictSig ty
@@ -266,8 +267,8 @@ setArityInfo	  info ar  = info { arityInfo = ar  }
 setCafInfo :: IdInfo -> CafInfo -> IdInfo
 setCafInfo        info caf = info { cafInfo = caf }
 
-setLBVarInfo :: IdInfo -> LBVarInfo -> IdInfo
-setLBVarInfo      info lb = {-lb `seq`-} info { lbvarInfo = lb }
+setOneShotInfo :: IdInfo -> OneShotInfo -> IdInfo
+setOneShotInfo      info lb = {-lb `seq`-} info { oneShotInfo = lb }
 
 setDemandInfo :: IdInfo -> Demand -> IdInfo
 setDemandInfo info dd = dd `seq` info { demandInfo = dd }
@@ -286,7 +287,7 @@ vanillaIdInfo
 	    arityInfo		= unknownArity,
 	    specInfo		= emptySpecInfo,
 	    unfoldingInfo	= noUnfolding,
-	    lbvarInfo		= NoLBVarInfo,
+	    oneShotInfo		= NoOneShotInfo,
 	    inlinePragInfo 	= defaultInlinePragma,
 	    occInfo		= NoOccInfo,
             demandInfo	        = topDmd,
@@ -465,43 +466,6 @@ ppCafInfo MayHaveCafRefs = empty
 
 %************************************************************************
 %*									*
-\subsection[lbvar-IdInfo]{Lambda-bound var info about an @Id@}
-%*									*
-%************************************************************************
-
-\begin{code}
--- | If the 'Id' is a lambda-bound variable then it may have lambda-bound
--- variable info. Sometimes we know whether the lambda binding this variable
--- is a \"one-shot\" lambda; that is, whether it is applied at most once.
---
--- This information may be useful in optimisation, as computations may
--- safely be floated inside such a lambda without risk of duplicating
--- work.
-data LBVarInfo = NoLBVarInfo            -- ^ No information
-	       | IsOneShotLambda	-- ^ The lambda is applied at most once).
-
--- | It is always safe to assume that an 'Id' has no lambda-bound variable information
-noLBVarInfo :: LBVarInfo
-noLBVarInfo = NoLBVarInfo
-
-hasNoLBVarInfo :: LBVarInfo -> Bool
-hasNoLBVarInfo NoLBVarInfo     = True
-hasNoLBVarInfo IsOneShotLambda = False
-
-seqLBVar :: LBVarInfo -> ()
-seqLBVar l = l `seq` ()
-
-pprLBVarInfo :: LBVarInfo -> SDoc
-pprLBVarInfo NoLBVarInfo     = empty
-pprLBVarInfo IsOneShotLambda = ptext (sLit "OneShot")
-
-instance Outputable LBVarInfo where
-    ppr = pprLBVarInfo
-\end{code}
-
-
-%************************************************************************
-%*									*
 \subsection{Bulk operations on IdInfo}
 %*									*
 %************************************************************************
diff --git a/compiler/basicTypes/MkId.lhs b/compiler/basicTypes/MkId.lhs
index 2bc0d1245f..1c5bb70e1e 100644
--- a/compiler/basicTypes/MkId.lhs
+++ b/compiler/basicTypes/MkId.lhs
@@ -1319,7 +1319,8 @@ inlined.
 \begin{code}
 realWorldPrimId :: Id   -- :: State# RealWorld
 realWorldPrimId = pcMiscPrelId realWorldName realWorldStatePrimTy
-                     (noCafIdInfo `setUnfoldingInfo` evaldUnfolding)    -- Note [evaldUnfoldings]
+                     (noCafIdInfo `setUnfoldingInfo` evaldUnfolding    -- Note [evaldUnfoldings]
+                                  `setOneShotInfo` stateHackOneShot)
 
 voidPrimId :: Id     -- Global constant :: Void#
 voidPrimId  = pcMiscPrelId voidPrimIdName voidPrimTy
diff --git a/compiler/coreSyn/CoreArity.lhs b/compiler/coreSyn/CoreArity.lhs
index 2c9a1375fb..406ebbf617 100644
--- a/compiler/coreSyn/CoreArity.lhs
+++ b/compiler/coreSyn/CoreArity.lhs
@@ -102,7 +102,7 @@ exprArity e = go e
     trim_arity arity ty = arity `min` length (typeArity ty)
 
 ---------------
-typeArity :: Type -> [OneShot]
+typeArity :: Type -> [OneShotInfo]
 -- How many value arrows are visible in the type?
 -- We look through foralls, and newtypes
 -- See Note [exprArity invariant]
@@ -114,8 +114,7 @@ typeArity ty
       = go rec_nts ty'
 
       | Just (arg,res) <- splitFunTy_maybe ty    
-      = isStateHackType arg : go rec_nts res
-
+      = typeOneShot arg : go rec_nts res
       | Just (tc,tys) <- splitTyConApp_maybe ty 
       , Just (ty', _) <- instNewTyCon_maybe tc tys
       , Just rec_nts' <- checkRecTc rec_nts tc  -- See Note [Expanding newtypes]
@@ -476,16 +475,10 @@ Then  f             :: AT [False,False] ATop
 -------------------- Main arity code ----------------------------
 \begin{code}
 -- See Note [ArityType]
-data ArityType = ATop [OneShot] | ABot Arity
+data ArityType = ATop [OneShotInfo] | ABot Arity
      -- There is always an explicit lambda
      -- to justify the [OneShot], or the Arity
 
-type OneShot = Bool    -- False <=> Know nothing
-                       -- True  <=> Can definitely float inside this lambda
-	               -- The 'True' case can arise either because a binder
-		       -- is marked one-shot, or because it's a state lambda
-		       -- and we have the state hack on
-
 vanillaArityType :: ArityType
 vanillaArityType = ATop []	-- Totally uninformative
 
@@ -543,7 +536,7 @@ findRhsArity dflags bndr rhs old_arity
 #ifdef DEBUG
                     pprTrace "Exciting arity"
                        (vcat [ ppr bndr <+> ppr cur_arity <+> ppr new_arity
-                             , ppr rhs])
+                                                    , ppr rhs])
 #endif
                     go new_arity
       where
@@ -562,8 +555,9 @@ rhsEtaExpandArity :: DynFlags -> CheapAppFun -> CoreExpr -> Arity
 rhsEtaExpandArity dflags cheap_app e
   = case (arityType env e) of
       ATop (os:oss)
-        | os || has_lam e -> 1 + length oss  -- Don't expand PAPs/thunks
-                                             -- Note [Eta expanding thunks]
+        | isOneShotInfo os || has_lam e -> 1 + length oss  
+                                   -- Don't expand PAPs/thunks
+                                   -- Note [Eta expanding thunks]
         | otherwise       -> 0
       ATop []             -> 0
       ABot n              -> n
@@ -647,15 +641,15 @@ when saturated" so we don't want to be too gung-ho about saturating!
 
 \begin{code}
 arityLam :: Id -> ArityType -> ArityType
-arityLam id (ATop as) = ATop (isOneShotBndr id : as)
+arityLam id (ATop as) = ATop (idOneShotInfo id : as)
 arityLam _  (ABot n)  = ABot (n+1)
 
 floatIn :: Bool -> ArityType -> ArityType
--- We have something like (let x = E in b), 
--- where b has the given arity type.  
+-- We have something like (let x = E in b),
+-- where b has the given arity type.
 floatIn _     (ABot n)  = ABot n
 floatIn True  (ATop as) = ATop as
-floatIn False (ATop as) = ATop (takeWhile id as)
+floatIn False (ATop as) = ATop (takeWhile isOneShotInfo as)
    -- If E is not cheap, keep arity only for one-shots
 
 arityApp :: ArityType -> Bool -> ArityType
@@ -667,37 +661,34 @@ arityApp (ATop [])     _     = ATop []
 arityApp (ATop (_:as)) cheap = floatIn cheap (ATop as)
 
 andArityType :: ArityType -> ArityType -> ArityType   -- Used for branches of a 'case'
-andArityType (ABot n1) (ABot n2) 
+andArityType (ABot n1) (ABot n2)
   = ABot (n1 `min` n2)
 andArityType (ATop as)  (ABot _)  = ATop as
 andArityType (ABot _)   (ATop bs) = ATop bs
 andArityType (ATop as)  (ATop bs) = ATop (as `combine` bs)
   where	     -- See Note [Combining case branches]
-    combine (a:as) (b:bs) = (a && b) : combine as bs
-    combine []     bs     = take_one_shots bs
-    combine as     []     = take_one_shots as
-
-    take_one_shots [] = []
-    take_one_shots (one_shot : as) 
-      | one_shot  = True : take_one_shots as
-      | otherwise = [] 
+    combine (a:as) (b:bs) = (a `bestOneShot` b) : combine as bs
+    combine []     bs     = takeWhile isOneShotInfo bs
+    combine as     []     = takeWhile isOneShotInfo as
 \end{code}
 
 Note [Combining case branches]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider    
+Consider
   go = \x. let z = go e0
                go2 = \x. case x of
                            True  -> z
                            False -> \s(one-shot). e1
            in go2 x
-We *really* want to eta-expand go and go2.  
+We *really* want to eta-expand go and go2.
 When combining the barnches of the case we have
-     ATop [] `andAT` ATop [True]
-and we want to get ATop [True].  But if the inner
+     ATop [] `andAT` ATop [OneShotLam]
+and we want to get ATop [OneShotLam].  But if the inner
 lambda wasn't one-shot we don't want to do this.
 (We need a proper arity analysis to justify that.)
 
+So we combine the best of the two branches, on the (slightly dodgy)
+basis that if we know one branch is one-shot, then they all must be.
 
 \begin{code}
 ---------------------------
@@ -738,7 +729,7 @@ arityType _ (Var v)
   | otherwise
   = ATop (take (idArity v) one_shots)
   where
-    one_shots :: [Bool]	    -- One-shot-ness derived from the type
+    one_shots :: [OneShotInfo]	-- One-shot-ness derived from the type
     one_shots = typeArity (idType v)
 
 	-- Lambdas; increase arity
@@ -778,7 +769,7 @@ arityType env (Case scrut _ _ alts)
      ATop as | not (ae_ped_bot env)    -- Check -fpedantic-bottoms
              , is_under scrut             -> ATop as
              | exprOkForSpeculation scrut -> ATop as
-             | otherwise                  -> ATop (takeWhile id as)	    
+             | otherwise                  -> ATop (takeWhile isOneShotInfo as)
   where
     -- is_under implements Note [Dealing with bottom (3)]
     is_under (Var f)           = f `elem` ae_bndrs env
diff --git a/compiler/coreSyn/CoreUtils.lhs b/compiler/coreSyn/CoreUtils.lhs
index 8f7b7772e8..52613dab41 100644
--- a/compiler/coreSyn/CoreUtils.lhs
+++ b/compiler/coreSyn/CoreUtils.lhs
@@ -776,6 +776,12 @@ exprIsCheap' good_app (Tick t e)
      -- never duplicate ticks.  If we get this wrong, then HPC's entry
      -- counts will be off (check test in libraries/hpc/tests/raytrace)
 
+exprIsCheap' good_app (Let (NonRec _ b) e)
+  = exprIsCheap' good_app b && exprIsCheap' good_app e
+exprIsCheap' good_app (Let (Rec prs) e)
+  = all (exprIsCheap' good_app . snd) prs && exprIsCheap' good_app e
+
+{-
 exprIsCheap' good_app (Let (NonRec x _) e)
   | isUnLiftedType (idType x) = exprIsCheap' good_app e
   | otherwise                 = False
@@ -783,6 +789,7 @@ exprIsCheap' good_app (Let (NonRec x _) e)
         -- and do no allocation, so just look at the body
         -- Non-strict lets do allocation so we don't treat them as cheap
         -- See also
+-}
 
 exprIsCheap' good_app other_expr        -- Applications and variables
   = go other_expr []
diff --git a/compiler/coreSyn/PprCore.lhs b/compiler/coreSyn/PprCore.lhs
index 1868a320d2..d7990085fe 100644
--- a/compiler/coreSyn/PprCore.lhs
+++ b/compiler/coreSyn/PprCore.lhs
@@ -296,16 +296,23 @@ pprTypedLamBinder bind_site debug_on var
   = sdocWithDynFlags $ \dflags ->
     case () of
     _
-      | not debug_on && isDeadBinder var       -> char '_'
-      | not debug_on, CaseBind <- bind_site    -> -- No parens, no kind info
-                                                  pprUntypedBinder var
-      | gopt Opt_SuppressTypeSignatures dflags -> -- Suppress the signature
-                                                  pprUntypedBinder var
-      | isTyVar var                            -> parens (pprKindedTyVarBndr var)
-      | otherwise ->
-            parens (hang (pprIdBndr var)
-                         2 (vcat [ dcolon <+> pprType (idType var), pp_unf]))
+      | not debug_on            -- Even dead binders can be one-shot
+      , isDeadBinder var        -> char '_' <+> ppWhen (isId var)
+                                                (pprIdBndrInfo (idInfo var))
+
+      | not debug_on            -- No parens, no kind info
+      , CaseBind <- bind_site   -> pprUntypedBinder var
+
+      | suppress_sigs dflags    -> pprUntypedBinder var
+
+      | isTyVar var  -> parens (pprKindedTyVarBndr var)
+
+      | otherwise    -> parens (hang (pprIdBndr var)
+                                   2 (vcat [ dcolon <+> pprType (idType var)
+                                           , pp_unf]))
   where
+    suppress_sigs = gopt Opt_SuppressTypeSignatures
+
     unf_info = unfoldingInfo (idInfo var)
     pp_unf | hasSomeUnfolding unf_info = ptext (sLit "Unf=") <> ppr unf_info
            | otherwise                 = empty
@@ -340,18 +347,18 @@ pprIdBndrInfo info
     prag_info = inlinePragInfo info
     occ_info  = occInfo info
     dmd_info  = demandInfo info
-    lbv_info  = lbvarInfo info
+    lbv_info  = oneShotInfo info
 
     has_prag  = not (isDefaultInlinePragma prag_info)
     has_occ   = not (isNoOcc occ_info)
     has_dmd   = not $ isTopDmd dmd_info 
-    has_lbv   = not (hasNoLBVarInfo lbv_info)
+    has_lbv   = not (hasNoOneShotInfo lbv_info)
 
     doc = showAttributes
           [ (has_prag, ptext (sLit "InlPrag=") <> ppr prag_info)
           , (has_occ,  ptext (sLit "Occ=") <> ppr occ_info)
           , (has_dmd,  ptext (sLit "Dmd=") <> ppr dmd_info)
-          , (has_lbv , ptext (sLit "Lbv=") <> ppr lbv_info)
+          , (has_lbv , ptext (sLit "OS=") <> ppr lbv_info)
           ]
 \end{code}
 
@@ -374,7 +381,7 @@ ppIdInfo id info
     , (True,           ptext (sLit "Str=") <> pprStrictness str_info)
     , (has_unf,        ptext (sLit "Unf=") <> ppr unf_info)
     , (not (null rules), ptext (sLit "RULES:") <+> vcat (map pprRule rules))
-    ]   -- Inline pragma, occ, demand, lbvar info
+    ]   -- Inline pragma, occ, demand, one-shot info
         -- printed out with all binders (when debug is on);
         -- see PprCore.pprIdBndr
   where
diff --git a/compiler/prelude/PrelRules.lhs b/compiler/prelude/PrelRules.lhs
index b6ded2eb27..11367edfec 100644
--- a/compiler/prelude/PrelRules.lhs
+++ b/compiler/prelude/PrelRules.lhs
@@ -141,10 +141,8 @@ primOpRules nm OrOp        = mkPrimOpRule nm 2 [ binaryLit (wordOp2 (.|.))
 primOpRules nm XorOp       = mkPrimOpRule nm 2 [ binaryLit (wordOp2 xor)
                                                , identityDynFlags zerow
                                                , equalArgs >> retLit zerow ]
-primOpRules nm SllOp       = mkPrimOpRule nm 2 [ binaryLit (wordShiftOp2 Bits.shiftL)
-                                               , rightIdentityDynFlags zeroi ]
-primOpRules nm SrlOp       = mkPrimOpRule nm 2 [ binaryLit (wordShiftOp2 shiftRightLogical)
-                                               , rightIdentityDynFlags zeroi ]
+primOpRules nm SllOp       = mkPrimOpRule nm 2 [ wordShiftRule Bits.shiftL ]
+primOpRules nm SrlOp       = mkPrimOpRule nm 2 [ wordShiftRule shiftRightLogical ]
 
 -- coercions
 primOpRules nm Word2IntOp     = mkPrimOpRule nm 1 [ liftLitDynFlags word2IntLit
@@ -373,14 +371,25 @@ wordOp2 op dflags (MachWord w1) (MachWord w2)
     = wordResult dflags (fromInteger w1 `op` fromInteger w2)
 wordOp2 _ _ _ _ = Nothing  -- Could find LitLit
 
-wordShiftOp2 :: (Integer -> Int -> Integer)
-             -> DynFlags -> Literal -> Literal
-             -> Maybe CoreExpr
--- Shifts take an Int; hence second arg of op is Int
-wordShiftOp2 op dflags (MachWord x) (MachInt n)
-  = wordResult dflags (x `op` fromInteger n)
-    -- Do the shift at type Integer
-wordShiftOp2 _ _ _ _ = Nothing
+wordShiftRule :: (Integer -> Int -> Integer) -> RuleM CoreExpr
+                 -- Shifts take an Int; hence second arg of op is Int
+-- See Note [Guarding against silly shifts]
+wordShiftRule shift_op
+  = do { dflags <- getDynFlags
+       ; [e1, Lit (MachInt shift_len)] <- getArgs
+       ; case e1 of
+           _ | shift_len == 0 
+             -> return e1
+             | shift_len < 0 || wordSizeInBits dflags < shift_len
+             -> return (mkRuntimeErrorApp rUNTIME_ERROR_ID wordPrimTy 
+                                        ("Bad shift length" ++ show shift_len))
+           Lit (MachWord x)
+             -> liftMaybe $ wordResult dflags (x `shift_op` fromInteger shift_len) 
+                    -- Do the shift at type Integer, but shift length is Int
+           _ -> mzero }
+
+wordSizeInBits :: DynFlags -> Integer
+wordSizeInBits dflags = toInteger (platformWordSize (targetPlatform dflags) `shiftL` 3)
 
 --------------------------
 floatOp2 :: (Rational -> Rational -> Rational)
@@ -522,6 +531,53 @@ idempotent = do [e1, e2] <- getArgs
                 return e1
 \end{code}
 
+Note [Guarding against silly shifts]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this code:
+
+  import Data.Bits( (.|.), shiftL )
+  chunkToBitmap :: [Bool] -> Word32
+  chunkToBitmap chunk = foldr (.|.) 0 [ 1 `shiftL` n | (True,n) <- zip chunk [0..] ]
+
+This optimises to:
+Shift.$wgo = \ (w_sCS :: GHC.Prim.Int#) (w1_sCT :: [GHC.Types.Bool]) ->
+    case w1_sCT of _ {
+      [] -> __word 0;
+      : x_aAW xs_aAX ->
+        case x_aAW of _ {
+          GHC.Types.False ->
+            case w_sCS of wild2_Xh {
+              __DEFAULT -> Shift.$wgo (GHC.Prim.+# wild2_Xh 1) xs_aAX;
+              9223372036854775807 -> __word 0  };
+          GHC.Types.True ->
+            case GHC.Prim.>=# w_sCS 64 of _ {
+              GHC.Types.False ->
+                case w_sCS of wild3_Xh {
+                  __DEFAULT ->
+                    case Shift.$wgo (GHC.Prim.+# wild3_Xh 1) xs_aAX of ww_sCW { __DEFAULT ->
+                      GHC.Prim.or# (GHC.Prim.narrow32Word#
+                                      (GHC.Prim.uncheckedShiftL# (__word 1) wild3_Xh))
+                                   ww_sCW
+                     };
+                  9223372036854775807 ->
+                    GHC.Prim.narrow32Word#
+!!!!-->                  (GHC.Prim.uncheckedShiftL# (__word 1) 9223372036854775807)
+                };
+              GHC.Types.True ->
+                case w_sCS of wild3_Xh {
+                  __DEFAULT -> Shift.$wgo (GHC.Prim.+# wild3_Xh 1) xs_aAX;
+                  9223372036854775807 -> __word 0
+                } } } }
+
+Note the massive shift on line "!!!!".  It can't happen, because we've checked 
+that w < 64, but the optimiser didn't spot that. We DO NO want to constant-fold this!
+Moreover, if the programmer writes (n `uncheckedShiftL` 9223372036854775807), we
+can't constant fold it, but if it gets to the assember we get
+     Error: operand type mismatch for `shl'
+
+So the best thing to do is to rewrite the shift with a call to error,
+when the second arg is stupid.
+
 %************************************************************************
 %*                                                                      *
 \subsection{Vaguely generic functions}
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index 37591afe87..34a5ef34ee 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -1618,8 +1618,8 @@ primop  CatchOp "catch#" GenPrimOp
    with
 	-- Catch is actually strict in its first argument
 	-- but we don't want to tell the strictness
-	-- analyser about that!
-        -- might use caught action multiply
+	-- analyser about that, so that exceptions stay inside it.
+   strictness  = { \ _arity -> mkStrictSig (mkTopDmdType [apply1Dmd,apply2Dmd,topDmd] topRes) }
    out_of_line = True
    has_side_effects = True
 
@@ -1651,6 +1651,7 @@ primop  MaskAsyncExceptionsOp "maskAsyncExceptions#" GenPrimOp
         (State# RealWorld -> (# State# RealWorld, a #))
      -> (State# RealWorld -> (# State# RealWorld, a #))
    with
+   strictness  = { \ _arity -> mkStrictSig (mkTopDmdType [apply1Dmd,topDmd] topRes) }
    out_of_line = True
    has_side_effects = True
 
@@ -1658,6 +1659,7 @@ primop  MaskUninterruptibleOp "maskUninterruptible#" GenPrimOp
         (State# RealWorld -> (# State# RealWorld, a #))
      -> (State# RealWorld -> (# State# RealWorld, a #))
    with
+   strictness  = { \ _arity -> mkStrictSig (mkTopDmdType [apply1Dmd,topDmd] topRes) }
    out_of_line = True
    has_side_effects = True
 
@@ -1665,6 +1667,7 @@ primop  UnmaskAsyncExceptionsOp "unmaskAsyncExceptions#" GenPrimOp
         (State# RealWorld -> (# State# RealWorld, a #))
      -> (State# RealWorld -> (# State# RealWorld, a #))
    with
+   strictness  = { \ _arity -> mkStrictSig (mkTopDmdType [apply1Dmd,topDmd] topRes) }
    out_of_line = True
    has_side_effects = True
 
@@ -1684,6 +1687,7 @@ primop	AtomicallyOp "atomically#" GenPrimOp
       (State# RealWorld -> (# State# RealWorld, a #) )
    -> State# RealWorld -> (# State# RealWorld, a #)
    with
+   strictness  = { \ _arity -> mkStrictSig (mkTopDmdType [apply1Dmd,topDmd] topRes) }
    out_of_line = True
    has_side_effects = True
 
@@ -1709,6 +1713,7 @@ primop  CatchRetryOp "catchRetry#" GenPrimOp
    -> (State# RealWorld -> (# State# RealWorld, a #) )
    -> (State# RealWorld -> (# State# RealWorld, a #) )
    with
+   strictness  = { \ _arity -> mkStrictSig (mkTopDmdType [apply1Dmd,apply1Dmd,topDmd] topRes) }
    out_of_line = True
    has_side_effects = True
 
@@ -1717,6 +1722,7 @@ primop  CatchSTMOp "catchSTM#" GenPrimOp
    -> (b -> State# RealWorld -> (# State# RealWorld, a #) )
    -> (State# RealWorld -> (# State# RealWorld, a #) )
    with
+   strictness  = { \ _arity -> mkStrictSig (mkTopDmdType [apply1Dmd,apply2Dmd,topDmd] topRes) }
    out_of_line = True
    has_side_effects = True
 
diff --git a/compiler/simplCore/OccurAnal.lhs b/compiler/simplCore/OccurAnal.lhs
index 6106388fa4..11391a3553 100644
--- a/compiler/simplCore/OccurAnal.lhs
+++ b/compiler/simplCore/OccurAnal.lhs
@@ -1084,7 +1084,7 @@ occAnalNonRecRhs env bndr rhs
   = occAnal rhs_env rhs
   where
     -- See Note [Use one-shot info]
-    env1 = env { occ_one_shots = argOneShots dmd }
+    env1 = env { occ_one_shots = argOneShots OneShotLam dmd }
 
     -- See Note [Cascading inlines]
     rhs_env | certainly_inline = env1
@@ -1234,13 +1234,14 @@ occAnal env expr@(Lam _ _)
         (final_usage, tagged_binders) = tagLamBinders body_usage binders'
                       -- Use binders' to put one-shot info on the lambdas
 
-        really_final_usage | linear    = final_usage
-                           | otherwise = mapVarEnv markInsideLam final_usage
+        really_final_usage
+          | all isOneShotBndr binders' = final_usage
+          | otherwise = mapVarEnv markInsideLam final_usage
     in
     (really_final_usage, mkLams tagged_binders body') }
   where
-    (binders, body)              = collectBinders expr
-    (env_body, binders', linear) = oneShotGroup env binders
+    (binders, body)      = collectBinders expr
+    (env_body, binders') = oneShotGroup env binders
 
 occAnal env (Case scrut bndr ty alts)
   = case occ_anal_scrut scrut alts     of { (scrut_usage, scrut') ->
@@ -1332,15 +1333,16 @@ occAnalApp env (Var fun, args)
     in
     (fun_uds +++ final_args_uds, mkApps (Var fun) args') }
   where
-    fun_uds  = mkOneOcc env fun (valArgCount args > 0)
-    is_exp = isExpandableApp fun (valArgCount args)
+    n_val_args = valArgCount args
+    fun_uds    = mkOneOcc env fun (n_val_args > 0)
+    is_exp     = isExpandableApp fun n_val_args
            -- See Note [CONLIKE pragma] in BasicTypes
            -- The definition of is_exp should match that in
            -- Simplify.prepareRhs
 
-    one_shots  = argsOneShots (idStrictness fun) (valArgCount args)
+    one_shots  = argsOneShots (idStrictness fun) n_val_args
                  -- See Note [Use one-shot info]
-        
+
     args_stuff = occAnalArgs env args one_shots
 
                         -- (foldr k z xs) may call k many times, but it never
@@ -1466,15 +1468,11 @@ instance Outputable OccEncl where
   ppr OccRhs     = ptext (sLit "occRhs")
   ppr OccVanilla = ptext (sLit "occVanilla")
 
-type OneShots = [Bool]
+type OneShots = [OneShotInfo]
         -- []           No info
         --
-        -- True:ctxt    Analysing a function-valued expression that will be
-        --                      applied just once
-        --
-        -- False:ctxt   Analysing a function-valued expression that may
-        --                      be applied many times; but when it is,
-        --                      the OneShots inside applies
+        -- one_shot_info:ctxt    Analysing a function-valued expression that
+        --                       will be applied as described by one_shot_info
 
 initOccEnv :: (Activation -> Bool) -> OccEnv
 initOccEnv active_rule
@@ -1502,38 +1500,37 @@ isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
 
 oneShotGroup :: OccEnv -> [CoreBndr] 
              -> ( OccEnv
-                , [CoreBndr]
-                , Bool )   -- True <=> all binders are one-shot
+                , [CoreBndr] )
         -- The result binders have one-shot-ness set that they might not have had originally.
         -- This happens in (build (\cn -> e)).  Here the occurrence analyser
         -- linearity context knows that c,n are one-shot, and it records that fact in
         -- the binder. This is useful to guide subsequent float-in/float-out tranformations
 
 oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
-  = go ctxt bndrs [] True
+  = go ctxt bndrs []
   where
-    go ctxt [] rev_bndrs linear 
+    go ctxt [] rev_bndrs
       = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
-        , reverse rev_bndrs
-        , linear )
+        , reverse rev_bndrs )
 
-    go ctxt (bndr:bndrs) rev_bndrs lin_acc
+    go [] bndrs rev_bndrs
+      = ( env { occ_one_shots = [], occ_encl = OccVanilla }
+        , reverse rev_bndrs ++ bndrs )
+
+    go ctxt (bndr:bndrs) rev_bndrs
       | isId bndr
+      
       = case ctxt of
-          []            -> go [] bndrs (bndr:rev_bndrs) (lin_acc && one_shot)
-          (linear : ctxt) 
-             | one_shot  -> go ctxt bndrs (bndr : rev_bndrs) lin_acc
-             | linear    -> go ctxt bndrs (bndr': rev_bndrs) lin_acc
-             | otherwise -> go ctxt bndrs (bndr : rev_bndrs) False
-      | otherwise
-      = go ctxt bndrs (bndr:rev_bndrs) lin_acc
-      where
-        one_shot = isOneShotBndr bndr
-        bndr'    = setOneShotLambda bndr
+          []                -> go []   bndrs (bndr : rev_bndrs)
+          (one_shot : ctxt) -> go ctxt bndrs (bndr': rev_bndrs)
+                            where
+                               bndr' = updOneShotInfo bndr one_shot
+       | otherwise
+      = go ctxt bndrs (bndr:rev_bndrs)
 
 addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
 addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
-  = env { occ_one_shots = replicate (valArgCount args) True ++ ctxt }
+  = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
 \end{code}
 
 
diff --git a/compiler/simplCore/SetLevels.lhs b/compiler/simplCore/SetLevels.lhs
index 2fca56cf17..7bcc53f6de 100644
--- a/compiler/simplCore/SetLevels.lhs
+++ b/compiler/simplCore/SetLevels.lhs
@@ -815,7 +815,9 @@ lvlLamBndrs lvl bndrs
     new_lvl | any is_major bndrs = incMajorLvl lvl
             | otherwise          = incMinorLvl lvl
 
-    is_major bndr = isId bndr && not (isOneShotLambda bndr)
+    is_major bndr = isId bndr && not (isProbablyOneShotLambda bndr)
+       -- The "probably" part says "don't float things out of a
+       -- probable one-shot lambda"
 \end{code}
 
 \begin{code}
diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs
index 6c7dcc2042..36f292deb3 100644
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -1195,8 +1195,11 @@ tryEtaExpandRhs env bndr rhs
   = do { dflags <- getDynFlags
        ; (new_arity, new_rhs) <- try_expand dflags
 
-       ; WARN( new_arity < old_arity || new_arity < _dmd_arity,
-               (ptext (sLit "Arity decrease:") <+> (ppr bndr <+> ppr old_arity
+       ; WARN( new_arity < old_arity,
+               (ptext (sLit "Arity decrease:") <+> (ppr bndr
+                <+> ppr old_arity <+> ppr new_arity) $$ ppr new_rhs) )
+         WARN( new_arity < _dmd_arity,
+               (ptext (sLit "Arity less than dmd sig arity:") <+> (ppr bndr
                 <+> ppr new_arity <+> ppr _dmd_arity) $$ ppr new_rhs) )
                         -- Note [Arity decrease]
          return (new_arity, new_rhs) }
@@ -1211,13 +1214,23 @@ tryEtaExpandRhs env bndr rhs
       = do { tick (EtaExpansion bndr)
            ; return (new_arity, etaExpand new_arity rhs) }
       | otherwise
-      = return (manifest_arity, rhs)
+      = return (exprArity rhs, rhs)   -- See Note [Return exprArity, not manifestArity]
 
     manifest_arity = manifestArity rhs
     old_arity  = idArity bndr
     _dmd_arity = length $ fst $ splitStrictSig $ idStrictness bndr
 \end{code}
 
+Note [Return exprArity, not manifestArity]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+  f = \xy. blah
+  g = f 2
+The f will get arity 2, and we want g to get arity 1, even though
+exprEtaExpandArity (and hence findArity) may not eta-expand it.
+Hence tryEtaExpand should return (exprArity (f 2)), not its
+manifest arity (which is zero).
+
 Note [Eta-expanding at let bindings]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We now eta expand at let-bindings, which is where the payoff comes.
diff --git a/compiler/specialise/SpecConstr.lhs b/compiler/specialise/SpecConstr.lhs
index 44dc6f00ef..0249c99cf9 100644
--- a/compiler/specialise/SpecConstr.lhs
+++ b/compiler/specialise/SpecConstr.lhs
@@ -1549,7 +1549,7 @@ spec_one env fn arg_bndrs body (call_pat@(qvars, pats), rule_number)
                              `setIdArity` count isId spec_lam_args
               spec_str   = calcSpecStrictness fn spec_lam_args pats
                 -- Conditionally use result of new worker-wrapper transform
-              (spec_lam_args, spec_call_args) = mkWorkerArgs (sc_dflags env) qvars False body_ty
+              (spec_lam_args, spec_call_args) = mkWorkerArgs (sc_dflags env) qvars NoOneShotInfo body_ty
                 -- Usual w/w hack to avoid generating 
                 -- a spec_rhs of unlifted type and no args
 
diff --git a/compiler/stranal/DmdAnal.lhs b/compiler/stranal/DmdAnal.lhs
index 99eb7ac5ba..ad3cf28d3d 100644
--- a/compiler/stranal/DmdAnal.lhs
+++ b/compiler/stranal/DmdAnal.lhs
@@ -98,10 +98,11 @@ c) The application rule wouldn't be right either
    evaluation of f in a C(L) demand!
 
 \begin{code}
-dmdAnalThunk :: AnalEnv 
-             -> Demand 	-- This one takes a *Demand*
-             -> CoreExpr -> (DmdType, CoreExpr)
-dmdAnalThunk env dmd e
+dmdAnalArg :: AnalEnv 
+           -> Demand 	-- This one takes a *Demand*
+           -> CoreExpr -> (DmdType, CoreExpr)
+-- Used for function arguments
+dmdAnalArg env dmd e
   | exprIsTrivial e = dmdAnalStar env dmd e
   | otherwise       = dmdAnalStar env (oneifyDmd dmd) e
 
@@ -111,10 +112,13 @@ dmdAnalThunk env dmd e
 dmdAnalStar :: AnalEnv 
             -> Demand 	-- This one takes a *Demand*
             -> CoreExpr -> (DmdType, CoreExpr)
-dmdAnalStar env dmd e = toCleanDmd (dmdAnal env) dmd e
+dmdAnalStar env dmd e 
+  | (cd, defer_and_use) <- toCleanDmd dmd
+  , (dmd_ty, e')        <- dmdAnal env cd e
+  = (postProcessDmdTypeM defer_and_use dmd_ty, e')
 
 -- Main Demand Analsysis machinery
-dmdAnal :: AnalEnv 
+dmdAnal :: AnalEnv
         -> CleanDemand 	       -- The main one takes a *CleanDemand*
         -> CoreExpr -> (DmdType, CoreExpr)
 
@@ -168,7 +172,7 @@ dmdAnal env dmd (App fun arg)	-- Non-type arguments
         call_dmd          = mkCallDmd dmd
 	(fun_ty, fun') 	  = dmdAnal env call_dmd fun
 	(arg_dmd, res_ty) = splitDmdTy fun_ty
-        (arg_ty, arg') 	  = dmdAnalThunk env arg_dmd arg
+        (arg_ty, arg') 	  = dmdAnalArg env arg_dmd arg
     in
 --    pprTrace "dmdAnal:app" (vcat
 --         [ text "dmd =" <+> ppr dmd
@@ -183,13 +187,13 @@ dmdAnal env dmd (App fun arg)	-- Non-type arguments
 -- this is an anonymous lambda, since @dmdAnalRhs@ uses @collectBinders@
 dmdAnal env dmd (Lam var body)
   | isTyVar var
-  = let    
+  = let 
 	(body_ty, body') = dmdAnal env dmd body
     in
     (body_ty, Lam var body')
 
   | otherwise
-  = let (body_dmd, defer_me, one_shot) = peelCallDmd dmd	
+  = let (body_dmd, defer_and_use@(_,one_shot)) = peelCallDmd dmd
           -- body_dmd  - a demand to analyze the body
           -- one_shot  - one-shotness of the lambda
           --             hence, cardinality of its free vars
@@ -198,7 +202,7 @@ dmdAnal env dmd (Lam var body)
 	(body_ty, body') = dmdAnal env' body_dmd body
 	(lam_ty, var')   = annotateLamIdBndr env notArgOfDfun body_ty one_shot var
     in
-    (deferAndUse defer_me one_shot lam_ty, Lam var' body')
+    (postProcessDmdType defer_and_use lam_ty, Lam var' body')
 
 dmdAnal env dmd (Case scrut case_bndr ty [alt@(DataAlt dc, _, _)])
   -- Only one alternative with a product constructor
@@ -487,13 +491,13 @@ dmdTransform env var dmd
 
   | isGlobalId var	                         -- Imported function
   = let res = dmdTransformSig (idStrictness var) dmd in
---    pprTrace "dmdTransform" (vcat [ppr var, ppr (idStrictness var), ppr dmd, ppr res]) 
+--    pprTrace "dmdTransform" (vcat [ppr var, ppr (idStrictness var), ppr dmd, ppr res])
     res
 
   | Just (sig, top_lvl) <- lookupSigEnv env var  -- Local letrec bound thing
   , let fn_ty = dmdTransformSig sig dmd
-  = -- pprTrace "dmdTransform" (vcat [ppr var, ppr dmd, ppr fn_ty]) $
-    if isTopLevel top_lvl           
+  = -- pprTrace "dmdTransform" (vcat [ppr var, ppr sig, ppr dmd, ppr fn_ty]) $
+    if isTopLevel top_lvl
     then fn_ty   -- Don't record top level things
     else addVarDmd fn_ty var (mkOnceUsedDmd dmd)
 
@@ -593,10 +597,11 @@ dmdAnalRhs top_lvl rec_flag env id rhs
   where
     (bndrs, body)        = collectBinders rhs
     env_body             = foldl extendSigsWithLam env bndrs
-    (body_dmd_ty, body') = dmdAnal env_body body_dmd body
-    (rhs_dmd_ty, bndrs') = annotateLamBndrs env (isDFunId id) body_dmd_ty bndrs
-    id'		         = set_idStrictness env id sig_ty
+    (DmdType body_fv _      body_res, body')  = dmdAnal env_body body_dmd body
+    (DmdType rhs_fv rhs_dmds rhs_res, bndrs') = annotateLamBndrs env (isDFunId id)
+                                                  (DmdType body_fv [] body_res) bndrs
     sig_ty               = mkStrictSig (mkDmdType sig_fv rhs_dmds rhs_res')
+    id'		         = set_idStrictness env id sig_ty
 	-- See Note [NOINLINE and strictness]
 
     -- See Note [Product demands for function body]
@@ -604,8 +609,6 @@ dmdAnalRhs top_lvl rec_flag env id rhs
                  Nothing            -> cleanEvalDmd
                  Just (dc, _, _, _) -> cleanEvalProdDmd (dataConRepArity dc)
 
-    DmdType rhs_fv rhs_dmds rhs_res = rhs_dmd_ty
-
     -- See Note [Lazy and unleashable free variables]
     -- See Note [Aggregated demand for cardinality]
     rhs_fv1 = case rec_flag of
diff --git a/compiler/stranal/WorkWrap.lhs b/compiler/stranal/WorkWrap.lhs
index 6a448caebb..e23f615e4a 100644
--- a/compiler/stranal/WorkWrap.lhs
+++ b/compiler/stranal/WorkWrap.lhs
@@ -20,6 +20,7 @@ import CoreArity	( exprArity )
 import Var
 import Id
 import IdInfo
+import Type             ( isVoidTy )
 import UniqSupply
 import BasicTypes
 import DynFlags
@@ -46,9 +47,9 @@ analysis pass.
 \end{enumerate}
 
 and we return some ``plain'' bindings which have been
-worker/wrapper-ified, meaning: 
+worker/wrapper-ified, meaning:
 
-\begin{enumerate} 
+\begin{enumerate}
 
 \item Functions have been split into workers and wrappers where
 appropriate.  If a function has both strictness and CPR properties
@@ -155,13 +156,13 @@ It's very important to refrain from w/w-ing an INLINE function (ie one
 with an InlineRule) because the wrapper will then overwrite the
 InlineRule unfolding.
 
-Furthermore, if the programmer has marked something as INLINE, 
+Furthermore, if the programmer has marked something as INLINE,
 we may lose by w/w'ing it.
 
 If the strictness analyser is run twice, this test also prevents
 wrappers (which are INLINEd) from being re-done.  (You can end up with
 several liked-named Ids bouncing around at the same time---absolute
-mischief.)  
+mischief.)
 
 Notice that we refrain from w/w'ing an INLINE function even if it is
 in a recursive group.  It might not be the loop breaker.  (We could
@@ -178,7 +179,7 @@ one.  So we leave INLINABLE things alone too.
 
 This is a slight infelicity really, because it means that adding
 an INLINABLE pragma could make a program a bit less efficient,
-because you lose the worker/wrapper stuff.  But I don't see a way 
+because you lose the worker/wrapper stuff.  But I don't see a way
 to avoid that.
 
 Note [Don't w/w inline small non-loop-breaker things]
@@ -213,7 +214,7 @@ When should the wrapper inlining be active?  It must not be active
 earlier than the current Activation of the Id (eg it might have a
 NOINLINE pragma).  But in fact strictness analysis happens fairly
 late in the pipeline, and we want to prioritise specialisations over
-strictness.  Eg if we have 
+strictness.  Eg if we have
   module Foo where
     f :: Num a => a -> Int -> a
     f n 0 = n  	       	   -- Strict in the Int, hence wrapper
@@ -231,7 +232,7 @@ strictness.  Eg if we have
 Then we want the specialisation for 'f' to kick in before the wrapper does.
 
 Now in fact the 'gentle' simplification pass encourages this, by
-having rules on, but inlinings off.  But that's kind of lucky. It seems 
+having rules on, but inlinings off.  But that's kind of lucky. It seems
 more robust to give the wrapper an Activation of (ActiveAfter 0),
 so that it becomes active in an importing module at the same time that
 it appears in the first place in the defining module.
@@ -251,21 +252,21 @@ tryWW dflags is_rec fn_id rhs
   | isNeverActive inline_act
 	-- No point in worker/wrappering if the thing is never inlined!
 	-- Because the no-inline prag will prevent the wrapper ever
-	-- being inlined at a call site. 
-	-- 
+	-- being inlined at a call site.
+	--
 	-- Furthermore, don't even expose strictness info
   = return [ (fn_id, rhs) ]
 
-  | is_thunk && worthSplittingThunk fn_dmd res_info
+  | is_fun && (worth_splitting_args wrap_dmds rhs || returnsCPR res_info)
+  = checkSize dflags new_fn_id rhs $
+    splitFun dflags new_fn_id fn_info wrap_dmds res_info rhs
+
+  | is_thunk && (worthSplittingThunkDmd fn_dmd || returnsCPR res_info)
   	-- See Note [Thunk splitting]
   = ASSERT2( isNonRec is_rec, ppr new_fn_id )	-- The thunk must be non-recursive
-    checkSize dflags new_fn_id rhs $ 
+    checkSize dflags new_fn_id rhs $
     splitThunk dflags new_fn_id rhs
 
-  | is_fun && worthSplittingFun wrap_dmds res_info
-  = checkSize dflags new_fn_id rhs $
-    splitFun dflags new_fn_id fn_info wrap_dmds res_info rhs
-
   | otherwise
   = return [ (new_fn_id, rhs) ]
 
@@ -274,18 +275,24 @@ tryWW dflags is_rec fn_id rhs
     fn_dmd       = demandInfo fn_info
     inline_act   = inlinePragmaActivation (inlinePragInfo fn_info)
 
-	-- In practice it always will have a strictness 
+    worth_splitting_args [d] (Lam b _)
+      | isAbsDmd d && isVoidTy (idType b)
+      = False  -- Note [Do not split void functions]
+    worth_splitting_args wrap_dmds _
+      = any worthSplittingArgDmd wrap_dmds
+
+	-- In practice it always will have a strictness
 	-- signature, even if it's a uninformative one
     strict_sig  = strictnessInfo fn_info
     StrictSig (DmdType env wrap_dmds res_info) = strict_sig
 
-	-- new_fn_id has the DmdEnv zapped.  
+	-- new_fn_id has the DmdEnv zapped.
 	--	(a) it is never used again
 	--	(b) it wastes space
 	--	(c) it becomes incorrect as things are cloned, because
 	--	    we don't push the substitution into it
     new_fn_id | isEmptyVarEnv env = fn_id
-	      | otherwise	  = fn_id `setIdStrictness` 
+	      | otherwise	  = fn_id `setIdStrictness`
 				     StrictSig (mkTopDmdType wrap_dmds res_info)
 
     is_fun    = notNull wrap_dmds
@@ -316,14 +323,14 @@ checkSize dflags fn_id rhs thing_inside
 splitFun :: DynFlags -> Id -> IdInfo -> [Demand] -> DmdResult -> Expr Var
          -> UniqSM [(Id, CoreExpr)]
 splitFun dflags fn_id fn_info wrap_dmds res_info rhs
-  = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) ) 
+  = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) )
     (do {
 	-- The arity should match the signature
       (work_demands, wrap_fn, work_fn) <- mkWwBodies dflags fun_ty wrap_dmds res_info one_shots
     ; work_uniq <- getUniqueM
     ; let
 	work_rhs = work_fn rhs
-	work_id  = mkWorkerId work_uniq fn_id (exprType work_rhs) 
+	work_id  = mkWorkerId work_uniq fn_id (exprType work_rhs)
 		        `setIdOccInfo` occInfo fn_info
 				-- Copy over occurrence info from parent
 				-- Notably whether it's a loop breaker
@@ -331,20 +338,20 @@ splitFun dflags fn_id fn_info wrap_dmds res_info rhs
 				-- seems right-er to do so
 
 			`setInlineActivation` (inlinePragmaActivation inl_prag)
-				-- Any inline activation (which sets when inlining is active) 
+				-- Any inline activation (which sets when inlining is active)
 				-- on the original function is duplicated on the worker
 				-- It *matters* that the pragma stays on the wrapper
 				-- It seems sensible to have it on the worker too, although we
-				-- can't think of a compelling reason. (In ptic, INLINE things are 
+				-- can't think of a compelling reason. (In ptic, INLINE things are
 				-- not w/wd). However, the RuleMatchInfo is not transferred since
                                 -- it does not make sense for workers to be constructorlike.
 
 			`setIdStrictness` StrictSig (mkTopDmdType work_demands work_res_info)
-				-- Even though we may not be at top level, 
+				-- Even though we may not be at top level,
 				-- it's ok to give it an empty DmdEnv
 
                         `setIdArity` (exprArity work_rhs)
-                                -- Set the arity so that the Core Lint check that the 
+                                -- Set the arity so that the Core Lint check that the
                                 -- arity is consistent with the demand type goes through
 
 	wrap_rhs  = wrap_fn work_id
@@ -370,7 +377,7 @@ splitFun dflags fn_id fn_info wrap_dmds res_info rhs
     fun_ty          = idType fn_id
     inl_prag        = inlinePragInfo fn_info
     rule_match_info = inlinePragmaRuleMatchInfo inl_prag
-    arity           = arityInfo fn_info	
+    arity           = arityInfo fn_info
     		    -- The arity is set by the simplifier using exprEtaExpandArity
 		    -- So it may be more than the number of top-level-visible lambdas
 
@@ -383,21 +390,29 @@ splitFun dflags fn_id fn_info wrap_dmds res_info rhs
 -- make the wrapper and worker have corresponding one-shot arguments too.
 -- Otherwise we spuriously float stuff out of case-expression join points,
 -- which is very annoying.
-get_one_shots :: Expr Var -> [Bool]
+get_one_shots :: Expr Var -> [OneShotInfo]
 get_one_shots (Lam b e)
-  | isId b    = isOneShotLambda b : get_one_shots e
+  | isId b    = idOneShotInfo b : get_one_shots e
   | otherwise = get_one_shots e
 get_one_shots (Tick _ e) = get_one_shots e
-get_one_shots _    	 = noOneShotInfo
-
-noOneShotInfo :: [Bool]
-noOneShotInfo = repeat False
+get_one_shots _    	 = []
 \end{code}
 
+Note [Do not split void functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this rather common form of binding:
+        $j = \x:Void# -> ...no use of x...
+
+Since x is not used it'll be marked as absent.  But there is no point
+in w/w-ing because we'll simply add (\y:Void#), see WwLib.mkWorerArgs.
+
+If x has a more interesting type (eg Int, or Int#), there *is* a point
+in w/w so that we don't pass the argument at all.
+
 Note [Thunk splitting]
 ~~~~~~~~~~~~~~~~~~~~~~
 Suppose x is used strictly (never mind whether it has the CPR
-property).  
+property).
 
       let
 	x* = x-rhs
@@ -411,8 +426,8 @@ splitThunk transforms like this:
 
 Now simplifier will transform to
 
-      case x-rhs of 
-	I# a ->	let x* = I# a 
+      case x-rhs of
+	I# a ->	let x* = I# a
 	        in body
 
 which is what we want. Now suppose x-rhs is itself a case:
@@ -424,7 +439,7 @@ what would have happened before) which is fine.
 
 Notice that x certainly has the CPR property now!
 
-In fact, splitThunk uses the function argument w/w splitting 
+In fact, splitThunk uses the function argument w/w splitting
 function, so that if x's demand is deeper (say U(U(L,L),L))
 then the splitting will go deeper too.
 
@@ -434,7 +449,7 @@ then the splitting will go deeper too.
 --	x = e
 -- into
 --	x = let x = e
---	    in case x of 
+--	    in case x of
 --		 I# y -> let x = I# y in x }
 -- See comments above. Is it not beautifully short?
 -- Moreover, it works just as well when there are
diff --git a/compiler/stranal/WwLib.lhs b/compiler/stranal/WwLib.lhs
index 5c4cdbdbf6..fc94c9b921 100644
--- a/compiler/stranal/WwLib.lhs
+++ b/compiler/stranal/WwLib.lhs
@@ -11,8 +11,8 @@ module WwLib ( mkWwBodies, mkWWstr, mkWorkerArgs, deepSplitProductType_maybe ) w
 import CoreSyn
 import CoreUtils        ( exprType, mkCast )
 import Id               ( Id, idType, mkSysLocal, idDemandInfo, setIdDemandInfo,
-                          isOneShotLambda, setOneShotLambda, setIdUnfolding,
-                          setIdInfo
+                          setIdUnfolding,
+                          setIdInfo, idOneShotInfo, setIdOneShotInfo
                         )
 import IdInfo           ( vanillaIdInfo )
 import DataCon
@@ -23,7 +23,7 @@ import TysPrim          ( voidPrimTy )
 import TysWiredIn       ( tupleCon )
 import Type
 import Coercion hiding  ( substTy, substTyVarBndr )
-import BasicTypes       ( TupleSort(..) )
+import BasicTypes       ( TupleSort(..), OneShotInfo(..), worstOneShot )
 import Literal          ( absentLiteralOf )
 import TyCon
 import UniqSupply
@@ -108,7 +108,7 @@ mkWwBodies :: DynFlags
            -> Type                              -- Type of original function
            -> [Demand]                          -- Strictness of original function
            -> DmdResult                         -- Info about function result
-           -> [Bool]                            -- One-shot-ness of the function
+           -> [OneShotInfo]                     -- One-shot-ness of the function, value args only
            -> UniqSM ([Demand],                 -- Demands for worker (value) args
                       Id -> CoreExpr,           -- Wrapper body, lacking only the worker Id
                       CoreExpr -> CoreExpr)     -- Worker body, lacking the original function rhs
@@ -125,8 +125,8 @@ mkWwBodies :: DynFlags
 --                        E
 
 mkWwBodies dflags fun_ty demands res_info one_shots
-  = do  { let arg_info = demands `zip` (one_shots ++ repeat False)
-              all_one_shots = all snd arg_info
+  = do  { let arg_info = demands `zip` (one_shots ++ repeat NoOneShotInfo)
+              all_one_shots = foldr (worstOneShot . snd) OneShotLam arg_info
         ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) <- mkWWargs emptyTvSubst fun_ty arg_info
         ; (work_args, wrap_fn_str,  work_fn_str) <- mkWWstr dflags wrap_args
 
@@ -178,7 +178,7 @@ We use the state-token type which generates no code.
 
 \begin{code}
 mkWorkerArgs :: DynFlags -> [Var]
-             -> Bool    -- Whether all arguments are one-shot
+             -> OneShotInfo  -- Whether all arguments are one-shot
              -> Type    -- Type of body
              -> ([Var], -- Lambda bound args
                  [Var]) -- Args at call site
@@ -194,14 +194,11 @@ mkWorkerArgs dflags args all_one_shot res_ty
            -- see Note [Protecting the last value argument]
 
       -- see Note [All One-Shot Arguments of a Worker]
-      newArg = if all_one_shot
-               then setOneShotLambda voidArgId
-               else voidArgId
+      newArg = setIdOneShotInfo voidArgId all_one_shot
 \end{code}
 
 Note [Protecting the last value argument]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 If the user writes (\_ -> E), they might be intentionally disallowing
 the sharing of E. Since absence analysis and worker-wrapper are keen
 to remove such unused arguments, we add in a void argument to prevent
@@ -215,21 +212,27 @@ so f can't be inlined *under a lambda*.
 
 Note [All One-Shot Arguments of a Worker]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Sometimes, derived joint-points are just lambda-lifted thunks, whose
+Sometimes, derived join-points are just lambda-lifted thunks, whose
 only argument is of the unit type and is never used. This might
 interfere with the absence analysis, basing on which results these
 never-used arguments are eliminated in the worker. The additional
 argument `all_one_shot` of `mkWorkerArgs` is to prevent this.
 
-An example for this phenomenon is a `treejoin` program from the
-`nofib` suite, which features the following joint points:
+Example.  Suppose we have
+   foo = \p(one-shot) q(one-shot). y + 3
+Then we drop the unused args to give
+   foo   = \pq. $wfoo void#
+   $wfoo = \void(one-shot). y + 3
+
+But suppse foo didn't have all one-shot args:
+   foo = \p(not-one-shot) q(one-shot). expensive y + 3
+Then we drop the unused args to give
+   foo   = \pq. $wfoo void#
+   $wfoo = \void(not-one-shot). y + 3
+
+If we made the void-arg one-shot we might inline an expensive
+computation for y, which would be terrible!
 
-$j_s1l1 =
-  \ _ ->
-     case GHC.Prim.<=# 56320 y_aOy of _ {
-        GHC.Types.False -> $j_s1kP GHC.Prim.realWorld#;
-        GHC.Types.True ->  ... }
 
 %************************************************************************
 %*                                                                      *
@@ -271,8 +274,8 @@ the \x to get what we want.
 mkWWargs :: TvSubst             -- Freshening substitution to apply to the type
                                 --   See Note [Freshen type variables]
          -> Type                -- The type of the function
-         -> [(Demand,Bool)]     -- Demands and one-shot info for value arguments
-         -> UniqSM  ([Var],             -- Wrapper args
+         -> [(Demand,OneShotInfo)]     -- Demands and one-shot info for value arguments
+         -> UniqSM  ([Var],            -- Wrapper args
                      CoreExpr -> CoreExpr,      -- Wrapper fn
                      CoreExpr -> CoreExpr,      -- Worker fn
                      Type)                      -- Type of wrapper body
@@ -327,12 +330,11 @@ mkWWargs subst fun_ty arg_info
 applyToVars :: [Var] -> CoreExpr -> CoreExpr
 applyToVars vars fn = mkVarApps fn vars
 
-mk_wrap_arg :: Unique -> Type -> Demand -> Bool -> Id
+mk_wrap_arg :: Unique -> Type -> Demand -> OneShotInfo -> Id
 mk_wrap_arg uniq ty dmd one_shot
-  = set_one_shot one_shot (setIdDemandInfo (mkSysLocal (fsLit "w") uniq ty) dmd)
-  where
-    set_one_shot True  id = setOneShotLambda id
-    set_one_shot False id = id
+  = mkSysLocal (fsLit "w") uniq ty
+       `setIdDemandInfo` dmd
+       `setIdOneShotInfo` one_shot
 \end{code}
 
 Note [Freshen type variables]
@@ -462,13 +464,13 @@ mkWWstr_one dflags arg
 
   where
     dmd = idDemandInfo arg
+    one_shot = idOneShotInfo arg
         -- If the wrapper argument is a one-shot lambda, then
         -- so should (all) the corresponding worker arguments be
         -- This bites when we do w/w on a case join point
-    set_worker_arg_info worker_arg demand = set_one_shot (setIdDemandInfo worker_arg demand)
-
-    set_one_shot | isOneShotLambda arg = setOneShotLambda
-                 | otherwise           = \x -> x
+    set_worker_arg_info worker_arg demand 
+      = worker_arg `setIdDemandInfo`  demand
+                   `setIdOneShotInfo` one_shot
 
 ----------------------
 nop_fn :: CoreExpr -> CoreExpr
author	Simon Peyton Jones <simonpj@microsoft.com>	2013-11-22 17:13:05 +0000
committer	Simon Peyton Jones <simonpj@microsoft.com>	2013-12-02 17:33:59 +0000
commit	20cc59419b5fae60eea9c81f56020ef15256dc84 (patch)
tree	70a54aa0f99ceb69374ed7ec036f4381b649e5c3
parent	51deeb0db3abac9f4369d3f8a3744e1313ecebf4 (diff)
download	haskell-better-ho-cardinality.tar.gz