5 files changed, 150 insertions, 90 deletions
diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs
index db4701d45a..f21d0205f5 100644
--- a/compiler/GHC/Core/Opt/SpecConstr.hs
+++ b/compiler/GHC/Core/Opt/SpecConstr.hs
@@ -62,7 +62,7 @@ import GHC.Data.FastString
 import GHC.Types.Unique.FM
 import GHC.Utils.Monad
 import Control.Monad    ( zipWithM )
-import Data.List (nubBy, sortBy, partition)
+import Data.List (nubBy, sortBy, partition, dropWhileEnd, mapAccumL )
 import GHC.Builtin.Names ( specTyConKey )
 import GHC.Unit.Module
 import GHC.Exts( SpecConstrAnnotation(..) )
@@ -946,10 +946,13 @@ extendRecBndrs env bndrs  = (env { sc_subst = subst' }, bndrs')
                       where
                         (subst', bndrs') = substRecBndrs (sc_subst env) bndrs
 
+extendBndrs :: ScEnv -> [Var] -> (ScEnv, [Var])
+extendBndrs env bndrs = mapAccumL extendBndr env bndrs
+
 extendBndr :: ScEnv -> Var -> (ScEnv, Var)
-extendBndr  env bndr  = (env { sc_subst = subst' }, bndr')
-                      where
-                        (subst', bndr') = substBndr (sc_subst env) bndr
+extendBndr env bndr  = (env { sc_subst = subst' }, bndr')
+                     where
+                       (subst', bndr') = substBndr (sc_subst env) bndr
 
 extendValEnv :: ScEnv -> Id -> Maybe Value -> ScEnv
 extendValEnv env _  Nothing   = env
@@ -1102,6 +1105,9 @@ data Call = Call Id [CoreArg] ValueEnv
         -- The arguments of the call, together with the
         -- env giving the constructor bindings at the call site
         -- We keep the function mainly for debug output
+        --
+        -- The call is not necessarily saturated; we just put
+        -- in however many args are visible at the call site
 
 instance Outputable ScUsage where
   ppr (SCU { scu_calls = calls, scu_occs = occs })
@@ -1399,12 +1405,6 @@ scTopBindEnv env (NonRec bndr rhs)
 ----------------------
 scTopBind :: ScEnv -> ScUsage -> CoreBind -> UniqSM (ScUsage, CoreBind)
 
-{-
-scTopBind _ usage _
-  | pprTrace "scTopBind_usage" (ppr (scu_calls usage)) False
-  = error "false"
--}
-
 scTopBind env body_usage (Rec prs)
   | Just threshold <- sc_size env
   , not force_spec
@@ -1603,15 +1603,9 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs
   = -- pprTrace "specialise bot" (ppr fn) $
     return (nullUsage, spec_info)
 
-  | isNeverActive (idInlineActivation fn) -- See Note [Transfer activation]
-    || null arg_bndrs                     -- Only specialise functions
-  = -- pprTrace "specialise inactive" (ppr fn) $
-    case mb_unspec of    -- Behave as if there was a single, boring call
-      Just rhs_usg -> return (rhs_usg, spec_info { si_mb_unspec = Nothing })
-                         -- See Note [spec_usg includes rhs_usg]
-      Nothing      -> return (nullUsage, spec_info)
-
-  | Just all_calls <- lookupVarEnv bind_calls fn
+  | not (isNeverActive (idInlineActivation fn))  -- See Note [Transfer activation]
+  , not (null arg_bndrs)                         -- Only specialise functions
+  , Just all_calls <- lookupVarEnv bind_calls fn -- Some calls to it
   = -- pprTrace "specialise entry {" (ppr fn <+> ppr all_calls) $
     do  { (boring_call, new_pats) <- callsToNewPats env fn spec_info arg_occs all_calls
 
@@ -1650,10 +1644,13 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs
                                 , si_n_specs = spec_count + n_pats
                                 , si_mb_unspec = mb_unspec' }) }
 
-  | otherwise  -- No new seeds, so return nullUsage
-  = return (nullUsage, spec_info)
-
-
+  | otherwise  -- No calls, inactive, or not a function
+               -- Behave as if there was a single, boring call
+  = -- pprTrace "specialise inactive" (ppr fn $$ ppr mb_unspec) $
+    case mb_unspec of    -- Behave as if there was a single, boring call
+      Just rhs_usg -> return (rhs_usg, spec_info { si_mb_unspec = Nothing })
+                         -- See Note [spec_usg includes rhs_usg]
+      Nothing      -> return (nullUsage, spec_info)
 
 
 ---------------------
@@ -1686,58 +1683,70 @@ spec_one :: ScEnv
             f (b,c) ((:) (a,(b,c)) (x,v) hw) = f_spec b c v hw
 -}
 
-spec_one env fn arg_bndrs body (call_pat@(qvars, pats), rule_number)
+spec_one env fn arg_bndrs body (call_pat, rule_number)
+  | CP { cp_qvars = qvars, cp_args = pats } <- call_pat
   = do  { spec_uniq <- getUniqueM
-        ; let spec_env   = extendScSubstList (extendScInScope env qvars)
-                                             (arg_bndrs `zip` pats)
-              fn_name    = idName fn
-              fn_loc     = nameSrcSpan fn_name
-              fn_occ     = nameOccName fn_name
-              spec_occ   = mkSpecOcc fn_occ
+        ; let env1 = extendScSubstList (extendScInScope env qvars)
+                                       (arg_bndrs `zip` pats)
+              (body_env, extra_bndrs) = extendBndrs env1 (dropList pats arg_bndrs)
+              -- Remember, there may be fewer pats than arg_bndrs
+              -- See Note [SpecConstr call patterns]
+
+              fn_name  = idName fn
+              fn_loc   = nameSrcSpan fn_name
+              fn_occ   = nameOccName fn_name
+              spec_occ = mkSpecOcc fn_occ
               -- We use fn_occ rather than fn in the rule_name string
               -- as we don't want the uniq to end up in the rule, and
               -- hence in the ABI, as that can cause spurious ABI
               -- changes (#4012).
               rule_name  = mkFastString ("SC:" ++ occNameString fn_occ ++ show rule_number)
               spec_name  = mkInternalName spec_uniq spec_occ fn_loc
---      ; pprTrace "{spec_one" (ppr (sc_count env) <+> ppr fn
---                              <+> ppr pats <+> text "-->" <+> ppr spec_name) $
+--      ; pprTrace "spec_one {" (vcat [ text "function:" <+> ppr fn <+> ppr (idUnique fn)
+--                                    , text "sc_count:" <+> ppr (sc_count env)
+--                                    , text "pats:" <+> ppr pats
+--                                    , text "-->" <+> ppr spec_name
+--                                    , text "bndrs" <+> ppr arg_bndrs
+--                                    , text "body" <+> ppr body
+--                                    , text "how_bound" <+> ppr (sc_how_bound env) ]) $
 --        return ()
 
         -- Specialise the body
-        ; (spec_usg, spec_body) <- scExpr spec_env body
+        ; (spec_usg, spec_body) <- scExpr body_env body
 
---      ; pprTrace "done spec_one}" (ppr fn) $
+--      ; pprTrace "done spec_one }" (ppr fn $$ ppr (scu_calls spec_usg)) $
 --        return ()
 
                 -- And build the results
-        ; let (spec_lam_args, spec_call_args) = mkWorkerArgs (sc_dflags env)
-                                                             qvars body_ty
-                -- Usual w/w hack to avoid generating
+        ; let spec_body_ty   = exprType spec_body
+              spec_lam_args1 = qvars ++ extra_bndrs
+              (spec_lam_args, spec_call_args) = mkWorkerArgs (sc_dflags env)
+                                                             spec_lam_args1 spec_body_ty
+                -- mkWorkerArgs: usual w/w hack to avoid generating
                 -- a spec_rhs of unlifted type and no args
 
-              spec_lam_args_str = handOutStrictnessInformation (fst (splitDmdSig spec_str)) spec_lam_args
+              spec_str          = calcSpecStrictness fn spec_lam_args pats
+              spec_lam_args_str = handOutStrictnessInformation spec_str spec_lam_args
                 -- Annotate the variables with the strictness information from
                 -- the function (see Note [Strictness information in worker binders])
 
               spec_join_arity | isJoinId fn = Just (length spec_lam_args)
                               | otherwise   = Nothing
               spec_id    = mkLocalId spec_name Many
-                                     (mkLamTypes spec_lam_args body_ty)
+                                     (mkLamTypes spec_lam_args spec_body_ty)
                              -- See Note [Transfer strictness]
                              `setIdDmdSig` spec_str
                              `setIdCprSig` topCprSig
                              `setIdArity` count isId spec_lam_args
                              `asJoinId_maybe` spec_join_arity
-              spec_str   = calcSpecStrictness fn spec_lam_args pats
 
 
                 -- Conditionally use result of new worker-wrapper transform
               spec_rhs   = mkLams spec_lam_args_str spec_body
-              body_ty    = exprType spec_body
-              rule_rhs   = mkVarApps (Var spec_id) spec_call_args
+              rule_rhs   = mkVarApps (Var spec_id) $
+                           dropTail (length extra_bndrs) spec_call_args
               inline_act = idInlineActivation fn
-              this_mod   = sc_module spec_env
+              this_mod   = sc_module env
               rule       = mkRule this_mod True {- Auto -} True {- Local -}
                                   rule_name inline_act fn_name qvars pats rule_rhs
                            -- See Note [Transfer activation]
@@ -1747,8 +1756,9 @@ spec_one env fn arg_bndrs body (call_pat@(qvars, pats), rule_number)
 
 
 -- See Note [Strictness information in worker binders]
-handOutStrictnessInformation :: [Demand] -> [Var] -> [Var]
-handOutStrictnessInformation = go
+handOutStrictnessInformation :: DmdSig -> [Var] -> [Var]
+handOutStrictnessInformation str vs
+  = go (fst (splitDmdSig str)) vs
   where
     go _ [] = []
     go [] vs = vs
@@ -1757,7 +1767,7 @@ handOutStrictnessInformation = go
 
 calcSpecStrictness :: Id                     -- The original function
                    -> [Var] -> [CoreExpr]    -- Call pattern
-                   -> DmdSig              -- Strictness of specialised thing
+                   -> DmdSig                 -- Strictness of specialised thing
 -- See Note [Transfer strictness]
 calcSpecStrictness fn qvars pats
   = mkClosedDmdSig spec_dmds div
@@ -1871,19 +1881,39 @@ See # 5458.  Yuk.
 Note [SpecConstr call patterns]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 A "call patterns" that we collect is going to become the LHS of a RULE.
-It's important that it doesn't have
+
+Wrinkles:
+
+* The list of argument patterns, cp_args, is no longer than the
+  visible lambdas of the binding, ri_arg_occs.  This is done via
+  the zipWithM in callToPats.
+
+* The list of argument patterns can certainly be shorter than the
+  lambdas in the function definition (under-saturated).  For example
+      f x y = case x of { True -> e1; False -> e2 }
+      ....map (f True) e...
+  We want to specialise `f` for `f True`.
+
+* In fact we deliberately shrink the list of argument patterns,
+  cp_args, by trimming off all the boring ones at the end (see
+  `dropWhileEnd is_boring` in callToPats).  Since the RULE only
+  applies when it is saturated, this shrinking makes the RULE more
+  applicable.  But it does mean that the argument patterns do not
+  necessarily saturate the lambdas of the function.
+
+* It's important that the pattern arguments do not look like
      e |> Refl
-or
+  or
     e |> g1 |> g2
-because both of these will be optimised by Simplify.simplRule. In the
-former case such optimisation benign, because the rule will match more
-terms; but in the latter we may lose a binding of 'g1' or 'g2', and
-end up with a rule LHS that doesn't bind the template variables
-(#10602).
+  because both of these will be optimised by Simplify.simplRule. In the
+  former case such optimisation benign, because the rule will match more
+  terms; but in the latter we may lose a binding of 'g1' or 'g2', and
+  end up with a rule LHS that doesn't bind the template variables
+  (#10602).
 
-The simplifier eliminates such things, but SpecConstr itself constructs
-new terms by substituting.  So the 'mkCast' in the Cast case of scExpr
-is very important!
+  The simplifier eliminates such things, but SpecConstr itself constructs
+  new terms by substituting.  So the 'mkCast' in the Cast case of scExpr
+  is very important!
 
 Note [Choosing patterns]
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1968,8 +1998,14 @@ alternative would be to discard calls that mention coercion variables
 only in kind-casts, but I'm doing the simple thing for now.
 -}
 
-type CallPat = ([Var], [CoreExpr])      -- Quantified variables and arguments
-                                        -- See Note [SpecConstr call patterns]
+data CallPat = CP { cp_qvars :: [Var]           -- Quantified variables
+                  , cp_args  :: [CoreExpr] }    -- Arguments
+     -- See Note [SpecConstr call patterns]
+
+instance Outputable CallPat where
+  ppr (CP { cp_qvars = qvars, cp_args = args })
+    = text "CP" <> braces (sep [ text "cp_qvars =" <+> ppr qvars <> comma
+                               , text "cp_args =" <+> ppr args ])
 
 callsToNewPats :: ScEnv -> Id
                -> SpecInfo
@@ -1995,34 +2031,39 @@ callsToNewPats env fn spec_info@(SI { si_specs = done_specs }) bndr_occs calls
 
               -- Remove ones that have too many worker variables
               small_pats = filterOut too_big non_dups
-              too_big (vars,args) = not (isWorkerSmallEnough (sc_dflags env) (valArgCount args) vars)
+              too_big (CP { cp_qvars = vars, cp_args = args })
+                = not (isWorkerSmallEnough (sc_dflags env) (valArgCount args) vars)
                   -- We are about to construct w/w pair in 'spec_one'.
                   -- Omit specialisation leading to high arity workers.
                   -- See Note [Limit w/w arity] in GHC.Core.Opt.WorkWrap.Utils
 
                 -- Discard specialisations if there are too many of them
-              trimmed_pats = trim_pats env fn spec_info small_pats
+              (pats_were_discarded, trimmed_pats) = trim_pats env fn spec_info small_pats
 
 --        ; pprTrace "callsToPats" (vcat [ text "calls to" <+> ppr fn <> colon <+> ppr calls
 --                                       , text "done_specs:" <+> ppr (map os_pat done_specs)
 --                                       , text "good_pats:" <+> ppr good_pats ]) $
 --          return ()
 
-        ; return (have_boring_call, trimmed_pats) }
+        ; return (have_boring_call || pats_were_discarded, trimmed_pats) }
+          -- If any of the calls does not give rise to a specialisation, either
+          -- because it is boring, or because there are too many specialisations,
+          -- return a flag to say so, so that we know to keep the original function.
 
 
-trim_pats :: ScEnv -> Id -> SpecInfo -> [CallPat] -> [CallPat]
+trim_pats :: ScEnv -> Id -> SpecInfo -> [CallPat] -> (Bool, [CallPat])
+-- True <=> some patterns were discarded
 -- See Note [Choosing patterns]
 trim_pats env fn (SI { si_n_specs = done_spec_count }) pats
   | sc_force env
     || isNothing mb_scc
     || n_remaining >= n_pats
   = -- pprTrace "trim_pats: no-trim" (ppr (sc_force env) $$ ppr mb_scc $$ ppr n_remaining $$ ppr n_pats)
-    pats          -- No need to trim
+    (False, pats)          -- No need to trim
 
   | otherwise
   = emit_trace $  -- Need to trim, so keep the best ones
-    take n_remaining sorted_pats
+    (True, take n_remaining sorted_pats)
 
   where
     n_pats         = length pats
@@ -2041,7 +2082,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats
     pat_cons :: CallPat -> Int
     -- How many data constructors of literals are in
     -- the pattern.  More data-cons => less general
-    pat_cons (qs, ps) = foldr ((+) . n_cons) 0 ps
+    pat_cons (CP { cp_qvars = qs, cp_args = ps })
+       = foldr ((+) . n_cons) 0 ps
        where
           q_set = mkVarSet qs
           n_cons (Var v) | v `elemVarSet` q_set = 0
@@ -2072,12 +2114,21 @@ callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat)
         --      Type variables come first, since they may scope
         --      over the following term variables
         -- The [CoreExpr] are the argument patterns for the rule
-callToPats env bndr_occs call@(Call _ args con_env)
-  | args `ltLength` bndr_occs      -- Check saturated
-  = return Nothing
-  | otherwise
+callToPats env bndr_occs call@(Call fn args con_env)
   = do  { let in_scope = substInScope (sc_subst env)
-        ; (interesting, pats) <- argsToPats env in_scope con_env args bndr_occs
+
+        ; pairs <- zipWithM (argToPat env in_scope con_env) args bndr_occs
+                   -- This zip trims the args to be no longer than
+                   -- the lambdas in the function definition (bndr_occs)
+
+          -- Drop boring patterns from the end
+          -- See Note [SpecConstr call patterns]
+        ; let pairs' | isJoinId fn = pairs
+                     | otherwise   = dropWhileEnd is_boring pairs
+              is_boring (interesting, _) = not interesting
+              (interesting_s, pats) = unzip pairs'
+              interesting           = or interesting_s
+
         ; let pat_fvs = exprsFreeVarsList pats
                 -- To get determinism we need the list of free variables in
                 -- deterministic order. Otherwise we end up creating
@@ -2107,18 +2158,16 @@ callToPats env bndr_occs call@(Call _ args con_env)
               bad_covars :: CoVarSet
               bad_covars = mapUnionVarSet get_bad_covars pats
               get_bad_covars :: CoreArg -> CoVarSet
-              get_bad_covars (Type ty)
-                = filterVarSet (\v -> isId v && not (is_in_scope v)) $
-                  tyCoVarsOfType ty
-              get_bad_covars _
-                = emptyVarSet
+              get_bad_covars (Type ty) = filterVarSet bad_covar (tyCoVarsOfType ty)
+              get_bad_covars _         = emptyVarSet
+              bad_covar v = isId v && not (is_in_scope v)
 
         ; -- pprTrace "callToPats"  (ppr args $$ ppr bndr_occs) $
           WARN( not (isEmptyVarSet bad_covars)
               , text "SpecConstr: bad covars:" <+> ppr bad_covars
                 $$ ppr call )
           if interesting && isEmptyVarSet bad_covars
-          then return (Just (qvars', pats))
+          then return (Just (CP { cp_qvars = qvars', cp_args = pats }))
           else return Nothing }
 
     -- argToPat takes an actual argument, and returns an abstracted
@@ -2204,10 +2253,10 @@ argToPat env in_scope val_env arg arg_occ
   | Just (ConVal (DataAlt dc) args) <- isValue val_env arg
   , not (ignoreDataCon env dc)        -- See Note [NoSpecConstr]
   , Just arg_occs <- mb_scrut dc
-  = do  { let (ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) args
-        ; (_, args') <- argsToPats env in_scope val_env rest_args arg_occs
-        ; return (True,
-                  mkConApp dc (ty_args ++ args')) }
+  = do { let (ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) args
+       ; prs <- zipWithM (argToPat env in_scope val_env) rest_args arg_occs
+       ; let args' = map snd prs
+       ; return (True, mkConApp dc (ty_args ++ args')) }
   where
     mb_scrut dc = case arg_occ of
                     ScrutOcc bs | Just occs <- lookupUFM bs dc
@@ -2266,14 +2315,6 @@ wildCardPat ty
        ; let id = mkSysLocalOrCoVar (fsLit "sc") uniq Many ty
        ; return (False, varToCoreExpr id) }
 
-argsToPats :: ScEnv -> InScopeSet -> ValueEnv
-           -> [CoreArg] -> [ArgOcc]  -- Should be same length
-           -> UniqSM (Bool, [CoreArg])
-argsToPats env in_scope val_env args occs
-  = do { stuff <- zipWithM (argToPat env in_scope val_env) args occs
-       ; let (interesting_s, args') = unzip stuff
-       ; return (or interesting_s, args') }
-
 isValue :: ValueEnv -> CoreExpr -> Maybe Value
 isValue _env (Lit lit)
   | litIsLifted lit = Nothing
@@ -2324,7 +2365,8 @@ valueIsWorkFree LambdaVal       = True
 valueIsWorkFree (ConVal _ args) = all exprIsWorkFree args
 
 samePat :: CallPat -> CallPat -> Bool
-samePat (vs1, as1) (vs2, as2)
+samePat (CP { cp_qvars = vs1, cp_args = as1 })
+        (CP { cp_qvars = vs2, cp_args = as2 })
   = all2 same as1 as2
   where
     same (Var v1) (Var v2)
diff --git a/compiler/GHC/Core/Opt/Specialise.hs b/compiler/GHC/Core/Opt/Specialise.hs
index ffb50d45c7..fc62f5fa8a 100644
--- a/compiler/GHC/Core/Opt/Specialise.hs
+++ b/compiler/GHC/Core/Opt/Specialise.hs
@@ -2528,7 +2528,7 @@ pragma made the program slower!  The reason was that the specialised
 function $sinsertWith arising from the pragma looked rather like `f`
 above, and failed to specialise a call in its body like wimwam.
 Without the pragma, the original call to `insertWith` was completely
-monomorpic, and speciased in one go.
+monomorphic, and specialised in one go.
 -}
 
 instance Outputable DictBind where
diff --git a/testsuite/tests/simplCore/should_compile/T19672.hs b/testsuite/tests/simplCore/should_compile/T19672.hs
new file mode 100644
index 0000000000..e1f70f2b43
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T19672.hs
@@ -0,0 +1,7 @@
+module Foo where
+
+wimwam :: [a] -> Int -> Int
+wimwam []     x = x
+wimwam (y:ys) x = wimwam ys 0
+
+bar xs = map (wimwam [True]) xs
diff --git a/testsuite/tests/simplCore/should_compile/T19672.stderr b/testsuite/tests/simplCore/should_compile/T19672.stderr
new file mode 100644
index 0000000000..56b7c18fa9
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T19672.stderr
@@ -0,0 +1,8 @@
+
+==================== Tidy Core rules ====================
+"SC:wimwam0"
+    forall (sc :: Bool) (sc1 :: [Bool]).
+      wimwam @Bool (: @Bool sc sc1)
+      = bar_$swimwam sc sc1
+
+
diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T
index 623da259ef..dba67fa80b 100644
--- a/testsuite/tests/simplCore/should_compile/all.T
+++ b/testsuite/tests/simplCore/should_compile/all.T
@@ -355,3 +355,6 @@ test('T19581', [ grep_errmsg(r'case') ], compile, ['-O -ddump-simpl -dsuppress-u
 test('T19599', normal, compile, ['-O -ddump-rules'])
 test('T19599a', normal, compile, ['-O -ddump-rules'])
 test('T13873',  [ grep_errmsg(r'SPEC') ], compile, ['-O -ddump-rules'])
+
+# Look for a specialisation rule for wimwam
+test('T19672', normal, compile, ['-O2 -ddump-rules'])