summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/GHC/Core/Opt/SpecConstr.hs220
-rw-r--r--compiler/GHC/Core/Opt/Specialise.hs2
-rw-r--r--testsuite/tests/simplCore/should_compile/T19672.hs7
-rw-r--r--testsuite/tests/simplCore/should_compile/T19672.stderr8
-rw-r--r--testsuite/tests/simplCore/should_compile/all.T3
5 files changed, 150 insertions, 90 deletions
diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs
index db4701d45a..f21d0205f5 100644
--- a/compiler/GHC/Core/Opt/SpecConstr.hs
+++ b/compiler/GHC/Core/Opt/SpecConstr.hs
@@ -62,7 +62,7 @@ import GHC.Data.FastString
import GHC.Types.Unique.FM
import GHC.Utils.Monad
import Control.Monad ( zipWithM )
-import Data.List (nubBy, sortBy, partition)
+import Data.List (nubBy, sortBy, partition, dropWhileEnd, mapAccumL )
import GHC.Builtin.Names ( specTyConKey )
import GHC.Unit.Module
import GHC.Exts( SpecConstrAnnotation(..) )
@@ -946,10 +946,13 @@ extendRecBndrs env bndrs = (env { sc_subst = subst' }, bndrs')
where
(subst', bndrs') = substRecBndrs (sc_subst env) bndrs
+extendBndrs :: ScEnv -> [Var] -> (ScEnv, [Var])
+extendBndrs env bndrs = mapAccumL extendBndr env bndrs
+
extendBndr :: ScEnv -> Var -> (ScEnv, Var)
-extendBndr env bndr = (env { sc_subst = subst' }, bndr')
- where
- (subst', bndr') = substBndr (sc_subst env) bndr
+extendBndr env bndr = (env { sc_subst = subst' }, bndr')
+ where
+ (subst', bndr') = substBndr (sc_subst env) bndr
extendValEnv :: ScEnv -> Id -> Maybe Value -> ScEnv
extendValEnv env _ Nothing = env
@@ -1102,6 +1105,9 @@ data Call = Call Id [CoreArg] ValueEnv
-- The arguments of the call, together with the
-- env giving the constructor bindings at the call site
-- We keep the function mainly for debug output
+ --
+ -- The call is not necessarily saturated; we just put
+ -- in however many args are visible at the call site
instance Outputable ScUsage where
ppr (SCU { scu_calls = calls, scu_occs = occs })
@@ -1399,12 +1405,6 @@ scTopBindEnv env (NonRec bndr rhs)
----------------------
scTopBind :: ScEnv -> ScUsage -> CoreBind -> UniqSM (ScUsage, CoreBind)
-{-
-scTopBind _ usage _
- | pprTrace "scTopBind_usage" (ppr (scu_calls usage)) False
- = error "false"
--}
-
scTopBind env body_usage (Rec prs)
| Just threshold <- sc_size env
, not force_spec
@@ -1603,15 +1603,9 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs
= -- pprTrace "specialise bot" (ppr fn) $
return (nullUsage, spec_info)
- | isNeverActive (idInlineActivation fn) -- See Note [Transfer activation]
- || null arg_bndrs -- Only specialise functions
- = -- pprTrace "specialise inactive" (ppr fn) $
- case mb_unspec of -- Behave as if there was a single, boring call
- Just rhs_usg -> return (rhs_usg, spec_info { si_mb_unspec = Nothing })
- -- See Note [spec_usg includes rhs_usg]
- Nothing -> return (nullUsage, spec_info)
-
- | Just all_calls <- lookupVarEnv bind_calls fn
+ | not (isNeverActive (idInlineActivation fn)) -- See Note [Transfer activation]
+ , not (null arg_bndrs) -- Only specialise functions
+ , Just all_calls <- lookupVarEnv bind_calls fn -- Some calls to it
= -- pprTrace "specialise entry {" (ppr fn <+> ppr all_calls) $
do { (boring_call, new_pats) <- callsToNewPats env fn spec_info arg_occs all_calls
@@ -1650,10 +1644,13 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs
, si_n_specs = spec_count + n_pats
, si_mb_unspec = mb_unspec' }) }
- | otherwise -- No new seeds, so return nullUsage
- = return (nullUsage, spec_info)
-
-
+ | otherwise -- No calls, inactive, or not a function
+ -- Behave as if there was a single, boring call
+ = -- pprTrace "specialise inactive" (ppr fn $$ ppr mb_unspec) $
+ case mb_unspec of -- Behave as if there was a single, boring call
+ Just rhs_usg -> return (rhs_usg, spec_info { si_mb_unspec = Nothing })
+ -- See Note [spec_usg includes rhs_usg]
+ Nothing -> return (nullUsage, spec_info)
---------------------
@@ -1686,58 +1683,70 @@ spec_one :: ScEnv
f (b,c) ((:) (a,(b,c)) (x,v) hw) = f_spec b c v hw
-}
-spec_one env fn arg_bndrs body (call_pat@(qvars, pats), rule_number)
+spec_one env fn arg_bndrs body (call_pat, rule_number)
+ | CP { cp_qvars = qvars, cp_args = pats } <- call_pat
= do { spec_uniq <- getUniqueM
- ; let spec_env = extendScSubstList (extendScInScope env qvars)
- (arg_bndrs `zip` pats)
- fn_name = idName fn
- fn_loc = nameSrcSpan fn_name
- fn_occ = nameOccName fn_name
- spec_occ = mkSpecOcc fn_occ
+ ; let env1 = extendScSubstList (extendScInScope env qvars)
+ (arg_bndrs `zip` pats)
+ (body_env, extra_bndrs) = extendBndrs env1 (dropList pats arg_bndrs)
+ -- Remember, there may be fewer pats than arg_bndrs
+ -- See Note [SpecConstr call patterns]
+
+ fn_name = idName fn
+ fn_loc = nameSrcSpan fn_name
+ fn_occ = nameOccName fn_name
+ spec_occ = mkSpecOcc fn_occ
-- We use fn_occ rather than fn in the rule_name string
-- as we don't want the uniq to end up in the rule, and
-- hence in the ABI, as that can cause spurious ABI
-- changes (#4012).
rule_name = mkFastString ("SC:" ++ occNameString fn_occ ++ show rule_number)
spec_name = mkInternalName spec_uniq spec_occ fn_loc
--- ; pprTrace "{spec_one" (ppr (sc_count env) <+> ppr fn
--- <+> ppr pats <+> text "-->" <+> ppr spec_name) $
+-- ; pprTrace "spec_one {" (vcat [ text "function:" <+> ppr fn <+> ppr (idUnique fn)
+-- , text "sc_count:" <+> ppr (sc_count env)
+-- , text "pats:" <+> ppr pats
+-- , text "-->" <+> ppr spec_name
+-- , text "bndrs" <+> ppr arg_bndrs
+-- , text "body" <+> ppr body
+-- , text "how_bound" <+> ppr (sc_how_bound env) ]) $
-- return ()
-- Specialise the body
- ; (spec_usg, spec_body) <- scExpr spec_env body
+ ; (spec_usg, spec_body) <- scExpr body_env body
--- ; pprTrace "done spec_one}" (ppr fn) $
+-- ; pprTrace "done spec_one }" (ppr fn $$ ppr (scu_calls spec_usg)) $
-- return ()
-- And build the results
- ; let (spec_lam_args, spec_call_args) = mkWorkerArgs (sc_dflags env)
- qvars body_ty
- -- Usual w/w hack to avoid generating
+ ; let spec_body_ty = exprType spec_body
+ spec_lam_args1 = qvars ++ extra_bndrs
+ (spec_lam_args, spec_call_args) = mkWorkerArgs (sc_dflags env)
+ spec_lam_args1 spec_body_ty
+ -- mkWorkerArgs: usual w/w hack to avoid generating
-- a spec_rhs of unlifted type and no args
- spec_lam_args_str = handOutStrictnessInformation (fst (splitDmdSig spec_str)) spec_lam_args
+ spec_str = calcSpecStrictness fn spec_lam_args pats
+ spec_lam_args_str = handOutStrictnessInformation spec_str spec_lam_args
-- Annotate the variables with the strictness information from
-- the function (see Note [Strictness information in worker binders])
spec_join_arity | isJoinId fn = Just (length spec_lam_args)
| otherwise = Nothing
spec_id = mkLocalId spec_name Many
- (mkLamTypes spec_lam_args body_ty)
+ (mkLamTypes spec_lam_args spec_body_ty)
-- See Note [Transfer strictness]
`setIdDmdSig` spec_str
`setIdCprSig` topCprSig
`setIdArity` count isId spec_lam_args
`asJoinId_maybe` spec_join_arity
- spec_str = calcSpecStrictness fn spec_lam_args pats
-- Conditionally use result of new worker-wrapper transform
spec_rhs = mkLams spec_lam_args_str spec_body
- body_ty = exprType spec_body
- rule_rhs = mkVarApps (Var spec_id) spec_call_args
+ rule_rhs = mkVarApps (Var spec_id) $
+ dropTail (length extra_bndrs) spec_call_args
inline_act = idInlineActivation fn
- this_mod = sc_module spec_env
+ this_mod = sc_module env
rule = mkRule this_mod True {- Auto -} True {- Local -}
rule_name inline_act fn_name qvars pats rule_rhs
-- See Note [Transfer activation]
@@ -1747,8 +1756,9 @@ spec_one env fn arg_bndrs body (call_pat@(qvars, pats), rule_number)
-- See Note [Strictness information in worker binders]
-handOutStrictnessInformation :: [Demand] -> [Var] -> [Var]
-handOutStrictnessInformation = go
+handOutStrictnessInformation :: DmdSig -> [Var] -> [Var]
+handOutStrictnessInformation str vs
+ = go (fst (splitDmdSig str)) vs
where
go _ [] = []
go [] vs = vs
@@ -1757,7 +1767,7 @@ handOutStrictnessInformation = go
calcSpecStrictness :: Id -- The original function
-> [Var] -> [CoreExpr] -- Call pattern
- -> DmdSig -- Strictness of specialised thing
+ -> DmdSig -- Strictness of specialised thing
-- See Note [Transfer strictness]
calcSpecStrictness fn qvars pats
= mkClosedDmdSig spec_dmds div
@@ -1871,19 +1881,39 @@ See # 5458. Yuk.
Note [SpecConstr call patterns]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A "call patterns" that we collect is going to become the LHS of a RULE.
-It's important that it doesn't have
+
+Wrinkles:
+
+* The list of argument patterns, cp_args, is no longer than the
+ visible lambdas of the binding, ri_arg_occs. This is done via
+ the zipWithM in callToPats.
+
+* The list of argument patterns can certainly be shorter than the
+ lambdas in the function definition (under-saturated). For example
+ f x y = case x of { True -> e1; False -> e2 }
+ ....map (f True) e...
+ We want to specialise `f` for `f True`.
+
+* In fact we deliberately shrink the list of argument patterns,
+ cp_args, by trimming off all the boring ones at the end (see
+ `dropWhileEnd is_boring` in callToPats). Since the RULE only
+ applies when it is saturated, this shrinking makes the RULE more
+ applicable. But it does mean that the argument patterns do not
+ necessarily saturate the lambdas of the function.
+
+* It's important that the pattern arguments do not look like
e |> Refl
-or
+ or
e |> g1 |> g2
-because both of these will be optimised by Simplify.simplRule. In the
-former case such optimisation benign, because the rule will match more
-terms; but in the latter we may lose a binding of 'g1' or 'g2', and
-end up with a rule LHS that doesn't bind the template variables
-(#10602).
+ because both of these will be optimised by Simplify.simplRule. In the
+ former case such optimisation benign, because the rule will match more
+ terms; but in the latter we may lose a binding of 'g1' or 'g2', and
+ end up with a rule LHS that doesn't bind the template variables
+ (#10602).
-The simplifier eliminates such things, but SpecConstr itself constructs
-new terms by substituting. So the 'mkCast' in the Cast case of scExpr
-is very important!
+ The simplifier eliminates such things, but SpecConstr itself constructs
+ new terms by substituting. So the 'mkCast' in the Cast case of scExpr
+ is very important!
Note [Choosing patterns]
~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1968,8 +1998,14 @@ alternative would be to discard calls that mention coercion variables
only in kind-casts, but I'm doing the simple thing for now.
-}
-type CallPat = ([Var], [CoreExpr]) -- Quantified variables and arguments
- -- See Note [SpecConstr call patterns]
+data CallPat = CP { cp_qvars :: [Var] -- Quantified variables
+ , cp_args :: [CoreExpr] } -- Arguments
+ -- See Note [SpecConstr call patterns]
+
+instance Outputable CallPat where
+ ppr (CP { cp_qvars = qvars, cp_args = args })
+ = text "CP" <> braces (sep [ text "cp_qvars =" <+> ppr qvars <> comma
+ , text "cp_args =" <+> ppr args ])
callsToNewPats :: ScEnv -> Id
-> SpecInfo
@@ -1995,34 +2031,39 @@ callsToNewPats env fn spec_info@(SI { si_specs = done_specs }) bndr_occs calls
-- Remove ones that have too many worker variables
small_pats = filterOut too_big non_dups
- too_big (vars,args) = not (isWorkerSmallEnough (sc_dflags env) (valArgCount args) vars)
+ too_big (CP { cp_qvars = vars, cp_args = args })
+ = not (isWorkerSmallEnough (sc_dflags env) (valArgCount args) vars)
-- We are about to construct w/w pair in 'spec_one'.
-- Omit specialisation leading to high arity workers.
-- See Note [Limit w/w arity] in GHC.Core.Opt.WorkWrap.Utils
-- Discard specialisations if there are too many of them
- trimmed_pats = trim_pats env fn spec_info small_pats
+ (pats_were_discarded, trimmed_pats) = trim_pats env fn spec_info small_pats
-- ; pprTrace "callsToPats" (vcat [ text "calls to" <+> ppr fn <> colon <+> ppr calls
-- , text "done_specs:" <+> ppr (map os_pat done_specs)
-- , text "good_pats:" <+> ppr good_pats ]) $
-- return ()
- ; return (have_boring_call, trimmed_pats) }
+ ; return (have_boring_call || pats_were_discarded, trimmed_pats) }
+ -- If any of the calls does not give rise to a specialisation, either
+ -- because it is boring, or because there are too many specialisations,
+ -- return a flag to say so, so that we know to keep the original function.
-trim_pats :: ScEnv -> Id -> SpecInfo -> [CallPat] -> [CallPat]
+trim_pats :: ScEnv -> Id -> SpecInfo -> [CallPat] -> (Bool, [CallPat])
+-- True <=> some patterns were discarded
-- See Note [Choosing patterns]
trim_pats env fn (SI { si_n_specs = done_spec_count }) pats
| sc_force env
|| isNothing mb_scc
|| n_remaining >= n_pats
= -- pprTrace "trim_pats: no-trim" (ppr (sc_force env) $$ ppr mb_scc $$ ppr n_remaining $$ ppr n_pats)
- pats -- No need to trim
+ (False, pats) -- No need to trim
| otherwise
= emit_trace $ -- Need to trim, so keep the best ones
- take n_remaining sorted_pats
+ (True, take n_remaining sorted_pats)
where
n_pats = length pats
@@ -2041,7 +2082,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats
pat_cons :: CallPat -> Int
-- How many data constructors of literals are in
-- the pattern. More data-cons => less general
- pat_cons (qs, ps) = foldr ((+) . n_cons) 0 ps
+ pat_cons (CP { cp_qvars = qs, cp_args = ps })
+ = foldr ((+) . n_cons) 0 ps
where
q_set = mkVarSet qs
n_cons (Var v) | v `elemVarSet` q_set = 0
@@ -2072,12 +2114,21 @@ callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat)
-- Type variables come first, since they may scope
-- over the following term variables
-- The [CoreExpr] are the argument patterns for the rule
-callToPats env bndr_occs call@(Call _ args con_env)
- | args `ltLength` bndr_occs -- Check saturated
- = return Nothing
- | otherwise
+callToPats env bndr_occs call@(Call fn args con_env)
= do { let in_scope = substInScope (sc_subst env)
- ; (interesting, pats) <- argsToPats env in_scope con_env args bndr_occs
+
+ ; pairs <- zipWithM (argToPat env in_scope con_env) args bndr_occs
+ -- This zip trims the args to be no longer than
+ -- the lambdas in the function definition (bndr_occs)
+
+ -- Drop boring patterns from the end
+ -- See Note [SpecConstr call patterns]
+ ; let pairs' | isJoinId fn = pairs
+ | otherwise = dropWhileEnd is_boring pairs
+ is_boring (interesting, _) = not interesting
+ (interesting_s, pats) = unzip pairs'
+ interesting = or interesting_s
+
; let pat_fvs = exprsFreeVarsList pats
-- To get determinism we need the list of free variables in
-- deterministic order. Otherwise we end up creating
@@ -2107,18 +2158,16 @@ callToPats env bndr_occs call@(Call _ args con_env)
bad_covars :: CoVarSet
bad_covars = mapUnionVarSet get_bad_covars pats
get_bad_covars :: CoreArg -> CoVarSet
- get_bad_covars (Type ty)
- = filterVarSet (\v -> isId v && not (is_in_scope v)) $
- tyCoVarsOfType ty
- get_bad_covars _
- = emptyVarSet
+ get_bad_covars (Type ty) = filterVarSet bad_covar (tyCoVarsOfType ty)
+ get_bad_covars _ = emptyVarSet
+ bad_covar v = isId v && not (is_in_scope v)
; -- pprTrace "callToPats" (ppr args $$ ppr bndr_occs) $
WARN( not (isEmptyVarSet bad_covars)
, text "SpecConstr: bad covars:" <+> ppr bad_covars
$$ ppr call )
if interesting && isEmptyVarSet bad_covars
- then return (Just (qvars', pats))
+ then return (Just (CP { cp_qvars = qvars', cp_args = pats }))
else return Nothing }
-- argToPat takes an actual argument, and returns an abstracted
@@ -2204,10 +2253,10 @@ argToPat env in_scope val_env arg arg_occ
| Just (ConVal (DataAlt dc) args) <- isValue val_env arg
, not (ignoreDataCon env dc) -- See Note [NoSpecConstr]
, Just arg_occs <- mb_scrut dc
- = do { let (ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) args
- ; (_, args') <- argsToPats env in_scope val_env rest_args arg_occs
- ; return (True,
- mkConApp dc (ty_args ++ args')) }
+ = do { let (ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) args
+ ; prs <- zipWithM (argToPat env in_scope val_env) rest_args arg_occs
+ ; let args' = map snd prs
+ ; return (True, mkConApp dc (ty_args ++ args')) }
where
mb_scrut dc = case arg_occ of
ScrutOcc bs | Just occs <- lookupUFM bs dc
@@ -2266,14 +2315,6 @@ wildCardPat ty
; let id = mkSysLocalOrCoVar (fsLit "sc") uniq Many ty
; return (False, varToCoreExpr id) }
-argsToPats :: ScEnv -> InScopeSet -> ValueEnv
- -> [CoreArg] -> [ArgOcc] -- Should be same length
- -> UniqSM (Bool, [CoreArg])
-argsToPats env in_scope val_env args occs
- = do { stuff <- zipWithM (argToPat env in_scope val_env) args occs
- ; let (interesting_s, args') = unzip stuff
- ; return (or interesting_s, args') }
-
isValue :: ValueEnv -> CoreExpr -> Maybe Value
isValue _env (Lit lit)
| litIsLifted lit = Nothing
@@ -2324,7 +2365,8 @@ valueIsWorkFree LambdaVal = True
valueIsWorkFree (ConVal _ args) = all exprIsWorkFree args
samePat :: CallPat -> CallPat -> Bool
-samePat (vs1, as1) (vs2, as2)
+samePat (CP { cp_qvars = vs1, cp_args = as1 })
+ (CP { cp_qvars = vs2, cp_args = as2 })
= all2 same as1 as2
where
same (Var v1) (Var v2)
diff --git a/compiler/GHC/Core/Opt/Specialise.hs b/compiler/GHC/Core/Opt/Specialise.hs
index ffb50d45c7..fc62f5fa8a 100644
--- a/compiler/GHC/Core/Opt/Specialise.hs
+++ b/compiler/GHC/Core/Opt/Specialise.hs
@@ -2528,7 +2528,7 @@ pragma made the program slower! The reason was that the specialised
function $sinsertWith arising from the pragma looked rather like `f`
above, and failed to specialise a call in its body like wimwam.
Without the pragma, the original call to `insertWith` was completely
-monomorpic, and speciased in one go.
+monomorphic, and specialised in one go.
-}
instance Outputable DictBind where
diff --git a/testsuite/tests/simplCore/should_compile/T19672.hs b/testsuite/tests/simplCore/should_compile/T19672.hs
new file mode 100644
index 0000000000..e1f70f2b43
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T19672.hs
@@ -0,0 +1,7 @@
+module Foo where
+
+wimwam :: [a] -> Int -> Int
+wimwam [] x = x
+wimwam (y:ys) x = wimwam ys 0
+
+bar xs = map (wimwam [True]) xs
diff --git a/testsuite/tests/simplCore/should_compile/T19672.stderr b/testsuite/tests/simplCore/should_compile/T19672.stderr
new file mode 100644
index 0000000000..56b7c18fa9
--- /dev/null
+++ b/testsuite/tests/simplCore/should_compile/T19672.stderr
@@ -0,0 +1,8 @@
+
+==================== Tidy Core rules ====================
+"SC:wimwam0"
+ forall (sc :: Bool) (sc1 :: [Bool]).
+ wimwam @Bool (: @Bool sc sc1)
+ = bar_$swimwam sc sc1
+
+
diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T
index 623da259ef..dba67fa80b 100644
--- a/testsuite/tests/simplCore/should_compile/all.T
+++ b/testsuite/tests/simplCore/should_compile/all.T
@@ -355,3 +355,6 @@ test('T19581', [ grep_errmsg(r'case') ], compile, ['-O -ddump-simpl -dsuppress-u
test('T19599', normal, compile, ['-O -ddump-rules'])
test('T19599a', normal, compile, ['-O -ddump-rules'])
test('T13873', [ grep_errmsg(r'SPEC') ], compile, ['-O -ddump-rules'])
+
+# Look for a specialisation rule for wimwam
+test('T19672', normal, compile, ['-O2 -ddump-rules'])