diff options
-rw-r--r-- | compiler/GHC/Core/Opt/SpecConstr.hs | 220 | ||||
-rw-r--r-- | compiler/GHC/Core/Opt/Specialise.hs | 2 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T19672.hs | 7 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/T19672.stderr | 8 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/all.T | 3 |
5 files changed, 150 insertions, 90 deletions
diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs index db4701d45a..f21d0205f5 100644 --- a/compiler/GHC/Core/Opt/SpecConstr.hs +++ b/compiler/GHC/Core/Opt/SpecConstr.hs @@ -62,7 +62,7 @@ import GHC.Data.FastString import GHC.Types.Unique.FM import GHC.Utils.Monad import Control.Monad ( zipWithM ) -import Data.List (nubBy, sortBy, partition) +import Data.List (nubBy, sortBy, partition, dropWhileEnd, mapAccumL ) import GHC.Builtin.Names ( specTyConKey ) import GHC.Unit.Module import GHC.Exts( SpecConstrAnnotation(..) ) @@ -946,10 +946,13 @@ extendRecBndrs env bndrs = (env { sc_subst = subst' }, bndrs') where (subst', bndrs') = substRecBndrs (sc_subst env) bndrs +extendBndrs :: ScEnv -> [Var] -> (ScEnv, [Var]) +extendBndrs env bndrs = mapAccumL extendBndr env bndrs + extendBndr :: ScEnv -> Var -> (ScEnv, Var) -extendBndr env bndr = (env { sc_subst = subst' }, bndr') - where - (subst', bndr') = substBndr (sc_subst env) bndr +extendBndr env bndr = (env { sc_subst = subst' }, bndr') + where + (subst', bndr') = substBndr (sc_subst env) bndr extendValEnv :: ScEnv -> Id -> Maybe Value -> ScEnv extendValEnv env _ Nothing = env @@ -1102,6 +1105,9 @@ data Call = Call Id [CoreArg] ValueEnv -- The arguments of the call, together with the -- env giving the constructor bindings at the call site -- We keep the function mainly for debug output + -- + -- The call is not necessarily saturated; we just put + -- in however many args are visible at the call site instance Outputable ScUsage where ppr (SCU { scu_calls = calls, scu_occs = occs }) @@ -1399,12 +1405,6 @@ scTopBindEnv env (NonRec bndr rhs) ---------------------- scTopBind :: ScEnv -> ScUsage -> CoreBind -> UniqSM (ScUsage, CoreBind) -{- -scTopBind _ usage _ - | pprTrace "scTopBind_usage" (ppr (scu_calls usage)) False - = error "false" --} - scTopBind env body_usage (Rec prs) | Just threshold <- sc_size env , not force_spec @@ -1603,15 +1603,9 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs = -- pprTrace "specialise bot" (ppr fn) $ return (nullUsage, spec_info) - | isNeverActive (idInlineActivation fn) -- See Note [Transfer activation] - || null arg_bndrs -- Only specialise functions - = -- pprTrace "specialise inactive" (ppr fn) $ - case mb_unspec of -- Behave as if there was a single, boring call - Just rhs_usg -> return (rhs_usg, spec_info { si_mb_unspec = Nothing }) - -- See Note [spec_usg includes rhs_usg] - Nothing -> return (nullUsage, spec_info) - - | Just all_calls <- lookupVarEnv bind_calls fn + | not (isNeverActive (idInlineActivation fn)) -- See Note [Transfer activation] + , not (null arg_bndrs) -- Only specialise functions + , Just all_calls <- lookupVarEnv bind_calls fn -- Some calls to it = -- pprTrace "specialise entry {" (ppr fn <+> ppr all_calls) $ do { (boring_call, new_pats) <- callsToNewPats env fn spec_info arg_occs all_calls @@ -1650,10 +1644,13 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs , si_n_specs = spec_count + n_pats , si_mb_unspec = mb_unspec' }) } - | otherwise -- No new seeds, so return nullUsage - = return (nullUsage, spec_info) - - + | otherwise -- No calls, inactive, or not a function + -- Behave as if there was a single, boring call + = -- pprTrace "specialise inactive" (ppr fn $$ ppr mb_unspec) $ + case mb_unspec of -- Behave as if there was a single, boring call + Just rhs_usg -> return (rhs_usg, spec_info { si_mb_unspec = Nothing }) + -- See Note [spec_usg includes rhs_usg] + Nothing -> return (nullUsage, spec_info) --------------------- @@ -1686,58 +1683,70 @@ spec_one :: ScEnv f (b,c) ((:) (a,(b,c)) (x,v) hw) = f_spec b c v hw -} -spec_one env fn arg_bndrs body (call_pat@(qvars, pats), rule_number) +spec_one env fn arg_bndrs body (call_pat, rule_number) + | CP { cp_qvars = qvars, cp_args = pats } <- call_pat = do { spec_uniq <- getUniqueM - ; let spec_env = extendScSubstList (extendScInScope env qvars) - (arg_bndrs `zip` pats) - fn_name = idName fn - fn_loc = nameSrcSpan fn_name - fn_occ = nameOccName fn_name - spec_occ = mkSpecOcc fn_occ + ; let env1 = extendScSubstList (extendScInScope env qvars) + (arg_bndrs `zip` pats) + (body_env, extra_bndrs) = extendBndrs env1 (dropList pats arg_bndrs) + -- Remember, there may be fewer pats than arg_bndrs + -- See Note [SpecConstr call patterns] + + fn_name = idName fn + fn_loc = nameSrcSpan fn_name + fn_occ = nameOccName fn_name + spec_occ = mkSpecOcc fn_occ -- We use fn_occ rather than fn in the rule_name string -- as we don't want the uniq to end up in the rule, and -- hence in the ABI, as that can cause spurious ABI -- changes (#4012). rule_name = mkFastString ("SC:" ++ occNameString fn_occ ++ show rule_number) spec_name = mkInternalName spec_uniq spec_occ fn_loc --- ; pprTrace "{spec_one" (ppr (sc_count env) <+> ppr fn --- <+> ppr pats <+> text "-->" <+> ppr spec_name) $ +-- ; pprTrace "spec_one {" (vcat [ text "function:" <+> ppr fn <+> ppr (idUnique fn) +-- , text "sc_count:" <+> ppr (sc_count env) +-- , text "pats:" <+> ppr pats +-- , text "-->" <+> ppr spec_name +-- , text "bndrs" <+> ppr arg_bndrs +-- , text "body" <+> ppr body +-- , text "how_bound" <+> ppr (sc_how_bound env) ]) $ -- return () -- Specialise the body - ; (spec_usg, spec_body) <- scExpr spec_env body + ; (spec_usg, spec_body) <- scExpr body_env body --- ; pprTrace "done spec_one}" (ppr fn) $ +-- ; pprTrace "done spec_one }" (ppr fn $$ ppr (scu_calls spec_usg)) $ -- return () -- And build the results - ; let (spec_lam_args, spec_call_args) = mkWorkerArgs (sc_dflags env) - qvars body_ty - -- Usual w/w hack to avoid generating + ; let spec_body_ty = exprType spec_body + spec_lam_args1 = qvars ++ extra_bndrs + (spec_lam_args, spec_call_args) = mkWorkerArgs (sc_dflags env) + spec_lam_args1 spec_body_ty + -- mkWorkerArgs: usual w/w hack to avoid generating -- a spec_rhs of unlifted type and no args - spec_lam_args_str = handOutStrictnessInformation (fst (splitDmdSig spec_str)) spec_lam_args + spec_str = calcSpecStrictness fn spec_lam_args pats + spec_lam_args_str = handOutStrictnessInformation spec_str spec_lam_args -- Annotate the variables with the strictness information from -- the function (see Note [Strictness information in worker binders]) spec_join_arity | isJoinId fn = Just (length spec_lam_args) | otherwise = Nothing spec_id = mkLocalId spec_name Many - (mkLamTypes spec_lam_args body_ty) + (mkLamTypes spec_lam_args spec_body_ty) -- See Note [Transfer strictness] `setIdDmdSig` spec_str `setIdCprSig` topCprSig `setIdArity` count isId spec_lam_args `asJoinId_maybe` spec_join_arity - spec_str = calcSpecStrictness fn spec_lam_args pats -- Conditionally use result of new worker-wrapper transform spec_rhs = mkLams spec_lam_args_str spec_body - body_ty = exprType spec_body - rule_rhs = mkVarApps (Var spec_id) spec_call_args + rule_rhs = mkVarApps (Var spec_id) $ + dropTail (length extra_bndrs) spec_call_args inline_act = idInlineActivation fn - this_mod = sc_module spec_env + this_mod = sc_module env rule = mkRule this_mod True {- Auto -} True {- Local -} rule_name inline_act fn_name qvars pats rule_rhs -- See Note [Transfer activation] @@ -1747,8 +1756,9 @@ spec_one env fn arg_bndrs body (call_pat@(qvars, pats), rule_number) -- See Note [Strictness information in worker binders] -handOutStrictnessInformation :: [Demand] -> [Var] -> [Var] -handOutStrictnessInformation = go +handOutStrictnessInformation :: DmdSig -> [Var] -> [Var] +handOutStrictnessInformation str vs + = go (fst (splitDmdSig str)) vs where go _ [] = [] go [] vs = vs @@ -1757,7 +1767,7 @@ handOutStrictnessInformation = go calcSpecStrictness :: Id -- The original function -> [Var] -> [CoreExpr] -- Call pattern - -> DmdSig -- Strictness of specialised thing + -> DmdSig -- Strictness of specialised thing -- See Note [Transfer strictness] calcSpecStrictness fn qvars pats = mkClosedDmdSig spec_dmds div @@ -1871,19 +1881,39 @@ See # 5458. Yuk. Note [SpecConstr call patterns] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A "call patterns" that we collect is going to become the LHS of a RULE. -It's important that it doesn't have + +Wrinkles: + +* The list of argument patterns, cp_args, is no longer than the + visible lambdas of the binding, ri_arg_occs. This is done via + the zipWithM in callToPats. + +* The list of argument patterns can certainly be shorter than the + lambdas in the function definition (under-saturated). For example + f x y = case x of { True -> e1; False -> e2 } + ....map (f True) e... + We want to specialise `f` for `f True`. + +* In fact we deliberately shrink the list of argument patterns, + cp_args, by trimming off all the boring ones at the end (see + `dropWhileEnd is_boring` in callToPats). Since the RULE only + applies when it is saturated, this shrinking makes the RULE more + applicable. But it does mean that the argument patterns do not + necessarily saturate the lambdas of the function. + +* It's important that the pattern arguments do not look like e |> Refl -or + or e |> g1 |> g2 -because both of these will be optimised by Simplify.simplRule. In the -former case such optimisation benign, because the rule will match more -terms; but in the latter we may lose a binding of 'g1' or 'g2', and -end up with a rule LHS that doesn't bind the template variables -(#10602). + because both of these will be optimised by Simplify.simplRule. In the + former case such optimisation benign, because the rule will match more + terms; but in the latter we may lose a binding of 'g1' or 'g2', and + end up with a rule LHS that doesn't bind the template variables + (#10602). -The simplifier eliminates such things, but SpecConstr itself constructs -new terms by substituting. So the 'mkCast' in the Cast case of scExpr -is very important! + The simplifier eliminates such things, but SpecConstr itself constructs + new terms by substituting. So the 'mkCast' in the Cast case of scExpr + is very important! Note [Choosing patterns] ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1968,8 +1998,14 @@ alternative would be to discard calls that mention coercion variables only in kind-casts, but I'm doing the simple thing for now. -} -type CallPat = ([Var], [CoreExpr]) -- Quantified variables and arguments - -- See Note [SpecConstr call patterns] +data CallPat = CP { cp_qvars :: [Var] -- Quantified variables + , cp_args :: [CoreExpr] } -- Arguments + -- See Note [SpecConstr call patterns] + +instance Outputable CallPat where + ppr (CP { cp_qvars = qvars, cp_args = args }) + = text "CP" <> braces (sep [ text "cp_qvars =" <+> ppr qvars <> comma + , text "cp_args =" <+> ppr args ]) callsToNewPats :: ScEnv -> Id -> SpecInfo @@ -1995,34 +2031,39 @@ callsToNewPats env fn spec_info@(SI { si_specs = done_specs }) bndr_occs calls -- Remove ones that have too many worker variables small_pats = filterOut too_big non_dups - too_big (vars,args) = not (isWorkerSmallEnough (sc_dflags env) (valArgCount args) vars) + too_big (CP { cp_qvars = vars, cp_args = args }) + = not (isWorkerSmallEnough (sc_dflags env) (valArgCount args) vars) -- We are about to construct w/w pair in 'spec_one'. -- Omit specialisation leading to high arity workers. -- See Note [Limit w/w arity] in GHC.Core.Opt.WorkWrap.Utils -- Discard specialisations if there are too many of them - trimmed_pats = trim_pats env fn spec_info small_pats + (pats_were_discarded, trimmed_pats) = trim_pats env fn spec_info small_pats -- ; pprTrace "callsToPats" (vcat [ text "calls to" <+> ppr fn <> colon <+> ppr calls -- , text "done_specs:" <+> ppr (map os_pat done_specs) -- , text "good_pats:" <+> ppr good_pats ]) $ -- return () - ; return (have_boring_call, trimmed_pats) } + ; return (have_boring_call || pats_were_discarded, trimmed_pats) } + -- If any of the calls does not give rise to a specialisation, either + -- because it is boring, or because there are too many specialisations, + -- return a flag to say so, so that we know to keep the original function. -trim_pats :: ScEnv -> Id -> SpecInfo -> [CallPat] -> [CallPat] +trim_pats :: ScEnv -> Id -> SpecInfo -> [CallPat] -> (Bool, [CallPat]) +-- True <=> some patterns were discarded -- See Note [Choosing patterns] trim_pats env fn (SI { si_n_specs = done_spec_count }) pats | sc_force env || isNothing mb_scc || n_remaining >= n_pats = -- pprTrace "trim_pats: no-trim" (ppr (sc_force env) $$ ppr mb_scc $$ ppr n_remaining $$ ppr n_pats) - pats -- No need to trim + (False, pats) -- No need to trim | otherwise = emit_trace $ -- Need to trim, so keep the best ones - take n_remaining sorted_pats + (True, take n_remaining sorted_pats) where n_pats = length pats @@ -2041,7 +2082,8 @@ trim_pats env fn (SI { si_n_specs = done_spec_count }) pats pat_cons :: CallPat -> Int -- How many data constructors of literals are in -- the pattern. More data-cons => less general - pat_cons (qs, ps) = foldr ((+) . n_cons) 0 ps + pat_cons (CP { cp_qvars = qs, cp_args = ps }) + = foldr ((+) . n_cons) 0 ps where q_set = mkVarSet qs n_cons (Var v) | v `elemVarSet` q_set = 0 @@ -2072,12 +2114,21 @@ callToPats :: ScEnv -> [ArgOcc] -> Call -> UniqSM (Maybe CallPat) -- Type variables come first, since they may scope -- over the following term variables -- The [CoreExpr] are the argument patterns for the rule -callToPats env bndr_occs call@(Call _ args con_env) - | args `ltLength` bndr_occs -- Check saturated - = return Nothing - | otherwise +callToPats env bndr_occs call@(Call fn args con_env) = do { let in_scope = substInScope (sc_subst env) - ; (interesting, pats) <- argsToPats env in_scope con_env args bndr_occs + + ; pairs <- zipWithM (argToPat env in_scope con_env) args bndr_occs + -- This zip trims the args to be no longer than + -- the lambdas in the function definition (bndr_occs) + + -- Drop boring patterns from the end + -- See Note [SpecConstr call patterns] + ; let pairs' | isJoinId fn = pairs + | otherwise = dropWhileEnd is_boring pairs + is_boring (interesting, _) = not interesting + (interesting_s, pats) = unzip pairs' + interesting = or interesting_s + ; let pat_fvs = exprsFreeVarsList pats -- To get determinism we need the list of free variables in -- deterministic order. Otherwise we end up creating @@ -2107,18 +2158,16 @@ callToPats env bndr_occs call@(Call _ args con_env) bad_covars :: CoVarSet bad_covars = mapUnionVarSet get_bad_covars pats get_bad_covars :: CoreArg -> CoVarSet - get_bad_covars (Type ty) - = filterVarSet (\v -> isId v && not (is_in_scope v)) $ - tyCoVarsOfType ty - get_bad_covars _ - = emptyVarSet + get_bad_covars (Type ty) = filterVarSet bad_covar (tyCoVarsOfType ty) + get_bad_covars _ = emptyVarSet + bad_covar v = isId v && not (is_in_scope v) ; -- pprTrace "callToPats" (ppr args $$ ppr bndr_occs) $ WARN( not (isEmptyVarSet bad_covars) , text "SpecConstr: bad covars:" <+> ppr bad_covars $$ ppr call ) if interesting && isEmptyVarSet bad_covars - then return (Just (qvars', pats)) + then return (Just (CP { cp_qvars = qvars', cp_args = pats })) else return Nothing } -- argToPat takes an actual argument, and returns an abstracted @@ -2204,10 +2253,10 @@ argToPat env in_scope val_env arg arg_occ | Just (ConVal (DataAlt dc) args) <- isValue val_env arg , not (ignoreDataCon env dc) -- See Note [NoSpecConstr] , Just arg_occs <- mb_scrut dc - = do { let (ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) args - ; (_, args') <- argsToPats env in_scope val_env rest_args arg_occs - ; return (True, - mkConApp dc (ty_args ++ args')) } + = do { let (ty_args, rest_args) = splitAtList (dataConUnivTyVars dc) args + ; prs <- zipWithM (argToPat env in_scope val_env) rest_args arg_occs + ; let args' = map snd prs + ; return (True, mkConApp dc (ty_args ++ args')) } where mb_scrut dc = case arg_occ of ScrutOcc bs | Just occs <- lookupUFM bs dc @@ -2266,14 +2315,6 @@ wildCardPat ty ; let id = mkSysLocalOrCoVar (fsLit "sc") uniq Many ty ; return (False, varToCoreExpr id) } -argsToPats :: ScEnv -> InScopeSet -> ValueEnv - -> [CoreArg] -> [ArgOcc] -- Should be same length - -> UniqSM (Bool, [CoreArg]) -argsToPats env in_scope val_env args occs - = do { stuff <- zipWithM (argToPat env in_scope val_env) args occs - ; let (interesting_s, args') = unzip stuff - ; return (or interesting_s, args') } - isValue :: ValueEnv -> CoreExpr -> Maybe Value isValue _env (Lit lit) | litIsLifted lit = Nothing @@ -2324,7 +2365,8 @@ valueIsWorkFree LambdaVal = True valueIsWorkFree (ConVal _ args) = all exprIsWorkFree args samePat :: CallPat -> CallPat -> Bool -samePat (vs1, as1) (vs2, as2) +samePat (CP { cp_qvars = vs1, cp_args = as1 }) + (CP { cp_qvars = vs2, cp_args = as2 }) = all2 same as1 as2 where same (Var v1) (Var v2) diff --git a/compiler/GHC/Core/Opt/Specialise.hs b/compiler/GHC/Core/Opt/Specialise.hs index ffb50d45c7..fc62f5fa8a 100644 --- a/compiler/GHC/Core/Opt/Specialise.hs +++ b/compiler/GHC/Core/Opt/Specialise.hs @@ -2528,7 +2528,7 @@ pragma made the program slower! The reason was that the specialised function $sinsertWith arising from the pragma looked rather like `f` above, and failed to specialise a call in its body like wimwam. Without the pragma, the original call to `insertWith` was completely -monomorpic, and speciased in one go. +monomorphic, and specialised in one go. -} instance Outputable DictBind where diff --git a/testsuite/tests/simplCore/should_compile/T19672.hs b/testsuite/tests/simplCore/should_compile/T19672.hs new file mode 100644 index 0000000000..e1f70f2b43 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T19672.hs @@ -0,0 +1,7 @@ +module Foo where + +wimwam :: [a] -> Int -> Int +wimwam [] x = x +wimwam (y:ys) x = wimwam ys 0 + +bar xs = map (wimwam [True]) xs diff --git a/testsuite/tests/simplCore/should_compile/T19672.stderr b/testsuite/tests/simplCore/should_compile/T19672.stderr new file mode 100644 index 0000000000..56b7c18fa9 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/T19672.stderr @@ -0,0 +1,8 @@ + +==================== Tidy Core rules ==================== +"SC:wimwam0" + forall (sc :: Bool) (sc1 :: [Bool]). + wimwam @Bool (: @Bool sc sc1) + = bar_$swimwam sc sc1 + + diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T index 623da259ef..dba67fa80b 100644 --- a/testsuite/tests/simplCore/should_compile/all.T +++ b/testsuite/tests/simplCore/should_compile/all.T @@ -355,3 +355,6 @@ test('T19581', [ grep_errmsg(r'case') ], compile, ['-O -ddump-simpl -dsuppress-u test('T19599', normal, compile, ['-O -ddump-rules']) test('T19599a', normal, compile, ['-O -ddump-rules']) test('T13873', [ grep_errmsg(r'SPEC') ], compile, ['-O -ddump-rules']) + +# Look for a specialisation rule for wimwam +test('T19672', normal, compile, ['-O2 -ddump-rules']) |