diff options
-rw-r--r-- | compiler/GHC/Core/Rules.hs | 184 | ||||
-rw-r--r-- | compiler/GHC/Types/Var/Env.hs | 24 | ||||
-rw-r--r-- | docs/users_guide/9.8.1-notes.rst | 10 | ||||
-rw-r--r-- | docs/users_guide/exts/rewrite_rules.rst | 26 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/RewriteHigherOrderPatterns.hs | 46 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/RewriteHigherOrderPatterns.stderr | 30 | ||||
-rw-r--r-- | testsuite/tests/simplCore/should_compile/all.T | 1 |
7 files changed, 304 insertions, 17 deletions
diff --git a/compiler/GHC/Core/Rules.hs b/compiler/GHC/Core/Rules.hs index d635d6aebe..15b1946230 100644 --- a/compiler/GHC/Core/Rules.hs +++ b/compiler/GHC/Core/Rules.hs @@ -62,6 +62,7 @@ import GHC.Core.Coercion as Coercion import GHC.Core.Tidy ( tidyRules ) import GHC.Core.Map.Expr ( eqCoreExpr ) import GHC.Core.Opt.Arity( etaExpandToJoinPointRule ) +import GHC.Core.Make ( mkCoreLams ) import GHC.Tc.Utils.TcType ( tcSplitTyConApp_maybe ) import GHC.Builtin.Types ( anyTypeOfKind ) @@ -82,6 +83,7 @@ import GHC.Types.Basic import GHC.Data.FastString import GHC.Data.Maybe import GHC.Data.Bag +import GHC.Data.List.SetOps( hasNoDups ) import GHC.Utils.Misc as Utils import GHC.Utils.Outputable @@ -881,8 +883,13 @@ rvInScopeEnv renv = ISE (rnInScopeSet (rv_lcl renv)) (rv_unf renv) -- * The BindWrapper in a RuleSubst are the bindings floated out -- from nested matches; see the Let case of match, below -- -data RuleSubst = RS { rs_tv_subst :: TvSubstEnv -- Range is the - , rs_id_subst :: IdSubstEnv -- template variables +data RuleSubst = RS { -- Substitution; applied only to the template, not the target + -- Domain is the template variables + -- Range never includes template variables + rs_tv_subst :: TvSubstEnv + , rs_id_subst :: IdSubstEnv + + -- Floated bindings , rs_binds :: BindWrapper -- Floated bindings , rs_bndrs :: [Var] -- Variables bound by floated lets } @@ -1059,6 +1066,165 @@ match renv subst e1 (Var v2) mco -- Note [Expanding variables] -- because of the not-inRnEnvR ------------------------ Applications --------------------- +-- See Note [Matching higher order patterns] +match renv@(RV { rv_tmpls = tmpls, rv_lcl = rn_env }) + subst e1@App{} e2 + MRefl -- Like the App case we insist on Refl here + -- See Note [Casts in the target] + | (Var f, args) <- collectArgs e1 + , let f' = rnOccL rn_env f -- See similar rnOccL in match_var + , f' `elemVarSet` tmpls -- (HOP1) + , Just vs2 <- traverse arg_as_lcl_var args -- (HOP2), (HOP3) + , hasNoDups vs2 -- (HOP4) + , not can_decompose_app_instead + = match_tmpl_var renv subst f' (mkCoreLams vs2 e2) + -- match_tmpl_var checks (HOP5) and (HOP6) + where + arg_as_lcl_var :: CoreExpr -> Maybe Var + arg_as_lcl_var (Var v) + | Just v' <- rnOccL_maybe rn_env v + , not (v' `elemVarSet` tmpls) -- rnEnvL contains the template variables + = Just (to_target v') -- to_target: see (W1) + -- in Note [Matching higher order patterns] + arg_as_lcl_var _ = Nothing + + can_decompose_app_instead -- Template (e1 v), target (e2 v), and v # fvs(e2) + = case (e1, e2) of -- See (W2) in Note [Matching higher order patterns] + (App _ (Var v1), App f2 (Var v2)) + -> rnOccL rn_env v1 == rnOccR rn_env v2 + && not (v2 `elemVarSet` exprFreeVars f2) + _ -> False + + ---------------- + -- to_target: see (W1) in Note [Matching higher order patterns] + to_target :: Var -> Var -- From canonical variable back to target-expr variable + to_target v = lookupVarEnv rev_envR v `orElse` v + + rev_envR :: VarEnv Var -- Inverts rnEnvR: from canonical variable + -- back to target-expr variable + rev_envR = nonDetStrictFoldVarEnv_Directly add_one emptyVarEnv (rnEnvR rn_env) + add_one uniq var env = extendVarEnv env var (var `setVarUnique` uniq) + +{- Note [Matching higher order patterns] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Higher order patterns provide a limited form of higher order matching. +See GHC Proposal #555 + https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0555-template-patterns.rst +and #22465 for more details and related work. + +Consider the potential match: + + Template: forall f. foo (\x -> f x) + Target: foo (\x -> x*2 + x) + +The expression `x*2 + x` in the target is not literally an application of a +function to the variable `x`, so the simple application rule does not apply. +However, we can match them modulo beta equivalence with the substitution: + + [f :-> \x -> x*2 + x] + +The general problem of higher order matching is tricky to implement, but +the subproblem which we call /higher order pattern matching/ is sufficient +for the given example and much easier to implement. + +Design: + +We start with terminology. + +* /Template variables/. The forall'd variables are called the template + variables. In the example match above, `f` is a template variable. + +* /Local binders/. The local binders of a rule are the variables bound + inside the template. In the example match above, `x` is a local binder. + Note that local binders can be term variables and type variables. + +A /higher order pattern/ (HOP) is a sub-expression of the template, +of form (f x y z) where: + +* (HOP1) f is a template variable +* (HOP2) x, y, z are local binders (like y in rule "wombat" above; see definitions). +* (HOP3) The arguments x, y, z are term variables +* (HOP4) The arguments x, y, z are distinct (no duplicates) + +Matching of higher order patterns (HOP-matching). A higher order pattern (f x y z) +(in the template) matches any target expression e provided: + +* (HOP5) The target has the same type as the template +* (HOP6) No local binder is free in e, other than x, y, z. + +If these two condition hold, the higher order pattern (f x y z) matches +the target expression e, yielding the substitution [f :-> \x y z. e]. +Notice that this substitution is type preserving, and the RHS +of the substitution has no free local binders. + +HOP matching is small enough to be done in-line in the `match` function. +Two wrinkles: + +(W1) Consider the potential match: + Template: forall f. foo (\x -> f x) + Target: foo (\y -> (y, y)) + During matching we make `x` the canonical variable for the lambdas + and then we see: + Template: f x rnEnvL = [] + Target: (y, y) rnEnvR = [y :-> x] + We could bind [f :-> \x. (x,x)], by applying rnEnvR substitution to the target + expression. But that is tiresome (a) because it involves a traversal, and + (b) because rnEnvR is a VarEnv Var, and we don't have a substitution function + for that. + + So instead, we invert rnEnvR, and apply it to the binders, to get + [f :-> \y. (y,y)]. This is done by `to_target` in the HOP-matching case. + It takes a little bit of thinking to be sure this will work right in the case + of shadowing. E.g. Template (\x y. f x y) Target (\p p. p*p) + Here rnEnvR will be just [p :-> y], so after inversion we'll get + [f :-> \x p. p*p] + but that is fine. + +(W2) This wrinkle concerns the overlp between the new HOP rule and the existing + decompose-application rule. See 3.1 of GHC Proposal #555 for a discussion. + + Consider potential match: + Template: forall f. foo (\x y. Just (f y x)) + Target: foo (\p q. Just (h (1+q) p))) + During matching we will encounter: + Template: f x y + Target: h (1+q) p rnEnvR = [p:->x, q:->y] + The rnEnvR renaming `[p:->x, q:->y]` is done by the matcher (today) on the fly, + to make the bound variables of the template and target "line up". + But now we can: + * Either use the new HOP rule to succeed with + [f :-> \x y. h (1+x) y] + * Or use the existing decompose-application rule to match + (f x) against (h (1+q)) and `y` against `p`. + This will succeed with + [f :-> \y. h (1+y)] + + Note that the result of the HOP rule will always be eta-equivalent to + the result of the decompose-application rule. But the proposal specifies + that we should use the decompose-application rule because it involves + less eta-expansion. + + But take care: + Template: forall f. foo (\x y. Just (f y x)) + Target: foo (\p q. Just (h (p+q) p))) + Then during matching we will encounter: + Template: f x y + Target: h (p+q) p rnEnvR = [p:->x, q:->y] + Now, we cannot use the decompose-application rule, because p is free in + (h (p+q)). So, we can only use the new HOP rule. + +(W3) You might wonder if a HOP can have /type/ arguments, thus (in Core) + RULE forall h. + f (\(MkT @b (d::Num b) (x::b)) -> h @b d x) = ... + where the HOP is (h @b d x). In principle this might be possible, but + it seems fragile; e.g. we would still need to insist that the (invisible) + @b was a type variable. And since `h` gets a polymoprhic type, that + type would have to be declared by the programmer. + + Maybe one day. But for now, we insist (in `arg_as_lcl_var`)that a HOP + has only term-variable arguments. +-} + -- Note the match on MRefl! We fail if there is a cast in the target -- (e1 e2) ~ (d1 d2) |> co -- See Note [Cancel reflexive casts]: in the Cast equations for 'match' @@ -1358,7 +1524,7 @@ match_tmpl_var renv@(RV { rv_lcl = rn_env, rv_fltR = flt_env }) -- if the right side of the env is empty. | anyInRnEnvR rn_env (exprFreeVars e2) = Nothing -- Skolem-escape failure - -- e.g. match forall a. (\x-> a x) against (\y. y y) + -- e.g. match forall a. (\x -> a) against (\y -> y) | Just e1' <- lookupVarEnv id_subst v1' = if eqCoreExpr e1' e2' @@ -1378,6 +1544,7 @@ match_tmpl_var renv@(RV { rv_lcl = rn_env, rv_fltR = flt_env }) -- because no free var of e2' is in the rnEnvR of the envt ------------------------------------------ + match_ty :: RuleMatchEnv -> RuleSubst -> Type -- Template @@ -1389,12 +1556,13 @@ match_ty :: RuleMatchEnv -- newtype T = MkT Int -- We only want to replace (f T) with f', not (f Int). -match_ty renv subst ty1 ty2 - = do { tv_subst' - <- Unify.ruleMatchTyKiX (rv_tmpls renv) (rv_lcl renv) tv_subst ty1 ty2 +match_ty (RV { rv_tmpls = tmpls, rv_lcl = rn_env }) + subst@(RS { rs_tv_subst = tv_subst }) + ty1 ty2 + = do { tv_subst' <- Unify.ruleMatchTyKiX tmpls rn_env tv_subst ty1 ty2 + -- NB: ruleMatchTyKiX applis tv_subst to ty1 only + -- and of course only binds 'tmpls' ; return (subst { rs_tv_subst = tv_subst' }) } - where - tv_subst = rs_tv_subst subst {- Note [Matching variable types] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/GHC/Types/Var/Env.hs b/compiler/GHC/Types/Var/Env.hs index 2704a2b39b..59bd1dbf56 100644 --- a/compiler/GHC/Types/Var/Env.hs +++ b/compiler/GHC/Types/Var/Env.hs @@ -24,6 +24,7 @@ module GHC.Types.Var.Env ( elemVarEnvByKey, filterVarEnv, restrictVarEnv, partitionVarEnv, varEnvDomain, + nonDetStrictFoldVarEnv_Directly, -- * Deterministic Var environments (maps) DVarEnv, DIdEnv, DTyVarEnv, @@ -318,25 +319,30 @@ rnBndr2 env bL bR = fst $ rnBndr2_var env bL bR rnBndr2_var :: RnEnv2 -> Var -> Var -> (RnEnv2, Var) -- ^ Similar to 'rnBndr2' but returns the new variable as well as the --- new environment +-- new environment. +-- Postcondition: the type of the returned Var is that of bR rnBndr2_var (RV2 { envL = envL, envR = envR, in_scope = in_scope }) bL bR - = (RV2 { envL = extendVarEnv envL bL new_b -- See Note - , envR = extendVarEnv envR bR new_b -- [Rebinding] + = (RV2 { envL = extendVarEnv envL bL new_b -- See Note + , envR = extendVarEnv envR bR new_b -- [Rebinding] , in_scope = extendInScopeSet in_scope new_b }, new_b) where -- Find a new binder not in scope in either term - new_b | not (bL `elemInScopeSet` in_scope) = bL - | not (bR `elemInScopeSet` in_scope) = bR - | otherwise = uniqAway' in_scope bL + -- To avoid calling `uniqAway`, we try bL's Unique + -- But we always return a Var whose type is that of bR + new_b | not (bR `elemInScopeSet` in_scope) = bR + | not (bL `elemInScopeSet` in_scope) = bR `setVarUnique` varUnique bL + | otherwise = uniqAway' in_scope bR -- Note [Rebinding] -- ~~~~~~~~~~~~~~~~ -- If the new var is the same as the old one, note that - -- the extendVarEnv *deletes* any current renaming + -- the extendVarEnv *replaces* any current renaming -- E.g. (\x. \x. ...) ~ (\y. \z. ...) -- + -- envL envR in_scope -- Inside \x \y { [x->y], [y->y], {y} } - -- \x \z { [x->x], [y->y, z->x], {y,x} } + -- \x \z { [x->z], [y->y, z->z], {y,z} } + -- The envL binding [x->y] is replaced by [x->z] rnBndrL :: RnEnv2 -> Var -> (RnEnv2, Var) -- ^ Similar to 'rnBndr2' but used when there's a binder on the left @@ -530,6 +536,7 @@ lookupWithDefaultVarEnv :: VarEnv a -> a -> Var -> a elemVarEnv :: Var -> VarEnv a -> Bool elemVarEnvByKey :: Unique -> VarEnv a -> Bool disjointVarEnv :: VarEnv a -> VarEnv a -> Bool +nonDetStrictFoldVarEnv_Directly :: (Unique -> a -> r -> r) -> r -> VarEnv a -> r elemVarEnv = elemUFM elemVarEnvByKey = elemUFM_Directly @@ -565,6 +572,7 @@ unitVarEnv = unitUFM isEmptyVarEnv = isNullUFM partitionVarEnv = partitionUFM varEnvDomain = domUFMUnVarSet +nonDetStrictFoldVarEnv_Directly = nonDetStrictFoldUFM_Directly restrictVarEnv env vs = filterUFM_Directly keep env diff --git a/docs/users_guide/9.8.1-notes.rst b/docs/users_guide/9.8.1-notes.rst index 6d94368456..14c6d5bff6 100644 --- a/docs/users_guide/9.8.1-notes.rst +++ b/docs/users_guide/9.8.1-notes.rst @@ -11,7 +11,15 @@ Compiler - Added a new warning :ghc-flag:`-Wterm-variable-capture` that helps to make code compatible with the future extension ``RequiredTypeArguments``. -======= + +- Rewrite rules now support a limited form of higher order matching when a + pattern variable is applied to distinct locally bound variables. For example: :: + + forall f. foo (\x -> f x) + + Now matches: :: + + foo (\x -> x*2 + x) GHCi ~~~~ diff --git a/docs/users_guide/exts/rewrite_rules.rst b/docs/users_guide/exts/rewrite_rules.rst index 36c523f032..bd9f788769 100644 --- a/docs/users_guide/exts/rewrite_rules.rst +++ b/docs/users_guide/exts/rewrite_rules.rst @@ -228,6 +228,32 @@ From a semantic point of view: because ``y`` can match against ``0``. +- GHC implements **higher order matching** as described by + `GHC proposal #555 <https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0555-template-patterns.rst>`_. + When a pattern variable is applied to distinct locally bound variables it forms + what we call a **higher order pattern**. + When matching, higher order patterns are treated like pattern variables, but they are + allowed to match expressions that contain the locally bound variables that are part of + the higher order patterns. + + For example, we can use this to fix the broken rule from the example from the + previous bullet point:: + + {-# RULES + "test/case-tup" forall (x :: (Int, Int)) (f :: Int -> Int -> Int) (z :: Int). + test (case x of (l, r) -> f l r) z = case x of (m, n) -> test (f m n) z + #-} + + This modified rule does fire for:: + + prog :: (Int, Int) -> (Int, Int) + prog x = test (case x of (p, q) -> p) 0 + + Under higher order matching, ``f p q`` matches ``p`` by assigning ``f = \p q -> p``. + The resulting code after the rewrite is:: + + prog x = case x of (m, n) -> test ((\p q -> p) m n) 0 + - A rule that has a forall binder with a polymorphic type, is likely to fail to fire. E. g., :: {-# RULES forall (x :: forall a. Num a => a -> a). f x = blah #-} diff --git a/testsuite/tests/simplCore/should_compile/RewriteHigherOrderPatterns.hs b/testsuite/tests/simplCore/should_compile/RewriteHigherOrderPatterns.hs new file mode 100644 index 0000000000..32f4088271 --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/RewriteHigherOrderPatterns.hs @@ -0,0 +1,46 @@ +-- These are the examples from the Higher Order Patterns in Rewrite Rules proposal + +module RewriteHigherOrderPatterns where + +foo :: (Int -> Int) -> Bool +{-# NOINLINE foo #-} +foo _ = False + +{-# RULES "foo" forall f. foo (\x -> f x) = True #-} + +bar :: (Int -> Int -> Int -> Int) -> Bool +{-# NOINLINE bar #-} +bar _ = False + +{-# RULES "bar" forall f. bar (\x y z -> f x y z) = True #-} + +baz :: (Int -> Int) -> Bool +{-# NOINLINE baz #-} +baz _ = False + +{-# RULES "baz" forall f. baz (\x -> f x x) = True #-} + +qux :: (Int -> Int -> Int) -> Bool +{-# NOINLINE qux #-} +qux _ = False + +{-# RULES "qux" forall f. qux (\x y -> f x (2 :: Int) y) = True #-} + +-- instead of + and * we use 'two' and 'three' to avoid cluttering +-- the rule rewrites dump. + +two :: Int -> Int -> Int +{-# NOINLINE two #-} +two _ _ = 2 + +three :: Int -> Int -> Int -> Int +{-# NOINLINE three #-} +three _ _ _ = 3 + +ex1 = foo (\x -> two (two x 2) x) +ex2 = bar (\x y z -> two (two x y) z) +ex3 = bar (\x y z -> two (two x 2) z) +ex4 = baz (\x -> two x (two x 2)) +ex5 = baz (\x -> three (two x 1) 2 x) +ex6 = qux (\x y -> two (two x 2) y) +ex7 = qux (\x y -> three (two x 1) 2 y) diff --git a/testsuite/tests/simplCore/should_compile/RewriteHigherOrderPatterns.stderr b/testsuite/tests/simplCore/should_compile/RewriteHigherOrderPatterns.stderr new file mode 100644 index 0000000000..3a4b54dd9f --- /dev/null +++ b/testsuite/tests/simplCore/should_compile/RewriteHigherOrderPatterns.stderr @@ -0,0 +1,30 @@ +Rule fired + Rule: bar + Module: (RewriteHigherOrderPatterns) + Before: bar ValArg \ x y z -> two (two x y) z + After: (\ f -> True) (\ x y -> two (two x y)) + Cont: Stop[RhsCtxt(NonRecursive)] Bool +Rule fired + Rule: bar + Module: (RewriteHigherOrderPatterns) + Before: bar ValArg \ x _ z -> two (two x (I# 2#)) z + After: (\ f -> True) (\ x _ -> two (two x (I# 2#))) + Cont: Stop[RhsCtxt(NonRecursive)] Bool +Rule fired + Rule: foo + Module: (RewriteHigherOrderPatterns) + Before: foo ValArg \ x -> two (two x (I# 2#)) x + After: (\ f -> True) (\ x -> two (two x (I# 2#)) x) + Cont: Stop[RhsCtxt(NonRecursive)] Bool +Rule fired + Rule: qux + Module: (RewriteHigherOrderPatterns) + Before: qux ValArg \ x y -> three (two x (I# 1#)) (I# 2#) y + After: (\ f -> True) (\ x -> three (two x (I# 1#))) + Cont: Stop[RhsCtxt(NonRecursive)] Bool +Rule fired + Rule: baz + Module: (RewriteHigherOrderPatterns) + Before: baz ValArg \ x -> three (two x (I# 1#)) (I# 2#) x + After: (\ f -> True) (\ x -> three (two x (I# 1#)) (I# 2#)) + Cont: Stop[RhsCtxt(NonRecursive)] Bool diff --git a/testsuite/tests/simplCore/should_compile/all.T b/testsuite/tests/simplCore/should_compile/all.T index 1a2ac5f7d0..2200043dcd 100644 --- a/testsuite/tests/simplCore/should_compile/all.T +++ b/testsuite/tests/simplCore/should_compile/all.T @@ -473,3 +473,4 @@ test('T22715_2', normal, multimod_compile, ['T22715_2', '-v0 -O -fspecialise-agg test('T22802', normal, compile, ['-O']) test('T15205', normal, compile, ['-O -ddump-simpl -dno-typeable-binds -dsuppress-uniques']) +test('RewriteHigherOrderPatterns', normal, compile, ['-O -ddump-rule-rewrites -dsuppress-all -dsuppress-uniques']) |