From 63c3658932d4be877c8f28de1e2fe26865568bb3 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Fri, 6 Mar 2020 11:47:56 -0500 Subject: Make DeriveFunctor-generated code require fewer beta reductions Issue #17880 demonstrates that `DeriveFunctor`-generated code is surprisingly fragile when rank-_n_ types are involved. The culprit is that `$fmap` (the algorithm used to generate `fmap` implementations) was too keen on applying arguments with rank-_n_ types to lambdas, which fail to typecheck more often than not. In this patch, I change `$fmap` (both the specification and the implementation) to produce code that avoids creating as many lambdas, avoiding problems when rank-_n_ field types arise. See the comments titled "Functor instances" in `TcGenFunctor` for a more detailed description. Not only does this fix #17880, but it also ensures that the code that `DeriveFunctor` generates will continue to work after simplified subsumption is implemented (see #17775). What is truly amazing is that #17880 is actually a regression (introduced in GHC 7.6.3) caused by commit 49ca2a37bef18aa57235ff1dbbf1cc0434979b1e, the fix #7436. Prior to that commit, the version of `$fmap` that was used was almost identical to the one used in this patch! Why did that commit change `$fmap` then? It was to avoid severe performance issues that would arise for recursive `fmap` implementations, such as in the example below: ```hs data List a = Nil | Cons a (List a) deriving Functor -- ===> instance Functor List where fmap f Nil = Nil fmap f (Cons x xs) = Cons (f x) (fmap (\y -> f y) xs) ``` The fact that `\y -> f y` was eta expanded caused significant performance overheads. Commit 49ca2a37bef18aa57235ff1dbbf1cc0434979b1e fixed this performance issue, but it went too far. As a result, this patch partially reverts 49ca2a37bef18aa57235ff1dbbf1cc0434979b1e. To ensure that the performance issues pre-#7436 do not resurface, I have taken some precautionary measures: * I have added a special case to `$fmap` for situations where the last type variable in an application of some type occurs directly. If this special case fires, we avoid creating a lambda expression. This ensures that we generate `fmap f (Cons x xs) = Cons (f x) (fmap f xs)` in the derived `Functor List` instance above. For more details, see `Note [Avoid unnecessary eta expansion in derived fmap implementations]` in `TcGenFunctor`. * I have added a `T7436b` test case to ensure that the performance of this derived `Functor List`-style code does not regress. When implementing this, I discovered that `$replace`, the algorithm which generates implementations of `(<$)`, has a special case that is very similar to the `$fmap` special case described above. `$replace` marked this special case with a custom `Replacer` data type, which was a bit overkill. In order to use the same machinery for both `Functor` methods, I ripped out `Replacer` and instead implemented a simple way to detect the special case. See the updated commentary in `Note [Deriving <$]` for more details. --- compiler/typecheck/TcDerivUtils.hs | 2 +- compiler/typecheck/TcGenFunctor.hs | 391 +++++++++++++++------- compiler/typecheck/TcGenGenerics.hs | 2 +- testsuite/tests/deriving/should_compile/T17880.hs | 9 + testsuite/tests/deriving/should_compile/all.T | 1 + testsuite/tests/generics/GenDerivOutput.stderr | 2 +- testsuite/tests/perf/should_run/T7436b.hs | 20 ++ testsuite/tests/perf/should_run/T7436b.stdout | 1 + testsuite/tests/perf/should_run/all.T | 7 + 9 files changed, 310 insertions(+), 125 deletions(-) create mode 100644 testsuite/tests/deriving/should_compile/T17880.hs create mode 100644 testsuite/tests/perf/should_run/T7436b.hs create mode 100644 testsuite/tests/perf/should_run/T7436b.stdout diff --git a/compiler/typecheck/TcDerivUtils.hs b/compiler/typecheck/TcDerivUtils.hs index 5bfbe51ad6..9ff82eee84 100644 --- a/compiler/typecheck/TcDerivUtils.hs +++ b/compiler/typecheck/TcDerivUtils.hs @@ -968,7 +968,7 @@ cond_functorOK allowFunctions allowExQuantifiedLastTyVar _ _ rep_tc , ft_fun = \x y -> if allowFunctions then x `andValid` y else NotValid (badCon con functions) , ft_tup = \_ xs -> allValid xs - , ft_ty_app = \_ x -> x + , ft_ty_app = \_ _ x -> x , ft_bad_app = NotValid (badCon con wrong_arg) , ft_forall = \_ x -> x } diff --git a/compiler/typecheck/TcGenFunctor.hs b/compiler/typecheck/TcGenFunctor.hs index 6cc3642b8b..adb7b6c369 100644 --- a/compiler/typecheck/TcGenFunctor.hs +++ b/compiler/typecheck/TcGenFunctor.hs @@ -6,6 +6,7 @@ The deriving code for the Functor, Foldable, and Traversable classes (equivalent to the code in TcGenDeriv, for other classes) -} +{-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE FlexibleContexts #-} @@ -18,13 +19,15 @@ module TcGenFunctor ( gen_Functor_binds, gen_Foldable_binds, gen_Traversable_binds ) where +#include "HsVersions.h" + import GhcPrelude import Bag import DataCon import FastString import GHC.Hs -import Panic +import Outputable import PrelNames import RdrName import SrcLoc @@ -83,50 +86,66 @@ However, we have special cases for - functions More formally, we write the derivation of fmap code over type variable -'a for type 'b as ($fmap 'a 'b). In this general notation the derived +'a for type 'b as ($fmap 'a 'b x). In this general notation the derived instance for T is: instance Functor T where fmap f (T1 x1 x2) = T1 ($(fmap 'a 'b1) x1) ($(fmap 'a 'a) x2) fmap f (T2 x1) = T2 ($(fmap 'a '(T a)) x1) - $(fmap 'a 'b) = \x -> x -- when b does not contain a - $(fmap 'a 'a) = f - $(fmap 'a '(b1,b2)) = \x -> case x of (x1,x2) -> ($(fmap 'a 'b1) x1, $(fmap 'a 'b2) x2) - $(fmap 'a '(T b1 b2)) = fmap $(fmap 'a 'b2) -- when a only occurs in the last parameter, b2 - $(fmap 'a '(b -> c)) = \x b -> $(fmap 'a' 'c) (x ($(cofmap 'a 'b) b)) + $(fmap 'a 'b x) = x -- when b does not contain a + $(fmap 'a 'a x) = f x + $(fmap 'a '(b1,b2) x) = case x of (x1,x2) -> ($(fmap 'a 'b1 x1), $(fmap 'a 'b2 x2)) + $(fmap 'a '(T b1 a) x) = fmap f x -- when a only occurs directly as the last argument of T + $(fmap 'a '(T b1 b2) x) = fmap (\y. $(fmap 'a 'b2 y)) x -- when a only occurs in the last parameter, b2 + $(fmap 'a '(tb -> tc) x) = \(y:tb[b/a]) -> $(fmap 'a' 'tc' (x $(cofmap 'a 'tb y))) For functions, the type parameter 'a can occur in a contravariant position, which means we need to derive a function like: cofmap :: (a -> b) -> (f b -> f a) -This is pretty much the same as $fmap, only without the $(cofmap 'a 'a) case: - - $(cofmap 'a 'b) = \x -> x -- when b does not contain a - $(cofmap 'a 'a) = error "type variable in contravariant position" - $(cofmap 'a '(b1,b2)) = \x -> case x of (x1,x2) -> ($(cofmap 'a 'b1) x1, $(cofmap 'a 'b2) x2) - $(cofmap 'a '[b]) = map $(cofmap 'a 'b) - $(cofmap 'a '(T b1 b2)) = fmap $(cofmap 'a 'b2) -- when a only occurs in the last parameter, b2 - $(cofmap 'a '(b -> c)) = \x b -> $(cofmap 'a' 'c) (x ($(fmap 'a 'c) b)) - -Note that the code produced by $(fmap _ _) is always a higher order function, -with type `(a -> b) -> (g a -> g b)` for some g. When we need to do pattern -matching on the type, this means create a lambda function (see the (,) case above). -The resulting code for fmap can look a bit weird, for example: - - data X a = X (a,Int) - -- generated instance - instance Functor X where - fmap f (X x) = (\y -> case y of (x1,x2) -> X (f x1, (\z -> z) x2)) x - -The optimizer should be able to simplify this code by simple inlining. - -An older version of the deriving code tried to avoid these applied -lambda functions by producing a meta level function. But the function to -be mapped, `f`, is a function on the code level, not on the meta level, -so it was eta expanded to `\x -> [| f $x |]`. This resulted in too much eta expansion. -It is better to produce too many lambdas than to eta expand, see ticket #7436. +This is pretty much the same as $fmap, only without the $(cofmap 'a 'a x) and +$(cofmap 'a '(T b1 a) x) cases: + + $(cofmap 'a 'b x) = x -- when b does not contain a + $(cofmap 'a 'a x) = error "type variable in contravariant position" + $(cofmap 'a '(b1,b2) x) = case x of (x1,x2) -> ($(cofmap 'a 'b1) x1, $(cofmap 'a 'b2) x2) + $(cofmap 'a '(T b1 a) x) = error "type variable in contravariant position" -- when a only occurs directly as the last argument of T + $(cofmap 'a '(T b1 b2) x) = fmap (\y. $(cofmap 'a 'b2 y)) x -- when a only occurs in the last parameter, b2 + $(cofmap 'a '(tb -> tc) x) = \(y:tb[b/a]) -> $(cofmap 'a' 'tc' (x $(fmap 'a 'tb y))) + +Note that the code produced by $(fmap _ _ _) is always a higher order function, +with type `(a -> b) -> (g a -> g b)` for some g. + +Note that there are two distinct cases in $fmap (and $cofmap) that match on an +application of some type constructor T (where T is not a tuple type +constructor): + + $(fmap 'a '(T b1 a) x) = fmap f x -- when a only occurs directly as the last argument of T + $(fmap 'a '(T b1 b2) x) = fmap (\y. $(fmap 'a 'b2 y)) x -- when a only occurs in the last parameter, b2 + +While the latter case technically subsumes the former case, it is important to +give special treatment to the former case to avoid unnecessary eta expansion. +See Note [Avoid unnecessary eta expansion in derived fmap implementations]. + +We also generate code for (<$) in addition to fmap—see Note [Deriving <$] for +an explanation of why this is important. Just like $fmap/$cofmap above, there +is a similar algorithm for generating `p <$ x` (for some constant `p`): + + $(replace 'a 'b x) = x -- when b does not contain a + $(replace 'a 'a x) = p + $(replace 'a '(b1,b2) x) = case x of (x1,x2) -> ($(replace 'a 'b1 x1), $(replace 'a 'b2 x2)) + $(replace 'a '(T b1 a) x) = p <$ x -- when a only occurs directly as the last argument of T + $(replace 'a '(T b1 b2) x) = fmap (\y. $(replace 'a 'b2 y)) x -- when a only occurs in the last parameter, b2 + $(replace 'a '(tb -> tc) x) = \(y:tb[b/a]) -> $(replace 'a' 'tc' (x $(coreplace 'a 'tb y))) + + $(coreplace 'a 'b x) = x -- when b does not contain a + $(coreplace 'a 'a x) = error "type variable in contravariant position" + $(coreplace 'a '(b1,b2) x) = case x of (x1,x2) -> ($(coreplace 'a 'b1 x1), $(coreplace 'a 'b2 x2)) + $(coreplace 'a '(T b1 a) x) = error "type variable in contravariant position" -- when a only occurs directly as the last argument of T + $(coreplace 'a '(T b1 b2) x) = fmap (\y. $(coreplace 'a 'b2 y)) x -- when a only occurs in the last parameter, b2 + $(coreplace 'a '(tb -> tc) x) = \(y:tb[b/a]) -> $(coreplace 'a' 'tc' (x $(replace 'a 'tb y))) -} gen_Functor_binds :: SrcSpan -> TyCon -> (LHsBinds GhcPs, BagDerivStuff) @@ -154,30 +173,34 @@ gen_Functor_binds loc tycon fmap_match_ctxt = mkPrefixFunRhs fmap_name fmap_eqn con = flip evalState bs_RDRs $ - match_for_con fmap_match_ctxt [f_Pat] con =<< parts + match_for_con fmap_match_ctxt [f_Pat] con parts where - parts = sequence $ foldDataConArgs ft_fmap con + parts = foldDataConArgs ft_fmap con fmap_eqns = map fmap_eqn data_cons - ft_fmap :: FFoldType (State [RdrName] (LHsExpr GhcPs)) - ft_fmap = FT { ft_triv = mkSimpleLam $ \x -> return x - -- fmap f = \x -> x - , ft_var = return f_Expr - -- fmap f = f - , ft_fun = \g h -> do - gg <- g - hh <- h - mkSimpleLam2 $ \x b -> return $ - nlHsApp hh (nlHsApp x (nlHsApp gg b)) - -- fmap f = \x b -> h (x (g b)) - , ft_tup = \t gs -> do - gg <- sequence gs - mkSimpleLam $ mkSimpleTupleCase (match_for_con CaseAlt) t gg - -- fmap f = \x -> case x of (a1,a2,..) -> (g1 a1,g2 a2,..) - , ft_ty_app = \_ g -> nlHsApp fmap_Expr <$> g - -- fmap f = fmap g - , ft_forall = \_ g -> g + ft_fmap :: FFoldType (LHsExpr GhcPs -> State [RdrName] (LHsExpr GhcPs)) + ft_fmap = FT { ft_triv = \x -> pure x + -- fmap f x = x + , ft_var = \x -> pure $ nlHsApp f_Expr x + -- fmap f x = f x + , ft_fun = \g h x -> mkSimpleLam $ \b -> do + gg <- g b + h $ nlHsApp x gg + -- fmap f x = \b -> h (x (g b)) + , ft_tup = mkSimpleTupleCase (match_for_con CaseAlt) + -- fmap f x = case x of (a1,a2,..) -> (g1 a1,g2 a2,..) + , ft_ty_app = \_ arg_ty g x -> + -- If the argument type is a bare occurrence of the + -- data type's last type variable, then we can generate + -- more efficient code. + -- See Note [Avoid unnecessary eta expansion in derived fmap implementations] + if tcIsTyVarTy arg_ty + then pure $ nlHsApps fmap_RDR [f_Expr,x] + else do gg <- mkSimpleLam g + pure $ nlHsApps fmap_RDR [gg,x] + -- fmap f x = fmap g x + , ft_forall = \_ g x -> g x , ft_bad_app = panic "in other argument in ft_fmap" , ft_co_var = panic "contravariant in ft_fmap" } @@ -189,53 +212,157 @@ gen_Functor_binds loc tycon replace_match_ctxt = mkPrefixFunRhs replace_name replace_eqn con = flip evalState bs_RDRs $ - match_for_con replace_match_ctxt [z_Pat] con =<< parts + match_for_con replace_match_ctxt [z_Pat] con parts where - parts = traverse (fmap replace) $ foldDataConArgs ft_replace con + parts = foldDataConArgs ft_replace con replace_eqns = map replace_eqn data_cons - ft_replace :: FFoldType (State [RdrName] Replacer) - ft_replace = FT { ft_triv = fmap Nested $ mkSimpleLam $ \x -> return x - -- (p <$) = \x -> x - , ft_var = fmap Immediate $ mkSimpleLam $ \_ -> return z_Expr - -- (p <$) = const p - , ft_fun = \g h -> do - gg <- replace <$> g - hh <- replace <$> h - fmap Nested $ mkSimpleLam2 $ \x b -> return $ - nlHsApp hh (nlHsApp x (nlHsApp gg b)) - -- (<$) p = \x b -> h (x (g b)) - , ft_tup = \t gs -> do - gg <- traverse (fmap replace) gs - fmap Nested . mkSimpleLam $ - mkSimpleTupleCase (match_for_con CaseAlt) t gg - -- (p <$) = \x -> case x of (a1,a2,..) -> (g1 a1,g2 a2,..) - , ft_ty_app = \_ gm -> do - g <- gm - case g of - Nested g' -> pure . Nested $ - nlHsApp fmap_Expr $ g' - Immediate _ -> pure . Nested $ - nlHsApp replace_Expr z_Expr - -- (p <$) = fmap (p <$) - , ft_forall = \_ g -> g + ft_replace :: FFoldType (LHsExpr GhcPs -> State [RdrName] (LHsExpr GhcPs)) + ft_replace = FT { ft_triv = \x -> pure x + -- p <$ x = x + , ft_var = \_ -> pure z_Expr + -- p <$ _ = p + , ft_fun = \g h x -> mkSimpleLam $ \b -> do + gg <- g b + h $ nlHsApp x gg + -- p <$ x = \b -> h (x (g b)) + , ft_tup = mkSimpleTupleCase (match_for_con CaseAlt) + -- p <$ x = case x of (a1,a2,..) -> (g1 a1,g2 a2,..) + , ft_ty_app = \_ arg_ty g x -> + -- If the argument type is a bare occurrence of the + -- data type's last type variable, then we can generate + -- more efficient code. + -- See [Deriving <$] + if tcIsTyVarTy arg_ty + then pure $ nlHsApps replace_RDR [z_Expr,x] + else do gg <- mkSimpleLam g + pure $ nlHsApps fmap_RDR [gg,x] + -- p <$ x = fmap (p <$) x + , ft_forall = \_ g x -> g x , ft_bad_app = panic "in other argument in ft_replace" , ft_co_var = panic "contravariant in ft_replace" } -- Con a1 a2 ... -> Con (f1 a1) (f2 a2) ... - match_for_con :: HsMatchContext GhcPs - -> [LPat GhcPs] -> DataCon -> [LHsExpr GhcPs] - -> State [RdrName] (LMatch GhcPs (LHsExpr GhcPs)) + match_for_con :: Monad m + => HsMatchContext GhcPs + -> [LPat GhcPs] -> DataCon + -> [LHsExpr GhcPs -> m (LHsExpr GhcPs)] + -> m (LMatch GhcPs (LHsExpr GhcPs)) match_for_con ctxt = mkSimpleConMatch ctxt $ - \con_name xs -> return $ nlHsApps con_name xs -- Con x1 x2 .. + \con_name xsM -> do xs <- sequence xsM + pure $ nlHsApps con_name xs -- Con x1 x2 .. + +{- +Note [Avoid unnecessary eta expansion in derived fmap implementations] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +For the sake of simplicity, the algorithm that derived implementations of +fmap used to have a single case that dealt with applications of some type +constructor T (where T is not a tuple type constructor): + + $(fmap 'a '(T b1 b2) x) = fmap (\y. $(fmap 'a 'b2 y)) x -- when a only occurs in the last parameter, b2 + +This generated less than optimal code in certain situations, however. Consider +this example: + + data List a = Nil | Cons a (List a) deriving Functor + +This would generate the following Functor instance: + + instance Functor List where + fmap f Nil = Nil + fmap f (Cons x xs) = Cons (f x) (fmap (\y -> f y) xs) + +The code `fmap (\y -> f y) xs` is peculiar, since it eta expands an application +of `f`. What's worse, this eta expansion actually degrades performance! To see +why, we can trace an invocation of fmap on a small List: + + fmap id $ Cons 0 $ Cons 0 $ Cons 0 $ Cons 0 Nil + + Cons (id 0) $ fmap (\y -> id y) + $ Cons 0 $ Cons 0 $ Cons 0 Nil + + Cons (id 0) $ Cons ((\y -> id y) 0) + $ fmap (\y' -> (\y -> id y) y') + $ Cons 0 $ Cons 0 Nil + + Cons (id 0) $ Cons ((\y -> id y) 0) + $ Cons ((\y' -> (\y -> id y) y') 0) + $ fmap (\y'' -> (\y' -> (\y -> id y) y') y'') + $ Cons 0 Nil --- See Note [Deriving <$] -data Replacer = Immediate {replace :: LHsExpr GhcPs} - | Nested {replace :: LHsExpr GhcPs} + Cons (id 0) $ Cons ((\y -> id y) 0) + $ Cons ((\y' -> (\y -> id y) y') 0) + $ Cons ((\y'' -> (\y' -> (\y -> id y) y') y'') 0) + $ fmap (\y''' -> (\y'' -> (\y' -> (\y -> id y) y') y'') y''') + $ Nil -{- Note [Deriving <$] - ~~~~~~~~~~~~~~~~~~ + Cons (id 0) $ Cons ((\y -> id y) 0) + $ Cons ((\y' -> (\y -> id y) y') 0) + $ Cons ((\y'' -> (\y' -> (\y -> id y) y') y'') 0) + $ Nil + +Notice how the number of lambdas—and hence, the number of closures—one +needs to evaluate grows very quickly. In general, a List with N cons cells will +require (1 + 2 + ... (N-1)) beta reductions, which takes O(N^2) time! This is +what caused the performance issues observed in #7436. + +But hold on a second: shouldn't GHC's optimizer be able to eta reduce +`\y -> f y` to `f` and avoid these beta reductions? Unfortunately, this is not +the case. In general, eta reduction can change the semantics of a program. For +instance, (\x -> ⊥) `seq` () converges, but ⊥ `seq` () diverges. It just so +happens that the fmap implementation above would have the same semantics +regardless of whether or not `\y -> f y` or `f` is used, but GHC's optimizer is +not yet smart enough to realize this (see #17881). + +To avoid this quadratic blowup, we add a special case to $fmap that applies +`fmap f` directly: + + $(fmap 'a '(T b1 a) x) = fmap f x -- when a only occurs directly as the last argument of T + $(fmap 'a '(T b1 b2) x) = fmap (\y. $(fmap 'a 'b2 y)) x -- when a only occurs in the last parameter, b2 + +With this modified algorithm, the derived Functor List instance becomes: + + instance Functor List where + fmap f Nil = Nil + fmap f (Cons x xs) = Cons (f x) (fmap f xs) + +No lambdas in sight, just the way we like it. + +This special case does not prevent all sources quadratic closure buildup, +however. In this example: + + data PolyList a = PLNil | PLCons a (PolyList (PolyList a)) + deriving Functor + +We would derive the following code: + + instance Functor PolyList where + fmap f PLNil = PLNil + fmap f (PLCons x xs) = PLCons (f x) (fmap (\y -> fmap f y) xs) + +The use of `fmap (\y -> fmap f y) xs` builds up closures in much the same way +as `fmap (\y -> f y) xs`. The difference here is that even if we eta reduced +to `fmap (fmap f) xs`, GHC would /still/ build up a closure, since we are +recursively invoking fmap with a different argument (fmap f). Since we end up +paying the price of building a closure either way, we do not extend the special +case in $fmap any further, since it wouldn't buy us anything. + +The ft_ty_app field of FFoldType distinguishes between these two $fmap cases by +inspecting the argument type. If the argument type is a bare type variable, +then we can conclude the type variable /must/ be the same as the data type's +last type parameter. We know that this must be the case since there is an +invariant that the argument type in ft_ty_app will always contain the last +type parameter somewhere (see Note [FFoldType and functorLikeTraverse]), so +if the argument type is a bare variable, then that must be exactly the last +type parameter. + +Note that the ft_ty_app case of ft_replace (which derives implementations of +(<$)) also inspects the argument type to generate more efficient code. +See Note [Deriving <$]. + +Note [Deriving <$] +~~~~~~~~~~~~~~~~~~ We derive the definition of <$. Allowing this to take the default definition can lead to memory leaks: mapping over a structure with a constant function can @@ -298,9 +425,15 @@ Then we want to define x <$ Nes q = Nes (fmap (x <$) q) -We use the Replacer type to tag whether the expression derived for applying -<$ to the last type variable was the ft_var case (immediate) or one of the -others (letting ft_forall pass through as usual). +We inspect the argument type in ft_ty_app +(see Note [FFoldType and functorLikeTraverse]) to distinguish between these +two cases. If the argument type is a bare type variable, then we know that it +must be the same variable as the data type's last type parameter. +This is very similar to a trick that derived fmap implementations +use in their own ft_ty_app case. +See Note [Avoid unnecessary eta expansion in derived fmap implementations], +which explains why checking if the argument type is a bare variable is +the right thing to do. We could, but do not, give tuples special treatment to improve efficiency in some cases. Suppose we have @@ -342,9 +475,12 @@ data FFoldType a -- Describes how to fold over a Type in a functor like way , ft_fun :: a -> a -> a -- ^ Function type , ft_tup :: TyCon -> [a] -> a - -- ^ Tuple type - , ft_ty_app :: Type -> a -> a - -- ^ Type app, variable only in last argument + -- ^ Tuple type. The @[a]@ is the result of folding over the + -- arguments of the tuple. + , ft_ty_app :: Type -> Type -> a -> a + -- ^ Type app, variable only in last argument. The two 'Type's are + -- the function and argument parts of @fun_ty arg_ty@, + -- respectively. , ft_bad_app :: a -- ^ Type app, variable other than in last argument , ft_forall :: TcTyVar -> a -> a @@ -375,7 +511,7 @@ functorLikeTraverse var (FT { ft_triv = caseTrivial, ft_var = caseVar where (xr,xc) = go (not co) x (yr,yc) = go co y go co (AppTy x y) | xc = (caseWrongArg, True) - | yc = (caseTyApp x yr, True) + | yc = (caseTyApp x y yr, True) where (_, xc) = go co x (yr,yc) = go co y go co ty@(TyConApp con args) @@ -384,8 +520,8 @@ functorLikeTraverse var (FT { ft_triv = caseTrivial, ft_var = caseVar -- and at least one xr is True | isTupleTyCon con = (caseTuple con xrs, True) | or (init xcs) = (caseWrongArg, True) -- T (..var..) ty - | Just (fun_ty, _) <- splitAppTy_maybe ty -- T (..no var..) ty - = (caseTyApp fun_ty (last xrs), True) + | Just (fun_ty, arg_ty) <- splitAppTy_maybe ty -- T (..no var..) ty + = (caseTyApp fun_ty arg_ty (last xrs), True) | otherwise = (caseWrongArg, True) -- Non-decomposable (eg type function) where -- When folding over an unboxed tuple, we must explicitly drop the @@ -409,7 +545,7 @@ deepSubtypesContaining tv , ft_var = [] , ft_fun = (++) , ft_tup = \_ xs -> concat xs - , ft_ty_app = (:) + , ft_ty_app = \t _ ts -> t:ts , ft_bad_app = panic "in other argument in deepSubtypesContaining" , ft_co_var = panic "contravariant in deepSubtypesContaining" , ft_forall = \v xs -> filterOut ((v `elemVarSet`) . tyCoVarsOfType) xs }) @@ -464,10 +600,10 @@ mkSimpleLam2 lam = -- and its arguments, applying an expression (from @insides@) to each of the -- respective arguments of @con@. mkSimpleConMatch :: Monad m => HsMatchContext GhcPs - -> (RdrName -> [LHsExpr GhcPs] -> m (LHsExpr GhcPs)) + -> (RdrName -> [a] -> m (LHsExpr GhcPs)) -> [LPat GhcPs] -> DataCon - -> [LHsExpr GhcPs] + -> [LHsExpr GhcPs -> a] -> m (LMatch GhcPs (LHsExpr GhcPs)) mkSimpleConMatch ctxt fold extra_pats con insides = do let con_name = getRdrName con @@ -477,7 +613,7 @@ mkSimpleConMatch ctxt fold extra_pats con insides = do then bare_pat else nlParPat bare_pat rhs <- fold con_name - (zipWith (\i v -> i `nlHsApp` nlHsVar v) insides vars_needed) + (zipWith (\i v -> i $ nlHsVar v) insides vars_needed) return $ mkMatch ctxt (extra_pats ++ [pat]) rhs (noLoc emptyLocalBinds) @@ -730,7 +866,7 @@ gen_Foldable_binds loc tycon mkSimpleTupleCase (match_foldr z) t gg x return (Just lam) -- foldr f = (\x z -> case x of ...) - , ft_ty_app = \_ g -> do + , ft_ty_app = \_ _ g -> do gg <- g mapM (\gg' -> mkSimpleLam2 $ \x z -> return $ nlHsApps foldable_foldr_RDR [gg',z,x]) gg @@ -740,11 +876,12 @@ gen_Foldable_binds loc tycon , ft_fun = panic "function in ft_foldr" , ft_bad_app = panic "in other argument in ft_foldr" } - match_foldr :: LHsExpr GhcPs + match_foldr :: Monad m + => LHsExpr GhcPs -> [LPat GhcPs] -> DataCon -> [Maybe (LHsExpr GhcPs)] - -> State [RdrName] (LMatch GhcPs (LHsExpr GhcPs)) + -> m (LMatch GhcPs (LHsExpr GhcPs)) match_foldr z = mkSimpleConMatch2 LambdaExpr $ \_ xs -> return (mkFoldr xs) where -- g1 v1 (g2 v2 (.. z)) @@ -763,17 +900,18 @@ gen_Foldable_binds loc tycon lam <- mkSimpleLam $ mkSimpleTupleCase match_foldMap t gg return (Just lam) -- foldMap f = \x -> case x of (..,) - , ft_ty_app = \_ g -> fmap (nlHsApp foldMap_Expr) <$> g + , ft_ty_app = \_ _ g -> fmap (nlHsApp foldMap_Expr) <$> g -- foldMap f = foldMap g , ft_forall = \_ g -> g , ft_co_var = panic "contravariant in ft_foldMap" , ft_fun = panic "function in ft_foldMap" , ft_bad_app = panic "in other argument in ft_foldMap" } - match_foldMap :: [LPat GhcPs] + match_foldMap :: Monad m + => [LPat GhcPs] -> DataCon -> [Maybe (LHsExpr GhcPs)] - -> State [RdrName] (LMatch GhcPs (LHsExpr GhcPs)) + -> m (LMatch GhcPs (LHsExpr GhcPs)) match_foldMap = mkSimpleConMatch2 CaseAlt $ \_ xs -> return (mkFoldMap xs) where -- mappend v1 (mappend v2 ..) @@ -799,7 +937,7 @@ gen_Foldable_binds loc tycon Just ggg -> NullM <$> (mkSimpleLam $ mkSimpleTupleCase match_null t ggg) -- null = \x -> case x of (..,) - , ft_ty_app = \_ g -> flip fmap g $ \nestedResult -> + , ft_ty_app = \_ _ g -> flip fmap g $ \nestedResult -> case nestedResult of -- If e definitely contains the parameter, -- then we can test if (G e) contains it by @@ -818,10 +956,11 @@ gen_Foldable_binds loc tycon , ft_fun = panic "function in ft_null" , ft_bad_app = panic "in other argument in ft_null" } - match_null :: [LPat GhcPs] - -> DataCon - -> [Maybe (LHsExpr GhcPs)] - -> State [RdrName] (LMatch GhcPs (LHsExpr GhcPs)) + match_null :: Monad m + => [LPat GhcPs] + -> DataCon + -> [Maybe (LHsExpr GhcPs)] + -> m (LMatch GhcPs (LHsExpr GhcPs)) match_null = mkSimpleConMatch2 CaseAlt $ \_ xs -> return (mkNull xs) where -- v1 && v2 && .. @@ -920,7 +1059,7 @@ gen_Traversable_binds loc tycon return (Just lam) -- traverse f = \x -> case x of (a1,a2,..) -> -- liftA2 (,,) (g1 a1) (g2 a2) <*> .. - , ft_ty_app = \_ g -> fmap (nlHsApp traverse_Expr) <$> g + , ft_ty_app = \_ _ g -> fmap (nlHsApp traverse_Expr) <$> g -- traverse f = traverse g , ft_forall = \_ g -> g , ft_co_var = panic "contravariant in ft_trav" @@ -929,10 +1068,11 @@ gen_Traversable_binds loc tycon -- Con a1 a2 ... -> liftA2 (\b1 b2 ... -> Con b1 b2 ...) (g1 a1) -- (g2 a2) <*> ... - match_for_con :: [LPat GhcPs] + match_for_con :: Monad m + => [LPat GhcPs] -> DataCon -> [Maybe (LHsExpr GhcPs)] - -> State [RdrName] (LMatch GhcPs (LHsExpr GhcPs)) + -> m (LMatch GhcPs (LHsExpr GhcPs)) match_for_con = mkSimpleConMatch2 CaseAlt $ \con xs -> return (mkApCon con xs) where @@ -946,13 +1086,11 @@ gen_Traversable_binds loc tycon ----------------------------------------------------------------------- -f_Expr, z_Expr, fmap_Expr, replace_Expr, mempty_Expr, foldMap_Expr, +f_Expr, z_Expr, mempty_Expr, foldMap_Expr, traverse_Expr, coerce_Expr, pure_Expr, true_Expr, false_Expr, all_Expr, null_Expr :: LHsExpr GhcPs f_Expr = nlHsVar f_RDR z_Expr = nlHsVar z_RDR -fmap_Expr = nlHsVar fmap_RDR -replace_Expr = nlHsVar replace_RDR mempty_Expr = nlHsVar mempty_RDR foldMap_Expr = nlHsVar foldMap_RDR traverse_Expr = nlHsVar traverse_RDR @@ -1093,11 +1231,20 @@ a is the last type variable in a given datatype): * ft_ty_app: A type is being applied to the last type parameter, where the applied type does not mention the last type parameter (if it - did, it would fall under ft_bad_app). The Type argument to - ft_ty_app represents the applied type. + did, it would fall under ft_bad_app) and the argument type + mentions the last type parameter (if it did not, it would fall + under ft_triv). The first two Type arguments to + ft_ty_app represent the applied type and argument type, + respectively. + + Currently, only DeriveFunctor makes use of the argument type. + It inspects the argument type so that it can generate more + efficient implementations of fmap + (see Note [Avoid unnecessary eta expansion in derived fmap implementations]) + and (<$) (see Note [Deriving <$]) in certain cases. Note that functions, tuples, and foralls are distinct cases - and take precedence of ft_ty_app. (For example, (Int -> a) would + and take precedence over ft_ty_app. (For example, (Int -> a) would fall under (ft_fun Int a), not (ft_ty_app ((->) Int) a). Examples: Maybe a, Either b a diff --git a/compiler/typecheck/TcGenGenerics.hs b/compiler/typecheck/TcGenGenerics.hs index 724da9f2e0..146b91dab4 100644 --- a/compiler/typecheck/TcGenGenerics.hs +++ b/compiler/typecheck/TcGenGenerics.hs @@ -279,7 +279,7 @@ canDoGenerics1 rep_tc = -- (ty arg), where head of ty is neither (->) nor a tuple constructor and -- the parameter of interest does not occur in ty - , ft_ty_app = \_ arg -> arg + , ft_ty_app = \_ _ arg -> arg , ft_bad_app = bmbad con wrong_arg , ft_forall = \_ body -> body -- polytypes are handled elsewhere diff --git a/testsuite/tests/deriving/should_compile/T17880.hs b/testsuite/tests/deriving/should_compile/T17880.hs new file mode 100644 index 0000000000..59662f487c --- /dev/null +++ b/testsuite/tests/deriving/should_compile/T17880.hs @@ -0,0 +1,9 @@ +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE RankNTypes #-} +module T17880 where + +data T1 a = MkT1 (forall b. b -> (forall c. a -> c) -> a) + deriving Functor + +data T2 a = MkT2 (Int -> forall c. c -> a) + deriving Functor diff --git a/testsuite/tests/deriving/should_compile/all.T b/testsuite/tests/deriving/should_compile/all.T index e29ae0e0b5..a00617b057 100644 --- a/testsuite/tests/deriving/should_compile/all.T +++ b/testsuite/tests/deriving/should_compile/all.T @@ -122,3 +122,4 @@ test('T16518', normal, compile, ['']) test('T17324', normal, compile, ['']) test('T17339', normal, compile, ['-ddump-simpl -dsuppress-idinfo -dno-typeable-binds']) +test('T17880', normal, compile, ['']) diff --git a/testsuite/tests/generics/GenDerivOutput.stderr b/testsuite/tests/generics/GenDerivOutput.stderr index 463609b19e..6b4ded8242 100644 --- a/testsuite/tests/generics/GenDerivOutput.stderr +++ b/testsuite/tests/generics/GenDerivOutput.stderr @@ -93,7 +93,7 @@ Derived class instances: = GenDerivOutput.Cons (f a1) (GHC.Base.fmap f a2) (GHC.Base.<$) z GenDerivOutput.Nil = GenDerivOutput.Nil (GHC.Base.<$) z (GenDerivOutput.Cons a1 a2) - = GenDerivOutput.Cons ((\ b1 -> z) a1) ((GHC.Base.<$) z a2) + = GenDerivOutput.Cons z ((GHC.Base.<$) z a2) Derived type family instances: diff --git a/testsuite/tests/perf/should_run/T7436b.hs b/testsuite/tests/perf/should_run/T7436b.hs new file mode 100644 index 0000000000..e5f09aba9b --- /dev/null +++ b/testsuite/tests/perf/should_run/T7436b.hs @@ -0,0 +1,20 @@ +{-# LANGUAGE DeriveFunctor #-} +-- A variation on T7436 that tests a derived Functor instance. +module Main where + +data List a = Nil | Cons a (List a) + deriving Functor + +mkList :: Int -> List Int +mkList 0 = Nil +mkList n = Cons n (mkList (n-1)) + +sumList :: List Int -> Int +sumList = go 0 + where + go a Nil = a + go a (Cons n ns) = a `seq` go (a+n) ns + +main :: IO () +main = print $ sumList . fmap id $ mkList n + where n = 40000 diff --git a/testsuite/tests/perf/should_run/T7436b.stdout b/testsuite/tests/perf/should_run/T7436b.stdout new file mode 100644 index 0000000000..3d6a314caa --- /dev/null +++ b/testsuite/tests/perf/should_run/T7436b.stdout @@ -0,0 +1 @@ +800020000 diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T index 89f1fc8ecd..d0dec5b2a6 100644 --- a/testsuite/tests/perf/should_run/all.T +++ b/testsuite/tests/perf/should_run/all.T @@ -198,6 +198,13 @@ test('T7436', compile_and_run, ['-O']) +test('T7436b', + [collect_stats('max_bytes_used',4), + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + test('T7797', [collect_stats('bytes allocated',5), extra_clean(['T7797a.hi', 'T7797a.o']), -- cgit v1.2.1