diff options
author | Richard Eisenberg <eir@cis.upenn.edu> | 2014-12-16 16:35:43 -0500 |
---|---|---|
committer | Richard Eisenberg <eir@cis.upenn.edu> | 2014-12-17 10:47:17 -0500 |
commit | 922168fda3b3a3b96033a9c5d38f3fe70a99fd63 (patch) | |
tree | caa9d5a6e1995b42ca933623166a357f0e8dcc81 | |
parent | ea22a8f721f440458554c7500686baef57da4d4d (diff) | |
download | haskell-922168fda3b3a3b96033a9c5d38f3fe70a99fd63.tar.gz |
Performance enhancements in TcFlatten.
This commit fixes some performance regressions introduced by 0cc47eb,
adding more `Coercible` magic to the solver. See Note
[flatten_many performance] in TcFlatten for more info.
The improvements do not quite restore the old numbers. Given that
the solver is really more involved now, I am accepting this regression.
The way forward (I believe) would be to have *two* flatteners: one
that deals only with nominal equalities and thus never checks roles,
and the more general one. A nice design of keeping this performant
without duplicating code eludes me, but someone else is welcome
to take a stab.
-rw-r--r-- | compiler/typecheck/TcFlatten.hs | 83 | ||||
-rw-r--r-- | compiler/utils/MonadUtils.hs | 3 | ||||
-rw-r--r-- | testsuite/tests/perf/compiler/all.T | 16 |
3 files changed, 87 insertions, 15 deletions
diff --git a/compiler/typecheck/TcFlatten.hs b/compiler/typecheck/TcFlatten.hs index 34c2c4a04a..818965d647 100644 --- a/compiler/typecheck/TcFlatten.hs +++ b/compiler/typecheck/TcFlatten.hs @@ -28,10 +28,11 @@ import TcSMonad as TcS import DynFlags( DynFlags ) import Util -import MonadUtils ( zipWithAndUnzipM ) import Bag import FastString import Control.Monad( when, liftM ) +import MonadUtils ( zipWithAndUnzipM ) +import GHC.Exts ( inline ) {- Note [The flattening story] @@ -643,6 +644,37 @@ soon throw out the phantoms when decomposing a TyConApp. (Or, the canonicaliser will emit an insoluble, in which case the unflattened version yields a better error message anyway.) +Note [flatten_many performance] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In programs with lots of type-level evaluation, flatten_many becomes +part of a tight loop. For example, see test perf/compiler/T9872a, which +calls flatten_many a whopping 7,106,808 times. It is thus important +that flatten_many be efficient. + +Performance testing showed that the current implementation is indeed +efficient. It's critically important that zipWithAndUnzipM be +specialized to TcS, and it's also quite helpful to actually `inline` +it. On test T9872a, here are the allocation stats (Dec 16, 2014): + + * Unspecialized, uninlined: 8,472,613,440 bytes allocated in the heap + * Specialized, uninlined: 6,639,253,488 bytes allocated in the heap + * Specialized, inlined: 6,281,539,792 bytes allocated in the heap + +To improve performance even further, flatten_many_nom is split off +from flatten_many, as nominal equality is the common case. This would +be natural to write using mapAndUnzipM, but even inlined, that function +is not as performant as a hand-written loop. + + * mapAndUnzipM, inlined: 7,463,047,432 bytes allocated in the heap + * hand-written recursion: 5,848,602,848 bytes allocated in the heap + +If you make any change here, pay close attention to the T9872{a,b,c} tests +and T5321Fun. + +If we need to make this yet more performant, a possible way forward is to +duplicate the flattener code for the nominal case, and make that case +faster. This doesn't seem quite worth it, yet. + -} ------------------ @@ -676,13 +708,24 @@ flatten_many :: FlattenEnv -> [Role] -> [Type] -> TcS ([Xi], [TcCoercion]) -- we merely want (a) Given/Solved/Derived/Wanted info -- (b) the GivenLoc/WantedLoc for when we create new evidence flatten_many fmode roles tys - = zipWithAndUnzipM go roles tys +-- See Note [flatten_many performance] + = inline zipWithAndUnzipM go roles tys where - go Nominal ty = flatten_one (fmode { fe_eq_rel = NomEq }) ty - go Representational ty = flatten_one (fmode { fe_eq_rel = ReprEq }) ty + go Nominal ty = flatten_one (setFEEqRel fmode NomEq) ty + go Representational ty = flatten_one (setFEEqRel fmode ReprEq) ty go Phantom ty = -- See Note [Phantoms in the flattener] return (ty, mkTcPhantomCo ty ty) +-- | Like 'flatten_many', but assumes that every role is nominal. +flatten_many_nom :: FlattenEnv -> [Type] -> TcS ([Xi], [TcCoercion]) +flatten_many_nom _ [] = return ([], []) +-- See Note [flatten_many performance] +flatten_many_nom fmode (ty:tys) + = ASSERT( fe_eq_rel fmode == NomEq ) + do { (xi, co) <- flatten_one fmode ty + ; (xis, cos) <- flatten_many_nom fmode tys + ; return (xi:xis, co:cos) } + ------------------ flatten_one :: FlattenEnv -> TcType -> TcS (Xi, TcCoercion) -- Flatten a type to get rid of type function applications, returning @@ -707,7 +750,7 @@ flatten_one fmode (AppTy ty1 ty2) return (mkAppTy xi1 ty2, co1 `mkTcAppCo` mkTcNomReflCo ty2) } where flatten_rhs xi1 co1 eq_rel2 - = do { (xi2,co2) <- flatten_one (fmode { fe_eq_rel = eq_rel2 }) ty2 + = do { (xi2,co2) <- flatten_one (setFEEqRel fmode eq_rel2) ty2 ; traceTcS "flatten/appty" (ppr ty1 $$ ppr ty2 $$ ppr xi1 $$ ppr co1 $$ ppr xi2 $$ ppr co2) @@ -757,14 +800,16 @@ flatten_one fmode ty@(ForAllTy {}) -- We allow for-alls when, but only when, no type function -- applications inside the forall involve the bound type variables. = do { let (tvs, rho) = splitForAllTys ty - ; (rho', co) <- flatten_one (fmode { fe_mode = FM_SubstOnly }) rho + ; (rho', co) <- flatten_one (setFEMode fmode FM_SubstOnly) rho -- Substitute only under a forall -- See Note [Flattening under a forall] ; return (mkForAllTys tvs rho', foldr mkTcForAllCo co tvs) } flattenTyConApp :: FlattenEnv -> TyCon -> [TcType] -> TcS (Xi, TcCoercion) flattenTyConApp fmode tc tys - = do { (xis, cos) <- flatten_many fmode (tyConRolesX role tc) tys + = do { (xis, cos) <- case fe_eq_rel fmode of + NomEq -> flatten_many_nom fmode tys + ReprEq -> flatten_many fmode (tyConRolesX role tc) tys ; return (mkTyConApp tc xis, mkTcTyConAppCo role tc cos) } where role = feRole fmode @@ -855,8 +900,7 @@ flatten_exact_fam_app fmode tc tys roles = tyConRolesX (feRole fmode) tc flatten_exact_fam_app_fully fmode tc tys - = do { let roles = tyConRolesX (feRole fmode) tc - ; (xis, cos) <- flatten_many (fmode { fe_mode = FM_FlattenAll }) roles tys + = do { (xis, cos) <- flatten_many_nom (setFEEqRel (setFEMode fmode FM_FlattenAll) NomEq) tys ; let ret_co = mkTcTyConAppCo (feRole fmode) tc cos -- ret_co :: F xis ~ F tys @@ -1222,7 +1266,7 @@ flattenTyVarFinal :: FlattenEnv -> TcTyVar -> TcS TyVar flattenTyVarFinal fmode tv = -- Done, but make sure the kind is zonked do { let kind = tyVarKind tv - kind_fmode = fmode { fe_mode = FM_SubstOnly } + kind_fmode = setFEMode fmode FM_SubstOnly ; (new_knd, _kind_co) <- flatten_one kind_fmode kind ; return (setVarType tv new_knd) } @@ -1506,3 +1550,22 @@ unsolved constraints. The flat form will be Flatten using the fun-eqs first. -} + +-- | Change the 'EqRel' in a 'FlattenEnv'. Avoids allocating a +-- new 'FlattenEnv' where possible. +setFEEqRel :: FlattenEnv -> EqRel -> FlattenEnv +setFEEqRel fmode@(FE { fe_eq_rel = old_eq_rel }) new_eq_rel + | old_eq_rel == new_eq_rel = fmode + | otherwise = fmode { fe_eq_rel = new_eq_rel } + +-- | Change the 'FlattenMode' in a 'FlattenEnv'. Avoids allocating +-- a new 'FlattenEnv' where possible. +setFEMode :: FlattenEnv -> FlattenMode -> FlattenEnv +setFEMode fmode@(FE { fe_mode = old_mode }) new_mode + | old_mode `eq` new_mode = fmode + | otherwise = fmode { fe_mode = new_mode } + where + FM_FlattenAll `eq` FM_FlattenAll = True + FM_SubstOnly `eq` FM_SubstOnly = True + FM_Avoid tv1 b1 `eq` FM_Avoid tv2 b2 = tv1 == tv2 && b1 == b2 + _ `eq` _ = False diff --git a/compiler/utils/MonadUtils.hs b/compiler/utils/MonadUtils.hs index edc863ab0c..0850ff43c4 100644 --- a/compiler/utils/MonadUtils.hs +++ b/compiler/utils/MonadUtils.hs @@ -77,6 +77,9 @@ zipWith3M_ f as bs cs = do { _ <- zipWith3M f as bs cs zipWithAndUnzipM :: Monad m => (a -> b -> m (c, d)) -> [a] -> [b] -> m ([c], [d]) +{-# INLINE zipWithAndUnzipM #-} +-- See Note [flatten_many performance] in TcFlatten for why this +-- pragma is essential. zipWithAndUnzipM f (x:xs) (y:ys) = do { (c, d) <- f x y ; (cs, ds) <- zipWithAndUnzipM f xs ys diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T index b98a9bc9d5..14826dff09 100644 --- a/testsuite/tests/perf/compiler/all.T +++ b/testsuite/tests/perf/compiler/all.T @@ -406,7 +406,7 @@ test('T5321Fun', # (increase due to new codegen) # 2014-09-03: 299656164 (specialisation and inlining) # 10/12/2014: 206406188 # Improvements in constraint solver - (wordsize(64), 408110888, 10)]) + (wordsize(64), 429921312, 10)]) # prev: 585521080 # 29/08/2012: 713385808 # (increase due to new codegen) # 15/05/2013: 628341952 # (reason for decrease unknown) @@ -415,6 +415,7 @@ test('T5321Fun', # 10/09/2014: 601629032 # post-AMP-cleanup # 06/11/2014: 541287000 # Simon's flat-skol changes to the constraint solver # 10/12/2014: 408110888 # Improvements in constraint solver + # 16/12/2014: 429921312 # Flattener parameterized over roles ], compile,['']) @@ -477,7 +478,7 @@ test('T5837', # 2014-12-01: 135914136 (Windows laptop, regression see below) # 2014-12-08 115905208 Constraint solver perf improvements (esp kick-out) - (wordsize(64), 234790312, 10)]) + (wordsize(64), 231155640, 10)]) # sample: 3926235424 (amd64/Linux, 15/2/2012) # 2012-10-02 81879216 # 2012-09-20 87254264 amd64/Linux @@ -489,6 +490,8 @@ test('T5837', # 2014-11-06 271028976 Linux, Accept big regression; # See Note [An alternative story for the inert substitution] in TcFlatten # 2014-12-08 234790312 Constraint solver perf improvements (esp kick-out) + # 2014-12-16 231155640 Mac Flattener parameterized over roles; + # some optimization ], compile_fail,['-ftype-function-depth=50']) @@ -556,8 +559,9 @@ test('T9675', test('T9872a', [ only_ways(['normal']), compiler_stats_num_field('bytes allocated', - [(wordsize(64), 5521332656, 5) + [(wordsize(64), 5848657456, 5) # 2014-12-10 5521332656 Initally created + # 2014-12-16 5848657456 Flattener parameterized over roles ]), ], compile_fail, @@ -566,8 +570,9 @@ test('T9872a', test('T9872b', [ only_ways(['normal']), compiler_stats_num_field('bytes allocated', - [(wordsize(64), 6483306280, 5) + [(wordsize(64), 6892251912, 5) # 2014-12-10 6483306280 Initally created + # 2014-12-16 6892251912 Flattener parameterized over roles ]), ], compile_fail, @@ -575,8 +580,9 @@ test('T9872b', test('T9872c', [ only_ways(['normal']), compiler_stats_num_field('bytes allocated', - [(wordsize(64), 5495850096, 5) + [(wordsize(64), 5842024784, 5) # 2014-12-10 5495850096 Initally created + # 2014-12-16 5842024784 Flattener parameterized over roles ]), ], compile_fail, |