summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBartosz Nitka <niteria@gmail.com>2017-05-19 08:08:01 -0700
committerBartosz Nitka <niteria@gmail.com>2017-05-31 05:26:28 -0700
commit69d9081d9fa3ff36fda36e4e11ef7e8f946ecf2a (patch)
tree42a8ca8c9517c07e46c15d932bb95d5acad0f1ed
parent8bfab438bdaa29b82c5ad57814bd60dcd02aa1c6 (diff)
downloadhaskell-69d9081d9fa3ff36fda36e4e11ef7e8f946ecf2a.tar.gz
Faster checkFamInstConsistency
This implements the idea from https://ghc.haskell.org/trac/ghc/ticket/13092#comment:14. It's explained in Note [Checking family instance optimization] in more detail. This improves the test case T13719 tenfold and cuts down the compile time on `:load` in `ghci` on our internal code base by half. Test Plan: ./validate Reviewers: simonpj, simonmar, rwbarton, austin, bgamari Reviewed By: simonpj Subscribers: thomie GHC Trac Issues: #13719 Differential Revision: https://phabricator.haskell.org/D3603
-rw-r--r--compiler/typecheck/FamInst.hs212
-rw-r--r--compiler/typecheck/TcRnDriver.hs2
-rw-r--r--testsuite/tests/perf/compiler/all.T6
-rw-r--r--testsuite/tests/perf/haddock/all.T6
4 files changed, 135 insertions, 91 deletions
diff --git a/compiler/typecheck/FamInst.hs b/compiler/typecheck/FamInst.hs
index dd3d173a8e..f69e41209f 100644
--- a/compiler/typecheck/FamInst.hs
+++ b/compiler/typecheck/FamInst.hs
@@ -41,10 +41,7 @@ import Panic
import VarSet
import Bag( Bag, unionBags, unitBag )
import Control.Monad
-import Unique
import NameEnv
-import Data.Set (Set)
-import qualified Data.Set as Set
import Data.List
#include "HsVersions.h"
@@ -220,81 +217,71 @@ certain that the modules in our `HscTypes.dep_finsts' are consistent.)
There is some fancy footwork regarding hs-boot module loops, see
Note [Don't check hs-boot type family instances too early]
--}
--- The optimisation of overlap tests is based on determining pairs of modules
--- whose family instances need to be checked for consistency.
---
-data ModulePair = ModulePair Module Module
- -- Invariant: first Module < second Module
- -- use the smart constructor
-
--- | Smart constructor that establishes the invariant
-modulePair :: Module -> Module -> ModulePair
-modulePair a b
- | a < b = ModulePair a b
- | otherwise = ModulePair b a
-
-instance Eq ModulePair where
- (ModulePair a1 b1) == (ModulePair a2 b2) = a1 == a2 && b1 == b2
-
-instance Ord ModulePair where
- (ModulePair a1 b1) `compare` (ModulePair a2 b2) =
- nonDetCmpModule a1 a2 `thenCmp`
- nonDetCmpModule b1 b2
- -- See Note [ModulePairSet determinism and performance]
-
-instance Outputable ModulePair where
- ppr (ModulePair m1 m2) = angleBrackets (ppr m1 <> comma <+> ppr m2)
-
--- Fast, nondeterministic comparison on Module. Don't use when the ordering
--- can change the ABI. See Note [ModulePairSet determinism and performance]
-nonDetCmpModule :: Module -> Module -> Ordering
-nonDetCmpModule a b =
- nonDetCmpUnique (getUnique $ moduleUnitId a) (getUnique $ moduleUnitId b)
- `thenCmp`
- nonDetCmpUnique (getUnique $ moduleName a) (getUnique $ moduleName b)
-
-type ModulePairSet = Set ModulePair
-{-
-Note [ModulePairSet determinism and performance]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The size of ModulePairSet is quadratic in the number of modules.
-The Ord instance for Module uses string comparison which is linear in the
-length of ModuleNames and UnitIds. This adds up to a significant cost, see
-#12191.
-
-To get reasonable performance ModulePairSet uses nondeterministic ordering
-on Module based on Uniques. It doesn't affect the ABI, because it only
-determines the order the modules are checked for family instance consistency.
-See Note [Unique Determinism] in Unique
--}
+Note [Checking family instance optimization]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+As explained in Note [Checking family instance consistency]
+we need to ensure that every pair of transitive imports that define type family
+instances is consistent.
-listToSet :: [ModulePair] -> ModulePairSet
-listToSet l = Set.fromList l
+Let's define df(A) = transitive imports of A that define type family instances
++ A, if A defines type family instances
+
+Then for every direct import A, df(A) is already consistent.
+
+Let's name the current module M.
+
+We want to make sure that df(M) is consistent.
+df(M) = df(D_1) U df(D_2) U ... U df(D_i) where D_1 .. D_i are direct imports.
+
+We perform the check iteratively, maintaining a set of consistent modules 'C'
+and trying to add df(D_i) to it.
+
+The key part is how to ensure that the union C U df(D_i) is consistent.
+
+Let's consider two modules: A and B from C U df(D_i).
+There are nine possible ways to choose A and B from C U df(D_i):
+
+ | A in C only | A in C and B in df(D_i) | A in df(D_i) only
+--------------------------------------------------------------------------------
+B in C only | Already checked | Already checked | Needs to be checked
+ | when checking C | when checking C |
+--------------------------------------------------------------------------------
+B in C and | Already checked | Already checked | Already checked when
+B in df(D_i) | when checking C | when checking C | checking df(D_i)
+--------------------------------------------------------------------------------
+B in df(D_i) | Needs to be | Already checked | Already checked when
+only | checked | when checking df(D_i) | checking df(D_i)
+
+That means to ensure that C U df(D_i) is consistent we need to check every
+module from C - df(D_i) against every module from df(D_i) - C and
+every module from df(D_i) - C against every module from C - df(D_i).
+But since the checks are symmetric it suffices to pick A from C - df(D_i)
+and B from df(D_i) - C.
+
+In other words these are the modules we need to check:
+ [ (m1, m2) | m1 <- C, m1 not in df(D_i)
+ , m2 <- df(D_i), m2 not in C ]
+
+One final thing to note here is that if there's lot of overlap between
+subsequent df(D_i)'s then we expect those set differences to be small.
+That situation should be pretty common in practice, there's usually
+a set of utility modules that every module imports directly or indirectly.
+
+This is basically the idea from #13092, comment:14.
+-}
--- | Check family instance consistency, given:
---
--- 1. The list of all modules transitively imported by us
--- which define a family instance (these are the ones
--- we have to check for consistency), and
---
--- 2. The list of modules which we directly imported
--- (these specify the sets of family instance defining
--- modules which are already known to be consistent).
---
--- See Note [Checking family instance consistency] for more
--- details, and Note [The type family instance consistency story]
--- for the big picture.
---
-- This function doesn't check ALL instances for consistency,
-- only ones that aren't involved in recursive knot-tying
-- loops; see Note [Don't check hs-boot type family instances too early].
-- It returns a modified 'TcGblEnv' that has saved the
-- instances that need to be checked later; use 'checkRecFamInstConsistency'
-- to check those.
-checkFamInstConsistency :: [Module] -> [Module] -> TcM TcGblEnv
-checkFamInstConsistency famInstMods directlyImpMods
+-- We don't need to check the current module, this is done in
+-- tcExtendLocalFamInstEnv.
+-- See Note [The type family instance consistency story].
+checkFamInstConsistency :: [Module] -> TcM TcGblEnv
+checkFamInstConsistency directlyImpMods
= do { dflags <- getDynFlags
; (eps, hpt) <- getEpsAndHpt
; let { -- Fetch the iface of a given module. Must succeed as
@@ -305,36 +292,89 @@ checkFamInstConsistency famInstMods directlyImpMods
(ppr mod $$ pprHPT hpt)
Just iface -> iface
- -- Which modules were checked for consistency when we compiled
- -- `mod`? Itself and its dep_finsts.
- ; modConsistent mod = mod : (dep_finsts . mi_deps . modIface $ mod)
+ -- Which family instance modules were checked for consistency
+ -- when we compiled `mod`?
+ -- Itself (if a family instance module) and its dep_finsts.
+ -- This is df(D_i) from
+ -- Note [Checking family instance optimization]
+ ; modConsistent :: Module -> [Module]
+ ; modConsistent mod =
+ if mi_finsts (modIface mod) then mod:deps else deps
+ where
+ deps = dep_finsts . mi_deps . modIface $ mod
; hmiModule = mi_module . hm_iface
; hmiFamInstEnv = extendFamInstEnvList emptyFamInstEnv
. md_fam_insts . hm_details
; hpt_fam_insts = mkModuleEnv [ (hmiModule hmi, hmiFamInstEnv hmi)
| hmi <- eltsHpt hpt]
- ; groups = map modConsistent directlyImpMods
- ; okPairs = listToSet $ concatMap allPairs groups
- -- instances of okPairs are consistent
- ; criticalPairs = listToSet $ allPairs famInstMods
- -- all pairs that we need to consider
- ; toCheckPairs =
- Set.elems $ criticalPairs `Set.difference` okPairs
- -- the difference gives us the pairs we need to check now
- -- See Note [ModulePairSet determinism and performance]
+
}
- ; pending_checks <- mapM (check hpt_fam_insts) toCheckPairs
+ ; pending_checks <- checkMany hpt_fam_insts modConsistent directlyImpMods
; tcg_env <- getGblEnv
; return tcg_env { tcg_pending_fam_checks
= foldl' (plusNameEnv_C (++)) emptyNameEnv pending_checks }
}
where
- allPairs [] = []
- allPairs (m:ms) = map (modulePair m) ms ++ allPairs ms
-
- check hpt_fam_insts (ModulePair m1 m2)
+ -- See Note [Checking family instance optimization]
+ checkMany
+ :: ModuleEnv FamInstEnv -- home package family instances
+ -> (Module -> [Module]) -- given A, modules checked when A was checked
+ -> [Module] -- modules to process
+ -> TcM [NameEnv [([FamInst], FamInstEnv)]]
+ checkMany hpt_fam_insts modConsistent mods = go [] emptyModuleSet mods []
+ where
+ go :: [Module] -- list of consistent modules
+ -> ModuleSet -- set of consistent modules, same elements as the
+ -- list above
+ -> [Module] -- modules to process
+ -> [NameEnv [([FamInst], FamInstEnv)]]
+ -- accumulator for pending checks
+ -> TcM [NameEnv [([FamInst], FamInstEnv)]]
+ go _ _ [] pending = return pending
+ go consistent consistent_set (mod:mods) pending = do
+ pending' <- sequence
+ [ check hpt_fam_insts m1 m2
+ | m1 <- to_check_from_mod
+ -- loop over toCheckFromMod first, it's usually smaller,
+ -- it may even be empty
+ , m2 <- to_check_from_consistent
+ ]
+ go consistent' consistent_set' mods (pending' ++ pending)
+ where
+ mod_deps_consistent = modConsistent mod
+ mod_deps_consistent_set = mkModuleSet mod_deps_consistent
+ consistent' = to_check_from_mod ++ consistent
+ consistent_set' =
+ extendModuleSetList consistent_set to_check_from_mod
+ to_check_from_consistent =
+ filterOut (`elemModuleSet` mod_deps_consistent_set) consistent
+ to_check_from_mod =
+ filterOut (`elemModuleSet` consistent_set) mod_deps_consistent
+ -- Why don't we just minusModuleSet here?
+ -- We could, but doing so means one of two things:
+ --
+ -- 1. When looping over the cartesian product we convert
+ -- a set into a non-deterministicly ordered list - then
+ -- tcg_pending_fam_checks will end up storing some
+ -- non-deterministically ordered lists as well and
+ -- we end up with non-local non-determinism. Which
+ -- happens to be fine for interface file determinism
+ -- in this case, today, because the order only
+ -- determines the order of deferred checks. But such
+ -- invariants are hard to keep.
+ --
+ -- 2. When looping over the cartesian product we convert
+ -- a set into a deterministically ordered list - this
+ -- adds some additional cost of sorting for every
+ -- direct import.
+ --
+ -- That also explains why we need to keep both 'consistent'
+ -- and 'consistentSet'.
+ --
+ -- See also Note [ModuleEnv performance and determinism].
+ check hpt_fam_insts m1 m2
= do { env1' <- getFamInsts hpt_fam_insts m1
; env2' <- getFamInsts hpt_fam_insts m2
-- We're checking each element of env1 against env2.
diff --git a/compiler/typecheck/TcRnDriver.hs b/compiler/typecheck/TcRnDriver.hs
index 6755985a93..bd0ee17574 100644
--- a/compiler/typecheck/TcRnDriver.hs
+++ b/compiler/typecheck/TcRnDriver.hs
@@ -365,7 +365,7 @@ tcRnImports hsc_env import_decls
; let { dir_imp_mods = moduleEnvKeys
. imp_mods
$ imports }
- ; tcg_env <- checkFamInstConsistency (imp_finsts imports) dir_imp_mods ;
+ ; tcg_env <- checkFamInstConsistency dir_imp_mods ;
; return tcg_env } }
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index 17da229b01..774f4c7bd7 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -1099,9 +1099,10 @@ test('T13379',
test('MultiLayerModules',
[ compiler_stats_num_field('bytes allocated',
- [(wordsize(64), 6956533312, 10),
+ [(wordsize(64), 6294813000, 10),
# initial: 12139116496
# 2017-05-12: 6956533312 Revert "Use a deterministic map for imp_dep_mods"
+ # 2017-05-31: 6294813000 Faster checkFamInstConsistency
]),
pre_cmd('./genMultiLayerModules'),
extra_files(['genMultiLayerModules']),
@@ -1111,8 +1112,9 @@ test('MultiLayerModules',
test('T13719',
[ compiler_stats_num_field('bytes allocated',
- [(wordsize(64), 49907410784, 10),
+ [(wordsize(64), 5187889872, 10),
# initial: 49907410784
+ # 2017-05-31: 5187889872 Faster checkFamInstConsistency
]),
pre_cmd('./genT13719'),
extra_files(['genT13719']),
diff --git a/testsuite/tests/perf/haddock/all.T b/testsuite/tests/perf/haddock/all.T
index bfce7ba7dc..57e4591661 100644
--- a/testsuite/tests/perf/haddock/all.T
+++ b/testsuite/tests/perf/haddock/all.T
@@ -65,7 +65,7 @@ test('haddock.Cabal',
[extra_files(['../../../../libraries/Cabal/Cabal/dist-install/haddock.t']),
unless(in_tree_compiler(), skip), req_haddock
,stats_num_field('bytes allocated',
- [(wordsize(64), 18865432648, 5)
+ [(wordsize(64), 18269309128, 5)
# 2012-08-14: 3255435248 (amd64/Linux)
# 2012-08-29: 3324606664 (amd64/Linux, new codegen)
# 2012-10-08: 3373401360 (amd64/Linux)
@@ -110,6 +110,7 @@ test('haddock.Cabal',
# 2017-02-16: 23867276992 Better Lint for join points
# 2017-02-17: 27784875792 (amd64/Linux) - Generalize kind of (->)
# 2017-02-12: 18865432648 (amd64/Linux) - Type-indexed Typeable
+ # 2017-05-31: 18269309128 (amd64/Linux) - Faster checkFamInstConsistency
,(platform('i386-unknown-mingw32'), 3293415576, 5)
# 2012-10-30: 1733638168 (x86/Windows)
@@ -133,7 +134,7 @@ test('haddock.compiler',
[extra_files(['../../../../compiler/stage2/haddock.t']),
unless(in_tree_compiler(), skip), req_haddock
,stats_num_field('bytes allocated',
- [(wordsize(64), 55777283352, 10)
+ [(wordsize(64), 52762752968, 10)
# 2012-08-14: 26070600504 (amd64/Linux)
# 2012-08-29: 26353100288 (amd64/Linux, new CG)
# 2012-09-18: 26882813032 (amd64/Linux)
@@ -151,6 +152,7 @@ test('haddock.compiler',
# 2016-11-29: 60911147344 (amd64/Linux) unknown cause
# 2017-02-11: 62070477608 (amd64/Linux) OccurAnal / One-Shot (#13227) (and others)
# 2017-02-25: 55777283352 (amd64/Linux) Early inline patch
+ # 2017-05-31: 52762752968 (amd64/Linux) Faster checkFamInstConsistency
,(platform('i386-unknown-mingw32'), 367546388, 10)
# 2012-10-30: 13773051312 (x86/Windows)