diff options
author | Bartosz Nitka <niteria@gmail.com> | 2018-01-05 15:20:05 +0000 |
---|---|---|
committer | Bartosz Nitka <niteria@gmail.com> | 2018-01-10 13:50:56 +0000 |
commit | dbdf77d92c9cd0bbb269137de0bf8754573cdc1e (patch) | |
tree | 17bbe7fb388308615ba009e4f887c719c9e58107 | |
parent | 1577908f2a9db0fcf6f749d40dd75481015f5497 (diff) | |
download | haskell-dbdf77d92c9cd0bbb269137de0bf8754573cdc1e.tar.gz |
Lift constructor tag allocation out of a loop
Before this change, for each constructor that we want
to allocate a tag for we would traverse a list of all
the constructors in a datatype to determine which tag
a constructor should get.
This is obviously quadratic and for datatypes with 10k
constructors it actually makes a big difference.
This change implements the plan outlined by @simonpj in
https://mail.haskell.org/pipermail/ghc-devs/2017-October/014974.html
which is basically about using a map and constructing it outside the
loop.
One place where things got a bit awkward was TysWiredIn.hs,
it would have been possible to just assign the tags by hand, but
that seemed error-prone to me, so I decided to go through a map
there as well.
Test Plan:
./validate
On a file with 10k constructors
Before:
8,130,522,344 bytes allocated in the heap
Total time 3.682s ( 3.920s elapsed)
After:
4,133,478,744 bytes allocated in the heap
Total time 2.509s ( 2.750s elapsed)
Reviewers: simonpj, bgamari
Reviewed By: simonpj
Subscribers: goldfire, rwbarton, thomie, simonmar, carter, simonpj
GHC Trac Issues: #14657
Differential Revision: https://phabricator.haskell.org/D4289
-rw-r--r-- | compiler/basicTypes/DataCon.hs | 5 | ||||
-rw-r--r-- | compiler/iface/BuildTyCl.hs | 13 | ||||
-rw-r--r-- | compiler/iface/TcIface.hs | 5 | ||||
-rw-r--r-- | compiler/prelude/TysWiredIn.hs | 10 | ||||
-rw-r--r-- | compiler/typecheck/TcTyClsDecls.hs | 13 | ||||
-rw-r--r-- | compiler/types/TyCon.hs | 25 | ||||
-rw-r--r-- | compiler/vectorise/Vectorise/Generic/PData.hs | 4 | ||||
-rw-r--r-- | compiler/vectorise/Vectorise/Type/TyConDecl.hs | 2 | ||||
-rw-r--r-- | testsuite/tests/perf/compiler/all.T | 12 | ||||
-rwxr-xr-x | testsuite/tests/perf/compiler/genManyConstructors | 25 |
10 files changed, 100 insertions, 14 deletions
diff --git a/compiler/basicTypes/DataCon.hs b/compiler/basicTypes/DataCon.hs index a6d05936c1..4351e38ce9 100644 --- a/compiler/basicTypes/DataCon.hs +++ b/compiler/basicTypes/DataCon.hs @@ -75,7 +75,6 @@ import Name import PrelNames import Var import Outputable -import ListSetOps import Util import BasicTypes import FastString @@ -862,6 +861,7 @@ mkDataCon :: Name -> Type -- ^ Original result type -> RuntimeRepInfo -- ^ See comments on 'TyCon.RuntimeRepInfo' -> TyCon -- ^ Representation type constructor + -> ConTag -- ^ Constructor tag -> ThetaType -- ^ The "stupid theta", context of the data -- declaration e.g. @data Eq a => T a ...@ -> Id -- ^ Worker Id @@ -874,7 +874,7 @@ mkDataCon name declared_infix prom_info fields univ_tvs ex_tvs user_tvbs eq_spec theta - orig_arg_tys orig_res_ty rep_info rep_tycon + orig_arg_tys orig_res_ty rep_info rep_tycon tag stupid_theta work_id rep -- Warning: mkDataCon is not a good place to check certain invariants. -- If the programmer writes the wrong result type in the decl, thus: @@ -918,7 +918,6 @@ mkDataCon name declared_infix prom_info -- source-language arguments. We add extra ones for the -- dictionary arguments right here. - tag = assoc "mkDataCon" (tyConDataCons rep_tycon `zip` [fIRST_TAG..]) con rep_arg_tys = dataConRepArgTys con rep_ty = diff --git a/compiler/iface/BuildTyCl.hs b/compiler/iface/BuildTyCl.hs index 113ec12b63..43e9408430 100644 --- a/compiler/iface/BuildTyCl.hs +++ b/compiler/iface/BuildTyCl.hs @@ -27,6 +27,7 @@ import Var import VarSet import BasicTypes import Name +import NameEnv import MkId import Class import TyCon @@ -107,13 +108,16 @@ buildDataCon :: FamInstEnvs -- or the GADT equalities -> [Type] -> Type -- Argument and result types -> TyCon -- Rep tycon + -> NameEnv ConTag -- Maps the Name of each DataCon to its + -- ConTag -> TcRnIf m n DataCon -- A wrapper for DataCon.mkDataCon that -- a) makes the worker Id -- b) makes the wrapper Id if necessary, including -- allocating its unique (hence monadic) -buildDataCon fam_envs src_name declared_infix prom_info src_bangs impl_bangs field_lbls - univ_tvs ex_tvs user_tvbs eq_spec ctxt arg_tys res_ty rep_tycon +buildDataCon fam_envs src_name declared_infix prom_info src_bangs impl_bangs + field_lbls univ_tvs ex_tvs user_tvbs eq_spec ctxt arg_tys res_ty + rep_tycon tag_map = do { wrap_name <- newImplicitBinder src_name mkDataConWrapperOcc ; work_name <- newImplicitBinder src_name mkDataConWorkerOcc -- This last one takes the name of the data constructor in the source @@ -124,10 +128,12 @@ buildDataCon fam_envs src_name declared_infix prom_info src_bangs impl_bangs fie ; us <- newUniqueSupply ; dflags <- getDynFlags ; let stupid_ctxt = mkDataConStupidTheta rep_tycon arg_tys univ_tvs + tag = lookupNameEnv_NF tag_map src_name + -- See Note [Constructor tag allocation], fixes #14657 data_con = mkDataCon src_name declared_infix prom_info src_bangs field_lbls univ_tvs ex_tvs user_tvbs eq_spec ctxt - arg_tys res_ty NoRRI rep_tycon + arg_tys res_ty NoRRI rep_tycon tag stupid_ctxt dc_wrk dc_rep dc_wrk = mkDataConWorkId work_name data_con dc_rep = initUs_ us (mkDataConRep dflags fam_envs wrap_name @@ -307,6 +313,7 @@ buildClass tycon_name binders roles fds arg_tys (mkTyConApp rec_tycon (mkTyVarTys univ_tvs)) rec_tycon + (mkTyConTagMap rec_tycon) ; rhs <- case () of _ | use_newtype diff --git a/compiler/iface/TcIface.hs b/compiler/iface/TcIface.hs index 6fad8da87c..70438f6337 100644 --- a/compiler/iface/TcIface.hs +++ b/compiler/iface/TcIface.hs @@ -897,6 +897,9 @@ tcIfaceDataCons tycon_name tycon tc_tybinders if_cons univ_tvs :: [TyVar] univ_tvs = binderVars (tyConTyVarBinders tc_tybinders) + tag_map :: NameEnv ConTag + tag_map = mkTyConTagMap tycon + tc_con_decl (IfCon { ifConInfix = is_infix, ifConExTvs = ex_bndrs, ifConUserTvBinders = user_bndrs, @@ -960,7 +963,7 @@ tcIfaceDataCons tycon_name tycon tc_tybinders if_cons lbl_names univ_tvs ex_tvs user_tv_bndrs eq_spec theta - arg_tys orig_res_ty tycon + arg_tys orig_res_ty tycon tag_map ; traceIf (text "Done interface-file tc_con_decl" <+> ppr dc_name) ; return con } mk_doc con_name = text "Constructor" <+> ppr con_name diff --git a/compiler/prelude/TysWiredIn.hs b/compiler/prelude/TysWiredIn.hs index fda6b221f3..72c24edc3c 100644 --- a/compiler/prelude/TysWiredIn.hs +++ b/compiler/prelude/TysWiredIn.hs @@ -150,7 +150,7 @@ import TyCon import Class ( Class, mkClass ) import RdrName import Name -import NameEnv ( NameEnv, mkNameEnv, lookupNameEnv ) +import NameEnv ( NameEnv, mkNameEnv, lookupNameEnv, lookupNameEnv_NF ) import NameSet ( NameSet, mkNameSet, elemNameSet ) import BasicTypes ( Arity, Boxity(..), TupleSort(..), ConTagZ, SourceText(..) ) @@ -517,6 +517,13 @@ pcDataConWithFixity' declared_infix dc_name wrk_key rri tyvars ex_tyvars user_tyvars arg_tys tycon = data_con where + tag_map = mkTyConTagMap tycon + -- This constructs the constructor Name to ConTag map once per + -- constructor, which is quadratic. It's OK here, because it's + -- only called for wired in data types that don't have a lot of + -- constructors. It's also likely that GHC will lift tag_map, since + -- we call pcDataConWithFixity' with static TyCons in the same module. + -- See Note [Constructor tag allocation] and #14657 data_con = mkDataCon dc_name declared_infix prom_info (map (const no_bang) arg_tys) [] -- No labelled fields @@ -527,6 +534,7 @@ pcDataConWithFixity' declared_infix dc_name wrk_key rri arg_tys (mkTyConApp tycon (mkTyVarTys tyvars)) rri tycon + (lookupNameEnv_NF tag_map dc_name) [] -- No stupid theta (mkDataConWorkId wrk_name data_con) NoDataConRep -- Wired-in types are too simple to need wrappers diff --git a/compiler/typecheck/TcTyClsDecls.hs b/compiler/typecheck/TcTyClsDecls.hs index 4625fb27df..cd08570af6 100644 --- a/compiler/typecheck/TcTyClsDecls.hs +++ b/compiler/typecheck/TcTyClsDecls.hs @@ -1691,16 +1691,19 @@ tcConDecls :: TyCon -> ([TyConBinder], Type) -- have all the names and the binders have the visibilities. tcConDecls rep_tycon (tmpl_bndrs, res_tmpl) = concatMapM $ addLocM $ - tcConDecl rep_tycon tmpl_bndrs res_tmpl + tcConDecl rep_tycon (mkTyConTagMap rep_tycon) tmpl_bndrs res_tmpl + -- It's important that we pay for tag allocation here, once per TyCon, + -- See Note [Constructor tag allocation], fixes #14657 tcConDecl :: TyCon -- Representation tycon. Knot-tied! + -> NameEnv ConTag -> [TyConBinder] -> Type -- Return type template (with its template tyvars) -- (tvs, T tys), where T is the family TyCon -> ConDecl GhcRn -> TcM [DataCon] -tcConDecl rep_tycon tmpl_bndrs res_tmpl +tcConDecl rep_tycon tag_map tmpl_bndrs res_tmpl (ConDeclH98 { con_name = name , con_ex_tvs = explicit_tkv_nms , con_mb_cxt = hs_ctxt @@ -1771,7 +1774,7 @@ tcConDecl rep_tycon tmpl_bndrs res_tmpl stricts Nothing field_lbls univ_tvs ex_tvs user_tvbs [{- no eq_preds -}] ctxt arg_tys - res_tmpl rep_tycon + res_tmpl rep_tycon tag_map -- NB: we put data_tc, the type constructor gotten from the -- constructor type signature into the data constructor; -- that way checkValidDataCon can complain if it's wrong. @@ -1780,7 +1783,7 @@ tcConDecl rep_tycon tmpl_bndrs res_tmpl ; mapM buildOneDataCon [name] } -tcConDecl rep_tycon tmpl_bndrs res_tmpl +tcConDecl rep_tycon tag_map tmpl_bndrs res_tmpl (ConDeclGADT { con_names = names , con_qvars = qtvs , con_mb_cxt = cxt, con_args = hs_args @@ -1851,7 +1854,7 @@ tcConDecl rep_tycon tmpl_bndrs res_tmpl rep_nm stricts Nothing field_lbls univ_tvs ex_tvs all_user_bndrs eq_preds - ctxt' arg_tys' res_ty' rep_tycon + ctxt' arg_tys' res_ty' rep_tycon tag_map -- NB: we put data_tc, the type constructor gotten from the -- constructor type signature into the data constructor; -- that way checkValidDataCon can complain if it's wrong. diff --git a/compiler/types/TyCon.hs b/compiler/types/TyCon.hs index 333f52c7fb..f30c59eb2b 100644 --- a/compiler/types/TyCon.hs +++ b/compiler/types/TyCon.hs @@ -97,6 +97,7 @@ module TyCon( tyConRuntimeRepInfo, tyConBinders, tyConResKind, tyConTyVarBinders, tcTyConScopedTyVars, + mkTyConTagMap, -- ** Manipulating TyCons expandSynTyCon_maybe, @@ -840,7 +841,7 @@ data AlgTyConRhs -- user declares the type to have no constructors -- -- INVARIANT: Kept in order of increasing 'DataCon' - -- tag (see the tag assignment in DataCon.mkDataCon) + -- tag (see the tag assignment in mkTyConTagMap) data_cons_size :: Int, -- ^ Cached value: length data_cons is_enum :: Bool -- ^ Cached value: is this an enumeration type? @@ -2330,6 +2331,28 @@ tyConRuntimeRepInfo _ = NoRRI -- could panic in that second case. But Douglas Adams told me not to. {- +Note [Constructor tag allocation] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When typechecking we need to allocate constructor tags to constructors. +They are allocated based on the position in the data_cons field of TyCon, +with the first constructor getting fIRST_TAG. + +We used to pay linear cost per constructor, with each constructor looking up +its relative index in the constructor list. That was quadratic and prohibitive +for large data types with more than 10k constructors. + +The current strategy is to build a NameEnv with a mapping from costructor's +Name to ConTag and pass it down to buildDataCon for efficient lookup. + +Relevant ticket: #14657 +-} + +mkTyConTagMap :: TyCon -> NameEnv ConTag +mkTyConTagMap tycon = + mkNameEnv $ map getName (tyConDataCons tycon) `zip` [fIRST_TAG..] + -- See Note [Constructor tag allocation] + +{- ************************************************************************ * * \subsection[TyCon-instances]{Instance declarations for @TyCon@} diff --git a/compiler/vectorise/Vectorise/Generic/PData.hs b/compiler/vectorise/Vectorise/Generic/PData.hs index 353d6963b6..29e6bc86ed 100644 --- a/compiler/vectorise/Vectorise/Generic/PData.hs +++ b/compiler/vectorise/Vectorise/Generic/PData.hs @@ -79,6 +79,7 @@ buildPDataDataCon orig_name vect_tc repr_tc repr fam_envs <- readGEnv global_fam_inst_env rep_nm <- liftDs $ newTyConRepName dc_name let univ_tvbs = mkTyVarBinders Specified tvs + tag_map = mkTyConTagMap repr_tc liftDs $ buildDataCon fam_envs dc_name False -- not infix rep_nm @@ -93,6 +94,7 @@ buildPDataDataCon orig_name vect_tc repr_tc repr comp_tys (mkFamilyTyConApp repr_tc (mkTyVarTys tvs)) repr_tc + tag_map where no_bang = HsSrcBang NoSourceText NoSrcUnpack NoSrcStrict @@ -125,6 +127,7 @@ buildPDatasDataCon orig_name vect_tc repr_tc repr fam_envs <- readGEnv global_fam_inst_env rep_nm <- liftDs $ newTyConRepName dc_name let univ_tvbs = mkTyVarBinders Specified tvs + tag_map = mkTyConTagMap repr_tc liftDs $ buildDataCon fam_envs dc_name False -- not infix rep_nm @@ -139,6 +142,7 @@ buildPDatasDataCon orig_name vect_tc repr_tc repr comp_tys (mkFamilyTyConApp repr_tc (mkTyVarTys tvs)) repr_tc + tag_map where no_bang = HsSrcBang NoSourceText NoSrcUnpack NoSrcStrict diff --git a/compiler/vectorise/Vectorise/Type/TyConDecl.hs b/compiler/vectorise/Vectorise/Type/TyConDecl.hs index e71637981a..4f1831e399 100644 --- a/compiler/vectorise/Vectorise/Type/TyConDecl.hs +++ b/compiler/vectorise/Vectorise/Type/TyConDecl.hs @@ -197,6 +197,7 @@ vectDataCon dc ; let ret_ty = mkFamilyTyConApp tycon' (mkTyVarTys univ_tvs) ; fam_envs <- readGEnv global_fam_inst_env ; rep_nm <- liftDs $ newTyConRepName name' + ; let tag_map = mkTyConTagMap tycon' ; liftDs $ buildDataCon fam_envs name' (dataConIsInfix dc) -- infix if the original is @@ -212,6 +213,7 @@ vectDataCon dc arg_tys -- argument types ret_ty -- return type tycon' -- representation tycon + tag_map } where name = dataConName dc diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T index 61b61ae78a..bd038a2407 100644 --- a/testsuite/tests/perf/compiler/all.T +++ b/testsuite/tests/perf/compiler/all.T @@ -1154,6 +1154,18 @@ test('MultiLayerModules', multimod_compile, ['MultiLayerModules', '-v0']) +test('ManyConstructors', + [ compiler_stats_num_field('bytes allocated', + [(wordsize(64), 4246959352, 10), + # initial: 8130527160 + # 2018-01-05: 4246959352 Lift constructor tag allocation out of a loop + ]), + pre_cmd('./genManyConstructors'), + extra_files(['genManyConstructors']), + ], + multimod_compile, + ['ManyConstructors', '-v0']) + test('T13701', [ compiler_stats_num_field('bytes allocated', [(platform('x86_64-apple-darwin'), 2217187888, 10), diff --git a/testsuite/tests/perf/compiler/genManyConstructors b/testsuite/tests/perf/compiler/genManyConstructors new file mode 100755 index 0000000000..ec4abdced7 --- /dev/null +++ b/testsuite/tests/perf/compiler/genManyConstructors @@ -0,0 +1,25 @@ +SIZE=10000 +MODULE=ManyConstructors + +# Generates a module with a large number of constructors that looks +# like this: +# +# module ManyConstructors where +# +# data A10000 = A0 +# | A00001 +# | A00002 +# ... +# | A10000 +# +# The point of this test is to check if we don't regress on #14657 reintroducing +# some code that's quadratic in the number of constructors in a data type. +# NB. This is not that artificial, I've seen data types of this size +# in the wild. + +echo "module $MODULE where" > $MODULE.hs +echo >> $MODULE.hs +echo "data A$SIZE = A0" >> $MODULE.hs +for i in $(seq -w 1 $SIZE); do + echo " | A$i" >> $MODULE.hs +done |