summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBartosz Nitka <niteria@gmail.com>2018-01-05 15:20:05 +0000
committerBartosz Nitka <niteria@gmail.com>2018-01-10 13:50:56 +0000
commitdbdf77d92c9cd0bbb269137de0bf8754573cdc1e (patch)
tree17bbe7fb388308615ba009e4f887c719c9e58107
parent1577908f2a9db0fcf6f749d40dd75481015f5497 (diff)
downloadhaskell-dbdf77d92c9cd0bbb269137de0bf8754573cdc1e.tar.gz
Lift constructor tag allocation out of a loop
Before this change, for each constructor that we want to allocate a tag for we would traverse a list of all the constructors in a datatype to determine which tag a constructor should get. This is obviously quadratic and for datatypes with 10k constructors it actually makes a big difference. This change implements the plan outlined by @simonpj in https://mail.haskell.org/pipermail/ghc-devs/2017-October/014974.html which is basically about using a map and constructing it outside the loop. One place where things got a bit awkward was TysWiredIn.hs, it would have been possible to just assign the tags by hand, but that seemed error-prone to me, so I decided to go through a map there as well. Test Plan: ./validate On a file with 10k constructors Before: 8,130,522,344 bytes allocated in the heap Total time 3.682s ( 3.920s elapsed) After: 4,133,478,744 bytes allocated in the heap Total time 2.509s ( 2.750s elapsed) Reviewers: simonpj, bgamari Reviewed By: simonpj Subscribers: goldfire, rwbarton, thomie, simonmar, carter, simonpj GHC Trac Issues: #14657 Differential Revision: https://phabricator.haskell.org/D4289
-rw-r--r--compiler/basicTypes/DataCon.hs5
-rw-r--r--compiler/iface/BuildTyCl.hs13
-rw-r--r--compiler/iface/TcIface.hs5
-rw-r--r--compiler/prelude/TysWiredIn.hs10
-rw-r--r--compiler/typecheck/TcTyClsDecls.hs13
-rw-r--r--compiler/types/TyCon.hs25
-rw-r--r--compiler/vectorise/Vectorise/Generic/PData.hs4
-rw-r--r--compiler/vectorise/Vectorise/Type/TyConDecl.hs2
-rw-r--r--testsuite/tests/perf/compiler/all.T12
-rwxr-xr-xtestsuite/tests/perf/compiler/genManyConstructors25
10 files changed, 100 insertions, 14 deletions
diff --git a/compiler/basicTypes/DataCon.hs b/compiler/basicTypes/DataCon.hs
index a6d05936c1..4351e38ce9 100644
--- a/compiler/basicTypes/DataCon.hs
+++ b/compiler/basicTypes/DataCon.hs
@@ -75,7 +75,6 @@ import Name
import PrelNames
import Var
import Outputable
-import ListSetOps
import Util
import BasicTypes
import FastString
@@ -862,6 +861,7 @@ mkDataCon :: Name
-> Type -- ^ Original result type
-> RuntimeRepInfo -- ^ See comments on 'TyCon.RuntimeRepInfo'
-> TyCon -- ^ Representation type constructor
+ -> ConTag -- ^ Constructor tag
-> ThetaType -- ^ The "stupid theta", context of the data
-- declaration e.g. @data Eq a => T a ...@
-> Id -- ^ Worker Id
@@ -874,7 +874,7 @@ mkDataCon name declared_infix prom_info
fields
univ_tvs ex_tvs user_tvbs
eq_spec theta
- orig_arg_tys orig_res_ty rep_info rep_tycon
+ orig_arg_tys orig_res_ty rep_info rep_tycon tag
stupid_theta work_id rep
-- Warning: mkDataCon is not a good place to check certain invariants.
-- If the programmer writes the wrong result type in the decl, thus:
@@ -918,7 +918,6 @@ mkDataCon name declared_infix prom_info
-- source-language arguments. We add extra ones for the
-- dictionary arguments right here.
- tag = assoc "mkDataCon" (tyConDataCons rep_tycon `zip` [fIRST_TAG..]) con
rep_arg_tys = dataConRepArgTys con
rep_ty =
diff --git a/compiler/iface/BuildTyCl.hs b/compiler/iface/BuildTyCl.hs
index 113ec12b63..43e9408430 100644
--- a/compiler/iface/BuildTyCl.hs
+++ b/compiler/iface/BuildTyCl.hs
@@ -27,6 +27,7 @@ import Var
import VarSet
import BasicTypes
import Name
+import NameEnv
import MkId
import Class
import TyCon
@@ -107,13 +108,16 @@ buildDataCon :: FamInstEnvs
-- or the GADT equalities
-> [Type] -> Type -- Argument and result types
-> TyCon -- Rep tycon
+ -> NameEnv ConTag -- Maps the Name of each DataCon to its
+ -- ConTag
-> TcRnIf m n DataCon
-- A wrapper for DataCon.mkDataCon that
-- a) makes the worker Id
-- b) makes the wrapper Id if necessary, including
-- allocating its unique (hence monadic)
-buildDataCon fam_envs src_name declared_infix prom_info src_bangs impl_bangs field_lbls
- univ_tvs ex_tvs user_tvbs eq_spec ctxt arg_tys res_ty rep_tycon
+buildDataCon fam_envs src_name declared_infix prom_info src_bangs impl_bangs
+ field_lbls univ_tvs ex_tvs user_tvbs eq_spec ctxt arg_tys res_ty
+ rep_tycon tag_map
= do { wrap_name <- newImplicitBinder src_name mkDataConWrapperOcc
; work_name <- newImplicitBinder src_name mkDataConWorkerOcc
-- This last one takes the name of the data constructor in the source
@@ -124,10 +128,12 @@ buildDataCon fam_envs src_name declared_infix prom_info src_bangs impl_bangs fie
; us <- newUniqueSupply
; dflags <- getDynFlags
; let stupid_ctxt = mkDataConStupidTheta rep_tycon arg_tys univ_tvs
+ tag = lookupNameEnv_NF tag_map src_name
+ -- See Note [Constructor tag allocation], fixes #14657
data_con = mkDataCon src_name declared_infix prom_info
src_bangs field_lbls
univ_tvs ex_tvs user_tvbs eq_spec ctxt
- arg_tys res_ty NoRRI rep_tycon
+ arg_tys res_ty NoRRI rep_tycon tag
stupid_ctxt dc_wrk dc_rep
dc_wrk = mkDataConWorkId work_name data_con
dc_rep = initUs_ us (mkDataConRep dflags fam_envs wrap_name
@@ -307,6 +313,7 @@ buildClass tycon_name binders roles fds
arg_tys
(mkTyConApp rec_tycon (mkTyVarTys univ_tvs))
rec_tycon
+ (mkTyConTagMap rec_tycon)
; rhs <- case () of
_ | use_newtype
diff --git a/compiler/iface/TcIface.hs b/compiler/iface/TcIface.hs
index 6fad8da87c..70438f6337 100644
--- a/compiler/iface/TcIface.hs
+++ b/compiler/iface/TcIface.hs
@@ -897,6 +897,9 @@ tcIfaceDataCons tycon_name tycon tc_tybinders if_cons
univ_tvs :: [TyVar]
univ_tvs = binderVars (tyConTyVarBinders tc_tybinders)
+ tag_map :: NameEnv ConTag
+ tag_map = mkTyConTagMap tycon
+
tc_con_decl (IfCon { ifConInfix = is_infix,
ifConExTvs = ex_bndrs,
ifConUserTvBinders = user_bndrs,
@@ -960,7 +963,7 @@ tcIfaceDataCons tycon_name tycon tc_tybinders if_cons
lbl_names
univ_tvs ex_tvs user_tv_bndrs
eq_spec theta
- arg_tys orig_res_ty tycon
+ arg_tys orig_res_ty tycon tag_map
; traceIf (text "Done interface-file tc_con_decl" <+> ppr dc_name)
; return con }
mk_doc con_name = text "Constructor" <+> ppr con_name
diff --git a/compiler/prelude/TysWiredIn.hs b/compiler/prelude/TysWiredIn.hs
index fda6b221f3..72c24edc3c 100644
--- a/compiler/prelude/TysWiredIn.hs
+++ b/compiler/prelude/TysWiredIn.hs
@@ -150,7 +150,7 @@ import TyCon
import Class ( Class, mkClass )
import RdrName
import Name
-import NameEnv ( NameEnv, mkNameEnv, lookupNameEnv )
+import NameEnv ( NameEnv, mkNameEnv, lookupNameEnv, lookupNameEnv_NF )
import NameSet ( NameSet, mkNameSet, elemNameSet )
import BasicTypes ( Arity, Boxity(..), TupleSort(..), ConTagZ,
SourceText(..) )
@@ -517,6 +517,13 @@ pcDataConWithFixity' declared_infix dc_name wrk_key rri
tyvars ex_tyvars user_tyvars arg_tys tycon
= data_con
where
+ tag_map = mkTyConTagMap tycon
+ -- This constructs the constructor Name to ConTag map once per
+ -- constructor, which is quadratic. It's OK here, because it's
+ -- only called for wired in data types that don't have a lot of
+ -- constructors. It's also likely that GHC will lift tag_map, since
+ -- we call pcDataConWithFixity' with static TyCons in the same module.
+ -- See Note [Constructor tag allocation] and #14657
data_con = mkDataCon dc_name declared_infix prom_info
(map (const no_bang) arg_tys)
[] -- No labelled fields
@@ -527,6 +534,7 @@ pcDataConWithFixity' declared_infix dc_name wrk_key rri
arg_tys (mkTyConApp tycon (mkTyVarTys tyvars))
rri
tycon
+ (lookupNameEnv_NF tag_map dc_name)
[] -- No stupid theta
(mkDataConWorkId wrk_name data_con)
NoDataConRep -- Wired-in types are too simple to need wrappers
diff --git a/compiler/typecheck/TcTyClsDecls.hs b/compiler/typecheck/TcTyClsDecls.hs
index 4625fb27df..cd08570af6 100644
--- a/compiler/typecheck/TcTyClsDecls.hs
+++ b/compiler/typecheck/TcTyClsDecls.hs
@@ -1691,16 +1691,19 @@ tcConDecls :: TyCon -> ([TyConBinder], Type)
-- have all the names and the binders have the visibilities.
tcConDecls rep_tycon (tmpl_bndrs, res_tmpl)
= concatMapM $ addLocM $
- tcConDecl rep_tycon tmpl_bndrs res_tmpl
+ tcConDecl rep_tycon (mkTyConTagMap rep_tycon) tmpl_bndrs res_tmpl
+ -- It's important that we pay for tag allocation here, once per TyCon,
+ -- See Note [Constructor tag allocation], fixes #14657
tcConDecl :: TyCon -- Representation tycon. Knot-tied!
+ -> NameEnv ConTag
-> [TyConBinder] -> Type
-- Return type template (with its template tyvars)
-- (tvs, T tys), where T is the family TyCon
-> ConDecl GhcRn
-> TcM [DataCon]
-tcConDecl rep_tycon tmpl_bndrs res_tmpl
+tcConDecl rep_tycon tag_map tmpl_bndrs res_tmpl
(ConDeclH98 { con_name = name
, con_ex_tvs = explicit_tkv_nms
, con_mb_cxt = hs_ctxt
@@ -1771,7 +1774,7 @@ tcConDecl rep_tycon tmpl_bndrs res_tmpl
stricts Nothing field_lbls
univ_tvs ex_tvs user_tvbs
[{- no eq_preds -}] ctxt arg_tys
- res_tmpl rep_tycon
+ res_tmpl rep_tycon tag_map
-- NB: we put data_tc, the type constructor gotten from the
-- constructor type signature into the data constructor;
-- that way checkValidDataCon can complain if it's wrong.
@@ -1780,7 +1783,7 @@ tcConDecl rep_tycon tmpl_bndrs res_tmpl
; mapM buildOneDataCon [name]
}
-tcConDecl rep_tycon tmpl_bndrs res_tmpl
+tcConDecl rep_tycon tag_map tmpl_bndrs res_tmpl
(ConDeclGADT { con_names = names
, con_qvars = qtvs
, con_mb_cxt = cxt, con_args = hs_args
@@ -1851,7 +1854,7 @@ tcConDecl rep_tycon tmpl_bndrs res_tmpl
rep_nm
stricts Nothing field_lbls
univ_tvs ex_tvs all_user_bndrs eq_preds
- ctxt' arg_tys' res_ty' rep_tycon
+ ctxt' arg_tys' res_ty' rep_tycon tag_map
-- NB: we put data_tc, the type constructor gotten from the
-- constructor type signature into the data constructor;
-- that way checkValidDataCon can complain if it's wrong.
diff --git a/compiler/types/TyCon.hs b/compiler/types/TyCon.hs
index 333f52c7fb..f30c59eb2b 100644
--- a/compiler/types/TyCon.hs
+++ b/compiler/types/TyCon.hs
@@ -97,6 +97,7 @@ module TyCon(
tyConRuntimeRepInfo,
tyConBinders, tyConResKind, tyConTyVarBinders,
tcTyConScopedTyVars,
+ mkTyConTagMap,
-- ** Manipulating TyCons
expandSynTyCon_maybe,
@@ -840,7 +841,7 @@ data AlgTyConRhs
-- user declares the type to have no constructors
--
-- INVARIANT: Kept in order of increasing 'DataCon'
- -- tag (see the tag assignment in DataCon.mkDataCon)
+ -- tag (see the tag assignment in mkTyConTagMap)
data_cons_size :: Int,
-- ^ Cached value: length data_cons
is_enum :: Bool -- ^ Cached value: is this an enumeration type?
@@ -2330,6 +2331,28 @@ tyConRuntimeRepInfo _ = NoRRI
-- could panic in that second case. But Douglas Adams told me not to.
{-
+Note [Constructor tag allocation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When typechecking we need to allocate constructor tags to constructors.
+They are allocated based on the position in the data_cons field of TyCon,
+with the first constructor getting fIRST_TAG.
+
+We used to pay linear cost per constructor, with each constructor looking up
+its relative index in the constructor list. That was quadratic and prohibitive
+for large data types with more than 10k constructors.
+
+The current strategy is to build a NameEnv with a mapping from costructor's
+Name to ConTag and pass it down to buildDataCon for efficient lookup.
+
+Relevant ticket: #14657
+-}
+
+mkTyConTagMap :: TyCon -> NameEnv ConTag
+mkTyConTagMap tycon =
+ mkNameEnv $ map getName (tyConDataCons tycon) `zip` [fIRST_TAG..]
+ -- See Note [Constructor tag allocation]
+
+{-
************************************************************************
* *
\subsection[TyCon-instances]{Instance declarations for @TyCon@}
diff --git a/compiler/vectorise/Vectorise/Generic/PData.hs b/compiler/vectorise/Vectorise/Generic/PData.hs
index 353d6963b6..29e6bc86ed 100644
--- a/compiler/vectorise/Vectorise/Generic/PData.hs
+++ b/compiler/vectorise/Vectorise/Generic/PData.hs
@@ -79,6 +79,7 @@ buildPDataDataCon orig_name vect_tc repr_tc repr
fam_envs <- readGEnv global_fam_inst_env
rep_nm <- liftDs $ newTyConRepName dc_name
let univ_tvbs = mkTyVarBinders Specified tvs
+ tag_map = mkTyConTagMap repr_tc
liftDs $ buildDataCon fam_envs dc_name
False -- not infix
rep_nm
@@ -93,6 +94,7 @@ buildPDataDataCon orig_name vect_tc repr_tc repr
comp_tys
(mkFamilyTyConApp repr_tc (mkTyVarTys tvs))
repr_tc
+ tag_map
where
no_bang = HsSrcBang NoSourceText NoSrcUnpack NoSrcStrict
@@ -125,6 +127,7 @@ buildPDatasDataCon orig_name vect_tc repr_tc repr
fam_envs <- readGEnv global_fam_inst_env
rep_nm <- liftDs $ newTyConRepName dc_name
let univ_tvbs = mkTyVarBinders Specified tvs
+ tag_map = mkTyConTagMap repr_tc
liftDs $ buildDataCon fam_envs dc_name
False -- not infix
rep_nm
@@ -139,6 +142,7 @@ buildPDatasDataCon orig_name vect_tc repr_tc repr
comp_tys
(mkFamilyTyConApp repr_tc (mkTyVarTys tvs))
repr_tc
+ tag_map
where
no_bang = HsSrcBang NoSourceText NoSrcUnpack NoSrcStrict
diff --git a/compiler/vectorise/Vectorise/Type/TyConDecl.hs b/compiler/vectorise/Vectorise/Type/TyConDecl.hs
index e71637981a..4f1831e399 100644
--- a/compiler/vectorise/Vectorise/Type/TyConDecl.hs
+++ b/compiler/vectorise/Vectorise/Type/TyConDecl.hs
@@ -197,6 +197,7 @@ vectDataCon dc
; let ret_ty = mkFamilyTyConApp tycon' (mkTyVarTys univ_tvs)
; fam_envs <- readGEnv global_fam_inst_env
; rep_nm <- liftDs $ newTyConRepName name'
+ ; let tag_map = mkTyConTagMap tycon'
; liftDs $ buildDataCon fam_envs
name'
(dataConIsInfix dc) -- infix if the original is
@@ -212,6 +213,7 @@ vectDataCon dc
arg_tys -- argument types
ret_ty -- return type
tycon' -- representation tycon
+ tag_map
}
where
name = dataConName dc
diff --git a/testsuite/tests/perf/compiler/all.T b/testsuite/tests/perf/compiler/all.T
index 61b61ae78a..bd038a2407 100644
--- a/testsuite/tests/perf/compiler/all.T
+++ b/testsuite/tests/perf/compiler/all.T
@@ -1154,6 +1154,18 @@ test('MultiLayerModules',
multimod_compile,
['MultiLayerModules', '-v0'])
+test('ManyConstructors',
+ [ compiler_stats_num_field('bytes allocated',
+ [(wordsize(64), 4246959352, 10),
+ # initial: 8130527160
+ # 2018-01-05: 4246959352 Lift constructor tag allocation out of a loop
+ ]),
+ pre_cmd('./genManyConstructors'),
+ extra_files(['genManyConstructors']),
+ ],
+ multimod_compile,
+ ['ManyConstructors', '-v0'])
+
test('T13701',
[ compiler_stats_num_field('bytes allocated',
[(platform('x86_64-apple-darwin'), 2217187888, 10),
diff --git a/testsuite/tests/perf/compiler/genManyConstructors b/testsuite/tests/perf/compiler/genManyConstructors
new file mode 100755
index 0000000000..ec4abdced7
--- /dev/null
+++ b/testsuite/tests/perf/compiler/genManyConstructors
@@ -0,0 +1,25 @@
+SIZE=10000
+MODULE=ManyConstructors
+
+# Generates a module with a large number of constructors that looks
+# like this:
+#
+# module ManyConstructors where
+#
+# data A10000 = A0
+# | A00001
+# | A00002
+# ...
+# | A10000
+#
+# The point of this test is to check if we don't regress on #14657 reintroducing
+# some code that's quadratic in the number of constructors in a data type.
+# NB. This is not that artificial, I've seen data types of this size
+# in the wild.
+
+echo "module $MODULE where" > $MODULE.hs
+echo >> $MODULE.hs
+echo "data A$SIZE = A0" >> $MODULE.hs
+for i in $(seq -w 1 $SIZE); do
+ echo " | A$i" >> $MODULE.hs
+done