From 3d38e8284b7382844f9862e8d8afbae9c7248b09 Mon Sep 17 00:00:00 2001 From: Simon Peyton Jones Date: Thu, 26 Apr 2018 15:59:13 +0100 Subject: Do not unpack class dictionaries with INLINABLE Matthew Pickering uncovered a bad performance hole in the way that single-method dictionaries work, described in Trac #14955. See Note [Do not unpack class dictionaries] in WwLib. I tried to fix this 6 years ago, but got it slightly wrong. This patch fixes it, which makes a dramatic improvement in the test case. Nofib highlights: not much happening: Program Size Allocs Runtime Elapsed TotalMem ----------------------------------------------------------------- VSM -0.3% +2.7% -7.4% -7.4% 0.0% cacheprof -0.0% +0.1% +0.3% +0.7% 0.0% integer -0.0% +1.1% +7.5% +7.5% 0.0% tak -0.1% -0.2% 0.024 0.024 0.0% ----------------------------------------------------------------- Min -4.4% -0.2% -7.4% -7.4% -8.0% Max +0.6% +2.7% +7.5% +7.5% 0.0% Geom Mean -0.1% +0.0% +0.1% +0.1% -0.2% I investigated VSM. The patch unpacks class dictionaries a bit more than before (i.e. does so if there is no INLINABLE pragma). And that gives better code in VSM (less dictionary selection etc), but one closure gets one word bigger. I'll accept these changes in exchange for more robust performance. Some ghci.debugger output wobbled around (order of bindings being displayed). I have no idea why; but I accepted the changes. --- compiler/stranal/WorkWrap.hs | 10 +++---- compiler/stranal/WwLib.hs | 64 +++++++++++++++++++++++++++++--------------- 2 files changed, 46 insertions(+), 28 deletions(-) (limited to 'compiler/stranal') diff --git a/compiler/stranal/WorkWrap.hs b/compiler/stranal/WorkWrap.hs index 9557cecdfe..8da2a1288a 100644 --- a/compiler/stranal/WorkWrap.hs +++ b/compiler/stranal/WorkWrap.hs @@ -494,8 +494,7 @@ splitFun :: DynFlags -> FamInstEnvs -> Id -> IdInfo -> [Demand] -> DmdResult -> splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) ) do -- The arity should match the signature - stuff <- mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty - wrap_dmds use_res_info + stuff <- mkWwBodies dflags fam_envs rhs_fvs fn_id wrap_dmds use_res_info case stuff of Just (work_demands, join_arity, wrap_fn, work_fn) -> do work_uniq <- getUniqueM @@ -527,7 +526,7 @@ splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs `setInlinePragma` work_prag - `setIdUnfolding` mkWorkerUnfolding dflags work_fn (unfoldingInfo fn_info) + `setIdUnfolding` mkWorkerUnfolding dflags work_fn fn_unfolding -- See Note [Worker-wrapper for INLINABLE functions] `setIdStrictness` mkClosedStrictSig work_demands work_res_info @@ -576,13 +575,12 @@ splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs Nothing -> return [(fn_id, rhs)] where - mb_join_arity = isJoinId_maybe fn_id rhs_fvs = exprFreeVars rhs - fun_ty = idType fn_id fn_inl_prag = inlinePragInfo fn_info fn_inline_spec = inl_inline fn_inl_prag fn_act = inl_act fn_inl_prag rule_match_info = inlinePragmaRuleMatchInfo fn_inl_prag + fn_unfolding = unfoldingInfo fn_info arity = arityInfo fn_info -- The arity is set by the simplifier using exprEtaExpandArity -- So it may be more than the number of top-level-visible lambdas @@ -691,7 +689,7 @@ then the splitting will go deeper too. splitThunk :: DynFlags -> FamInstEnvs -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)] splitThunk dflags fam_envs is_rec fn_id rhs = ASSERT(not (isJoinId fn_id)) - do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs [fn_id] + do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs False [fn_id] ; let res = [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ] ; if useful then ASSERT2( isNonRec is_rec, ppr fn_id ) -- The thunk must be non-recursive return res diff --git a/compiler/stranal/WwLib.hs b/compiler/stranal/WwLib.hs index 9d957c4251..ab0a4d1ee1 100644 --- a/compiler/stranal/WwLib.hs +++ b/compiler/stranal/WwLib.hs @@ -123,8 +123,7 @@ mkWwBodies :: DynFlags -> FamInstEnvs -> VarSet -- Free vars of RHS -- See Note [Freshen WW arguments] - -> Maybe JoinArity -- Just ar <=> is join point with join arity ar - -> Type -- Type of original function + -> Id -- The original function -> [Demand] -- Strictness of original function -> DmdResult -- Info about function result -> UniqSM (Maybe WwResult) @@ -140,12 +139,14 @@ mkWwBodies :: DynFlags -- let x = (a,b) in -- E -mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info +mkWwBodies dflags fam_envs rhs_fvs fun_id demands res_info = do { let empty_subst = mkEmptyTCvSubst (mkInScopeSet rhs_fvs) -- See Note [Freshen WW arguments] - ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) <- mkWWargs empty_subst fun_ty demands - ; (useful1, work_args, wrap_fn_str, work_fn_str) <- mkWWstr dflags fam_envs wrap_args + ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) + <- mkWWargs empty_subst fun_ty demands + ; (useful1, work_args, wrap_fn_str, work_fn_str) + <- mkWWstr dflags fam_envs has_inlineable_prag wrap_args -- Do CPR w/w. See Note [Always do CPR w/w] ; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty) @@ -158,7 +159,7 @@ mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info ; if isWorkerSmallEnough dflags work_args && not (too_many_args_for_join_point wrap_args) - && (useful1 && not only_one_void_argument || useful2) + && ((useful1 && not only_one_void_argument) || useful2) then return (Just (worker_args_dmds, length work_call_args, wrapper_body, worker_body)) else return Nothing @@ -171,6 +172,11 @@ mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info -- f's RHS is now trivial (size 1) we still want the __inline__ to prevent -- fw from being inlined into f's RHS where + fun_ty = idType fun_id + mb_join_arity = isJoinId_maybe fun_id + has_inlineable_prag = isStableUnfolding (realIdUnfolding fun_id) + -- See Note [Do not unpack class dictionaries] + -- Note [Do not split void functions] only_one_void_argument | [d] <- demands @@ -490,6 +496,8 @@ To avoid this: mkWWstr :: DynFlags -> FamInstEnvs + -> Bool -- True <=> INLINEABLE pragama on this function defn + -- See Note [Do not unpack class dictionaries] -> [Var] -- Wrapper args; have their demand info on them -- *Includes type variables* -> UniqSM (Bool, -- Is this useful @@ -501,13 +509,18 @@ mkWWstr :: DynFlags CoreExpr -> CoreExpr) -- Worker body, lacking the original body of the function, -- and lacking its lambdas. -- This fn does the reboxing -mkWWstr _ _ [] - = return (False, [], nop_fn, nop_fn) +mkWWstr dflags fam_envs has_inlineable_prag args + = go args + where + go_one arg = mkWWstr_one dflags fam_envs has_inlineable_prag arg -mkWWstr dflags fam_envs (arg : args) = do - (useful1, args1, wrap_fn1, work_fn1) <- mkWWstr_one dflags fam_envs arg - (useful2, args2, wrap_fn2, work_fn2) <- mkWWstr dflags fam_envs args - return (useful1 || useful2, args1 ++ args2, wrap_fn1 . wrap_fn2, work_fn1 . work_fn2) + go [] = return (False, [], nop_fn, nop_fn) + go (arg : args) = do { (useful1, args1, wrap_fn1, work_fn1) <- go_one arg + ; (useful2, args2, wrap_fn2, work_fn2) <- go args + ; return ( useful1 || useful2 + , args1 ++ args2 + , wrap_fn1 . wrap_fn2 + , work_fn1 . work_fn2) } {- Note [Unpacking arguments with product and polymorphic demands] @@ -544,9 +557,12 @@ as-yet-un-filled-in pkgState files. -- brings into scope work_args (via cases) -- * work_fn assumes work_args are in scope, a -- brings into scope wrap_arg (via lets) -mkWWstr_one :: DynFlags -> FamInstEnvs -> Var - -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr) -mkWWstr_one dflags fam_envs arg +mkWWstr_one :: DynFlags -> FamInstEnvs + -> Bool -- True <=> INLINEABLE pragama on this function defn + -- See Note [Do not unpack class dictionaries] + -> Var + -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr) +mkWWstr_one dflags fam_envs has_inlineable_prag arg | isTyVar arg = return (False, [arg], nop_fn, nop_fn) @@ -581,8 +597,10 @@ mkWWstr_one dflags fam_envs arg | isStrictDmd dmd , Just cs <- splitProdDmd_maybe dmd -- See Note [Unpacking arguments with product and polymorphic demands] + , not (has_inlineable_prag && isClassPred arg_ty) + -- See Note [Do not unpack class dictionaries] , Just (data_con, inst_tys, inst_con_arg_tys, co) - <- deepSplitProductType_maybe fam_envs (idType arg) + <- deepSplitProductType_maybe fam_envs arg_ty , cs `equalLength` inst_con_arg_tys -- See Note [mkWWstr and unsafeCoerce] = do { (uniq1:uniqs) <- getUniquesM @@ -594,7 +612,7 @@ mkWWstr_one dflags fam_envs arg -- in Simplify.hs; and see Trac #13890 rebox_fn = Let (NonRec arg_no_unf con_app) con_app = mkConApp2 data_con inst_tys unpk_args `mkCast` mkSymCo co - ; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs unpk_args + ; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs False unpk_args ; return (True, worker_args, unbox_fn . wrap_fn, work_fn . rebox_fn) } -- Don't pass the arg, rebox instead @@ -602,7 +620,8 @@ mkWWstr_one dflags fam_envs arg = return (False, [arg], nop_fn, nop_fn) where - dmd = idDemandInfo arg + arg_ty = idType arg + dmd = idDemandInfo arg mk_ww_arg uniq ty sub_dmd = setIdDemandInfo (mk_ww_local uniq ty) sub_dmd ---------------------- @@ -680,10 +699,12 @@ BUT if f is strict in the Ord dictionary, we might unpack it, to get and the type-class specialiser can't specialise that. An example is Trac #6056. -Moreover, dictionaries can have a lot of fields, so unpacking them can -increase closure sizes. +But in any other situation a dictionary is just an ordinary value, +and can be unpacked. So we track the INLINABLE pragma, and switch +off the unpacking in mkWWstr_one (see the isClassPred test). -Conclusion: don't unpack dictionaries. +Historical note: Trac #14955 describes how I got this fix wrong +the first time. -} deepSplitProductType_maybe @@ -699,7 +720,6 @@ deepSplitProductType_maybe fam_envs ty `orElse` (mkRepReflCo ty, ty) , Just (tc, tc_args) <- splitTyConApp_maybe ty1 , Just con <- isDataProductTyCon_maybe tc - , not (isClassTyCon tc) -- See Note [Do not unpack class dictionaries] , let arg_tys = dataConInstArgTys con tc_args strict_marks = dataConRepStrictness con = Just (con, tc_args, zipEqual "dspt" arg_tys strict_marks, co) -- cgit v1.2.1