diff options
-rw-r--r-- | compiler/stranal/WorkWrap.hs | 10 | ||||
-rw-r--r-- | compiler/stranal/WwLib.hs | 64 | ||||
-rw-r--r-- | testsuite/tests/ghci.debugger/scripts/break006.stdout | 4 | ||||
-rw-r--r-- | testsuite/tests/ghci.debugger/scripts/hist001.stdout | 2 | ||||
-rw-r--r-- | testsuite/tests/ghci.debugger/scripts/hist002.stdout | 2 | ||||
-rw-r--r-- | testsuite/tests/indexed-types/should_compile/T7837.stderr | 4 | ||||
-rw-r--r-- | testsuite/tests/perf/should_run/T14955.hs | 20 | ||||
-rw-r--r-- | testsuite/tests/perf/should_run/T14955.stdout | 1 | ||||
-rw-r--r-- | testsuite/tests/perf/should_run/T14955a.hs | 72 | ||||
-rw-r--r-- | testsuite/tests/perf/should_run/all.T | 9 |
10 files changed, 156 insertions, 32 deletions
diff --git a/compiler/stranal/WorkWrap.hs b/compiler/stranal/WorkWrap.hs index 9557cecdfe..8da2a1288a 100644 --- a/compiler/stranal/WorkWrap.hs +++ b/compiler/stranal/WorkWrap.hs @@ -494,8 +494,7 @@ splitFun :: DynFlags -> FamInstEnvs -> Id -> IdInfo -> [Demand] -> DmdResult -> splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs = WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) ) do -- The arity should match the signature - stuff <- mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty - wrap_dmds use_res_info + stuff <- mkWwBodies dflags fam_envs rhs_fvs fn_id wrap_dmds use_res_info case stuff of Just (work_demands, join_arity, wrap_fn, work_fn) -> do work_uniq <- getUniqueM @@ -527,7 +526,7 @@ splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs `setInlinePragma` work_prag - `setIdUnfolding` mkWorkerUnfolding dflags work_fn (unfoldingInfo fn_info) + `setIdUnfolding` mkWorkerUnfolding dflags work_fn fn_unfolding -- See Note [Worker-wrapper for INLINABLE functions] `setIdStrictness` mkClosedStrictSig work_demands work_res_info @@ -576,13 +575,12 @@ splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs Nothing -> return [(fn_id, rhs)] where - mb_join_arity = isJoinId_maybe fn_id rhs_fvs = exprFreeVars rhs - fun_ty = idType fn_id fn_inl_prag = inlinePragInfo fn_info fn_inline_spec = inl_inline fn_inl_prag fn_act = inl_act fn_inl_prag rule_match_info = inlinePragmaRuleMatchInfo fn_inl_prag + fn_unfolding = unfoldingInfo fn_info arity = arityInfo fn_info -- The arity is set by the simplifier using exprEtaExpandArity -- So it may be more than the number of top-level-visible lambdas @@ -691,7 +689,7 @@ then the splitting will go deeper too. splitThunk :: DynFlags -> FamInstEnvs -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)] splitThunk dflags fam_envs is_rec fn_id rhs = ASSERT(not (isJoinId fn_id)) - do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs [fn_id] + do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs False [fn_id] ; let res = [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ] ; if useful then ASSERT2( isNonRec is_rec, ppr fn_id ) -- The thunk must be non-recursive return res diff --git a/compiler/stranal/WwLib.hs b/compiler/stranal/WwLib.hs index 9d957c4251..ab0a4d1ee1 100644 --- a/compiler/stranal/WwLib.hs +++ b/compiler/stranal/WwLib.hs @@ -123,8 +123,7 @@ mkWwBodies :: DynFlags -> FamInstEnvs -> VarSet -- Free vars of RHS -- See Note [Freshen WW arguments] - -> Maybe JoinArity -- Just ar <=> is join point with join arity ar - -> Type -- Type of original function + -> Id -- The original function -> [Demand] -- Strictness of original function -> DmdResult -- Info about function result -> UniqSM (Maybe WwResult) @@ -140,12 +139,14 @@ mkWwBodies :: DynFlags -- let x = (a,b) in -- E -mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info +mkWwBodies dflags fam_envs rhs_fvs fun_id demands res_info = do { let empty_subst = mkEmptyTCvSubst (mkInScopeSet rhs_fvs) -- See Note [Freshen WW arguments] - ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) <- mkWWargs empty_subst fun_ty demands - ; (useful1, work_args, wrap_fn_str, work_fn_str) <- mkWWstr dflags fam_envs wrap_args + ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) + <- mkWWargs empty_subst fun_ty demands + ; (useful1, work_args, wrap_fn_str, work_fn_str) + <- mkWWstr dflags fam_envs has_inlineable_prag wrap_args -- Do CPR w/w. See Note [Always do CPR w/w] ; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty) @@ -158,7 +159,7 @@ mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info ; if isWorkerSmallEnough dflags work_args && not (too_many_args_for_join_point wrap_args) - && (useful1 && not only_one_void_argument || useful2) + && ((useful1 && not only_one_void_argument) || useful2) then return (Just (worker_args_dmds, length work_call_args, wrapper_body, worker_body)) else return Nothing @@ -171,6 +172,11 @@ mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info -- f's RHS is now trivial (size 1) we still want the __inline__ to prevent -- fw from being inlined into f's RHS where + fun_ty = idType fun_id + mb_join_arity = isJoinId_maybe fun_id + has_inlineable_prag = isStableUnfolding (realIdUnfolding fun_id) + -- See Note [Do not unpack class dictionaries] + -- Note [Do not split void functions] only_one_void_argument | [d] <- demands @@ -490,6 +496,8 @@ To avoid this: mkWWstr :: DynFlags -> FamInstEnvs + -> Bool -- True <=> INLINEABLE pragama on this function defn + -- See Note [Do not unpack class dictionaries] -> [Var] -- Wrapper args; have their demand info on them -- *Includes type variables* -> UniqSM (Bool, -- Is this useful @@ -501,13 +509,18 @@ mkWWstr :: DynFlags CoreExpr -> CoreExpr) -- Worker body, lacking the original body of the function, -- and lacking its lambdas. -- This fn does the reboxing -mkWWstr _ _ [] - = return (False, [], nop_fn, nop_fn) +mkWWstr dflags fam_envs has_inlineable_prag args + = go args + where + go_one arg = mkWWstr_one dflags fam_envs has_inlineable_prag arg -mkWWstr dflags fam_envs (arg : args) = do - (useful1, args1, wrap_fn1, work_fn1) <- mkWWstr_one dflags fam_envs arg - (useful2, args2, wrap_fn2, work_fn2) <- mkWWstr dflags fam_envs args - return (useful1 || useful2, args1 ++ args2, wrap_fn1 . wrap_fn2, work_fn1 . work_fn2) + go [] = return (False, [], nop_fn, nop_fn) + go (arg : args) = do { (useful1, args1, wrap_fn1, work_fn1) <- go_one arg + ; (useful2, args2, wrap_fn2, work_fn2) <- go args + ; return ( useful1 || useful2 + , args1 ++ args2 + , wrap_fn1 . wrap_fn2 + , work_fn1 . work_fn2) } {- Note [Unpacking arguments with product and polymorphic demands] @@ -544,9 +557,12 @@ as-yet-un-filled-in pkgState files. -- brings into scope work_args (via cases) -- * work_fn assumes work_args are in scope, a -- brings into scope wrap_arg (via lets) -mkWWstr_one :: DynFlags -> FamInstEnvs -> Var - -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr) -mkWWstr_one dflags fam_envs arg +mkWWstr_one :: DynFlags -> FamInstEnvs + -> Bool -- True <=> INLINEABLE pragama on this function defn + -- See Note [Do not unpack class dictionaries] + -> Var + -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr) +mkWWstr_one dflags fam_envs has_inlineable_prag arg | isTyVar arg = return (False, [arg], nop_fn, nop_fn) @@ -581,8 +597,10 @@ mkWWstr_one dflags fam_envs arg | isStrictDmd dmd , Just cs <- splitProdDmd_maybe dmd -- See Note [Unpacking arguments with product and polymorphic demands] + , not (has_inlineable_prag && isClassPred arg_ty) + -- See Note [Do not unpack class dictionaries] , Just (data_con, inst_tys, inst_con_arg_tys, co) - <- deepSplitProductType_maybe fam_envs (idType arg) + <- deepSplitProductType_maybe fam_envs arg_ty , cs `equalLength` inst_con_arg_tys -- See Note [mkWWstr and unsafeCoerce] = do { (uniq1:uniqs) <- getUniquesM @@ -594,7 +612,7 @@ mkWWstr_one dflags fam_envs arg -- in Simplify.hs; and see Trac #13890 rebox_fn = Let (NonRec arg_no_unf con_app) con_app = mkConApp2 data_con inst_tys unpk_args `mkCast` mkSymCo co - ; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs unpk_args + ; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs False unpk_args ; return (True, worker_args, unbox_fn . wrap_fn, work_fn . rebox_fn) } -- Don't pass the arg, rebox instead @@ -602,7 +620,8 @@ mkWWstr_one dflags fam_envs arg = return (False, [arg], nop_fn, nop_fn) where - dmd = idDemandInfo arg + arg_ty = idType arg + dmd = idDemandInfo arg mk_ww_arg uniq ty sub_dmd = setIdDemandInfo (mk_ww_local uniq ty) sub_dmd ---------------------- @@ -680,10 +699,12 @@ BUT if f is strict in the Ord dictionary, we might unpack it, to get and the type-class specialiser can't specialise that. An example is Trac #6056. -Moreover, dictionaries can have a lot of fields, so unpacking them can -increase closure sizes. +But in any other situation a dictionary is just an ordinary value, +and can be unpacked. So we track the INLINABLE pragma, and switch +off the unpacking in mkWWstr_one (see the isClassPred test). -Conclusion: don't unpack dictionaries. +Historical note: Trac #14955 describes how I got this fix wrong +the first time. -} deepSplitProductType_maybe @@ -699,7 +720,6 @@ deepSplitProductType_maybe fam_envs ty `orElse` (mkRepReflCo ty, ty) , Just (tc, tc_args) <- splitTyConApp_maybe ty1 , Just con <- isDataProductTyCon_maybe tc - , not (isClassTyCon tc) -- See Note [Do not unpack class dictionaries] , let arg_tys = dataConInstArgTys con tc_args strict_marks = dataConRepStrictness con = Just (con, tc_args, zipEqual "dspt" arg_tys strict_marks, co) diff --git a/testsuite/tests/ghci.debugger/scripts/break006.stdout b/testsuite/tests/ghci.debugger/scripts/break006.stdout index 8a2463fa4e..cd9f6ea1fe 100644 --- a/testsuite/tests/ghci.debugger/scripts/break006.stdout +++ b/testsuite/tests/ghci.debugger/scripts/break006.stdout @@ -4,14 +4,14 @@ f :: Integer -> a = _ x :: Integer = 1 xs :: [Integer] = [2,3] xs :: [Integer] = [2,3] -x :: Integer = 1 f :: Integer -> a = _ +x :: Integer = 1 _result :: [a] = _ y = (_t1::a) y = 2 xs :: [Integer] = [2,3] -x :: Integer = 1 f :: Integer -> Integer = _ +x :: Integer = 1 _result :: [Integer] = _ y :: Integer = 2 _t1 :: Integer = 2 diff --git a/testsuite/tests/ghci.debugger/scripts/hist001.stdout b/testsuite/tests/ghci.debugger/scripts/hist001.stdout index a19a34f315..b52e8aa5fe 100644 --- a/testsuite/tests/ghci.debugger/scripts/hist001.stdout +++ b/testsuite/tests/ghci.debugger/scripts/hist001.stdout @@ -20,8 +20,8 @@ _result :: a f :: Integer -> a x :: Integer xs :: [t] = [] -x :: Integer = 2 f :: Integer -> a = _ +x :: Integer = 2 _result :: a = _ _result = 3 Logged breakpoint at Test3.hs:2:18-31 diff --git a/testsuite/tests/ghci.debugger/scripts/hist002.stdout b/testsuite/tests/ghci.debugger/scripts/hist002.stdout index a19a34f315..b52e8aa5fe 100644 --- a/testsuite/tests/ghci.debugger/scripts/hist002.stdout +++ b/testsuite/tests/ghci.debugger/scripts/hist002.stdout @@ -20,8 +20,8 @@ _result :: a f :: Integer -> a x :: Integer xs :: [t] = [] -x :: Integer = 2 f :: Integer -> a = _ +x :: Integer = 2 _result :: a = _ _result = 3 Logged breakpoint at Test3.hs:2:18-31 diff --git a/testsuite/tests/indexed-types/should_compile/T7837.stderr b/testsuite/tests/indexed-types/should_compile/T7837.stderr index 6e0720e8d0..7900ce5ba2 100644 --- a/testsuite/tests/indexed-types/should_compile/T7837.stderr +++ b/testsuite/tests/indexed-types/should_compile/T7837.stderr @@ -2,3 +2,7 @@ Rule fired: Class op signum (BUILTIN) Rule fired: Class op abs (BUILTIN) Rule fired: Class op heq_sel (BUILTIN) Rule fired: normalize/Double (T7837) +Rule fired: Class op heq_sel (BUILTIN) +Rule fired: Class op $p1Norm (BUILTIN) +Rule fired: Class op / (BUILTIN) +Rule fired: Class op norm (BUILTIN) diff --git a/testsuite/tests/perf/should_run/T14955.hs b/testsuite/tests/perf/should_run/T14955.hs new file mode 100644 index 0000000000..d1b2206ba7 --- /dev/null +++ b/testsuite/tests/perf/should_run/T14955.hs @@ -0,0 +1,20 @@ +module Main where + +import T14955a + +--test1 :: [Bool] -> Bool +--test1 = ors + +--test2 :: [Bool] -> Bool +--test2 = dors boolDict + +--test2a :: [Bool] -> Bool +--test2a xs = dors boolDict xs + +test3 :: [Bool] -> Bool +test3 xs = pors xs + +--test4 :: [Bool] -> Bool +--test4 xs = porsProxy xs + +main = print (test3 (replicate 1000000 False)) diff --git a/testsuite/tests/perf/should_run/T14955.stdout b/testsuite/tests/perf/should_run/T14955.stdout new file mode 100644 index 0000000000..0519ecba6e --- /dev/null +++ b/testsuite/tests/perf/should_run/T14955.stdout @@ -0,0 +1 @@ +
\ No newline at end of file diff --git a/testsuite/tests/perf/should_run/T14955a.hs b/testsuite/tests/perf/should_run/T14955a.hs new file mode 100644 index 0000000000..2d77d8f2d4 --- /dev/null +++ b/testsuite/tests/perf/should_run/T14955a.hs @@ -0,0 +1,72 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE AllowAmbiguousTypes #-} +module T14955a where + +import Prelude (Bool(..), (||), (&&)) + +-- Implementation 1 + +class Prop r where + or :: r -> r -> r + and :: r -> r -> r + true :: r + false :: r + +instance Prop Bool where + or = (||) + and = (&&) + true = True + false = False + +-- Implementation 2 + +data PropDict r = PropDict { + dor :: r -> r -> r + , dand :: r -> r -> r + , dtrue :: r + , dfalse :: r + } + +boolDict = PropDict { + dor = (||) + , dand = (&&) + , dtrue = True + , dfalse = False } + +-- Implementation 3 + +class PropProxy r where + propDict :: PropDict r + +instance PropProxy Bool where + propDict = boolDict + +-- Implementation 4 + +class PropProxy2 r where + propDict2 :: PropDict r + dummy :: () + +instance PropProxy2 Bool where + propDict2 = boolDict + dummy = () + + +ors :: Prop r => [r] -> r +ors [] = true +ors (o:os) = o `or` ors os +{-# INLINABLE ors #-} + +dors :: PropDict r -> [r] -> r +dors pd [] = dtrue pd +dors pd (o:os) = dor pd o (dors pd os) + +pors :: PropProxy r => [r] -> r +pors [] = dtrue propDict +pors (o:os) = dor propDict o (pors os) +{-# INLINABLE pors #-} + +porsProxy :: PropProxy2 r => [r] -> r +porsProxy [] = dtrue propDict2 +porsProxy (o:os) = dor propDict2 o (porsProxy os) +{-# INLINABLE porsProxy #-} diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T index 20555a49ce..27405b0019 100644 --- a/testsuite/tests/perf/should_run/all.T +++ b/testsuite/tests/perf/should_run/all.T @@ -31,6 +31,15 @@ test('T10359', compile_and_run, ['-O']) +test('T14955', + [stats_num_field('bytes allocated', + [(wordsize(64), 48050760, 5), + (wordsize(32), 351508, 5)]), + only_ways(['normal']) + ], + multimod_compile_and_run, + ['T14955', '-O']) + # fortunately the values here are mostly independent of the wordsize, # because the test allocates an unboxed array of doubles. test('T3586', |