summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/stranal/WorkWrap.hs10
-rw-r--r--compiler/stranal/WwLib.hs64
-rw-r--r--testsuite/tests/ghci.debugger/scripts/break006.stdout4
-rw-r--r--testsuite/tests/ghci.debugger/scripts/hist001.stdout2
-rw-r--r--testsuite/tests/ghci.debugger/scripts/hist002.stdout2
-rw-r--r--testsuite/tests/indexed-types/should_compile/T7837.stderr4
-rw-r--r--testsuite/tests/perf/should_run/T14955.hs20
-rw-r--r--testsuite/tests/perf/should_run/T14955.stdout1
-rw-r--r--testsuite/tests/perf/should_run/T14955a.hs72
-rw-r--r--testsuite/tests/perf/should_run/all.T9
10 files changed, 156 insertions, 32 deletions
diff --git a/compiler/stranal/WorkWrap.hs b/compiler/stranal/WorkWrap.hs
index 9557cecdfe..8da2a1288a 100644
--- a/compiler/stranal/WorkWrap.hs
+++ b/compiler/stranal/WorkWrap.hs
@@ -494,8 +494,7 @@ splitFun :: DynFlags -> FamInstEnvs -> Id -> IdInfo -> [Demand] -> DmdResult ->
splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs
= WARN( not (wrap_dmds `lengthIs` arity), ppr fn_id <+> (ppr arity $$ ppr wrap_dmds $$ ppr res_info) ) do
-- The arity should match the signature
- stuff <- mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty
- wrap_dmds use_res_info
+ stuff <- mkWwBodies dflags fam_envs rhs_fvs fn_id wrap_dmds use_res_info
case stuff of
Just (work_demands, join_arity, wrap_fn, work_fn) -> do
work_uniq <- getUniqueM
@@ -527,7 +526,7 @@ splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs
`setInlinePragma` work_prag
- `setIdUnfolding` mkWorkerUnfolding dflags work_fn (unfoldingInfo fn_info)
+ `setIdUnfolding` mkWorkerUnfolding dflags work_fn fn_unfolding
-- See Note [Worker-wrapper for INLINABLE functions]
`setIdStrictness` mkClosedStrictSig work_demands work_res_info
@@ -576,13 +575,12 @@ splitFun dflags fam_envs fn_id fn_info wrap_dmds res_info rhs
Nothing -> return [(fn_id, rhs)]
where
- mb_join_arity = isJoinId_maybe fn_id
rhs_fvs = exprFreeVars rhs
- fun_ty = idType fn_id
fn_inl_prag = inlinePragInfo fn_info
fn_inline_spec = inl_inline fn_inl_prag
fn_act = inl_act fn_inl_prag
rule_match_info = inlinePragmaRuleMatchInfo fn_inl_prag
+ fn_unfolding = unfoldingInfo fn_info
arity = arityInfo fn_info
-- The arity is set by the simplifier using exprEtaExpandArity
-- So it may be more than the number of top-level-visible lambdas
@@ -691,7 +689,7 @@ then the splitting will go deeper too.
splitThunk :: DynFlags -> FamInstEnvs -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)]
splitThunk dflags fam_envs is_rec fn_id rhs
= ASSERT(not (isJoinId fn_id))
- do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs [fn_id]
+ do { (useful,_, wrap_fn, work_fn) <- mkWWstr dflags fam_envs False [fn_id]
; let res = [ (fn_id, Let (NonRec fn_id rhs) (wrap_fn (work_fn (Var fn_id)))) ]
; if useful then ASSERT2( isNonRec is_rec, ppr fn_id ) -- The thunk must be non-recursive
return res
diff --git a/compiler/stranal/WwLib.hs b/compiler/stranal/WwLib.hs
index 9d957c4251..ab0a4d1ee1 100644
--- a/compiler/stranal/WwLib.hs
+++ b/compiler/stranal/WwLib.hs
@@ -123,8 +123,7 @@ mkWwBodies :: DynFlags
-> FamInstEnvs
-> VarSet -- Free vars of RHS
-- See Note [Freshen WW arguments]
- -> Maybe JoinArity -- Just ar <=> is join point with join arity ar
- -> Type -- Type of original function
+ -> Id -- The original function
-> [Demand] -- Strictness of original function
-> DmdResult -- Info about function result
-> UniqSM (Maybe WwResult)
@@ -140,12 +139,14 @@ mkWwBodies :: DynFlags
-- let x = (a,b) in
-- E
-mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info
+mkWwBodies dflags fam_envs rhs_fvs fun_id demands res_info
= do { let empty_subst = mkEmptyTCvSubst (mkInScopeSet rhs_fvs)
-- See Note [Freshen WW arguments]
- ; (wrap_args, wrap_fn_args, work_fn_args, res_ty) <- mkWWargs empty_subst fun_ty demands
- ; (useful1, work_args, wrap_fn_str, work_fn_str) <- mkWWstr dflags fam_envs wrap_args
+ ; (wrap_args, wrap_fn_args, work_fn_args, res_ty)
+ <- mkWWargs empty_subst fun_ty demands
+ ; (useful1, work_args, wrap_fn_str, work_fn_str)
+ <- mkWWstr dflags fam_envs has_inlineable_prag wrap_args
-- Do CPR w/w. See Note [Always do CPR w/w]
; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty)
@@ -158,7 +159,7 @@ mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info
; if isWorkerSmallEnough dflags work_args
&& not (too_many_args_for_join_point wrap_args)
- && (useful1 && not only_one_void_argument || useful2)
+ && ((useful1 && not only_one_void_argument) || useful2)
then return (Just (worker_args_dmds, length work_call_args,
wrapper_body, worker_body))
else return Nothing
@@ -171,6 +172,11 @@ mkWwBodies dflags fam_envs rhs_fvs mb_join_arity fun_ty demands res_info
-- f's RHS is now trivial (size 1) we still want the __inline__ to prevent
-- fw from being inlined into f's RHS
where
+ fun_ty = idType fun_id
+ mb_join_arity = isJoinId_maybe fun_id
+ has_inlineable_prag = isStableUnfolding (realIdUnfolding fun_id)
+ -- See Note [Do not unpack class dictionaries]
+
-- Note [Do not split void functions]
only_one_void_argument
| [d] <- demands
@@ -490,6 +496,8 @@ To avoid this:
mkWWstr :: DynFlags
-> FamInstEnvs
+ -> Bool -- True <=> INLINEABLE pragama on this function defn
+ -- See Note [Do not unpack class dictionaries]
-> [Var] -- Wrapper args; have their demand info on them
-- *Includes type variables*
-> UniqSM (Bool, -- Is this useful
@@ -501,13 +509,18 @@ mkWWstr :: DynFlags
CoreExpr -> CoreExpr) -- Worker body, lacking the original body of the function,
-- and lacking its lambdas.
-- This fn does the reboxing
-mkWWstr _ _ []
- = return (False, [], nop_fn, nop_fn)
+mkWWstr dflags fam_envs has_inlineable_prag args
+ = go args
+ where
+ go_one arg = mkWWstr_one dflags fam_envs has_inlineable_prag arg
-mkWWstr dflags fam_envs (arg : args) = do
- (useful1, args1, wrap_fn1, work_fn1) <- mkWWstr_one dflags fam_envs arg
- (useful2, args2, wrap_fn2, work_fn2) <- mkWWstr dflags fam_envs args
- return (useful1 || useful2, args1 ++ args2, wrap_fn1 . wrap_fn2, work_fn1 . work_fn2)
+ go [] = return (False, [], nop_fn, nop_fn)
+ go (arg : args) = do { (useful1, args1, wrap_fn1, work_fn1) <- go_one arg
+ ; (useful2, args2, wrap_fn2, work_fn2) <- go args
+ ; return ( useful1 || useful2
+ , args1 ++ args2
+ , wrap_fn1 . wrap_fn2
+ , work_fn1 . work_fn2) }
{-
Note [Unpacking arguments with product and polymorphic demands]
@@ -544,9 +557,12 @@ as-yet-un-filled-in pkgState files.
-- brings into scope work_args (via cases)
-- * work_fn assumes work_args are in scope, a
-- brings into scope wrap_arg (via lets)
-mkWWstr_one :: DynFlags -> FamInstEnvs -> Var
- -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr)
-mkWWstr_one dflags fam_envs arg
+mkWWstr_one :: DynFlags -> FamInstEnvs
+ -> Bool -- True <=> INLINEABLE pragama on this function defn
+ -- See Note [Do not unpack class dictionaries]
+ -> Var
+ -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr -> CoreExpr)
+mkWWstr_one dflags fam_envs has_inlineable_prag arg
| isTyVar arg
= return (False, [arg], nop_fn, nop_fn)
@@ -581,8 +597,10 @@ mkWWstr_one dflags fam_envs arg
| isStrictDmd dmd
, Just cs <- splitProdDmd_maybe dmd
-- See Note [Unpacking arguments with product and polymorphic demands]
+ , not (has_inlineable_prag && isClassPred arg_ty)
+ -- See Note [Do not unpack class dictionaries]
, Just (data_con, inst_tys, inst_con_arg_tys, co)
- <- deepSplitProductType_maybe fam_envs (idType arg)
+ <- deepSplitProductType_maybe fam_envs arg_ty
, cs `equalLength` inst_con_arg_tys
-- See Note [mkWWstr and unsafeCoerce]
= do { (uniq1:uniqs) <- getUniquesM
@@ -594,7 +612,7 @@ mkWWstr_one dflags fam_envs arg
-- in Simplify.hs; and see Trac #13890
rebox_fn = Let (NonRec arg_no_unf con_app)
con_app = mkConApp2 data_con inst_tys unpk_args `mkCast` mkSymCo co
- ; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs unpk_args
+ ; (_, worker_args, wrap_fn, work_fn) <- mkWWstr dflags fam_envs False unpk_args
; return (True, worker_args, unbox_fn . wrap_fn, work_fn . rebox_fn) }
-- Don't pass the arg, rebox instead
@@ -602,7 +620,8 @@ mkWWstr_one dflags fam_envs arg
= return (False, [arg], nop_fn, nop_fn)
where
- dmd = idDemandInfo arg
+ arg_ty = idType arg
+ dmd = idDemandInfo arg
mk_ww_arg uniq ty sub_dmd = setIdDemandInfo (mk_ww_local uniq ty) sub_dmd
----------------------
@@ -680,10 +699,12 @@ BUT if f is strict in the Ord dictionary, we might unpack it, to get
and the type-class specialiser can't specialise that. An example is
Trac #6056.
-Moreover, dictionaries can have a lot of fields, so unpacking them can
-increase closure sizes.
+But in any other situation a dictionary is just an ordinary value,
+and can be unpacked. So we track the INLINABLE pragma, and switch
+off the unpacking in mkWWstr_one (see the isClassPred test).
-Conclusion: don't unpack dictionaries.
+Historical note: Trac #14955 describes how I got this fix wrong
+the first time.
-}
deepSplitProductType_maybe
@@ -699,7 +720,6 @@ deepSplitProductType_maybe fam_envs ty
`orElse` (mkRepReflCo ty, ty)
, Just (tc, tc_args) <- splitTyConApp_maybe ty1
, Just con <- isDataProductTyCon_maybe tc
- , not (isClassTyCon tc) -- See Note [Do not unpack class dictionaries]
, let arg_tys = dataConInstArgTys con tc_args
strict_marks = dataConRepStrictness con
= Just (con, tc_args, zipEqual "dspt" arg_tys strict_marks, co)
diff --git a/testsuite/tests/ghci.debugger/scripts/break006.stdout b/testsuite/tests/ghci.debugger/scripts/break006.stdout
index 8a2463fa4e..cd9f6ea1fe 100644
--- a/testsuite/tests/ghci.debugger/scripts/break006.stdout
+++ b/testsuite/tests/ghci.debugger/scripts/break006.stdout
@@ -4,14 +4,14 @@ f :: Integer -> a = _
x :: Integer = 1
xs :: [Integer] = [2,3]
xs :: [Integer] = [2,3]
-x :: Integer = 1
f :: Integer -> a = _
+x :: Integer = 1
_result :: [a] = _
y = (_t1::a)
y = 2
xs :: [Integer] = [2,3]
-x :: Integer = 1
f :: Integer -> Integer = _
+x :: Integer = 1
_result :: [Integer] = _
y :: Integer = 2
_t1 :: Integer = 2
diff --git a/testsuite/tests/ghci.debugger/scripts/hist001.stdout b/testsuite/tests/ghci.debugger/scripts/hist001.stdout
index a19a34f315..b52e8aa5fe 100644
--- a/testsuite/tests/ghci.debugger/scripts/hist001.stdout
+++ b/testsuite/tests/ghci.debugger/scripts/hist001.stdout
@@ -20,8 +20,8 @@ _result :: a
f :: Integer -> a
x :: Integer
xs :: [t] = []
-x :: Integer = 2
f :: Integer -> a = _
+x :: Integer = 2
_result :: a = _
_result = 3
Logged breakpoint at Test3.hs:2:18-31
diff --git a/testsuite/tests/ghci.debugger/scripts/hist002.stdout b/testsuite/tests/ghci.debugger/scripts/hist002.stdout
index a19a34f315..b52e8aa5fe 100644
--- a/testsuite/tests/ghci.debugger/scripts/hist002.stdout
+++ b/testsuite/tests/ghci.debugger/scripts/hist002.stdout
@@ -20,8 +20,8 @@ _result :: a
f :: Integer -> a
x :: Integer
xs :: [t] = []
-x :: Integer = 2
f :: Integer -> a = _
+x :: Integer = 2
_result :: a = _
_result = 3
Logged breakpoint at Test3.hs:2:18-31
diff --git a/testsuite/tests/indexed-types/should_compile/T7837.stderr b/testsuite/tests/indexed-types/should_compile/T7837.stderr
index 6e0720e8d0..7900ce5ba2 100644
--- a/testsuite/tests/indexed-types/should_compile/T7837.stderr
+++ b/testsuite/tests/indexed-types/should_compile/T7837.stderr
@@ -2,3 +2,7 @@ Rule fired: Class op signum (BUILTIN)
Rule fired: Class op abs (BUILTIN)
Rule fired: Class op heq_sel (BUILTIN)
Rule fired: normalize/Double (T7837)
+Rule fired: Class op heq_sel (BUILTIN)
+Rule fired: Class op $p1Norm (BUILTIN)
+Rule fired: Class op / (BUILTIN)
+Rule fired: Class op norm (BUILTIN)
diff --git a/testsuite/tests/perf/should_run/T14955.hs b/testsuite/tests/perf/should_run/T14955.hs
new file mode 100644
index 0000000000..d1b2206ba7
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T14955.hs
@@ -0,0 +1,20 @@
+module Main where
+
+import T14955a
+
+--test1 :: [Bool] -> Bool
+--test1 = ors
+
+--test2 :: [Bool] -> Bool
+--test2 = dors boolDict
+
+--test2a :: [Bool] -> Bool
+--test2a xs = dors boolDict xs
+
+test3 :: [Bool] -> Bool
+test3 xs = pors xs
+
+--test4 :: [Bool] -> Bool
+--test4 xs = porsProxy xs
+
+main = print (test3 (replicate 1000000 False))
diff --git a/testsuite/tests/perf/should_run/T14955.stdout b/testsuite/tests/perf/should_run/T14955.stdout
new file mode 100644
index 0000000000..0519ecba6e
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T14955.stdout
@@ -0,0 +1 @@
+ \ No newline at end of file
diff --git a/testsuite/tests/perf/should_run/T14955a.hs b/testsuite/tests/perf/should_run/T14955a.hs
new file mode 100644
index 0000000000..2d77d8f2d4
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T14955a.hs
@@ -0,0 +1,72 @@
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE AllowAmbiguousTypes #-}
+module T14955a where
+
+import Prelude (Bool(..), (||), (&&))
+
+-- Implementation 1
+
+class Prop r where
+ or :: r -> r -> r
+ and :: r -> r -> r
+ true :: r
+ false :: r
+
+instance Prop Bool where
+ or = (||)
+ and = (&&)
+ true = True
+ false = False
+
+-- Implementation 2
+
+data PropDict r = PropDict {
+ dor :: r -> r -> r
+ , dand :: r -> r -> r
+ , dtrue :: r
+ , dfalse :: r
+ }
+
+boolDict = PropDict {
+ dor = (||)
+ , dand = (&&)
+ , dtrue = True
+ , dfalse = False }
+
+-- Implementation 3
+
+class PropProxy r where
+ propDict :: PropDict r
+
+instance PropProxy Bool where
+ propDict = boolDict
+
+-- Implementation 4
+
+class PropProxy2 r where
+ propDict2 :: PropDict r
+ dummy :: ()
+
+instance PropProxy2 Bool where
+ propDict2 = boolDict
+ dummy = ()
+
+
+ors :: Prop r => [r] -> r
+ors [] = true
+ors (o:os) = o `or` ors os
+{-# INLINABLE ors #-}
+
+dors :: PropDict r -> [r] -> r
+dors pd [] = dtrue pd
+dors pd (o:os) = dor pd o (dors pd os)
+
+pors :: PropProxy r => [r] -> r
+pors [] = dtrue propDict
+pors (o:os) = dor propDict o (pors os)
+{-# INLINABLE pors #-}
+
+porsProxy :: PropProxy2 r => [r] -> r
+porsProxy [] = dtrue propDict2
+porsProxy (o:os) = dor propDict2 o (porsProxy os)
+{-# INLINABLE porsProxy #-}
diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T
index 20555a49ce..27405b0019 100644
--- a/testsuite/tests/perf/should_run/all.T
+++ b/testsuite/tests/perf/should_run/all.T
@@ -31,6 +31,15 @@ test('T10359',
compile_and_run,
['-O'])
+test('T14955',
+ [stats_num_field('bytes allocated',
+ [(wordsize(64), 48050760, 5),
+ (wordsize(32), 351508, 5)]),
+ only_ways(['normal'])
+ ],
+ multimod_compile_and_run,
+ ['T14955', '-O'])
+
# fortunately the values here are mostly independent of the wordsize,
# because the test allocates an unboxed array of doubles.
test('T3586',