summaryrefslogtreecommitdiff
path: root/compiler/codeGen
diff options
context:
space:
mode:
authorSimon Peyton Jones <simonpj@microsoft.com>2013-05-03 14:50:58 +0100
committerSimon Peyton Jones <simonpj@microsoft.com>2013-06-06 14:29:53 +0100
commit99d4e5b4a0bd32813ff8c74e91d2dcf6b3555176 (patch)
tree62098e1b36c61fe1a978a29d955f57b629c5ec79 /compiler/codeGen
parentda4ff650ae77930a5a10d4886c8bc7d37f081db7 (diff)
downloadhaskell-99d4e5b4a0bd32813ff8c74e91d2dcf6b3555176.tar.gz
Implement cardinality analysis
This major patch implements the cardinality analysis described in our paper "Higher order cardinality analysis". It is joint work with Ilya Sergey and Dimitrios Vytiniotis. The basic is augment the absence-analysis part of the demand analyser so that it can tell when something is used never at most once some other way The "at most once" information is used a) to enable transformations, and in particular to identify one-shot lambdas b) to allow updates on thunks to be omitted. There are two new flags, mainly there so you can do performance comparisons: -fkill-absence stops GHC doing absence analysis at all -fkill-one-shot stops GHC spotting one-shot lambdas and single-entry thunks The big changes are: * The Demand type is substantially refactored. In particular the UseDmd is factored as follows data UseDmd = UCall Count UseDmd | UProd [MaybeUsed] | UHead | Used data MaybeUsed = Abs | Use Count UseDmd data Count = One | Many Notice that UCall recurses straight to UseDmd, whereas UProd goes via MaybeUsed. The "Count" embodies the "at most once" or "many" idea. * The demand analyser itself was refactored a lot * The previously ad-hoc stuff in the occurrence analyser for foldr and build goes away entirely. Before if we had build (\cn -> ...x... ) then the "\cn" was hackily made one-shot (by spotting 'build' as special. That's essential to allow x to be inlined. Now the occurrence analyser propagates info gotten from 'build's stricness signature (so build isn't special); and that strictness sig is in turn derived entirely automatically. Much nicer! * The ticky stuff is improved to count single-entry thunks separately. One shortcoming is that there is no DEBUG way to spot if an allegedly-single-entry thunk is acually entered more than once. It would not be hard to generate a bit of code to check for this, and it would be reassuring. But it's fiddly and I have not done it. Despite all this fuss, the performance numbers are rather under-whelming. See the paper for more discussion. nucleic2 -0.8% -10.9% 0.10 0.10 +0.0% sphere -0.7% -1.5% 0.08 0.08 +0.0% -------------------------------------------------------------------------------- Min -4.7% -10.9% -9.3% -9.3% -50.0% Max -0.4% +0.5% +2.2% +2.3% +7.4% Geometric Mean -0.8% -0.2% -1.3% -1.3% -1.8% I don't quite know how much credence to place in the runtime changes, but movement seems generally in the right direction.
Diffstat (limited to 'compiler/codeGen')
-rw-r--r--compiler/codeGen/StgCmmBind.hs44
-rw-r--r--compiler/codeGen/StgCmmTicky.hs25
2 files changed, 51 insertions, 18 deletions
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index 8d0a35ff4f..0cd9dd6579 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -130,8 +130,7 @@ cgBind (StgNonRec name rhs)
= do { (info, fcode) <- cgRhs name rhs
; addBindC (cg_id info) info
; init <- fcode
- ; emit init
- }
+ ; emit init }
-- init cannot be used in body, so slightly better to sink it eagerly
cgBind (StgRec pairs)
@@ -209,9 +208,34 @@ cgRhs id (StgRhsCon cc con args)
buildDynCon id True cc con args
cgRhs name (StgRhsClosure cc bi fvs upd_flag _srt args body)
+ | null fvs -- See Note [Nested constant closures]
+ = do { (info, fcode) <- cgTopRhsClosure Recursive name cc bi upd_flag args body
+ ; return (info, fcode >> return mkNop) }
+ | otherwise
= do dflags <- getDynFlags
mkRhsClosure dflags name cc bi (nonVoidIds fvs) upd_flag args body
+{- Note [Nested constant closures]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we have
+ f x = let funny = not True
+ in ...
+then 'funny' is a nested closure (compiled with cgRhs) that has no free vars.
+This does not happen often, because let-floating takes them all to top
+level; but it CAN happen. (Reason: let-floating may make a function f smaller
+so it can be inlined, so now (f True) may generate a local no-fv closure.
+This actually happened during bootsrapping GHC itself, with f=mkRdrFunBind
+in TcGenDeriv.)
+
+If we have one of these things, AND they allocate, the heap check will
+refer to the static funny_closure; but there isn't one! (Why does the
+heap check refer to the static closure? Becuase nodeMustPointToIt is
+False, which is fair enough.)
+
+Simple solution: compile the RHS as if it was top level. Then
+everything works. A minor benefit is eliminating the allocation code
+too. -}
+
------------------------------------------------------------------------
-- Non-constructor right hand sides
------------------------------------------------------------------------
@@ -547,8 +571,9 @@ thunkCode cl_info fv_details _cc node arity body
; entryHeapCheck cl_info node' arity [] $ do
{ -- Overwrite with black hole if necessary
-- but *after* the heap-overflow check
+ ; tickyEnterThunk cl_info
; when (blackHoleOnEntry cl_info && node_points)
- (blackHoleIt cl_info node)
+ (blackHoleIt node)
-- Push update frame
; setupUpdate cl_info node $
@@ -568,14 +593,14 @@ thunkCode cl_info fv_details _cc node arity body
-- Update and black-hole wrappers
------------------------------------------------------------------------
-blackHoleIt :: ClosureInfo -> LocalReg -> FCode ()
+blackHoleIt :: LocalReg -> FCode ()
-- Only called for closures with no args
-- Node points to the closure
-blackHoleIt closure_info node
- = emitBlackHoleCode (closureSingleEntry closure_info) (CmmReg (CmmLocal node))
+blackHoleIt node_reg
+ = emitBlackHoleCode (CmmReg (CmmLocal node_reg))
-emitBlackHoleCode :: Bool -> CmmExpr -> FCode ()
-emitBlackHoleCode is_single_entry node = do
+emitBlackHoleCode :: CmmExpr -> FCode ()
+emitBlackHoleCode node = do
dflags <- getDynFlags
-- Eager blackholing is normally disabled, but can be turned on with
@@ -603,7 +628,6 @@ emitBlackHoleCode is_single_entry node = do
-- work with profiling.
when eager_blackholing $ do
- tickyBlackHole (not is_single_entry)
emitStore (cmmOffsetW dflags node (fixedHdrSize dflags))
(CmmReg (CmmGlobal CurrentTSO))
emitPrimCall [] MO_WriteBarrier []
@@ -614,7 +638,7 @@ setupUpdate :: ClosureInfo -> LocalReg -> FCode () -> FCode ()
-- so that the cost centre in the original closure can still be
-- extracted by a subsequent enterCostCentre
setupUpdate closure_info node body
- | closureReEntrant closure_info
+ | not (lfUpdatable (closureLFInfo closure_info))
= body
| not (isStaticClosure closure_info)
diff --git a/compiler/codeGen/StgCmmTicky.hs b/compiler/codeGen/StgCmmTicky.hs
index 004c940d54..6ac0fe3c5e 100644
--- a/compiler/codeGen/StgCmmTicky.hs
+++ b/compiler/codeGen/StgCmmTicky.hs
@@ -133,7 +133,7 @@ import TyCon
import Data.Maybe
import qualified Data.Char
-import Control.Monad ( when )
+import Control.Monad ( unless, when )
-----------------------------------------------------------------------------
--
@@ -238,13 +238,22 @@ tickyEnterDynCon = ifTicky $ bumpTickyCounter (fsLit "ENT_DYN_CON_ctr")
tickyEnterStaticCon = ifTicky $ bumpTickyCounter (fsLit "ENT_STATIC_CON_ctr")
tickyEnterViaNode = ifTicky $ bumpTickyCounter (fsLit "ENT_VIA_NODE_ctr")
-tickyEnterThunk :: FCode ()
-tickyEnterThunk = ifTicky $ do
- bumpTickyCounter (fsLit "ENT_DYN_THK_ctr")
- ifTickyDynThunk $ do
- ticky_ctr_lbl <- getTickyCtrLabel
- registerTickyCtrAtEntryDyn ticky_ctr_lbl
- bumpTickyEntryCount ticky_ctr_lbl
+tickyEnterThunk :: ClosureInfo -> FCode ()
+tickyEnterThunk cl_info
+ = ifTicky $ do
+ { bumpTickyCounter ctr
+ ; unless static $ do
+ ticky_ctr_lbl <- getTickyCtrLabel
+ registerTickyCtrAtEntryDyn ticky_ctr_lbl
+ bumpTickyEntryCount ticky_ctr_lbl }
+ where
+ updatable = closureSingleEntry cl_info
+ static = isStaticClosure cl_info
+
+ ctr | static = if updatable then fsLit "ENT_STATIC_THK_SINGLE_ctr"
+ else fsLit "ENT_STATIC_THK_MANY_ctr"
+ | otherwise = if updatable then fsLit "ENT_DYN_THK_SINGLE_ctr"
+ else fsLit "ENT_DYN_THK_MANY_ctr"
tickyEnterStdThunk :: FCode ()
tickyEnterStdThunk = tickyEnterThunk