summaryrefslogtreecommitdiff
path: root/compiler/GHC/CoreToStg
diff options
context:
space:
mode:
authorÖmer Sinan Ağacan <omeragacan@gmail.com>2019-10-14 15:44:58 +0300
committerÖmer Sinan Ağacan <omeragacan@gmail.com>2020-01-31 12:21:10 +0300
commitc846618ae0f8601515683a4c7677c20c3272a50f (patch)
tree22caa68b7c6cdf3464d078e556f7eac19400b0bf /compiler/GHC/CoreToStg
parent01b15b835a7555c501df862b4dc8cc8eaff86afc (diff)
downloadhaskell-c846618ae0f8601515683a4c7677c20c3272a50f.tar.gz
Do CafInfo/SRT analysis in Cmm
This patch removes all CafInfo predictions and various hacks to preserve predicted CafInfos from the compiler and assigns final CafInfos to interface Ids after code generation. SRT analysis is extended to support static data, and Cmm generator is modified to allow generating static_link fields after SRT analysis. This also fixes `-fcatch-bottoms`, which introduces error calls in case expressions in CorePrep, which runs *after* CoreTidy (which is where we decide on CafInfos) and turns previously non-CAFFY things into CAFFY. Fixes #17648 Fixes #9718 Evaluation ========== NoFib ----- Boot with: `make boot mode=fast` Run: `make mode=fast EXTRA_RUNTEST_OPTS="-cachegrind" NoFibRuns=1` -------------------------------------------------------------------------------- Program Size Allocs Instrs Reads Writes -------------------------------------------------------------------------------- CS -0.0% 0.0% -0.0% -0.0% -0.0% CSD -0.0% 0.0% -0.0% -0.0% -0.0% FS -0.0% 0.0% -0.0% -0.0% -0.0% S -0.0% 0.0% -0.0% -0.0% -0.0% VS -0.0% 0.0% -0.0% -0.0% -0.0% VSD -0.0% 0.0% -0.0% -0.0% -0.5% VSM -0.0% 0.0% -0.0% -0.0% -0.0% anna -0.1% 0.0% -0.0% -0.0% -0.0% ansi -0.0% 0.0% -0.0% -0.0% -0.0% atom -0.0% 0.0% -0.0% -0.0% -0.0% awards -0.0% 0.0% -0.0% -0.0% -0.0% banner -0.0% 0.0% -0.0% -0.0% -0.0% bernouilli -0.0% 0.0% -0.0% -0.0% -0.0% binary-trees -0.0% 0.0% -0.0% -0.0% -0.0% boyer -0.0% 0.0% -0.0% -0.0% -0.0% boyer2 -0.0% 0.0% -0.0% -0.0% -0.0% bspt -0.0% 0.0% -0.0% -0.0% -0.0% cacheprof -0.0% 0.0% -0.0% -0.0% -0.0% calendar -0.0% 0.0% -0.0% -0.0% -0.0% cichelli -0.0% 0.0% -0.0% -0.0% -0.0% circsim -0.0% 0.0% -0.0% -0.0% -0.0% clausify -0.0% 0.0% -0.0% -0.0% -0.0% comp_lab_zift -0.0% 0.0% -0.0% -0.0% -0.0% compress -0.0% 0.0% -0.0% -0.0% -0.0% compress2 -0.0% 0.0% -0.0% -0.0% -0.0% constraints -0.0% 0.0% -0.0% -0.0% -0.0% cryptarithm1 -0.0% 0.0% -0.0% -0.0% -0.0% cryptarithm2 -0.0% 0.0% -0.0% -0.0% -0.0% cse -0.0% 0.0% -0.0% -0.0% -0.0% digits-of-e1 -0.0% 0.0% -0.0% -0.0% -0.0% digits-of-e2 -0.0% 0.0% -0.0% -0.0% -0.0% dom-lt -0.0% 0.0% -0.0% -0.0% -0.0% eliza -0.0% 0.0% -0.0% -0.0% -0.0% event -0.0% 0.0% -0.0% -0.0% -0.0% exact-reals -0.0% 0.0% -0.0% -0.0% -0.0% exp3_8 -0.0% 0.0% -0.0% -0.0% -0.0% expert -0.0% 0.0% -0.0% -0.0% -0.0% fannkuch-redux -0.0% 0.0% -0.0% -0.0% -0.0% fasta -0.0% 0.0% -0.0% -0.0% -0.0% fem -0.0% 0.0% -0.0% -0.0% -0.0% fft -0.0% 0.0% -0.0% -0.0% -0.0% fft2 -0.0% 0.0% -0.0% -0.0% -0.0% fibheaps -0.0% 0.0% -0.0% -0.0% -0.0% fish -0.0% 0.0% -0.0% -0.0% -0.0% fluid -0.1% 0.0% -0.0% -0.0% -0.0% fulsom -0.0% 0.0% -0.0% -0.0% -0.0% gamteb -0.0% 0.0% -0.0% -0.0% -0.0% gcd -0.0% 0.0% -0.0% -0.0% -0.0% gen_regexps -0.0% 0.0% -0.0% -0.0% -0.0% genfft -0.0% 0.0% -0.0% -0.0% -0.0% gg -0.0% 0.0% -0.0% -0.0% -0.0% grep -0.0% 0.0% -0.0% -0.0% -0.0% hidden -0.0% 0.0% -0.0% -0.0% -0.0% hpg -0.1% 0.0% -0.0% -0.0% -0.0% ida -0.0% 0.0% -0.0% -0.0% -0.0% infer -0.0% 0.0% -0.0% -0.0% -0.0% integer -0.0% 0.0% -0.0% -0.0% -0.0% integrate -0.0% 0.0% -0.0% -0.0% -0.0% k-nucleotide -0.0% 0.0% -0.0% -0.0% -0.0% kahan -0.0% 0.0% -0.0% -0.0% -0.0% knights -0.0% 0.0% -0.0% -0.0% -0.0% lambda -0.0% 0.0% -0.0% -0.0% -0.0% last-piece -0.0% 0.0% -0.0% -0.0% -0.0% lcss -0.0% 0.0% -0.0% -0.0% -0.0% life -0.0% 0.0% -0.0% -0.0% -0.0% lift -0.0% 0.0% -0.0% -0.0% -0.0% linear -0.1% 0.0% -0.0% -0.0% -0.0% listcompr -0.0% 0.0% -0.0% -0.0% -0.0% listcopy -0.0% 0.0% -0.0% -0.0% -0.0% maillist -0.0% 0.0% -0.0% -0.0% -0.0% mandel -0.0% 0.0% -0.0% -0.0% -0.0% mandel2 -0.0% 0.0% -0.0% -0.0% -0.0% mate -0.0% 0.0% -0.0% -0.0% -0.0% minimax -0.0% 0.0% -0.0% -0.0% -0.0% mkhprog -0.0% 0.0% -0.0% -0.0% -0.0% multiplier -0.0% 0.0% -0.0% -0.0% -0.0% n-body -0.0% 0.0% -0.0% -0.0% -0.0% nucleic2 -0.0% 0.0% -0.0% -0.0% -0.0% para -0.0% 0.0% -0.0% -0.0% -0.0% paraffins -0.0% 0.0% -0.0% -0.0% -0.0% parser -0.1% 0.0% -0.0% -0.0% -0.0% parstof -0.1% 0.0% -0.0% -0.0% -0.0% pic -0.0% 0.0% -0.0% -0.0% -0.0% pidigits -0.0% 0.0% -0.0% -0.0% -0.0% power -0.0% 0.0% -0.0% -0.0% -0.0% pretty -0.0% 0.0% -0.3% -0.4% -0.4% primes -0.0% 0.0% -0.0% -0.0% -0.0% primetest -0.0% 0.0% -0.0% -0.0% -0.0% prolog -0.0% 0.0% -0.0% -0.0% -0.0% puzzle -0.0% 0.0% -0.0% -0.0% -0.0% queens -0.0% 0.0% -0.0% -0.0% -0.0% reptile -0.0% 0.0% -0.0% -0.0% -0.0% reverse-complem -0.0% 0.0% -0.0% -0.0% -0.0% rewrite -0.0% 0.0% -0.0% -0.0% -0.0% rfib -0.0% 0.0% -0.0% -0.0% -0.0% rsa -0.0% 0.0% -0.0% -0.0% -0.0% scc -0.0% 0.0% -0.3% -0.5% -0.4% sched -0.0% 0.0% -0.0% -0.0% -0.0% scs -0.0% 0.0% -0.0% -0.0% -0.0% simple -0.1% 0.0% -0.0% -0.0% -0.0% solid -0.0% 0.0% -0.0% -0.0% -0.0% sorting -0.0% 0.0% -0.0% -0.0% -0.0% spectral-norm -0.0% 0.0% -0.0% -0.0% -0.0% sphere -0.0% 0.0% -0.0% -0.0% -0.0% symalg -0.0% 0.0% -0.0% -0.0% -0.0% tak -0.0% 0.0% -0.0% -0.0% -0.0% transform -0.0% 0.0% -0.0% -0.0% -0.0% treejoin -0.0% 0.0% -0.0% -0.0% -0.0% typecheck -0.0% 0.0% -0.0% -0.0% -0.0% veritas -0.0% 0.0% -0.0% -0.0% -0.0% wang -0.0% 0.0% -0.0% -0.0% -0.0% wave4main -0.0% 0.0% -0.0% -0.0% -0.0% wheel-sieve1 -0.0% 0.0% -0.0% -0.0% -0.0% wheel-sieve2 -0.0% 0.0% -0.0% -0.0% -0.0% x2n1 -0.0% 0.0% -0.0% -0.0% -0.0% -------------------------------------------------------------------------------- Min -0.1% 0.0% -0.3% -0.5% -0.5% Max -0.0% 0.0% -0.0% -0.0% -0.0% Geometric Mean -0.0% -0.0% -0.0% -0.0% -0.0% -------------------------------------------------------------------------------- Program Size Allocs Instrs Reads Writes -------------------------------------------------------------------------------- circsim -0.1% 0.0% -0.0% -0.0% -0.0% constraints -0.0% 0.0% -0.0% -0.0% -0.0% fibheaps -0.0% 0.0% -0.0% -0.0% -0.0% gc_bench -0.0% 0.0% -0.0% -0.0% -0.0% hash -0.0% 0.0% -0.0% -0.0% -0.0% lcss -0.0% 0.0% -0.0% -0.0% -0.0% power -0.0% 0.0% -0.0% -0.0% -0.0% spellcheck -0.0% 0.0% -0.0% -0.0% -0.0% -------------------------------------------------------------------------------- Min -0.1% 0.0% -0.0% -0.0% -0.0% Max -0.0% 0.0% -0.0% -0.0% -0.0% Geometric Mean -0.0% +0.0% -0.0% -0.0% -0.0% Manual inspection of programs in testsuite/tests/programs --------------------------------------------------------- I built these programs with a bunch of dump flags and `-O` and compared STG, Cmm, and Asm dumps and file sizes. (Below the numbers in parenthesis show number of modules in the program) These programs have identical compiler (same .hi and .o sizes, STG, and Cmm and Asm dumps): - Queens (1), andre_monad (1), cholewo-eval (2), cvh_unboxing (3), andy_cherry (7), fun_insts (1), hs-boot (4), fast2haskell (2), jl_defaults (1), jq_readsPrec (1), jules_xref (1), jtod_circint (4), jules_xref2 (1), lennart_range (1), lex (1), life_space_leak (1), bargon-mangler-bug (7), record_upd (1), rittri (1), sanders_array (1), strict_anns (1), thurston-module-arith (2), okeefe_neural (1), joao-circular (6), 10queens (1) Programs with different compiler outputs: - jl_defaults (1): For some reason GHC HEAD marks a lot of top-level `[Int]` closures as CAFFY for no reason. With this patch we no longer make them CAFFY and generate less SRT entries. For some reason Main.o is slightly larger with this patch (1.3%) and the executable sizes are the same. (I'd expect both to be smaller) - launchbury (1): Same as jl_defaults: top-level `[Int]` closures marked as CAFFY for no reason. Similarly `Main.o` is 1.4% larger but the executable sizes are the same. - galois_raytrace (13): Differences are in the Parse module. There are a lot, but some of the changes are caused by the fact that for some reason (I think a bug) GHC HEAD marks the dictionary for `Functor Identity` as CAFFY. Parse.o is 0.4% larger, the executable size is the same. - north_array: We now generate less SRT entries because some of array primops used in this program like `NewArrayOp` get eliminated during Stg-to-Cmm and turn some CAFFY things into non-CAFFY. Main.o gets 24% larger (9224 bytes from 9000 bytes), executable sizes are the same. - seward-space-leak: Difference in this program is better shown by this smaller example: module Lib where data CDS = Case [CDS] [(Int, CDS)] | Call CDS CDS instance Eq CDS where Case sels1 rets1 == Case sels2 rets2 = sels1 == sels2 && rets1 == rets2 Call a1 b1 == Call a2 b2 = a1 == a2 && b1 == b2 _ == _ = False In this program GHC HEAD builds a new SRT for the recursive group of `(==)`, `(/=)` and the dictionary closure. Then `/=` points to `==` in its SRT field, and `==` uses the SRT object as its SRT. With this patch we use the closure for `/=` as the SRT and add `==` there. Then `/=` gets an empty SRT field and `==` points to `/=` in its SRT field. This change looks fine to me. Main.o gets 0.07% larger, executable sizes are identical. head.hackage ------------ head.hackage's CI script builds 428 packages from Hackage using this patch with no failures. Compiler performance -------------------- The compiler perf tests report that the compiler allocates slightly more (worst case observed so far is 4%). However most programs in the test suite are small, single file programs. To benchmark compiler performance on something more realistic I build Cabal (the library, 236 modules) with different optimisation levels. For the "max residency" row I run GHC with `+RTS -s -A100k -i0 -h` for more accurate numbers. Other rows are generated with just `-s`. (This is because `-i0` causes running GC much more frequently and as a result "bytes copied" gets inflated by more than 25x in some cases) * -O0 | | GHC HEAD | This MR | Diff | | --------------- | -------------- | -------------- | ------ | | Bytes allocated | 54,413,350,872 | 54,701,099,464 | +0.52% | | Bytes copied | 4,926,037,184 | 4,990,638,760 | +1.31% | | Max residency | 421,225,624 | 424,324,264 | +0.73% | * -O1 | | GHC HEAD | This MR | Diff | | --------------- | --------------- | --------------- | ------ | | Bytes allocated | 245,849,209,992 | 246,562,088,672 | +0.28% | | Bytes copied | 26,943,452,560 | 27,089,972,296 | +0.54% | | Max residency | 982,643,440 | 991,663,432 | +0.91% | * -O2 | | GHC HEAD | This MR | Diff | | --------------- | --------------- | --------------- | ------ | | Bytes allocated | 291,044,511,408 | 291,863,910,912 | +0.28% | | Bytes copied | 37,044,237,616 | 36,121,690,472 | -2.49% | | Max residency | 1,071,600,328 | 1,086,396,256 | +1.38% | Extra compiler allocations -------------------------- Runtime allocations of programs are as reported above (NoFib section). The compiler now allocates more than before. Main source of allocation in this patch compared to base commit is the new SRT algorithm (GHC.Cmm.Info.Build). Below is some of the extra work we do with this patch, numbers generated by profiled stage 2 compiler when building a pathological case (the test 'ManyConstructors') with '-O2': - We now sort the final STG for a module, which means traversing the entire program, generating free variable set for each top-level binding, doing SCC analysis, and re-ordering the program. In ManyConstructors this step allocates 97,889,952 bytes. - We now do SRT analysis on static data, which in a program like ManyConstructors causes analysing 10,000 bindings that we would previously just skip. This step allocates 70,898,352 bytes. - We now maintain an SRT map for the entire module as we compile Cmm groups: data ModuleSRTInfo = ModuleSRTInfo { ... , moduleSRTMap :: SRTMap } (SRTMap is just a strict Map from the 'containers' library) This map gets an entry for most bindings in a module (exceptions are THUNKs and CAFFY static functions). For ManyConstructors this map gets 50015 entries. - Once we're done with code generation we generate a NameSet from SRTMap for the non-CAFFY names in the current module. This set gets the same number of entries as the SRTMap. - Finally we update CafInfos in ModDetails for the non-CAFFY Ids, using the NameSet generated in the previous step. This usually does the least amount of allocation among the work listed here. Only place with this patch where we do less work in the CAF analysis in the tidying pass (CoreTidy). However that doesn't save us much, as the pass still needs to traverse the whole program and update IdInfos for other reasons. Only thing we don't here do is the `hasCafRefs` pass over the RHS of bindings, which is a stateless pass that returns a boolean value, so it doesn't allocate much. (Metric changes blow are all increased allocations) Metric changes -------------- Metric Increase: ManyAlternatives ManyConstructors T13035 T14683 T1969 T9961
Diffstat (limited to 'compiler/GHC/CoreToStg')
-rw-r--r--compiler/GHC/CoreToStg/Prep.hs103
1 files changed, 16 insertions, 87 deletions
diff --git a/compiler/GHC/CoreToStg/Prep.hs b/compiler/GHC/CoreToStg/Prep.hs
index 14716081d4..59de501fa8 100644
--- a/compiler/GHC/CoreToStg/Prep.hs
+++ b/compiler/GHC/CoreToStg/Prep.hs
@@ -30,7 +30,6 @@ import CoreFVs
import CoreMonad ( CoreToDo(..) )
import CoreLint ( endPassIO )
import CoreSyn
-import CoreSubst
import MkCore hiding( FloatBind(..) ) -- We use our own FloatBind here
import Type
import Literal
@@ -54,13 +53,11 @@ import ErrUtils
import DynFlags
import Util
import Outputable
-import GHC.Platform
import FastString
import Name ( NamedThing(..), nameSrcSpan )
import SrcLoc ( SrcSpan(..), realSrcLocSpan, mkRealSrcLoc )
import Data.Bits
import MonadUtils ( mapAccumLM )
-import Data.List ( mapAccumL )
import Control.Monad
import CostCentre ( CostCentre, ccFromThisModule )
import qualified Data.Set as S
@@ -266,40 +263,6 @@ where x is demanded, in which case we want to finish with
x* = f a
And then x will actually end up case-bound
-Note [CafInfo and floating]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-What happens when we try to float bindings to the top level? At this
-point all the CafInfo is supposed to be correct, and we must make certain
-that is true of the new top-level bindings. There are two cases
-to consider
-
-a) The top-level binding is marked asCafRefs. In that case we are
- basically fine. The floated bindings had better all be lazy lets,
- so they can float to top level, but they'll all have HasCafRefs
- (the default) which is safe.
-
-b) The top-level binding is marked NoCafRefs. This really happens
- Example. CoreTidy produces
- $fApplicativeSTM [NoCafRefs] = D:Alternative retry# ...blah...
- Now CorePrep has to eta-expand to
- $fApplicativeSTM = let sat = \xy. retry x y
- in D:Alternative sat ...blah...
- So what we *want* is
- sat [NoCafRefs] = \xy. retry x y
- $fApplicativeSTM [NoCafRefs] = D:Alternative sat ...blah...
-
- So, gruesomely, we must set the NoCafRefs flag on the sat bindings,
- *and* substitute the modified 'sat' into the old RHS.
-
- It should be the case that 'sat' is itself [NoCafRefs] (a value, no
- cafs) else the original top-level binding would not itself have been
- marked [NoCafRefs]. The DEBUG check in CoreToStg for
- consistentCafInfo will find this.
-
-This is all very gruesome and horrible. It would be better to figure
-out CafInfo later, after CorePrep. We'll do that in due course.
-Meanwhile this horrible hack works.
-
Note [Join points and floating]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Join points can float out of other join points but not out of value bindings:
@@ -503,8 +466,6 @@ cpePair top_lvl is_rec dmd is_unlifted env bndr rhs
; return (floats4, rhs4) }
where
- platform = targetPlatform (cpe_dynFlags env)
-
arity = idArity bndr -- We must match this arity
---------------------
@@ -520,14 +481,12 @@ cpePair top_lvl is_rec dmd is_unlifted env bndr rhs
| otherwise = dontFloat floats rhs
---------------------
- float_top floats rhs -- Urhgh! See Note [CafInfo and floating]
- | mayHaveCafRefs (idCafInfo bndr)
- , allLazyTop floats
+ float_top floats rhs
+ | allLazyTop floats
= return (floats, rhs)
- -- So the top-level binding is marked NoCafRefs
- | Just (floats', rhs') <- canFloatFromNoCaf platform floats rhs
- = return (floats', rhs')
+ | Just floats <- canFloat floats rhs
+ = return floats
| otherwise
= dontFloat floats rhs
@@ -1321,57 +1280,27 @@ deFloatTop (Floats _ floats)
---------------------------------------------------------------------------
-canFloatFromNoCaf :: Platform -> Floats -> CpeRhs -> Maybe (Floats, CpeRhs)
- -- Note [CafInfo and floating]
-canFloatFromNoCaf platform (Floats ok_to_spec fs) rhs
+canFloat :: Floats -> CpeRhs -> Maybe (Floats, CpeRhs)
+canFloat (Floats ok_to_spec fs) rhs
| OkToSpec <- ok_to_spec -- Worth trying
- , Just (subst, fs') <- go (emptySubst, nilOL) (fromOL fs)
- = Just (Floats OkToSpec fs', subst_expr subst rhs)
+ , Just fs' <- go nilOL (fromOL fs)
+ = Just (Floats OkToSpec fs', rhs)
| otherwise
= Nothing
where
- subst_expr = substExpr (text "CorePrep")
+ go :: OrdList FloatingBind -> [FloatingBind]
+ -> Maybe (OrdList FloatingBind)
- go :: (Subst, OrdList FloatingBind) -> [FloatingBind]
- -> Maybe (Subst, OrdList FloatingBind)
+ go (fbs_out) [] = Just fbs_out
- go (subst, fbs_out) [] = Just (subst, fbs_out)
+ go fbs_out (fb@(FloatLet _) : fbs_in)
+ = go (fbs_out `snocOL` fb) fbs_in
- go (subst, fbs_out) (FloatLet (NonRec b r) : fbs_in)
- | rhs_ok r
- = go (subst', fbs_out `snocOL` new_fb) fbs_in
- where
- (subst', b') = set_nocaf_bndr subst b
- new_fb = FloatLet (NonRec b' (subst_expr subst r))
+ go fbs_out (ft@FloatTick{} : fbs_in)
+ = go (fbs_out `snocOL` ft) fbs_in
- go (subst, fbs_out) (FloatLet (Rec prs) : fbs_in)
- | all rhs_ok rs
- = go (subst', fbs_out `snocOL` new_fb) fbs_in
- where
- (bs,rs) = unzip prs
- (subst', bs') = mapAccumL set_nocaf_bndr subst bs
- rs' = map (subst_expr subst') rs
- new_fb = FloatLet (Rec (bs' `zip` rs'))
+ go _ (FloatCase{} : _) = Nothing
- go (subst, fbs_out) (ft@FloatTick{} : fbs_in)
- = go (subst, fbs_out `snocOL` ft) fbs_in
-
- go _ _ = Nothing -- Encountered a caffy binding
-
- ------------
- set_nocaf_bndr subst bndr
- = (extendIdSubst subst bndr (Var bndr'), bndr')
- where
- bndr' = bndr `setIdCafInfo` NoCafRefs
-
- ------------
- rhs_ok :: CoreExpr -> Bool
- -- We can only float to top level from a NoCaf thing if
- -- the new binding is static. However it can't mention
- -- any non-static things or it would *already* be Caffy
- rhs_ok = rhsIsStatic platform (\_ -> False)
- (\_nt i -> pprPanic "rhsIsStatic" (integer i))
- -- Integer or Natural literals should not show up
wantFloatNested :: RecFlag -> Demand -> Bool -> Floats -> CpeRhs -> Bool
wantFloatNested is_rec dmd is_unlifted floats rhs