summaryrefslogtreecommitdiff
path: root/compiler/GHC/Cmm
diff options
context:
space:
mode:
authorÖmer Sinan Ağacan <omeragacan@gmail.com>2019-10-14 15:44:58 +0300
committerÖmer Sinan Ağacan <omeragacan@gmail.com>2020-01-31 12:21:10 +0300
commitc846618ae0f8601515683a4c7677c20c3272a50f (patch)
tree22caa68b7c6cdf3464d078e556f7eac19400b0bf /compiler/GHC/Cmm
parent01b15b835a7555c501df862b4dc8cc8eaff86afc (diff)
downloadhaskell-c846618ae0f8601515683a4c7677c20c3272a50f.tar.gz
Do CafInfo/SRT analysis in Cmm
This patch removes all CafInfo predictions and various hacks to preserve predicted CafInfos from the compiler and assigns final CafInfos to interface Ids after code generation. SRT analysis is extended to support static data, and Cmm generator is modified to allow generating static_link fields after SRT analysis. This also fixes `-fcatch-bottoms`, which introduces error calls in case expressions in CorePrep, which runs *after* CoreTidy (which is where we decide on CafInfos) and turns previously non-CAFFY things into CAFFY. Fixes #17648 Fixes #9718 Evaluation ========== NoFib ----- Boot with: `make boot mode=fast` Run: `make mode=fast EXTRA_RUNTEST_OPTS="-cachegrind" NoFibRuns=1` -------------------------------------------------------------------------------- Program Size Allocs Instrs Reads Writes -------------------------------------------------------------------------------- CS -0.0% 0.0% -0.0% -0.0% -0.0% CSD -0.0% 0.0% -0.0% -0.0% -0.0% FS -0.0% 0.0% -0.0% -0.0% -0.0% S -0.0% 0.0% -0.0% -0.0% -0.0% VS -0.0% 0.0% -0.0% -0.0% -0.0% VSD -0.0% 0.0% -0.0% -0.0% -0.5% VSM -0.0% 0.0% -0.0% -0.0% -0.0% anna -0.1% 0.0% -0.0% -0.0% -0.0% ansi -0.0% 0.0% -0.0% -0.0% -0.0% atom -0.0% 0.0% -0.0% -0.0% -0.0% awards -0.0% 0.0% -0.0% -0.0% -0.0% banner -0.0% 0.0% -0.0% -0.0% -0.0% bernouilli -0.0% 0.0% -0.0% -0.0% -0.0% binary-trees -0.0% 0.0% -0.0% -0.0% -0.0% boyer -0.0% 0.0% -0.0% -0.0% -0.0% boyer2 -0.0% 0.0% -0.0% -0.0% -0.0% bspt -0.0% 0.0% -0.0% -0.0% -0.0% cacheprof -0.0% 0.0% -0.0% -0.0% -0.0% calendar -0.0% 0.0% -0.0% -0.0% -0.0% cichelli -0.0% 0.0% -0.0% -0.0% -0.0% circsim -0.0% 0.0% -0.0% -0.0% -0.0% clausify -0.0% 0.0% -0.0% -0.0% -0.0% comp_lab_zift -0.0% 0.0% -0.0% -0.0% -0.0% compress -0.0% 0.0% -0.0% -0.0% -0.0% compress2 -0.0% 0.0% -0.0% -0.0% -0.0% constraints -0.0% 0.0% -0.0% -0.0% -0.0% cryptarithm1 -0.0% 0.0% -0.0% -0.0% -0.0% cryptarithm2 -0.0% 0.0% -0.0% -0.0% -0.0% cse -0.0% 0.0% -0.0% -0.0% -0.0% digits-of-e1 -0.0% 0.0% -0.0% -0.0% -0.0% digits-of-e2 -0.0% 0.0% -0.0% -0.0% -0.0% dom-lt -0.0% 0.0% -0.0% -0.0% -0.0% eliza -0.0% 0.0% -0.0% -0.0% -0.0% event -0.0% 0.0% -0.0% -0.0% -0.0% exact-reals -0.0% 0.0% -0.0% -0.0% -0.0% exp3_8 -0.0% 0.0% -0.0% -0.0% -0.0% expert -0.0% 0.0% -0.0% -0.0% -0.0% fannkuch-redux -0.0% 0.0% -0.0% -0.0% -0.0% fasta -0.0% 0.0% -0.0% -0.0% -0.0% fem -0.0% 0.0% -0.0% -0.0% -0.0% fft -0.0% 0.0% -0.0% -0.0% -0.0% fft2 -0.0% 0.0% -0.0% -0.0% -0.0% fibheaps -0.0% 0.0% -0.0% -0.0% -0.0% fish -0.0% 0.0% -0.0% -0.0% -0.0% fluid -0.1% 0.0% -0.0% -0.0% -0.0% fulsom -0.0% 0.0% -0.0% -0.0% -0.0% gamteb -0.0% 0.0% -0.0% -0.0% -0.0% gcd -0.0% 0.0% -0.0% -0.0% -0.0% gen_regexps -0.0% 0.0% -0.0% -0.0% -0.0% genfft -0.0% 0.0% -0.0% -0.0% -0.0% gg -0.0% 0.0% -0.0% -0.0% -0.0% grep -0.0% 0.0% -0.0% -0.0% -0.0% hidden -0.0% 0.0% -0.0% -0.0% -0.0% hpg -0.1% 0.0% -0.0% -0.0% -0.0% ida -0.0% 0.0% -0.0% -0.0% -0.0% infer -0.0% 0.0% -0.0% -0.0% -0.0% integer -0.0% 0.0% -0.0% -0.0% -0.0% integrate -0.0% 0.0% -0.0% -0.0% -0.0% k-nucleotide -0.0% 0.0% -0.0% -0.0% -0.0% kahan -0.0% 0.0% -0.0% -0.0% -0.0% knights -0.0% 0.0% -0.0% -0.0% -0.0% lambda -0.0% 0.0% -0.0% -0.0% -0.0% last-piece -0.0% 0.0% -0.0% -0.0% -0.0% lcss -0.0% 0.0% -0.0% -0.0% -0.0% life -0.0% 0.0% -0.0% -0.0% -0.0% lift -0.0% 0.0% -0.0% -0.0% -0.0% linear -0.1% 0.0% -0.0% -0.0% -0.0% listcompr -0.0% 0.0% -0.0% -0.0% -0.0% listcopy -0.0% 0.0% -0.0% -0.0% -0.0% maillist -0.0% 0.0% -0.0% -0.0% -0.0% mandel -0.0% 0.0% -0.0% -0.0% -0.0% mandel2 -0.0% 0.0% -0.0% -0.0% -0.0% mate -0.0% 0.0% -0.0% -0.0% -0.0% minimax -0.0% 0.0% -0.0% -0.0% -0.0% mkhprog -0.0% 0.0% -0.0% -0.0% -0.0% multiplier -0.0% 0.0% -0.0% -0.0% -0.0% n-body -0.0% 0.0% -0.0% -0.0% -0.0% nucleic2 -0.0% 0.0% -0.0% -0.0% -0.0% para -0.0% 0.0% -0.0% -0.0% -0.0% paraffins -0.0% 0.0% -0.0% -0.0% -0.0% parser -0.1% 0.0% -0.0% -0.0% -0.0% parstof -0.1% 0.0% -0.0% -0.0% -0.0% pic -0.0% 0.0% -0.0% -0.0% -0.0% pidigits -0.0% 0.0% -0.0% -0.0% -0.0% power -0.0% 0.0% -0.0% -0.0% -0.0% pretty -0.0% 0.0% -0.3% -0.4% -0.4% primes -0.0% 0.0% -0.0% -0.0% -0.0% primetest -0.0% 0.0% -0.0% -0.0% -0.0% prolog -0.0% 0.0% -0.0% -0.0% -0.0% puzzle -0.0% 0.0% -0.0% -0.0% -0.0% queens -0.0% 0.0% -0.0% -0.0% -0.0% reptile -0.0% 0.0% -0.0% -0.0% -0.0% reverse-complem -0.0% 0.0% -0.0% -0.0% -0.0% rewrite -0.0% 0.0% -0.0% -0.0% -0.0% rfib -0.0% 0.0% -0.0% -0.0% -0.0% rsa -0.0% 0.0% -0.0% -0.0% -0.0% scc -0.0% 0.0% -0.3% -0.5% -0.4% sched -0.0% 0.0% -0.0% -0.0% -0.0% scs -0.0% 0.0% -0.0% -0.0% -0.0% simple -0.1% 0.0% -0.0% -0.0% -0.0% solid -0.0% 0.0% -0.0% -0.0% -0.0% sorting -0.0% 0.0% -0.0% -0.0% -0.0% spectral-norm -0.0% 0.0% -0.0% -0.0% -0.0% sphere -0.0% 0.0% -0.0% -0.0% -0.0% symalg -0.0% 0.0% -0.0% -0.0% -0.0% tak -0.0% 0.0% -0.0% -0.0% -0.0% transform -0.0% 0.0% -0.0% -0.0% -0.0% treejoin -0.0% 0.0% -0.0% -0.0% -0.0% typecheck -0.0% 0.0% -0.0% -0.0% -0.0% veritas -0.0% 0.0% -0.0% -0.0% -0.0% wang -0.0% 0.0% -0.0% -0.0% -0.0% wave4main -0.0% 0.0% -0.0% -0.0% -0.0% wheel-sieve1 -0.0% 0.0% -0.0% -0.0% -0.0% wheel-sieve2 -0.0% 0.0% -0.0% -0.0% -0.0% x2n1 -0.0% 0.0% -0.0% -0.0% -0.0% -------------------------------------------------------------------------------- Min -0.1% 0.0% -0.3% -0.5% -0.5% Max -0.0% 0.0% -0.0% -0.0% -0.0% Geometric Mean -0.0% -0.0% -0.0% -0.0% -0.0% -------------------------------------------------------------------------------- Program Size Allocs Instrs Reads Writes -------------------------------------------------------------------------------- circsim -0.1% 0.0% -0.0% -0.0% -0.0% constraints -0.0% 0.0% -0.0% -0.0% -0.0% fibheaps -0.0% 0.0% -0.0% -0.0% -0.0% gc_bench -0.0% 0.0% -0.0% -0.0% -0.0% hash -0.0% 0.0% -0.0% -0.0% -0.0% lcss -0.0% 0.0% -0.0% -0.0% -0.0% power -0.0% 0.0% -0.0% -0.0% -0.0% spellcheck -0.0% 0.0% -0.0% -0.0% -0.0% -------------------------------------------------------------------------------- Min -0.1% 0.0% -0.0% -0.0% -0.0% Max -0.0% 0.0% -0.0% -0.0% -0.0% Geometric Mean -0.0% +0.0% -0.0% -0.0% -0.0% Manual inspection of programs in testsuite/tests/programs --------------------------------------------------------- I built these programs with a bunch of dump flags and `-O` and compared STG, Cmm, and Asm dumps and file sizes. (Below the numbers in parenthesis show number of modules in the program) These programs have identical compiler (same .hi and .o sizes, STG, and Cmm and Asm dumps): - Queens (1), andre_monad (1), cholewo-eval (2), cvh_unboxing (3), andy_cherry (7), fun_insts (1), hs-boot (4), fast2haskell (2), jl_defaults (1), jq_readsPrec (1), jules_xref (1), jtod_circint (4), jules_xref2 (1), lennart_range (1), lex (1), life_space_leak (1), bargon-mangler-bug (7), record_upd (1), rittri (1), sanders_array (1), strict_anns (1), thurston-module-arith (2), okeefe_neural (1), joao-circular (6), 10queens (1) Programs with different compiler outputs: - jl_defaults (1): For some reason GHC HEAD marks a lot of top-level `[Int]` closures as CAFFY for no reason. With this patch we no longer make them CAFFY and generate less SRT entries. For some reason Main.o is slightly larger with this patch (1.3%) and the executable sizes are the same. (I'd expect both to be smaller) - launchbury (1): Same as jl_defaults: top-level `[Int]` closures marked as CAFFY for no reason. Similarly `Main.o` is 1.4% larger but the executable sizes are the same. - galois_raytrace (13): Differences are in the Parse module. There are a lot, but some of the changes are caused by the fact that for some reason (I think a bug) GHC HEAD marks the dictionary for `Functor Identity` as CAFFY. Parse.o is 0.4% larger, the executable size is the same. - north_array: We now generate less SRT entries because some of array primops used in this program like `NewArrayOp` get eliminated during Stg-to-Cmm and turn some CAFFY things into non-CAFFY. Main.o gets 24% larger (9224 bytes from 9000 bytes), executable sizes are the same. - seward-space-leak: Difference in this program is better shown by this smaller example: module Lib where data CDS = Case [CDS] [(Int, CDS)] | Call CDS CDS instance Eq CDS where Case sels1 rets1 == Case sels2 rets2 = sels1 == sels2 && rets1 == rets2 Call a1 b1 == Call a2 b2 = a1 == a2 && b1 == b2 _ == _ = False In this program GHC HEAD builds a new SRT for the recursive group of `(==)`, `(/=)` and the dictionary closure. Then `/=` points to `==` in its SRT field, and `==` uses the SRT object as its SRT. With this patch we use the closure for `/=` as the SRT and add `==` there. Then `/=` gets an empty SRT field and `==` points to `/=` in its SRT field. This change looks fine to me. Main.o gets 0.07% larger, executable sizes are identical. head.hackage ------------ head.hackage's CI script builds 428 packages from Hackage using this patch with no failures. Compiler performance -------------------- The compiler perf tests report that the compiler allocates slightly more (worst case observed so far is 4%). However most programs in the test suite are small, single file programs. To benchmark compiler performance on something more realistic I build Cabal (the library, 236 modules) with different optimisation levels. For the "max residency" row I run GHC with `+RTS -s -A100k -i0 -h` for more accurate numbers. Other rows are generated with just `-s`. (This is because `-i0` causes running GC much more frequently and as a result "bytes copied" gets inflated by more than 25x in some cases) * -O0 | | GHC HEAD | This MR | Diff | | --------------- | -------------- | -------------- | ------ | | Bytes allocated | 54,413,350,872 | 54,701,099,464 | +0.52% | | Bytes copied | 4,926,037,184 | 4,990,638,760 | +1.31% | | Max residency | 421,225,624 | 424,324,264 | +0.73% | * -O1 | | GHC HEAD | This MR | Diff | | --------------- | --------------- | --------------- | ------ | | Bytes allocated | 245,849,209,992 | 246,562,088,672 | +0.28% | | Bytes copied | 26,943,452,560 | 27,089,972,296 | +0.54% | | Max residency | 982,643,440 | 991,663,432 | +0.91% | * -O2 | | GHC HEAD | This MR | Diff | | --------------- | --------------- | --------------- | ------ | | Bytes allocated | 291,044,511,408 | 291,863,910,912 | +0.28% | | Bytes copied | 37,044,237,616 | 36,121,690,472 | -2.49% | | Max residency | 1,071,600,328 | 1,086,396,256 | +1.38% | Extra compiler allocations -------------------------- Runtime allocations of programs are as reported above (NoFib section). The compiler now allocates more than before. Main source of allocation in this patch compared to base commit is the new SRT algorithm (GHC.Cmm.Info.Build). Below is some of the extra work we do with this patch, numbers generated by profiled stage 2 compiler when building a pathological case (the test 'ManyConstructors') with '-O2': - We now sort the final STG for a module, which means traversing the entire program, generating free variable set for each top-level binding, doing SCC analysis, and re-ordering the program. In ManyConstructors this step allocates 97,889,952 bytes. - We now do SRT analysis on static data, which in a program like ManyConstructors causes analysing 10,000 bindings that we would previously just skip. This step allocates 70,898,352 bytes. - We now maintain an SRT map for the entire module as we compile Cmm groups: data ModuleSRTInfo = ModuleSRTInfo { ... , moduleSRTMap :: SRTMap } (SRTMap is just a strict Map from the 'containers' library) This map gets an entry for most bindings in a module (exceptions are THUNKs and CAFFY static functions). For ManyConstructors this map gets 50015 entries. - Once we're done with code generation we generate a NameSet from SRTMap for the non-CAFFY names in the current module. This set gets the same number of entries as the SRTMap. - Finally we update CafInfos in ModDetails for the non-CAFFY Ids, using the NameSet generated in the previous step. This usually does the least amount of allocation among the work listed here. Only place with this patch where we do less work in the CAF analysis in the tidying pass (CoreTidy). However that doesn't save us much, as the pass still needs to traverse the whole program and update IdInfos for other reasons. Only thing we don't here do is the `hasCafRefs` pass over the RHS of bindings, which is a stateless pass that returns a boolean value, so it doesn't allocate much. (Metric changes blow are all increased allocations) Metric changes -------------- Metric Increase: ManyAlternatives ManyConstructors T13035 T14683 T1969 T9961
Diffstat (limited to 'compiler/GHC/Cmm')
-rw-r--r--compiler/GHC/Cmm/CLabel.hs7
-rw-r--r--compiler/GHC/Cmm/DebugBlock.hs2
-rw-r--r--compiler/GHC/Cmm/Info.hs14
-rw-r--r--compiler/GHC/Cmm/Info/Build.hs579
-rw-r--r--compiler/GHC/Cmm/Parser.y4
-rw-r--r--compiler/GHC/Cmm/Pipeline.hs18
-rw-r--r--compiler/GHC/Cmm/Ppr/Decl.hs15
-rw-r--r--compiler/GHC/Cmm/Utils.hs10
8 files changed, 470 insertions, 179 deletions
diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs
index e84278bf65..c83dba8f39 100644
--- a/compiler/GHC/Cmm/CLabel.hs
+++ b/compiler/GHC/Cmm/CLabel.hs
@@ -106,7 +106,8 @@ module GHC.Cmm.CLabel (
pprCLabel,
isInfoTableLabel,
- isConInfoTableLabel
+ isConInfoTableLabel,
+ isIdLabel
) where
#include "HsVersions.h"
@@ -262,6 +263,10 @@ data CLabel
deriving Eq
+isIdLabel :: CLabel -> Bool
+isIdLabel IdLabel{} = True
+isIdLabel _ = False
+
-- This is laborious, but necessary. We can't derive Ord because
-- Unique doesn't have an Ord instance. Note nonDetCmpUnique in the
-- implementation. See Note [No Ord for Unique]
diff --git a/compiler/GHC/Cmm/DebugBlock.hs b/compiler/GHC/Cmm/DebugBlock.hs
index 6b940c9867..ae86788d9c 100644
--- a/compiler/GHC/Cmm/DebugBlock.hs
+++ b/compiler/GHC/Cmm/DebugBlock.hs
@@ -161,7 +161,7 @@ cmmDebugGen modLoc decls = map (blocksForScope Nothing) topScopes
= DebugBlock { dblProcedure = g_entry graph
, dblLabel = label
, dblCLabel = case info of
- Just (Statics infoLbl _) -> infoLbl
+ Just (RawCmmStatics infoLbl _) -> infoLbl
Nothing
| g_entry graph == label -> entryLbl
| otherwise -> blockLbl label
diff --git a/compiler/GHC/Cmm/Info.hs b/compiler/GHC/Cmm/Info.hs
index a10db2b292..9e12fb170d 100644
--- a/compiler/GHC/Cmm/Info.hs
+++ b/compiler/GHC/Cmm/Info.hs
@@ -2,7 +2,6 @@
module GHC.Cmm.Info (
mkEmptyContInfoTable,
cmmToRawCmm,
- mkInfoTable,
srtEscape,
-- info table accessors
@@ -67,11 +66,11 @@ mkEmptyContInfoTable info_lbl
, cit_srt = Nothing
, cit_clo = Nothing }
-cmmToRawCmm :: DynFlags -> Stream IO CmmGroup a
+cmmToRawCmm :: DynFlags -> Stream IO CmmGroupSRTs a
-> IO (Stream IO RawCmmGroup a)
cmmToRawCmm dflags cmms
= do { uniqs <- mkSplitUniqSupply 'i'
- ; let do_one :: UniqSupply -> [CmmDecl] -> IO (UniqSupply, [RawCmmDecl])
+ ; let do_one :: UniqSupply -> [CmmDeclSRTs] -> IO (UniqSupply, [RawCmmDecl])
do_one uniqs cmm =
-- NB. strictness fixes a space leak. DO NOT REMOVE.
withTimingSilent dflags (text "Cmm -> Raw Cmm")
@@ -117,9 +116,8 @@ cmmToRawCmm dflags cmms
--
-- * The SRT slot is only there if there is SRT info to record
-mkInfoTable :: DynFlags -> CmmDecl -> UniqSM [RawCmmDecl]
-mkInfoTable _ (CmmData sec dat)
- = return [CmmData sec dat]
+mkInfoTable :: DynFlags -> CmmDeclSRTs -> UniqSM [RawCmmDecl]
+mkInfoTable _ (CmmData sec dat) = return [CmmData sec dat]
mkInfoTable dflags proc@(CmmProc infos entry_lbl live blocks)
--
@@ -169,7 +167,7 @@ mkInfoTable dflags proc@(CmmProc infos entry_lbl live blocks)
rel_std_info = map (makeRelativeRefTo dflags info_lbl) std_info
rel_extra_bits = map (makeRelativeRefTo dflags info_lbl) extra_bits
--
- return (top_decls, (lbl, Statics info_lbl $ map CmmStaticLit $
+ return (top_decls, (lbl, RawCmmStatics info_lbl $ map CmmStaticLit $
reverse rel_extra_bits ++ rel_std_info))
-----------------------------------------------------
@@ -423,7 +421,7 @@ mkProfLits _ (ProfilingInfo td cd)
; (cd_lit, cd_decl) <- newStringLit cd
; return ((td_lit,cd_lit), [td_decl,cd_decl]) }
-newStringLit :: ByteString -> UniqSM (CmmLit, GenCmmDecl CmmStatics info stmt)
+newStringLit :: ByteString -> UniqSM (CmmLit, GenCmmDecl RawCmmStatics info stmt)
newStringLit bytes
= do { uniq <- getUniqueM
; return (mkByteStringCLit (mkStringLitLabel uniq) bytes) }
diff --git a/compiler/GHC/Cmm/Info/Build.hs b/compiler/GHC/Cmm/Info/Build.hs
index 1ba79befcd..8dbe13d937 100644
--- a/compiler/GHC/Cmm/Info/Build.hs
+++ b/compiler/GHC/Cmm/Info/Build.hs
@@ -1,14 +1,17 @@
{-# LANGUAGE GADTs, BangPatterns, RecordWildCards,
- GeneralizedNewtypeDeriving, NondecreasingIndentation, TupleSections #-}
+ GeneralizedNewtypeDeriving, NondecreasingIndentation, TupleSections,
+ ScopedTypeVariables, OverloadedStrings #-}
module GHC.Cmm.Info.Build
- ( CAFSet, CAFEnv, cafAnal
- , doSRTs, ModuleSRTInfo, emptySRT
+ ( CAFSet, CAFEnv, cafAnal, cafAnalData
+ , doSRTs, ModuleSRTInfo (..), emptySRT
+ , SRTMap, srtMapNonCAFs
) where
import GhcPrelude hiding (succ)
import Id
+import IdInfo
import GHC.Cmm.BlockId
import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Dataflow.Graph
@@ -28,6 +31,7 @@ import GHC.Runtime.Layout
import UniqSupply
import CostCentre
import GHC.StgToCmm.Heap
+import ErrUtils
import Control.Monad
import Data.Map (Map)
@@ -37,7 +41,9 @@ import qualified Data.Set as Set
import Data.Tuple
import Control.Monad.Trans.State
import Control.Monad.Trans.Class
+import Data.List (unzip4)
+import NameSet
{- Note [SRTs]
@@ -183,6 +189,63 @@ and the only SRT closure we generate is
g_srt = SRT_2 [c2_closure, c1_closure]
+Algorithm
+^^^^^^^^^
+
+0. let srtMap :: Map CAFLabel (Maybe SRTEntry) = {}
+ Maps closures to their SRT entries (i.e. how they appear in a SRT payload)
+
+1. Start with decls :: [CmmDecl]. This corresponds to an SCC of bindings in STG
+ after code-generation.
+
+2. CPS-convert each CmmDecl (cpsTop), resulting in a list [CmmDecl]. There might
+ be multiple CmmDecls in the result, due to proc-point splitting.
+
+3. In cpsTop, *before* proc-point splitting, when we still have a single
+ CmmDecl, we do cafAnal for procs:
+
+ * cafAnal performs a backwards analysis on the code blocks
+
+ * For each labelled block, the analysis produces a CAFSet (= Set CAFLabel),
+ representing all the CAFLabels reachable from this label.
+
+ * A label is added to the set if it refers to a FUN, THUNK, or RET,
+ and its CafInfo /= NoCafRefs.
+ (NB. all CafInfo for Ids in the current module should be initialised to
+ MayHaveCafRefs)
+
+ * The result is CAFEnv = LabelMap CAFSet
+
+ (Why *before* proc-point splitting? Because the analysis needs to propagate
+ information across branches, and proc-point splitting turns branches into
+ CmmCalls to top-level CmmDecls. The analysis would fail to find all the
+ references to CAFFY labels if we did it after proc-point splitting.)
+
+ For static data, cafAnalData simply returns set of all labels that refer to a
+ FUN, THUNK, and RET whose CafInfos /= NoCafRefs.
+
+4. The result of cpsTop is (CAFEnv, [CmmDecl]) for procs and (CAFSet, CmmDecl)
+ for static data. So after `mapM cpsTop decls` we have
+ [Either (CAFEnv, [CmmDecl]) (CAFSet, CmmDecl)]
+
+5. For procs concat the decls and union the CAFEnvs to get (CAFEnv, [CmmDecl])
+
+6. For static data generate a Map CLabel CAFSet (maps static data to their CAFSets)
+
+7. Dependency-analyse the decls using CAFEnv and CAFSets, giving us SCC CAFLabel
+
+8. For each SCC in dependency order
+ - Let lbls :: [CAFLabel] be the non-recursive labels in this SCC
+ - Apply CAFEnv to each label and concat the result :: [CAFLabel]
+ - For each CAFLabel in the set apply srtMap (and ignore Nothing) to get
+ srt :: [SRTEntry]
+ - Make a label for this SRT, call it l
+ - If the SRT is not empty (i.e. the group is CAFFY) add FUN_STATICs in the
+ group to the SRT (see Note [Invalid optimisation: shortcutting])
+ - Add to srtMap: lbls -> if null srt then Nothing else Just l
+
+9. At the end, for every top-level binding x, if srtMap x == Nothing, then the
+ binding is non-CAFFY, otherwise it is CAFFY.
Optimisations
^^^^^^^^^^^^^
@@ -382,6 +445,35 @@ newtype SRTEntry = SRTEntry CLabel
-- ---------------------------------------------------------------------
-- CAF analysis
+addCafLabel :: CLabel -> CAFSet -> CAFSet
+addCafLabel l s
+ | Just _ <- hasHaskellName l
+ , let caf_label = mkCAFLabel l
+ -- For imported Ids hasCAF will have accurate CafInfo
+ -- Locals are initialized as CAFFY. We turn labels with empty SRTs into
+ -- non-CAFFYs in doSRTs
+ , hasCAF l
+ = Set.insert caf_label s
+ | otherwise
+ = s
+
+cafAnalData
+ :: CmmStatics
+ -> CAFSet
+
+cafAnalData (CmmStaticsRaw _lbl _data) =
+ Set.empty
+
+cafAnalData (CmmStatics _lbl _itbl _ccs payload) =
+ foldl' analyzeStatic Set.empty payload
+ where
+ analyzeStatic s lit =
+ case lit of
+ CmmLabel c -> addCafLabel c s
+ CmmLabelOff c _ -> addCafLabel c s
+ CmmLabelDiffOff c1 c2 _ _ -> addCafLabel c1 $! addCafLabel c2 s
+ _ -> s
+
-- |
-- For each code block:
-- - collect the references reachable from this code block to FUN,
@@ -412,17 +504,24 @@ cafLattice = DataflowLattice Set.empty add
cafTransfers :: LabelSet -> Label -> CLabel -> TransferFun CAFSet
cafTransfers contLbls entry topLbl
- (BlockCC eNode middle xNode) fBase =
- let joined = cafsInNode xNode $! live'
+ block@(BlockCC eNode middle xNode) fBase =
+ let joined :: CAFSet
+ joined = cafsInNode xNode $! live'
+
+ result :: CAFSet
!result = foldNodesBwdOO cafsInNode middle joined
+ facts :: [Set CAFLabel]
facts = mapMaybe successorFact (successors xNode)
+
+ live' :: CAFSet
live' = joinFacts cafLattice facts
+ successorFact :: Label -> Maybe (Set CAFLabel)
successorFact s
-- If this is a loop back to the entry, we can refer to the
-- entry label.
- | s == entry = Just (add topLbl Set.empty)
+ | s == entry = Just (addCafLabel topLbl Set.empty)
-- If this is a continuation, we want to refer to the
-- SRT for the continuation's info table
| s `setMember` contLbls
@@ -432,18 +531,27 @@ cafTransfers contLbls entry topLbl
= lookupFact s fBase
cafsInNode :: CmmNode e x -> CAFSet -> CAFSet
- cafsInNode node set = foldExpDeep addCaf node set
+ cafsInNode node set = foldExpDeep addCafExpr node set
- addCaf expr !set =
+ addCafExpr :: CmmExpr -> Set CAFLabel -> Set CAFLabel
+ addCafExpr expr !set =
case expr of
- CmmLit (CmmLabel c) -> add c set
- CmmLit (CmmLabelOff c _) -> add c set
- CmmLit (CmmLabelDiffOff c1 c2 _ _) -> add c1 $! add c2 set
- _ -> set
- add l s | hasCAF l = Set.insert (mkCAFLabel l) s
- | otherwise = s
-
- in mapSingleton (entryLabel eNode) result
+ CmmLit (CmmLabel c) ->
+ addCafLabel c set
+ CmmLit (CmmLabelOff c _) ->
+ addCafLabel c set
+ CmmLit (CmmLabelDiffOff c1 c2 _ _) ->
+ addCafLabel c1 $! addCafLabel c2 set
+ _ ->
+ set
+ in
+ srtTrace "cafTransfers" (text "block:" <+> ppr block $$
+ text "contLbls:" <+> ppr contLbls $$
+ text "entry:" <+> ppr entry $$
+ text "topLbl:" <+> ppr topLbl $$
+ text "cafs in exit:" <+> ppr joined $$
+ text "result:" <+> ppr result) $
+ mapSingleton (entryLabel eNode) result
-- -----------------------------------------------------------------------------
@@ -460,17 +568,24 @@ data ModuleSRTInfo = ModuleSRTInfo
-- entries. e.g. if we have an SRT [a,b,c], and we know that b
-- points to [c,d], we can omit c and emit [a,b].
-- Used to implement the [Filter] optimisation.
+ , moduleSRTMap :: SRTMap
}
+
instance Outputable ModuleSRTInfo where
ppr ModuleSRTInfo{..} =
- text "ModuleSRTInfo:" <+> ppr dedupSRTs <+> ppr flatSRTs
+ text "ModuleSRTInfo {" $$
+ (nest 4 $ text "dedupSRTs =" <+> ppr dedupSRTs $$
+ text "flatSRTs =" <+> ppr flatSRTs $$
+ text "moduleSRTMap =" <+> ppr moduleSRTMap) $$ char '}'
emptySRT :: Module -> ModuleSRTInfo
emptySRT mod =
ModuleSRTInfo
{ thisModule = mod
, dedupSRTs = Map.empty
- , flatSRTs = Map.empty }
+ , flatSRTs = Map.empty
+ , moduleSRTMap = Map.empty
+ }
-- -----------------------------------------------------------------------------
-- Constructing SRTs
@@ -489,14 +604,33 @@ emptySRT mod =
-}
+data SomeLabel
+ = BlockLabel Label
+ | DeclLabel CLabel
+ deriving (Eq, Ord)
+
+instance Outputable SomeLabel where
+ ppr (BlockLabel l) = text "b:" <+> ppr l
+ ppr (DeclLabel l) = text "s:" <+> ppr l
+
+getBlockLabel :: SomeLabel -> Maybe Label
+getBlockLabel (BlockLabel l) = Just l
+getBlockLabel (DeclLabel _) = Nothing
+
+getBlockLabels :: [SomeLabel] -> [Label]
+getBlockLabels = mapMaybe getBlockLabel
+
-- | Return a (Label,CLabel) pair for each labelled block of a CmmDecl,
-- where the label is
-- - the info label for a continuation or dynamic closure
-- - the closure label for a top-level function (not a CAF)
-getLabelledBlocks :: CmmDecl -> [(Label, CAFLabel)]
-getLabelledBlocks (CmmData _ _) = []
+getLabelledBlocks :: CmmDecl -> [(SomeLabel, CAFLabel)]
+getLabelledBlocks (CmmData _ (CmmStaticsRaw _ _)) =
+ []
+getLabelledBlocks (CmmData _ (CmmStatics lbl _ _ _)) =
+ [ (DeclLabel lbl, mkCAFLabel lbl) ]
getLabelledBlocks (CmmProc top_info _ _ _) =
- [ (blockId, mkCAFLabel (cit_lbl info))
+ [ (BlockLabel blockId, mkCAFLabel (cit_lbl info))
| (blockId, info) <- mapToList (info_tbls top_info)
, let rep = cit_rep info
, not (isStaticRep rep) || not (isThunkRep rep)
@@ -509,20 +643,30 @@ getLabelledBlocks (CmmProc top_info _ _ _) =
-- SRTs. CAFs themselves are not included here; see getCAFs below.
depAnalSRTs
:: CAFEnv
+ -> Map CLabel CAFSet -- CAFEnv for statics
-> [CmmDecl]
- -> [SCC (Label, CAFLabel, Set CAFLabel)]
-depAnalSRTs cafEnv decls =
- srtTrace "depAnalSRTs" (ppr graph) graph
+ -> [SCC (SomeLabel, CAFLabel, Set CAFLabel)]
+depAnalSRTs cafEnv cafEnv_static decls =
+ srtTrace "depAnalSRTs" (text "decls:" <+> ppr decls $$
+ text "nodes:" <+> ppr (map node_payload nodes) $$
+ text "graph:" <+> ppr graph) graph
where
labelledBlocks = concatMap getLabelledBlocks decls
labelToBlock = Map.fromList (map swap labelledBlocks)
- graph = stronglyConnCompFromEdgedVerticesOrd
- [ let cafs' = Set.delete lbl cafs in
- DigraphNode (l,lbl,cafs') l
- (mapMaybe (flip Map.lookup labelToBlock) (Set.toList cafs'))
- | (l, lbl) <- labelledBlocks
- , Just cafs <- [mapLookup l cafEnv] ]
+ nodes :: [Node SomeLabel (SomeLabel, CAFLabel, Set CAFLabel)]
+ nodes = [ DigraphNode (l,lbl,cafs') l
+ (mapMaybe (flip Map.lookup labelToBlock) (Set.toList cafs'))
+ | (l, lbl) <- labelledBlocks
+ , Just (cafs :: Set CAFLabel) <-
+ [case l of
+ BlockLabel l -> mapLookup l cafEnv
+ DeclLabel cl -> Map.lookup cl cafEnv_static]
+ , let cafs' = Set.delete lbl cafs
+ ]
+
+ graph :: [SCC (SomeLabel, CAFLabel, Set CAFLabel)]
+ graph = stronglyConnCompFromEdgedVerticesOrd nodes
-- | Get (Label, CAFLabel, Set CAFLabel) for each block that represents a CAF.
-- These are treated differently from other labelled blocks:
@@ -565,11 +709,21 @@ getStaticFuns decls =
-- is empty, so we don't need to refer to it from other SRTs.
type SRTMap = Map CAFLabel (Maybe SRTEntry)
+
+-- | Given SRTMap of a module returns the set of non-CAFFY names in the module.
+-- Any Names not in the set are CAFFY.
+srtMapNonCAFs :: SRTMap -> NameSet
+srtMapNonCAFs srtMap = mkNameSet (mapMaybe get_name (Map.toList srtMap))
+ where
+ get_name (CAFLabel l, Nothing) = hasHaskellName l
+ get_name (_l, Just _srt_entry) = Nothing
+
-- | resolve a CAFLabel to its SRTEntry using the SRTMap
resolveCAF :: SRTMap -> CAFLabel -> Maybe SRTEntry
resolveCAF srtMap lbl@(CAFLabel l) =
- Map.findWithDefault (Just (SRTEntry (toClosureLbl l))) lbl srtMap
-
+ srtTrace "resolveCAF" ("l:" <+> ppr l <+> "resolved:" <+> ppr ret) ret
+ where
+ ret = Map.findWithDefault (Just (SRTEntry (toClosureLbl l))) lbl srtMap
-- | Attach SRTs to all info tables in the CmmDecls, and add SRT
-- declarations to the ModuleSRTInfo.
@@ -578,16 +732,33 @@ doSRTs
:: DynFlags
-> ModuleSRTInfo
-> [(CAFEnv, [CmmDecl])]
- -> IO (ModuleSRTInfo, [CmmDecl])
+ -> [(CAFSet, CmmDecl)]
+ -> IO (ModuleSRTInfo, [CmmDeclSRTs])
-doSRTs dflags moduleSRTInfo tops = do
+doSRTs dflags moduleSRTInfo procs data_ = do
us <- mkSplitUniqSupply 'u'
-- Ignore the original grouping of decls, and combine all the
-- CAFEnvs into a single CAFEnv.
- let (cafEnvs, declss) = unzip tops
- cafEnv = mapUnions cafEnvs
- decls = concat declss
+ let static_data_env :: Map CLabel CAFSet
+ static_data_env =
+ Map.fromList $
+ flip map data_ $
+ \(set, decl) ->
+ case decl of
+ CmmProc{} ->
+ pprPanic "doSRTs" (text "Proc in static data list:" <+> ppr decl)
+ CmmData _ static ->
+ case static of
+ CmmStatics lbl _ _ _ -> (lbl, set)
+ CmmStaticsRaw lbl _ -> (lbl, set)
+
+ static_data :: Set CLabel
+ static_data = Map.keysSet static_data_env
+
+ (proc_envs, procss) = unzip procs
+ cafEnv = mapUnions proc_envs
+ decls = map snd data_ ++ concat procss
staticFuns = mapFromList (getStaticFuns decls)
-- Put the decls in dependency order. Why? So that we can implement
@@ -597,56 +768,93 @@ doSRTs dflags moduleSRTInfo tops = do
-- to do this we need to process blocks before things that depend on
-- them.
let
- sccs = depAnalSRTs cafEnv decls
+ sccs :: [SCC (SomeLabel, CAFLabel, Set CAFLabel)]
+ sccs = depAnalSRTs cafEnv static_data_env decls
+
+ cafsWithSRTs :: [(Label, CAFLabel, Set CAFLabel)]
cafsWithSRTs = getCAFs cafEnv decls
+ srtTraceM "doSRTs" (text "data:" <+> ppr data_ $$
+ text "procs:" <+> ppr procs $$
+ text "static_data_env:" <+> ppr static_data_env $$
+ text "sccs:" <+> ppr sccs $$
+ text "cafsWithSRTs:" <+> ppr cafsWithSRTs)
+
-- On each strongly-connected group of decls, construct the SRT
-- closures and the SRT fields for info tables.
let result ::
- [ ( [CmmDecl] -- generated SRTs
+ [ ( [CmmDeclSRTs] -- generated SRTs
, [(Label, CLabel)] -- SRT fields for info tables
, [(Label, [SRTEntry])] -- SRTs to attach to static functions
+ , Bool -- Whether the group has CAF references
) ]
- ((result, _srtMap), moduleSRTInfo') =
+
+ (result, moduleSRTInfo') =
initUs_ us $
- flip runStateT moduleSRTInfo $
- flip runStateT Map.empty $ do
- nonCAFs <- mapM (doSCC dflags staticFuns) sccs
+ flip runStateT moduleSRTInfo $ do
+ nonCAFs <- mapM (doSCC dflags staticFuns static_data) sccs
cAFs <- forM cafsWithSRTs $ \(l, cafLbl, cafs) ->
- oneSRT dflags staticFuns [l] [cafLbl] True{-is a CAF-} cafs
+ oneSRT dflags staticFuns [BlockLabel l] [cafLbl]
+ True{-is a CAF-} cafs static_data
return (nonCAFs ++ cAFs)
- (declss, pairs, funSRTs) = unzip3 result
+ (srt_declss, pairs, funSRTs, has_caf_refs) = unzip4 result
+ srt_decls = concat srt_declss
+
+ unless (null srt_decls) $
+ dumpIfSet_dyn dflags Opt_D_dump_srts "SRTs" FormatCMM (ppr srt_decls)
-- Next, update the info tables with the SRTs
let
srtFieldMap = mapFromList (concat pairs)
funSRTMap = mapFromList (concat funSRTs)
- decls' = concatMap (updInfoSRTs dflags srtFieldMap funSRTMap) decls
-
- return (moduleSRTInfo', concat declss ++ decls')
+ has_caf_refs' = or has_caf_refs
+ decls' =
+ concatMap (updInfoSRTs dflags srtFieldMap funSRTMap has_caf_refs') decls
+
+ -- Finally update CafInfos for raw static literals (CmmStaticsRaw). Those are
+ -- not analysed in oneSRT so we never add entries for them to the SRTMap.
+ let srtMap_w_raws =
+ foldl' (\(srtMap :: SRTMap) (_, decl) ->
+ case decl of
+ CmmData _ CmmStatics{} ->
+ -- already updated by oneSRT
+ srtMap
+ CmmData _ (CmmStaticsRaw lbl _)
+ | isIdLabel lbl ->
+ -- not analysed by oneSRT, declare it non-CAFFY here
+ Map.insert (mkCAFLabel lbl) Nothing srtMap
+ | otherwise ->
+ -- Not an IdLabel, ignore
+ srtMap
+ CmmProc{} ->
+ pprPanic "doSRTs" (text "Found Proc in static data list:" <+> ppr decl))
+ (moduleSRTMap moduleSRTInfo') data_
+
+ return (moduleSRTInfo'{ moduleSRTMap = srtMap_w_raws }, srt_decls ++ decls')
-- | Build the SRT for a strongly-connected component of blocks
doSCC
:: DynFlags
- -> LabelMap CLabel -- which blocks are static function entry points
- -> SCC (Label, CAFLabel, Set CAFLabel)
- -> StateT SRTMap
- (StateT ModuleSRTInfo UniqSM)
- ( [CmmDecl] -- generated SRTs
+ -> LabelMap CLabel -- which blocks are static function entry points
+ -> Set CLabel -- static data
+ -> SCC (SomeLabel, CAFLabel, Set CAFLabel)
+ -> StateT ModuleSRTInfo UniqSM
+ ( [CmmDeclSRTs] -- generated SRTs
, [(Label, CLabel)] -- SRT fields for info tables
, [(Label, [SRTEntry])] -- SRTs to attach to static functions
+ , Bool -- Whether the group has CAF references
)
-doSCC dflags staticFuns (AcyclicSCC (l, cafLbl, cafs)) =
- oneSRT dflags staticFuns [l] [cafLbl] False cafs
+doSCC dflags staticFuns static_data (AcyclicSCC (l, cafLbl, cafs)) =
+ oneSRT dflags staticFuns [l] [cafLbl] False cafs static_data
-doSCC dflags staticFuns (CyclicSCC nodes) = do
+doSCC dflags staticFuns static_data (CyclicSCC nodes) = do
-- build a single SRT for the whole cycle, see Note [recursive SRTs]
- let (blockids, lbls, cafsets) = unzip3 nodes
+ let (lbls, caf_lbls, cafsets) = unzip3 nodes
cafs = Set.unions cafsets
- oneSRT dflags staticFuns blockids lbls False cafs
+ oneSRT dflags staticFuns lbls caf_lbls False cafs static_data
{- Note [recursive SRTs]
@@ -677,34 +885,40 @@ references to static function closures.
oneSRT
:: DynFlags
-> LabelMap CLabel -- which blocks are static function entry points
- -> [Label] -- blocks in this set
+ -> [SomeLabel] -- blocks in this set
-> [CAFLabel] -- labels for those blocks
-> Bool -- True <=> this SRT is for a CAF
-> Set CAFLabel -- SRT for this set
- -> StateT SRTMap
- (StateT ModuleSRTInfo UniqSM)
- ( [CmmDecl] -- SRT objects we built
+ -> Set CLabel -- Static data labels in this group
+ -> StateT ModuleSRTInfo UniqSM
+ ( [CmmDeclSRTs] -- SRT objects we built
, [(Label, CLabel)] -- SRT fields for these blocks' itbls
, [(Label, [SRTEntry])] -- SRTs to attach to static functions
+ , Bool -- Whether the group has CAF references
)
-oneSRT dflags staticFuns blockids lbls isCAF cafs = do
- srtMap <- get
- topSRT <- lift get
+oneSRT dflags staticFuns lbls caf_lbls isCAF cafs static_data = do
+ topSRT <- get
+
let
+ srtMap = moduleSRTMap topSRT
+
+ blockids = getBlockLabels lbls
+
-- Can we merge this SRT with a FUN_STATIC closure?
+ maybeFunClosure :: Maybe (CLabel, Label)
+ otherFunLabels :: [CLabel]
(maybeFunClosure, otherFunLabels) =
case [ (l,b) | b <- blockids, Just l <- [mapLookup b staticFuns] ] of
[] -> (Nothing, [])
- ((l,b):xs) -> (Just (l,b), map (mkCAFLabel . fst) xs)
+ ((l,b):xs) -> (Just (l,b), map fst xs)
- -- Remove recursive references from the SRT, except for (all but
- -- one of the) static functions. See Note [recursive SRTs].
- nonRec = cafs `Set.difference`
- (Set.fromList lbls `Set.difference` Set.fromList otherFunLabels)
+ -- Remove recursive references from the SRT
+ nonRec :: Set CAFLabel
+ nonRec = cafs `Set.difference` Set.fromList caf_lbls
- -- First resolve all the CAFLabels to SRTEntries
- -- Implements the [Inline] optimisation.
+ -- Resolve references to their SRT entries
+ resolved :: [SRTEntry]
resolved = mapMaybe (resolveCAF srtMap) (Set.toList nonRec)
-- The set of all SRTEntries in SRTs that we refer to from here.
@@ -714,10 +928,21 @@ oneSRT dflags staticFuns blockids lbls isCAF cafs = do
-- Remove SRTEntries that are also in an SRT that we refer to.
-- Implements the [Filter] optimisation.
- filtered = Set.difference (Set.fromList resolved) allBelow
-
- srtTrace "oneSRT:"
- (ppr cafs <+> ppr resolved <+> ppr allBelow <+> ppr filtered) $ return ()
+ filtered0 = Set.fromList resolved `Set.difference` allBelow
+
+ srtTraceM "oneSRT:"
+ (text "srtMap:" <+> ppr srtMap $$
+ text "nonRec:" <+> ppr nonRec $$
+ text "lbls:" <+> ppr lbls $$
+ text "caf_lbls:" <+> ppr caf_lbls $$
+ text "static_data:" <+> ppr static_data $$
+ text "cafs:" <+> ppr cafs $$
+ text "blockids:" <+> ppr blockids $$
+ text "maybeFunClosure:" <+> ppr maybeFunClosure $$
+ text "otherFunLabels:" <+> ppr otherFunLabels $$
+ text "resolved:" <+> ppr resolved $$
+ text "allBelow:" <+> ppr allBelow $$
+ text "filtered0:" <+> ppr filtered0)
let
isStaticFun = isJust maybeFunClosure
@@ -726,76 +951,114 @@ oneSRT dflags staticFuns blockids lbls isCAF cafs = do
-- update the SRTMap for the label to point to a closure. It's
-- important that we don't do this for static functions or CAFs,
-- see Note [Invalid optimisation: shortcutting].
+ updateSRTMap :: Maybe SRTEntry -> StateT ModuleSRTInfo UniqSM ()
updateSRTMap srtEntry =
- when (not isCAF && (not isStaticFun || isNothing srtEntry)) $ do
- let newSRTMap = Map.fromList [(cafLbl, srtEntry) | cafLbl <- lbls]
- put (Map.union newSRTMap srtMap)
+ srtTrace "updateSRTMap"
+ (ppr srtEntry <+> "isCAF:" <+> ppr isCAF <+>
+ "isStaticFun:" <+> ppr isStaticFun) $
+ when (not isCAF && (not isStaticFun || isNothing srtEntry)) $
+ modify' $ \state ->
+ let !srt_map =
+ foldl' (\srt_map cafLbl@(CAFLabel clbl) ->
+ -- Only map static data to Nothing (== not CAFFY). For CAFFY
+ -- statics we refer to the static itself instead of a SRT.
+ if not (Set.member clbl static_data) || isNothing srtEntry then
+ Map.insert cafLbl srtEntry srt_map
+ else
+ srt_map)
+ (moduleSRTMap state)
+ caf_lbls
+ in
+ state{ moduleSRTMap = srt_map }
this_mod = thisModule topSRT
- case Set.toList filtered of
- [] -> do
- srtTrace "oneSRT: empty" (ppr lbls) $ return ()
- updateSRTMap Nothing
- return ([], [], [])
-
- -- [Inline] - when we have only one entry there is no need to
- -- build an SRT object at all, instead we put the singleton SRT
- -- entry in the info table.
- [one@(SRTEntry lbl)]
- | -- Info tables refer to SRTs by offset (as noted in the section
- -- "Referring to an SRT from the info table" of Note [SRTs]). However,
- -- when dynamic linking is used we cannot guarantee that the offset
- -- between the SRT and the info table will fit in the offset field.
- -- Consequently we build a singleton SRT in in this case.
- not (labelDynamic dflags this_mod lbl)
-
- -- MachO relocations can't express offsets between compilation units at
- -- all, so we are always forced to build a singleton SRT in this case.
- && (not (osMachOTarget $ platformOS $ targetPlatform dflags)
- || isLocalCLabel this_mod lbl) -> do
-
- -- If we have a static function closure, then it becomes the
- -- SRT object, and everything else points to it. (the only way
- -- we could have multiple labels here is if this is a
- -- recursive group, see Note [recursive SRTs])
- case maybeFunClosure of
- Just (staticFunLbl,staticFunBlock) -> return ([], withLabels, [])
- where
- withLabels =
- [ (b, if b == staticFunBlock then lbl else staticFunLbl)
- | b <- blockids ]
+ allStaticData =
+ all (\(CAFLabel clbl) -> Set.member clbl static_data) caf_lbls
+
+ if Set.null filtered0 then do
+ srtTraceM "oneSRT: empty" (ppr caf_lbls)
+ updateSRTMap Nothing
+ return ([], [], [], False)
+ else do
+ -- We're going to build an SRT for this group, which should include function
+ -- references in the group. See Note [recursive SRTs].
+ let allBelow_funs =
+ Set.fromList (map (SRTEntry . toClosureLbl) otherFunLabels)
+ let filtered = filtered0 `Set.union` allBelow_funs
+ srtTraceM "oneSRT" (text "filtered:" <+> ppr filtered $$
+ text "allBelow_funs:" <+> ppr allBelow_funs)
+ case Set.toList filtered of
+ [] -> pprPanic "oneSRT" empty -- unreachable
+
+ -- [Inline] - when we have only one entry there is no need to
+ -- build an SRT object at all, instead we put the singleton SRT
+ -- entry in the info table.
+ [one@(SRTEntry lbl)]
+ | -- Info tables refer to SRTs by offset (as noted in the section
+ -- "Referring to an SRT from the info table" of Note [SRTs]). However,
+ -- when dynamic linking is used we cannot guarantee that the offset
+ -- between the SRT and the info table will fit in the offset field.
+ -- Consequently we build a singleton SRT in in this case.
+ not (labelDynamic dflags this_mod lbl)
+
+ -- MachO relocations can't express offsets between compilation units at
+ -- all, so we are always forced to build a singleton SRT in this case.
+ && (not (osMachOTarget $ platformOS $ targetPlatform dflags)
+ || isLocalCLabel this_mod lbl) -> do
+
+ -- If we have a static function closure, then it becomes the
+ -- SRT object, and everything else points to it. (the only way
+ -- we could have multiple labels here is if this is a
+ -- recursive group, see Note [recursive SRTs])
+ case maybeFunClosure of
+ Just (staticFunLbl,staticFunBlock) ->
+ return ([], withLabels, [], True)
+ where
+ withLabels =
+ [ (b, if b == staticFunBlock then lbl else staticFunLbl)
+ | b <- blockids ]
+ Nothing -> do
+ srtTraceM "oneSRT: one" (text "caf_lbls:" <+> ppr caf_lbls $$
+ text "one:" <+> ppr one)
+ updateSRTMap (Just one)
+ return ([], map (,lbl) blockids, [], True)
+
+ cafList | allStaticData ->
+ return ([], [], [], not (null cafList))
+
+ cafList ->
+ -- Check whether an SRT with the same entries has been emitted already.
+ -- Implements the [Common] optimisation.
+ case Map.lookup filtered (dedupSRTs topSRT) of
+ Just srtEntry@(SRTEntry srtLbl) -> do
+ srtTraceM "oneSRT [Common]" (ppr caf_lbls <+> ppr srtLbl)
+ updateSRTMap (Just srtEntry)
+ return ([], map (,srtLbl) blockids, [], True)
Nothing -> do
- updateSRTMap (Just one)
- return ([], map (,lbl) blockids, [])
-
- cafList ->
- -- Check whether an SRT with the same entries has been emitted already.
- -- Implements the [Common] optimisation.
- case Map.lookup filtered (dedupSRTs topSRT) of
- Just srtEntry@(SRTEntry srtLbl) -> do
- srtTrace "oneSRT [Common]" (ppr lbls <+> ppr srtLbl) $ return ()
- updateSRTMap (Just srtEntry)
- return ([], map (,srtLbl) blockids, [])
- Nothing -> do
- -- No duplicates: we have to build a new SRT object
- srtTrace "oneSRT: new" (ppr lbls <+> ppr filtered) $ return ()
- (decls, funSRTs, srtEntry) <-
- case maybeFunClosure of
- Just (fun,block) ->
- return ( [], [(block, cafList)], SRTEntry fun )
- Nothing -> do
- (decls, entry) <- lift . lift $ buildSRTChain dflags cafList
- return (decls, [], entry)
- updateSRTMap (Just srtEntry)
- let allBelowThis = Set.union allBelow filtered
- oldFlatSRTs = flatSRTs topSRT
- newFlatSRTs = Map.insert srtEntry allBelowThis oldFlatSRTs
- newDedupSRTs = Map.insert filtered srtEntry (dedupSRTs topSRT)
- lift (put (topSRT { dedupSRTs = newDedupSRTs
- , flatSRTs = newFlatSRTs }))
- let SRTEntry lbl = srtEntry
- return (decls, map (,lbl) blockids, funSRTs)
+ -- No duplicates: we have to build a new SRT object
+ (decls, funSRTs, srtEntry) <-
+ case maybeFunClosure of
+ Just (fun,block) ->
+ return ( [], [(block, cafList)], SRTEntry fun )
+ Nothing -> do
+ (decls, entry) <- lift $ buildSRTChain dflags cafList
+ return (decls, [], entry)
+ updateSRTMap (Just srtEntry)
+ let allBelowThis = Set.union allBelow filtered
+ newFlatSRTs = Map.insert srtEntry allBelowThis (flatSRTs topSRT)
+ -- When all definition in this group are static data we don't
+ -- generate any SRTs.
+ newDedupSRTs = Map.insert filtered srtEntry (dedupSRTs topSRT)
+ modify' (\state -> state{ dedupSRTs = newDedupSRTs,
+ flatSRTs = newFlatSRTs })
+ srtTraceM "oneSRT: new" (text "caf_lbls:" <+> ppr caf_lbls $$
+ text "filtered:" <+> ppr filtered $$
+ text "srtEntry:" <+> ppr srtEntry $$
+ text "newDedupSRTs:" <+> ppr newDedupSRTs $$
+ text "newFlatSRTs:" <+> ppr newFlatSRTs)
+ let SRTEntry lbl = srtEntry
+ return (decls, map (,lbl) blockids, funSRTs, True)
-- | build a static SRT object (or a chain of objects) from a list of
@@ -804,8 +1067,8 @@ buildSRTChain
:: DynFlags
-> [SRTEntry]
-> UniqSM
- ( [CmmDecl] -- The SRT object(s)
- , SRTEntry -- label to use in the info table
+ ( [CmmDeclSRTs] -- The SRT object(s)
+ , SRTEntry -- label to use in the info table
)
buildSRTChain _ [] = panic "buildSRT: empty"
buildSRTChain dflags cafSet =
@@ -821,7 +1084,7 @@ buildSRTChain dflags cafSet =
mAX_SRT_SIZE = 16
-buildSRT :: DynFlags -> [SRTEntry] -> UniqSM (CmmDecl, SRTEntry)
+buildSRT :: DynFlags -> [SRTEntry] -> UniqSM (CmmDeclSRTs, SRTEntry)
buildSRT dflags refs = do
id <- getUniqueM
let
@@ -835,20 +1098,30 @@ buildSRT dflags refs = do
[] -- no saved info
return (mkDataLits (Section Data lbl) lbl fields, SRTEntry lbl)
-
-- | Update info tables with references to their SRTs. Also generate
-- static closures, splicing in SRT fields as necessary.
updInfoSRTs
:: DynFlags
-> LabelMap CLabel -- SRT labels for each block
-> LabelMap [SRTEntry] -- SRTs to merge into FUN_STATIC closures
+ -> Bool -- Whether the CmmDecl's group has CAF references
-> CmmDecl
- -> [CmmDecl]
+ -> [CmmDeclSRTs]
+
+updInfoSRTs _ _ _ _ (CmmData s (CmmStaticsRaw lbl statics))
+ = [CmmData s (RawCmmStatics lbl statics)]
+
+updInfoSRTs dflags _ _ caffy (CmmData s (CmmStatics lbl itbl ccs payload))
+ = [CmmData s (RawCmmStatics lbl (map CmmStaticLit field_lits))]
+ where
+ caf_info = if caffy then MayHaveCafRefs else NoCafRefs
+ field_lits = mkStaticClosureFields dflags itbl ccs caf_info payload
-updInfoSRTs dflags srt_env funSRTEnv (CmmProc top_info top_l live g)
+updInfoSRTs dflags srt_env funSRTEnv caffy (CmmProc top_info top_l live g)
| Just (_,closure) <- maybeStaticClosure = [ proc, closure ]
| otherwise = [ proc ]
where
+ caf_info = if caffy then MayHaveCafRefs else NoCafRefs
proc = CmmProc top_info { info_tbls = newTopInfo } top_l live g
newTopInfo = mapMapWithKey updInfoTbl (info_tbls top_info)
updInfoTbl l info_tbl
@@ -858,7 +1131,7 @@ updInfoSRTs dflags srt_env funSRTEnv (CmmProc top_info top_l live g)
-- Generate static closures [FUN]. Note that this also generates
-- static closures for thunks (CAFs), because it's easier to treat
-- them uniformly in the code generator.
- maybeStaticClosure :: Maybe (CmmInfoTable, CmmDecl)
+ maybeStaticClosure :: Maybe (CmmInfoTable, CmmDeclSRTs)
maybeStaticClosure
| Just info_tbl@CmmInfoTable{..} <-
mapLookup (g_entry g) (info_tbls top_info)
@@ -873,20 +1146,20 @@ updInfoSRTs dflags srt_env funSRTEnv (CmmProc top_info top_l live g)
Just srtEntries -> srtTrace "maybeStaticFun" (ppr res)
(info_tbl { cit_rep = new_rep }, res)
where res = [ CmmLabel lbl | SRTEntry lbl <- srtEntries ]
- fields = mkStaticClosureFields dflags info_tbl ccs (idCafInfo id)
- srtEntries
+ fields = mkStaticClosureFields dflags info_tbl ccs caf_info srtEntries
new_rep = case cit_rep of
HeapRep sta ptrs nptrs ty ->
HeapRep sta (ptrs + length srtEntries) nptrs ty
_other -> panic "maybeStaticFun"
- lbl = mkLocalClosureLabel (idName id) (idCafInfo id)
+ lbl = mkLocalClosureLabel (idName id) caf_info
in
Just (newInfo, mkDataLits (Section Data lbl) lbl fields)
| otherwise = Nothing
-updInfoSRTs _ _ _ t = [t]
-
srtTrace :: String -> SDoc -> b -> b
-- srtTrace = pprTrace
srtTrace _ _ b = b
+
+srtTraceM :: Applicative f => String -> SDoc -> f ()
+srtTraceM str doc = srtTrace str doc (pure ())
diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y
index d7235d0167..886f429611 100644
--- a/compiler/GHC/Cmm/Parser.y
+++ b/compiler/GHC/Cmm/Parser.y
@@ -394,7 +394,7 @@ cmmdata :: { CmmParse () }
: 'section' STRING '{' data_label statics '}'
{ do lbl <- $4;
ss <- sequence $5;
- code (emitDecl (CmmData (Section (section $2) lbl) (Statics lbl $ concat ss))) }
+ code (emitDecl (CmmData (Section (section $2) lbl) (CmmStaticsRaw lbl (concat ss)))) }
data_label :: { CmmParse CLabel }
: NAME ':'
@@ -1175,7 +1175,7 @@ staticClosure :: UnitId -> FastString -> FastString -> [CmmLit] -> CmmParse ()
staticClosure pkg cl_label info payload
= do dflags <- getDynFlags
let lits = mkStaticClosure dflags (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] []
- code $ emitDataLits (mkCmmDataLabel pkg cl_label) lits
+ code $ emitRawDataLits (mkCmmDataLabel pkg cl_label) lits
foreignCall
:: String
diff --git a/compiler/GHC/Cmm/Pipeline.hs b/compiler/GHC/Cmm/Pipeline.hs
index 6db9e23ee1..9fd484fdb2 100644
--- a/compiler/GHC/Cmm/Pipeline.hs
+++ b/compiler/GHC/Cmm/Pipeline.hs
@@ -1,4 +1,6 @@
{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE TupleSections #-}
module GHC.Cmm.Pipeline (
-- | Converts C-- with an implicit stack and native C-- calls into
@@ -27,6 +29,7 @@ import HscTypes
import Control.Monad
import Outputable
import GHC.Platform
+import Data.Either (partitionEithers)
-----------------------------------------------------------------------------
-- | Top level driver for C-- pipeline
@@ -37,14 +40,15 @@ cmmPipeline
-- dynamic flags: -dcmm-lint -ddump-cmm-cps
-> ModuleSRTInfo -- Info about SRTs generated so far
-> CmmGroup -- Input C-- with Procedures
- -> IO (ModuleSRTInfo, CmmGroup) -- Output CPS transformed C--
+ -> IO (ModuleSRTInfo, CmmGroupSRTs) -- Output CPS transformed C--
cmmPipeline hsc_env srtInfo prog = withTimingSilent dflags (text "Cmm pipeline") forceRes $
do let dflags = hsc_dflags hsc_env
tops <- {-# SCC "tops" #-} mapM (cpsTop hsc_env) prog
- (srtInfo, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags srtInfo tops
+ let (procs, data_) = partitionEithers tops
+ (srtInfo, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags srtInfo procs data_
dumpWith dflags Opt_D_dump_cmm_cps "Post CPS Cmm" FormatCMM (ppr cmms)
return (srtInfo, cmms)
@@ -54,8 +58,8 @@ cmmPipeline hsc_env srtInfo prog = withTimingSilent dflags (text "Cmm pipeline")
dflags = hsc_dflags hsc_env
-cpsTop :: HscEnv -> CmmDecl -> IO (CAFEnv, [CmmDecl])
-cpsTop _ p@(CmmData {}) = return (mapEmpty, [p])
+cpsTop :: HscEnv -> CmmDecl -> IO (Either (CAFEnv, [CmmDecl]) (CAFSet, CmmDecl))
+cpsTop _ p@(CmmData _ statics) = return (Right (cafAnalData statics, p))
cpsTop hsc_env proc =
do
----------- Control-flow optimisations ----------------------------------
@@ -85,7 +89,9 @@ cpsTop hsc_env proc =
dump Opt_D_dump_cmm_switch "Post switch plan" g
----------- Proc points -------------------------------------------------
- let call_pps = {-# SCC "callProcPoints" #-} callProcPoints g
+ let
+ call_pps :: ProcPointSet -- LabelMap
+ call_pps = {-# SCC "callProcPoints" #-} callProcPoints g
proc_points <-
if splitting_proc_points
then do
@@ -144,7 +150,7 @@ cpsTop hsc_env proc =
-- See Note [unreachable blocks]
dumps Opt_D_dump_cmm_cfg "Post control-flow optimisations" g
- return (cafEnv, g)
+ return (Left (cafEnv, g))
where dflags = hsc_dflags hsc_env
platform = targetPlatform dflags
diff --git a/compiler/GHC/Cmm/Ppr/Decl.hs b/compiler/GHC/Cmm/Ppr/Decl.hs
index 2544e6a0d3..e91c4b6277 100644
--- a/compiler/GHC/Cmm/Ppr/Decl.hs
+++ b/compiler/GHC/Cmm/Ppr/Decl.hs
@@ -54,13 +54,13 @@ import qualified Data.ByteString as BS
pprCmms :: (Outputable info, Outputable g)
- => [GenCmmGroup CmmStatics info g] -> SDoc
+ => [GenCmmGroup RawCmmStatics info g] -> SDoc
pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms))
where
separator = space $$ text "-------------------" $$ space
writeCmms :: (Outputable info, Outputable g)
- => DynFlags -> Handle -> [GenCmmGroup CmmStatics info g] -> IO ()
+ => DynFlags -> Handle -> [GenCmmGroup RawCmmStatics info g] -> IO ()
writeCmms dflags handle cmms = printForC dflags handle (pprCmms cmms)
-----------------------------------------------------------------------------
@@ -72,6 +72,9 @@ instance (Outputable d, Outputable info, Outputable i)
instance Outputable CmmStatics where
ppr = pprStatics
+instance Outputable RawCmmStatics where
+ ppr = pprRawStatics
+
instance Outputable CmmStatic where
ppr = pprStatic
@@ -136,8 +139,14 @@ instance Outputable ForeignHint where
-- Strings are printed as C strings, and we print them as I8[],
-- following C--
--
+
pprStatics :: CmmStatics -> SDoc
-pprStatics (Statics lbl ds) = vcat ((ppr lbl <> colon) : map ppr ds)
+pprStatics (CmmStatics lbl itbl ccs payload) =
+ ppr lbl <> colon <+> ppr itbl <+> ppr ccs <+> ppr payload
+pprStatics (CmmStaticsRaw lbl ds) = pprRawStatics (RawCmmStatics lbl ds)
+
+pprRawStatics :: RawCmmStatics -> SDoc
+pprRawStatics (RawCmmStatics lbl ds) = vcat ((ppr lbl <> colon) : map ppr ds)
pprStatic :: CmmStatic -> SDoc
pprStatic s = case s of
diff --git a/compiler/GHC/Cmm/Utils.hs b/compiler/GHC/Cmm/Utils.hs
index 02d64da936..eda440040d 100644
--- a/compiler/GHC/Cmm/Utils.hs
+++ b/compiler/GHC/Cmm/Utils.hs
@@ -192,22 +192,22 @@ mkWordCLit :: DynFlags -> Integer -> CmmLit
mkWordCLit dflags wd = CmmInt wd (wordWidth dflags)
mkByteStringCLit
- :: CLabel -> ByteString -> (CmmLit, GenCmmDecl CmmStatics info stmt)
+ :: CLabel -> ByteString -> (CmmLit, GenCmmDecl RawCmmStatics info stmt)
-- We have to make a top-level decl for the string,
-- and return a literal pointing to it
mkByteStringCLit lbl bytes
- = (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes])
+ = (CmmLabel lbl, CmmData (Section sec lbl) $ RawCmmStatics lbl [CmmString bytes])
where
-- This can not happen for String literals (as there \NUL is replaced by
-- C0 80). However, it can happen with Addr# literals.
sec = if 0 `BS.elem` bytes then ReadOnlyData else CString
-mkDataLits :: Section -> CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt
+mkDataLits :: Section -> CLabel -> [CmmLit] -> GenCmmDecl RawCmmStatics info stmt
-- Build a data-segment data block
mkDataLits section lbl lits
- = CmmData section (Statics lbl $ map CmmStaticLit lits)
+ = CmmData section (RawCmmStatics lbl $ map CmmStaticLit lits)
-mkRODataLits :: CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt
+mkRODataLits :: CLabel -> [CmmLit] -> GenCmmDecl RawCmmStatics info stmt
-- Build a read-only data block
mkRODataLits lbl lits
= mkDataLits section lbl lits