1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
|
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
-----------------------------------------------------------------------------
--
-- Stg to C-- code generation
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------
module GHC.StgToCmm ( codeGen ) where
import GHC.Prelude as Prelude
import GHC.Driver.Backend
import GHC.Driver.Session
import GHC.StgToCmm.Prof (initInfoTableProv, initCostCentres, ldvEnter)
import GHC.StgToCmm.Monad
import GHC.StgToCmm.Env
import GHC.StgToCmm.Bind
import GHC.StgToCmm.DataCon
import GHC.StgToCmm.Layout
import GHC.StgToCmm.Utils
import GHC.StgToCmm.Closure
import GHC.StgToCmm.Hpc
import GHC.StgToCmm.Ticky
import GHC.StgToCmm.Types (ModuleLFInfos)
import GHC.Cmm
import GHC.Cmm.Utils
import GHC.Cmm.CLabel
import GHC.Cmm.Graph
import GHC.Stg.Syntax
import GHC.Types.CostCentre
import GHC.Types.IPE
import GHC.Types.HpcInfo
import GHC.Types.Id
import GHC.Types.Id.Info
import GHC.Types.RepType
import GHC.Types.Basic
import GHC.Types.Var.Set ( isEmptyDVarSet )
import GHC.Types.Unique.FM
import GHC.Types.Name.Env
import GHC.Types.ForeignStubs
import GHC.Core.DataCon
import GHC.Core.TyCon
import GHC.Core.Multiplicity
import GHC.Unit.Module
import GHC.Utils.Error
import GHC.Utils.Outputable
import GHC.Utils.Panic.Plain
import GHC.Utils.Logger
import GHC.Utils.TmpFs
import GHC.Data.Stream
import GHC.Data.OrdList
import GHC.Types.Unique.Map
import Control.Monad (when,void, forM_)
import GHC.Utils.Misc
import System.IO.Unsafe
import qualified Data.ByteString as BS
import Data.Maybe
import Data.IORef
data CodeGenState = CodeGenState { codegen_used_info :: !(OrdList CmmInfoTable)
, codegen_state :: !CgState }
codeGen :: Logger
-> TmpFs
-> DynFlags
-> Module
-> InfoTableProvMap
-> [TyCon]
-> CollectedCCs -- (Local/global) cost-centres needing declaring/registering.
-> [CgStgTopBinding] -- Bindings to convert
-> HpcInfo
-> Stream IO CmmGroup (CStub, ModuleLFInfos) -- Output as a stream, so codegen can
-- be interleaved with output
codeGen logger tmpfs dflags this_mod ip_map@(InfoTableProvMap (UniqMap denv) _) data_tycons
cost_centre_info stg_binds hpc_info
= do { -- cg: run the code generator, and yield the resulting CmmGroup
-- Using an IORef to store the state is a bit crude, but otherwise
-- we would need to add a state monad layer which regresses
-- allocations by 0.5-2%.
; cgref <- liftIO $ initC >>= \s -> newIORef (CodeGenState mempty s)
; let cg :: FCode a -> Stream IO CmmGroup a
cg fcode = do
(a, cmm) <- liftIO . withTimingSilent logger (text "STG -> Cmm") (`seq` ()) $ do
CodeGenState ts st <- readIORef cgref
let (a,st') = runC dflags this_mod st (getCmm fcode)
-- NB. stub-out cgs_tops and cgs_stmts. This fixes
-- a big space leak. DO NOT REMOVE!
-- This is observed by the #3294 test
let !used_info
| gopt Opt_InfoTableMap dflags = toOL (mapMaybe topInfoTable (snd a)) `mappend` ts
| otherwise = mempty
writeIORef cgref $!
CodeGenState used_info
(st'{ cgs_tops = nilOL,
cgs_stmts = mkNop
})
return a
yield cmm
return a
-- Note [codegen-split-init] the cmm_init block must come
-- FIRST. This is because when -split-objs is on we need to
-- combine this block with its initialisation routines; see
-- Note [pipeline-split-init].
; cg (mkModuleInit cost_centre_info this_mod hpc_info)
; mapM_ (cg . cgTopBinding logger tmpfs dflags) stg_binds
-- Put datatype_stuff after code_stuff, because the
-- datatype closure table (for enumeration types) to
-- (say) PrelBase_True_closure, which is defined in
-- code_stuff
; let do_tycon tycon = do
-- Generate a table of static closures for an
-- enumeration type Note that the closure pointers are
-- tagged.
when (isEnumerationTyCon tycon) $ cg (cgEnumerationTyCon tycon)
-- Emit normal info_tables, for data constructors defined in this module.
mapM_ (cg . cgDataCon DefinitionSite) (tyConDataCons tycon)
; mapM_ do_tycon data_tycons
-- Emit special info tables for everything used in this module
-- This will only do something if `-fdistinct-info-tables` is turned on.
; mapM_ (\(dc, ns) -> forM_ ns $ \(k, _ss) -> cg (cgDataCon (UsageSite this_mod k) dc)) (nonDetEltsUFM denv)
; final_state <- liftIO (readIORef cgref)
; let cg_id_infos = cgs_binds . codegen_state $ final_state
used_info = fromOL . codegen_used_info $ final_state
; !foreign_stub <- cg (initInfoTableProv used_info ip_map this_mod)
-- See Note [Conveying CAF-info and LFInfo between modules] in
-- GHC.StgToCmm.Types
; let extractInfo info = (name, lf)
where
!name = idName (cg_id info)
!lf = cg_lf info
!generatedInfo
| gopt Opt_OmitInterfacePragmas dflags
= emptyNameEnv
| otherwise
= mkNameEnv (Prelude.map extractInfo (eltsUFM cg_id_infos))
; return (foreign_stub, generatedInfo)
}
---------------------------------------------------------------
-- Top-level bindings
---------------------------------------------------------------
{- 'cgTopBinding' is only used for top-level bindings, since they need
to be allocated statically (not in the heap) and need to be labelled.
No unboxed bindings can happen at top level.
In the code below, the static bindings are accumulated in the
@MkCgState@, and transferred into the ``statics'' slot by @forkStatics@.
This is so that we can write the top level processing in a compositional
style, with the increasing static environment being plumbed as a state
variable. -}
cgTopBinding :: Logger -> TmpFs -> DynFlags -> CgStgTopBinding -> FCode ()
cgTopBinding logger tmpfs dflags = \case
StgTopLifted (StgNonRec id rhs) -> do
let (info, fcode) = cgTopRhs dflags NonRecursive id rhs
fcode
addBindC info
StgTopLifted (StgRec pairs) -> do
let (bndrs, rhss) = unzip pairs
let pairs' = zip bndrs rhss
r = unzipWith (cgTopRhs dflags Recursive) pairs'
(infos, fcodes) = unzip r
addBindsC infos
sequence_ fcodes
StgTopStringLit id str -> do
let label = mkBytesLabel (idName id)
-- emit either a CmmString literal or dump the string in a file and emit a
-- CmmFileEmbed literal.
-- See Note [Embedding large binary blobs] in GHC.CmmToAsm.Ppr
let isNCG = backend dflags == NCG
isSmall = fromIntegral (BS.length str) <= binBlobThreshold dflags
asString = binBlobThreshold dflags == 0 || isSmall
(lit,decl) = if not isNCG || asString
then mkByteStringCLit label str
else mkFileEmbedLit label $ unsafePerformIO $ do
bFile <- newTempName logger tmpfs (tmpDir dflags) TFL_CurrentModule ".dat"
BS.writeFile bFile str
return bFile
emitDecl decl
addBindC (litIdInfo (targetPlatform dflags) id mkLFStringLit lit)
cgTopRhs :: DynFlags -> RecFlag -> Id -> CgStgRhs -> (CgIdInfo, FCode ())
-- The Id is passed along for setting up a binding...
cgTopRhs dflags _rec bndr (StgRhsCon _cc con mn _ts args)
= cgTopRhsCon dflags bndr con mn (assertNonVoidStgArgs args)
-- con args are always non-void,
-- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
cgTopRhs dflags rec bndr (StgRhsClosure fvs cc upd_flag args body)
= assert (isEmptyDVarSet fvs) -- There should be no free variables
cgTopRhsClosure (targetPlatform dflags) rec bndr cc upd_flag args body
---------------------------------------------------------------
-- Module initialisation code
---------------------------------------------------------------
mkModuleInit
:: CollectedCCs -- cost centre info
-> Module
-> HpcInfo
-> FCode ()
mkModuleInit cost_centre_info this_mod hpc_info
= do { initHpc this_mod hpc_info
; initCostCentres cost_centre_info
}
---------------------------------------------------------------
-- Generating static stuff for algebraic data types
---------------------------------------------------------------
cgEnumerationTyCon :: TyCon -> FCode ()
cgEnumerationTyCon tycon
= do platform <- getPlatform
emitRODataLits (mkLocalClosureTableLabel (tyConName tycon) NoCafRefs)
[ CmmLabelOff (mkLocalClosureLabel (dataConName con) NoCafRefs)
(tagForCon platform con)
| con <- tyConDataCons tycon]
cgDataCon :: ConInfoTableLocation -> DataCon -> FCode ()
-- Generate the entry code, info tables, and (for niladic constructor)
-- the static closure, for a constructor.
cgDataCon mn data_con
= do { massert (not (isUnboxedTupleDataCon data_con || isUnboxedSumDataCon data_con))
; profile <- getProfile
; platform <- getPlatform
; let
(tot_wds, -- #ptr_wds + #nonptr_wds
ptr_wds) -- #ptr_wds
= mkVirtConstrSizes profile arg_reps
nonptr_wds = tot_wds - ptr_wds
dyn_info_tbl =
mkDataConInfoTable profile data_con mn False ptr_wds nonptr_wds
-- We're generating info tables, so we don't know and care about
-- what the actual arguments are. Using () here as the place holder.
arg_reps :: [NonVoid PrimRep]
arg_reps = [ NonVoid rep_ty
| ty <- dataConRepArgTys data_con
, rep_ty <- typePrimRep (scaledThing ty)
, not (isVoidRep rep_ty) ]
; emitClosureAndInfoTable platform dyn_info_tbl NativeDirectCall [] $
-- NB: the closure pointer is assumed *untagged* on
-- entry to a constructor. If the pointer is tagged,
-- then we should not be entering it. This assumption
-- is used in ldvEnter and when tagging the pointer to
-- return it.
-- NB 2: We don't set CC when entering data (WDP 94/06)
do { tickyEnterDynCon
; ldvEnter (CmmReg nodeReg)
; tickyReturnOldCon (length arg_reps)
; void $ emitReturn [cmmOffsetB platform (CmmReg nodeReg) (tagForCon platform data_con)]
}
-- The case continuation code expects a tagged pointer
}
|