1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
|
{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- | Graph coloring register allocator.
module GHC.CmmToAsm.Reg.Graph (
regAlloc
) where
import GHC.Prelude
import qualified GHC.Data.Graph.Color as Color
import GHC.CmmToAsm.Reg.Liveness
import GHC.CmmToAsm.Reg.Graph.Spill
import GHC.CmmToAsm.Reg.Graph.SpillClean
import GHC.CmmToAsm.Reg.Graph.SpillCost
import GHC.CmmToAsm.Reg.Graph.Stats
import GHC.CmmToAsm.Reg.Graph.TrivColorable
import GHC.CmmToAsm.Instr
import GHC.CmmToAsm.Reg.Target
import GHC.CmmToAsm.Config
import GHC.Platform.Reg.Class
import GHC.Platform.Reg
import GHC.Data.Bag
import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Platform
import GHC.Types.Unique.FM
import GHC.Types.Unique.Set
import GHC.Types.Unique.Supply
import GHC.Utils.Misc (seqList)
import GHC.CmmToAsm.CFG
import Data.Maybe
import Control.Monad
-- | The maximum number of build\/spill cycles we'll allow.
--
-- It should only take 3 or 4 cycles for the allocator to converge.
-- If it takes any longer than this it's probably in an infinite loop,
-- so it's better just to bail out and report a bug.
maxSpinCount :: Int
maxSpinCount = 10
-- | The top level of the graph coloring register allocator.
regAlloc
:: (Outputable statics, Outputable instr, Instruction instr)
=> NCGConfig
-> UniqFM RegClass (UniqSet RealReg) -- ^ registers we can use for allocation
-> UniqSet Int -- ^ set of available spill slots.
-> Int -- ^ current number of spill slots
-> [LiveCmmDecl statics instr] -- ^ code annotated with liveness information.
-> Maybe CFG -- ^ CFG of basic blocks if available
-> UniqSM ( [NatCmmDecl statics instr]
, Maybe Int, [RegAllocStats statics instr] )
-- ^ code with registers allocated, additional stacks required
-- and stats for each stage of allocation
regAlloc config regsFree slotsFree slotsCount code cfg
= do
let platform = ncgPlatform config
triv = trivColorable platform
(targetVirtualRegSqueeze platform)
(targetRealRegSqueeze platform)
(code_final, debug_codeGraphs, slotsCount', _)
<- regAlloc_spin config 0
triv
regsFree slotsFree slotsCount [] code cfg
let needStack
| slotsCount == slotsCount'
= Nothing
| otherwise
= Just slotsCount'
return ( code_final
, needStack
, reverse debug_codeGraphs )
-- | Perform solver iterations for the graph coloring allocator.
--
-- We extract a register conflict graph from the provided cmm code,
-- and try to colour it. If that works then we use the solution rewrite
-- the code with real hregs. If coloring doesn't work we add spill code
-- and try to colour it again. After `maxSpinCount` iterations we give up.
--
regAlloc_spin
:: forall instr statics.
(Instruction instr,
Outputable instr,
Outputable statics)
=> NCGConfig
-> Int -- ^ Number of solver iterations we've already performed.
-> Color.Triv VirtualReg RegClass RealReg
-- ^ Function for calculating whether a register is trivially
-- colourable.
-> UniqFM RegClass (UniqSet RealReg) -- ^ Free registers that we can allocate.
-> UniqSet Int -- ^ Free stack slots that we can use.
-> Int -- ^ Number of spill slots in use
-> [RegAllocStats statics instr] -- ^ Current regalloc stats to add to.
-> [LiveCmmDecl statics instr] -- ^ Liveness annotated code to allocate.
-> Maybe CFG
-> UniqSM ( [NatCmmDecl statics instr]
, [RegAllocStats statics instr]
, Int -- Slots in use
, Color.Graph VirtualReg RegClass RealReg)
regAlloc_spin config spinCount triv regsFree slotsFree slotsCount debug_codeGraphs code cfg
= do
let platform = ncgPlatform config
-- If any of these dump flags are turned on we want to hang on to
-- intermediate structures in the allocator - otherwise tell the
-- allocator to ditch them early so we don't end up creating space leaks.
let dump = or
[ ncgDumpRegAllocStages config
, ncgDumpAsmStats config
, ncgDumpAsmConflicts config
]
-- Check that we're not running off down the garden path.
when (spinCount > maxSpinCount)
$ pprPanic "regAlloc_spin: max build/spill cycle count exceeded."
( text "It looks like the register allocator is stuck in an infinite loop."
$$ text "max cycles = " <> int maxSpinCount
$$ text "regsFree = " <> (hcat $ punctuate space $ map ppr
$ nonDetEltsUniqSet $ unionManyUniqSets
$ nonDetEltsUFM regsFree)
-- This is non-deterministic but we do not
-- currently support deterministic code-generation.
-- See Note [Unique Determinism and code generation]
$$ text "slotsFree = " <> ppr (sizeUniqSet slotsFree))
-- Build the register conflict graph from the cmm code.
(graph :: Color.Graph VirtualReg RegClass RealReg)
<- {-# SCC "BuildGraph" #-} buildGraph code
-- VERY IMPORTANT:
-- We really do want the graph to be fully evaluated _before_ we
-- start coloring. If we don't do this now then when the call to
-- Color.colorGraph forces bits of it, the heap will be filled with
-- half evaluated pieces of graph and zillions of apply thunks.
seqGraph graph `seq` return ()
-- Build a map of the cost of spilling each instruction.
-- This is a lazy binding, so the map will only be computed if we
-- actually have to spill to the stack.
let spillCosts = foldl' plusSpillCostInfo zeroSpillCostInfo
$ map (slurpSpillCostInfo platform cfg) code
-- The function to choose regs to leave uncolored.
let spill = chooseSpill spillCosts
-- Record startup state in our log.
let stat1
= if spinCount == 0
then Just $ RegAllocStatsStart
{ raLiveCmm = code
, raGraph = graph
, raSpillCosts = spillCosts
, raPlatform = platform
}
else Nothing
-- Try and color the graph.
let (graph_colored, rsSpill, rmCoalesce)
= {-# SCC "ColorGraph" #-}
Color.colorGraph
(ncgRegsIterative config)
spinCount
regsFree triv spill graph
-- Rewrite registers in the code that have been coalesced.
let patchF reg
| RegVirtual vr <- reg
= case lookupUFM rmCoalesce vr of
Just vr' -> patchF (RegVirtual vr')
Nothing -> reg
| otherwise
= reg
let (code_coalesced :: [LiveCmmDecl statics instr])
= map (patchEraseLive patchF) code
-- Check whether we've found a coloring.
if isEmptyUniqSet rsSpill
-- Coloring was successful because no registers needed to be spilled.
then do
-- if -fasm-lint is turned on then validate the graph.
-- This checks for bugs in the graph allocator itself.
let graph_colored_lint =
if ncgAsmLinting config
then Color.validateGraph (text "")
True -- Require all nodes to be colored.
graph_colored
else graph_colored
-- Rewrite the code to use real hregs, using the colored graph.
let code_patched
= map (patchRegsFromGraph platform graph_colored_lint)
code_coalesced
-- Clean out unneeded SPILL/RELOAD meta instructions.
-- The spill code generator just spills the entire live range
-- of a vreg, but it might not need to be on the stack for
-- its entire lifetime.
let code_spillclean
= map (cleanSpills platform) code_patched
-- Strip off liveness information from the allocated code.
-- Also rewrite SPILL/RELOAD meta instructions into real machine
-- instructions along the way
let code_final
= map (stripLive config) code_spillclean
-- Record what happened in this stage for debugging
let stat
= RegAllocStatsColored
{ raCode = code
, raGraph = graph
, raGraphColored = graph_colored_lint
, raCoalesced = rmCoalesce
, raCodeCoalesced = code_coalesced
, raPatched = code_patched
, raSpillClean = code_spillclean
, raFinal = code_final
, raSRMs = foldl' addSRM (0, 0, 0)
$ map countSRMs code_spillclean
, raPlatform = platform
}
-- Bundle up all the register allocator statistics.
-- .. but make sure to drop them on the floor if they're not
-- needed, otherwise we'll get a space leak.
let statList =
if dump then [stat] ++ maybeToList stat1 ++ debug_codeGraphs
else []
-- Ensure all the statistics are evaluated, to avoid space leaks.
seqList statList (return ())
return ( code_final
, statList
, slotsCount
, graph_colored_lint)
-- Coloring was unsuccessful. We need to spill some register to the
-- stack, make a new graph, and try to color it again.
else do
-- if -fasm-lint is turned on then validate the graph
let graph_colored_lint =
if ncgAsmLinting config
then Color.validateGraph (text "")
False -- don't require nodes to be colored
graph_colored
else graph_colored
-- Spill uncolored regs to the stack.
(code_spilled, slotsFree', slotsCount', spillStats)
<- regSpill platform code_coalesced slotsFree slotsCount rsSpill
-- Recalculate liveness information.
-- NOTE: we have to reverse the SCCs here to get them back into
-- the reverse-dependency order required by computeLiveness.
-- If they're not in the correct order that function will panic.
code_relive <- mapM (regLiveness platform . reverseBlocksInTops)
code_spilled
-- Record what happened in this stage for debugging.
let stat =
RegAllocStatsSpill
{ raCode = code
, raGraph = graph_colored_lint
, raCoalesced = rmCoalesce
, raSpillStats = spillStats
, raSpillCosts = spillCosts
, raSpilled = code_spilled }
-- Bundle up all the register allocator statistics.
-- .. but make sure to drop them on the floor if they're not
-- needed, otherwise we'll get a space leak.
let statList =
if dump
then [stat] ++ maybeToList stat1 ++ debug_codeGraphs
else []
-- Ensure all the statistics are evaluated, to avoid space leaks.
seqList statList (return ())
regAlloc_spin config (spinCount + 1) triv regsFree slotsFree'
slotsCount' statList code_relive cfg
-- | Build a graph from the liveness and coalesce information in this code.
buildGraph
:: Instruction instr
=> [LiveCmmDecl statics instr]
-> UniqSM (Color.Graph VirtualReg RegClass RealReg)
buildGraph code
= do
-- Slurp out the conflicts and reg->reg moves from this code.
let (conflictList, moveList) =
unzip $ map slurpConflicts code
-- Slurp out the spill/reload coalesces.
let moveList2 = map slurpReloadCoalesce code
-- Add the reg-reg conflicts to the graph.
let conflictBag = unionManyBags conflictList
let graph_conflict
= foldr graphAddConflictSet Color.initGraph conflictBag
-- Add the coalescences edges to the graph.
let moveBag
= unionBags (unionManyBags moveList2)
(unionManyBags moveList)
let graph_coalesce
= foldr graphAddCoalesce graph_conflict moveBag
return graph_coalesce
-- | Add some conflict edges to the graph.
-- Conflicts between virtual and real regs are recorded as exclusions.
graphAddConflictSet
:: UniqSet Reg
-> Color.Graph VirtualReg RegClass RealReg
-> Color.Graph VirtualReg RegClass RealReg
graphAddConflictSet set graph
= let virtuals = mkUniqSet
[ vr | RegVirtual vr <- nonDetEltsUniqSet set ]
graph1 = Color.addConflicts virtuals classOfVirtualReg graph
graph2 = foldr (\(r1, r2) -> Color.addExclusion r1 classOfVirtualReg r2)
graph1
[ (vr, rr)
| RegVirtual vr <- nonDetEltsUniqSet set
, RegReal rr <- nonDetEltsUniqSet set]
-- See Note [Unique Determinism and code generation]
in graph2
-- | Add some coalesence edges to the graph
-- Coalesences between virtual and real regs are recorded as preferences.
graphAddCoalesce
:: (Reg, Reg)
-> Color.Graph VirtualReg RegClass RealReg
-> Color.Graph VirtualReg RegClass RealReg
graphAddCoalesce (r1, r2) graph
| RegReal rr <- r1
, RegVirtual vr <- r2
= Color.addPreference (vr, classOfVirtualReg vr) rr graph
| RegReal rr <- r2
, RegVirtual vr <- r1
= Color.addPreference (vr, classOfVirtualReg vr) rr graph
| RegVirtual vr1 <- r1
, RegVirtual vr2 <- r2
= Color.addCoalesce
(vr1, classOfVirtualReg vr1)
(vr2, classOfVirtualReg vr2)
graph
-- We can't coalesce two real regs, but there could well be existing
-- hreg,hreg moves in the input code. We'll just ignore these
-- for coalescing purposes.
| RegReal _ <- r1
, RegReal _ <- r2
= graph
#if __GLASGOW_HASKELL__ <= 810
| otherwise
= panic "graphAddCoalesce"
#endif
-- | Patch registers in code using the reg -> reg mapping in this graph.
patchRegsFromGraph
:: (Outputable statics, Outputable instr, Instruction instr)
=> Platform -> Color.Graph VirtualReg RegClass RealReg
-> LiveCmmDecl statics instr -> LiveCmmDecl statics instr
patchRegsFromGraph platform graph code
= patchEraseLive patchF code
where
-- Function to lookup the hardreg for a virtual reg from the graph.
patchF reg
-- leave real regs alone.
| RegReal{} <- reg
= reg
-- this virtual has a regular node in the graph.
| RegVirtual vr <- reg
, Just node <- Color.lookupNode graph vr
= case Color.nodeColor node of
Just color -> RegReal color
Nothing -> RegVirtual vr
-- no node in the graph for this virtual, bad news.
| otherwise
= pprPanic "patchRegsFromGraph: register mapping failed."
( text "There is no node in the graph for register "
<> ppr reg
$$ ppr code
$$ Color.dotGraph
(\_ -> text "white")
(trivColorable platform
(targetVirtualRegSqueeze platform)
(targetRealRegSqueeze platform))
graph)
-----
-- for when laziness just isn't what you wanted...
-- We need to deepSeq the whole graph before trying to colour it to avoid
-- space leaks.
seqGraph :: Color.Graph VirtualReg RegClass RealReg -> ()
seqGraph graph = seqNodes (nonDetEltsUFM (Color.graphMap graph))
-- See Note [Unique Determinism and code generation]
seqNodes :: [Color.Node VirtualReg RegClass RealReg] -> ()
seqNodes ns
= case ns of
[] -> ()
(n : ns) -> seqNode n `seq` seqNodes ns
seqNode :: Color.Node VirtualReg RegClass RealReg -> ()
seqNode node
= seqVirtualReg (Color.nodeId node)
`seq` seqRegClass (Color.nodeClass node)
`seq` seqMaybeRealReg (Color.nodeColor node)
`seq` (seqVirtualRegList (nonDetEltsUniqSet (Color.nodeConflicts node)))
`seq` (seqRealRegList (nonDetEltsUniqSet (Color.nodeExclusions node)))
`seq` (seqRealRegList (Color.nodePreference node))
`seq` (seqVirtualRegList (nonDetEltsUniqSet (Color.nodeCoalesce node)))
-- It's OK to use nonDetEltsUniqSet for seq
seqVirtualReg :: VirtualReg -> ()
seqVirtualReg reg = reg `seq` ()
seqRealReg :: RealReg -> ()
seqRealReg reg = reg `seq` ()
seqRegClass :: RegClass -> ()
seqRegClass c = c `seq` ()
seqMaybeRealReg :: Maybe RealReg -> ()
seqMaybeRealReg mr
= case mr of
Nothing -> ()
Just r -> seqRealReg r
seqVirtualRegList :: [VirtualReg] -> ()
seqVirtualRegList rs
= case rs of
[] -> ()
(r : rs) -> seqVirtualReg r `seq` seqVirtualRegList rs
seqRealRegList :: [RealReg] -> ()
seqRealRegList rs
= case rs of
[] -> ()
(r : rs) -> seqRealReg r `seq` seqRealRegList rs
|