1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
|
{-# LANGUAGE GADTs #-}
{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
{-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
module GHC.Cmm.ContFlowOpt
( cmmCfgOpts
, cmmCfgOptsProc
, removeUnreachableBlocksProc
, replaceLabels
)
where
import GHC.Prelude hiding (succ, unzip, zip)
import GHC.Cmm.Dataflow.Block
import GHC.Cmm.Dataflow.Collections
import GHC.Cmm.Dataflow.Graph
import GHC.Cmm.Dataflow.Label
import GHC.Cmm.BlockId
import GHC.Cmm
import GHC.Cmm.Utils
import GHC.Cmm.Switch (mapSwitchTargets, switchTargetsToList)
import GHC.Data.Maybe
import GHC.Utils.Panic
import GHC.Utils.Misc
import Control.Monad
-- Note [What is shortcutting]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Consider this Cmm code:
--
-- L1: ...
-- goto L2;
-- L2: goto L3;
-- L3: ...
--
-- Here L2 is an empty block and contains only an unconditional branch
-- to L3. In this situation any block that jumps to L2 can jump
-- directly to L3:
--
-- L1: ...
-- goto L3;
-- L2: goto L3;
-- L3: ...
--
-- In this situation we say that we shortcut L2 to L3. One of
-- consequences of shortcutting is that some blocks of code may become
-- unreachable (in the example above this is true for L2).
-- Note [Control-flow optimisations]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- This optimisation does three things:
--
-- - If a block finishes in an unconditional branch to another block
-- and that is the only jump to that block we concatenate the
-- destination block at the end of the current one.
--
-- - If a block finishes in a call whose continuation block is a
-- goto, then we can shortcut the destination, making the
-- continuation block the destination of the goto - but see Note
-- [Shortcut call returns].
--
-- - For any block that is not a call we try to shortcut the
-- destination(s). Additionally, if a block ends with a
-- conditional branch we try to invert the condition.
--
-- Blocks are processed using postorder DFS traversal. A side effect
-- of determining traversal order with a graph search is elimination
-- of any blocks that are unreachable.
--
-- Transformations are improved by working from the end of the graph
-- towards the beginning, because we may be able to perform many
-- shortcuts in one go.
-- Note [Shortcut call returns]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- We are going to maintain the "current" graph (LabelMap CmmBlock) as
-- we go, and also a mapping from BlockId to BlockId, representing
-- continuation labels that we have renamed. This latter mapping is
-- important because we might shortcut a CmmCall continuation. For
-- example:
--
-- Sp[0] = L
-- call g returns to L
-- L: goto M
-- M: ...
--
-- So when we shortcut the L block, we need to replace not only
-- the continuation of the call, but also references to L in the
-- code (e.g. the assignment Sp[0] = L):
--
-- Sp[0] = M
-- call g returns to M
-- M: ...
--
-- So we keep track of which labels we have renamed and apply the mapping
-- at the end with replaceLabels.
-- Note [Shortcut call returns and proc-points]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Consider this code that you might get from a recursive
-- let-no-escape:
--
-- goto L1
-- L1:
-- if (Hp > HpLim) then L2 else L3
-- L2:
-- call stg_gc_noregs returns to L4
-- L4:
-- goto L1
-- L3:
-- ...
-- goto L1
--
-- Then the control-flow optimiser shortcuts L4. But that turns L1
-- into the call-return proc point, and every iteration of the loop
-- has to shuffle variables to and from the stack. So we must *not*
-- shortcut L4.
--
-- Moreover not shortcutting call returns is probably fine. If L4 can
-- concat with its branch target then it will still do so. And we
-- save some compile time because we don't have to traverse all the
-- code in replaceLabels.
--
-- However, we probably do want to do this if we are splitting proc
-- points, because L1 will be a proc-point anyway, so merging it with
-- L4 reduces the number of proc points. Unfortunately recursive
-- let-no-escapes won't generate very good code with proc-point
-- splitting on - we should probably compile them to explicitly use
-- the native calling convention instead.
cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
cmmCfgOpts split g = fst (blockConcat split g)
cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
where (g', env) = blockConcat split g
info' = info{ info_tbls = new_info_tbls }
new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))
-- If we changed any labels, then we have to update the info tables
-- too, except for the top-level info table because that might be
-- referred to by other procs.
upd_info (k,info)
| Just k' <- mapLookup k env
= (k', if k' == g_entry g'
then info
else info{ cit_lbl = infoTblLbl k' })
| otherwise
= (k,info)
cmmCfgOptsProc _ top = top
blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId)
blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
= (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
where
-- We might be able to shortcut the entry BlockId itself.
-- Remember to update the shortcut_map, since we also have to
-- update the info_tbls mapping now.
(new_entry, shortcut_map')
| Just entry_blk <- mapLookup entry_id new_blocks
, Just dest <- canShortcut entry_blk
= (dest, mapInsert entry_id dest shortcut_map)
| otherwise
= (entry_id, shortcut_map)
-- blocks are sorted in reverse postorder, but we want to go from the exit
-- towards beginning, so we use foldr below.
blocks = revPostorder g
blockmap = foldl' (flip addBlock) emptyBody blocks
-- Accumulator contains three components:
-- * map of blocks in a graph
-- * map of shortcut labels. See Note [Shortcut call returns]
-- * map containing number of predecessors for each block. We discard
-- it after we process all blocks.
(new_blocks, shortcut_map, _) =
foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks
-- Map of predecessors for initial graph. We increase number of
-- predecessors for entry block by one to denote that it is
-- target of a jump, even if no block in the current graph jumps
-- to it.
initialBackEdges = incPreds entry_id (predMap blocks)
maybe_concat :: CmmBlock
-> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
-> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
maybe_concat block (!blocks, !shortcut_map, !backEdges)
-- If:
-- (1) current block ends with unconditional branch to b' and
-- (2) it has exactly one predecessor (namely, current block)
--
-- Then:
-- (1) append b' block at the end of current block
-- (2) remove b' from the map of blocks
-- (3) remove information about b' from predecessors map
--
-- Since we know that the block has only one predecessor we call
-- mapDelete directly instead of calling decPreds.
--
-- Note that we always maintain an up-to-date list of predecessors, so
-- we can ignore the contents of shortcut_map
| CmmBranch b' <- last
, hasOnePredecessor b'
, Just blk' <- mapLookup b' blocks
= let bid' = entryLabel blk'
in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
, shortcut_map
, mapDelete b' backEdges )
-- If:
-- (1) we are splitting proc points (see Note
-- [Shortcut call returns and proc-points]) and
-- (2) current block is a CmmCall or CmmForeignCall with
-- continuation b' and
-- (3) we can shortcut that continuation to dest
-- Then:
-- (1) we change continuation to point to b'
-- (2) create mapping from b' to dest
-- (3) increase number of predecessors of dest by 1
-- (4) decrease number of predecessors of b' by 1
--
-- Later we will use replaceLabels to substitute all occurrences of b'
-- with dest.
| splitting_procs
, Just b' <- callContinuation_maybe last
, Just blk' <- mapLookup b' blocks
, Just dest <- canShortcut blk'
= ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
, mapInsert b' dest shortcut_map
, decPreds b' $ incPreds dest backEdges )
-- If:
-- (1) a block does not end with a call
-- Then:
-- (1) if it ends with a conditional attempt to invert the
-- conditional
-- (2) attempt to shortcut all destination blocks
-- (3) if new successors of a block are different from the old ones
-- update the of predecessors accordingly
--
-- A special case of this is a situation when a block ends with an
-- unconditional jump to a block that can be shortcut.
| Nothing <- callContinuation_maybe last
= let oldSuccs = successors last
newSuccs = successors rewrite_last
in ( mapInsert bid (blockJoinTail head rewrite_last) blocks
, shortcut_map
, if oldSuccs == newSuccs
then backEdges
else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
-- Otherwise don't do anything
| otherwise
= ( blocks, shortcut_map, backEdges )
where
(head, last) = blockSplitTail block
bid = entryLabel block
-- Changes continuation of a call to a specified label
update_cont dest =
case last of
CmmCall{} -> last { cml_cont = Just dest }
CmmForeignCall{} -> last { succ = dest }
_ -> panic "Can't shortcut continuation."
-- Attempts to shortcut successors of last node
shortcut_last = mapSuccessors shortcut last
where
shortcut l =
case mapLookup l blocks of
Just b | Just dest <- canShortcut b -> dest
_otherwise -> l
rewrite_last
-- Sometimes we can get rid of the conditional completely.
| CmmCondBranch _cond t f _l <- shortcut_last
, t == f
= CmmBranch t
-- See Note [Invert Cmm conditionals]
| CmmCondBranch cond t f l <- shortcut_last
, hasOnePredecessor t -- inverting will make t a fallthrough
, likelyTrue l || (numPreds f > 1)
, Just cond' <- maybeInvertCmmExpr cond
= CmmCondBranch cond' f t (invertLikeliness l)
-- If all jump destinations of a switch go to the
-- same target eliminate the switch.
| CmmSwitch _expr targets <- shortcut_last
, (t:ts) <- switchTargetsToList targets
, all (== t) ts
= CmmBranch t
| otherwise
= shortcut_last
likelyTrue (Just True) = True
likelyTrue _ = False
invertLikeliness :: Maybe Bool -> Maybe Bool
invertLikeliness = fmap not
-- Number of predecessors for a block
numPreds bid = mapLookup bid backEdges `orElse` 0
hasOnePredecessor b = numPreds b == 1
{-
Note [Invert Cmm conditionals]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The native code generator always produces jumps to the true branch.
Falling through to the false branch is however faster. So we try to
arrange for that to happen.
This means we invert the condition if:
* The likely path will become a fallthrough.
* We can't guarantee a fallthrough for the false branch but for the
true branch.
In some cases it's faster to avoid inverting when the false branch is likely.
However determining when that is the case is neither easy nor cheap so for
now we always invert as this produces smaller binaries and code that is
equally fast on average. (On an i7-6700K)
TODO:
There is also the edge case when both branches have multiple predecessors.
In this case we could assume that we will end up with a jump for BOTH
branches. In this case it might be best to put the likely path in the true
branch especially if there are large numbers of predecessors as this saves
us the jump thats not taken. However I haven't tested this and as of early
2018 we almost never generate cmm where this would apply.
-}
-- Functions for incrementing and decrementing number of predecessors. If
-- decrementing would set the predecessor count to 0, we remove entry from the
-- map.
-- Invariant: if a block has no predecessors it should be dropped from the
-- graph because it is unreachable. maybe_concat is constructed to maintain
-- that invariant, but calling replaceLabels may introduce unreachable blocks.
-- We rely on subsequent passes in the Cmm pipeline to remove unreachable
-- blocks.
incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
incPreds bid edges = mapInsertWith (+) bid 1 edges
decPreds bid edges = case mapLookup bid edges of
Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
Just _ -> mapDelete bid edges
_ -> edges
-- Checks if a block consists only of "goto dest". If it does than we return
-- "Just dest" label. See Note [What is shortcutting]
canShortcut :: CmmBlock -> Maybe BlockId
canShortcut block
| (_, middle, CmmBranch dest) <- blockSplit block
, all dont_care $ blockToList middle
= Just dest
| otherwise
= Nothing
where dont_care CmmComment{} = True
dont_care CmmTick{} = True
dont_care _other = False
-- Concatenates two blocks. First one is assumed to be open on exit, the second
-- is assumed to be closed on entry (i.e. it has a label attached to it, which
-- the splice function removes by calling snd on result of blockSplitHead).
splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
splice head rest = entry `blockJoinHead` code0 `blockAppend` code1
where (CmmEntry lbl sc0, code0) = blockSplitHead head
(CmmEntry _ sc1, code1) = blockSplitHead rest
entry = CmmEntry lbl (combineTickScopes sc0 sc1)
-- If node is a call with continuation call return Just label of that
-- continuation. Otherwise return Nothing.
callContinuation_maybe :: CmmNode O C -> Maybe BlockId
callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
callContinuation_maybe (CmmForeignCall { succ = b }) = Just b
callContinuation_maybe _ = Nothing
-- Map over the CmmGraph, replacing each label with its mapping in the
-- supplied LabelMap.
replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph
replaceLabels env g
| mapNull env = g
| otherwise = replace_eid $ mapGraphNodes1 txnode g
where
replace_eid g = g {g_entry = lookup (g_entry g)}
lookup id = mapLookup id env `orElse` id
txnode :: CmmNode e x -> CmmNode e x
txnode (CmmBranch bid) = CmmBranch (lookup bid)
txnode (CmmCondBranch p t f l) =
mkCmmCondBranch (exp p) (lookup t) (lookup f) l
txnode (CmmSwitch e ids) =
CmmSwitch (exp e) (mapSwitchTargets lookup ids)
txnode (CmmCall t k rg a res r) =
CmmCall (exp t) (liftM lookup k) rg a res r
txnode fc@CmmForeignCall{} =
fc{ args = map exp (args fc), succ = lookup (succ fc) }
txnode other = mapExpDeep exp other
exp :: CmmExpr -> CmmExpr
exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid))
exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
exp e = e
mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C
mkCmmCondBranch p t f l =
if t == f then CmmBranch t else CmmCondBranch p t f l
-- Build a map from a block to its set of predecessors.
predMap :: [CmmBlock] -> LabelMap Int
predMap blocks = foldr add_preds mapEmpty blocks
where
add_preds block env = foldr add env (successors block)
where add lbl env = mapInsertWith (+) lbl 1 env
-- Removing unreachable blocks
removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
| used_blocks `lengthLessThan` mapSize (toBlockMap g)
= CmmProc info' lbl live g'
| otherwise
= proc
where
g' = ofBlockList (g_entry g) used_blocks
info' = info { info_tbls = keep_used (info_tbls info) }
-- Remove any info_tbls for unreachable
keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable
keep_used bs = mapFoldlWithKey keep mapEmpty bs
keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable
keep env l i | l `setMember` used_lbls = mapInsert l i env
| otherwise = env
used_blocks :: [CmmBlock]
used_blocks = revPostorder g
used_lbls :: LabelSet
used_lbls = setFromList $ map entryLabel used_blocks
|