summaryrefslogtreecommitdiff
path: root/compiler/cmm/CmmContFlowOpt.hs
blob: f39ab3c62bd261f8533bdc418c5b2655d9b6d5ad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
{-# LANGUAGE GADTs #-}
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
module CmmContFlowOpt
    ( cmmCfgOpts
    , cmmCfgOptsProc
    , removeUnreachableBlocksProc
    , removeUnreachableBlocks
    , replaceLabels
    )
where

import Hoopl
import BlockId
import Cmm
import CmmUtils
import Maybes

import Control.Monad
import Prelude hiding (succ, unzip, zip)

-----------------------------------------------------------------------------
--
-- Control-flow optimisations
--
-----------------------------------------------------------------------------

cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
cmmCfgOpts split g = fst (blockConcat split g)

cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
    where (g', env) = blockConcat split g
          info' = info{ info_tbls = new_info_tbls }
          new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))

          -- If we changed any labels, then we have to update the info tables
          -- too, except for the top-level info table because that might be
          -- referred to by other procs.
          upd_info (k,info)
             | Just k' <- mapLookup k env
             = (k', if k' == g_entry g'
                       then info
                       else info{ cit_lbl = infoTblLbl k' })
             | otherwise
             = (k,info)

cmmCfgOptsProc _ top = top


-----------------------------------------------------------------------------
--
-- Block concatenation
--
-----------------------------------------------------------------------------

-- This optimisation does three things:
--
--   - If a block finishes with an unconditional branch, then we may
--     be able to duplicate the block it points to and remove the
--     branch.  We do this if either
--        a) the destination block is small (e.g. just another branch), or
--        b) this is the only jump to this particular destination block.
--
--   - If a block finishes in a call whose continuation block is a
--     goto, then we can shortcut the destination, making the
--     destination of the goto into the continuation.  E.g.
--             call g returns to L    ==>     call g returns to M
--          L: goto M                      M: ...blah...
--          M: ...blah...
--     (but see Note [shortcut call returns])
--
--   - Remove any unreachable blocks from the graph.  This is a side
--     effect of starting with a postorder DFS traversal of the graph

-- Both transformations are improved by working from the end of the
-- graph towards the beginning, because we may be able to perform many
-- shortcuts in one go.

-- We need to walk over the blocks from the end back to the
-- beginning.  We are going to maintain the "current" graph
-- (BlockEnv CmmBlock) as we go, and also a mapping from BlockId
-- to BlockId, representing continuation labels that we have
-- renamed.  This latter mapping is important because we might
-- shortcut a CmmCall continuation.  For example:
--
--    Sp[0] = L
--    call g returns to L
--
--    L: goto M
--
--    M: ...
--
-- So when we shortcut the L block, we need to replace not only
-- the continuation of the call, but also references to L in the
-- code (e.g. the assignment Sp[0] = L).  So we keep track of
-- which labels we have renamed and apply the mapping at the end
-- with replaceLabels.

blockConcat :: Bool -> CmmGraph -> (CmmGraph, BlockEnv BlockId)
blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
  = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
  where
     -- we might be able to shortcut the entry BlockId itself.
     -- remember to update the shortcut_map', since we also have to
     -- update the info_tbls mapping now.
     (new_entry, shortcut_map')
       | Just entry_blk <- mapLookup entry_id new_blocks
       , Just dest      <- canShortcut entry_blk
       = (dest, mapInsert entry_id dest shortcut_map)
       | otherwise
       = (entry_id, shortcut_map)

     blocks = postorderDfs g
     blockmap = foldr addBlock emptyBody blocks
      -- the initial blockmap is constructed from the postorderDfs result,
      -- so that we automatically throw away unreachable blocks.

     (new_blocks, shortcut_map) =
           foldr maybe_concat (blockmap, mapEmpty) blocks

     maybe_concat :: CmmBlock
                  -> (BlockEnv CmmBlock, BlockEnv BlockId)
                  -> (BlockEnv CmmBlock, BlockEnv BlockId)
     maybe_concat block (blocks, shortcut_map)
        | CmmBranch b' <- last
        , Just blk' <- mapLookup b' blocks
        , shouldConcatWith b' blk'
        = (mapInsert bid (splice head blk') blocks, shortcut_map)

        -- calls: if we can shortcut the continuation label, then
        -- we must *also* remember to substitute for the label in the
        -- code, because we will push it somewhere.
        | splitting_procs -- Note [shortcut call returns]
        , Just b'   <- callContinuation_maybe last
        , Just blk' <- mapLookup b' blocks
        , Just dest <- canShortcut blk'
        = (blocks, mapInsert b' dest shortcut_map)
           -- replaceLabels will substitute dest for b' everywhere, later

        -- non-calls: see if we can shortcut any of the successors,
        -- and check whether we should invert the conditional
        | Nothing <- callContinuation_maybe last
        = ( mapInsert bid (blockJoinTail head swapcond_last) blocks
          , shortcut_map )

        | otherwise
        = (blocks, shortcut_map)
        where
          (head, last) = blockSplitTail block
          bid = entryLabel block

          shortcut_last = mapSuccessors shortcut last
            where
              shortcut l =
                 case mapLookup l blocks of
                   Just b | Just dest <- canShortcut b  -> dest
                   _otherwise -> l

          -- for a conditional, we invert the conditional if that
          -- would make it more likely that the branch-not-taken case
          -- becomes a fallthrough.  This helps the native codegen a
          -- little bit, and probably has no effect on LLVM.  It's
          -- convenient to do it here, where we have the information
          -- about predecessors.
          --
          swapcond_last
            | CmmCondBranch cond t f <- shortcut_last
            , numPreds f > 1
            , numPreds t == 1
            , Just cond' <- maybeInvertCmmExpr cond
            = CmmCondBranch cond' f t

            | otherwise
            = shortcut_last


     shouldConcatWith b block
       | okToDuplicate block = True  -- short enough to duplicate
       | numPreds b == 1     = True  -- only one predecessor: go for it
       | otherwise           = False

     numPreds bid = mapLookup bid backEdges `orElse` 0

     canShortcut :: CmmBlock -> Maybe BlockId
     canShortcut block
       | (_, middle, CmmBranch dest) <- blockSplit block
       , isEmptyBlock middle
       = Just dest
       | otherwise
       = Nothing

     backEdges :: BlockEnv Int -- number of predecessors for each block
     backEdges = mapInsertWith (+) entry_id 1 $ -- add 1 for the entry id
                   predMap blocks

     splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
     splice head rest = head `blockAppend` snd (blockSplitHead rest)


callContinuation_maybe :: CmmNode O C -> Maybe BlockId
callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
callContinuation_maybe (CmmForeignCall { succ = b })   = Just b
callContinuation_maybe _ = Nothing

okToDuplicate :: CmmBlock -> Bool
okToDuplicate block
  = case blockSplit block of
      (_, m, CmmBranch _) -> isEmptyBlock m
      -- cheap and cheerful; we might expand this in the future to
      -- e.g. spot blocks that represent a single instruction or two.
      -- Be careful: a CmmCall can be more than one instruction, it
      -- has a CmmExpr inside it.
      _otherwise -> False


{-  Note [shortcut call returns]

Consider this code that you might get from a recursive let-no-escape:

      goto L1
     L1:
      if (Hp > HpLim) then L2 else L3
     L2:
      call stg_gc_noregs returns to L4
     L4:
      goto L1
     L3:
      ...
      goto L1

Then the control-flow optimiser shortcuts L4.  But that turns L1
into the call-return proc point, and every iteration of the loop
has to shuffle variables to and from the stack.  So we must *not*
shortcut L4.

Moreover not shortcutting call returns is probably fine.  If L4 can
concat with its branch target then it will still do so.  And we
save some compile time because we don't have to traverse all the
code in replaceLabels.

However, we probably do want to do this if we are splitting proc
points, because L1 will be a proc-point anyway, so merging it with L4
reduces the number of proc points.  Unfortunately recursive
let-no-escapes won't generate very good code with proc-point splitting
on - we should probably compile them to explicitly use the native
calling convention instead.
-}

------------------------------------------------------------------------
-- Map over the CmmGraph, replacing each label with its mapping in the
-- supplied BlockEnv.

replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph
replaceLabels env g
  | mapNull env = g
  | otherwise   = replace_eid $ mapGraphNodes1 txnode g
   where
     replace_eid g = g {g_entry = lookup (g_entry g)}
     lookup id = mapLookup id env `orElse` id

     txnode :: CmmNode e x -> CmmNode e x
     txnode (CmmBranch bid)         = CmmBranch (lookup bid)
     txnode (CmmCondBranch p t f)   = mkCmmCondBranch (exp p) (lookup t) (lookup f)
     txnode (CmmSwitch e arms)      = CmmSwitch (exp e) (map (liftM lookup) arms)
     txnode (CmmCall t k rg a res r) = CmmCall (exp t) (liftM lookup k) rg a res r
     txnode fc@CmmForeignCall{}     = fc{ args = map exp (args fc)
                                        , succ = lookup (succ fc) }
     txnode other                   = mapExpDeep exp other

     exp :: CmmExpr -> CmmExpr
     exp (CmmLit (CmmBlock bid))                = CmmLit (CmmBlock (lookup bid))
     exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
     exp e                                      = e

mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C
mkCmmCondBranch p t f = if t == f then CmmBranch t else CmmCondBranch p t f

----------------------------------------------------------------
-- Build a map from a block to its set of predecessors. Very useful.

predMap :: [CmmBlock] -> BlockEnv Int
predMap blocks = foldr add_preds mapEmpty blocks -- find the back edges
  where
    add_preds block env = foldr add env (successors block)
      where add lbl env = mapInsertWith (+) lbl 1 env

-----------------------------------------------------------------------------
--
-- Removing unreachable blocks

removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
removeUnreachableBlocksProc (CmmProc info lbl live g)
   = CmmProc info lbl live (removeUnreachableBlocks g)

removeUnreachableBlocks :: CmmGraph -> CmmGraph
removeUnreachableBlocks g
  | length blocks < mapSize (toBlockMap g) = ofBlockList (g_entry g) blocks
  | otherwise = g
  where blocks = postorderDfs g