summaryrefslogtreecommitdiff
path: root/compiler/GHC/Stg/InferTags.hs
blob: 3c342d93bce9aa33b3ba7aaac5235941ddd72d0b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
{-# LANGUAGE TypeFamilies, DataKinds, GADTs, FlexibleInstances #-}
{-# LANGUAGE ConstraintKinds #-}

{-# LANGUAGE UndecidableInstances #-}
 -- To permit: type instance XLet 'InferTaggedBinders = XLet 'Vanilla



{-# OPTIONS_GHC -Wno-unused-imports #-}
{-# LANGUAGE FlexibleContexts #-}
module GHC.Stg.InferTags ( inferTags ) where

import GHC.Prelude

import GHC.Core.DataCon
import GHC.Types.Id
import GHC.Stg.Syntax
import GHC.Types.Basic ( Arity, TopLevelFlag(..), RecFlag(..) )
import GHC.Types.Var.Env
import GHC.Types.Var.Set
import GHC.Types.RepType (dataConRuntimeRepStrictness)
import GHC.Core (AltCon(..))
import Data.List (mapAccumL)
import GHC.Utils.Outputable
import GHC.Utils.Misc( zipWithEqual, zipEqual )

import GHC.Stg.InferTags.Types
import GHC.Driver.Ppr

{- Note [Tag inference]
~~~~~~~~~~~~~~~~~~~~~~~
The purpose of this pass is to attach to every binder a flag
to indicate whether or not it is "properly tagged".  A binder
is properly tagged if it is guaranteed:
 - to point to a heap-allocated value
 - and to have the tag of the value encoded in the pointer

  inferTags :: [GenStgTopBinding 'Vanilla] -> [GenStgTopBinding 'InferTaggedBinders]

For example
  let x = Just y in ...

Here x will be properly tagged: it will point to the heap-allocated
values for (Just y), and the tag-bits of the pointer will encode
the tag for Just.

We then take this information in GHC.Stg.InferTags.Rewrite to rewriteTopBinds

Note [Strict field invariant]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
As part of tag inference we introduce the strict field invariant.
Which consist of us saying that:

* Pointers in strict fields must be save to re-evaluate and be
  properly tagged.

Why? Because if we have code like:

case strictPair of
  SP x y ->
    case x of ...

It allows us to safely omit the code to enter x and the check
for the presence of a tag that goes along with it.
However we might still branch on the tag as usual.

This is enforced by the code GHC.Stg.InferTags.Rewrite
where we:

* Look at all constructor allocations.
* Check if arguments to their strict fields are known to be properly tagged
* If not we convert `StrictJust x` into `case x of x' -> StrictJust x'`

However we try to push the case up the AST into the next closure.

For a full example consider this code:

foo ... = ...
  let c = StrictJust x
  in ...

Naively we would rewrite `let c = StrictJust` into `let c = case x of x' -> StrictJust x'`
However that is horrible! We would end up allocating a thunk for `c` first, which only when
evaluated would allocate the constructor.

So instead we try to push the case "up" into a surrounding closure context. So for this case
we instead produce:

  foo ... = ...
    case x of x' ->
      DEFAULT -> let c = StrictJust x'
                in ...

Which means c remains a regular constructor allocation and we avoid unneccesary overhead.
The only problems to this approach are top level definitions and recursive bindings.

For top level bindings we accept the fact that some constructor applications end up as thunks.
It's a rare enough thing that it doesn't really matter and the computation will be shared anyway.

For recursive bindings the isse arises if we have:

  let rec {
    x = e1 -- e1 mentioning y
    y = StrictJust x
  }

We obviously can't wrap the case around the recursive group as `x` isn't in scope there.
This means if we can't proof that the arguments to the strict fields (in this case `x`)
are tagged we have to turn the above into:

  let rec {
    x = e1 -- e1 mentioning y
    y = case x of x' -> StrictJust x'
  }

But this rarely happens so is not a reason for concern.

Note [Tag inference passes]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
SPJ posed the good question why we bother having two different pass
parameterizations for tag inference. After all InferTaggedBinders
already has put the needed information on the binders.

Indeed we could the transformation described in Note [Strict field invariant]
as part of the StgToCmm transformation. But it wouldn't work well with the way
we currently produce Cmm code.

In particular we would have to analyze rhss *before* we can determine
if they should contain the required code for upholding the strict field
invariant or if the code should be placed in front of the code of a given
rhs. This means more dependencies between different parts of codeGen and
more complexity in general so I decided to implement this as an STG transformation
instead.

This doesn't actually mean we *need* two different parameterizations. But since
we already walk the whole AST I figured it would be more efficient to put the
relevant tag information into the StgApp nodes during this pass as well.

It avoids the awkward situation where codegeneration of the context of a let depends
on the rhs of the let itself, avoids the need for all binders to be be tuples and
seemed more efficient.

-}

{- *********************************************************************
*                                                                      *
                         Main inference algorithm
*                                                                      *
********************************************************************* -}

inferTags :: [GenStgTopBinding 'Vanilla] -> [GenStgTopBinding 'InferTaggedBinders]
inferTags binds =
  -- pprTrace "Binds" (pprGenStgTopBindings shortStgPprOpts $ binds) $
  snd (mapAccumL inferTagTopBind initEnv binds)

-----------------------
inferTagTopBind :: TagEnv 'Vanilla -> GenStgTopBinding 'Vanilla
                -> (TagEnv 'Vanilla, GenStgTopBinding 'InferTaggedBinders)
inferTagTopBind env (StgTopStringLit id bs)
  = (env, StgTopStringLit id bs)
inferTagTopBind env (StgTopLifted bind)
  = (env', StgTopLifted bind')
  where
    (env', bind') = inferTagBind TopLevel env bind


-----------------------
inferTagExpr :: Outputable (TagEnv p)
  => TagEnv p -> GenStgExpr p -> (TagInfo, GenStgExpr 'InferTaggedBinders)
inferTagExpr env (StgApp _ext fun args)
  = (info, StgApp noEnterInfo fun args)
  where
    info | Just (TagSig arity res_info) <- lookupSig env fun
         , arity == length args  -- Saturated
         = res_info
         | otherwise
         = TagDunno

inferTagExpr env (StgConApp con cn args tys)
  = (info, StgConApp con cn args tys)
  where
    info | isUnboxedTupleDataCon con
         = TagTuple (map (lookupInfo env) args)
         | otherwise
         = TagDunno

inferTagExpr _ (StgLit l)
  = (TagDunno, StgLit l)

inferTagExpr env (StgTick tick body)
  = (info, StgTick tick body')
  where
    (info, body') = inferTagExpr env body

inferTagExpr _ (StgOpApp op args ty)
  = -- Do any primops guarantee to return a properly tagged value?
    -- I think not.  Ditto foreign calls.
    (TagDunno, StgOpApp op args ty)

inferTagExpr env (StgLet ext bind body)
  = (info, StgLet ext' bind' body')
  where
    ext' = case te_ext env of ExtEqEv -> ext
    (env', bind') = inferTagBind NotTopLevel env bind
    (info, body') = inferTagExpr env' body

inferTagExpr env (StgLetNoEscape ext bind body)
  = (info, StgLetNoEscape ext' bind' body')
  where
    ext' = case te_ext env of ExtEqEv -> ext
    (env', bind') = inferTagBind NotTopLevel env bind
    (info, body') = inferTagExpr env' body

inferTagExpr env (StgCase scrut bndr ty alts)
  | [(DataAlt con, bndrs, rhs)] <- alts
  , isUnboxedTupleDataCon con
  , TagTuple infos <- scrut_info
  , let bndrs' = zipWithEqual "inferTagExpr" mk_bndr bndrs infos
        mk_bndr bndr info = (getBinderId env bndr, TagSig 0 info)
        alt_env = extendSigEnv env bndrs'
        (info, rhs') = inferTagExpr alt_env rhs
  = (info, StgCase scrut' (noSig env bndr) ty [(DataAlt con, bndrs', rhs')])
  | null alts -- Empty case, but I might just be paranoid.
  = (TagDunno, StgCase scrut' bndr' ty [])
  | otherwise
  = ( foldr combineAltInfo TagProper infos
    , StgCase scrut' bndr' ty alts')
  where
    (scrut_info, scrut') = inferTagExpr env scrut
    bndr' = (getBinderId env bndr, TagSig 0 TagProper)
    alt_env = extendSigEnv env [bndr']
    (infos, alts')
       = unzip [ (info, (con, bndrs', rhs'))
               | (con, bndrs, rhs) <- alts
               , let (info, rhs') = inferTagExpr alt_env rhs
                     bndrs' = addAltBndrInfo env con bndrs ]

addAltBndrInfo :: TagEnv p -> AltCon -> [BinderP p] -> [BinderP 'InferTaggedBinders]
addAltBndrInfo env (DataAlt con) bndrs
  = zipWithEqual "inferTagAlt" mk_bndr bndrs marks
  where
    mk_bndr bndr NotMarkedStrict = noSig env bndr
    mk_bndr bndr MarkedStrict    = (getBinderId env bndr, TagSig 0 TagProper)
    marks
      | isUnboxedSumDataCon con || isUnboxedTupleDataCon con
      = replicate (length bndrs) NotMarkedStrict
      | otherwise = (dataConRuntimeRepStrictness con)

addAltBndrInfo env _ bndrs = map (noSig env) bndrs

-----------------------------
inferTagBind :: Outputable (TagEnv p)
  => TopLevelFlag -> TagEnv p -> GenStgBinding p -> (TagEnv p, GenStgBinding 'InferTaggedBinders)
inferTagBind top env (StgNonRec bndr rhs)
  = (env', StgNonRec (id, sig) rhs')
  where
    id   = getBinderId env bndr
    env' = extendSigEnv env [(id, sig)]
    (sig,rhs') = inferTagRhs top [] env rhs

inferTagBind top env (StgRec pairs)
  = (env { te_env = sig_env }, StgRec pairs')
  where
    (bndrs, rhss)     = unzip pairs
    ids               = map (getBinderId env) bndrs
    init_sigs         = map initSig rhss
    (sig_env, pairs') = go env init_sigs rhss

    go :: forall q. TagEnv q -> [TagSig] -> [GenStgRhs q]
                 -> (TagSigEnv, [((Id,TagSig), GenStgRhs 'InferTaggedBinders)])
    go env sigs rhss
      --  | pprTrace "go" (ppr ids $$ ppr sigs $$ ppr sigs') False
      --  = undefined
       | sigs == sigs' = (te_env rhs_env, bndrs `zip` rhss')
       | otherwise     = go env' sigs' rhss'
       where
         bndrs = ids `zip` sigs
         rhs_env = extendSigEnv env bndrs
         (sigs', rhss') = unzip (map (inferTagRhs top ids rhs_env) rhss)
         env' = makeTagged env

initSig :: GenStgRhs p -> TagSig
-- Initial signature for the fixpoint loop
initSig StgRhsCon {}                = TagSig 0              TagProper
initSig (StgRhsClosure _ _ _ bndrs _) = TagSig (length bndrs) TagProper

-----------------------------
inferTagRhs :: Outputable (TagEnv p)
  => TopLevelFlag -- ^
  -> [Id] -- ^ List of ids in the recursive group, or [] otherwise
  -> TagEnv p -- ^
  -> GenStgRhs p -- ^
  -> (TagSig, GenStgRhs 'InferTaggedBinders)
inferTagRhs _top _grp_ids env (StgRhsClosure ext cc upd bndrs body)
  = --pprTrace "inferTagRhsClosure" (ppr (_top, _grp_ids, env,info')) $
    (TagSig arity info', StgRhsClosure ext' cc upd bndrs' body')
  where
    ext' = case te_ext env of ExtEqEv -> ext
    (info, body') = inferTagExpr env body
    arity = length bndrs
    info'
      | arity == 0
      = TagDunno
      -- TODO: We could preserve tuple fields for thunks
      -- as well.

      | otherwise  = info
    bndrs' = map (noSig env) bndrs

inferTagRhs top grp_ids env (StgRhsCon cc con cn ticks args)
-- Top level constructors, which have untagged arguments to strict fields
-- become thunks. Same goes for rhs which are part of a recursive group.
-- We encode this by giving changing RhsCon nodes the info TagDunno
  = --pprTrace "inferTagRhsCon" (ppr grp_ids) $
    let
        strictArgs = zipEqual "inferTagRhs" args (dataConRuntimeRepStrictness con)
        strictUntaggedIds = [v | (StgVarArg v, MarkedStrict) <- strictArgs
                            , lookupInfo env (StgVarArg v) /= TagProper] :: [Id]

        mkResult x = (TagSig 0 x, StgRhsCon cc con cn ticks args)
    in case () of
          -- All fields tagged or non-strict
        _ | null strictUntaggedIds -> mkResult TagProper
          -- -- Non-recursive local let
          -- | null grp_ids
          -- , NotTopLevel <- top
          -- -> mkResult TagProper
          -- Recursive local let, no bindings from grp in args
          -- | NotTopLevel <- top
          -- , mkVarSet grp_ids `disjointVarSet` mkVarSet strictUntaggedIds
          -- -> mkResult TagProper
          -- Otherwise we have a top level let with untagged args,
          -- or a recursive group where a bindings of the group is
          -- passed into a strict field
          | otherwise -> mkResult TagDunno