summaryrefslogtreecommitdiff
path: root/compiler/GHC/StgToCmm/DataCon.hs
blob: 2c9071129dc0c3a2e224a45f7054085707443b89 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469


-----------------------------------------------------------------------------
--
-- Stg to C--: code generation for constructors
--
-- This module provides the support code for StgToCmm to deal with
-- constructors on the RHSs of let(rec)s.
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------
{-# LANGUAGE NamedFieldPuns #-}

module GHC.StgToCmm.DataCon (
        cgTopRhsCon, buildDynCon, bindConArgs
    ) where

import GHC.Prelude

import GHC.Platform

import GHC.Stg.Syntax
import GHC.Core  ( AltCon(..) )

import GHC.StgToCmm.Monad
import GHC.StgToCmm.Env
import GHC.StgToCmm.Heap
import GHC.StgToCmm.Layout
import GHC.StgToCmm.Utils
import GHC.StgToCmm.Closure

import GHC.Cmm.Expr
import GHC.Cmm.Utils
import GHC.Cmm.CLabel
import GHC.Cmm.Graph
import GHC.Runtime.Heap.Layout
import GHC.Types.CostCentre
import GHC.Unit
import GHC.Core.DataCon
import GHC.Data.FastString
import GHC.Types.Basic
import GHC.Types.Id
import {-# SOURCE #-} GHC.StgToCmm.Bind
import GHC.Types.Id.Info( CafInfo( NoCafRefs ) )
import GHC.Types.Name (isInternalName)
import GHC.Types.RepType (countConRepArgs)
import GHC.Types.Rep.Virtual (isVirtualDataCon, VirtualConType(..), virtualDataConType)
import GHC.Types.Literal
import GHC.Builtin.Utils
import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
import GHC.Utils.Misc
import GHC.Utils.Monad (mapMaybeM)

import Control.Monad
import Data.Char
import GHC.StgToCmm.Config (stgToCmmPlatform)
import GHC.StgToCmm.TagCheck (checkConArgsStatic, checkConArgsDyn, emitTagAssertionId)
import GHC.Utils.Outputable
import GHC.Utils.Trace

---------------------------------------------------------------
--      Top-level constructors
---------------------------------------------------------------

cgTopRhsCon :: StgToCmmConfig
            -> Id               -- Name of thing bound to this RHS
            -> DataCon          -- Id
            -> ConstructorNumber
            -> [NonVoid StgArg] -- Args
            -> (CgIdInfo, FCode ())
cgTopRhsCon cfg id con mn args
  | Just static_info <- precomputedStaticConInfo_maybe cfg id con args
  , let static_code | isInternalName name = pure ()
                    | otherwise           = gen_code
  = -- There is a pre-allocated static closure available; use it
    -- See Note [Precomputed static closures].
    -- For External bindings we must keep the binding,
    -- since importing modules will refer to it by name;
    -- but for Internal ones we can drop it altogether
    -- See Note [About the NameSorts] in "GHC.Types.Name" for Internal/External
    (static_info, static_code)

  -- Virtual constructor, just return the argument, behaves more like an closure
  | virtualDataConType con /= NonVirtual
  , [NonVoid (StgVarArg x)] <- args
  -- It could only be unboxed if we implemented top level unlifted boxy data types.
  = assert (virtualDataConType con == VirtualBoxed) $
    let fake_rhs = StgConApp con {-unused-}NoNumber [StgVarArg x] [idType x] :: CgStgExpr
    in
      pprTrace "cgTopRhsCon" (ppr id $$ ppr con $$ ppr args) $
      cgTopRhsClosure platform NonRecursive id dontCareCCS Updatable [] fake_rhs

  -- Otherwise generate a closure for the constructor.
  | otherwise
  = (id_Info, gen_code)

  where
   platform      = stgToCmmPlatform cfg
   id_Info       = litIdInfo platform id (mkConLFInfo con) (CmmLabel closure_label)
   name          = idName id
   caffy         = idCafInfo id -- any stgArgHasCafRefs args
   closure_label = mkClosureLabel name caffy

   gen_code =
     do { profile <- getProfile
        ; this_mod <- getModuleName
        ; when (platformOS platform == OSMinGW32) $
              -- Windows DLLs have a problem with static cross-DLL refs.
              massert (not (isDllConApp platform (stgToCmmExtDynRefs cfg) this_mod con (map fromNonVoid args)))
        ; assert (args `lengthIs` countConRepArgs con ) return ()
        ; checkConArgsStatic (text "TagCheck failed - Top level con") con (map fromNonVoid args)
        -- LAY IT OUT
        ; let
            (tot_wds, --  #ptr_wds + #nonptr_wds
             ptr_wds, --  #ptr_wds
             nv_args_w_offsets) =
                 mkVirtHeapOffsetsWithPadding profile StdHeader (addArgReps args)

        ; let
            -- Decompose padding into units of length 8, 4, 2, or 1 bytes to
            -- allow the implementation of mk_payload to use widthFromBytes,
            -- which only handles these cases.
            fix_padding (x@(Padding n off) : rest)
              | n == 0                 = fix_padding rest
              | n `elem` [1,2,4,8]     = x : fix_padding rest
              | n > 8                  = add_pad 8
              | n > 4                  = add_pad 4
              | n > 2                  = add_pad 2
              | otherwise              = add_pad 1
              where add_pad m = Padding m off : fix_padding (Padding (n-m) (off+m) : rest)
            fix_padding (x : rest)     = x : fix_padding rest
            fix_padding []             = []

            mk_payload (Padding len _) = return (CmmInt 0 (widthFromBytes len))
            mk_payload (FieldOff arg _) = do
                amode <- getArgAmode arg
                case amode of
                  CmmLit lit -> return lit
                  _          -> panic "GHC.StgToCmm.DataCon.cgTopRhsCon"

            nonptr_wds = tot_wds - ptr_wds

             -- we're not really going to emit an info table, so having
             -- to make a CmmInfoTable is a bit overkill, but mkStaticClosureFields
             -- needs to poke around inside it.
            info_tbl = mkDataConInfoTable profile con (addModuleLoc this_mod mn) True ptr_wds nonptr_wds


        ; payload <- mapM mk_payload (fix_padding nv_args_w_offsets)
                -- NB1: nv_args_w_offsets is sorted into ptrs then non-ptrs
                -- NB2: all the amodes should be Lits!
                --      TODO (osa): Why?

                -- BUILD THE OBJECT
                --
            -- We're generating info tables, so we don't know and care about
            -- what the actual arguments are. Using () here as the place holder.

        ; emitDataCon closure_label info_tbl dontCareCCS payload }

addModuleLoc :: Module -> ConstructorNumber -> ConInfoTableLocation
addModuleLoc this_mod mn = do
  case mn of
    NoNumber -> DefinitionSite
    Numbered n -> UsageSite this_mod n

---------------------------------------------------------------
--      Lay out and allocate non-top-level constructors
---------------------------------------------------------------

buildDynCon :: Id                 -- Name of the thing to which this constr will
                                  -- be bound
            -> ConstructorNumber
            -> Bool               -- is it genuinely bound to that name, or just
                                  -- for profiling?
            -> CostCentreStack    -- Where to grab cost centre from;
                                  -- current CCS if currentOrSubsumedCCS
            -> DataCon            -- The data constructor
            -> [NonVoid StgArg]   -- Its args
            -> FCode (CgIdInfo, FCode CmmAGraph)
               -- Return details about how to find it and initialization code
buildDynCon binder mn actually_bound cc con args
    = do cfg <- getStgToCmmConfig
         --   pprTrace "noCodeLocal:" (ppr (binder,con,args,cgInfo)) True
         case precomputedStaticConInfo_maybe cfg binder con args of
           Just cgInfo -> return (cgInfo, return mkNop)
           Nothing     -> buildDynCon' binder mn actually_bound cc con args


buildDynCon' :: Id
             -> ConstructorNumber
             -> Bool
             -> CostCentreStack
             -> DataCon
             -> [NonVoid StgArg]
             -> FCode (CgIdInfo, FCode CmmAGraph)
{- We used to pass a boolean indicating whether all the
args were of size zero, so we could use a static
constructor; but I concluded that it just isn't worth it.
Now I/O uses unboxed tuples there just aren't any constructors
with all size-zero args.

The reason for having a separate argument, rather than looking at
the addr modes of the args is that we may be in a "knot", and
premature looking at the args will cause the compiler to black-hole!
-}
-------- buildDynCon': Virtual constructor -----------
buildDynCon' binder _mn _actually_bound _ccs con args
  | vcon <- virtualDataConType con
  , vcon /= NonVirtual
  , [NonVoid (StgVarArg arg)] <- assert (length args == 1) args
  = do
      pprTraceM "buildDynCon" (ppr con)
      cfg <- getStgToCmmConfig
      let platform = stgToCmmPlatform cfg

      m_arg_cg_info <- (getCgInfo_maybe $ idName arg)
      case m_arg_cg_info of
        Just arg_info@CgIdInfo{ cg_loc, cg_id } -> do
          case vcon of
            -- A virtual con for regular boxed things is just the argument info under another name.
            VirtualBoxed -> do
              emitTagAssertionId "buildDynConVirt:" arg
              let virt_con_info = arg_info { cg_id = binder }
              return (virt_con_info, return mempty)
            -- These things usually don't have a pointer tag. But here we attach one to avoid these values from being entered.
            VirtualUnboxedHeap
              | CmmLoc{ cgl_cmm } <- cg_loc -> do
                  let virt_con_info = arg_info { cg_id = binder, cg_loc = cg_loc { cgl_cmm = cmmOffset platform cgl_cmm 1 } }
                  return (virt_con_info, return mempty)
              | otherwise -> panic "VirtualCon with LNE value as argument"
            NonVirtual -> panic "impossible" -- handled by guard on buildDynCon'/CmmLoc

        Nothing -> panic "buildDynCon': LFInfo for VCon args unknown" (ppr binder <> text " = " <> ppr con <+> ppr args)

          -- let !lf_info = mkLFArgument arg

          -- (id_info, reg) <- rhsIdInfo binder lf_info
          -- emit $ mkAssign (CmmLocal reg) ((CmmReg $ CmmLocal $ idToReg platform $ NonVoid arg))
          -- bindArgToGivenReg (NonVoid arg) reg
          -- return (id_info, return mempty)

-------- buildDynCon': the general case -----------
buildDynCon' binder mn actually_bound ccs con args
  = do  { (id_info, reg) <- rhsIdInfo binder lf_info
        ; return (id_info, gen_code reg)
        }
 where
  lf_info = mkConLFInfo con

  gen_code reg
    = do  { modu <- getModuleName
          ; cfg  <- getStgToCmmConfig
          ; let platform = stgToCmmPlatform cfg
                profile  = stgToCmmProfile  cfg
                (tot_wds, ptr_wds, args_w_offsets)
                   = mkVirtConstrOffsets profile (addArgReps args)
                nonptr_wds = tot_wds - ptr_wds
                info_tbl = mkDataConInfoTable profile con (addModuleLoc modu mn) False
                                ptr_wds nonptr_wds
          ; let ticky_name | actually_bound = Just binder
                           | otherwise = Nothing

          ; checkConArgsDyn (hang (text "TagCheck failed on constructor application.") 4 $
                                   text "On binder:" <> ppr binder $$ text "Constructor:" <> ppr con) con (map fromNonVoid args)
          ; hp_plus_n <- allocDynClosure ticky_name info_tbl lf_info
                                          use_cc blame_cc args_w_offsets
          ; return (mkRhsInit platform reg lf_info hp_plus_n) }
    where
      use_cc      -- cost-centre to stick in the object
        | isCurrentCCS ccs = cccsExpr
        | otherwise        = panic "buildDynCon: non-current CCS not implemented"

      blame_cc = use_cc -- cost-centre on which to blame the alloc (same)


{- Note [Precomputed static closures]
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

For Char/Int closures there are some value closures
built into the RTS. This is the case for all values in
the range mINT_INTLIKE .. mAX_INTLIKE (or CHARLIKE).
See Note [CHARLIKE and INTLIKE closures] in the RTS code.

Similarly zero-arity constructors have a closure
in their defining Module we can use.

If possible we prefer to refer to those existing
closure instead of building new ones.

This is true at compile time where we do this replacement
in this module.
But also at runtime where the GC does the same (but only for
INT/CHAR closures).

`precomputedStaticConInfo_maybe` checks if a given constructor application
can be replaced with a reference to a existing static closure.

If so the code will reference the existing closure when accessing
the binding.
Unless the binding is visible to other modules we also generate
no code for the binding itself. We can do this since then we can
always reference the existing closure.

See Note [About the NameSorts] for the definition of external names.
For external bindings we must still generate a closure,
but won't use it inside this module.
This can sometimes reduce cache pressure. Since:
* If somebody uses the exported binding:
  + This module will reference the existing closure.
  + GC will reference the existing closure.
  + The importing module will reference the built closure.
* If nobody uses the exported binding:
  + This module will reference the RTS closures.
  + GC references the RTS closures

In the later case we avoided loading the built closure into the cache which
is what we optimize for here.

Consider this example using Ints.

    module M(externalInt, foo, bar) where

    externalInt = 1 :: Int
    internalInt = 1 :: Int
    { -# NOINLINE foo #- }
    foo = Just internalInt :: Maybe Int
    bar = Just externalInt

    ==================== STG: ====================
    externalInt = I#! [1#];

    bar = Just! [externalInt];

    internalInt_rc = I#! [2#];

    foo = Just! [internalInt_rc];

For externally visible bindings we must generate closures
since those may be referenced by their symbol `<name>_closure`
when imported.

`externalInt` is visible to other modules so we generate a closure:

    [section ""data" . M.externalInt_closure" {
        M.externalInt_closure:
            const GHC.Types.I#_con_info;
            const 1;
    }]

It will be referenced inside this module via `M.externalInt_closure+1`

`internalInt` is however a internal name. As such we generate no code for
it. References to it are replaced with references to the static closure as
we can see in the closure built for `foo`:

    [section ""data" . M.foo_closure" {
        M.foo_closure:
            const GHC.Maybe.Just_con_info;
            const stg_INTLIKE_closure+289; // == I# 2
            const 3;
    }]

This holds for both local and top level bindings.

We don't support this optimization when compiling into Windows DLLs yet
because they don't support cross package data references well.
-}

-- (precomputedStaticConInfo_maybe cfg id con args)
--     returns (Just cg_id_info)
-- if there is a precomputed static closure for (con args).
-- In that case, cg_id_info addresses it.
-- See Note [Precomputed static closures]
precomputedStaticConInfo_maybe :: StgToCmmConfig -> Id -> DataCon -> [NonVoid StgArg] -> Maybe CgIdInfo
precomputedStaticConInfo_maybe cfg binder con []
-- Nullary constructors
  | isNullaryRepDataCon con
  = Just $ litIdInfo (stgToCmmPlatform cfg) binder (mkConLFInfo con)
                (CmmLabel (mkClosureLabel (dataConName con) NoCafRefs))
precomputedStaticConInfo_maybe cfg binder con [arg]
  -- Int/Char values with existing closures in the RTS
  | intClosure || charClosure
  , platformOS platform /= OSMinGW32 || not (stgToCmmPIE cfg || stgToCmmPIC cfg)
  , Just val <- getClosurePayload arg
  , inRange val
  = let intlike_lbl   = mkCmmClosureLabel rtsUnitId (fsLit label)
        val_int = fromIntegral val :: Int
        offsetW = (val_int - fromIntegral min_static_range) * (fixedHdrSizeW profile + 1)
                -- INTLIKE/CHARLIKE closures consist of a header and one word payload
        static_amode = cmmLabelOffW platform intlike_lbl offsetW
    in Just $ litIdInfo platform binder (mkConLFInfo con) static_amode
  where
    profile     = stgToCmmProfile  cfg
    platform    = stgToCmmPlatform cfg
    intClosure  = maybeIntLikeCon  con
    charClosure = maybeCharLikeCon con
    getClosurePayload (NonVoid (StgLitArg (LitNumber LitNumInt val))) = Just val
    getClosurePayload (NonVoid (StgLitArg (LitChar val))) = Just (fromIntegral . ord $ val)
    getClosurePayload _ = Nothing
    -- Avoid over/underflow by comparisons at type Integer!
    inRange :: Integer -> Bool
    inRange val
      = val >= min_static_range && val <= max_static_range

    constants = platformConstants platform

    min_static_range :: Integer
    min_static_range
      | intClosure = fromIntegral (pc_MIN_INTLIKE constants)
      | charClosure = fromIntegral (pc_MIN_CHARLIKE constants)
      | otherwise = panic "precomputedStaticConInfo_maybe: Unknown closure type"
    max_static_range
      | intClosure = fromIntegral (pc_MAX_INTLIKE constants)
      | charClosure = fromIntegral (pc_MAX_CHARLIKE constants)
      | otherwise = panic "precomputedStaticConInfo_maybe: Unknown closure type"
    label
      | intClosure = "stg_INTLIKE"
      | charClosure =  "stg_CHARLIKE"
      | otherwise = panic "precomputedStaticConInfo_maybe: Unknown closure type"

precomputedStaticConInfo_maybe _ _ _ _ = Nothing

---------------------------------------------------------------
--      Binding constructor arguments
---------------------------------------------------------------

bindConArgs :: AltCon -> LocalReg -> [NonVoid Id] -> FCode [LocalReg]
-- bindConArgs is called from cgAlt of a case
-- (bindConArgs con args) augments the environment with bindings for the
-- binders args, assuming that we have just returned from a 'case' which
-- found a con
bindConArgs (DataAlt con) base args
  | isVirtualDataCon con
  , [NonVoid arg] <- assert (length args == 1) args
  = case virtualDataConType con of
      NonVirtual -> panic "Impossible" -- checked by guard above
      VirtualBoxed -> do
        bindArgToGivenReg (NonVoid arg) base
        return [base]
      VirtualUnboxedHeap -> do
        pprTraceM "BindOffset" (ppr con)
        bindArgToGivenRegOffset (NonVoid arg) base (-1)
        return [base]


  | otherwise = assert (not (isUnboxedTupleDataCon con)) $
    do profile <- getProfile
       platform <- getPlatform
       let (_, _, args_w_offsets) = mkVirtConstrOffsets profile (addIdReps args)
           tag = tagForCon platform con

           -- The binding below forces the masking out of the tag bits
           -- when accessing the constructor field.
           bind_arg :: (NonVoid Id, ByteOff) -> FCode (Maybe LocalReg)
           bind_arg (arg@(NonVoid b), offset)
             | isDeadBinder b  -- See Note [Dead-binder optimisation] in GHC.StgToCmm.Expr
             = return Nothing
             | otherwise
             = do { emit $ mkTaggedObjectLoad platform (idToReg platform arg)
                                              base offset tag
                  ; Just <$> bindArgToReg arg }

       mapMaybeM bind_arg args_w_offsets

bindConArgs _other_con _base args
  = assert (null args ) return []