summaryrefslogtreecommitdiff
path: root/compiler/GHC/Stg/Debug.hs
blob: 678f95e50ec09abc8f08781322e17be056888372 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
{-# LANGUAGE TupleSections #-}

-- This module contains functions which implement
-- the -finfo-table-map and -fdistinct-constructor-tables flags
module GHC.Stg.Debug
  ( StgDebugOpts(..)
  , collectDebugInformation
  ) where

import GHC.Prelude

import GHC.Stg.Syntax

import GHC.Types.Id
import GHC.Types.Tickish
import GHC.Core.DataCon
import GHC.Types.IPE
import GHC.Unit.Module
import GHC.Types.Name   ( getName, getOccName, occNameString, nameSrcSpan)
import GHC.Data.FastString

import Control.Monad (when)
import Control.Monad.Trans.Reader
import GHC.Utils.Monad.State.Strict
import Control.Monad.Trans.Class
import GHC.Types.Unique.Map
import GHC.Types.SrcLoc
import Control.Applicative
import qualified Data.List.NonEmpty as NE
import Data.List.NonEmpty (NonEmpty(..))

data SpanWithLabel = SpanWithLabel RealSrcSpan String

data StgDebugOpts = StgDebugOpts
  { stgDebug_infoTableMap              :: !Bool
  , stgDebug_distinctConstructorTables :: !Bool
  }

data R = R { rOpts :: StgDebugOpts, rModLocation :: ModLocation, rSpan :: Maybe SpanWithLabel }

type M a = ReaderT R (State InfoTableProvMap) a

withSpan :: IpeSourceLocation -> M a -> M a
withSpan (new_s, new_l) act = local maybe_replace act
  where
    maybe_replace r@R{ rModLocation = cur_mod, rSpan = Just (SpanWithLabel old_s _old_l) }
      -- prefer spans from the current module
      | Just (unpackFS $ srcSpanFile old_s) == ml_hs_file cur_mod
      , Just (unpackFS $ srcSpanFile new_s) /= ml_hs_file cur_mod
      = r
    maybe_replace r
      = r { rSpan = Just (SpanWithLabel new_s new_l) }

collectDebugInformation :: StgDebugOpts -> ModLocation -> [StgTopBinding] -> ([StgTopBinding], InfoTableProvMap)
collectDebugInformation opts ml bs =
    runState (runReaderT (mapM collectTop bs) (R opts ml Nothing)) emptyInfoTableProvMap

collectTop :: StgTopBinding -> M StgTopBinding
collectTop (StgTopLifted t) = StgTopLifted <$> collectStgBind t
collectTop tb = return tb

collectStgBind :: StgBinding -> M StgBinding
collectStgBind  (StgNonRec bndr rhs) = do
    rhs' <- collectStgRhs bndr rhs
    return (StgNonRec bndr rhs')
collectStgBind (StgRec pairs) = do
    es <- mapM (\(b, e) -> (b,) <$> collectStgRhs b e) pairs
    return (StgRec es)

collectStgRhs :: Id -> StgRhs -> M StgRhs
collectStgRhs bndr (StgRhsClosure ext cc us bs e)= do
  let
    name = idName bndr
    -- If the name has a span, use that initially as the source position in-case
    -- we don't get anything better.
    with_span = case nameSrcSpan name of
                  RealSrcSpan pos _ -> withSpan (pos, occNameString (getOccName name))
                  _ -> id
  e' <- with_span $ collectExpr e
  recordInfo bndr e'
  return $ StgRhsClosure ext cc us bs e'
collectStgRhs _bndr (StgRhsCon cc dc _mn ticks args) = do
  n' <- numberDataCon dc ticks
  return (StgRhsCon cc dc n' ticks args)


recordInfo :: Id -> StgExpr -> M ()
recordInfo bndr new_rhs = do
  modLoc <- asks rModLocation
  let
    thisFile = maybe nilFS mkFastString $ ml_hs_file modLoc
    -- A span from the ticks surrounding the new_rhs
    best_span = quickSourcePos thisFile new_rhs
    -- A back-up span if the bndr had a source position, many do not (think internally generated ids)
    bndr_span = (\s -> SpanWithLabel s (occNameString (getOccName bndr)))
                  <$> srcSpanToRealSrcSpan (nameSrcSpan (getName bndr))
  recordStgIdPosition bndr best_span bndr_span

collectExpr :: StgExpr -> M StgExpr
collectExpr = go
  where
    go (StgApp occ as) = return $ StgApp occ as
    go (StgLit lit) = return $ StgLit lit
    go (StgConApp dc _mn as tys) = do
      n' <- numberDataCon dc []
      return (StgConApp dc n' as tys)
    go (StgOpApp op as ty) = return (StgOpApp op as ty)
    go (StgCase scrut bndr ty alts) =
      StgCase <$> collectExpr scrut <*> pure bndr <*> pure ty <*> mapM collectAlt alts
    go (StgLet ext bind body) = do
        bind' <- collectStgBind bind
        body' <- go body
        return (StgLet ext bind' body')
    go (StgLetNoEscape ext bind body) = do
        bind' <- collectStgBind bind
        body' <- go body
        return (StgLetNoEscape ext bind' body')

    go (StgTick tick e) = do
       let k = case tick of
                SourceNote ss fp -> withSpan (ss, fp)
                _ -> id
       e' <- k (go e)
       return (StgTick tick e')

collectAlt :: StgAlt -> M StgAlt
collectAlt alt = do e' <- collectExpr $ alt_rhs alt
                    return $! alt { alt_rhs = e' }

-- | Try to find the best source position surrounding a 'StgExpr'. The
-- heuristic strips ticks from the current expression until it finds one which
-- is from the module currently being compiled. This is the same method that
-- the DWARF information uses to give locations to info tables.
--
-- It is usually a better alternative than using the 'RealSrcSpan' which is carefully
-- propagated downwards by 'withSpan'. It's "quick" because it works only using immediate context rather
-- than looking at the parent context like 'withSpan'
quickSourcePos :: FastString -> StgExpr -> Maybe SpanWithLabel
quickSourcePos cur_mod (StgTick (SourceNote ss m) e)
  | srcSpanFile ss == cur_mod = Just (SpanWithLabel ss m)
  | otherwise = quickSourcePos cur_mod e
quickSourcePos _ _ = Nothing

recordStgIdPosition :: Id -> Maybe SpanWithLabel -> Maybe SpanWithLabel -> M ()
recordStgIdPosition id best_span ss = do
  opts <- asks rOpts
  when (stgDebug_infoTableMap opts) $ do
    cc <- asks rSpan
    --Useful for debugging why a certain Id gets given a certain span
    --pprTraceM "recordStgIdPosition" (ppr id $$ ppr cc $$ ppr best_span $$ ppr ss)
    let mbspan = (\(SpanWithLabel rss d) -> (rss, d)) <$> (best_span <|> cc <|> ss)
    lift $ modify (\env -> env { provClosure = addToUniqMap (provClosure env) (idName id) (idType id, mbspan) })

numberDataCon :: DataCon -> [StgTickish] -> M ConstructorNumber
-- Unboxed tuples and sums do not allocate so they
-- have no info tables.
numberDataCon dc _ | isUnboxedTupleDataCon dc = return NoNumber
numberDataCon dc _ | isUnboxedSumDataCon dc = return NoNumber
numberDataCon dc ts = do
  opts <- asks rOpts
  if not (stgDebug_distinctConstructorTables opts) then return NoNumber else do
    env <- lift get
    mcc <- asks rSpan
    let !mbest_span = (\(SpanWithLabel rss l) -> (rss, l)) <$> (selectTick ts <|> mcc)
    let !dcMap' = alterUniqMap (maybe (Just ((0, mbest_span) :| [] ))
                        (\xs@((k, _):|_) -> Just $! ((k + 1, mbest_span) `NE.cons` xs))) (provDC env) dc
    lift $ put (env { provDC = dcMap' })
    let r = lookupUniqMap dcMap' dc
    return $ case r of
      Nothing -> NoNumber
      Just res -> Numbered (fst (NE.head res))

selectTick :: [StgTickish] -> Maybe SpanWithLabel
selectTick [] = Nothing
selectTick (SourceNote rss d : ts ) = selectTick ts <|> Just (SpanWithLabel rss d)
selectTick (_:ts) = selectTick ts

{-
Note [Mapping Info Tables to Source Positions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This note describes what the `-finfo-table-map` flag achieves.

When debugging memory issues it is very useful to be able to map a specific closure
to a position in the source. The prime example is being able to map a THUNK to
a specific place in the source program, the mapping is usually quite precise because
a fresh info table is created for each distinct THUNK.

The info table map is also used to generate stacktraces.
See Note [Stacktraces from Info Table Provenance Entries (IPE based stack unwinding)]
for details.

There are three parts to the implementation

1. In GHC.Stg.Debug, the SourceNote information is used in order to give a source location
   to some specific closures.
2. In GHC.Driver.GenerateCgIPEStub, the actually used info tables are collected after the
   Cmm pipeline. This is important as it's hard to predict beforehand what code generation
   will do and which ids will end up in the generated program. Additionally, info tables of
   return frames (used to create stacktraces) are generated in the Cmm pipeline and aren't
   available before.
3. During code generation, a mapping from the info table to the statically determined location
   is emitted which can then be queried at runtime by various tools.

-- Giving Source Locations to Closures

At the moment thunk and constructor closures are added to the map. This information
is collected in the `InfoTableProvMap` which provides a mapping from:

1. Data constructors to a list of where they are used.
2. `Name`s and where they originate from.
3. Stack represented info tables (return frames) to an approximated source location
   of the call that pushed a contiunation on the stacks.

During the CoreToStg phase, this map is populated whenever something is turned into
a StgRhsClosure or an StgConApp. The current source position is recorded
depending on the location indicated by the surrounding SourceNote.

The functions which add information to the map are `recordStgIdPosition` and
`numberDataCon`.

When the `-fdistinct-constructor-tables` flag is turned on then every
usage of a data constructor gets its own distinct info table. This is orchestrated
in `collectExpr` where an incrementing number is used to distinguish each
occurrence of a data constructor.

-- GenerateCgIPEStub

The info tables which are actually used in the generated program are collected after
the Cmm pipeline. `initInfoTableProv` is used to create a CStub, that initializes the
map in C code.

This step has to be done after the Cmm pipeline to make sure that all info tables are
really used and, even more importantly, return frame info tables are generated by the
pipeline.

-- Code Generation

The output of these two phases is combined together during code generation.
A C stub is generated which creates the static map from info table pointer to the
information about where that info table was created from. This is created by
`ipInitCode` in the same manner as a C stub is generated for cost centres.

This information can be consumed in two ways.

1. The complete mapping is emitted into the eventlog so that external tools such
as eventlog2html can use the information with the heap profile by info table mode.
2. The `lookupIPE` function can be used via the `whereFrom#` primop to introspect
information about a closure in a running Haskell program.

Note [Distinct Info Tables for Constructors]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In the old times, each usage of a data constructor used the same info table.
This made it impossible to distinguish which actual usuage of a data constructor was
contributing primarily to the allocation in a program. Using the `-fdistinct-info-tables` flag you
can cause code generation to generate a distinct info table for each usage of
a constructor. Then, when inspecting the heap you can see precisely which usage of a constructor
was responsible for each allocation.

-}