summaryrefslogtreecommitdiff
path: root/libraries/base/GHC/Stack/CloneStack.hs
blob: ecf2383d8dbca137895ee22b728d5fa833d146db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE UnliftedFFITypes#-}
{-# LANGUAGE GHCForeignImportPrim #-}

-- |
-- This module exposes an interface for capturing the state of a thread's
-- execution stack for diagnostics purposes: 'cloneMyStack',
-- 'cloneThreadStack'.
--
-- Such a "cloned" stack can be decoded with 'decode' to a stack trace, given
-- that the @-finfo-table-map@ is enabled.
--
-- @since 4.17.0.0
module GHC.Stack.CloneStack (
  StackSnapshot(..),
  StackEntry(..),
  cloneMyStack,
  cloneThreadStack,
  decode
  ) where

import Control.Concurrent.MVar
import Data.Maybe (catMaybes)
import Foreign
import GHC.Conc.Sync
import GHC.Exts (Int (I#), RealWorld, StackSnapshot#, ThreadId#, Array#, sizeofArray#, indexArray#, State#, StablePtr#)
import GHC.IO (IO (..))
import GHC.Stack.CCS (InfoProv (..), InfoProvEnt, ipeProv, peekInfoProv)
import GHC.Stable

-- | A frozen snapshot of the state of an execution stack.
--
-- This is the lifted wrapper around the primitive 'StackSnapshot#'. The
-- primitive representation is used (rather than a @Ptr@ or @StablePtr@) so
-- that the cloned stack is kept alive and eventually freed by the garbage
-- collector; see Note [Stack Cloning].
--
-- @since 4.17.0.0
data StackSnapshot = StackSnapshot !StackSnapshot#

-- Decode a cloned stack into an array of IPE pointers. A frame without an
-- IPE is represented by a null pointer. See Note [Stack Decoding] for why
-- this is imported here instead of being declared in @primops.txt.pp@.
foreign import prim "stg_decodeStackzh" decodeStack# :: StackSnapshot# -> State# RealWorld -> (# State# RealWorld, Array# (Ptr InfoProvEnt) #)

-- Clone the stack of the currently executing thread.
-- See Note [Stack Cloning].
foreign import prim "stg_cloneMyStackzh" cloneMyStack# :: State# RealWorld -> (# State# RealWorld, StackSnapshot# #)

-- Ask the thread identified by the 'ThreadId#' to clone its stack by sending
-- it an RTS message. The stable pointer refers to the MVar into which the
-- resulting snapshot is put. See Note [Stack Cloning].
foreign import prim "stg_sendCloneStackMessagezh" sendCloneStackMessage# :: ThreadId# -> StablePtr# PrimMVar -> State# RealWorld -> (# State# RealWorld, (# #) #)

{-
Note [Stack Cloning]
~~~~~~~~~~~~~~~~~~~~
"Cloning" a stack means that its `StgStack` closure is copied including the
stack memory (`stack[]`). Closures referenced by stack closures are not copied,
i.e. pointer payloads are still referred to by the same pointer.
In other words: Only those parts that are affected by stack evaluation are
"cloned".

The stack pointer (sp) of the clone is adjusted to be valid, i.e. to point into
the cloned stack.

The clone is "offline"/"cold", i.e. it won't be evaluated any further. This is
useful for further analyses like stack unwinding or traversal because all
pointers stay valid.

StackSnapshot#
--------------
A cloned stack is represented in Haskell by `StackSnapshot !StackSnapshot#`.
`StackSnapshot#` is a primitive type; its value is a pointer to the stack in
the RTS (`StgStack*`).

To take advantage of the garbage collector, the representation cannot be `Ptr`
or `StablePtr`:
- Closures referenced by a `Ptr` may be garbage collected at any time (without
  checking if it's still in use).
- `StablePtr` has to be freed explicitly, which would introduce nasty state
   handling.

By using a primitive type, the stack closure (and its transitive closures) is
kept and managed by the garbage collector as long as it's in use and
automatically freed later.
As closures referred to by stack closures (e.g. payloads) may be used by other
closures that are not related to stack cloning, the memory has to be managed by
the garbage collector; i.e. one cannot simply call free() in the RTS C code
because it's hard to figure out what to free while the garbage collector is
built to do this job.

RTS interface
-------------
There are two different ways to clone a stack:
1. `cloneMyStack#` - A primop for cloning the active thread's stack.
2. `sendCloneStackMessage#` - A primop for cloning another thread's stack.
   Sends an RTS message (Messages.c) with an MVar to that thread. The cloned
   stack is received by taking it out of this MVar.

`cloneMyStack#` has to be a primop, because new primitive types
(`StackSnapshot#`) cannot be marshalled by FFI. Using a `Ptr StackSnapshot` as
FFI return type would not save the snapshot from being garbage collected, as
discussed in the section above.

C API
-------------
`cloneStack` is the function that really clones a given stack and returns
the clone:
`StgStack* cloneStack(Capability* capability, const StgStack* stack)`

It's called directly by `stg_cloneMyStackzh` (`PrimOps.cmm`), the
`cloneMyStack#` primop.

To clone another thread's stack, there's a message passing mechanism such that
the receiver's capability clones it. So, there's no need to stop/pause the
other thread as its capability will fulfill the cloning request when it's
ready to do so.

The message is defined in `Closures.h`:

```
typedef struct MessageCloneStack_ {
    StgHeader header;
    Message   *link;
    StgMVar   *result;
    StgTSO    *tso;
} MessageCloneStack;
```

The fields are:
- `header`: It's a closure and thus subject to garbage collection (no manual
   memory management needed)
- `link`: Messages form a singly linked list in `Capability`, referred to by
  `capability->inbox`.
- `result`: An `MVar`. When the message is sent it's empty, after cloning the
  `StackSnapshot` is put into it.
- `tso`: `tso->stackobj` is the stack to clone.

The asynchronous flow can be split into sending this message and putting the
cloned stack into the MVar (expecting the sender to get it from there).

Sending:
The public C function to send is
`void sendCloneStackMessage(StgTSO *tso, HsStablePtr mvar)`.
It prepares the message for the thread to clone (identified by its `tso`) and
sets the `result` MVar (pointed to by `mvar`). Then it sends the message by
calling `sendMessage` which puts it into the Capability's `inbox`.

Receiving:
Inbox processing is part of the big work finding loop in `schedule`. The
function that dispatches messages is `executeMessage`. From there
`void handleCloneStackMessage(MessageCloneStack *msg)` is called.

`handleCloneStackMessage` clones the stack, lifts the result to `StackSnapshot`
(MVar needs a lifted value, no primitive) and puts it into the MVar
(`msg->result`).
-}

{-
Note [Stack Decoding]
~~~~~~~~~~~~~~~~~~~~~
A cloned stack is decoded (unwound) by looking up the Info Table Provenance
Entries (IPE) for every stack frame with `lookupIPE` in the RTS.

The IPEs contain source locations and are pulled from the RTS/C world into
Haskell.

RTS interface
-------------

The primop decodeStack# returns an array of IPE pointers that are later
unmarshalled with HSC. If there is no IPE for a return frame (which can easily
happen when a library wasn't compiled with `-finfo-table-map`), it's
represented by a null pointer.

Caveats:
- decodeStack# has to be a primop (not a simple C FFI function), because
  there always has to be at least one active `TSO`. Otherwise, allocating
  memory with the garbage collector for the returned value fails.
- decodeStack# has to be defined outside of `primops.txt.pp` because its
  return type `Array# (Ptr InfoProvEnt)` cannot be defined there:
  `InfoProvEnt` and `Ptr` would have to be imported which seems to be too
  specific for this file.

Notes
-----
The relevant notes are:
  - Note [Mapping Info Tables to Source Positions]
  - Note [Stacktraces from Info Table Provenance Entries (IPE based stack unwinding)]
-}

-- | Take a snapshot of the calling thread's own stack.
--
-- The primitive snapshot returned by the @cloneMyStack#@ primop is wrapped in
-- the lifted 'StackSnapshot' constructor.
--
-- @since 4.17.0.0
cloneMyStack :: IO StackSnapshot
cloneMyStack = IO $ \s0 ->
  case cloneMyStack# s0 of
    (# s1, snapshot# #) -> (# s1, StackSnapshot snapshot# #)

-- | Take a snapshot of the stack of the thread identified by the given
-- 'ThreadId'.
--
-- The request is delivered through the RTS's message mechanism; this call
-- blocks on an 'MVar' until the snapshot has been put into it.
--
-- @since 4.17.0.0
cloneThreadStack :: ThreadId -> IO StackSnapshot
cloneThreadStack (ThreadId tid#) = do
  snapshotVar <- newEmptyMVar @StackSnapshot
  stablePtr@(StablePtr rawPtr#) <- newStablePtrPrimMVar snapshotVar
  requestClone rawPtr#
  -- The stable pointer is only needed to hand the MVar over to the RTS; it is
  -- released as soon as the message has been sent.
  freeStablePtr stablePtr
  takeMVar snapshotVar
  where
    -- Ask the target thread (via an RTS message) to capture its stack and
    -- store the result in the MVar behind the given stable pointer.
    requestClone :: StablePtr# PrimMVar -> IO ()
    requestClone mvarPtr# = IO $ \s0 ->
      case sendCloneStackMessage# tid# mvarPtr# s0 of
        (# s1, (# #) #) -> (# s1, () #)

-- | Representation for the source location where a return frame was pushed on the stack.
-- This happens every time when a @case ... of@ scrutinee is evaluated.
--
-- The fields are filled from the 'InfoProv' of the frame's info table
-- provenance entry (see 'decode').
data StackEntry = StackEntry
  { functionName :: String,
    -- ^ The 'ipLabel' of the frame's 'InfoProv'.
    moduleName :: String,
    -- ^ The 'ipMod' of the frame's 'InfoProv'.
    srcLoc :: String,
    -- ^ The 'ipLoc' of the frame's 'InfoProv'.
    closureType :: Word
    -- ^ The 'ipDesc' of the frame's 'InfoProv', parsed as a number.
  }
  deriving (Show, Eq)

-- | Turn a 'StackSnapshot' into a stack trace, i.e. a list of 'StackEntry'.
--
-- Only return frames that have an 'InfoProvEnt' contribute an entry; frames
-- without one are skipped. The entries are generated when compiling with the
-- GHC flag @-finfo-table-map@. If no frame has an 'InfoProvEnt', the result
-- is the empty list.
--
-- Please note:
--
--   * Libraries only contribute 'StackEntry's if they were compiled with
--     @-finfo-table-map@ as well.
--   * Because of GHC optimizations (e.g. inlining) the stack trace may differ
--     between GHC versions and parameters.
--   * By design, the stack trace is empty if there are no return frames on
--     the stack. (A return frame is pushed every time a @case ... of@
--     scrutinee is evaluated.)
--
-- @since 4.17.0.0
decode :: StackSnapshot -> IO [StackEntry]
decode stackSnapshot =
  catMaybes <$> (getDecodedStackArray stackSnapshot >>= traverse entryFor)
  where
    -- A null pointer marks a frame without an IPE; it yields no entry.
    entryFor :: Ptr InfoProvEnt -> IO (Maybe StackEntry)
    entryFor ipePtr
      | ipePtr == nullPtr = pure Nothing
      | otherwise = Just . fromInfoProv <$> peekInfoProv (ipeProv ipePtr)

    fromInfoProv :: InfoProv -> StackEntry
    fromInfoProv prov =
      StackEntry
        { functionName = ipLabel prov,
          moduleName = ipMod prov,
          srcLoc = ipLoc prov,
          -- 'read' looks dangerous, but we can trust that the closure type is
          -- always there.
          closureType = read (ipDesc prov)
        }

-- Decode the snapshot with 'decodeStack#' and copy the resulting array of IPE
-- pointers into a list.
--
-- The list is built from the last array index down to index 0. Entries may be
-- null pointers (frames without an IPE); filtering them is left to the caller.
-- An empty array yields an empty list.
getDecodedStackArray :: StackSnapshot -> IO [Ptr InfoProvEnt]
getDecodedStackArray (StackSnapshot s) =
  IO $ \s0 -> case decodeStack# s s0 of
    (# s1, a #) -> (# s1, (go a ((I# (sizeofArray# a)) - 1)) #)
  where
    -- Walk the indices downwards. Stopping below 0 (instead of at 0) keeps
    -- the empty-array case from indexing out of bounds and recursing forever,
    -- as 'indexArray#' performs no bounds check.
    go :: Array# (Ptr InfoProvEnt) -> Int -> [Ptr InfoProvEnt]
    go stack i
      | i < 0 = []
      | otherwise = stackEntryAt stack i : go stack (i - 1)

    -- Read one element; the caller guarantees the index is in bounds.
    stackEntryAt :: Array# (Ptr InfoProvEnt) -> Int -> Ptr InfoProvEnt
    stackEntryAt stack (I# i) = case indexArray# stack i of
      (# se #) -> se