summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Eichmann <EichmannD@gmail.com>2020-11-06 16:01:33 +0000
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-11-10 10:27:35 -0500
commit7814cd5bb0d145c4d83d7566885bdc3992b63d0c (patch)
tree205f5a29b4e340d25d78e7c9e4b73a2f80d4db2d
parent4c407f6e71f096835f8671e2d3ea6bda38074314 (diff)
downloadhaskell-7814cd5bb0d145c4d83d7566885bdc3992b63d0c.tar.gz
ghc-heap: expose decoding from heap representation
Co-authored-by: Sven Tennie <sven.tennie@gmail.com> Co-authored-by: Matthew Pickering <matthewtpickering@gmail.com> Co-authored-by: Ben Gamari <bgamari.foss@gmail.com>
-rw-r--r--includes/rts/storage/Heap.h9
-rw-r--r--libraries/ghc-heap/GHC/Exts/Heap.hs177
-rw-r--r--rts/Heap.c41
3 files changed, 144 insertions, 83 deletions
diff --git a/includes/rts/storage/Heap.h b/includes/rts/storage/Heap.h
index 2e908279bf..7a35941656 100644
--- a/includes/rts/storage/Heap.h
+++ b/includes/rts/storage/Heap.h
@@ -16,3 +16,12 @@ void heap_view_closure_ptrs_in_pap_payload(StgClosure *ptrs[], StgWord *nptrs
, StgClosure *fun, StgClosure **payload, StgWord size);
StgWord heap_view_closureSize(StgClosure *closure);
+
+/*
+ * Collect the pointers of a closure into the given array. `size` should be
+ * large enough to hold all collected pointers e.g.
+ * `heap_view_closureSize(closure)`. Returns the number of pointers collected.
+ * The caller must ensure that `closure` is not modified (or moved by the GC)
+ * for the duration of the call to `collect_pointers`.
+ */
+StgWord collect_pointers(StgClosure *closure, StgWord size, StgClosure *ptrs[]);
diff --git a/libraries/ghc-heap/GHC/Exts/Heap.hs b/libraries/ghc-heap/GHC/Exts/Heap.hs
index c1f2376729..7437398d90 100644
--- a/libraries/ghc-heap/GHC/Exts/Heap.hs
+++ b/libraries/ghc-heap/GHC/Exts/Heap.hs
@@ -7,6 +7,9 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeInType #-}
{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE ExplicitForAll #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE UnliftedFFITypes #-}
{-|
Module : GHC.Exts.Heap
@@ -25,6 +28,7 @@ module GHC.Exts.Heap (
, ClosureType(..)
, PrimType(..)
, HasHeapRep(getClosureData)
+ , getClosureDataFromHeapRep
-- * Info Table types
, StgInfoTable(..)
@@ -58,7 +62,7 @@ import GHC.Exts.Heap.Utils
import Control.Monad
import Data.Bits
-import GHC.Arr
+import Foreign
import GHC.Exts
import GHC.Int
import GHC.Word
@@ -66,13 +70,19 @@ import GHC.Word
#include "ghcconfig.h"
class HasHeapRep (a :: TYPE rep) where
- getClosureData :: a -> IO Closure
+
+ -- | Decode a closure to it's heap representation ('GenClosure').
+ getClosureData
+ :: a
+ -- ^ Closure to decode.
+ -> IO Closure
+ -- ^ Heap representation of the closure.
instance HasHeapRep (a :: TYPE 'LiftedRep) where
- getClosureData = getClosure
+ getClosureData = getClosureDataFromHeapObject
instance HasHeapRep (a :: TYPE 'UnliftedRep) where
- getClosureData x = getClosure (unsafeCoerce# x)
+ getClosureData x = getClosureDataFromHeapObject (unsafeCoerce# x)
instance Int# ~ a => HasHeapRep (a :: TYPE 'IntRep) where
getClosureData x = return $
@@ -102,49 +112,84 @@ instance Double# ~ a => HasHeapRep (a :: TYPE 'DoubleRep) where
getClosureData x = return $
DoubleClosure { ptipe = PDouble, doubleVal = D# x }
--- | This returns the raw representation of the given argument. The second
--- component of the triple is the raw words of the closure on the heap, and the
--- third component is those words that are actually pointers. Once back in the
--- Haskell world, the raw words that hold pointers may be outdated after a
--- garbage collector run, but the corresponding values in 'Box's will still
--- point to the correct value.
-getClosureRaw :: a -> IO (Ptr StgInfoTable, [Word], [Box])
-getClosureRaw x = do
+-- | Get the heap representation of a closure _at this moment_, even if it is
+-- unevaluated or an indirection or other exotic stuff. Beware when passing
+-- something to this function, the same caveats as for
+-- 'GHC.Exts.Heap.Closures.asBox' apply.
+--
+-- For most use cases 'getClosureData' is an easier to use alternative.
+--
+-- Currently TSO and STACK objects will return `UnsupportedClosure`. This is
+-- because it is not memory safe to extract TSO and STACK objects (done via
+-- `unpackClosure#`). Other threads may be mutating those objects and interleave
+-- with reads in `unpackClosure#`. This is particularly problematic with STACKs
+-- where pointer values may be overwritten by non-pointer values as the
+-- corresponding haskell thread runs.
+getClosureDataFromHeapObject
+ :: a
+ -- ^ Heap object to decode.
+ -> IO Closure
+ -- ^ Heap representation of the closure.
+getClosureDataFromHeapObject x = do
case unpackClosure# x of
--- This is a hack to cover the bootstrap compiler using the old version of
--- 'unpackClosure'. The new 'unpackClosure' return values are not merely
--- a reordering, so using the old version would not work.
- (# iptr, dat, pointers #) -> do
- let nelems = (I# (sizeofByteArray# dat)) `div` wORD_SIZE
- end = fromIntegral nelems - 1
- rawWds = [W# (indexWordArray# dat i) | I# i <- [0.. end] ]
- pelems = I# (sizeofArray# pointers)
- ptrList = amap' Box $ Array 0 (pelems - 1) pelems pointers
- pure (Ptr iptr, rawWds, ptrList)
-
--- From GHC.Runtime.Heap.Inspect
-amap' :: (t -> b) -> Array Int t -> [b]
-amap' f (Array i0 i _ arr#) = map g [0 .. i - i0]
- where g (I# i#) = case indexArray# arr# i# of
- (# e #) -> f e
-
--- | This function returns a parsed heap representation of the argument _at
--- this moment_, even if it is unevaluated or an indirection or other exotic
--- stuff. Beware when passing something to this function, the same caveats as
--- for 'asBox' apply.
-getClosure :: a -> IO Closure
-getClosure x = do
- (iptr, wds, pts) <- getClosureRaw x
- itbl <- peekItbl iptr
- -- The remaining words after the header
- let rawWds = drop (closureTypeHeaderSize (tipe itbl)) wds
- -- For data args in a pointers then non-pointers closure
- -- This is incorrect in non pointers-first setups
- -- not sure if that happens
- npts = drop (closureTypeHeaderSize (tipe itbl) + length pts) wds
+#if MIN_VERSION_ghc_prim(0,5,3)
+ (# infoTableAddr, heapRep, pointersArray #) -> do
+#else
+ -- This is a hack to cover the bootstrap compiler using the old version
+ -- of 'unpackClosure'. The new 'unpackClosure' return values are not
+ -- merely a reordering, so using the old version would not work.
+ (# infoTableAddr, pointersArray, heapRep #) -> do
+#endif
+ let infoTablePtr = Ptr infoTableAddr
+ ptrList = [case indexArray# pointersArray i of
+ (# ptr #) -> Box ptr
+ | I# i <- [0..(I# (sizeofArray# pointersArray)) - 1]
+ ]
+
+ infoTable <- peekItbl infoTablePtr
+ case tipe infoTable of
+ TSO -> pure $ UnsupportedClosure infoTable
+ STACK -> pure $ UnsupportedClosure infoTable
+ _ -> getClosureDataFromHeapRep heapRep infoTablePtr ptrList
+
+-- | Convert an unpacked heap object, to a `GenClosure b`. The inputs to this
+-- function can be generated from a heap object using `unpackClosure#`.
+getClosureDataFromHeapRep
+ :: ByteArray#
+ -- ^ Heap representation of the closure as returned by `unpackClosure#`.
+ -- This includes all of the object including the header, info table
+ -- pointer, pointer data, and non-pointer data. The ByteArray# may be
+ -- pinned or unpinned.
+ -> Ptr StgInfoTable
+ -- ^ Pointer to the `StgInfoTable` of the closure, extracted from the heap
+ -- representation. The info table must not be movable by GC i.e. must be in
+ -- pinned or off-heap memory.
+ -> [b]
+ -- ^ Pointers in the payload of the closure, extracted from the heap
+ -- representation as returned by `collect_pointers()` in `Heap.c`. The type
+ -- `b` is some representation of a pointer e.g. `Any` or `Ptr Any`.
+ -> IO (GenClosure b)
+ -- ^ Heap representation of the closure.
+getClosureDataFromHeapRep heapRep infoTablePtr pts = do
+ itbl <- peekItbl infoTablePtr
+ let -- heapRep as a list of words.
+ rawHeapWords :: [Word]
+ rawHeapWords = [W# (indexWordArray# heapRep i) | I# i <- [0.. end] ]
+ where
+ nelems = (I# (sizeofByteArray# heapRep)) `div` wORD_SIZE
+ end = fromIntegral nelems - 1
+
+ -- Just the payload of rawHeapWords (no header).
+ payloadWords :: [Word]
+ payloadWords = drop (closureTypeHeaderSize (tipe itbl)) rawHeapWords
+
+ -- The non-pointer words in the payload. Only valid for closures with a
+ -- "pointers first" layout. Not valid for bit field layout.
+ npts :: [Word]
+ npts = drop (closureTypeHeaderSize (tipe itbl) + length pts) rawHeapWords
case tipe itbl of
t | t >= CONSTR && t <= CONSTR_NOCAF -> do
- (p, m, n) <- dataConNames iptr
+ (p, m, n) <- dataConNames infoTablePtr
if m == "GHC.ByteCode.Instr" && n == "BreakInfo"
then pure $ UnsupportedClosure itbl
else pure $ ConstrClosure itbl pts npts p m n
@@ -164,9 +209,9 @@ getClosure x = do
unless (length pts >= 1) $
fail "Expected at least 1 ptr argument to AP"
-- We expect at least the arity, n_args, and fun fields
- unless (length rawWds >= 2) $
+ unless (length payloadWords >= 2) $
fail $ "Expected at least 2 raw words to AP"
- let splitWord = rawWds !! 0
+ let splitWord = payloadWords !! 0
pure $ APClosure itbl
#if defined(WORDS_BIGENDIAN)
(fromIntegral $ shiftR splitWord (wORD_SIZE_IN_BITS `div` 2))
@@ -181,9 +226,9 @@ getClosure x = do
unless (length pts >= 1) $
fail "Expected at least 1 ptr argument to PAP"
-- We expect at least the arity, n_args, and fun fields
- unless (length rawWds >= 2) $
+ unless (length payloadWords >= 2) $
fail "Expected at least 2 raw words to PAP"
- let splitWord = rawWds !! 0
+ let splitWord = payloadWords !! 0
pure $ PAPClosure itbl
#if defined(WORDS_BIGENDIAN)
(fromIntegral $ shiftR splitWord (wORD_SIZE_IN_BITS `div` 2))
@@ -218,10 +263,10 @@ getClosure x = do
unless (length pts >= 3) $
fail $ "Expected at least 3 ptr argument to BCO, found "
++ show (length pts)
- unless (length rawWds >= 4) $
+ unless (length payloadWords >= 4) $
fail $ "Expected at least 4 words to BCO, found "
- ++ show (length rawWds)
- let splitWord = rawWds !! 3
+ ++ show (length payloadWords)
+ let splitWord = payloadWords !! 3
pure $ BCOClosure itbl (pts !! 0) (pts !! 1) (pts !! 2)
#if defined(WORDS_BIGENDIAN)
(fromIntegral $ shiftR splitWord (wORD_SIZE_IN_BITS `div` 2))
@@ -230,27 +275,30 @@ getClosure x = do
(fromIntegral splitWord)
(fromIntegral $ shiftR splitWord (wORD_SIZE_IN_BITS `div` 2))
#endif
- (drop 4 rawWds)
+ (drop 4 payloadWords)
ARR_WORDS -> do
- unless (length rawWds >= 1) $
+ unless (length payloadWords >= 1) $
fail $ "Expected at least 1 words to ARR_WORDS, found "
- ++ show (length rawWds)
- pure $ ArrWordsClosure itbl (head rawWds) (tail rawWds)
+ ++ show (length payloadWords)
+ pure $ ArrWordsClosure itbl (head payloadWords) (tail payloadWords)
t | t >= MUT_ARR_PTRS_CLEAN && t <= MUT_ARR_PTRS_FROZEN_CLEAN -> do
- unless (length rawWds >= 2) $
+ unless (length payloadWords >= 2) $
fail $ "Expected at least 2 words to MUT_ARR_PTRS_* "
- ++ "found " ++ show (length rawWds)
- pure $ MutArrClosure itbl (rawWds !! 0) (rawWds !! 1) pts
+ ++ "found " ++ show (length payloadWords)
+ pure $ MutArrClosure itbl (payloadWords !! 0) (payloadWords !! 1) pts
t | t >= SMALL_MUT_ARR_PTRS_CLEAN && t <= SMALL_MUT_ARR_PTRS_FROZEN_CLEAN -> do
- unless (length rawWds >= 1) $
+ unless (length payloadWords >= 1) $
fail $ "Expected at least 1 word to SMALL_MUT_ARR_PTRS_* "
- ++ "found " ++ show (length rawWds)
- pure $ SmallMutArrClosure itbl (rawWds !! 0) pts
+ ++ "found " ++ show (length payloadWords)
+ pure $ SmallMutArrClosure itbl (payloadWords !! 0) pts
- t | t == MUT_VAR_CLEAN || t == MUT_VAR_DIRTY ->
+ t | t == MUT_VAR_CLEAN || t == MUT_VAR_DIRTY -> do
+ unless (length pts >= 1) $
+ fail $ "Expected at least 1 words to MUT_VAR, found "
+ ++ show (length pts)
pure $ MutVarClosure itbl (head pts)
t | t == MVAR_CLEAN || t == MVAR_DIRTY -> do
@@ -260,13 +308,12 @@ getClosure x = do
pure $ MVarClosure itbl (pts !! 0) (pts !! 1) (pts !! 2)
BLOCKING_QUEUE ->
- pure $ OtherClosure itbl pts wds
+ pure $ OtherClosure itbl pts rawHeapWords
-- pure $ BlockingQueueClosure itbl
-- (pts !! 0) (pts !! 1) (pts !! 2) (pts !! 3)
- -- pure $ OtherClosure itbl pts wds
+ -- pure $ OtherClosure itbl pts rawHeapWords
--
-
WEAK ->
pure $ WeakClosure
{ info = itbl
diff --git a/rts/Heap.c b/rts/Heap.c
index 0e31a779b4..509835eec1 100644
--- a/rts/Heap.c
+++ b/rts/Heap.c
@@ -76,23 +76,12 @@ void heap_view_closure_ptrs_in_pap_payload(StgClosure *ptrs[], StgWord *nptrs
}
}
-StgMutArrPtrs *heap_view_closurePtrs(Capability *cap, StgClosure *closure) {
- ASSERT(LOOKS_LIKE_CLOSURE_PTR(closure));
-
- StgWord size = heap_view_closureSize(closure);
- StgWord nptrs = 0;
- StgWord i;
-
- // First collect all pointers here, with the comfortable memory bound
- // of the whole closure. Afterwards we know how many pointers are in
- // the closure and then we can allocate space on the heap and copy them
- // there
- StgClosure *ptrs[size];
-
+// See Heap.h
+StgWord collect_pointers(StgClosure *closure, StgWord size, StgClosure *ptrs[size]) {
StgClosure **end;
- StgClosure **ptr;
-
const StgInfoTable *info = get_itbl(closure);
+ StgWord nptrs = 0;
+ StgWord i;
switch (info->type) {
case INVALID_OBJECT:
@@ -101,6 +90,7 @@ StgMutArrPtrs *heap_view_closurePtrs(Capability *cap, StgClosure *closure) {
// No pointers
case ARR_WORDS:
+ case STACK:
break;
// Default layout
@@ -123,7 +113,7 @@ StgMutArrPtrs *heap_view_closurePtrs(Capability *cap, StgClosure *closure) {
case FUN_0_2:
case FUN_STATIC:
end = closure->payload + info->layout.payload.ptrs;
- for (ptr = closure->payload; ptr < end; ptr++) {
+ for (StgClosure **ptr = closure->payload; ptr < end; ptr++) {
ptrs[nptrs++] = *ptr;
}
break;
@@ -136,7 +126,7 @@ StgMutArrPtrs *heap_view_closurePtrs(Capability *cap, StgClosure *closure) {
case THUNK_0_2:
case THUNK_STATIC:
end = ((StgThunk *)closure)->payload + info->layout.payload.ptrs;
- for (ptr = ((StgThunk *)closure)->payload; ptr < end; ptr++) {
+ for (StgClosure **ptr = ((StgThunk *)closure)->payload; ptr < end; ptr++) {
ptrs[nptrs++] = *ptr;
}
break;
@@ -228,6 +218,21 @@ StgMutArrPtrs *heap_view_closurePtrs(Capability *cap, StgClosure *closure) {
break;
}
+ return nptrs;
+}
+
+StgMutArrPtrs *heap_view_closurePtrs(Capability *cap, StgClosure *closure) {
+ ASSERT(LOOKS_LIKE_CLOSURE_PTR(closure));
+
+ StgWord size = heap_view_closureSize(closure);
+
+ // First collect all pointers here, with the comfortable memory bound
+ // of the whole closure. Afterwards we know how many pointers are in
+ // the closure and then we can allocate space on the heap and copy them
+ // there
+ StgClosure *ptrs[size];
+ StgWord nptrs = collect_pointers(closure, size, ptrs);
+
size = nptrs + mutArrPtrsCardTableSize(nptrs);
StgMutArrPtrs *arr =
(StgMutArrPtrs *)allocate(cap, sizeofW(StgMutArrPtrs) + size);
@@ -236,7 +241,7 @@ StgMutArrPtrs *heap_view_closurePtrs(Capability *cap, StgClosure *closure) {
arr->ptrs = nptrs;
arr->size = size;
- for (i = 0; i<nptrs; i++) {
+ for (StgWord i = 0; i<nptrs; i++) {
arr->payload[i] = ptrs[i];
}