summaryrefslogtreecommitdiff
path: root/libraries/heapview/GHC/HeapView.hs
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/heapview/GHC/HeapView.hs')
-rw-r--r--libraries/heapview/GHC/HeapView.hs1016
1 files changed, 1016 insertions, 0 deletions
diff --git a/libraries/heapview/GHC/HeapView.hs b/libraries/heapview/GHC/HeapView.hs
new file mode 100644
index 0000000000..cd4f7de738
--- /dev/null
+++ b/libraries/heapview/GHC/HeapView.hs
@@ -0,0 +1,1016 @@
+{-# LANGUAGE MagicHash, UnboxedTuples, CPP, ForeignFunctionInterface, GHCForeignImportPrim, UnliftedFFITypes, BangPatterns, RecordWildCards, DeriveFunctor, DeriveFoldable, DeriveTraversable, PatternGuards #-}
+{-|
+Module : GHC.HeapView
+Copyright : (c) 2012 Joachim Breitner
+License : BSD3
+Maintainer : Joachim Breitner <mail@joachim-breitner.de>
+
+With this module, you can investigate the heap representation of Haskell
+values, i.e. to investigate sharing and lazy evaluation.
+-}
+
+
+module GHC.HeapView (
+ -- * Heap data types
+ GenClosure(..),
+ Closure,
+ allPtrs,
+ ClosureType(..),
+ StgInfoTable(..),
+ HalfWord,
+ -- * Reading from the heap
+ getClosureData,
+ getBoxedClosureData,
+ getClosureRaw,
+ -- * Pretty printing
+ ppClosure,
+ -- * Heap maps
+ -- $heapmap
+ HeapTree(..),
+ buildHeapTree,
+ ppHeapTree,
+ HeapGraphEntry(..),
+ HeapGraphIndex,
+ HeapGraph(..),
+ lookupHeapGraph,
+ heapGraphRoot,
+ buildHeapGraph,
+ multiBuildHeapGraph,
+ addHeapGraph,
+ annotateHeapGraph,
+ updateHeapGraph,
+ ppHeapGraph,
+ -- * Boxes
+ Box(..),
+ asBox,
+ areBoxesEqual,
+ -- * Disassembler
+ disassembleBCO,
+ )
+ where
+
+import GHC.Exts ( Any,
+ Ptr(..), Addr#, Int(..), Word(..), Word#, Int#,
+ ByteArray#, Array#, sizeofByteArray#, sizeofArray#, indexArray#, indexWordArray#,
+ unsafeCoerce# )
+
+import GHC.Arr (Array(..))
+
+
+import Foreign hiding ( void )
+import Numeric ( showHex )
+import Data.Char
+import Data.List
+import Data.Maybe ( catMaybes )
+import Data.Monoid ( Monoid, (<>), mempty )
+import Data.Functor
+import Data.Function
+import Data.Foldable ( Foldable )
+import qualified Data.Foldable as F
+import Data.Traversable ( Traversable )
+import qualified Data.Traversable as T
+import Control.Monad
+import Control.Exception.Base (evaluate)
+
+import GHC.Disassembler
+
+#include "ghcautoconf.h"
+
+-- | An arbitrarily Haskell value in a safe Box. The point is that even
+-- unevaluated thunks can safely be moved around inside the Box, and when
+-- required, e.g. in 'getBoxedClosureData', the function knows how far it has
+-- to evalue the argument.
+data Box = Box Any
+
+#if SIZEOF_VOID_P == 8
+type HalfWord = Word32
+#else
+type HalfWord = Word16
+#endif
+
+instance Show Box where
+-- From libraries/base/GHC/Ptr.lhs
+ showsPrec _ (Box a) rs =
+ -- unsafePerformIO (print "↓" >> pClosure a) `seq`
+ pad_out (showHex addr "") ++ (if tag>0 then "/" ++ show tag else "") ++ rs
+ where
+ ptr = W# (aToWord# a)
+ tag = ptr .&. fromIntegral tAG_MASK -- ((1 `shiftL` TAG_BITS) -1)
+ addr = ptr - tag
+ -- want 0s prefixed to pad it out to a fixed length.
+ pad_out ls =
+ '0':'x':(replicate (2*wORD_SIZE - length ls) '0') ++ ls
+
+-- | Boxes can be compared, but this is not pure, as different heap objects can,
+-- after garbage collection, become the same object.
+areBoxesEqual :: Box -> Box -> IO Bool
+areBoxesEqual (Box a) (Box b) = case reallyUnsafePtrEqualityUpToTag# a b of
+ 0# -> return False
+ _ -> return True
+
+
+{-|
+ This takes an arbitrary value and puts it into a box. Note that calls like
+
+ > asBox (head list)
+
+ will put the thunk \"head list\" into the box, /not/ the element at the head
+ of the list. For that, use careful case expressions:
+
+ > case list of x:_ -> asBox x
+-}
+asBox :: a -> Box
+asBox x = Box (unsafeCoerce# x)
+
+{-
+ StgInfoTable parsing derived from ByteCodeItbls.lhs
+ Removed the code parameter for now
+ Replaced Type by an enumeration
+ Remove stuff dependent on GHCI_TABLES_NEXT_TO_CODE
+ -}
+
+{-| This is a somewhat faithful representation of an info table. See
+ <http://hackage.haskell.org/trac/ghc/browser/includes/rts/storage/InfoTables.h>
+ for more details on this data structure. Note that the 'Storable' instance
+ provided here does _not_ support writing.
+ -}
+data StgInfoTable = StgInfoTable {
+ ptrs :: HalfWord,
+ nptrs :: HalfWord,
+ tipe :: ClosureType,
+ srtlen :: HalfWord
+ }
+ deriving (Show)
+
+instance Storable StgInfoTable where
+
+ sizeOf itbl
+ = sum
+ [
+ fieldSz ptrs itbl,
+ fieldSz nptrs itbl,
+ sizeOf (undefined :: HalfWord),
+ fieldSz srtlen itbl
+ ]
+
+ alignment _
+ = wORD_SIZE
+
+ poke _a0 _itbl
+ = error "Storable StgInfoTable is read-only"
+
+ peek a0
+ = flip (evalStateT) (castPtr a0)
+ $ do
+ ptrs' <- load
+ nptrs' <- load
+ tipe' <- load
+ srtlen' <- load
+ return
+ StgInfoTable {
+ ptrs = ptrs',
+ nptrs = nptrs',
+ tipe = toEnum (fromIntegral (tipe'::HalfWord)),
+ srtlen = srtlen'
+ }
+
+fieldSz :: Storable b => (a -> b) -> a -> Int
+fieldSz sel x = sizeOf (sel x)
+
+load :: Storable a => PtrIO a
+load = do addr <- advance
+ lift (peek addr)
+
+type PtrIO = StateT (Ptr Word8) IO
+
+advance :: Storable a => PtrIO (Ptr a)
+advance = StateT adv where
+ adv addr = case castPtr addr of { addrCast -> return
+ (addrCast, addr `plusPtr` sizeOfPointee addrCast) }
+
+sizeOfPointee :: (Storable a) => Ptr a -> Int
+sizeOfPointee addr = sizeOf (typeHack addr)
+ where typeHack = undefined :: Ptr a -> a
+
+{-
+ Data Type representing Closures
+ -}
+
+
+{-| A closure type enumeration, in order matching the actual value on the heap.
+ Needs to be synchronized with
+ <http://hackage.haskell.org/trac/ghc/browser/includes/rts/storage/ClosureTypes.h>
+ -}
+data ClosureType =
+ INVALID_OBJECT
+ | CONSTR
+ | CONSTR_1_0
+ | CONSTR_0_1
+ | CONSTR_2_0
+ | CONSTR_1_1
+ | CONSTR_0_2
+ | CONSTR_STATIC
+ | CONSTR_NOCAF_STATIC
+ | FUN
+ | FUN_1_0
+ | FUN_0_1
+ | FUN_2_0
+ | FUN_1_1
+ | FUN_0_2
+ | FUN_STATIC
+ | THUNK
+ | THUNK_1_0
+ | THUNK_0_1
+ | THUNK_2_0
+ | THUNK_1_1
+ | THUNK_0_2
+ | THUNK_STATIC
+ | THUNK_SELECTOR
+ | BCO
+ | AP
+ | PAP
+ | AP_STACK
+ | IND
+ | IND_PERM
+ | IND_STATIC
+ | RET_BCO
+ | RET_SMALL
+ | RET_BIG
+ | RET_FUN
+ | UPDATE_FRAME
+ | CATCH_FRAME
+ | UNDERFLOW_FRAME
+ | STOP_FRAME
+ | BLOCKING_QUEUE
+ | BLACKHOLE
+ | MVAR_CLEAN
+ | MVAR_DIRTY
+ | ARR_WORDS
+ | MUT_ARR_PTRS_CLEAN
+ | MUT_ARR_PTRS_DIRTY
+ | MUT_ARR_PTRS_FROZEN0
+ | MUT_ARR_PTRS_FROZEN
+ | MUT_VAR_CLEAN
+ | MUT_VAR_DIRTY
+ | WEAK
+ | PRIM
+ | MUT_PRIM
+ | TSO
+ | STACK
+ | TREC_CHUNK
+ | ATOMICALLY_FRAME
+ | CATCH_RETRY_FRAME
+ | CATCH_STM_FRAME
+ | WHITEHOLE
+ deriving (Show, Eq, Enum, Ord)
+
+{-| This is the main data type of this module, representing a Haskell value on
+ the heap. This reflects
+ <http://hackage.haskell.org/trac/ghc/browser/includes/rts/storage/Closures.h>
+
+ The data type is parametrized by the type to store references in, which
+ is usually a 'Box' with appropriate type synonym 'Closure'.
+ -}
+data GenClosure b =
+ ConsClosure {
+ info :: StgInfoTable
+ , ptrArgs :: [b]
+ , dataArgs :: [Word]
+ , pkg :: String
+ , modl :: String
+ , name :: String
+ } |
+ ThunkClosure {
+ info :: StgInfoTable
+ , ptrArgs :: [b]
+ , dataArgs :: [Word]
+ } |
+ SelectorClosure {
+ info :: StgInfoTable
+ , selectee :: b
+ } |
+ IndClosure {
+ info :: StgInfoTable
+ , indirectee :: b
+ } |
+ BlackholeClosure {
+ info :: StgInfoTable
+ , indirectee :: b
+ } |
+ -- In GHCi, if Linker.h would allow a reverse lookup, we could for exported
+ -- functions fun actually find the name here.
+ -- At least the other direction works via "lookupSymbol
+ -- base_GHCziBase_zpzp_closure" and yields the same address (up to tags)
+ APClosure {
+ info :: StgInfoTable
+ , arity :: HalfWord
+ , n_args :: HalfWord
+ , fun :: b
+ , payload :: [b]
+ } |
+ PAPClosure {
+ info :: StgInfoTable
+ , arity :: HalfWord
+ , n_args :: HalfWord
+ , fun :: b
+ , payload :: [b]
+ } |
+ APStackClosure {
+ info :: StgInfoTable
+ , fun :: b
+ , payload :: [b]
+ } |
+ BCOClosure {
+ info :: StgInfoTable
+ , instrs :: b
+ , literals :: b
+ , bcoptrs :: b
+ , arity :: HalfWord
+ , size :: HalfWord
+ , bitmap :: Word
+ } |
+ ArrWordsClosure {
+ info :: StgInfoTable
+ , bytes :: Word
+ , arrWords :: [Word]
+ } |
+ MutArrClosure {
+ info :: StgInfoTable
+ , mccPtrs :: Word
+ , mccSize :: Word
+ , mccPayload :: [b]
+ -- Card table ignored
+ } |
+ MutVarClosure {
+ info :: StgInfoTable
+ , var :: b
+ } |
+ MVarClosure {
+ info :: StgInfoTable
+ , queueHead :: b
+ , queueTail :: b
+ , value :: b
+ } |
+ FunClosure {
+ info :: StgInfoTable
+ , ptrArgs :: [b]
+ , dataArgs :: [Word]
+ } |
+ BlockingQueueClosure {
+ info :: StgInfoTable
+ , link :: b
+ , blackHole :: b
+ , owner :: b
+ , queue :: b
+ } |
+ OtherClosure {
+ info :: StgInfoTable
+ , hvalues :: [b]
+ , rawWords :: [Word]
+ } |
+ UnsupportedClosure {
+ info :: StgInfoTable
+ }
+ deriving (Show, Functor, Foldable, Traversable)
+
+
+type Closure = GenClosure Box
+
+-- | For generic code, this function returns all referenced closures.
+allPtrs :: GenClosure b -> [b]
+allPtrs (ConsClosure {..}) = ptrArgs
+allPtrs (ThunkClosure {..}) = ptrArgs
+allPtrs (SelectorClosure {..}) = [selectee]
+allPtrs (IndClosure {..}) = [indirectee]
+allPtrs (BlackholeClosure {..}) = [indirectee]
+allPtrs (APClosure {..}) = fun:payload
+allPtrs (PAPClosure {..}) = fun:payload
+allPtrs (APStackClosure {..}) = fun:payload
+allPtrs (BCOClosure {..}) = [instrs,literals,bcoptrs]
+allPtrs (ArrWordsClosure {..}) = []
+allPtrs (MutArrClosure {..}) = mccPayload
+allPtrs (MutVarClosure {..}) = [var]
+allPtrs (MVarClosure {..}) = [queueHead,queueTail,value]
+allPtrs (FunClosure {..}) = ptrArgs
+allPtrs (BlockingQueueClosure {..}) = [link, blackHole, owner, queue]
+allPtrs (OtherClosure {..}) = hvalues
+allPtrs (UnsupportedClosure {..}) = []
+
+
+foreign import prim "aToWordzh" aToWord# :: Any -> Word#
+foreign import prim "slurpClosurezh" slurpClosure# :: Any -> (# Addr#, ByteArray#, Array# b #)
+foreign import prim "reallyUnsafePtrEqualityUpToTag" reallyUnsafePtrEqualityUpToTag# :: Any -> Any -> Int#
+
+--pClosure x = do
+-- getClosure x >>= print
+
+-- | This returns the raw representation of the given argument. The second
+-- component of the triple are the words on the heap, and the third component
+-- are those words that are actually pointers. Once back in Haskell word, the
+-- 'Word' may be outdated after a garbage collector run, but the corresponding
+-- 'Box' will still point to the correct value.
+getClosureRaw :: a -> IO (Ptr StgInfoTable, [Word], [Box])
+getClosureRaw x =
+ case slurpClosure# (unsafeCoerce# x) of
+ (# iptr, dat, ptrs #) -> do
+ let nelems = (I# (sizeofByteArray# dat)) `div` wORD_SIZE
+ rawWords = [W# (indexWordArray# dat i) | I# i <- [0.. fromIntegral nelems -1] ]
+ pelems = I# (sizeofArray# ptrs)
+ ptrList = amap' Box $ Array 0 (pelems - 1) pelems ptrs
+ -- This is just for good measure, and seems to be not important.
+ mapM_ evaluate ptrList
+ -- This seems to be required to avoid crashes as well
+ void $ evaluate nelems
+ -- The following deep evaluation is crucial to avoid crashes (but why)?
+ mapM_ evaluate rawWords
+ return (Ptr iptr, rawWords, ptrList)
+
+-- From compiler/ghci/RtClosureInspect.hs
+amap' :: (t -> b) -> Array Int t -> [b]
+amap' f (Array i0 i _ arr#) = map g [0 .. i - i0]
+ where g (I# i#) = case indexArray# arr# i# of
+ (# e #) -> f e
+
+-- derived from vacuum-1.0.0.2/src/GHC/Vacuum/Internal.hs, which got it from
+-- compiler/ghci/DebuggerUtils.hs
+dataConInfoPtrToNames :: Ptr StgInfoTable -> IO (String, String, String)
+dataConInfoPtrToNames ptr = do
+ conDescAddress <- getConDescAddress ptr
+ wl <- peekArray0 0 conDescAddress
+ let (pkg, modl, name) = parse wl
+ return (b2s pkg, b2s modl, b2s name)
+ where
+ b2s :: [Word8] -> String
+ b2s = fmap (chr . fromIntegral)
+
+ getConDescAddress :: Ptr StgInfoTable -> IO (Ptr Word8)
+ getConDescAddress ptr'
+ | True = do
+ offsetToString <- peek (ptr' `plusPtr` (negate wORD_SIZE))
+ return $ (ptr' `plusPtr` stdInfoTableSizeB)
+ `plusPtr` (fromIntegral (offsetToString :: Word))
+ -- This is code for !ghciTablesNextToCode:
+ {-
+ | otherwise = peek . intPtrToPtr
+ . (+ fromIntegral
+ stdInfoTableSizeB)
+ . ptrToIntPtr $ ptr
+ -}
+
+ -- hmmmmmm. Is there any way to tell this?
+ opt_SccProfilingOn = False
+
+ stdInfoTableSizeW :: Int
+ -- The size of a standard info table varies with profiling/ticky etc,
+ -- so we can't get it from Constants
+ -- It must vary in sync with mkStdInfoTable
+ stdInfoTableSizeW
+ = size_fixed + size_prof
+ where
+ size_fixed = 2 -- layout, type
+ size_prof | opt_SccProfilingOn = 2
+ | otherwise = 0
+
+ stdInfoTableSizeB :: Int
+ stdInfoTableSizeB = stdInfoTableSizeW * wORD_SIZE
+
+-- From vacuum-1.0.0.2/src/GHC/Vacuum/Internal.hs
+parse :: [Word8] -> ([Word8], [Word8], [Word8])
+parse input = if not . all (>0) . fmap length $ [pkg,modl,occ]
+ --then (error . concat)
+ -- ["getConDescAddress:parse:"
+ -- ,"(not . all (>0) . fmap le"
+ -- ,"ngth $ [pkg,modl,occ]"]
+ then ([], [], input) -- Not in the pkg.modl.occ format, for example END_TSO_QUEUE
+ else (pkg, modl, occ)
+-- = ASSERT (all (>0) (map length [pkg, modl, occ])) (pkg, modl, occ) -- XXXXXXXXXXXXXXXX
+ where
+ (pkg, rest1) = break (== fromIntegral (ord ':')) input
+ (modl, occ)
+ = (concat $ intersperse [dot] $ reverse modWords, occWord)
+ where
+ (modWords, occWord) = if (length rest1 < 1) -- XXXXXXXXx YUKX
+ --then error "getConDescAddress:parse:length rest1 < 1"
+ then parseModOcc [] []
+ else parseModOcc [] (tail rest1)
+ -- ASSERT (length rest1 > 0) (parseModOcc [] (tail rest1))
+ dot = fromIntegral (ord '.')
+ parseModOcc :: [[Word8]] -> [Word8] -> ([[Word8]], [Word8])
+ parseModOcc acc str
+ = case break (== dot) str of
+ (top, []) -> (acc, top)
+ (top, _:bot) -> parseModOcc (top : acc) bot
+
+
+-- | This function returns parsed heap representation of the argument _at this
+-- moment_, even if it is unevaluated or an indirection or other exotic stuff.
+-- Beware when passing something to this function, the same caveats as for
+-- 'asBox' apply.
+getClosureData :: a -> IO Closure
+getClosureData x = do
+ (iptr, wds, ptrs) <- getClosureRaw x
+ itbl <- peek iptr
+ case tipe itbl of
+ t | t >= CONSTR && t <= CONSTR_NOCAF_STATIC -> do
+ (pkg, modl, name) <- dataConInfoPtrToNames iptr
+ if modl == "ByteCodeInstr" && name == "BreakInfo"
+ then return $ UnsupportedClosure itbl
+ else return $ ConsClosure itbl ptrs (drop (length ptrs + 1) wds) pkg modl name
+
+ t | t >= THUNK && t <= THUNK_STATIC -> do
+ return $ ThunkClosure itbl ptrs (drop (length ptrs + 2) wds)
+
+ t | t >= FUN && t <= FUN_STATIC -> do
+ return $ FunClosure itbl ptrs (drop (length ptrs + 1) wds)
+
+ AP -> do
+ unless (length ptrs >= 1) $
+ fail "Expected at least 1 ptr argument to AP"
+ unless (length wds >= 3) $
+ fail "Expected at least 3 raw words to AP"
+ return $ APClosure itbl
+ (fromIntegral $ wds !! 2)
+ (fromIntegral $ shiftR (wds !! 2) (wORD_SIZE_IN_BITS `div` 2))
+ (head ptrs) (tail ptrs)
+
+ PAP -> do
+ unless (length ptrs >= 1) $
+ fail "Expected at least 1 ptr argument to PAP"
+ unless (length wds >= 3) $
+ fail "Expected at least 3 raw words to AP"
+ return $ PAPClosure itbl
+ (fromIntegral $ wds !! 2)
+ (fromIntegral $ shiftR (wds !! 2) (wORD_SIZE_IN_BITS `div` 2))
+ (head ptrs) (tail ptrs)
+
+ AP_STACK -> do
+ unless (length ptrs >= 1) $
+ fail "Expected at least 1 ptr argument to AP_STACK"
+ return $ APStackClosure itbl (head ptrs) (tail ptrs)
+
+ THUNK_SELECTOR -> do
+ unless (length ptrs >= 1) $
+ fail "Expected at least 1 ptr argument to THUNK_SELECTOR"
+ return $ SelectorClosure itbl (head ptrs)
+
+ IND -> do
+ unless (length ptrs >= 1) $
+ fail "Expected at least 1 ptr argument to IND"
+ return $ IndClosure itbl (head ptrs)
+ IND_STATIC -> do
+ unless (length ptrs >= 1) $
+ fail "Expected at least 1 ptr argument to IND_STATIC"
+ return $ IndClosure itbl (head ptrs)
+ BLACKHOLE -> do
+ unless (length ptrs >= 1) $
+ fail "Expected at least 1 ptr argument to BLACKHOLE"
+ return $ BlackholeClosure itbl (head ptrs)
+
+ BCO -> do
+ unless (length ptrs >= 3) $
+ fail $ "Expected at least 3 ptr argument to BCO, found " ++ show (length ptrs)
+ unless (length wds >= 6) $
+ fail $ "Expected at least 6 words to BCO, found " ++ show (length wds)
+ return $ BCOClosure itbl (ptrs !! 0) (ptrs !! 1) (ptrs !! 2)
+ (fromIntegral $ wds !! 4)
+ (fromIntegral $ shiftR (wds !! 4) (wORD_SIZE_IN_BITS `div` 2))
+ (wds !! 5)
+
+ ARR_WORDS -> do
+ unless (length wds >= 2) $
+ fail $ "Expected at least 2 words to ARR_WORDS, found " ++ show (length wds)
+ return $ ArrWordsClosure itbl (wds !! 1) (drop 2 wds)
+
+ t | t == MUT_ARR_PTRS_FROZEN || t == MUT_ARR_PTRS_FROZEN0 -> do
+ unless (length wds >= 3) $
+ fail $ "Expected at least 3 words to MUT_ARR_PTRS_FROZEN0 found " ++ show (length wds)
+ return $ MutArrClosure itbl (wds !! 1) (wds !! 2) ptrs
+
+ t | t == MUT_VAR_CLEAN || t == MUT_VAR_DIRTY ->
+ return $ MutVarClosure itbl (head ptrs)
+
+ t | t == MVAR_CLEAN || t == MVAR_DIRTY -> do
+ unless (length ptrs >= 3) $
+ fail $ "Expected at least 3 ptrs to MVAR, found " ++ show (length ptrs)
+ return $ MVarClosure itbl (ptrs !! 0) (ptrs !! 1) (ptrs !! 2)
+
+ BLOCKING_QUEUE ->
+ return $ OtherClosure itbl ptrs wds
+ -- return $ BlockingQueueClosure itbl
+ -- (ptrs !! 0) (ptrs !! 1) (ptrs !! 2) (ptrs !! 3)
+
+ -- return $ OtherClosure itbl ptrs wds
+ --
+ _ ->
+ return $ UnsupportedClosure itbl
+
+-- | Like 'getClosureData', but taking a 'Box', so it is easier to work with.
+getBoxedClosureData :: Box -> IO Closure
+getBoxedClosureData (Box a) = getClosureData a
+
+
+isChar :: GenClosure b -> Maybe Char
+isChar (ConsClosure { name = "C#", dataArgs = [ch], ptrArgs = []}) = Just (chr (fromIntegral ch))
+isChar _ = Nothing
+
+isCons :: GenClosure b -> Maybe (b, b)
+isCons (ConsClosure { name = ":", dataArgs = [], ptrArgs = [h,t]}) = Just (h,t)
+isCons _ = Nothing
+
+isTup :: GenClosure b -> Maybe [b]
+isTup (ConsClosure { dataArgs = [], ..}) =
+ if length name >= 3 &&
+ head name == '(' && last name == ')' &&
+ all (==',') (tail (init name))
+ then Just ptrArgs else Nothing
+isTup _ = Nothing
+
+
+isNil :: GenClosure b -> Bool
+isNil (ConsClosure { name = "[]", dataArgs = [], ptrArgs = []}) = True
+isNil _ = False
+
+-- | A pretty-printer that tries to generate valid Haskell for evalutated data.
+-- It assumes that for the included boxes, you already replaced them by Strings
+-- using 'Data.Foldable.map' or, if you need to do IO, 'Data.Foldable.mapM'.
+--
+-- The parameter gives the precedendence, to avoid avoidable parenthesises.
+ppClosure :: (Int -> b -> String) -> Int -> GenClosure b -> String
+ppClosure showBox prec c = case c of
+ _ | Just ch <- isChar c -> app $
+ ["C#", show ch]
+ _ | Just (h,t) <- isCons c -> addBraces (5 <= prec) $
+ showBox 5 h ++ " : " ++ showBox 4 t
+ _ | Just vs <- isTup c ->
+ "(" ++ intercalate "," (map (showBox 0) vs) ++ ")"
+ ConsClosure {..} -> app $
+ name : map (showBox 10) ptrArgs ++ map show dataArgs
+ ThunkClosure {..} -> app $
+ "_thunk" : map (showBox 10) ptrArgs ++ map show dataArgs
+ SelectorClosure {..} -> app
+ ["_sel", showBox 10 selectee]
+ IndClosure {..} -> app
+ ["_ind", showBox 10 indirectee]
+ BlackholeClosure {..} -> app
+ ["_bh", showBox 10 indirectee]
+ APClosure {..} -> app $ map (showBox 10) $
+ fun : payload
+ PAPClosure {..} -> app $ map (showBox 10) $
+ fun : payload
+ APStackClosure {..} -> app $ map (showBox 10) $
+ fun : payload
+ BCOClosure {..} -> app
+ ["_bco"]
+ ArrWordsClosure {..} -> app
+ ["toArray", "("++show (length arrWords) ++ " words)", intercalate "," (shorten (map show arrWords)) ]
+ MutArrClosure {..} -> app
+ ["toMutArray", "("++show (length mccPayload) ++ " ptrs)", intercalate "," (shorten (map (showBox 10) mccPayload))]
+ MutVarClosure {..} -> app $
+ ["_mutVar", (showBox 10) var]
+ MVarClosure {..} -> app $
+ ["MVar", (showBox 10) value]
+ FunClosure {..} ->
+ "_fun" ++ braceize (map (showBox 0) ptrArgs ++ map show dataArgs)
+ BlockingQueueClosure {..} ->
+ "_blockingQueue"
+ OtherClosure {..} ->
+ "_other"
+ UnsupportedClosure {..} ->
+ "_unsupported"
+ where
+ app [a] = a ++ "()"
+ app xs = addBraces (10 <= prec) (intercalate " " xs)
+
+ shorten xs = if length xs > 20 then take 20 xs ++ ["(and more)"] else xs
+
+{- $heapmap
+
+ For more global views of the heap, you can use heap maps. These come in
+ variations, either a trees or as graphs, depending on
+ whether you want to detect cycles and sharing or not.
+
+ The entries of a 'HeapGraph' can be annotated with arbitrary values. Most
+ operations expect this to be in the 'Monoid' class: They use 'mempty' to
+ annotate closures added because the passed values reference them, and they
+ use 'mappend' to combine the annotations when two values conincide, e.g.
+ during 'updateHeapGraph'.
+-}
+
+-- | Heap maps as tree, i.e. no sharing, no cycles.
+data HeapTree = HeapTree Box (GenClosure HeapTree) | EndOfHeapTree
+
+heapTreeClosure :: HeapTree -> Maybe (GenClosure HeapTree)
+heapTreeClosure (HeapTree _ c) = Just c
+heapTreeClosure EndOfHeapTree = Nothing
+
+-- | Constructing an 'HeapTree' from a boxed value. It takes a depth parameter
+-- that prevents it from running ad infinitum for cyclic or infinite
+-- structures.
+buildHeapTree :: Int -> Box -> IO HeapTree
+buildHeapTree 0 _ = do
+ return $ EndOfHeapTree
+buildHeapTree n b = do
+ c <- getBoxedClosureData b
+ c' <- T.mapM (buildHeapTree (n-1)) c
+ return $ HeapTree b c'
+
+-- | Pretty-Printing a heap Tree
+--
+-- Example output for @[Just 4, Nothing, *something*]@, where *something* is an
+-- unevaluated expression depending on the command line argument.
+--
+-- >[Just (I# 4),Nothing,Just (_thunk ["arg1","arg2"])]
+ppHeapTree :: HeapTree -> String
+ppHeapTree = go 0
+ where
+ go _ EndOfHeapTree = "..."
+ go prec t@(HeapTree _ c')
+ | Just s <- isHeapTreeString t = show s
+ | Just l <- isHeapTreeList t = "[" ++ intercalate "," (map ppHeapTree l) ++ "]"
+ | Just bc <- disassembleBCO heapTreeClosure c'
+ = app ("_bco" : map (go 10) (concatMap F.toList bc))
+ | otherwise = ppClosure go prec c'
+ where
+ app [a] = a ++ "()"
+ app xs = addBraces (10 <= prec) (intercalate " " xs)
+
+isHeapTreeList :: HeapTree -> Maybe ([HeapTree])
+isHeapTreeList tree = do
+ c <- heapTreeClosure tree
+ if isNil c
+ then return []
+ else do
+ (h,t) <- isCons c
+ t' <- isHeapTreeList t
+ return $ (:) h t'
+
+isHeapTreeString :: HeapTree -> Maybe String
+isHeapTreeString t = do
+ list <- isHeapTreeList t
+ -- We do not want to print empty lists as "" as we do not know that they
+ -- are really strings.
+ if (null list)
+ then Nothing
+ else mapM (isChar <=< heapTreeClosure) list
+
+-- | For heap graphs, i.e. data structures that also represent sharing and
+-- cyclic structures, these are the entries. If the referenced value is
+-- @Nothing@, then we do not have that value in the map, most likely due to
+-- exceeding the recursion bound passed to 'buildHeapGraph'.
+--
+-- Besides a pointer to the stored value and the closure representation we
+-- also keep track of whether the value was still alive at the last update of the
+-- heap graph. In addition we have a slot for arbitrary data, for the user's convenience.
+data HeapGraphEntry a = HeapGraphEntry {
+ hgeBox :: Box,
+ hgeClosure :: GenClosure (Maybe HeapGraphIndex),
+ hgeLive :: Bool,
+ hgeData :: a}
+ deriving (Show, Functor)
+type HeapGraphIndex = Int
+
+-- | The whole graph. The suggested interface is to only use 'lookupHeapGraph',
+-- as the internal representation may change. Nevertheless, we export it here:
+-- Sometimes the user knows better what he needs than we do.
+newtype HeapGraph a = HeapGraph (M.IntMap (HeapGraphEntry a))
+ deriving (Show)
+
+lookupHeapGraph :: HeapGraphIndex -> (HeapGraph a) -> Maybe (HeapGraphEntry a)
+lookupHeapGraph i (HeapGraph m) = M.lookup i m
+
+heapGraphRoot :: HeapGraphIndex
+heapGraphRoot = 0
+
+-- | Creates a 'HeapGraph' for the value in the box, but not recursing further
+-- than the given limit. The initial value has index 'heapGraphRoot'.
+buildHeapGraph
+ :: Monoid a
+ => Int -- ^ Search limit
+ -> a -- ^ Data value for the root
+ -> Box -- ^ The value to start with
+ -> IO (HeapGraph a)
+buildHeapGraph limit rootD initialBox =
+ fst <$> multiBuildHeapGraph limit [(rootD, initialBox)]
+
+-- | Creates a 'HeapGraph' for the values in multiple boxes, but not recursing
+-- further than the given limit.
+--
+-- Returns the 'HeapGraph' and the indices of initial values. The arbitrary
+-- type @a@ can be used to make the connection between the input and the
+-- resulting list of indices, and to store additional data.
+multiBuildHeapGraph
+ :: Monoid a
+ => Int -- ^ Search limit
+ -> [(a, Box)] -- ^ Starting values with associated data entry
+ -> IO (HeapGraph a, [(a, HeapGraphIndex)])
+multiBuildHeapGraph limit = generalBuildHeapGraph limit (HeapGraph M.empty)
+
+-- | Adds an entry to an existing 'HeapGraph'.
+--
+-- Returns the updated 'HeapGraph' and the index of the added value.
+addHeapGraph
+ :: Monoid a
+ => Int -- ^ Search limit
+ -> a -- ^ Data to be stored with the added value
+ -> Box -- ^ Value to add to the graph
+ -> HeapGraph a -- ^ Graph to extend
+ -> IO (HeapGraphIndex, HeapGraph a)
+addHeapGraph limit d box hg = do
+ (hg', [(_,i)]) <- generalBuildHeapGraph limit hg [(d,box)]
+ return (i, hg')
+
+-- | Adds the given annotation to the entry at the given index, using the
+-- 'mappend' operation of its 'Monoid' instance.
+annotateHeapGraph :: Monoid a => a -> HeapGraphIndex -> HeapGraph a -> HeapGraph a
+annotateHeapGraph d i (HeapGraph hg) = HeapGraph $ M.update go i hg
+ where
+ go hge = Just $ hge { hgeData = hgeData hge <> d }
+
+generalBuildHeapGraph
+ :: Monoid a
+ => Int
+ -> HeapGraph a
+ -> [(a,Box)]
+ -> IO (HeapGraph a, [(a, HeapGraphIndex)])
+generalBuildHeapGraph limit _ _ | limit <= 0 = error "buildHeapGraph: limit has to be positive"
+generalBuildHeapGraph limit (HeapGraph hg) addBoxes = do
+ -- First collect all boxes from the existing heap graph
+ let boxList = [ (hgeBox hge, i) | (i, hge) <- M.toList hg ]
+ indices | M.null hg = [0..]
+ | otherwise = [1 + fst (M.findMax hg)..]
+
+ initialState = (boxList, indices, [])
+ -- It is ok to use the Monoid (IntMap a) instance here, because
+ -- we will, besides the first time, use 'tell' only to add singletons not
+ -- already there
+ (is, hg') <- runWriterT (evalStateT run initialState)
+ -- Now add the annotations of the root values
+ let hg'' = foldl' (flip (uncurry annotateHeapGraph)) (HeapGraph hg') is
+ return (hg'', is)
+ where
+ run = do
+ lift $ tell hg -- Start with the initial map
+ forM addBoxes $ \(d, b) -> do
+ -- Cannot fail, as limit is not zero here
+ Just i <- add limit b
+ return (d, i)
+
+ add 0 _ = return Nothing
+ add n b = do
+ -- If the box is in the map, return the index
+ (existing,_,_) <- get
+ mbI <- liftIO $ findM (areBoxesEqual b . fst) existing
+ case mbI of
+ Just (_,i) -> return $ Just i
+ Nothing -> do
+ -- Otherwise, allocate a new index
+ i <- nextI
+ -- And register it
+ modify (\(x,y,z) -> ((b,i):x, y, z))
+ -- Look up the closure
+ c <- liftIO $ getBoxedClosureData b
+ -- Find indicies for all boxes contained in the map
+ c' <- T.mapM (add (n-1)) c
+ -- Add add the resulting closure to the map
+ lift $ tell (M.singleton i (HeapGraphEntry b c' True mempty))
+ return $ Just i
+ nextI = do
+ i <- gets (head . (\(_,b,_) -> b))
+ modify (\(a,b,c) -> (a, tail b, c))
+ return i
+
+-- | This function updates a heap graph to reflect the current state of
+-- closures on the heap, conforming to the following specification.
+--
+-- * Every entry whose value has been garbage collected by now is marked as
+-- dead by setting 'hgeLive' to @False@
+-- * Every entry whose value is still live gets the 'hgeClosure' field updated
+-- and newly referenced closures are, up to the given depth, added to the graph.
+-- * A map mapping previous indicies to the corresponding new indicies is returned as well.
+-- * The closure at 'heapGraphRoot' stays at 'heapGraphRoot'
+updateHeapGraph :: Monoid a => Int -> HeapGraph a -> IO (HeapGraph a, HeapGraphIndex -> HeapGraphIndex)
+updateHeapGraph limit (HeapGraph startHG) = do
+ (hg', indexMap) <- runWriterT $ foldM go (HeapGraph M.empty) (M.toList startHG)
+ return (hg', (M.!) indexMap)
+ where
+ go hg (i, hge) = do
+ (j, hg') <- liftIO $ addHeapGraph limit (hgeData hge) (hgeBox hge) hg
+ tell (M.singleton i j)
+ return hg'
+
+-- | Pretty-prints a HeapGraph. The resulting string contains newlines. Example
+-- for @let s = \"Ki\" in (s, s, cycle \"Ho\")@:
+--
+-- >let x1 = "Ki"
+-- > x6 = C# 'H' : C# 'o' : x6
+-- >in (x1,x1,x6)
+ppHeapGraph :: HeapGraph a -> String
+ppHeapGraph (HeapGraph m) = letWrapper ++ ppRef 0 (Just heapGraphRoot)
+ where
+ -- All variables occuring more than once
+ bindings = boundMultipleTimes (HeapGraph m) [heapGraphRoot]
+
+ letWrapper =
+ if null bindings
+ then ""
+ else "let " ++ intercalate "\n " (map ppBinding bindings) ++ "\nin "
+
+ bindingLetter i = case hgeClosure (iToE i) of
+ ThunkClosure {..} -> 't'
+ SelectorClosure {..} -> 't'
+ APClosure {..} -> 't'
+ PAPClosure {..} -> 'f'
+ BCOClosure {..} -> 't'
+ FunClosure {..} -> 'f'
+ _ -> 'x'
+
+ ppBindingMap = M.fromList $
+ concat $
+ map (zipWith (\j (i,c) -> (i, [c] ++ show j)) [(1::Int)..]) $
+ groupBy ((==) `on` snd) $
+ sortBy (compare `on` snd)
+ [ (i, bindingLetter i) | i <- bindings ]
+
+ ppVar i = ppBindingMap M.! i
+ ppBinding i = ppVar i ++ " = " ++ ppEntry 0 (iToE i)
+
+ ppEntry prec hge
+ | Just s <- isString hge = show s
+ | Just l <- isList hge = "[" ++ intercalate "," (map (ppRef 0) l) ++ "]"
+ | Just bc <- disassembleBCO (fmap (hgeClosure . iToE)) (hgeClosure hge)
+ = app ("_bco" : map (ppRef 10) (concatMap F.toList bc))
+ | otherwise = ppClosure ppRef prec (hgeClosure hge)
+ where
+ app [a] = a ++ "()"
+ app xs = addBraces (10 <= prec) (intercalate " " xs)
+
+ ppRef _ Nothing = "..."
+ ppRef prec (Just i) | i `elem` bindings = ppVar i
+ | otherwise = ppEntry prec (iToE i)
+ iToE i = m M.! i
+
+ iToUnboundE i = if i `elem` bindings then Nothing else M.lookup i m
+
+ isList :: HeapGraphEntry a -> Maybe ([Maybe HeapGraphIndex])
+ isList hge =
+ if isNil (hgeClosure hge)
+ then return []
+ else do
+ (h,t) <- isCons (hgeClosure hge)
+ ti <- t
+ e <- iToUnboundE ti
+ t' <- isList e
+ return $ (:) h t'
+
+ isString :: HeapGraphEntry a -> Maybe String
+ isString e = do
+ list <- isList e
+ -- We do not want to print empty lists as "" as we do not know that they
+ -- are really strings.
+ if (null list)
+ then Nothing
+ else mapM (isChar . hgeClosure <=< iToUnboundE <=< id) list
+
+
+-- | In the given HeapMap, list all indices that are used more than once. The
+-- second parameter adds external references, commonly @[heapGraphRoot]@.
+boundMultipleTimes :: HeapGraph a -> [HeapGraphIndex] -> [HeapGraphIndex]
+boundMultipleTimes (HeapGraph m) roots = map head $ filter (not.null) $ map tail $ group $ sort $
+ roots ++ concatMap (catMaybes . allPtrs . hgeClosure) (M.elems m)
+
+-- | This function integrates the disassembler in "GHC.Disassembler". The first
+-- argument should a function that dereferences the pointer in the closure to a
+-- closure.
+--
+-- If any of these return 'Nothing', then 'disassembleBCO' returns Nothing
+disassembleBCO :: (a -> Maybe (GenClosure b)) -> GenClosure a -> Maybe [BCI b]
+disassembleBCO deref (BCOClosure {..}) = do
+ opsC <- deref instrs
+ litsC <- deref literals
+ ptrsC <- deref bcoptrs
+ return $ disassemble (mccPayload ptrsC) (arrWords litsC) (toBytes (bytes opsC) (arrWords opsC))
+disassembleBCO _ _ = Nothing
+
+-- Utilities
+
+findM :: (a -> IO Bool) -> [a] -> IO (Maybe a)
+findM _p [] = return Nothing
+findM p (x:xs) = do
+ b <- p x
+ if b then return (Just x) else findM p xs
+
+addBraces :: Bool -> String -> String
+addBraces True t = "(" ++ t ++ ")"
+addBraces False t = t
+
+braceize :: [String] -> String
+braceize [] = ""
+braceize xs = "{" ++ intercalate "," xs ++ "}"
+
+-- This used to be available via GHC.Constants
+#include "MachDeps.h"
+wORD_SIZE, tAG_MASK, wORD_SIZE_IN_BITS :: Int
+wORD_SIZE = SIZEOF_HSWORD
+tAG_MASK = (1 `shift` TAG_BITS) - 1
+wORD_SIZE_IN_BITS = WORD_SIZE_IN_BITS
+