diff options
-rw-r--r-- | libraries/heapview/.gitignore | 4 | ||||
-rw-r--r-- | libraries/heapview/GHC/AssertNF.hs | 150 | ||||
-rw-r--r-- | libraries/heapview/GHC/Disassembler.hs | 290 | ||||
-rw-r--r-- | libraries/heapview/GHC/HeapView.hs | 1016 | ||||
-rw-r--r-- | libraries/heapview/GHC/HeapView/Debug.hs | 68 | ||||
-rw-r--r-- | libraries/heapview/LICENSE | 30 | ||||
-rw-r--r-- | libraries/heapview/Setup.hs | 6 | ||||
-rw-r--r-- | libraries/heapview/heapview.cabal | 53 |
8 files changed, 1617 insertions, 0 deletions
diff --git a/libraries/heapview/.gitignore b/libraries/heapview/.gitignore new file mode 100644 index 0000000000..89cf73d0b3 --- /dev/null +++ b/libraries/heapview/.gitignore @@ -0,0 +1,4 @@ +GNUmakefile +/dist-install/ +/dist/ +ghc.mk diff --git a/libraries/heapview/GHC/AssertNF.hs b/libraries/heapview/GHC/AssertNF.hs new file mode 100644 index 0000000000..ce1a52f972 --- /dev/null +++ b/libraries/heapview/GHC/AssertNF.hs @@ -0,0 +1,150 @@ +{-# LANGUAGE BangPatterns, DoAndIfThenElse #-} + +{-| +Module : GHC.AssertNF +Copyright : (c) 2013 Joachim Breitner +License : BSD3 +Maintainer : Joachim Breitner <mail@joachim-breitner.de> + +To avoid space leaks and unwanted evaluation behaviour, the programmer might want his data to be fully evaluated at certain positions in the code. This can be enforced, for example, by ample use of "Control.DeepSeq", but this comes at a cost. + +Experienced users hence use 'Control.DeepSeq.deepseq' only to find out about the existence of space leaks and optimize their code to not create the thunks in the first place, until the code no longer shows better performance with 'deepseq'. + +This module provides an alternative approach: An explicit assertion about the evaluation state. If the programmer expect a certain value to be fully evaluated at a specific point of the program (e.g. before a call to 'writeIORef'), he can state that, and as long as assertions are enabled, this statement will be checked. In the production code the assertions can be disabled, to avoid the run-time cost. + +-} + + +module GHC.AssertNF ( + assertNF, + assertNFNamed, + assertNFHere, + disableAssertNF, + isNF, + ) +where + +import GHC.HeapView +import Debug.Trace +import Control.Monad +import Text.Printf +import Language.Haskell.TH (Q, Exp(AppE,VarE,LitE), Lit(StringL), Loc, location, loc_filename, loc_start, mkName) +import Data.IORef +import System.IO.Unsafe ( unsafePerformIO ) + +enabledRef :: IORef Bool +enabledRef = unsafePerformIO $ newIORef True +{-# NOINLINE enabledRef #-} + +-- Everything is in normal form, unless it is a +-- thunk explicitly marked as such. +-- Indirection are also considered to be in HNF +isHNF :: Closure -> IO Bool +isHNF c = do + case c of + ThunkClosure {} -> return False + APClosure {} -> return False + SelectorClosure {} -> return False + BCOClosure {} -> return False + _ -> return True + +-- | The function 'assertNF' checks whether its argument is fully evaluated and +-- deeply evaluated. If this is not the case, a warning is printed to the standard output, +-- giving the number of thunks found and printing the shape of the unevaluated object: +-- +-- >> let x = 1 + 2 +-- >> let y = (x,x) +-- >> assertNF y +-- >Parameter not in normal form: 2 thunks found: +-- >let t1 = _bco +-- >in (t1,t1) +-- >> x +-- >3 +-- >> assertNF y +-- >> +-- +assertNF :: a -> IO () +assertNF = assertNF' "Parameter not in normal form" + +-- | In order to better identify the source of error messages from 'assertNF', this variant allows you to include a name that is printed in the output: +-- +-- >> assertNFNamed "y" y +-- >y not in normal form: 2 thunks found: +-- >let t1 = _bco +-- >in (t1,t1) +-- +assertNFNamed :: String -> a -> IO () +assertNFNamed valName = assertNF' (valName ++ " not in normal form") + +-- | This function, when called as @$assertNFHere@ in a module with @-XTemplateHaskell@ enabled, will cause the current filename and position be included in the error message: +-- +-- >Parameter at Test.hs:18:1 not in normal form: 2 thunks found: +-- >let t1 = _bco +-- >in (t1,t1) +-- +assertNFHere :: Q Exp +assertNFHere = do + locStr <- formatLoc <$> location + -- We don't use ''assertNF here, so that this module can be used on a + -- compiler that does not support TH. + return $ AppE (VarE (mkName "GHC.AssertNF.assertNFNamed")) + (LitE (StringL locStr)) + where formatLoc :: Loc -> String + formatLoc loc = let file = loc_filename loc + (line, col) = loc_start loc + in printf "parameter at %s:%d:%d" file line col + +assertNF' :: String -> a -> IO () +assertNF' str x = do + en <- readIORef enabledRef + when en $ do + depths <- assertNFBoxed 0 (asBox x) + unless (null depths) $ do + g <- buildHeapGraph (maximum depths + 3) () (asBox x) + -- +3 for good mesure; applications don't look good otherwise + traceIO $ str ++ ": " ++ show (length depths) ++ " thunks found:\n" ++ + ppHeapGraph g + + +assertNFBoxed :: Int -> Box -> IO [Int] +assertNFBoxed !d b = do + c <- getBoxedClosureData b + nf <- isHNF c + if nf + then do + c' <- getBoxedClosureData b + concat <$> mapM (assertNFBoxed (d+1)) (allPtrs c') + else do + return [d] + +-- | Invoke this function at the top of your 'main' method to turn every call +-- to 'assertNF' and its variants to noops. +disableAssertNF :: IO () +disableAssertNF = writeIORef enabledRef False + +-- | A variant of 'assertNF' that does not print anything and just returns +-- 'True' if the value is in normal form, or 'False' otherwise. This function +-- is not affected by 'disableAssertNF'. +isNF :: a -> IO Bool +isNF x = isNFBoxed (asBox x) + +isNFBoxed :: Box -> IO Bool +isNFBoxed b = do + c <- getBoxedClosureData b + nf <- isHNF c + if nf + then do + c' <- getBoxedClosureData b + allM isNFBoxed (allPtrs c') + else do + return False + +-- From Control.Monad.Loops in monad-loops, but I'd like to avoid too many +-- trivial dependencies +allM :: (Monad m) => (a -> m Bool) -> [a] -> m Bool +allM _ [] = return True +allM p (x:xs) = do + q <- p x + if q + then allM p xs + else return False diff --git a/libraries/heapview/GHC/Disassembler.hs b/libraries/heapview/GHC/Disassembler.hs new file mode 100644 index 0000000000..95e1d89d0a --- /dev/null +++ b/libraries/heapview/GHC/Disassembler.hs @@ -0,0 +1,290 @@ +{-# LANGUAGE CPP, ScopedTypeVariables, DoAndIfThenElse, NondecreasingIndentation, DeriveFunctor, DeriveFoldable, DeriveTraversable #-} +-- | A disassembler for ByteCode objects as used by GHCi. +module GHC.Disassembler ( + toBytes, + disassemble, + BCI(..) ) where + +import Data.Word +import Data.Int +import Data.Bits +import Data.Foldable ( Foldable ) +import Data.Traversable ( Traversable ) + +#include "ghcautoconf.h" +#include "rts/Bytecodes.h" + +-- | Converts the first @n@ bytes of this list of Words to a ByteString. +toBytes :: Word -> [Word] -> ByteString +toBytes n = + BS.take (fromIntegral n) . + toLazyByteString . + mconcat . + map (wordHost . fromIntegral) + +-- | Given a list of pointers, a list of literals and a ByteString containing +-- byte code instructions, disassembles them into a list of byte code instructions. +disassemble :: forall box. [box] -> [Word] -> ByteString -> [BCI box] +disassemble ptrs lits = runGet $ do + -- Ignore length tag. Needs to be skipped with GHC versions with + -- http://hackage.haskell.org/trac/ghc/ticket/7518 included + _ <- getWord16host +#if SIZEOF_VOID_P == 8 + _ <- getWord16host + _ <- getWord16host +#endif + nextInst + where + getLiteral :: Get Word + getLiteral = ((!!) lits) . fromIntegral <$> getWord16host + + getLiterals = do + p <- fromIntegral <$> getWord16host + n <- fromIntegral <$> getWord16host + return $ take n (drop p lits) + + getAddr :: Int -> box + getAddr p = ptrs !! p + + getPtr :: Get box + getPtr = getAddr . fromIntegral <$> getWord16host + + nextInst :: Get [BCI box] + nextInst = do + e <- isEmpty + if e then return [] else do + w <- getWord16host + let large = 0 /= w .&. 0x8000 + + let getLarge = if large then getWordhost else fromIntegral `fmap` getWord16host + let getLargeInt = if large then getInthost else fromIntegral `fmap` getInt16host + + i <- case w .&. 0xff of + bci_STKCHECK -> do + n <- getLarge + return $ BCISTKCHECK (n + 1) + bci_PUSH_L -> do + o1 <- getWord16host + return $ BCIPUSH_L o1 + bci_PUSH_LL -> do + o1 <- getWord16host + o2 <- getWord16host + return $ BCIPUSH_LL o1 o2 + bci_PUSH_LLL -> do + o1 <- getWord16host + o2 <- getWord16host + o3 <- getWord16host + return $ BCIPUSH_LLL o1 o2 o3 + bci_PUSH_G -> do + p <- getPtr + return $ BCIPUSH_G p + bci_PUSH_ALTS -> do + p <- getPtr + return $ BCIPUSH_ALTS p + bci_PUSH_ALTS_P -> do + p <- getPtr + return $ BCIPUSH_ALTS_P p + bci_PUSH_ALTS_N -> do + p <- getPtr + return $ BCIPUSH_ALTS_N p + bci_PUSH_ALTS_F -> do + p <- getPtr + return $ BCIPUSH_ALTS_F p + bci_PUSH_ALTS_D -> do + p <- getPtr + return $ BCIPUSH_ALTS_D p + bci_PUSH_ALTS_L -> do + p <- getPtr + return $ BCIPUSH_ALTS_L p + bci_PUSH_ALTS_V -> do + p <- getPtr + return $ BCIPUSH_ALTS_V p + bci_PUSH_UBX -> do + ubx_lits <- getLiterals + return $ BCIPUSH_UBX ubx_lits + bci_PUSH_APPLY_N -> do + return BCIPUSH_APPLY_N + bci_PUSH_APPLY_F -> do + return BCIPUSH_APPLY_F + bci_PUSH_APPLY_D -> do + return BCIPUSH_APPLY_D + bci_PUSH_APPLY_L -> do + return BCIPUSH_APPLY_L + bci_PUSH_APPLY_V -> do + return BCIPUSH_APPLY_V + bci_PUSH_APPLY_P -> do + return BCIPUSH_APPLY_P + bci_PUSH_APPLY_PP -> do + return BCIPUSH_APPLY_PP + bci_PUSH_APPLY_PPP -> do + return BCIPUSH_APPLY_PPP + bci_PUSH_APPLY_PPPP -> do + return BCIPUSH_APPLY_PPPP + bci_PUSH_APPLY_PPPPP -> do + return BCIPUSH_APPLY_PPPPP + bci_PUSH_APPLY_PPPPPP -> do + return BCIPUSH_APPLY_PPPPPP + bci_SLIDE -> do + p <- getWord16host + n <- getWord16host + return $ BCISLIDE p n + bci_ALLOC_AP -> do + n <- getWord16host + return $ BCIALLOC_AP n + bci_ALLOC_AP_NOUPD -> do + n <- getWord16host + return $ BCIALLOC_AP_NOUPD n + bci_ALLOC_PAP -> do + a <- getWord16host + n <- getWord16host + return $ BCIALLOC_PAP a n + bci_MKAP -> do + n <- getWord16host + s <- getWord16host + return $ BCIMKAP n s + bci_MKPAP -> do + n <- getWord16host + s <- getWord16host + return $ BCIMKPAP n s + bci_UNPACK -> do + n <- getWord16host + return $ BCIUNPACK n + bci_PACK -> do + p <- getLiteral + n <- getWord16host + return $ BCIPACK p n + bci_TESTLT_I -> do + d <- getLargeInt + t <- getLargeInt + return $ BCITESTLT_I d t + bci_TESTEQ_I -> do + d <- getLargeInt + t <- getLargeInt + return $ BCITESTEQ_I d t + bci_TESTLT_W -> do + d <- getLarge + t <- getLargeInt + return $ BCITESTLT_W d t + bci_TESTEQ_W -> do + d <- getLarge + t <- getLargeInt + return $ BCITESTEQ_W d t + bci_TESTLT_F -> do + d <- getLarge + t <- getLargeInt + return $ BCITESTLT_F d t + bci_TESTEQ_F -> do + d <- getLarge + t <- getLargeInt + return $ BCITESTEQ_F d t + bci_TESTLT_D -> do + d <- getLarge + t <- getLargeInt + return $ BCITESTLT_D d t + bci_TESTEQ_D -> do + d <- getLarge + t <- getLargeInt + return $ BCITESTEQ_D d t + bci_TESTLT_P -> do + d <- getWord16host + t <- getLargeInt + return $ BCITESTLT_P d t + bci_TESTEQ_P -> do + d <- getWord16host + t <- getLargeInt + return $ BCITESTEQ_P d t + bci_CASEFAIL -> do + return BCICASEFAIL + bci_JMP -> do + return BCIJMP + bci_CCALL -> do + p <- getLiteral + return $ BCICCALL p + bci_SWIZZLE -> do + p <- getWord16host + n <- getInt16host + return $ BCISWIZZLE p n + bci_ENTER -> do + return BCIENTER + bci_RETURN -> do + return BCIRETURN + bci_RETURN_P -> do + return BCIRETURN_P + bci_RETURN_N -> do + return BCIRETURN_N + bci_RETURN_F -> do + return BCIRETURN_F + bci_RETURN_D -> do + return BCIRETURN_D + bci_RETURN_L -> do + return BCIRETURN_L + bci_RETURN_V -> do + return BCIRETURN_V + bci_BRK_FUN -> do + _ <- getWord16host + _ <- getWord16host + _ <- getWord16host + return BCIBRK_FUN + x -> error $ "Unknown opcode " ++ show x + (i :) `fmap` nextInst + + +-- | The various byte code instructions that GHCi supports. +data BCI box + = BCISTKCHECK Word + | BCIPUSH_L Word16 + | BCIPUSH_LL Word16 Word16 + | BCIPUSH_LLL Word16 Word16 Word16 + | BCIPUSH_G box + | BCIPUSH_ALTS box + | BCIPUSH_ALTS_P box + | BCIPUSH_ALTS_N box + | BCIPUSH_ALTS_F box + | BCIPUSH_ALTS_D box + | BCIPUSH_ALTS_L box + | BCIPUSH_ALTS_V box + | BCIPUSH_UBX [Word] + | BCIPUSH_APPLY_N + | BCIPUSH_APPLY_F + | BCIPUSH_APPLY_D + | BCIPUSH_APPLY_L + | BCIPUSH_APPLY_V + | BCIPUSH_APPLY_P + | BCIPUSH_APPLY_PP + | BCIPUSH_APPLY_PPP + | BCIPUSH_APPLY_PPPP + | BCIPUSH_APPLY_PPPPP + | BCIPUSH_APPLY_PPPPPP +/* | BCIPUSH_APPLY_PPPPPPP */ + | BCISLIDE Word16 Word16 + | BCIALLOC_AP Word16 + | BCIALLOC_AP_NOUPD Word16 + | BCIALLOC_PAP Word16 Word16 + | BCIMKAP Word16 Word16 + | BCIMKPAP Word16 Word16 + | BCIUNPACK Word16 + | BCIPACK Word Word16 + | BCITESTLT_I Int Int + | BCITESTEQ_I Int Int + | BCITESTLT_F Word Int + | BCITESTEQ_F Word Int + | BCITESTLT_D Word Int + | BCITESTEQ_D Word Int + | BCITESTLT_P Word16 Int + | BCITESTEQ_P Word16 Int + | BCICASEFAIL + | BCIJMP + | BCICCALL Word + | BCISWIZZLE Word16 Int16 + | BCIENTER + | BCIRETURN + | BCIRETURN_P + | BCIRETURN_N + | BCIRETURN_F + | BCIRETURN_D + | BCIRETURN_L + | BCIRETURN_V + | BCIBRK_FUN -- ^ We do not parse this opcode's arguments + | BCITESTLT_W Word Int + | BCITESTEQ_W Word Int + deriving (Show, Functor, Traversable, Foldable) diff --git a/libraries/heapview/GHC/HeapView.hs b/libraries/heapview/GHC/HeapView.hs new file mode 100644 index 0000000000..cd4f7de738 --- /dev/null +++ b/libraries/heapview/GHC/HeapView.hs @@ -0,0 +1,1016 @@ +{-# LANGUAGE MagicHash, UnboxedTuples, CPP, ForeignFunctionInterface, GHCForeignImportPrim, UnliftedFFITypes, BangPatterns, RecordWildCards, DeriveFunctor, DeriveFoldable, DeriveTraversable, PatternGuards #-} +{-| +Module : GHC.HeapView +Copyright : (c) 2012 Joachim Breitner +License : BSD3 +Maintainer : Joachim Breitner <mail@joachim-breitner.de> + +With this module, you can investigate the heap representation of Haskell +values, i.e. to investigate sharing and lazy evaluation. +-} + + +module GHC.HeapView ( + -- * Heap data types + GenClosure(..), + Closure, + allPtrs, + ClosureType(..), + StgInfoTable(..), + HalfWord, + -- * Reading from the heap + getClosureData, + getBoxedClosureData, + getClosureRaw, + -- * Pretty printing + ppClosure, + -- * Heap maps + -- $heapmap + HeapTree(..), + buildHeapTree, + ppHeapTree, + HeapGraphEntry(..), + HeapGraphIndex, + HeapGraph(..), + lookupHeapGraph, + heapGraphRoot, + buildHeapGraph, + multiBuildHeapGraph, + addHeapGraph, + annotateHeapGraph, + updateHeapGraph, + ppHeapGraph, + -- * Boxes + Box(..), + asBox, + areBoxesEqual, + -- * Disassembler + disassembleBCO, + ) + where + +import GHC.Exts ( Any, + Ptr(..), Addr#, Int(..), Word(..), Word#, Int#, + ByteArray#, Array#, sizeofByteArray#, sizeofArray#, indexArray#, indexWordArray#, + unsafeCoerce# ) + +import GHC.Arr (Array(..)) + + +import Foreign hiding ( void ) +import Numeric ( showHex ) +import Data.Char +import Data.List +import Data.Maybe ( catMaybes ) +import Data.Monoid ( Monoid, (<>), mempty ) +import Data.Functor +import Data.Function +import Data.Foldable ( Foldable ) +import qualified Data.Foldable as F +import Data.Traversable ( Traversable ) +import qualified Data.Traversable as T +import Control.Monad +import Control.Exception.Base (evaluate) + +import GHC.Disassembler + +#include "ghcautoconf.h" + +-- | An arbitrarily Haskell value in a safe Box. The point is that even +-- unevaluated thunks can safely be moved around inside the Box, and when +-- required, e.g. in 'getBoxedClosureData', the function knows how far it has +-- to evalue the argument. +data Box = Box Any + +#if SIZEOF_VOID_P == 8 +type HalfWord = Word32 +#else +type HalfWord = Word16 +#endif + +instance Show Box where +-- From libraries/base/GHC/Ptr.lhs + showsPrec _ (Box a) rs = + -- unsafePerformIO (print "↓" >> pClosure a) `seq` + pad_out (showHex addr "") ++ (if tag>0 then "/" ++ show tag else "") ++ rs + where + ptr = W# (aToWord# a) + tag = ptr .&. fromIntegral tAG_MASK -- ((1 `shiftL` TAG_BITS) -1) + addr = ptr - tag + -- want 0s prefixed to pad it out to a fixed length. + pad_out ls = + '0':'x':(replicate (2*wORD_SIZE - length ls) '0') ++ ls + +-- | Boxes can be compared, but this is not pure, as different heap objects can, +-- after garbage collection, become the same object. +areBoxesEqual :: Box -> Box -> IO Bool +areBoxesEqual (Box a) (Box b) = case reallyUnsafePtrEqualityUpToTag# a b of + 0# -> return False + _ -> return True + + +{-| + This takes an arbitrary value and puts it into a box. Note that calls like + + > asBox (head list) + + will put the thunk \"head list\" into the box, /not/ the element at the head + of the list. For that, use careful case expressions: + + > case list of x:_ -> asBox x +-} +asBox :: a -> Box +asBox x = Box (unsafeCoerce# x) + +{- + StgInfoTable parsing derived from ByteCodeItbls.lhs + Removed the code parameter for now + Replaced Type by an enumeration + Remove stuff dependent on GHCI_TABLES_NEXT_TO_CODE + -} + +{-| This is a somewhat faithful representation of an info table. See + <http://hackage.haskell.org/trac/ghc/browser/includes/rts/storage/InfoTables.h> + for more details on this data structure. Note that the 'Storable' instance + provided here does _not_ support writing. + -} +data StgInfoTable = StgInfoTable { + ptrs :: HalfWord, + nptrs :: HalfWord, + tipe :: ClosureType, + srtlen :: HalfWord + } + deriving (Show) + +instance Storable StgInfoTable where + + sizeOf itbl + = sum + [ + fieldSz ptrs itbl, + fieldSz nptrs itbl, + sizeOf (undefined :: HalfWord), + fieldSz srtlen itbl + ] + + alignment _ + = wORD_SIZE + + poke _a0 _itbl + = error "Storable StgInfoTable is read-only" + + peek a0 + = flip (evalStateT) (castPtr a0) + $ do + ptrs' <- load + nptrs' <- load + tipe' <- load + srtlen' <- load + return + StgInfoTable { + ptrs = ptrs', + nptrs = nptrs', + tipe = toEnum (fromIntegral (tipe'::HalfWord)), + srtlen = srtlen' + } + +fieldSz :: Storable b => (a -> b) -> a -> Int +fieldSz sel x = sizeOf (sel x) + +load :: Storable a => PtrIO a +load = do addr <- advance + lift (peek addr) + +type PtrIO = StateT (Ptr Word8) IO + +advance :: Storable a => PtrIO (Ptr a) +advance = StateT adv where + adv addr = case castPtr addr of { addrCast -> return + (addrCast, addr `plusPtr` sizeOfPointee addrCast) } + +sizeOfPointee :: (Storable a) => Ptr a -> Int +sizeOfPointee addr = sizeOf (typeHack addr) + where typeHack = undefined :: Ptr a -> a + +{- + Data Type representing Closures + -} + + +{-| A closure type enumeration, in order matching the actual value on the heap. + Needs to be synchronized with + <http://hackage.haskell.org/trac/ghc/browser/includes/rts/storage/ClosureTypes.h> + -} +data ClosureType = + INVALID_OBJECT + | CONSTR + | CONSTR_1_0 + | CONSTR_0_1 + | CONSTR_2_0 + | CONSTR_1_1 + | CONSTR_0_2 + | CONSTR_STATIC + | CONSTR_NOCAF_STATIC + | FUN + | FUN_1_0 + | FUN_0_1 + | FUN_2_0 + | FUN_1_1 + | FUN_0_2 + | FUN_STATIC + | THUNK + | THUNK_1_0 + | THUNK_0_1 + | THUNK_2_0 + | THUNK_1_1 + | THUNK_0_2 + | THUNK_STATIC + | THUNK_SELECTOR + | BCO + | AP + | PAP + | AP_STACK + | IND + | IND_PERM + | IND_STATIC + | RET_BCO + | RET_SMALL + | RET_BIG + | RET_FUN + | UPDATE_FRAME + | CATCH_FRAME + | UNDERFLOW_FRAME + | STOP_FRAME + | BLOCKING_QUEUE + | BLACKHOLE + | MVAR_CLEAN + | MVAR_DIRTY + | ARR_WORDS + | MUT_ARR_PTRS_CLEAN + | MUT_ARR_PTRS_DIRTY + | MUT_ARR_PTRS_FROZEN0 + | MUT_ARR_PTRS_FROZEN + | MUT_VAR_CLEAN + | MUT_VAR_DIRTY + | WEAK + | PRIM + | MUT_PRIM + | TSO + | STACK + | TREC_CHUNK + | ATOMICALLY_FRAME + | CATCH_RETRY_FRAME + | CATCH_STM_FRAME + | WHITEHOLE + deriving (Show, Eq, Enum, Ord) + +{-| This is the main data type of this module, representing a Haskell value on + the heap. This reflects + <http://hackage.haskell.org/trac/ghc/browser/includes/rts/storage/Closures.h> + + The data type is parametrized by the type to store references in, which + is usually a 'Box' with appropriate type synonym 'Closure'. + -} +data GenClosure b = + ConsClosure { + info :: StgInfoTable + , ptrArgs :: [b] + , dataArgs :: [Word] + , pkg :: String + , modl :: String + , name :: String + } | + ThunkClosure { + info :: StgInfoTable + , ptrArgs :: [b] + , dataArgs :: [Word] + } | + SelectorClosure { + info :: StgInfoTable + , selectee :: b + } | + IndClosure { + info :: StgInfoTable + , indirectee :: b + } | + BlackholeClosure { + info :: StgInfoTable + , indirectee :: b + } | + -- In GHCi, if Linker.h would allow a reverse lookup, we could for exported + -- functions fun actually find the name here. + -- At least the other direction works via "lookupSymbol + -- base_GHCziBase_zpzp_closure" and yields the same address (up to tags) + APClosure { + info :: StgInfoTable + , arity :: HalfWord + , n_args :: HalfWord + , fun :: b + , payload :: [b] + } | + PAPClosure { + info :: StgInfoTable + , arity :: HalfWord + , n_args :: HalfWord + , fun :: b + , payload :: [b] + } | + APStackClosure { + info :: StgInfoTable + , fun :: b + , payload :: [b] + } | + BCOClosure { + info :: StgInfoTable + , instrs :: b + , literals :: b + , bcoptrs :: b + , arity :: HalfWord + , size :: HalfWord + , bitmap :: Word + } | + ArrWordsClosure { + info :: StgInfoTable + , bytes :: Word + , arrWords :: [Word] + } | + MutArrClosure { + info :: StgInfoTable + , mccPtrs :: Word + , mccSize :: Word + , mccPayload :: [b] + -- Card table ignored + } | + MutVarClosure { + info :: StgInfoTable + , var :: b + } | + MVarClosure { + info :: StgInfoTable + , queueHead :: b + , queueTail :: b + , value :: b + } | + FunClosure { + info :: StgInfoTable + , ptrArgs :: [b] + , dataArgs :: [Word] + } | + BlockingQueueClosure { + info :: StgInfoTable + , link :: b + , blackHole :: b + , owner :: b + , queue :: b + } | + OtherClosure { + info :: StgInfoTable + , hvalues :: [b] + , rawWords :: [Word] + } | + UnsupportedClosure { + info :: StgInfoTable + } + deriving (Show, Functor, Foldable, Traversable) + + +type Closure = GenClosure Box + +-- | For generic code, this function returns all referenced closures. +allPtrs :: GenClosure b -> [b] +allPtrs (ConsClosure {..}) = ptrArgs +allPtrs (ThunkClosure {..}) = ptrArgs +allPtrs (SelectorClosure {..}) = [selectee] +allPtrs (IndClosure {..}) = [indirectee] +allPtrs (BlackholeClosure {..}) = [indirectee] +allPtrs (APClosure {..}) = fun:payload +allPtrs (PAPClosure {..}) = fun:payload +allPtrs (APStackClosure {..}) = fun:payload +allPtrs (BCOClosure {..}) = [instrs,literals,bcoptrs] +allPtrs (ArrWordsClosure {..}) = [] +allPtrs (MutArrClosure {..}) = mccPayload +allPtrs (MutVarClosure {..}) = [var] +allPtrs (MVarClosure {..}) = [queueHead,queueTail,value] +allPtrs (FunClosure {..}) = ptrArgs +allPtrs (BlockingQueueClosure {..}) = [link, blackHole, owner, queue] +allPtrs (OtherClosure {..}) = hvalues +allPtrs (UnsupportedClosure {..}) = [] + + +foreign import prim "aToWordzh" aToWord# :: Any -> Word# +foreign import prim "slurpClosurezh" slurpClosure# :: Any -> (# Addr#, ByteArray#, Array# b #) +foreign import prim "reallyUnsafePtrEqualityUpToTag" reallyUnsafePtrEqualityUpToTag# :: Any -> Any -> Int# + +--pClosure x = do +-- getClosure x >>= print + +-- | This returns the raw representation of the given argument. The second +-- component of the triple are the words on the heap, and the third component +-- are those words that are actually pointers. Once back in Haskell word, the +-- 'Word' may be outdated after a garbage collector run, but the corresponding +-- 'Box' will still point to the correct value. +getClosureRaw :: a -> IO (Ptr StgInfoTable, [Word], [Box]) +getClosureRaw x = + case slurpClosure# (unsafeCoerce# x) of + (# iptr, dat, ptrs #) -> do + let nelems = (I# (sizeofByteArray# dat)) `div` wORD_SIZE + rawWords = [W# (indexWordArray# dat i) | I# i <- [0.. fromIntegral nelems -1] ] + pelems = I# (sizeofArray# ptrs) + ptrList = amap' Box $ Array 0 (pelems - 1) pelems ptrs + -- This is just for good measure, and seems to be not important. + mapM_ evaluate ptrList + -- This seems to be required to avoid crashes as well + void $ evaluate nelems + -- The following deep evaluation is crucial to avoid crashes (but why)? + mapM_ evaluate rawWords + return (Ptr iptr, rawWords, ptrList) + +-- From compiler/ghci/RtClosureInspect.hs +amap' :: (t -> b) -> Array Int t -> [b] +amap' f (Array i0 i _ arr#) = map g [0 .. i - i0] + where g (I# i#) = case indexArray# arr# i# of + (# e #) -> f e + +-- derived from vacuum-1.0.0.2/src/GHC/Vacuum/Internal.hs, which got it from +-- compiler/ghci/DebuggerUtils.hs +dataConInfoPtrToNames :: Ptr StgInfoTable -> IO (String, String, String) +dataConInfoPtrToNames ptr = do + conDescAddress <- getConDescAddress ptr + wl <- peekArray0 0 conDescAddress + let (pkg, modl, name) = parse wl + return (b2s pkg, b2s modl, b2s name) + where + b2s :: [Word8] -> String + b2s = fmap (chr . fromIntegral) + + getConDescAddress :: Ptr StgInfoTable -> IO (Ptr Word8) + getConDescAddress ptr' + | True = do + offsetToString <- peek (ptr' `plusPtr` (negate wORD_SIZE)) + return $ (ptr' `plusPtr` stdInfoTableSizeB) + `plusPtr` (fromIntegral (offsetToString :: Word)) + -- This is code for !ghciTablesNextToCode: + {- + | otherwise = peek . intPtrToPtr + . (+ fromIntegral + stdInfoTableSizeB) + . ptrToIntPtr $ ptr + -} + + -- hmmmmmm. Is there any way to tell this? + opt_SccProfilingOn = False + + stdInfoTableSizeW :: Int + -- The size of a standard info table varies with profiling/ticky etc, + -- so we can't get it from Constants + -- It must vary in sync with mkStdInfoTable + stdInfoTableSizeW + = size_fixed + size_prof + where + size_fixed = 2 -- layout, type + size_prof | opt_SccProfilingOn = 2 + | otherwise = 0 + + stdInfoTableSizeB :: Int + stdInfoTableSizeB = stdInfoTableSizeW * wORD_SIZE + +-- From vacuum-1.0.0.2/src/GHC/Vacuum/Internal.hs +parse :: [Word8] -> ([Word8], [Word8], [Word8]) +parse input = if not . all (>0) . fmap length $ [pkg,modl,occ] + --then (error . concat) + -- ["getConDescAddress:parse:" + -- ,"(not . all (>0) . fmap le" + -- ,"ngth $ [pkg,modl,occ]"] + then ([], [], input) -- Not in the pkg.modl.occ format, for example END_TSO_QUEUE + else (pkg, modl, occ) +-- = ASSERT (all (>0) (map length [pkg, modl, occ])) (pkg, modl, occ) -- XXXXXXXXXXXXXXXX + where + (pkg, rest1) = break (== fromIntegral (ord ':')) input + (modl, occ) + = (concat $ intersperse [dot] $ reverse modWords, occWord) + where + (modWords, occWord) = if (length rest1 < 1) -- XXXXXXXXx YUKX + --then error "getConDescAddress:parse:length rest1 < 1" + then parseModOcc [] [] + else parseModOcc [] (tail rest1) + -- ASSERT (length rest1 > 0) (parseModOcc [] (tail rest1)) + dot = fromIntegral (ord '.') + parseModOcc :: [[Word8]] -> [Word8] -> ([[Word8]], [Word8]) + parseModOcc acc str + = case break (== dot) str of + (top, []) -> (acc, top) + (top, _:bot) -> parseModOcc (top : acc) bot + + +-- | This function returns parsed heap representation of the argument _at this +-- moment_, even if it is unevaluated or an indirection or other exotic stuff. +-- Beware when passing something to this function, the same caveats as for +-- 'asBox' apply. +getClosureData :: a -> IO Closure +getClosureData x = do + (iptr, wds, ptrs) <- getClosureRaw x + itbl <- peek iptr + case tipe itbl of + t | t >= CONSTR && t <= CONSTR_NOCAF_STATIC -> do + (pkg, modl, name) <- dataConInfoPtrToNames iptr + if modl == "ByteCodeInstr" && name == "BreakInfo" + then return $ UnsupportedClosure itbl + else return $ ConsClosure itbl ptrs (drop (length ptrs + 1) wds) pkg modl name + + t | t >= THUNK && t <= THUNK_STATIC -> do + return $ ThunkClosure itbl ptrs (drop (length ptrs + 2) wds) + + t | t >= FUN && t <= FUN_STATIC -> do + return $ FunClosure itbl ptrs (drop (length ptrs + 1) wds) + + AP -> do + unless (length ptrs >= 1) $ + fail "Expected at least 1 ptr argument to AP" + unless (length wds >= 3) $ + fail "Expected at least 3 raw words to AP" + return $ APClosure itbl + (fromIntegral $ wds !! 2) + (fromIntegral $ shiftR (wds !! 2) (wORD_SIZE_IN_BITS `div` 2)) + (head ptrs) (tail ptrs) + + PAP -> do + unless (length ptrs >= 1) $ + fail "Expected at least 1 ptr argument to PAP" + unless (length wds >= 3) $ + fail "Expected at least 3 raw words to AP" + return $ PAPClosure itbl + (fromIntegral $ wds !! 2) + (fromIntegral $ shiftR (wds !! 2) (wORD_SIZE_IN_BITS `div` 2)) + (head ptrs) (tail ptrs) + + AP_STACK -> do + unless (length ptrs >= 1) $ + fail "Expected at least 1 ptr argument to AP_STACK" + return $ APStackClosure itbl (head ptrs) (tail ptrs) + + THUNK_SELECTOR -> do + unless (length ptrs >= 1) $ + fail "Expected at least 1 ptr argument to THUNK_SELECTOR" + return $ SelectorClosure itbl (head ptrs) + + IND -> do + unless (length ptrs >= 1) $ + fail "Expected at least 1 ptr argument to IND" + return $ IndClosure itbl (head ptrs) + IND_STATIC -> do + unless (length ptrs >= 1) $ + fail "Expected at least 1 ptr argument to IND_STATIC" + return $ IndClosure itbl (head ptrs) + BLACKHOLE -> do + unless (length ptrs >= 1) $ + fail "Expected at least 1 ptr argument to BLACKHOLE" + return $ BlackholeClosure itbl (head ptrs) + + BCO -> do + unless (length ptrs >= 3) $ + fail $ "Expected at least 3 ptr argument to BCO, found " ++ show (length ptrs) + unless (length wds >= 6) $ + fail $ "Expected at least 6 words to BCO, found " ++ show (length wds) + return $ BCOClosure itbl (ptrs !! 0) (ptrs !! 1) (ptrs !! 2) + (fromIntegral $ wds !! 4) + (fromIntegral $ shiftR (wds !! 4) (wORD_SIZE_IN_BITS `div` 2)) + (wds !! 5) + + ARR_WORDS -> do + unless (length wds >= 2) $ + fail $ "Expected at least 2 words to ARR_WORDS, found " ++ show (length wds) + return $ ArrWordsClosure itbl (wds !! 1) (drop 2 wds) + + t | t == MUT_ARR_PTRS_FROZEN || t == MUT_ARR_PTRS_FROZEN0 -> do + unless (length wds >= 3) $ + fail $ "Expected at least 3 words to MUT_ARR_PTRS_FROZEN0 found " ++ show (length wds) + return $ MutArrClosure itbl (wds !! 1) (wds !! 2) ptrs + + t | t == MUT_VAR_CLEAN || t == MUT_VAR_DIRTY -> + return $ MutVarClosure itbl (head ptrs) + + t | t == MVAR_CLEAN || t == MVAR_DIRTY -> do + unless (length ptrs >= 3) $ + fail $ "Expected at least 3 ptrs to MVAR, found " ++ show (length ptrs) + return $ MVarClosure itbl (ptrs !! 0) (ptrs !! 1) (ptrs !! 2) + + BLOCKING_QUEUE -> + return $ OtherClosure itbl ptrs wds + -- return $ BlockingQueueClosure itbl + -- (ptrs !! 0) (ptrs !! 1) (ptrs !! 2) (ptrs !! 3) + + -- return $ OtherClosure itbl ptrs wds + -- + _ -> + return $ UnsupportedClosure itbl + +-- | Like 'getClosureData', but taking a 'Box', so it is easier to work with. +getBoxedClosureData :: Box -> IO Closure +getBoxedClosureData (Box a) = getClosureData a + + +isChar :: GenClosure b -> Maybe Char +isChar (ConsClosure { name = "C#", dataArgs = [ch], ptrArgs = []}) = Just (chr (fromIntegral ch)) +isChar _ = Nothing + +isCons :: GenClosure b -> Maybe (b, b) +isCons (ConsClosure { name = ":", dataArgs = [], ptrArgs = [h,t]}) = Just (h,t) +isCons _ = Nothing + +isTup :: GenClosure b -> Maybe [b] +isTup (ConsClosure { dataArgs = [], ..}) = + if length name >= 3 && + head name == '(' && last name == ')' && + all (==',') (tail (init name)) + then Just ptrArgs else Nothing +isTup _ = Nothing + + +isNil :: GenClosure b -> Bool +isNil (ConsClosure { name = "[]", dataArgs = [], ptrArgs = []}) = True +isNil _ = False + +-- | A pretty-printer that tries to generate valid Haskell for evalutated data. +-- It assumes that for the included boxes, you already replaced them by Strings +-- using 'Data.Foldable.map' or, if you need to do IO, 'Data.Foldable.mapM'. +-- +-- The parameter gives the precedendence, to avoid avoidable parenthesises. +ppClosure :: (Int -> b -> String) -> Int -> GenClosure b -> String +ppClosure showBox prec c = case c of + _ | Just ch <- isChar c -> app $ + ["C#", show ch] + _ | Just (h,t) <- isCons c -> addBraces (5 <= prec) $ + showBox 5 h ++ " : " ++ showBox 4 t + _ | Just vs <- isTup c -> + "(" ++ intercalate "," (map (showBox 0) vs) ++ ")" + ConsClosure {..} -> app $ + name : map (showBox 10) ptrArgs ++ map show dataArgs + ThunkClosure {..} -> app $ + "_thunk" : map (showBox 10) ptrArgs ++ map show dataArgs + SelectorClosure {..} -> app + ["_sel", showBox 10 selectee] + IndClosure {..} -> app + ["_ind", showBox 10 indirectee] + BlackholeClosure {..} -> app + ["_bh", showBox 10 indirectee] + APClosure {..} -> app $ map (showBox 10) $ + fun : payload + PAPClosure {..} -> app $ map (showBox 10) $ + fun : payload + APStackClosure {..} -> app $ map (showBox 10) $ + fun : payload + BCOClosure {..} -> app + ["_bco"] + ArrWordsClosure {..} -> app + ["toArray", "("++show (length arrWords) ++ " words)", intercalate "," (shorten (map show arrWords)) ] + MutArrClosure {..} -> app + ["toMutArray", "("++show (length mccPayload) ++ " ptrs)", intercalate "," (shorten (map (showBox 10) mccPayload))] + MutVarClosure {..} -> app $ + ["_mutVar", (showBox 10) var] + MVarClosure {..} -> app $ + ["MVar", (showBox 10) value] + FunClosure {..} -> + "_fun" ++ braceize (map (showBox 0) ptrArgs ++ map show dataArgs) + BlockingQueueClosure {..} -> + "_blockingQueue" + OtherClosure {..} -> + "_other" + UnsupportedClosure {..} -> + "_unsupported" + where + app [a] = a ++ "()" + app xs = addBraces (10 <= prec) (intercalate " " xs) + + shorten xs = if length xs > 20 then take 20 xs ++ ["(and more)"] else xs + +{- $heapmap + + For more global views of the heap, you can use heap maps. These come in + variations, either a trees or as graphs, depending on + whether you want to detect cycles and sharing or not. + + The entries of a 'HeapGraph' can be annotated with arbitrary values. Most + operations expect this to be in the 'Monoid' class: They use 'mempty' to + annotate closures added because the passed values reference them, and they + use 'mappend' to combine the annotations when two values conincide, e.g. + during 'updateHeapGraph'. +-} + +-- | Heap maps as tree, i.e. no sharing, no cycles. +data HeapTree = HeapTree Box (GenClosure HeapTree) | EndOfHeapTree + +heapTreeClosure :: HeapTree -> Maybe (GenClosure HeapTree) +heapTreeClosure (HeapTree _ c) = Just c +heapTreeClosure EndOfHeapTree = Nothing + +-- | Constructing an 'HeapTree' from a boxed value. It takes a depth parameter +-- that prevents it from running ad infinitum for cyclic or infinite +-- structures. +buildHeapTree :: Int -> Box -> IO HeapTree +buildHeapTree 0 _ = do + return $ EndOfHeapTree +buildHeapTree n b = do + c <- getBoxedClosureData b + c' <- T.mapM (buildHeapTree (n-1)) c + return $ HeapTree b c' + +-- | Pretty-Printing a heap Tree +-- +-- Example output for @[Just 4, Nothing, *something*]@, where *something* is an +-- unevaluated expression depending on the command line argument. +-- +-- >[Just (I# 4),Nothing,Just (_thunk ["arg1","arg2"])] +ppHeapTree :: HeapTree -> String +ppHeapTree = go 0 + where + go _ EndOfHeapTree = "..." + go prec t@(HeapTree _ c') + | Just s <- isHeapTreeString t = show s + | Just l <- isHeapTreeList t = "[" ++ intercalate "," (map ppHeapTree l) ++ "]" + | Just bc <- disassembleBCO heapTreeClosure c' + = app ("_bco" : map (go 10) (concatMap F.toList bc)) + | otherwise = ppClosure go prec c' + where + app [a] = a ++ "()" + app xs = addBraces (10 <= prec) (intercalate " " xs) + +isHeapTreeList :: HeapTree -> Maybe ([HeapTree]) +isHeapTreeList tree = do + c <- heapTreeClosure tree + if isNil c + then return [] + else do + (h,t) <- isCons c + t' <- isHeapTreeList t + return $ (:) h t' + +isHeapTreeString :: HeapTree -> Maybe String +isHeapTreeString t = do + list <- isHeapTreeList t + -- We do not want to print empty lists as "" as we do not know that they + -- are really strings. + if (null list) + then Nothing + else mapM (isChar <=< heapTreeClosure) list + +-- | For heap graphs, i.e. data structures that also represent sharing and +-- cyclic structures, these are the entries. If the referenced value is +-- @Nothing@, then we do not have that value in the map, most likely due to +-- exceeding the recursion bound passed to 'buildHeapGraph'. +-- +-- Besides a pointer to the stored value and the closure representation we +-- also keep track of whether the value was still alive at the last update of the +-- heap graph. In addition we have a slot for arbitrary data, for the user's convenience. +data HeapGraphEntry a = HeapGraphEntry { + hgeBox :: Box, + hgeClosure :: GenClosure (Maybe HeapGraphIndex), + hgeLive :: Bool, + hgeData :: a} + deriving (Show, Functor) +type HeapGraphIndex = Int + +-- | The whole graph. The suggested interface is to only use 'lookupHeapGraph', +-- as the internal representation may change. Nevertheless, we export it here: +-- Sometimes the user knows better what he needs than we do. +newtype HeapGraph a = HeapGraph (M.IntMap (HeapGraphEntry a)) + deriving (Show) + +lookupHeapGraph :: HeapGraphIndex -> (HeapGraph a) -> Maybe (HeapGraphEntry a) +lookupHeapGraph i (HeapGraph m) = M.lookup i m + +heapGraphRoot :: HeapGraphIndex +heapGraphRoot = 0 + +-- | Creates a 'HeapGraph' for the value in the box, but not recursing further +-- than the given limit. The initial value has index 'heapGraphRoot'. +buildHeapGraph + :: Monoid a + => Int -- ^ Search limit + -> a -- ^ Data value for the root + -> Box -- ^ The value to start with + -> IO (HeapGraph a) +buildHeapGraph limit rootD initialBox = + fst <$> multiBuildHeapGraph limit [(rootD, initialBox)] + +-- | Creates a 'HeapGraph' for the values in multiple boxes, but not recursing +-- further than the given limit. +-- +-- Returns the 'HeapGraph' and the indices of initial values. The arbitrary +-- type @a@ can be used to make the connection between the input and the +-- resulting list of indices, and to store additional data. +multiBuildHeapGraph + :: Monoid a + => Int -- ^ Search limit + -> [(a, Box)] -- ^ Starting values with associated data entry + -> IO (HeapGraph a, [(a, HeapGraphIndex)]) +multiBuildHeapGraph limit = generalBuildHeapGraph limit (HeapGraph M.empty) + +-- | Adds an entry to an existing 'HeapGraph'. +-- +-- Returns the updated 'HeapGraph' and the index of the added value. +addHeapGraph + :: Monoid a + => Int -- ^ Search limit + -> a -- ^ Data to be stored with the added value + -> Box -- ^ Value to add to the graph + -> HeapGraph a -- ^ Graph to extend + -> IO (HeapGraphIndex, HeapGraph a) +addHeapGraph limit d box hg = do + (hg', [(_,i)]) <- generalBuildHeapGraph limit hg [(d,box)] + return (i, hg') + +-- | Adds the given annotation to the entry at the given index, using the +-- 'mappend' operation of its 'Monoid' instance. +annotateHeapGraph :: Monoid a => a -> HeapGraphIndex -> HeapGraph a -> HeapGraph a +annotateHeapGraph d i (HeapGraph hg) = HeapGraph $ M.update go i hg + where + go hge = Just $ hge { hgeData = hgeData hge <> d } + +generalBuildHeapGraph + :: Monoid a + => Int + -> HeapGraph a + -> [(a,Box)] + -> IO (HeapGraph a, [(a, HeapGraphIndex)]) +generalBuildHeapGraph limit _ _ | limit <= 0 = error "buildHeapGraph: limit has to be positive" +generalBuildHeapGraph limit (HeapGraph hg) addBoxes = do + -- First collect all boxes from the existing heap graph + let boxList = [ (hgeBox hge, i) | (i, hge) <- M.toList hg ] + indices | M.null hg = [0..] + | otherwise = [1 + fst (M.findMax hg)..] + + initialState = (boxList, indices, []) + -- It is ok to use the Monoid (IntMap a) instance here, because + -- we will, besides the first time, use 'tell' only to add singletons not + -- already there + (is, hg') <- runWriterT (evalStateT run initialState) + -- Now add the annotations of the root values + let hg'' = foldl' (flip (uncurry annotateHeapGraph)) (HeapGraph hg') is + return (hg'', is) + where + run = do + lift $ tell hg -- Start with the initial map + forM addBoxes $ \(d, b) -> do + -- Cannot fail, as limit is not zero here + Just i <- add limit b + return (d, i) + + add 0 _ = return Nothing + add n b = do + -- If the box is in the map, return the index + (existing,_,_) <- get + mbI <- liftIO $ findM (areBoxesEqual b . fst) existing + case mbI of + Just (_,i) -> return $ Just i + Nothing -> do + -- Otherwise, allocate a new index + i <- nextI + -- And register it + modify (\(x,y,z) -> ((b,i):x, y, z)) + -- Look up the closure + c <- liftIO $ getBoxedClosureData b + -- Find indicies for all boxes contained in the map + c' <- T.mapM (add (n-1)) c + -- Add add the resulting closure to the map + lift $ tell (M.singleton i (HeapGraphEntry b c' True mempty)) + return $ Just i + nextI = do + i <- gets (head . (\(_,b,_) -> b)) + modify (\(a,b,c) -> (a, tail b, c)) + return i + +-- | This function updates a heap graph to reflect the current state of +-- closures on the heap, conforming to the following specification. +-- +-- * Every entry whose value has been garbage collected by now is marked as +-- dead by setting 'hgeLive' to @False@ +-- * Every entry whose value is still live gets the 'hgeClosure' field updated +-- and newly referenced closures are, up to the given depth, added to the graph. +-- * A map mapping previous indicies to the corresponding new indicies is returned as well. +-- * The closure at 'heapGraphRoot' stays at 'heapGraphRoot' +updateHeapGraph :: Monoid a => Int -> HeapGraph a -> IO (HeapGraph a, HeapGraphIndex -> HeapGraphIndex) +updateHeapGraph limit (HeapGraph startHG) = do + (hg', indexMap) <- runWriterT $ foldM go (HeapGraph M.empty) (M.toList startHG) + return (hg', (M.!) indexMap) + where + go hg (i, hge) = do + (j, hg') <- liftIO $ addHeapGraph limit (hgeData hge) (hgeBox hge) hg + tell (M.singleton i j) + return hg' + +-- | Pretty-prints a HeapGraph. The resulting string contains newlines. Example +-- for @let s = \"Ki\" in (s, s, cycle \"Ho\")@: +-- +-- >let x1 = "Ki" +-- > x6 = C# 'H' : C# 'o' : x6 +-- >in (x1,x1,x6) +ppHeapGraph :: HeapGraph a -> String +ppHeapGraph (HeapGraph m) = letWrapper ++ ppRef 0 (Just heapGraphRoot) + where + -- All variables occuring more than once + bindings = boundMultipleTimes (HeapGraph m) [heapGraphRoot] + + letWrapper = + if null bindings + then "" + else "let " ++ intercalate "\n " (map ppBinding bindings) ++ "\nin " + + bindingLetter i = case hgeClosure (iToE i) of + ThunkClosure {..} -> 't' + SelectorClosure {..} -> 't' + APClosure {..} -> 't' + PAPClosure {..} -> 'f' + BCOClosure {..} -> 't' + FunClosure {..} -> 'f' + _ -> 'x' + + ppBindingMap = M.fromList $ + concat $ + map (zipWith (\j (i,c) -> (i, [c] ++ show j)) [(1::Int)..]) $ + groupBy ((==) `on` snd) $ + sortBy (compare `on` snd) + [ (i, bindingLetter i) | i <- bindings ] + + ppVar i = ppBindingMap M.! i + ppBinding i = ppVar i ++ " = " ++ ppEntry 0 (iToE i) + + ppEntry prec hge + | Just s <- isString hge = show s + | Just l <- isList hge = "[" ++ intercalate "," (map (ppRef 0) l) ++ "]" + | Just bc <- disassembleBCO (fmap (hgeClosure . iToE)) (hgeClosure hge) + = app ("_bco" : map (ppRef 10) (concatMap F.toList bc)) + | otherwise = ppClosure ppRef prec (hgeClosure hge) + where + app [a] = a ++ "()" + app xs = addBraces (10 <= prec) (intercalate " " xs) + + ppRef _ Nothing = "..." + ppRef prec (Just i) | i `elem` bindings = ppVar i + | otherwise = ppEntry prec (iToE i) + iToE i = m M.! i + + iToUnboundE i = if i `elem` bindings then Nothing else M.lookup i m + + isList :: HeapGraphEntry a -> Maybe ([Maybe HeapGraphIndex]) + isList hge = + if isNil (hgeClosure hge) + then return [] + else do + (h,t) <- isCons (hgeClosure hge) + ti <- t + e <- iToUnboundE ti + t' <- isList e + return $ (:) h t' + + isString :: HeapGraphEntry a -> Maybe String + isString e = do + list <- isList e + -- We do not want to print empty lists as "" as we do not know that they + -- are really strings. + if (null list) + then Nothing + else mapM (isChar . hgeClosure <=< iToUnboundE <=< id) list + + +-- | In the given HeapMap, list all indices that are used more than once. The +-- second parameter adds external references, commonly @[heapGraphRoot]@. +boundMultipleTimes :: HeapGraph a -> [HeapGraphIndex] -> [HeapGraphIndex] +boundMultipleTimes (HeapGraph m) roots = map head $ filter (not.null) $ map tail $ group $ sort $ + roots ++ concatMap (catMaybes . allPtrs . hgeClosure) (M.elems m) + +-- | This function integrates the disassembler in "GHC.Disassembler". The first +-- argument should a function that dereferences the pointer in the closure to a +-- closure. +-- +-- If any of these return 'Nothing', then 'disassembleBCO' returns Nothing +disassembleBCO :: (a -> Maybe (GenClosure b)) -> GenClosure a -> Maybe [BCI b] +disassembleBCO deref (BCOClosure {..}) = do + opsC <- deref instrs + litsC <- deref literals + ptrsC <- deref bcoptrs + return $ disassemble (mccPayload ptrsC) (arrWords litsC) (toBytes (bytes opsC) (arrWords opsC)) +disassembleBCO _ _ = Nothing + +-- Utilities + +findM :: (a -> IO Bool) -> [a] -> IO (Maybe a) +findM _p [] = return Nothing +findM p (x:xs) = do + b <- p x + if b then return (Just x) else findM p xs + +addBraces :: Bool -> String -> String +addBraces True t = "(" ++ t ++ ")" +addBraces False t = t + +braceize :: [String] -> String +braceize [] = "" +braceize xs = "{" ++ intercalate "," xs ++ "}" + +-- This used to be available via GHC.Constants +#include "MachDeps.h" +wORD_SIZE, tAG_MASK, wORD_SIZE_IN_BITS :: Int +wORD_SIZE = SIZEOF_HSWORD +tAG_MASK = (1 `shift` TAG_BITS) - 1 +wORD_SIZE_IN_BITS = WORD_SIZE_IN_BITS + diff --git a/libraries/heapview/GHC/HeapView/Debug.hs b/libraries/heapview/GHC/HeapView/Debug.hs new file mode 100644 index 0000000000..fc31d39cb8 --- /dev/null +++ b/libraries/heapview/GHC/HeapView/Debug.hs @@ -0,0 +1,68 @@ +-- | Utilities to debug "GHC.HeapView". +module GHC.HeapView.Debug where + +import GHC.HeapView +import Text.Printf +import System.IO +import Control.Monad +import System.Mem +import Data.Maybe +import Data.Char +import Data.IORef + +-- | This function walks the heap referenced by the argument, printing the +-- \"path\", i.e. the pointer indices from the initial to the current closure +-- and the closure itself. When the runtime crashes, the problem is likely +-- related to one of the earlier steps. +walkHeap + :: Bool -- ^ Whether to check for cycles + -> Bool -- ^ Whether to GC in every step + -> Box -- ^ The closure to investigate + -> IO () +walkHeap slow check x = do + seenRef <- newIORef [] + go seenRef [] x + where + go seenRef prefix b = do + _ <- printf "At %s:\n" (show prefix) + seen <- readIORef seenRef + previous <- if check then findM (areBoxesEqual b . fst) seen else return Nothing + case previous of + Just (_,p') -> printf "Seen at %s.\n" (show p') + Nothing -> do + hFlush stdout + c <- getBoxedClosureData b + putStrLn (ppClosure (\_ box -> show box) 0 c) + when slow performGC + isCC <- isCharCons c + unless isCC $ do + modifyIORef seenRef ((b,prefix):) + forM_ (zip [(0::Int)..] (allPtrs c)) $ \(n,box) -> + go seenRef (prefix ++ [n]) box + +walkPrefix :: [Int] -> a -> IO Box +walkPrefix is v = go is (asBox v) + where + go [] a = return a + go (x:xs) a = do + c <- getBoxedClosureData a + walkPrefix xs (allPtrs c !! x) + + +findM :: (a -> IO Bool) -> [a] -> IO (Maybe a) +findM _p [] = return Nothing +findM p (x:xs) = do + b <- p x + if b then return (Just x) else findM p xs + +isCharCons :: GenClosure Box -> IO Bool +isCharCons c | Just (h,_) <- isCons c = (isJust . isChar) <$> getBoxedClosureData h +isCharCons _ = return False + +isCons :: GenClosure b -> Maybe (b, b) +isCons (ConsClosure { name = ":", dataArgs = [], ptrArgs = [h,t]}) = Just (h,t) +isCons _ = Nothing + +isChar :: GenClosure b -> Maybe Char +isChar (ConsClosure { name = "C#", dataArgs = [ch], ptrArgs = []}) = Just (chr (fromIntegral ch)) +isChar _ = Nothing diff --git a/libraries/heapview/LICENSE b/libraries/heapview/LICENSE new file mode 100644 index 0000000000..682cb5ac5c --- /dev/null +++ b/libraries/heapview/LICENSE @@ -0,0 +1,30 @@ +Copyright (c) 2012-2013, Joachim Breitner + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of Joachim Breitner nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/libraries/heapview/Setup.hs b/libraries/heapview/Setup.hs new file mode 100644 index 0000000000..6fa548caf7 --- /dev/null +++ b/libraries/heapview/Setup.hs @@ -0,0 +1,6 @@ +module Main (main) where + +import Distribution.Simple + +main :: IO () +main = defaultMain diff --git a/libraries/heapview/heapview.cabal b/libraries/heapview/heapview.cabal new file mode 100644 index 0000000000..0c4c029d9d --- /dev/null +++ b/libraries/heapview/heapview.cabal @@ -0,0 +1,53 @@ +name: heapview +version: 1.0.0.0 +-- NOTE: Don't forget to update ./changelog.md +license: BSD3 +license-file: LICENSE +maintainer: libraries@haskell.org +bug-reports: http://ghc.haskell.org/trac/ghc/newticket +synopsis: Functions for walking GHC's heap +category: GHC +description: + This package provides functions for walking the GHC heap data structures + and retrieving information about those data structures. + +build-type: Simple +cabal-version: >=1.10 +tested-with: GHC==7.11 + +source-repository head + type: git + location: http://git.haskell.org/ghc.git + subdir: libraries/heapview + +library + default-language: Haskell2010 + other-extensions: + BangPatterns + CPP + DeriveFoldable + DeriveFunctor + DeriveTraversable + ForeignFunctionInterface + GHCForeignImportPrim + MagicHash + PatternGuards + RecordWildCards + UnboxedTuples + UnliftedFFITypes + + build-depends: rts == 1.0.* + , ghc-prim == 0.5.0.0 + , base >= 4.9.0 && < 4.11 + , binary >= 0.5 && < 0.9 + , bytestring >= 0.10.6.0 + , containers >= 0.5 && < 0.6 + , deepseq >= 1.4 + , transformers >= 0.2 && < 0.6 + + ghc-options: -Wall + + exposed-modules: GHC.AssertNF + GHC.Disassembler + GHC.HeapView + GHC.HeapView.Debug |