summaryrefslogtreecommitdiff
path: root/libraries/ghc-heap/GHC/Exts/Heap/Utils.hsc
blob: 404063bd57a116dea1b6278f15a231d1e2bf917d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
{-# LANGUAGE CPP, MagicHash #-}

module GHC.Exts.Heap.Utils (
    dataConNames
    ) where

#include "Rts.h"

import Prelude -- See note [Why do we import Prelude here?]
import GHC.Exts.Heap.Constants
import GHC.Exts.Heap.InfoTable

import Data.Char
import Data.List (intercalate)
import Foreign
import GHC.CString
import GHC.Exts

{- To find the string in the constructor's info table we need to consider
      the layout of info tables relative to the entry code for a closure.

      An info table can be next to the entry code for the closure, or it can
      be separate. The former (faster) is used in registerised versions of ghc,
      and the latter (portable) is for non-registerised versions.

      The diagrams below show where the string is to be found relative to
      the normal info table of the closure.

      1) Tables next to code:

         --------------
         |            |   <- pointer to the start of the string
         --------------
         |            |   <- the (start of the) info table structure
         |            |
         |            |
         --------------
         | entry code |
         |    ....    |

         In this case the pointer to the start of the string can be found in
         the memory location _one word before_ the first entry in the normal info
         table.

      2) Tables NOT next to code:

                                 --------------
         info table structure -> |     *------------------> --------------
                                 |            |             | entry code |
                                 |            |             |    ....    |
                                 --------------
         ptr to start of str ->  |            |
                                 --------------

         In this case the pointer to the start of the string can be found
         in the memory location: info_table_ptr + info_table_size
-}

-- Given a ptr to an 'StgInfoTable' for a data constructor
-- return (Package, Module, Name)
dataConNames :: Ptr StgInfoTable -> IO (String, String, String)
dataConNames ptr = do
    conDescAddress <- getConDescAddress
    pure $ parse conDescAddress
  where
    -- Retrieve the con_desc field address pointing to
    -- 'Package:Module.Name' string
    getConDescAddress :: IO (Ptr Word8)
    getConDescAddress
#if defined(TABLES_NEXT_TO_CODE)
      = do
        offsetToString <- peek (ptr `plusPtr` negate wORD_SIZE)
        pure $ (ptr `plusPtr` stdInfoTableSizeB)
                    `plusPtr` fromIntegral (offsetToString :: Int32)
#else
      = peek $ intPtrToPtr $ ptrToIntPtr ptr + fromIntegral stdInfoTableSizeB
#endif

    stdInfoTableSizeW :: Int
    -- The size of a standard info table varies with profiling/ticky etc,
    -- so we can't get it from Constants
    -- It must vary in sync with mkStdInfoTable
    stdInfoTableSizeW
      = size_fixed + size_prof
      where
        size_fixed = 2  -- layout, type
##if defined(PROFILING)
        size_prof = 2
##else
        size_prof = 0
##endif

    stdInfoTableSizeB :: Int
    stdInfoTableSizeB = stdInfoTableSizeW * wORD_SIZE

-- parsing names is a little bit fiddly because we have a string in the form:
-- pkg:A.B.C.foo, and we want to split it into three parts: ("pkg", "A.B.C", "foo").
-- Thus we split at the leftmost colon and the rightmost occurrence of the dot.
-- It would be easier if the string was in the form pkg:A.B.C:foo, but alas
-- this is not the conventional way of writing Haskell names. We stick with
-- convention, even though it makes the parsing code more troublesome.
-- Warning: this code assumes that the string is well formed.
parse :: Ptr Word8 -> (String, String, String)
parse (Ptr addr) = if not . all (>0) . fmap length $ [p,m,occ]
                     then ([], [], input)
                     else (p, m, occ)
  where
    input = unpackCStringUtf8## addr
    (p, rest1) = break (== ':') input
    (m, occ)
        = (intercalate "." $ reverse modWords, occWord)
        where
        (modWords, occWord) = parseModOcc [] (drop 1 rest1)
    -- We only look for dots if str could start with a module name,
    -- i.e. if it starts with an upper case character.
    -- Otherwise we might think that "X.:->" is the module name in
    -- "X.:->.+", whereas actually "X" is the module name and
    -- ":->.+" is a constructor name.
    parseModOcc :: [String] -> String -> ([String], String)
    parseModOcc acc str@(c : _)
        | isUpper c =
            case break (== '.') str of
                (top, []) -> (acc, top)
                (top, _:bot) -> parseModOcc (top : acc) bot
    parseModOcc acc str = (acc, str)