summaryrefslogtreecommitdiff
path: root/compiler/GHC/Types/Unique/Supply.hs
blob: a40194673238b6d331e6502bdb78e85a3450c41b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
{-
(c) The University of Glasgow 2006
(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
-}

{-# OPTIONS_GHC -fno-state-hack #-}
    -- This -fno-state-hack is important
    -- See Note [Optimising the unique supply]

{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE BangPatterns #-}

#if !defined(GHC_LOADED_INTO_GHCI)
{-# LANGUAGE UnboxedTuples #-}
#endif

module GHC.Types.Unique.Supply (
        -- * Main data type
        UniqSupply, -- Abstractly

        -- ** Operations on supplies
        uniqFromSupply, uniqsFromSupply, -- basic ops
        takeUniqFromSupply,

        mkSplitUniqSupply,
        splitUniqSupply, listSplitUniqSupply,

        -- * Unique supply monad and its abstraction
        UniqSM, MonadUnique(..),

        -- ** Operations on the monad
        initUs, initUs_,

        -- * Set supply strategy
        initUniqSupply
  ) where

import GHC.Prelude

import GHC.Types.Unique
import GHC.Utils.Panic.Plain (panic)

import GHC.IO

import GHC.Utils.Monad
import Control.Monad
import Data.Bits
import Data.Char
import GHC.Exts( inline )

#include "Unique.h"

{-
************************************************************************
*                                                                      *
\subsection{Splittable Unique supply: @UniqSupply@}
*                                                                      *
************************************************************************
-}

{- Note [How the unique supply works]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The basic idea (due to Lennart Augustsson) is that a UniqSupply is
lazily-evaluated infinite tree.

* At each MkSplitUniqSupply node is a unique Int, and two
  sub-trees (see data UniqSupply)

* takeUniqFromSupply :: UniqSupply -> (Unique, UniqSupply)
  returns the unique Int and one of the sub-trees

* splitUniqSupply :: UniqSupply -> (UniqSupply, UniqSupply)
  returns the two sub-trees

* When you poke on one of the thunks, it does a foreign call
  to get a fresh Int from a thread-safe counter, and returns
  a fresh MkSplitUniqSupply node.  This has to be as efficient
  as possible: it should allocate only
     * The fresh node
     * A thunk for each sub-tree

Note [Optimising the unique supply]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The inner loop of mkSplitUniqSupply is a function closure

     mk_supply :: IO UniqSupply
     mk_supply = unsafeInterleaveIO $
                 genSym      >>= \ u ->
                 mk_supply   >>= \ s1 ->
                 mk_supply   >>= \ s2 ->
                 return (MkSplitUniqSupply (mask .|. u) s1 s2)

It's a classic example of an IO action that is captured
and the called repeatedly (see #18238 for some discussion).
It turns out that we can get something like

  $wmkSplitUniqSupply c# s
    = letrec
        mk_supply
          = \s -> unsafeDupableInterleaveIO1
                    (\s2 -> case noDuplicate# s2 of s3 ->
                            ...
                            case mk_supply s4 of (# s5, t1 #) ->
                            ...
                            (# s6, MkSplitUniqSupply ... #)
      in mk_supply s

This is bad becuase we allocate that inner (\s2...) every time.
Why doesn't full laziness float out the (\s2...)?  Because of
the state hack (#18238).

So for this module we switch the state hack off -- it's an example
of when it makes things worse rather than better.  And we use
multiShotIO (see Note [multiShotIO]) thus:

     mk_supply = multiShotIO $
                 unsafeInterleaveIO $
                 genSym      >>= \ u ->
                 ...

Now full laziness can float that lambda out, and we get

  $wmkSplitUniqSupply c# s
    = letrec
        lvl = \s2 -> case noDuplicate# s2 of s3 ->
                     ...
                     case unsafeDupableInterleaveIO
                              lvl s4 of (# s5, t1 #) ->
                     ...
                     (# s6, MkSplitUniqSupply ... #)
      in unsafeDupableInterleaveIO1 lvl s

This is all terribly delicate.  It just so happened that before I
fixed #18078, and even with the state-hack still enabled, we were
getting this:

  $wmkSplitUniqSupply c# s
    = letrec
        mk_supply = \s2 -> case noDuplicate# s2 of s3 ->
                           ...
                           case mks_help s3 of (# s5,t1 #) ->
                           ...
                           (# s6, MkSplitUniqSupply ... #)
        mks_help = unsafeDupableInterleaveIO mk_supply
           -- mks_help marked as loop breaker
      in mks_help s

The fact that we didn't need full laziness was somewhat fortuitious.
We got the right number of allocations. But the partial application of
the arity-2 unsafeDupableInterleaveIO in mks_help makes it quite a
bit slower.  (Test perf/should_run/UniqLoop had a 20% perf change.)

Sigh.  The test perf/should_run/UniqLoop keeps track of this loop.
Watch it carefully.

Note [multiShotIO]
~~~~~~~~~~~~~~~~~~
The function multiShotIO :: IO a -> IO a
says that the argument IO action may be invoked repeatedly (is
multi-shot), and so there should be a multi-shot lambda around it.
It's quite easy to define, in any module with `-fno-state-hack`:
    multiShotIO :: IO a -> IO a
    {-# INLINE multiShotIO #-}
    multiShotIO (IO m) = IO (\s -> inline m s)

Because of -fno-state-hack, that '\s' will be multi-shot. Now,
ignoring the casts from IO:
    multiShotIO (\ss{one-shot}. blah)
    ==> let m = \ss{one-shot}. blah
        in \s. inline m s
    ==> \s. (\ss{one-shot}.blah) s
    ==> \s. blah[s/ss]

The magic `inline` function does two things
* It prevents eta reduction.  If we wrote just
      multiShotIO (IO m) = IO (\s -> m s)
  the lamda would eta-reduce to 'm' and all would be lost.

* It helps ensure that 'm' really does inline.

Note that 'inline' evaporates in phase 0.  See Note [inlineIdMagic]
in GHC.Core.Opt.ConstantFold.match_inline.

The INLINE pragma on multiShotIO is very important, else the
'inline' call will evaporate when compiling the module that
defines 'multiShotIO', before it is ever exported.
-}


-- | Unique Supply
--
-- A value of type 'UniqSupply' is unique, and it can
-- supply /one/ distinct 'Unique'.  Also, from the supply, one can
-- also manufacture an arbitrary number of further 'UniqueSupply' values,
-- which will be distinct from the first and from all others.
data UniqSupply
  = MkSplitUniqSupply {-# UNPACK #-} !Int -- make the Unique with this
                   UniqSupply UniqSupply
                                -- when split => these two supplies

mkSplitUniqSupply :: Char -> IO UniqSupply
-- ^ Create a unique supply out of thin air. The character given must
-- be distinct from those of all calls to this function in the compiler
-- for the values generated to be truly unique.

-- See Note [How the unique supply works]
-- See Note [Optimising the unique supply]
mkSplitUniqSupply c
  = mk_supply
  where
     !mask = ord c `shiftL` uNIQUE_BITS

        -- Here comes THE MAGIC: see Note [How the unique supply works]
        -- This is one of the most hammered bits in the whole compiler
        -- See Note [Optimising the unique supply]
        -- NB: Use unsafeInterleaveIO for thread-safety.
     mk_supply = multiShotIO $
                 unsafeInterleaveIO $
                 genSym      >>= \ u ->
                 mk_supply   >>= \ s1 ->
                 mk_supply   >>= \ s2 ->
                 return (MkSplitUniqSupply (mask .|. u) s1 s2)

multiShotIO :: IO a -> IO a
{-# INLINE multiShotIO #-}
-- See Note [multiShotIO]
multiShotIO (IO m) = IO (\s -> inline m s)

foreign import ccall unsafe "genSym" genSym :: IO Int
foreign import ccall unsafe "initGenSym" initUniqSupply :: Int -> Int -> IO ()

splitUniqSupply :: UniqSupply -> (UniqSupply, UniqSupply)
-- ^ Build two 'UniqSupply' from a single one, each of which
-- can supply its own 'Unique'.
listSplitUniqSupply :: UniqSupply -> [UniqSupply]
-- ^ Create an infinite list of 'UniqSupply' from a single one
uniqFromSupply  :: UniqSupply -> Unique
-- ^ Obtain the 'Unique' from this particular 'UniqSupply'
uniqsFromSupply :: UniqSupply -> [Unique] -- Infinite
-- ^ Obtain an infinite list of 'Unique' that can be generated by constant splitting of the supply
takeUniqFromSupply :: UniqSupply -> (Unique, UniqSupply)
-- ^ Obtain the 'Unique' from this particular 'UniqSupply', and a new supply

splitUniqSupply (MkSplitUniqSupply _ s1 s2) = (s1, s2)
listSplitUniqSupply  (MkSplitUniqSupply _ s1 s2) = s1 : listSplitUniqSupply s2

uniqFromSupply  (MkSplitUniqSupply n _ _)  = mkUniqueGrimily n
uniqsFromSupply (MkSplitUniqSupply n _ s2) = mkUniqueGrimily n : uniqsFromSupply s2
takeUniqFromSupply (MkSplitUniqSupply n s1 _) = (mkUniqueGrimily n, s1)

{-
************************************************************************
*                                                                      *
\subsubsection[UniqSupply-monad]{@UniqSupply@ monad: @UniqSM@}
*                                                                      *
************************************************************************
-}

-- Avoids using unboxed tuples when loading into GHCi
#if !defined(GHC_LOADED_INTO_GHCI)

type UniqResult result = (# result, UniqSupply #)

pattern UniqResult :: a -> b -> (# a, b #)
pattern UniqResult x y = (# x, y #)
{-# COMPLETE UniqResult #-}

#else

data UniqResult result = UniqResult !result {-# UNPACK #-} !UniqSupply
  deriving (Functor)

#endif

-- | A monad which just gives the ability to obtain 'Unique's
newtype UniqSM result = USM { unUSM :: UniqSupply -> UniqResult result }
    deriving (Functor)

instance Monad UniqSM where
  (>>=) = thenUs
  (>>)  = (*>)

instance Applicative UniqSM where
    pure = returnUs
    (USM f) <*> (USM x) = USM $ \us0 -> case f us0 of
                            UniqResult ff us1 -> case x us1 of
                              UniqResult xx us2 -> UniqResult (ff xx) us2
    (*>) = thenUs_

-- TODO: try to get rid of this instance
instance MonadFail UniqSM where
    fail = panic

-- | Run the 'UniqSM' action, returning the final 'UniqSupply'
initUs :: UniqSupply -> UniqSM a -> (a, UniqSupply)
initUs init_us m = case unUSM m init_us of { UniqResult r us -> (r, us) }

-- | Run the 'UniqSM' action, discarding the final 'UniqSupply'
initUs_ :: UniqSupply -> UniqSM a -> a
initUs_ init_us m = case unUSM m init_us of { UniqResult r _ -> r }

{-# INLINE thenUs #-}
{-# INLINE returnUs #-}
{-# INLINE splitUniqSupply #-}

-- @thenUs@ is where we split the @UniqSupply@.

liftUSM :: UniqSM a -> UniqSupply -> (a, UniqSupply)
liftUSM (USM m) us0 = case m us0 of UniqResult a us1 -> (a, us1)

instance MonadFix UniqSM where
    mfix m = USM (\us0 -> let (r,us1) = liftUSM (m r) us0 in UniqResult r us1)

thenUs :: UniqSM a -> (a -> UniqSM b) -> UniqSM b
thenUs (USM expr) cont
  = USM (\us0 -> case (expr us0) of
                   UniqResult result us1 -> unUSM (cont result) us1)

thenUs_ :: UniqSM a -> UniqSM b -> UniqSM b
thenUs_ (USM expr) (USM cont)
  = USM (\us0 -> case (expr us0) of { UniqResult _ us1 -> cont us1 })

returnUs :: a -> UniqSM a
returnUs result = USM (\us -> UniqResult result us)

getUs :: UniqSM UniqSupply
getUs = USM (\us0 -> case splitUniqSupply us0 of (us1,us2) -> UniqResult us1 us2)

-- | A monad for generating unique identifiers
class Monad m => MonadUnique m where
    -- | Get a new UniqueSupply
    getUniqueSupplyM :: m UniqSupply
    -- | Get a new unique identifier
    getUniqueM  :: m Unique
    -- | Get an infinite list of new unique identifiers
    getUniquesM :: m [Unique]

    -- This default definition of getUniqueM, while correct, is not as
    -- efficient as it could be since it needlessly generates and throws away
    -- an extra Unique. For your instances consider providing an explicit
    -- definition for 'getUniqueM' which uses 'takeUniqFromSupply' directly.
    getUniqueM  = liftM uniqFromSupply  getUniqueSupplyM
    getUniquesM = liftM uniqsFromSupply getUniqueSupplyM

instance MonadUnique UniqSM where
    getUniqueSupplyM = getUs
    getUniqueM  = getUniqueUs
    getUniquesM = getUniquesUs

getUniqueUs :: UniqSM Unique
getUniqueUs = USM (\us0 -> case takeUniqFromSupply us0 of
                           (u,us1) -> UniqResult u us1)

getUniquesUs :: UniqSM [Unique]
getUniquesUs = USM (\us0 -> case splitUniqSupply us0 of
                            (us1,us2) -> UniqResult (uniqsFromSupply us1) us2)