summaryrefslogtreecommitdiff
path: root/compiler/ghci/ByteCodeAsm.lhs
blob: e332413daed8eba05623eedd78119ba3cedd33fc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
%
% (c) The University of Glasgow 2002
%
\section[ByteCodeLink]{Bytecode assembler and linker}

\begin{code}
{-# OPTIONS -optc-DNON_POSIX_SOURCE #-}

module ByteCodeAsm (  
	assembleBCOs, assembleBCO,

	CompiledByteCode(..), 
	UnlinkedBCO(..), BCOPtr(..), bcoFreeNames,
	SizedSeq, sizeSS, ssElts,
	iNTERP_STACK_CHECK_THRESH
  ) where

#include "HsVersions.h"

import ByteCodeInstr
import ByteCodeItbls	( ItblEnv, mkITbls )

import Name		( Name, getName )
import NameSet
import FiniteMap	( addToFM, lookupFM, emptyFM )
import Literal		( Literal(..) )
import TyCon		( TyCon )
import PrimOp		( PrimOp )
import Constants	( wORD_SIZE )
import FastString	( FastString(..) )
import SMRep		( CgRep(..), StgWord )
import FiniteMap
import Outputable

import Control.Monad	( foldM )
import Control.Monad.ST	( runST )

import GHC.Word		( Word(..) )
import Data.Array.MArray
import Data.Array.Unboxed ( listArray )
import Data.Array.Base	( UArray(..) )
import Data.Array.ST	( castSTUArray )
import Foreign		( Word16, free )
import Data.Int		( Int64 )
import Data.Char	( ord )

import GHC.Base		( ByteArray# )
import GHC.IOBase	( IO(..) )
import GHC.Ptr		( Ptr(..) )

-- -----------------------------------------------------------------------------
-- Unlinked BCOs

-- CompiledByteCode represents the result of byte-code 
-- compiling a bunch of functions and data types

data CompiledByteCode 
  = ByteCode [UnlinkedBCO] -- Bunch of interpretable bindings
	     ItblEnv       -- A mapping from DataCons to their itbls

instance Outputable CompiledByteCode where
  ppr (ByteCode bcos _) = ppr bcos


data UnlinkedBCO
   = UnlinkedBCO {
	unlinkedBCOName   :: Name,
	unlinkedBCOArity  :: Int,
	unlinkedBCOInstrs :: ByteArray#,			 -- insns
	unlinkedBCOBitmap :: ByteArray#,			 -- bitmap
        unlinkedBCOLits   :: (SizedSeq (Either Word FastString)), -- literals
			-- Either literal words or a pointer to a asciiz
			-- string, denoting a label whose *address* should
			-- be determined at link time
        unlinkedBCOPtrs   :: (SizedSeq BCOPtr), 	-- ptrs
 	unlinkedBCOItbls  :: (SizedSeq Name)	 	-- itbl refs
   }

data BCOPtr
  = BCOPtrName   Name
  | BCOPtrPrimOp PrimOp
  | BCOPtrBCO    UnlinkedBCO

-- | Finds external references.  Remember to remove the names
-- defined by this group of BCOs themselves
bcoFreeNames :: UnlinkedBCO -> NameSet
bcoFreeNames bco
  = bco_refs bco `minusNameSet` mkNameSet [unlinkedBCOName bco]
  where
    bco_refs (UnlinkedBCO _ _ _ _ _ ptrs itbls)
	= unionManyNameSets (
	     mkNameSet [ n | BCOPtrName n <- ssElts ptrs ] :
	     mkNameSet (ssElts itbls) :
	     map bco_refs [ bco | BCOPtrBCO bco <- ssElts ptrs ]
	  )

instance Outputable UnlinkedBCO where
   ppr (UnlinkedBCO nm arity insns bitmap lits ptrs itbls)
      = sep [text "BCO", ppr nm, text "with", 
             int (sizeSS lits), text "lits",
             int (sizeSS ptrs), text "ptrs",
             int (sizeSS itbls), text "itbls"]

-- -----------------------------------------------------------------------------
-- The bytecode assembler

-- The object format for bytecodes is: 16 bits for the opcode, and 16
-- for each field -- so the code can be considered a sequence of
-- 16-bit ints.  Each field denotes either a stack offset or number of
-- items on the stack (eg SLIDE), and index into the pointer table (eg
-- PUSH_G), an index into the literal table (eg PUSH_I/D/L), or a
-- bytecode address in this BCO.

-- Top level assembler fn.
assembleBCOs :: [ProtoBCO Name] -> [TyCon] -> IO CompiledByteCode
assembleBCOs proto_bcos tycons
  = do	itblenv <- mkITbls tycons
	bcos    <- mapM assembleBCO proto_bcos
        return (ByteCode bcos itblenv)

assembleBCO :: ProtoBCO Name -> IO UnlinkedBCO
assembleBCO (ProtoBCO nm instrs bitmap bsize arity origin malloced)
   = let
         -- pass 1: collect up the offsets of the local labels.
         -- Remember that the first insn starts at offset 1 since offset 0
         -- (eventually) will hold the total # of insns.
         label_env = mkLabelEnv emptyFM 1 instrs

         mkLabelEnv env i_offset [] = env
         mkLabelEnv env i_offset (i:is)
            = let new_env 
                     = case i of LABEL n -> addToFM env n i_offset ; _ -> env
              in  mkLabelEnv new_env (i_offset + instrSize16s i) is

         findLabel lab
            = case lookupFM label_env lab of
                 Just bco_offset -> bco_offset
                 Nothing -> pprPanic "assembleBCO.findLabel" (int lab)
     in
     do  -- pass 2: generate the instruction, ptr and nonptr bits
         insns <- return emptySS :: IO (SizedSeq Word16)
         lits  <- return emptySS :: IO (SizedSeq (Either Word FastString))
         ptrs  <- return emptySS :: IO (SizedSeq BCOPtr)
         itbls <- return emptySS :: IO (SizedSeq Name)
         let init_asm_state = (insns,lits,ptrs,itbls)
         (final_insns, final_lits, final_ptrs, final_itbls) 
            <- mkBits findLabel init_asm_state instrs

	 let asm_insns = ssElts final_insns
	     n_insns   = sizeSS final_insns

             insns_arr
		 | n_insns > 65535 = panic "linkBCO: >= 64k insns in BCO"
                 | otherwise = mkInstrArray n_insns asm_insns
             insns_barr = case insns_arr of UArray _lo _hi barr -> barr

	     bitmap_arr = mkBitmapArray bsize bitmap
             bitmap_barr = case bitmap_arr of UArray _lo _hi barr -> barr

         let ul_bco = UnlinkedBCO nm arity insns_barr bitmap_barr final_lits 
					final_ptrs final_itbls

         -- 8 Aug 01: Finalisers aren't safe when attached to non-primitive
         -- objects, since they might get run too early.  Disable this until
         -- we figure out what to do.
         -- when (notNull malloced) (addFinalizer ul_bco (mapM_ zonk malloced))

         return ul_bco
     where
         zonk ptr = do -- putStrLn ("freeing malloc'd block at " ++ show (A# a#))
                           free ptr

mkBitmapArray :: Int -> [StgWord] -> UArray Int StgWord
mkBitmapArray bsize bitmap
  = listArray (0, length bitmap) (fromIntegral bsize : bitmap)

mkInstrArray :: Int -> [Word16]	-> UArray Int Word16
mkInstrArray n_insns asm_insns
  = listArray (0, n_insns) (fromIntegral n_insns : asm_insns)

-- instrs nonptrs ptrs itbls
type AsmState = (SizedSeq Word16, 
                 SizedSeq (Either Word FastString),
                 SizedSeq BCOPtr, 
                 SizedSeq Name)

data SizedSeq a = SizedSeq !Int [a]
emptySS = SizedSeq 0 []

-- Why are these two monadic???
addToSS (SizedSeq n r_xs) x = return (SizedSeq (n+1) (x:r_xs))
addListToSS (SizedSeq n r_xs) xs 
   = return (SizedSeq (n + length xs) (reverse xs ++ r_xs))

ssElts :: SizedSeq a -> [a]
ssElts (SizedSeq n r_xs) = reverse r_xs

sizeSS :: SizedSeq a -> Int
sizeSS (SizedSeq n r_xs) = n

-- Bring in all the bci_ bytecode constants.
#include "Bytecodes.h"

-- This is where all the action is (pass 2 of the assembler)
mkBits :: (Int -> Int) 			-- label finder
       -> AsmState
       -> [BCInstr]			-- instructions (in)
       -> IO AsmState

mkBits findLabel st proto_insns
  = foldM doInstr st proto_insns
    where
       doInstr :: AsmState -> BCInstr -> IO AsmState
       doInstr st i
          = case i of
               STKCHECK  n        -> instr2 st bci_STKCHECK n
               PUSH_L    o1       -> instr2 st bci_PUSH_L o1
               PUSH_LL   o1 o2    -> instr3 st bci_PUSH_LL o1 o2
               PUSH_LLL  o1 o2 o3 -> instr4 st bci_PUSH_LLL o1 o2 o3
               PUSH_G    nm       -> do (p, st2) <- ptr st (BCOPtrName nm)
                                        instr2 st2 bci_PUSH_G p
               PUSH_PRIMOP op     -> do (p, st2) <- ptr st (BCOPtrPrimOp op)
                                        instr2 st2 bci_PUSH_G p
               PUSH_BCO proto     -> do ul_bco <- assembleBCO proto
 					(p, st2) <- ptr st (BCOPtrBCO ul_bco)
                                        instr2 st2 bci_PUSH_G p
               PUSH_ALTS proto    -> do ul_bco <- assembleBCO proto
 					(p, st2) <- ptr st (BCOPtrBCO ul_bco)
                                        instr2 st2 bci_PUSH_ALTS p
               PUSH_ALTS_UNLIFTED proto pk -> do 
					ul_bco <- assembleBCO proto
 					(p, st2) <- ptr st (BCOPtrBCO ul_bco)
                                        instr2 st2 (push_alts pk) p
               PUSH_UBX  (Left lit) nws  
                                  -> do (np, st2) <- literal st lit
                                        instr3 st2 bci_PUSH_UBX np nws
               PUSH_UBX  (Right aa) nws  
                                  -> do (np, st2) <- addr st aa
                                        instr3 st2 bci_PUSH_UBX np nws

	       PUSH_APPLY_N	    -> do instr1 st bci_PUSH_APPLY_N
	       PUSH_APPLY_V	    -> do instr1 st bci_PUSH_APPLY_V
	       PUSH_APPLY_F	    -> do instr1 st bci_PUSH_APPLY_F
	       PUSH_APPLY_D	    -> do instr1 st bci_PUSH_APPLY_D
	       PUSH_APPLY_L	    -> do instr1 st bci_PUSH_APPLY_L
	       PUSH_APPLY_P	    -> do instr1 st bci_PUSH_APPLY_P
	       PUSH_APPLY_PP	    -> do instr1 st bci_PUSH_APPLY_PP
	       PUSH_APPLY_PPP	    -> do instr1 st bci_PUSH_APPLY_PPP
	       PUSH_APPLY_PPPP	    -> do instr1 st bci_PUSH_APPLY_PPPP
	       PUSH_APPLY_PPPPP	    -> do instr1 st bci_PUSH_APPLY_PPPPP
	       PUSH_APPLY_PPPPPP    -> do instr1 st bci_PUSH_APPLY_PPPPPP

               SLIDE     n by     -> instr3 st bci_SLIDE n by
               ALLOC_AP  n        -> instr2 st bci_ALLOC_AP n
               ALLOC_PAP arity n  -> instr3 st bci_ALLOC_PAP arity n
               MKAP      off sz   -> instr3 st bci_MKAP off sz
               MKPAP     off sz   -> instr3 st bci_MKPAP off sz
               UNPACK    n        -> instr2 st bci_UNPACK n
               PACK      dcon sz  -> do (itbl_no,st2) <- itbl st dcon
                                        instr3 st2 bci_PACK itbl_no sz
               LABEL     lab      -> return st
               TESTLT_I  i l      -> do (np, st2) <- int st i
                                        instr3 st2 bci_TESTLT_I np (findLabel l)
               TESTEQ_I  i l      -> do (np, st2) <- int st i
                                        instr3 st2 bci_TESTEQ_I np (findLabel l)
               TESTLT_F  f l      -> do (np, st2) <- float st f
                                        instr3 st2 bci_TESTLT_F np (findLabel l)
               TESTEQ_F  f l      -> do (np, st2) <- float st f
                                        instr3 st2 bci_TESTEQ_F np (findLabel l)
               TESTLT_D  d l      -> do (np, st2) <- double st d
                                        instr3 st2 bci_TESTLT_D np (findLabel l)
               TESTEQ_D  d l      -> do (np, st2) <- double st d
                                        instr3 st2 bci_TESTEQ_D np (findLabel l)
               TESTLT_P  i l      -> instr3 st bci_TESTLT_P i (findLabel l)
               TESTEQ_P  i l      -> instr3 st bci_TESTEQ_P i (findLabel l)
               CASEFAIL           -> instr1 st bci_CASEFAIL
               SWIZZLE   stkoff n -> instr3 st bci_SWIZZLE stkoff n
               JMP       l        -> instr2 st bci_JMP (findLabel l)
               ENTER              -> instr1 st bci_ENTER
               RETURN             -> instr1 st bci_RETURN
               RETURN_UBX rep     -> instr1 st (return_ubx rep)
               CCALL off m_addr   -> do (np, st2) <- addr st m_addr
                                        instr3 st2 bci_CCALL off np

       i2s :: Int -> Word16
       i2s = fromIntegral

       instr1 (st_i0,st_l0,st_p0,st_I0) i1
          = do st_i1 <- addToSS st_i0 i1
               return (st_i1,st_l0,st_p0,st_I0)

       instr2 (st_i0,st_l0,st_p0,st_I0) i1 i2
          = do st_i1 <- addToSS st_i0 (i2s i1)
               st_i2 <- addToSS st_i1 (i2s i2)
               return (st_i2,st_l0,st_p0,st_I0)

       instr3 (st_i0,st_l0,st_p0,st_I0) i1 i2 i3
          = do st_i1 <- addToSS st_i0 (i2s i1)
               st_i2 <- addToSS st_i1 (i2s i2)
               st_i3 <- addToSS st_i2 (i2s i3)
               return (st_i3,st_l0,st_p0,st_I0)

       instr4 (st_i0,st_l0,st_p0,st_I0) i1 i2 i3 i4
          = do st_i1 <- addToSS st_i0 (i2s i1)
               st_i2 <- addToSS st_i1 (i2s i2)
               st_i3 <- addToSS st_i2 (i2s i3)
               st_i4 <- addToSS st_i3 (i2s i4)
               return (st_i4,st_l0,st_p0,st_I0)

       float (st_i0,st_l0,st_p0,st_I0) f
          = do let ws = mkLitF f
               st_l1 <- addListToSS st_l0 (map Left ws)
               return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))

       double (st_i0,st_l0,st_p0,st_I0) d
          = do let ws = mkLitD d
               st_l1 <- addListToSS st_l0 (map Left ws)
               return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))

       int (st_i0,st_l0,st_p0,st_I0) i
          = do let ws = mkLitI i
               st_l1 <- addListToSS st_l0 (map Left ws)
               return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))

       int64 (st_i0,st_l0,st_p0,st_I0) i
          = do let ws = mkLitI64 i
               st_l1 <- addListToSS st_l0 (map Left ws)
               return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))

       addr (st_i0,st_l0,st_p0,st_I0) a
          = do let ws = mkLitPtr a
               st_l1 <- addListToSS st_l0 (map Left ws)
               return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))

       litlabel (st_i0,st_l0,st_p0,st_I0) fs
          = do st_l1 <- addListToSS st_l0 [Right fs]
               return (sizeSS st_l0, (st_i0,st_l1,st_p0,st_I0))

       ptr (st_i0,st_l0,st_p0,st_I0) p
          = do st_p1 <- addToSS st_p0 p
               return (sizeSS st_p0, (st_i0,st_l0,st_p1,st_I0))

       itbl (st_i0,st_l0,st_p0,st_I0) dcon
          = do st_I1 <- addToSS st_I0 (getName dcon)
               return (sizeSS st_I0, (st_i0,st_l0,st_p0,st_I1))

       literal st (MachLabel fs _) = litlabel st fs
       literal st (MachWord w)     = int st (fromIntegral w)
       literal st (MachInt j)      = int st (fromIntegral j)
       literal st (MachFloat r)    = float st (fromRational r)
       literal st (MachDouble r)   = double st (fromRational r)
       literal st (MachChar c)     = int st (ord c)
       literal st (MachInt64 ii)   = int64 st (fromIntegral ii)
       literal st (MachWord64 ii)  = int64 st (fromIntegral ii)
       literal st other            = pprPanic "ByteCodeLink.literal" (ppr other)


push_alts NonPtrArg = bci_PUSH_ALTS_N
push_alts FloatArg  = bci_PUSH_ALTS_F
push_alts DoubleArg = bci_PUSH_ALTS_D
push_alts VoidArg   = bci_PUSH_ALTS_V
push_alts LongArg   = bci_PUSH_ALTS_L
push_alts PtrArg    = bci_PUSH_ALTS_P

return_ubx NonPtrArg = bci_RETURN_N
return_ubx FloatArg  = bci_RETURN_F
return_ubx DoubleArg = bci_RETURN_D
return_ubx VoidArg   = bci_RETURN_V
return_ubx LongArg   = bci_RETURN_L
return_ubx PtrArg    = bci_RETURN_P


-- The size in 16-bit entities of an instruction.
instrSize16s :: BCInstr -> Int
instrSize16s instr
   = case instr of
        STKCHECK{}		-> 2
        PUSH_L{}		-> 2
        PUSH_LL{}		-> 3
        PUSH_LLL{}		-> 4
        PUSH_G{}		-> 2
        PUSH_PRIMOP{}		-> 2
        PUSH_BCO{}		-> 2
        PUSH_ALTS{}		-> 2
        PUSH_ALTS_UNLIFTED{}	-> 2
	PUSH_UBX{}		-> 3
	PUSH_APPLY_N{}		-> 1
	PUSH_APPLY_V{}		-> 1
	PUSH_APPLY_F{}		-> 1
	PUSH_APPLY_D{}		-> 1
	PUSH_APPLY_L{}		-> 1
	PUSH_APPLY_P{}		-> 1
	PUSH_APPLY_PP{}		-> 1
	PUSH_APPLY_PPP{}	-> 1
	PUSH_APPLY_PPPP{}	-> 1
	PUSH_APPLY_PPPPP{}	-> 1
	PUSH_APPLY_PPPPPP{}	-> 1
        SLIDE{}			-> 3
        ALLOC_AP{}		-> 2
        ALLOC_PAP{}		-> 3
        MKAP{}			-> 3
        MKPAP{}			-> 3
        UNPACK{}		-> 2
        PACK{}			-> 3
        LABEL{}			-> 0	-- !!
        TESTLT_I{}		-> 3
        TESTEQ_I{}		-> 3
        TESTLT_F{}		-> 3
        TESTEQ_F{}		-> 3
        TESTLT_D{}		-> 3
        TESTEQ_D{}		-> 3
        TESTLT_P{}		-> 3
        TESTEQ_P{}		-> 3
        JMP{}			-> 2
        CASEFAIL{}		-> 1
        ENTER{}			-> 1
        RETURN{}		-> 1
        RETURN_UBX{}		-> 1
	CCALL{}			-> 3
        SWIZZLE{}		-> 3

-- Make lists of host-sized words for literals, so that when the
-- words are placed in memory at increasing addresses, the
-- bit pattern is correct for the host's word size and endianness.
mkLitI   :: Int    -> [Word]
mkLitF   :: Float  -> [Word]
mkLitD   :: Double -> [Word]
mkLitPtr :: Ptr () -> [Word]
mkLitI64 :: Int64  -> [Word]

mkLitF f
   = runST (do
        arr <- newArray_ ((0::Int),0)
        writeArray arr 0 f
        f_arr <- castSTUArray arr
        w0 <- readArray f_arr 0
        return [w0 :: Word]
     )

mkLitD d
   | wORD_SIZE == 4
   = runST (do
        arr <- newArray_ ((0::Int),1)
        writeArray arr 0 d
        d_arr <- castSTUArray arr
        w0 <- readArray d_arr 0
        w1 <- readArray d_arr 1
        return [w0 :: Word, w1]
     )
   | wORD_SIZE == 8
   = runST (do
        arr <- newArray_ ((0::Int),0)
        writeArray arr 0 d
        d_arr <- castSTUArray arr
        w0 <- readArray d_arr 0
        return [w0 :: Word]
     )

mkLitI64 ii
   | wORD_SIZE == 4
   = runST (do
        arr <- newArray_ ((0::Int),1)
        writeArray arr 0 ii
        d_arr <- castSTUArray arr
        w0 <- readArray d_arr 0
        w1 <- readArray d_arr 1
        return [w0 :: Word,w1]
     )
   | wORD_SIZE == 8
   = runST (do
        arr <- newArray_ ((0::Int),0)
        writeArray arr 0 ii
        d_arr <- castSTUArray arr
        w0 <- readArray d_arr 0
        return [w0 :: Word]
     )

mkLitI i
   = runST (do
        arr <- newArray_ ((0::Int),0)
        writeArray arr 0 i
        i_arr <- castSTUArray arr
        w0 <- readArray i_arr 0
        return [w0 :: Word]
     )

mkLitPtr a
   = runST (do
        arr <- newArray_ ((0::Int),0)
        writeArray arr 0 a
        a_arr <- castSTUArray arr
        w0 <- readArray a_arr 0
        return [w0 :: Word]
     )

iNTERP_STACK_CHECK_THRESH = (INTERP_STACK_CHECK_THRESH :: Int)
\end{code}