1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
|
%
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%
% $Id: CgMonad.lhs,v 1.29 2001/08/31 12:39:06 rje Exp $
%
\section[CgMonad]{The code generation monad}
See the beginning of the top-level @CodeGen@ module, to see how this
monadic stuff fits into the Big Picture.
\begin{code}
module CgMonad (
Code, -- type
FCode, -- type
initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,
returnFC, fixC, absC, nopC, getAbsC,
forkClosureBody, forkStatics, forkAlts, forkEval,
forkEvalHelp, forkAbsC,
SemiTaggingStuff,
EndOfBlockInfo(..),
setEndOfBlockInfo, getEndOfBlockInfo,
setSRTLabel, getSRTLabel,
setTickyCtrLabel, getTickyCtrLabel,
StackUsage, Slot(..), HeapUsage,
profCtrC, profCtrAbsC,
costCentresC, moduleName,
Sequel(..), -- ToDo: unabstract?
sequelToAmode,
-- ideally we wouldn't export these, but some other modules access internal state
getState, setState, getInfoDown,
-- more localised access to monad state
getUsage, setUsage,
getBinds, setBinds, getStaticBinds,
-- out of general friendliness, we also export ...
CgInfoDownwards(..), CgState(..), -- non-abstract
CompilationInfo(..)
) where
#include "HsVersions.h"
import {-# SOURCE #-} CgBindery ( CgBindings, nukeVolatileBinds )
import {-# SOURCE #-} CgUsages ( getSpRelOffset )
import AbsCSyn
import AbsCUtils ( mkAbsCStmts )
import CmdLineOpts ( opt_SccProfilingOn, opt_DoTickyProfiling )
import CLabel ( CLabel, mkUpdInfoLabel, mkTopTickyCtrLabel )
import Module ( Module )
import DataCon ( ConTag )
import Id ( Id )
import VarEnv
import PrimRep ( PrimRep(..) )
import Outputable
infixr 9 `thenC` -- Right-associative!
infixr 9 `thenFC`
\end{code}
%************************************************************************
%* *
\subsection[CgMonad-environment]{Stuff for manipulating environments}
%* *
%************************************************************************
This monadery has some information that it only passes {\em
downwards}, as well as some ``state'' which is modified as we go
along.
\begin{code}
data CgInfoDownwards -- information only passed *downwards* by the monad
= MkCgInfoDown
CompilationInfo -- COMPLETELY STATIC info about this compilation
-- (e.g., what flags were passed to the compiler)
CgBindings -- [Id -> info] : static environment
CLabel -- label of the current SRT
CLabel -- current destination for ticky counts
EndOfBlockInfo -- Info for stuff to do at end of basic block:
data CompilationInfo
= MkCompInfo
Module -- the module name
data CgState
= MkCgState
AbstractC -- code accumulated so far
CgBindings -- [Id -> info] : *local* bindings environment
-- Bindings for top-level things are given in the info-down part
CgStksAndHeapUsage
\end{code}
@EndOfBlockInfo@ tells what to do at the end of this block of code or,
if the expression is a @case@, what to do at the end of each
alternative.
\begin{code}
data EndOfBlockInfo
= EndOfBlockInfo
VirtualSpOffset -- Args Sp: trim the stack to this point at a
-- return; push arguments starting just
-- above this point on a tail call.
-- This is therefore the stk ptr as seen
-- by a case alternative.
Sequel
initEobInfo = EndOfBlockInfo 0 (OnStack 0)
\end{code}
Any addressing modes inside @Sequel@ must be ``robust,'' in the sense
that it must survive stack pointer adjustments at the end of the
block.
\begin{code}
data Sequel
= OnStack
VirtualSpOffset -- Continuation is on the stack, at the
-- specified location
| UpdateCode
| CaseAlts
CAddrMode -- Jump to this; if the continuation is for a vectored
-- case this might be the label of a return
-- vector Guaranteed to be a non-volatile
-- addressing mode (I think)
SemiTaggingStuff
| SeqFrame -- like CaseAlts but push a seq frame too.
CAddrMode
SemiTaggingStuff
type SemiTaggingStuff
= Maybe -- Maybe[1] we don't have any semi-tagging stuff...
([(ConTag, JoinDetails)], -- Alternatives
Maybe (Maybe Id, JoinDetails) -- Default (but Maybe[2] we don't have one)
-- Maybe[3] the default is a
-- bind-default (Just b); that is,
-- it expects a ptr to the thing
-- in Node, bound to b
)
type JoinDetails
= (AbstractC, CLabel) -- Code to load regs from heap object + profiling macros,
-- and join point label
-- The abstract C is executed only from a successful semitagging
-- venture, when a case has looked at a variable, found that it's
-- evaluated, and wants to load up the contents and go to the join
-- point.
-- DIRE WARNING.
-- The OnStack case of sequelToAmode delivers an Amode which is only
-- valid just before the final control transfer, because it assumes
-- that Sp is pointing to the top word of the return address. This
-- seems unclean but there you go.
-- sequelToAmode returns an amode which refers to an info table. The info
-- table will always be of the RET(_VEC)?_(BIG|SMALL) kind. We're careful
-- not to handle real code pointers, just in case we're compiling for
-- an unregisterised/untailcallish architecture, where info pointers and
-- code pointers aren't the same.
sequelToAmode :: Sequel -> FCode CAddrMode
sequelToAmode (OnStack virt_sp_offset)
= getSpRelOffset virt_sp_offset `thenFC` \ sp_rel ->
returnFC (CVal sp_rel RetRep)
sequelToAmode UpdateCode = returnFC (CLbl mkUpdInfoLabel RetRep)
sequelToAmode (CaseAlts amode _) = returnFC amode
sequelToAmode (SeqFrame _ _) = panic "sequelToAmode: SeqFrame"
type CgStksAndHeapUsage -- stacks and heap usage information
= (StackUsage, HeapUsage)
data Slot = Free | NonPointer
deriving
#ifdef DEBUG
(Eq,Show)
#else
Eq
#endif
type StackUsage =
(Int, -- virtSp: Virtual offset of topmost allocated slot
[(Int,Slot)], -- free: List of free slots, in increasing order
Int, -- realSp: Virtual offset of real stack pointer
Int) -- hwSp: Highest value ever taken by virtSp
type HeapUsage =
(HeapOffset, -- virtHp: Virtual offset of highest-allocated word
HeapOffset) -- realHp: Virtual offset of real heap ptr
\end{code}
NB: absolutely every one of the above Ints is really
a VirtualOffset of some description (the code generator
works entirely in terms of VirtualOffsets).
Initialisation.
\begin{code}
initialStateC = MkCgState AbsCNop emptyVarEnv initUsage
initUsage :: CgStksAndHeapUsage
initUsage = ((0,[],0,0), (0,0))
\end{code}
"envInitForAlternatives" initialises the environment for a case alternative,
assuming that the alternative is entered after an evaluation.
This involves:
- zapping any volatile bindings, which aren't valid.
- zapping the heap usage. It should be restored by a heap check.
- setting the virtual AND real stack pointer fields to the given
virtual stack offsets. this doesn't represent any {\em code}; it is a
prediction of where the real stack pointer will be when we come back
from the case analysis.
- BUT LEAVING the rest of the stack-usage info because it is all
valid. In particular, we leave the tail stack pointers unchanged,
becuase the alternative has to de-allocate the original @case@
expression's stack. \end{itemize}
@stateIncUsage@$~e_1~e_2$ incorporates in $e_1$ the stack and heap high water
marks found in $e_2$.
\begin{code}
stateIncUsage :: CgState -> CgState -> CgState
stateIncUsage (MkCgState abs_c bs ((v,f,r,h1),(vH1,rH1)))
(MkCgState _ _ ((_,_,_,h2),(vH2, _)))
= MkCgState abs_c
bs
((v,f,r,h1 `max` h2),
(vH1 `max` vH2, rH1))
\end{code}
%************************************************************************
%* *
\subsection[CgMonad-basics]{Basic code-generation monad magic}
%* *
%************************************************************************
\begin{code}
newtype FCode a = FCode (CgInfoDownwards -> CgState -> (a, CgState))
type Code = FCode ()
instance Monad FCode where
(>>=) = thenFC
return = returnFC
{-# INLINE thenC #-}
{-# INLINE thenFC #-}
{-# INLINE returnFC #-}
\end{code}
The Abstract~C is not in the environment so as to improve strictness.
\begin{code}
initC :: CompilationInfo -> Code -> AbstractC
initC cg_info (FCode code)
= case (code (MkCgInfoDown
cg_info
(error "initC: statics")
(error "initC: srt")
(mkTopTickyCtrLabel)
initEobInfo)
initialStateC) of
((),MkCgState abc _ _) -> abc
returnFC :: a -> FCode a
returnFC val = FCode (\info_down state -> (val, state))
\end{code}
\begin{code}
thenC :: Code -> FCode a -> FCode a
thenC (FCode m) (FCode k) =
FCode (\info_down state -> let (_,new_state) = m info_down state in
k info_down new_state)
listCs :: [Code] -> Code
listCs [] = return ()
listCs (fc:fcs) = do
fc
listCs fcs
mapCs :: (a -> Code) -> [a] -> Code
mapCs = mapM_
\end{code}
\begin{code}
thenFC :: FCode a -> (a -> FCode c) -> FCode c
thenFC (FCode m) k = FCode (
\info_down state ->
let
(m_result, new_state) = m info_down state
(FCode kcode) = k m_result
in
kcode info_down new_state
)
listFCs :: [FCode a] -> FCode [a]
listFCs = sequence
mapFCs :: (a -> FCode b) -> [a] -> FCode [b]
mapFCs = mapM
\end{code}
And the knot-tying combinator:
\begin{code}
fixC :: (a -> FCode a) -> FCode a
fixC fcode = FCode (
\info_down state ->
let
FCode fc = fcode v
result@(v,_) = fc info_down state
-- ^--------^
in
result
)
\end{code}
Operators for getting and setting the state and "info_down".
To maximise encapsulation, code should try to only get and set the
state it actually uses.
\begin{code}
getState :: FCode CgState
getState = FCode $ \info_down state -> (state,state)
setState :: CgState -> FCode ()
setState state = FCode $ \info_down _ -> ((),state)
getUsage :: FCode CgStksAndHeapUsage
getUsage = do
MkCgState absC binds usage <- getState
return usage
setUsage :: CgStksAndHeapUsage -> FCode ()
setUsage newusage = do
MkCgState absC binds usage <- getState
setState $ MkCgState absC binds newusage
getBinds :: FCode CgBindings
getBinds = do
MkCgState absC binds usage <- getState
return binds
setBinds :: CgBindings -> FCode ()
setBinds newbinds = do
MkCgState absC binds usage <- getState
setState $ MkCgState absC newbinds usage
getStaticBinds :: FCode CgBindings
getStaticBinds = do
(MkCgInfoDown _ static_binds _ _ _) <- getInfoDown
return static_binds
withState :: FCode a -> CgState -> FCode (a,CgState)
withState (FCode fcode) newstate = FCode $ \info_down state ->
let (retval, state2) = fcode info_down newstate in ((retval,state2), state)
getInfoDown :: FCode CgInfoDownwards
getInfoDown = FCode $ \info_down state -> (info_down,state)
withInfoDown :: FCode a -> CgInfoDownwards -> FCode a
withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state
doFCode :: FCode a -> CgInfoDownwards -> CgState -> (a,CgState)
doFCode (FCode fcode) info_down state = fcode info_down state
\end{code}
@forkClosureBody@ takes a code, $c$, and compiles it in a completely
fresh environment, except that:
- compilation info and statics are passed in unchanged.
The current environment is passed on completely unaltered, except that
abstract C from the fork is incorporated.
@forkAbsC@ takes a code and compiles it in the current environment,
returning the abstract C thus constructed. The current environment
is passed on completely unchanged. It is pretty similar to @getAbsC@,
except that the latter does affect the environment. ToDo: combine?
@forkStatics@ $fc$ compiles $fc$ in an environment whose statics come
from the current bindings, but which is otherwise freshly initialised.
The Abstract~C returned is attached to the current state, but the
bindings and usage information is otherwise unchanged.
\begin{code}
forkClosureBody :: Code -> Code
forkClosureBody (FCode code) = do
(MkCgInfoDown cg_info statics srt ticky _) <- getInfoDown
(MkCgState absC_in binds un_usage) <- getState
let body_info_down = MkCgInfoDown cg_info statics srt ticky initEobInfo
let ((),fork_state) = code body_info_down initialStateC
let MkCgState absC_fork _ _ = fork_state
setState $ MkCgState (AbsCStmts absC_in absC_fork) binds un_usage
forkStatics :: FCode a -> FCode a
forkStatics (FCode fcode) = FCode (
\(MkCgInfoDown cg_info _ srt ticky _)
(MkCgState absC_in statics un_usage)
->
let
(result, state) = fcode rhs_info_down initialStateC
MkCgState absC_fork _ _ = state -- Don't merge these this line with the one
-- above or it becomes too strict!
rhs_info_down = MkCgInfoDown cg_info statics srt ticky initEobInfo
in
(result, MkCgState (AbsCStmts absC_in absC_fork) statics un_usage)
)
forkAbsC :: Code -> FCode AbstractC
forkAbsC (FCode code) =
do
info_down <- getInfoDown
(MkCgState absC1 bs usage) <- getState
let ((),MkCgState absC2 _ ((_, _, _,h2), _)) = code info_down (MkCgState AbsCNop bs usage)
let ((v, f, r, h1), heap_usage) = usage
let new_usage = ((v, f, r, h1 `max` h2), heap_usage)
setState $ MkCgState absC1 bs new_usage
return absC2
\end{code}
@forkAlts@ $bs~d$ takes fcodes $bs$ for the branches of a @case@, and
an fcode for the default case $d$, and compiles each in the current
environment. The current environment is passed on unmodified, except
that
- the worst stack high-water mark is incorporated
- the virtual Hp is moved on to the worst virtual Hp for the branches
\begin{code}
forkAlts :: [FCode a] -> FCode b -> FCode ([a],b)
forkAlts branch_fcodes (FCode deflt_fcode) =
do
info_down <- getInfoDown
in_state <- getState
let compile (FCode fc) = fc info_down in_state
let (branch_results, branch_out_states) = unzip (map compile branch_fcodes)
let (deflt_result, deflt_out_state) = deflt_fcode info_down in_state
setState $ foldl stateIncUsage in_state (deflt_out_state:branch_out_states)
-- NB foldl. in_state is the *left* argument to stateIncUsage
return (branch_results, deflt_result)
\end{code}
@forkEval@ takes two blocks of code.
- The first meddles with the environment to set it up as expected by
the alternatives of a @case@ which does an eval (or gc-possible primop).
- The second block is the code for the alternatives.
(plus info for semi-tagging purposes)
@forkEval@ picks up the virtual stack pointer and returns a suitable
@EndOfBlockInfo@ for the caller to use, together with whatever value
is returned by the second block.
It uses @initEnvForAlternatives@ to initialise the environment, and
@stateIncUsageAlt@ to incorporate usage; the latter ignores the heap
usage.
\begin{code}
forkEval :: EndOfBlockInfo -- For the body
-> Code -- Code to set environment
-> FCode Sequel -- Semi-tagging info to store
-> FCode EndOfBlockInfo -- The new end of block info
forkEval body_eob_info env_code body_code
= forkEvalHelp body_eob_info env_code body_code `thenFC` \ (v, sequel) ->
returnFC (EndOfBlockInfo v sequel)
forkEvalHelp :: EndOfBlockInfo -- For the body
-> Code -- Code to set environment
-> FCode a -- The code to do after the eval
-> FCode (Int, -- Sp
a) -- Result of the FCode
forkEvalHelp body_eob_info env_code body_code =
do
info_down@(MkCgInfoDown cg_info statics srt ticky _) <- getInfoDown
state <- getState
let info_down_for_body = MkCgInfoDown cg_info statics srt ticky body_eob_info
let (_,MkCgState _ binds ((v,f,_,_),_)) =
doFCode env_code info_down_for_body state
let state_for_body = MkCgState AbsCNop
(nukeVolatileBinds binds)
((v,f,v,v), (0,0))
let (value_returned, state_at_end_return) =
doFCode body_code info_down_for_body state_for_body
setState $ state `stateIncUsageEval` state_at_end_return
return (v,value_returned)
stateIncUsageEval :: CgState -> CgState -> CgState
stateIncUsageEval (MkCgState absC1 bs ((v,f,r,h1),heap_usage))
(MkCgState absC2 _ ((_,_,_,h2), _))
= MkCgState (absC1 `mkAbsCStmts` absC2)
-- The AbsC coming back should consist only of nested declarations,
-- notably of the return vector!
bs
((v,f,r,h1 `max` h2), heap_usage)
-- We don't max the heap high-watermark because stateIncUsageEval is
-- used only in forkEval, which in turn is only used for blocks of code
-- which do their own heap-check.
\end{code}
%************************************************************************
%* *
\subsection[CgMonad-spitting-AbstractC]{Spitting out @AbstractC@}
%* *
%************************************************************************
@nopC@ is the no-op for the @Code@ monad; it adds no Abstract~C to the
environment; @absC@ glues @ab_C@ onto the Abstract~C collected so far.
\begin{code}
nopC :: Code
nopC = return ()
absC :: AbstractC -> Code
absC more_absC = do
state@(MkCgState absC binds usage) <- getState
setState $ MkCgState (mkAbsCStmts absC more_absC) binds usage
\end{code}
These two are just like @absC@, except they examine the compilation
info (whether SCC profiling or profiling-ctrs going) and possibly emit
nothing.
\begin{code}
costCentresC :: FAST_STRING -> [CAddrMode] -> Code
costCentresC macro args =
if opt_SccProfilingOn then do
(MkCgState absC binds usage) <- getState
setState $ MkCgState (mkAbsCStmts absC (CCallProfCCMacro macro args)) binds usage
else
nopC
profCtrC :: FAST_STRING -> [CAddrMode] -> Code
profCtrC macro args =
if not opt_DoTickyProfiling
then nopC
else do
(MkCgState absC binds usage) <- getState
setState $ MkCgState (mkAbsCStmts absC (CCallProfCtrMacro macro args)) binds usage
profCtrAbsC :: FAST_STRING -> [CAddrMode] -> AbstractC
profCtrAbsC macro args
= if not opt_DoTickyProfiling
then AbsCNop
else CCallProfCtrMacro macro args
{- Try to avoid adding too many special compilation strategies here.
It's better to modify the header files as necessary for particular
targets, so that we can get away with as few variants of .hc files
as possible.
-}
\end{code}
@getAbsC@ compiles the code in the current environment, and returns
the abstract C thus constructed (leaving the abstract C being carried
around in the state untouched). @getAbsC@ does not generate any
in-line Abstract~C itself, but the environment it returns is that
obtained from the compilation.
\begin{code}
getAbsC :: Code -> FCode AbstractC
getAbsC code = do
MkCgState absC binds usage <- getState
((),MkCgState absC2 binds2 usage2) <- withState code (MkCgState AbsCNop binds usage)
setState $ MkCgState absC binds2 usage2
return absC2
\end{code}
\begin{code}
moduleName :: FCode Module
moduleName = do
(MkCgInfoDown (MkCompInfo mod_name) _ _ _ _) <- getInfoDown
return mod_name
\end{code}
\begin{code}
setEndOfBlockInfo :: EndOfBlockInfo -> Code -> Code
setEndOfBlockInfo eob_info code = do
(MkCgInfoDown c_info statics srt ticky _) <- getInfoDown
withInfoDown code (MkCgInfoDown c_info statics srt ticky eob_info)
getEndOfBlockInfo :: FCode EndOfBlockInfo
getEndOfBlockInfo = do
(MkCgInfoDown c_info statics _ _ eob_info) <- getInfoDown
return eob_info
\end{code}
\begin{code}
getSRTLabel :: FCode CLabel
getSRTLabel = do
(MkCgInfoDown _ _ srt _ _) <- getInfoDown
return srt
setSRTLabel :: CLabel -> Code -> Code
setSRTLabel srt code = do
(MkCgInfoDown c_info statics _ ticky eob_info) <- getInfoDown
withInfoDown code (MkCgInfoDown c_info statics srt ticky eob_info)
\end{code}
\begin{code}
getTickyCtrLabel :: FCode CLabel
getTickyCtrLabel = do
(MkCgInfoDown _ _ _ ticky _) <- getInfoDown
return ticky
setTickyCtrLabel :: CLabel -> Code -> Code
setTickyCtrLabel ticky code = do
(MkCgInfoDown c_info statics srt _ eob_info) <- getInfoDown
withInfoDown code (MkCgInfoDown c_info statics srt ticky eob_info)
\end{code}
|