path: root/compiler/codeGen
Diffstat (limited to 'compiler/codeGen')
-rw-r--r--  compiler/codeGen/Bitmap.hs                79
-rw-r--r--  compiler/codeGen/CgBindery.hi-boot-5       7
-rw-r--r--  compiler/codeGen/CgBindery.hi-boot-6       8
-rw-r--r--  compiler/codeGen/CgBindery.lhs           494
-rw-r--r--  compiler/codeGen/CgBindery.lhs-boot       11
-rw-r--r--  compiler/codeGen/CgCallConv.hs           512
-rw-r--r--  compiler/codeGen/CgCase.lhs              634
-rw-r--r--  compiler/codeGen/CgClosure.lhs           599
-rw-r--r--  compiler/codeGen/CgCon.lhs               457
-rw-r--r--  compiler/codeGen/CgExpr.hi-boot-5          3
-rw-r--r--  compiler/codeGen/CgExpr.hi-boot-6          3
-rw-r--r--  compiler/codeGen/CgExpr.lhs              454
-rw-r--r--  compiler/codeGen/CgExpr.lhs-boot           7
-rw-r--r--  compiler/codeGen/CgForeignCall.hs        256
-rw-r--r--  compiler/codeGen/CgHeapery.lhs           588
-rw-r--r--  compiler/codeGen/CgInfoTbls.hs           591
-rw-r--r--  compiler/codeGen/CgLetNoEscape.lhs       212
-rw-r--r--  compiler/codeGen/CgMonad.lhs             853
-rw-r--r--  compiler/codeGen/CgParallel.hs            90
-rw-r--r--  compiler/codeGen/CgPrimOp.hs             584
-rw-r--r--  compiler/codeGen/CgProf.hs               478
-rw-r--r--  compiler/codeGen/CgStackery.lhs          339
-rw-r--r--  compiler/codeGen/CgTailCall.lhs          455
-rw-r--r--  compiler/codeGen/CgTicky.hs              370
-rw-r--r--  compiler/codeGen/CgUsages.hi-boot-5        3
-rw-r--r--  compiler/codeGen/CgUsages.hi-boot-6        3
-rw-r--r--  compiler/codeGen/CgUtils.hs              688
-rw-r--r--  compiler/codeGen/ClosureInfo.hi-boot-5     4
-rw-r--r--  compiler/codeGen/ClosureInfo.hi-boot-6     4
-rw-r--r--  compiler/codeGen/ClosureInfo.lhs         951
-rw-r--r--  compiler/codeGen/ClosureInfo.lhs-boot      6
-rw-r--r--  compiler/codeGen/CodeGen.lhs             343
-rw-r--r--  compiler/codeGen/SMRep.lhs               361
33 files changed, 10447 insertions, 0 deletions
diff --git a/compiler/codeGen/Bitmap.hs b/compiler/codeGen/Bitmap.hs
new file mode 100644
index 0000000000..c0b490978c
--- /dev/null
+++ b/compiler/codeGen/Bitmap.hs
@@ -0,0 +1,79 @@
+--
+-- (c) The University of Glasgow 2003
+--
+
+-- Functions for constructing bitmaps, which are used in various
+-- places in generated code (stack frame liveness masks, function
+-- argument liveness masks, SRT bitmaps).
+
+module Bitmap (
+ Bitmap, mkBitmap,
+ intsToBitmap, intsToReverseBitmap,
+ mAX_SMALL_BITMAP_SIZE
+ ) where
+
+#include "HsVersions.h"
+#include "../includes/MachDeps.h"
+
+import SMRep
+import Constants
+import DATA_BITS
+
+{-|
+A bitmap represented by a sequence of 'StgWord's on the /target/
+architecture. These are used for bitmaps in info tables and other
+generated code which need to be emitted as sequences of StgWords.
+-}
+type Bitmap = [StgWord]
+
+-- | Make a bitmap from a sequence of bits
+mkBitmap :: [Bool] -> Bitmap
+mkBitmap [] = []
+mkBitmap stuff = chunkToBitmap chunk : mkBitmap rest
+ where (chunk, rest) = splitAt wORD_SIZE_IN_BITS stuff
+
+chunkToBitmap :: [Bool] -> StgWord
+chunkToBitmap chunk =
+ foldr (.|.) 0 [ 1 `shiftL` n | (True,n) <- zip chunk [0..] ]
+
+-- | Make a bitmap where the slots specified are the /ones/ in the bitmap.
+-- eg. @[0,1,3], size 4 ==> 0xb@.
+--
+-- The list of @Int@s /must/ be already sorted.
+intsToBitmap :: Int -> [Int] -> Bitmap
+intsToBitmap size slots{- must be sorted -}
+ | size <= 0 = []
+ | otherwise =
+ (foldr (.|.) 0 (map (1 `shiftL`) these)) :
+ intsToBitmap (size - wORD_SIZE_IN_BITS)
+ (map (\x -> x - wORD_SIZE_IN_BITS) rest)
+ where (these,rest) = span (<wORD_SIZE_IN_BITS) slots
+
+-- | Make a bitmap where the slots specified are the /zeros/ in the bitmap.
+-- eg. @[0,1,3], size 4 ==> 0x4@ (we leave any bits outside the size as zero,
+-- just to make the bitmap easier to read).
+--
+-- The list of @Int@s /must/ be already sorted.
+intsToReverseBitmap :: Int -> [Int] -> Bitmap
+intsToReverseBitmap size slots{- must be sorted -}
+ | size <= 0 = []
+ | otherwise =
+ (foldr xor init (map (1 `shiftL`) these)) :
+ intsToReverseBitmap (size - wORD_SIZE_IN_BITS)
+ (map (\x -> x - wORD_SIZE_IN_BITS) rest)
+ where (these,rest) = span (<wORD_SIZE_IN_BITS) slots
+ init
+ | size >= wORD_SIZE_IN_BITS = complement 0
+ | otherwise = (1 `shiftL` size) - 1
+
+{- |
+Magic number, must agree with @BITMAP_BITS_SHIFT@ in InfoTables.h.
+Some kinds of bitmap pack a size\/bitmap into a single word if
+possible, or fall back to an external pointer when the bitmap is too
+large. This value represents the largest size of bitmap that can be
+packed into a single word.
+-}
+mAX_SMALL_BITMAP_SIZE :: Int
+mAX_SMALL_BITMAP_SIZE | wORD_SIZE == 4 = 27
+ | otherwise = 58
+
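
A standalone illustration of the packing scheme above (a sketch using a
plain 64-bit Word in place of the target-dependent StgWord; chunkToWord
is a hypothetical stand-in for chunkToBitmap):

    import Data.Bits (shiftL, (.|.))
    import Data.Word (Word64)

    -- One word's worth of bits, least significant bit first,
    -- mirroring chunkToBitmap above.
    chunkToWord :: [Bool] -> Word64
    chunkToWord chunk =
        foldr (.|.) 0 [ 1 `shiftL` n | (True, n) <- zip chunk [0..] ]

    -- chunkToWord [True, False, True, True] == 0xd  (bits 0, 2 and 3 set)

The magic numbers 27 and 58 are simply wORD_SIZE_IN_BITS minus the low
bits reserved for the size field (5 on 32-bit targets and 6 on 64-bit
ones, matching BITMAP_BITS_SHIFT).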
diff --git a/compiler/codeGen/CgBindery.hi-boot-5 b/compiler/codeGen/CgBindery.hi-boot-5
new file mode 100644
index 0000000000..f375fcc6e1
--- /dev/null
+++ b/compiler/codeGen/CgBindery.hi-boot-5
@@ -0,0 +1,7 @@
+__interface CgBindery 1 0 where
+__export CgBindery CgBindings CgIdInfo VolatileLoc StableLoc nukeVolatileBinds;
+1 type CgBindings = VarEnv.IdEnv CgIdInfo;
+1 data CgIdInfo;
+1 data VolatileLoc;
+1 data StableLoc;
+1 nukeVolatileBinds :: CgBindings -> CgBindings ;
diff --git a/compiler/codeGen/CgBindery.hi-boot-6 b/compiler/codeGen/CgBindery.hi-boot-6
new file mode 100644
index 0000000000..7d1f300a86
--- /dev/null
+++ b/compiler/codeGen/CgBindery.hi-boot-6
@@ -0,0 +1,8 @@
+module CgBindery where
+
+type CgBindings = VarEnv.IdEnv CgIdInfo
+data CgIdInfo
+data VolatileLoc
+data StableLoc
+
+nukeVolatileBinds :: CgBindings -> CgBindings
diff --git a/compiler/codeGen/CgBindery.lhs b/compiler/codeGen/CgBindery.lhs
new file mode 100644
index 0000000000..f78edda655
--- /dev/null
+++ b/compiler/codeGen/CgBindery.lhs
@@ -0,0 +1,494 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+\section[CgBindery]{Utility functions related to doing @CgBindings@}
+
+\begin{code}
+module CgBindery (
+ CgBindings, CgIdInfo,
+ StableLoc, VolatileLoc,
+
+ cgIdInfoId, cgIdInfoArgRep, cgIdInfoLF,
+
+ stableIdInfo, heapIdInfo,
+ letNoEscapeIdInfo, idInfoToAmode,
+
+ addBindC, addBindsC,
+
+ nukeVolatileBinds,
+ nukeDeadBindings,
+ getLiveStackSlots,
+
+ bindArgsToStack, rebindToStack,
+ bindNewToNode, bindNewToReg, bindArgsToRegs,
+ bindNewToTemp,
+ getArgAmode, getArgAmodes,
+ getCgIdInfo,
+ getCAddrModeIfVolatile, getVolatileRegs,
+ maybeLetNoEscape,
+ ) where
+
+#include "HsVersions.h"
+
+import CgMonad
+import CgHeapery ( getHpRelOffset )
+import CgStackery ( freeStackSlots, getSpRelOffset )
+import CgUtils ( cgLit, cmmOffsetW )
+import CLabel ( mkClosureLabel, pprCLabel )
+import ClosureInfo ( mkLFImported, mkLFArgument, LambdaFormInfo )
+
+import Cmm
+import PprCmm ( {- instance Outputable -} )
+import SMRep ( CgRep(..), WordOff, isFollowableArg,
+ isVoidArg, cgRepSizeW, argMachRep,
+ idCgRep, typeCgRep )
+import Id ( Id, idName )
+import VarEnv
+import VarSet ( varSetElems )
+import Literal ( literalType )
+import Maybes ( catMaybes )
+import Name ( isExternalName )
+import StgSyn ( StgArg, StgLiveVars, GenStgArg(..), isStgTypeArg )
+import Unique ( Uniquable(..) )
+import UniqSet ( elementOfUniqSet )
+import Outputable
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[Bindery-datatypes]{Data types}
+%* *
+%************************************************************************
+
+@(CgBinding a b)@ is a type of finite maps from a to b.
+
+The assumption used to be that @lookupCgBind@ must get exactly one
+match. This is {\em completely wrong} in the case of compiling
+letrecs (where knot-tying is used). An initial binding is fed in (and
+never evaluated); eventually, a correct binding is put into the
+environment. So there can be two bindings for a given name.
+
+\begin{code}
+type CgBindings = IdEnv CgIdInfo
+
+data CgIdInfo
+ = CgIdInfo
+ { cg_id :: Id -- Id that this is the info for
+ -- Can differ from the Id at occurrence sites by
+ -- virtue of being externalised, for splittable C
+ , cg_rep :: CgRep
+ , cg_vol :: VolatileLoc
+ , cg_stb :: StableLoc
+ , cg_lf :: LambdaFormInfo }
+
+mkCgIdInfo id vol stb lf
+ = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb,
+ cg_lf = lf, cg_rep = idCgRep id }
+
+voidIdInfo id = CgIdInfo { cg_id = id, cg_vol = NoVolatileLoc
+ , cg_stb = VoidLoc, cg_lf = mkLFArgument id
+ , cg_rep = VoidArg }
+ -- Used just for VoidRep things
+
+data VolatileLoc -- These locations die across a call
+ = NoVolatileLoc
+ | RegLoc CmmReg -- In one of the registers (global or local)
+ | VirHpLoc VirtualHpOffset -- Hp+offset (address of closure)
+ | VirNodeLoc VirtualHpOffset -- Cts of offset indirect from Node
+ -- ie *(Node+offset)
+\end{code}
+
+@StableLoc@ encodes where an Id can be found, used by
+the @CgBindings@ environment in @CgBindery@.
+
+\begin{code}
+data StableLoc
+ = NoStableLoc
+
+ | VirStkLoc VirtualSpOffset -- The thing is held in this
+ -- stack slot
+
+ | VirStkLNE VirtualSpOffset -- A let-no-escape thing; the
+ -- value is this stack pointer
+ -- (as opposed to the contents of the slot)
+
+ | StableLoc CmmExpr
+ | VoidLoc -- Used only for VoidRep variables. They never need to
+ -- be saved, so it makes sense to treat them as
+ -- having a stable location
+\end{code}
+
+\begin{code}
+instance Outputable CgIdInfo where
+ ppr (CgIdInfo id rep vol stb lf)
+ = ppr id <+> ptext SLIT("-->") <+> vcat [ppr vol, ppr stb]
+
+instance Outputable VolatileLoc where
+ ppr NoVolatileLoc = empty
+ ppr (RegLoc r) = ptext SLIT("reg") <+> ppr r
+ ppr (VirHpLoc v) = ptext SLIT("vh") <+> ppr v
+ ppr (VirNodeLoc v) = ptext SLIT("vn") <+> ppr v
+
+instance Outputable StableLoc where
+ ppr NoStableLoc = empty
+ ppr VoidLoc = ptext SLIT("void")
+ ppr (VirStkLoc v) = ptext SLIT("vs") <+> ppr v
+ ppr (VirStkLNE v) = ptext SLIT("lne") <+> ppr v
+ ppr (StableLoc a) = ptext SLIT("amode") <+> ppr a
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[Bindery-idInfo]{Manipulating IdInfo}
+%* *
+%************************************************************************
+
+\begin{code}
+stableIdInfo id amode lf_info = mkCgIdInfo id NoVolatileLoc (StableLoc amode) lf_info
+heapIdInfo id offset lf_info = mkCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info
+letNoEscapeIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLNE sp) lf_info
+stackIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLoc sp) lf_info
+nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc offset) NoStableLoc lf_info
+regIdInfo id reg lf_info = mkCgIdInfo id (RegLoc reg) NoStableLoc lf_info
+
+idInfoToAmode :: CgIdInfo -> FCode CmmExpr
+idInfoToAmode info
+ = case cg_vol info of {
+ RegLoc reg -> returnFC (CmmReg reg) ;
+ VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetW (CmmReg nodeReg) nd_off) mach_rep) ;
+ VirHpLoc hp_off -> getHpRelOffset hp_off ;
+ NoVolatileLoc ->
+
+ case cg_stb info of
+ StableLoc amode -> returnFC amode
+ VirStkLoc sp_off -> do { sp_rel <- getSpRelOffset sp_off
+ ; return (CmmLoad sp_rel mach_rep) }
+
+ VirStkLNE sp_off -> getSpRelOffset sp_off
+
+ VoidLoc -> return $ pprPanic "idInfoToAmode: void" (ppr (cg_id info))
+ -- We return a 'bottom' amode, rather than panicking now
+ -- In this way getArgAmode returns a pair of (VoidArg, bottom)
+ -- and that's exactly what we want
+
+ NoStableLoc -> pprPanic "idInfoToAmode: no loc" (ppr (cg_id info))
+ }
+ where
+ mach_rep = argMachRep (cg_rep info)
+
+cgIdInfoId :: CgIdInfo -> Id
+cgIdInfoId = cg_id
+
+cgIdInfoLF :: CgIdInfo -> LambdaFormInfo
+cgIdInfoLF = cg_lf
+
+cgIdInfoArgRep :: CgIdInfo -> CgRep
+cgIdInfoArgRep = cg_rep
+
+maybeLetNoEscape (CgIdInfo { cg_stb = VirStkLNE sp_off }) = Just sp_off
+maybeLetNoEscape other = Nothing
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[CgMonad-bindery]{Monad things for fiddling with @CgBindings@}
+%* *
+%************************************************************************
+
+There are three basic routines, for adding (@addBindC@), modifying
+(@modifyBindC@) and looking up (@getCgIdInfo@) bindings.
+
+An @Id@ is bound to a @(VolatileLoc, StableLoc)@ pair.
+The name should not already be bound. (nice ASSERT, eh?)
+
+\begin{code}
+addBindC :: Id -> CgIdInfo -> Code
+addBindC name stuff_to_bind = do
+ binds <- getBinds
+ setBinds $ extendVarEnv binds name stuff_to_bind
+
+addBindsC :: [(Id, CgIdInfo)] -> Code
+addBindsC new_bindings = do
+ binds <- getBinds
+ let new_binds = foldl (\ binds (name,info) -> extendVarEnv binds name info)
+ binds
+ new_bindings
+ setBinds new_binds
+
+modifyBindC :: Id -> (CgIdInfo -> CgIdInfo) -> Code
+modifyBindC name mangle_fn = do
+ binds <- getBinds
+ setBinds $ modifyVarEnv mangle_fn binds name
+
+getCgIdInfo :: Id -> FCode CgIdInfo
+getCgIdInfo id
+ = do { -- Try local bindings first
+ ; local_binds <- getBinds
+ ; case lookupVarEnv local_binds id of {
+ Just info -> return info ;
+ Nothing -> do
+
+ { -- Try top-level bindings
+ static_binds <- getStaticBinds
+ ; case lookupVarEnv static_binds id of {
+ Just info -> return info ;
+ Nothing ->
+
+ -- Should be imported; make up a CgIdInfo for it
+ let
+ name = idName id
+ in
+ if isExternalName name then do
+ hmods <- getHomeModules
+ let ext_lbl = CmmLit (CmmLabel (mkClosureLabel hmods name))
+ return (stableIdInfo id ext_lbl (mkLFImported id))
+ else
+ if isVoidArg (idCgRep id) then
+ -- Void things are never in the environment
+ return (voidIdInfo id)
+ else
+ -- Bug
+ cgLookupPanic id
+ }}}}
+
+
+cgLookupPanic :: Id -> FCode a
+cgLookupPanic id
+ = do static_binds <- getStaticBinds
+ local_binds <- getBinds
+ srt <- getSRTLabel
+ pprPanic "cgPanic"
+ (vcat [ppr id,
+ ptext SLIT("static binds for:"),
+ vcat [ ppr (cg_id info) | info <- varEnvElts static_binds ],
+ ptext SLIT("local binds for:"),
+ vcat [ ppr (cg_id info) | info <- varEnvElts local_binds ],
+ ptext SLIT("SRT label") <+> pprCLabel srt
+ ])
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[Bindery-nuke-volatile]{Nuking volatile bindings}
+%* *
+%************************************************************************
+
+We sometimes want to nuke all the volatile bindings; we must be sure
+we don't leave any (NoVolatile, NoStable) binds around...
+
+\begin{code}
+nukeVolatileBinds :: CgBindings -> CgBindings
+nukeVolatileBinds binds
+ = mkVarEnv (foldr keep_if_stable [] (varEnvElts binds))
+ where
+ keep_if_stable (CgIdInfo { cg_stb = NoStableLoc }) acc = acc
+ keep_if_stable info acc
+ = (cg_id info, info { cg_vol = NoVolatileLoc }) : acc
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[lookup-interface]{Interface functions to looking up bindings}
+%* *
+%************************************************************************
+
+\begin{code}
+getCAddrModeIfVolatile :: Id -> FCode (Maybe CmmExpr)
+getCAddrModeIfVolatile id
+ = do { info <- getCgIdInfo id
+ ; case cg_stb info of
+ NoStableLoc -> do -- Aha! So it is volatile!
+ amode <- idInfoToAmode info
+ return $ Just amode
+ a_stable_loc -> return Nothing }
+\end{code}
+
+@getVolatileRegs@ gets a set of live variables, and returns a list of
+all registers on which these variables depend. These are the regs
+which must be saved and restored across any C calls. If a variable is
+both in a volatile location (depending on a register) {\em and} a
+stable one (notably, on the stack), we modify the current bindings to
+forget the volatile one.
+
+\begin{code}
+getVolatileRegs :: StgLiveVars -> FCode [GlobalReg]
+
+getVolatileRegs vars
+ = do { stuff <- mapFCs snaffle_it (varSetElems vars)
+ ; returnFC $ catMaybes stuff }
+ where
+ snaffle_it var = do
+ { info <- getCgIdInfo var
+ ; let
+ -- commoned-up code...
+ consider_reg reg
+ = -- We assume that all regs can die across C calls
+ -- We leave it to the save-macros to decide which
+ -- regs *really* need to be saved.
+ case cg_stb info of
+ NoStableLoc -> returnFC (Just reg) -- got one!
+ is_a_stable_loc -> do
+ { -- has both volatile & stable locations;
+ -- force it to rely on the stable location
+ modifyBindC var nuke_vol_bind
+ ; return Nothing }
+
+ ; case cg_vol info of
+ RegLoc (CmmGlobal reg) -> consider_reg reg
+ VirNodeLoc _ -> consider_reg node
+ other_loc -> returnFC Nothing -- Local registers
+ }
+
+ nuke_vol_bind info = info { cg_vol = NoVolatileLoc }
+\end{code}
+
+\begin{code}
+getArgAmode :: StgArg -> FCode (CgRep, CmmExpr)
+getArgAmode (StgVarArg var)
+ = do { info <- getCgIdInfo var
+ ; amode <- idInfoToAmode info
+ ; return (cgIdInfoArgRep info, amode ) }
+
+getArgAmode (StgLitArg lit)
+ = do { cmm_lit <- cgLit lit
+ ; return (typeCgRep (literalType lit), CmmLit cmm_lit) }
+
+getArgAmode (StgTypeArg _) = panic "getArgAmode: type arg"
+
+getArgAmodes :: [StgArg] -> FCode [(CgRep, CmmExpr)]
+getArgAmodes [] = returnFC []
+getArgAmodes (atom:atoms)
+ | isStgTypeArg atom = getArgAmodes atoms
+ | otherwise = do { amode <- getArgAmode atom
+ ; amodes <- getArgAmodes atoms
+ ; return ( amode : amodes ) }
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[binding-and-rebinding-interface]{Interface functions for binding and re-binding names}
+%* *
+%************************************************************************
+
+\begin{code}
+bindArgsToStack :: [(Id, VirtualSpOffset)] -> Code
+bindArgsToStack args
+ = mapCs bind args
+ where
+ bind (id, offset) = addBindC id (stackIdInfo id offset (mkLFArgument id))
+
+bindArgsToRegs :: [(Id, GlobalReg)] -> Code
+bindArgsToRegs args
+ = mapCs bind args
+ where
+ bind (arg, reg) = bindNewToReg arg (CmmGlobal reg) (mkLFArgument arg)
+
+bindNewToNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Code
+bindNewToNode id offset lf_info
+ = addBindC id (nodeIdInfo id offset lf_info)
+
+-- Create a new temporary whose unique is that in the id,
+-- bind the id to it, and return the addressing mode for the
+-- temporary.
+bindNewToTemp :: Id -> FCode CmmReg
+bindNewToTemp id
+ = do addBindC id (regIdInfo id temp_reg lf_info)
+ return temp_reg
+ where
+ uniq = getUnique id
+ temp_reg = CmmLocal (LocalReg uniq (argMachRep (idCgRep id)))
+ lf_info = mkLFArgument id -- Always used of things we
+ -- know nothing about
+
+bindNewToReg :: Id -> CmmReg -> LambdaFormInfo -> Code
+bindNewToReg name reg lf_info
+ = addBindC name info
+ where
+ info = mkCgIdInfo name (RegLoc reg) NoStableLoc lf_info
+\end{code}
+
+\begin{code}
+rebindToStack :: Id -> VirtualSpOffset -> Code
+rebindToStack name offset
+ = modifyBindC name replace_stable_fn
+ where
+ replace_stable_fn info = info { cg_stb = VirStkLoc offset }
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[CgMonad-deadslots]{Finding dead stack slots}
+%* *
+%************************************************************************
+
+nukeDeadBindings does the following:
+
+ - Removes all bindings from the environment other than those
+ for variables in the argument to nukeDeadBindings.
+ - Collects any stack slots so freed, and returns them to the stack free
+ list.
+ - Moves the virtual stack pointer to point to the topmost used
+ stack locations.
+
+You can have multi-word slots on the stack (where a Double# used to
+be, for instance); if dead, such a slot will be reported as *several*
+offsets (one per word).
+
+Probably *naughty* to look inside monad...
+
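A small illustration of that convention (a standalone sketch with a
hypothetical name, not part of this patch): a dead slot bound at its
highest word offset contributes every word it covers, matching the
@[offset-size+1 .. offset]@ pattern in @dead_slots@ below.

    -- All word offsets covered by a dead slot, given its highest
    -- offset and its size in words.
    deadSlotOffsets :: Int -> Int -> [Int]
    deadSlotOffsets offset size = [offset - size + 1 .. offset]

    -- deadSlotOffsets 10 2 == [9,10]   -- e.g. a dead Double# slot
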
+\begin{code}
+nukeDeadBindings :: StgLiveVars -- All the *live* variables
+ -> Code
+nukeDeadBindings live_vars = do
+ binds <- getBinds
+ let (dead_stk_slots, bs') =
+ dead_slots live_vars
+ [] []
+ [ (cg_id b, b) | b <- varEnvElts binds ]
+ setBinds $ mkVarEnv bs'
+ freeStackSlots dead_stk_slots
+\end{code}
+
+Several boring auxiliary functions to do the dirty work.
+
+\begin{code}
+dead_slots :: StgLiveVars
+ -> [(Id,CgIdInfo)]
+ -> [VirtualSpOffset]
+ -> [(Id,CgIdInfo)]
+ -> ([VirtualSpOffset], [(Id,CgIdInfo)])
+
+-- dead_slots carries accumulating parameters for
+-- filtered bindings, dead slots
+dead_slots live_vars fbs ds []
+ = (ds, reverse fbs) -- Finished; rm the dups, if any
+
+dead_slots live_vars fbs ds ((v,i):bs)
+ | v `elementOfUniqSet` live_vars
+ = dead_slots live_vars ((v,i):fbs) ds bs
+ -- Live, so don't record it in dead slots
+ -- Instead keep it in the filtered bindings
+
+ | otherwise
+ = case cg_stb i of
+ VirStkLoc offset
+ | size > 0
+ -> dead_slots live_vars fbs ([offset-size+1 .. offset] ++ ds) bs
+
+ _ -> dead_slots live_vars fbs ds bs
+ where
+ size :: WordOff
+ size = cgRepSizeW (cg_rep i)
+\end{code}
+
+\begin{code}
+getLiveStackSlots :: FCode [VirtualSpOffset]
+-- Return the offsets of stack slots containing live pointers
+getLiveStackSlots
+ = do { binds <- getBinds
+ ; return [off | CgIdInfo { cg_stb = VirStkLoc off,
+ cg_rep = rep } <- varEnvElts binds,
+ isFollowableArg rep] }
+\end{code}
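
Taken together, a typical use of this module from the rest of the code
generator looks roughly like this (a hypothetical fragment, not code
from this patch):

    -- inside some FCode computation:
    --   do { bindArgsToStack [(x, 4)]      -- x arrives in stack slot 4
    --      ; info  <- getCgIdInfo x        -- lookup finds VirStkLoc 4
    --      ; amode <- idInfoToAmode info   -- an Sp-relative CmmLoad
    --      ; rebindToStack x 6             -- x re-saved across a call
    --      ; nukeDeadBindings live_vars }  -- slots of bindings outside
    --                                      -- live_vars are freed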
diff --git a/compiler/codeGen/CgBindery.lhs-boot b/compiler/codeGen/CgBindery.lhs-boot
new file mode 100644
index 0000000000..e504a6a9ba
--- /dev/null
+++ b/compiler/codeGen/CgBindery.lhs-boot
@@ -0,0 +1,11 @@
+\begin{code}
+module CgBindery where
+import VarEnv( IdEnv )
+
+data CgIdInfo
+data VolatileLoc
+data StableLoc
+type CgBindings = IdEnv CgIdInfo
+
+nukeVolatileBinds :: CgBindings -> CgBindings
+\end{code}
\ No newline at end of file
diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs
new file mode 100644
index 0000000000..f463255807
--- /dev/null
+++ b/compiler/codeGen/CgCallConv.hs
@@ -0,0 +1,512 @@
+-----------------------------------------------------------------------------
+--
+-- CgCallConv
+--
+-- The datatypes and functions here encapsulate the
+-- calling and return conventions used by the code generator.
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+
+module CgCallConv (
+ -- Argument descriptors
+ mkArgDescr, argDescrType,
+
+ -- Liveness
+ isBigLiveness, buildContLiveness, mkRegLiveness,
+ smallLiveness, mkLivenessCLit,
+
+ -- Register assignment
+ assignCallRegs, assignReturnRegs, assignPrimOpCallRegs,
+
+ -- Calls
+ constructSlowCall, slowArgs, slowCallPattern,
+
+ -- Returns
+ CtrlReturnConvention(..),
+ ctrlReturnConvAlg,
+ dataReturnConvPrim,
+ getSequelAmode
+ ) where
+
+#include "HsVersions.h"
+
+import CgUtils ( emitRODataLits, mkWordCLit )
+import CgMonad
+
+import Constants ( mAX_FAMILY_SIZE_FOR_VEC_RETURNS,
+ mAX_Vanilla_REG, mAX_Float_REG,
+ mAX_Double_REG, mAX_Long_REG,
+ mAX_Real_Vanilla_REG, mAX_Real_Float_REG,
+ mAX_Real_Double_REG, mAX_Real_Long_REG,
+ bITMAP_BITS_SHIFT
+ )
+
+import ClosureInfo ( ArgDescr(..), Liveness(..) )
+import CgStackery ( getSpRelOffset )
+import SMRep
+import MachOp ( wordRep )
+import Cmm ( CmmExpr(..), GlobalReg(..), CmmLit(..), CmmReg(..), node )
+import CmmUtils ( mkLblExpr )
+import CLabel
+import Maybes ( mapCatMaybes )
+import Id ( Id )
+import Name ( Name )
+import TyCon ( TyCon, tyConFamilySize )
+import Bitmap ( Bitmap, mAX_SMALL_BITMAP_SIZE,
+ mkBitmap, intsToReverseBitmap )
+import Util ( isn'tIn, sortLe )
+import StaticFlags ( opt_Unregisterised )
+import FastString ( LitString )
+import Outputable
+import DATA_BITS
+
+
+-------------------------------------------------------------------------
+--
+-- Making argument descriptors
+--
+-- An argument descriptor describes the layout of args on the stack,
+-- both for * GC (stack-layout) purposes, and
+-- * saving/restoring registers when a heap-check fails
+--
+-- Void arguments aren't important, therefore (contrast constructSlowCall)
+--
+-------------------------------------------------------------------------
+
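For concreteness, a standalone model of the layout computed by argBits
below (toy types, assuming a target where a DoubleArg occupies two
words):

    data Rep = Ptr | NonPtr Int         -- toy stand-in for CgRep;
                                        -- the Int is a size in words

    argBitsSketch :: [Rep] -> [Bool]    -- True = non-pointer word
    argBitsSketch []              = []
    argBitsSketch (Ptr : rs)      = False : argBitsSketch rs
    argBitsSketch (NonPtr n : rs) = replicate n True ++ argBitsSketch rs

    -- argBitsSketch [Ptr, NonPtr 2] == [False, True, True]
    --   one pointer word, then the two words of a Double#
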
+-- bring in ARG_P, ARG_N, etc.
+#include "../includes/StgFun.h"
+
+-------------------------
+argDescrType :: ArgDescr -> Int
+-- The "argument type" RTS field type
+argDescrType (ArgSpec n) = n
+argDescrType (ArgGen liveness)
+ | isBigLiveness liveness = ARG_GEN_BIG
+ | otherwise = ARG_GEN
+
+
+mkArgDescr :: Name -> [Id] -> FCode ArgDescr
+mkArgDescr nm args
+ = case stdPattern arg_reps of
+ Just spec_id -> return (ArgSpec spec_id)
+ Nothing -> do { liveness <- mkLiveness nm size bitmap
+ ; return (ArgGen liveness) }
+ where
+ arg_reps = filter nonVoidArg (map idCgRep args)
+ -- Getting rid of voids eases matching of standard patterns
+
+ bitmap = mkBitmap arg_bits
+ arg_bits = argBits arg_reps
+ size = length arg_bits
+
+argBits :: [CgRep] -> [Bool] -- True for non-ptr, False for ptr
+argBits [] = []
+argBits (PtrArg : args) = False : argBits args
+argBits (arg : args) = take (cgRepSizeW arg) (repeat True) ++ argBits args
+
+stdPattern :: [CgRep] -> Maybe Int
+stdPattern [] = Just ARG_NONE -- just void args, probably
+
+stdPattern [PtrArg] = Just ARG_P
+stdPattern [FloatArg] = Just ARG_F
+stdPattern [DoubleArg] = Just ARG_D
+stdPattern [LongArg] = Just ARG_L
+stdPattern [NonPtrArg] = Just ARG_N
+
+stdPattern [NonPtrArg,NonPtrArg] = Just ARG_NN
+stdPattern [NonPtrArg,PtrArg] = Just ARG_NP
+stdPattern [PtrArg,NonPtrArg] = Just ARG_PN
+stdPattern [PtrArg,PtrArg] = Just ARG_PP
+
+stdPattern [NonPtrArg,NonPtrArg,NonPtrArg] = Just ARG_NNN
+stdPattern [NonPtrArg,NonPtrArg,PtrArg] = Just ARG_NNP
+stdPattern [NonPtrArg,PtrArg,NonPtrArg] = Just ARG_NPN
+stdPattern [NonPtrArg,PtrArg,PtrArg] = Just ARG_NPP
+stdPattern [PtrArg,NonPtrArg,NonPtrArg] = Just ARG_PNN
+stdPattern [PtrArg,NonPtrArg,PtrArg] = Just ARG_PNP
+stdPattern [PtrArg,PtrArg,NonPtrArg] = Just ARG_PPN
+stdPattern [PtrArg,PtrArg,PtrArg] = Just ARG_PPP
+
+stdPattern [PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPP
+stdPattern [PtrArg,PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPPP
+stdPattern [PtrArg,PtrArg,PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPPPP
+stdPattern other = Nothing
+
+
+-------------------------------------------------------------------------
+--
+-- Liveness info
+--
+-------------------------------------------------------------------------
+
+mkLiveness :: Name -> Int -> Bitmap -> FCode Liveness
+mkLiveness name size bits
+ | size > mAX_SMALL_BITMAP_SIZE -- Bitmap does not fit in one word
+ = do { let lbl = mkBitmapLabel name
+ ; emitRODataLits lbl ( mkWordCLit (fromIntegral size)
+ : map mkWordCLit bits)
+ ; return (BigLiveness lbl) }
+
+ | otherwise -- Bitmap fits in one word
+ = let
+ small_bits = case bits of
+ [] -> 0
+ [b] -> fromIntegral b
+ _ -> panic "livenessToAddrMode"
+ in
+ return (smallLiveness size small_bits)
+
+smallLiveness :: Int -> StgWord -> Liveness
+smallLiveness size small_bits = SmallLiveness bits
+ where bits = fromIntegral size .|. (small_bits `shiftL` bITMAP_BITS_SHIFT)
+
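A worked instance (assuming a 32-bit target, where bITMAP_BITS_SHIFT
is 5):

    -- smallLiveness 4 0xb
    --   == SmallLiveness (4 .|. (0xb `shiftL` 5))
    --   == SmallLiveness 0x164
    -- the frame size sits in the low 5 bits, the bitmap just above them
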
+-------------------
+isBigLiveness :: Liveness -> Bool
+isBigLiveness (BigLiveness _) = True
+isBigLiveness (SmallLiveness _) = False
+
+-------------------
+mkLivenessCLit :: Liveness -> CmmLit
+mkLivenessCLit (BigLiveness lbl) = CmmLabel lbl
+mkLivenessCLit (SmallLiveness bits) = mkWordCLit bits
+
+
+-------------------------------------------------------------------------
+--
+-- Bitmap describing register liveness
+-- across GC when doing a "generic" heap check
+-- (a RET_DYN stack frame).
+--
+-- NB. Must agree with these macros (currently in StgMacros.h):
+-- GET_NON_PTRS(), GET_PTRS(), GET_LIVENESS().
+-------------------------------------------------------------------------
+
+mkRegLiveness :: [(Id, GlobalReg)] -> Int -> Int -> StgWord
+mkRegLiveness regs ptrs nptrs
+ = (fromIntegral nptrs `shiftL` 16) .|.
+ (fromIntegral ptrs `shiftL` 24) .|.
+ all_non_ptrs `xor` reg_bits regs
+ where
+ all_non_ptrs = 0xff
+
+ reg_bits [] = 0
+ reg_bits ((id, VanillaReg i) : regs) | isFollowableArg (idCgRep id)
+ = (1 `shiftL` (i - 1)) .|. reg_bits regs
+ reg_bits (_ : regs)
+ = reg_bits regs
+
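A worked instance (a sketch: assume x is an Id with a pointer rep held
in VanillaReg 1, alongside one pointer word and two non-pointer words):

    -- mkRegLiveness [(x, VanillaReg 1)] 1 2
    --   == (2 `shiftL` 16) .|. (1 `shiftL` 24) .|. (0xff `xor` 0x1)
    --   == 0x010200fe
    -- R1 is knocked out of the all-non-pointers byte, since it holds
    -- a pointer
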
+-------------------------------------------------------------------------
+--
+-- Pushing the arguments for a slow call
+--
+-------------------------------------------------------------------------
+
+-- For a slow call, we must take a bunch of arguments and intersperse
+-- some stg_ap_<pattern>_ret_info return addresses.
+constructSlowCall
+ :: [(CgRep,CmmExpr)]
+ -> (CLabel, -- RTS entry point for call
+ [(CgRep,CmmExpr)], -- args to pass to the entry point
+ [(CgRep,CmmExpr)]) -- stuff to save on the stack
+
+ -- don't forget the zero case
+constructSlowCall []
+ = (mkRtsApFastLabel SLIT("stg_ap_0"), [], [])
+
+constructSlowCall amodes
+ = (stg_ap_pat, these, rest)
+ where
+ stg_ap_pat = mkRtsApFastLabel arg_pat
+ (arg_pat, these, rest) = matchSlowPattern amodes
+
+enterRtsRetLabel arg_pat
+ | tablesNextToCode = mkRtsRetInfoLabel arg_pat
+ | otherwise = mkRtsRetLabel arg_pat
+
+-- | 'slowArgs' takes a list of function arguments and prepares them for
+-- pushing on the stack for "extra" arguments to a function which requires
+-- fewer arguments than we currently have.
+slowArgs :: [(CgRep,CmmExpr)] -> [(CgRep,CmmExpr)]
+slowArgs [] = []
+slowArgs amodes = (NonPtrArg, mkLblExpr stg_ap_pat) : args ++ slowArgs rest
+ where (arg_pat, args, rest) = matchSlowPattern amodes
+ stg_ap_pat = mkRtsRetInfoLabel arg_pat
+
+matchSlowPattern :: [(CgRep,CmmExpr)]
+ -> (LitString, [(CgRep,CmmExpr)], [(CgRep,CmmExpr)])
+matchSlowPattern amodes = (arg_pat, these, rest)
+ where (arg_pat, n) = slowCallPattern (map fst amodes)
+ (these, rest) = splitAt n amodes
+
+-- These cases were found to cover about 99% of all slow calls:
+slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_pppppp"), 6)
+slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_ppppp"), 5)
+slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_pppp"), 4)
+slowCallPattern (PtrArg: PtrArg: PtrArg: VoidArg: _) = (SLIT("stg_ap_pppv"), 4)
+slowCallPattern (PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_ppp"), 3)
+slowCallPattern (PtrArg: PtrArg: VoidArg: _) = (SLIT("stg_ap_ppv"), 3)
+slowCallPattern (PtrArg: PtrArg: _) = (SLIT("stg_ap_pp"), 2)
+slowCallPattern (PtrArg: VoidArg: _) = (SLIT("stg_ap_pv"), 2)
+slowCallPattern (PtrArg: _) = (SLIT("stg_ap_p"), 1)
+slowCallPattern (VoidArg: _) = (SLIT("stg_ap_v"), 1)
+slowCallPattern (NonPtrArg: _) = (SLIT("stg_ap_n"), 1)
+slowCallPattern (FloatArg: _) = (SLIT("stg_ap_f"), 1)
+slowCallPattern (DoubleArg: _) = (SLIT("stg_ap_d"), 1)
+slowCallPattern (LongArg: _) = (SLIT("stg_ap_l"), 1)
+slowCallPattern _ = panic "CgStackery.slowCallPattern"
+
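A standalone model of the greedy splitting this implements (simplified
to pointer-only argument lists; the names are hypothetical):

    -- Which stg_ap pattern a run of n pointer args starts with, and
    -- how many args it consumes (the largest pattern takes six).
    pickPattern :: Int -> (String, Int)
    pickPattern n = let k = min n 6 in ("stg_ap_" ++ replicate k 'p', k)

    slowArgsSketch :: Int -> [String]
    slowArgsSketch 0 = []
    slowArgsSketch n = pat : slowArgsSketch (n - k)
      where (pat, k) = pickPattern n

    -- slowArgsSketch 7 == ["stg_ap_pppppp","stg_ap_p"]
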
+-------------------------------------------------------------------------
+--
+-- Return conventions
+--
+-------------------------------------------------------------------------
+
+-- A @CtrlReturnConvention@ says how {\em control} is returned.
+
+data CtrlReturnConvention
+ = VectoredReturn Int -- size of the vector table (family size)
+ | UnvectoredReturn Int -- family size
+
+ctrlReturnConvAlg :: TyCon -> CtrlReturnConvention
+ctrlReturnConvAlg tycon
+ = case (tyConFamilySize tycon) of
+ size -> -- we're supposed to know...
+ if (size > (1::Int) && size <= mAX_FAMILY_SIZE_FOR_VEC_RETURNS) then
+ VectoredReturn size
+ else
+ UnvectoredReturn size
+ -- NB: unvectored returns include size 0 (no constructors), so that
+ -- the following perverse code compiles (it crashed GHC in 5.02)
+ -- data T1
+ -- data T2 = T2 !T1 Int
+ -- The only value of type T1 is bottom, which never returns anyway.
+
+dataReturnConvPrim :: CgRep -> CmmReg
+dataReturnConvPrim PtrArg = CmmGlobal (VanillaReg 1)
+dataReturnConvPrim NonPtrArg = CmmGlobal (VanillaReg 1)
+dataReturnConvPrim LongArg = CmmGlobal (LongReg 1)
+dataReturnConvPrim FloatArg = CmmGlobal (FloatReg 1)
+dataReturnConvPrim DoubleArg = CmmGlobal (DoubleReg 1)
+dataReturnConvPrim VoidArg = panic "dataReturnConvPrim: void"
+
+
+-- getSequelAmode returns an amode which refers to an info table. The info
+-- table will always be of the RET(_VEC)?_(BIG|SMALL) kind. We're careful
+-- not to handle real code pointers, just in case we're compiling for
+-- an unregisterised/untailcallish architecture, where info pointers and
+-- code pointers aren't the same.
+-- DIRE WARNING.
+-- The OnStack case of sequelToAmode delivers an Amode which is only
+-- valid just before the final control transfer, because it assumes
+-- that Sp is pointing to the top word of the return address. This
+-- seems unclean but there you go.
+
+getSequelAmode :: FCode CmmExpr
+getSequelAmode
+ = do { EndOfBlockInfo virt_sp sequel <- getEndOfBlockInfo
+ ; case sequel of
+ OnStack -> do { sp_rel <- getSpRelOffset virt_sp
+ ; returnFC (CmmLoad sp_rel wordRep) }
+
+ UpdateCode -> returnFC (CmmLit (CmmLabel mkUpdInfoLabel))
+ CaseAlts lbl _ _ True -> returnFC (CmmLit (CmmLabel mkSeqInfoLabel))
+ CaseAlts lbl _ _ False -> returnFC (CmmLit (CmmLabel lbl))
+ }
+
+-------------------------------------------------------------------------
+--
+-- Build a liveness mask for the current stack
+--
+-------------------------------------------------------------------------
+
+-- There are four kinds of things on the stack:
+--
+-- - pointer variables (bound in the environment)
+-- - non-pointer variables (bound in the environment)
+-- - free slots (recorded in the stack free list)
+-- - non-pointer data slots (recorded in the stack free list)
+--
+-- We build up a bitmap of non-pointer slots by searching the environment
+-- for all the pointer variables, and subtracting these from a bitmap
+-- with initially all bits set (up to the size of the stack frame).
+
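A worked instance (a sketch): a four-word frame whose live pointers sit
at relative slots 0, 1 and 3 yields

    -- intsToReverseBitmap 4 [0,1,3]
    --   == [ ((1 `shiftL` 4) - 1) `xor` (1 .|. 2 .|. 8) ]
    --   == [0x4]
    -- only slot 2 is left marked as a non-pointer
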
+buildContLiveness :: Name -- Basis for label (only)
+ -> [VirtualSpOffset] -- Live stack slots
+ -> FCode Liveness
+buildContLiveness name live_slots
+ = do { stk_usg <- getStkUsage
+ ; let StackUsage { realSp = real_sp,
+ frameSp = frame_sp } = stk_usg
+
+ start_sp :: VirtualSpOffset
+ start_sp = real_sp - retAddrSizeW
+ -- In a continuation, we want a liveness mask that
+ -- starts from just after the return address, which is
+ -- on the stack at real_sp.
+
+ frame_size :: WordOff
+ frame_size = start_sp - frame_sp
+ -- real_sp points to the frame-header for the current
+ -- stack frame, and the end of this frame is frame_sp.
+ -- The size is therefore real_sp - frame_sp - retAddrSizeW
+ -- (subtract one for the frame-header = return address).
+
+ rel_slots :: [WordOff]
+ rel_slots = sortLe (<=)
+ [ start_sp - ofs -- Get slots relative to top of frame
+ | ofs <- live_slots ]
+
+ bitmap = intsToReverseBitmap frame_size rel_slots
+
+ ; WARN( not (all (>=0) rel_slots),
+ ppr name $$ ppr live_slots $$ ppr frame_size $$ ppr start_sp $$ ppr rel_slots )
+ mkLiveness name frame_size bitmap }
+
+
+-------------------------------------------------------------------------
+--
+-- Register assignment
+--
+-------------------------------------------------------------------------
+
+-- How to assign registers for
+--
+-- 1) Calling a fast entry point.
+-- 2) Returning an unboxed tuple.
+-- 3) Invoking an out-of-line PrimOp.
+--
+-- Registers are assigned in order.
+--
+-- If we run out, we don't attempt to assign any further registers (even
+-- though we might have run out of only one kind of register); we just
+-- return immediately with the left-overs specified.
+--
+-- The alternative version @assignAllRegs@ uses the complete set of
+-- registers, including those that aren't mapped to real machine
+-- registers. This is used for calling special RTS functions and PrimOps
+-- which expect their arguments to always be in the same registers.
+
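A worked instance (a sketch, assuming a registerised build with at
least two real vanilla registers and one real float register):

    -- assignCallRegs [(PtrArg, a), (FloatArg, b), (VoidArg, c)]
    --   == ([(b, FloatReg 1), (a, VanillaReg 2)], [])
    -- Node (VanillaReg 1) is reserved for the closure itself, so the
    -- pointer arg lands in R2; the VoidArg is dropped; and the
    -- assignment list comes back reversed, as noted above.
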
+assignCallRegs, assignPrimOpCallRegs, assignReturnRegs
+ :: [(CgRep,a)] -- Arg or result values to assign
+ -> ([(a, GlobalReg)], -- Register assignment in same order
+ -- for *initial segment of* input list
+ -- (but reversed; doesn't matter)
+ -- VoidRep args do not appear here
+ [(CgRep,a)]) -- Leftover arg or result values
+
+assignCallRegs args
+ = assign_regs args (mkRegTbl [node])
+ -- The entry convention for a function closure
+ -- never uses Node for argument passing; instead
+ -- Node points to the function closure itself
+
+assignPrimOpCallRegs args
+ = assign_regs args (mkRegTbl_allRegs [])
+ -- For primops, *all* arguments must be passed in registers
+
+assignReturnRegs args
+ = assign_regs args (mkRegTbl [])
+ -- For returning unboxed tuples etc,
+ -- we use all regs
+
+assign_regs :: [(CgRep,a)] -- Arg or result values to assign
+ -> AvailRegs -- Regs still avail: Vanilla, Float, Double, Longs
+ -> ([(a, GlobalReg)], [(CgRep, a)])
+assign_regs args supply
+ = go args [] supply
+ where
+ go [] acc supply = (acc, []) -- Return the results reversed (doesn't matter)
+ go ((VoidArg,_) : args) acc supply -- Skip void arguments; they aren't passed, and
+ = go args acc supply -- there's nothing to bind them to
+ go ((rep,arg) : args) acc supply
+ = case assign_reg rep supply of
+ Just (reg, supply') -> go args ((arg,reg):acc) supply'
+ Nothing -> (acc, (rep,arg):args) -- No more regs
+
+assign_reg :: CgRep -> AvailRegs -> Maybe (GlobalReg, AvailRegs)
+assign_reg FloatArg (vs, f:fs, ds, ls) = Just (FloatReg f, (vs, fs, ds, ls))
+assign_reg DoubleArg (vs, fs, d:ds, ls) = Just (DoubleReg d, (vs, fs, ds, ls))
+assign_reg LongArg (vs, fs, ds, l:ls) = Just (LongReg l, (vs, fs, ds, ls))
+assign_reg PtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls))
+assign_reg NonPtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls))
+ -- PtrArg and NonPtrArg both go in a vanilla register
+assign_reg other not_enough_regs = Nothing
+
+
+-------------------------------------------------------------------------
+--
+-- Register supplies
+--
+-------------------------------------------------------------------------
+
+-- Vanilla registers can contain pointers, Ints, Chars.
+-- Floats and doubles have separate register supplies.
+--
+-- We take these register supplies from the *real* registers, i.e. those
+-- that are guaranteed to map to machine registers.
+
+useVanillaRegs | opt_Unregisterised = 0
+ | otherwise = mAX_Real_Vanilla_REG
+useFloatRegs | opt_Unregisterised = 0
+ | otherwise = mAX_Real_Float_REG
+useDoubleRegs | opt_Unregisterised = 0
+ | otherwise = mAX_Real_Double_REG
+useLongRegs | opt_Unregisterised = 0
+ | otherwise = mAX_Real_Long_REG
+
+vanillaRegNos, floatRegNos, doubleRegNos, longRegNos :: [Int]
+vanillaRegNos = regList useVanillaRegs
+floatRegNos = regList useFloatRegs
+doubleRegNos = regList useDoubleRegs
+longRegNos = regList useLongRegs
+
+allVanillaRegNos, allFloatRegNos, allDoubleRegNos, allLongRegNos :: [Int]
+allVanillaRegNos = regList mAX_Vanilla_REG
+allFloatRegNos = regList mAX_Float_REG
+allDoubleRegNos = regList mAX_Double_REG
+allLongRegNos = regList mAX_Long_REG
+
+regList 0 = []
+regList n = [1 .. n]
+
+type AvailRegs = ( [Int] -- available vanilla regs.
+ , [Int] -- floats
+ , [Int] -- doubles
+ , [Int] -- longs (int64 and word64)
+ )
+
+mkRegTbl :: [GlobalReg] -> AvailRegs
+mkRegTbl regs_in_use
+ = mkRegTbl' regs_in_use vanillaRegNos floatRegNos doubleRegNos longRegNos
+
+mkRegTbl_allRegs :: [GlobalReg] -> AvailRegs
+mkRegTbl_allRegs regs_in_use
+ = mkRegTbl' regs_in_use allVanillaRegNos allFloatRegNos allDoubleRegNos allLongRegNos
+
+mkRegTbl' regs_in_use vanillas floats doubles longs
+ = (ok_vanilla, ok_float, ok_double, ok_long)
+ where
+ ok_vanilla = mapCatMaybes (select VanillaReg) vanillas
+ ok_float = mapCatMaybes (select FloatReg) floats
+ ok_double = mapCatMaybes (select DoubleReg) doubles
+ ok_long = mapCatMaybes (select LongReg) longs
+ -- rep isn't looked at, hence we can use any old rep.
+
+ select :: (Int -> GlobalReg) -> Int{-cand-} -> Maybe Int
+ -- once we've unboxed the Int, we make a GlobalReg
+ -- and see if it is already in use; if not, return its number.
+
+ select mk_reg_fun cand
+ = let
+ reg = mk_reg_fun cand
+ in
+ if reg `not_elem` regs_in_use
+ then Just cand
+ else Nothing
+ where
+ not_elem = isn'tIn "mkRegTbl"
+
+
diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs
new file mode 100644
index 0000000000..e7c08940c5
--- /dev/null
+++ b/compiler/codeGen/CgCase.lhs
@@ -0,0 +1,634 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+% $Id: CgCase.lhs,v 1.75 2005/06/21 10:44:41 simonmar Exp $
+%
+%********************************************************
+%* *
+\section[CgCase]{Converting @StgCase@ expressions}
+%* *
+%********************************************************
+
+\begin{code}
+module CgCase ( cgCase, saveVolatileVarsAndRegs,
+ restoreCurrentCostCentre
+ ) where
+
+#include "HsVersions.h"
+
+import {-# SOURCE #-} CgExpr ( cgExpr )
+
+import CgMonad
+import StgSyn
+import CgBindery ( getArgAmodes,
+ bindNewToReg, bindNewToTemp,
+ getCgIdInfo, getArgAmode,
+ rebindToStack, getCAddrModeIfVolatile,
+ nukeDeadBindings, idInfoToAmode
+ )
+import CgCon ( bindConArgs, bindUnboxedTupleComponents )
+import CgHeapery ( altHeapCheck, unbxTupleHeapCheck )
+import CgCallConv ( dataReturnConvPrim, ctrlReturnConvAlg,
+ CtrlReturnConvention(..)
+ )
+import CgStackery ( allocPrimStack, allocStackTop, getSpRelOffset,
+ deAllocStackTop, freeStackSlots
+ )
+import CgTailCall ( performTailCall )
+import CgPrimOp ( cgPrimOp )
+import CgForeignCall ( cgForeignCall )
+import CgUtils ( newTemp, cgLit, emitLitSwitch, emitSwitch,
+ tagToClosure )
+import CgProf ( curCCS, curCCSAddr )
+import CgInfoTbls ( emitDirectReturnTarget, emitAlgReturnTarget,
+ dataConTagZ )
+import SMRep ( CgRep(..), retAddrSizeW, nonVoidArg, isVoidArg,
+ idCgRep, tyConCgRep, typeHint )
+import CmmUtils ( CmmStmts, noStmts, oneStmt, plusStmts )
+import Cmm
+import MachOp ( wordRep )
+import ClosureInfo ( mkLFArgument )
+import StaticFlags ( opt_SccProfilingOn )
+import Id ( Id, idName, isDeadBinder, idType )
+import ForeignCall ( ForeignCall(..), CCallSpec(..), playSafe )
+import VarSet ( varSetElems )
+import CoreSyn ( AltCon(..) )
+import PrimOp ( PrimOp(..), primOpOutOfLine )
+import TyCon ( isEnumerationTyCon, tyConFamilySize )
+import Util ( isSingleton )
+import Outputable
+\end{code}
+
+\begin{code}
+data GCFlag
+ = GCMayHappen -- The scrutinee may involve GC, so everything must be
+ -- tidy before the code for the scrutinee.
+
+ | NoGC -- The scrutinee is a primitive value, or a call to a
+ -- primitive op which does no GC. Hence the case can
+ -- be done inline, without tidying up first.
+\end{code}
+
+It is quite interesting to decide whether to put a heap-check
+at the start of each alternative. Of course we certainly have
+to do so if the case forces an evaluation, or if there is a primitive
+op which can trigger GC.
+
+A more interesting situation is this:
+
+ \begin{verbatim}
+ !A!;
+ ...A...
+ case x# of
+ 0# -> !B!; ...B...
+ default -> !C!; ...C...
+ \end{verbatim}
+
+where \tr{!x!} indicates a possible heap-check point. The heap checks
+in the alternatives {\em can} be omitted, in which case the topmost
+heapcheck will take their worst case into account.
+
+In favour of omitting \tr{!B!}, \tr{!C!}:
+
+ - {\em May} save a heap overflow test,
+ if ...A... allocates anything. The other advantage
+ of this is that we can use relative addressing
+ from a single Hp to get at all the closures so allocated.
+
+ - No need to save volatile vars etc across the case
+
+Against:
+
+ - May do more allocation than reqd. This sometimes bites us
+ badly. For example, nfib (ha!) allocates about 30\% more space if the
+ worst-casing is done, because many many calls to nfib are leaf calls
+ which don't need to allocate anything.
+
+ This never hurts us if there is only one alternative.
+
+\begin{code}
+cgCase :: StgExpr
+ -> StgLiveVars
+ -> StgLiveVars
+ -> Id
+ -> SRT
+ -> AltType
+ -> [StgAlt]
+ -> Code
+\end{code}
+
+Special case #1: case of literal.
+
+\begin{code}
+cgCase (StgLit lit) live_in_whole_case live_in_alts bndr srt
+ alt_type@(PrimAlt tycon) alts
+ = do { tmp_reg <- bindNewToTemp bndr
+ ; cm_lit <- cgLit lit
+ ; stmtC (CmmAssign tmp_reg (CmmLit cm_lit))
+ ; cgPrimAlts NoGC alt_type tmp_reg alts }
+\end{code}
+
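Roughly, for @case 1# of x { 0# -> e0; DEFAULT -> e1 }@ this path emits
(a hypothetical, much-simplified rendering of the resulting Cmm):

    -- _x = 1                    -- bindNewToTemp + CmmAssign
    -- if (_x == 0) goto alt_0   -- emitLitSwitch via cgPrimAlts (NoGC)
    -- goto alt_default
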
+Special case #2: scrutinising a primitive-typed variable. No
+evaluation required. We don't save volatile variables, nor do we do a
+heap-check in the alternatives. Instead, the heap usage of the
+alternatives is worst-cased and passed upstream. This can result in
+allocating more heap than strictly necessary, but it will sometimes
+eliminate a heap check altogether.
+
+\begin{code}
+cgCase (StgApp v []) live_in_whole_case live_in_alts bndr srt
+ alt_type@(PrimAlt tycon) alts
+ = do { -- Careful! we can't just bind the default binder to the same thing
+ -- as the scrutinee, since it might be a stack location, and having
+ -- two bindings pointing at the same stack locn doesn't work (it
+ -- confuses nukeDeadBindings). Hence, use a new temp.
+ v_info <- getCgIdInfo v
+ ; amode <- idInfoToAmode v_info
+ ; tmp_reg <- bindNewToTemp bndr
+ ; stmtC (CmmAssign tmp_reg amode)
+ ; cgPrimAlts NoGC alt_type tmp_reg alts }
+\end{code}
+
+Special case #3: inline PrimOps and foreign calls.
+
+\begin{code}
+cgCase (StgOpApp op@(StgPrimOp primop) args _)
+ live_in_whole_case live_in_alts bndr srt alt_type alts
+ | not (primOpOutOfLine primop)
+ = cgInlinePrimOp primop args bndr alt_type live_in_alts alts
+\end{code}
+
+TODO: Case-of-case of primop can probably be done inline too (but
+maybe better to translate it out beforehand). See
+ghc/lib/misc/PackedString.lhs for examples where this crops up (with
+4.02).
+
+Special case #4: inline foreign calls: an unsafe foreign call can be done
+right here, just like an inline primop.
+
+\begin{code}
+cgCase (StgOpApp op@(StgFCallOp fcall _) args _)
+ live_in_whole_case live_in_alts bndr srt alt_type alts
+ | unsafe_foreign_call
+ = ASSERT( isSingleton alts )
+ do -- *must* be an unboxed tuple alt.
+ -- exactly like the cgInlinePrimOp case for unboxed tuple alts..
+ { res_tmps <- mapFCs bindNewToTemp non_void_res_ids
+ ; let res_hints = map (typeHint.idType) non_void_res_ids
+ ; cgForeignCall (zip res_tmps res_hints) fcall args live_in_alts
+ ; cgExpr rhs }
+ where
+ (_, res_ids, _, rhs) = head alts
+ non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids
+
+ unsafe_foreign_call
+ = case fcall of
+ CCall (CCallSpec _ _ s) -> not (playSafe s)
+ _other -> False
+\end{code}
+
+Special case: scrutinising a non-primitive variable.
+This can be done a little better than the general case, because
+we can reuse/trim the stack slot holding the variable (if it is in one).
+
+\begin{code}
+cgCase (StgApp fun args)
+ live_in_whole_case live_in_alts bndr srt alt_type alts
+ = do { fun_info <- getCgIdInfo fun
+ ; arg_amodes <- getArgAmodes args
+
+ -- Nuking dead bindings *before* calculating the saves is the
+ -- value-add here. We might end up freeing up some slots currently
+ -- occupied by variables only required for the call.
+ -- NOTE: we need to look up the variables used in the call before
+ -- doing this, because some of them may not be in the environment
+ -- afterward.
+ ; nukeDeadBindings live_in_alts
+ ; (save_assts, alts_eob_info, maybe_cc_slot)
+ <- saveVolatileVarsAndRegs live_in_alts
+
+ ; scrut_eob_info
+ <- forkEval alts_eob_info
+ (allocStackTop retAddrSizeW >> nopC)
+ (do { deAllocStackTop retAddrSizeW
+ ; cgEvalAlts maybe_cc_slot bndr srt alt_type alts })
+
+ ; setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info)
+ (performTailCall fun_info arg_amodes save_assts) }
+\end{code}
+
+Note about return addresses: we *always* push a return address, even
+if because of an optimisation we end up jumping direct to the return
+code (not through the address itself). The alternatives always assume
+that the return address is on the stack. The return address is
+required in case the alternative performs a heap check, since it
+encodes the liveness of the slots in the activation record.
+
+On entry to the case alternative, we can re-use the slot containing
+the return address immediately after the heap check. That's what the
+deAllocStackTop call is doing above.
+
+Finally, here is the general case.
+
+\begin{code}
+cgCase expr live_in_whole_case live_in_alts bndr srt alt_type alts
+ = do { -- Figure out what volatile variables to save
+ nukeDeadBindings live_in_whole_case
+
+ ; (save_assts, alts_eob_info, maybe_cc_slot)
+ <- saveVolatileVarsAndRegs live_in_alts
+
+ -- Save those variables right now!
+ ; emitStmts save_assts
+
+ -- generate code for the alts
+ ; scrut_eob_info
+ <- forkEval alts_eob_info
+ (do { nukeDeadBindings live_in_alts
+ ; allocStackTop retAddrSizeW -- space for retn address
+ ; nopC })
+ (do { deAllocStackTop retAddrSizeW
+ ; cgEvalAlts maybe_cc_slot bndr srt alt_type alts })
+
+ ; setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info)
+ (cgExpr expr)
+ }
+\end{code}
+
+There's a lot of machinery going on behind the scenes to manage the
+stack pointer here. forkEval takes the virtual Sp and free list from
+the first argument, and turns that into the *real* Sp for the second
+argument. It also uses this virtual Sp as the args-Sp in the EOB info
+returned, so that the scrutinee will trim the real Sp back to the
+right place before doing whatever it does.
+ --SDM (who just spent an hour figuring this out, and didn't want to
+ forget it).
+
+Why don't we push the return address just before evaluating the
+scrutinee? Because the slot reserved for the return address might
+contain something useful, so we wait until performing a tail call or
+return before pushing the return address (see
+CgTailCall.pushReturnAddress).
+
+This also means that the environment doesn't need to know about the
+free stack slot for the return address (for generating bitmaps),
+because we don't reserve it until just before the eval.
+
+TODO!! Problem: however, we have to save the current cost centre
+stack somewhere, because at the eval point the current CCS might be
+different. So we pick a free stack slot and save CCCS in it. One
+consequence of this is that activation records on the stack don't
+follow the layout of closures when we're profiling. (The CCS could be
+anywhere within the record.)
+
+\begin{code}
+maybeReserveSeqFrame PolyAlt (EndOfBlockInfo args_sp (CaseAlts amode stuff bndr _))
+ = EndOfBlockInfo (args_sp + retAddrSizeW) (CaseAlts amode stuff bndr True)
+maybeReserveSeqFrame other scrut_eob_info = scrut_eob_info
+\end{code}
+
+
+%************************************************************************
+%* *
+ Inline primops
+%* *
+%************************************************************************
+
+\begin{code}
+cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts
+ | isVoidArg (idCgRep bndr)
+ = ASSERT( con == DEFAULT && isSingleton alts && null bs )
+ do { -- VOID RESULT; just sequencing,
+ -- so get in there and do it
+ cgPrimOp [] primop args live_in_alts
+ ; cgExpr rhs }
+ where
+ (con,bs,_,rhs) = head alts
+
+cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts
+ = do { -- PRIMITIVE ALTS, with non-void result
+ tmp_reg <- bindNewToTemp bndr
+ ; cgPrimOp [tmp_reg] primop args live_in_alts
+ ; cgPrimAlts NoGC (PrimAlt tycon) tmp_reg alts }
+
+cgInlinePrimOp primop args bndr (UbxTupAlt tycon) live_in_alts alts
+ = ASSERT( isSingleton alts )
+ do { -- UNBOXED TUPLE ALTS
+ -- No heap check, no yield, just get in there and do it.
+ -- NB: the case binder isn't bound to anything;
+ -- it has an unboxed tuple type
+
+ res_tmps <- mapFCs bindNewToTemp non_void_res_ids
+ ; cgPrimOp res_tmps primop args live_in_alts
+ ; cgExpr rhs }
+ where
+ (_, res_ids, _, rhs) = head alts
+ non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids
+
+cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts
+ = do { -- ENUMERATION TYPE RETURN
+ -- Typical: case a ># b of { True -> ..; False -> .. }
+ -- The primop itself returns an index into the table of
+ -- closures for the enumeration type.
+ tag_amode <- ASSERT( isEnumerationTyCon tycon )
+ do_enum_primop primop
+
+ -- Bind the default binder if necessary
+ -- (avoiding it avoids the assignment)
+ -- The deadness info is set by StgVarInfo
+ ; hmods <- getHomeModules
+ ; whenC (not (isDeadBinder bndr))
+ (do { tmp_reg <- bindNewToTemp bndr
+ ; stmtC (CmmAssign tmp_reg (tagToClosure hmods tycon tag_amode)) })
+
+ -- Compile the alts
+ ; (branches, mb_deflt) <- cgAlgAlts NoGC Nothing{-cc_slot-}
+ (AlgAlt tycon) alts
+
+ -- Do the switch
+ ; emitSwitch tag_amode branches mb_deflt 0 (tyConFamilySize tycon - 1)
+ }
+ where
+
+ do_enum_primop :: PrimOp -> FCode CmmExpr -- Returns amode for result
+ do_enum_primop TagToEnumOp -- No code!
+ | [arg] <- args = do
+ (_,e) <- getArgAmode arg
+ return e
+ do_enum_primop primop
+ = do tmp <- newTemp wordRep
+ cgPrimOp [tmp] primop args live_in_alts
+ returnFC (CmmReg tmp)
+
+cgInlinePrimOp primop arg_amodes bndr PolyAlt live_in_alts alts
+ = pprPanic "cgCase: case of primop has polymorphic type" (ppr bndr)
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[CgCase-alts]{Alternatives}
+%* *
+%************************************************************************
+
+@cgEvalAlts@ returns an addressing mode for a continuation for the
+alternatives of a @case@, used in a context when there
+is some evaluation to be done.
+
+\begin{code}
+cgEvalAlts :: Maybe VirtualSpOffset -- Offset of cost-centre to be restored, if any
+ -> Id
+ -> SRT -- SRT for the continuation
+ -> AltType
+ -> [StgAlt]
+ -> FCode Sequel -- Any addr modes inside are guaranteed
+ -- to be a label so that we can duplicate it
+ -- without risk of duplicating code
+
+cgEvalAlts cc_slot bndr srt alt_type@(PrimAlt tycon) alts
+ = do { let rep = tyConCgRep tycon
+ reg = dataReturnConvPrim rep -- Bottom for voidRep
+
+ ; abs_c <- forkProc $ do
+ { -- Bind the case binder, except if it's void
+ -- (reg is bottom in that case)
+ whenC (nonVoidArg rep) $
+ bindNewToReg bndr reg (mkLFArgument bndr)
+ ; restoreCurrentCostCentre cc_slot True
+ ; cgPrimAlts GCMayHappen alt_type reg alts }
+
+ ; lbl <- emitDirectReturnTarget (idName bndr) abs_c srt
+ ; returnFC (CaseAlts lbl Nothing bndr False) }
+
+cgEvalAlts cc_slot bndr srt (UbxTupAlt _) [(con,args,_,rhs)]
+ = -- Unboxed tuple case
+ -- By now, the simplifier should have turned it
+ -- into case e of (# a,b #) -> e
+ -- There shouldn't be a
+ -- case e of DEFAULT -> e
+ ASSERT2( case con of { DataAlt _ -> True; other -> False },
+ text "cgEvalAlts: dodgy case of unboxed tuple type" )
+ do { -- forkAbsC for the RHS, so that the envt is
+ -- not changed for the emitDirectReturn call
+ abs_c <- forkProc $ do
+ { (live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents args
+ -- Restore the CC *after* binding the tuple components,
+ -- so that we get the stack offset of the saved CC right.
+ ; restoreCurrentCostCentre cc_slot True
+ -- Generate a heap check if necessary
+ -- and finally the code for the alternative
+ ; unbxTupleHeapCheck live_regs ptrs nptrs noStmts
+ (cgExpr rhs) }
+ ; lbl <- emitDirectReturnTarget (idName bndr) abs_c srt
+ ; returnFC (CaseAlts lbl Nothing bndr False) }
+
+cgEvalAlts cc_slot bndr srt alt_type alts
+ = -- Algebraic and polymorphic case
+ do { -- Bind the default binder
+ bindNewToReg bndr nodeReg (mkLFArgument bndr)
+
+ -- Generate sequel info for use downstream
+ -- At the moment, we only do it if the type is vector-returnable.
+ -- Reason: if not, then it costs extra to label the
+ -- alternatives, because we'd get return code like:
+ --
+ -- switch TagReg { 0 : JMP(alt_1); 1 : JMP(alt_2) ..etc }
+ --
+ -- which is worse than having the alt code in the switch statement
+
+ ; (alts, mb_deflt) <- cgAlgAlts GCMayHappen cc_slot alt_type alts
+
+ ; (lbl, branches) <- emitAlgReturnTarget (idName bndr)
+ alts mb_deflt srt ret_conv
+
+ ; returnFC (CaseAlts lbl branches bndr False) }
+ where
+ ret_conv = case alt_type of
+ AlgAlt tc -> ctrlReturnConvAlg tc
+ PolyAlt -> UnvectoredReturn 0
+\end{code}
+
+
+HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): If
+we do an inlining of the case no separate functions for returning are
+created, so we don't have to generate a GRAN_YIELD in that case. This info
+must be propagated to cgAlgAltRhs (where the GRAN_YIELD macro might be
+emitted). Hence, the new Bool arg to cgAlgAltRhs.
+
+%************************************************************************
+%* *
+\subsection[CgCase-alg-alts]{Algebraic alternatives}
+%* *
+%************************************************************************
+
+In @cgAlgAlts@, none of the binders in the alternatives are
+assumed to be yet bound.
+
+HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): The
+last arg of cgAlgAlts indicates if we want a context switch at the
+beginning of each alternative. Normally we want that. The only exception
+are inlined alternatives.
+
+\begin{code}
+cgAlgAlts :: GCFlag
+ -> Maybe VirtualSpOffset
+ -> AltType -- ** AlgAlt or PolyAlt only **
+ -> [StgAlt] -- The alternatives
+ -> FCode ( [(ConTagZ, CgStmts)], -- The branches
+ Maybe CgStmts ) -- The default case
+
+cgAlgAlts gc_flag cc_slot alt_type alts
+ = do alts <- forkAlts [ cgAlgAlt gc_flag cc_slot alt_type alt | alt <- alts]
+ let
+ mb_deflt = case alts of -- DEFAULT is always first, if present
+ ((DEFAULT,blks) : _) -> Just blks
+ other -> Nothing
+
+ branches = [(dataConTagZ con, blks)
+ | (DataAlt con, blks) <- alts]
+ -- in
+ return (branches, mb_deflt)
+
+
+cgAlgAlt :: GCFlag
+ -> Maybe VirtualSpOffset -- Turgid state
+ -> AltType -- ** AlgAlt or PolyAlt only **
+ -> StgAlt
+ -> FCode (AltCon, CgStmts)
+
+cgAlgAlt gc_flag cc_slot alt_type (con, args, use_mask, rhs)
+ = do { abs_c <- getCgStmts $ do
+ { bind_con_args con args
+ ; restoreCurrentCostCentre cc_slot True
+ ; maybeAltHeapCheck gc_flag alt_type (cgExpr rhs) }
+ ; return (con, abs_c) }
+ where
+ bind_con_args DEFAULT args = nopC
+ bind_con_args (DataAlt dc) args = bindConArgs dc args
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[CgCase-prim-alts]{Primitive alternatives}
+%* *
+%************************************************************************
+
+@cgPrimAlts@ generates a suitable @CSwitch@
+for dealing with the alternatives of a primitive @case@, given an
+addressing mode for the thing to scrutinise. It also keeps track of
+the maximum stack depth encountered down any branch.
+
+As usual, no binders in the alternatives are yet bound.
+
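+For example, an inline primitive case such as
+
+	case x# of
+	  5#	  -> e1
+	  DEFAULT -> e2
+
+turns into a comparison of the scrutinee against the literal, falling
+through to the compiled default --- no labelled return point is
+involved at all.
+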
+\begin{code}
+cgPrimAlts :: GCFlag
+ -> AltType -- Always PrimAlt, but passed to maybeAltHeapCheck
+ -> CmmReg -- Scrutinee
+ -> [StgAlt] -- Alternatives
+ -> Code
+-- NB: cgPrimAlts emits code that does the case analysis.
+-- It's often used in inline situations, rather than to generate
+-- a labelled return point. That's why its interface is a little
+-- different to cgAlgAlts
+--
+-- INVARIANT: the default binder is already bound
+cgPrimAlts gc_flag alt_type scrutinee alts
+ = do { tagged_absCs <- forkAlts (map (cgPrimAlt gc_flag alt_type) alts)
+ ; let ((DEFAULT, deflt_absC) : others) = tagged_absCs -- There is always a default
+ alt_absCs = [(lit,rhs) | (LitAlt lit, rhs) <- others]
+ ; emitLitSwitch (CmmReg scrutinee) alt_absCs deflt_absC }
+
+cgPrimAlt :: GCFlag
+ -> AltType
+ -> StgAlt -- The alternative
+ -> FCode (AltCon, CgStmts) -- Its compiled form
+
+cgPrimAlt gc_flag alt_type (con, [], [], rhs)
+ = ASSERT( case con of { DEFAULT -> True; LitAlt _ -> True; other -> False } )
+ do { abs_c <- getCgStmts (maybeAltHeapCheck gc_flag alt_type (cgExpr rhs))
+ ; returnFC (con, abs_c) }
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[CgCase-tidy]{Code for tidying up prior to an eval}
+%* *
+%************************************************************************
+
+\begin{code}
+maybeAltHeapCheck
+ :: GCFlag
+ -> AltType -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt
+ -> Code -- Continuation
+ -> Code
+maybeAltHeapCheck NoGC _ code = code
+maybeAltHeapCheck GCMayHappen alt_type code = altHeapCheck alt_type code
+
+saveVolatileVarsAndRegs
+ :: StgLiveVars -- Vars which should be made safe
+ -> FCode (CmmStmts, -- Assignments to do the saves
+ EndOfBlockInfo, -- sequel for the alts
+ Maybe VirtualSpOffset) -- Slot for current cost centre
+
+saveVolatileVarsAndRegs vars
+ = do { var_saves <- saveVolatileVars vars
+ ; (maybe_cc_slot, cc_save) <- saveCurrentCostCentre
+ ; eob_info <- getEndOfBlockInfo
+ ; returnFC (var_saves `plusStmts` cc_save,
+ eob_info,
+ maybe_cc_slot) }
+
+
+saveVolatileVars :: StgLiveVars -- Vars which should be made safe
+		 -> FCode CmmStmts	-- Assignments to do the saves
+
+saveVolatileVars vars
+ = do { stmts_s <- mapFCs save_it (varSetElems vars)
+ ; return (foldr plusStmts noStmts stmts_s) }
+ where
+ save_it var
+ = do { v <- getCAddrModeIfVolatile var
+ ; case v of
+ Nothing -> return noStmts -- Non-volatile
+ Just vol_amode -> save_var var vol_amode -- Aha! It's volatile
+ }
+
+ save_var var vol_amode
+ = do { slot <- allocPrimStack (idCgRep var)
+ ; rebindToStack var slot
+ ; sp_rel <- getSpRelOffset slot
+ ; returnFC (oneStmt (CmmStore sp_rel vol_amode)) }
+\end{code}
+
+---------------------------------------------------------------------------
+
+When we save the current cost centre (which is done for lexical
+scoping), we allocate a free stack location, and return (a)~the
+virtual offset of the location, to pass on to the alternatives, and
+(b)~the assignment to do the save (just as for @saveVolatileVars@).
+
+\begin{code}
+saveCurrentCostCentre ::
+ FCode (Maybe VirtualSpOffset, -- Where we decide to store it
+ CmmStmts) -- Assignment to save it
+
+saveCurrentCostCentre
+ | not opt_SccProfilingOn
+ = returnFC (Nothing, noStmts)
+ | otherwise
+ = do { slot <- allocPrimStack PtrArg
+ ; sp_rel <- getSpRelOffset slot
+ ; returnFC (Just slot,
+ oneStmt (CmmStore sp_rel curCCS)) }
+
+-- Sometimes we don't free the slot containing the cost centre after restoring it
+-- (see CgLetNoEscape.cgLetNoEscapeBody).
+restoreCurrentCostCentre :: Maybe VirtualSpOffset -> Bool -> Code
+restoreCurrentCostCentre Nothing _freeit = nopC
+restoreCurrentCostCentre (Just slot) freeit
+ = do { sp_rel <- getSpRelOffset slot
+ ; whenC freeit (freeStackSlots [slot])
+ ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel wordRep)) }
+\end{code}
+
diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs
new file mode 100644
index 0000000000..1a2cbc5202
--- /dev/null
+++ b/compiler/codeGen/CgClosure.lhs
@@ -0,0 +1,599 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+% $Id: CgClosure.lhs,v 1.72 2005/05/18 12:06:51 simonmar Exp $
+%
+\section[CgClosure]{Code generation for closures}
+
+This module provides the support code for @StgToAbstractC@ to deal
+with {\em closures} on the RHSs of let(rec)s. See also
+@CgCon@, which deals with constructors.
+
+\begin{code}
+module CgClosure ( cgTopRhsClosure,
+ cgStdRhsClosure,
+ cgRhsClosure,
+ emitBlackHoleCode,
+ ) where
+
+#include "HsVersions.h"
+
+import {-# SOURCE #-} CgExpr ( cgExpr )
+
+import CgMonad
+import CgBindery
+import CgHeapery
+import CgStackery ( mkVirtStkOffsets, pushUpdateFrame, getVirtSp,
+ setRealAndVirtualSp )
+import CgProf ( chooseDynCostCentres, ldvEnter, enterCostCentre,
+ costCentreFrom )
+import CgTicky
+import CgParallel ( granYield, granFetchAndReschedule )
+import CgInfoTbls ( emitClosureCodeAndInfoTable, getSRTInfo )
+import CgCallConv ( assignCallRegs, mkArgDescr )
+import CgUtils ( emitDataLits, addIdReps, cmmRegOffW,
+ emitRtsCallWithVols )
+import ClosureInfo -- lots and lots of stuff
+import SMRep ( CgRep, cgRepSizeW, argMachRep, fixedHdrSize, WordOff,
+ idCgRep )
+import MachOp ( MachHint(..) )
+import Cmm
+import CmmUtils ( CmmStmts, mkStmts, oneStmt, plusStmts, noStmts,
+ mkLblExpr )
+import CLabel
+import StgSyn
+import StaticFlags ( opt_DoTickyProfiling )
+import CostCentre
+import Id ( Id, idName, idType )
+import Name ( Name, isExternalName )
+import Module ( Module, pprModule )
+import ListSetOps ( minusList )
+import Util ( isIn, mapAccumL, zipWithEqual )
+import BasicTypes ( TopLevelFlag(..) )
+import Constants ( oFFSET_StgInd_indirectee, wORD_SIZE )
+import Outputable
+import FastString
+\end{code}
+
+%********************************************************
+%* *
+\subsection[closures-no-free-vars]{Top-level closures}
+%* *
+%********************************************************
+
+For closures bound at top level, allocate in static space.
+They should have no free variables.
+
+\begin{code}
+cgTopRhsClosure :: Id
+ -> CostCentreStack -- Optional cost centre annotation
+ -> StgBinderInfo
+ -> SRT
+ -> UpdateFlag
+ -> [Id] -- Args
+ -> StgExpr
+ -> FCode (Id, CgIdInfo)
+
+cgTopRhsClosure id ccs binder_info srt upd_flag args body = do
+ { -- LAY OUT THE OBJECT
+ let name = idName id
+ ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args
+ ; srt_info <- getSRTInfo name srt
+ ; mod_name <- moduleName
+ ; let descr = closureDescription mod_name name
+ closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr
+ closure_label = mkLocalClosureLabel name
+ cg_id_info = stableIdInfo id (mkLblExpr closure_label) lf_info
+ closure_rep = mkStaticClosureFields closure_info ccs True []
+
+ -- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY)
+ ; emitDataLits closure_label closure_rep
+ ; forkClosureBody (closureCodeBody binder_info closure_info
+ ccs args body)
+
+ ; returnFC (id, cg_id_info) }
+\end{code}
+
+%********************************************************
+%* *
+\subsection[non-top-level-closures]{Non top-level closures}
+%* *
+%********************************************************
+
+For closures with free vars, allocate in heap.
+
+\begin{code}
+cgStdRhsClosure
+ :: Id
+ -> CostCentreStack -- Optional cost centre annotation
+ -> StgBinderInfo
+ -> [Id] -- Free vars
+ -> [Id] -- Args
+ -> StgExpr
+ -> LambdaFormInfo
+ -> [StgArg] -- payload
+ -> FCode (Id, CgIdInfo)
+
+cgStdRhsClosure bndr cc bndr_info fvs args body lf_info payload
+ = do -- AHA! A STANDARD-FORM THUNK
+ { -- LAY OUT THE OBJECT
+ amodes <- getArgAmodes payload
+ ; mod_name <- moduleName
+ ; let (tot_wds, ptr_wds, amodes_w_offsets)
+ = mkVirtHeapOffsets (isLFThunk lf_info) amodes
+
+ descr = closureDescription mod_name (idName bndr)
+ closure_info = mkClosureInfo False -- Not static
+ bndr lf_info tot_wds ptr_wds
+ NoC_SRT -- No SRT for a std-form closure
+ descr
+
+ ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
+
+ -- BUILD THE OBJECT
+ ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets
+
+ -- RETURN
+ ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) }
+\end{code}
+
+Here's the general case.
+
+\begin{code}
+cgRhsClosure :: Id
+ -> CostCentreStack -- Optional cost centre annotation
+ -> StgBinderInfo
+ -> SRT
+ -> [Id] -- Free vars
+ -> UpdateFlag
+ -> [Id] -- Args
+ -> StgExpr
+ -> FCode (Id, CgIdInfo)
+
+cgRhsClosure bndr cc bndr_info srt fvs upd_flag args body = do
+ { -- LAY OUT THE OBJECT
+ -- If the binder is itself a free variable, then don't store
+ -- it in the closure. Instead, just bind it to Node on entry.
+ -- NB we can be sure that Node will point to it, because we
+	-- haven't told mkClosureLFInfo about this; so if the binder
+ -- _was_ a free var of its RHS, mkClosureLFInfo thinks it *is*
+ -- stored in the closure itself, so it will make sure that
+ -- Node points to it...
+ let
+ name = idName bndr
+ is_elem = isIn "cgRhsClosure"
+ bndr_is_a_fv = bndr `is_elem` fvs
+ reduced_fvs | bndr_is_a_fv = fvs `minusList` [bndr]
+ | otherwise = fvs
+
+ ; lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args
+ ; fv_infos <- mapFCs getCgIdInfo reduced_fvs
+ ; srt_info <- getSRTInfo name srt
+ ; mod_name <- moduleName
+ ; let bind_details :: [(CgIdInfo, VirtualHpOffset)]
+ (tot_wds, ptr_wds, bind_details)
+ = mkVirtHeapOffsets (isLFThunk lf_info) (map add_rep fv_infos)
+
+ add_rep info = (cgIdInfoArgRep info, info)
+
+ descr = closureDescription mod_name name
+ closure_info = mkClosureInfo False -- Not static
+ bndr lf_info tot_wds ptr_wds
+ srt_info descr
+
+ -- BUILD ITS INFO TABLE AND CODE
+ ; forkClosureBody (do
+ { -- Bind the fvs
+ let bind_fv (info, offset)
+ = bindNewToNode (cgIdInfoId info) offset (cgIdInfoLF info)
+ ; mapCs bind_fv bind_details
+
+ -- Bind the binder itself, if it is a free var
+ ; whenC bndr_is_a_fv (bindNewToReg bndr nodeReg lf_info)
+
+ -- Compile the body
+ ; closureCodeBody bndr_info closure_info cc args body })
+
+ -- BUILD THE OBJECT
+ ; let
+ to_amode (info, offset) = do { amode <- idInfoToAmode info
+ ; return (amode, offset) }
+ ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
+ ; amodes_w_offsets <- mapFCs to_amode bind_details
+ ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets
+
+ -- RETURN
+ ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) }
+
+
+mkClosureLFInfo :: Id -- The binder
+		-> TopLevelFlag	-- True if top level
+ -> [Id] -- Free vars
+ -> UpdateFlag -- Update flag
+ -> [Id] -- Args
+ -> FCode LambdaFormInfo
+mkClosureLFInfo bndr top fvs upd_flag args
+ | null args = return (mkLFThunk (idType bndr) top fvs upd_flag)
+ | otherwise = do { arg_descr <- mkArgDescr (idName bndr) args
+ ; return (mkLFReEntrant top fvs args arg_descr) }
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[code-for-closures]{The code for closures}
+%* *
+%************************************************************************
+
+\begin{code}
+closureCodeBody :: StgBinderInfo
+ -> ClosureInfo -- Lots of information about this closure
+ -> CostCentreStack -- Optional cost centre attached to closure
+ -> [Id]
+ -> StgExpr
+ -> Code
+\end{code}
+
+There are two main cases for the code for closures. If there are {\em
+no arguments}, then the closure is a thunk, and not in normal form.
+So it should set up an update frame (if it is shared).
+NB: Thunks cannot have a primitive type!
+
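+Schematically, pushing an update frame amounts to something like the
+following (a sketch only; the real frame layout and info-table label
+live in the RTS headers):
+
+	Sp = Sp - SIZEOF_StgUpdateFrame;
+	Sp[1] = node;			/* the updatee */
+	Sp[0] = stg_upd_frame_info;	/* return address */
+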
+\begin{code}
+closureCodeBody binder_info cl_info cc [{- No args i.e. thunk -}] body = do
+ { body_absC <- getCgStmts $ do
+ { tickyEnterThunk cl_info
+ ; ldvEnter (CmmReg nodeReg) -- NB: Node always points when profiling
+ ; thunkWrapper cl_info $ do
+ -- We only enter cc after setting up update so
+ -- that cc of enclosing scope will be recorded
+ -- in update frame CAF/DICT functions will be
+ -- subsumed by this enclosing cc
+ { enterCostCentre cl_info cc body
+ ; cgExpr body }
+ }
+
+ ; emitClosureCodeAndInfoTable cl_info [] body_absC }
+\end{code}
+
+If there is {\em at least one argument}, then this closure is in
+normal form, so there is no need to set up an update frame.
+
+The macros for GrAnSim are produced at the beginning of the
+argSatisfactionCheck (by calling fetchAndReschedule). At that point the
+information whether Node points to the closure is available. -- HWL
+
+\begin{code}
+closureCodeBody binder_info cl_info cc args body
+ = ASSERT( length args > 0 )
+ do { -- Get the current virtual Sp (it might not be zero,
+ -- eg. if we're compiling a let-no-escape).
+ vSp <- getVirtSp
+ ; let (reg_args, other_args) = assignCallRegs (addIdReps args)
+ (sp_top, stk_args) = mkVirtStkOffsets vSp other_args
+
+ -- Allocate the global ticky counter
+ ; let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info)
+ ; emitTickyCounter cl_info args sp_top
+
+ -- ...and establish the ticky-counter
+ -- label for this block
+ ; setTickyCtrLabel ticky_ctr_lbl $ do
+
+ -- Emit the slow-entry code
+ { reg_save_code <- mkSlowEntryCode cl_info reg_args
+
+ -- Emit the main entry code
+ ; blks <- forkProc $
+ mkFunEntryCode cl_info cc reg_args stk_args
+ sp_top reg_save_code body
+ ; emitClosureCodeAndInfoTable cl_info [] blks
+ }}
+
+
+
+mkFunEntryCode :: ClosureInfo
+ -> CostCentreStack
+ -> [(Id,GlobalReg)] -- Args in regs
+ -> [(Id,VirtualSpOffset)] -- Args on stack
+ -> VirtualSpOffset -- Last allocated word on stack
+ -> CmmStmts -- Register-save code in case of GC
+ -> StgExpr
+ -> Code
+-- The main entry code for the closure
+mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do
+ { -- Bind args to regs/stack as appropriate,
+ -- and record expected position of sps
+ ; bindArgsToRegs reg_args
+ ; bindArgsToStack stk_args
+ ; setRealAndVirtualSp sp_top
+
+ -- Enter the cost-centre, if required
+ -- ToDo: It's not clear why this is outside the funWrapper,
+ -- but the tickyEnterFun is inside. Perhaps we can put
+ -- them together?
+ ; enterCostCentre cl_info cc body
+
+ -- Do the business
+ ; funWrapper cl_info reg_args reg_save_code $ do
+ { tickyEnterFun cl_info
+ ; cgExpr body }
+ }
+\end{code}
+
+The "slow entry" code for a function. This entry point takes its
+arguments on the stack. It loads the arguments into registers
+according to the calling convention, and jumps to the function's
+normal entry point. The function's closure is assumed to be in
+R1/node.
+
+The slow entry point is used in two places:
+
+ (a) unknown calls: eg. stg_PAP_entry
+ (b) returning from a heap-check failure
+
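+For example, for a function @f@ of two pointer arguments the slow
+entry code looks roughly like this (a sketch; the labels are
+illustrative, and the actual registers and offsets come from
+@assignCallRegs@ and the stack layout):
+
+	f_slow:
+	    R2 = Sp[0];
+	    R3 = Sp[1];
+	    Sp = Sp + 2;
+	    JMP(f_entry);
+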
+\begin{code}
+mkSlowEntryCode :: ClosureInfo -> [(Id,GlobalReg)] -> FCode CmmStmts
+-- If this function doesn't have a specialised ArgDescr, we need
+-- to generate the function's arg bitmap, slow-entry code, and
+-- register-save code for the heap-check failure
+-- Here, we emit the slow-entry code, and
+-- return the register-save assignments
+mkSlowEntryCode cl_info reg_args
+ | Just (_, ArgGen _) <- closureFunInfo cl_info
+ = do { emitSimpleProc slow_lbl (emitStmts load_stmts)
+ ; return save_stmts }
+ | otherwise = return noStmts
+ where
+ name = closureName cl_info
+ slow_lbl = mkSlowEntryLabel name
+
+ load_stmts = mkStmts load_assts `plusStmts` mkStmts [stk_adj_pop, jump_to_entry]
+ save_stmts = oneStmt stk_adj_push `plusStmts` mkStmts save_assts
+
+ reps_w_regs :: [(CgRep,GlobalReg)]
+ reps_w_regs = [(idCgRep id, reg) | (id,reg) <- reverse reg_args]
+ (final_stk_offset, stk_offsets)
+ = mapAccumL (\off (rep,_) -> (off + cgRepSizeW rep, off))
+ 0 reps_w_regs
+
+ load_assts = zipWithEqual "mk_load" mk_load reps_w_regs stk_offsets
+ mk_load (rep,reg) offset = CmmAssign (CmmGlobal reg)
+ (CmmLoad (cmmRegOffW spReg offset)
+ (argMachRep rep))
+
+ save_assts = zipWithEqual "mk_save" mk_save reps_w_regs stk_offsets
+ mk_save (rep,reg) offset = ASSERT( argMachRep rep == globalRegRep reg )
+ CmmStore (cmmRegOffW spReg offset)
+ (CmmReg (CmmGlobal reg))
+
+ stk_adj_pop = CmmAssign spReg (cmmRegOffW spReg final_stk_offset)
+ stk_adj_push = CmmAssign spReg (cmmRegOffW spReg (- final_stk_offset))
+ jump_to_entry = CmmJump (mkLblExpr (enterLocalIdLabel name)) []
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsubsection[closure-code-wrappers]{Wrappers around closure code}
+%* *
+%************************************************************************
+
+\begin{code}
+thunkWrapper:: ClosureInfo -> Code -> Code
+thunkWrapper closure_info thunk_code = do
+ { let node_points = nodeMustPointToIt (closureLFInfo closure_info)
+
+ -- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node
+ -- (we prefer fetchAndReschedule-style context switches to yield ones)
+ ; if node_points
+ then granFetchAndReschedule [] node_points
+ else granYield [] node_points
+
+ -- Stack and/or heap checks
+ ; thunkEntryChecks closure_info $ do
+ { -- Overwrite with black hole if necessary
+ whenC (blackHoleOnEntry closure_info && node_points)
+ (blackHoleIt closure_info)
+ ; setupUpdate closure_info thunk_code }
+ -- setupUpdate *encloses* the thunk_code
+ }
+
+funWrapper :: ClosureInfo -- Closure whose code body this is
+ -> [(Id,GlobalReg)] -- List of argument registers (if any)
+ -> CmmStmts -- reg saves for the heap check failure
+ -> Code -- Body of function being compiled
+ -> Code
+funWrapper closure_info arg_regs reg_save_code fun_body = do
+ { let node_points = nodeMustPointToIt (closureLFInfo closure_info)
+
+ -- Enter for Ldv profiling
+ ; whenC node_points (ldvEnter (CmmReg nodeReg))
+
+	-- GranSim yield point
+ ; granYield arg_regs node_points
+
+ -- Heap and/or stack checks wrap the function body
+ ; funEntryChecks closure_info reg_save_code
+ fun_body
+ }
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsubsection[update-and-BHs]{Update and black-hole wrappers}
+%* *
+%************************************************************************
+
+
+\begin{code}
+blackHoleIt :: ClosureInfo -> Code
+-- Only called for closures with no args
+-- Node points to the closure
+blackHoleIt closure_info = emitBlackHoleCode (closureSingleEntry closure_info)
+
+emitBlackHoleCode :: Bool -> Code
+emitBlackHoleCode is_single_entry
+ | eager_blackholing = do
+ tickyBlackHole (not is_single_entry)
+ stmtC (CmmStore (CmmReg nodeReg) (CmmLit (CmmLabel bh_lbl)))
+ | otherwise =
+ nopC
+ where
+ bh_lbl | is_single_entry = mkRtsDataLabel SLIT("stg_SE_BLACKHOLE_info")
+ | otherwise = mkRtsDataLabel SLIT("stg_BLACKHOLE_info")
+
+ -- If we wanted to do eager blackholing with slop filling,
+ -- we'd need to do it at the *end* of a basic block, otherwise
+ -- we overwrite the free variables in the thunk that we still
+ -- need. We have a patch for this from Andy Cheadle, but not
+ -- incorporated yet. --SDM [6/2004]
+ --
+ -- Profiling needs slop filling (to support LDV profiling), so
+ -- currently eager blackholing doesn't work with profiling.
+ --
+ -- TICKY_TICKY needs EAGER_BLACKHOLING to verify no double-entries of
+ -- single-entry thunks.
+ eager_blackholing
+ | opt_DoTickyProfiling = True
+ | otherwise = False
+
+\end{code}
+
+\begin{code}
+setupUpdate :: ClosureInfo -> Code -> Code -- Only called for closures with no args
+ -- Nota Bene: this function does not change Node (even if it's a CAF),
+ -- so that the cost centre in the original closure can still be
+ -- extracted by a subsequent enterCostCentre
+setupUpdate closure_info code
+ | closureReEntrant closure_info
+ = code
+
+ | not (isStaticClosure closure_info)
+ = if closureUpdReqd closure_info
+ then do { tickyPushUpdateFrame; pushUpdateFrame (CmmReg nodeReg) code }
+ else do { tickyUpdateFrameOmitted; code }
+
+ | otherwise -- A static closure
+ = do { tickyUpdateBhCaf closure_info
+
+ ; if closureUpdReqd closure_info
+ then do -- Blackhole the (updatable) CAF:
+ { upd_closure <- link_caf closure_info True
+ ; pushUpdateFrame upd_closure code }
+ else do
+ { -- No update reqd, you'd think we don't need to
+ -- black-hole it. But when ticky-ticky is on, we
+ -- black-hole it regardless, to catch errors in which
+ -- an allegedly single-entry closure is entered twice
+ --
+ -- We discard the pointer returned by link_caf, because
+ -- we don't push an update frame
+ whenC opt_DoTickyProfiling -- Blackhole even a SE CAF
+ (link_caf closure_info False >> nopC)
+ ; tickyUpdateFrameOmitted
+ ; code }
+ }
+
+
+-----------------------------------------------------------------------------
+-- Entering a CAF
+--
+-- When a CAF is first entered, it creates a black hole in the heap,
+-- and updates itself with an indirection to this new black hole.
+--
+-- We update the CAF with an indirection to a newly-allocated black
+-- hole in the heap. We also set the blocking queue on the newly
+-- allocated black hole to be empty.
+--
+-- Why do we make a black hole in the heap when we enter a CAF?
+--
+-- - for a generational garbage collector, which needs a fast
+-- test for whether an updatee is in an old generation or not
+--
+-- - for the parallel system, which can implement updates more
+-- easily if the updatee is always in the heap. (allegedly).
+--
+-- When debugging, we maintain a separate CAF list so we can tell when
+-- a CAF has been garbage collected.
+
+-- newCAF must be called before the itbl ptr is overwritten, since
+-- newCAF records the old itbl ptr in order to do CAF reverting
+-- (which Hugs needs to do in order that combined mode works right.)
+--
+
+-- ToDo [Feb 04] This entire link_caf nonsense could all be moved
+-- into the "newCAF" RTS procedure, which we call anyway, including
+-- the allocation of the black-hole indirection closure.
+-- That way, code size would fall, the CAF-handling code would
+-- be closer together, and the compiler wouldn't need to know
+-- about off_indirectee etc.
+
+link_caf :: ClosureInfo
+ -> Bool -- True <=> updatable, False <=> single-entry
+ -> FCode CmmExpr -- Returns amode for closure to be updated
+-- To update a CAF we must allocate a black hole, link the CAF onto the
+-- CAF list, then update the CAF to point to the fresh black hole.
+-- This function returns the address of the black hole, so it can be
+-- updated with the new value when available. The reason for all of this
+-- is that we only want to update dynamic heap objects, not static ones,
+-- so that generational GC is easier.
+link_caf cl_info is_upd = do
+ { -- Alloc black hole specifying CC_HDR(Node) as the cost centre
+ ; let use_cc = costCentreFrom (CmmReg nodeReg)
+ blame_cc = use_cc
+ ; hp_offset <- allocDynClosure bh_cl_info use_cc blame_cc []
+ ; hp_rel <- getHpRelOffset hp_offset
+
+ -- Call the RTS function newCAF to add the CAF to the CafList
+ -- so that the garbage collector can find them
+ -- This must be done *before* the info table pointer is overwritten,
+ -- because the old info table ptr is needed for reversion
+ ; emitRtsCallWithVols SLIT("newCAF") [(CmmReg nodeReg,PtrHint)] [node]
+ -- node is live, so save it.
+
+ -- Overwrite the closure with a (static) indirection
+ -- to the newly-allocated black hole
+ ; stmtsC [ CmmStore (cmmRegOffW nodeReg off_indirectee) hp_rel
+ , CmmStore (CmmReg nodeReg) ind_static_info ]
+
+ ; returnFC hp_rel }
+ where
+ bh_cl_info :: ClosureInfo
+ bh_cl_info | is_upd = cafBlackHoleClosureInfo cl_info
+ | otherwise = seCafBlackHoleClosureInfo cl_info
+
+ ind_static_info :: CmmExpr
+ ind_static_info = mkLblExpr mkIndStaticInfoLabel
+
+ off_indirectee :: WordOff
+ off_indirectee = fixedHdrSize + oFFSET_StgInd_indirectee*wORD_SIZE
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[CgClosure-Description]{Profiling Closure Description.}
+%* *
+%************************************************************************
+
+For ``global'' data constructors the description is simply the occurrence
+name of the data constructor itself. Otherwise it is determined by
+@closureDescription@ from the let binding information.
+
+\begin{code}
+closureDescription :: Module -- Module
+ -> Name -- Id of closure binding
+ -> String
+ -- Not called for StgRhsCon which have global info tables built in
+ -- CgConTbls.lhs with a description generated from the data constructor
+closureDescription mod_name name
+ = showSDocDump (char '<' <>
+ (if isExternalName name
+ then ppr name -- ppr will include the module name prefix
+ else pprModule mod_name <> char '.' <> ppr name) <>
+ char '>')
+ -- showSDocDump, because we want to see the unique on the Name.
+\end{code}
+
diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs
new file mode 100644
index 0000000000..bfb55bf46e
--- /dev/null
+++ b/compiler/codeGen/CgCon.lhs
@@ -0,0 +1,457 @@
+%
+% (c) The GRASP Project, Glasgow University, 1992-1998
+%
+\section[CgCon]{Code generation for constructors}
+
+This module provides the support code for @StgToAbstractC@ to deal
+with {\em constructors} on the RHSs of let(rec)s. See also
+@CgClosure@, which deals with closures.
+
+\begin{code}
+module CgCon (
+ cgTopRhsCon, buildDynCon,
+ bindConArgs, bindUnboxedTupleComponents,
+ cgReturnDataCon,
+ cgTyCon
+ ) where
+
+#include "HsVersions.h"
+
+import CgMonad
+import StgSyn
+
+import CgBindery ( getArgAmodes, bindNewToNode,
+ bindArgsToRegs, idInfoToAmode, stableIdInfo,
+ heapIdInfo, CgIdInfo, bindArgsToStack
+ )
+import CgStackery ( mkVirtStkOffsets, freeStackSlots,
+ getRealSp, getVirtSp, setRealAndVirtualSp )
+import CgUtils ( addIdReps, cmmLabelOffW, emitRODataLits, emitDataLits )
+import CgCallConv ( assignReturnRegs )
+import Constants ( mAX_INTLIKE, mIN_INTLIKE, mAX_CHARLIKE, mIN_CHARLIKE )
+import CgHeapery ( allocDynClosure, layOutDynConstr,
+ layOutStaticConstr, mkStaticClosureFields )
+import CgTailCall ( performReturn, emitKnownConReturnCode, returnUnboxedTuple )
+import CgProf ( mkCCostCentreStack, ldvEnter, curCCS )
+import CgTicky
+import CgInfoTbls ( emitClosureCodeAndInfoTable, dataConTagZ )
+import CLabel
+import ClosureInfo ( mkConLFInfo, mkLFArgument )
+import CmmUtils ( mkLblExpr )
+import Cmm
+import SMRep ( WordOff, CgRep, separateByPtrFollowness,
+ fixedHdrSize, typeCgRep )
+import CostCentre ( currentOrSubsumedCCS, dontCareCCS, CostCentreStack,
+ currentCCS )
+import TyCon ( TyCon, tyConDataCons, isEnumerationTyCon, tyConName )
+import DataCon ( DataCon, dataConRepArgTys, isNullaryRepDataCon,
+ isUnboxedTupleCon, dataConWorkId,
+ dataConName, dataConRepArity
+ )
+import Id ( Id, idName, isDeadBinder )
+import Type ( Type )
+import PrelInfo ( maybeCharLikeCon, maybeIntLikeCon )
+import Outputable
+import Util ( lengthIs )
+import ListSetOps ( assocMaybe )
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[toplevel-constructors]{Top-level constructors}
+%* *
+%************************************************************************
+
+\begin{code}
+cgTopRhsCon :: Id -- Name of thing bound to this RHS
+ -> DataCon -- Id
+ -> [StgArg] -- Args
+ -> FCode (Id, CgIdInfo)
+cgTopRhsCon id con args
+ = do {
+ ; hmods <- getHomeModules
+#if mingw32_TARGET_OS
+ -- Windows DLLs have a problem with static cross-DLL refs.
+ ; ASSERT( not (isDllConApp hmods con args) ) return ()
+#endif
+ ; ASSERT( args `lengthIs` dataConRepArity con ) return ()
+
+ -- LAY IT OUT
+ ; amodes <- getArgAmodes args
+
+ ; let
+ name = idName id
+ lf_info = mkConLFInfo con
+ closure_label = mkClosureLabel hmods name
+ caffy = any stgArgHasCafRefs args
+ (closure_info, amodes_w_offsets) = layOutStaticConstr hmods con amodes
+ closure_rep = mkStaticClosureFields
+ closure_info
+ dontCareCCS -- Because it's static data
+ caffy -- Has CAF refs
+ payload
+
+ payload = map get_lit amodes_w_offsets
+ get_lit (CmmLit lit, _offset) = lit
+ get_lit other = pprPanic "CgCon.get_lit" (ppr other)
+ -- NB1: amodes_w_offsets is sorted into ptrs first, then non-ptrs
+ -- NB2: all the amodes should be Lits!
+
+ -- BUILD THE OBJECT
+ ; emitDataLits closure_label closure_rep
+
+ -- RETURN
+ ; returnFC (id, stableIdInfo id (mkLblExpr closure_label) lf_info) }
+\end{code}
+
+%************************************************************************
+%* *
+%* non-top-level constructors *
+%* *
+%************************************************************************
+\subsection[code-for-constructors]{The code for constructors}
+
+\begin{code}
+buildDynCon :: Id -- Name of the thing to which this constr will
+ -- be bound
+ -> CostCentreStack -- Where to grab cost centre from;
+ -- current CCS if currentOrSubsumedCCS
+ -> DataCon -- The data constructor
+ -> [(CgRep,CmmExpr)] -- Its args
+ -> FCode CgIdInfo -- Return details about how to find it
+
+-- We used to pass a boolean indicating whether all the
+-- args were of size zero, so we could use a static
+-- constructor; but I concluded that it just isn't worth it.
+-- Now that I/O uses unboxed tuples, there just aren't any constructors
+-- with all size-zero args.
+--
+-- The reason for having a separate argument, rather than looking at
+-- the addr modes of the args is that we may be in a "knot", and
+-- premature looking at the args will cause the compiler to black-hole!
+\end{code}
+
+First we deal with the case of zero-arity constructors. Now, they
+will probably be unfolded, so we don't expect to see this case much,
+if at all, but it does no harm, and sets the scene for characters.
+
+In the case of zero-arity constructors, or, more accurately, those
+which have exclusively size-zero (VoidRep) args, we generate no code
+at all.
+
+\begin{code}
+buildDynCon binder cc con []
+ = do hmods <- getHomeModules
+ returnFC (stableIdInfo binder
+ (mkLblExpr (mkClosureLabel hmods (dataConName con)))
+ (mkConLFInfo con))
+\end{code}
+
+The following three paragraphs about @Char@-like and @Int@-like
+closures are obsolete, but I don't understand the details well enough
+to properly word them, sorry. I've changed the treatment of @Char@s to
+be analogous to @Int@s: only a subset is preallocated, because @Char@
+now has 31 bits. Only literals are handled here. -- Qrczak
+
+Now for @Char@-like closures. We generate an assignment of the
+address of the closure to a temporary. It would be possible simply to
+generate no code, and record the addressing mode in the environment,
+but we'd have to be careful if the argument wasn't a constant --- so
+for simplicity we just always assign to a temporary.
+
+Last special case: @Int@-like closures. We only special-case the
+situation in which the argument is a literal in the range
+@mIN_INTLIKE@..@mAX_INTLIKE@. NB: for @Char@-like closures we can
+work with any old argument, but for @Int@-like ones the argument has
+to be a literal. Reason: @Char@-like closures have an argument type
+which is guaranteed in range.
+
+Because of this, we can safely return an addressing mode.
+
+\begin{code}
+buildDynCon binder cc con [arg_amode]
+ | maybeIntLikeCon con
+ , (_, CmmLit (CmmInt val _)) <- arg_amode
+ , let val_int = (fromIntegral val) :: Int
+ , val_int <= mAX_INTLIKE && val_int >= mIN_INTLIKE
+ = do { let intlike_lbl = mkRtsDataLabel SLIT("stg_INTLIKE_closure")
+ offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1)
+ -- INTLIKE closures consist of a header and one word payload
+ intlike_amode = CmmLit (cmmLabelOffW intlike_lbl offsetW)
+ ; returnFC (stableIdInfo binder intlike_amode (mkConLFInfo con)) }
+
+buildDynCon binder cc con [arg_amode]
+ | maybeCharLikeCon con
+ , (_, CmmLit (CmmInt val _)) <- arg_amode
+ , let val_int = (fromIntegral val) :: Int
+ , val_int <= mAX_CHARLIKE && val_int >= mIN_CHARLIKE
+ = do { let charlike_lbl = mkRtsDataLabel SLIT("stg_CHARLIKE_closure")
+ offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1)
+ -- CHARLIKE closures consist of a header and one word payload
+ charlike_amode = CmmLit (cmmLabelOffW charlike_lbl offsetW)
+ ; returnFC (stableIdInfo binder charlike_amode (mkConLFInfo con)) }
+\end{code}
+
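+As a worked example: assuming @mIN_INTLIKE@ is -16 and @fixedHdrSize@
+is 1 (no profiling header), the literal @5#@ gives
+
+	offsetW = (5 - (-16)) * (1 + 1) = 42
+
+so the returned amode points 42 words past @stg_INTLIKE_closure@, at
+the twenty-second of the preallocated two-word closures.
+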
+Now the general case.
+
+\begin{code}
+buildDynCon binder ccs con args
+ = do {
+ ; hmods <- getHomeModules
+ ; let
+ (closure_info, amodes_w_offsets) = layOutDynConstr hmods con args
+
+ ; hp_off <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets
+ ; returnFC (heapIdInfo binder hp_off lf_info) }
+ where
+ lf_info = mkConLFInfo con
+
+ use_cc -- cost-centre to stick in the object
+ | currentOrSubsumedCCS ccs = curCCS
+ | otherwise = CmmLit (mkCCostCentreStack ccs)
+
+ blame_cc = use_cc -- cost-centre on which to blame the alloc (same)
+\end{code}
+
+
+%************************************************************************
+%* *
+%* constructor-related utility function: *
+%* bindConArgs is called from cgAlt of a case *
+%* *
+%************************************************************************
+\subsection[constructor-utilities]{@bindConArgs@: constructor-related utility}
+
+@bindConArgs@ $con args$ augments the environment with bindings for the
+binders $args$, assuming that we have just returned from a @case@ which
+found a $con$.
+
+\begin{code}
+bindConArgs :: DataCon -> [Id] -> Code
+bindConArgs con args
+ = do hmods <- getHomeModules
+ let
+ bind_arg (arg, offset) = bindNewToNode arg offset (mkLFArgument arg)
+ (_, args_w_offsets) = layOutDynConstr hmods con (addIdReps args)
+ --
+ ASSERT(not (isUnboxedTupleCon con)) return ()
+ mapCs bind_arg args_w_offsets
+\end{code}
+
+Unboxed tuples are handled slightly differently --- the object is
+returned in registers and on the stack instead of the heap.
+
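+For example, in
+
+	case e of (# a, b #) -> rhs
+
+no heap object for the tuple ever exists: @a@ and @b@ come back in the
+return registers (or stack slots) chosen by @assignReturnRegs@, and
+are bound right there.
+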
+\begin{code}
+bindUnboxedTupleComponents
+ :: [Id] -- Args
+ -> FCode ([(Id,GlobalReg)], -- Regs assigned
+ WordOff, -- Number of pointer stack slots
+ WordOff, -- Number of non-pointer stack slots
+ VirtualSpOffset) -- Offset of return address slot
+ -- (= realSP on entry)
+
+bindUnboxedTupleComponents args
+ = do {
+ vsp <- getVirtSp
+ ; rsp <- getRealSp
+
+ -- Assign as many components as possible to registers
+ ; let (reg_args, stk_args) = assignReturnRegs (addIdReps args)
+
+ -- Separate the rest of the args into pointers and non-pointers
+ (ptr_args, nptr_args) = separateByPtrFollowness stk_args
+
+ -- Allocate the rest on the stack
+ -- The real SP points to the return address, above which any
+ -- leftover unboxed-tuple components will be allocated
+ (ptr_sp, ptr_offsets) = mkVirtStkOffsets rsp ptr_args
+ (nptr_sp, nptr_offsets) = mkVirtStkOffsets ptr_sp nptr_args
+ ptrs = ptr_sp - rsp
+ nptrs = nptr_sp - ptr_sp
+
+ -- The stack pointer points to the last stack-allocated component
+ ; setRealAndVirtualSp nptr_sp
+
+ -- We have just allocated slots starting at real SP + 1, and set the new
+ -- virtual SP to the topmost allocated slot.
+ -- If the virtual SP started *below* the real SP, we've just jumped over
+ -- some slots that won't be in the free-list, so put them there
+ -- This commonly happens because we've freed the return-address slot
+ -- (trimming back the virtual SP), but the real SP still points to that slot
+ ; freeStackSlots [vsp+1,vsp+2 .. rsp]
+
+ ; bindArgsToRegs reg_args
+ ; bindArgsToStack ptr_offsets
+ ; bindArgsToStack nptr_offsets
+
+ ; returnFC (reg_args, ptrs, nptrs, rsp) }
+\end{code}
+
+%************************************************************************
+%* *
+ Actually generate code for a constructor return
+%* *
+%************************************************************************
+
+
+Note: it's the responsibility of the @cgReturnDataCon@ caller to be
+sure the @amodes@ passed don't conflict with each other.
+\begin{code}
+cgReturnDataCon :: DataCon -> [(CgRep, CmmExpr)] -> Code
+
+cgReturnDataCon con amodes
+ = ASSERT( amodes `lengthIs` dataConRepArity con )
+ do { EndOfBlockInfo _ sequel <- getEndOfBlockInfo
+ ; case sequel of
+ CaseAlts _ (Just (alts, deflt_lbl)) bndr _
+ -> -- Ho! We know the constructor so we can
+ -- go straight to the right alternative
+ case assocMaybe alts (dataConTagZ con) of {
+ Just join_lbl -> build_it_then (jump_to join_lbl);
+ Nothing
+ -- Special case! We're returning a constructor to the default case
+ -- of an enclosing case. For example:
+ --
+ -- case (case e of (a,b) -> C a b) of
+ -- D x -> ...
+ -- y -> ...<returning here!>...
+ --
+ -- In this case,
+ -- if the default is a non-bind-default (ie does not use y),
+ -- then we should simply jump to the default join point;
+
+ | isDeadBinder bndr -> performReturn (jump_to deflt_lbl)
+ | otherwise -> build_it_then (jump_to deflt_lbl) }
+
+ other_sequel -- The usual case
+ | isUnboxedTupleCon con -> returnUnboxedTuple amodes
+ | otherwise -> build_it_then (emitKnownConReturnCode con)
+ }
+ where
+ jump_to lbl = stmtC (CmmJump (CmmLit lbl) [])
+ build_it_then return_code
+ = do { -- BUILD THE OBJECT IN THE HEAP
+ -- The first "con" says that the name bound to this
+ -- closure is "con", which is a bit of a fudge, but it only
+ -- affects profiling
+
+ -- This Id is also used to get a unique for a
+ -- temporary variable, if the closure is a CHARLIKE.
+ -- funnily enough, this makes the unique always come
+ -- out as '54' :-)
+ tickyReturnNewCon (length amodes)
+ ; idinfo <- buildDynCon (dataConWorkId con) currentCCS con amodes
+ ; amode <- idInfoToAmode idinfo
+ ; checkedAbsC (CmmAssign nodeReg amode)
+ ; performReturn return_code }
+\end{code}
+
+
+%************************************************************************
+%* *
+ Generating static stuff for algebraic data types
+%* *
+%************************************************************************
+
+ [These comments are rather out of date]
+
+\begin{tabular}{lll}
+Info tbls & Macro & Kind of constructor \\
+\hline
+info & @CONST_INFO_TABLE@& Zero arity (no info -- compiler uses static closure)\\
+info & @CHARLIKE_INFO_TABLE@& Charlike (no info -- compiler indexes fixed array)\\
+info & @INTLIKE_INFO_TABLE@& Intlike; the one macro generates both info tbls\\
+info & @SPEC_INFO_TABLE@& SPECish, and bigger than or equal to @MIN_UPD_SIZE@\\
+info & @GEN_INFO_TABLE@& GENish (hence bigger than or equal to @MIN_UPD_SIZE@)\\
+\end{tabular}
+
+Possible info tables for constructor con:
+
+\begin{description}
+\item[@_con_info@:]
+Used for dynamically let(rec)-bound occurrences of
+the constructor, and for updates. For constructors
+which are int-like, char-like or nullary, when GC occurs,
+the closure tries to get rid of itself.
+
+\item[@_static_info@:]
+Static occurrences of the constructor
+macro: @STATIC_INFO_TABLE@.
+\end{description}
+
+For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure;
+its place is taken by the top-level defn of the constructor.
+
+For charlike and intlike closures there is a fixed array of static
+closures predeclared.
+
+\begin{code}
+cgTyCon :: TyCon -> FCode [Cmm] -- each constructor gets a separate Cmm
+cgTyCon tycon
+ = do { constrs <- mapM (getCmm . cgDataCon) (tyConDataCons tycon)
+
+ -- Generate a table of static closures for an enumeration type
+ -- Put the table after the data constructor decls, because the
+	-- datatype closure table (for enumeration types) refers
+	-- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff
+ ; extra <-
+ if isEnumerationTyCon tycon then do
+ tbl <- getCmm (emitRODataLits (mkLocalClosureTableLabel
+ (tyConName tycon))
+ [ CmmLabel (mkLocalClosureLabel (dataConName con))
+ | con <- tyConDataCons tycon])
+ return [tbl]
+ else
+ return []
+
+ ; return (extra ++ constrs)
+ }
+\end{code}
+
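+For example (the closure-table and constructor labels here are
+illustrative), given
+
+	data Colour = Red | Green | Blue
+
+the extra table is just the three static closures in tag order,
+
+	Colour_closure_tbl: Red_closure, Green_closure, Blue_closure
+
+which is what @tagToEnum#@ indexes to turn a tag back into a closure
+pointer.
+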
+Generate the entry code, info tables, and (for niladic constructors)
+the static closure for a constructor.
+
+\begin{code}
+cgDataCon :: DataCon -> Code
+cgDataCon data_con
+ = do { -- Don't need any dynamic closure code for zero-arity constructors
+ hmods <- getHomeModules
+
+ ; let
+ -- To allow the debuggers, interpreters, etc to cope with
+ -- static data structures (ie those built at compile
+ -- time), we take care that info-table contains the
+ -- information we need.
+ (static_cl_info, _) =
+ layOutStaticConstr hmods data_con arg_reps
+
+ (dyn_cl_info, arg_things) =
+ layOutDynConstr hmods data_con arg_reps
+
+ emit_info cl_info ticky_code
+ = do { code_blks <- getCgStmts the_code
+ ; emitClosureCodeAndInfoTable cl_info [] code_blks }
+ where
+ the_code = do { ticky_code
+ ; ldvEnter (CmmReg nodeReg)
+ ; body_code }
+
+ arg_reps :: [(CgRep, Type)]
+ arg_reps = [(typeCgRep ty, ty) | ty <- dataConRepArgTys data_con]
+
+ body_code = do {
+ -- NB: We don't set CC when entering data (WDP 94/06)
+ tickyReturnOldCon (length arg_things)
+ ; performReturn (emitKnownConReturnCode data_con) }
+ -- noStmts: Ptr to thing already in Node
+
+ ; whenC (not (isNullaryRepDataCon data_con))
+ (emit_info dyn_cl_info tickyEnterDynCon)
+
+ -- Dynamic-Closure first, to reduce forward references
+ ; emit_info static_cl_info tickyEnterStaticCon }
+
+ where
+\end{code}
diff --git a/compiler/codeGen/CgExpr.hi-boot-5 b/compiler/codeGen/CgExpr.hi-boot-5
new file mode 100644
index 0000000000..588e63f8f1
--- /dev/null
+++ b/compiler/codeGen/CgExpr.hi-boot-5
@@ -0,0 +1,3 @@
+__interface CgExpr 1 0 where
+__export CgExpr cgExpr;
+1 cgExpr :: StgSyn.StgExpr -> CgMonad.Code ;
diff --git a/compiler/codeGen/CgExpr.hi-boot-6 b/compiler/codeGen/CgExpr.hi-boot-6
new file mode 100644
index 0000000000..dc2d75cefe
--- /dev/null
+++ b/compiler/codeGen/CgExpr.hi-boot-6
@@ -0,0 +1,3 @@
+module CgExpr where
+
+cgExpr :: StgSyn.StgExpr -> CgMonad.Code
diff --git a/compiler/codeGen/CgExpr.lhs b/compiler/codeGen/CgExpr.lhs
new file mode 100644
index 0000000000..33d72f1608
--- /dev/null
+++ b/compiler/codeGen/CgExpr.lhs
@@ -0,0 +1,454 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+% $Id: CgExpr.lhs,v 1.62 2005/06/21 10:44:41 simonmar Exp $
+%
+%********************************************************
+%* *
+\section[CgExpr]{Converting @StgExpr@s}
+%* *
+%********************************************************
+
+\begin{code}
+module CgExpr ( cgExpr ) where
+
+#include "HsVersions.h"
+
+import Constants ( mAX_SPEC_SELECTEE_SIZE, mAX_SPEC_AP_SIZE )
+import StgSyn
+import CgMonad
+
+import SMRep ( fixedHdrSize, isFollowableArg, CgRep(..), argMachRep,
+ nonVoidArg, idCgRep, typeCgRep, typeHint,
+ primRepToCgRep )
+import CoreSyn ( AltCon(..) )
+import CgProf ( emitSetCCC )
+import CgHeapery ( layOutDynConstr )
+import CgBindery ( getArgAmodes, getArgAmode, CgIdInfo,
+ nukeDeadBindings, addBindC, addBindsC )
+import CgCase ( cgCase, saveVolatileVarsAndRegs )
+import CgClosure ( cgRhsClosure, cgStdRhsClosure )
+import CgCon ( buildDynCon, cgReturnDataCon )
+import CgLetNoEscape ( cgLetNoEscapeClosure )
+import CgCallConv ( dataReturnConvPrim )
+import CgTailCall
+import CgInfoTbls ( emitDirectReturnInstr )
+import CgForeignCall ( emitForeignCall, shimForeignCallArg )
+import CgPrimOp ( cgPrimOp )
+import CgUtils ( addIdReps, newTemp, assignTemp, cgLit, tagToClosure )
+import ClosureInfo ( mkSelectorLFInfo, mkApLFInfo )
+import Cmm ( CmmExpr(..), CmmStmt(..), CmmReg, nodeReg )
+import MachOp ( wordRep, MachHint )
+import VarSet
+import Literal ( literalType )
+import PrimOp ( primOpOutOfLine, getPrimOpResultInfo,
+ PrimOp(..), PrimOpResultInfo(..) )
+import Id ( Id )
+import TyCon ( isUnboxedTupleTyCon, isEnumerationTyCon )
+import Type ( Type, tyConAppArgs, tyConAppTyCon, repType,
+ PrimRep(VoidRep) )
+import Maybes ( maybeToBool )
+import ListSetOps ( assocMaybe )
+import BasicTypes ( RecFlag(..) )
+import Util ( lengthIs )
+import Outputable
+\end{code}
+
+This module provides the support code for @StgToAbstractC@ to deal
+with STG {\em expressions}. See also @CgClosure@, which deals
+with closures, and @CgCon@, which deals with constructors.
+
+\begin{code}
+cgExpr :: StgExpr -- input
+ -> Code -- output
+\end{code}
+
+%********************************************************
+%* *
+%* Tail calls *
+%* *
+%********************************************************
+
+``Applications'' mean {\em tail calls}, a service provided by module
+@CgTailCall@. This includes literals, which show up as
+@(StgApp (StgLitArg 42) [])@.
+
+\begin{code}
+cgExpr (StgApp fun args) = cgTailCall fun args
+\end{code}
+
+%********************************************************
+%* *
+%* STG ConApps (for inline versions) *
+%* *
+%********************************************************
+
+\begin{code}
+cgExpr (StgConApp con args)
+ = do { amodes <- getArgAmodes args
+ ; cgReturnDataCon con amodes }
+\end{code}
+
+Literals are similar to constructors; they return by putting
+themselves in an appropriate register and returning to the address on
+top of the stack.
+
+\begin{code}
+cgExpr (StgLit lit)
+ = do { cmm_lit <- cgLit lit
+ ; performPrimReturn rep (CmmLit cmm_lit) }
+ where
+ rep = typeCgRep (literalType lit)
+\end{code}
+
+
+%********************************************************
+%* *
+%* PrimOps and foreign calls.
+%* *
+%********************************************************
+
+NOTE about "safe" foreign calls: a safe foreign call is never compiled
+inline in a case expression. When we see
+
+ case (ccall ...) of { ... }
+
+We generate a proper return address for the alternatives and push the
+stack frame before doing the call, so that in the event that the call
+re-enters the RTS the stack is in a sane state.
+
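+Schematically, the code for a safe call looks like this (a sketch;
+the save/load steps are @emitSaveThreadState@ and
+@emitLoadThreadState@ in @CgForeignCall@):
+
+	push return address for the alternatives;
+	save thread state;
+	r = foreign_fn(args);	/* the call itself */
+	load thread state;
+	return r to the alternatives;
+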
+\begin{code}
+cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do
+ {-
+ First, copy the args into temporaries. We're going to push
+ a return address right before doing the call, so the args
+ must be out of the way.
+ -}
+ reps_n_amodes <- getArgAmodes stg_args
+ let
+ -- Get the *non-void* args, and jiggle them with shimForeignCall
+ arg_exprs = [ shimForeignCallArg stg_arg expr
+ | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes,
+ nonVoidArg rep]
+
+ -- in
+ arg_tmps <- mapM assignTemp arg_exprs
+ let
+ arg_hints = zip arg_tmps (map (typeHint.stgArgType) stg_args)
+ -- in
+ {-
+ Now, allocate some result regs.
+ -}
+ (res_reps,res_regs,res_hints) <- newUnboxedTupleRegs res_ty
+ ccallReturnUnboxedTuple (zip res_reps (map CmmReg res_regs)) $
+ emitForeignCall (zip res_regs res_hints) fcall
+ arg_hints emptyVarSet{-no live vars-}
+
+-- tagToEnum# is special: we need to pull the constructor out of the table,
+-- and perform an appropriate return.
+
+cgExpr (StgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty)
+ = ASSERT(isEnumerationTyCon tycon)
+ do { (_,amode) <- getArgAmode arg
+ ; amode' <- assignTemp amode -- We're going to use it twice,
+ -- so save in a temp if non-trivial
+ ; hmods <- getHomeModules
+ ; stmtC (CmmAssign nodeReg (tagToClosure hmods tycon amode'))
+ ; performReturn (emitAlgReturnCode tycon amode') }
+ where
+ -- If you're reading this code in the attempt to figure
+ -- out why the compiler panic'ed here, it is probably because
+ -- you used tagToEnum# in a non-monomorphic setting, e.g.,
+ -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x#
+ -- That won't work.
+ tycon = tyConAppTyCon res_ty
+
+
+cgExpr x@(StgOpApp op@(StgPrimOp primop) args res_ty)
+ | primOpOutOfLine primop
+ = tailCallPrimOp primop args
+
+ | ReturnsPrim VoidRep <- result_info
+ = do cgPrimOp [] primop args emptyVarSet
+ performReturn emitDirectReturnInstr
+
+ | ReturnsPrim rep <- result_info
+ = do cgPrimOp [dataReturnConvPrim (primRepToCgRep rep)]
+ primop args emptyVarSet
+ performReturn emitDirectReturnInstr
+
+ | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon
+ = do (reps, regs, _hints) <- newUnboxedTupleRegs res_ty
+ cgPrimOp regs primop args emptyVarSet{-no live vars-}
+ returnUnboxedTuple (zip reps (map CmmReg regs))
+
+ | ReturnsAlg tycon <- result_info, isEnumerationTyCon tycon
+ -- c.f. cgExpr (...TagToEnumOp...)
+ = do tag_reg <- newTemp wordRep
+ hmods <- getHomeModules
+ cgPrimOp [tag_reg] primop args emptyVarSet
+ stmtC (CmmAssign nodeReg (tagToClosure hmods tycon (CmmReg tag_reg)))
+ performReturn (emitAlgReturnCode tycon (CmmReg tag_reg))
+ where
+ result_info = getPrimOpResultInfo primop
+\end{code}
+
+%********************************************************
+%* *
+%* Case expressions *
+%* *
+%********************************************************
+
+Case-expression conversion is complicated enough to have its own
+module, @CgCase@.
+
+\begin{code}
+
+cgExpr (StgCase expr live_vars save_vars bndr srt alt_type alts)
+ = cgCase expr live_vars save_vars bndr srt alt_type alts
+\end{code}
+
+
+%********************************************************
+%* *
+%* Let and letrec *
+%* *
+%********************************************************
+\subsection[let-and-letrec-codegen]{Converting @StgLet@ and @StgLetrec@}
+
+\begin{code}
+cgExpr (StgLet (StgNonRec name rhs) expr)
+ = cgRhs name rhs `thenFC` \ (name, info) ->
+ addBindC name info `thenC`
+ cgExpr expr
+
+cgExpr (StgLet (StgRec pairs) expr)
+ = fixC (\ new_bindings -> addBindsC new_bindings `thenC`
+ listFCs [ cgRhs b e | (b,e) <- pairs ]
+ ) `thenFC` \ new_bindings ->
+
+ addBindsC new_bindings `thenC`
+ cgExpr expr
+\end{code}
+
+\begin{code}
+cgExpr (StgLetNoEscape live_in_whole_let live_in_rhss bindings body)
+ = do { -- Figure out what volatile variables to save
+ ; nukeDeadBindings live_in_whole_let
+ ; (save_assts, rhs_eob_info, maybe_cc_slot)
+ <- saveVolatileVarsAndRegs live_in_rhss
+
+ -- Save those variables right now!
+ ; emitStmts save_assts
+
+ -- Produce code for the rhss
+ -- and add suitable bindings to the environment
+ ; cgLetNoEscapeBindings live_in_rhss rhs_eob_info
+ maybe_cc_slot bindings
+
+ -- Do the body
+ ; setEndOfBlockInfo rhs_eob_info (cgExpr body) }
+\end{code}
+
+
+%********************************************************
+%* *
+%* SCC Expressions *
+%* *
+%********************************************************
+
+SCC expressions are treated specially. They set the current cost
+centre.
+
+\begin{code}
+cgExpr (StgSCC cc expr) = do emitSetCCC cc; cgExpr expr
+\end{code}
+
+%********************************************************
+%* *
+%* Non-top-level bindings *
+%* *
+%********************************************************
+\subsection[non-top-level-bindings]{Converting non-top-level bindings}
+
+We rely on the support code in @CgCon@ (to do constructors) and
+in @CgClosure@ (to do closures).
+
+\begin{code}
+cgRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo)
+ -- the Id is passed along so a binding can be set up
+
+cgRhs name (StgRhsCon maybe_cc con args)
+ = do { amodes <- getArgAmodes args
+ ; idinfo <- buildDynCon name maybe_cc con amodes
+ ; returnFC (name, idinfo) }
+
+cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body)
+ = do hmods <- getHomeModules
+ mkRhsClosure hmods name cc bi srt fvs upd_flag args body
+\end{code}
+
+mkRhsClosure looks for two special forms of the right-hand side:
+ a) selector thunks.
+ b) AP thunks
+
+If neither happens, it just calls mkClosureLFInfo. You might think
+that mkClosureLFInfo should do all this, but it seems wrong for the
+latter to look at the structure of an expression
+
+Selectors
+~~~~~~~~~
+We look at the body of the closure to see if it's a selector---turgid,
+but nothing deep. We are looking for a closure of {\em exactly} the
+form:
+
+... = [the_fv] \ u [] ->
+ case the_fv of
+ con a_1 ... a_n -> a_i
+
+
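+For example, the thunk built for @snd p@ has exactly this shape,
+
+	s = [p] \ u [] -> case p of (,) a b -> b
+
+so (assuming the RTS selector entries, @stg_sel_1_upd@ and friends) it
+can share pre-compiled selector code instead of getting code of its
+own.
+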
+\begin{code}
+mkRhsClosure hmods bndr cc bi srt
+ [the_fv] -- Just one free var
+ upd_flag -- Updatable thunk
+ [] -- A thunk
+ body@(StgCase (StgApp scrutinee [{-no args-}])
+ _ _ _ _ -- ignore uniq, etc.
+ (AlgAlt tycon)
+ [(DataAlt con, params, use_mask,
+ (StgApp selectee [{-no args-}]))])
+ | the_fv == scrutinee -- Scrutinee is the only free variable
+ && maybeToBool maybe_offset -- Selectee is a component of the tuple
+ && offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough
+ = -- NOT TRUE: ASSERT(is_single_constructor)
+ -- The simplifier may have statically determined that the single alternative
+ -- is the only possible case and eliminated the others, even if there are
+ -- other constructors in the datatype. It's still ok to make a selector
+ -- thunk in this case, because we *know* which constructor the scrutinee
+ -- will evaluate to.
+ cgStdRhsClosure bndr cc bi [the_fv] [] body lf_info [StgVarArg the_fv]
+ where
+ lf_info = mkSelectorLFInfo bndr offset_into_int
+ (isUpdatable upd_flag)
+ (_, params_w_offsets) = layOutDynConstr hmods con (addIdReps params)
+ -- Just want the layout
+ maybe_offset = assocMaybe params_w_offsets selectee
+ Just the_offset = maybe_offset
+ offset_into_int = the_offset - fixedHdrSize
+\end{code}
+
+Ap thunks
+~~~~~~~~~
+
+A more generic AP thunk of the form
+
+ x = [ x_1...x_n ] \.. [] -> x_1 ... x_n
+
+A set of these is compiled statically into the RTS, so we just use
+those. We could extend the idea to thunks where some of the x_i are
+global ids (and hence not free variables), but this would entail
+generating a larger thunk. It might be an option for non-optimising
+compilation, though.
+
+We only generate an Ap thunk if all the free variables are pointers,
+for semi-obvious reasons.
+
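+For example (assuming RTS entry labels of the form @stg_ap_N_upd@),
+the thunk
+
+	x = [f,a,b] \ u [] -> f a b
+
+has arity 3 in the sense used here (the length of the payload), and
+shares the pre-compiled @stg_ap_3_upd@ entry code rather than getting
+an info table and code of its own.
+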
+\begin{code}
+mkRhsClosure hmods bndr cc bi srt
+ fvs
+ upd_flag
+ [] -- No args; a thunk
+ body@(StgApp fun_id args)
+
+ | args `lengthIs` (arity-1)
+ && all isFollowableArg (map idCgRep fvs)
+ && isUpdatable upd_flag
+ && arity <= mAX_SPEC_AP_SIZE
+
+ -- Ha! an Ap thunk
+ = cgStdRhsClosure bndr cc bi fvs [] body lf_info payload
+
+ where
+ lf_info = mkApLFInfo bndr upd_flag arity
+ -- the payload has to be in the correct order, hence we can't
+ -- just use the fvs.
+ payload = StgVarArg fun_id : args
+ arity = length fvs
+\end{code}
+
+The default case
+~~~~~~~~~~~~~~~~
+\begin{code}
+mkRhsClosure hmods bndr cc bi srt fvs upd_flag args body
+ = cgRhsClosure bndr cc bi srt fvs upd_flag args body
+\end{code}
+
+
+%********************************************************
+%* *
+%* Let-no-escape bindings
+%* *
+%********************************************************
+\begin{code}
+cgLetNoEscapeBindings live_in_rhss rhs_eob_info maybe_cc_slot
+ (StgNonRec binder rhs)
+ = do { (binder,info) <- cgLetNoEscapeRhs live_in_rhss rhs_eob_info
+ maybe_cc_slot
+ NonRecursive binder rhs
+ ; addBindC binder info }
+
+cgLetNoEscapeBindings live_in_rhss rhs_eob_info maybe_cc_slot (StgRec pairs)
+ = do { new_bindings <- fixC (\ new_bindings -> do
+ { addBindsC new_bindings
+ ; listFCs [ cgLetNoEscapeRhs full_live_in_rhss
+ rhs_eob_info maybe_cc_slot Recursive b e
+ | (b,e) <- pairs ] })
+
+ ; addBindsC new_bindings }
+ where
+ -- We add the binders to the live-in-rhss set so that we don't
+ -- delete the bindings for the binder from the environment!
+ full_live_in_rhss = live_in_rhss `unionVarSet` (mkVarSet [b | (b,r) <- pairs])
+
+cgLetNoEscapeRhs
+ :: StgLiveVars -- Live in rhss
+ -> EndOfBlockInfo
+ -> Maybe VirtualSpOffset
+ -> RecFlag
+ -> Id
+ -> StgRhs
+ -> FCode (Id, CgIdInfo)
+
+cgLetNoEscapeRhs full_live_in_rhss rhs_eob_info maybe_cc_slot rec binder
+ (StgRhsClosure cc bi _ upd_flag srt args body)
+ = -- We could check the update flag, but currently we don't switch it off
+ -- for let-no-escaped things, so we omit the check too!
+ -- case upd_flag of
+ -- Updatable -> panic "cgLetNoEscapeRhs" -- Nothing to update!
+ -- other -> cgLetNoEscapeClosure binder cc bi live_in_whole_let live_in_rhss args body
+ cgLetNoEscapeClosure binder cc bi srt full_live_in_rhss rhs_eob_info
+ maybe_cc_slot rec args body
+
+-- For a constructor RHS we want to generate a single chunk of code which
+-- can be jumped to from many places, which will return the constructor.
+-- It's easy; just behave as if it was an StgRhsClosure with a ConApp inside!
+cgLetNoEscapeRhs full_live_in_rhss rhs_eob_info maybe_cc_slot rec binder
+ (StgRhsCon cc con args)
+ = cgLetNoEscapeClosure binder cc noBinderInfo{-safe-} NoSRT
+ full_live_in_rhss rhs_eob_info maybe_cc_slot rec
+		[] --No args; the binder is a data structure, not a function
+ (StgConApp con args)
+\end{code}
+
+Little helper for primitives that return unboxed tuples.
+
+\begin{code}
+newUnboxedTupleRegs :: Type -> FCode ([CgRep], [CmmReg], [MachHint])
+newUnboxedTupleRegs res_ty =
+ let
+ ty_args = tyConAppArgs (repType res_ty)
+ (reps,hints) = unzip [ (rep, typeHint ty) | ty <- ty_args,
+ let rep = typeCgRep ty,
+ nonVoidArg rep ]
+ in do
+ regs <- mapM (newTemp . argMachRep) reps
+ return (reps,regs,hints)
+\end{code}
diff --git a/compiler/codeGen/CgExpr.lhs-boot b/compiler/codeGen/CgExpr.lhs-boot
new file mode 100644
index 0000000000..29cdc3a605
--- /dev/null
+++ b/compiler/codeGen/CgExpr.lhs-boot
@@ -0,0 +1,7 @@
+\begin{code}
+module CgExpr where
+import StgSyn( StgExpr )
+import CgMonad( Code )
+
+cgExpr :: StgExpr -> Code
+\end{code}
diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs
new file mode 100644
index 0000000000..10f41bdf8b
--- /dev/null
+++ b/compiler/codeGen/CgForeignCall.hs
@@ -0,0 +1,256 @@
+-----------------------------------------------------------------------------
+--
+-- Code generation for foreign calls.
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+module CgForeignCall (
+ cgForeignCall,
+ emitForeignCall,
+ emitForeignCall',
+ shimForeignCallArg,
+ emitSaveThreadState, -- will be needed by the Cmm parser
+ emitLoadThreadState, -- ditto
+ emitCloseNursery,
+ emitOpenNursery,
+ ) where
+
+#include "HsVersions.h"
+
+import StgSyn ( StgLiveVars, StgArg, stgArgType )
+import CgProf ( curCCS, curCCSAddr )
+import CgBindery ( getVolatileRegs, getArgAmodes )
+import CgMonad
+import CgUtils ( cmmOffsetW, cmmOffsetB, cmmLoadIndexW, newTemp,
+ assignTemp )
+import Type ( tyConAppTyCon, repType )
+import TysPrim
+import CLabel ( mkForeignLabel, mkRtsCodeLabel )
+import Cmm
+import CmmUtils
+import MachOp
+import SMRep
+import ForeignCall
+import Constants
+import StaticFlags ( opt_SccProfilingOn )
+import Outputable
+
+import Monad ( when )
+
+-- -----------------------------------------------------------------------------
+-- Code generation for Foreign Calls
+
+cgForeignCall
+ :: [(CmmReg,MachHint)] -- where to put the results
+ -> ForeignCall -- the op
+ -> [StgArg] -- arguments
+ -> StgLiveVars -- live vars, in case we need to save them
+ -> Code
+cgForeignCall results fcall stg_args live
+ = do
+ reps_n_amodes <- getArgAmodes stg_args
+ let
+        -- Get the *non-void* args, and jiggle them with shimForeignCallArg
+ arg_exprs = [ shimForeignCallArg stg_arg expr
+ | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes,
+ nonVoidArg rep]
+
+ arg_hints = zip arg_exprs (map (typeHint.stgArgType) stg_args)
+ -- in
+ emitForeignCall results fcall arg_hints live
+
+
+emitForeignCall
+ :: [(CmmReg,MachHint)] -- where to put the results
+ -> ForeignCall -- the op
+ -> [(CmmExpr,MachHint)] -- arguments
+ -> StgLiveVars -- live vars, in case we need to save them
+ -> Code
+
+emitForeignCall results (CCall (CCallSpec target cconv safety)) args live
+ = do vols <- getVolatileRegs live
+ emitForeignCall' safety results
+ (CmmForeignCall cmm_target cconv) call_args (Just vols)
+ where
+ (call_args, cmm_target)
+ = case target of
+ StaticTarget lbl -> (args, CmmLit (CmmLabel
+ (mkForeignLabel lbl call_size False)))
+ DynamicTarget -> case args of (fn,_):rest -> (rest, fn)
+
+ -- in the stdcall calling convention, the symbol needs @size appended
+ -- to it, where size is the total number of bytes of arguments. We
+ -- attach this info to the CLabel here, and the CLabel pretty printer
+ -- will generate the suffix when the label is printed.
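+        -- For example (hypothetical symbol): a StdCallConv call to "foo"
+        -- passing two word-sized arguments on a 32-bit target has its
+        -- label printed as "foo@8".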
+ call_size
+ | StdCallConv <- cconv = Just (sum (map (arg_size.cmmExprRep.fst) args))
+ | otherwise = Nothing
+
+ -- ToDo: this might not be correct for 64-bit API
+ arg_size rep = max (machRepByteWidth rep) wORD_SIZE
+
+emitForeignCall results (DNCall _) args live
+ = panic "emitForeignCall: DNCall"
+
+
+-- alternative entry point, used by CmmParse
+emitForeignCall'
+ :: Safety
+ -> [(CmmReg,MachHint)] -- where to put the results
+ -> CmmCallTarget -- the op
+ -> [(CmmExpr,MachHint)] -- arguments
+ -> Maybe [GlobalReg] -- live vars, in case we need to save them
+ -> Code
+emitForeignCall' safety results target args vols
+ | not (playSafe safety) = do
+ temp_args <- load_args_into_temps args
+ stmtC (CmmCall target results temp_args vols)
+
+ | otherwise = do
+ id <- newTemp wordRep
+ temp_args <- load_args_into_temps args
+ emitSaveThreadState
+ stmtC (CmmCall (CmmForeignCall suspendThread CCallConv)
+ [(id,PtrHint)]
+ [ (CmmReg (CmmGlobal BaseReg), PtrHint) ]
+ vols
+ )
+ stmtC (CmmCall target results temp_args vols)
+ stmtC (CmmCall (CmmForeignCall resumeThread CCallConv)
+ [ (CmmGlobal BaseReg, PtrHint) ]
+ -- Assign the result to BaseReg: we
+ -- might now have a different
+ -- Capability!
+ [ (CmmReg id, PtrHint) ]
+ vols
+ )
+ emitLoadThreadState
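+
+-- A safe call therefore comes out roughly as (pseudo-C--):
+--   <save thread state>
+--   id = suspendThread(BaseReg);
+--   <the call itself>
+--   BaseReg = resumeThread(id);
+--   <load thread state>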
+
+
+suspendThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("suspendThread")))
+resumeThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("resumeThread")))
+
+
+-- we might need to load arguments into temporaries before
+-- making the call, because certain global registers might
+-- overlap with registers that the C calling convention uses
+-- for passing arguments.
+--
+-- This is a HACK; really it should be done in the back end, but
+-- it's easier to generate the temporaries here.
+load_args_into_temps :: [(CmmExpr, MachHint)] -> FCode [(CmmExpr, MachHint)]
+load_args_into_temps args = mapM maybe_assignTemp args
+
+maybe_assignTemp :: (CmmExpr, MachHint) -> FCode (CmmExpr, MachHint)
+maybe_assignTemp (e, hint)
+ | hasNoGlobalRegs e = return (e, hint)
+ | otherwise = do
+ -- don't use assignTemp, it uses its own notion of "trivial"
+ -- expressions, which are wrong here
+ reg <- newTemp (cmmExprRep e)
+ stmtC (CmmAssign reg e)
+ return (CmmReg reg, hint)
+
+-- -----------------------------------------------------------------------------
+-- Save/restore the thread state in the TSO
+
+-- This stuff can't be done in suspendThread/resumeThread, because it
+-- refers to global registers which aren't available in the C world.
+
+emitSaveThreadState = do
+ -- CurrentTSO->sp = Sp;
+ stmtC $ CmmStore (cmmOffset stgCurrentTSO tso_SP) stgSp
+ emitCloseNursery
+ -- and save the current cost centre stack in the TSO when profiling:
+ when opt_SccProfilingOn $
+ stmtC (CmmStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS)
+
+ -- CurrentNursery->free = Hp+1;
+emitCloseNursery = stmtC $ CmmStore nursery_bdescr_free (cmmOffsetW stgHp 1)
+
+emitLoadThreadState = do
+ tso <- newTemp wordRep
+ stmtsC [
+ -- tso = CurrentTSO;
+ CmmAssign tso stgCurrentTSO,
+ -- Sp = tso->sp;
+ CmmAssign sp (CmmLoad (cmmOffset (CmmReg tso) tso_SP)
+ wordRep),
+ -- SpLim = tso->stack + RESERVED_STACK_WORDS;
+ CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg tso) tso_STACK)
+ rESERVED_STACK_WORDS)
+ ]
+ emitOpenNursery
+ -- and load the current cost centre stack from the TSO when profiling:
+ when opt_SccProfilingOn $
+ stmtC (CmmStore curCCSAddr
+ (CmmLoad (cmmOffset (CmmReg tso) tso_CCCS) wordRep))
+
+emitOpenNursery = stmtsC [
+ -- Hp = CurrentNursery->free - 1;
+ CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free wordRep) (-1)),
+
+ -- HpLim = CurrentNursery->start +
+ -- CurrentNursery->blocks*BLOCK_SIZE_W - 1;
+ CmmAssign hpLim
+ (cmmOffsetExpr
+ (CmmLoad nursery_bdescr_start wordRep)
+ (cmmOffset
+ (CmmMachOp mo_wordMul [
+ CmmMachOp (MO_S_Conv I32 wordRep)
+ [CmmLoad nursery_bdescr_blocks I32],
+ CmmLit (mkIntCLit bLOCK_SIZE)
+ ])
+ (-1)
+ )
+ )
+ ]
+
+
+nursery_bdescr_free = cmmOffset stgCurrentNursery oFFSET_bdescr_free
+nursery_bdescr_start = cmmOffset stgCurrentNursery oFFSET_bdescr_start
+nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks
+
+tso_SP = tsoFieldB oFFSET_StgTSO_sp
+tso_STACK = tsoFieldB oFFSET_StgTSO_stack
+tso_CCCS = tsoProfFieldB oFFSET_StgTSO_CCCS
+
+-- The TSO struct has a variable header, and an optional StgTSOProfInfo in
+-- the middle. The fields we're interested in are after the StgTSOProfInfo.
+tsoFieldB :: ByteOff -> ByteOff
+tsoFieldB off
+ | opt_SccProfilingOn = off + sIZEOF_StgTSOProfInfo + fixedHdrSize * wORD_SIZE
+ | otherwise = off + fixedHdrSize * wORD_SIZE
+
+tsoProfFieldB :: ByteOff -> ByteOff
+tsoProfFieldB off = off + fixedHdrSize * wORD_SIZE
+
+stgSp = CmmReg sp
+stgHp = CmmReg hp
+stgCurrentTSO = CmmReg currentTSO
+stgCurrentNursery = CmmReg currentNursery
+
+sp = CmmGlobal Sp
+spLim = CmmGlobal SpLim
+hp = CmmGlobal Hp
+hpLim = CmmGlobal HpLim
+currentTSO = CmmGlobal CurrentTSO
+currentNursery = CmmGlobal CurrentNursery
+
+-- -----------------------------------------------------------------------------
+-- For certain types passed to foreign calls, we adjust the actual
+-- value passed to the call. For ByteArray#/Array# we pass the
+-- address of the actual array, not the address of the heap object.
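+-- For example, a ByteArray# argument is passed as the address
+-- (ptr + arrWordsHdrSize), i.e. pointing at the first data word
+-- rather than at the info pointer.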
+
+shimForeignCallArg :: StgArg -> CmmExpr -> CmmExpr
+shimForeignCallArg arg expr
+ | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon
+ = cmmOffsetB expr arrPtrsHdrSize
+
+ | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon
+ = cmmOffsetB expr arrWordsHdrSize
+
+ | otherwise = expr
+ where
+ -- should be a tycon app, since this is a foreign call
+ tycon = tyConAppTyCon (repType (stgArgType arg))
diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs
new file mode 100644
index 0000000000..184af904df
--- /dev/null
+++ b/compiler/codeGen/CgHeapery.lhs
@@ -0,0 +1,588 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+% $Id: CgHeapery.lhs,v 1.47 2005/06/21 10:44:41 simonmar Exp $
+%
+\section[CgHeapery]{Heap management functions}
+
+\begin{code}
+module CgHeapery (
+ initHeapUsage, getVirtHp, setVirtHp, setRealHp,
+ getHpRelOffset, hpRel,
+
+ funEntryChecks, thunkEntryChecks,
+ altHeapCheck, unbxTupleHeapCheck,
+ hpChkGen, hpChkNodePointsAssignSp0,
+ stkChkGen, stkChkNodePoints,
+
+ layOutDynConstr, layOutStaticConstr,
+ mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure,
+
+ allocDynClosure, emitSetDynHdr
+ ) where
+
+#include "HsVersions.h"
+
+import StgSyn ( AltType(..) )
+import CLabel ( CLabel, mkRtsCodeLabel )
+import CgUtils ( mkWordCLit, cmmRegOffW, cmmOffsetW,
+ cmmOffsetExprB )
+import CgMonad
+import CgProf ( staticProfHdr, profDynAlloc, dynProfHdr )
+import CgTicky ( staticTickyHdr, tickyDynAlloc, tickyAllocHeap )
+import CgParallel ( staticGranHdr, staticParHdr, doGranAllocate )
+import CgStackery ( getFinalStackHW, getRealSp )
+import CgCallConv ( mkRegLiveness )
+import ClosureInfo ( closureSize, staticClosureNeedsLink,
+ mkConInfo, closureNeedsUpdSpace,
+ infoTableLabelFromCI, closureLabelFromCI,
+ nodeMustPointToIt, closureLFInfo,
+ ClosureInfo )
+import SMRep ( CgRep(..), cgRepSizeW, separateByPtrFollowness,
+ WordOff, fixedHdrSize, thunkHdrSize,
+ isVoidArg, primRepToCgRep )
+
+import Cmm ( CmmLit(..), CmmStmt(..), CmmExpr(..), GlobalReg(..),
+ CmmReg(..), hpReg, nodeReg, spReg )
+import MachOp ( mo_wordULt, mo_wordUGt, mo_wordSub )
+import CmmUtils ( mkIntCLit, CmmStmts, noStmts, oneStmt, plusStmts,
+ mkStmts )
+import Id ( Id )
+import DataCon ( DataCon )
+import TyCon ( tyConPrimRep )
+import CostCentre ( CostCentreStack )
+import Util ( mapAccumL, filterOut )
+import Constants ( wORD_SIZE )
+import Packages ( HomeModules )
+import Outputable
+
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[CgUsages-heapery]{Monad things for fiddling with heap usage}
+%* *
+%************************************************************************
+
+The heap always grows upwards, so hpRel is easy
+
+\begin{code}
+hpRel :: VirtualHpOffset -- virtual offset of Hp
+ -> VirtualHpOffset -- virtual offset of The Thing
+ -> WordOff -- integer word offset
+hpRel hp off = off - hp
+\end{code}
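+
+For example:
+
+\begin{verbatim}
+  hpRel 5 7 == 2
+  -- a thing at virtual offset 7, with the real Hp at virtual
+  -- offset 5, lives two words above the Hp register
+\end{verbatim}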
+
+@initHeapUsage@ applies a function to the amount of heap that it uses.
+It initialises the heap usage to zeros, and passes on an unchanged
+heap usage.
+
+It is usually a prelude to performing a GC check, so everything must
+be in a tidy and consistent state.
+
+rje: Note the slightly subtle fixed point behaviour needed here
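+
+As a standalone sketch of that knot-tying (assuming @fixC@ behaves like
+@mfix@; everything below is a hypothetical toy, not the real monad):
+
+\begin{verbatim}
+  import Control.Monad.Fix (mfix)
+  import Control.Monad.State.Lazy
+
+  type Gen = State Int      -- toy stand-in for FCode
+
+  emit :: Int -> Gen ()     -- toy stand-in for allocation
+  emit n = modify (+ n)
+
+  -- The up-front check mentions the final total, which is only
+  -- known after the body has run; the body must not *force* it
+  -- (compare the note about hpHw and black holes below).
+  withCheck :: (Int -> Gen ()) -> Gen Int
+  withCheck body = mfix (\final -> do { body final; get })
+
+  -- evalState (withCheck (\hwm -> emit 2 >> emit 3)) 0 == 5
+\end{verbatim}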
+
+\begin{code}
+initHeapUsage :: (VirtualHpOffset -> Code) -> Code
+initHeapUsage fcode
+ = do { orig_hp_usage <- getHpUsage
+ ; setHpUsage initHpUsage
+ ; fixC (\heap_usage2 -> do
+ { fcode (heapHWM heap_usage2)
+ ; getHpUsage })
+ ; setHpUsage orig_hp_usage }
+
+setVirtHp :: VirtualHpOffset -> Code
+setVirtHp new_virtHp
+ = do { hp_usage <- getHpUsage
+ ; setHpUsage (hp_usage {virtHp = new_virtHp}) }
+
+getVirtHp :: FCode VirtualHpOffset
+getVirtHp
+ = do { hp_usage <- getHpUsage
+ ; return (virtHp hp_usage) }
+
+setRealHp :: VirtualHpOffset -> Code
+setRealHp new_realHp
+ = do { hp_usage <- getHpUsage
+ ; setHpUsage (hp_usage {realHp = new_realHp}) }
+
+getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
+getHpRelOffset virtual_offset
+ = do { hp_usg <- getHpUsage
+ ; return (cmmRegOffW hpReg (hpRel (realHp hp_usg) virtual_offset)) }
+\end{code}
+
+
+%************************************************************************
+%* *
+ Layout of heap objects
+%* *
+%************************************************************************
+
+\begin{code}
+layOutDynConstr, layOutStaticConstr
+ :: HomeModules
+ -> DataCon
+ -> [(CgRep,a)]
+ -> (ClosureInfo,
+ [(a,VirtualHpOffset)])
+
+layOutDynConstr = layOutConstr False
+layOutStaticConstr = layOutConstr True
+
+layOutConstr is_static hmods data_con args
+ = (mkConInfo hmods is_static data_con tot_wds ptr_wds,
+ things_w_offsets)
+ where
+ (tot_wds, -- #ptr_wds + #nonptr_wds
+ ptr_wds, -- #ptr_wds
+ things_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} args
+\end{code}
+
+@mkVirtHeapOffsets@ always returns boxed things with smaller offsets
+than the unboxed things; furthermore, the offsets in the result list
+are in increasing order.
+
+\begin{code}
+mkVirtHeapOffsets
+ :: Bool -- True <=> is a thunk
+ -> [(CgRep,a)] -- Things to make offsets for
+ -> (WordOff, -- _Total_ number of words allocated
+ WordOff, -- Number of words allocated for *pointers*
+ [(a, VirtualHpOffset)])
+ -- Things with their offsets from start of
+ -- object in order of increasing offset
+
+-- First in list gets lowest offset: the first word after the header.
+
+mkVirtHeapOffsets is_thunk things
+ = let non_void_things = filterOut (isVoidArg . fst) things
+ (ptrs, non_ptrs) = separateByPtrFollowness non_void_things
+ (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs
+ (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs
+ in
+ (tot_wds, wds_of_ptrs, ptrs_w_offsets ++ non_ptrs_w_offsets)
+ where
+ hdr_size | is_thunk = thunkHdrSize
+ | otherwise = fixedHdrSize
+
+ computeOffset wds_so_far (rep, thing)
+ = (wds_so_far + cgRepSizeW rep, (thing, hdr_size + wds_so_far))
+\end{code}
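+
+A small worked example (assuming a one-word header, i.e. a
+@fixedHdrSize@ of 1 with profiling off):
+
+\begin{verbatim}
+  mkVirtHeapOffsets False [(NonPtrArg, x), (PtrArg, p)]
+    == (2, 1, [(p, 1), (x, 2)])
+  -- two payload words, one of them a pointer; the pointer
+  -- goes first, immediately after the header
+\end{verbatim}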
+
+
+%************************************************************************
+%* *
+ Lay out a static closure
+%* *
+%************************************************************************
+
+Make a static closure, adding on any extra padding needed for CAFs,
+and adding a static link field if necessary.
+
+\begin{code}
+mkStaticClosureFields
+ :: ClosureInfo
+ -> CostCentreStack
+ -> Bool -- Has CAF refs
+ -> [CmmLit] -- Payload
+ -> [CmmLit] -- The full closure
+mkStaticClosureFields cl_info ccs caf_refs payload
+ = mkStaticClosure info_lbl ccs payload padding_wds
+ static_link_field saved_info_field
+ where
+ info_lbl = infoTableLabelFromCI cl_info
+
+ -- CAFs must have consistent layout, regardless of whether they
+ -- are actually updatable or not. The layout of a CAF is:
+ --
+ -- 3 saved_info
+ -- 2 static_link
+ -- 1 indirectee
+ -- 0 info ptr
+ --
+ -- the static_link and saved_info fields must always be in the same
+ -- place. So we use closureNeedsUpdSpace rather than
+ -- closureUpdReqd here:
+
+ is_caf = closureNeedsUpdSpace cl_info
+
+ padding_wds
+ | not is_caf = []
+ | otherwise = ASSERT(null payload) [mkIntCLit 0]
+
+ static_link_field
+ | is_caf || staticClosureNeedsLink cl_info = [static_link_value]
+ | otherwise = []
+
+ saved_info_field
+ | is_caf = [mkIntCLit 0]
+ | otherwise = []
+
+ -- for a static constructor which has NoCafRefs, we set the
+ -- static link field to a non-zero value so the garbage
+ -- collector will ignore it.
+ static_link_value
+ | caf_refs = mkIntCLit 0
+ | otherwise = mkIntCLit 1
+
+
+mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit]
+ -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
+mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field
+ = [CmmLabel info_lbl]
+ ++ variable_header_words
+ ++ payload
+ ++ padding_wds
+ ++ static_link_field
+ ++ saved_info_field
+ where
+ variable_header_words
+ = staticGranHdr
+ ++ staticParHdr
+ ++ staticProfHdr ccs
+ ++ staticTickyHdr
+\end{code}
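+
+For instance (a sketch with the GranSim, parallel, ticky and profiling
+headers all off), a static CAF with an empty payload comes out as
+
+\begin{verbatim}
+  [ CmmLabel info_lbl -- 0: info ptr
+  , mkIntCLit 0       -- 1: padding (room for the indirectee)
+  , static_link_value -- 2: static link
+  , mkIntCLit 0 ]     -- 3: saved_info
+\end{verbatim}
+
+which matches the CAF layout described above.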
+
+%************************************************************************
+%* *
+\subsection[CgHeapery-heap-overflow]{Heap overflow checking}
+%* *
+%************************************************************************
+
+The new code for heap checks. For GrAnSim, the code for doing a heap check
+and doing a context switch has been separated. In particular, the HEAP_CHK
+macro only performs a heap check. THREAD_CONTEXT_SWITCH should be used for
+doing a context switch. GRAN_FETCH_AND_RESCHEDULE must be put at the
+beginning of every slow entry code in order to simulate the fetching of
+closures. If fetching is necessary (i.e. current closure is not local) then
+an automatic context switch is done.
+
+--------------------------------------------------------------
+A heap/stack check at a function or thunk entry point.
+
+\begin{code}
+funEntryChecks :: ClosureInfo -> CmmStmts -> Code -> Code
+funEntryChecks cl_info reg_save_code code
+ = hpStkCheck cl_info True reg_save_code code
+
+thunkEntryChecks :: ClosureInfo -> Code -> Code
+thunkEntryChecks cl_info code
+ = hpStkCheck cl_info False noStmts code
+
+hpStkCheck :: ClosureInfo -- Function closure
+ -> Bool -- Is a function? (not a thunk)
+ -> CmmStmts -- Register saves
+ -> Code
+ -> Code
+
+hpStkCheck cl_info is_fun reg_save_code code
+ = getFinalStackHW $ \ spHw -> do
+ { sp <- getRealSp
+ ; let stk_words = spHw - sp
+ ; initHeapUsage $ \ hpHw -> do
+ { -- Emit heap checks, but be sure to do it lazily so
+ -- that the conditionals on hpHw don't cause a black hole
+ codeOnly $ do
+ { do_checks stk_words hpHw full_save_code rts_label
+ ; tickyAllocHeap hpHw }
+ ; setRealHp hpHw
+ ; code }
+ }
+ where
+ node_asst
+ | nodeMustPointToIt (closureLFInfo cl_info)
+ = noStmts
+ | otherwise
+ = oneStmt (CmmAssign nodeReg (CmmLit (CmmLabel closure_lbl)))
+ closure_lbl = closureLabelFromCI cl_info
+
+ full_save_code = node_asst `plusStmts` reg_save_code
+
+ rts_label | is_fun = CmmReg (CmmGlobal GCFun)
+ -- Function entry point
+ | otherwise = CmmReg (CmmGlobal GCEnter1)
+ -- Thunk or case return
+ -- In the thunk/case-return case, R1 points to a closure
+ -- which should be (re)-entered after GC
+\end{code}
+
+Heap checks in a case alternative are nice and easy, provided this is
+a bog-standard algebraic case. We have in our hand:
+
+ * one return address, on the stack,
+ * one return value, in Node.
+
+The canned code for this heap check failure just pushes Node on the
+stack, saying 'EnterGHC' to return. The scheduler will return by
+entering the top value on the stack, which in turn will return through
+the return address, getting us back to where we were. This is
+therefore only valid if the return value is *lifted* (just being
+boxed isn't good enough).
+
+For primitive returns, we have an unlifted value in some register
+(either R1 or FloatReg1 or DblReg1). This means using specialised
+heap-check code for these cases.
+
+\begin{code}
+altHeapCheck
+ :: AltType -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt
+ -- (Unboxed tuples are dealt with by ubxTupleHeapCheck)
+ -> Code -- Continuation
+ -> Code
+altHeapCheck alt_type code
+ = initHeapUsage $ \ hpHw -> do
+ { codeOnly $ do
+ { do_checks 0 {- no stack chk -} hpHw
+                noStmts {- nothing to save -}
+ (rts_label alt_type)
+ ; tickyAllocHeap hpHw }
+ ; setRealHp hpHw
+ ; code }
+ where
+ rts_label PolyAlt = CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "stg_gc_unpt_r1")))
+ -- Do *not* enter R1 after a heap check in
+ -- a polymorphic case. It might be a function
+ -- and the entry code for a function (currently)
+ -- applies it
+ --
+ -- However R1 is guaranteed to be a pointer
+
+ rts_label (AlgAlt tc) = stg_gc_enter1
+ -- Enter R1 after the heap check; it's a pointer
+
+ rts_label (PrimAlt tc)
+ = CmmLit $ CmmLabel $
+ case primRepToCgRep (tyConPrimRep tc) of
+ VoidArg -> mkRtsCodeLabel SLIT( "stg_gc_noregs")
+ FloatArg -> mkRtsCodeLabel SLIT( "stg_gc_f1")
+ DoubleArg -> mkRtsCodeLabel SLIT( "stg_gc_d1")
+ LongArg -> mkRtsCodeLabel SLIT( "stg_gc_l1")
+ -- R1 is boxed but unlifted:
+ PtrArg -> mkRtsCodeLabel SLIT( "stg_gc_unpt_r1")
+ -- R1 is unboxed:
+ NonPtrArg -> mkRtsCodeLabel SLIT( "stg_gc_unbx_r1")
+
+ rts_label (UbxTupAlt _) = panic "altHeapCheck"
+\end{code}
+
+
+Unboxed tuple alternatives and let-no-escapes (the two most annoying
+constructs to generate code for!) For unboxed tuple returns, there
+are an arbitrary number of possibly unboxed return values, some of
+which will be in registers, and the others will be on the stack. We
+always organise the stack-resident fields into pointers &
+non-pointers, and pass the number of each to the heap check code.
+
+\begin{code}
+unbxTupleHeapCheck
+ :: [(Id, GlobalReg)] -- Live registers
+ -> WordOff -- no. of stack slots containing ptrs
+ -> WordOff -- no. of stack slots containing nonptrs
+ -> CmmStmts -- code to insert in the failure path
+ -> Code
+ -> Code
+
+unbxTupleHeapCheck regs ptrs nptrs fail_code code
+ -- We can't manage more than 255 pointers/non-pointers
+ -- in a generic heap check.
+  | ptrs > 255 || nptrs > 255 = panic "unbxTupleHeapCheck"
+ | otherwise
+ = initHeapUsage $ \ hpHw -> do
+ { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
+ full_fail_code rts_label
+ ; tickyAllocHeap hpHw }
+ ; setRealHp hpHw
+ ; code }
+ where
+ full_fail_code = fail_code `plusStmts` oneStmt assign_liveness
+ assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9)) -- Ho ho ho!
+ (CmmLit (mkWordCLit liveness))
+ liveness = mkRegLiveness regs ptrs nptrs
+ rts_label = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_ut")))
+
+\end{code}
+
+
+%************************************************************************
+%* *
+ Heap/Stack Checks.
+%* *
+%************************************************************************
+
+When failing a check, we save a return address on the stack and
+jump to a pre-compiled code fragment that saves the live registers
+and returns to the scheduler.
+
+The return address in most cases will be the beginning of the basic
+block in which the check resides, since we need to perform the check
+again on re-entry because someone else might have stolen the resource
+in the meantime.
+
+\begin{code}
+do_checks :: WordOff -- Stack headroom
+ -> WordOff -- Heap headroom
+ -> CmmStmts -- Assignments to perform on failure
+ -> CmmExpr -- Rts address to jump to on failure
+ -> Code
+do_checks 0 0 _ _ = nopC
+do_checks stk hp reg_save_code rts_lbl
+ = do_checks' (CmmLit (mkIntCLit (stk*wORD_SIZE)))
+ (CmmLit (mkIntCLit (hp*wORD_SIZE)))
+ (stk /= 0) (hp /= 0) reg_save_code rts_lbl
+
+-- The offsets are now in *bytes*
+do_checks' :: CmmExpr -> CmmExpr -> Bool -> Bool -> CmmStmts -> CmmExpr -> Code
+do_checks' stk_expr hp_expr stk_nonzero hp_nonzero reg_save_code rts_lbl
+ = do { doGranAllocate hp_expr
+
+ -- Emit a block for the heap-check-failure code
+ ; blk_id <- forkLabelledCode $ do
+ { whenC hp_nonzero $
+ stmtC (CmmAssign (CmmGlobal HpAlloc) hp_expr)
+ ; emitStmts reg_save_code
+ ; stmtC (CmmJump rts_lbl []) }
+
+ -- Check for stack overflow *FIRST*; otherwise
+        -- we might bump Hp and then fail the stack overflow check
+ ; whenC stk_nonzero
+ (stmtC (CmmCondBranch stk_oflo blk_id))
+
+ ; whenC hp_nonzero
+ (stmtsC [CmmAssign hpReg
+ (cmmOffsetExprB (CmmReg hpReg) hp_expr),
+ CmmCondBranch hp_oflo blk_id])
+ -- Bump heap pointer, and test for heap exhaustion
+ -- Note that we don't move the heap pointer unless the
+ -- stack check succeeds. Otherwise we might end up
+ -- with slop at the end of the current block, which can
+ -- confuse the LDV profiler.
+ }
+ where
+ -- Stk overflow if (Sp - stk_bytes < SpLim)
+ stk_oflo = CmmMachOp mo_wordULt
+ [CmmMachOp mo_wordSub [CmmReg spReg, stk_expr],
+ CmmReg (CmmGlobal SpLim)]
+
+    -- Hp overflow if (Hp > HpLim)
+ -- (Hp has been incremented by now)
+ -- HpLim points to the LAST WORD of valid allocation space.
+ hp_oflo = CmmMachOp mo_wordUGt
+ [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
+\end{code}
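+
+For a combined stack-and-heap check, the emitted code therefore has
+roughly this shape (C-- pseudo-syntax; the label name is invented):
+
+\begin{verbatim}
+        if (Sp - stk_bytes < SpLim) goto gc;  -- stack check first
+        Hp = Hp + hp_bytes;
+        if (Hp > HpLim) goto gc;
+        ...
+  gc:   HpAlloc = hp_bytes;
+        <reg_save_code>
+        jump rts_lbl;
+\end{verbatim}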
+
+%************************************************************************
+%* *
+ Generic Heap/Stack Checks - used in the RTS
+%* *
+%************************************************************************
+
+\begin{code}
+hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code
+hpChkGen bytes liveness reentry
+ = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assigns stg_gc_gen
+ where
+ assigns = mkStmts [
+ CmmAssign (CmmGlobal (VanillaReg 9)) liveness,
+ CmmAssign (CmmGlobal (VanillaReg 10)) reentry
+ ]
+
+-- a heap check where R1 points to the closure to enter on return, and
+-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP).
+hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> Code
+hpChkNodePointsAssignSp0 bytes sp0
+ = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assign stg_gc_enter1
+ where assign = oneStmt (CmmStore (CmmReg spReg) sp0)
+
+stkChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code
+stkChkGen bytes liveness reentry
+ = do_checks' bytes (CmmLit (mkIntCLit 0)) True False assigns stg_gc_gen
+ where
+ assigns = mkStmts [
+ CmmAssign (CmmGlobal (VanillaReg 9)) liveness,
+ CmmAssign (CmmGlobal (VanillaReg 10)) reentry
+ ]
+
+stkChkNodePoints :: CmmExpr -> Code
+stkChkNodePoints bytes
+ = do_checks' bytes (CmmLit (mkIntCLit 0)) True False noStmts stg_gc_enter1
+
+stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_gen")))
+stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[initClosure]{Initialise a dynamic closure}
+%* *
+%************************************************************************
+
+@allocDynClosure@ puts the thing in the heap, and modifies the virtual Hp
+to account for this.
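+
+A small worked example of the code below: if the virtual Hp is 3 and we
+allocate a closure of @closureSize@ 3 (a one-word header plus two
+payload words), then
+
+\begin{verbatim}
+  info_offset = 4            -- virtual offset of the info-ptr word
+  payload words at 5 and 6
+  virtHp becomes 3 + 3 = 6   -- again the last allocated word
+\end{verbatim}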
+
+\begin{code}
+allocDynClosure
+ :: ClosureInfo
+ -> CmmExpr -- Cost Centre to stick in the object
+ -> CmmExpr -- Cost Centre to blame for this alloc
+ -- (usually the same; sometimes "OVERHEAD")
+
+ -> [(CmmExpr, VirtualHpOffset)] -- Offsets from start of the object
+ -- ie Info ptr has offset zero.
+ -> FCode VirtualHpOffset -- Returns virt offset of object
+
+allocDynClosure cl_info use_cc blame_cc amodes_with_offsets
+ = do { virt_hp <- getVirtHp
+
+ -- FIND THE OFFSET OF THE INFO-PTR WORD
+ ; let info_offset = virt_hp + 1
+ -- info_offset is the VirtualHpOffset of the first
+ -- word of the new object
+ -- Remember, virtHp points to last allocated word,
+ -- ie 1 *before* the info-ptr word of new object.
+
+ info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info))
+ hdr_w_offsets = initDynHdr info_ptr use_cc `zip` [0..]
+
+ -- SAY WHAT WE ARE ABOUT TO DO
+ ; profDynAlloc cl_info use_cc
+ -- ToDo: This is almost certainly wrong
+ -- We're ignoring blame_cc. But until we've
+ -- fixed the boxing hack in chooseDynCostCentres etc,
+ -- we're worried about making things worse by "fixing"
+ -- this part to use blame_cc!
+
+ ; tickyDynAlloc cl_info
+
+ -- ALLOCATE THE OBJECT
+ ; base <- getHpRelOffset info_offset
+ ; hpStore base (hdr_w_offsets ++ amodes_with_offsets)
+
+ -- BUMP THE VIRTUAL HEAP POINTER
+ ; setVirtHp (virt_hp + closureSize cl_info)
+
+ -- RETURN PTR TO START OF OBJECT
+ ; returnFC info_offset }
+
+
+initDynHdr :: CmmExpr
+ -> CmmExpr -- Cost centre to put in object
+ -> [CmmExpr]
+initDynHdr info_ptr cc
+ = [info_ptr]
+ -- ToDo: Gransim stuff
+ -- ToDo: Parallel stuff
+ ++ dynProfHdr cc
+ -- No ticky header
+
+hpStore :: CmmExpr -> [(CmmExpr, VirtualHpOffset)] -> Code
+-- Store the item (expr,off) in base[off]
+hpStore base es
+ = stmtsC [ CmmStore (cmmOffsetW base off) val
+ | (val, off) <- es ]
+
+emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> Code
+emitSetDynHdr base info_ptr ccs
+ = hpStore base (zip (initDynHdr info_ptr ccs) [0..])
+\end{code}
diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs
new file mode 100644
index 0000000000..b769950d87
--- /dev/null
+++ b/compiler/codeGen/CgInfoTbls.hs
@@ -0,0 +1,591 @@
+-----------------------------------------------------------------------------
+--
+-- Building info tables.
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+module CgInfoTbls (
+ emitClosureCodeAndInfoTable,
+ emitInfoTableAndCode,
+ dataConTagZ,
+ getSRTInfo,
+ emitDirectReturnTarget, emitAlgReturnTarget,
+ emitDirectReturnInstr, emitVectoredReturnInstr,
+ mkRetInfoTable,
+ mkStdInfoTable,
+ stdInfoTableSizeB,
+ mkFunGenInfoExtraBits,
+ entryCode, closureInfoPtr,
+ getConstrTag,
+ infoTable, infoTableClosureType,
+ infoTablePtrs, infoTableNonPtrs,
+ funInfoTable,
+ retVec
+ ) where
+
+
+#include "HsVersions.h"
+
+import ClosureInfo ( ClosureInfo, closureTypeDescr, closureName,
+ infoTableLabelFromCI, Liveness,
+ closureValDescr, closureSRT, closureSMRep,
+ closurePtrsSize, closureNonHdrSize, closureFunInfo,
+ C_SRT(..), needsSRT, isConstrClosure_maybe,
+ ArgDescr(..) )
+import SMRep ( StgHalfWord, hALF_WORD_SIZE_IN_BITS, hALF_WORD_SIZE,
+ WordOff, ByteOff,
+ smRepClosureTypeInt, tablesNextToCode,
+ rET_BIG, rET_SMALL, rET_VEC_BIG, rET_VEC_SMALL )
+import CgBindery ( getLiveStackSlots )
+import CgCallConv ( isBigLiveness, mkLivenessCLit, buildContLiveness,
+ argDescrType, getSequelAmode,
+ CtrlReturnConvention(..) )
+import CgUtils ( mkStringCLit, packHalfWordsCLit, mkWordCLit,
+ cmmOffsetB, cmmOffsetExprW, cmmLabelOffW, cmmOffsetW,
+ emitDataLits, emitRODataLits, emitSwitch, cmmNegate,
+ newTemp )
+import CgMonad
+
+import CmmUtils ( mkIntCLit, zeroCLit )
+import Cmm ( CmmStmt(..), CmmExpr(..), CmmLit(..), LocalReg,
+ CmmBasicBlock, nodeReg )
+import MachOp ( MachOp(..), wordRep, halfWordRep )
+import CLabel
+import StgSyn ( SRT(..) )
+import Name ( Name )
+import DataCon ( DataCon, dataConTag, fIRST_TAG )
+import Unique ( Uniquable(..) )
+import DynFlags ( DynFlags(..), HscTarget(..) )
+import StaticFlags ( opt_SccProfilingOn )
+import ListSetOps ( assocDefault )
+import Maybes ( isJust )
+import Constants ( wORD_SIZE, sIZEOF_StgFunInfoExtraRev )
+import Outputable
+
+
+-------------------------------------------------------------------------
+--
+-- Generating the info table and code for a closure
+--
+-------------------------------------------------------------------------
+
+-- Here we make a concrete info table, represented as a list of CmmAddr
+-- (it can't be simply a list of Word, because the SRT field is
+-- represented by a label+offset expression).
+
+-- With tablesNextToCode, the layout is
+-- <reversed variable part>
+-- <normal forward StgInfoTable, but without
+-- an entry point at the front>
+-- <code>
+--
+-- Without tablesNextToCode, the layout of an info table is
+-- <entry label>
+-- <normal forward rest of StgInfoTable>
+-- <forward variable part>
+--
+-- See includes/InfoTables.h
+
+emitClosureCodeAndInfoTable :: ClosureInfo -> [LocalReg] -> CgStmts -> Code
+emitClosureCodeAndInfoTable cl_info args body
+ = do { ty_descr_lit <-
+ if opt_SccProfilingOn
+ then mkStringCLit (closureTypeDescr cl_info)
+ else return (mkIntCLit 0)
+ ; cl_descr_lit <-
+ if opt_SccProfilingOn
+ then mkStringCLit cl_descr_string
+ else return (mkIntCLit 0)
+ ; let std_info = mkStdInfoTable ty_descr_lit cl_descr_lit
+ cl_type srt_len layout_lit
+
+ ; blks <- cgStmtsToBlocks body
+ ; emitInfoTableAndCode info_lbl std_info extra_bits args blks }
+ where
+ info_lbl = infoTableLabelFromCI cl_info
+
+ cl_descr_string = closureValDescr cl_info
+ cl_type = smRepClosureTypeInt (closureSMRep cl_info)
+
+ srt = closureSRT cl_info
+ needs_srt = needsSRT srt
+
+ mb_con = isConstrClosure_maybe cl_info
+ is_con = isJust mb_con
+
+ (srt_label,srt_len)
+ = case mb_con of
+ Just con -> -- Constructors don't have an SRT
+ -- We keep the *zero-indexed* tag in the srt_len
+ -- field of the info table.
+ (mkIntCLit 0, fromIntegral (dataConTagZ con))
+
+ Nothing -> -- Not a constructor
+ srtLabelAndLength srt info_lbl
+
+ ptrs = closurePtrsSize cl_info
+ nptrs = size - ptrs
+ size = closureNonHdrSize cl_info
+ layout_lit = packHalfWordsCLit ptrs nptrs
+
+ extra_bits
+ | is_fun = fun_extra_bits
+ | is_con = []
+ | needs_srt = [srt_label]
+ | otherwise = []
+
+ maybe_fun_stuff = closureFunInfo cl_info
+ is_fun = isJust maybe_fun_stuff
+ (Just (arity, arg_descr)) = maybe_fun_stuff
+
+ fun_extra_bits
+ | ArgGen liveness <- arg_descr
+ = [ fun_amode,
+ srt_label,
+ makeRelativeRefTo info_lbl $ mkLivenessCLit liveness,
+ slow_entry ]
+ | needs_srt = [fun_amode, srt_label]
+ | otherwise = [fun_amode]
+
+ slow_entry = makeRelativeRefTo info_lbl (CmmLabel slow_entry_label)
+ slow_entry_label = mkSlowEntryLabel (closureName cl_info)
+
+ fun_amode = packHalfWordsCLit fun_type arity
+ fun_type = argDescrType arg_descr
+
+-- We keep the *zero-indexed* tag in the srt_len field of the info
+-- table of a data constructor.
+dataConTagZ :: DataCon -> ConTagZ
+dataConTagZ con = dataConTag con - fIRST_TAG
+
+-- A low-level way to generate the variable part of a fun-style info table.
+-- (must match fun_extra_bits above). Used by the C-- parser.
+mkFunGenInfoExtraBits :: Int -> Int -> CmmLit -> CmmLit -> CmmLit -> [CmmLit]
+mkFunGenInfoExtraBits fun_type arity srt_label liveness slow_entry
+ = [ packHalfWordsCLit fun_type arity,
+ srt_label,
+ liveness,
+ slow_entry ]
+
+-------------------------------------------------------------------------
+--
+-- Generating the info table and code for a return point
+--
+-------------------------------------------------------------------------
+
+-- Here's the layout of a return-point info table
+--
+-- Tables next to code:
+--
+-- <reversed vector table>
+-- <srt slot>
+-- <standard info table>
+-- ret-addr --> <entry code (if any)>
+--
+-- Not tables-next-to-code:
+--
+-- ret-addr --> <ptr to entry code>
+-- <standard info table>
+-- <srt slot>
+-- <forward vector table>
+--
+-- * The vector table is only present for vectored returns
+--
+-- * The SRT slot is only there if either
+-- (a) there is SRT info to record, OR
+-- (b) if the return is vectored
+-- The latter (b) is necessary so that the vector is in a
+-- predictable place
+
+vectorSlot :: CmmExpr -> CmmExpr -> CmmExpr
+-- Get the vector slot from the info pointer
+vectorSlot info_amode zero_indexed_tag
+ | tablesNextToCode
+ = cmmOffsetExprW (cmmOffsetW info_amode (- (stdInfoTableSizeW + 2)))
+ (cmmNegate zero_indexed_tag)
+ -- The "2" is one for the SRT slot, and one more
+ -- to get to the first word of the vector
+
+ | otherwise
+ = cmmOffsetExprW (cmmOffsetW info_amode (stdInfoTableSizeW + 2))
+ zero_indexed_tag
+ -- The "2" is one for the entry-code slot and one for the SRT slot
+
+retVec :: CmmExpr -> CmmExpr -> CmmExpr
+-- Get a return vector from the info pointer
+retVec info_amode zero_indexed_tag
+ = let slot = vectorSlot info_amode zero_indexed_tag
+ tableEntry = CmmLoad slot wordRep
+ in if tablesNextToCode
+ then CmmMachOp (MO_Add wordRep) [tableEntry, info_amode]
+ else tableEntry
+
+emitReturnTarget
+ :: Name
+ -> CgStmts -- The direct-return code (if any)
+ -- (empty for vectored returns)
+ -> [CmmLit] -- Vector of return points
+ -- (empty for non-vectored returns)
+ -> SRT
+ -> FCode CLabel
+emitReturnTarget name stmts vector srt
+ = do { live_slots <- getLiveStackSlots
+ ; liveness <- buildContLiveness name live_slots
+ ; srt_info <- getSRTInfo name srt
+
+ ; let
+ cl_type = case (null vector, isBigLiveness liveness) of
+ (True, True) -> rET_BIG
+ (True, False) -> rET_SMALL
+ (False, True) -> rET_VEC_BIG
+ (False, False) -> rET_VEC_SMALL
+
+ (std_info, extra_bits) =
+ mkRetInfoTable info_lbl liveness srt_info cl_type vector
+
+ ; blks <- cgStmtsToBlocks stmts
+ ; emitInfoTableAndCode info_lbl std_info extra_bits args blks
+ ; return info_lbl }
+ where
+ args = {- trace "emitReturnTarget: missing args" -} []
+ uniq = getUnique name
+ info_lbl = mkReturnInfoLabel uniq
+
+
+mkRetInfoTable
+ :: CLabel -- info label
+ -> Liveness -- liveness
+ -> C_SRT -- SRT Info
+ -> Int -- type (eg. rET_SMALL)
+ -> [CmmLit] -- vector
+ -> ([CmmLit],[CmmLit])
+mkRetInfoTable info_lbl liveness srt_info cl_type vector
+ = (std_info, extra_bits)
+ where
+ (srt_label, srt_len) = srtLabelAndLength srt_info info_lbl
+
+ srt_slot | need_srt = [srt_label]
+ | otherwise = []
+
+ need_srt = needsSRT srt_info || not (null vector)
+ -- If there's a vector table then we must allocate
+ -- an SRT slot, so that the vector table is at a
+ -- known offset from the info pointer
+
+ liveness_lit = makeRelativeRefTo info_lbl $ mkLivenessCLit liveness
+ std_info = mkStdInfoTable zeroCLit zeroCLit cl_type srt_len liveness_lit
+ extra_bits = srt_slot ++ map (makeRelativeRefTo info_lbl) vector
+
+
+emitDirectReturnTarget
+ :: Name
+ -> CgStmts -- The direct-return code
+ -> SRT
+ -> FCode CLabel
+emitDirectReturnTarget name code srt
+ = emitReturnTarget name code [] srt
+
+emitAlgReturnTarget
+ :: Name -- Just for its unique
+ -> [(ConTagZ, CgStmts)] -- Tagged branches
+ -> Maybe CgStmts -- Default branch (if any)
+ -> SRT -- Continuation's SRT
+ -> CtrlReturnConvention
+ -> FCode (CLabel, SemiTaggingStuff)
+
+emitAlgReturnTarget name branches mb_deflt srt ret_conv
+ = case ret_conv of
+ UnvectoredReturn fam_sz -> do
+ { blks <- getCgStmts $
+ emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
+ -- NB: tag_expr is zero-based
+ ; lbl <- emitDirectReturnTarget name blks srt
+ ; return (lbl, Nothing) }
+ -- Nothing: the internal branches in the switch don't have
+ -- global labels, so we can't use them at the 'call site'
+
+ VectoredReturn fam_sz -> do
+ { let tagged_lbls = zip (map fst branches) $
+ map (CmmLabel . mkAltLabel uniq . fst) branches
+ deflt_lbl | isJust mb_deflt = CmmLabel $ mkDefaultLabel uniq
+ | otherwise = mkIntCLit 0
+ ; let vector = [ assocDefault deflt_lbl tagged_lbls i
+ | i <- [0..fam_sz-1]]
+ ; lbl <- emitReturnTarget name noCgStmts vector srt
+ ; mapFCs emit_alt branches
+ ; emit_deflt mb_deflt
+ ; return (lbl, Just (tagged_lbls, deflt_lbl)) }
+ where
+ uniq = getUnique name
+ tag_expr = getConstrTag (CmmReg nodeReg)
+
+ emit_alt :: (Int, CgStmts) -> FCode (Int, CmmLit)
+ -- Emit the code for the alternative as a top-level
+ -- code block returning a label for it
+ emit_alt (tag, stmts) = do { let lbl = mkAltLabel uniq tag
+ ; blks <- cgStmtsToBlocks stmts
+ ; emitProc [] lbl [] blks
+ ; return (tag, CmmLabel lbl) }
+
+ emit_deflt (Just stmts) = do { let lbl = mkDefaultLabel uniq
+ ; blks <- cgStmtsToBlocks stmts
+ ; emitProc [] lbl [] blks
+ ; return (CmmLabel lbl) }
+ emit_deflt Nothing = return (mkIntCLit 0)
+ -- Nothing case: the simplifier might have eliminated a case
+ -- so we may have e.g. case xs of
+ -- [] -> e
+ -- In that situation the default should never be taken,
+ -- so we just use a NULL pointer
+
+--------------------------------
+emitDirectReturnInstr :: Code
+emitDirectReturnInstr
+ = do { info_amode <- getSequelAmode
+ ; stmtC (CmmJump (entryCode info_amode) []) }
+
+emitVectoredReturnInstr :: CmmExpr -- _Zero-indexed_ constructor tag
+ -> Code
+emitVectoredReturnInstr zero_indexed_tag
+ = do { info_amode <- getSequelAmode
+ -- HACK! assign info_amode to a temp, because retVec
+ -- uses it twice and the NCG doesn't have any CSE yet.
+ -- Only do this for the NCG, because gcc is too stupid
+ -- to optimise away the extra tmp (grrr).
+ ; dflags <- getDynFlags
+ ; x <- if hscTarget dflags == HscAsm
+ then do z <- newTemp wordRep
+ stmtC (CmmAssign z info_amode)
+ return (CmmReg z)
+ else
+ return info_amode
+ ; let target = retVec x zero_indexed_tag
+ ; stmtC (CmmJump target []) }
+
+
+-------------------------------------------------------------------------
+--
+-- Generating a standard info table
+--
+-------------------------------------------------------------------------
+
+-- The standard bits of an info table. This part of the info table
+-- corresponds to the StgInfoTable type defined in InfoTables.h.
+--
+-- Its shape varies with ticky/profiling/tables next to code etc
+-- so we can't use constant offsets from Constants
+
+mkStdInfoTable
+ :: CmmLit -- closure type descr (profiling)
+ -> CmmLit -- closure descr (profiling)
+ -> Int -- closure type
+ -> StgHalfWord -- SRT length
+ -> CmmLit -- layout field
+ -> [CmmLit]
+
+mkStdInfoTable type_descr closure_descr cl_type srt_len layout_lit
+ = -- Parallel revertible-black hole field
+ prof_info
+ -- Ticky info (none at present)
+ -- Debug info (none at present)
+ ++ [layout_lit, type_lit]
+
+ where
+ prof_info
+ | opt_SccProfilingOn = [type_descr, closure_descr]
+ | otherwise = []
+
+ type_lit = packHalfWordsCLit cl_type srt_len
+
+stdInfoTableSizeW :: WordOff
+-- The size of a standard info table varies with profiling/ticky etc,
+-- so we can't get it from Constants
+-- It must vary in sync with mkStdInfoTable
+stdInfoTableSizeW
+ = size_fixed + size_prof
+ where
+ size_fixed = 2 -- layout, type
+ size_prof | opt_SccProfilingOn = 2
+ | otherwise = 0
+
+stdInfoTableSizeB = stdInfoTableSizeW * wORD_SIZE :: ByteOff
+
+stdSrtBitmapOffset :: ByteOff
+-- Byte offset of the SRT bitmap half-word which is
+-- in the *higher-addressed* part of the type_lit
+stdSrtBitmapOffset = stdInfoTableSizeB - hALF_WORD_SIZE
+
+stdClosureTypeOffset :: ByteOff
+-- Byte offset of the closure type half-word
+stdClosureTypeOffset = stdInfoTableSizeB - wORD_SIZE
+
+stdPtrsOffset, stdNonPtrsOffset :: ByteOff
+stdPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE
+stdNonPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE + hALF_WORD_SIZE
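+
+-- For example, with profiling off on a 32-bit target:
+--   stdInfoTableSizeW = 2, stdInfoTableSizeB = 8
+--   ptrs at byte 0, non-ptrs at byte 2 (the layout word)
+--   closure type at byte 4, srt_bitmap at byte 6 (the type word)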
+
+-------------------------------------------------------------------------
+--
+-- Accessing fields of an info table
+--
+-------------------------------------------------------------------------
+
+closureInfoPtr :: CmmExpr -> CmmExpr
+-- Takes a closure pointer and returns the info table pointer
+closureInfoPtr e = CmmLoad e wordRep
+
+entryCode :: CmmExpr -> CmmExpr
+-- Takes an info pointer (the first word of a closure)
+-- and returns its entry code
+entryCode e | tablesNextToCode = e
+ | otherwise = CmmLoad e wordRep
+
+getConstrTag :: CmmExpr -> CmmExpr
+-- Takes a closure pointer, and return the *zero-indexed*
+-- constructor tag obtained from the info table
+-- This lives in the SRT field of the info table
+-- (constructors don't need SRTs).
+getConstrTag closure_ptr
+ = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableConstrTag info_table]
+ where
+ info_table = infoTable (closureInfoPtr closure_ptr)
+
+infoTable :: CmmExpr -> CmmExpr
+-- Takes an info pointer (the first word of a closure)
+-- and returns a pointer to the first word of the standard-form
+-- info table, excluding the entry-code word (if present)
+infoTable info_ptr
+ | tablesNextToCode = cmmOffsetB info_ptr (- stdInfoTableSizeB)
+ | otherwise = cmmOffsetW info_ptr 1 -- Past the entry code pointer
+
+infoTableConstrTag :: CmmExpr -> CmmExpr
+-- Takes an info table pointer (from infoTable) and returns the constr tag
+-- field of the info table (same as the srt_bitmap field)
+infoTableConstrTag = infoTableSrtBitmap
+
+infoTableSrtBitmap :: CmmExpr -> CmmExpr
+-- Takes an info table pointer (from infoTable) and returns the srt_bitmap
+-- field of the info table
+infoTableSrtBitmap info_tbl
+ = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) halfWordRep
+
+infoTableClosureType :: CmmExpr -> CmmExpr
+-- Takes an info table pointer (from infoTable) and returns the closure type
+-- field of the info table.
+infoTableClosureType info_tbl
+ = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) halfWordRep
+
+infoTablePtrs :: CmmExpr -> CmmExpr
+infoTablePtrs info_tbl
+ = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) halfWordRep
+
+infoTableNonPtrs :: CmmExpr -> CmmExpr
+infoTableNonPtrs info_tbl
+ = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) halfWordRep
+
+funInfoTable :: CmmExpr -> CmmExpr
+-- Takes the info pointer of a function,
+-- and returns a pointer to the first word of the StgFunInfoExtra struct
+-- in the info table.
+funInfoTable info_ptr
+ | tablesNextToCode
+ = cmmOffsetB info_ptr (- stdInfoTableSizeB - sIZEOF_StgFunInfoExtraRev)
+ | otherwise
+ = cmmOffsetW info_ptr (1 + stdInfoTableSizeW)
+ -- Past the entry code pointer
+
+-------------------------------------------------------------------------
+--
+-- Emit the code for a closure (or return address)
+-- and its associated info table
+--
+-------------------------------------------------------------------------
+
+-- The complication here concerns whether or not we can
+-- put the info table next to the code
+
+emitInfoTableAndCode
+ :: CLabel -- Label of info table
+ -> [CmmLit] -- ...its invariant part
+ -> [CmmLit] -- ...and its variant part
+ -> [LocalReg] -- ...args
+ -> [CmmBasicBlock] -- ...and body
+ -> Code
+
+emitInfoTableAndCode info_lbl std_info extra_bits args blocks
+ | tablesNextToCode -- Reverse the extra_bits; and emit the top-level proc
+ = emitProc (reverse extra_bits ++ std_info)
+ entry_lbl args blocks
+ -- NB: the info_lbl is discarded
+
+ | null blocks -- No actual code; only the info table is significant
+ = -- Use a zero place-holder in place of the
+ -- entry-label in the info table
+ emitRODataLits info_lbl (zeroCLit : std_info ++ extra_bits)
+
+ | otherwise -- Separately emit info table (with the function entry
+ = -- point as first entry) and the entry code
+ do { emitDataLits info_lbl (CmmLabel entry_lbl : std_info ++ extra_bits)
+ ; emitProc [] entry_lbl args blocks }
+
+ where
+ entry_lbl = infoLblToEntryLbl info_lbl
+
+-------------------------------------------------------------------------
+--
+-- Static reference tables
+--
+-------------------------------------------------------------------------
+
+-- There is just one SRT for each top level binding; all the nested
+-- bindings use sub-sections of this SRT. The label is passed down to
+-- the nested bindings via the monad.
+
+getSRTInfo :: Name -> SRT -> FCode C_SRT
+getSRTInfo id NoSRT = return NoC_SRT
+getSRTInfo id (SRT off len bmp)
+ | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape]
+ = do { srt_lbl <- getSRTLabel
+ ; let srt_desc_lbl = mkSRTDescLabel id
+ ; emitRODataLits srt_desc_lbl
+ ( cmmLabelOffW srt_lbl off
+ : mkWordCLit (fromIntegral len)
+ : map mkWordCLit bmp)
+ ; return (C_SRT srt_desc_lbl 0 srt_escape) }
+
+ | otherwise
+ = do { srt_lbl <- getSRTLabel
+ ; return (C_SRT srt_lbl off (fromIntegral (head bmp))) }
+ -- The fromIntegral converts to StgHalfWord
+
+srt_escape = (-1) :: StgHalfWord
+
+srtLabelAndLength :: C_SRT -> CLabel -> (CmmLit, StgHalfWord)
+srtLabelAndLength NoC_SRT _
+ = (zeroCLit, 0)
+srtLabelAndLength (C_SRT lbl off bitmap) info_lbl
+ = (makeRelativeRefTo info_lbl $ cmmLabelOffW lbl off, bitmap)
+
+-------------------------------------------------------------------------
+--
+-- Position independent code
+--
+-------------------------------------------------------------------------
+-- In order to support position independent code, we mustn't put absolute
+-- references into read-only space. Info tables in the tablesNextToCode
+-- case must be in .text, which is read-only, so we doctor the CmmLits
+-- to use relative offsets instead.
+
+-- Note that this is done even when the -fPIC flag is not specified,
+-- as we want to keep binary compatibility between PIC and non-PIC.
+
+makeRelativeRefTo :: CLabel -> CmmLit -> CmmLit
+
+makeRelativeRefTo info_lbl (CmmLabel lbl)
+ | tablesNextToCode
+ = CmmLabelDiffOff lbl info_lbl 0
+makeRelativeRefTo info_lbl (CmmLabelOff lbl off)
+ | tablesNextToCode
+ = CmmLabelDiffOff lbl info_lbl off
+makeRelativeRefTo _ lit = lit
diff --git a/compiler/codeGen/CgLetNoEscape.lhs b/compiler/codeGen/CgLetNoEscape.lhs
new file mode 100644
index 0000000000..39860f4ee0
--- /dev/null
+++ b/compiler/codeGen/CgLetNoEscape.lhs
@@ -0,0 +1,212 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1993-1998
+%
+% $Id: CgLetNoEscape.lhs,v 1.26 2004/09/30 10:35:47 simonpj Exp $
+%
+%********************************************************
+%* *
+\section[CgLetNoEscape]{Handling ``let-no-escapes''}
+%* *
+%********************************************************
+
+\begin{code}
+module CgLetNoEscape ( cgLetNoEscapeClosure ) where
+
+#include "HsVersions.h"
+
+import {-# SOURCE #-} CgExpr ( cgExpr )
+
+import StgSyn
+import CgMonad
+
+import CgBindery ( CgIdInfo, letNoEscapeIdInfo, nukeDeadBindings )
+import CgCase ( restoreCurrentCostCentre )
+import CgCon ( bindUnboxedTupleComponents )
+import CgHeapery ( unbxTupleHeapCheck )
+import CgInfoTbls ( emitDirectReturnTarget )
+import CgStackery ( allocStackTop, deAllocStackTop, getSpRelOffset )
+import Cmm ( CmmStmt(..) )
+import CmmUtils ( mkLblExpr, oneStmt )
+import CLabel ( mkReturnInfoLabel )
+import ClosureInfo ( mkLFLetNoEscape )
+import CostCentre ( CostCentreStack )
+import Id ( Id, idName )
+import Var ( idUnique )
+import SMRep ( retAddrSizeW )
+import BasicTypes ( RecFlag(..) )
+import Outputable
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[what-is-non-escaping]{What {\em is} a ``non-escaping let''?}
+%* *
+%************************************************************************
+
+[The {\em code} that detects these things is elsewhere.]
+
+Consider:
+\begin{verbatim}
+ let x = fvs \ args -> e
+ in
+ if ... then x else
+ if ... then x else ...
+\end{verbatim}
+@x@ is used twice (so we probably can't unfold it), but when it is
+entered, the stack is deeper than it was when the definition of @x@
+happened. Specifically, if instead of allocating a closure for @x@,
+we saved all @x@'s fvs on the stack, and remembered the stack depth at
+that moment, then whenever we enter @x@ we can simply set the stack
+pointer(s) to these remembered (compile-time-fixed) values, and jump
+to the code for @x@.
+
+All of this is provided that @x@ is:
+\begin{enumerate}
+\item
+non-updatable;
+\item
+guaranteed to be entered before the stack retreats -- ie x is not
+buried in a heap-allocated closure, or passed as an argument to something;
+\item
+all the enters have exactly the right number of arguments,
+no more no less;
+\item
+all the enters are tail calls; that is, they return to the
+caller enclosing the definition of @x@.
+\end{enumerate}
+
+Under these circumstances we say that @x@ is {\em non-escaping}.
+
+An example of when (4) does {\em not} hold:
+\begin{verbatim}
+ let x = ...
+ in case x of ...alts...
+\end{verbatim}
+
+Here, @x@ is certainly entered only when the stack is deeper than when
+@x@ is defined, but here it must return to \tr{...alts...}. So we can't
+just adjust the stack down to @x@'s recalled points, because that
+would lose @alts@' context.
+
+Things can get a little more complicated. Consider:
+\begin{verbatim}
+ let y = ...
+ in let x = fvs \ args -> ...y...
+ in ...x...
+\end{verbatim}
+
+Now, if @x@ is used in a non-escaping way in \tr{...x...}, {\em and}
+@y@ is used in a non-escaping way in \tr{...y...}, {\em then} @y@ is
+non-escaping.
+
+@x@ can even be recursive! Eg:
+\begin{verbatim}
+ letrec x = [y] \ [v] -> if v then x True else ...
+ in
+ ...(x b)...
+\end{verbatim}
+
+
+%************************************************************************
+%* *
+\subsection[codeGen-for-non-escaping]{Generating code for a ``non-escaping let''}
+%* *
+%************************************************************************
+
+
+Generating code for this is fun. It is all very very similar to what
+we do for a case expression. The duality is between
+\begin{verbatim}
+ let-no-escape x = b
+ in e
+\end{verbatim}
+and
+\begin{verbatim}
+ case e of ... -> b
+\end{verbatim}
+
+That is, the RHS of @x@ (ie @b@) will execute {\em later}, just like
+the alternative of the case; it needs to be compiled in an environment
+in which all volatile bindings are forgotten, and the free vars are
+bound only to stable things like stack locations. The @e@ part will
+execute {\em next}, just like the scrutinee of a case.
+
+First, we need to save all @x@'s free vars
+on the stack, if they aren't there already.
+
+\begin{code}
+cgLetNoEscapeClosure
+ :: Id -- binder
+ -> CostCentreStack -- NB: *** NOT USED *** ToDo (WDP 94/06)
+ -> StgBinderInfo -- NB: ditto
+ -> SRT
+ -> StgLiveVars -- variables live in RHS, including the binders
+ -- themselves in the case of a recursive group
+ -> EndOfBlockInfo -- where are we going to?
+ -> Maybe VirtualSpOffset -- Slot for current cost centre
+ -> RecFlag -- is the binding recursive?
+ -> [Id] -- args (as in \ args -> body)
+ -> StgExpr -- body (as in above)
+ -> FCode (Id, CgIdInfo)
+
+-- ToDo: deal with the cost-centre issues
+
+cgLetNoEscapeClosure
+ bndr cc binder_info srt full_live_in_rhss
+ rhs_eob_info cc_slot rec args body
+ = let
+ arity = length args
+ lf_info = mkLFLetNoEscape arity
+ in
+ -- saveVolatileVarsAndRegs done earlier in cgExpr.
+
+ do { (vSp, _) <- forkEvalHelp rhs_eob_info
+
+ (do { allocStackTop retAddrSizeW
+ ; nukeDeadBindings full_live_in_rhss })
+
+ (do { deAllocStackTop retAddrSizeW
+ ; abs_c <- forkProc $ cgLetNoEscapeBody bndr cc
+ cc_slot args body
+
+ -- Ignore the label that comes back from
+                -- emitDirectReturnTarget. It must be conjured up elsewhere
+ ; emitDirectReturnTarget (idName bndr) abs_c srt
+ ; return () })
+
+ ; returnFC (bndr, letNoEscapeIdInfo bndr vSp lf_info) }
+\end{code}
+
+\begin{code}
+cgLetNoEscapeBody :: Id -- Name of the join point
+ -> CostCentreStack
+ -> Maybe VirtualSpOffset
+ -> [Id] -- Args
+ -> StgExpr -- Body
+ -> Code
+
+cgLetNoEscapeBody bndr cc cc_slot all_args body = do
+ { (arg_regs, ptrs, nptrs, ret_slot) <- bindUnboxedTupleComponents all_args
+
+ -- restore the saved cost centre. BUT: we must not free the stack slot
+ -- containing the cost centre, because it might be needed for a
+ -- recursive call to this let-no-escape.
+ ; restoreCurrentCostCentre cc_slot False{-don't free-}
+
+        -- Enter the closure's cc, if required
+ ; -- enterCostCentreCode closure_info cc IsFunction
+
+ -- The "return address" slot doesn't have a return address in it;
+ -- but the heap-check needs it filled in if the heap-check fails.
+ -- So we pass code to fill it in to the heap-check macro
+ ; sp_rel <- getSpRelOffset ret_slot
+
+ ; let lbl = mkReturnInfoLabel (idUnique bndr)
+ frame_hdr_asst = oneStmt (CmmStore sp_rel (mkLblExpr lbl))
+
+ -- Do heap check [ToDo: omit for non-recursive case by recording in
+ -- in envt and absorbing at call site]
+ ; unbxTupleHeapCheck arg_regs ptrs nptrs frame_hdr_asst
+ (cgExpr body)
+ }
+\end{code}
diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs
new file mode 100644
index 0000000000..4f95c9b36a
--- /dev/null
+++ b/compiler/codeGen/CgMonad.lhs
@@ -0,0 +1,853 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+% $Id: CgMonad.lhs,v 1.45 2005/06/21 10:44:41 simonmar Exp $
+%
+\section[CgMonad]{The code generation monad}
+
+See the beginning of the top-level @CodeGen@ module, to see how this
+monadic stuff fits into the Big Picture.
+
+\begin{code}
+module CgMonad (
+ Code, -- type
+ FCode, -- type
+
+ initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,
+ returnFC, fixC, checkedAbsC,
+ stmtC, stmtsC, labelC, emitStmts, nopC, whenC, newLabelC,
+ newUnique, newUniqSupply,
+
+ CgStmts, emitCgStmts, forkCgStmts, cgStmtsToBlocks,
+ getCgStmts', getCgStmts,
+ noCgStmts, oneCgStmt, consCgStmt,
+
+ getCmm,
+ emitData, emitProc, emitSimpleProc,
+
+ forkLabelledCode,
+ forkClosureBody, forkStatics, forkAlts, forkEval,
+ forkEvalHelp, forkProc, codeOnly,
+ SemiTaggingStuff, ConTagZ,
+
+ EndOfBlockInfo(..),
+ setEndOfBlockInfo, getEndOfBlockInfo,
+
+ setSRTLabel, getSRTLabel,
+ setTickyCtrLabel, getTickyCtrLabel,
+
+ StackUsage(..), HeapUsage(..),
+ VirtualSpOffset, VirtualHpOffset,
+ initStkUsage, initHpUsage,
+ getHpUsage, setHpUsage,
+ heapHWM,
+
+ moduleName,
+
+ Sequel(..), -- ToDo: unabstract?
+
+ -- ideally we wouldn't export these, but some other modules access internal state
+ getState, setState, getInfoDown, getDynFlags, getHomeModules,
+
+ -- more localised access to monad state
+ getStkUsage, setStkUsage,
+ getBinds, setBinds, getStaticBinds,
+
+ -- out of general friendliness, we also export ...
+ CgInfoDownwards(..), CgState(..) -- non-abstract
+ ) where
+
+#include "HsVersions.h"
+
+import {-# SOURCE #-} CgBindery ( CgBindings, nukeVolatileBinds )
+
+import DynFlags ( DynFlags )
+import Packages ( HomeModules )
+import Cmm
+import CmmUtils ( CmmStmts, isNopStmt )
+import CLabel
+import SMRep ( WordOff )
+import Module ( Module )
+import Id ( Id )
+import VarEnv
+import OrdList
+import Unique ( Unique )
+import Util ( mapAccumL )
+import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply, uniqFromSupply )
+import FastString
+import Outputable
+
+import Control.Monad ( liftM )
+
+infixr 9 `thenC` -- Right-associative!
+infixr 9 `thenFC`
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[CgMonad-environment]{Stuff for manipulating environments}
+%* *
+%************************************************************************
+
+This monadery has some information that it only passes {\em
+downwards}, as well as some ``state'' which is modified as we go
+along.
+
+\begin{code}
+data CgInfoDownwards -- information only passed *downwards* by the monad
+ = MkCgInfoDown {
+ cgd_dflags :: DynFlags,
+ cgd_hmods :: HomeModules, -- Packages we depend on
+ cgd_mod :: Module, -- Module being compiled
+ cgd_statics :: CgBindings, -- [Id -> info] : static environment
+ cgd_srt :: CLabel, -- label of the current SRT
+ cgd_ticky :: CLabel, -- current destination for ticky counts
+ cgd_eob :: EndOfBlockInfo -- Info for stuff to do at end of basic block:
+ }
+
+initCgInfoDown :: DynFlags -> HomeModules -> Module -> CgInfoDownwards
+initCgInfoDown dflags hmods mod
+ = MkCgInfoDown { cgd_dflags = dflags,
+ cgd_hmods = hmods,
+ cgd_mod = mod,
+ cgd_statics = emptyVarEnv,
+ cgd_srt = error "initC: srt",
+ cgd_ticky = mkTopTickyCtrLabel,
+ cgd_eob = initEobInfo }
+
+data CgState
+ = MkCgState {
+ cgs_stmts :: OrdList CgStmt, -- Current proc
+ cgs_tops :: OrdList CmmTop,
+ -- Other procedures and data blocks in this compilation unit
+	-- Both of these are ordered only so that we can
+ -- reduce forward references, when it's easy to do so
+
+ cgs_binds :: CgBindings, -- [Id -> info] : *local* bindings environment
+ -- Bindings for top-level things are given in
+ -- the info-down part
+
+ cgs_stk_usg :: StackUsage,
+ cgs_hp_usg :: HeapUsage,
+
+ cgs_uniqs :: UniqSupply }
+
+initCgState :: UniqSupply -> CgState
+initCgState uniqs
+ = MkCgState { cgs_stmts = nilOL, cgs_tops = nilOL,
+ cgs_binds = emptyVarEnv,
+ cgs_stk_usg = initStkUsage,
+ cgs_hp_usg = initHpUsage,
+ cgs_uniqs = uniqs }
+\end{code}
+
+@EndOfBlockInfo@ tells what to do at the end of this block of code or,
+if the expression is a @case@, what to do at the end of each
+alternative.
+
+\begin{code}
+data EndOfBlockInfo
+ = EndOfBlockInfo
+ VirtualSpOffset -- Args Sp: trim the stack to this point at a
+ -- return; push arguments starting just
+ -- above this point on a tail call.
+
+ -- This is therefore the stk ptr as seen
+ -- by a case alternative.
+ Sequel
+
+initEobInfo = EndOfBlockInfo 0 OnStack
+\end{code}
+
+Any addressing modes inside @Sequel@ must be ``robust,'' in the sense
+that they must survive stack pointer adjustments at the end of the
+block.
+
+\begin{code}
+data Sequel
+ = OnStack -- Continuation is on the stack
+ | UpdateCode -- Continuation is update
+
+ | CaseAlts
+ CLabel -- Jump to this; if the continuation is for a vectored
+ -- case this might be the label of a return vector
+ SemiTaggingStuff
+ Id -- The case binder, only used to see if it's dead
+ Bool -- True <=> polymorphic, push a SEQ frame too
+
+type SemiTaggingStuff
+  = Maybe			-- Maybe we don't have any semi-tagging stuff...
+	 ([(ConTagZ, CmmLit)],	-- Alternatives
+	  CmmLit)		-- Default (a can't-happen RTS label if the default can't happen)
+
+type ConTagZ = Int	-- A *zero-indexed* constructor tag
+
+-- The case branch is executed only from a successful semitagging
+-- venture, when a case has looked at a variable, found that it's
+-- evaluated, and wants to load up the contents and go to the join
+-- point.
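+
+-- An illustrative (made-up) example: for a two-constructor type the
+-- semi-tagging stuff might be
+--	Just ([(0, CmmLabel nil_alt_lbl), (1, CmmLabel cons_alt_lbl)],
+--	      CmmLabel impossible_lbl)
+-- mapping zero-indexed tags to the labels of the alternatives' join
+-- points (the labels here are invented names).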
+\end{code}
+
+%************************************************************************
+%* *
+ CgStmt type
+%* *
+%************************************************************************
+
+The CgStmts type is what the code generator outputs: it is a tree of
+statements, including in-line labels. The job of flattenCgStmts is to
+turn this into a list of basic blocks, each of which ends in a jump
+statement (either a local branch or a non-local jump).
+
+\begin{code}
+type CgStmts = OrdList CgStmt
+
+data CgStmt
+ = CgStmt CmmStmt
+ | CgLabel BlockId
+ | CgFork BlockId CgStmts
+
+flattenCgStmts :: BlockId -> CgStmts -> [CmmBasicBlock]
+flattenCgStmts id stmts =
+ case flatten (fromOL stmts) of
+ ([],blocks) -> blocks
+ (block,blocks) -> BasicBlock id block : blocks
+ where
+ flatten [] = ([],[])
+
+ -- A label at the end of a function or fork: this label must not be reachable,
+ -- but it might be referred to from another BB that also isn't reachable.
+ -- Eliminating these has to be done with a dead-code analysis. For now,
+ -- we just make it into a well-formed block by adding a recursive jump.
+ flatten [CgLabel id]
+ = ( [], [BasicBlock id [CmmBranch id]] )
+
+ -- A jump/branch: throw away all the code up to the next label, because
+ -- it is unreachable. Be careful to keep forks that we find on the way.
+ flatten (CgStmt stmt : stmts)
+ | isJump stmt
+ = case dropWhile isOrdinaryStmt stmts of
+ [] -> ( [stmt], [] )
+ [CgLabel id] -> ( [stmt], [BasicBlock id [CmmBranch id]])
+ (CgLabel id : stmts) -> ( [stmt], BasicBlock id block : blocks )
+ where (block,blocks) = flatten stmts
+ (CgFork fork_id stmts : ss) ->
+ flatten (CgFork fork_id stmts : CgStmt stmt : ss)
+
+ flatten (s:ss) =
+ case s of
+ CgStmt stmt -> (stmt:block,blocks)
+ CgLabel id -> ([CmmBranch id],BasicBlock id block:blocks)
+ CgFork fork_id stmts ->
+ (block, BasicBlock fork_id fork_block : fork_blocks ++ blocks)
+ where (fork_block, fork_blocks) = flatten (fromOL stmts)
+ where (block,blocks) = flatten ss
+
+isJump (CmmJump _ _) = True
+isJump (CmmBranch _) = True
+isJump _ = False
+
+isOrdinaryStmt (CgStmt _) = True
+isOrdinaryStmt _ = False
+\end{code}
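+
+For example (an illustrative sketch, writing plain lists for the
+@OrdList@s; @s1@..@s4@ stand for arbitrary non-jump statements and @f@
+for some jump target, none of which exist in the code):
+\begin{verbatim}
+  flattenCgStmts L0
+    [ CgStmt s1
+    , CgFork L2 [CgStmt s3, CgStmt (CmmBranch L1)]
+    , CgStmt s2
+    , CgStmt (CmmBranch L1)
+    , CgLabel L1
+    , CgStmt s4
+    , CgStmt (CmmJump f [])
+    ]
+  ==> [ BasicBlock L0 [s1, s2, CmmBranch L1]
+      , BasicBlock L2 [s3, CmmBranch L1]
+      , BasicBlock L1 [s4, CmmJump f []]
+      ]
+\end{verbatim}
+The fork becomes a block of its own, and the in-line label terminates
+the current block with an explicit branch to the new one.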
+
+%************************************************************************
+%* *
+ Stack and heap models
+%* *
+%************************************************************************
+
+\begin{code}
+type VirtualHpOffset = WordOff -- Both are in
+type VirtualSpOffset = WordOff -- units of words
+
+data StackUsage
+ = StackUsage {
+ virtSp :: VirtualSpOffset,
+ -- Virtual offset of topmost allocated slot
+
+ frameSp :: VirtualSpOffset,
+ -- Virtual offset of the return address of the enclosing frame.
+ -- This RA describes the liveness/pointedness of
+ -- all the stack from frameSp downwards
+ -- INVARIANT: less than or equal to virtSp
+
+ freeStk :: [VirtualSpOffset],
+ -- List of free slots, in *increasing* order
+ -- INVARIANT: all <= virtSp
+ -- All slots <= virtSp are taken except these ones
+
+ realSp :: VirtualSpOffset,
+ -- Virtual offset of real stack pointer register
+
+	hwSp	  :: VirtualSpOffset
+		-- Highest value ever taken by virtSp
+  }
+
+-- INVARIANT: The environment contains no Stable references to
+-- stack slots below (lower offset) frameSp
+-- It can contain volatile references to this area though.
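+
+-- An illustrative (made-up) snapshot: virtSp = 10, realSp = 8,
+-- freeStk = [6] says that slots 1..10 are taken except slot 6, the
+-- real Sp register currently points at slot 8, and slots 9 and 10
+-- exist only virtually, not yet reflected in the real Sp.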
+
+data HeapUsage =
+ HeapUsage {
+ virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word
+ realHp :: VirtualHpOffset -- realHp: Virtual offset of real heap ptr
+ }
+\end{code}
+
+The heap high water mark is the larger of virtHp and hwHp.  The latter
+only records the high water marks of forked-off branches, so to find the
+heap high water mark you have to take the max of virtHp and hwHp. Remember,
+virtHp never retreats!
+
+Note Jan 04: ok, so why do we only look at the virtual Hp??
+
+\begin{code}
+heapHWM :: HeapUsage -> VirtualHpOffset
+heapHWM = virtHp
+\end{code}
+
+Initialisation.
+
+\begin{code}
+initStkUsage :: StackUsage
+initStkUsage = StackUsage {
+ virtSp = 0,
+ frameSp = 0,
+ freeStk = [],
+ realSp = 0,
+ hwSp = 0
+ }
+
+initHpUsage :: HeapUsage
+initHpUsage = HeapUsage {
+ virtHp = 0,
+ realHp = 0
+ }
+\end{code}
+
+@stateIncUsage@$~e_1~e_2$ incorporates in $e_1$ the stack and heap high water
+marks found in $e_2$.
+
+\begin{code}
+stateIncUsage :: CgState -> CgState -> CgState
+stateIncUsage s1 s2@(MkCgState { cgs_stk_usg = stk_usg, cgs_hp_usg = hp_usg })
+ = s1 { cgs_hp_usg = cgs_hp_usg s1 `maxHpHw` virtHp hp_usg,
+ cgs_stk_usg = cgs_stk_usg s1 `maxStkHw` hwSp stk_usg }
+ `addCodeBlocksFrom` s2
+
+stateIncUsageEval :: CgState -> CgState -> CgState
+stateIncUsageEval s1 s2
+ = s1 { cgs_stk_usg = cgs_stk_usg s1 `maxStkHw` hwSp (cgs_stk_usg s2) }
+ `addCodeBlocksFrom` s2
+ -- We don't max the heap high-watermark because stateIncUsageEval is
+ -- used only in forkEval, which in turn is only used for blocks of code
+ -- which do their own heap-check.
+
+addCodeBlocksFrom :: CgState -> CgState -> CgState
+-- Add code blocks from the latter to the former
+-- (The cgs_stmts will often be empty, but not always; see codeOnly)
+s1 `addCodeBlocksFrom` s2
+ = s1 { cgs_stmts = cgs_stmts s1 `appOL` cgs_stmts s2,
+ cgs_tops = cgs_tops s1 `appOL` cgs_tops s2 }
+
+maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage
+hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw }
+
+maxStkHw :: StackUsage -> VirtualSpOffset -> StackUsage
+stk_usg `maxStkHw` hw = stk_usg { hwSp = hwSp stk_usg `max` hw }
+\end{code}
+
+%************************************************************************
+%* *
+ The FCode monad
+%* *
+%************************************************************************
+
+\begin{code}
+newtype FCode a = FCode (CgInfoDownwards -> CgState -> (a, CgState))
+type Code = FCode ()
+
+instance Monad FCode where
+ (>>=) = thenFC
+ return = returnFC
+
+{-# INLINE thenC #-}
+{-# INLINE thenFC #-}
+{-# INLINE returnFC #-}
+\end{code}
+The Abstract~C is not in the environment so as to improve strictness.
+
+\begin{code}
+initC :: DynFlags -> HomeModules -> Module -> FCode a -> IO a
+
+initC dflags hmods mod (FCode code)
+ = do { uniqs <- mkSplitUniqSupply 'c'
+ ; case code (initCgInfoDown dflags hmods mod) (initCgState uniqs) of
+ (res, _) -> return res
+ }
+
+returnFC :: a -> FCode a
+returnFC val = FCode (\info_down state -> (val, state))
+\end{code}
+
+\begin{code}
+thenC :: Code -> FCode a -> FCode a
+thenC (FCode m) (FCode k) =
+ FCode (\info_down state -> let (_,new_state) = m info_down state in
+ k info_down new_state)
+
+listCs :: [Code] -> Code
+listCs [] = return ()
+listCs (fc:fcs) = do
+ fc
+ listCs fcs
+
+mapCs :: (a -> Code) -> [a] -> Code
+mapCs = mapM_
+\end{code}
+
+\begin{code}
+thenFC :: FCode a -> (a -> FCode c) -> FCode c
+thenFC (FCode m) k = FCode (
+ \info_down state ->
+ let
+ (m_result, new_state) = m info_down state
+ (FCode kcode) = k m_result
+ in
+ kcode info_down new_state
+ )
+
+listFCs :: [FCode a] -> FCode [a]
+listFCs = sequence
+
+mapFCs :: (a -> FCode b) -> [a] -> FCode [b]
+mapFCs = mapM
+\end{code}
+
+And the knot-tying combinator:
+\begin{code}
+fixC :: (a -> FCode a) -> FCode a
+fixC fcode = FCode (
+ \info_down state ->
+ let
+ FCode fc = fcode v
+ result@(v,_) = fc info_down state
+ -- ^--------^
+ in
+ result
+ )
+\end{code}
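+
+For example (a hypothetical sketch, assuming some @addBindC@ and
+@cgRhs@ in scope; neither is defined in this module): when compiling a
+recursive binding, the binder's info can be fed back into the
+compilation of its own right-hand side:
+\begin{verbatim}
+  fixC (\ id_info -> do { addBindC id id_info  -- bind before it exists
+                        ; cgRhs id rhs })      -- the rhs may look id up
+\end{verbatim}
+This works only because the binding is consulted lazily; @fixC@ would
+loop if the result were demanded while it was being computed.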
+
+%************************************************************************
+%* *
+	Operators for getting and setting the state and "info_down"
+%*									*
+%************************************************************************
+
+\begin{code}
+getState :: FCode CgState
+getState = FCode $ \info_down state -> (state,state)
+
+setState :: CgState -> FCode ()
+setState state = FCode $ \info_down _ -> ((),state)
+
+getStkUsage :: FCode StackUsage
+getStkUsage = do
+ state <- getState
+ return $ cgs_stk_usg state
+
+setStkUsage :: StackUsage -> Code
+setStkUsage new_stk_usg = do
+ state <- getState
+ setState $ state {cgs_stk_usg = new_stk_usg}
+
+getHpUsage :: FCode HeapUsage
+getHpUsage = do
+ state <- getState
+ return $ cgs_hp_usg state
+
+setHpUsage :: HeapUsage -> Code
+setHpUsage new_hp_usg = do
+ state <- getState
+ setState $ state {cgs_hp_usg = new_hp_usg}
+
+getBinds :: FCode CgBindings
+getBinds = do
+ state <- getState
+ return $ cgs_binds state
+
+setBinds :: CgBindings -> FCode ()
+setBinds new_binds = do
+ state <- getState
+ setState $ state {cgs_binds = new_binds}
+
+getStaticBinds :: FCode CgBindings
+getStaticBinds = do
+ info <- getInfoDown
+ return (cgd_statics info)
+
+withState :: FCode a -> CgState -> FCode (a,CgState)
+withState (FCode fcode) newstate = FCode $ \info_down state ->
+ let (retval, state2) = fcode info_down newstate in ((retval,state2), state)
+
+newUniqSupply :: FCode UniqSupply
+newUniqSupply = do
+ state <- getState
+ let (us1, us2) = splitUniqSupply (cgs_uniqs state)
+ setState $ state { cgs_uniqs = us1 }
+ return us2
+
+newUnique :: FCode Unique
+newUnique = do
+ us <- newUniqSupply
+ return (uniqFromSupply us)
+
+------------------
+getInfoDown :: FCode CgInfoDownwards
+getInfoDown = FCode $ \info_down state -> (info_down,state)
+
+getDynFlags :: FCode DynFlags
+getDynFlags = liftM cgd_dflags getInfoDown
+
+getHomeModules :: FCode HomeModules
+getHomeModules = liftM cgd_hmods getInfoDown
+
+withInfoDown :: FCode a -> CgInfoDownwards -> FCode a
+withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state
+
+doFCode :: FCode a -> CgInfoDownwards -> CgState -> (a,CgState)
+doFCode (FCode fcode) info_down state = fcode info_down state
+\end{code}
+
+
+%************************************************************************
+%* *
+ Forking
+%* *
+%************************************************************************
+
+@forkClosureBody@ takes a code, $c$, and compiles it in a completely
+fresh environment, except that:
+ - compilation info and statics are passed in unchanged.
+The current environment is passed on completely unaltered, except that
+abstract C from the fork is incorporated.
+
+@forkProc@ takes a code and compiles it in the current environment,
+returning the statements thus constructed.  The current environment
+is passed on completely unchanged.  It is pretty similar to
+@getCgStmts@, except that the latter does affect the environment.
+
+@forkStatics@ $fc$ compiles $fc$ in an environment whose statics come
+from the current bindings, but which is otherwise freshly initialised.
+The Abstract~C returned is attached to the current state, but the
+bindings and usage information are otherwise unchanged.
+
+\begin{code}
+forkClosureBody :: Code -> Code
+forkClosureBody body_code
+ = do { info <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let body_info_down = info { cgd_eob = initEobInfo }
+ ((),fork_state) = doFCode body_code body_info_down
+ (initCgState us)
+ ; ASSERT( isNilOL (cgs_stmts fork_state) )
+ setState $ state `addCodeBlocksFrom` fork_state }
+
+forkStatics :: FCode a -> FCode a
+forkStatics body_code
+ = do { info <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let rhs_info_down = info { cgd_statics = cgs_binds state,
+ cgd_eob = initEobInfo }
+ (result, fork_state_out) = doFCode body_code rhs_info_down
+ (initCgState us)
+ ; ASSERT( isNilOL (cgs_stmts fork_state_out) )
+ setState (state `addCodeBlocksFrom` fork_state_out)
+ ; return result }
+
+forkProc :: Code -> FCode CgStmts
+forkProc body_code
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let fork_state_in = (initCgState us)
+ { cgs_binds = cgs_binds state,
+ cgs_stk_usg = cgs_stk_usg state,
+ cgs_hp_usg = cgs_hp_usg state }
+		-- ToDo: is the hp usage necessary?
+ (code_blks, fork_state_out) = doFCode (getCgStmts body_code)
+ info_down fork_state_in
+ ; setState $ state `stateIncUsageEval` fork_state_out
+ ; return code_blks }
+
+codeOnly :: Code -> Code
+-- Emit any code from the inner thing into the outer thing
+-- Do not affect anything else in the outer state
+-- Used in almost-circular code to prevent false loop dependencies
+codeOnly body_code
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state,
+ cgs_stk_usg = cgs_stk_usg state,
+ cgs_hp_usg = cgs_hp_usg state }
+ ((), fork_state_out) = doFCode body_code info_down fork_state_in
+ ; setState $ state `addCodeBlocksFrom` fork_state_out }
+\end{code}
+
+@forkAlts@ $bs$ takes fcodes $bs$ for the branches of a @case@
+(including any default alternative) and compiles each in the current
+environment.  The current environment is passed on unmodified, except
+that
+ - the worst stack high-water mark is incorporated
+ - the virtual Hp is moved on to the worst virtual Hp for the branches
+
+\begin{code}
+forkAlts :: [FCode a] -> FCode [a]
+
+forkAlts branch_fcodes
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let compile us branch
+ = (us2, doFCode branch info_down branch_state)
+ where
+ (us1,us2) = splitUniqSupply us
+ branch_state = (initCgState us1) {
+ cgs_binds = cgs_binds state,
+ cgs_stk_usg = cgs_stk_usg state,
+ cgs_hp_usg = cgs_hp_usg state }
+
+ (_us, results) = mapAccumL compile us branch_fcodes
+ (branch_results, branch_out_states) = unzip results
+ ; setState $ foldl stateIncUsage state branch_out_states
+ -- NB foldl. state is the *left* argument to stateIncUsage
+ ; return branch_results }
+\end{code}
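+
+For example (a sketch, with a hypothetical @cgAlt@), the branches of a
+case might be compiled with
+\begin{verbatim}
+  branch_infos <- forkAlts [ cgAlt alt | alt <- alts ]
+\end{verbatim}
+Each branch starts from the current bindings and stack/heap usage, and
+afterwards the state reflects the worst usage over all the branches,
+as described above.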
+
+@forkEval@ takes two blocks of code.
+
+ - The first meddles with the environment to set it up as expected by
+ the alternatives of a @case@ which does an eval (or gc-possible primop).
+ - The second block is the code for the alternatives.
+ (plus info for semi-tagging purposes)
+
+@forkEval@ picks up the virtual stack pointer and returns a suitable
+@EndOfBlockInfo@ for the caller to use, together with whatever value
+is returned by the second block.
+
+It uses the passed-in environment-setting code to initialise the
+environment, and @stateIncUsageEval@ to incorporate usage; the latter
+ignores the heap usage.
+
+\begin{code}
+forkEval :: EndOfBlockInfo -- For the body
+ -> Code -- Code to set environment
+ -> FCode Sequel -- Semi-tagging info to store
+ -> FCode EndOfBlockInfo -- The new end of block info
+
+forkEval body_eob_info env_code body_code
+ = do { (v, sequel) <- forkEvalHelp body_eob_info env_code body_code
+ ; returnFC (EndOfBlockInfo v sequel) }
+
+forkEvalHelp :: EndOfBlockInfo -- For the body
+ -> Code -- Code to set environment
+ -> FCode a -- The code to do after the eval
+ -> FCode (VirtualSpOffset, -- Sp
+ a) -- Result of the FCode
+ -- A disturbingly complicated function
+forkEvalHelp body_eob_info env_code body_code
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let { info_down_for_body = info_down {cgd_eob = body_eob_info}
+ ; (_, env_state) = doFCode env_code info_down_for_body
+ (state {cgs_uniqs = us})
+ ; state_for_body = (initCgState (cgs_uniqs env_state))
+ { cgs_binds = binds_for_body,
+ cgs_stk_usg = stk_usg_for_body }
+ ; binds_for_body = nukeVolatileBinds (cgs_binds env_state)
+ ; stk_usg_from_env = cgs_stk_usg env_state
+ ; virtSp_from_env = virtSp stk_usg_from_env
+ ; stk_usg_for_body = stk_usg_from_env {realSp = virtSp_from_env,
+ hwSp = virtSp_from_env}
+ ; (value_returned, state_at_end_return)
+ = doFCode body_code info_down_for_body state_for_body
+ }
+ ; ASSERT( isNilOL (cgs_stmts state_at_end_return) )
+ -- The code coming back should consist only of nested declarations,
+ -- notably of the return vector!
+ setState $ state `stateIncUsageEval` state_at_end_return
+ ; return (virtSp_from_env, value_returned) }
+
+
+-- ----------------------------------------------------------------------------
+-- Combinators for emitting code
+
+nopC :: Code
+nopC = return ()
+
+whenC :: Bool -> Code -> Code
+whenC True code = code
+whenC False code = nopC
+
+stmtC :: CmmStmt -> Code
+stmtC stmt = emitCgStmt (CgStmt stmt)
+
+labelC :: BlockId -> Code
+labelC id = emitCgStmt (CgLabel id)
+
+newLabelC :: FCode BlockId
+newLabelC = do { id <- newUnique; return (BlockId id) }
+
+checkedAbsC :: CmmStmt -> Code
+-- Emit code, eliminating no-ops
+checkedAbsC stmt = emitStmts (if isNopStmt stmt then nilOL
+ else unitOL stmt)
+
+stmtsC :: [CmmStmt] -> Code
+stmtsC stmts = emitStmts (toOL stmts)
+
+-- Emit code; no no-op checking
+emitStmts :: CmmStmts -> Code
+emitStmts stmts = emitCgStmts (fmap CgStmt stmts)
+
+-- forkLabelledCode is for emitting a chunk of code with a label, outside
+-- of the current instruction stream.
+forkLabelledCode :: Code -> FCode BlockId
+forkLabelledCode code = getCgStmts code >>= forkCgStmts
+
+emitCgStmt :: CgStmt -> Code
+emitCgStmt stmt
+ = do { state <- getState
+ ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt }
+ }
+
+emitData :: Section -> [CmmStatic] -> Code
+emitData sect lits
+ = do { state <- getState
+ ; setState $ state { cgs_tops = cgs_tops state `snocOL` data_block } }
+ where
+ data_block = CmmData sect lits
+
+emitProc :: [CmmLit] -> CLabel -> [LocalReg] -> [CmmBasicBlock] -> Code
+emitProc lits lbl args blocks
+ = do { let proc_block = CmmProc (map CmmStaticLit lits) lbl args blocks
+ ; state <- getState
+ ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } }
+
+emitSimpleProc :: CLabel -> Code -> Code
+-- Emit a procedure whose body is the specified code; no info table
+emitSimpleProc lbl code
+ = do { stmts <- getCgStmts code
+ ; blks <- cgStmtsToBlocks stmts
+ ; emitProc [] lbl [] blks }
+
+getCmm :: Code -> FCode Cmm
+-- Get all the CmmTops (there should be no stmts)
+getCmm code
+ = do { state1 <- getState
+ ; ((), state2) <- withState code (state1 { cgs_tops = nilOL })
+ ; setState $ state2 { cgs_tops = cgs_tops state1 }
+ ; return (Cmm (fromOL (cgs_tops state2))) }
+
+-- ----------------------------------------------------------------------------
+-- CgStmts
+
+-- These functions deal in terms of CgStmts, which is an abstract type
+-- representing the code in the current proc.
+
+
+-- emit CgStmts into the current instruction stream
+emitCgStmts :: CgStmts -> Code
+emitCgStmts stmts
+ = do { state <- getState
+ ; setState $ state { cgs_stmts = cgs_stmts state `appOL` stmts } }
+
+-- emit CgStmts outside the current instruction stream, and return a label
+forkCgStmts :: CgStmts -> FCode BlockId
+forkCgStmts stmts
+ = do { id <- newLabelC
+ ; emitCgStmt (CgFork id stmts)
+ ; return id
+ }
+
+-- turn CgStmts into [CmmBasicBlock], for making a new proc.
+cgStmtsToBlocks :: CgStmts -> FCode [CmmBasicBlock]
+cgStmtsToBlocks stmts
+ = do { id <- newLabelC
+ ; return (flattenCgStmts id stmts)
+ }
+
+-- collect the code emitted by an FCode computation
+getCgStmts' :: FCode a -> FCode (a, CgStmts)
+getCgStmts' fcode
+ = do { state1 <- getState
+ ; (a, state2) <- withState fcode (state1 { cgs_stmts = nilOL })
+ ; setState $ state2 { cgs_stmts = cgs_stmts state1 }
+ ; return (a, cgs_stmts state2) }
+
+getCgStmts :: FCode a -> FCode CgStmts
+getCgStmts fcode = do { (_,stmts) <- getCgStmts' fcode; return stmts }
+
+-- Simple ways to construct CgStmts:
+noCgStmts :: CgStmts
+noCgStmts = nilOL
+
+oneCgStmt :: CmmStmt -> CgStmts
+oneCgStmt stmt = unitOL (CgStmt stmt)
+
+consCgStmt :: CmmStmt -> CgStmts -> CgStmts
+consCgStmt stmt stmts = CgStmt stmt `consOL` stmts
+
+-- ----------------------------------------------------------------------------
+-- Get the current module name
+
+moduleName :: FCode Module
+moduleName = do { info <- getInfoDown; return (cgd_mod info) }
+
+-- ----------------------------------------------------------------------------
+-- Get/set the end-of-block info
+
+setEndOfBlockInfo :: EndOfBlockInfo -> Code -> Code
+setEndOfBlockInfo eob_info code = do
+ info <- getInfoDown
+ withInfoDown code (info {cgd_eob = eob_info})
+
+getEndOfBlockInfo :: FCode EndOfBlockInfo
+getEndOfBlockInfo = do
+ info <- getInfoDown
+ return (cgd_eob info)
+
+-- ----------------------------------------------------------------------------
+-- Get/set the current SRT label
+
+-- There is just one SRT for each top level binding; all the nested
+-- bindings use sub-sections of this SRT. The label is passed down to
+-- the nested bindings via the monad.
+
+getSRTLabel :: FCode CLabel -- Used only by cgPanic
+getSRTLabel = do info <- getInfoDown
+ return (cgd_srt info)
+
+setSRTLabel :: CLabel -> FCode a -> FCode a
+setSRTLabel srt_lbl code
+ = do info <- getInfoDown
+ withInfoDown code (info { cgd_srt = srt_lbl})
+
+-- ----------------------------------------------------------------------------
+-- Get/set the current ticky counter label
+
+getTickyCtrLabel :: FCode CLabel
+getTickyCtrLabel = do
+ info <- getInfoDown
+ return (cgd_ticky info)
+
+setTickyCtrLabel :: CLabel -> Code -> Code
+setTickyCtrLabel ticky code = do
+ info <- getInfoDown
+ withInfoDown code (info {cgd_ticky = ticky})
+\end{code}
diff --git a/compiler/codeGen/CgParallel.hs b/compiler/codeGen/CgParallel.hs
new file mode 100644
index 0000000000..b826a33cba
--- /dev/null
+++ b/compiler/codeGen/CgParallel.hs
@@ -0,0 +1,90 @@
+-- Code generation related to GpH
+-- (a) parallel
+-- (b) GranSim
+
+module CgParallel(
+ staticGranHdr,staticParHdr,
+ granFetchAndReschedule, granYield,
+ doGranAllocate
+ ) where
+
+import CgMonad
+import CgCallConv ( mkRegLiveness )
+import Id ( Id )
+import Cmm ( CmmLit, GlobalReg(..), node, CmmExpr )
+import StaticFlags ( opt_GranMacros )
+import Outputable
+
+staticParHdr :: [CmmLit]
+-- Parallel header words in a static closure
+staticParHdr = []
+
+--------------------------------------------------------
+-- GranSim stuff
+--------------------------------------------------------
+
+staticGranHdr :: [CmmLit]
+-- Gransim header words in a static closure
+staticGranHdr = []
+
+doGranAllocate :: CmmExpr -> Code
+-- macro DO_GRAN_ALLOCATE
+doGranAllocate hp
+ | not opt_GranMacros = nopC
+ | otherwise = panic "doGranAllocate"
+
+
+
+-------------------------
+granFetchAndReschedule :: [(Id,GlobalReg)] -- Live registers
+ -> Bool -- Node reqd?
+ -> Code
+-- Emit code to simulate a fetch and then reschedule.
+granFetchAndReschedule regs node_reqd
+ | opt_GranMacros && (node `elem` map snd regs || node_reqd)
+ = do { fetch
+ ; reschedule liveness node_reqd }
+ | otherwise
+ = nopC
+ where
+ liveness = mkRegLiveness regs 0 0
+
+fetch = panic "granFetch"
+ -- Was: absC (CMacroStmt GRAN_FETCH [])
+ --HWL: generate GRAN_FETCH macro for GrAnSim
+	 -- currently GRAN_FETCH and GRAN_FETCH_AND_RESCHEDULE are interchangeable
+
+reschedule liveness node_reqd = panic "granReschedule"
+ -- Was: absC (CMacroStmt GRAN_RESCHEDULE [
+ -- mkIntCLit (I# (word2Int# liveness_mask)),
+ -- mkIntCLit (if node_reqd then 1 else 0)])
+
+
+-------------------------
+-- The @GRAN_YIELD@ macro is taken from JSM's code for Concurrent Haskell.  It
+-- allows a context switch at places where @node@ is not alive (it uses the
+-- @Continue@ rather than the @EnterNodeCode@ function in the RTS).  We emit
+-- this kind of macro at the beginning of the following kinds of basic blocks:
+-- \begin{itemize}
+-- \item Slow entry code where node is not alive (see @CgClosure.lhs@).  Normally
+--	 we use @fetchAndReschedule@ at a slow entry code.
+-- \item Fast entry code (see @CgClosure.lhs@).
+-- \item Alternatives in case expressions (@CLabelledCode@ structures), provided
+--	 that they are not inlined (see @CgCase.lhs@).  These alternatives will
+--	 be turned into separate functions.
+-- \end{itemize}
+
+granYield :: [(Id,GlobalReg)] -- Live registers
+ -> Bool -- Node reqd?
+ -> Code
+
+granYield regs node_reqd
+ | opt_GranMacros && node_reqd = yield liveness
+ | otherwise = nopC
+ where
+ liveness = mkRegLiveness regs 0 0
+
+yield liveness = panic "granYield"
+ -- Was : absC (CMacroStmt GRAN_YIELD
+ -- [mkIntCLit (I# (word2Int# liveness_mask))])
+
+
diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs
new file mode 100644
index 0000000000..bc7c9140ed
--- /dev/null
+++ b/compiler/codeGen/CgPrimOp.hs
@@ -0,0 +1,584 @@
+-----------------------------------------------------------------------------
+--
+-- Code generation for PrimOps.
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+module CgPrimOp (
+ cgPrimOp
+ ) where
+
+#include "HsVersions.h"
+
+import ForeignCall ( CCallConv(CCallConv) )
+import StgSyn ( StgLiveVars, StgArg )
+import CgForeignCall ( emitForeignCall' )
+import CgBindery ( getVolatileRegs, getArgAmodes )
+import CgMonad
+import CgInfoTbls ( getConstrTag )
+import CgUtils ( cmmOffsetW, cmmOffsetB, cmmLoadIndexW )
+import ForeignCall
+import Cmm
+import CLabel ( mkMAP_FROZEN_infoLabel, mkMAP_DIRTY_infoLabel,
+ mkDirty_MUT_VAR_Label, mkRtsCodeLabel )
+import CmmUtils
+import MachOp
+import SMRep
+import PrimOp ( PrimOp(..) )
+import Constants ( wORD_SIZE, wORD_SIZE_IN_BITS )
+import StaticFlags ( opt_Parallel )
+import Outputable
+
+-- ---------------------------------------------------------------------------
+-- Code generation for PrimOps
+
+cgPrimOp :: [CmmReg] -- where to put the results
+ -> PrimOp -- the op
+ -> [StgArg] -- arguments
+ -> StgLiveVars -- live vars, in case we need to save them
+ -> Code
+
+cgPrimOp results op args live
+ = do arg_exprs <- getArgAmodes args
+ let non_void_args = [ e | (r,e) <- arg_exprs, nonVoidArg r ]
+ emitPrimOp results op non_void_args live
+
+
+emitPrimOp :: [CmmReg] -- where to put the results
+ -> PrimOp -- the op
+ -> [CmmExpr] -- arguments
+ -> StgLiveVars -- live vars, in case we need to save them
+ -> Code
+
+-- First we handle various awkward cases specially. The remaining
+-- easy cases are then handled by translateOp, defined below.
+
+emitPrimOp [res_r,res_c] IntAddCOp [aa,bb] live
+{-
+ With some bit-twiddling, we can define int{Add,Sub}Czh portably in
+ C, and without needing any comparisons. This may not be the
+ fastest way to do it - if you have better code, please send it! --SDM
+
+ Return : r = a + b, c = 0 if no overflow, 1 on overflow.
+
+ We currently don't make use of the r value if c is != 0 (i.e.
+ overflow), we just convert to big integers and try again. This
+ could be improved by making r and c the correct values for
+ plugging into a new J#.
+
+ { r = ((I_)(a)) + ((I_)(b)); \
+ c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
+ >> (BITS_IN (I_) - 1); \
+ }
+ Wading through the mass of bracketry, it seems to reduce to:
+ c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1)
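+
+   Intuition: signed overflow on addition can only occur when the two
+   operands have the same sign (so ~(a^b) has its top bit set) and the
+   result's sign differs from theirs (so a^r has its top bit set); the
+   unsigned shift then extracts that top bit as the carry value c.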
+
+-}
+ = stmtsC [
+ CmmAssign res_r (CmmMachOp mo_wordAdd [aa,bb]),
+ CmmAssign res_c $
+ CmmMachOp mo_wordUShr [
+ CmmMachOp mo_wordAnd [
+ CmmMachOp mo_wordNot [CmmMachOp mo_wordXor [aa,bb]],
+ CmmMachOp mo_wordXor [aa, CmmReg res_r]
+ ],
+ CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1))
+ ]
+ ]
+
+
+emitPrimOp [res_r,res_c] IntSubCOp [aa,bb] live
+{- Similarly:
+ #define subIntCzh(r,c,a,b) \
+ { r = ((I_)(a)) - ((I_)(b)); \
+ c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \
+ >> (BITS_IN (I_) - 1); \
+ }
+
+ c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1)
+-}
+ = stmtsC [
+ CmmAssign res_r (CmmMachOp mo_wordSub [aa,bb]),
+ CmmAssign res_c $
+ CmmMachOp mo_wordUShr [
+ CmmMachOp mo_wordAnd [
+ CmmMachOp mo_wordXor [aa,bb],
+ CmmMachOp mo_wordXor [aa, CmmReg res_r]
+ ],
+ CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1))
+ ]
+ ]
+
+
+emitPrimOp [res] ParOp [arg] live
+ = do
+ -- for now, just implement this in a C function
+ -- later, we might want to inline it.
+ vols <- getVolatileRegs live
+ emitForeignCall' PlayRisky
+ [(res,NoHint)]
+ (CmmForeignCall newspark CCallConv)
+ [(CmmReg (CmmGlobal BaseReg), PtrHint), (arg,PtrHint)]
+ (Just vols)
+ where
+ newspark = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("newSpark")))
+
+emitPrimOp [res] ReadMutVarOp [mutv] live
+ = stmtC (CmmAssign res (cmmLoadIndexW mutv fixedHdrSize))
+
+emitPrimOp [] WriteMutVarOp [mutv,var] live
+ = do
+ stmtC (CmmStore (cmmOffsetW mutv fixedHdrSize) var)
+ vols <- getVolatileRegs live
+ emitForeignCall' PlayRisky
+ [{-no results-}]
+ (CmmForeignCall (CmmLit (CmmLabel mkDirty_MUT_VAR_Label))
+ CCallConv)
+ [(CmmReg (CmmGlobal BaseReg), PtrHint), (mutv,PtrHint)]
+ (Just vols)
+
+-- #define sizzeofByteArrayzh(r,a) \
+-- r = (((StgArrWords *)(a))->words * sizeof(W_))
+emitPrimOp [res] SizeofByteArrayOp [arg] live
+ = stmtC $
+ CmmAssign res (CmmMachOp mo_wordMul [
+ cmmLoadIndexW arg fixedHdrSize,
+ CmmLit (mkIntCLit wORD_SIZE)
+ ])
+
+-- #define sizzeofMutableByteArrayzh(r,a) \
+-- r = (((StgArrWords *)(a))->words * sizeof(W_))
+emitPrimOp [res] SizeofMutableByteArrayOp [arg] live
+ = emitPrimOp [res] SizeofByteArrayOp [arg] live
+
+
+-- #define touchzh(o) /* nothing */
+emitPrimOp [] TouchOp [arg] live
+ = nopC
+
+-- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a)
+emitPrimOp [res] ByteArrayContents_Char [arg] live
+ = stmtC (CmmAssign res (cmmOffsetB arg arrWordsHdrSize))
+
+-- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn)
+emitPrimOp [res] StableNameToIntOp [arg] live
+ = stmtC (CmmAssign res (cmmLoadIndexW arg fixedHdrSize))
+
+-- #define eqStableNamezh(r,sn1,sn2) \
+-- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn))
+emitPrimOp [res] EqStableNameOp [arg1,arg2] live
+ = stmtC (CmmAssign res (CmmMachOp mo_wordEq [
+ cmmLoadIndexW arg1 fixedHdrSize,
+ cmmLoadIndexW arg2 fixedHdrSize
+ ]))
+
+
+emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2] live
+ = stmtC (CmmAssign res (CmmMachOp mo_wordEq [arg1,arg2]))
+
+-- #define addrToHValuezh(r,a) r=(P_)a
+emitPrimOp [res] AddrToHValueOp [arg] live
+ = stmtC (CmmAssign res arg)
+
+-- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info))
+emitPrimOp [res] DataToTagOp [arg] live
+ = stmtC (CmmAssign res (getConstrTag arg))
+
+{- Freezing arrays-of-ptrs requires changing an info table, for the
+ benefit of the generational collector. It needs to scavenge mutable
+ objects, even if they are in old space. When they become immutable,
+ they can be removed from this scavenge list. -}
+
+-- #define unsafeFreezzeArrayzh(r,a)
+-- {
+-- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info);
+-- r = a;
+-- }
+emitPrimOp [res] UnsafeFreezeArrayOp [arg] live
+ = stmtsC [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)),
+ CmmAssign res arg ]
+
+-- #define unsafeFreezzeByteArrayzh(r,a) r=(a)
+emitPrimOp [res] UnsafeFreezeByteArrayOp [arg] live
+ = stmtC (CmmAssign res arg)
+
+-- Reading/writing pointer arrays
+
+emitPrimOp [r] ReadArrayOp [obj,ix] live = doReadPtrArrayOp r obj ix
+emitPrimOp [r] IndexArrayOp [obj,ix] live = doReadPtrArrayOp r obj ix
+emitPrimOp [] WriteArrayOp [obj,ix,v] live = doWritePtrArrayOp obj ix v
+
+-- IndexXXXoffAddr
+
+emitPrimOp res IndexOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args
+emitPrimOp res IndexOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args
+emitPrimOp res IndexOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args
+emitPrimOp res IndexOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args
+emitPrimOp res IndexOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args
+emitPrimOp res IndexOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args
+emitPrimOp res IndexOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args
+emitPrimOp res IndexOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args
+emitPrimOp res IndexOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args
+emitPrimOp res IndexOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args
+emitPrimOp res IndexOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args
+emitPrimOp res IndexOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args
+emitPrimOp res IndexOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args
+emitPrimOp res IndexOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args
+emitPrimOp res IndexOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args
+emitPrimOp res IndexOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args
+
+-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr.
+
+emitPrimOp res ReadOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args
+emitPrimOp res ReadOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args
+emitPrimOp res ReadOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args
+emitPrimOp res ReadOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args
+emitPrimOp res ReadOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args
+emitPrimOp res ReadOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args
+emitPrimOp res ReadOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args
+emitPrimOp res ReadOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args
+emitPrimOp res ReadOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args
+emitPrimOp res ReadOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args
+emitPrimOp res ReadOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args
+emitPrimOp res ReadOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args
+emitPrimOp res ReadOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args
+emitPrimOp res ReadOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args
+emitPrimOp res ReadOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args
+emitPrimOp res ReadOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args
+
+-- IndexXXXArray
+
+emitPrimOp res IndexByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args
+emitPrimOp res IndexByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args
+emitPrimOp res IndexByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args
+emitPrimOp res IndexByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args
+emitPrimOp res IndexByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args
+emitPrimOp res IndexByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args
+emitPrimOp res IndexByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args
+emitPrimOp res IndexByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args
+emitPrimOp res IndexByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args
+emitPrimOp res IndexByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args
+emitPrimOp res IndexByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args
+emitPrimOp res IndexByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args
+emitPrimOp res IndexByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args
+emitPrimOp res IndexByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args
+emitPrimOp res IndexByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args
+emitPrimOp res IndexByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args
+
+-- ReadXXXArray, identical to IndexXXXArray.
+
+emitPrimOp res ReadByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args
+emitPrimOp res ReadByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args
+emitPrimOp res ReadByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args
+emitPrimOp res ReadByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args
+emitPrimOp res ReadByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args
+emitPrimOp res ReadByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args
+emitPrimOp res ReadByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args
+emitPrimOp res ReadByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args
+emitPrimOp res ReadByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args
+emitPrimOp res ReadByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args
+emitPrimOp res ReadByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args
+emitPrimOp res ReadByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args
+emitPrimOp res ReadByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args
+emitPrimOp res ReadByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args
+emitPrimOp res ReadByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args
+emitPrimOp res ReadByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args
+
+-- WriteXXXoffAddr
+
+emitPrimOp res WriteOffAddrOp_Char args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args
+emitPrimOp res WriteOffAddrOp_WideChar args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args
+emitPrimOp res WriteOffAddrOp_Int args live = doWriteOffAddrOp Nothing wordRep res args
+emitPrimOp res WriteOffAddrOp_Word args live = doWriteOffAddrOp Nothing wordRep res args
+emitPrimOp res WriteOffAddrOp_Addr args live = doWriteOffAddrOp Nothing wordRep res args
+emitPrimOp res WriteOffAddrOp_Float args live = doWriteOffAddrOp Nothing F32 res args
+emitPrimOp res WriteOffAddrOp_Double args live = doWriteOffAddrOp Nothing F64 res args
+emitPrimOp res WriteOffAddrOp_StablePtr args live = doWriteOffAddrOp Nothing wordRep res args
+emitPrimOp res WriteOffAddrOp_Int8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args
+emitPrimOp res WriteOffAddrOp_Int16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args
+emitPrimOp res WriteOffAddrOp_Int32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args
+emitPrimOp res WriteOffAddrOp_Int64 args live = doWriteOffAddrOp Nothing I64 res args
+emitPrimOp res WriteOffAddrOp_Word8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args
+emitPrimOp res WriteOffAddrOp_Word16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args
+emitPrimOp res WriteOffAddrOp_Word32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args
+emitPrimOp res WriteOffAddrOp_Word64 args live = doWriteOffAddrOp Nothing I64 res args
+
+-- WriteXXXArray
+
+emitPrimOp res WriteByteArrayOp_Char args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args
+emitPrimOp res WriteByteArrayOp_WideChar args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args
+emitPrimOp res WriteByteArrayOp_Int args live = doWriteByteArrayOp Nothing wordRep res args
+emitPrimOp res WriteByteArrayOp_Word args live = doWriteByteArrayOp Nothing wordRep res args
+emitPrimOp res WriteByteArrayOp_Addr args live = doWriteByteArrayOp Nothing wordRep res args
+emitPrimOp res WriteByteArrayOp_Float args live = doWriteByteArrayOp Nothing F32 res args
+emitPrimOp res WriteByteArrayOp_Double args live = doWriteByteArrayOp Nothing F64 res args
+emitPrimOp res WriteByteArrayOp_StablePtr args live = doWriteByteArrayOp Nothing wordRep res args
+emitPrimOp res WriteByteArrayOp_Int8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args
+emitPrimOp res WriteByteArrayOp_Int16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args
+emitPrimOp res WriteByteArrayOp_Int32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args
+emitPrimOp res WriteByteArrayOp_Int64 args live = doWriteByteArrayOp Nothing I64 res args
+emitPrimOp res WriteByteArrayOp_Word8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args
+emitPrimOp res WriteByteArrayOp_Word16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args
+emitPrimOp res WriteByteArrayOp_Word32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args
+emitPrimOp res WriteByteArrayOp_Word64 args live = doWriteByteArrayOp Nothing I64 res args
+
+
+-- The rest just translate straightforwardly
+emitPrimOp [res] op [arg] live
+ | nopOp op
+ = stmtC (CmmAssign res arg)
+
+ | Just (mop,rep) <- narrowOp op
+ = stmtC (CmmAssign res (CmmMachOp (mop rep wordRep) [
+ CmmMachOp (mop wordRep rep) [arg]]))
+
+emitPrimOp [res] op args live
+ | Just prim <- callishOp op
+ = do vols <- getVolatileRegs live
+ emitForeignCall' PlayRisky
+ [(res,NoHint)]
+ (CmmPrim prim)
+ [(a,NoHint) | a<-args] -- ToDo: hints?
+ (Just vols)
+
+ | Just mop <- translateOp op
+ = let stmt = CmmAssign res (CmmMachOp mop args) in
+ stmtC stmt
+
+emitPrimOp _ op _ _
+ = pprPanic "emitPrimOp: can't translate PrimOp" (ppr op)
+
+
+-- These PrimOps are NOPs in Cmm
+
+nopOp Int2WordOp = True
+nopOp Word2IntOp = True
+nopOp Int2AddrOp = True
+nopOp Addr2IntOp = True
+nopOp ChrOp = True -- Int# and Char# are rep'd the same
+nopOp OrdOp = True
+nopOp _ = False
+
+-- These PrimOps turn into double casts
+
+narrowOp Narrow8IntOp = Just (MO_S_Conv, I8)
+narrowOp Narrow16IntOp = Just (MO_S_Conv, I16)
+narrowOp Narrow32IntOp = Just (MO_S_Conv, I32)
+narrowOp Narrow8WordOp = Just (MO_U_Conv, I8)
+narrowOp Narrow16WordOp = Just (MO_U_Conv, I16)
+narrowOp Narrow32WordOp = Just (MO_U_Conv, I32)
+narrowOp _ = Nothing
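+
+-- e.g. Narrow8IntOp becomes (via emitPrimOp above)
+--	CmmMachOp (MO_S_Conv I8 wordRep) [CmmMachOp (MO_S_Conv wordRep I8) [arg]]
+-- i.e. truncate to 8 bits, then sign-extend back up to a full word.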
+
+-- Native word signless ops
+
+translateOp IntAddOp = Just mo_wordAdd
+translateOp IntSubOp = Just mo_wordSub
+translateOp WordAddOp = Just mo_wordAdd
+translateOp WordSubOp = Just mo_wordSub
+translateOp AddrAddOp = Just mo_wordAdd
+translateOp AddrSubOp = Just mo_wordSub
+
+translateOp IntEqOp = Just mo_wordEq
+translateOp IntNeOp = Just mo_wordNe
+translateOp WordEqOp = Just mo_wordEq
+translateOp WordNeOp = Just mo_wordNe
+translateOp AddrEqOp = Just mo_wordEq
+translateOp AddrNeOp = Just mo_wordNe
+
+translateOp AndOp = Just mo_wordAnd
+translateOp OrOp = Just mo_wordOr
+translateOp XorOp = Just mo_wordXor
+translateOp NotOp = Just mo_wordNot
+translateOp SllOp = Just mo_wordShl
+translateOp SrlOp = Just mo_wordUShr
+
+translateOp AddrRemOp = Just mo_wordURem
+
+-- Native word signed ops
+
+translateOp IntMulOp = Just mo_wordMul
+translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordRep)
+translateOp IntQuotOp = Just mo_wordSQuot
+translateOp IntRemOp = Just mo_wordSRem
+translateOp IntNegOp = Just mo_wordSNeg
+
+
+translateOp IntGeOp = Just mo_wordSGe
+translateOp IntLeOp = Just mo_wordSLe
+translateOp IntGtOp = Just mo_wordSGt
+translateOp IntLtOp = Just mo_wordSLt
+
+translateOp ISllOp = Just mo_wordShl
+translateOp ISraOp = Just mo_wordSShr
+translateOp ISrlOp = Just mo_wordUShr
+
+-- Native word unsigned ops
+
+translateOp WordGeOp = Just mo_wordUGe
+translateOp WordLeOp = Just mo_wordULe
+translateOp WordGtOp = Just mo_wordUGt
+translateOp WordLtOp = Just mo_wordULt
+
+translateOp WordMulOp = Just mo_wordMul
+translateOp WordQuotOp = Just mo_wordUQuot
+translateOp WordRemOp = Just mo_wordURem
+
+translateOp AddrGeOp = Just mo_wordUGe
+translateOp AddrLeOp = Just mo_wordULe
+translateOp AddrGtOp = Just mo_wordUGt
+translateOp AddrLtOp = Just mo_wordULt
+
+-- Char# ops
+
+translateOp CharEqOp = Just (MO_Eq wordRep)
+translateOp CharNeOp = Just (MO_Ne wordRep)
+translateOp CharGeOp = Just (MO_U_Ge wordRep)
+translateOp CharLeOp = Just (MO_U_Le wordRep)
+translateOp CharGtOp = Just (MO_U_Gt wordRep)
+translateOp CharLtOp = Just (MO_U_Lt wordRep)
+
+-- Double ops
+
+translateOp DoubleEqOp = Just (MO_Eq F64)
+translateOp DoubleNeOp = Just (MO_Ne F64)
+translateOp DoubleGeOp = Just (MO_S_Ge F64)
+translateOp DoubleLeOp = Just (MO_S_Le F64)
+translateOp DoubleGtOp = Just (MO_S_Gt F64)
+translateOp DoubleLtOp = Just (MO_S_Lt F64)
+
+translateOp DoubleAddOp = Just (MO_Add F64)
+translateOp DoubleSubOp = Just (MO_Sub F64)
+translateOp DoubleMulOp = Just (MO_Mul F64)
+translateOp DoubleDivOp = Just (MO_S_Quot F64)
+translateOp DoubleNegOp = Just (MO_S_Neg F64)
+
+-- Float ops
+
+translateOp FloatEqOp = Just (MO_Eq F32)
+translateOp FloatNeOp = Just (MO_Ne F32)
+translateOp FloatGeOp = Just (MO_S_Ge F32)
+translateOp FloatLeOp = Just (MO_S_Le F32)
+translateOp FloatGtOp = Just (MO_S_Gt F32)
+translateOp FloatLtOp = Just (MO_S_Lt F32)
+
+translateOp FloatAddOp = Just (MO_Add F32)
+translateOp FloatSubOp = Just (MO_Sub F32)
+translateOp FloatMulOp = Just (MO_Mul F32)
+translateOp FloatDivOp = Just (MO_S_Quot F32)
+translateOp FloatNegOp = Just (MO_S_Neg F32)
+
+-- Conversions
+
+translateOp Int2DoubleOp = Just (MO_S_Conv wordRep F64)
+translateOp Double2IntOp = Just (MO_S_Conv F64 wordRep)
+
+translateOp Int2FloatOp = Just (MO_S_Conv wordRep F32)
+translateOp Float2IntOp = Just (MO_S_Conv F32 wordRep)
+
+translateOp Float2DoubleOp = Just (MO_S_Conv F32 F64)
+translateOp Double2FloatOp = Just (MO_S_Conv F64 F32)
+
+-- Word comparisons masquerading as more exotic things.
+
+translateOp SameMutVarOp = Just mo_wordEq
+translateOp SameMVarOp = Just mo_wordEq
+translateOp SameMutableArrayOp = Just mo_wordEq
+translateOp SameMutableByteArrayOp = Just mo_wordEq
+translateOp SameTVarOp = Just mo_wordEq
+translateOp EqStablePtrOp = Just mo_wordEq
+
+translateOp _ = Nothing
+
+-- These primops are implemented by CallishMachOps, because they sometimes
+-- turn into foreign calls depending on the backend.
+
+callishOp DoublePowerOp = Just MO_F64_Pwr
+callishOp DoubleSinOp = Just MO_F64_Sin
+callishOp DoubleCosOp = Just MO_F64_Cos
+callishOp DoubleTanOp = Just MO_F64_Tan
+callishOp DoubleSinhOp = Just MO_F64_Sinh
+callishOp DoubleCoshOp = Just MO_F64_Cosh
+callishOp DoubleTanhOp = Just MO_F64_Tanh
+callishOp DoubleAsinOp = Just MO_F64_Asin
+callishOp DoubleAcosOp = Just MO_F64_Acos
+callishOp DoubleAtanOp = Just MO_F64_Atan
+callishOp DoubleLogOp = Just MO_F64_Log
+callishOp DoubleExpOp = Just MO_F64_Exp
+callishOp DoubleSqrtOp = Just MO_F64_Sqrt
+
+callishOp FloatPowerOp = Just MO_F32_Pwr
+callishOp FloatSinOp = Just MO_F32_Sin
+callishOp FloatCosOp = Just MO_F32_Cos
+callishOp FloatTanOp = Just MO_F32_Tan
+callishOp FloatSinhOp = Just MO_F32_Sinh
+callishOp FloatCoshOp = Just MO_F32_Cosh
+callishOp FloatTanhOp = Just MO_F32_Tanh
+callishOp FloatAsinOp = Just MO_F32_Asin
+callishOp FloatAcosOp = Just MO_F32_Acos
+callishOp FloatAtanOp = Just MO_F32_Atan
+callishOp FloatLogOp = Just MO_F32_Log
+callishOp FloatExpOp = Just MO_F32_Exp
+callishOp FloatSqrtOp = Just MO_F32_Sqrt
+
+callishOp _ = Nothing
+
+------------------------------------------------------------------------------
+-- Helpers for translating various minor variants of array indexing.
+
+doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx]
+ = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx
+doIndexOffAddrOp _ _ _ _
+ = panic "CgPrimOp: doIndexOffAddrOp"
+
+doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx]
+ = mkBasicIndexedRead arrWordsHdrSize maybe_post_read_cast rep res addr idx
+doIndexByteArrayOp _ _ _ _
+ = panic "CgPrimOp: doIndexByteArrayOp"
+
+doReadPtrArrayOp res addr idx
+ = mkBasicIndexedRead arrPtrsHdrSize Nothing wordRep res addr idx
+
+
+doWriteOffAddrOp maybe_pre_write_cast rep [] [addr,idx,val]
+ = mkBasicIndexedWrite 0 maybe_pre_write_cast rep addr idx val
+doWriteOffAddrOp _ _ _ _
+ = panic "CgPrimOp: doWriteOffAddrOp"
+
+doWriteByteArrayOp maybe_pre_write_cast rep [] [addr,idx,val]
+ = mkBasicIndexedWrite arrWordsHdrSize maybe_pre_write_cast rep addr idx val
+doWriteByteArrayOp _ _ _ _
+ = panic "CgPrimOp: doWriteByteArrayOp"
+
+doWritePtrArrayOp addr idx val
+ = do stmtC (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel)))
+ mkBasicIndexedWrite arrPtrsHdrSize Nothing wordRep addr idx val
+
+
+mkBasicIndexedRead off Nothing read_rep res base idx
+ = stmtC (CmmAssign res (cmmLoadIndexOffExpr off read_rep base idx))
+mkBasicIndexedRead off (Just cast) read_rep res base idx
+ = stmtC (CmmAssign res (CmmMachOp cast [
+ cmmLoadIndexOffExpr off read_rep base idx]))
+
+mkBasicIndexedWrite off Nothing write_rep base idx val
+ = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) val)
+mkBasicIndexedWrite off (Just cast) write_rep base idx val
+ = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) (CmmMachOp cast [val]))
+
+-- ----------------------------------------------------------------------------
+-- Misc utils
+
+cmmIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr
+cmmIndexOffExpr off rep base idx
+ = cmmIndexExpr rep (cmmOffsetB base off) idx
+
+cmmLoadIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr
+cmmLoadIndexOffExpr off rep base idx
+ = CmmLoad (cmmIndexOffExpr off rep base idx) rep
+
+setInfo :: CmmExpr -> CmmExpr -> CmmStmt
+setInfo closure_ptr info_ptr = CmmStore closure_ptr info_ptr
+
diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs
new file mode 100644
index 0000000000..1488e34956
--- /dev/null
+++ b/compiler/codeGen/CgProf.hs
@@ -0,0 +1,478 @@
+-----------------------------------------------------------------------------
+--
+-- Code generation for profiling
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+module CgProf (
+ mkCCostCentre, mkCCostCentreStack,
+
+ -- Cost-centre Profiling
+ dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf,
+ enterCostCentre, enterCostCentrePAP, enterCostCentreThunk,
+ chooseDynCostCentres,
+ costCentreFrom,
+ curCCS, curCCSAddr,
+ emitCostCentreDecl, emitCostCentreStackDecl,
+ emitRegisterCC, emitRegisterCCS,
+ emitSetCCC, emitCCS,
+
+ -- Lag/drag/void stuff
+ ldvEnter, ldvRecordCreate
+ ) where
+
+#include "HsVersions.h"
+#include "MachDeps.h"
+ -- For WORD_SIZE_IN_BITS only.
+#include "../includes/Constants.h"
+ -- For LDV_CREATE_MASK, LDV_STATE_USE
+ -- which are StgWords
+#include "../includes/DerivedConstants.h"
+ -- For REP_xxx constants, which are MachReps
+
+import ClosureInfo ( ClosureInfo, closureSize,
+ closureName, isToplevClosure, closureReEntrant, )
+import CgUtils
+import CgMonad
+import SMRep ( StgWord, profHdrSize )
+
+import Cmm
+import MachOp
+import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr )
+import CLabel ( mkCCLabel, mkCCSLabel, mkRtsDataLabel )
+
+import Module ( moduleString )
+import Id ( Id )
+import CostCentre
+import StgSyn ( GenStgExpr(..), StgExpr )
+import StaticFlags ( opt_SccProfilingOn )
+import FastString ( FastString, mkFastString, LitString )
+import Constants -- Lots of field offsets
+import Outputable
+
+import Maybe
+import Char ( ord )
+import Monad ( when )
+
+-----------------------------------------------------------------------------
+--
+-- Cost-centre-stack Profiling
+--
+-----------------------------------------------------------------------------
+
+-- Expression representing the current cost centre stack
+curCCS :: CmmExpr
+curCCS = CmmLoad curCCSAddr wordRep
+
+-- Address of current CCS variable, for storing into
+curCCSAddr :: CmmExpr
+curCCSAddr = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCCS")))
+
+mkCCostCentre :: CostCentre -> CmmLit
+mkCCostCentre cc = CmmLabel (mkCCLabel cc)
+
+mkCCostCentreStack :: CostCentreStack -> CmmLit
+mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs)
+
+costCentreFrom :: CmmExpr -- A closure pointer
+ -> CmmExpr -- The cost centre from that closure
+costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) wordRep
+
+staticProfHdr :: CostCentreStack -> [CmmLit]
+-- The profiling header words in a static closure
+-- Was SET_STATIC_PROF_HDR
+staticProfHdr ccs = ifProfilingL [mkCCostCentreStack ccs,
+ staticLdvInit]
+
+dynProfHdr :: CmmExpr -> [CmmExpr]
+-- Profiling header words in a dynamic closure
+dynProfHdr ccs = ifProfilingL [ccs, dynLdvInit]
+
+initUpdFrameProf :: CmmExpr -> Code
+-- Initialise the profiling field of an update frame
+initUpdFrameProf frame_amode
+ = ifProfiling $ -- frame->header.prof.ccs = CCCS
+ stmtC (CmmStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS)
+	-- frame->header.prof.hp.rs = NULL (or frame->header.prof.hp.ldvw = 0)
+ -- is unnecessary because it is not used anyhow.
+
+-- -----------------------------------------------------------------------------
+-- Recording allocation in a cost centre
+
+-- | Record the allocation of a closure. The CmmExpr is the cost
+-- centre stack to which to attribute the allocation.
+profDynAlloc :: ClosureInfo -> CmmExpr -> Code
+profDynAlloc cl_info ccs
+ = ifProfiling $
+ profAlloc (CmmLit (mkIntCLit (closureSize cl_info))) ccs
+
+-- | Record the allocation of a closure (size is given by a CmmExpr)
+-- The size must be in words, because the allocation counter in a CCS counts
+-- in words.
+profAlloc :: CmmExpr -> CmmExpr -> Code
+profAlloc words ccs
+ = ifProfiling $
+ stmtC (addToMemE alloc_rep
+ (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc)
+ (CmmMachOp (MO_U_Conv wordRep alloc_rep) $
+ [CmmMachOp mo_wordSub [words,
+ CmmLit (mkIntCLit profHdrSize)]]))
+ -- subtract the "profiling overhead", which is the
+ -- profiling header in a closure.
+ where
+ alloc_rep = REP_CostCentreStack_mem_alloc
+
+-- ----------------------------------------------------------------------
+-- Setting the cost centre in a new closure
+
+chooseDynCostCentres :: CostCentreStack
+ -> [Id] -- Args
+ -> StgExpr -- Body
+ -> FCode (CmmExpr, CmmExpr)
+-- Called when allocating a closure
+-- Tells which cost centre to put in the object, and which
+-- to blame the cost of allocation on
+chooseDynCostCentres ccs args body = do
+ -- Cost-centre we record in the object
+ use_ccs <- emitCCS ccs
+
+ -- Cost-centre on whom we blame the allocation
+ let blame_ccs
+ | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS)
+ | otherwise = use_ccs
+
+ return (use_ccs, blame_ccs)
+
+
+-- Some CostCentreStacks are a sequence of pushes on top of CCCS.
+-- These pushes must be performed before we can refer to the stack in
+-- an expression.
+emitCCS :: CostCentreStack -> FCode CmmExpr
+emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's)
+ where
+ (cc's, ccs') = decomposeCCS ccs
+
+ push_em ccs [] = return ccs
+ push_em ccs (cc:rest) = do
+ tmp <- newTemp wordRep
+ pushCostCentre tmp ccs cc
+ push_em (CmmReg tmp) rest
+
+ccsExpr :: CostCentreStack -> CmmExpr
+ccsExpr ccs
+ | isCurrentCCS ccs = curCCS
+ | otherwise = CmmLit (mkCCostCentreStack ccs)
+
+
+isBox :: StgExpr -> Bool
+-- If it's an utterly trivial RHS, then it must be
+-- one introduced by boxHigherOrderArgs for profiling,
+-- so we charge it to "OVERHEAD".
+-- This looks like a GROSS HACK to me --SDM
+isBox (StgApp fun []) = True
+isBox other = False
+
+
+-- -----------------------------------------------------------------------
+-- Setting the current cost centre on entry to a closure
+
+-- For lexically scoped profiling we have to load the cost centre from
+-- the closure entered, if the costs are not supposed to be inherited.
+-- This is done immediately on entering the fast entry point.
+
+-- Load current cost centre from closure, if not inherited.
+-- Node is guaranteed to point to it, if profiling and not inherited.
+
+enterCostCentre
+ :: ClosureInfo
+ -> CostCentreStack
+ -> StgExpr -- The RHS of the closure
+ -> Code
+
+-- We used to have a special case for bindings of form
+-- f = g True
+-- where g has arity 2. The RHS is a thunk, but we don't
+-- need to update it; and we want to subsume costs.
+-- We don't have these sort of PAPs any more, so the special
+-- case has gone away.
+
+enterCostCentre closure_info ccs body
+ = ifProfiling $
+ ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs)
+ enter_cost_centre closure_info ccs body
+
+enter_cost_centre closure_info ccs body
+ | isSubsumedCCS ccs
+ = ASSERT(isToplevClosure closure_info)
+ ASSERT(re_entrant)
+ enter_ccs_fsub
+
+ | isDerivedFromCurrentCCS ccs
+ = do {
+ if re_entrant && not is_box
+ then
+ enter_ccs_fun node_ccs
+ else
+ stmtC (CmmStore curCCSAddr node_ccs)
+
+ -- don't forget to bump the scc count. This closure might have been
+ -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal
+ -- pass has turned into simply let x = e in ...x... and attached
+ -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that
+ -- we don't lose the scc counter, bump it in the entry code for x.
+ -- ToDo: for a multi-push we should really bump the counter for
+ -- each of the intervening CCSs, not just the top one.
+ ; when (not (isCurrentCCS ccs)) $
+ stmtC (bumpSccCount curCCS)
+ }
+
+ | isCafCCS ccs
+ = ASSERT(isToplevClosure closure_info)
+ ASSERT(not re_entrant)
+ do { -- This is just a special case of the isDerivedFromCurrentCCS
+ -- case above. We could delete this, but it's a micro
+ -- optimisation and saves a bit of code.
+ stmtC (CmmStore curCCSAddr enc_ccs)
+ ; stmtC (bumpSccCount node_ccs)
+ }
+
+ | otherwise
+ = panic "enterCostCentre"
+ where
+ enc_ccs = CmmLit (mkCCostCentreStack ccs)
+ re_entrant = closureReEntrant closure_info
+ node_ccs = costCentreFrom (CmmReg nodeReg)
+ is_box = isBox body
+
+-- set the current CCS when entering a PAP
+enterCostCentrePAP :: CmmExpr -> Code
+enterCostCentrePAP closure =
+ ifProfiling $ do
+ enter_ccs_fun (costCentreFrom closure)
+ enteringPAP 1
+
+enterCostCentreThunk :: CmmExpr -> Code
+enterCostCentreThunk closure =
+ ifProfiling $ do
+ stmtC $ CmmStore curCCSAddr (costCentreFrom closure)
+
+enter_ccs_fun stack = emitRtsCall SLIT("EnterFunCCS") [(stack,PtrHint)]
+ -- ToDo: vols
+
+enter_ccs_fsub = enteringPAP 0
+
+-- When entering a PAP, EnterFunCCS is called by both the PAP entry
+-- code and the function entry code; we don't want the function's
+-- entry code to also update CCCS in the event that it was called via
+-- a PAP, so we set the flag entering_PAP to indicate that we are
+-- entering via a PAP.
+enteringPAP :: Integer -> Code
+enteringPAP n
+ = stmtC (CmmStore (CmmLit (CmmLabel (mkRtsDataLabel SLIT("entering_PAP"))))
+ (CmmLit (CmmInt n cIntRep)))
+
+ifProfiling :: Code -> Code
+ifProfiling code
+ | opt_SccProfilingOn = code
+ | otherwise = nopC
+
+ifProfilingL :: [a] -> [a]
+ifProfilingL xs
+ | opt_SccProfilingOn = xs
+ | otherwise = []
+
+
+-- ---------------------------------------------------------------------------
+-- Initialising Cost Centres & CCSs
+
+emitCostCentreDecl
+ :: CostCentre
+ -> Code
+emitCostCentreDecl cc = do
+ { label <- mkStringCLit (costCentreUserName cc)
+ ; modl <- mkStringCLit (moduleString (cc_mod cc))
+ ; let
+ lits = [ zero, -- StgInt ccID,
+ label, -- char *label,
+ modl, -- char *module,
+ zero, -- StgWord time_ticks
+ zero64, -- StgWord64 mem_alloc
+ subsumed, -- StgInt is_caf
+ zero -- struct _CostCentre *link
+ ]
+ ; emitDataLits (mkCCLabel cc) lits
+ }
+ where
+ subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF
+ | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring
+
+
+emitCostCentreStackDecl
+ :: CostCentreStack
+ -> Code
+emitCostCentreStackDecl ccs
+ | Just cc <- maybeSingletonCCS ccs = do
+ { let
+ -- Note: to avoid making any assumptions about how the
+ -- C compiler (that compiles the RTS, in particular) does
+ -- layouts of structs containing long-longs, simply
+ -- pad out the struct with zero words until we hit the
+ -- size of the overall struct (which we get via DerivedConstants.h)
+ --
+ lits = zero : mkCCostCentre cc : replicate (sizeof_ccs_words - 2) zero
+ ; emitDataLits (mkCCSLabel ccs) lits
+ }
+ | otherwise = pprPanic "emitCostCentreStackDecl" (ppr ccs)
+
+zero = mkIntCLit 0
+zero64 = CmmInt 0 I64
+
+sizeof_ccs_words :: Int
+sizeof_ccs_words
+ -- round up to the next word.
+ | ms == 0 = ws
+ | otherwise = ws + 1
+ where
+ (ws,ms) = SIZEOF_CostCentreStack `divMod` wORD_SIZE
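+
+-- e.g. (a sketch, with a hypothetical size) on a 32-bit target where
+-- SIZEOF_CostCentreStack is 70 bytes: 70 `divMod` 4 == (17,2), so we
+-- pad out to 18 words.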
+
+-- ---------------------------------------------------------------------------
+-- Registering CCs and CCSs
+
+-- (cc)->link = CC_LIST;
+-- CC_LIST = (cc);
+-- (cc)->ccID = CC_ID++;
+
+emitRegisterCC :: CostCentre -> Code
+emitRegisterCC cc = do
+ { tmp <- newTemp cIntRep
+ ; stmtsC [
+ CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_link)
+ (CmmLoad cC_LIST wordRep),
+ CmmStore cC_LIST cc_lit,
+ CmmAssign tmp (CmmLoad cC_ID cIntRep),
+ CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_ccID) (CmmReg tmp),
+ CmmStore cC_ID (cmmRegOffB tmp 1)
+ ]
+ }
+ where
+ cc_lit = CmmLit (CmmLabel (mkCCLabel cc))
+
+-- (ccs)->prevStack = CCS_LIST;
+-- CCS_LIST = (ccs);
+-- (ccs)->ccsID = CCS_ID++;
+
+emitRegisterCCS :: CostCentreStack -> Code
+emitRegisterCCS ccs = do
+ { tmp <- newTemp cIntRep
+ ; stmtsC [
+ CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_prevStack)
+ (CmmLoad cCS_LIST wordRep),
+ CmmStore cCS_LIST ccs_lit,
+ CmmAssign tmp (CmmLoad cCS_ID cIntRep),
+ CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_ccsID) (CmmReg tmp),
+ CmmStore cCS_ID (cmmRegOffB tmp 1)
+ ]
+ }
+ where
+ ccs_lit = CmmLit (CmmLabel (mkCCSLabel ccs))
+
+
+cC_LIST = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CC_LIST")))
+cC_ID = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CC_ID")))
+
+cCS_LIST = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCS_LIST")))
+cCS_ID = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCS_ID")))
+
+-- ---------------------------------------------------------------------------
+-- Set the current cost centre stack
+
+emitSetCCC :: CostCentre -> Code
+emitSetCCC cc
+ | not opt_SccProfilingOn = nopC
+ | otherwise = do
+ tmp <- newTemp wordRep
+ ASSERT( sccAbleCostCentre cc )
+ pushCostCentre tmp curCCS cc
+ stmtC (CmmStore curCCSAddr (CmmReg tmp))
+ when (isSccCountCostCentre cc) $
+ stmtC (bumpSccCount curCCS)
+
+pushCostCentre :: CmmReg -> CmmExpr -> CostCentre -> Code
+pushCostCentre result ccs cc
+ = emitRtsCallWithResult result PtrHint
+ SLIT("PushCostCentre") [(ccs,PtrHint),
+ (CmmLit (mkCCostCentre cc), PtrHint)]
+
+bumpSccCount :: CmmExpr -> CmmStmt
+bumpSccCount ccs
+ = addToMem REP_CostCentreStack_scc_count
+ (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1
+
+-----------------------------------------------------------------------------
+--
+-- Lag/drag/void stuff
+--
+-----------------------------------------------------------------------------
+
+--
+-- Initial value for the LDV field in a static closure
+--
+staticLdvInit :: CmmLit
+staticLdvInit = zeroCLit
+
+--
+-- Initial value of the LDV field in a dynamic closure
+--
+dynLdvInit :: CmmExpr
+dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE
+ CmmMachOp mo_wordOr [
+ CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ],
+ CmmLit (mkWordCLit lDV_STATE_CREATE)
+ ]
+
+--
+-- Initialise the LDV word of a new closure
+--
+ldvRecordCreate :: CmmExpr -> Code
+ldvRecordCreate closure = stmtC $ CmmStore (ldvWord closure) dynLdvInit
+
+--
+-- Called when a closure is entered, marks the closure as having been "used".
+-- The closure is not an 'inherently used' one.
+-- The closure is not IND or IND_OLDGEN because neither is considered for LDV
+-- profiling.
+--
+ldvEnter :: CmmExpr -> Code
+-- Argument is a closure pointer
+ldvEnter cl_ptr
+ = ifProfiling $
+ -- if (era > 0) {
+ -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) |
+ -- era | LDV_STATE_USE }
+ emitIf (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit])
+ (stmtC (CmmStore ldv_wd new_ldv_wd))
+ where
+ ldv_wd = ldvWord cl_ptr
+ new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd wordRep)
+ (CmmLit (mkWordCLit lDV_CREATE_MASK)))
+ (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE)))
+
+loadEra :: CmmExpr
+loadEra = CmmMachOp (MO_U_Conv cIntRep wordRep)
+ [CmmLoad (mkLblExpr (mkRtsDataLabel SLIT("era"))) cIntRep]
+
+ldvWord :: CmmExpr -> CmmExpr
+-- Takes the address of a closure, and returns
+-- the address of the LDV word in the closure
+ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw
+
+-- LDV constants, from ghc/includes/Constants.h
+lDV_SHIFT = (LDV_SHIFT :: Int)
+--lDV_STATE_MASK = (LDV_STATE_MASK :: StgWord)
+lDV_CREATE_MASK = (LDV_CREATE_MASK :: StgWord)
+--lDV_LAST_MASK = (LDV_LAST_MASK :: StgWord)
+lDV_STATE_CREATE = (LDV_STATE_CREATE :: StgWord)
+lDV_STATE_USE = (LDV_STATE_USE :: StgWord)
+
diff --git a/compiler/codeGen/CgStackery.lhs b/compiler/codeGen/CgStackery.lhs
new file mode 100644
index 0000000000..7cb310d521
--- /dev/null
+++ b/compiler/codeGen/CgStackery.lhs
@@ -0,0 +1,339 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+% $Id: CgStackery.lhs,v 1.27 2004/09/30 10:35:49 simonpj Exp $
+%
+\section[CgStackery]{Stack management functions}
+
+Stack-twiddling operations, which are pretty low-down and grimy.
+(This is the module that knows all about stack layouts, etc.)
+
+\begin{code}
+module CgStackery (
+ spRel, getVirtSp, getRealSp, setRealSp,
+ setRealAndVirtualSp, getSpRelOffset,
+
+ allocPrimStack, allocStackTop, deAllocStackTop,
+ adjustStackHW, getFinalStackHW,
+ setStackFrame, getStackFrame,
+ mkVirtStkOffsets, mkStkAmodes,
+ freeStackSlots,
+ pushUpdateFrame, emitPushUpdateFrame,
+ ) where
+
+#include "HsVersions.h"
+
+import CgMonad
+import CgUtils ( cmmOffsetB, cmmRegOffW )
+import CgProf ( initUpdFrameProf )
+import SMRep
+import Cmm
+import CmmUtils ( CmmStmts, mkLblExpr )
+import CLabel ( mkUpdInfoLabel )
+import Constants
+import Util ( sortLe )
+import FastString ( LitString )
+import OrdList ( toOL )
+import Outputable
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[CgUsages-stackery]{Monad things for fiddling with stack usage}
+%* *
+%************************************************************************
+
+spRel is a little function that abstracts the stack direction. Note that most
+of the code generator is dependent on the stack direction anyway, so
+changing this on its own spells certain doom. ToDo: remove?
+
+ THIS IS DIRECTION SENSITIVE!
+
+Stack grows down, positive virtual offsets correspond to negative
+additions to the stack pointer.
+
+\begin{code}
+spRel :: VirtualSpOffset -- virtual offset of Sp
+ -> VirtualSpOffset -- virtual offset of The Thing
+ -> WordOff -- integer offset
+spRel sp off = sp - off
+\end{code}
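+
+For example (a sketch): if the real Sp sits at virtual offset 10, a slot
+at virtual offset 8 is addressed as Sp plus @spRel 10 8 = 2@ words; the
+higher address is deeper into the stack, because the stack grows downwards.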
+
+@setRealAndVirtualSp@ sets into the environment the offsets of the
+current position of the real and virtual stack pointers in the current
+stack frame. The high-water mark is set too. It generates no code.
+It is used to initialise things at the beginning of a closure body.
+
+\begin{code}
+setRealAndVirtualSp :: VirtualSpOffset -- New real Sp
+ -> Code
+
+setRealAndVirtualSp new_sp
+ = do { stk_usg <- getStkUsage
+ ; setStkUsage (stk_usg {virtSp = new_sp,
+ realSp = new_sp,
+ hwSp = new_sp}) }
+
+getVirtSp :: FCode VirtualSpOffset
+getVirtSp
+ = do { stk_usg <- getStkUsage
+ ; return (virtSp stk_usg) }
+
+getRealSp :: FCode VirtualSpOffset
+getRealSp
+ = do { stk_usg <- getStkUsage
+ ; return (realSp stk_usg) }
+
+setRealSp :: VirtualSpOffset -> Code
+setRealSp new_real_sp
+ = do { stk_usg <- getStkUsage
+ ; setStkUsage (stk_usg {realSp = new_real_sp}) }
+
+getSpRelOffset :: VirtualSpOffset -> FCode CmmExpr
+getSpRelOffset virtual_offset
+ = do { real_sp <- getRealSp
+ ; return (cmmRegOffW spReg (spRel real_sp virtual_offset)) }
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[CgStackery-layout]{Laying out a stack frame}
+%* *
+%************************************************************************
+
+'mkVirtStkOffsets' is given a list of arguments. The first argument
+gets the /largest/ virtual stack offset (remember, virtual offsets
+increase towards the top of stack).
+
+\begin{code}
+mkVirtStkOffsets
+ :: VirtualSpOffset -- Offset of the last allocated thing
+ -> [(CgRep,a)] -- things to make offsets for
+ -> (VirtualSpOffset, -- OUTPUTS: Topmost allocated word
+ [(a, VirtualSpOffset)]) -- things with offsets (voids filtered out)
+
+mkVirtStkOffsets init_Sp_offset things
+ = loop init_Sp_offset [] (reverse things)
+ where
+ loop offset offs [] = (offset,offs)
+ loop offset offs ((VoidArg,t):things) = loop offset offs things
+ -- ignore Void arguments
+ loop offset offs ((rep,t):things)
+ = loop thing_slot ((t,thing_slot):offs) things
+ where
+ thing_slot = offset + cgRepSizeW rep
+ -- offset of thing is offset+size, because we're
+ -- growing the stack *downwards* as the offsets increase.
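+
+-- A worked example (a sketch, for hypothetical amodes x and y, each a
+-- one-word 'PtrArg'):
+--   mkVirtStkOffsets 0 [(PtrArg, x), (PtrArg, y)]
+--     == (2, [(x, 2), (y, 1)])
+-- i.e. the first thing, x, gets the largest virtual offset.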
+
+-- | 'mkStkAmodes' is a higher-level version of
+-- 'mkVirtStkOffsets'. It starts from the tail-call locations.
+-- It returns a single list of addressing modes for the stack
+-- locations, and therefore is in the monad. It /doesn't/ adjust the
+-- high water mark.
+
+mkStkAmodes
+ :: VirtualSpOffset -- Tail call positions
+ -> [(CgRep,CmmExpr)] -- things to make offsets for
+ -> FCode (VirtualSpOffset, -- OUTPUTS: Topmost allocated word
+ CmmStmts) -- Assignments to appropriate stk slots
+
+mkStkAmodes tail_Sp things
+ = do { rSp <- getRealSp
+ ; let (last_Sp_offset, offsets) = mkVirtStkOffsets tail_Sp things
+ abs_cs = [ CmmStore (cmmRegOffW spReg (spRel rSp offset)) amode
+ | (amode, offset) <- offsets
+ ]
+ ; returnFC (last_Sp_offset, toOL abs_cs) }
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[CgStackery-monadery]{Inside-monad functions for stack manipulation}
+%* *
+%************************************************************************
+
+Allocate a virtual offset for something.
+
+\begin{code}
+allocPrimStack :: CgRep -> FCode VirtualSpOffset
+allocPrimStack rep
+ = do { stk_usg <- getStkUsage
+ ; let free_stk = freeStk stk_usg
+ ; case find_block free_stk of
+ Nothing -> do
+ { let push_virt_sp = virtSp stk_usg + size
+ ; setStkUsage (stk_usg { virtSp = push_virt_sp,
+ hwSp = hwSp stk_usg `max` push_virt_sp })
+ -- Adjust high water mark
+ ; return push_virt_sp }
+ Just slot -> do
+ { setStkUsage (stk_usg { freeStk = delete_block free_stk slot })
+ ; return slot }
+ }
+ where
+ size :: WordOff
+ size = cgRepSizeW rep
+
+ -- Find_block looks for a contiguous chunk of free slots
+ -- returning the offset of its topmost word
+ find_block :: [VirtualSpOffset] -> Maybe VirtualSpOffset
+ find_block [] = Nothing
+ find_block (slot:slots)
+ | take size (slot:slots) == [slot..top_slot]
+ = Just top_slot
+ | otherwise
+ = find_block slots
+ where -- The stack grows downwards, with increasing virtual offsets.
+ -- Therefore, the address of a multi-word object is the *highest*
+ -- virtual offset it occupies (top_slot below).
+ top_slot = slot+size-1
+
+ delete_block free_stk slot = [ s | s <- free_stk,
+ (s<=slot-size) || (s>slot) ]
+ -- Retain slots which are not in the range
+ -- slot-size+1..slot
+\end{code}
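+
+For example (a sketch): with free list @[3,4,5]@ and a two-word request,
+@find_block@ matches slots 3 and 4 and returns their topmost word, 4;
+@delete_block@ then leaves @[5]@ on the free list.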
+
+Allocate a chunk ON TOP OF the stack.
+
+\begin{code}
+allocStackTop :: WordOff -> FCode VirtualSpOffset
+allocStackTop size
+ = do { stk_usg <- getStkUsage
+ ; let push_virt_sp = virtSp stk_usg + size
+ ; setStkUsage (stk_usg { virtSp = push_virt_sp,
+ hwSp = hwSp stk_usg `max` push_virt_sp })
+ ; return push_virt_sp }
+\end{code}
+
+Pop some words from the current top of stack. This is used for
+de-allocating the return address in a case alternative.
+
+\begin{code}
+deAllocStackTop :: WordOff -> FCode VirtualSpOffset
+deAllocStackTop size
+ = do { stk_usg <- getStkUsage
+ ; let pop_virt_sp = virtSp stk_usg - size
+ ; setStkUsage (stk_usg { virtSp = pop_virt_sp })
+ ; return pop_virt_sp }
+\end{code}
+
+\begin{code}
+adjustStackHW :: VirtualSpOffset -> Code
+adjustStackHW offset
+ = do { stk_usg <- getStkUsage
+ ; setStkUsage (stk_usg { hwSp = hwSp stk_usg `max` offset }) }
+\end{code}
+
+A knot-tying beast.
+
+\begin{code}
+getFinalStackHW :: (VirtualSpOffset -> Code) -> Code
+getFinalStackHW fcode
+ = do { fixC (\hw_sp -> do
+ { fcode hw_sp
+ ; stk_usg <- getStkUsage
+ ; return (hwSp stk_usg) })
+ ; return () }
+\end{code}
+
+\begin{code}
+setStackFrame :: VirtualSpOffset -> Code
+setStackFrame offset
+ = do { stk_usg <- getStkUsage
+ ; setStkUsage (stk_usg { frameSp = offset }) }
+
+getStackFrame :: FCode VirtualSpOffset
+getStackFrame
+ = do { stk_usg <- getStkUsage
+ ; return (frameSp stk_usg) }
+\end{code}
+
+
+%********************************************************
+%* *
+%* Setting up update frames *
+%* *
+%********************************************************
+
+@pushUpdateFrame@ $updatee$ pushes a general update frame which
+points to $updatee$ as the thing to be updated. It is only used
+when a thunk has just been entered, so the (real) stack pointers
+are guaranteed to be nicely aligned with the top of stack.
+@pushUpdateFrame@ adjusts the virtual and tail stack pointers
+to reflect the frame pushed.
+
+\begin{code}
+pushUpdateFrame :: CmmExpr -> Code -> Code
+
+pushUpdateFrame updatee code
+ = do {
+#ifdef DEBUG
+ EndOfBlockInfo _ sequel <- getEndOfBlockInfo ;
+ ASSERT(case sequel of { OnStack -> True; _ -> False})
+#endif
+
+ allocStackTop (fixedHdrSize +
+ sIZEOF_StgUpdateFrame_NoHdr `quot` wORD_SIZE)
+ ; vsp <- getVirtSp
+ ; setStackFrame vsp
+ ; frame_addr <- getSpRelOffset vsp
+ -- The location of the lowest-address
+ -- word of the update frame itself
+
+ ; setEndOfBlockInfo (EndOfBlockInfo vsp UpdateCode) $
+ do { emitPushUpdateFrame frame_addr updatee
+ ; code }
+ }
+
+emitPushUpdateFrame :: CmmExpr -> CmmExpr -> Code
+emitPushUpdateFrame frame_addr updatee = do
+ stmtsC [ -- Set the info word
+ CmmStore frame_addr (mkLblExpr mkUpdInfoLabel)
+ , -- And the updatee
+ CmmStore (cmmOffsetB frame_addr off_updatee) updatee ]
+ initUpdFrameProf frame_addr
+
+off_updatee :: ByteOff
+off_updatee = fixedHdrSize*wORD_SIZE + oFFSET_StgUpdateFrame_updatee
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[CgStackery-free]{Free stack slots}
+%* *
+%************************************************************************
+
+Explicitly free some stack space.
+
+\begin{code}
+freeStackSlots :: [VirtualSpOffset] -> Code
+freeStackSlots extra_free
+ = do { stk_usg <- getStkUsage
+ ; let all_free = addFreeSlots (freeStk stk_usg) (sortLe (<=) extra_free)
+ ; let (new_vsp, new_free) = trim (virtSp stk_usg) all_free
+ ; setStkUsage (stk_usg { virtSp = new_vsp, freeStk = new_free }) }
+
+addFreeSlots :: [VirtualSpOffset] -> [VirtualSpOffset] -> [VirtualSpOffset]
+-- Merge the two, assuming both are in increasing order
+addFreeSlots cs [] = cs
+addFreeSlots [] ns = ns
+addFreeSlots (c:cs) (n:ns)
+ | c < n = c : addFreeSlots cs (n:ns)
+ | otherwise = n : addFreeSlots (c:cs) ns
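+-- e.g. addFreeSlots [1,4] [2,3] == [1,2,3,4]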
+
+trim :: VirtualSpOffset -> [VirtualSpOffset] -> (VirtualSpOffset, [VirtualSpOffset])
+-- Try to trim back the virtual stack pointer, where there is a
+-- continuous bunch of free slots at the end of the free list
+trim vsp [] = (vsp, [])
+trim vsp (slot:slots)
+ = case trim vsp slots of
+ (vsp', [])
+ | vsp' < slot -> pprTrace "trim: strange" (ppr vsp <+> ppr (slot:slots))
+ (vsp', [])
+ | vsp' == slot -> (vsp'-1, [])
+ | otherwise -> (vsp', [slot])
+ (vsp', slots') -> (vsp', slot:slots')
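+-- Worked examples:
+--   trim 5 [3,4,5] == (2, [])   -- the three topmost slots are retracted
+--   trim 5 [2,5]   == (4, [2])  -- slot 5 is retracted; slot 2 stays free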
+\end{code}
diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs
new file mode 100644
index 0000000000..dd7327b745
--- /dev/null
+++ b/compiler/codeGen/CgTailCall.lhs
@@ -0,0 +1,455 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+% $Id: CgTailCall.lhs,v 1.43 2005/06/21 10:44:41 simonmar Exp $
+%
+%********************************************************
+%* *
+\section[CgTailCall]{Tail calls: converting @StgApps@}
+%* *
+%********************************************************
+
+\begin{code}
+module CgTailCall (
+ cgTailCall, performTailCall,
+ performReturn, performPrimReturn,
+ emitKnownConReturnCode, emitAlgReturnCode,
+ returnUnboxedTuple, ccallReturnUnboxedTuple,
+ pushUnboxedTuple,
+ tailCallPrimOp,
+
+ pushReturnAddress
+ ) where
+
+#include "HsVersions.h"
+
+import CgMonad
+import CgBindery ( getArgAmodes, getCgIdInfo, CgIdInfo, maybeLetNoEscape,
+ idInfoToAmode, cgIdInfoId, cgIdInfoLF,
+ cgIdInfoArgRep )
+import CgInfoTbls ( entryCode, emitDirectReturnInstr, dataConTagZ,
+ emitVectoredReturnInstr, closureInfoPtr )
+import CgCallConv
+import CgStackery ( setRealSp, mkStkAmodes, adjustStackHW,
+ getSpRelOffset )
+import CgHeapery ( setRealHp, getHpRelOffset )
+import CgUtils ( emitSimultaneously )
+import CgTicky
+import ClosureInfo
+import SMRep ( CgRep, isVoidArg, separateByPtrFollowness )
+import Cmm
+import CmmUtils
+import CLabel ( CLabel, mkRtsPrimOpLabel, mkSeqInfoLabel )
+import Type ( isUnLiftedType )
+import Id ( Id, idName, idUnique, idType )
+import DataCon ( DataCon, dataConTyCon )
+import StgSyn ( StgArg )
+import TyCon ( TyCon )
+import PrimOp ( PrimOp )
+import Outputable
+
+import Monad ( when )
+
+-----------------------------------------------------------------------------
+-- Tail Calls
+
+cgTailCall :: Id -> [StgArg] -> Code
+
+-- Here's the code we generate for a tail call. (NB there may be no
+-- arguments, in which case this boils down to just entering a variable.)
+--
+-- * Put args in the top locations of the stack.
+-- * Adjust the stack ptr
+-- * Make R1 point to the function closure if necessary.
+-- * Perform the call.
+--
+-- Things to be careful about:
+--
+-- * Don't overwrite stack locations before you have finished with
+-- them (remember you need the function and the as-yet-unmoved
+-- arguments).
+-- * Preferably, generate no code to replace x by x on the stack (a
+-- common situation in tail-recursion).
+-- * Adjust the stack high water mark appropriately.
+--
+-- Treat unboxed locals exactly like literals (above) except use the addr
+-- mode for the local instead of (CLit lit) in the assignment.
+
+cgTailCall fun args
+ = do { fun_info <- getCgIdInfo fun
+
+ ; if isUnLiftedType (idType fun)
+ then -- Primitive return
+ ASSERT( null args )
+ do { fun_amode <- idInfoToAmode fun_info
+ ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }
+
+ else -- Normal case, fun is boxed
+ do { arg_amodes <- getArgAmodes args
+ ; performTailCall fun_info arg_amodes noStmts }
+ }
+
+
+-- -----------------------------------------------------------------------------
+-- The guts of a tail-call
+
+performTailCall
+ :: CgIdInfo -- The function
+ -> [(CgRep,CmmExpr)] -- Args
+ -> CmmStmts -- Pending simultaneous assignments
+ -- *** GUARANTEED to contain only stack assignments.
+ -> Code
+
+performTailCall fun_info arg_amodes pending_assts
+ | Just join_sp <- maybeLetNoEscape fun_info
+ = -- A let-no-escape is slightly different, because we
+ -- arrange the stack arguments into pointers and non-pointers
+ -- to make the heap check easier. The tail-call sequence
+ -- is very similar to returning an unboxed tuple, so we
+ -- share some code.
+ do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
+ ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
+ ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
+ ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }
+
+ | otherwise
+ = do { fun_amode <- idInfoToAmode fun_info
+ ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
+ opt_node_asst | nodeMustPointToIt lf_info = node_asst
+ | otherwise = noStmts
+ ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
+ ; hmods <- getHomeModules
+
+ ; case (getCallMethod hmods fun_name lf_info (length arg_amodes)) of
+
+ -- Node must always point to things we enter
+ EnterIt -> do
+ { emitSimultaneously (node_asst `plusStmts` pending_assts)
+ ; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
+ ; doFinalJump sp False (stmtC (CmmJump target [])) }
+
+ -- A function, but we have zero arguments. It is already in WHNF,
+ -- so we can just return it.
+ -- As with any return, Node must point to it.
+ ReturnIt -> do
+ { emitSimultaneously (node_asst `plusStmts` pending_assts)
+ ; doFinalJump sp False emitDirectReturnInstr }
+
+ -- A real constructor. Don't bother entering it,
+ -- just do the right sort of return instead.
+ -- As with any return, Node must point to it.
+ ReturnCon con -> do
+ { emitSimultaneously (node_asst `plusStmts` pending_assts)
+ ; doFinalJump sp False (emitKnownConReturnCode con) }
+
+ JumpToIt lbl -> do
+ { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
+ ; doFinalJump sp False (jumpToLbl lbl) }
+
+ -- A slow function call via the RTS apply routines
+ -- Node must definitely point to the thing
+ SlowCall -> do
+ { when (not (null arg_amodes)) $ do
+ { if (isKnownFun lf_info)
+ then tickyKnownCallTooFewArgs
+ else tickyUnknownCall
+ ; tickySlowCallPat (map fst arg_amodes)
+ }
+
+ ; let (apply_lbl, args, extra_args)
+ = constructSlowCall arg_amodes
+
+ ; directCall sp apply_lbl args extra_args
+ (node_asst `plusStmts` pending_assts)
+ }
+
+ -- A direct function call (possibly with some left-over arguments)
+ DirectEntry lbl arity -> do
+ { if arity == length arg_amodes
+ then tickyKnownCallExact
+ else do tickyKnownCallExtraArgs
+ tickySlowCallPat (map fst (drop arity arg_amodes))
+
+ ; let
+ -- The args beyond the arity go straight on the stack
+ (arity_args, extra_args) = splitAt arity arg_amodes
+
+ ; directCall sp lbl arity_args extra_args
+ (opt_node_asst `plusStmts` pending_assts)
+ }
+ }
+ where
+ fun_name = idName (cgIdInfoId fun_info)
+ lf_info = cgIdInfoLF fun_info
+
+
+
+directCall sp lbl args extra_args assts = do
+ let
+ -- First chunk of args go in registers
+ (reg_arg_amodes, stk_args) = assignCallRegs args
+
+ -- Any "extra" arguments are placed in frames on the
+ -- stack after the other arguments.
+ slow_stk_args = slowArgs extra_args
+
+ reg_assts = assignToRegs reg_arg_amodes
+ --
+ (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
+
+ emitSimultaneously (reg_assts `plusStmts`
+ stk_assts `plusStmts`
+ assts)
+
+ doFinalJump final_sp False (jumpToLbl lbl)
+
+-- -----------------------------------------------------------------------------
+-- The final clean-up before we do a jump at the end of a basic block.
+-- This code is shared by tail-calls and returns.
+
+doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
+doFinalJump final_sp is_let_no_escape jump_code
+ = do { -- Adjust the high-water mark if necessary
+ adjustStackHW final_sp
+
+ -- Push a return address if necessary (after the assignments
+ -- above, in case we clobber a live stack location)
+ --
+ -- DONT push the return address when we're about to jump to a
+ -- let-no-escape: the final tail call in the let-no-escape
+ -- will do this.
+ ; eob <- getEndOfBlockInfo
+ ; whenC (not is_let_no_escape) (pushReturnAddress eob)
+
+ -- Final adjustment of Sp/Hp
+ ; adjustSpAndHp final_sp
+
+ -- and do the jump
+ ; jump_code }
+
+-- -----------------------------------------------------------------------------
+-- A general return (just a special case of doFinalJump, above)
+
+performReturn :: Code -- The code to execute to actually do the return
+ -> Code
+
+performReturn finish_code
+ = do { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
+ ; doFinalJump args_sp False{-not a LNE-} finish_code }
+
+-- -----------------------------------------------------------------------------
+-- Primitive Returns
+-- Just load the return value into the right register, and return.
+
+performPrimReturn :: CgRep -> CmmExpr -- The thing to return
+ -> Code
+performPrimReturn rep amode
+ = do { whenC (not (isVoidArg rep))
+ (stmtC (CmmAssign ret_reg amode))
+ ; performReturn emitDirectReturnInstr }
+ where
+ ret_reg = dataReturnConvPrim rep
+
+-- -----------------------------------------------------------------------------
+-- Algebraic constructor returns
+
+-- Constructor is built on the heap; Node is set.
+-- All that remains is to do the right sort of jump.
+
+emitKnownConReturnCode :: DataCon -> Code
+emitKnownConReturnCode con
+ = emitAlgReturnCode (dataConTyCon con)
+ (CmmLit (mkIntCLit (dataConTagZ con)))
+ -- emitAlgReturnCode requires zero-indexed tag
+
+emitAlgReturnCode :: TyCon -> CmmExpr -> Code
+-- emitAlgReturnCode is used both by emitKnownConReturnCode,
+-- and by PrimOps that return enumerated types (i.e.
+-- all the comparison operators).
+emitAlgReturnCode tycon tag
+ = do { case ctrlReturnConvAlg tycon of
+ VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz
+ ; emitVectoredReturnInstr tag }
+ UnvectoredReturn _ -> emitDirectReturnInstr
+ }
+
+
+-- ---------------------------------------------------------------------------
+-- Unboxed tuple returns
+
+-- These are a bit like a normal tail call, except that:
+--
+-- - The tail-call target is an info table on the stack
+--
+-- - We separate stack arguments into pointers and non-pointers,
+-- to make it easier to leave things in a sane state for a heap check.
+-- This is OK because we can never partially-apply an unboxed tuple,
+-- unlike a function. The same technique is used when calling
+-- let-no-escape functions, because they also can't be partially
+-- applied.
+
+returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
+returnUnboxedTuple amodes
+ = do { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
+ ; tickyUnboxedTupleReturn (length amodes)
+ ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
+ ; emitSimultaneously assts
+ ; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr }
+
+pushUnboxedTuple :: VirtualSpOffset -- Sp at which to start pushing
+ -> [(CgRep, CmmExpr)] -- amodes of the components
+ -> FCode (VirtualSpOffset, -- final Sp
+ CmmStmts) -- assignments (regs+stack)
+
+pushUnboxedTuple sp []
+ = return (sp, noStmts)
+pushUnboxedTuple sp amodes
+ = do { let (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
+
+ -- separate the rest of the args into pointers and non-pointers
+ (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
+ reg_arg_assts = assignToRegs reg_arg_amodes
+
+ -- push ptrs, then nonptrs, on the stack
+ ; (ptr_sp, ptr_assts) <- mkStkAmodes sp ptr_args
+ ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
+
+ ; returnFC (final_sp,
+ reg_arg_assts `plusStmts`
+ ptr_assts `plusStmts` nptr_assts) }
+
+
+-- -----------------------------------------------------------------------------
+-- Returning unboxed tuples. This is mainly to support _ccall_GC_, where
+-- we want to do things in a slightly different order to normal:
+--
+-- - push return address
+-- - adjust stack pointer
+-- - r = call(args...)
+-- - assign regs for unboxed tuple (usually just R1 = r)
+-- - return to continuation
+--
+-- The return address (i.e. stack frame) must be on the stack before
+-- doing the call in case the call ends up in the garbage collector.
+--
+-- Sadly, the information about the continuation is lost after we push it
+-- (in order to avoid pushing it again), so we end up doing a needless
+-- indirect jump (ToDo).
+
+ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
+ccallReturnUnboxedTuple amodes before_jump
+ = do { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
+
+ -- Push a return address if necessary
+ ; pushReturnAddress eob
+ ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
+ (do { adjustSpAndHp args_sp
+ ; before_jump
+ ; returnUnboxedTuple amodes })
+ }
+
+-- -----------------------------------------------------------------------------
+-- Calling an out-of-line primop
+
+tailCallPrimOp :: PrimOp -> [StgArg] -> Code
+tailCallPrimOp op args
+ = do { -- We're going to perform a normal-looking tail call,
+ -- except that *all* the arguments will be in registers.
+ -- Hence the ASSERT( null leftovers )
+ arg_amodes <- getArgAmodes args
+ ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
+ jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)
+
+ ; ASSERT(null leftovers) -- no stack-resident args
+ emitSimultaneously (assignToRegs arg_regs)
+
+ ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
+ ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }
+
+-- -----------------------------------------------------------------------------
+-- Return Addresses
+
+-- We always push the return address just before performing a tail call
+-- or return. The reason we leave it until then is because the stack
+-- slot that the return address is to go into might contain something
+-- useful.
+--
+-- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
+-- case expression and the return address is still to be pushed.
+--
+-- There are cases where it doesn't look necessary to push the return
+-- address: for example, just before doing a return to a known
+-- continuation. However, the continuation will expect to find the
+-- return address on the stack in case it needs to do a heap check.
+
+pushReturnAddress :: EndOfBlockInfo -> Code
+
+pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False))
+ = do { sp_rel <- getSpRelOffset args_sp
+ ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
+
+-- For a polymorphic case, we have two return addresses to push: the case
+-- return, and stg_seq_frame_info which turns a possible vectored return
+-- into a direct one.
+pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True))
+ = do { sp_rel <- getSpRelOffset (args_sp-1)
+ ; stmtC (CmmStore sp_rel (mkLblExpr lbl))
+ ; sp_rel <- getSpRelOffset args_sp
+ ; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) }
+
+pushReturnAddress _ = nopC
+
+-- -----------------------------------------------------------------------------
+-- Misc.
+
+jumpToLbl :: CLabel -> Code
+-- Passes no argument to the destination procedure
+jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])
+
+assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
+assignToRegs reg_args
+ = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
+ | (expr, reg_id) <- reg_args ]
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[CgStackery-adjust]{Adjusting the stack pointers}
+%* *
+%************************************************************************
+
+This function adjusts the stack and heap pointers just before a tail
+call or return. The stack pointer is adjusted to its final position
+(i.e. to point to the last argument for a tail call, or the activation
+record for a return). The heap pointer may be moved backwards, in
+cases where we overallocated at the beginning of the basic block (see
+CgCase.lhs for discussion).
+
+These functions {\em do not} deal with high-water-mark adjustment.
+That's done by functions which allocate stack space.
+
+\begin{code}
+adjustSpAndHp :: VirtualSpOffset -- New offset for Arg stack ptr
+ -> Code
+adjustSpAndHp newRealSp
+ = do { -- Adjust stack, if necessary.
+ -- NB: the conditional on the monad-carried realSp
+ -- is out of line (via codeOnly), to avoid a black hole
+ ; new_sp <- getSpRelOffset newRealSp
+ ; checkedAbsC (CmmAssign spReg new_sp) -- Will generate no code in the case
+ ; setRealSp newRealSp -- where realSp==newRealSp
+
+ -- Adjust heap. The virtual heap pointer may be less than the real Hp
+ -- because the latter was advanced to deal with the worst-case branch
+ -- of the code, and we may be in a better-case branch. In that case,
+ -- move the real Hp *back* and retract some ticky allocation count.
+ ; hp_usg <- getHpUsage
+ ; let rHp = realHp hp_usg
+ vHp = virtHp hp_usg
+ ; new_hp <- getHpRelOffset vHp
+ ; checkedAbsC (CmmAssign hpReg new_hp) -- Generates nothing when vHp==rHp
+ ; tickyAllocHeap (vHp - rHp) -- ...ditto
+ ; setRealHp vHp
+ }
+\end{code}
diff --git a/compiler/codeGen/CgTicky.hs b/compiler/codeGen/CgTicky.hs
new file mode 100644
index 0000000000..3e72981c50
--- /dev/null
+++ b/compiler/codeGen/CgTicky.hs
@@ -0,0 +1,370 @@
+-----------------------------------------------------------------------------
+--
+-- Code generation for ticky-ticky profiling
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+module CgTicky (
+ emitTickyCounter,
+
+ tickyDynAlloc,
+ tickyAllocHeap,
+ tickyAllocPrim,
+ tickyAllocThunk,
+ tickyAllocPAP,
+
+ tickyPushUpdateFrame,
+ tickyUpdateFrameOmitted,
+
+ tickyEnterDynCon,
+ tickyEnterStaticCon,
+ tickyEnterViaNode,
+
+ tickyEnterFun,
+ tickyEnterThunk,
+
+ tickyUpdateBhCaf,
+ tickyBlackHole,
+ tickyUnboxedTupleReturn, tickyVectoredReturn,
+ tickyReturnOldCon, tickyReturnNewCon,
+
+ tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs,
+ tickyUnknownCall, tickySlowCallPat,
+
+ staticTickyHdr,
+ ) where
+
+#include "HsVersions.h"
+#include "../includes/DerivedConstants.h"
+ -- For REP_xxx constants, which are MachReps
+
+import ClosureInfo ( ClosureInfo, closureSize, slopSize, closureSMRep,
+ closureUpdReqd, closureName, isStaticClosure )
+import CgUtils
+import CgMonad
+import SMRep ( ClosureType(..), smRepClosureType, CgRep )
+
+import Cmm
+import MachOp
+import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr, cmmIndexExpr )
+import CLabel ( CLabel, mkRtsDataLabel, mkRednCountsLabel )
+
+import Name ( isInternalName )
+import Id ( Id, idType )
+import StaticFlags ( opt_DoTickyProfiling )
+import BasicTypes ( Arity )
+import FastString ( FastString, mkFastString, LitString )
+import Constants -- Lots of field offsets
+import Outputable
+
+-- Turgid imports for showTypeCategory
+import PrelNames
+import TcType ( Type, isDictTy, tcSplitTyConApp_maybe,
+ tcSplitFunTy_maybe )
+import TyCon ( isPrimTyCon, isTupleTyCon, isEnumerationTyCon,
+ maybeTyConSingleCon )
+import Maybe
+
+-----------------------------------------------------------------------------
+--
+-- Ticky-ticky profiling
+--
+-----------------------------------------------------------------------------
+
+staticTickyHdr :: [CmmLit]
+-- The ticky header words in a static closure
+-- Was SET_STATIC_TICKY_HDR
+staticTickyHdr
+ | not opt_DoTickyProfiling = []
+ | otherwise = [zeroCLit]
+
+emitTickyCounter :: ClosureInfo -> [Id] -> Int -> Code
+emitTickyCounter cl_info args on_stk
+ = ifTicky $
+ do { mod_name <- moduleName
+ ; fun_descr_lit <- mkStringCLit (fun_descr mod_name)
+ ; arg_descr_lit <- mkStringCLit arg_descr
+ ; emitDataLits ticky_ctr_label -- Must match layout of StgEntCounter
+ [ CmmInt 0 I16,
+ CmmInt (fromIntegral (length args)) I16, -- Arity
+ CmmInt (fromIntegral on_stk) I16, -- Words passed on stack
+ CmmInt 0 I16, -- 2-byte gap
+ fun_descr_lit,
+ arg_descr_lit,
+ zeroCLit, -- Entry count
+ zeroCLit, -- Allocs
+ zeroCLit -- Link
+ ] }
+ where
+ name = closureName cl_info
+ ticky_ctr_label = mkRednCountsLabel name
+ arg_descr = map (showTypeCategory . idType) args
+ fun_descr mod_name = ppr_for_ticky_name mod_name name
+
+-- When printing the name of a thing in a ticky file, we want to
+-- give the module name even for *local* things. We print
+-- just "x (M)" rather than "M.x" to distinguish them from the global kind.
+ppr_for_ticky_name mod_name name
+ | isInternalName name = showSDocDebug (ppr name <+> (parens (ppr mod_name)))
+ | otherwise = showSDocDebug (ppr name)
+
+-- -----------------------------------------------------------------------------
+-- Ticky stack frames
+
+tickyPushUpdateFrame = ifTicky $ bumpTickyCounter SLIT("UPDF_PUSHED_ctr")
+tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter SLIT("UPDF_OMITTED_ctr")
+
+-- -----------------------------------------------------------------------------
+-- Ticky entries
+
+tickyEnterDynCon = ifTicky $ bumpTickyCounter SLIT("ENT_DYN_CON_ctr")
+tickyEnterDynThunk = ifTicky $ bumpTickyCounter SLIT("ENT_DYN_THK_ctr")
+tickyEnterStaticCon = ifTicky $ bumpTickyCounter SLIT("ENT_STATIC_CON_ctr")
+tickyEnterStaticThunk = ifTicky $ bumpTickyCounter SLIT("ENT_STATIC_THK_ctr")
+tickyEnterViaNode = ifTicky $ bumpTickyCounter SLIT("ENT_VIA_NODE_ctr")
+
+tickyEnterThunk :: ClosureInfo -> Code
+tickyEnterThunk cl_info
+ | isStaticClosure cl_info = tickyEnterStaticThunk
+ | otherwise = tickyEnterDynThunk
+
+tickyBlackHole :: Bool{-updatable-} -> Code
+tickyBlackHole updatable
+ = ifTicky (bumpTickyCounter ctr)
+ where
+ ctr | updatable = SLIT("UPD_BH_SINGLE_ENTRY_ctr")
+ | otherwise = SLIT("UPD_BH_UPDATABLE_ctr")
+
+tickyUpdateBhCaf cl_info
+ = ifTicky (bumpTickyCounter ctr)
+ where
+ ctr | closureUpdReqd cl_info = SLIT("UPD_CAF_BH_SINGLE_ENTRY_ctr")
+ | otherwise = SLIT("UPD_CAF_BH_UPDATABLE_ctr")
+
+tickyEnterFun :: ClosureInfo -> Code
+tickyEnterFun cl_info
+ = ifTicky $
+ do { bumpTickyCounter ctr
+ ; fun_ctr_lbl <- getTickyCtrLabel
+ ; registerTickyCtr fun_ctr_lbl
+ ; bumpTickyCounter' fun_ctr_lbl }
+ where
+ ctr | isStaticClosure cl_info = SLIT("TICK_ENT_STATIC_FUN_DIRECT")
+ | otherwise = SLIT("TICK_ENT_DYN_FUN_DIRECT")
+
+registerTickyCtr :: CLabel -> Code
+-- Register a ticky counter
+-- if ( ! f_ct.registeredp ) {
+-- f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */
+-- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */
+-- f_ct.registeredp = 1 }
+registerTickyCtr ctr_lbl
+ = emitIf test (stmtsC register_stmts)
+ where
+ test = CmmMachOp (MO_Not I16)
+ [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl
+ oFFSET_StgEntCounter_registeredp)) I16]
+ register_stmts
+ = [ CmmStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_link))
+ (CmmLoad ticky_entry_ctrs wordRep)
+ , CmmStore ticky_entry_ctrs (mkLblExpr ctr_lbl)
+ , CmmStore (CmmLit (cmmLabelOffB ctr_lbl
+ oFFSET_StgEntCounter_registeredp))
+ (CmmLit (mkIntCLit 1)) ]
+ ticky_entry_ctrs = mkLblExpr (mkRtsDataLabel SLIT("ticky_entry_ctrs"))
+
+tickyReturnOldCon, tickyReturnNewCon :: Arity -> Code
+tickyReturnOldCon arity
+ = ifTicky $ do { bumpTickyCounter SLIT("RET_OLD_ctr")
+ ; bumpHistogram SLIT("RET_OLD_hst") arity }
+tickyReturnNewCon arity
+ | not opt_DoTickyProfiling = nopC
+ | otherwise
+ = ifTicky $ do { bumpTickyCounter SLIT("RET_NEW_ctr")
+ ; bumpHistogram SLIT("RET_NEW_hst") arity }
+
+tickyUnboxedTupleReturn :: Int -> Code
+tickyUnboxedTupleReturn arity
+ = ifTicky $ do { bumpTickyCounter SLIT("RET_UNBOXED_TUP_ctr")
+ ; bumpHistogram SLIT("RET_UNBOXED_TUP_hst") arity }
+
+tickyVectoredReturn :: Int -> Code
+tickyVectoredReturn family_size
+ = ifTicky $ do { bumpTickyCounter SLIT("VEC_RETURN_ctr")
+ ; bumpHistogram SLIT("RET_VEC_RETURN_hst") family_size }
+
+-- -----------------------------------------------------------------------------
+-- Ticky calls
+
+-- Ticks at a *call site*:
+tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_TOO_FEW_ARGS_ctr")
+tickyKnownCallExact = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_ctr")
+tickyKnownCallExtraArgs = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_EXTRA_ctr")
+tickyUnknownCall = ifTicky $ bumpTickyCounter SLIT("UNKNOWN_CALL_ctr")
+
+-- Tick for the call pattern at a slow call site (i.e. in addition to
+-- tickyUnknownCall, tickyKnownCallExtraArgs, etc.)
+tickySlowCallPat :: [CgRep] -> Code
+tickySlowCallPat args = return ()
+{- LATER: (introduces recursive module dependency now).
+ case callPattern args of
+ (str, True) -> bumpTickyCounter' (mkRtsSlowTickyCtrLabel str)
+ (str, False) -> bumpTickyCounter SLIT("TICK_SLOW_CALL_OTHER")
+
+callPattern :: [CgRep] -> (String,Bool)
+callPattern reps
+ | match == length reps = (chars, True)
+ | otherwise = (chars, False)
+ where (_,match) = findMatch reps
+ chars = map argChar reps
+
+argChar VoidArg = 'v'
+argChar PtrArg = 'p'
+argChar NonPtrArg = 'n'
+argChar LongArg = 'l'
+argChar FloatArg = 'f'
+argChar DoubleArg = 'd'
+-}
+
+-- -----------------------------------------------------------------------------
+-- Ticky allocation
+
+tickyDynAlloc :: ClosureInfo -> Code
+-- Called when doing a dynamic heap allocation
+tickyDynAlloc cl_info
+ = ifTicky $
+ case smRepClosureType (closureSMRep cl_info) of
+ Constr -> tick_alloc_con
+ ConstrNoCaf -> tick_alloc_con
+ Fun -> tick_alloc_fun
+ Thunk -> tick_alloc_thk
+ ThunkSelector -> tick_alloc_thk
+ where
+ -- will be needed when we fill in stubs
+ cl_size = closureSize cl_info
+ slop_size = slopSize cl_info
+
+ tick_alloc_thk
+ | closureUpdReqd cl_info = tick_alloc_up_thk
+ | otherwise = tick_alloc_se_thk
+
+ tick_alloc_con = panic "ToDo: tick_alloc"
+ tick_alloc_fun = panic "ToDo: tick_alloc"
+ tick_alloc_up_thk = panic "ToDo: tick_alloc"
+ tick_alloc_se_thk = panic "ToDo: tick_alloc"
+
+tickyAllocPrim :: CmmExpr -> CmmExpr -> CmmExpr -> Code
+tickyAllocPrim hdr goods slop = ifTicky $ panic "ToDo: tickyAllocPrim"
+
+tickyAllocThunk :: CmmExpr -> CmmExpr -> Code
+tickyAllocThunk goods slop = ifTicky $ panic "ToDo: tickyAllocThunk"
+
+tickyAllocPAP :: CmmExpr -> CmmExpr -> Code
+tickyAllocPAP goods slop = ifTicky $ panic "ToDo: tickyAllocPAP"
+
+tickyAllocHeap :: VirtualHpOffset -> Code
+-- Called when doing a heap check [TICK_ALLOC_HEAP]
+tickyAllocHeap hp
+ = ifTicky $
+ do { ticky_ctr <- getTickyCtrLabel
+ ; stmtsC $
+ if hp == 0 then [] -- Inside the stmtC to avoid control
+ else [ -- dependency on the argument
+ -- Bump the allocation count in the StgEntCounter
+ addToMem REP_StgEntCounter_allocs
+ (CmmLit (cmmLabelOffB ticky_ctr
+ oFFSET_StgEntCounter_allocs)) hp,
+ -- Bump ALLOC_HEAP_ctr
+ addToMemLbl cLongRep (mkRtsDataLabel SLIT("ALLOC_HEAP_ctr")) 1,
+ -- Bump ALLOC_HEAP_tot
+ addToMemLbl cLongRep (mkRtsDataLabel SLIT("ALLOC_HEAP_tot")) hp] }
+
+-- -----------------------------------------------------------------------------
+-- Ticky utils
+
+ifTicky :: Code -> Code
+ifTicky code
+ | opt_DoTickyProfiling = code
+ | otherwise = nopC
+
+addToMemLbl :: MachRep -> CLabel -> Int -> CmmStmt
+addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n
+
+-- All the ticky-ticky counters are declared "unsigned long" in C
+bumpTickyCounter :: LitString -> Code
+bumpTickyCounter lbl = bumpTickyCounter' (mkRtsDataLabel lbl)
+
+bumpTickyCounter' :: CLabel -> Code
+bumpTickyCounter' lbl = stmtC (addToMemLbl cLongRep lbl 1)
+
+addToMemLong = addToMem cLongRep
+
+bumpHistogram :: LitString -> Int -> Code
+bumpHistogram lbl n
+ = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLongRep))
+
+bumpHistogramE :: LitString -> CmmExpr -> Code
+bumpHistogramE lbl n
+ = do t <- newTemp cLongRep
+ stmtC (CmmAssign t n)
+        emitIf (CmmMachOp (MO_U_Ge cLongRep) [CmmReg t, eight]) $
+            stmtC (CmmAssign t eight)  -- clamp oversized values into the last bucket (8)
+ stmtC (addToMemLong (cmmIndexExpr cLongRep
+ (CmmLit (CmmLabel (mkRtsDataLabel lbl)))
+ (CmmReg t))
+ 1)
+ where
+ eight = CmmLit (CmmInt 8 cLongRep)
+
+------------------------------------------------------------------
+-- Showing the "type category" for ticky-ticky profiling
+
+showTypeCategory :: Type -> Char
+ {- {C,I,F,D} char, int, float, double
+ T tuple
+ S other single-constructor type
+ {c,i,f,d} unboxed ditto
+ t *unpacked* tuple
+ s *unpacked* single-cons...
+
+ v void#
+ a primitive array
+
+ E enumeration type
+ + dictionary, unless it's a ...
+ L List
+ > function
+ M other (multi-constructor) data-con type
+ . other type
+ - reserved for others to mark as "uninteresting"
+ -}
+showTypeCategory ty
+ = if isDictTy ty
+ then '+'
+ else
+ case tcSplitTyConApp_maybe ty of
+ Nothing -> if isJust (tcSplitFunTy_maybe ty)
+ then '>'
+ else '.'
+
+ Just (tycon, _) ->
+ let utc = getUnique tycon in
+ if utc == charDataConKey then 'C'
+ else if utc == intDataConKey then 'I'
+ else if utc == floatDataConKey then 'F'
+ else if utc == doubleDataConKey then 'D'
+ else if utc == smallIntegerDataConKey ||
+ utc == largeIntegerDataConKey then 'J'
+ else if utc == charPrimTyConKey then 'c'
+ else if (utc == intPrimTyConKey || utc == wordPrimTyConKey
+ || utc == addrPrimTyConKey) then 'i'
+ else if utc == floatPrimTyConKey then 'f'
+ else if utc == doublePrimTyConKey then 'd'
+ else if isPrimTyCon tycon {- array, we hope -} then 'A' -- Bogus
+ else if isEnumerationTyCon tycon then 'E'
+ else if isTupleTyCon tycon then 'T'
+ else if isJust (maybeTyConSingleCon tycon) then 'S'
+ else if utc == listTyConKey then 'L'
+ else 'M' -- oh, well...
diff --git a/compiler/codeGen/CgUsages.hi-boot-5 b/compiler/codeGen/CgUsages.hi-boot-5
new file mode 100644
index 0000000000..abb98cec1a
--- /dev/null
+++ b/compiler/codeGen/CgUsages.hi-boot-5
@@ -0,0 +1,3 @@
+__interface CgUsages 1 0 where
+__export CgUsages getSpRelOffset;
+1 getSpRelOffset :: AbsCSyn.VirtualSpOffset -> CgMonad.FCode AbsCSyn.RegRelative ;
diff --git a/compiler/codeGen/CgUsages.hi-boot-6 b/compiler/codeGen/CgUsages.hi-boot-6
new file mode 100644
index 0000000000..9640603cfb
--- /dev/null
+++ b/compiler/codeGen/CgUsages.hi-boot-6
@@ -0,0 +1,3 @@
+module CgUsages where
+
+getSpRelOffset :: AbsCSyn.VirtualSpOffset -> CgMonad.FCode AbsCSyn.RegRelative
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
new file mode 100644
index 0000000000..2f69927db0
--- /dev/null
+++ b/compiler/codeGen/CgUtils.hs
@@ -0,0 +1,688 @@
+-----------------------------------------------------------------------------
+--
+-- Code generator utilities; mostly monadic
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+module CgUtils (
+ addIdReps,
+ cgLit,
+ emitDataLits, emitRODataLits, emitIf, emitIfThenElse,
+ emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult,
+ assignTemp, newTemp,
+ emitSimultaneously,
+ emitSwitch, emitLitSwitch,
+ tagToClosure,
+
+ cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
+ cmmOffsetExprW, cmmOffsetExprB,
+ cmmRegOffW, cmmRegOffB,
+ cmmLabelOffW, cmmLabelOffB,
+ cmmOffsetW, cmmOffsetB,
+ cmmOffsetLitW, cmmOffsetLitB,
+ cmmLoadIndexW,
+
+ addToMem, addToMemE,
+ mkWordCLit,
+ mkStringCLit,
+ packHalfWordsCLit,
+ blankWord
+ ) where
+
+#include "HsVersions.h"
+
+import CgMonad
+import TyCon ( TyCon, tyConName )
+import Id ( Id )
+import Constants ( wORD_SIZE )
+import SMRep ( CgRep, StgWord, hALF_WORD_SIZE_IN_BITS, ByteOff,
+ WordOff, idCgRep )
+import PprCmm ( {- instances -} )
+import Cmm
+import CLabel
+import CmmUtils
+import MachOp ( MachRep(..), wordRep, MachOp(..), MachHint(..),
+ mo_wordOr, mo_wordAnd, mo_wordNe, mo_wordEq,
+ mo_wordULt, mo_wordUGt, mo_wordUGe, machRepByteWidth )
+import ForeignCall ( CCallConv(..) )
+import Literal ( Literal(..) )
+import CLabel ( CLabel, mkStringLitLabel )
+import Digraph ( SCC(..), stronglyConnComp )
+import ListSetOps ( assocDefault )
+import Util ( filterOut, sortLe )
+import DynFlags ( DynFlags(..), HscTarget(..) )
+import Packages ( HomeModules )
+import FastString ( LitString, FastString, bytesFS )
+import Outputable
+
+import Char ( ord )
+import DATA_BITS
+import DATA_WORD ( Word8 )
+import Maybe ( isNothing )
+
+-------------------------------------------------------------------------
+--
+-- Random small functions
+--
+-------------------------------------------------------------------------
+
+addIdReps :: [Id] -> [(CgRep, Id)]
+addIdReps ids = [(idCgRep id, id) | id <- ids]
+
+-------------------------------------------------------------------------
+--
+-- Literals
+--
+-------------------------------------------------------------------------
+
+cgLit :: Literal -> FCode CmmLit
+cgLit (MachStr s) = mkByteStringCLit (bytesFS s)
+ -- not unpackFS; we want the UTF-8 byte stream.
+cgLit other_lit = return (mkSimpleLit other_lit)
+
+mkSimpleLit :: Literal -> CmmLit
+mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordRep
+mkSimpleLit MachNullAddr = zeroCLit
+mkSimpleLit (MachInt i) = CmmInt i wordRep
+mkSimpleLit (MachInt64 i) = CmmInt i I64
+mkSimpleLit (MachWord i) = CmmInt i wordRep
+mkSimpleLit (MachWord64 i) = CmmInt i I64
+mkSimpleLit (MachFloat r) = CmmFloat r F32
+mkSimpleLit (MachDouble r) = CmmFloat r F64
+mkSimpleLit (MachLabel fs ms) = CmmLabel (mkForeignLabel fs ms is_dyn)
+ where
+ is_dyn = False -- ToDo: fix me
+
+mkLtOp :: Literal -> MachOp
+-- On signed literals we must do a signed comparison
+mkLtOp (MachInt _) = MO_S_Lt wordRep
+mkLtOp (MachFloat _) = MO_S_Lt F32
+mkLtOp (MachDouble _) = MO_S_Lt F64
+mkLtOp lit = MO_U_Lt (cmmLitRep (mkSimpleLit lit))
+
+
+---------------------------------------------------
+--
+-- Cmm data type functions
+--
+---------------------------------------------------
+
+-----------------------
+-- The "B" variants take byte offsets
+cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr
+cmmRegOffB = cmmRegOff
+
+cmmOffsetB :: CmmExpr -> ByteOff -> CmmExpr
+cmmOffsetB = cmmOffset
+
+cmmOffsetExprB :: CmmExpr -> CmmExpr -> CmmExpr
+cmmOffsetExprB = cmmOffsetExpr
+
+cmmLabelOffB :: CLabel -> ByteOff -> CmmLit
+cmmLabelOffB = cmmLabelOff
+
+cmmOffsetLitB :: CmmLit -> ByteOff -> CmmLit
+cmmOffsetLitB = cmmOffsetLit
+
+-----------------------
+-- The "W" variants take word offsets
+cmmOffsetExprW :: CmmExpr -> CmmExpr -> CmmExpr
+-- The second arg is a *word* offset; need to change it to bytes
+cmmOffsetExprW e (CmmLit (CmmInt n _)) = cmmOffsetW e (fromInteger n)
+cmmOffsetExprW e wd_off = cmmIndexExpr wordRep e wd_off
+
+cmmOffsetW :: CmmExpr -> WordOff -> CmmExpr
+cmmOffsetW e n = cmmOffsetB e (wORD_SIZE * n)
+
+cmmRegOffW :: CmmReg -> WordOff -> CmmExpr
+cmmRegOffW reg wd_off = cmmRegOffB reg (wd_off * wORD_SIZE)
+
+cmmOffsetLitW :: CmmLit -> WordOff -> CmmLit
+cmmOffsetLitW lit wd_off = cmmOffsetLitB lit (wORD_SIZE * wd_off)
+
+cmmLabelOffW :: CLabel -> WordOff -> CmmLit
+cmmLabelOffW lbl wd_off = cmmLabelOffB lbl (wORD_SIZE * wd_off)
+
+cmmLoadIndexW :: CmmExpr -> Int -> CmmExpr
+cmmLoadIndexW base off
+ = CmmLoad (cmmOffsetW base off) wordRep
+
+-----------------------
+cmmNeWord, cmmEqWord, cmmOrWord, cmmAndWord :: CmmExpr -> CmmExpr -> CmmExpr
+cmmOrWord e1 e2 = CmmMachOp mo_wordOr [e1, e2]
+cmmAndWord e1 e2 = CmmMachOp mo_wordAnd [e1, e2]
+cmmNeWord e1 e2 = CmmMachOp mo_wordNe [e1, e2]
+cmmEqWord e1 e2 = CmmMachOp mo_wordEq [e1, e2]
+cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2]
+cmmUGeWord e1 e2 = CmmMachOp mo_wordUGe [e1, e2]
+cmmUGtWord e1 e2 = CmmMachOp mo_wordUGt [e1, e2]
+
+cmmNegate :: CmmExpr -> CmmExpr
+cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep)
+cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprRep e)) [e]
+
+blankWord :: CmmStatic
+blankWord = CmmUninitialised wORD_SIZE
+
+-----------------------
+-- Making literals
+
+mkWordCLit :: StgWord -> CmmLit
+mkWordCLit wd = CmmInt (fromIntegral wd) wordRep
+
+packHalfWordsCLit :: (Integral a, Integral b) => a -> b -> CmmLit
+-- Make a single word literal in which the lower_half_word is
+-- at the lower address, and the upper_half_word is at the
+-- higher address
+-- ToDo: consider using half-word lits instead
+-- but be careful: that's vulnerable when reversed
+packHalfWordsCLit lower_half_word upper_half_word
+#ifdef WORDS_BIGENDIAN
+ = mkWordCLit ((fromIntegral lower_half_word `shiftL` hALF_WORD_SIZE_IN_BITS)
+ .|. fromIntegral upper_half_word)
+#else
+ = mkWordCLit ((fromIntegral lower_half_word)
+ .|. (fromIntegral upper_half_word `shiftL` hALF_WORD_SIZE_IN_BITS))
+#endif
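+
+-- A worked example, assuming a 32-bit word (hALF_WORD_SIZE_IN_BITS = 16):
+-- packHalfWordsCLit 0x1 0x2 yields mkWordCLit 0x00020001 on a
+-- little-endian target (bytes in memory: 01 00 02 00) and
+-- mkWordCLit 0x00010002 on a big-endian one (bytes: 00 01 00 02);
+-- either way the lower half-word 0x1 sits at the lower address.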
+
+--------------------------------------------------------------------------
+--
+-- Incrementing a memory location
+--
+--------------------------------------------------------------------------
+
+addToMem :: MachRep -- rep of the counter
+ -> CmmExpr -- Address
+ -> Int -- What to add (a word)
+ -> CmmStmt
+addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) rep))
+
+addToMemE :: MachRep -- rep of the counter
+ -> CmmExpr -- Address
+ -> CmmExpr -- What to add (a word-typed expression)
+ -> CmmStmt
+addToMemE rep ptr n
+ = CmmStore ptr (CmmMachOp (MO_Add rep) [CmmLoad ptr rep, n])
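+
+-- For example, to bump a word-sized counter at a (hypothetical) label lbl:
+-- addToMem wordRep (CmmLit (CmmLabel lbl)) 1
+-- which stores back, schematically: W_[lbl] = W_[lbl] + 1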
+
+-------------------------------------------------------------------------
+--
+-- Converting a closure tag to a closure for enumeration types
+-- (this is the implementation of tagToEnum#).
+--
+-------------------------------------------------------------------------
+
+tagToClosure :: HomeModules -> TyCon -> CmmExpr -> CmmExpr
+tagToClosure hmods tycon tag
+ = CmmLoad (cmmOffsetExprW closure_tbl tag) wordRep
+ where closure_tbl = CmmLit (CmmLabel lbl)
+ lbl = mkClosureTableLabel hmods (tyConName tycon)
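+
+-- For example, given a (hypothetical) data Colour = Red | Green | Blue,
+-- the closure table for Colour holds pointers to the static closures of
+-- Red, Green and Blue in tag order, so a tag of 1 loads the address of
+-- the Green closure.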
+
+-------------------------------------------------------------------------
+--
+-- Conditionals and rts calls
+--
+-------------------------------------------------------------------------
+
+emitIf :: CmmExpr -- Boolean
+ -> Code -- Then part
+ -> Code
+-- Emit (if e then x)
+-- ToDo: reverse the condition to avoid the extra branch instruction if possible
+-- (some conditionals aren't reversible. eg. floating point comparisons cannot
+-- be inverted because there exist some values for which both comparisons
+-- return False, such as NaN.)
+emitIf cond then_part
+ = do { then_id <- newLabelC
+ ; join_id <- newLabelC
+ ; stmtC (CmmCondBranch cond then_id)
+ ; stmtC (CmmBranch join_id)
+ ; labelC then_id
+ ; then_part
+ ; labelC join_id
+ }
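+
+-- The emitted control flow is, schematically:
+-- if (cond) goto then_id;
+-- goto join_id;
+-- then_id: <then_part> -- falls through to join_id
+-- join_id: <continuation>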
+
+emitIfThenElse :: CmmExpr -- Boolean
+ -> Code -- Then part
+ -> Code -- Else part
+ -> Code
+-- Emit (if e then x else y)
+emitIfThenElse cond then_part else_part
+ = do { then_id <- newLabelC
+ ; else_id <- newLabelC
+ ; join_id <- newLabelC
+ ; stmtC (CmmCondBranch cond then_id)
+ ; else_part
+ ; stmtC (CmmBranch join_id)
+ ; labelC then_id
+ ; then_part
+ ; labelC join_id
+ }
+
+emitRtsCall :: LitString -> [(CmmExpr,MachHint)] -> Code
+emitRtsCall fun args = emitRtsCall' [] fun args Nothing
+ -- The 'Nothing' says "save all global registers"
+
+emitRtsCallWithVols :: LitString -> [(CmmExpr,MachHint)] -> [GlobalReg] -> Code
+emitRtsCallWithVols fun args vols
+ = emitRtsCall' [] fun args (Just vols)
+
+emitRtsCallWithResult :: CmmReg -> MachHint -> LitString
+ -> [(CmmExpr,MachHint)] -> Code
+emitRtsCallWithResult res hint fun args
+ = emitRtsCall' [(res,hint)] fun args Nothing
+
+-- Make a call to an RTS C procedure
+emitRtsCall'
+ :: [(CmmReg,MachHint)]
+ -> LitString
+ -> [(CmmExpr,MachHint)]
+ -> Maybe [GlobalReg]
+ -> Code
+emitRtsCall' res fun args vols = stmtC (CmmCall target res args vols)
+ where
+ target = CmmForeignCall fun_expr CCallConv
+ fun_expr = mkLblExpr (mkRtsCodeLabel fun)
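+
+-- For example, a no-result, no-argument call to an RTS procedure
+-- (assuming such an entry point exists) would look like:
+-- emitRtsCall SLIT("performMajorGC") []
+-- which emits a CCallConv call to that C function, saving all
+-- global registers across the call.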
+
+
+-------------------------------------------------------------------------
+--
+-- Strings generate a top-level data block
+--
+-------------------------------------------------------------------------
+
+emitDataLits :: CLabel -> [CmmLit] -> Code
+-- Emit a data-segment data block
+emitDataLits lbl lits
+ = emitData Data (CmmDataLabel lbl : map CmmStaticLit lits)
+
+emitRODataLits :: CLabel -> [CmmLit] -> Code
+-- Emit a read-only data block
+emitRODataLits lbl lits
+ = emitData section (CmmDataLabel lbl : map CmmStaticLit lits)
+ where section | any needsRelocation lits = RelocatableReadOnlyData
+ | otherwise = ReadOnlyData
+ needsRelocation (CmmLabel _) = True
+ needsRelocation (CmmLabelOff _ _) = True
+ needsRelocation _ = False
+
+mkStringCLit :: String -> FCode CmmLit
+-- Make a global definition for the string,
+-- and return its label
+mkStringCLit str = mkByteStringCLit (map (fromIntegral.ord) str)
+
+mkByteStringCLit :: [Word8] -> FCode CmmLit
+mkByteStringCLit bytes
+ = do { uniq <- newUnique
+ ; let lbl = mkStringLitLabel uniq
+ ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString bytes]
+ ; return (CmmLabel lbl) }
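+
+-- For example, mkStringCLit "hi" emits, schematically,
+-- section "readonly" { <fresh-label>: bytes "hi" }
+-- and returns CmmLabel <fresh-label>, which can then be used wherever
+-- the address of the string is needed.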
+
+-------------------------------------------------------------------------
+--
+-- Assigning expressions to temporaries
+--
+-------------------------------------------------------------------------
+
+assignTemp :: CmmExpr -> FCode CmmExpr
+-- For a non-trivial expression, e, create a local
+-- variable and assign the expression to it
+assignTemp e
+ | isTrivialCmmExpr e = return e
+ | otherwise = do { reg <- newTemp (cmmExprRep e)
+ ; stmtC (CmmAssign reg e)
+ ; return (CmmReg reg) }
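+
+-- For example, assignTemp (CmmReg r) returns CmmReg r unchanged (it is
+-- already trivial), while for a load or a MachOp expression it emits
+-- tmp = <expr> and returns CmmReg tmp, so the caller can reuse the
+-- value without re-evaluating the expression.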
+
+
+newTemp :: MachRep -> FCode CmmReg
+newTemp rep = do { uniq <- newUnique; return (CmmLocal (LocalReg uniq rep)) }
+
+
+-------------------------------------------------------------------------
+--
+-- Building case analysis
+--
+-------------------------------------------------------------------------
+
+emitSwitch
+ :: CmmExpr -- Tag to switch on
+ -> [(ConTagZ, CgStmts)] -- Tagged branches
+ -> Maybe CgStmts -- Default branch (if any)
+ -> ConTagZ -> ConTagZ -- Min and Max possible values; behaviour
+ -- outside this range is undefined
+ -> Code
+
+-- ONLY A DEFAULT BRANCH: no case analysis to do
+emitSwitch tag_expr [] (Just stmts) _ _
+ = emitCgStmts stmts
+
+-- Right, off we go
+emitSwitch tag_expr branches mb_deflt lo_tag hi_tag
+ = -- Just sort the branches before calling mk_switch
+ do { mb_deflt_id <-
+ case mb_deflt of
+ Nothing -> return Nothing
+ Just stmts -> do id <- forkCgStmts stmts; return (Just id)
+
+ ; dflags <- getDynFlags
+ ; let via_C | HscC <- hscTarget dflags = True
+ | otherwise = False
+
+ ; stmts <- mk_switch tag_expr (sortLe le branches)
+ mb_deflt_id lo_tag hi_tag via_C
+ ; emitCgStmts stmts
+ }
+ where
+ (t1,_) `le` (t2,_) = t1 <= t2
+
+
+mk_switch :: CmmExpr -> [(ConTagZ, CgStmts)]
+ -> Maybe BlockId -> ConTagZ -> ConTagZ -> Bool
+ -> FCode CgStmts
+
+-- SINGLETON TAG RANGE: no case analysis to do
+mk_switch tag_expr [(tag,stmts)] _ lo_tag hi_tag via_C
+ | lo_tag == hi_tag
+ = ASSERT( tag == lo_tag )
+ return stmts
+
+-- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
+mk_switch tag_expr [(tag,stmts)] Nothing lo_tag hi_tag via_C
+ = return stmts
+ -- The simplifier might have eliminated a case
+ -- so we may have e.g. case xs of
+ -- [] -> e
+ -- In that situation we can be sure the (:) case
+ -- can't happen, so no need to test
+
+-- SINGLETON BRANCH: one equality check to do
+mk_switch tag_expr [(tag,stmts)] (Just deflt) lo_tag hi_tag via_C
+ = return (CmmCondBranch cond deflt `consCgStmt` stmts)
+ where
+ cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag))
+ -- We have lo_tag < hi_tag, but there's only one branch,
+ -- so there must be a default
+
+-- ToDo: we might want to check for the two branch case, where one of
+-- the branches is the tag 0, because comparing '== 0' is likely to be
+-- more efficient than other kinds of comparison.
+
+-- DENSE TAG RANGE: use a switch statement.
+--
+-- We also use a switch unconditionally when compiling via C, because
+-- this will get emitted as a C switch statement and the C compiler
+-- should do a good job of optimising it. Also, older GCC versions
+-- (2.95 in particular) have problems compiling the complicated
+-- if-trees generated by this code, so compiling to a switch every
+-- time works around that problem.
+--
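+-- For example (when not compiling via C), a case over a hypothetical
+-- 8-constructor type with branches for 6 of the tags plus a default is
+-- dense (6 > 8 `div` 2), so it becomes a single CmmSwitch; with only 3
+-- tags covered it would typically be compiled via the if-tree route below.
+--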
+mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
+ | use_switch -- Use a switch
+ = do { branch_ids <- mapM forkCgStmts (map snd branches)
+ ; let
+ tagged_blk_ids = zip (map fst branches) (map Just branch_ids)
+
+ find_branch :: ConTagZ -> Maybe BlockId
+ find_branch i = assocDefault mb_deflt tagged_blk_ids i
+
+ -- NB. we have eliminated impossible branches at
+ -- either end of the range (see below), so the first
+ -- tag of a real branch is real_lo_tag (not lo_tag).
+ arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]]
+
+ switch_stmt = CmmSwitch (cmmOffset tag_expr (- real_lo_tag)) arms
+
+ ; ASSERT(not (all isNothing arms))
+ return (oneCgStmt switch_stmt)
+ }
+
+ -- if we can knock off a bunch of default cases with one if, then do so
+ | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches
+ = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
+ ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit lowest_branch))
+ branch = CmmCondBranch cond deflt
+ ; stmts <- mk_switch tag_expr' branches mb_deflt
+ lowest_branch hi_tag via_C
+ ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
+ }
+
+ | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches
+ = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
+ ; let cond = cmmUGtWord tag_expr' (CmmLit (mkIntCLit highest_branch))
+ branch = CmmCondBranch cond deflt
+ ; stmts <- mk_switch tag_expr' branches mb_deflt
+ lo_tag highest_branch via_C
+ ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts))
+ }
+
+ | otherwise -- Use an if-tree
+ = do { (assign_tag, tag_expr') <- assignTemp' tag_expr
+ -- To avoid duplication
+ ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt
+ lo_tag (mid_tag-1) via_C
+ ; hi_stmts <- mk_switch tag_expr' hi_branches mb_deflt
+ mid_tag hi_tag via_C
+ ; hi_id <- forkCgStmts hi_stmts
+ ; let cond = cmmUGeWord tag_expr' (CmmLit (mkIntCLit mid_tag))
+ branch_stmt = CmmCondBranch cond hi_id
+ ; return (assign_tag `consCgStmt` (branch_stmt `consCgStmt` lo_stmts))
+ }
+ -- we test (e >= mid_tag) rather than (e < mid_tag), because
+ -- the former works better when e is a comparison, and there
+ -- are two tags 0 & 1 (mid_tag == 1). In this case, the code
+ -- generator can reduce the condition to e itself without
+ -- having to reverse the sense of the comparison: comparisons
+ -- can't always be easily reversed (eg. floating
+ -- pt. comparisons).
+ where
+ use_switch = {- pprTrace "mk_switch" (
+ ppr tag_expr <+> text "n_tags:" <+> int n_tags <+>
+ text "n_branches:" <+> int n_branches <+>
+ text "lo_tag: " <+> int lo_tag <+>
+ text "hi_tag: " <+> int hi_tag <+>
+ text "real_lo_tag: " <+> int real_lo_tag <+>
+ text "real_hi_tag: " <+> int real_hi_tag) $ -}
+ ASSERT( n_branches > 1 && n_tags > 1 )
+ n_tags > 2 && (small || dense || via_C)
+ -- a 2-branch switch always turns into an if.
+ small = n_tags <= 4
+ dense = n_branches > (n_tags `div` 2)
+ exhaustive = n_tags == n_branches
+ n_branches = length branches
+
+ -- ignore default slots at each end of the range if there's
+ -- no default branch defined.
+ lowest_branch = fst (head branches)
+ highest_branch = fst (last branches)
+
+ real_lo_tag
+ | isNothing mb_deflt = lowest_branch
+ | otherwise = lo_tag
+
+ real_hi_tag
+ | isNothing mb_deflt = highest_branch
+ | otherwise = hi_tag
+
+ n_tags = real_hi_tag - real_lo_tag + 1
+
+ -- INVARIANT: Provided hi_tag > lo_tag (which is true)
+ -- lo_tag <= mid_tag < hi_tag
+ -- lo_branches have tags < mid_tag
+ -- hi_branches have tags >= mid_tag
+
+ (mid_tag,_) = branches !! (n_branches `div` 2)
+ -- 2 branches => n_branches `div` 2 = 1
+ -- => branches !! 1 give the *second* tag
+ -- There are always at least 2 branches here
+
+ (lo_branches, hi_branches) = span is_lo branches
+ is_lo (t,_) = t < mid_tag
+
+
+assignTemp' :: CmmExpr -> FCode (CmmStmt, CmmExpr)
+assignTemp' e
+ | isTrivialCmmExpr e = return (CmmNop, e)
+ | otherwise = do { reg <- newTemp (cmmExprRep e)
+ ; return (CmmAssign reg e, CmmReg reg) }
+
+
+emitLitSwitch :: CmmExpr -- Tag to switch on
+ -> [(Literal, CgStmts)] -- Tagged branches
+ -> CgStmts -- Default branch (always)
+ -> Code -- Emit the code
+-- Used for general literals, whose size might not be a word,
+-- where there is always a default case, and where we don't know
+-- the range of values for certain. For simplicity we always generate a tree.
+--
+-- ToDo: for integers we could do better here, perhaps by generalising
+-- mk_switch and using that. --SDM 15/09/2004
+emitLitSwitch scrut [] deflt
+ = emitCgStmts deflt
+emitLitSwitch scrut branches deflt_blk
+ = do { scrut' <- assignTemp scrut
+ ; deflt_blk_id <- forkCgStmts deflt_blk
+ ; blk <- mk_lit_switch scrut' deflt_blk_id (sortLe le branches)
+ ; emitCgStmts blk }
+ where
+ le (t1,_) (t2,_) = t1 <= t2
+
+mk_lit_switch :: CmmExpr -> BlockId
+ -> [(Literal,CgStmts)]
+ -> FCode CgStmts
+mk_lit_switch scrut deflt_blk_id [(lit,blk)]
+ = return (consCgStmt if_stmt blk)
+ where
+ cmm_lit = mkSimpleLit lit
+ rep = cmmLitRep cmm_lit
+ cond = CmmMachOp (MO_Ne rep) [scrut, CmmLit cmm_lit]
+ if_stmt = CmmCondBranch cond deflt_blk_id
+
+mk_lit_switch scrut deflt_blk_id branches
+ = do { hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches
+ ; lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches
+ ; lo_blk_id <- forkCgStmts lo_blk
+ ; let if_stmt = CmmCondBranch cond lo_blk_id
+ ; return (if_stmt `consCgStmt` hi_blk) }
+ where
+ n_branches = length branches
+ (mid_lit,_) = branches !! (n_branches `div` 2)
+ -- See notes above re mid_tag
+
+ (lo_branches, hi_branches) = span is_lo branches
+ is_lo (t,_) = t < mid_lit
+
+ cond = CmmMachOp (mkLtOp mid_lit)
+ [scrut, CmmLit (mkSimpleLit mid_lit)]
+
+-------------------------------------------------------------------------
+--
+-- Simultaneous assignment
+--
+-------------------------------------------------------------------------
+
+
+emitSimultaneously :: CmmStmts -> Code
+-- Emit code to perform the assignments in the
+-- input simultaneously, using temporary variables when necessary.
+--
+-- The Stmts must be:
+-- CmmNop, CmmComment, CmmAssign, CmmStore
+-- and nothing else
+
+
+-- We use the strongly-connected component algorithm, in which
+-- * the vertices are the statements
+-- * an edge goes from s1 to s2 iff
+-- s1 assigns to something s2 uses
+-- that is, if s1 should *follow* s2 in the final order
+
+type CVertex = (Int, CmmStmt) -- Give each vertex a unique number,
+ -- for fast comparison
+
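+-- For example, the pair { R1 := R2; R2 := R1 } is cyclic (each statement
+-- reads what the other writes), so it is broken with a temporary:
+-- tmp := R2; R2 := R1; R1 := tmp
+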
+emitSimultaneously stmts
+ = codeOnly $
+ case filterOut isNopStmt (stmtList stmts) of
+ -- Remove no-ops
+ [] -> nopC
+ [stmt] -> stmtC stmt -- It's often just one stmt
+ stmt_list -> doSimultaneously1 (zip [(1::Int)..] stmt_list)
+
+doSimultaneously1 :: [CVertex] -> Code
+doSimultaneously1 vertices
+ = let
+ edges = [ (vertex, key1, edges_from stmt1)
+ | vertex@(key1, stmt1) <- vertices
+ ]
+ edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices,
+ stmt1 `mustFollow` stmt2
+ ]
+ components = stronglyConnComp edges
+
+ -- do_component deals with one strongly-connected component
+ -- Not cyclic, or singleton? Just do it
+ do_component (AcyclicSCC (n,stmt)) = stmtC stmt
+ do_component (CyclicSCC [(n,stmt)]) = stmtC stmt
+
+ -- Cyclic? Then go via temporaries. Pick one to
+ -- break the loop and try again with the rest.
+ do_component (CyclicSCC ((n,first_stmt) : rest))
+ = do { from_temp <- go_via_temp first_stmt
+ ; doSimultaneously1 rest
+ ; stmtC from_temp }
+
+ go_via_temp (CmmAssign dest src)
+ = do { tmp <- newTemp (cmmRegRep dest)
+ ; stmtC (CmmAssign tmp src)
+ ; return (CmmAssign dest (CmmReg tmp)) }
+ go_via_temp (CmmStore dest src)
+ = do { tmp <- newTemp (cmmExprRep src)
+ ; stmtC (CmmAssign tmp src)
+ ; return (CmmStore dest (CmmReg tmp)) }
+ in
+ mapCs do_component components
+
+mustFollow :: CmmStmt -> CmmStmt -> Bool
+CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt
+CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprRep e)) stmt
+CmmNop `mustFollow` stmt = False
+CmmComment _ `mustFollow` stmt = False
+
+
+anySrc :: (CmmExpr -> Bool) -> CmmStmt -> Bool
+-- True if the fn is true of any input of the stmt
+anySrc p (CmmAssign _ e) = p e
+anySrc p (CmmStore e1 e2) = p e1 || p e2 -- Might be used in either side
+anySrc p (CmmComment _) = False
+anySrc p CmmNop = False
+anySrc p other = True -- Conservative
+
+regUsedIn :: CmmReg -> CmmExpr -> Bool
+reg `regUsedIn` CmmLit _ = False
+reg `regUsedIn` CmmLoad e _ = reg `regUsedIn` e
+reg `regUsedIn` CmmReg reg' = reg == reg'
+reg `regUsedIn` CmmRegOff reg' _ = reg == reg'
+reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es
+
+locUsedIn :: CmmExpr -> MachRep -> CmmExpr -> Bool
+-- (locUsedIn a r e) checks whether writing to r[a] could affect the value of
+-- 'e'. Returns True if it's not sure.
+locUsedIn loc rep (CmmLit _) = False
+locUsedIn loc rep (CmmLoad e ld_rep) = possiblySameLoc loc rep e ld_rep
+locUsedIn loc rep (CmmReg reg') = False
+locUsedIn loc rep (CmmRegOff reg' _) = False
+locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es
+
+possiblySameLoc :: CmmExpr -> MachRep -> CmmExpr -> MachRep -> Bool
+-- Assumes that distinct registers (eg Hp, Sp) do not
+-- point to the same location, nor any offset thereof.
+possiblySameLoc (CmmReg r1) rep1 (CmmReg r2) rep2 = r1==r2
+possiblySameLoc (CmmReg r1) rep1 (CmmRegOff r2 0) rep2 = r1==r2
+possiblySameLoc (CmmRegOff r1 0) rep1 (CmmReg r2) rep2 = r1==r2
+possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2
+ = r1==r2 && end1 > start2 && end2 > start1
+ where
+ end1 = start1 + machRepByteWidth rep1
+ end2 = start2 + machRepByteWidth rep2
+
+possiblySameLoc l1 rep1 (CmmLit _) rep2 = False
+possiblySameLoc l1 rep1 l2 rep2 = True -- Conservative
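+
+-- For example, assuming 4-byte I32 reps: Sp[0] and Sp[4] cannot clash
+-- (end1 = 4, start2 = 4, and 4 > 4 fails), whereas Sp[0] and Sp[2]
+-- might (end1 = 4 > 2 and end2 = 6 > 0), so we answer True there.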
diff --git a/compiler/codeGen/ClosureInfo.hi-boot-5 b/compiler/codeGen/ClosureInfo.hi-boot-5
new file mode 100644
index 0000000000..2291f93cc6
--- /dev/null
+++ b/compiler/codeGen/ClosureInfo.hi-boot-5
@@ -0,0 +1,4 @@
+__interface ClosureInfo 1 0 where
+__export ClosureInfo ClosureInfo LambdaFormInfo;
+1 data LambdaFormInfo;
+1 data ClosureInfo;
diff --git a/compiler/codeGen/ClosureInfo.hi-boot-6 b/compiler/codeGen/ClosureInfo.hi-boot-6
new file mode 100644
index 0000000000..d313ccde80
--- /dev/null
+++ b/compiler/codeGen/ClosureInfo.hi-boot-6
@@ -0,0 +1,4 @@
+module ClosureInfo where
+
+data LambdaFormInfo
+data ClosureInfo
diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs
new file mode 100644
index 0000000000..84d9dd95ef
--- /dev/null
+++ b/compiler/codeGen/ClosureInfo.lhs
@@ -0,0 +1,951 @@
+%
+% (c) The University of Glasgow 1992-2004
+%
+
+ Data structures which describe closures, and
+ operations over those data structures
+
+ Nothing monadic in here
+
+Much of the rationale for these things is in the ``details'' part of
+the STG paper.
+
+\begin{code}
+module ClosureInfo (
+ ClosureInfo, LambdaFormInfo, SMRep, -- all abstract
+ StandardFormInfo,
+
+ ArgDescr(..), Liveness(..),
+ C_SRT(..), needsSRT,
+
+ mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo,
+ mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape,
+
+ mkClosureInfo, mkConInfo,
+
+ closureSize, closureNonHdrSize,
+ closureGoodStuffSize, closurePtrsSize,
+ slopSize,
+
+ closureName, infoTableLabelFromCI,
+ closureLabelFromCI, closureSRT,
+ closureLFInfo, isLFThunk,closureSMRep, closureUpdReqd,
+ closureNeedsUpdSpace, closureIsThunk,
+ closureSingleEntry, closureReEntrant, isConstrClosure_maybe,
+ closureFunInfo, isStandardFormThunk, isKnownFun,
+
+ enterIdLabel, enterLocalIdLabel, enterReturnPtLabel,
+
+ nodeMustPointToIt,
+ CallMethod(..), getCallMethod,
+
+ blackHoleOnEntry,
+
+ staticClosureRequired,
+ getClosureType,
+
+ isToplevClosure,
+ closureValDescr, closureTypeDescr, -- profiling
+
+ isStaticClosure,
+ cafBlackHoleClosureInfo, seCafBlackHoleClosureInfo,
+
+ staticClosureNeedsLink,
+ ) where
+
+#include "../includes/MachDeps.h"
+#include "HsVersions.h"
+
+import StgSyn
+import SMRep -- all of it
+
+import CLabel
+
+import Constants ( mIN_PAYLOAD_SIZE )
+import Packages ( isDllName, HomeModules )
+import StaticFlags ( opt_SccProfilingOn, opt_OmitBlackHoling,
+ opt_Parallel, opt_DoTickyProfiling )
+import Id ( Id, idType, idArity, idName )
+import DataCon ( DataCon, dataConTyCon, isNullaryRepDataCon, dataConName )
+import Name ( Name, nameUnique, getOccName, getOccString )
+import OccName ( occNameString )
+import Type ( isUnLiftedType, Type, repType, splitTyConApp_maybe )
+import TcType ( tcSplitSigmaTy )
+import TyCon ( isFunTyCon, isAbstractTyCon )
+import BasicTypes ( TopLevelFlag(..), isNotTopLevel, isTopLevel, ipNameName )
+import FastString
+import Outputable
+import Constants
+
+import TypeRep -- TEMP
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-datatypes]{Data types for closure information}
+%* *
+%************************************************************************
+
+Information about a closure, from the code generator's point of view.
+
+A ClosureInfo describes the info pointer of a closure. It has
+enough information
+ a) to construct the info table itself
+ b) to allocate a closure containing that info pointer (i.e.
+ it knows the info table label)
+
+We make a ClosureInfo for
+ - each let binding (both top level and not)
+ - each data constructor (for its shared static and
+ dynamic info tables)
+
+\begin{code}
+data ClosureInfo
+ = ClosureInfo {
+ closureName :: !Name, -- The thing bound to this closure
+ closureLFInfo :: !LambdaFormInfo, -- NOTE: not an LFCon (see below)
+ closureSMRep :: !SMRep, -- representation used by storage mgr
+ closureSRT :: !C_SRT, -- What SRT applies to this closure
+ closureType :: !Type, -- Type of closure (ToDo: remove)
+ closureDescr :: !String -- closure description (for profiling)
+ }
+
+ -- Constructor closures don't have a unique info table label (they use
+ -- the constructor's info table), and they don't have an SRT.
+ | ConInfo {
+ closureCon :: !DataCon,
+ closureSMRep :: !SMRep,
+ closureDllCon :: !Bool -- is in a separate DLL
+ }
+
+-- C_SRT is what StgSyn.SRT gets translated to...
+-- we add a label for the table, and expect only the 'offset/length' form
+
+data C_SRT = NoC_SRT
+ | C_SRT !CLabel !WordOff !StgHalfWord {-bitmap or escape-}
+
+needsSRT :: C_SRT -> Bool
+needsSRT NoC_SRT = False
+needsSRT (C_SRT _ _ _) = True
+\end{code}
+
+%************************************************************************
+%* *
+\subsubsection[LambdaFormInfo-datatype]{@LambdaFormInfo@: source-derivable info}
+%* *
+%************************************************************************
+
+Information about an identifier, from the code generator's point of
+view. Every identifier is bound to a LambdaFormInfo in the
+environment, which gives the code generator enough info to be able to
+tail call or return that identifier.
+
+Note that a closure is usually bound to an identifier, so a
+ClosureInfo contains a LambdaFormInfo.
+
+\begin{code}
+data LambdaFormInfo
+ = LFReEntrant -- Reentrant closure (a function)
+ TopLevelFlag -- True if top level
+ !Int -- Arity. Invariant: always > 0
+ !Bool -- True <=> no fvs
+ ArgDescr -- Argument descriptor (should really be in ClosureInfo)
+
+ | LFCon -- A saturated constructor application
+ DataCon -- The constructor
+
+ | LFThunk -- Thunk (zero arity)
+ TopLevelFlag
+ !Bool -- True <=> no free vars
+ !Bool -- True <=> updatable (i.e., *not* single-entry)
+ StandardFormInfo
+ !Bool -- True <=> *might* be a function type
+
+ | LFUnknown -- Used for function arguments and imported things.
+ -- We know nothing about this closure. Treat like
+ -- updatable "LFThunk"...
+ -- Imported things which we do know something about use
+ -- one of the other LF constructors (eg LFReEntrant for
+ -- known functions)
+ !Bool -- True <=> *might* be a function type
+
+ | LFLetNoEscape -- See LetNoEscape module for precise description of
+ -- these "lets".
+ !Int -- arity;
+
+ | LFBlackHole -- Used for the closures allocated to hold the result
+ -- of a CAF. We want the target of the update frame to
+ -- be in the heap, so we make a black hole to hold it.
+ CLabel -- Flavour (info label, eg CAF_BLACKHOLE_info).
+
+
+-------------------------
+-- An ArgDescr describes the argument pattern of a function
+
+data ArgDescr
+ = ArgSpec -- Fits one of the standard patterns
+ !Int -- RTS type identifier ARG_P, ARG_N, ...
+
+ | ArgGen -- General case
+ Liveness -- Details about the arguments
+
+
+-------------------------
+-- We represent liveness bitmaps as a Bitmap (whose internal
+-- representation really is a bitmap). These are pinned onto case return
+-- vectors to indicate the state of the stack for the garbage collector.
+--
+-- In the compiled program, liveness bitmaps that fit inside a single
+-- word (StgWord) are stored as a single word, while larger bitmaps are
+-- stored as a pointer to an array of words.
+
+data Liveness
+ = SmallLiveness -- Liveness info that fits in one word
+ StgWord -- Here's the bitmap
+
+ | BigLiveness -- Liveness info with a multi-word bitmap
+ CLabel -- Label for the bitmap
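+
+-- For example, a return point whose liveness bitmap fits in a single
+-- StgWord gets SmallLiveness; a frame with more slots than a word has
+-- bits gets BigLiveness, whose label names a multi-word bitmap emitted
+-- as a separate data block.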
+
+
+-------------------------
+-- StandardFormInfo tells whether this thunk has one of
+-- a small number of standard forms
+
+data StandardFormInfo
+ = NonStandardThunk
+ -- Not one of the standard forms
+
+ | SelectorThunk
+ -- A SelectorThunk is of form
+ -- case x of
+ -- con a1,..,an -> ak
+ -- and the constructor is from a single-constr type.
+ WordOff -- 0-origin offset of ak within the "goods" of
+ -- constructor (Recall that the a1,...,an may be laid
+ -- out in the heap in a non-obvious order.)
+
+ | ApThunk
+ -- An ApThunk is of form
+ -- x1 ... xn
+ -- The code for the thunk just pushes x2..xn on the stack and enters x1.
+ -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled
+ -- in the RTS to save space.
+ Int -- Arity, n
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-construction]{Functions which build LFInfos}
+%* *
+%************************************************************************
+
+\begin{code}
+mkLFReEntrant :: TopLevelFlag -- True of top level
+ -> [Id] -- Free vars
+ -> [Id] -- Args
+ -> ArgDescr -- Argument descriptor
+ -> LambdaFormInfo
+
+mkLFReEntrant top fvs args arg_descr
+ = LFReEntrant top (length args) (null fvs) arg_descr
+
+mkLFThunk :: Type -> TopLevelFlag -> [Id] -> UpdateFlag -> LambdaFormInfo
+mkLFThunk thunk_ty top fvs upd_flag
+ = ASSERT( not (isUpdatable upd_flag) || not (isUnLiftedType thunk_ty) )
+ LFThunk top (null fvs)
+ (isUpdatable upd_flag)
+ NonStandardThunk
+ (might_be_a_function thunk_ty)
+
+might_be_a_function :: Type -> Bool
+might_be_a_function ty
+ | Just (tc,_) <- splitTyConApp_maybe (repType ty),
+ not (isFunTyCon tc) && not (isAbstractTyCon tc) = False
+ -- don't forget to check for abstract types, which might
+ -- be functions too.
+ | otherwise = True
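+
+-- For example: applied to Int (a concrete, non-function tycon) this
+-- returns False; applied to Int -> Bool it returns True; and for a bare
+-- type variable, where splitTyConApp_maybe fails, it conservatively
+-- returns True.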
+\end{code}
+
+@mkConLFInfo@ is similar, for constructors.
+
+\begin{code}
+mkConLFInfo :: DataCon -> LambdaFormInfo
+mkConLFInfo con = LFCon con
+
+mkSelectorLFInfo id offset updatable
+ = LFThunk NotTopLevel False updatable (SelectorThunk offset)
+ (might_be_a_function (idType id))
+
+mkApLFInfo id upd_flag arity
+ = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity)
+ (might_be_a_function (idType id))
+\end{code}
+
+Miscellaneous LF-infos.
+
+\begin{code}
+mkLFArgument id = LFUnknown (might_be_a_function (idType id))
+
+mkLFLetNoEscape = LFLetNoEscape
+
+mkLFImported :: Id -> LambdaFormInfo
+mkLFImported id
+ = case idArity id of
+ n | n > 0 -> LFReEntrant TopLevel n True (panic "arg_descr") -- n > 0
+ other -> mkLFArgument id -- Not sure of exact arity
+\end{code}
+
+\begin{code}
+isLFThunk :: LambdaFormInfo -> Bool
+isLFThunk (LFThunk _ _ _ _ _) = True
+isLFThunk (LFBlackHole _) = True
+ -- return True for a blackhole: this function is used to determine
+ -- whether to use the thunk header in SMP mode, and a blackhole
+ -- must have one.
+isLFThunk _ = False
+\end{code}
+
+%************************************************************************
+%* *
+ Building ClosureInfos
+%* *
+%************************************************************************
+
+\begin{code}
+mkClosureInfo :: Bool -- Is static
+ -> Id
+ -> LambdaFormInfo
+ -> Int -> Int -- Total and pointer words
+ -> C_SRT
+ -> String -- String descriptor
+ -> ClosureInfo
+mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info descr
+ = ClosureInfo { closureName = name,
+ closureLFInfo = lf_info,
+ closureSMRep = sm_rep,
+ closureSRT = srt_info,
+ closureType = idType id,
+ closureDescr = descr }
+ where
+ name = idName id
+ sm_rep = chooseSMRep is_static lf_info tot_wds ptr_wds
+
+mkConInfo :: HomeModules
+ -> Bool -- Is static
+ -> DataCon
+ -> Int -> Int -- Total and pointer words
+ -> ClosureInfo
+mkConInfo hmods is_static data_con tot_wds ptr_wds
+ = ConInfo { closureSMRep = sm_rep,
+ closureCon = data_con,
+ closureDllCon = isDllName hmods (dataConName data_con) }
+ where
+ sm_rep = chooseSMRep is_static (mkConLFInfo data_con) tot_wds ptr_wds
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-sizes]{Functions about closure {\em sizes}}
+%* *
+%************************************************************************
+
+\begin{code}
+closureSize :: ClosureInfo -> WordOff
+closureSize cl_info = hdr_size + closureNonHdrSize cl_info
+ where hdr_size | closureIsThunk cl_info = thunkHdrSize
+ | otherwise = fixedHdrSize
+ -- All thunks use thunkHdrSize, even if they are non-updatable.
+ -- this is because we don't have separate closure types for
+ -- updatable vs. non-updatable thunks, so the GC can't tell the
+ -- difference. If we ever have significant numbers of non-
+ -- updatable thunks, it might be worth fixing this.
+
+closureNonHdrSize :: ClosureInfo -> WordOff
+closureNonHdrSize cl_info
+ = tot_wds + computeSlopSize tot_wds cl_info
+ where
+ tot_wds = closureGoodStuffSize cl_info
+
+closureGoodStuffSize :: ClosureInfo -> WordOff
+closureGoodStuffSize cl_info
+ = let (ptrs, nonptrs) = sizes_from_SMRep (closureSMRep cl_info)
+ in ptrs + nonptrs
+
+closurePtrsSize :: ClosureInfo -> WordOff
+closurePtrsSize cl_info
+ = let (ptrs, _) = sizes_from_SMRep (closureSMRep cl_info)
+ in ptrs
+
+-- not exported:
+sizes_from_SMRep :: SMRep -> (WordOff,WordOff)
+sizes_from_SMRep (GenericRep _ ptrs nonptrs _) = (ptrs, nonptrs)
+sizes_from_SMRep BlackHoleRep = (0, 0)
+\end{code}
+
+Computing slop size. WARNING: this looks dodgy --- it has deep
+knowledge of what the storage manager does with the various
+representations...
+
+Slop Requirements: every thunk gets an extra padding word in the
+header, which takes the updated value.
+
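+For example, on a target where @mIN_PAYLOAD_SIZE@ is 1, a dynamic
+non-updatable closure with an empty payload gets one word of slop
+(@max 0 (1 - 0)@), while one with any payload at all gets none;
+updatable and static closures get @min_upd_size@, which is zero here
+because the SMP thunk header already reserves the needed space.
+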
+\begin{code}
+slopSize cl_info = computeSlopSize payload_size cl_info
+ where payload_size = closureGoodStuffSize cl_info
+
+computeSlopSize :: WordOff -> ClosureInfo -> WordOff
+computeSlopSize payload_size cl_info
+ = max 0 (minPayloadSize smrep updatable - payload_size)
+ where
+ smrep = closureSMRep cl_info
+ updatable = closureNeedsUpdSpace cl_info
+
+-- we leave space for an update if either (a) the closure is updatable
+-- or (b) it is a static thunk. This is because a static thunk needs
+-- a static link field in a predictable place (after the slop), regardless
+-- of whether it is updatable or not.
+closureNeedsUpdSpace (ClosureInfo { closureLFInfo =
+ LFThunk TopLevel _ _ _ _ }) = True
+closureNeedsUpdSpace cl_info = closureUpdReqd cl_info
+
+minPayloadSize :: SMRep -> Bool -> WordOff
+minPayloadSize smrep updatable
+ = case smrep of
+ BlackHoleRep -> min_upd_size
+ GenericRep _ _ _ _ | updatable -> min_upd_size
+ GenericRep True _ _ _ -> 0 -- static
+ GenericRep False _ _ _ -> mIN_PAYLOAD_SIZE -- dynamic
+ where
+ min_upd_size =
+ ASSERT(mIN_PAYLOAD_SIZE <= sIZEOF_StgSMPThunkHeader)
+ 0 -- check that we already have enough
+ -- room for mIN_SIZE_NonUpdHeapObject,
+ -- due to the extra header word in SMP
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[SMreps]{Choosing SM reps}
+%* *
+%************************************************************************
+
+\begin{code}
+chooseSMRep
+ :: Bool -- True <=> static closure
+ -> LambdaFormInfo
+ -> WordOff -> WordOff -- Tot wds, ptr wds
+ -> SMRep
+
+chooseSMRep is_static lf_info tot_wds ptr_wds
+ = let
+ nonptr_wds = tot_wds - ptr_wds
+ closure_type = getClosureType is_static ptr_wds lf_info
+ in
+ GenericRep is_static ptr_wds nonptr_wds closure_type
+
+-- We *do* get non-updatable top-level thunks sometimes. eg. f = g
+-- gets compiled to a jump to g (if g has non-zero arity), instead of
+-- messing around with update frames and PAPs. We set the closure type
+-- to FUN_STATIC in this case.
+
+getClosureType :: Bool -> WordOff -> LambdaFormInfo -> ClosureType
+getClosureType is_static ptr_wds lf_info
+ = case lf_info of
+ LFCon con | is_static && ptr_wds == 0 -> ConstrNoCaf
+ | otherwise -> Constr
+ LFReEntrant _ _ _ _ -> Fun
+ LFThunk _ _ _ (SelectorThunk _) _ -> ThunkSelector
+ LFThunk _ _ _ _ _ -> Thunk
+ _ -> panic "getClosureType"
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-4-questions]{Four major questions about @ClosureInfo@}
+%* *
+%************************************************************************
+
+Be sure to see the stg-details notes about these...
+
+\begin{code}
+nodeMustPointToIt :: LambdaFormInfo -> Bool
+nodeMustPointToIt (LFReEntrant top _ no_fvs _)
+ = not no_fvs || -- Certainly if it has fvs we need to point to it
+ isNotTopLevel top
+ -- If it is not top level we will point to it
+ -- We can have a \r closure with no_fvs which
+ -- is not top level, as the special case in
+ -- cgRhsClosure has been disabled in favour of let floating
+
+ -- For lex_profiling we also access the cost centre for a
+ -- non-inherited function i.e. not top level
+ -- the not top case above ensures this is ok.
+
+nodeMustPointToIt (LFCon _) = True
+
+ -- Strictly speaking, the above two don't need Node to point
+ -- to it if the arity = 0. But this is a *really* unlikely
+ -- situation. If we know it's nil (say) and we are entering
+ -- it. Eg: let x = [] in x then we will certainly have inlined
+ -- x, since nil is a simple atom. So we gain little by not
+ -- having Node point to known zero-arity things. On the other
+ -- hand, we do lose something; Patrick's code for figuring out
+ -- when something has been updated but not entered relies on
+ -- having Node point to the result of an update. SLPJ
+ -- 27/11/92.
+
+nodeMustPointToIt (LFThunk _ no_fvs updatable NonStandardThunk _)
+ = updatable || not no_fvs || opt_SccProfilingOn
+ -- For the non-updatable (single-entry case):
+ --
+ -- True if has fvs (in which case we need access to them, and we
+ -- should black-hole it)
+ -- or profiling (in which case we need to recover the cost centre
+ -- from inside it)
+
+nodeMustPointToIt (LFThunk _ no_fvs updatable some_standard_form_thunk _)
+ = True -- Node must point to any standard-form thunk
+
+nodeMustPointToIt (LFUnknown _) = True
+nodeMustPointToIt (LFBlackHole _) = True -- BH entry may require Node to point
+nodeMustPointToIt (LFLetNoEscape _) = False
+\end{code}
+
+The entry conventions depend on the type of closure being entered,
+whether or not it has free variables, and whether we're running
+sequentially or in parallel.
+
+\begin{tabular}{lllll}
+Closure Characteristics & Parallel & Node Req'd & Argument Passing & Enter Via \\
+Unknown & no & yes & stack & node \\
+Known fun ($\ge$ 1 arg), no fvs & no & no & registers & fast entry (enough args) \\
+\ & \ & \ & \ & slow entry (otherwise) \\
+Known fun ($\ge$ 1 arg), fvs & no & yes & registers & fast entry (enough args) \\
+0 arg, no fvs @\r,\s@ & no & no & n/a & direct entry \\
+0 arg, no fvs @\u@ & no & yes & n/a & node \\
+0 arg, fvs @\r,\s@ & no & yes & n/a & direct entry \\
+0 arg, fvs @\u@ & no & yes & n/a & node \\
+
+Unknown & yes & yes & stack & node \\
+Known fun ($\ge$ 1 arg), no fvs & yes & no & registers & fast entry (enough args) \\
+\ & \ & \ & \ & slow entry (otherwise) \\
+Known fun ($\ge$ 1 arg), fvs & yes & yes & registers & node \\
+0 arg, no fvs @\r,\s@ & yes & no & n/a & direct entry \\
+0 arg, no fvs @\u@ & yes & yes & n/a & node \\
+0 arg, fvs @\r,\s@ & yes & yes & n/a & node \\
+0 arg, fvs @\u@ & yes & yes & n/a & node\\
+\end{tabular}
+
+When black-holing, single-entry closures could also be entered via node
+(rather than directly) to catch double-entry.
+
+\begin{code}
+data CallMethod
+ = EnterIt -- no args, not a function
+
+ | JumpToIt CLabel -- no args, not a function, but we
+ -- know what its entry code is
+
+ | ReturnIt -- it's a function, but we have
+ -- zero args to apply to it, so just
+ -- return it.
+
+ | ReturnCon DataCon -- It's a data constructor, just return it
+
+ | SlowCall -- Unknown fun, or known fun with
+ -- too few args.
+
+ | DirectEntry -- Jump directly, with args in regs
+ CLabel -- The code label
+ Int -- Its arity
+
+getCallMethod :: HomeModules
+ -> Name -- Function being applied
+ -> LambdaFormInfo -- Its info
+ -> Int -- Number of available arguments
+ -> CallMethod
+
+getCallMethod hmods name lf_info n_args
+ | nodeMustPointToIt lf_info && opt_Parallel
+ = -- If we're parallel, then we must always enter via node.
+ -- The reason is that the closure may have been
+ -- fetched since we allocated it.
+ EnterIt
+
+getCallMethod hmods name (LFReEntrant _ arity _ _) n_args
+ | n_args == 0 = ASSERT( arity /= 0 )
+ ReturnIt -- No args at all
+ | n_args < arity = SlowCall -- Not enough args
+ | otherwise = DirectEntry (enterIdLabel hmods name) arity
+
+getCallMethod hmods name (LFCon con) n_args
+ = ASSERT( n_args == 0 )
+ ReturnCon con
+
+getCallMethod hmods name (LFThunk _ _ updatable std_form_info is_fun) n_args
+ | is_fun -- Must always "call" a function-typed
+ = SlowCall -- thing, cannot just enter it [in eval/apply, the entry code
+ -- is the fast-entry code]
+
+ | updatable || opt_DoTickyProfiling -- to catch double entry
+ {- OLD: || opt_SMP
+ I decided to remove this, because in SMP mode it doesn't matter
+ if we enter the same thunk multiple times, so the optimisation
+ of jumping directly to the entry code is still valid. --SDM
+ -}
+ = ASSERT( n_args == 0 ) EnterIt
+
+ | otherwise -- Jump direct to code for single-entry thunks
+ = ASSERT( n_args == 0 )
+ JumpToIt (thunkEntryLabel hmods name std_form_info updatable)
+
+getCallMethod hmods name (LFUnknown True) n_args
+ = SlowCall -- might be a function
+
+getCallMethod hmods name (LFUnknown False) n_args
+ = ASSERT2 ( n_args == 0, ppr name <+> ppr n_args )
+ EnterIt -- Not a function
+
+getCallMethod hmods name (LFBlackHole _) n_args
+ = SlowCall -- Presumably the black hole has by now
+ -- been updated, but we don't know with
+ -- what, so we slow call it
+
+getCallMethod hmods name (LFLetNoEscape 0) n_args
+ = JumpToIt (enterReturnPtLabel (nameUnique name))
+
+getCallMethod hmods name (LFLetNoEscape arity) n_args
+ | n_args == arity = DirectEntry (enterReturnPtLabel (nameUnique name)) arity
+ | otherwise = pprPanic "let-no-escape: " (ppr name <+> ppr arity)
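+
+-- For example, in the sequential case a known top-level function of
+-- arity 2 applied to two arguments gets DirectEntry (jump straight to
+-- its code with args in registers); applied to one argument it gets
+-- SlowCall; applied to none, ReturnIt.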
+
+blackHoleOnEntry :: ClosureInfo -> Bool
+-- Static closures are never themselves black-holed.
+-- Updatable ones will be overwritten with a CAFList cell, which points to a
+-- black hole;
+-- Single-entry ones have no fvs to plug, and we trust they don't form part
+-- of a loop.
+
+blackHoleOnEntry ConInfo{} = False
+blackHoleOnEntry (ClosureInfo { closureLFInfo = lf_info, closureSMRep = rep })
+ | isStaticRep rep
+ = False -- Never black-hole a static closure
+
+ | otherwise
+ = case lf_info of
+ LFReEntrant _ _ _ _ -> False
+ LFLetNoEscape _ -> False
+ LFThunk _ no_fvs updatable _ _
+ -> if updatable
+ then not opt_OmitBlackHoling
+ else opt_DoTickyProfiling || not no_fvs
+ -- the former to catch double entry,
+ -- and the latter to plug space-leaks. KSW/SDM 1999-04.
+
+ other -> panic "blackHoleOnEntry" -- Should never happen
+
+isStandardFormThunk :: LambdaFormInfo -> Bool
+isStandardFormThunk (LFThunk _ _ _ (SelectorThunk _) _) = True
+isStandardFormThunk (LFThunk _ _ _ (ApThunk _) _) = True
+isStandardFormThunk other_lf_info = False
+
+isKnownFun :: LambdaFormInfo -> Bool
+isKnownFun (LFReEntrant _ _ _ _) = True
+isKnownFun (LFLetNoEscape _) = True
+isKnownFun _ = False
+\end{code}
+
+-----------------------------------------------------------------------------
+SRT-related stuff
+
+\begin{code}
+staticClosureNeedsLink :: ClosureInfo -> Bool
+-- A static closure needs a link field to aid the GC when traversing
+-- the static closure graph. But it only needs such a field if either
+-- a) it has an SRT
+-- b) it's a constructor with one or more pointer fields
+-- In case (b), the constructor's fields themselves play the role
+-- of the SRT.
+staticClosureNeedsLink (ClosureInfo { closureSRT = srt })
+ = needsSRT srt
+staticClosureNeedsLink (ConInfo { closureSMRep = sm_rep, closureCon = con })
+ = not (isNullaryRepDataCon con) && not_nocaf_constr
+ where
+ not_nocaf_constr =
+ case sm_rep of
+ GenericRep _ _ _ ConstrNoCaf -> False
+ _other -> True
+\end{code}
+
+Avoiding generating entries and info tables
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+At present, for every function we generate all of the following,
+just in case. But they aren't always all needed, as noted below:
+
+[NB1: all of this applies only to *functions*. Thunks always
+have closure, info table, and entry code.]
+
+[NB2: All are needed if the function is *exported*, just to play safe.]
+
+
+* Fast-entry code ALWAYS NEEDED
+
+* Slow-entry code
+ Needed iff (a) we have any un-saturated calls to the function
+ OR (b) the function is passed as an arg
+ OR (c) we're in the parallel world and the function has free vars
+ [Reason: in parallel world, we always enter functions
+ with free vars via the closure.]
+
+* The function closure
+ Needed iff (a) we have any un-saturated calls to the function
+ OR (b) the function is passed as an arg
+ OR (c) if the function has free vars (ie not top level)
+
+ Why case (a) here? Because if the arg-satis check fails,
+ UpdatePAP stuffs a pointer to the function closure in the PAP.
+ [Could be changed; UpdatePAP could stuff in a code ptr instead,
+ but doesn't seem worth it.]
+
+ [NB: these conditions imply that we might need the closure
+ without the slow-entry code. Here's how.
+
+ f x y = let g w = ...x..y..w...
+ in
+ ...(g t)...
+
+ Here we need a closure for g which contains x and y,
+ but since the calls are all saturated we just jump to the
+ fast entry point for g, with R1 pointing to the closure for g.]
+
+
+* Standard info table
+ Needed iff (a) we have any un-saturated calls to the function
+ OR (b) the function is passed as an arg
+ OR (c) the function has free vars (ie not top level)
+
+ NB. In the sequential world, (c) is only required so that the function closure has
+ an info table to point to, to keep the storage manager happy.
+ If (c) alone is true we could fake up an info table by choosing
+ one of a standard family of info tables, whose entry code just
+ bombs out.
+
+ [NB In the parallel world (c) is needed regardless because
+ we enter functions with free vars via the closure.]
+
+ If (c) is retained, then we'll sometimes generate an info table
+ (for storage mgr purposes) without slow-entry code. Then we need
+ to use an error label in the info table to substitute for the absent
+ slow entry code.
+
+\begin{code}
+staticClosureRequired
+ :: Name
+ -> StgBinderInfo
+ -> LambdaFormInfo
+ -> Bool
+staticClosureRequired binder bndr_info
+ (LFReEntrant top_level _ _ _) -- It's a function
+ = ASSERT( isTopLevel top_level )
+ -- Assumption: it's a top-level, no-free-var binding
+ not (satCallsOnly bndr_info)
+
+staticClosureRequired binder other_binder_info other_lf_info = True
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-misc-funs]{Misc functions about @ClosureInfo@, etc.}
+%* *
+%************************************************************************
+
+\begin{code}
+
+isStaticClosure :: ClosureInfo -> Bool
+isStaticClosure cl_info = isStaticRep (closureSMRep cl_info)
+
+closureUpdReqd :: ClosureInfo -> Bool
+closureUpdReqd ClosureInfo{ closureLFInfo = lf_info } = lfUpdatable lf_info
+closureUpdReqd ConInfo{} = False
+
+lfUpdatable :: LambdaFormInfo -> Bool
+lfUpdatable (LFThunk _ _ upd _ _) = upd
+lfUpdatable (LFBlackHole _) = True
+ -- Black-hole closures are allocated to receive the results of an
+ -- alg case with a named default... so they need to be updated.
+lfUpdatable _ = False
+
+closureIsThunk :: ClosureInfo -> Bool
+closureIsThunk ClosureInfo{ closureLFInfo = lf_info } = isLFThunk lf_info
+closureIsThunk ConInfo{} = False
+
+closureSingleEntry :: ClosureInfo -> Bool
+closureSingleEntry (ClosureInfo { closureLFInfo = LFThunk _ _ upd _ _}) = not upd
+closureSingleEntry other_closure = False
+
+closureReEntrant :: ClosureInfo -> Bool
+closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant _ _ _ _ }) = True
+closureReEntrant other_closure = False
+
+isConstrClosure_maybe :: ClosureInfo -> Maybe DataCon
+isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con
+isConstrClosure_maybe _ = Nothing
+
+closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr)
+closureFunInfo (ClosureInfo { closureLFInfo = LFReEntrant _ arity _ arg_desc})
+ = Just (arity, arg_desc)
+closureFunInfo _
+ = Nothing
+\end{code}
+
+\begin{code}
+isToplevClosure :: ClosureInfo -> Bool
+isToplevClosure (ClosureInfo { closureLFInfo = lf_info })
+ = case lf_info of
+ LFReEntrant TopLevel _ _ _ -> True
+ LFThunk TopLevel _ _ _ _ -> True
+ other -> False
+isToplevClosure _ = False
+\end{code}
+
+Label generation.
+
+\begin{code}
+infoTableLabelFromCI :: ClosureInfo -> CLabel
+infoTableLabelFromCI (ClosureInfo { closureName = name,
+ closureLFInfo = lf_info,
+ closureSMRep = rep })
+ = case lf_info of
+ LFBlackHole info -> info
+
+ LFThunk _ _ upd_flag (SelectorThunk offset) _ ->
+ mkSelectorInfoLabel upd_flag offset
+
+ LFThunk _ _ upd_flag (ApThunk arity) _ ->
+ mkApInfoTableLabel upd_flag arity
+
+ LFThunk{} -> mkLocalInfoTableLabel name
+
+ LFReEntrant _ _ _ _ -> mkLocalInfoTableLabel name
+
+ other -> panic "infoTableLabelFromCI"
+
+infoTableLabelFromCI (ConInfo { closureCon = con,
+ closureSMRep = rep,
+ closureDllCon = dll })
+ | isStaticRep rep = mkStaticInfoTableLabel name dll
+ | otherwise = mkConInfoTableLabel name dll
+ where
+ name = dataConName con
+
+-- ClosureInfo for a closure (as opposed to a constructor) is always local
+closureLabelFromCI (ClosureInfo { closureName = nm }) = mkLocalClosureLabel nm
+closureLabelFromCI _ = panic "closureLabelFromCI"
+
+-- thunkEntryLabel is a local helper function, not exported. It's used from both
+-- entryLabelFromCI and getCallMethod.
+
+thunkEntryLabel hmods thunk_id (ApThunk arity) is_updatable
+ = enterApLabel is_updatable arity
+thunkEntryLabel hmods thunk_id (SelectorThunk offset) upd_flag
+ = enterSelectorLabel upd_flag offset
+thunkEntryLabel hmods thunk_id _ is_updatable
+ = enterIdLabel hmods thunk_id
+
+enterApLabel is_updatable arity
+ | tablesNextToCode = mkApInfoTableLabel is_updatable arity
+ | otherwise = mkApEntryLabel is_updatable arity
+
+enterSelectorLabel upd_flag offset
+ | tablesNextToCode = mkSelectorInfoLabel upd_flag offset
+ | otherwise = mkSelectorEntryLabel upd_flag offset
+
+enterIdLabel hmods id
+ | tablesNextToCode = mkInfoTableLabel hmods id
+ | otherwise = mkEntryLabel hmods id
+
+enterLocalIdLabel id
+ | tablesNextToCode = mkLocalInfoTableLabel id
+ | otherwise = mkLocalEntryLabel id
+
+enterReturnPtLabel name
+ | tablesNextToCode = mkReturnInfoLabel name
+ | otherwise = mkReturnPtLabel name
+\end{code}
+
+
+We need a black-hole closure info to pass to @allocDynClosure@ when we
+want to allocate the black hole on entry to a CAF. These are the only
+ways to build an LFBlackHole, maintaining the invariant that it really
+is a black hole and not something else.
+
+\begin{code}
+cafBlackHoleClosureInfo (ClosureInfo { closureName = nm,
+ closureType = ty })
+ = ClosureInfo { closureName = nm,
+ closureLFInfo = LFBlackHole mkCAFBlackHoleInfoTableLabel,
+ closureSMRep = BlackHoleRep,
+ closureSRT = NoC_SRT,
+ closureType = ty,
+ closureDescr = "" }
+cafBlackHoleClosureInfo _ = panic "cafBlackHoleClosureInfo"
+
+seCafBlackHoleClosureInfo (ClosureInfo { closureName = nm,
+ closureType = ty })
+ = ClosureInfo { closureName = nm,
+ closureLFInfo = LFBlackHole mkSECAFBlackHoleInfoTableLabel,
+ closureSMRep = BlackHoleRep,
+ closureSRT = NoC_SRT,
+ closureType = ty,
+ closureDescr = "" }
+seCafBlackHoleClosureInfo _ = panic "seCafBlackHoleClosureInfo"
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-Profiling-funs]{Misc functions for profiling info.}
+%* *
+%************************************************************************
+
+Profiling requires two pieces of information to be determined for
+each closure's info table --- description and type.
+
+The description is stored directly in the @CClosureInfoTable@ when the
+info table is built.
+
+The type is determined from the type information stored with the @Id@
+in the closure info using @closureTypeDescr@.
+
+\begin{code}
+closureValDescr, closureTypeDescr :: ClosureInfo -> String
+closureValDescr (ClosureInfo {closureDescr = descr})
+ = descr
+closureValDescr (ConInfo {closureCon = con})
+ = occNameString (getOccName con)
+
+closureTypeDescr (ClosureInfo { closureType = ty })
+ = getTyDescription ty
+closureTypeDescr (ConInfo { closureCon = data_con })
+ = occNameString (getOccName (dataConTyCon data_con))
+
+getTyDescription :: Type -> String
+getTyDescription ty
+ = case (tcSplitSigmaTy ty) of { (_, _, tau_ty) ->
+ case tau_ty of
+ TyVarTy _ -> "*"
+ AppTy fun _ -> getTyDescription fun
+ FunTy _ res -> '-' : '>' : fun_result res
+ TyConApp tycon _ -> getOccString tycon
+ NoteTy (FTVNote _) ty -> getTyDescription ty
+ PredTy sty -> getPredTyDescription sty
+ ForAllTy _ ty -> getTyDescription ty
+ }
+ where
+ fun_result (FunTy _ res) = '>' : fun_result res
+ fun_result other = getTyDescription other
+
+getPredTyDescription (ClassP cl tys) = getOccString cl
+getPredTyDescription (IParam ip ty) = getOccString (ipNameName ip)
+\end{code}
+
+
diff --git a/compiler/codeGen/ClosureInfo.lhs-boot b/compiler/codeGen/ClosureInfo.lhs-boot
new file mode 100644
index 0000000000..b069905d3e
--- /dev/null
+++ b/compiler/codeGen/ClosureInfo.lhs-boot
@@ -0,0 +1,6 @@
+\begin{code}
+module ClosureInfo where
+
+data LambdaFormInfo
+data ClosureInfo
+\end{code}
\ No newline at end of file
diff --git a/compiler/codeGen/CodeGen.lhs b/compiler/codeGen/CodeGen.lhs
new file mode 100644
index 0000000000..e8d83a5a43
--- /dev/null
+++ b/compiler/codeGen/CodeGen.lhs
@@ -0,0 +1,343 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+\section[CodeGen]{@CodeGen@: main module of the code generator}
+
+This module says how things get going at the top level.
+
+@codeGen@ is the interface to the outside world. The \tr{cgTop*}
+functions drive the mangling of top-level bindings.
+
+%************************************************************************
+%* *
+\subsection[codeGen-outside-interface]{The code generator's offering to the world}
+%* *
+%************************************************************************
+
+\begin{code}
+module CodeGen ( codeGen ) where
+
+#include "HsVersions.h"
+
+-- Kludge (??) so that CgExpr is reached via at least one non-SOURCE
+-- import. Before, that wasn't the case, and CM therefore didn't
+-- bother to compile it.
+import CgExpr ( {-NOTHING!-} ) -- DO NOT DELETE THIS IMPORT
+import CgProf
+import CgMonad
+import CgBindery ( CgIdInfo, addBindC, addBindsC, getCgIdInfo,
+ cgIdInfoId )
+import CgClosure ( cgTopRhsClosure )
+import CgCon ( cgTopRhsCon, cgTyCon )
+import CgUtils ( cmmRegOffW, emitRODataLits, cmmNeWord )
+
+import CLabel
+import Cmm
+import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr )
+import PprCmm ( pprCmms )
+import MachOp ( wordRep, MachHint(..) )
+
+import StgSyn
+import PrelNames ( gHC_PRIM, rOOT_MAIN, mAIN, pREL_TOP_HANDLER )
+import Packages ( HomeModules )
+import DynFlags ( DynFlags(..), DynFlag(..), dopt )
+import StaticFlags ( opt_SccProfilingOn )
+
+import HscTypes ( ForeignStubs(..), TypeEnv, typeEnvTyCons )
+import CostCentre ( CollectedCCs )
+import Id ( Id, idName, setIdName )
+import Name ( nameSrcLoc, nameOccName, nameUnique, isInternalName, mkExternalName )
+import OccName ( mkLocalOcc )
+import TyCon ( TyCon )
+import Module ( Module, mkModule )
+import ErrUtils ( dumpIfSet_dyn, showPass )
+import Panic ( assertPanic )
+
+#ifdef DEBUG
+import Outputable
+#endif
+\end{code}
+
+\begin{code}
+codeGen :: DynFlags
+ -> HomeModules
+ -> Module
+ -> [TyCon]
+ -> ForeignStubs
+ -> [Module] -- directly-imported modules
+ -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering.
+ -> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs
+ -> IO [Cmm] -- Output
+
+codeGen dflags hmods this_mod data_tycons foreign_stubs imported_mods
+ cost_centre_info stg_binds
+ = do
+ { showPass dflags "CodeGen"
+ ; let way = buildTag dflags
+ main_mod = mainModIs dflags
+
+-- Why?
+-- ; mapM_ (\x -> seq x (return ())) data_tycons
+
+ ; code_stuff <- initC dflags hmods this_mod $ do
+ { cmm_binds <- mapM (getCmm . cgTopBinding dflags hmods) stg_binds
+ ; cmm_tycons <- mapM cgTyCon data_tycons
+ ; cmm_init <- getCmm (mkModuleInit dflags hmods way cost_centre_info
+ this_mod main_mod
+ foreign_stubs imported_mods)
+ ; return (cmm_binds ++ concat cmm_tycons ++ [cmm_init])
+ }
+ -- Put datatype_stuff after code_stuff, because the
+ -- datatype closure table (for enumeration types) refers to
+ -- (say) PrelBase_True_closure, which is defined in
+ -- code_stuff
+
+ ; dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" (pprCmms code_stuff)
+
+ ; return code_stuff }
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[codegen-init]{Module initialisation code}
+%* *
+%************************************************************************
+
+/* -----------------------------------------------------------------------------
+ Module initialisation
+
+ The module initialisation code looks like this, roughly:
+
+ FN(__stginit_Foo) {
+ JMP_(__stginit_Foo_1_p)
+ }
+
+ FN(__stginit_Foo_1_p) {
+ ...
+ }
+
+ We have one version of the init code with a module version and the
+ 'way' attached to it. The version number helps to catch cases
+ where modules are not compiled in dependency order before being
+ linked: if a module has been compiled since any modules which depend on
+ it, then the latter modules will refer to a different version in their
+ init blocks and a link error will ensue.
+
+ The 'way' suffix helps to catch cases where modules compiled in different
+ ways are linked together (eg. profiled and non-profiled).
+
+ We provide a plain, unadorned, version of the module init code
+ which just jumps to the version with the label and way attached. The
+ reason for this is that when using foreign exports, the caller of
+ startupHaskell() must supply the name of the init function for the "top"
+ module in the program, and we don't want to require that this name
+ has the version and way info appended to it.
+ -------------------------------------------------------------------------- */
+
+We initialise the module tree by keeping a work-stack,
+ * pointed to by Sp
+ * that grows downward
+ * Sp points to the last occupied slot
+
+
+\begin{code}
+mkModuleInit
+ :: DynFlags
+ -> HomeModules
+ -> String -- the "way"
+ -> CollectedCCs -- cost centre info
+ -> Module
+ -> Module -- name of the Main module
+ -> ForeignStubs
+ -> [Module]
+ -> Code
+mkModuleInit dflags hmods way cost_centre_info this_mod main_mod foreign_stubs imported_mods
+ = do {
+ if opt_SccProfilingOn
+ then do { -- Allocate the static boolean that records if this
+ -- module has been registered already
+ emitData Data [CmmDataLabel moduleRegdLabel,
+ CmmStaticLit zeroCLit]
+
+ ; emitSimpleProc real_init_lbl $ do
+ { ret_blk <- forkLabelledCode ret_code
+
+ ; init_blk <- forkLabelledCode $ do
+ { mod_init_code; stmtC (CmmBranch ret_blk) }
+
+ ; stmtC (CmmCondBranch (cmmNeWord (CmmLit zeroCLit) mod_reg_val)
+ ret_blk)
+ ; stmtC (CmmBranch init_blk)
+ }
+ }
+ else emitSimpleProc real_init_lbl ret_code
+
+ -- Make the "plain" procedure jump to the "real" init procedure
+ ; emitSimpleProc plain_init_lbl jump_to_init
+
+ -- When compiling the module in which the 'main' function lives,
+ -- (that is, this_mod == main_mod)
+ -- we inject an extra stg_init procedure for stg_init_ZCMain, for the
+ -- RTS to invoke. We must consult the -main-is flag in case the
+ -- user specified a different function to Main.main
+ ; whenC (this_mod == main_mod)
+ (emitSimpleProc plain_main_init_lbl jump_to_init)
+ }
+ where
+ plain_init_lbl = mkPlainModuleInitLabel hmods this_mod
+ real_init_lbl = mkModuleInitLabel hmods this_mod way
+ plain_main_init_lbl = mkPlainModuleInitLabel hmods rOOT_MAIN
+
+ jump_to_init = stmtC (CmmJump (mkLblExpr real_init_lbl) [])
+
+ mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) wordRep
+
+ -- Main refers to GHC.TopHandler.runIO, so make sure we call the
+ -- init function for GHC.TopHandler.
+ extra_imported_mods
+ | this_mod == main_mod = [pREL_TOP_HANDLER]
+ | otherwise = []
+
+ mod_init_code = do
+ { -- Set mod_reg to 1 to record that we've been here
+ stmtC (CmmStore (mkLblExpr moduleRegdLabel) (CmmLit (mkIntCLit 1)))
+
+ -- Now do local stuff
+ ; initCostCentres cost_centre_info
+ ; mapCs (registerModuleImport hmods way)
+ (imported_mods++extra_imported_mods)
+ }
+
+	-- The return-code pops the work stack by
+	-- incrementing Sp, and then jumps to the popped item
+ ret_code = stmtsC [ CmmAssign spReg (cmmRegOffW spReg 1)
+ , CmmJump (CmmLoad (cmmRegOffW spReg (-1)) wordRep) [] ]
+
+-----------------------
+registerModuleImport :: HomeModules -> String -> Module -> Code
+registerModuleImport hmods way mod
+ | mod == gHC_PRIM
+ = nopC
+ | otherwise -- Push the init procedure onto the work stack
+ = stmtsC [ CmmAssign spReg (cmmRegOffW spReg (-1))
+ , CmmStore (CmmReg spReg) (mkLblExpr (mkModuleInitLabel hmods mod way)) ]
+\end{code}
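+
+Note that the push in @registerModuleImport@ is the mirror image of
+the pop in @ret_code@ above: the push decrements Sp and stores the
+import's init label in the newly-occupied slot, while the pop
+increments Sp and jumps to the item it uncovered.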
+
+
+
+Cost-centre profiling: besides the usual stuff, we must produce
+declarations for the cost-centres defined in this module.
+
+(The local cost-centres involved in this are passed into the
+code-generator.)
+
+\begin{code}
+initCostCentres :: CollectedCCs -> Code
+-- Emit the declarations, and return code to register them
+initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs)
+ | not opt_SccProfilingOn = nopC
+ | otherwise
+ = do { mapM_ emitCostCentreDecl local_CCs
+ ; mapM_ emitCostCentreStackDecl singleton_CCSs
+ ; mapM_ emitRegisterCC local_CCs
+ ; mapM_ emitRegisterCCS singleton_CCSs
+ }
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[codegen-top-bindings]{Converting top-level STG bindings}
+%* *
+%************************************************************************
+
+@cgTopBinding@ is only used for top-level bindings, since they need
+to be allocated statically (not in the heap) and need to be labelled.
+No unboxed bindings can happen at top level.
+
+In the code below, the static bindings are accumulated in the
+@MkCgState@, and transferred into the ``statics'' slot by @forkStatics@.
+This is so that we can write the top level processing in a compositional
+style, with the increasing static environment being plumbed as a state
+variable.
+
+\begin{code}
+cgTopBinding :: DynFlags -> HomeModules -> (StgBinding,[(Id,[Id])]) -> Code
+cgTopBinding dflags hmods (StgNonRec id rhs, srts)
+ = do { id' <- maybeExternaliseId dflags id
+ ; mapM_ (mkSRT hmods [id']) srts
+ ; (id,info) <- cgTopRhs id' rhs
+ ; addBindC id info -- Add the *un-externalised* Id to the envt,
+ -- so we find it when we look up occurrences
+ }
+
+cgTopBinding dflags hmods (StgRec pairs, srts)
+ = do { let (bndrs, rhss) = unzip pairs
+ ; bndrs' <- mapFCs (maybeExternaliseId dflags) bndrs
+ ; let pairs' = zip bndrs' rhss
+ ; mapM_ (mkSRT hmods bndrs') srts
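+	  -- fixC ties a recursive knot: the CgIdInfos that cgTopRhs
+	  -- will eventually produce are added to the environment
+	  -- *before* the right-hand sides are compiled, so that the
+	  -- bindings of a recursive group can refer to one another.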
+ ; _new_binds <- fixC (\ new_binds -> do
+ { addBindsC new_binds
+ ; mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' })
+ ; nopC }
+
+mkSRT :: HomeModules -> [Id] -> (Id,[Id]) -> Code
+mkSRT hmods these (id,[]) = nopC
+mkSRT hmods these (id,ids)
+ = do { ids <- mapFCs remap ids
+ ; id <- remap id
+ ; emitRODataLits (mkSRTLabel (idName id))
+ (map (CmmLabel . mkClosureLabel hmods . idName) ids)
+ }
+ where
+ -- Sigh, better map all the ids against the environment in
+ -- case they've been externalised (see maybeExternaliseId below).
+ remap id = case filter (==id) these of
+ (id':_) -> returnFC id'
+ [] -> do { info <- getCgIdInfo id; return (cgIdInfoId info) }
+
+-- Urgh! I tried moving the forkStatics call from the rhss of cgTopRhs
+-- to enclose the listFCs in cgTopBinding, but that tickled the
+-- statics "error" call in initC. I DON'T UNDERSTAND WHY!
+
+cgTopRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo)
+ -- The Id is passed along for setting up a binding...
+ -- It's already been externalised if necessary
+
+cgTopRhs bndr (StgRhsCon cc con args)
+ = forkStatics (cgTopRhsCon bndr con args)
+
+cgTopRhs bndr (StgRhsClosure cc bi fvs upd_flag srt args body)
+ = ASSERT(null fvs) -- There should be no free variables
+ setSRTLabel (mkSRTLabel (idName bndr)) $
+ forkStatics (cgTopRhsClosure bndr cc bi srt upd_flag args body)
+\end{code}
+
+
+%************************************************************************
+%* *
+\subsection{Stuff to support splitting}
+%* *
+%************************************************************************
+
+If we're splitting the object, we need to externalise all the top-level names
+(and then make sure we only use the externalised one in any C label we use
+which refers to this name).
+
+\begin{code}
+maybeExternaliseId :: DynFlags -> Id -> FCode Id
+maybeExternaliseId dflags id
+ | dopt Opt_SplitObjs dflags, -- Externalise the name for -split-objs
+ isInternalName name = do { mod <- moduleName
+ ; returnFC (setIdName id (externalise mod)) }
+ | otherwise = returnFC id
+ where
+ externalise mod = mkExternalName uniq mod new_occ Nothing loc
+ name = idName id
+ uniq = nameUnique name
+ new_occ = mkLocalOcc uniq (nameOccName name)
+ loc = nameSrcLoc name
+ -- We want to conjure up a name that can't clash with any
+ -- existing name. So we generate
+ -- Mod_$L243foo
+ -- where 243 is the unique.
+\end{code}
diff --git a/compiler/codeGen/SMRep.lhs b/compiler/codeGen/SMRep.lhs
new file mode 100644
index 0000000000..c807703b13
--- /dev/null
+++ b/compiler/codeGen/SMRep.lhs
@@ -0,0 +1,361 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+\section[SMRep]{Storage manager representations of closures}
+
+This is here, rather than in ClosureInfo, just to keep nhc happy.
+Other modules should access this info through ClosureInfo.
+
+\begin{code}
+module SMRep (
+ -- Words and bytes
+ StgWord, StgHalfWord,
+ hALF_WORD_SIZE, hALF_WORD_SIZE_IN_BITS,
+ WordOff, ByteOff,
+
+ -- Argument/return representations
+ CgRep(..), nonVoidArg,
+ argMachRep, primRepToCgRep, primRepHint,
+ isFollowableArg, isVoidArg,
+ isFloatingArg, isNonPtrArg, is64BitArg,
+ separateByPtrFollowness,
+ cgRepSizeW, cgRepSizeB,
+ retAddrSizeW,
+
+ typeCgRep, idCgRep, tyConCgRep, typeHint,
+
+	-- Closure representation
+ SMRep(..), ClosureType(..),
+ isStaticRep,
+ fixedHdrSize, arrWordsHdrSize, arrPtrsHdrSize,
+ profHdrSize, thunkHdrSize,
+ tablesNextToCode,
+ smRepClosureType, smRepClosureTypeInt,
+
+ rET_SMALL, rET_VEC_SMALL, rET_BIG, rET_VEC_BIG
+ ) where
+
+#include "HsVersions.h"
+#include "../includes/MachDeps.h"
+
+import Id ( Id, idType )
+import Type ( Type, typePrimRep, PrimRep(..) )
+import TyCon ( TyCon, tyConPrimRep )
+import MachOp		-- ( MachRep(..), MachHint(..), wordRep )
+import StaticFlags ( opt_SccProfilingOn, opt_GranMacros,
+ opt_Unregisterised )
+import Constants
+import Outputable
+
+import DATA_WORD
+\end{code}
+
+
+%************************************************************************
+%* *
+ Words and bytes
+%* *
+%************************************************************************
+
+\begin{code}
+type WordOff = Int -- Word offset, or word count
+type ByteOff = Int -- Byte offset, or byte count
+\end{code}
+
+StgWord is a type representing a word-sized value on the target platform.
+
+\begin{code}
+#if SIZEOF_HSWORD == 4
+type StgWord = Word32
+type StgHalfWord = Word16
+hALF_WORD_SIZE = 2 :: ByteOff
+hALF_WORD_SIZE_IN_BITS = 16 :: Int
+#elif SIZEOF_HSWORD == 8
+type StgWord = Word64
+type StgHalfWord = Word32
+hALF_WORD_SIZE = 4 :: ByteOff
+hALF_WORD_SIZE_IN_BITS = 32 :: Int
+#else
+#error unknown SIZEOF_HSWORD
+#endif
+\end{code}
+
+
+%************************************************************************
+%* *
+ CgRep
+%* *
+%************************************************************************
+
+A CgRep is an abstraction of a Type which tells the code generator
+all it needs to know about the calling convention for arguments (and
+results) of that type.  In particular, the CgReps of a function's
+arguments are used to decide which of the RTS's generic apply
+functions to call when applying an unknown function.
+
+It contains more information than the back-end data type MachRep,
+so one can easily convert from CgRep -> MachRep.  (Except that
+there's no MachRep for VoidArg.)
+
+It distinguishes
+ pointers from non-pointers (we sort the pointers together
+ when building closures)
+
+ void from other types: a void argument is different from no argument
+
+All 64-bit types map to the same CgRep, because they're passed in the
+same register, but a PtrArg is still different from a NonPtrArg
+because the function's entry convention has to take into account the
+pointer-hood of arguments for the purposes of describing the stack on
+entry to the garbage collector.
+
+\begin{code}
+data CgRep
+ = VoidArg -- Void
+ | PtrArg -- Word-sized Ptr
+ | NonPtrArg -- Word-sized non-pointer
+ | LongArg -- 64-bit non-pointer
+ | FloatArg -- 32-bit float
+ | DoubleArg -- 64-bit float
+ deriving Eq
+
+instance Outputable CgRep where
+ ppr VoidArg = ptext SLIT("V_")
+ ppr PtrArg = ptext SLIT("P_")
+ ppr NonPtrArg = ptext SLIT("I_")
+ ppr LongArg = ptext SLIT("L_")
+ ppr FloatArg = ptext SLIT("F_")
+ ppr DoubleArg = ptext SLIT("D_")
+
+argMachRep :: CgRep -> MachRep
+argMachRep PtrArg = wordRep
+argMachRep NonPtrArg = wordRep
+argMachRep LongArg = I64
+argMachRep FloatArg = F32
+argMachRep DoubleArg = F64
+argMachRep VoidArg = panic "argMachRep:VoidRep"
+
+primRepToCgRep :: PrimRep -> CgRep
+primRepToCgRep VoidRep = VoidArg
+primRepToCgRep PtrRep = PtrArg
+primRepToCgRep IntRep = NonPtrArg
+primRepToCgRep WordRep = NonPtrArg
+primRepToCgRep Int64Rep = LongArg
+primRepToCgRep Word64Rep = LongArg
+primRepToCgRep AddrRep = NonPtrArg
+primRepToCgRep FloatRep = FloatArg
+primRepToCgRep DoubleRep = DoubleArg
+
+primRepHint :: PrimRep -> MachHint
+primRepHint VoidRep = panic "primRepHint:VoidRep"
+primRepHint PtrRep = PtrHint
+primRepHint IntRep = SignedHint
+primRepHint WordRep = NoHint
+primRepHint Int64Rep = SignedHint
+primRepHint Word64Rep = NoHint
+primRepHint AddrRep = PtrHint -- NB! PtrHint, but NonPtrArg
+primRepHint FloatRep = FloatHint
+primRepHint DoubleRep = FloatHint
+
+idCgRep :: Id -> CgRep
+idCgRep = typeCgRep . idType
+
+tyConCgRep :: TyCon -> CgRep
+tyConCgRep = primRepToCgRep . tyConPrimRep
+
+typeCgRep :: Type -> CgRep
+typeCgRep = primRepToCgRep . typePrimRep
+
+typeHint :: Type -> MachHint
+typeHint = primRepHint . typePrimRep
+\end{code}
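+
+As a much-simplified illustration of the "generic apply" point above
+(this is not the code GHC actually uses; the real selection logic
+lives in CgCallConv and matches argument patterns against the fixed
+set of apply functions the RTS provides), one can picture the choice
+of apply entry point as spelling out one letter per argument:
+
+\begin{code}
+-- Illustrative sketch only: 'applyEntrySketch' is a hypothetical
+-- helper, not part of this module's real interface.
+applyEntrySketch :: [CgRep] -> String
+applyEntrySketch reps = "stg_ap_" ++ map tag reps
+  where
+    tag PtrArg    = 'p'   -- pointer
+    tag NonPtrArg = 'n'   -- word-sized non-pointer
+    tag LongArg   = 'l'   -- 64-bit non-pointer
+    tag FloatArg  = 'f'   -- single-precision float
+    tag DoubleArg = 'd'   -- double-precision float
+    tag VoidArg   = 'v'   -- void
+
+-- For example, applying an unknown function to two pointer arguments
+-- uses the RTS entry point stg_ap_pp:
+--   applyEntrySketch [PtrArg, PtrArg] == "stg_ap_pp"
+\end{code}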
+
+Whether or not the thing is a pointer that the garbage-collector
+should follow. Or, to put it another (less confusing) way, whether
+the object in question is a heap object.
+
+Depending on the outcome, this predicate determines which stack
+the pointer/object may have to be saved onto, and it affects the
+computation of GC liveness info.
+
+\begin{code}
+isFollowableArg :: CgRep -> Bool -- True <=> points to a heap object
+isFollowableArg PtrArg = True
+isFollowableArg other = False
+
+isVoidArg :: CgRep -> Bool
+isVoidArg VoidArg = True
+isVoidArg other = False
+
+nonVoidArg :: CgRep -> Bool
+nonVoidArg VoidArg = False
+nonVoidArg other = True
+
+-- isFloatingArg is used to distinguish @Double@ and @Float@ which
+-- cause inadvertent numeric conversions if you aren't jolly careful.
+-- See codeGen/CgCon:cgTopRhsCon.
+
+isFloatingArg :: CgRep -> Bool
+isFloatingArg DoubleArg = True
+isFloatingArg FloatArg = True
+isFloatingArg _ = False
+
+isNonPtrArg :: CgRep -> Bool
+-- Identify anything which is one word large and not a pointer.
+isNonPtrArg NonPtrArg = True
+isNonPtrArg other = False
+
+is64BitArg :: CgRep -> Bool
+is64BitArg LongArg = True
+is64BitArg _ = False
+\end{code}
+
+\begin{code}
+separateByPtrFollowness :: [(CgRep,a)] -> ([(CgRep,a)], [(CgRep,a)])
+-- Returns (ptrs, non-ptrs)
+separateByPtrFollowness things
+ = sep_things things [] []
+ -- accumulating params for follow-able and don't-follow things...
+ where
+ sep_things [] bs us = (reverse bs, reverse us)
+ sep_things ((PtrArg,a):ts) bs us = sep_things ts ((PtrArg,a):bs) us
+ sep_things (t :ts) bs us = sep_things ts bs (t:us)
+\end{code}
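+
+For example, @separateByPtrFollowness [(PtrArg,a), (NonPtrArg,b), (PtrArg,c)]@
+yields @([(PtrArg,a), (PtrArg,c)], [(NonPtrArg,b)])@; the relative
+order within each group is preserved.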
+
+\begin{code}
+cgRepSizeB :: CgRep -> ByteOff
+cgRepSizeB DoubleArg = dOUBLE_SIZE
+cgRepSizeB LongArg = wORD64_SIZE
+cgRepSizeB VoidArg = 0
+cgRepSizeB _ = wORD_SIZE
+
+cgRepSizeW :: CgRep -> WordOff
+cgRepSizeW DoubleArg = dOUBLE_SIZE `quot` wORD_SIZE
+cgRepSizeW LongArg = wORD64_SIZE `quot` wORD_SIZE
+cgRepSizeW VoidArg = 0
+cgRepSizeW _ = 1
+
+retAddrSizeW :: WordOff
+retAddrSizeW = 1 -- One word
+\end{code}
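+
+For example, on a 32-bit target (where @wORD_SIZE = 4@ and
+@dOUBLE_SIZE = 8@) a @DoubleArg@ occupies @cgRepSizeW DoubleArg == 2@
+words, whereas on a 64-bit target it fits in a single word.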
+
+%************************************************************************
+%* *
+\subsubsection[SMRep-datatype]{@SMRep@---storage manager representation}
+%* *
+%************************************************************************
+
+\begin{code}
+data SMRep
+	-- Static closures have an extra static link field at the end.
+ = GenericRep -- GC routines consult sizes in info tbl
+ Bool -- True <=> This is a static closure. Affects how
+ -- we garbage-collect it
+ !Int -- # ptr words
+ !Int -- # non-ptr words
+ ClosureType -- closure type
+
+ | BlackHoleRep
+
+data ClosureType -- Corresponds 1-1 with the varieties of closures
+ -- implemented by the RTS. Compare with ghc/includes/ClosureTypes.h
+ = Constr
+ | ConstrNoCaf
+ | Fun
+ | Thunk
+ | ThunkSelector
+\end{code}
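+
+For example, a dynamic cons cell has two pointer fields and no
+non-pointer fields, so its representation is
+@GenericRep False 2 0 Constr@, which @smRepClosureTypeInt@ below maps
+to the RTS closure type @CONSTR_2_0@.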
+
+Size of a closure header.
+
+\begin{code}
+fixedHdrSize :: WordOff
+fixedHdrSize = sTD_HDR_SIZE + profHdrSize + granHdrSize
+
+profHdrSize :: WordOff
+profHdrSize | opt_SccProfilingOn = pROF_HDR_SIZE
+ | otherwise = 0
+
+granHdrSize :: WordOff
+granHdrSize | opt_GranMacros = gRAN_HDR_SIZE
+ | otherwise = 0
+
+arrWordsHdrSize :: ByteOff
+arrWordsHdrSize = fixedHdrSize*wORD_SIZE + sIZEOF_StgArrWords_NoHdr
+
+arrPtrsHdrSize :: ByteOff
+arrPtrsHdrSize = fixedHdrSize*wORD_SIZE + sIZEOF_StgMutArrPtrs_NoHdr
+
+-- Thunks have an extra header word on SMP, so the update doesn't
+-- splat the payload.
+thunkHdrSize :: WordOff
+thunkHdrSize = fixedHdrSize + smp_hdr
+ where smp_hdr = sIZEOF_StgSMPThunkHeader `quot` wORD_SIZE
+\end{code}
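+
+So on a plain (unprofiled, non-GranSim) build the fixed header is
+just the info pointer: @fixedHdrSize = sTD_HDR_SIZE@, one word.  With
+profiling on, the @pROF_HDR_SIZE@ words for the cost-centre-stack and
+LDV fields are added on top of that.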
+
+\begin{code}
+-- The IA64 and PowerPC64 manglers don't place tables next to code
+tablesNextToCode :: Bool
+#if defined(ia64_TARGET_ARCH) || defined(powerpc64_TARGET_ARCH)
+tablesNextToCode = False
+#else
+tablesNextToCode = not opt_Unregisterised
+#endif
+\end{code}
+
+\begin{code}
+isStaticRep :: SMRep -> Bool
+isStaticRep (GenericRep is_static _ _ _) = is_static
+isStaticRep BlackHoleRep = False
+\end{code}
+
+\begin{code}
+#include "../includes/ClosureTypes.h"
+-- Defines CONSTR, CONSTR_1_0 etc
+
+
+smRepClosureType :: SMRep -> ClosureType
+smRepClosureType (GenericRep _ _ _ ty) = ty
+smRepClosureType BlackHoleRep = panic "smRepClosureType: black hole"
+
+smRepClosureTypeInt :: SMRep -> Int
+smRepClosureTypeInt (GenericRep False 1 0 Constr) = CONSTR_1_0
+smRepClosureTypeInt (GenericRep False 0 1 Constr) = CONSTR_0_1
+smRepClosureTypeInt (GenericRep False 2 0 Constr) = CONSTR_2_0
+smRepClosureTypeInt (GenericRep False 1 1 Constr) = CONSTR_1_1
+smRepClosureTypeInt (GenericRep False 0 2 Constr) = CONSTR_0_2
+smRepClosureTypeInt (GenericRep False _ _ Constr) = CONSTR
+
+smRepClosureTypeInt (GenericRep False 1 0 Fun) = FUN_1_0
+smRepClosureTypeInt (GenericRep False 0 1 Fun) = FUN_0_1
+smRepClosureTypeInt (GenericRep False 2 0 Fun) = FUN_2_0
+smRepClosureTypeInt (GenericRep False 1 1 Fun) = FUN_1_1
+smRepClosureTypeInt (GenericRep False 0 2 Fun) = FUN_0_2
+smRepClosureTypeInt (GenericRep False _ _ Fun) = FUN
+
+smRepClosureTypeInt (GenericRep False 1 0 Thunk) = THUNK_1_0
+smRepClosureTypeInt (GenericRep False 0 1 Thunk) = THUNK_0_1
+smRepClosureTypeInt (GenericRep False 2 0 Thunk) = THUNK_2_0
+smRepClosureTypeInt (GenericRep False 1 1 Thunk) = THUNK_1_1
+smRepClosureTypeInt (GenericRep False 0 2 Thunk) = THUNK_0_2
+smRepClosureTypeInt (GenericRep False _ _ Thunk) = THUNK
+
+smRepClosureTypeInt (GenericRep False _ _ ThunkSelector) = THUNK_SELECTOR
+
+smRepClosureTypeInt (GenericRep True _ _ Constr) = CONSTR_STATIC
+smRepClosureTypeInt (GenericRep True _ _ ConstrNoCaf) = CONSTR_NOCAF_STATIC
+smRepClosureTypeInt (GenericRep True _ _ Fun) = FUN_STATIC
+smRepClosureTypeInt (GenericRep True _ _ Thunk) = THUNK_STATIC
+
+smRepClosureTypeInt BlackHoleRep = BLACKHOLE
+
+smRepClosureTypeInt _ = panic "smRepClosureTypeInt"
+
+
+-- We export these ones
+rET_SMALL = (RET_SMALL :: Int)
+rET_VEC_SMALL = (RET_VEC_SMALL :: Int)
+rET_BIG = (RET_BIG :: Int)
+rET_VEC_BIG = (RET_VEC_BIG :: Int)
+\end{code}
+