Diffstat (limited to 'compiler/codeGen')
33 files changed, 10447 insertions, 0 deletions
diff --git a/compiler/codeGen/Bitmap.hs b/compiler/codeGen/Bitmap.hs new file mode 100644 index 0000000000..c0b490978c --- /dev/null +++ b/compiler/codeGen/Bitmap.hs @@ -0,0 +1,79 @@ +-- +-- (c) The University of Glasgow 2003 +-- + +-- Functions for constructing bitmaps, which are used in various +-- places in generated code (stack frame liveness masks, function +-- argument liveness masks, SRT bitmaps). + +module Bitmap ( + Bitmap, mkBitmap, + intsToBitmap, intsToReverseBitmap, + mAX_SMALL_BITMAP_SIZE + ) where + +#include "HsVersions.h" +#include "../includes/MachDeps.h" + +import SMRep +import Constants +import DATA_BITS + +{-| +A bitmap represented by a sequence of 'StgWord's on the /target/ +architecture. These are used for bitmaps in info tables and other +generated code which need to be emitted as sequences of StgWords. +-} +type Bitmap = [StgWord] + +-- | Make a bitmap from a sequence of bits +mkBitmap :: [Bool] -> Bitmap +mkBitmap [] = [] +mkBitmap stuff = chunkToBitmap chunk : mkBitmap rest + where (chunk, rest) = splitAt wORD_SIZE_IN_BITS stuff + +chunkToBitmap :: [Bool] -> StgWord +chunkToBitmap chunk = + foldr (.|.) 0 [ 1 `shiftL` n | (True,n) <- zip chunk [0..] ] + +-- | Make a bitmap where the slots specified are the /ones/ in the bitmap. +-- eg. @[0,1,3], size 4 ==> 0xb@. + +-- The list of @Int@s /must/ be already sorted. +intsToBitmap :: Int -> [Int] -> Bitmap +intsToBitmap size slots{- must be sorted -} + | size <= 0 = [] + | otherwise = + (foldr (.|.) 0 (map (1 `shiftL`) these)) : + intsToBitmap (size - wORD_SIZE_IN_BITS) + (map (\x -> x - wORD_SIZE_IN_BITS) rest) + where (these,rest) = span (<wORD_SIZE_IN_BITS) slots + +-- | Make a bitmap where the slots specified are the /zeros/ in the bitmap. +-- eg. @[0,1,3], size 4 ==> 0x4@ (we leave any bits outside the size as zero, +-- just to make the bitmap easier to read). +-- +-- The list of @Int@s /must/ be already sorted. +intsToReverseBitmap :: Int -> [Int] -> Bitmap +intsToReverseBitmap size slots{- must be sorted -} + | size <= 0 = [] + | otherwise = + (foldr xor init (map (1 `shiftL`) these)) : + intsToReverseBitmap (size - wORD_SIZE_IN_BITS) + (map (\x -> x - wORD_SIZE_IN_BITS) rest) + where (these,rest) = span (<wORD_SIZE_IN_BITS) slots + init + | size >= wORD_SIZE_IN_BITS = complement 0 + | otherwise = (1 `shiftL` size) - 1 + +{- | +Magic number, must agree with @BITMAP_BITS_SHIFT@ in InfoTables.h. +Some kinds of bitmap pack a size\/bitmap into a single word if +possible, or fall back to an external pointer when the bitmap is too +large. This value represents the largest size of bitmap that can be +packed into a single word.
+-} +mAX_SMALL_BITMAP_SIZE :: Int +mAX_SMALL_BITMAP_SIZE | wORD_SIZE == 4 = 27 + | otherwise = 58 + diff --git a/compiler/codeGen/CgBindery.hi-boot-5 b/compiler/codeGen/CgBindery.hi-boot-5 new file mode 100644 index 0000000000..f375fcc6e1 --- /dev/null +++ b/compiler/codeGen/CgBindery.hi-boot-5 @@ -0,0 +1,7 @@ +__interface CgBindery 1 0 where +__export CgBindery CgBindings CgIdInfo VolatileLoc StableLoc nukeVolatileBinds; +1 type CgBindings = VarEnv.IdEnv CgIdInfo; +1 data CgIdInfo; +1 data VolatileLoc; +1 data StableLoc; +1 nukeVolatileBinds :: CgBindings -> CgBindings ; diff --git a/compiler/codeGen/CgBindery.hi-boot-6 b/compiler/codeGen/CgBindery.hi-boot-6 new file mode 100644 index 0000000000..7d1f300a86 --- /dev/null +++ b/compiler/codeGen/CgBindery.hi-boot-6 @@ -0,0 +1,8 @@ +module CgBindery where + +type CgBindings = VarEnv.IdEnv CgIdInfo +data CgIdInfo +data VolatileLoc +data StableLoc + +nukeVolatileBinds :: CgBindings -> CgBindings diff --git a/compiler/codeGen/CgBindery.lhs b/compiler/codeGen/CgBindery.lhs new file mode 100644 index 0000000000..f78edda655 --- /dev/null +++ b/compiler/codeGen/CgBindery.lhs @@ -0,0 +1,494 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[CgBindery]{Utility functions related to doing @CgBindings@} + +\begin{code} +module CgBindery ( + CgBindings, CgIdInfo, + StableLoc, VolatileLoc, + + cgIdInfoId, cgIdInfoArgRep, cgIdInfoLF, + + stableIdInfo, heapIdInfo, + letNoEscapeIdInfo, idInfoToAmode, + + addBindC, addBindsC, + + nukeVolatileBinds, + nukeDeadBindings, + getLiveStackSlots, + + bindArgsToStack, rebindToStack, + bindNewToNode, bindNewToReg, bindArgsToRegs, + bindNewToTemp, + getArgAmode, getArgAmodes, + getCgIdInfo, + getCAddrModeIfVolatile, getVolatileRegs, + maybeLetNoEscape, + ) where + +#include "HsVersions.h" + +import CgMonad +import CgHeapery ( getHpRelOffset ) +import CgStackery ( freeStackSlots, getSpRelOffset ) +import CgUtils ( cgLit, cmmOffsetW ) +import CLabel ( mkClosureLabel, pprCLabel ) +import ClosureInfo ( mkLFImported, mkLFArgument, LambdaFormInfo ) + +import Cmm +import PprCmm ( {- instance Outputable -} ) +import SMRep ( CgRep(..), WordOff, isFollowableArg, + isVoidArg, cgRepSizeW, argMachRep, + idCgRep, typeCgRep ) +import Id ( Id, idName ) +import VarEnv +import VarSet ( varSetElems ) +import Literal ( literalType ) +import Maybes ( catMaybes ) +import Name ( isExternalName ) +import StgSyn ( StgArg, StgLiveVars, GenStgArg(..), isStgTypeArg ) +import Unique ( Uniquable(..) ) +import UniqSet ( elementOfUniqSet ) +import Outputable +\end{code} + + +%************************************************************************ +%* * +\subsection[Bindery-datatypes]{Data types} +%* * +%************************************************************************ + +@(CgBinding a b)@ is a type of finite maps from a to b. + +The assumption used to be that @lookupCgBind@ must get exactly one +match. This is {\em completely wrong} in the case of compiling +letrecs (where knot-tying is used). An initial binding is fed in (and +never evaluated); eventually, a correct binding is put into the +environment. So there can be two bindings for a given name. 
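To make the knot-tying point concrete, here is a minimal standalone sketch (hypothetical names, and plain Data.Map in place of the IdEnv used here) of how laziness lets an initial, never-forced binding be fed in and later replaced by the correct one:

\begin{verbatim}
import qualified Data.Map as M

-- Toy model: compiling letrec { x = 1 + y } against an environment.
-- The *final* environment is fed back into the step that builds it;
-- the placeholder binding for "x" is only ever looked up lazily, so
-- the knot unties as long as nothing forces "x" while defining "x".
compileLetrec :: M.Map String Int -> M.Map String Int
compileLetrec env = final
  where
    final = M.insert "x" rhs env
    rhs   = 1 + M.findWithDefault 0 "y" final  -- may consult 'final'

main :: IO ()
main = print (compileLetrec (M.fromList [("y", 41)]))
-- prints fromList [("x",42),("y",41)]
\end{verbatim}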
+ +\begin{code} +type CgBindings = IdEnv CgIdInfo + +data CgIdInfo + = CgIdInfo + { cg_id :: Id -- Id that this is the info for + -- Can differ from the Id at occurrence sites by + -- virtue of being externalised, for splittable C + , cg_rep :: CgRep + , cg_vol :: VolatileLoc + , cg_stb :: StableLoc + , cg_lf :: LambdaFormInfo } + +mkCgIdInfo id vol stb lf + = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, + cg_lf = lf, cg_rep = idCgRep id } + +voidIdInfo id = CgIdInfo { cg_id = id, cg_vol = NoVolatileLoc + , cg_stb = VoidLoc, cg_lf = mkLFArgument id + , cg_rep = VoidArg } + -- Used just for VoidRep things + +data VolatileLoc -- These locations die across a call + = NoVolatileLoc + | RegLoc CmmReg -- In one of the registers (global or local) + | VirHpLoc VirtualHpOffset -- Hp+offset (address of closure) + | VirNodeLoc VirtualHpOffset -- Cts of offset indirect from Node + -- ie *(Node+offset) +\end{code} + +@StableLoc@ encodes where an Id can be found, used by +the @CgBindings@ environment in @CgBindery@. + +\begin{code} +data StableLoc + = NoStableLoc + + | VirStkLoc VirtualSpOffset -- The thing is held in this + -- stack slot + + | VirStkLNE VirtualSpOffset -- A let-no-escape thing; the + -- value is this stack pointer + -- (as opposed to the contents of the slot) + + | StableLoc CmmExpr + | VoidLoc -- Used only for VoidRep variables. They never need to + -- be saved, so it makes sense to treat them as + -- having a stable location +\end{code} + +\begin{code} +instance Outputable CgIdInfo where + ppr (CgIdInfo id rep vol stb lf) + = ppr id <+> ptext SLIT("-->") <+> vcat [ppr vol, ppr stb] + +instance Outputable VolatileLoc where + ppr NoVolatileLoc = empty + ppr (RegLoc r) = ptext SLIT("reg") <+> ppr r + ppr (VirHpLoc v) = ptext SLIT("vh") <+> ppr v + ppr (VirNodeLoc v) = ptext SLIT("vn") <+> ppr v + +instance Outputable StableLoc where + ppr NoStableLoc = empty + ppr VoidLoc = ptext SLIT("void") + ppr (VirStkLoc v) = ptext SLIT("vs") <+> ppr v + ppr (VirStkLNE v) = ptext SLIT("lne") <+> ppr v + ppr (StableLoc a) = ptext SLIT("amode") <+> ppr a +\end{code} + +%************************************************************************ +%* * +\subsection[Bindery-idInfo]{Manipulating IdInfo} +%* * +%************************************************************************ + +\begin{code} +stableIdInfo id amode lf_info = mkCgIdInfo id NoVolatileLoc (StableLoc amode) lf_info +heapIdInfo id offset lf_info = mkCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info +letNoEscapeIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLNE sp) lf_info +stackIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLoc sp) lf_info +nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc offset) NoStableLoc lf_info +regIdInfo id reg lf_info = mkCgIdInfo id (RegLoc reg) NoStableLoc lf_info + +idInfoToAmode :: CgIdInfo -> FCode CmmExpr +idInfoToAmode info + = case cg_vol info of { + RegLoc reg -> returnFC (CmmReg reg) ; + VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetW (CmmReg nodeReg) nd_off) mach_rep) ; + VirHpLoc hp_off -> getHpRelOffset hp_off ; + NoVolatileLoc -> + + case cg_stb info of + StableLoc amode -> returnFC amode + VirStkLoc sp_off -> do { sp_rel <- getSpRelOffset sp_off + ; return (CmmLoad sp_rel mach_rep) } + + VirStkLNE sp_off -> getSpRelOffset sp_off + + VoidLoc -> return $ pprPanic "idInfoToAmode: void" (ppr (cg_id info)) + -- We return a 'bottom' amode, rather than panicking now + -- In this way getArgAmode returns a pair of (VoidArg, bottom) + -- and that's exactly what we
want + + NoStableLoc -> pprPanic "idInfoToAmode: no loc" (ppr (cg_id info)) + } + where + mach_rep = argMachRep (cg_rep info) + +cgIdInfoId :: CgIdInfo -> Id +cgIdInfoId = cg_id + +cgIdInfoLF :: CgIdInfo -> LambdaFormInfo +cgIdInfoLF = cg_lf + +cgIdInfoArgRep :: CgIdInfo -> CgRep +cgIdInfoArgRep = cg_rep + +maybeLetNoEscape (CgIdInfo { cg_stb = VirStkLNE sp_off }) = Just sp_off +maybeLetNoEscape other = Nothing +\end{code} + +%************************************************************************ +%* * +\subsection[CgMonad-bindery]{Monad things for fiddling with @CgBindings@} +%* * +%************************************************************************ + +There are three basic routines, for adding (@addBindC@), modifying +(@modifyBindC@) and looking up (@getCgIdInfo@) bindings. + +An @Id@ is bound to a @(VolatileLoc, StableLoc)@ pair. +The name should not already be bound. (nice ASSERT, eh?) + +\begin{code} +addBindC :: Id -> CgIdInfo -> Code +addBindC name stuff_to_bind = do + binds <- getBinds + setBinds $ extendVarEnv binds name stuff_to_bind + +addBindsC :: [(Id, CgIdInfo)] -> Code +addBindsC new_bindings = do + binds <- getBinds + let new_binds = foldl (\ binds (name,info) -> extendVarEnv binds name info) + binds + new_bindings + setBinds new_binds + +modifyBindC :: Id -> (CgIdInfo -> CgIdInfo) -> Code +modifyBindC name mangle_fn = do + binds <- getBinds + setBinds $ modifyVarEnv mangle_fn binds name + +getCgIdInfo :: Id -> FCode CgIdInfo +getCgIdInfo id + = do { -- Try local bindings first + ; local_binds <- getBinds + ; case lookupVarEnv local_binds id of { + Just info -> return info ; + Nothing -> do + + { -- Try top-level bindings + static_binds <- getStaticBinds + ; case lookupVarEnv static_binds id of { + Just info -> return info ; + Nothing -> + + -- Should be imported; make up a CgIdInfo for it + let + name = idName id + in + if isExternalName name then do + hmods <- getHomeModules + let ext_lbl = CmmLit (CmmLabel (mkClosureLabel hmods name)) + return (stableIdInfo id ext_lbl (mkLFImported id)) + else + if isVoidArg (idCgRep id) then + -- Void things are never in the environment + return (voidIdInfo id) + else + -- Bug + cgLookupPanic id + }}}} + + +cgLookupPanic :: Id -> FCode a +cgLookupPanic id + = do static_binds <- getStaticBinds + local_binds <- getBinds + srt <- getSRTLabel + pprPanic "cgPanic" + (vcat [ppr id, + ptext SLIT("static binds for:"), + vcat [ ppr (cg_id info) | info <- varEnvElts static_binds ], + ptext SLIT("local binds for:"), + vcat [ ppr (cg_id info) | info <- varEnvElts local_binds ], + ptext SLIT("SRT label") <+> pprCLabel srt + ]) +\end{code} + +%************************************************************************ +%* * +\subsection[Bindery-nuke-volatile]{Nuking volatile bindings} +%* * +%************************************************************************ + +We sometimes want to nuke all the volatile bindings; we must be sure +we don't leave any (NoVolatile, NoStable) binds around...
+ +\begin{code} +nukeVolatileBinds :: CgBindings -> CgBindings +nukeVolatileBinds binds + = mkVarEnv (foldr keep_if_stable [] (varEnvElts binds)) + where + keep_if_stable (CgIdInfo { cg_stb = NoStableLoc }) acc = acc + keep_if_stable info acc + = (cg_id info, info { cg_vol = NoVolatileLoc }) : acc +\end{code} + + +%************************************************************************ +%* * +\subsection[lookup-interface]{Interface functions to looking up bindings} +%* * +%************************************************************************ + +\begin{code} +getCAddrModeIfVolatile :: Id -> FCode (Maybe CmmExpr) +getCAddrModeIfVolatile id + = do { info <- getCgIdInfo id + ; case cg_stb info of + NoStableLoc -> do -- Aha! So it is volatile! + amode <- idInfoToAmode info + return $ Just amode + a_stable_loc -> return Nothing } +\end{code} + +@getVolatileRegs@ gets a set of live variables, and returns a list of +all registers on which these variables depend. These are the regs +which must be saved and restored across any C calls. If a variable is +both in a volatile location (depending on a register) {\em and} a +stable one (notably, on the stack), we modify the current bindings to +forget the volatile one. + +\begin{code} +getVolatileRegs :: StgLiveVars -> FCode [GlobalReg] + +getVolatileRegs vars = do + do { stuff <- mapFCs snaffle_it (varSetElems vars) + ; returnFC $ catMaybes stuff } + where + snaffle_it var = do + { info <- getCgIdInfo var + ; let + -- commoned-up code... + consider_reg reg + = -- We assume that all regs can die across C calls + -- We leave it to the save-macros to decide which + -- regs *really* need to be saved. + case cg_stb info of + NoStableLoc -> returnFC (Just reg) -- got one! + is_a_stable_loc -> do + { -- has both volatile & stable locations; + -- force it to rely on the stable location + modifyBindC var nuke_vol_bind + ; return Nothing } + + ; case cg_vol info of + RegLoc (CmmGlobal reg) -> consider_reg reg + VirNodeLoc _ -> consider_reg node + other_loc -> returnFC Nothing -- Local registers + } + + nuke_vol_bind info = info { cg_vol = NoVolatileLoc } +\end{code} + +\begin{code} +getArgAmode :: StgArg -> FCode (CgRep, CmmExpr) +getArgAmode (StgVarArg var) + = do { info <- getCgIdInfo var + ; amode <- idInfoToAmode info + ; return (cgIdInfoArgRep info, amode ) } + +getArgAmode (StgLitArg lit) + = do { cmm_lit <- cgLit lit + ; return (typeCgRep (literalType lit), CmmLit cmm_lit) } + +getArgAmode (StgTypeArg _) = panic "getArgAmode: type arg" + +getArgAmodes :: [StgArg] -> FCode [(CgRep, CmmExpr)] +getArgAmodes [] = returnFC [] +getArgAmodes (atom:atoms) + | isStgTypeArg atom = getArgAmodes atoms + | otherwise = do { amode <- getArgAmode atom + ; amodes <- getArgAmodes atoms + ; return ( amode : amodes ) } +\end{code} + +%************************************************************************ +%* * +\subsection[binding-and-rebinding-interface]{Interface functions for binding and re-binding names} +%* * +%************************************************************************ + +\begin{code} +bindArgsToStack :: [(Id, VirtualSpOffset)] -> Code +bindArgsToStack args + = mapCs bind args + where + bind(id, offset) = addBindC id (stackIdInfo id offset (mkLFArgument id)) + +bindArgsToRegs :: [(Id, GlobalReg)] -> Code +bindArgsToRegs args + = mapCs bind args + where + bind (arg, reg) = bindNewToReg arg (CmmGlobal reg) (mkLFArgument arg) + +bindNewToNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Code +bindNewToNode id offset lf_info + = addBindC id (nodeIdInfo id 
offset lf_info) + +-- Create a new temporary whose unique is that in the id, +-- bind the id to it, and return the addressing mode for the +-- temporary. +bindNewToTemp :: Id -> FCode CmmReg +bindNewToTemp id + = do addBindC id (regIdInfo id temp_reg lf_info) + return temp_reg + where + uniq = getUnique id + temp_reg = CmmLocal (LocalReg uniq (argMachRep (idCgRep id))) + lf_info = mkLFArgument id -- Always used for things we + -- know nothing about + +bindNewToReg :: Id -> CmmReg -> LambdaFormInfo -> Code +bindNewToReg name reg lf_info + = addBindC name info + where + info = mkCgIdInfo name (RegLoc reg) NoStableLoc lf_info +\end{code} + +\begin{code} +rebindToStack :: Id -> VirtualSpOffset -> Code +rebindToStack name offset + = modifyBindC name replace_stable_fn + where + replace_stable_fn info = info { cg_stb = VirStkLoc offset } +\end{code} + +%************************************************************************ +%* * +\subsection[CgMonad-deadslots]{Finding dead stack slots} +%* * +%************************************************************************ + +nukeDeadBindings does the following: + + - Removes all bindings from the environment other than those + for variables in the argument to nukeDeadBindings. + - Collects any stack slots so freed, and returns them to the stack free + list. + - Moves the virtual stack pointer to point to the topmost used + stack locations. + +You can have multi-word slots on the stack (where a Double# used to +be, for instance); if dead, such a slot will be reported as *several* +offsets (one per word). + +Probably *naughty* to look inside monad... + +\begin{code} +nukeDeadBindings :: StgLiveVars -- All the *live* variables + -> Code +nukeDeadBindings live_vars = do + binds <- getBinds + let (dead_stk_slots, bs') = + dead_slots live_vars + [] [] + [ (cg_id b, b) | b <- varEnvElts binds ] + setBinds $ mkVarEnv bs' + freeStackSlots dead_stk_slots +\end{code} + +Several boring auxiliary functions to do the dirty work. + +\begin{code} +dead_slots :: StgLiveVars + -> [(Id,CgIdInfo)] + -> [VirtualSpOffset] + -> [(Id,CgIdInfo)] + -> ([VirtualSpOffset], [(Id,CgIdInfo)]) + +-- dead_slots carries accumulating parameters for +-- filtered bindings, dead slots +dead_slots live_vars fbs ds [] + = (ds, reverse fbs) -- Finished; rm the dups, if any + +dead_slots live_vars fbs ds ((v,i):bs) + | v `elementOfUniqSet` live_vars + = dead_slots live_vars ((v,i):fbs) ds bs + -- Live, so don't record it in dead slots + -- Instead keep it in the filtered bindings + + | otherwise + = case cg_stb i of + VirStkLoc offset + | size > 0 + -> dead_slots live_vars fbs ([offset-size+1 .. offset] ++ ds) bs + + _ -> dead_slots live_vars fbs ds bs + where + size :: WordOff + size = cgRepSizeW (cg_rep i) +\end{code} + +\begin{code} +getLiveStackSlots :: FCode [VirtualSpOffset] +-- Return the offsets of slots in stack containing live pointers +getLiveStackSlots + = do { binds <- getBinds + ; return [off | CgIdInfo { cg_stb = VirStkLoc off, + cg_rep = rep } <- varEnvElts binds, + isFollowableArg rep] } +\end{code} diff --git a/compiler/codeGen/CgBindery.lhs-boot b/compiler/codeGen/CgBindery.lhs-boot new file mode 100644 index 0000000000..e504a6a9ba --- /dev/null +++ b/compiler/codeGen/CgBindery.lhs-boot @@ -0,0 +1,11 @@ +\begin{code} +module CgBindery where +import VarEnv( IdEnv ) + +data CgIdInfo +data VolatileLoc +data StableLoc +type CgBindings = IdEnv CgIdInfo + +nukeVolatileBinds :: CgBindings -> CgBindings +\end{code}
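The CgCallConv.hs file that follows packs the one-word bitmaps built by Bitmap.hs (above) into liveness masks. As a standalone illustration of that encoding (a sketch only, assuming a 32-bit word, for which the 27-bit small-bitmap limit above suggests a @BITMAP_BITS_SHIFT@ of 5; the real value lives in InfoTables.h):

\begin{verbatim}
import Data.Bits (shiftL, (.|.))
import Data.Word (Word32)

-- One word of intsToBitmap: set the bit for each (sorted) slot.
slotsToBits :: [Int] -> Word32
slotsToBits = foldr (\n acc -> acc .|. (1 `shiftL` n)) 0

-- Small-bitmap packing as in CgCallConv.smallLiveness below:
-- the size in the low bits, the bitmap shifted past it.
packSmallLiveness :: Int -> Word32 -> Word32
packSmallLiveness size bits = fromIntegral size .|. (bits `shiftL` 5)

main :: IO ()
main = print (packSmallLiveness 4 (slotsToBits [0,1,3]))
-- slotsToBits [0,1,3] = 0xb; packed word = 4 .|. (0xb << 5) = 356
\end{verbatim}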
\ No newline at end of file diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs new file mode 100644 index 0000000000..f463255807 --- /dev/null +++ b/compiler/codeGen/CgCallConv.hs @@ -0,0 +1,512 @@ +----------------------------------------------------------------------------- +-- +-- CgCallConv +-- +-- The datatypes and functions here encapsulate the +-- calling and return conventions used by the code generator. +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + + +module CgCallConv ( + -- Argument descriptors + mkArgDescr, argDescrType, + + -- Liveness + isBigLiveness, buildContLiveness, mkRegLiveness, + smallLiveness, mkLivenessCLit, + + -- Register assignment + assignCallRegs, assignReturnRegs, assignPrimOpCallRegs, + + -- Calls + constructSlowCall, slowArgs, slowCallPattern, + + -- Returns + CtrlReturnConvention(..), + ctrlReturnConvAlg, + dataReturnConvPrim, + getSequelAmode + ) where + +#include "HsVersions.h" + +import CgUtils ( emitRODataLits, mkWordCLit ) +import CgMonad + +import Constants ( mAX_FAMILY_SIZE_FOR_VEC_RETURNS, + mAX_Vanilla_REG, mAX_Float_REG, + mAX_Double_REG, mAX_Long_REG, + mAX_Real_Vanilla_REG, mAX_Real_Float_REG, + mAX_Real_Double_REG, mAX_Real_Long_REG, + bITMAP_BITS_SHIFT + ) + +import ClosureInfo ( ArgDescr(..), Liveness(..) ) +import CgStackery ( getSpRelOffset ) +import SMRep +import MachOp ( wordRep ) +import Cmm ( CmmExpr(..), GlobalReg(..), CmmLit(..), CmmReg(..), node ) +import CmmUtils ( mkLblExpr ) +import CLabel +import Maybes ( mapCatMaybes ) +import Id ( Id ) +import Name ( Name ) +import TyCon ( TyCon, tyConFamilySize ) +import Bitmap ( Bitmap, mAX_SMALL_BITMAP_SIZE, + mkBitmap, intsToReverseBitmap ) +import Util ( isn'tIn, sortLe ) +import StaticFlags ( opt_Unregisterised ) +import FastString ( LitString ) +import Outputable +import DATA_BITS + + +------------------------------------------------------------------------- +-- +-- Making argument descriptors +-- +-- An argument descriptor describes the layout of args on the stack, +-- both for * GC (stack-layout) purposes, and +-- * saving/restoring registers when a heap-check fails +-- +-- Void arguments aren't important, therefore (contrast constructSlowCall) +-- +------------------------------------------------------------------------- + +-- bring in ARG_P, ARG_N, etc. 
+#include "../includes/StgFun.h" + +------------------------- +argDescrType :: ArgDescr -> Int +-- The "argument type" RTS field type +argDescrType (ArgSpec n) = n +argDescrType (ArgGen liveness) + | isBigLiveness liveness = ARG_GEN_BIG + | otherwise = ARG_GEN + + +mkArgDescr :: Name -> [Id] -> FCode ArgDescr +mkArgDescr nm args + = case stdPattern arg_reps of + Just spec_id -> return (ArgSpec spec_id) + Nothing -> do { liveness <- mkLiveness nm size bitmap + ; return (ArgGen liveness) } + where + arg_reps = filter nonVoidArg (map idCgRep args) + -- Getting rid of voids eases matching of standard patterns + + bitmap = mkBitmap arg_bits + arg_bits = argBits arg_reps + size = length arg_bits + +argBits :: [CgRep] -> [Bool] -- True for non-ptr, False for ptr +argBits [] = [] +argBits (PtrArg : args) = False : argBits args +argBits (arg : args) = take (cgRepSizeW arg) (repeat True) ++ argBits args + +stdPattern :: [CgRep] -> Maybe Int +stdPattern [] = Just ARG_NONE -- just void args, probably + +stdPattern [PtrArg] = Just ARG_P +stdPattern [FloatArg] = Just ARG_F +stdPattern [DoubleArg] = Just ARG_D +stdPattern [LongArg] = Just ARG_L +stdPattern [NonPtrArg] = Just ARG_N + +stdPattern [NonPtrArg,NonPtrArg] = Just ARG_NN +stdPattern [NonPtrArg,PtrArg] = Just ARG_NP +stdPattern [PtrArg,NonPtrArg] = Just ARG_PN +stdPattern [PtrArg,PtrArg] = Just ARG_PP + +stdPattern [NonPtrArg,NonPtrArg,NonPtrArg] = Just ARG_NNN +stdPattern [NonPtrArg,NonPtrArg,PtrArg] = Just ARG_NNP +stdPattern [NonPtrArg,PtrArg,NonPtrArg] = Just ARG_NPN +stdPattern [NonPtrArg,PtrArg,PtrArg] = Just ARG_NPP +stdPattern [PtrArg,NonPtrArg,NonPtrArg] = Just ARG_PNN +stdPattern [PtrArg,NonPtrArg,PtrArg] = Just ARG_PNP +stdPattern [PtrArg,PtrArg,NonPtrArg] = Just ARG_PPN +stdPattern [PtrArg,PtrArg,PtrArg] = Just ARG_PPP + +stdPattern [PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPP +stdPattern [PtrArg,PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPPP +stdPattern [PtrArg,PtrArg,PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPPPP +stdPattern other = Nothing + + +------------------------------------------------------------------------- +-- +-- Liveness info +-- +------------------------------------------------------------------------- + +mkLiveness :: Name -> Int -> Bitmap -> FCode Liveness +mkLiveness name size bits + | size > mAX_SMALL_BITMAP_SIZE -- Bitmap does not fit in one word + = do { let lbl = mkBitmapLabel name + ; emitRODataLits lbl ( mkWordCLit (fromIntegral size) + : map mkWordCLit bits) + ; return (BigLiveness lbl) } + + | otherwise -- Bitmap fits in one word + = let + small_bits = case bits of + [] -> 0 + [b] -> fromIntegral b + _ -> panic "livenessToAddrMode" + in + return (smallLiveness size small_bits) + +smallLiveness :: Int -> StgWord -> Liveness +smallLiveness size small_bits = SmallLiveness bits + where bits = fromIntegral size .|. (small_bits `shiftL` bITMAP_BITS_SHIFT) + +------------------- +isBigLiveness :: Liveness -> Bool +isBigLiveness (BigLiveness _) = True +isBigLiveness (SmallLiveness _) = False + +------------------- +mkLivenessCLit :: Liveness -> CmmLit +mkLivenessCLit (BigLiveness lbl) = CmmLabel lbl +mkLivenessCLit (SmallLiveness bits) = mkWordCLit bits + + +------------------------------------------------------------------------- +-- +-- Bitmap describing register liveness +-- across GC when doing a "generic" heap check +-- (a RET_DYN stack frame). +-- +-- NB. Must agree with these macros (currently in StgMacros.h): +-- GET_NON_PTRS(), GET_PTRS(), GET_LIVENESS(). 
+------------------------------------------------------------------------- + +mkRegLiveness :: [(Id, GlobalReg)] -> Int -> Int -> StgWord +mkRegLiveness regs ptrs nptrs + = (fromIntegral nptrs `shiftL` 16) .|. + (fromIntegral ptrs `shiftL` 24) .|. + all_non_ptrs `xor` reg_bits regs + where + all_non_ptrs = 0xff + + reg_bits [] = 0 + reg_bits ((id, VanillaReg i) : regs) | isFollowableArg (idCgRep id) + = (1 `shiftL` (i - 1)) .|. reg_bits regs + reg_bits (_ : regs) + = reg_bits regs + +------------------------------------------------------------------------- +-- +-- Pushing the arguments for a slow call +-- +------------------------------------------------------------------------- + +-- For a slow call, we must take a bunch of arguments and intersperse +-- some stg_ap_<pattern>_ret_info return addresses. +constructSlowCall + :: [(CgRep,CmmExpr)] + -> (CLabel, -- RTS entry point for call + [(CgRep,CmmExpr)], -- args to pass to the entry point + [(CgRep,CmmExpr)]) -- stuff to save on the stack + + -- don't forget the zero case +constructSlowCall [] + = (mkRtsApFastLabel SLIT("stg_ap_0"), [], []) + +constructSlowCall amodes + = (stg_ap_pat, these, rest) + where + stg_ap_pat = mkRtsApFastLabel arg_pat + (arg_pat, these, rest) = matchSlowPattern amodes + +enterRtsRetLabel arg_pat + | tablesNextToCode = mkRtsRetInfoLabel arg_pat + | otherwise = mkRtsRetLabel arg_pat + +-- | 'slowArgs' takes a list of function arguments and prepares them for +-- pushing on the stack for "extra" arguments to a function which requires +-- fewer arguments than we currently have. +slowArgs :: [(CgRep,CmmExpr)] -> [(CgRep,CmmExpr)] +slowArgs [] = [] +slowArgs amodes = (NonPtrArg, mkLblExpr stg_ap_pat) : args ++ slowArgs rest + where (arg_pat, args, rest) = matchSlowPattern amodes + stg_ap_pat = mkRtsRetInfoLabel arg_pat + +matchSlowPattern :: [(CgRep,CmmExpr)] + -> (LitString, [(CgRep,CmmExpr)], [(CgRep,CmmExpr)]) +matchSlowPattern amodes = (arg_pat, these, rest) + where (arg_pat, n) = slowCallPattern (map fst amodes) + (these, rest) = splitAt n amodes + +-- These cases were found to cover about 99% of all slow calls: +slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_pppppp"), 6) +slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_ppppp"), 5) +slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_pppp"), 4) +slowCallPattern (PtrArg: PtrArg: PtrArg: VoidArg: _) = (SLIT("stg_ap_pppv"), 4) +slowCallPattern (PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_ppp"), 3) +slowCallPattern (PtrArg: PtrArg: VoidArg: _) = (SLIT("stg_ap_ppv"), 3) +slowCallPattern (PtrArg: PtrArg: _) = (SLIT("stg_ap_pp"), 2) +slowCallPattern (PtrArg: VoidArg: _) = (SLIT("stg_ap_pv"), 2) +slowCallPattern (PtrArg: _) = (SLIT("stg_ap_p"), 1) +slowCallPattern (VoidArg: _) = (SLIT("stg_ap_v"), 1) +slowCallPattern (NonPtrArg: _) = (SLIT("stg_ap_n"), 1) +slowCallPattern (FloatArg: _) = (SLIT("stg_ap_f"), 1) +slowCallPattern (DoubleArg: _) = (SLIT("stg_ap_d"), 1) +slowCallPattern (LongArg: _) = (SLIT("stg_ap_l"), 1) +slowCallPattern _ = panic "CgStackery.slowCallPattern" + +------------------------------------------------------------------------- +-- +-- Return conventions +-- +------------------------------------------------------------------------- + +-- A @CtrlReturnConvention@ says how {\em control} is returned. 
+ +data CtrlReturnConvention + = VectoredReturn Int -- size of the vector table (family size) + | UnvectoredReturn Int -- family size + +ctrlReturnConvAlg :: TyCon -> CtrlReturnConvention +ctrlReturnConvAlg tycon + = case (tyConFamilySize tycon) of + size -> -- we're supposed to know... + if (size > (1::Int) && size <= mAX_FAMILY_SIZE_FOR_VEC_RETURNS) then + VectoredReturn size + else + UnvectoredReturn size + -- NB: unvectored returns include size 0 (no constructors), so that + -- the following perverse code compiles (it crashed GHC in 5.02) + -- data T1 + -- data T2 = T2 !T1 Int + -- The only value of type T1 is bottom, which never returns anyway. + +dataReturnConvPrim :: CgRep -> CmmReg +dataReturnConvPrim PtrArg = CmmGlobal (VanillaReg 1) +dataReturnConvPrim NonPtrArg = CmmGlobal (VanillaReg 1) +dataReturnConvPrim LongArg = CmmGlobal (LongReg 1) +dataReturnConvPrim FloatArg = CmmGlobal (FloatReg 1) +dataReturnConvPrim DoubleArg = CmmGlobal (DoubleReg 1) +dataReturnConvPrim VoidArg = panic "dataReturnConvPrim: void" + + +-- getSequelAmode returns an amode which refers to an info table. The info +-- table will always be of the RET(_VEC)?_(BIG|SMALL) kind. We're careful +-- not to handle real code pointers, just in case we're compiling for +-- an unregisterised/untailcallish architecture, where info pointers and +-- code pointers aren't the same. +-- DIRE WARNING. +-- The OnStack case of sequelToAmode delivers an Amode which is only +-- valid just before the final control transfer, because it assumes +-- that Sp is pointing to the top word of the return address. This +-- seems unclean but there you go. + +getSequelAmode :: FCode CmmExpr +getSequelAmode + = do { EndOfBlockInfo virt_sp sequel <- getEndOfBlockInfo + ; case sequel of + OnStack -> do { sp_rel <- getSpRelOffset virt_sp + ; returnFC (CmmLoad sp_rel wordRep) } + + UpdateCode -> returnFC (CmmLit (CmmLabel mkUpdInfoLabel)) + CaseAlts lbl _ _ True -> returnFC (CmmLit (CmmLabel mkSeqInfoLabel)) + CaseAlts lbl _ _ False -> returnFC (CmmLit (CmmLabel lbl)) + } + +------------------------------------------------------------------------- +-- +-- Build a liveness mask for the current stack +-- +------------------------------------------------------------------------- + +-- There are four kinds of things on the stack: +-- +-- - pointer variables (bound in the environment) +-- - non-pointer variables (bound in the environment) +-- - free slots (recorded in the stack free list) +-- - non-pointer data slots (recorded in the stack free list) +-- +-- We build up a bitmap of non-pointer slots by searching the environment +-- for all the pointer variables, and subtracting these from a bitmap +-- with initially all bits set (up to the size of the stack frame). + +buildContLiveness :: Name -- Basis for label (only) + -> [VirtualSpOffset] -- Live stack slots + -> FCode Liveness +buildContLiveness name live_slots + = do { stk_usg <- getStkUsage + ; let StackUsage { realSp = real_sp, + frameSp = frame_sp } = stk_usg + + start_sp :: VirtualSpOffset + start_sp = real_sp - retAddrSizeW + -- In a continuation, we want a liveness mask that + -- starts from just after the return address, which is + -- on the stack at real_sp. + + frame_size :: WordOff + frame_size = start_sp - frame_sp + -- real_sp points to the frame-header for the current + -- stack frame, and the end of this frame is frame_sp. + -- The size is therefore real_sp - frame_sp - retAddrSizeW + -- (subtract one for the frame-header = return address).
+ + rel_slots :: [WordOff] + rel_slots = sortLe (<=) + [ start_sp - ofs -- Get slots relative to top of frame + | ofs <- live_slots ] + + bitmap = intsToReverseBitmap frame_size rel_slots + + ; WARN( not (all (>=0) rel_slots), + ppr name $$ ppr live_slots $$ ppr frame_size $$ ppr start_sp $$ ppr rel_slots ) + mkLiveness name frame_size bitmap } + + +------------------------------------------------------------------------- +-- +-- Register assignment +-- +------------------------------------------------------------------------- + +-- How to assign registers for +-- +-- 1) Calling a fast entry point. +-- 2) Returning an unboxed tuple. +-- 3) Invoking an out-of-line PrimOp. +-- +-- Registers are assigned in order. +-- +-- If we run out, we don't attempt to assign any further registers (even +-- though we might have run out of only one kind of register); we just +-- return immediately with the left-overs specified. +-- +-- The alternative version @assignAllRegs@ uses the complete set of +-- registers, including those that aren't mapped to real machine +-- registers. This is used for calling special RTS functions and PrimOps +-- which expect their arguments to always be in the same registers. + +assignCallRegs, assignPrimOpCallRegs, assignReturnRegs + :: [(CgRep,a)] -- Arg or result values to assign + -> ([(a, GlobalReg)], -- Register assignment in same order + -- for *initial segment of* input list + -- (but reversed; doesn't matter) + -- VoidRep args do not appear here + [(CgRep,a)]) -- Leftover arg or result values + +assignCallRegs args + = assign_regs args (mkRegTbl [node]) + -- The entry convention for a function closure + -- never uses Node for argument passing; instead + -- Node points to the function closure itself + +assignPrimOpCallRegs args + = assign_regs args (mkRegTbl_allRegs []) + -- For primops, *all* arguments must be passed in registers + +assignReturnRegs args + = assign_regs args (mkRegTbl []) + -- For returning unboxed tuples etc, + -- we use all regs + +assign_regs :: [(CgRep,a)] -- Arg or result values to assign + -> AvailRegs -- Regs still avail: Vanilla, Float, Double, Longs + -> ([(a, GlobalReg)], [(CgRep, a)]) +assign_regs args supply + = go args [] supply + where + go [] acc supply = (acc, []) -- Return the results reversed (doesn't matter) + go ((VoidArg,_) : args) acc supply -- Skip void arguments; they aren't passed, and + = go args acc supply -- there's nothing to bind them to + go ((rep,arg) : args) acc supply + = case assign_reg rep supply of + Just (reg, supply') -> go args ((arg,reg):acc) supply' + Nothing -> (acc, (rep,arg):args) -- No more regs + +assign_reg :: CgRep -> AvailRegs -> Maybe (GlobalReg, AvailRegs) +assign_reg FloatArg (vs, f:fs, ds, ls) = Just (FloatReg f, (vs, fs, ds, ls)) +assign_reg DoubleArg (vs, fs, d:ds, ls) = Just (DoubleReg d, (vs, fs, ds, ls)) +assign_reg LongArg (vs, fs, ds, l:ls) = Just (LongReg l, (vs, fs, ds, ls)) +assign_reg PtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls)) +assign_reg NonPtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls)) + -- PtrArg and NonPtrArg both go in a vanilla register +assign_reg other not_enough_regs = Nothing + + +------------------------------------------------------------------------- +-- +-- Register supplies +-- +------------------------------------------------------------------------- + +-- Vanilla registers can contain pointers, Ints, Chars. +-- Floats and doubles have separate register supplies.
+-- +-- We take these register supplies from the *real* registers, i.e. those +-- that are guaranteed to map to machine registers. + +useVanillaRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Vanilla_REG +useFloatRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Float_REG +useDoubleRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Double_REG +useLongRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Long_REG + +vanillaRegNos, floatRegNos, doubleRegNos, longRegNos :: [Int] +vanillaRegNos = regList useVanillaRegs +floatRegNos = regList useFloatRegs +doubleRegNos = regList useDoubleRegs +longRegNos = regList useLongRegs + +allVanillaRegNos, allFloatRegNos, allDoubleRegNos, allLongRegNos :: [Int] +allVanillaRegNos = regList mAX_Vanilla_REG +allFloatRegNos = regList mAX_Float_REG +allDoubleRegNos = regList mAX_Double_REG +allLongRegNos = regList mAX_Long_REG + +regList 0 = [] +regList n = [1 .. n] + +type AvailRegs = ( [Int] -- available vanilla regs. + , [Int] -- floats + , [Int] -- doubles + , [Int] -- longs (int64 and word64) + ) + +mkRegTbl :: [GlobalReg] -> AvailRegs +mkRegTbl regs_in_use + = mkRegTbl' regs_in_use vanillaRegNos floatRegNos doubleRegNos longRegNos + +mkRegTbl_allRegs :: [GlobalReg] -> AvailRegs +mkRegTbl_allRegs regs_in_use + = mkRegTbl' regs_in_use allVanillaRegNos allFloatRegNos allDoubleRegNos allLongRegNos + +mkRegTbl' regs_in_use vanillas floats doubles longs + = (ok_vanilla, ok_float, ok_double, ok_long) + where + ok_vanilla = mapCatMaybes (select VanillaReg) vanillas + ok_float = mapCatMaybes (select FloatReg) floats + ok_double = mapCatMaybes (select DoubleReg) doubles + ok_long = mapCatMaybes (select LongReg) longs + -- rep isn't looked at, hence we can use any old rep. + + select :: (Int -> GlobalReg) -> Int{-cand-} -> Maybe Int + -- once we've unboxed the Int, we make a GlobalReg + -- and see if it is already in use; if not, return its number. + + select mk_reg_fun cand + = let + reg = mk_reg_fun cand + in + if reg `not_elem` regs_in_use + then Just cand + else Nothing + where + not_elem = isn'tIn "mkRegTbl" + + diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs new file mode 100644 index 0000000000..e7c08940c5 --- /dev/null +++ b/compiler/codeGen/CgCase.lhs @@ -0,0 +1,634 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgCase.lhs,v 1.75 2005/06/21 10:44:41 simonmar Exp $ +% +%******************************************************** +%* * +\section[CgCase]{Converting @StgCase@ expressions} +%* * +%******************************************************** + +\begin{code} +module CgCase ( cgCase, saveVolatileVarsAndRegs, + restoreCurrentCostCentre + ) where + +#include "HsVersions.h" + +import {-# SOURCE #-} CgExpr ( cgExpr ) + +import CgMonad +import StgSyn +import CgBindery ( getArgAmodes, + bindNewToReg, bindNewToTemp, + getCgIdInfo, getArgAmode, + rebindToStack, getCAddrModeIfVolatile, + nukeDeadBindings, idInfoToAmode + ) +import CgCon ( bindConArgs, bindUnboxedTupleComponents ) +import CgHeapery ( altHeapCheck, unbxTupleHeapCheck ) +import CgCallConv ( dataReturnConvPrim, ctrlReturnConvAlg, + CtrlReturnConvention(..)
+ ) +import CgStackery ( allocPrimStack, allocStackTop, getSpRelOffset, + deAllocStackTop, freeStackSlots + ) +import CgTailCall ( performTailCall ) +import CgPrimOp ( cgPrimOp ) +import CgForeignCall ( cgForeignCall ) +import CgUtils ( newTemp, cgLit, emitLitSwitch, emitSwitch, + tagToClosure ) +import CgProf ( curCCS, curCCSAddr ) +import CgInfoTbls ( emitDirectReturnTarget, emitAlgReturnTarget, + dataConTagZ ) +import SMRep ( CgRep(..), retAddrSizeW, nonVoidArg, isVoidArg, + idCgRep, tyConCgRep, typeHint ) +import CmmUtils ( CmmStmts, noStmts, oneStmt, plusStmts ) +import Cmm +import MachOp ( wordRep ) +import ClosureInfo ( mkLFArgument ) +import StaticFlags ( opt_SccProfilingOn ) +import Id ( Id, idName, isDeadBinder, idType ) +import ForeignCall ( ForeignCall(..), CCallSpec(..), playSafe ) +import VarSet ( varSetElems ) +import CoreSyn ( AltCon(..) ) +import PrimOp ( PrimOp(..), primOpOutOfLine ) +import TyCon ( isEnumerationTyCon, tyConFamilySize ) +import Util ( isSingleton ) +import Outputable +\end{code} + +\begin{code} +data GCFlag + = GCMayHappen -- The scrutinee may involve GC, so everything must be + -- tidy before the code for the scrutinee. + + | NoGC -- The scrutinee is a primitive value, or a call to a + -- primitive op which does no GC. Hence the case can + -- be done inline, without tidying up first. +\end{code} + +It is quite interesting to decide whether to put a heap-check +at the start of each alternative. Of course we certainly have +to do so if the case forces an evaluation, or if there is a primitive +op which can trigger GC. + +A more interesting situation is this: + + \begin{verbatim} + !A!; + ...A... + case x# of + 0# -> !B!; ...B... + default -> !C!; ...C... + \end{verbatim} + +where \tr{!x!} indicates a possible heap-check point. The heap checks +in the alternatives {\em can} be omitted, in which case the topmost +heapcheck will take their worst case into account. + +In favour of omitting \tr{!B!}, \tr{!C!}: + + - {\em May} save a heap overflow test, + if ...A... allocates anything. The other advantage + of this is that we can use relative addressing + from a single Hp to get at all the closures so allocated. + + - No need to save volatile vars etc across the case + +Against: + + - May do more allocation than reqd. This sometimes bites us + badly. For example, nfib (ha!) allocates about 30\% more space if the + worst-casing is done, because many many calls to nfib are leaf calls + which don't need to allocate anything. + + This never hurts us if there is only one alternative. + +\begin{code} +cgCase :: StgExpr + -> StgLiveVars + -> StgLiveVars + -> Id + -> SRT + -> AltType + -> [StgAlt] + -> Code +\end{code} + +Special case #1: case of literal. + +\begin{code} +cgCase (StgLit lit) live_in_whole_case live_in_alts bndr srt + alt_type@(PrimAlt tycon) alts + = do { tmp_reg <- bindNewToTemp bndr + ; cm_lit <- cgLit lit + ; stmtC (CmmAssign tmp_reg (CmmLit cm_lit)) + ; cgPrimAlts NoGC alt_type tmp_reg alts } +\end{code} + +Special case #2: scrutinising a primitive-typed variable. No +evaluation required. We don't save volatile variables, nor do we do a +heap-check in the alternatives. Instead, the heap usage of the +alternatives is worst-cased and passed upstream. This can result in +allocating more heap than strictly necessary, but it will sometimes +eliminate a heap check altogether. + +\begin{code} +cgCase (StgApp v []) live_in_whole_case live_in_alts bndr srt + alt_type@(PrimAlt tycon) alts + = do { -- Careful! 
we can't just bind the default binder to the same thing + -- as the scrutinee, since it might be a stack location, and having + -- two bindings pointing at the same stack locn doesn't work (it + -- confuses nukeDeadBindings). Hence, use a new temp. + v_info <- getCgIdInfo v + ; amode <- idInfoToAmode v_info + ; tmp_reg <- bindNewToTemp bndr + ; stmtC (CmmAssign tmp_reg amode) + ; cgPrimAlts NoGC alt_type tmp_reg alts } +\end{code} + +Special case #3: inline PrimOps and foreign calls. + +\begin{code} +cgCase (StgOpApp op@(StgPrimOp primop) args _) + live_in_whole_case live_in_alts bndr srt alt_type alts + | not (primOpOutOfLine primop) + = cgInlinePrimOp primop args bndr alt_type live_in_alts alts +\end{code} + +TODO: Case-of-case of primop can probably be done inline too (but +maybe better to translate it out beforehand). See +ghc/lib/misc/PackedString.lhs for examples where this crops up (with +4.02). + +Special case #4: inline foreign calls: an unsafe foreign call can be done +right here, just like an inline primop. + +\begin{code} +cgCase (StgOpApp op@(StgFCallOp fcall _) args _) + live_in_whole_case live_in_alts bndr srt alt_type alts + | unsafe_foreign_call + = ASSERT( isSingleton alts ) + do -- *must* be an unboxed tuple alt. + -- exactly like the cgInlinePrimOp case for unboxed tuple alts.. + { res_tmps <- mapFCs bindNewToTemp non_void_res_ids + ; let res_hints = map (typeHint.idType) non_void_res_ids + ; cgForeignCall (zip res_tmps res_hints) fcall args live_in_alts + ; cgExpr rhs } + where + (_, res_ids, _, rhs) = head alts + non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids + + unsafe_foreign_call + = case fcall of + CCall (CCallSpec _ _ s) -> not (playSafe s) + _other -> False +\end{code} + +Special case: scrutinising a non-primitive variable. +This can be done a little better than the general case, because +we can reuse/trim the stack slot holding the variable (if it is in one). + +\begin{code} +cgCase (StgApp fun args) + live_in_whole_case live_in_alts bndr srt alt_type alts + = do { fun_info <- getCgIdInfo fun + ; arg_amodes <- getArgAmodes args + + -- Nuking dead bindings *before* calculating the saves is the + -- value-add here. We might end up freeing up some slots currently + -- occupied by variables only required for the call. + -- NOTE: we need to look up the variables used in the call before + -- doing this, because some of them may not be in the environment + -- afterward. + ; nukeDeadBindings live_in_alts + ; (save_assts, alts_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_alts + + ; scrut_eob_info + <- forkEval alts_eob_info + (allocStackTop retAddrSizeW >> nopC) + (do { deAllocStackTop retAddrSizeW + ; cgEvalAlts maybe_cc_slot bndr srt alt_type alts }) + + ; setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info) + (performTailCall fun_info arg_amodes save_assts) } +\end{code} + +Note about return addresses: we *always* push a return address, even +if because of an optimisation we end up jumping direct to the return +code (not through the address itself). The alternatives always assume +that the return address is on the stack. The return address is +required in case the alternative performs a heap check, since it +encodes the liveness of the slots in the activation record. + +On entry to the case alternative, we can re-use the slot containing +the return address immediately after the heap check. That's what the +deAllocStackTop call is doing above. + +Finally, here is the general case. 
+ +\begin{code} +cgCase expr live_in_whole_case live_in_alts bndr srt alt_type alts + = do { -- Figure out what volatile variables to save + nukeDeadBindings live_in_whole_case + + ; (save_assts, alts_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_alts + + -- Save those variables right now! + ; emitStmts save_assts + + -- generate code for the alts + ; scrut_eob_info + <- forkEval alts_eob_info + (do { nukeDeadBindings live_in_alts + ; allocStackTop retAddrSizeW -- space for retn address + ; nopC }) + (do { deAllocStackTop retAddrSizeW + ; cgEvalAlts maybe_cc_slot bndr srt alt_type alts }) + + ; setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info) + (cgExpr expr) + } +\end{code} + +There's a lot of machinery going on behind the scenes to manage the +stack pointer here. forkEval takes the virtual Sp and free list from +the first argument, and turns that into the *real* Sp for the second +argument. It also uses this virtual Sp as the args-Sp in the EOB info +returned, so that the scrutinee will trim the real Sp back to the +right place before doing whatever it does. + --SDM (who just spent an hour figuring this out, and didn't want to + forget it). + +Why don't we push the return address just before evaluating the +scrutinee? Because the slot reserved for the return address might +contain something useful, so we wait until performing a tail call or +return before pushing the return address (see +CgTailCall.pushReturnAddress). + +This also means that the environment doesn't need to know about the +free stack slot for the return address (for generating bitmaps), +because we don't reserve it until just before the eval. + +TODO!! Problem: however, we have to save the current cost centre +stack somewhere, because at the eval point the current CCS might be +different. So we pick a free stack slot and save CCCS in it. One +consequence of this is that activation records on the stack don't +follow the layout of closures when we're profiling. The CCS could be +anywhere within the record. + +\begin{code} +maybeReserveSeqFrame PolyAlt (EndOfBlockInfo args_sp (CaseAlts amode stuff bndr _)) + = EndOfBlockInfo (args_sp + retAddrSizeW) (CaseAlts amode stuff bndr True) +maybeReserveSeqFrame other scrut_eob_info = scrut_eob_info +\end{code} + + +%************************************************************************ +%* * + Inline primops +%* * +%************************************************************************ + +\begin{code} +cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts + | isVoidArg (idCgRep bndr) + = ASSERT( con == DEFAULT && isSingleton alts && null bs ) + do { -- VOID RESULT; just sequencing, + -- so get in there and do it + cgPrimOp [] primop args live_in_alts + ; cgExpr rhs } + where + (con,bs,_,rhs) = head alts + +cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts + = do { -- PRIMITIVE ALTS, with non-void result + tmp_reg <- bindNewToTemp bndr + ; cgPrimOp [tmp_reg] primop args live_in_alts + ; cgPrimAlts NoGC (PrimAlt tycon) tmp_reg alts } + +cgInlinePrimOp primop args bndr (UbxTupAlt tycon) live_in_alts alts + = ASSERT( isSingleton alts ) + do { -- UNBOXED TUPLE ALTS + -- No heap check, no yield, just get in there and do it. + -- NB: the case binder isn't bound to anything; + -- it has an unboxed tuple type + + res_tmps <- mapFCs bindNewToTemp non_void_res_ids + ; cgPrimOp res_tmps primop args live_in_alts + ; cgExpr rhs } + where + (_, res_ids, _, rhs) = head alts + non_void_res_ids = filter (nonVoidArg .
idCgRep) res_ids + +cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts + = do { -- ENUMERATION TYPE RETURN + -- Typical: case a ># b of { True -> ..; False -> .. } + -- The primop itself returns an index into the table of + -- closures for the enumeration type. + tag_amode <- ASSERT( isEnumerationTyCon tycon ) + do_enum_primop primop + + -- Bind the default binder if necessary + -- (avoiding it avoids the assignment) + -- The deadness info is set by StgVarInfo + ; hmods <- getHomeModules + ; whenC (not (isDeadBinder bndr)) + (do { tmp_reg <- bindNewToTemp bndr + ; stmtC (CmmAssign tmp_reg (tagToClosure hmods tycon tag_amode)) }) + + -- Compile the alts + ; (branches, mb_deflt) <- cgAlgAlts NoGC Nothing{-cc_slot-} + (AlgAlt tycon) alts + + -- Do the switch + ; emitSwitch tag_amode branches mb_deflt 0 (tyConFamilySize tycon - 1) + } + where + + do_enum_primop :: PrimOp -> FCode CmmExpr -- Returns amode for result + do_enum_primop TagToEnumOp -- No code! + | [arg] <- args = do + (_,e) <- getArgAmode arg + return e + do_enum_primop primop + = do tmp <- newTemp wordRep + cgPrimOp [tmp] primop args live_in_alts + returnFC (CmmReg tmp) + +cgInlinePrimOp primop arg_amodes bndr PolyAlt live_in_alts alts + = pprPanic "cgCase: case of primop has polymorphic type" (ppr bndr) +\end{code} + +%************************************************************************ +%* * +\subsection[CgCase-alts]{Alternatives} +%* * +%************************************************************************ + +@cgEvalAlts@ returns an addressing mode for a continuation for the +alternatives of a @case@, used in a context when there +is some evaluation to be done. + +\begin{code} +cgEvalAlts :: Maybe VirtualSpOffset -- Offset of cost-centre to be restored, if any + -> Id + -> SRT -- SRT for the continuation + -> AltType + -> [StgAlt] + -> FCode Sequel -- Any addr modes inside are guaranteed + -- to be a label so that we can duplicate it + -- without risk of duplicating code + +cgEvalAlts cc_slot bndr srt alt_type@(PrimAlt tycon) alts + = do { let rep = tyConCgRep tycon + reg = dataReturnConvPrim rep -- Bottom for voidRep + + ; abs_c <- forkProc $ do + { -- Bind the case binder, except if it's void + -- (reg is bottom in that case) + whenC (nonVoidArg rep) $ + bindNewToReg bndr reg (mkLFArgument bndr) + ; restoreCurrentCostCentre cc_slot True + ; cgPrimAlts GCMayHappen alt_type reg alts } + + ; lbl <- emitDirectReturnTarget (idName bndr) abs_c srt + ; returnFC (CaseAlts lbl Nothing bndr False) } + +cgEvalAlts cc_slot bndr srt (UbxTupAlt _) [(con,args,_,rhs)] + = -- Unboxed tuple case + -- By now, the simplifier should have turned it + -- into case e of (# a,b #) -> e + -- There shouldn't be a + -- case e of DEFAULT -> e + ASSERT2( case con of { DataAlt _ -> True; other -> False }, + text "cgEvalAlts: dodgy case of unboxed tuple type" ) + do { -- forkAbsC for the RHS, so that the envt is + -- not changed for the emitDirectReturn call + abs_c <- forkProc $ do + { (live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents args + -- Restore the CC *after* binding the tuple components, + -- so that we get the stack offset of the saved CC right.
+ ; restoreCurrentCostCentre cc_slot True + -- Generate a heap check if necessary + -- and finally the code for the alternative + ; unbxTupleHeapCheck live_regs ptrs nptrs noStmts + (cgExpr rhs) } + ; lbl <- emitDirectReturnTarget (idName bndr) abs_c srt + ; returnFC (CaseAlts lbl Nothing bndr False) } + +cgEvalAlts cc_slot bndr srt alt_type alts + = -- Algebraic and polymorphic case + do { -- Bind the default binder + bindNewToReg bndr nodeReg (mkLFArgument bndr) + + -- Generate sequel info for use downstream + -- At the moment, we only do it if the type is vector-returnable. + -- Reason: if not, then it costs extra to label the + -- alternatives, because we'd get return code like: + -- + -- switch TagReg { 0 : JMP(alt_1); 1 : JMP(alt_2) ..etc } + -- + -- which is worse than having the alt code in the switch statement + + ; (alts, mb_deflt) <- cgAlgAlts GCMayHappen cc_slot alt_type alts + + ; (lbl, branches) <- emitAlgReturnTarget (idName bndr) + alts mb_deflt srt ret_conv + + ; returnFC (CaseAlts lbl branches bndr False) } + where + ret_conv = case alt_type of + AlgAlt tc -> ctrlReturnConvAlg tc + PolyAlt -> UnvectoredReturn 0 +\end{code} + + +HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): If +we do an inlining of the case, no separate functions for returning are +created, so we don't have to generate a GRAN_YIELD in that case. This info +must be propagated to cgAlgAltRhs (where the GRAN_YIELD macro might be +emitted). Hence, the new Bool arg to cgAlgAltRhs. + +%************************************************************************ +%* * +\subsection[CgCase-alg-alts]{Algebraic alternatives} +%* * +%************************************************************************ + +In @cgAlgAlts@, none of the binders in the alternatives are +assumed to be yet bound. + +HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): The +last arg of cgAlgAlts indicates if we want a context switch at the +beginning of each alternative. Normally we want that. The only exception +is inlined alternatives. + +\begin{code} +cgAlgAlts :: GCFlag + -> Maybe VirtualSpOffset + -> AltType -- ** AlgAlt or PolyAlt only ** + -> [StgAlt] -- The alternatives + -> FCode ( [(ConTagZ, CgStmts)], -- The branches + Maybe CgStmts ) -- The default case + +cgAlgAlts gc_flag cc_slot alt_type alts + = do alts <- forkAlts [ cgAlgAlt gc_flag cc_slot alt_type alt | alt <- alts] + let + mb_deflt = case alts of -- DEFAULT is always first, if present + ((DEFAULT,blks) : _) -> Just blks + other -> Nothing + + branches = [(dataConTagZ con, blks) + | (DataAlt con, blks) <- alts] + -- in + return (branches, mb_deflt) + + +cgAlgAlt :: GCFlag + -> Maybe VirtualSpOffset -- Turgid state + -> AltType -- ** AlgAlt or PolyAlt only ** + -> StgAlt + -> FCode (AltCon, CgStmts) + +cgAlgAlt gc_flag cc_slot alt_type (con, args, use_mask, rhs) + = do { abs_c <- getCgStmts $ do + { bind_con_args con args + ; restoreCurrentCostCentre cc_slot True + ; maybeAltHeapCheck gc_flag alt_type (cgExpr rhs) } + ; return (con, abs_c) } + where + bind_con_args DEFAULT args = nopC + bind_con_args (DataAlt dc) args = bindConArgs dc args +\end{code} + + +%************************************************************************ +%* * +\subsection[CgCase-prim-alts]{Primitive alternatives} +%* * +%************************************************************************ + +@cgPrimAlts@ generates a suitable @CSwitch@ +for dealing with the alternatives of a primitive @case@, given an +addressing mode for the thing to scrutinise.
It also keeps track of +the maximum stack depth encountered down any branch. + +As usual, no binders in the alternatives are yet bound. + +\begin{code} +cgPrimAlts :: GCFlag + -> AltType -- Always PrimAlt, but passed to maybeAltHeapCheck + -> CmmReg -- Scrutinee + -> [StgAlt] -- Alternatives + -> Code +-- NB: cgPrimAlts emits code that does the case analysis. +-- It's often used in inline situations, rather than to generate +-- a labelled return point. That's why its interface is a little +-- different to cgAlgAlts +-- +-- INVARIANT: the default binder is already bound +cgPrimAlts gc_flag alt_type scrutinee alts + = do { tagged_absCs <- forkAlts (map (cgPrimAlt gc_flag alt_type) alts) + ; let ((DEFAULT, deflt_absC) : others) = tagged_absCs -- There is always a default + alt_absCs = [(lit,rhs) | (LitAlt lit, rhs) <- others] + ; emitLitSwitch (CmmReg scrutinee) alt_absCs deflt_absC } + +cgPrimAlt :: GCFlag + -> AltType + -> StgAlt -- The alternative + -> FCode (AltCon, CgStmts) -- Its compiled form + +cgPrimAlt gc_flag alt_type (con, [], [], rhs) + = ASSERT( case con of { DEFAULT -> True; LitAlt _ -> True; other -> False } ) + do { abs_c <- getCgStmts (maybeAltHeapCheck gc_flag alt_type (cgExpr rhs)) + ; returnFC (con, abs_c) } +\end{code} + + +%************************************************************************ +%* * +\subsection[CgCase-tidy]{Code for tidying up prior to an eval} +%* * +%************************************************************************ + +\begin{code} +maybeAltHeapCheck + :: GCFlag + -> AltType -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt + -> Code -- Continuation + -> Code +maybeAltHeapCheck NoGC _ code = code +maybeAltHeapCheck GCMayHappen alt_type code = altHeapCheck alt_type code + +saveVolatileVarsAndRegs + :: StgLiveVars -- Vars which should be made safe + -> FCode (CmmStmts, -- Assignments to do the saves + EndOfBlockInfo, -- sequel for the alts + Maybe VirtualSpOffset) -- Slot for current cost centre + +saveVolatileVarsAndRegs vars + = do { var_saves <- saveVolatileVars vars + ; (maybe_cc_slot, cc_save) <- saveCurrentCostCentre + ; eob_info <- getEndOfBlockInfo + ; returnFC (var_saves `plusStmts` cc_save, + eob_info, + maybe_cc_slot) } + + +saveVolatileVars :: StgLiveVars -- Vars which should be made safe + -> FCode CmmStmts -- Assignments to do the saves + +saveVolatileVars vars + = do { stmts_s <- mapFCs save_it (varSetElems vars) + ; return (foldr plusStmts noStmts stmts_s) } + where + save_it var + = do { v <- getCAddrModeIfVolatile var + ; case v of + Nothing -> return noStmts -- Non-volatile + Just vol_amode -> save_var var vol_amode -- Aha! It's volatile + } + + save_var var vol_amode + = do { slot <- allocPrimStack (idCgRep var) + ; rebindToStack var slot + ; sp_rel <- getSpRelOffset slot + ; returnFC (oneStmt (CmmStore sp_rel vol_amode)) } +\end{code} + +--------------------------------------------------------------------------- + +When we save the current cost centre (which is done for lexical +scoping), we allocate a free stack location, and return (a)~the +virtual offset of the location, to pass on to the alternatives, and +(b)~the assignment to do the save (just as for @saveVolatileVars@).
+ +\begin{code} +saveCurrentCostCentre :: + FCode (Maybe VirtualSpOffset, -- Where we decide to store it + CmmStmts) -- Assignment to save it + +saveCurrentCostCentre + | not opt_SccProfilingOn + = returnFC (Nothing, noStmts) + | otherwise + = do { slot <- allocPrimStack PtrArg + ; sp_rel <- getSpRelOffset slot + ; returnFC (Just slot, + oneStmt (CmmStore sp_rel curCCS)) } + +-- Sometimes we don't free the slot containing the cost centre after restoring it +-- (see CgLetNoEscape.cgLetNoEscapeBody). +restoreCurrentCostCentre :: Maybe VirtualSpOffset -> Bool -> Code +restoreCurrentCostCentre Nothing _freeit = nopC +restoreCurrentCostCentre (Just slot) freeit + = do { sp_rel <- getSpRelOffset slot + ; whenC freeit (freeStackSlots [slot]) + ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel wordRep)) } +\end{code} + diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs new file mode 100644 index 0000000000..1a2cbc5202 --- /dev/null +++ b/compiler/codeGen/CgClosure.lhs @@ -0,0 +1,599 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgClosure.lhs,v 1.72 2005/05/18 12:06:51 simonmar Exp $ +% +\section[CgClosure]{Code generation for closures} + +This module provides the support code for @StgToAbstractC@ to deal +with {\em closures} on the RHSs of let(rec)s. See also +@CgCon@, which deals with constructors. + +\begin{code} +module CgClosure ( cgTopRhsClosure, + cgStdRhsClosure, + cgRhsClosure, + emitBlackHoleCode, + ) where + +#include "HsVersions.h" + +import {-# SOURCE #-} CgExpr ( cgExpr ) + +import CgMonad +import CgBindery +import CgHeapery +import CgStackery ( mkVirtStkOffsets, pushUpdateFrame, getVirtSp, + setRealAndVirtualSp ) +import CgProf ( chooseDynCostCentres, ldvEnter, enterCostCentre, + costCentreFrom ) +import CgTicky +import CgParallel ( granYield, granFetchAndReschedule ) +import CgInfoTbls ( emitClosureCodeAndInfoTable, getSRTInfo ) +import CgCallConv ( assignCallRegs, mkArgDescr ) +import CgUtils ( emitDataLits, addIdReps, cmmRegOffW, + emitRtsCallWithVols ) +import ClosureInfo -- lots and lots of stuff +import SMRep ( CgRep, cgRepSizeW, argMachRep, fixedHdrSize, WordOff, + idCgRep ) +import MachOp ( MachHint(..) ) +import Cmm +import CmmUtils ( CmmStmts, mkStmts, oneStmt, plusStmts, noStmts, + mkLblExpr ) +import CLabel +import StgSyn +import StaticFlags ( opt_DoTickyProfiling ) +import CostCentre +import Id ( Id, idName, idType ) +import Name ( Name, isExternalName ) +import Module ( Module, pprModule ) +import ListSetOps ( minusList ) +import Util ( isIn, mapAccumL, zipWithEqual ) +import BasicTypes ( TopLevelFlag(..) ) +import Constants ( oFFSET_StgInd_indirectee, wORD_SIZE ) +import Outputable +import FastString +\end{code} + +%******************************************************** +%* * +\subsection[closures-no-free-vars]{Top-level closures} +%* * +%******************************************************** + +For closures bound at top level, allocate in static space. +They should have no free variables. 
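+For example (ordinary source-level Haskell, editorial illustration only):
+
+    answer = product [1..5]    -- an updatable top-level thunk (a CAF)
+    double x = x + x           -- a re-entrant top-level function
+
+Both are laid out statically by @cgTopRhsClosure@ below; neither has any
+free variables.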
+
+\begin{code}
+cgTopRhsClosure :: Id
+		-> CostCentreStack	-- Optional cost centre annotation
+		-> StgBinderInfo
+		-> SRT
+		-> UpdateFlag
+		-> [Id]			-- Args
+		-> StgExpr
+		-> FCode (Id, CgIdInfo)
+
+cgTopRhsClosure id ccs binder_info srt upd_flag args body = do
+  {	-- LAY OUT THE OBJECT
+    let name = idName id
+  ; lf_info  <- mkClosureLFInfo id TopLevel [] upd_flag args
+  ; srt_info <- getSRTInfo name srt
+  ; mod_name <- moduleName
+  ; let descr		= closureDescription mod_name name
+	closure_info	= mkClosureInfo True id lf_info 0 0 srt_info descr
+	closure_label	= mkLocalClosureLabel name
+	cg_id_info	= stableIdInfo id (mkLblExpr closure_label) lf_info
+	closure_rep	= mkStaticClosureFields closure_info ccs True []
+
+	 -- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY)
+  ; emitDataLits closure_label closure_rep
+  ; forkClosureBody (closureCodeBody binder_info closure_info
+				     ccs args body)
+
+  ; returnFC (id, cg_id_info) }
+\end{code}
+
+%********************************************************
+%*							*
+\subsection[non-top-level-closures]{Non top-level closures}
+%*							*
+%********************************************************
+
+For closures with free vars, allocate in heap.
+
+\begin{code}
+cgStdRhsClosure
+	:: Id
+	-> CostCentreStack	-- Optional cost centre annotation
+	-> StgBinderInfo
+	-> [Id]			-- Free vars
+	-> [Id]			-- Args
+	-> StgExpr
+	-> LambdaFormInfo
+	-> [StgArg]		-- payload
+	-> FCode (Id, CgIdInfo)
+
+cgStdRhsClosure bndr cc bndr_info fvs args body lf_info payload
+  = do	-- AHA!  A STANDARD-FORM THUNK
+    {	-- LAY OUT THE OBJECT
+      amodes <- getArgAmodes payload
+    ; mod_name <- moduleName
+    ; let (tot_wds, ptr_wds, amodes_w_offsets)
+	      = mkVirtHeapOffsets (isLFThunk lf_info) amodes
+
+	  descr = closureDescription mod_name (idName bndr)
+	  closure_info = mkClosureInfo False	-- Not static
+				       bndr lf_info tot_wds ptr_wds
+				       NoC_SRT	-- No SRT for a std-form closure
+				       descr
+
+    ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
+
+	-- BUILD THE OBJECT
+    ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets
+
+	-- RETURN
+    ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) }
+\end{code}
+
+Here's the general case.
+
+\begin{code}
+cgRhsClosure :: Id
+	     -> CostCentreStack	-- Optional cost centre annotation
+	     -> StgBinderInfo
+	     -> SRT
+	     -> [Id]		-- Free vars
+	     -> UpdateFlag
+	     -> [Id]		-- Args
+	     -> StgExpr
+	     -> FCode (Id, CgIdInfo)
+
+cgRhsClosure bndr cc bndr_info srt fvs upd_flag args body = do
+  {	-- LAY OUT THE OBJECT
+	-- If the binder is itself a free variable, then don't store
+	-- it in the closure.  Instead, just bind it to Node on entry.
+	-- NB we can be sure that Node will point to it, because we
+	-- haven't told mkClosureLFInfo about this; so if the binder
+	-- _was_ a free var of its RHS, mkClosureLFInfo thinks it *is*
+	-- stored in the closure itself, so it will make sure that
+	-- Node points to it...
+    let
+	name	     = idName bndr
+	is_elem	     = isIn "cgRhsClosure"
+	bndr_is_a_fv = bndr `is_elem` fvs
+	reduced_fvs | bndr_is_a_fv = fvs `minusList` [bndr]
+		    | otherwise	   = fvs
+
+  ; lf_info  <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args
+  ; fv_infos <- mapFCs getCgIdInfo reduced_fvs
+  ; srt_info <- getSRTInfo name srt
+  ; mod_name <- moduleName
+  ; let	bind_details :: [(CgIdInfo, VirtualHpOffset)]
+	(tot_wds, ptr_wds, bind_details)
+	    = mkVirtHeapOffsets (isLFThunk lf_info) (map add_rep fv_infos)
+
+	add_rep info = (cgIdInfoArgRep info, info)
+
+	descr = closureDescription mod_name name
+	closure_info = mkClosureInfo False	-- Not static
+				     bndr lf_info tot_wds ptr_wds
+				     srt_info descr
+
+	-- BUILD ITS INFO TABLE AND CODE
+  ; forkClosureBody (do
+	{	-- Bind the fvs
+	  let bind_fv (info, offset)
+		= bindNewToNode (cgIdInfoId info) offset (cgIdInfoLF info)
+	; mapCs bind_fv bind_details
+
+		-- Bind the binder itself, if it is a free var
+	; whenC bndr_is_a_fv (bindNewToReg bndr nodeReg lf_info)
+
+		-- Compile the body
+	; closureCodeBody bndr_info closure_info cc args body })
+
+	-- BUILD THE OBJECT
+  ; let
+	to_amode (info, offset) = do { amode <- idInfoToAmode info
+				     ; return (amode, offset) }
+  ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body
+  ; amodes_w_offsets <- mapFCs to_amode bind_details
+  ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets
+
+	-- RETURN
+  ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) }
+
+
+mkClosureLFInfo :: Id		-- The binder
+		-> TopLevelFlag	-- True if top level
+		-> [Id]		-- Free vars
+		-> UpdateFlag	-- Update flag
+		-> [Id]		-- Args
+		-> FCode LambdaFormInfo
+mkClosureLFInfo bndr top fvs upd_flag args
+  | null args = return (mkLFThunk (idType bndr) top fvs upd_flag)
+  | otherwise = do { arg_descr <- mkArgDescr (idName bndr) args
+		   ; return (mkLFReEntrant top fvs args arg_descr) }
+\end{code}
+
+
+%************************************************************************
+%*									*
+\subsection[code-for-closures]{The code for closures}
+%*									*
+%************************************************************************
+
+\begin{code}
+closureCodeBody :: StgBinderInfo
+		-> ClosureInfo	   -- Lots of information about this closure
+		-> CostCentreStack -- Optional cost centre attached to closure
+		-> [Id]
+		-> StgExpr
+		-> Code
+\end{code}
+
+There are two main cases for the code for closures.  If there are {\em
+no arguments}, then the closure is a thunk, and not in normal form.
+So it should set up an update frame (if it is shared).
+NB: Thunks cannot have a primitive type!
+
+\begin{code}
+closureCodeBody binder_info cl_info cc [{- No args i.e. thunk -}] body = do
+  { body_absC <- getCgStmts $ do
+	{ tickyEnterThunk cl_info
+	; ldvEnter (CmmReg nodeReg)  -- NB: Node always points when profiling
+	; thunkWrapper cl_info $ do
+		-- We only enter cc after setting up update so
+		-- that cc of enclosing scope will be recorded
+		-- in the update frame.  CAF/DICT functions will be
+		-- subsumed by this enclosing cc
+	    { enterCostCentre cl_info cc body
+	    ; cgExpr body }
+	}
+
+  ; emitClosureCodeAndInfoTable cl_info [] body_absC }
+\end{code}
+
+If there is /at least one argument/, then this closure is in
+normal form, so there is no need to set up an update frame.
+
+The macros for GrAnSim are produced at the beginning of the
+argSatisfactionCheck (by calling fetchAndReschedule).  There, info on
+whether Node points to the closure is available.
-- HWL + +\begin{code} +closureCodeBody binder_info cl_info cc args body + = ASSERT( length args > 0 ) + do { -- Get the current virtual Sp (it might not be zero, + -- eg. if we're compiling a let-no-escape). + vSp <- getVirtSp + ; let (reg_args, other_args) = assignCallRegs (addIdReps args) + (sp_top, stk_args) = mkVirtStkOffsets vSp other_args + + -- Allocate the global ticky counter + ; let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info) + ; emitTickyCounter cl_info args sp_top + + -- ...and establish the ticky-counter + -- label for this block + ; setTickyCtrLabel ticky_ctr_lbl $ do + + -- Emit the slow-entry code + { reg_save_code <- mkSlowEntryCode cl_info reg_args + + -- Emit the main entry code + ; blks <- forkProc $ + mkFunEntryCode cl_info cc reg_args stk_args + sp_top reg_save_code body + ; emitClosureCodeAndInfoTable cl_info [] blks + }} + + + +mkFunEntryCode :: ClosureInfo + -> CostCentreStack + -> [(Id,GlobalReg)] -- Args in regs + -> [(Id,VirtualSpOffset)] -- Args on stack + -> VirtualSpOffset -- Last allocated word on stack + -> CmmStmts -- Register-save code in case of GC + -> StgExpr + -> Code +-- The main entry code for the closure +mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do + { -- Bind args to regs/stack as appropriate, + -- and record expected position of sps + ; bindArgsToRegs reg_args + ; bindArgsToStack stk_args + ; setRealAndVirtualSp sp_top + + -- Enter the cost-centre, if required + -- ToDo: It's not clear why this is outside the funWrapper, + -- but the tickyEnterFun is inside. Perhaps we can put + -- them together? + ; enterCostCentre cl_info cc body + + -- Do the business + ; funWrapper cl_info reg_args reg_save_code $ do + { tickyEnterFun cl_info + ; cgExpr body } + } +\end{code} + +The "slow entry" code for a function. This entry point takes its +arguments on the stack. It loads the arguments into registers +according to the calling convention, and jumps to the function's +normal entry point. The function's closure is assumed to be in +R1/node. + +The slow entry point is used in two places: + + (a) unknown calls: eg. 
stg_PAP_entry
+	(b) returning from a heap-check failure
+
+\begin{code}
+mkSlowEntryCode :: ClosureInfo -> [(Id,GlobalReg)] -> FCode CmmStmts
+-- If this function doesn't have a specialised ArgDescr, we need
+-- to generate the function's arg bitmap, slow-entry code, and
+-- register-save code for the heap-check failure
+-- Here, we emit the slow-entry code, and
+-- return the register-save assignments
+mkSlowEntryCode cl_info reg_args
+  | Just (_, ArgGen _) <- closureFunInfo cl_info
+  = do	{ emitSimpleProc slow_lbl (emitStmts load_stmts)
+	; return save_stmts }
+  | otherwise = return noStmts
+  where
+     name = closureName cl_info
+     slow_lbl = mkSlowEntryLabel name
+
+     load_stmts = mkStmts load_assts `plusStmts` mkStmts [stk_adj_pop, jump_to_entry]
+     save_stmts = oneStmt stk_adj_push `plusStmts` mkStmts save_assts
+
+     reps_w_regs :: [(CgRep,GlobalReg)]
+     reps_w_regs = [(idCgRep id, reg) | (id,reg) <- reverse reg_args]
+     (final_stk_offset, stk_offsets)
+	= mapAccumL (\off (rep,_) -> (off + cgRepSizeW rep, off))
+		    0 reps_w_regs
+
+     load_assts = zipWithEqual "mk_load" mk_load reps_w_regs stk_offsets
+     mk_load (rep,reg) offset = CmmAssign (CmmGlobal reg)
+					  (CmmLoad (cmmRegOffW spReg offset)
+						   (argMachRep rep))
+
+     save_assts = zipWithEqual "mk_save" mk_save reps_w_regs stk_offsets
+     mk_save (rep,reg) offset = ASSERT( argMachRep rep == globalRegRep reg )
+				CmmStore (cmmRegOffW spReg offset)
+					 (CmmReg (CmmGlobal reg))
+
+     stk_adj_pop   = CmmAssign spReg (cmmRegOffW spReg final_stk_offset)
+     stk_adj_push  = CmmAssign spReg (cmmRegOffW spReg (- final_stk_offset))
+     jump_to_entry = CmmJump (mkLblExpr (enterLocalIdLabel name)) []
+\end{code}
+
+
+%************************************************************************
+%*									*
+\subsubsection[closure-code-wrappers]{Wrappers around closure code}
+%*									*
+%************************************************************************
+
+\begin{code}
+thunkWrapper :: ClosureInfo -> Code -> Code
+thunkWrapper closure_info thunk_code = do
+  { let node_points = nodeMustPointToIt (closureLFInfo closure_info)
+
+    -- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node
+    -- (we prefer fetchAndReschedule-style context switches to yield ones)
+  ; if node_points
+    then granFetchAndReschedule [] node_points
+    else granYield		[] node_points
+
+	-- Stack and/or heap checks
+  ; thunkEntryChecks closure_info $ do
+	{	-- Overwrite with black hole if necessary
+	  whenC (blackHoleOnEntry closure_info && node_points)
+		(blackHoleIt closure_info)
+	; setupUpdate closure_info thunk_code }
+		-- setupUpdate *encloses* the thunk_code
+  }
+
+funWrapper :: ClosureInfo	-- Closure whose code body this is
+	   -> [(Id,GlobalReg)]	-- List of argument registers (if any)
+	   -> CmmStmts		-- reg saves for the heap check failure
+	   -> Code		-- Body of function being compiled
+	   -> Code
+funWrapper closure_info arg_regs reg_save_code fun_body = do
+  { let node_points = nodeMustPointToIt (closureLFInfo closure_info)
+
+	-- Enter for Ldv profiling
+  ; whenC node_points (ldvEnter (CmmReg nodeReg))
+
+	-- GranSim yield point
+  ; granYield arg_regs node_points
+
+	-- Heap and/or stack checks wrap the function body
+  ; funEntryChecks closure_info reg_save_code
+		   fun_body
+  }
+\end{code}
+
+
+%************************************************************************
+%*									*
+\subsubsubsection[update-and-BHs]{Update and black-hole wrappers}
+%*									*
+%************************************************************************
+
+
+\begin{code}
+blackHoleIt :: ClosureInfo -> Code	-- Only called for closures
with no args +-- Node points to the closure +blackHoleIt closure_info = emitBlackHoleCode (closureSingleEntry closure_info) + +emitBlackHoleCode :: Bool -> Code +emitBlackHoleCode is_single_entry + | eager_blackholing = do + tickyBlackHole (not is_single_entry) + stmtC (CmmStore (CmmReg nodeReg) (CmmLit (CmmLabel bh_lbl))) + | otherwise = + nopC + where + bh_lbl | is_single_entry = mkRtsDataLabel SLIT("stg_SE_BLACKHOLE_info") + | otherwise = mkRtsDataLabel SLIT("stg_BLACKHOLE_info") + + -- If we wanted to do eager blackholing with slop filling, + -- we'd need to do it at the *end* of a basic block, otherwise + -- we overwrite the free variables in the thunk that we still + -- need. We have a patch for this from Andy Cheadle, but not + -- incorporated yet. --SDM [6/2004] + -- + -- Profiling needs slop filling (to support LDV profiling), so + -- currently eager blackholing doesn't work with profiling. + -- + -- TICKY_TICKY needs EAGER_BLACKHOLING to verify no double-entries of + -- single-entry thunks. + eager_blackholing + | opt_DoTickyProfiling = True + | otherwise = False + +\end{code} + +\begin{code} +setupUpdate :: ClosureInfo -> Code -> Code -- Only called for closures with no args + -- Nota Bene: this function does not change Node (even if it's a CAF), + -- so that the cost centre in the original closure can still be + -- extracted by a subsequent enterCostCentre +setupUpdate closure_info code + | closureReEntrant closure_info + = code + + | not (isStaticClosure closure_info) + = if closureUpdReqd closure_info + then do { tickyPushUpdateFrame; pushUpdateFrame (CmmReg nodeReg) code } + else do { tickyUpdateFrameOmitted; code } + + | otherwise -- A static closure + = do { tickyUpdateBhCaf closure_info + + ; if closureUpdReqd closure_info + then do -- Blackhole the (updatable) CAF: + { upd_closure <- link_caf closure_info True + ; pushUpdateFrame upd_closure code } + else do + { -- No update reqd, you'd think we don't need to + -- black-hole it. But when ticky-ticky is on, we + -- black-hole it regardless, to catch errors in which + -- an allegedly single-entry closure is entered twice + -- + -- We discard the pointer returned by link_caf, because + -- we don't push an update frame + whenC opt_DoTickyProfiling -- Blackhole even a SE CAF + (link_caf closure_info False >> nopC) + ; tickyUpdateFrameOmitted + ; code } + } + + +----------------------------------------------------------------------------- +-- Entering a CAF +-- +-- When a CAF is first entered, it creates a black hole in the heap, +-- and updates itself with an indirection to this new black hole. +-- +-- We update the CAF with an indirection to a newly-allocated black +-- hole in the heap. We also set the blocking queue on the newly +-- allocated black hole to be empty. +-- +-- Why do we make a black hole in the heap when we enter a CAF? +-- +-- - for a generational garbage collector, which needs a fast +-- test for whether an updatee is in an old generation or not +-- +-- - for the parallel system, which can implement updates more +-- easily if the updatee is always in the heap. (allegedly). +-- +-- When debugging, we maintain a separate CAF list so we can tell when +-- a CAF has been garbage collected. + +-- newCAF must be called before the itbl ptr is overwritten, since +-- newCAF records the old itbl ptr in order to do CAF reverting +-- (which Hugs needs to do in order that combined mode works right.) 
+--
+
+-- ToDo [Feb 04]  This entire link_caf nonsense could all be moved
+-- into the "newCAF" RTS procedure, which we call anyway, including
+-- the allocation of the black-hole indirection closure.
+-- That way, code size would fall, the CAF-handling code would
+-- be closer together, and the compiler wouldn't need to know
+-- about off_indirectee etc.
+
+link_caf :: ClosureInfo
+	 -> Bool		-- True <=> updatable, False <=> single-entry
+	 -> FCode CmmExpr	-- Returns amode for closure to be updated
+-- To update a CAF we must allocate a black hole, link the CAF onto the
+-- CAF list, then update the CAF to point to the fresh black hole.
+-- This function returns the address of the black hole, so it can be
+-- updated with the new value when available.  The reason for all of this
+-- is that we only want to update dynamic heap objects, not static ones,
+-- so that generational GC is easier.
+link_caf cl_info is_upd = do
+  {	-- Alloc black hole specifying CC_HDR(Node) as the cost centre
+  ; let use_cc   = costCentreFrom (CmmReg nodeReg)
+	blame_cc = use_cc
+  ; hp_offset <- allocDynClosure bh_cl_info use_cc blame_cc []
+  ; hp_rel    <- getHpRelOffset hp_offset
+
+	-- Call the RTS function newCAF to add the CAF to the CafList
+	-- so that the garbage collector can find them
+	-- This must be done *before* the info table pointer is overwritten,
+	-- because the old info table ptr is needed for reversion
+  ; emitRtsCallWithVols SLIT("newCAF") [(CmmReg nodeReg,PtrHint)] [node]
+	-- node is live, so save it.
+
+	-- Overwrite the closure with a (static) indirection
+	-- to the newly-allocated black hole
+  ; stmtsC [ CmmStore (cmmRegOffW nodeReg off_indirectee) hp_rel
+	   , CmmStore (CmmReg nodeReg) ind_static_info ]
+
+  ; returnFC hp_rel }
+  where
+    bh_cl_info :: ClosureInfo
+    bh_cl_info | is_upd    = cafBlackHoleClosureInfo   cl_info
+	       | otherwise = seCafBlackHoleClosureInfo cl_info
+
+    ind_static_info :: CmmExpr
+    ind_static_info = mkLblExpr mkIndStaticInfoLabel
+
+    off_indirectee :: WordOff
+    off_indirectee = fixedHdrSize + oFFSET_StgInd_indirectee*wORD_SIZE
+\end{code}
+
+
+%************************************************************************
+%*									*
+\subsection[CgClosure-Description]{Profiling Closure Description.}
+%*									*
+%************************************************************************
+
+For "global" data constructors the description is simply the
+occurrence name of the data constructor itself.  Otherwise it is
+determined by @closureDescription@ from the let binding information.
+
+\begin{code}
+closureDescription :: Module		-- Module
+		   -> Name		-- Id of closure binding
+		   -> String
+	-- Not called for StgRhsCon, which have global info tables built in
+	-- CgConTbls.lhs with a description generated from the data constructor
+closureDescription mod_name name
+  = showSDocDump (char '<' <>
+		  (if isExternalName name
+		   then ppr name -- ppr will include the module name prefix
+		   else pprModule mod_name <> char '.' <> ppr name) <>
+		  char '>')
+   -- showSDocDump, because we want to see the unique on the Name.
+\end{code}
+
diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs
new file mode 100644
index 0000000000..bfb55bf46e
--- /dev/null
+++ b/compiler/codeGen/CgCon.lhs
@@ -0,0 +1,457 @@
+%
+% (c) The GRASP Project, Glasgow University, 1992-1998
+%
+\section[CgCon]{Code generation for constructors}
+
+This module provides the support code for @StgToAbstractC@ to deal
+with {\em constructors} on the RHSs of let(rec)s.  See also
+@CgClosure@, which deals with closures.
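+
+For instance, given the (illustrative) source binding
+
+    t = Just x
+
+where @x@ is itself a top-level value, the right-hand side is a
+saturated constructor application, so @t@ is emitted by @cgTopRhsCon@
+as static data; no entry code is needed for it.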
+ +\begin{code} +module CgCon ( + cgTopRhsCon, buildDynCon, + bindConArgs, bindUnboxedTupleComponents, + cgReturnDataCon, + cgTyCon + ) where + +#include "HsVersions.h" + +import CgMonad +import StgSyn + +import CgBindery ( getArgAmodes, bindNewToNode, + bindArgsToRegs, idInfoToAmode, stableIdInfo, + heapIdInfo, CgIdInfo, bindArgsToStack + ) +import CgStackery ( mkVirtStkOffsets, freeStackSlots, + getRealSp, getVirtSp, setRealAndVirtualSp ) +import CgUtils ( addIdReps, cmmLabelOffW, emitRODataLits, emitDataLits ) +import CgCallConv ( assignReturnRegs ) +import Constants ( mAX_INTLIKE, mIN_INTLIKE, mAX_CHARLIKE, mIN_CHARLIKE ) +import CgHeapery ( allocDynClosure, layOutDynConstr, + layOutStaticConstr, mkStaticClosureFields ) +import CgTailCall ( performReturn, emitKnownConReturnCode, returnUnboxedTuple ) +import CgProf ( mkCCostCentreStack, ldvEnter, curCCS ) +import CgTicky +import CgInfoTbls ( emitClosureCodeAndInfoTable, dataConTagZ ) +import CLabel +import ClosureInfo ( mkConLFInfo, mkLFArgument ) +import CmmUtils ( mkLblExpr ) +import Cmm +import SMRep ( WordOff, CgRep, separateByPtrFollowness, + fixedHdrSize, typeCgRep ) +import CostCentre ( currentOrSubsumedCCS, dontCareCCS, CostCentreStack, + currentCCS ) +import Constants ( mIN_INTLIKE, mAX_INTLIKE, mIN_CHARLIKE, mAX_CHARLIKE ) +import TyCon ( TyCon, tyConDataCons, isEnumerationTyCon, tyConName ) +import DataCon ( DataCon, dataConRepArgTys, isNullaryRepDataCon, + isUnboxedTupleCon, dataConWorkId, + dataConName, dataConRepArity + ) +import Id ( Id, idName, isDeadBinder ) +import Type ( Type ) +import PrelInfo ( maybeCharLikeCon, maybeIntLikeCon ) +import Outputable +import Util ( lengthIs ) +import ListSetOps ( assocMaybe ) +\end{code} + + +%************************************************************************ +%* * +\subsection[toplevel-constructors]{Top-level constructors} +%* * +%************************************************************************ + +\begin{code} +cgTopRhsCon :: Id -- Name of thing bound to this RHS + -> DataCon -- Id + -> [StgArg] -- Args + -> FCode (Id, CgIdInfo) +cgTopRhsCon id con args + = do { + ; hmods <- getHomeModules +#if mingw32_TARGET_OS + -- Windows DLLs have a problem with static cross-DLL refs. + ; ASSERT( not (isDllConApp hmods con args) ) return () +#endif + ; ASSERT( args `lengthIs` dataConRepArity con ) return () + + -- LAY IT OUT + ; amodes <- getArgAmodes args + + ; let + name = idName id + lf_info = mkConLFInfo con + closure_label = mkClosureLabel hmods name + caffy = any stgArgHasCafRefs args + (closure_info, amodes_w_offsets) = layOutStaticConstr hmods con amodes + closure_rep = mkStaticClosureFields + closure_info + dontCareCCS -- Because it's static data + caffy -- Has CAF refs + payload + + payload = map get_lit amodes_w_offsets + get_lit (CmmLit lit, _offset) = lit + get_lit other = pprPanic "CgCon.get_lit" (ppr other) + -- NB1: amodes_w_offsets is sorted into ptrs first, then non-ptrs + -- NB2: all the amodes should be Lits! 
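+	-- Illustration (hypothetical example, not from this module): for
+	-- a saturated application (MkPair 3# 4#) the amodes are two
+	-- CmmInt literals, so the payload is the two words [3, 4], which
+	-- emitDataLits below emits verbatim into the static closure.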
+
+	-- BUILD THE OBJECT
+  ; emitDataLits closure_label closure_rep
+
+	-- RETURN
+  ; returnFC (id, stableIdInfo id (mkLblExpr closure_label) lf_info) }
+\end{code}
+
+%************************************************************************
+%*									*
+%* non-top-level constructors						*
+%*									*
+%************************************************************************
+\subsection[code-for-constructors]{The code for constructors}
+
+\begin{code}
+buildDynCon :: Id		  -- Name of the thing to which this constr will
+				  -- be bound
+	    -> CostCentreStack	  -- Where to grab cost centre from;
+				  -- current CCS if currentOrSubsumedCCS
+	    -> DataCon		  -- The data constructor
+	    -> [(CgRep,CmmExpr)]  -- Its args
+	    -> FCode CgIdInfo	  -- Return details about how to find it
+
+-- We used to pass a boolean indicating whether all the
+-- args were of size zero, so we could use a static
+-- constructor; but I concluded that it just isn't worth it.
+-- Now that I/O uses unboxed tuples, there just aren't any constructors
+-- with all size-zero args.
+--
+-- The reason for having a separate argument, rather than looking at
+-- the addr modes of the args is that we may be in a "knot", and
+-- premature looking at the args will cause the compiler to black-hole!
+\end{code}
+
+First we deal with the case of zero-arity constructors.  Now, they
+will probably be unfolded, so we don't expect to see this case much,
+if at all, but it does no harm, and sets the scene for characters.
+
+In the case of zero-arity constructors, or, more accurately, those
+which have exclusively size-zero (VoidRep) args, we generate no code
+at all.
+
+\begin{code}
+buildDynCon binder cc con []
+  = do hmods <- getHomeModules
+       returnFC (stableIdInfo binder
+			      (mkLblExpr (mkClosureLabel hmods (dataConName con)))
+			      (mkConLFInfo con))
+\end{code}
+
+The following three paragraphs about @Char@-like and @Int@-like
+closures are obsolete, but I don't understand the details well enough
+to properly word them, sorry.  I've changed the treatment of @Char@s to
+be analogous to @Int@s: only a subset is preallocated, because @Char@
+now has 31 bits.  Only literals are handled here. -- Qrczak
+
+Now for @Char@-like closures.  We generate an assignment of the
+address of the closure to a temporary.  It would be possible simply to
+generate no code, and record the addressing mode in the environment,
+but we'd have to be careful if the argument wasn't a constant --- so
+for simplicity we just always assign to a temporary.
+
+Last special case: @Int@-like closures.  We only special-case the
+situation in which the argument is a literal in the range
+@mIN_INTLIKE@..@mAX_INTLIKE@.  NB: for @Char@-like closures we can
+work with any old argument, but for @Int@-like ones the argument has
+to be a literal.  Reason: @Char@-like closures have an argument type
+which is guaranteed in range.
+
+Because of this, we can safely return an addressing mode.
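+
+As a worked example (with illustrative numbers, not necessarily the
+real RTS values): if @fixedHdrSize@ is 1 and @mIN_INTLIKE@ is -16, each
+INTLIKE closure occupies 2 words (header plus payload), so the closure
+for the literal 5 lives at
+
+    stg_INTLIKE_closure + (5 - (-16)) * 2 = stg_INTLIKE_closure + 42 words
+
+which is exactly the @offsetW@ computation in the code below.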
+ +\begin{code} +buildDynCon binder cc con [arg_amode] + | maybeIntLikeCon con + , (_, CmmLit (CmmInt val _)) <- arg_amode + , let val_int = (fromIntegral val) :: Int + , val_int <= mAX_INTLIKE && val_int >= mIN_INTLIKE + = do { let intlike_lbl = mkRtsDataLabel SLIT("stg_INTLIKE_closure") + offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1) + -- INTLIKE closures consist of a header and one word payload + intlike_amode = CmmLit (cmmLabelOffW intlike_lbl offsetW) + ; returnFC (stableIdInfo binder intlike_amode (mkConLFInfo con)) } + +buildDynCon binder cc con [arg_amode] + | maybeCharLikeCon con + , (_, CmmLit (CmmInt val _)) <- arg_amode + , let val_int = (fromIntegral val) :: Int + , val_int <= mAX_CHARLIKE && val_int >= mIN_CHARLIKE + = do { let charlike_lbl = mkRtsDataLabel SLIT("stg_CHARLIKE_closure") + offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1) + -- CHARLIKE closures consist of a header and one word payload + charlike_amode = CmmLit (cmmLabelOffW charlike_lbl offsetW) + ; returnFC (stableIdInfo binder charlike_amode (mkConLFInfo con)) } +\end{code} + +Now the general case. + +\begin{code} +buildDynCon binder ccs con args + = do { + ; hmods <- getHomeModules + ; let + (closure_info, amodes_w_offsets) = layOutDynConstr hmods con args + + ; hp_off <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets + ; returnFC (heapIdInfo binder hp_off lf_info) } + where + lf_info = mkConLFInfo con + + use_cc -- cost-centre to stick in the object + | currentOrSubsumedCCS ccs = curCCS + | otherwise = CmmLit (mkCCostCentreStack ccs) + + blame_cc = use_cc -- cost-centre on which to blame the alloc (same) +\end{code} + + +%************************************************************************ +%* * +%* constructor-related utility function: * +%* bindConArgs is called from cgAlt of a case * +%* * +%************************************************************************ +\subsection[constructor-utilities]{@bindConArgs@: constructor-related utility} + +@bindConArgs@ $con args$ augments the environment with bindings for the +binders $args$, assuming that we have just returned from a @case@ which +found a $con$. + +\begin{code} +bindConArgs :: DataCon -> [Id] -> Code +bindConArgs con args + = do hmods <- getHomeModules + let + bind_arg (arg, offset) = bindNewToNode arg offset (mkLFArgument arg) + (_, args_w_offsets) = layOutDynConstr hmods con (addIdReps args) + -- + ASSERT(not (isUnboxedTupleCon con)) return () + mapCs bind_arg args_w_offsets +\end{code} + +Unboxed tuples are handled slightly differently - the object is +returned in registers and on the stack instead of the heap. 
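+For example (illustrative source):
+
+    divMod' :: Int -> Int -> (# Int, Int #)
+
+Its two result components come back in return registers where possible;
+any that do not fit go on the stack, with pointers and non-pointers
+kept apart (see @bindUnboxedTupleComponents@ below) so the stack
+remains scannable by the garbage collector.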
+ +\begin{code} +bindUnboxedTupleComponents + :: [Id] -- Args + -> FCode ([(Id,GlobalReg)], -- Regs assigned + WordOff, -- Number of pointer stack slots + WordOff, -- Number of non-pointer stack slots + VirtualSpOffset) -- Offset of return address slot + -- (= realSP on entry) + +bindUnboxedTupleComponents args + = do { + vsp <- getVirtSp + ; rsp <- getRealSp + + -- Assign as many components as possible to registers + ; let (reg_args, stk_args) = assignReturnRegs (addIdReps args) + + -- Separate the rest of the args into pointers and non-pointers + (ptr_args, nptr_args) = separateByPtrFollowness stk_args + + -- Allocate the rest on the stack + -- The real SP points to the return address, above which any + -- leftover unboxed-tuple components will be allocated + (ptr_sp, ptr_offsets) = mkVirtStkOffsets rsp ptr_args + (nptr_sp, nptr_offsets) = mkVirtStkOffsets ptr_sp nptr_args + ptrs = ptr_sp - rsp + nptrs = nptr_sp - ptr_sp + + -- The stack pointer points to the last stack-allocated component + ; setRealAndVirtualSp nptr_sp + + -- We have just allocated slots starting at real SP + 1, and set the new + -- virtual SP to the topmost allocated slot. + -- If the virtual SP started *below* the real SP, we've just jumped over + -- some slots that won't be in the free-list, so put them there + -- This commonly happens because we've freed the return-address slot + -- (trimming back the virtual SP), but the real SP still points to that slot + ; freeStackSlots [vsp+1,vsp+2 .. rsp] + + ; bindArgsToRegs reg_args + ; bindArgsToStack ptr_offsets + ; bindArgsToStack nptr_offsets + + ; returnFC (reg_args, ptrs, nptrs, rsp) } +\end{code} + +%************************************************************************ +%* * + Actually generate code for a constructor return +%* * +%************************************************************************ + + +Note: it's the responsibility of the @cgReturnDataCon@ caller to be +sure the @amodes@ passed don't conflict with each other. +\begin{code} +cgReturnDataCon :: DataCon -> [(CgRep, CmmExpr)] -> Code + +cgReturnDataCon con amodes + = ASSERT( amodes `lengthIs` dataConRepArity con ) + do { EndOfBlockInfo _ sequel <- getEndOfBlockInfo + ; case sequel of + CaseAlts _ (Just (alts, deflt_lbl)) bndr _ + -> -- Ho! We know the constructor so we can + -- go straight to the right alternative + case assocMaybe alts (dataConTagZ con) of { + Just join_lbl -> build_it_then (jump_to join_lbl); + Nothing + -- Special case! We're returning a constructor to the default case + -- of an enclosing case. For example: + -- + -- case (case e of (a,b) -> C a b) of + -- D x -> ... + -- y -> ...<returning here!>... + -- + -- In this case, + -- if the default is a non-bind-default (ie does not use y), + -- then we should simply jump to the default join point; + + | isDeadBinder bndr -> performReturn (jump_to deflt_lbl) + | otherwise -> build_it_then (jump_to deflt_lbl) } + + other_sequel -- The usual case + | isUnboxedTupleCon con -> returnUnboxedTuple amodes + | otherwise -> build_it_then (emitKnownConReturnCode con) + } + where + jump_to lbl = stmtC (CmmJump (CmmLit lbl) []) + build_it_then return_code + = do { -- BUILD THE OBJECT IN THE HEAP + -- The first "con" says that the name bound to this + -- closure is "con", which is a bit of a fudge, but it only + -- affects profiling + + -- This Id is also used to get a unique for a + -- temporary variable, if the closure is a CHARLIKE. 
+		-- funnily enough, this makes the unique always come
+		-- out as '54' :-)
+	  tickyReturnNewCon (length amodes)
+	; idinfo <- buildDynCon (dataConWorkId con) currentCCS con amodes
+	; amode <- idInfoToAmode idinfo
+	; checkedAbsC (CmmAssign nodeReg amode)
+	; performReturn return_code }
+\end{code}
+
+
+%************************************************************************
+%*									*
+	Generating static stuff for algebraic data types
+%*									*
+%************************************************************************
+
+	[These comments are rather out of date]
+
+\begin{tabular}{lll}
+Info tbls &	Macro  &     	     Kind of constructor \\
+\hline
+info & @CONST_INFO_TABLE@& Zero arity (no info -- compiler uses static closure)\\
+info & @CHARLIKE_INFO_TABLE@& Charlike (no info -- compiler indexes fixed array)\\
+info & @INTLIKE_INFO_TABLE@& Intlike; the one macro generates both info tbls\\
+info & @SPEC_INFO_TABLE@& SPECish, and bigger than or equal to @MIN_UPD_SIZE@\\
+info & @GEN_INFO_TABLE@& GENish (hence bigger than or equal to @MIN_UPD_SIZE@)\\
+\end{tabular}
+
+Possible info tables for constructor con:
+
+\begin{description}
+\item[@_con_info@:]
+Used for dynamically let(rec)-bound occurrences of
+the constructor, and for updates.  For constructors
+which are int-like, char-like or nullary, when GC occurs,
+the closure tries to get rid of itself.
+
+\item[@_static_info@:]
+Static occurrences of the constructor
+macro: @STATIC_INFO_TABLE@.
+\end{description}
+
+For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure;
+its place is taken by the top level defn of the constructor.
+
+For charlike and intlike closures there is a fixed array of static
+closures predeclared.
+
+\begin{code}
+cgTyCon :: TyCon -> FCode [Cmm]  -- each constructor gets a separate Cmm
+cgTyCon tycon
+  = do	{ constrs <- mapM (getCmm . cgDataCon) (tyConDataCons tycon)
+
+	    -- Generate a table of static closures for an enumeration type
+	    -- Put the table after the data constructor decls, because the
+	    -- datatype closure table (for enumeration types) refers
+	    -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff
+	; extra <-
+	   if isEnumerationTyCon tycon then do
+		tbl <- getCmm (emitRODataLits (mkLocalClosureTableLabel
+						(tyConName tycon))
+			   [ CmmLabel (mkLocalClosureLabel (dataConName con))
+			   | con <- tyConDataCons tycon])
+		return [tbl]
+	   else
+		return []
+
+	; return (extra ++ constrs)
+	}
+\end{code}
+
+Generate the entry code, info tables, and (for niladic constructors)
+the static closure, for a constructor.
+
+\begin{code}
+cgDataCon :: DataCon -> Code
+cgDataCon data_con
+  = do	{ -- Don't need any dynamic closure code for zero-arity constructors
+	  hmods <- getHomeModules
+
+	; let
+	    -- To allow the debuggers, interpreters, etc to cope with
+	    -- static data structures (ie those built at compile
+	    -- time), we take care that the info-table contains the
+	    -- information we need.
+ (static_cl_info, _) = + layOutStaticConstr hmods data_con arg_reps + + (dyn_cl_info, arg_things) = + layOutDynConstr hmods data_con arg_reps + + emit_info cl_info ticky_code + = do { code_blks <- getCgStmts the_code + ; emitClosureCodeAndInfoTable cl_info [] code_blks } + where + the_code = do { ticky_code + ; ldvEnter (CmmReg nodeReg) + ; body_code } + + arg_reps :: [(CgRep, Type)] + arg_reps = [(typeCgRep ty, ty) | ty <- dataConRepArgTys data_con] + + body_code = do { + -- NB: We don't set CC when entering data (WDP 94/06) + tickyReturnOldCon (length arg_things) + ; performReturn (emitKnownConReturnCode data_con) } + -- noStmts: Ptr to thing already in Node + + ; whenC (not (isNullaryRepDataCon data_con)) + (emit_info dyn_cl_info tickyEnterDynCon) + + -- Dynamic-Closure first, to reduce forward references + ; emit_info static_cl_info tickyEnterStaticCon } + + where +\end{code} diff --git a/compiler/codeGen/CgExpr.hi-boot-5 b/compiler/codeGen/CgExpr.hi-boot-5 new file mode 100644 index 0000000000..588e63f8f1 --- /dev/null +++ b/compiler/codeGen/CgExpr.hi-boot-5 @@ -0,0 +1,3 @@ +__interface CgExpr 1 0 where +__export CgExpr cgExpr; +1 cgExpr :: StgSyn.StgExpr -> CgMonad.Code ; diff --git a/compiler/codeGen/CgExpr.hi-boot-6 b/compiler/codeGen/CgExpr.hi-boot-6 new file mode 100644 index 0000000000..dc2d75cefe --- /dev/null +++ b/compiler/codeGen/CgExpr.hi-boot-6 @@ -0,0 +1,3 @@ +module CgExpr where + +cgExpr :: StgSyn.StgExpr -> CgMonad.Code diff --git a/compiler/codeGen/CgExpr.lhs b/compiler/codeGen/CgExpr.lhs new file mode 100644 index 0000000000..33d72f1608 --- /dev/null +++ b/compiler/codeGen/CgExpr.lhs @@ -0,0 +1,454 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgExpr.lhs,v 1.62 2005/06/21 10:44:41 simonmar Exp $ +% +%******************************************************** +%* * +\section[CgExpr]{Converting @StgExpr@s} +%* * +%******************************************************** + +\begin{code} +module CgExpr ( cgExpr ) where + +#include "HsVersions.h" + +import Constants ( mAX_SPEC_SELECTEE_SIZE, mAX_SPEC_AP_SIZE ) +import StgSyn +import CgMonad + +import SMRep ( fixedHdrSize, isFollowableArg, CgRep(..), argMachRep, + nonVoidArg, idCgRep, typeCgRep, typeHint, + primRepToCgRep ) +import CoreSyn ( AltCon(..) ) +import CgProf ( emitSetCCC ) +import CgHeapery ( layOutDynConstr ) +import CgBindery ( getArgAmodes, getArgAmode, CgIdInfo, + nukeDeadBindings, addBindC, addBindsC ) +import CgCase ( cgCase, saveVolatileVarsAndRegs ) +import CgClosure ( cgRhsClosure, cgStdRhsClosure ) +import CgCon ( buildDynCon, cgReturnDataCon ) +import CgLetNoEscape ( cgLetNoEscapeClosure ) +import CgCallConv ( dataReturnConvPrim ) +import CgTailCall +import CgInfoTbls ( emitDirectReturnInstr ) +import CgForeignCall ( emitForeignCall, shimForeignCallArg ) +import CgPrimOp ( cgPrimOp ) +import CgUtils ( addIdReps, newTemp, assignTemp, cgLit, tagToClosure ) +import ClosureInfo ( mkSelectorLFInfo, mkApLFInfo ) +import Cmm ( CmmExpr(..), CmmStmt(..), CmmReg, nodeReg ) +import MachOp ( wordRep, MachHint ) +import VarSet +import Literal ( literalType ) +import PrimOp ( primOpOutOfLine, getPrimOpResultInfo, + PrimOp(..), PrimOpResultInfo(..) ) +import Id ( Id ) +import TyCon ( isUnboxedTupleTyCon, isEnumerationTyCon ) +import Type ( Type, tyConAppArgs, tyConAppTyCon, repType, + PrimRep(VoidRep) ) +import Maybes ( maybeToBool ) +import ListSetOps ( assocMaybe ) +import BasicTypes ( RecFlag(..) 
) +import Util ( lengthIs ) +import Outputable +\end{code} + +This module provides the support code for @StgToAbstractC@ to deal +with STG {\em expressions}. See also @CgClosure@, which deals +with closures, and @CgCon@, which deals with constructors. + +\begin{code} +cgExpr :: StgExpr -- input + -> Code -- output +\end{code} + +%******************************************************** +%* * +%* Tail calls * +%* * +%******************************************************** + +``Applications'' mean {\em tail calls}, a service provided by module +@CgTailCall@. This includes literals, which show up as +@(STGApp (StgLitArg 42) [])@. + +\begin{code} +cgExpr (StgApp fun args) = cgTailCall fun args +\end{code} + +%******************************************************** +%* * +%* STG ConApps (for inline versions) * +%* * +%******************************************************** + +\begin{code} +cgExpr (StgConApp con args) + = do { amodes <- getArgAmodes args + ; cgReturnDataCon con amodes } +\end{code} + +Literals are similar to constructors; they return by putting +themselves in an appropriate register and returning to the address on +top of the stack. + +\begin{code} +cgExpr (StgLit lit) + = do { cmm_lit <- cgLit lit + ; performPrimReturn rep (CmmLit cmm_lit) } + where + rep = typeCgRep (literalType lit) +\end{code} + + +%******************************************************** +%* * +%* PrimOps and foreign calls. +%* * +%******************************************************** + +NOTE about "safe" foreign calls: a safe foreign call is never compiled +inline in a case expression. When we see + + case (ccall ...) of { ... } + +We generate a proper return address for the alternatives and push the +stack frame before doing the call, so that in the event that the call +re-enters the RTS the stack is in a sane state. + +\begin{code} +cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do + {- + First, copy the args into temporaries. We're going to push + a return address right before doing the call, so the args + must be out of the way. + -} + reps_n_amodes <- getArgAmodes stg_args + let + -- Get the *non-void* args, and jiggle them with shimForeignCall + arg_exprs = [ shimForeignCallArg stg_arg expr + | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, + nonVoidArg rep] + + -- in + arg_tmps <- mapM assignTemp arg_exprs + let + arg_hints = zip arg_tmps (map (typeHint.stgArgType) stg_args) + -- in + {- + Now, allocate some result regs. + -} + (res_reps,res_regs,res_hints) <- newUnboxedTupleRegs res_ty + ccallReturnUnboxedTuple (zip res_reps (map CmmReg res_regs)) $ + emitForeignCall (zip res_regs res_hints) fcall + arg_hints emptyVarSet{-no live vars-} + +-- tagToEnum# is special: we need to pull the constructor out of the table, +-- and perform an appropriate return. + +cgExpr (StgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty) + = ASSERT(isEnumerationTyCon tycon) + do { (_,amode) <- getArgAmode arg + ; amode' <- assignTemp amode -- We're going to use it twice, + -- so save in a temp if non-trivial + ; hmods <- getHomeModules + ; stmtC (CmmAssign nodeReg (tagToClosure hmods tycon amode')) + ; performReturn (emitAlgReturnCode tycon amode') } + where + -- If you're reading this code in the attempt to figure + -- out why the compiler panic'ed here, it is probably because + -- you used tagToEnum# in a non-monomorphic setting, e.g., + -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x# + -- That won't work. 
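+	-- A monomorphic use is fine, e.g. (editorial illustration only):
+	--     boolFromTag :: Int# -> Bool
+	--     boolFromTag t = tagToEnum# t
+	-- Here res_ty is Bool, an enumeration tycon, so the enclosing
+	-- ASSERT holds and the closure-table lookup works.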
+ tycon = tyConAppTyCon res_ty + + +cgExpr x@(StgOpApp op@(StgPrimOp primop) args res_ty) + | primOpOutOfLine primop + = tailCallPrimOp primop args + + | ReturnsPrim VoidRep <- result_info + = do cgPrimOp [] primop args emptyVarSet + performReturn emitDirectReturnInstr + + | ReturnsPrim rep <- result_info + = do cgPrimOp [dataReturnConvPrim (primRepToCgRep rep)] + primop args emptyVarSet + performReturn emitDirectReturnInstr + + | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon + = do (reps, regs, _hints) <- newUnboxedTupleRegs res_ty + cgPrimOp regs primop args emptyVarSet{-no live vars-} + returnUnboxedTuple (zip reps (map CmmReg regs)) + + | ReturnsAlg tycon <- result_info, isEnumerationTyCon tycon + -- c.f. cgExpr (...TagToEnumOp...) + = do tag_reg <- newTemp wordRep + hmods <- getHomeModules + cgPrimOp [tag_reg] primop args emptyVarSet + stmtC (CmmAssign nodeReg (tagToClosure hmods tycon (CmmReg tag_reg))) + performReturn (emitAlgReturnCode tycon (CmmReg tag_reg)) + where + result_info = getPrimOpResultInfo primop +\end{code} + +%******************************************************** +%* * +%* Case expressions * +%* * +%******************************************************** +Case-expression conversion is complicated enough to have its own +module, @CgCase@. +\begin{code} + +cgExpr (StgCase expr live_vars save_vars bndr srt alt_type alts) + = cgCase expr live_vars save_vars bndr srt alt_type alts +\end{code} + + +%******************************************************** +%* * +%* Let and letrec * +%* * +%******************************************************** +\subsection[let-and-letrec-codegen]{Converting @StgLet@ and @StgLetrec@} + +\begin{code} +cgExpr (StgLet (StgNonRec name rhs) expr) + = cgRhs name rhs `thenFC` \ (name, info) -> + addBindC name info `thenC` + cgExpr expr + +cgExpr (StgLet (StgRec pairs) expr) + = fixC (\ new_bindings -> addBindsC new_bindings `thenC` + listFCs [ cgRhs b e | (b,e) <- pairs ] + ) `thenFC` \ new_bindings -> + + addBindsC new_bindings `thenC` + cgExpr expr +\end{code} + +\begin{code} +cgExpr (StgLetNoEscape live_in_whole_let live_in_rhss bindings body) + = do { -- Figure out what volatile variables to save + ; nukeDeadBindings live_in_whole_let + ; (save_assts, rhs_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_rhss + + -- Save those variables right now! + ; emitStmts save_assts + + -- Produce code for the rhss + -- and add suitable bindings to the environment + ; cgLetNoEscapeBindings live_in_rhss rhs_eob_info + maybe_cc_slot bindings + + -- Do the body + ; setEndOfBlockInfo rhs_eob_info (cgExpr body) } +\end{code} + + +%******************************************************** +%* * +%* SCC Expressions * +%* * +%******************************************************** + +SCC expressions are treated specially. They set the current cost +centre. + +\begin{code} +cgExpr (StgSCC cc expr) = do emitSetCCC cc; cgExpr expr +\end{code} + +%******************************************************** +%* * +%* Non-top-level bindings * +%* * +%******************************************************** +\subsection[non-top-level-bindings]{Converting non-top-level bindings} + +We rely on the support code in @CgCon@ (to do constructors) and +in @CgClosure@ (to do closures). 
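+For example (illustrative source fragments):
+
+    let x = Just y in ...         -- an StgRhsCon: handled via CgCon
+    let f = \ a -> a + y in ...   -- an StgRhsClosure: handled via CgClosure
+
+@cgRhs@ below dispatches between exactly these two cases.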
+ +\begin{code} +cgRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) + -- the Id is passed along so a binding can be set up + +cgRhs name (StgRhsCon maybe_cc con args) + = do { amodes <- getArgAmodes args + ; idinfo <- buildDynCon name maybe_cc con amodes + ; returnFC (name, idinfo) } + +cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) + = do hmods <- getHomeModules + mkRhsClosure hmods name cc bi srt fvs upd_flag args body +\end{code} + +mkRhsClosure looks for two special forms of the right-hand side: + a) selector thunks. + b) AP thunks + +If neither happens, it just calls mkClosureLFInfo. You might think +that mkClosureLFInfo should do all this, but it seems wrong for the +latter to look at the structure of an expression + +Selectors +~~~~~~~~~ +We look at the body of the closure to see if it's a selector---turgid, +but nothing deep. We are looking for a closure of {\em exactly} the +form: + +... = [the_fv] \ u [] -> + case the_fv of + con a_1 ... a_n -> a_i + + +\begin{code} +mkRhsClosure hmods bndr cc bi srt + [the_fv] -- Just one free var + upd_flag -- Updatable thunk + [] -- A thunk + body@(StgCase (StgApp scrutinee [{-no args-}]) + _ _ _ _ -- ignore uniq, etc. + (AlgAlt tycon) + [(DataAlt con, params, use_mask, + (StgApp selectee [{-no args-}]))]) + | the_fv == scrutinee -- Scrutinee is the only free variable + && maybeToBool maybe_offset -- Selectee is a component of the tuple + && offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough + = -- NOT TRUE: ASSERT(is_single_constructor) + -- The simplifier may have statically determined that the single alternative + -- is the only possible case and eliminated the others, even if there are + -- other constructors in the datatype. It's still ok to make a selector + -- thunk in this case, because we *know* which constructor the scrutinee + -- will evaluate to. + cgStdRhsClosure bndr cc bi [the_fv] [] body lf_info [StgVarArg the_fv] + where + lf_info = mkSelectorLFInfo bndr offset_into_int + (isUpdatable upd_flag) + (_, params_w_offsets) = layOutDynConstr hmods con (addIdReps params) + -- Just want the layout + maybe_offset = assocMaybe params_w_offsets selectee + Just the_offset = maybe_offset + offset_into_int = the_offset - fixedHdrSize +\end{code} + +Ap thunks +~~~~~~~~~ + +A more generic AP thunk of the form + + x = [ x_1...x_n ] \.. [] -> x_1 ... x_n + +A set of these is compiled statically into the RTS, so we just use +those. We could extend the idea to thunks where some of the x_i are +global ids (and hence not free variables), but this would entail +generating a larger thunk. It might be an option for non-optimising +compilation, though. + +We only generate an Ap thunk if all the free variables are pointers, +for semi-obvious reasons. + +\begin{code} +mkRhsClosure hmods bndr cc bi srt + fvs + upd_flag + [] -- No args; a thunk + body@(StgApp fun_id args) + + | args `lengthIs` (arity-1) + && all isFollowableArg (map idCgRep fvs) + && isUpdatable upd_flag + && arity <= mAX_SPEC_AP_SIZE + + -- Ha! an Ap thunk + = cgStdRhsClosure bndr cc bi fvs [] body lf_info payload + + where + lf_info = mkApLFInfo bndr upd_flag arity + -- the payload has to be in the correct order, hence we can't + -- just use the fvs. 
+ payload = StgVarArg fun_id : args + arity = length fvs +\end{code} + +The default case +~~~~~~~~~~~~~~~~ +\begin{code} +mkRhsClosure hmods bndr cc bi srt fvs upd_flag args body + = cgRhsClosure bndr cc bi srt fvs upd_flag args body +\end{code} + + +%******************************************************** +%* * +%* Let-no-escape bindings +%* * +%******************************************************** +\begin{code} +cgLetNoEscapeBindings live_in_rhss rhs_eob_info maybe_cc_slot + (StgNonRec binder rhs) + = do { (binder,info) <- cgLetNoEscapeRhs live_in_rhss rhs_eob_info + maybe_cc_slot + NonRecursive binder rhs + ; addBindC binder info } + +cgLetNoEscapeBindings live_in_rhss rhs_eob_info maybe_cc_slot (StgRec pairs) + = do { new_bindings <- fixC (\ new_bindings -> do + { addBindsC new_bindings + ; listFCs [ cgLetNoEscapeRhs full_live_in_rhss + rhs_eob_info maybe_cc_slot Recursive b e + | (b,e) <- pairs ] }) + + ; addBindsC new_bindings } + where + -- We add the binders to the live-in-rhss set so that we don't + -- delete the bindings for the binder from the environment! + full_live_in_rhss = live_in_rhss `unionVarSet` (mkVarSet [b | (b,r) <- pairs]) + +cgLetNoEscapeRhs + :: StgLiveVars -- Live in rhss + -> EndOfBlockInfo + -> Maybe VirtualSpOffset + -> RecFlag + -> Id + -> StgRhs + -> FCode (Id, CgIdInfo) + +cgLetNoEscapeRhs full_live_in_rhss rhs_eob_info maybe_cc_slot rec binder + (StgRhsClosure cc bi _ upd_flag srt args body) + = -- We could check the update flag, but currently we don't switch it off + -- for let-no-escaped things, so we omit the check too! + -- case upd_flag of + -- Updatable -> panic "cgLetNoEscapeRhs" -- Nothing to update! + -- other -> cgLetNoEscapeClosure binder cc bi live_in_whole_let live_in_rhss args body + cgLetNoEscapeClosure binder cc bi srt full_live_in_rhss rhs_eob_info + maybe_cc_slot rec args body + +-- For a constructor RHS we want to generate a single chunk of code which +-- can be jumped to from many places, which will return the constructor. +-- It's easy; just behave as if it was an StgRhsClosure with a ConApp inside! +cgLetNoEscapeRhs full_live_in_rhss rhs_eob_info maybe_cc_slot rec binder + (StgRhsCon cc con args) + = cgLetNoEscapeClosure binder cc noBinderInfo{-safe-} NoSRT + full_live_in_rhss rhs_eob_info maybe_cc_slot rec + [] --No args; the binder is data structure, not a function + (StgConApp con args) +\end{code} + +Little helper for primitives that return unboxed tuples. + +\begin{code} +newUnboxedTupleRegs :: Type -> FCode ([CgRep], [CmmReg], [MachHint]) +newUnboxedTupleRegs res_ty = + let + ty_args = tyConAppArgs (repType res_ty) + (reps,hints) = unzip [ (rep, typeHint ty) | ty <- ty_args, + let rep = typeCgRep ty, + nonVoidArg rep ] + in do + regs <- mapM (newTemp . argMachRep) reps + return (reps,regs,hints) +\end{code} diff --git a/compiler/codeGen/CgExpr.lhs-boot b/compiler/codeGen/CgExpr.lhs-boot new file mode 100644 index 0000000000..29cdc3a605 --- /dev/null +++ b/compiler/codeGen/CgExpr.lhs-boot @@ -0,0 +1,7 @@ +\begin{code} +module CgExpr where +import StgSyn( StgExpr ) +import CgMonad( Code ) + +cgExpr :: StgExpr -> Code +\end{code} diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs new file mode 100644 index 0000000000..10f41bdf8b --- /dev/null +++ b/compiler/codeGen/CgForeignCall.hs @@ -0,0 +1,256 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for foreign calls. 
+-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgForeignCall ( + cgForeignCall, + emitForeignCall, + emitForeignCall', + shimForeignCallArg, + emitSaveThreadState, -- will be needed by the Cmm parser + emitLoadThreadState, -- ditto + emitCloseNursery, + emitOpenNursery, + ) where + +#include "HsVersions.h" + +import StgSyn ( StgLiveVars, StgArg, stgArgType ) +import CgProf ( curCCS, curCCSAddr ) +import CgBindery ( getVolatileRegs, getArgAmodes ) +import CgMonad +import CgUtils ( cmmOffsetW, cmmOffsetB, cmmLoadIndexW, newTemp, + assignTemp ) +import Type ( tyConAppTyCon, repType ) +import TysPrim +import CLabel ( mkForeignLabel, mkRtsCodeLabel ) +import Cmm +import CmmUtils +import MachOp +import SMRep +import ForeignCall +import Constants +import StaticFlags ( opt_SccProfilingOn ) +import Outputable + +import Monad ( when ) + +-- ----------------------------------------------------------------------------- +-- Code generation for Foreign Calls + +cgForeignCall + :: [(CmmReg,MachHint)] -- where to put the results + -> ForeignCall -- the op + -> [StgArg] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code +cgForeignCall results fcall stg_args live + = do + reps_n_amodes <- getArgAmodes stg_args + let + -- Get the *non-void* args, and jiggle them with shimForeignCall + arg_exprs = [ shimForeignCallArg stg_arg expr + | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, + nonVoidArg rep] + + arg_hints = zip arg_exprs (map (typeHint.stgArgType) stg_args) + -- in + emitForeignCall results fcall arg_hints live + + +emitForeignCall + :: [(CmmReg,MachHint)] -- where to put the results + -> ForeignCall -- the op + -> [(CmmExpr,MachHint)] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code + +emitForeignCall results (CCall (CCallSpec target cconv safety)) args live + = do vols <- getVolatileRegs live + emitForeignCall' safety results + (CmmForeignCall cmm_target cconv) call_args (Just vols) + where + (call_args, cmm_target) + = case target of + StaticTarget lbl -> (args, CmmLit (CmmLabel + (mkForeignLabel lbl call_size False))) + DynamicTarget -> case args of (fn,_):rest -> (rest, fn) + + -- in the stdcall calling convention, the symbol needs @size appended + -- to it, where size is the total number of bytes of arguments. We + -- attach this info to the CLabel here, and the CLabel pretty printer + -- will generate the suffix when the label is printed. 
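+	-- For example (illustrative, not from this module): a stdcall
+	-- to "Sleep" taking one 32-bit argument would be printed as
+	-- Sleep@4 on a 32-bit platform, since each argument is rounded
+	-- up to at least a whole word by arg_size below.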
+ call_size + | StdCallConv <- cconv = Just (sum (map (arg_size.cmmExprRep.fst) args)) + | otherwise = Nothing + + -- ToDo: this might not be correct for 64-bit API + arg_size rep = max (machRepByteWidth rep) wORD_SIZE + +emitForeignCall results (DNCall _) args live + = panic "emitForeignCall: DNCall" + + +-- alternative entry point, used by CmmParse +emitForeignCall' + :: Safety + -> [(CmmReg,MachHint)] -- where to put the results + -> CmmCallTarget -- the op + -> [(CmmExpr,MachHint)] -- arguments + -> Maybe [GlobalReg] -- live vars, in case we need to save them + -> Code +emitForeignCall' safety results target args vols + | not (playSafe safety) = do + temp_args <- load_args_into_temps args + stmtC (CmmCall target results temp_args vols) + + | otherwise = do + id <- newTemp wordRep + temp_args <- load_args_into_temps args + emitSaveThreadState + stmtC (CmmCall (CmmForeignCall suspendThread CCallConv) + [(id,PtrHint)] + [ (CmmReg (CmmGlobal BaseReg), PtrHint) ] + vols + ) + stmtC (CmmCall target results temp_args vols) + stmtC (CmmCall (CmmForeignCall resumeThread CCallConv) + [ (CmmGlobal BaseReg, PtrHint) ] + -- Assign the result to BaseReg: we + -- might now have a different + -- Capability! + [ (CmmReg id, PtrHint) ] + vols + ) + emitLoadThreadState + + +suspendThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("suspendThread"))) +resumeThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("resumeThread"))) + + +-- we might need to load arguments into temporaries before +-- making the call, because certain global registers might +-- overlap with registers that the C calling convention uses +-- for passing arguments. +-- +-- This is a HACK; really it should be done in the back end, but +-- it's easier to generate the temporaries here. +load_args_into_temps args = mapM maybe_assignTemp args + +maybe_assignTemp (e, hint) + | hasNoGlobalRegs e = return (e, hint) + | otherwise = do + -- don't use assignTemp, it uses its own notion of "trivial" + -- expressions, which are wrong here + reg <- newTemp (cmmExprRep e) + stmtC (CmmAssign reg e) + return (CmmReg reg, hint) + +-- ----------------------------------------------------------------------------- +-- Save/restore the thread state in the TSO + +-- This stuff can't be done in suspendThread/resumeThread, because it +-- refers to global registers which aren't available in the C world. 
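+--
+-- Schematically, the code emitted by emitForeignCall' above for a
+-- *safe* call is therefore:
+--
+--      <emitSaveThreadState>
+--      id = suspendThread(BaseReg);
+--      <the call itself>
+--      BaseReg = resumeThread(id);
+--      <emitLoadThreadState>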
+ +emitSaveThreadState = do + -- CurrentTSO->sp = Sp; + stmtC $ CmmStore (cmmOffset stgCurrentTSO tso_SP) stgSp + emitCloseNursery + -- and save the current cost centre stack in the TSO when profiling: + when opt_SccProfilingOn $ + stmtC (CmmStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS) + + -- CurrentNursery->free = Hp+1; +emitCloseNursery = stmtC $ CmmStore nursery_bdescr_free (cmmOffsetW stgHp 1) + +emitLoadThreadState = do + tso <- newTemp wordRep + stmtsC [ + -- tso = CurrentTSO; + CmmAssign tso stgCurrentTSO, + -- Sp = tso->sp; + CmmAssign sp (CmmLoad (cmmOffset (CmmReg tso) tso_SP) + wordRep), + -- SpLim = tso->stack + RESERVED_STACK_WORDS; + CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg tso) tso_STACK) + rESERVED_STACK_WORDS) + ] + emitOpenNursery + -- and load the current cost centre stack from the TSO when profiling: + when opt_SccProfilingOn $ + stmtC (CmmStore curCCSAddr + (CmmLoad (cmmOffset (CmmReg tso) tso_CCCS) wordRep)) + +emitOpenNursery = stmtsC [ + -- Hp = CurrentNursery->free - 1; + CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free wordRep) (-1)), + + -- HpLim = CurrentNursery->start + + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; + CmmAssign hpLim + (cmmOffsetExpr + (CmmLoad nursery_bdescr_start wordRep) + (cmmOffset + (CmmMachOp mo_wordMul [ + CmmMachOp (MO_S_Conv I32 wordRep) + [CmmLoad nursery_bdescr_blocks I32], + CmmLit (mkIntCLit bLOCK_SIZE) + ]) + (-1) + ) + ) + ] + + +nursery_bdescr_free = cmmOffset stgCurrentNursery oFFSET_bdescr_free +nursery_bdescr_start = cmmOffset stgCurrentNursery oFFSET_bdescr_start +nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks + +tso_SP = tsoFieldB oFFSET_StgTSO_sp +tso_STACK = tsoFieldB oFFSET_StgTSO_stack +tso_CCCS = tsoProfFieldB oFFSET_StgTSO_CCCS + +-- The TSO struct has a variable header, and an optional StgTSOProfInfo in +-- the middle. The fields we're interested in are after the StgTSOProfInfo. +tsoFieldB :: ByteOff -> ByteOff +tsoFieldB off + | opt_SccProfilingOn = off + sIZEOF_StgTSOProfInfo + fixedHdrSize * wORD_SIZE + | otherwise = off + fixedHdrSize * wORD_SIZE + +tsoProfFieldB :: ByteOff -> ByteOff +tsoProfFieldB off = off + fixedHdrSize * wORD_SIZE + +stgSp = CmmReg sp +stgHp = CmmReg hp +stgCurrentTSO = CmmReg currentTSO +stgCurrentNursery = CmmReg currentNursery + +sp = CmmGlobal Sp +spLim = CmmGlobal SpLim +hp = CmmGlobal Hp +hpLim = CmmGlobal HpLim +currentTSO = CmmGlobal CurrentTSO +currentNursery = CmmGlobal CurrentNursery + +-- ----------------------------------------------------------------------------- +-- For certain types passed to foreign calls, we adjust the actual +-- value passed to the call. For ByteArray#/Array# we pass the +-- address of the actual array, not the address of the heap object. 
+ +shimForeignCallArg :: StgArg -> CmmExpr -> CmmExpr +shimForeignCallArg arg expr + | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon + = cmmOffsetB expr arrPtrsHdrSize + + | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon + = cmmOffsetB expr arrWordsHdrSize + + | otherwise = expr + where + -- should be a tycon app, since this is a foreign call + tycon = tyConAppTyCon (repType (stgArgType arg)) diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs new file mode 100644 index 0000000000..184af904df --- /dev/null +++ b/compiler/codeGen/CgHeapery.lhs @@ -0,0 +1,588 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgHeapery.lhs,v 1.47 2005/06/21 10:44:41 simonmar Exp $ +% +\section[CgHeapery]{Heap management functions} + +\begin{code} +module CgHeapery ( + initHeapUsage, getVirtHp, setVirtHp, setRealHp, + getHpRelOffset, hpRel, + + funEntryChecks, thunkEntryChecks, + altHeapCheck, unbxTupleHeapCheck, + hpChkGen, hpChkNodePointsAssignSp0, + stkChkGen, stkChkNodePoints, + + layOutDynConstr, layOutStaticConstr, + mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure, + + allocDynClosure, emitSetDynHdr + ) where + +#include "HsVersions.h" + +import StgSyn ( AltType(..) ) +import CLabel ( CLabel, mkRtsCodeLabel ) +import CgUtils ( mkWordCLit, cmmRegOffW, cmmOffsetW, + cmmOffsetExprB ) +import CgMonad +import CgProf ( staticProfHdr, profDynAlloc, dynProfHdr ) +import CgTicky ( staticTickyHdr, tickyDynAlloc, tickyAllocHeap ) +import CgParallel ( staticGranHdr, staticParHdr, doGranAllocate ) +import CgStackery ( getFinalStackHW, getRealSp ) +import CgCallConv ( mkRegLiveness ) +import ClosureInfo ( closureSize, staticClosureNeedsLink, + mkConInfo, closureNeedsUpdSpace, + infoTableLabelFromCI, closureLabelFromCI, + nodeMustPointToIt, closureLFInfo, + ClosureInfo ) +import SMRep ( CgRep(..), cgRepSizeW, separateByPtrFollowness, + WordOff, fixedHdrSize, thunkHdrSize, + isVoidArg, primRepToCgRep ) + +import Cmm ( CmmLit(..), CmmStmt(..), CmmExpr(..), GlobalReg(..), + CmmReg(..), hpReg, nodeReg, spReg ) +import MachOp ( mo_wordULt, mo_wordUGt, mo_wordSub ) +import CmmUtils ( mkIntCLit, CmmStmts, noStmts, oneStmt, plusStmts, + mkStmts ) +import Id ( Id ) +import DataCon ( DataCon ) +import TyCon ( tyConPrimRep ) +import CostCentre ( CostCentreStack ) +import Util ( mapAccumL, filterOut ) +import Constants ( wORD_SIZE ) +import Packages ( HomeModules ) +import Outputable + +\end{code} + + +%************************************************************************ +%* * +\subsection[CgUsages-heapery]{Monad things for fiddling with heap usage} +%* * +%************************************************************************ + +The heap always grows upwards, so hpRel is easy + +\begin{code} +hpRel :: VirtualHpOffset -- virtual offset of Hp + -> VirtualHpOffset -- virtual offset of The Thing + -> WordOff -- integer word offset +hpRel hp off = off - hp +\end{code} + +@initHeapUsage@ applies a function to the amount of heap that it uses. +It initialises the heap usage to zeros, and passes on an unchanged +heap usage. + +It is usually a prelude to performing a GC check, so everything must +be in a tidy and consistent state. 
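A sketch of the usage pattern (hypothetical; the real call sites are the heap-check functions later in this file):

    -- Sketch: plant one heap check for a whole block.  hpHw is the
    -- block's total allocation, available up front thanks to the
    -- knot-tying below; emitHeapCheckFor is an invented emitter.
    blockWithHeapCheck :: Code -> Code
    blockWithHeapCheck body = initHeapUsage (\ hpHw ->
        do { codeOnly (emitHeapCheckFor hpHw)
           ; setRealHp hpHw
           ; body })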
+
+rje: Note the slightly subtle fixed point behaviour needed here
+
+\begin{code}
+initHeapUsage :: (VirtualHpOffset -> Code) -> Code
+initHeapUsage fcode
+  = do  { orig_hp_usage <- getHpUsage
+        ; setHpUsage initHpUsage
+        ; fixC (\heap_usage2 -> do
+                { fcode (heapHWM heap_usage2)
+                ; getHpUsage })
+        ; setHpUsage orig_hp_usage }
+
+setVirtHp :: VirtualHpOffset -> Code
+setVirtHp new_virtHp
+  = do  { hp_usage <- getHpUsage
+        ; setHpUsage (hp_usage {virtHp = new_virtHp}) }
+
+getVirtHp :: FCode VirtualHpOffset
+getVirtHp
+  = do  { hp_usage <- getHpUsage
+        ; return (virtHp hp_usage) }
+
+setRealHp ::  VirtualHpOffset -> Code
+setRealHp new_realHp
+  = do  { hp_usage <- getHpUsage
+        ; setHpUsage (hp_usage {realHp = new_realHp}) }
+
+getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
+getHpRelOffset virtual_offset
+  = do  { hp_usg <- getHpUsage
+        ; return (cmmRegOffW hpReg (hpRel (realHp hp_usg) virtual_offset)) }
+\end{code}
+
+
+%************************************************************************
+%*                                                                      *
+               Layout of heap objects
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+layOutDynConstr, layOutStaticConstr
+       :: HomeModules
+       -> DataCon
+       -> [(CgRep,a)]
+       -> (ClosureInfo,
+           [(a,VirtualHpOffset)])
+
+layOutDynConstr    = layOutConstr False
+layOutStaticConstr = layOutConstr True
+
+layOutConstr is_static hmods data_con args
+   = (mkConInfo hmods is_static data_con tot_wds ptr_wds,
+      things_w_offsets)
+  where
+    (tot_wds,            --  #ptr_wds + #nonptr_wds
+     ptr_wds,            --  #ptr_wds
+     things_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} args
+\end{code}
+
+@mkVirtHeapOffsets@ always returns boxed things with smaller offsets
+than the unboxed things, and furthermore, the offsets in the result
+list are in order of increasing offset.
+
+\begin{code}
+mkVirtHeapOffsets
+         :: Bool                -- True <=> is a thunk
+         -> [(CgRep,a)]         -- Things to make offsets for
+         -> (WordOff,           -- _Total_ number of words allocated
+             WordOff,           -- Number of words allocated for *pointers*
+             [(a, VirtualHpOffset)])
+                                -- Things with their offsets from start of
+                                --  object in order of increasing offset
+
+-- First in list gets lowest offset, which is initial offset + 1.
+
+mkVirtHeapOffsets is_thunk things
+  = let non_void_things               = filterOut (isVoidArg . fst) things
+        (ptrs, non_ptrs)              = separateByPtrFollowness non_void_things
+        (wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs
+        (tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs
+    in
+    (tot_wds, wds_of_ptrs, ptrs_w_offsets ++ non_ptrs_w_offsets)
+  where
+    hdr_size    | is_thunk   = thunkHdrSize
+                | otherwise  = fixedHdrSize
+
+    computeOffset wds_so_far (rep, thing)
+      = (wds_so_far + cgRepSizeW rep, (thing, hdr_size + wds_so_far))
+\end{code}
+
+
+%************************************************************************
+%*                                                                      *
+       Lay out a static closure
+%*                                                                      *
+%************************************************************************
+
+Make a static closure, adding on any extra padding needed for CAFs,
+and adding a static link field if necessary.
+
+\begin{code}
+mkStaticClosureFields
+       :: ClosureInfo
+       -> CostCentreStack
+       -> Bool                 -- Has CAF refs
+       -> [CmmLit]             -- Payload
+       -> [CmmLit]             -- The full closure
+mkStaticClosureFields cl_info ccs caf_refs payload
+  = mkStaticClosure info_lbl ccs payload padding_wds
+       static_link_field saved_info_field
+  where
+    info_lbl = infoTableLabelFromCI cl_info
+
+    -- CAFs must have consistent layout, regardless of whether they
+    -- are actually updatable or not.
The layout of a CAF is: + -- + -- 3 saved_info + -- 2 static_link + -- 1 indirectee + -- 0 info ptr + -- + -- the static_link and saved_info fields must always be in the same + -- place. So we use closureNeedsUpdSpace rather than + -- closureUpdReqd here: + + is_caf = closureNeedsUpdSpace cl_info + + padding_wds + | not is_caf = [] + | otherwise = ASSERT(null payload) [mkIntCLit 0] + + static_link_field + | is_caf || staticClosureNeedsLink cl_info = [static_link_value] + | otherwise = [] + + saved_info_field + | is_caf = [mkIntCLit 0] + | otherwise = [] + + -- for a static constructor which has NoCafRefs, we set the + -- static link field to a non-zero value so the garbage + -- collector will ignore it. + static_link_value + | caf_refs = mkIntCLit 0 + | otherwise = mkIntCLit 1 + + +mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit] + -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] +mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field + = [CmmLabel info_lbl] + ++ variable_header_words + ++ payload + ++ padding_wds + ++ static_link_field + ++ saved_info_field + where + variable_header_words + = staticGranHdr + ++ staticParHdr + ++ staticProfHdr ccs + ++ staticTickyHdr +\end{code} + +%************************************************************************ +%* * +\subsection[CgHeapery-heap-overflow]{Heap overflow checking} +%* * +%************************************************************************ + +The new code for heapChecks. For GrAnSim the code for doing a heap check +and doing a context switch has been separated. Especially, the HEAP_CHK +macro only performs a heap check. THREAD_CONTEXT_SWITCH should be used for +doing a context switch. GRAN_FETCH_AND_RESCHEDULE must be put at the +beginning of every slow entry code in order to simulate the fetching of +closures. If fetching is necessary (i.e. current closure is not local) then +an automatic context switch is done. + +-------------------------------------------------------------- +A heap/stack check at a function or thunk entry point. + +\begin{code} +funEntryChecks :: ClosureInfo -> CmmStmts -> Code -> Code +funEntryChecks cl_info reg_save_code code + = hpStkCheck cl_info True reg_save_code code + +thunkEntryChecks :: ClosureInfo -> Code -> Code +thunkEntryChecks cl_info code + = hpStkCheck cl_info False noStmts code + +hpStkCheck :: ClosureInfo -- Function closure + -> Bool -- Is a function? (not a thunk) + -> CmmStmts -- Register saves + -> Code + -> Code + +hpStkCheck cl_info is_fun reg_save_code code + = getFinalStackHW $ \ spHw -> do + { sp <- getRealSp + ; let stk_words = spHw - sp + ; initHeapUsage $ \ hpHw -> do + { -- Emit heap checks, but be sure to do it lazily so + -- that the conditionals on hpHw don't cause a black hole + codeOnly $ do + { do_checks stk_words hpHw full_save_code rts_label + ; tickyAllocHeap hpHw } + ; setRealHp hpHw + ; code } + } + where + node_asst + | nodeMustPointToIt (closureLFInfo cl_info) + = noStmts + | otherwise + = oneStmt (CmmAssign nodeReg (CmmLit (CmmLabel closure_lbl))) + closure_lbl = closureLabelFromCI cl_info + + full_save_code = node_asst `plusStmts` reg_save_code + + rts_label | is_fun = CmmReg (CmmGlobal GCFun) + -- Function entry point + | otherwise = CmmReg (CmmGlobal GCEnter1) + -- Thunk or case return + -- In the thunk/case-return case, R1 points to a closure + -- which should be (re)-entered after GC +\end{code} + +Heap checks in a case alternative are nice and easy, provided this is +a bog-standard algebraic case. 
We have in our hand:
+
+   * one return address, on the stack,
+   * one return value, in Node.
+
+the canned code for this heap check failure just pushes Node on the
+stack, saying 'EnterGHC' to return. The scheduler will return by
+entering the top value on the stack, which in turn will return through
+the return address, getting us back to where we were. This is
+therefore only valid if the return value is *lifted* (just being
+boxed isn't good enough).
+
+For primitive returns, we have an unlifted value in some register
+(either R1 or FloatReg1 or DblReg1). This means using specialised
+heap-check code for these cases.
+
+\begin{code}
+altHeapCheck
+    :: AltType  -- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt
+                --      (Unboxed tuples are dealt with by ubxTupleHeapCheck)
+    -> Code     -- Continuation
+    -> Code
+altHeapCheck alt_type code
+  = initHeapUsage $ \ hpHw -> do
+       { codeOnly $ do
+            { do_checks 0 {- no stack chk -} hpHw
+                        noStmts {- nothing to save -}
+                        (rts_label alt_type)
+            ; tickyAllocHeap hpHw }
+       ; setRealHp hpHw
+       ; code }
+  where
+    rts_label PolyAlt = CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "stg_gc_unpt_r1")))
+       -- Do *not* enter R1 after a heap check in
+       -- a polymorphic case.  It might be a function
+       -- and the entry code for a function (currently)
+       -- applies it
+       --
+       -- However R1 is guaranteed to be a pointer
+
+    rts_label (AlgAlt tc) = stg_gc_enter1
+       -- Enter R1 after the heap check; it's a pointer
+
+    rts_label (PrimAlt tc)
+      = CmmLit $ CmmLabel $
+       case primRepToCgRep (tyConPrimRep tc) of
+         VoidArg   -> mkRtsCodeLabel SLIT( "stg_gc_noregs")
+         FloatArg  -> mkRtsCodeLabel SLIT( "stg_gc_f1")
+         DoubleArg -> mkRtsCodeLabel SLIT( "stg_gc_d1")
+         LongArg   -> mkRtsCodeLabel SLIT( "stg_gc_l1")
+                               -- R1 is boxed but unlifted:
+         PtrArg    -> mkRtsCodeLabel SLIT( "stg_gc_unpt_r1")
+                               -- R1 is unboxed:
+         NonPtrArg -> mkRtsCodeLabel SLIT( "stg_gc_unbx_r1")
+
+    rts_label (UbxTupAlt _) = panic "altHeapCheck"
+\end{code}
+
+
+Unboxed tuple alternatives and let-no-escapes (the two most annoying
+constructs to generate code for!)  For unboxed tuple returns, there
+are an arbitrary number of possibly unboxed return values, some of
+which will be in registers, and the others will be on the stack.  We
+always organise the stack-resident fields into pointers &
+non-pointers, and pass the number of each to the heap check code.
+
+\begin{code}
+unbxTupleHeapCheck
+       :: [(Id, GlobalReg)]    -- Live registers
+       -> WordOff      -- no. of stack slots containing ptrs
+       -> WordOff      -- no. of stack slots containing nonptrs
+       -> CmmStmts     -- code to insert in the failure path
+       -> Code
+       -> Code
+
+unbxTupleHeapCheck regs ptrs nptrs fail_code code
+  -- We can't manage more than 255 pointers/non-pointers
+  -- in a generic heap check.
+  | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
+  | otherwise
+  = initHeapUsage $ \ hpHw -> do
+       { codeOnly $ do { do_checks 0 {- no stack check -} hpHw
+                                   full_fail_code rts_label
+                       ; tickyAllocHeap hpHw }
+       ; setRealHp hpHw
+       ; code }
+  where
+    full_fail_code  = fail_code `plusStmts` oneStmt assign_liveness
+    assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9))     -- Ho ho ho!
+                                (CmmLit (mkWordCLit liveness))
+    liveness        = mkRegLiveness regs ptrs nptrs
+    rts_label       = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_ut")))
+
+\end{code}
+
+
+%************************************************************************
+%*                                                                      *
+               Heap/Stack Checks.
+%*                                                                      *
+%************************************************************************
+
+When failing a check, we save a return address on the stack and
+jump to a pre-compiled code fragment that saves the live registers
+and returns to the scheduler.
+
+The return address in most cases will be the beginning of the basic
+block in which the check resides, since we need to perform the check
+again on re-entry because someone else might have stolen the resource
+in the meantime.
+
+\begin{code}
+do_checks :: WordOff       -- Stack headroom
+          -> WordOff       -- Heap  headroom
+          -> CmmStmts      -- Assignments to perform on failure
+          -> CmmExpr       -- Rts address to jump to on failure
+          -> Code
+do_checks 0 0 _ _   = nopC
+do_checks stk hp reg_save_code rts_lbl
+  = do_checks' (CmmLit (mkIntCLit (stk*wORD_SIZE)))
+              (CmmLit (mkIntCLit (hp*wORD_SIZE)))
+        (stk /= 0) (hp /= 0) reg_save_code rts_lbl
+
+-- The offsets are now in *bytes*
+do_checks' stk_expr hp_expr stk_nonzero hp_nonzero reg_save_code rts_lbl
+  = do { doGranAllocate hp_expr
+
+       -- Emit a block for the heap-check-failure code
+       ; blk_id <- forkLabelledCode $ do
+                       { whenC hp_nonzero $
+                               stmtC (CmmAssign (CmmGlobal HpAlloc) hp_expr)
+                       ; emitStmts reg_save_code
+                       ; stmtC (CmmJump rts_lbl []) }
+
+       -- Check for stack overflow *FIRST*; otherwise
+       -- we might bump Hp and then fail the stack overflow check
+       ; whenC stk_nonzero
+               (stmtC (CmmCondBranch stk_oflo blk_id))
+
+       ; whenC hp_nonzero
+               (stmtsC [CmmAssign hpReg
+                               (cmmOffsetExprB (CmmReg hpReg) hp_expr),
+                       CmmCondBranch hp_oflo blk_id])
+               -- Bump heap pointer, and test for heap exhaustion
+               -- Note that we don't move the heap pointer unless the
+               -- stack check succeeds.  Otherwise we might end up
+               -- with slop at the end of the current block, which can
+               -- confuse the LDV profiler.
+       }
+  where
+       -- Stk overflow if (Sp - stk_bytes < SpLim)
+    stk_oflo = CmmMachOp mo_wordULt
+                 [CmmMachOp mo_wordSub [CmmReg spReg, stk_expr],
+                  CmmReg (CmmGlobal SpLim)]
+
+       -- Hp overflow if (Hp > HpLim)
+       -- (Hp has been incremented by now)
+       -- HpLim points to the LAST WORD of valid allocation space.
+    hp_oflo = CmmMachOp mo_wordUGt
+                 [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
+\end{code}
+
+%************************************************************************
+%*                                                                      *
+     Generic Heap/Stack Checks - used in the RTS
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code
+hpChkGen bytes liveness reentry
+  = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assigns stg_gc_gen
+  where
+    assigns = mkStmts [
+               CmmAssign (CmmGlobal (VanillaReg 9))  liveness,
+               CmmAssign (CmmGlobal (VanillaReg 10)) reentry
+               ]
+
+-- a heap check where R1 points to the closure to enter on return, and
+-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP).
+hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> Code +hpChkNodePointsAssignSp0 bytes sp0 + = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assign stg_gc_enter1 + where assign = oneStmt (CmmStore (CmmReg spReg) sp0) + +stkChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code +stkChkGen bytes liveness reentry + = do_checks' bytes (CmmLit (mkIntCLit 0)) True False assigns stg_gc_gen + where + assigns = mkStmts [ + CmmAssign (CmmGlobal (VanillaReg 9)) liveness, + CmmAssign (CmmGlobal (VanillaReg 10)) reentry + ] + +stkChkNodePoints :: CmmExpr -> Code +stkChkNodePoints bytes + = do_checks' bytes (CmmLit (mkIntCLit 0)) True False noStmts stg_gc_enter1 + +stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_gen"))) +stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1) +\end{code} + +%************************************************************************ +%* * +\subsection[initClosure]{Initialise a dynamic closure} +%* * +%************************************************************************ + +@allocDynClosure@ puts the thing in the heap, and modifies the virtual Hp +to account for this. + +\begin{code} +allocDynClosure + :: ClosureInfo + -> CmmExpr -- Cost Centre to stick in the object + -> CmmExpr -- Cost Centre to blame for this alloc + -- (usually the same; sometimes "OVERHEAD") + + -> [(CmmExpr, VirtualHpOffset)] -- Offsets from start of the object + -- ie Info ptr has offset zero. + -> FCode VirtualHpOffset -- Returns virt offset of object + +allocDynClosure cl_info use_cc blame_cc amodes_with_offsets + = do { virt_hp <- getVirtHp + + -- FIND THE OFFSET OF THE INFO-PTR WORD + ; let info_offset = virt_hp + 1 + -- info_offset is the VirtualHpOffset of the first + -- word of the new object + -- Remember, virtHp points to last allocated word, + -- ie 1 *before* the info-ptr word of new object. + + info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info)) + hdr_w_offsets = initDynHdr info_ptr use_cc `zip` [0..] + + -- SAY WHAT WE ARE ABOUT TO DO + ; profDynAlloc cl_info use_cc + -- ToDo: This is almost certainly wrong + -- We're ignoring blame_cc. But until we've + -- fixed the boxing hack in chooseDynCostCentres etc, + -- we're worried about making things worse by "fixing" + -- this part to use blame_cc! + + ; tickyDynAlloc cl_info + + -- ALLOCATE THE OBJECT + ; base <- getHpRelOffset info_offset + ; hpStore base (hdr_w_offsets ++ amodes_with_offsets) + + -- BUMP THE VIRTUAL HEAP POINTER + ; setVirtHp (virt_hp + closureSize cl_info) + + -- RETURN PTR TO START OF OBJECT + ; returnFC info_offset } + + +initDynHdr :: CmmExpr + -> CmmExpr -- Cost centre to put in object + -> [CmmExpr] +initDynHdr info_ptr cc + = [info_ptr] + -- ToDo: Gransim stuff + -- ToDo: Parallel stuff + ++ dynProfHdr cc + -- No ticky header + +hpStore :: CmmExpr -> [(CmmExpr, VirtualHpOffset)] -> Code +-- Store the item (expr,off) in base[off] +hpStore base es + = stmtsC [ CmmStore (cmmOffsetW base off) val + | (val, off) <- es ] + +emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> Code +emitSetDynHdr base info_ptr ccs + = hpStore base (zip (initDynHdr info_ptr ccs) [0..]) +\end{code} diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs new file mode 100644 index 0000000000..b769950d87 --- /dev/null +++ b/compiler/codeGen/CgInfoTbls.hs @@ -0,0 +1,591 @@ +----------------------------------------------------------------------------- +-- +-- Building info tables. 
+-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgInfoTbls ( + emitClosureCodeAndInfoTable, + emitInfoTableAndCode, + dataConTagZ, + getSRTInfo, + emitDirectReturnTarget, emitAlgReturnTarget, + emitDirectReturnInstr, emitVectoredReturnInstr, + mkRetInfoTable, + mkStdInfoTable, + stdInfoTableSizeB, + mkFunGenInfoExtraBits, + entryCode, closureInfoPtr, + getConstrTag, + infoTable, infoTableClosureType, + infoTablePtrs, infoTableNonPtrs, + funInfoTable, + retVec + ) where + + +#include "HsVersions.h" + +import ClosureInfo ( ClosureInfo, closureTypeDescr, closureName, + infoTableLabelFromCI, Liveness, + closureValDescr, closureSRT, closureSMRep, + closurePtrsSize, closureNonHdrSize, closureFunInfo, + C_SRT(..), needsSRT, isConstrClosure_maybe, + ArgDescr(..) ) +import SMRep ( StgHalfWord, hALF_WORD_SIZE_IN_BITS, hALF_WORD_SIZE, + WordOff, ByteOff, + smRepClosureTypeInt, tablesNextToCode, + rET_BIG, rET_SMALL, rET_VEC_BIG, rET_VEC_SMALL ) +import CgBindery ( getLiveStackSlots ) +import CgCallConv ( isBigLiveness, mkLivenessCLit, buildContLiveness, + argDescrType, getSequelAmode, + CtrlReturnConvention(..) ) +import CgUtils ( mkStringCLit, packHalfWordsCLit, mkWordCLit, + cmmOffsetB, cmmOffsetExprW, cmmLabelOffW, cmmOffsetW, + emitDataLits, emitRODataLits, emitSwitch, cmmNegate, + newTemp ) +import CgMonad + +import CmmUtils ( mkIntCLit, zeroCLit ) +import Cmm ( CmmStmt(..), CmmExpr(..), CmmLit(..), LocalReg, + CmmBasicBlock, nodeReg ) +import MachOp ( MachOp(..), wordRep, halfWordRep ) +import CLabel +import StgSyn ( SRT(..) ) +import Name ( Name ) +import DataCon ( DataCon, dataConTag, fIRST_TAG ) +import Unique ( Uniquable(..) ) +import DynFlags ( DynFlags(..), HscTarget(..) ) +import StaticFlags ( opt_SccProfilingOn ) +import ListSetOps ( assocDefault ) +import Maybes ( isJust ) +import Constants ( wORD_SIZE, sIZEOF_StgFunInfoExtraRev ) +import Outputable + + +------------------------------------------------------------------------- +-- +-- Generating the info table and code for a closure +-- +------------------------------------------------------------------------- + +-- Here we make a concrete info table, represented as a list of CmmAddr +-- (it can't be simply a list of Word, because the SRT field is +-- represented by a label+offset expression). 
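For a feel of what such a table looks like (a hypothetical sketch using helpers defined later in this module): a non-profiling info table for a constructor with two pointer fields, no non-pointers, and zero-indexed tag 0 might be built along these lines, where cONSTR_2_0 is an invented stand-in for the real closure-type constant:

    exampleConInfo :: [CmmLit]
    exampleConInfo = mkStdInfoTable (mkIntCLit 0) (mkIntCLit 0) -- no profiling descrs
                                    cONSTR_2_0                  -- closure type (stand-in)
                                    0                           -- srt_len reused as con tag
                                    (packHalfWordsCLit 2 0)     -- layout: 2 ptrs, 0 nonptrs
      where cONSTR_2_0 = 3  -- invented numeric value, for illustration only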
+ +-- With tablesNextToCode, the layout is +-- <reversed variable part> +-- <normal forward StgInfoTable, but without +-- an entry point at the front> +-- <code> +-- +-- Without tablesNextToCode, the layout of an info table is +-- <entry label> +-- <normal forward rest of StgInfoTable> +-- <forward variable part> +-- +-- See includes/InfoTables.h + +emitClosureCodeAndInfoTable :: ClosureInfo -> [LocalReg] -> CgStmts -> Code +emitClosureCodeAndInfoTable cl_info args body + = do { ty_descr_lit <- + if opt_SccProfilingOn + then mkStringCLit (closureTypeDescr cl_info) + else return (mkIntCLit 0) + ; cl_descr_lit <- + if opt_SccProfilingOn + then mkStringCLit cl_descr_string + else return (mkIntCLit 0) + ; let std_info = mkStdInfoTable ty_descr_lit cl_descr_lit + cl_type srt_len layout_lit + + ; blks <- cgStmtsToBlocks body + ; emitInfoTableAndCode info_lbl std_info extra_bits args blks } + where + info_lbl = infoTableLabelFromCI cl_info + + cl_descr_string = closureValDescr cl_info + cl_type = smRepClosureTypeInt (closureSMRep cl_info) + + srt = closureSRT cl_info + needs_srt = needsSRT srt + + mb_con = isConstrClosure_maybe cl_info + is_con = isJust mb_con + + (srt_label,srt_len) + = case mb_con of + Just con -> -- Constructors don't have an SRT + -- We keep the *zero-indexed* tag in the srt_len + -- field of the info table. + (mkIntCLit 0, fromIntegral (dataConTagZ con)) + + Nothing -> -- Not a constructor + srtLabelAndLength srt info_lbl + + ptrs = closurePtrsSize cl_info + nptrs = size - ptrs + size = closureNonHdrSize cl_info + layout_lit = packHalfWordsCLit ptrs nptrs + + extra_bits + | is_fun = fun_extra_bits + | is_con = [] + | needs_srt = [srt_label] + | otherwise = [] + + maybe_fun_stuff = closureFunInfo cl_info + is_fun = isJust maybe_fun_stuff + (Just (arity, arg_descr)) = maybe_fun_stuff + + fun_extra_bits + | ArgGen liveness <- arg_descr + = [ fun_amode, + srt_label, + makeRelativeRefTo info_lbl $ mkLivenessCLit liveness, + slow_entry ] + | needs_srt = [fun_amode, srt_label] + | otherwise = [fun_amode] + + slow_entry = makeRelativeRefTo info_lbl (CmmLabel slow_entry_label) + slow_entry_label = mkSlowEntryLabel (closureName cl_info) + + fun_amode = packHalfWordsCLit fun_type arity + fun_type = argDescrType arg_descr + +-- We keep the *zero-indexed* tag in the srt_len field of the info +-- table of a data constructor. +dataConTagZ :: DataCon -> ConTagZ +dataConTagZ con = dataConTag con - fIRST_TAG + +-- A low-level way to generate the variable part of a fun-style info table. +-- (must match fun_extra_bits above). Used by the C-- parser. 
+mkFunGenInfoExtraBits :: Int -> Int -> CmmLit -> CmmLit -> CmmLit -> [CmmLit] +mkFunGenInfoExtraBits fun_type arity srt_label liveness slow_entry + = [ packHalfWordsCLit fun_type arity, + srt_label, + liveness, + slow_entry ] + +------------------------------------------------------------------------- +-- +-- Generating the info table and code for a return point +-- +------------------------------------------------------------------------- + +-- Here's the layout of a return-point info table +-- +-- Tables next to code: +-- +-- <reversed vector table> +-- <srt slot> +-- <standard info table> +-- ret-addr --> <entry code (if any)> +-- +-- Not tables-next-to-code: +-- +-- ret-addr --> <ptr to entry code> +-- <standard info table> +-- <srt slot> +-- <forward vector table> +-- +-- * The vector table is only present for vectored returns +-- +-- * The SRT slot is only there if either +-- (a) there is SRT info to record, OR +-- (b) if the return is vectored +-- The latter (b) is necessary so that the vector is in a +-- predictable place + +vectorSlot :: CmmExpr -> CmmExpr -> CmmExpr +-- Get the vector slot from the info pointer +vectorSlot info_amode zero_indexed_tag + | tablesNextToCode + = cmmOffsetExprW (cmmOffsetW info_amode (- (stdInfoTableSizeW + 2))) + (cmmNegate zero_indexed_tag) + -- The "2" is one for the SRT slot, and one more + -- to get to the first word of the vector + + | otherwise + = cmmOffsetExprW (cmmOffsetW info_amode (stdInfoTableSizeW + 2)) + zero_indexed_tag + -- The "2" is one for the entry-code slot and one for the SRT slot + +retVec :: CmmExpr -> CmmExpr -> CmmExpr +-- Get a return vector from the info pointer +retVec info_amode zero_indexed_tag + = let slot = vectorSlot info_amode zero_indexed_tag + tableEntry = CmmLoad slot wordRep + in if tablesNextToCode + then CmmMachOp (MO_Add wordRep) [tableEntry, info_amode] + else tableEntry + +emitReturnTarget + :: Name + -> CgStmts -- The direct-return code (if any) + -- (empty for vectored returns) + -> [CmmLit] -- Vector of return points + -- (empty for non-vectored returns) + -> SRT + -> FCode CLabel +emitReturnTarget name stmts vector srt + = do { live_slots <- getLiveStackSlots + ; liveness <- buildContLiveness name live_slots + ; srt_info <- getSRTInfo name srt + + ; let + cl_type = case (null vector, isBigLiveness liveness) of + (True, True) -> rET_BIG + (True, False) -> rET_SMALL + (False, True) -> rET_VEC_BIG + (False, False) -> rET_VEC_SMALL + + (std_info, extra_bits) = + mkRetInfoTable info_lbl liveness srt_info cl_type vector + + ; blks <- cgStmtsToBlocks stmts + ; emitInfoTableAndCode info_lbl std_info extra_bits args blks + ; return info_lbl } + where + args = {- trace "emitReturnTarget: missing args" -} [] + uniq = getUnique name + info_lbl = mkReturnInfoLabel uniq + + +mkRetInfoTable + :: CLabel -- info label + -> Liveness -- liveness + -> C_SRT -- SRT Info + -> Int -- type (eg. 
rET_SMALL) + -> [CmmLit] -- vector + -> ([CmmLit],[CmmLit]) +mkRetInfoTable info_lbl liveness srt_info cl_type vector + = (std_info, extra_bits) + where + (srt_label, srt_len) = srtLabelAndLength srt_info info_lbl + + srt_slot | need_srt = [srt_label] + | otherwise = [] + + need_srt = needsSRT srt_info || not (null vector) + -- If there's a vector table then we must allocate + -- an SRT slot, so that the vector table is at a + -- known offset from the info pointer + + liveness_lit = makeRelativeRefTo info_lbl $ mkLivenessCLit liveness + std_info = mkStdInfoTable zeroCLit zeroCLit cl_type srt_len liveness_lit + extra_bits = srt_slot ++ map (makeRelativeRefTo info_lbl) vector + + +emitDirectReturnTarget + :: Name + -> CgStmts -- The direct-return code + -> SRT + -> FCode CLabel +emitDirectReturnTarget name code srt + = emitReturnTarget name code [] srt + +emitAlgReturnTarget + :: Name -- Just for its unique + -> [(ConTagZ, CgStmts)] -- Tagged branches + -> Maybe CgStmts -- Default branch (if any) + -> SRT -- Continuation's SRT + -> CtrlReturnConvention + -> FCode (CLabel, SemiTaggingStuff) + +emitAlgReturnTarget name branches mb_deflt srt ret_conv + = case ret_conv of + UnvectoredReturn fam_sz -> do + { blks <- getCgStmts $ + emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) + -- NB: tag_expr is zero-based + ; lbl <- emitDirectReturnTarget name blks srt + ; return (lbl, Nothing) } + -- Nothing: the internal branches in the switch don't have + -- global labels, so we can't use them at the 'call site' + + VectoredReturn fam_sz -> do + { let tagged_lbls = zip (map fst branches) $ + map (CmmLabel . mkAltLabel uniq . fst) branches + deflt_lbl | isJust mb_deflt = CmmLabel $ mkDefaultLabel uniq + | otherwise = mkIntCLit 0 + ; let vector = [ assocDefault deflt_lbl tagged_lbls i + | i <- [0..fam_sz-1]] + ; lbl <- emitReturnTarget name noCgStmts vector srt + ; mapFCs emit_alt branches + ; emit_deflt mb_deflt + ; return (lbl, Just (tagged_lbls, deflt_lbl)) } + where + uniq = getUnique name + tag_expr = getConstrTag (CmmReg nodeReg) + + emit_alt :: (Int, CgStmts) -> FCode (Int, CmmLit) + -- Emit the code for the alternative as a top-level + -- code block returning a label for it + emit_alt (tag, stmts) = do { let lbl = mkAltLabel uniq tag + ; blks <- cgStmtsToBlocks stmts + ; emitProc [] lbl [] blks + ; return (tag, CmmLabel lbl) } + + emit_deflt (Just stmts) = do { let lbl = mkDefaultLabel uniq + ; blks <- cgStmtsToBlocks stmts + ; emitProc [] lbl [] blks + ; return (CmmLabel lbl) } + emit_deflt Nothing = return (mkIntCLit 0) + -- Nothing case: the simplifier might have eliminated a case + -- so we may have e.g. case xs of + -- [] -> e + -- In that situation the default should never be taken, + -- so we just use a NULL pointer + +-------------------------------- +emitDirectReturnInstr :: Code +emitDirectReturnInstr + = do { info_amode <- getSequelAmode + ; stmtC (CmmJump (entryCode info_amode) []) } + +emitVectoredReturnInstr :: CmmExpr -- _Zero-indexed_ constructor tag + -> Code +emitVectoredReturnInstr zero_indexed_tag + = do { info_amode <- getSequelAmode + -- HACK! assign info_amode to a temp, because retVec + -- uses it twice and the NCG doesn't have any CSE yet. + -- Only do this for the NCG, because gcc is too stupid + -- to optimise away the extra tmp (grrr). 
+ ; dflags <- getDynFlags + ; x <- if hscTarget dflags == HscAsm + then do z <- newTemp wordRep + stmtC (CmmAssign z info_amode) + return (CmmReg z) + else + return info_amode + ; let target = retVec x zero_indexed_tag + ; stmtC (CmmJump target []) } + + +------------------------------------------------------------------------- +-- +-- Generating a standard info table +-- +------------------------------------------------------------------------- + +-- The standard bits of an info table. This part of the info table +-- corresponds to the StgInfoTable type defined in InfoTables.h. +-- +-- Its shape varies with ticky/profiling/tables next to code etc +-- so we can't use constant offsets from Constants + +mkStdInfoTable + :: CmmLit -- closure type descr (profiling) + -> CmmLit -- closure descr (profiling) + -> Int -- closure type + -> StgHalfWord -- SRT length + -> CmmLit -- layout field + -> [CmmLit] + +mkStdInfoTable type_descr closure_descr cl_type srt_len layout_lit + = -- Parallel revertible-black hole field + prof_info + -- Ticky info (none at present) + -- Debug info (none at present) + ++ [layout_lit, type_lit] + + where + prof_info + | opt_SccProfilingOn = [type_descr, closure_descr] + | otherwise = [] + + type_lit = packHalfWordsCLit cl_type srt_len + +stdInfoTableSizeW :: WordOff +-- The size of a standard info table varies with profiling/ticky etc, +-- so we can't get it from Constants +-- It must vary in sync with mkStdInfoTable +stdInfoTableSizeW + = size_fixed + size_prof + where + size_fixed = 2 -- layout, type + size_prof | opt_SccProfilingOn = 2 + | otherwise = 0 + +stdInfoTableSizeB = stdInfoTableSizeW * wORD_SIZE :: ByteOff + +stdSrtBitmapOffset :: ByteOff +-- Byte offset of the SRT bitmap half-word which is +-- in the *higher-addressed* part of the type_lit +stdSrtBitmapOffset = stdInfoTableSizeB - hALF_WORD_SIZE + +stdClosureTypeOffset :: ByteOff +-- Byte offset of the closure type half-word +stdClosureTypeOffset = stdInfoTableSizeB - wORD_SIZE + +stdPtrsOffset, stdNonPtrsOffset :: ByteOff +stdPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE +stdNonPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE + hALF_WORD_SIZE + +------------------------------------------------------------------------- +-- +-- Accessing fields of an info table +-- +------------------------------------------------------------------------- + +closureInfoPtr :: CmmExpr -> CmmExpr +-- Takes a closure pointer and returns the info table pointer +closureInfoPtr e = CmmLoad e wordRep + +entryCode :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns its entry code +entryCode e | tablesNextToCode = e + | otherwise = CmmLoad e wordRep + +getConstrTag :: CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the *zero-indexed* +-- constructor tag obtained from the info table +-- This lives in the SRT field of the info table +-- (constructors don't need SRTs). 
+getConstrTag closure_ptr + = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableConstrTag info_table] + where + info_table = infoTable (closureInfoPtr closure_ptr) + +infoTable :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns a pointer to the first word of the standard-form +-- info table, excluding the entry-code word (if present) +infoTable info_ptr + | tablesNextToCode = cmmOffsetB info_ptr (- stdInfoTableSizeB) + | otherwise = cmmOffsetW info_ptr 1 -- Past the entry code pointer + +infoTableConstrTag :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the constr tag +-- field of the info table (same as the srt_bitmap field) +infoTableConstrTag = infoTableSrtBitmap + +infoTableSrtBitmap :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the srt_bitmap +-- field of the info table +infoTableSrtBitmap info_tbl + = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) halfWordRep + +infoTableClosureType :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the closure type +-- field of the info table. +infoTableClosureType info_tbl + = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) halfWordRep + +infoTablePtrs :: CmmExpr -> CmmExpr +infoTablePtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) halfWordRep + +infoTableNonPtrs :: CmmExpr -> CmmExpr +infoTableNonPtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) halfWordRep + +funInfoTable :: CmmExpr -> CmmExpr +-- Takes the info pointer of a function, +-- and returns a pointer to the first word of the StgFunInfoExtra struct +-- in the info table. +funInfoTable info_ptr + | tablesNextToCode + = cmmOffsetB info_ptr (- stdInfoTableSizeB - sIZEOF_StgFunInfoExtraRev) + | otherwise + = cmmOffsetW info_ptr (1 + stdInfoTableSizeW) + -- Past the entry code pointer + +------------------------------------------------------------------------- +-- +-- Emit the code for a closure (or return address) +-- and its associated info table +-- +------------------------------------------------------------------------- + +-- The complication here concerns whether or not we can +-- put the info table next to the code + +emitInfoTableAndCode + :: CLabel -- Label of info table + -> [CmmLit] -- ...its invariant part + -> [CmmLit] -- ...and its variant part + -> [LocalReg] -- ...args + -> [CmmBasicBlock] -- ...and body + -> Code + +emitInfoTableAndCode info_lbl std_info extra_bits args blocks + | tablesNextToCode -- Reverse the extra_bits; and emit the top-level proc + = emitProc (reverse extra_bits ++ std_info) + entry_lbl args blocks + -- NB: the info_lbl is discarded + + | null blocks -- No actual code; only the info table is significant + = -- Use a zero place-holder in place of the + -- entry-label in the info table + emitRODataLits info_lbl (zeroCLit : std_info ++ extra_bits) + + | otherwise -- Separately emit info table (with the function entry + = -- point as first entry) and the entry code + do { emitDataLits info_lbl (CmmLabel entry_lbl : std_info ++ extra_bits) + ; emitProc [] entry_lbl args blocks } + + where + entry_lbl = infoLblToEntryLbl info_lbl + +------------------------------------------------------------------------- +-- +-- Static reference tables +-- +------------------------------------------------------------------------- + +-- There is just one SRT for each top level binding; all the nested +-- bindings use sub-sections of this SRT. 
The label is passed down to +-- the nested bindings via the monad. + +getSRTInfo :: Name -> SRT -> FCode C_SRT +getSRTInfo id NoSRT = return NoC_SRT +getSRTInfo id (SRT off len bmp) + | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] + = do { srt_lbl <- getSRTLabel + ; let srt_desc_lbl = mkSRTDescLabel id + ; emitRODataLits srt_desc_lbl + ( cmmLabelOffW srt_lbl off + : mkWordCLit (fromIntegral len) + : map mkWordCLit bmp) + ; return (C_SRT srt_desc_lbl 0 srt_escape) } + + | otherwise + = do { srt_lbl <- getSRTLabel + ; return (C_SRT srt_lbl off (fromIntegral (head bmp))) } + -- The fromIntegral converts to StgHalfWord + +srt_escape = (-1) :: StgHalfWord + +srtLabelAndLength :: C_SRT -> CLabel -> (CmmLit, StgHalfWord) +srtLabelAndLength NoC_SRT _ + = (zeroCLit, 0) +srtLabelAndLength (C_SRT lbl off bitmap) info_lbl + = (makeRelativeRefTo info_lbl $ cmmLabelOffW lbl off, bitmap) + +------------------------------------------------------------------------- +-- +-- Position independent code +-- +------------------------------------------------------------------------- +-- In order to support position independent code, we mustn't put absolute +-- references into read-only space. Info tables in the tablesNextToCode +-- case must be in .text, which is read-only, so we doctor the CmmLits +-- to use relative offsets instead. + +-- Note that this is done even when the -fPIC flag is not specified, +-- as we want to keep binary compatibility between PIC and non-PIC. + +makeRelativeRefTo :: CLabel -> CmmLit -> CmmLit + +makeRelativeRefTo info_lbl (CmmLabel lbl) + | tablesNextToCode + = CmmLabelDiffOff lbl info_lbl 0 +makeRelativeRefTo info_lbl (CmmLabelOff lbl off) + | tablesNextToCode + = CmmLabelDiffOff lbl info_lbl off +makeRelativeRefTo _ lit = lit diff --git a/compiler/codeGen/CgLetNoEscape.lhs b/compiler/codeGen/CgLetNoEscape.lhs new file mode 100644 index 0000000000..39860f4ee0 --- /dev/null +++ b/compiler/codeGen/CgLetNoEscape.lhs @@ -0,0 +1,212 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1993-1998 +% +% $Id: CgLetNoEscape.lhs,v 1.26 2004/09/30 10:35:47 simonpj Exp $ +% +%******************************************************** +%* * +\section[CgLetNoEscape]{Handling ``let-no-escapes''} +%* * +%******************************************************** + +\begin{code} +module CgLetNoEscape ( cgLetNoEscapeClosure ) where + +#include "HsVersions.h" + +import {-# SOURCE #-} CgExpr ( cgExpr ) + +import StgSyn +import CgMonad + +import CgBindery ( CgIdInfo, letNoEscapeIdInfo, nukeDeadBindings ) +import CgCase ( restoreCurrentCostCentre ) +import CgCon ( bindUnboxedTupleComponents ) +import CgHeapery ( unbxTupleHeapCheck ) +import CgInfoTbls ( emitDirectReturnTarget ) +import CgStackery ( allocStackTop, deAllocStackTop, getSpRelOffset ) +import Cmm ( CmmStmt(..) ) +import CmmUtils ( mkLblExpr, oneStmt ) +import CLabel ( mkReturnInfoLabel ) +import ClosureInfo ( mkLFLetNoEscape ) +import CostCentre ( CostCentreStack ) +import Id ( Id, idName ) +import Var ( idUnique ) +import SMRep ( retAddrSizeW ) +import BasicTypes ( RecFlag(..) ) +import Outputable +\end{code} + +%************************************************************************ +%* * +\subsection[what-is-non-escaping]{What {\em is} a ``non-escaping let''?} +%* * +%************************************************************************ + +[The {\em code} that detects these things is elsewhere.] + +Consider: +\begin{verbatim} + let x = fvs \ args -> e + in + if ... then x else + if ... then x else ... 
+\end{verbatim}
+@x@ is used twice (so we probably can't unfold it), but when it is
+entered, the stack is deeper than it was when the definition of @x@
+happened.  Specifically, if instead of allocating a closure for @x@,
+we saved all @x@'s fvs on the stack, and remembered the stack depth at
+that moment, then whenever we enter @x@ we can simply set the stack
+pointer(s) to these remembered (compile-time-fixed) values, and jump
+to the code for @x@.
+
+All of this is provided x is:
+\begin{enumerate}
+\item
+non-updatable;
+\item
+guaranteed to be entered before the stack retreats -- ie x is not
+buried in a heap-allocated closure, or passed as an argument to something;
+\item
+all the enters have exactly the right number of arguments,
+no more, no less;
+\item
+all the enters are tail calls; that is, they return to the
+caller enclosing the definition of @x@.
+\end{enumerate}
+
+Under these circumstances we say that @x@ is {\em non-escaping}.
+
+An example of when (4) does {\em not} hold:
+\begin{verbatim}
+       let x = ...
+       in case x of ...alts...
+\end{verbatim}
+
+Here, @x@ is certainly entered only when the stack is deeper than when
+@x@ is defined, but here it must return to \tr{...alts...} So we can't
+just adjust the stack down to @x@'s recalled points, because that
+would lose @alts@' context.
+
+Things can get a little more complicated.  Consider:
+\begin{verbatim}
+       let y = ...
+       in let x = fvs \ args -> ...y...
+       in ...x...
+\end{verbatim}
+
+Now, if @x@ is used in a non-escaping way in \tr{...x...}, {\em and}
+@y@ is used in a non-escaping way in \tr{...y...}, {\em then} @y@ is
+non-escaping.
+
+@x@ can even be recursive!  Eg:
+\begin{verbatim}
+       letrec x = [y] \ [v] -> if v then x True else ...
+       in
+       ...(x b)...
+\end{verbatim}
+
+
+%************************************************************************
+%*                                                                      *
+\subsection[codeGen-for-non-escaping]{Generating code for a ``non-escaping let''}
+%*                                                                      *
+%************************************************************************
+
+
+Generating code for this is fun.  It is all very very similar to what
+we do for a case expression.  The duality is between
+\begin{verbatim}
+       let-no-escape x = b
+       in e
+\end{verbatim}
+and
+\begin{verbatim}
+       case e of ... -> b
+\end{verbatim}
+
+That is, the RHS of @x@ (ie @b@) will execute {\em later}, just like
+the alternative of the case; it needs to be compiled in an environment
+in which all volatile bindings are forgotten, and the free vars are
+bound only to stable things like stack locations.  The @e@ part will
+execute {\em next}, just like the scrutinee of a case.
+
+First, we need to save all @x@'s free vars
+on the stack, if they aren't there already.
+
+\begin{code}
+cgLetNoEscapeClosure
+       :: Id                   -- binder
+       -> CostCentreStack      -- NB: *** NOT USED *** ToDo (WDP 94/06)
+       -> StgBinderInfo        -- NB: ditto
+       -> SRT
+       -> StgLiveVars          -- variables live in RHS, including the binders
+                               -- themselves in the case of a recursive group
+       -> EndOfBlockInfo       -- where are we going to?
+       -> Maybe VirtualSpOffset -- Slot for current cost centre
+       -> RecFlag              -- is the binding recursive?
+       -> [Id]                 -- args (as in \ args -> body)
+       -> StgExpr              -- body (as in above)
+       -> FCode (Id, CgIdInfo)
+
+-- ToDo: deal with the cost-centre issues
+
+cgLetNoEscapeClosure
+       bndr cc binder_info srt full_live_in_rhss
+       rhs_eob_info cc_slot rec args body
+  = let
+       arity   = length args
+       lf_info = mkLFLetNoEscape arity
+    in
+    -- saveVolatileVarsAndRegs done earlier in cgExpr.
+
+    do { (vSp, _) <- forkEvalHelp rhs_eob_info
+
+               (do { allocStackTop retAddrSizeW
+                   ; nukeDeadBindings full_live_in_rhss })
+
+               (do { deAllocStackTop retAddrSizeW
+                   ; abs_c <- forkProc $ cgLetNoEscapeBody bndr cc
+                                               cc_slot args body
+
+                       -- Ignore the label that comes back from
+                       -- emitDirectReturnTarget.  It must be conjured up elsewhere
+                   ; emitDirectReturnTarget (idName bndr) abs_c srt
+                   ; return () })
+
+       ; returnFC (bndr, letNoEscapeIdInfo bndr vSp lf_info) }
+\end{code}
+
+\begin{code}
+cgLetNoEscapeBody :: Id           -- Name of the join point
+                 -> CostCentreStack
+                 -> Maybe VirtualSpOffset
+                 -> [Id]          -- Args
+                 -> StgExpr       -- Body
+                 -> Code
+
+cgLetNoEscapeBody bndr cc cc_slot all_args body = do
+  { (arg_regs, ptrs, nptrs, ret_slot) <- bindUnboxedTupleComponents all_args
+
+     -- restore the saved cost centre.  BUT: we must not free the stack slot
+     -- containing the cost centre, because it might be needed for a
+     -- recursive call to this let-no-escape.
+  ; restoreCurrentCostCentre cc_slot False{-don't free-}
+
+       -- Enter the closure's cc, if required
+  ; -- enterCostCentreCode closure_info cc IsFunction
+
+       -- The "return address" slot doesn't have a return address in it;
+       -- but the heap-check needs it filled in if the heap-check fails.
+       -- So we pass code to fill it in to the heap-check macro
+  ; sp_rel <- getSpRelOffset ret_slot
+
+  ; let lbl = mkReturnInfoLabel (idUnique bndr)
+       frame_hdr_asst = oneStmt (CmmStore sp_rel (mkLblExpr lbl))
+
+       -- Do heap check [ToDo: omit for non-recursive case by recording in
+       --               in envt and absorbing at call site]
+  ; unbxTupleHeapCheck arg_regs ptrs nptrs frame_hdr_asst
+               (cgExpr body)
+  }
+\end{code}
diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs
new file mode 100644
index 0000000000..4f95c9b36a
--- /dev/null
+++ b/compiler/codeGen/CgMonad.lhs
@@ -0,0 +1,853 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+% $Id: CgMonad.lhs,v 1.45 2005/06/21 10:44:41 simonmar Exp $
+%
+\section[CgMonad]{The code generation monad}
+
+See the beginning of the top-level @CodeGen@ module, to see how this
+monadic stuff fits into the Big Picture.
+
+\begin{code}
+module CgMonad (
+       Code,   -- type
+       FCode,  -- type
+
+       initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,
+       returnFC, fixC, checkedAbsC,
+       stmtC, stmtsC, labelC, emitStmts, nopC, whenC, newLabelC,
+       newUnique, newUniqSupply,
+
+       CgStmts, emitCgStmts, forkCgStmts, cgStmtsToBlocks,
+       getCgStmts', getCgStmts,
+       noCgStmts, oneCgStmt, consCgStmt,
+
+       getCmm,
+       emitData, emitProc, emitSimpleProc,
+
+       forkLabelledCode,
+       forkClosureBody, forkStatics, forkAlts, forkEval,
+       forkEvalHelp, forkProc, codeOnly,
+       SemiTaggingStuff, ConTagZ,
+
+       EndOfBlockInfo(..),
+       setEndOfBlockInfo, getEndOfBlockInfo,
+
+       setSRTLabel, getSRTLabel,
+       setTickyCtrLabel, getTickyCtrLabel,
+
+       StackUsage(..), HeapUsage(..),
+       VirtualSpOffset, VirtualHpOffset,
+       initStkUsage, initHpUsage,
+       getHpUsage,  setHpUsage,
+       heapHWM,
+
+       moduleName,
+
+       Sequel(..), -- ToDo: unabstract?
+
+       -- ideally we wouldn't export these, but some other modules access internal state
+       getState, setState, getInfoDown, getDynFlags, getHomeModules,
+
+       -- more localised access to monad state
+       getStkUsage, setStkUsage,
+       getBinds, setBinds, getStaticBinds,
+
+       -- out of general friendliness, we also export ...
+       CgInfoDownwards(..), CgState(..)
-- non-abstract + ) where + +#include "HsVersions.h" + +import {-# SOURCE #-} CgBindery ( CgBindings, nukeVolatileBinds ) + +import DynFlags ( DynFlags ) +import Packages ( HomeModules ) +import Cmm +import CmmUtils ( CmmStmts, isNopStmt ) +import CLabel +import SMRep ( WordOff ) +import Module ( Module ) +import Id ( Id ) +import VarEnv +import OrdList +import Unique ( Unique ) +import Util ( mapAccumL ) +import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply, uniqFromSupply ) +import FastString +import Outputable + +import Control.Monad ( liftM ) + +infixr 9 `thenC` -- Right-associative! +infixr 9 `thenFC` +\end{code} + +%************************************************************************ +%* * +\subsection[CgMonad-environment]{Stuff for manipulating environments} +%* * +%************************************************************************ + +This monadery has some information that it only passes {\em +downwards}, as well as some ``state'' which is modified as we go +along. + +\begin{code} +data CgInfoDownwards -- information only passed *downwards* by the monad + = MkCgInfoDown { + cgd_dflags :: DynFlags, + cgd_hmods :: HomeModules, -- Packages we depend on + cgd_mod :: Module, -- Module being compiled + cgd_statics :: CgBindings, -- [Id -> info] : static environment + cgd_srt :: CLabel, -- label of the current SRT + cgd_ticky :: CLabel, -- current destination for ticky counts + cgd_eob :: EndOfBlockInfo -- Info for stuff to do at end of basic block: + } + +initCgInfoDown :: DynFlags -> HomeModules -> Module -> CgInfoDownwards +initCgInfoDown dflags hmods mod + = MkCgInfoDown { cgd_dflags = dflags, + cgd_hmods = hmods, + cgd_mod = mod, + cgd_statics = emptyVarEnv, + cgd_srt = error "initC: srt", + cgd_ticky = mkTopTickyCtrLabel, + cgd_eob = initEobInfo } + +data CgState + = MkCgState { + cgs_stmts :: OrdList CgStmt, -- Current proc + cgs_tops :: OrdList CmmTop, + -- Other procedures and data blocks in this compilation unit + -- Both the latter two are ordered only so that we can + -- reduce forward references, when it's easy to do so + + cgs_binds :: CgBindings, -- [Id -> info] : *local* bindings environment + -- Bindings for top-level things are given in + -- the info-down part + + cgs_stk_usg :: StackUsage, + cgs_hp_usg :: HeapUsage, + + cgs_uniqs :: UniqSupply } + +initCgState :: UniqSupply -> CgState +initCgState uniqs + = MkCgState { cgs_stmts = nilOL, cgs_tops = nilOL, + cgs_binds = emptyVarEnv, + cgs_stk_usg = initStkUsage, + cgs_hp_usg = initHpUsage, + cgs_uniqs = uniqs } +\end{code} + +@EndOfBlockInfo@ tells what to do at the end of this block of code or, +if the expression is a @case@, what to do at the end of each +alternative. + +\begin{code} +data EndOfBlockInfo + = EndOfBlockInfo + VirtualSpOffset -- Args Sp: trim the stack to this point at a + -- return; push arguments starting just + -- above this point on a tail call. + + -- This is therefore the stk ptr as seen + -- by a case alternative. + Sequel + +initEobInfo = EndOfBlockInfo 0 OnStack +\end{code} + +Any addressing modes inside @Sequel@ must be ``robust,'' in the sense +that it must survive stack pointer adjustments at the end of the +block. 
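For instance (a hypothetical sketch, not module code): when compiling a case expression, the scrutinee might be given end-of-block info built from the CaseAlts sequel defined just below, recording both the trimmed stack pointer and the continuation label:

    -- args_sp, join_lbl and bndr are invented names for illustration.
    scrutEobInfo :: VirtualSpOffset -> CLabel -> Id -> EndOfBlockInfo
    scrutEobInfo args_sp join_lbl bndr
      = EndOfBlockInfo args_sp (CaseAlts join_lbl Nothing bndr False)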
+
+\begin{code}
+data Sequel
+  = OnStack             -- Continuation is on the stack
+  | UpdateCode          -- Continuation is update
+
+  | CaseAlts
+         CLabel     -- Jump to this; if the continuation is for a vectored
+                    -- case this might be the label of a return vector
+         SemiTaggingStuff
+         Id         -- The case binder, only used to see if it's dead
+         Bool       -- True <=> polymorphic, push a SEQ frame too
+
+type SemiTaggingStuff
+  = Maybe                       -- Maybe[1] we don't have any semi-tagging stuff...
+       ([(ConTagZ, CmmLit)],    -- Alternatives
+        CmmLit)                 -- Default (will be a can't happen RTS label if can't happen)
+
+type ConTagZ = Int     -- A *zero-indexed* constructor tag
+
+-- The case branch is executed only from a successful semitagging
+-- venture, when a case has looked at a variable, found that it's
+-- evaluated, and wants to load up the contents and go to the join
+-- point.
+\end{code}
+
+%************************************************************************
+%*                                                                      *
+               CgStmt type
+%*                                                                      *
+%************************************************************************
+
+The CgStmts type is what the code generator outputs: it is a tree of
+statements, including in-line labels.  The job of flattenCgStmts is to
+turn this into a list of basic blocks, each of which ends in a jump
+statement (either a local branch or a non-local jump).
+
+\begin{code}
+type CgStmts = OrdList CgStmt
+
+data CgStmt
+  = CgStmt  CmmStmt
+  | CgLabel BlockId
+  | CgFork  BlockId CgStmts
+
+flattenCgStmts :: BlockId -> CgStmts -> [CmmBasicBlock]
+flattenCgStmts id stmts =
+       case flatten (fromOL stmts) of
+         ([],blocks) -> blocks
+         (block,blocks) -> BasicBlock id block : blocks
+ where
+  flatten [] = ([],[])
+
+  -- A label at the end of a function or fork: this label must not be reachable,
+  -- but it might be referred to from another BB that also isn't reachable.
+  -- Eliminating these has to be done with a dead-code analysis.  For now,
+  -- we just make it into a well-formed block by adding a recursive jump.
+  flatten [CgLabel id]
+    = ( [], [BasicBlock id [CmmBranch id]] )
+
+  -- A jump/branch: throw away all the code up to the next label, because
+  -- it is unreachable.  Be careful to keep forks that we find on the way.
+  flatten (CgStmt stmt : stmts)
+    | isJump stmt
+    = case dropWhile isOrdinaryStmt stmts of
+          [] -> ( [stmt], [] )
+          [CgLabel id] -> ( [stmt], [BasicBlock id [CmmBranch id]])
+          (CgLabel id : stmts) -> ( [stmt], BasicBlock id block : blocks )
+              where (block,blocks) = flatten stmts
+          (CgFork fork_id stmts : ss) ->
+              flatten (CgFork fork_id stmts : CgStmt stmt : ss)
+
+  flatten (s:ss) =
+       case s of
+         CgStmt stmt -> (stmt:block,blocks)
+         CgLabel id  -> ([CmmBranch id],BasicBlock id block:blocks)
+         CgFork fork_id stmts ->
+               (block, BasicBlock fork_id fork_block : fork_blocks ++ blocks)
+               where (fork_block, fork_blocks) = flatten (fromOL stmts)
+    where (block,blocks) = flatten ss
+
+isJump (CmmJump _ _) = True
+isJump (CmmBranch _) = True
+isJump _ = False
+
+isOrdinaryStmt (CgStmt _) = True
+isOrdinaryStmt _ = False
+\end{code}
+
+%************************************************************************
+%*                                                                      *
+               Stack and heap models
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+type VirtualHpOffset = WordOff -- Both are in
+type VirtualSpOffset = WordOff -- units of words
+
+data StackUsage
+  = StackUsage {
+       virtSp :: VirtualSpOffset,
+               -- Virtual offset of topmost allocated slot
+
+       frameSp :: VirtualSpOffset,
+               -- Virtual offset of the return address of the enclosing frame.
+ -- This RA describes the liveness/pointedness of
+ -- all the stack from frameSp downwards
+ -- INVARIANT: less than or equal to virtSp
+
+ freeStk :: [VirtualSpOffset],
+ -- List of free slots, in *increasing* order
+ -- INVARIANT: all <= virtSp
+ -- All slots <= virtSp are taken except these ones
+
+ realSp :: VirtualSpOffset,
+ -- Virtual offset of real stack pointer register
+
+ hwSp :: VirtualSpOffset
+ } -- Highest value ever taken by virtSp
+
+-- INVARIANT: The environment contains no Stable references to
+-- stack slots below (lower offset) frameSp
+-- It can contain volatile references to this area though.
+
+data HeapUsage =
+ HeapUsage {
+ virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word
+ realHp :: VirtualHpOffset -- realHp: Virtual offset of real heap ptr
+ }
+\end{code}
+
+The heap high water mark is just virtHp; unlike the stack there is no
+separate high-water-mark field, because the high water marks of
+forked-off branches are folded straight back into virtHp (see
+stateIncUsage and maxHpHw below). Remember, virtHp never retreats!
+
+Note Jan 04: ok, so why do we only look at the virtual Hp?? (Because,
+per the above, the branches' high water marks are already merged into
+it.)
+
+\begin{code}
+heapHWM :: HeapUsage -> VirtualHpOffset
+heapHWM = virtHp
+\end{code}
+
+Initialisation.
+
+\begin{code}
+initStkUsage :: StackUsage
+initStkUsage = StackUsage {
+ virtSp = 0,
+ frameSp = 0,
+ freeStk = [],
+ realSp = 0,
+ hwSp = 0
+ }
+
+initHpUsage :: HeapUsage
+initHpUsage = HeapUsage {
+ virtHp = 0,
+ realHp = 0
+ }
+\end{code}
+
+@stateIncUsage@$~e_1~e_2$ incorporates in $e_1$ the stack and heap high water
+marks found in $e_2$.
+
+\begin{code}
+stateIncUsage :: CgState -> CgState -> CgState
+stateIncUsage s1 s2@(MkCgState { cgs_stk_usg = stk_usg, cgs_hp_usg = hp_usg })
+ = s1 { cgs_hp_usg = cgs_hp_usg s1 `maxHpHw` virtHp hp_usg,
+ cgs_stk_usg = cgs_stk_usg s1 `maxStkHw` hwSp stk_usg }
+ `addCodeBlocksFrom` s2
+
+stateIncUsageEval :: CgState -> CgState -> CgState
+stateIncUsageEval s1 s2
+ = s1 { cgs_stk_usg = cgs_stk_usg s1 `maxStkHw` hwSp (cgs_stk_usg s2) }
+ `addCodeBlocksFrom` s2
+ -- We don't max the heap high-watermark because stateIncUsageEval is
+ -- used only in forkEval, which in turn is only used for blocks of code
+ -- which do their own heap-check.
+
+addCodeBlocksFrom :: CgState -> CgState -> CgState
+-- Add code blocks from the latter to the former
+-- (The cgs_stmts will often be empty, but not always; see codeOnly)
+s1 `addCodeBlocksFrom` s2
+ = s1 { cgs_stmts = cgs_stmts s1 `appOL` cgs_stmts s2,
+ cgs_tops = cgs_tops s1 `appOL` cgs_tops s2 }
+
+maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage
+hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw }
+
+maxStkHw :: StackUsage -> VirtualSpOffset -> StackUsage
+stk_usg `maxStkHw` hw = stk_usg { hwSp = hwSp stk_usg `max` hw }
+\end{code}
+
+%************************************************************************
+%* *
+ The FCode monad
+%* *
+%************************************************************************
+
+\begin{code}
+newtype FCode a = FCode (CgInfoDownwards -> CgState -> (a, CgState))
+type Code = FCode ()
+
+instance Monad FCode where
+ (>>=) = thenFC
+ return = returnFC
+
+{-# INLINE thenC #-}
+{-# INLINE thenFC #-}
+{-# INLINE returnFC #-}
+\end{code}
+The generated code (the ``Abstract~C'') is kept in the state, not the
+environment, so as to improve strictness.
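+
+For intuition: @FCode@ is a reader monad (the @CgInfoDownwards@ is
+only ever consulted) stacked on a state monad (the @CgState@ is
+threaded through). As a sketch of what the bind, @thenFC@ (defined
+below), amounts to (here @unFCode@ is hypothetical, since the newtype
+deliberately exports no field accessor):
+
+ unFCode :: FCode a -> CgInfoDownwards -> CgState -> (a, CgState)
+ unFCode (FCode f) = f
+
+ -- do { x <- m; k x } behaves as
+ -- FCode (\info st -> let (x, st') = unFCode m info st
+ -- in unFCode (k x) info st')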
+ +\begin{code} +initC :: DynFlags -> HomeModules -> Module -> FCode a -> IO a + +initC dflags hmods mod (FCode code) + = do { uniqs <- mkSplitUniqSupply 'c' + ; case code (initCgInfoDown dflags hmods mod) (initCgState uniqs) of + (res, _) -> return res + } + +returnFC :: a -> FCode a +returnFC val = FCode (\info_down state -> (val, state)) +\end{code} + +\begin{code} +thenC :: Code -> FCode a -> FCode a +thenC (FCode m) (FCode k) = + FCode (\info_down state -> let (_,new_state) = m info_down state in + k info_down new_state) + +listCs :: [Code] -> Code +listCs [] = return () +listCs (fc:fcs) = do + fc + listCs fcs + +mapCs :: (a -> Code) -> [a] -> Code +mapCs = mapM_ +\end{code} + +\begin{code} +thenFC :: FCode a -> (a -> FCode c) -> FCode c +thenFC (FCode m) k = FCode ( + \info_down state -> + let + (m_result, new_state) = m info_down state + (FCode kcode) = k m_result + in + kcode info_down new_state + ) + +listFCs :: [FCode a] -> FCode [a] +listFCs = sequence + +mapFCs :: (a -> FCode b) -> [a] -> FCode [b] +mapFCs = mapM +\end{code} + +And the knot-tying combinator: +\begin{code} +fixC :: (a -> FCode a) -> FCode a +fixC fcode = FCode ( + \info_down state -> + let + FCode fc = fcode v + result@(v,_) = fc info_down state + -- ^--------^ + in + result + ) +\end{code} + +%************************************************************************ +%* * + Operators for getting and setting the state and "info_down". + +%* * +%************************************************************************ + +\begin{code} +getState :: FCode CgState +getState = FCode $ \info_down state -> (state,state) + +setState :: CgState -> FCode () +setState state = FCode $ \info_down _ -> ((),state) + +getStkUsage :: FCode StackUsage +getStkUsage = do + state <- getState + return $ cgs_stk_usg state + +setStkUsage :: StackUsage -> Code +setStkUsage new_stk_usg = do + state <- getState + setState $ state {cgs_stk_usg = new_stk_usg} + +getHpUsage :: FCode HeapUsage +getHpUsage = do + state <- getState + return $ cgs_hp_usg state + +setHpUsage :: HeapUsage -> Code +setHpUsage new_hp_usg = do + state <- getState + setState $ state {cgs_hp_usg = new_hp_usg} + +getBinds :: FCode CgBindings +getBinds = do + state <- getState + return $ cgs_binds state + +setBinds :: CgBindings -> FCode () +setBinds new_binds = do + state <- getState + setState $ state {cgs_binds = new_binds} + +getStaticBinds :: FCode CgBindings +getStaticBinds = do + info <- getInfoDown + return (cgd_statics info) + +withState :: FCode a -> CgState -> FCode (a,CgState) +withState (FCode fcode) newstate = FCode $ \info_down state -> + let (retval, state2) = fcode info_down newstate in ((retval,state2), state) + +newUniqSupply :: FCode UniqSupply +newUniqSupply = do + state <- getState + let (us1, us2) = splitUniqSupply (cgs_uniqs state) + setState $ state { cgs_uniqs = us1 } + return us2 + +newUnique :: FCode Unique +newUnique = do + us <- newUniqSupply + return (uniqFromSupply us) + +------------------ +getInfoDown :: FCode CgInfoDownwards +getInfoDown = FCode $ \info_down state -> (info_down,state) + +getDynFlags :: FCode DynFlags +getDynFlags = liftM cgd_dflags getInfoDown + +getHomeModules :: FCode HomeModules +getHomeModules = liftM cgd_hmods getInfoDown + +withInfoDown :: FCode a -> CgInfoDownwards -> FCode a +withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state + +doFCode :: FCode a -> CgInfoDownwards -> CgState -> (a,CgState) +doFCode (FCode fcode) info_down state = fcode info_down state +\end{code} + + 
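+A common pattern in the accessors above is get/modify/set. Purely as
+an illustration (this combinator is not part of the module), they
+could all be built from one helper:
+
+ modifyState :: (CgState -> CgState) -> Code
+ modifyState f = do { state <- getState; setState (f state) }
+
+ -- e.g. setHpUsage new_hp_usg = modifyState (\s -> s {cgs_hp_usg = new_hp_usg})
+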
+%************************************************************************
+%* *
+ Forking
+%* *
+%************************************************************************
+
+@forkClosureBody@ takes a code, $c$, and compiles it in a completely
+fresh environment, except that:
+ - compilation info and statics are passed in unchanged.
+The current environment is passed on completely unaltered, except that
+abstract C from the fork is incorporated.
+
+@forkProc@ takes a code and compiles it in the current environment,
+returning the basic blocks thus constructed. The current environment
+is passed on completely unchanged. It is pretty similar to
+@getBlocks@, except that the latter does affect the environment.
+
+@forkStatics@ $fc$ compiles $fc$ in an environment whose statics come
+from the current bindings, but which is otherwise freshly initialised.
+The Abstract~C returned is attached to the current state, but the
+bindings and usage information are otherwise unchanged.
+
+\begin{code}
+forkClosureBody :: Code -> Code
+forkClosureBody body_code
+ = do { info <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let body_info_down = info { cgd_eob = initEobInfo }
+ ((),fork_state) = doFCode body_code body_info_down
+ (initCgState us)
+ ; ASSERT( isNilOL (cgs_stmts fork_state) )
+ setState $ state `addCodeBlocksFrom` fork_state }
+
+forkStatics :: FCode a -> FCode a
+forkStatics body_code
+ = do { info <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let rhs_info_down = info { cgd_statics = cgs_binds state,
+ cgd_eob = initEobInfo }
+ (result, fork_state_out) = doFCode body_code rhs_info_down
+ (initCgState us)
+ ; ASSERT( isNilOL (cgs_stmts fork_state_out) )
+ setState (state `addCodeBlocksFrom` fork_state_out)
+ ; return result }
+
+forkProc :: Code -> FCode CgStmts
+forkProc body_code
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let fork_state_in = (initCgState us)
+ { cgs_binds = cgs_binds state,
+ cgs_stk_usg = cgs_stk_usg state,
+ cgs_hp_usg = cgs_hp_usg state }
+ -- ToDo: is the hp usage necessary?
+ (code_blks, fork_state_out) = doFCode (getCgStmts body_code)
+ info_down fork_state_in
+ ; setState $ state `stateIncUsageEval` fork_state_out
+ ; return code_blks }
+
+codeOnly :: Code -> Code
+-- Emit any code from the inner thing into the outer thing
+-- Do not affect anything else in the outer state
+-- Used in almost-circular code to prevent false loop dependencies
+codeOnly body_code
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state,
+ cgs_stk_usg = cgs_stk_usg state,
+ cgs_hp_usg = cgs_hp_usg state }
+ ((), fork_state_out) = doFCode body_code info_down fork_state_in
+ ; setState $ state `addCodeBlocksFrom` fork_state_out }
+\end{code}
+
+@forkAlts@ $bs$ takes fcodes $bs$ for the branches of a @case@
+(including any default) and compiles each in the current
+environment.
The current environment is passed on unmodified, except
+that
+ - the worst stack high-water mark is incorporated
+ - the virtual Hp is moved on to the worst virtual Hp for the branches
+
+\begin{code}
+forkAlts :: [FCode a] -> FCode [a]
+
+forkAlts branch_fcodes
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let compile us branch
+ = (us2, doFCode branch info_down branch_state)
+ where
+ (us1,us2) = splitUniqSupply us
+ branch_state = (initCgState us1) {
+ cgs_binds = cgs_binds state,
+ cgs_stk_usg = cgs_stk_usg state,
+ cgs_hp_usg = cgs_hp_usg state }
+
+ (_us, results) = mapAccumL compile us branch_fcodes
+ (branch_results, branch_out_states) = unzip results
+ ; setState $ foldl stateIncUsage state branch_out_states
+ -- NB foldl. state is the *left* argument to stateIncUsage
+ ; return branch_results }
+\end{code}
+
+@forkEval@ takes two blocks of code.
+
+ - The first meddles with the environment to set it up as expected by
+ the alternatives of a @case@ which does an eval (or gc-possible primop).
+ - The second block is the code for the alternatives.
+ (plus info for semi-tagging purposes)
+
+@forkEval@ picks up the virtual stack pointer and returns a suitable
+@EndOfBlockInfo@ for the caller to use, together with whatever value
+is returned by the second block.
+
+The environment for the alternatives is initialised by the env-setting
+code passed in (note the @nukeVolatileBinds@ in @forkEvalHelp@), and
+usage is incorporated with @stateIncUsageEval@, which ignores the heap
+usage.
+
+\begin{code}
+forkEval :: EndOfBlockInfo -- For the body
+ -> Code -- Code to set environment
+ -> FCode Sequel -- Semi-tagging info to store
+ -> FCode EndOfBlockInfo -- The new end of block info
+
+forkEval body_eob_info env_code body_code
+ = do { (v, sequel) <- forkEvalHelp body_eob_info env_code body_code
+ ; returnFC (EndOfBlockInfo v sequel) }
+
+forkEvalHelp :: EndOfBlockInfo -- For the body
+ -> Code -- Code to set environment
+ -> FCode a -- The code to do after the eval
+ -> FCode (VirtualSpOffset, -- Sp
+ a) -- Result of the FCode
+ -- A disturbingly complicated function
+forkEvalHelp body_eob_info env_code body_code
+ = do { info_down <- getInfoDown
+ ; us <- newUniqSupply
+ ; state <- getState
+ ; let { info_down_for_body = info_down {cgd_eob = body_eob_info}
+ ; (_, env_state) = doFCode env_code info_down_for_body
+ (state {cgs_uniqs = us})
+ ; state_for_body = (initCgState (cgs_uniqs env_state))
+ { cgs_binds = binds_for_body,
+ cgs_stk_usg = stk_usg_for_body }
+ ; binds_for_body = nukeVolatileBinds (cgs_binds env_state)
+ ; stk_usg_from_env = cgs_stk_usg env_state
+ ; virtSp_from_env = virtSp stk_usg_from_env
+ ; stk_usg_for_body = stk_usg_from_env {realSp = virtSp_from_env,
+ hwSp = virtSp_from_env}
+ ; (value_returned, state_at_end_return)
+ = doFCode body_code info_down_for_body state_for_body
+ }
+ ; ASSERT( isNilOL (cgs_stmts state_at_end_return) )
+ -- The code coming back should consist only of nested declarations,
+ -- notably of the return vector!
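+ -- (stateIncUsageEval, used just below, keeps only the forked
+ -- state's code blocks and its stack high-water mark; the heap
+ -- usage is deliberately ignored, because such blocks do their
+ -- own heap checks.)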
+ setState $ state `stateIncUsageEval` state_at_end_return + ; return (virtSp_from_env, value_returned) } + + +-- ---------------------------------------------------------------------------- +-- Combinators for emitting code + +nopC :: Code +nopC = return () + +whenC :: Bool -> Code -> Code +whenC True code = code +whenC False code = nopC + +stmtC :: CmmStmt -> Code +stmtC stmt = emitCgStmt (CgStmt stmt) + +labelC :: BlockId -> Code +labelC id = emitCgStmt (CgLabel id) + +newLabelC :: FCode BlockId +newLabelC = do { id <- newUnique; return (BlockId id) } + +checkedAbsC :: CmmStmt -> Code +-- Emit code, eliminating no-ops +checkedAbsC stmt = emitStmts (if isNopStmt stmt then nilOL + else unitOL stmt) + +stmtsC :: [CmmStmt] -> Code +stmtsC stmts = emitStmts (toOL stmts) + +-- Emit code; no no-op checking +emitStmts :: CmmStmts -> Code +emitStmts stmts = emitCgStmts (fmap CgStmt stmts) + +-- forkLabelledCode is for emitting a chunk of code with a label, outside +-- of the current instruction stream. +forkLabelledCode :: Code -> FCode BlockId +forkLabelledCode code = getCgStmts code >>= forkCgStmts + +emitCgStmt :: CgStmt -> Code +emitCgStmt stmt + = do { state <- getState + ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt } + } + +emitData :: Section -> [CmmStatic] -> Code +emitData sect lits + = do { state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` data_block } } + where + data_block = CmmData sect lits + +emitProc :: [CmmLit] -> CLabel -> [LocalReg] -> [CmmBasicBlock] -> Code +emitProc lits lbl args blocks + = do { let proc_block = CmmProc (map CmmStaticLit lits) lbl args blocks + ; state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } } + +emitSimpleProc :: CLabel -> Code -> Code +-- Emit a procedure whose body is the specified code; no info table +emitSimpleProc lbl code + = do { stmts <- getCgStmts code + ; blks <- cgStmtsToBlocks stmts + ; emitProc [] lbl [] blks } + +getCmm :: Code -> FCode Cmm +-- Get all the CmmTops (there should be no stmts) +getCmm code + = do { state1 <- getState + ; ((), state2) <- withState code (state1 { cgs_tops = nilOL }) + ; setState $ state2 { cgs_tops = cgs_tops state1 } + ; return (Cmm (fromOL (cgs_tops state2))) } + +-- ---------------------------------------------------------------------------- +-- CgStmts + +-- These functions deal in terms of CgStmts, which is an abstract type +-- representing the code in the current proc. + + +-- emit CgStmts into the current instruction stream +emitCgStmts :: CgStmts -> Code +emitCgStmts stmts + = do { state <- getState + ; setState $ state { cgs_stmts = cgs_stmts state `appOL` stmts } } + +-- emit CgStmts outside the current instruction stream, and return a label +forkCgStmts :: CgStmts -> FCode BlockId +forkCgStmts stmts + = do { id <- newLabelC + ; emitCgStmt (CgFork id stmts) + ; return id + } + +-- turn CgStmts into [CmmBasicBlock], for making a new proc. 
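+-- (The fresh label made here names the entry block; the real work is
+-- done by flattenCgStmts, defined near the top of this module.)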
+cgStmtsToBlocks :: CgStmts -> FCode [CmmBasicBlock]
+cgStmtsToBlocks stmts
+ = do { id <- newLabelC
+ ; return (flattenCgStmts id stmts)
+ }
+
+-- collect the code emitted by an FCode computation
+getCgStmts' :: FCode a -> FCode (a, CgStmts)
+getCgStmts' fcode
+ = do { state1 <- getState
+ ; (a, state2) <- withState fcode (state1 { cgs_stmts = nilOL })
+ ; setState $ state2 { cgs_stmts = cgs_stmts state1 }
+ ; return (a, cgs_stmts state2) }
+
+getCgStmts :: FCode a -> FCode CgStmts
+getCgStmts fcode = do { (_,stmts) <- getCgStmts' fcode; return stmts }
+
+-- Simple ways to construct CgStmts:
+noCgStmts :: CgStmts
+noCgStmts = nilOL
+
+oneCgStmt :: CmmStmt -> CgStmts
+oneCgStmt stmt = unitOL (CgStmt stmt)
+
+consCgStmt :: CmmStmt -> CgStmts -> CgStmts
+consCgStmt stmt stmts = CgStmt stmt `consOL` stmts
+
+-- ----------------------------------------------------------------------------
+-- Get the current module name
+
+moduleName :: FCode Module
+moduleName = do { info <- getInfoDown; return (cgd_mod info) }
+
+-- ----------------------------------------------------------------------------
+-- Get/set the end-of-block info
+
+setEndOfBlockInfo :: EndOfBlockInfo -> Code -> Code
+setEndOfBlockInfo eob_info code = do
+ info <- getInfoDown
+ withInfoDown code (info {cgd_eob = eob_info})
+
+getEndOfBlockInfo :: FCode EndOfBlockInfo
+getEndOfBlockInfo = do
+ info <- getInfoDown
+ return (cgd_eob info)
+
+-- ----------------------------------------------------------------------------
+-- Get/set the current SRT label
+
+-- There is just one SRT for each top level binding; all the nested
+-- bindings use sub-sections of this SRT. The label is passed down to
+-- the nested bindings via the monad.
+
+getSRTLabel :: FCode CLabel -- Used only by cgPanic
+getSRTLabel = do info <- getInfoDown
+ return (cgd_srt info)
+
+setSRTLabel :: CLabel -> FCode a -> FCode a
+setSRTLabel srt_lbl code
+ = do info <- getInfoDown
+ withInfoDown code (info { cgd_srt = srt_lbl})
+
+-- ----------------------------------------------------------------------------
+-- Get/set the current ticky counter label
+
+getTickyCtrLabel :: FCode CLabel
+getTickyCtrLabel = do
+ info <- getInfoDown
+ return (cgd_ticky info)
+
+setTickyCtrLabel :: CLabel -> Code -> Code
+setTickyCtrLabel ticky code = do
+ info <- getInfoDown
+ withInfoDown code (info {cgd_ticky = ticky})
+\end{code}
diff --git a/compiler/codeGen/CgParallel.hs b/compiler/codeGen/CgParallel.hs
new file mode 100644
index 0000000000..b826a33cba
--- /dev/null
+++ b/compiler/codeGen/CgParallel.hs
@@ -0,0 +1,90 @@
+-- Code generation related to GpH
+-- (a) parallel
+-- (b) GranSim
+
+module CgParallel(
+ staticGranHdr,staticParHdr,
+ granFetchAndReschedule, granYield,
+ doGranAllocate
+ ) where
+
+import CgMonad
+import CgCallConv ( mkRegLiveness )
+import Id ( Id )
+import Cmm ( CmmLit, GlobalReg(..), node, CmmExpr )
+import StaticFlags ( opt_GranMacros )
+import Outputable
+
+staticParHdr :: [CmmLit]
+-- Parallel header words in a static closure
+staticParHdr = []
+
+--------------------------------------------------------
+-- GranSim stuff
+--------------------------------------------------------
+
+staticGranHdr :: [CmmLit]
+-- Gransim header words in a static closure
+staticGranHdr = []
+
+doGranAllocate :: CmmExpr -> Code
+-- macro DO_GRAN_ALLOCATE
+doGranAllocate hp
+ | not opt_GranMacros = nopC
+ | otherwise = panic "doGranAllocate"
+
+
+
+-------------------------
+granFetchAndReschedule :: [(Id,GlobalReg)] -- Live registers
+ -> Bool -- Node reqd?
+ -> Code
+-- Emit code for simulating a fetch and then a reschedule.
+granFetchAndReschedule regs node_reqd
+ | opt_GranMacros && (node `elem` map snd regs || node_reqd)
+ = do { fetch
+ ; reschedule liveness node_reqd }
+ | otherwise
+ = nopC
+ where
+ liveness = mkRegLiveness regs 0 0
+
+fetch = panic "granFetch"
+ -- Was: absC (CMacroStmt GRAN_FETCH [])
+ --HWL: generate GRAN_FETCH macro for GrAnSim
+ -- currently GRAN_FETCH and GRAN_FETCH_AND_RESCHEDULE are m.i.a.
+ -- (not implemented)
+
+reschedule liveness node_reqd = panic "granReschedule"
+ -- Was: absC (CMacroStmt GRAN_RESCHEDULE [
+ -- mkIntCLit (I# (word2Int# liveness_mask)),
+ -- mkIntCLit (if node_reqd then 1 else 0)])
+
+
+-------------------------
+-- The @GRAN_YIELD@ macro is taken from JSM's code for Concurrent Haskell. It
+-- allows a context switch at places where @node@ is not alive (it uses the
+-- @Continue@ rather than the @EnterNodeCode@ function in the RTS). We emit
+-- this kind of macro at the beginning of the following kinds of basic blocks:
+-- \begin{itemize}
+-- \item Slow entry code where node is not alive (see @CgClosure.lhs@). Normally
+-- we use @fetchAndReschedule@ at a slow entry code.
+-- \item Fast entry code (see @CgClosure.lhs@).
+-- \item Alternatives in case expressions (@CLabelledCode@ structures), provided
+-- that they are not inlined (see @CgCases.lhs@). These alternatives will
+-- be turned into separate functions.
+-- \end{itemize}
+
+granYield :: [(Id,GlobalReg)] -- Live registers
+ -> Bool -- Node reqd?
+ -> Code
+
+granYield regs node_reqd
+ | opt_GranMacros && node_reqd = yield liveness
+ | otherwise = nopC
+ where
+ liveness = mkRegLiveness regs 0 0
+
+yield liveness = panic "granYield"
+ -- Was : absC (CMacroStmt GRAN_YIELD
+ -- [mkIntCLit (I# (word2Int# liveness_mask))])
+
+
diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs
new file mode 100644
index 0000000000..bc7c9140ed
--- /dev/null
+++ b/compiler/codeGen/CgPrimOp.hs
@@ -0,0 +1,584 @@
+-----------------------------------------------------------------------------
+--
+-- Code generation for PrimOps.
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+module CgPrimOp (
+ cgPrimOp
+ ) where
+
+#include "HsVersions.h"
+
+import ForeignCall ( CCallConv(CCallConv) )
+import StgSyn ( StgLiveVars, StgArg )
+import CgForeignCall ( emitForeignCall' )
+import CgBindery ( getVolatileRegs, getArgAmodes )
+import CgMonad
+import CgInfoTbls ( getConstrTag )
+import CgUtils ( cmmOffsetW, cmmOffsetB, cmmLoadIndexW )
+import ForeignCall
+import Cmm
+import CLabel ( mkMAP_FROZEN_infoLabel, mkMAP_DIRTY_infoLabel,
+ mkDirty_MUT_VAR_Label, mkRtsCodeLabel )
+import CmmUtils
+import MachOp
+import SMRep
+import PrimOp ( PrimOp(..)
) +import SMRep ( tablesNextToCode ) +import Constants ( wORD_SIZE, wORD_SIZE_IN_BITS ) +import StaticFlags ( opt_Parallel ) +import Outputable + +-- --------------------------------------------------------------------------- +-- Code generation for PrimOps + +cgPrimOp :: [CmmReg] -- where to put the results + -> PrimOp -- the op + -> [StgArg] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code + +cgPrimOp results op args live + = do arg_exprs <- getArgAmodes args + let non_void_args = [ e | (r,e) <- arg_exprs, nonVoidArg r ] + emitPrimOp results op non_void_args live + + +emitPrimOp :: [CmmReg] -- where to put the results + -> PrimOp -- the op + -> [CmmExpr] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code + +-- First we handle various awkward cases specially. The remaining +-- easy cases are then handled by translateOp, defined below. + +emitPrimOp [res_r,res_c] IntAddCOp [aa,bb] live +{- + With some bit-twiddling, we can define int{Add,Sub}Czh portably in + C, and without needing any comparisons. This may not be the + fastest way to do it - if you have better code, please send it! --SDM + + Return : r = a + b, c = 0 if no overflow, 1 on overflow. + + We currently don't make use of the r value if c is != 0 (i.e. + overflow), we just convert to big integers and try again. This + could be improved by making r and c the correct values for + plugging into a new J#. + + { r = ((I_)(a)) + ((I_)(b)); \ + c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + Wading through the mass of bracketry, it seems to reduce to: + c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1) + +-} + = stmtsC [ + CmmAssign res_r (CmmMachOp mo_wordAdd [aa,bb]), + CmmAssign res_c $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordNot [CmmMachOp mo_wordXor [aa,bb]], + CmmMachOp mo_wordXor [aa, CmmReg res_r] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res_r,res_c] IntSubCOp [aa,bb] live +{- Similarly: + #define subIntCzh(r,c,a,b) \ + { r = ((I_)(a)) - ((I_)(b)); \ + c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + + c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1) +-} + = stmtsC [ + CmmAssign res_r (CmmMachOp mo_wordSub [aa,bb]), + CmmAssign res_c $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordXor [aa,bb], + CmmMachOp mo_wordXor [aa, CmmReg res_r] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res] ParOp [arg] live + = do + -- for now, just implement this in a C function + -- later, we might want to inline it. 
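+ -- Roughly: res = newSpark(BaseReg, arg), an ordinary C call, with
+ -- the volatile global registers among 'live' saved around it.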
+ vols <- getVolatileRegs live + emitForeignCall' PlayRisky + [(res,NoHint)] + (CmmForeignCall newspark CCallConv) + [(CmmReg (CmmGlobal BaseReg), PtrHint), (arg,PtrHint)] + (Just vols) + where + newspark = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("newSpark"))) + +emitPrimOp [res] ReadMutVarOp [mutv] live + = stmtC (CmmAssign res (cmmLoadIndexW mutv fixedHdrSize)) + +emitPrimOp [] WriteMutVarOp [mutv,var] live + = do + stmtC (CmmStore (cmmOffsetW mutv fixedHdrSize) var) + vols <- getVolatileRegs live + emitForeignCall' PlayRisky + [{-no results-}] + (CmmForeignCall (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) + CCallConv) + [(CmmReg (CmmGlobal BaseReg), PtrHint), (mutv,PtrHint)] + (Just vols) + +-- #define sizzeofByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofByteArrayOp [arg] live + = stmtC $ + CmmAssign res (CmmMachOp mo_wordMul [ + cmmLoadIndexW arg fixedHdrSize, + CmmLit (mkIntCLit wORD_SIZE) + ]) + +-- #define sizzeofMutableByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofMutableByteArrayOp [arg] live + = emitPrimOp [res] SizeofByteArrayOp [arg] live + + +-- #define touchzh(o) /* nothing */ +emitPrimOp [] TouchOp [arg] live + = nopC + +-- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a) +emitPrimOp [res] ByteArrayContents_Char [arg] live + = stmtC (CmmAssign res (cmmOffsetB arg arrWordsHdrSize)) + +-- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn) +emitPrimOp [res] StableNameToIntOp [arg] live + = stmtC (CmmAssign res (cmmLoadIndexW arg fixedHdrSize)) + +-- #define eqStableNamezh(r,sn1,sn2) \ +-- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) +emitPrimOp [res] EqStableNameOp [arg1,arg2] live + = stmtC (CmmAssign res (CmmMachOp mo_wordEq [ + cmmLoadIndexW arg1 fixedHdrSize, + cmmLoadIndexW arg2 fixedHdrSize + ])) + + +emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2] live + = stmtC (CmmAssign res (CmmMachOp mo_wordEq [arg1,arg2])) + +-- #define addrToHValuezh(r,a) r=(P_)a +emitPrimOp [res] AddrToHValueOp [arg] live + = stmtC (CmmAssign res arg) + +-- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info)) +emitPrimOp [res] DataToTagOp [arg] live + = stmtC (CmmAssign res (getConstrTag arg)) + +{- Freezing arrays-of-ptrs requires changing an info table, for the + benefit of the generational collector. It needs to scavenge mutable + objects, even if they are in old space. When they become immutable, + they can be removed from this scavenge list. 
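+ (Hence unsafeFreezeArray# below just rewrites the info pointer to
+ the frozen variant; the payload is untouched.)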
-} + +-- #define unsafeFreezzeArrayzh(r,a) +-- { +-- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info); +-- r = a; +-- } +emitPrimOp [res] UnsafeFreezeArrayOp [arg] live + = stmtsC [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)), + CmmAssign res arg ] + +-- #define unsafeFreezzeByteArrayzh(r,a) r=(a) +emitPrimOp [res] UnsafeFreezeByteArrayOp [arg] live + = stmtC (CmmAssign res arg) + +-- Reading/writing pointer arrays + +emitPrimOp [r] ReadArrayOp [obj,ix] live = doReadPtrArrayOp r obj ix +emitPrimOp [r] IndexArrayOp [obj,ix] live = doReadPtrArrayOp r obj ix +emitPrimOp [] WriteArrayOp [obj,ix,v] live = doWritePtrArrayOp obj ix v + +-- IndexXXXoffAddr + +emitPrimOp res IndexOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args +emitPrimOp res IndexOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args +emitPrimOp res IndexOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res IndexOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res IndexOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res IndexOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res IndexOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res IndexOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args + +-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr. 
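+-- (Both go through doIndexOffAddrOp: a load at base + ix*size, with no
+-- header offset since Addr# is a bare pointer, plus an optional
+-- widening cast to a full word.)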
+ +emitPrimOp res ReadOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args +emitPrimOp res ReadOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args +emitPrimOp res ReadOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res ReadOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res ReadOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res ReadOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res ReadOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res ReadOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args + +-- IndexXXXArray + +emitPrimOp res IndexByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args +emitPrimOp res IndexByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args +emitPrimOp res IndexByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res IndexByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res IndexByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res IndexByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args +emitPrimOp res IndexByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res IndexByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args + +-- ReadXXXArray, identical to IndexXXXArray. 
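+-- (These use doIndexByteArrayOp: the same load, but offset past the
+-- arrWordsHdrSize header of the ByteArray# closure.)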
+ +emitPrimOp res ReadByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args +emitPrimOp res ReadByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args +emitPrimOp res ReadByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res ReadByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res ReadByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res ReadByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args +emitPrimOp res ReadByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res ReadByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args + +-- WriteXXXoffAddr + +emitPrimOp res WriteOffAddrOp_Char args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteOffAddrOp_WideChar args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteOffAddrOp_Int args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Word args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Addr args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Float args live = doWriteOffAddrOp Nothing F32 res args +emitPrimOp res WriteOffAddrOp_Double args live = doWriteOffAddrOp Nothing F64 res args +emitPrimOp res WriteOffAddrOp_StablePtr args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Int8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteOffAddrOp_Int16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteOffAddrOp_Int32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteOffAddrOp_Int64 args live = doWriteOffAddrOp Nothing I64 res args +emitPrimOp res WriteOffAddrOp_Word8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteOffAddrOp_Word16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteOffAddrOp_Word32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteOffAddrOp_Word64 args live = doWriteOffAddrOp Nothing I64 res args + +-- WriteXXXArray + +emitPrimOp res WriteByteArrayOp_Char args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteByteArrayOp_WideChar args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteByteArrayOp_Int args live = doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Word args live = doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Addr args live = 
doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Float args live = doWriteByteArrayOp Nothing F32 res args +emitPrimOp res WriteByteArrayOp_Double args live = doWriteByteArrayOp Nothing F64 res args +emitPrimOp res WriteByteArrayOp_StablePtr args live = doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Int8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteByteArrayOp_Int16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteByteArrayOp_Int32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteByteArrayOp_Int64 args live = doWriteByteArrayOp Nothing I64 res args +emitPrimOp res WriteByteArrayOp_Word8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteByteArrayOp_Word16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteByteArrayOp_Word32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteByteArrayOp_Word64 args live = doWriteByteArrayOp Nothing I64 res args + + +-- The rest just translate straightforwardly +emitPrimOp [res] op [arg] live + | nopOp op + = stmtC (CmmAssign res arg) + + | Just (mop,rep) <- narrowOp op + = stmtC (CmmAssign res (CmmMachOp (mop rep wordRep) [ + CmmMachOp (mop wordRep rep) [arg]])) + +emitPrimOp [res] op args live + | Just prim <- callishOp op + = do vols <- getVolatileRegs live + emitForeignCall' PlayRisky + [(res,NoHint)] + (CmmPrim prim) + [(a,NoHint) | a<-args] -- ToDo: hints? + (Just vols) + + | Just mop <- translateOp op + = let stmt = CmmAssign res (CmmMachOp mop args) in + stmtC stmt + +emitPrimOp _ op _ _ + = pprPanic "emitPrimOp: can't translate PrimOp" (ppr op) + + +-- These PrimOps are NOPs in Cmm + +nopOp Int2WordOp = True +nopOp Word2IntOp = True +nopOp Int2AddrOp = True +nopOp Addr2IntOp = True +nopOp ChrOp = True -- Int# and Char# are rep'd the same +nopOp OrdOp = True +nopOp _ = False + +-- These PrimOps turn into double casts + +narrowOp Narrow8IntOp = Just (MO_S_Conv, I8) +narrowOp Narrow16IntOp = Just (MO_S_Conv, I16) +narrowOp Narrow32IntOp = Just (MO_S_Conv, I32) +narrowOp Narrow8WordOp = Just (MO_U_Conv, I8) +narrowOp Narrow16WordOp = Just (MO_U_Conv, I16) +narrowOp Narrow32WordOp = Just (MO_U_Conv, I32) +narrowOp _ = Nothing + +-- Native word signless ops + +translateOp IntAddOp = Just mo_wordAdd +translateOp IntSubOp = Just mo_wordSub +translateOp WordAddOp = Just mo_wordAdd +translateOp WordSubOp = Just mo_wordSub +translateOp AddrAddOp = Just mo_wordAdd +translateOp AddrSubOp = Just mo_wordSub + +translateOp IntEqOp = Just mo_wordEq +translateOp IntNeOp = Just mo_wordNe +translateOp WordEqOp = Just mo_wordEq +translateOp WordNeOp = Just mo_wordNe +translateOp AddrEqOp = Just mo_wordEq +translateOp AddrNeOp = Just mo_wordNe + +translateOp AndOp = Just mo_wordAnd +translateOp OrOp = Just mo_wordOr +translateOp XorOp = Just mo_wordXor +translateOp NotOp = Just mo_wordNot +translateOp SllOp = Just mo_wordShl +translateOp SrlOp = Just mo_wordUShr + +translateOp AddrRemOp = Just mo_wordURem + +-- Native word signed ops + +translateOp IntMulOp = Just mo_wordMul +translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordRep) +translateOp IntQuotOp = Just mo_wordSQuot +translateOp IntRemOp = Just mo_wordSRem +translateOp IntNegOp = Just mo_wordSNeg + + +translateOp IntGeOp = Just mo_wordSGe +translateOp IntLeOp = Just mo_wordSLe +translateOp IntGtOp = Just mo_wordSGt +translateOp IntLtOp = Just 
mo_wordSLt + +translateOp ISllOp = Just mo_wordShl +translateOp ISraOp = Just mo_wordSShr +translateOp ISrlOp = Just mo_wordUShr + +-- Native word unsigned ops + +translateOp WordGeOp = Just mo_wordUGe +translateOp WordLeOp = Just mo_wordULe +translateOp WordGtOp = Just mo_wordUGt +translateOp WordLtOp = Just mo_wordULt + +translateOp WordMulOp = Just mo_wordMul +translateOp WordQuotOp = Just mo_wordUQuot +translateOp WordRemOp = Just mo_wordURem + +translateOp AddrGeOp = Just mo_wordUGe +translateOp AddrLeOp = Just mo_wordULe +translateOp AddrGtOp = Just mo_wordUGt +translateOp AddrLtOp = Just mo_wordULt + +-- Char# ops + +translateOp CharEqOp = Just (MO_Eq wordRep) +translateOp CharNeOp = Just (MO_Ne wordRep) +translateOp CharGeOp = Just (MO_U_Ge wordRep) +translateOp CharLeOp = Just (MO_U_Le wordRep) +translateOp CharGtOp = Just (MO_U_Gt wordRep) +translateOp CharLtOp = Just (MO_U_Lt wordRep) + +-- Double ops + +translateOp DoubleEqOp = Just (MO_Eq F64) +translateOp DoubleNeOp = Just (MO_Ne F64) +translateOp DoubleGeOp = Just (MO_S_Ge F64) +translateOp DoubleLeOp = Just (MO_S_Le F64) +translateOp DoubleGtOp = Just (MO_S_Gt F64) +translateOp DoubleLtOp = Just (MO_S_Lt F64) + +translateOp DoubleAddOp = Just (MO_Add F64) +translateOp DoubleSubOp = Just (MO_Sub F64) +translateOp DoubleMulOp = Just (MO_Mul F64) +translateOp DoubleDivOp = Just (MO_S_Quot F64) +translateOp DoubleNegOp = Just (MO_S_Neg F64) + +-- Float ops + +translateOp FloatEqOp = Just (MO_Eq F32) +translateOp FloatNeOp = Just (MO_Ne F32) +translateOp FloatGeOp = Just (MO_S_Ge F32) +translateOp FloatLeOp = Just (MO_S_Le F32) +translateOp FloatGtOp = Just (MO_S_Gt F32) +translateOp FloatLtOp = Just (MO_S_Lt F32) + +translateOp FloatAddOp = Just (MO_Add F32) +translateOp FloatSubOp = Just (MO_Sub F32) +translateOp FloatMulOp = Just (MO_Mul F32) +translateOp FloatDivOp = Just (MO_S_Quot F32) +translateOp FloatNegOp = Just (MO_S_Neg F32) + +-- Conversions + +translateOp Int2DoubleOp = Just (MO_S_Conv wordRep F64) +translateOp Double2IntOp = Just (MO_S_Conv F64 wordRep) + +translateOp Int2FloatOp = Just (MO_S_Conv wordRep F32) +translateOp Float2IntOp = Just (MO_S_Conv F32 wordRep) + +translateOp Float2DoubleOp = Just (MO_S_Conv F32 F64) +translateOp Double2FloatOp = Just (MO_S_Conv F64 F32) + +-- Word comparisons masquerading as more exotic things. + +translateOp SameMutVarOp = Just mo_wordEq +translateOp SameMVarOp = Just mo_wordEq +translateOp SameMutableArrayOp = Just mo_wordEq +translateOp SameMutableByteArrayOp = Just mo_wordEq +translateOp SameTVarOp = Just mo_wordEq +translateOp EqStablePtrOp = Just mo_wordEq + +translateOp _ = Nothing + +-- These primops are implemented by CallishMachOps, because they sometimes +-- turn into foreign calls depending on the backend. 
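+-- (For example, a double-precision sqrt may be a single machine
+-- instruction on one backend but a call to the C sqrt function on
+-- another.)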
+ +callishOp DoublePowerOp = Just MO_F64_Pwr +callishOp DoubleSinOp = Just MO_F64_Sin +callishOp DoubleCosOp = Just MO_F64_Cos +callishOp DoubleTanOp = Just MO_F64_Tan +callishOp DoubleSinhOp = Just MO_F64_Sinh +callishOp DoubleCoshOp = Just MO_F64_Cosh +callishOp DoubleTanhOp = Just MO_F64_Tanh +callishOp DoubleAsinOp = Just MO_F64_Asin +callishOp DoubleAcosOp = Just MO_F64_Acos +callishOp DoubleAtanOp = Just MO_F64_Atan +callishOp DoubleLogOp = Just MO_F64_Log +callishOp DoubleExpOp = Just MO_F64_Exp +callishOp DoubleSqrtOp = Just MO_F64_Sqrt + +callishOp FloatPowerOp = Just MO_F32_Pwr +callishOp FloatSinOp = Just MO_F32_Sin +callishOp FloatCosOp = Just MO_F32_Cos +callishOp FloatTanOp = Just MO_F32_Tan +callishOp FloatSinhOp = Just MO_F32_Sinh +callishOp FloatCoshOp = Just MO_F32_Cosh +callishOp FloatTanhOp = Just MO_F32_Tanh +callishOp FloatAsinOp = Just MO_F32_Asin +callishOp FloatAcosOp = Just MO_F32_Acos +callishOp FloatAtanOp = Just MO_F32_Atan +callishOp FloatLogOp = Just MO_F32_Log +callishOp FloatExpOp = Just MO_F32_Exp +callishOp FloatSqrtOp = Just MO_F32_Sqrt + +callishOp _ = Nothing + +------------------------------------------------------------------------------ +-- Helpers for translating various minor variants of array indexing. + +doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx +doIndexOffAddrOp _ _ _ _ + = panic "CgPrimOp: doIndexOffAddrOp" + +doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead arrWordsHdrSize maybe_post_read_cast rep res addr idx +doIndexByteArrayOp _ _ _ _ + = panic "CgPrimOp: doIndexByteArrayOp" + +doReadPtrArrayOp res addr idx + = mkBasicIndexedRead arrPtrsHdrSize Nothing wordRep res addr idx + + +doWriteOffAddrOp maybe_pre_write_cast rep [] [addr,idx,val] + = mkBasicIndexedWrite 0 maybe_pre_write_cast rep addr idx val +doWriteOffAddrOp _ _ _ _ + = panic "CgPrimOp: doWriteOffAddrOp" + +doWriteByteArrayOp maybe_pre_write_cast rep [] [addr,idx,val] + = mkBasicIndexedWrite arrWordsHdrSize maybe_pre_write_cast rep addr idx val +doWriteByteArrayOp _ _ _ _ + = panic "CgPrimOp: doWriteByteArrayOp" + +doWritePtrArrayOp addr idx val + = do stmtC (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) + mkBasicIndexedWrite arrPtrsHdrSize Nothing wordRep addr idx val + + +mkBasicIndexedRead off Nothing read_rep res base idx + = stmtC (CmmAssign res (cmmLoadIndexOffExpr off read_rep base idx)) +mkBasicIndexedRead off (Just cast) read_rep res base idx + = stmtC (CmmAssign res (CmmMachOp cast [ + cmmLoadIndexOffExpr off read_rep base idx])) + +mkBasicIndexedWrite off Nothing write_rep base idx val + = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) val) +mkBasicIndexedWrite off (Just cast) write_rep base idx val + = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) (CmmMachOp cast [val])) + +-- ---------------------------------------------------------------------------- +-- Misc utils + +cmmIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmIndexOffExpr off rep base idx + = cmmIndexExpr rep (cmmOffsetB base off) idx + +cmmLoadIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmLoadIndexOffExpr off rep base idx + = CmmLoad (cmmIndexOffExpr off rep base idx) rep + +setInfo :: CmmExpr -> CmmExpr -> CmmStmt +setInfo closure_ptr info_ptr = CmmStore closure_ptr info_ptr + diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs new file mode 100644 index 0000000000..1488e34956 --- /dev/null +++ 
b/compiler/codeGen/CgProf.hs @@ -0,0 +1,478 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for profiling +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgProf ( + mkCCostCentre, mkCCostCentreStack, + + -- Cost-centre Profiling + dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, + enterCostCentre, enterCostCentrePAP, enterCostCentreThunk, + chooseDynCostCentres, + costCentreFrom, + curCCS, curCCSAddr, + emitCostCentreDecl, emitCostCentreStackDecl, + emitRegisterCC, emitRegisterCCS, + emitSetCCC, emitCCS, + + -- Lag/drag/void stuff + ldvEnter, ldvRecordCreate + ) where + +#include "HsVersions.h" +#include "MachDeps.h" + -- For WORD_SIZE_IN_BITS only. +#include "../includes/Constants.h" + -- For LDV_CREATE_MASK, LDV_STATE_USE + -- which are StgWords +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import ClosureInfo ( ClosureInfo, closureSize, + closureName, isToplevClosure, closureReEntrant, ) +import CgUtils +import CgMonad +import SMRep ( StgWord, profHdrSize ) + +import Cmm +import MachOp +import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr ) +import CLabel ( mkCCLabel, mkCCSLabel, mkRtsDataLabel ) + +import Module ( moduleString ) +import Id ( Id ) +import CostCentre +import StgSyn ( GenStgExpr(..), StgExpr ) +import StaticFlags ( opt_SccProfilingOn ) +import FastString ( FastString, mkFastString, LitString ) +import Constants -- Lots of field offsets +import Outputable + +import Maybe +import Char ( ord ) +import Monad ( when ) + +----------------------------------------------------------------------------- +-- +-- Cost-centre-stack Profiling +-- +----------------------------------------------------------------------------- + +-- Expression representing the current cost centre stack +curCCS :: CmmExpr +curCCS = CmmLoad curCCSAddr wordRep + +-- Address of current CCS variable, for storing into +curCCSAddr :: CmmExpr +curCCSAddr = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCCS"))) + +mkCCostCentre :: CostCentre -> CmmLit +mkCCostCentre cc = CmmLabel (mkCCLabel cc) + +mkCCostCentreStack :: CostCentreStack -> CmmLit +mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) + +costCentreFrom :: CmmExpr -- A closure pointer + -> CmmExpr -- The cost centre from that closure +costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) wordRep + +staticProfHdr :: CostCentreStack -> [CmmLit] +-- The profiling header words in a static closure +-- Was SET_STATIC_PROF_HDR +staticProfHdr ccs = ifProfilingL [mkCCostCentreStack ccs, + staticLdvInit] + +dynProfHdr :: CmmExpr -> [CmmExpr] +-- Profiling header words in a dynamic closure +dynProfHdr ccs = ifProfilingL [ccs, dynLdvInit] + +initUpdFrameProf :: CmmExpr -> Code +-- Initialise the profiling field of an update frame +initUpdFrameProf frame_amode + = ifProfiling $ -- frame->header.prof.ccs = CCCS + stmtC (CmmStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS) + -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) + -- is unnecessary because it is not used anyhow. + +-- ----------------------------------------------------------------------------- +-- Recording allocation in a cost centre + +-- | Record the allocation of a closure. The CmmExpr is the cost +-- centre stack to which to attribute the allocation. 
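+-- (A no-op unless cost-centre profiling is on; see ifProfiling below.)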
+profDynAlloc :: ClosureInfo -> CmmExpr -> Code
+profDynAlloc cl_info ccs
+ = ifProfiling $
+ profAlloc (CmmLit (mkIntCLit (closureSize cl_info))) ccs
+
+-- | Record the allocation of a closure (size is given by a CmmExpr)
+-- The size must be in words, because the allocation counter in a CCS counts
+-- in words.
+profAlloc :: CmmExpr -> CmmExpr -> Code
+profAlloc words ccs
+ = ifProfiling $
+ stmtC (addToMemE alloc_rep
+ (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc)
+ (CmmMachOp (MO_U_Conv wordRep alloc_rep) $
+ [CmmMachOp mo_wordSub [words,
+ CmmLit (mkIntCLit profHdrSize)]]))
+ -- subtract the "profiling overhead", which is the
+ -- profiling header in a closure.
+ where
+ alloc_rep = REP_CostCentreStack_mem_alloc
+
+-- ----------------------------------------------------------------------
+-- Setting the cost centre in a new closure
+
+chooseDynCostCentres :: CostCentreStack
+ -> [Id] -- Args
+ -> StgExpr -- Body
+ -> FCode (CmmExpr, CmmExpr)
+-- Called when allocating a closure
+-- Tells which cost centre to put in the object, and which
+-- to blame the cost of allocation on
+chooseDynCostCentres ccs args body = do
+ -- Cost-centre we record in the object
+ use_ccs <- emitCCS ccs
+
+ -- Cost-centre on which we blame the allocation
+ let blame_ccs
+ | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS)
+ | otherwise = use_ccs
+
+ return (use_ccs, blame_ccs)
+
+
+-- Some CostCentreStacks are a sequence of pushes on top of CCCS.
+-- These pushes must be performed before we can refer to the stack in
+-- an expression.
+emitCCS :: CostCentreStack -> FCode CmmExpr
+emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's)
+ where
+ (cc's, ccs') = decomposeCCS ccs
+
+ push_em ccs [] = return ccs
+ push_em ccs (cc:rest) = do
+ tmp <- newTemp wordRep
+ pushCostCentre tmp ccs cc
+ push_em (CmmReg tmp) rest
+
+ccsExpr :: CostCentreStack -> CmmExpr
+ccsExpr ccs
+ | isCurrentCCS ccs = curCCS
+ | otherwise = CmmLit (mkCCostCentreStack ccs)
+
+
+isBox :: StgExpr -> Bool
+-- If it's an utterly trivial RHS, then it must be
+-- one introduced by boxHigherOrderArgs for profiling,
+-- so we charge it to "OVERHEAD".
+-- This looks like a GROSS HACK to me --SDM
+isBox (StgApp fun []) = True
+isBox other = False
+
+
+-- -----------------------------------------------------------------------
+-- Setting the current cost centre on entry to a closure
+
+-- For lexically scoped profiling we have to load the cost centre from
+-- the closure entered, if the costs are not supposed to be inherited.
+-- This is done immediately on entering the fast entry point.
+
+-- Load current cost centre from closure, if not inherited.
+-- Node is guaranteed to point to it, if profiling and not inherited.
+
+enterCostCentre
+ :: ClosureInfo
+ -> CostCentreStack
+ -> StgExpr -- The RHS of the closure
+ -> Code
+
+-- We used to have a special case for bindings of form
+-- f = g True
+-- where g has arity 2. The RHS is a thunk, but we don't
+-- need to update it; and we want to subsume costs.
+-- We don't have this sort of PAP any more, so the special
+-- case has gone away.
+ +enterCostCentre closure_info ccs body + = ifProfiling $ + ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs) + enter_cost_centre closure_info ccs body + +enter_cost_centre closure_info ccs body + | isSubsumedCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(re_entrant) + enter_ccs_fsub + + | isDerivedFromCurrentCCS ccs + = do { + if re_entrant && not is_box + then + enter_ccs_fun node_ccs + else + stmtC (CmmStore curCCSAddr node_ccs) + + -- don't forget to bump the scc count. This closure might have been + -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal + -- pass has turned into simply let x = e in ...x... and attached + -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that + -- we don't lose the scc counter, bump it in the entry code for x. + -- ToDo: for a multi-push we should really bump the counter for + -- each of the intervening CCSs, not just the top one. + ; when (not (isCurrentCCS ccs)) $ + stmtC (bumpSccCount curCCS) + } + + | isCafCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(not re_entrant) + do { -- This is just a special case of the isDerivedFromCurrentCCS + -- case above. We could delete this, but it's a micro + -- optimisation and saves a bit of code. + stmtC (CmmStore curCCSAddr enc_ccs) + ; stmtC (bumpSccCount node_ccs) + } + + | otherwise + = panic "enterCostCentre" + where + enc_ccs = CmmLit (mkCCostCentreStack ccs) + re_entrant = closureReEntrant closure_info + node_ccs = costCentreFrom (CmmReg nodeReg) + is_box = isBox body + +-- set the current CCS when entering a PAP +enterCostCentrePAP :: CmmExpr -> Code +enterCostCentrePAP closure = + ifProfiling $ do + enter_ccs_fun (costCentreFrom closure) + enteringPAP 1 + +enterCostCentreThunk :: CmmExpr -> Code +enterCostCentreThunk closure = + ifProfiling $ do + stmtC $ CmmStore curCCSAddr (costCentreFrom closure) + +enter_ccs_fun stack = emitRtsCall SLIT("EnterFunCCS") [(stack,PtrHint)] + -- ToDo: vols + +enter_ccs_fsub = enteringPAP 0 + +-- When entering a PAP, EnterFunCCS is called by both the PAP entry +-- code and the function entry code; we don't want the function's +-- entry code to also update CCCS in the event that it was called via +-- a PAP, so we set the flag entering_PAP to indicate that we are +-- entering via a PAP. 
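+-- (enteringPAP n just stores n into the RTS variable entering_PAP;
+-- enter_ccs_fsub above resets it to 0.)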
+enteringPAP :: Integer -> Code +enteringPAP n + = stmtC (CmmStore (CmmLit (CmmLabel (mkRtsDataLabel SLIT("entering_PAP")))) + (CmmLit (CmmInt n cIntRep))) + +ifProfiling :: Code -> Code +ifProfiling code + | opt_SccProfilingOn = code + | otherwise = nopC + +ifProfilingL :: [a] -> [a] +ifProfilingL xs + | opt_SccProfilingOn = xs + | otherwise = [] + + +-- --------------------------------------------------------------------------- +-- Initialising Cost Centres & CCSs + +emitCostCentreDecl + :: CostCentre + -> Code +emitCostCentreDecl cc = do + { label <- mkStringCLit (costCentreUserName cc) + ; modl <- mkStringCLit (moduleString (cc_mod cc)) + ; let + lits = [ zero, -- StgInt ccID, + label, -- char *label, + modl, -- char *module, + zero, -- StgWord time_ticks + zero64, -- StgWord64 mem_alloc + subsumed, -- StgInt is_caf + zero -- struct _CostCentre *link + ] + ; emitDataLits (mkCCLabel cc) lits + } + where + subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF + | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring + + +emitCostCentreStackDecl + :: CostCentreStack + -> Code +emitCostCentreStackDecl ccs + | Just cc <- maybeSingletonCCS ccs = do + { let + -- Note: to avoid making any assumptions about how the + -- C compiler (that compiles the RTS, in particular) does + -- layouts of structs containing long-longs, simply + -- pad out the struct with zero words until we hit the + -- size of the overall struct (which we get via DerivedConstants.h) + -- + lits = zero : mkCCostCentre cc : replicate (sizeof_ccs_words - 2) zero + ; emitDataLits (mkCCSLabel ccs) lits + } + | otherwise = pprPanic "emitCostCentreStackDecl" (ppr ccs) + +zero = mkIntCLit 0 +zero64 = CmmInt 0 I64 + +sizeof_ccs_words :: Int +sizeof_ccs_words + -- round up to the next word. 
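+ -- (e.g. were SIZEOF_CostCentreStack 92 bytes with wORD_SIZE 4,
+ -- then 92 `divMod` 4 == (23,0), giving 23 words)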
+ | ms == 0 = ws + | otherwise = ws + 1 + where + (ws,ms) = SIZEOF_CostCentreStack `divMod` wORD_SIZE + +-- --------------------------------------------------------------------------- +-- Registering CCs and CCSs + +-- (cc)->link = CC_LIST; +-- CC_LIST = (cc); +-- (cc)->ccID = CC_ID++; + +emitRegisterCC :: CostCentre -> Code +emitRegisterCC cc = do + { tmp <- newTemp cIntRep + ; stmtsC [ + CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_link) + (CmmLoad cC_LIST wordRep), + CmmStore cC_LIST cc_lit, + CmmAssign tmp (CmmLoad cC_ID cIntRep), + CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_ccID) (CmmReg tmp), + CmmStore cC_ID (cmmRegOffB tmp 1) + ] + } + where + cc_lit = CmmLit (CmmLabel (mkCCLabel cc)) + +-- (ccs)->prevStack = CCS_LIST; +-- CCS_LIST = (ccs); +-- (ccs)->ccsID = CCS_ID++; + +emitRegisterCCS :: CostCentreStack -> Code +emitRegisterCCS ccs = do + { tmp <- newTemp cIntRep + ; stmtsC [ + CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_prevStack) + (CmmLoad cCS_LIST wordRep), + CmmStore cCS_LIST ccs_lit, + CmmAssign tmp (CmmLoad cCS_ID cIntRep), + CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_ccsID) (CmmReg tmp), + CmmStore cCS_ID (cmmRegOffB tmp 1) + ] + } + where + ccs_lit = CmmLit (CmmLabel (mkCCSLabel ccs)) + + +cC_LIST = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CC_LIST"))) +cC_ID = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CC_ID"))) + +cCS_LIST = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCS_LIST"))) +cCS_ID = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCS_ID"))) + +-- --------------------------------------------------------------------------- +-- Set the current cost centre stack + +emitSetCCC :: CostCentre -> Code +emitSetCCC cc + | not opt_SccProfilingOn = nopC + | otherwise = do + tmp <- newTemp wordRep + ASSERT( sccAbleCostCentre cc ) + pushCostCentre tmp curCCS cc + stmtC (CmmStore curCCSAddr (CmmReg tmp)) + when (isSccCountCostCentre cc) $ + stmtC (bumpSccCount curCCS) + +pushCostCentre :: CmmReg -> CmmExpr -> CostCentre -> Code +pushCostCentre result ccs cc + = emitRtsCallWithResult result PtrHint + SLIT("PushCostCentre") [(ccs,PtrHint), + (CmmLit (mkCCostCentre cc), PtrHint)] + +bumpSccCount :: CmmExpr -> CmmStmt +bumpSccCount ccs + = addToMem REP_CostCentreStack_scc_count + (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1 + +----------------------------------------------------------------------------- +-- +-- Lag/drag/void stuff +-- +----------------------------------------------------------------------------- + +-- +-- Initial value for the LDV field in a static closure +-- +staticLdvInit :: CmmLit +staticLdvInit = zeroCLit + +-- +-- Initial value of the LDV field in a dynamic closure +-- +dynLdvInit :: CmmExpr +dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE + CmmMachOp mo_wordOr [ + CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ], + CmmLit (mkWordCLit lDV_STATE_CREATE) + ] + +-- +-- Initialise the LDV word of a new closure +-- +ldvRecordCreate :: CmmExpr -> Code +ldvRecordCreate closure = stmtC $ CmmStore (ldvWord closure) dynLdvInit + +-- +-- Called when a closure is entered, marks the closure as having been "used". +-- The closure is not an 'inherently used' one. +-- The closure is not IND or IND_OLDGEN because neither is considered for LDV +-- profiling. 
+-- +ldvEnter :: CmmExpr -> Code +-- Argument is a closure pointer +ldvEnter cl_ptr + = ifProfiling $ + -- if (era > 0) { + -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | + -- era | LDV_STATE_USE } + emitIf (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) + (stmtC (CmmStore ldv_wd new_ldv_wd)) + where + ldv_wd = ldvWord cl_ptr + new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd wordRep) + (CmmLit (mkWordCLit lDV_CREATE_MASK))) + (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE))) + +loadEra :: CmmExpr +loadEra = CmmMachOp (MO_U_Conv cIntRep wordRep) + [CmmLoad (mkLblExpr (mkRtsDataLabel SLIT("era"))) cIntRep] + +ldvWord :: CmmExpr -> CmmExpr +-- Takes the address of a closure, and returns +-- the address of the LDV word in the closure +ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw + +-- LDV constants, from ghc/includes/Constants.h +lDV_SHIFT = (LDV_SHIFT :: Int) +--lDV_STATE_MASK = (LDV_STATE_MASK :: StgWord) +lDV_CREATE_MASK = (LDV_CREATE_MASK :: StgWord) +--lDV_LAST_MASK = (LDV_LAST_MASK :: StgWord) +lDV_STATE_CREATE = (LDV_STATE_CREATE :: StgWord) +lDV_STATE_USE = (LDV_STATE_USE :: StgWord) + diff --git a/compiler/codeGen/CgStackery.lhs b/compiler/codeGen/CgStackery.lhs new file mode 100644 index 0000000000..7cb310d521 --- /dev/null +++ b/compiler/codeGen/CgStackery.lhs @@ -0,0 +1,339 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgStackery.lhs,v 1.27 2004/09/30 10:35:49 simonpj Exp $ +% +\section[CgStackery]{Stack management functions} + +Stack-twiddling operations, which are pretty low-down and grimy. +(This is the module that knows all about stack layouts, etc.) + +\begin{code} +module CgStackery ( + spRel, getVirtSp, getRealSp, setRealSp, + setRealAndVirtualSp, getSpRelOffset, + + allocPrimStack, allocStackTop, deAllocStackTop, + adjustStackHW, getFinalStackHW, + setStackFrame, getStackFrame, + mkVirtStkOffsets, mkStkAmodes, + freeStackSlots, + pushUpdateFrame, emitPushUpdateFrame, + ) where + +#include "HsVersions.h" + +import CgMonad +import CgUtils ( cmmOffsetB, cmmRegOffW ) +import CgProf ( initUpdFrameProf ) +import SMRep +import Cmm +import CmmUtils ( CmmStmts, mkLblExpr ) +import CLabel ( mkUpdInfoLabel ) +import Constants +import Util ( sortLe ) +import FastString ( LitString ) +import OrdList ( toOL ) +import Outputable +\end{code} + +%************************************************************************ +%* * +\subsection[CgUsages-stackery]{Monad things for fiddling with stack usage} +%* * +%************************************************************************ + +spRel is a little function that abstracts the stack direction. Note that most +of the code generator is dependent on the stack direction anyway, so +changing this on its own spells certain doom. ToDo: remove? + + THIS IS DIRECTION SENSITIVE! + +Stack grows down, positive virtual offsets correspond to negative +additions to the stack pointer. + +\begin{code} +spRel :: VirtualSpOffset -- virtual offset of Sp + -> VirtualSpOffset -- virtual offset of The Thing + -> WordOff -- integer offset +spRel sp off = sp - off +\end{code} + +@setRealAndVirtualSp@ sets into the environment the offsets of the +current position of the real and virtual stack pointers in the current +stack frame. The high-water mark is set too. It generates no code. +It is used to initialise things at the beginning of a closure body. 
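+
+As a worked example of the virtual/real split (the offsets here are
+invented for illustration): with the real Sp at virtual offset 10, a
+slot at virtual offset 7 is addressed as @Sp[3]@, since @spRel 10 7 == 3@;
+with the real Sp at 8, the same slot would be @Sp[1]@.  Only the
+difference between the two virtual offsets matters, which is what lets
+us defer committing to a real Sp until the end of a basic block.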
+ +\begin{code} +setRealAndVirtualSp :: VirtualSpOffset -- New real Sp + -> Code + +setRealAndVirtualSp new_sp + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg {virtSp = new_sp, + realSp = new_sp, + hwSp = new_sp}) } + +getVirtSp :: FCode VirtualSpOffset +getVirtSp + = do { stk_usg <- getStkUsage + ; return (virtSp stk_usg) } + +getRealSp :: FCode VirtualSpOffset +getRealSp + = do { stk_usg <- getStkUsage + ; return (realSp stk_usg) } + +setRealSp :: VirtualSpOffset -> Code +setRealSp new_real_sp + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg {realSp = new_real_sp}) } + +getSpRelOffset :: VirtualSpOffset -> FCode CmmExpr +getSpRelOffset virtual_offset + = do { real_sp <- getRealSp + ; return (cmmRegOffW spReg (spRel real_sp virtual_offset)) } +\end{code} + + +%************************************************************************ +%* * +\subsection[CgStackery-layout]{Laying out a stack frame} +%* * +%************************************************************************ + +'mkVirtStkOffsets' is given a list of arguments. The first argument +gets the /largest/ virtual stack offset (remember, virtual offsets +increase towards the top of stack). + +\begin{code} +mkVirtStkOffsets + :: VirtualSpOffset -- Offset of the last allocated thing + -> [(CgRep,a)] -- things to make offsets for + -> (VirtualSpOffset, -- OUTPUTS: Topmost allocated word + [(a, VirtualSpOffset)]) -- things with offsets (voids filtered out) + +mkVirtStkOffsets init_Sp_offset things + = loop init_Sp_offset [] (reverse things) + where + loop offset offs [] = (offset,offs) + loop offset offs ((VoidArg,t):things) = loop offset offs things + -- ignore Void arguments + loop offset offs ((rep,t):things) + = loop thing_slot ((t,thing_slot):offs) things + where + thing_slot = offset + cgRepSizeW rep + -- offset of thing is offset+size, because we're + -- growing the stack *downwards* as the offsets increase. + +-- | 'mkStkAmodes' is a higher-level version of +-- 'mkVirtStkOffsets'. It starts from the tail-call locations. +-- It returns a single list of addressing modes for the stack +-- locations, and therefore is in the monad. It /doesn't/ adjust the +-- high water mark. + +mkStkAmodes + :: VirtualSpOffset -- Tail call positions + -> [(CgRep,CmmExpr)] -- things to make offsets for + -> FCode (VirtualSpOffset, -- OUTPUTS: Topmost allocated word + CmmStmts) -- Assignments to appropriate stk slots + +mkStkAmodes tail_Sp things + = do { rSp <- getRealSp + ; let (last_Sp_offset, offsets) = mkVirtStkOffsets tail_Sp things + abs_cs = [ CmmStore (cmmRegOffW spReg (spRel rSp offset)) amode + | (amode, offset) <- offsets + ] + ; returnFC (last_Sp_offset, toOL abs_cs) } +\end{code} + +%************************************************************************ +%* * +\subsection[CgStackery-monadery]{Inside-monad functions for stack manipulation} +%* * +%************************************************************************ + +Allocate a virtual offset for something. 
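+
+For example (the free list here is invented for illustration): with
+@freeStk = [2,3,5]@ and a two-word rep, the @find_block@ search in the
+code below returns slot 3, the topmost word of the contiguous pair
+{2,3}, and @delete_block@ then leaves @[5]@; if no contiguous run is
+large enough, we instead grow the virtual Sp by the size of the rep.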
+ +\begin{code} +allocPrimStack :: CgRep -> FCode VirtualSpOffset +allocPrimStack rep + = do { stk_usg <- getStkUsage + ; let free_stk = freeStk stk_usg + ; case find_block free_stk of + Nothing -> do + { let push_virt_sp = virtSp stk_usg + size + ; setStkUsage (stk_usg { virtSp = push_virt_sp, + hwSp = hwSp stk_usg `max` push_virt_sp }) + -- Adjust high water mark + ; return push_virt_sp } + Just slot -> do + { setStkUsage (stk_usg { freeStk = delete_block free_stk slot }) + ; return slot } + } + where + size :: WordOff + size = cgRepSizeW rep + + -- Find_block looks for a contiguous chunk of free slots + -- returning the offset of its topmost word + find_block :: [VirtualSpOffset] -> Maybe VirtualSpOffset + find_block [] = Nothing + find_block (slot:slots) + | take size (slot:slots) == [slot..top_slot] + = Just top_slot + | otherwise + = find_block slots + where -- The stack grows downwards, with increasing virtual offsets. + -- Therefore, the address of a multi-word object is the *highest* + -- virtual offset it occupies (top_slot below). + top_slot = slot+size-1 + + delete_block free_stk slot = [ s | s <- free_stk, + (s<=slot-size) || (s>slot) ] + -- Retain slots which are not in the range + -- slot-size+1..slot +\end{code} + +Allocate a chunk ON TOP OF the stack. + +\begin{code} +allocStackTop :: WordOff -> FCode VirtualSpOffset +allocStackTop size + = do { stk_usg <- getStkUsage + ; let push_virt_sp = virtSp stk_usg + size + ; setStkUsage (stk_usg { virtSp = push_virt_sp, + hwSp = hwSp stk_usg `max` push_virt_sp }) + ; return push_virt_sp } +\end{code} + +Pop some words from the current top of stack. This is used for +de-allocating the return address in a case alternative. + +\begin{code} +deAllocStackTop :: WordOff -> FCode VirtualSpOffset +deAllocStackTop size + = do { stk_usg <- getStkUsage + ; let pop_virt_sp = virtSp stk_usg - size + ; setStkUsage (stk_usg { virtSp = pop_virt_sp }) + ; return pop_virt_sp } +\end{code} + +\begin{code} +adjustStackHW :: VirtualSpOffset -> Code +adjustStackHW offset + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg { hwSp = hwSp stk_usg `max` offset }) } +\end{code} + +A knot-tying beast. + +\begin{code} +getFinalStackHW :: (VirtualSpOffset -> Code) -> Code +getFinalStackHW fcode + = do { fixC (\hw_sp -> do + { fcode hw_sp + ; stk_usg <- getStkUsage + ; return (hwSp stk_usg) }) + ; return () } +\end{code} + +\begin{code} +setStackFrame :: VirtualSpOffset -> Code +setStackFrame offset + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg { frameSp = offset }) } + +getStackFrame :: FCode VirtualSpOffset +getStackFrame + = do { stk_usg <- getStkUsage + ; return (frameSp stk_usg) } +\end{code} + + +%******************************************************** +%* * +%* Setting up update frames * +%* * +%******************************************************** + +@pushUpdateFrame@ $updatee$ pushes a general update frame which +points to $updatee$ as the thing to be updated. It is only used +when a thunk has just been entered, so the (real) stack pointers +are guaranteed to be nicely aligned with the top of stack. +@pushUpdateFrame@ adjusts the virtual and tail stack pointers +to reflect the frame pushed. 
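+
+Schematically, the frame pushed looks like this (layout as produced by
+@emitPushUpdateFrame@ below; the profiling fields, when present, are
+filled in by @initUpdFrameProf@):
+
+    frame_addr ->  [ update-frame info pointer  ]   -- mkUpdInfoLabel
+                   [ ...rest of fixed header... ]
+                   [ updatee                    ]   -- at off_updatee bytes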
+ +\begin{code} +pushUpdateFrame :: CmmExpr -> Code -> Code + +pushUpdateFrame updatee code + = do { +#ifdef DEBUG + EndOfBlockInfo _ sequel <- getEndOfBlockInfo ; + ASSERT(case sequel of { OnStack -> True; _ -> False}) +#endif + + allocStackTop (fixedHdrSize + + sIZEOF_StgUpdateFrame_NoHdr `quot` wORD_SIZE) + ; vsp <- getVirtSp + ; setStackFrame vsp + ; frame_addr <- getSpRelOffset vsp + -- The location of the lowest-address + -- word of the update frame itself + + ; setEndOfBlockInfo (EndOfBlockInfo vsp UpdateCode) $ + do { emitPushUpdateFrame frame_addr updatee + ; code } + } + +emitPushUpdateFrame :: CmmExpr -> CmmExpr -> Code +emitPushUpdateFrame frame_addr updatee = do + stmtsC [ -- Set the info word + CmmStore frame_addr (mkLblExpr mkUpdInfoLabel) + , -- And the updatee + CmmStore (cmmOffsetB frame_addr off_updatee) updatee ] + initUpdFrameProf frame_addr + +off_updatee :: ByteOff +off_updatee = fixedHdrSize*wORD_SIZE + oFFSET_StgUpdateFrame_updatee +\end{code} + + +%************************************************************************ +%* * +\subsection[CgStackery-free]{Free stack slots} +%* * +%************************************************************************ + +Explicitly free some stack space. + +\begin{code} +freeStackSlots :: [VirtualSpOffset] -> Code +freeStackSlots extra_free + = do { stk_usg <- getStkUsage + ; let all_free = addFreeSlots (freeStk stk_usg) (sortLe (<=) extra_free) + ; let (new_vsp, new_free) = trim (virtSp stk_usg) all_free + ; setStkUsage (stk_usg { virtSp = new_vsp, freeStk = new_free }) } + +addFreeSlots :: [VirtualSpOffset] -> [VirtualSpOffset] -> [VirtualSpOffset] +-- Merge the two, assuming both are in increasing order +addFreeSlots cs [] = cs +addFreeSlots [] ns = ns +addFreeSlots (c:cs) (n:ns) + | c < n = c : addFreeSlots cs (n:ns) + | otherwise = n : addFreeSlots (c:cs) ns + +trim :: VirtualSpOffset -> [VirtualSpOffset] -> (VirtualSpOffset, [VirtualSpOffset]) +-- Try to trim back the virtual stack pointer, where there is a +-- continuous bunch of free slots at the end of the free list +trim vsp [] = (vsp, []) +trim vsp (slot:slots) + = case trim vsp slots of + (vsp', []) + | vsp' < slot -> pprTrace "trim: strange" (ppr vsp <+> ppr (slot:slots)) + (vsp', []) + | vsp' == slot -> (vsp'-1, []) + | otherwise -> (vsp', [slot]) + (vsp', slots') -> (vsp', slot:slots') +\end{code} diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs new file mode 100644 index 0000000000..dd7327b745 --- /dev/null +++ b/compiler/codeGen/CgTailCall.lhs @@ -0,0 +1,455 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgTailCall.lhs,v 1.43 2005/06/21 10:44:41 simonmar Exp $ +% +%******************************************************** +%* * +\section[CgTailCall]{Tail calls: converting @StgApps@} +%* * +%******************************************************** + +\begin{code} +module CgTailCall ( + cgTailCall, performTailCall, + performReturn, performPrimReturn, + emitKnownConReturnCode, emitAlgReturnCode, + returnUnboxedTuple, ccallReturnUnboxedTuple, + pushUnboxedTuple, + tailCallPrimOp, + + pushReturnAddress + ) where + +#include "HsVersions.h" + +import CgMonad +import CgBindery ( getArgAmodes, getCgIdInfo, CgIdInfo, maybeLetNoEscape, + idInfoToAmode, cgIdInfoId, cgIdInfoLF, + cgIdInfoArgRep ) +import CgInfoTbls ( entryCode, emitDirectReturnInstr, dataConTagZ, + emitVectoredReturnInstr, closureInfoPtr ) +import CgCallConv +import CgStackery ( setRealSp, mkStkAmodes, adjustStackHW, + getSpRelOffset ) +import 
CgHeapery ( setRealHp, getHpRelOffset ) +import CgUtils ( emitSimultaneously ) +import CgTicky +import ClosureInfo +import SMRep ( CgRep, isVoidArg, separateByPtrFollowness ) +import Cmm +import CmmUtils +import CLabel ( CLabel, mkRtsPrimOpLabel, mkSeqInfoLabel ) +import Type ( isUnLiftedType ) +import Id ( Id, idName, idUnique, idType ) +import DataCon ( DataCon, dataConTyCon ) +import StgSyn ( StgArg ) +import TyCon ( TyCon ) +import PrimOp ( PrimOp ) +import Outputable + +import Monad ( when ) + +----------------------------------------------------------------------------- +-- Tail Calls + +cgTailCall :: Id -> [StgArg] -> Code + +-- Here's the code we generate for a tail call. (NB there may be no +-- arguments, in which case this boils down to just entering a variable.) +-- +-- * Put args in the top locations of the stack. +-- * Adjust the stack ptr +-- * Make R1 point to the function closure if necessary. +-- * Perform the call. +-- +-- Things to be careful about: +-- +-- * Don't overwrite stack locations before you have finished with +-- them (remember you need the function and the as-yet-unmoved +-- arguments). +-- * Preferably, generate no code to replace x by x on the stack (a +-- common situation in tail-recursion). +-- * Adjust the stack high water mark appropriately. +-- +-- Treat unboxed locals exactly like literals (above) except use the addr +-- mode for the local instead of (CLit lit) in the assignment. + +cgTailCall fun args + = do { fun_info <- getCgIdInfo fun + + ; if isUnLiftedType (idType fun) + then -- Primitive return + ASSERT( null args ) + do { fun_amode <- idInfoToAmode fun_info + ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode } + + else -- Normal case, fun is boxed + do { arg_amodes <- getArgAmodes args + ; performTailCall fun_info arg_amodes noStmts } + } + + +-- ----------------------------------------------------------------------------- +-- The guts of a tail-call + +performTailCall + :: CgIdInfo -- The function + -> [(CgRep,CmmExpr)] -- Args + -> CmmStmts -- Pending simultaneous assignments + -- *** GUARANTEED to contain only stack assignments. + -> Code + +performTailCall fun_info arg_amodes pending_assts + | Just join_sp <- maybeLetNoEscape fun_info + = -- A let-no-escape is slightly different, because we + -- arrange the stack arguments into pointers and non-pointers + -- to make the heap check easier. The tail-call sequence + -- is very similar to returning an unboxed tuple, so we + -- share some code. + do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes + ; emitSimultaneously (pending_assts `plusStmts` arg_assts) + ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info)) + ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) } + + | otherwise + = do { fun_amode <- idInfoToAmode fun_info + ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode) + opt_node_asst | nodeMustPointToIt lf_info = node_asst + | otherwise = noStmts + ; EndOfBlockInfo sp _ <- getEndOfBlockInfo + ; hmods <- getHomeModules + + ; case (getCallMethod hmods fun_name lf_info (length arg_amodes)) of + + -- Node must always point to things we enter + EnterIt -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; let target = entryCode (closureInfoPtr (CmmReg nodeReg)) + ; doFinalJump sp False (stmtC (CmmJump target [])) } + + -- A function, but we have zero arguments. It is already in WHNF, + -- so we can just return it. + -- As with any return, Node must point to it. 
+ ReturnIt -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; doFinalJump sp False emitDirectReturnInstr } + + -- A real constructor. Don't bother entering it, + -- just do the right sort of return instead. + -- As with any return, Node must point to it. + ReturnCon con -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; doFinalJump sp False (emitKnownConReturnCode con) } + + JumpToIt lbl -> do + { emitSimultaneously (opt_node_asst `plusStmts` pending_assts) + ; doFinalJump sp False (jumpToLbl lbl) } + + -- A slow function call via the RTS apply routines + -- Node must definitely point to the thing + SlowCall -> do + { when (not (null arg_amodes)) $ do + { if (isKnownFun lf_info) + then tickyKnownCallTooFewArgs + else tickyUnknownCall + ; tickySlowCallPat (map fst arg_amodes) + } + + ; let (apply_lbl, args, extra_args) + = constructSlowCall arg_amodes + + ; directCall sp apply_lbl args extra_args + (node_asst `plusStmts` pending_assts) + } + + -- A direct function call (possibly with some left-over arguments) + DirectEntry lbl arity -> do + { if arity == length arg_amodes + then tickyKnownCallExact + else do tickyKnownCallExtraArgs + tickySlowCallPat (map fst (drop arity arg_amodes)) + + ; let + -- The args beyond the arity go straight on the stack + (arity_args, extra_args) = splitAt arity arg_amodes + + ; directCall sp lbl arity_args extra_args + (opt_node_asst `plusStmts` pending_assts) + } + } + where + fun_name = idName (cgIdInfoId fun_info) + lf_info = cgIdInfoLF fun_info + + + +directCall sp lbl args extra_args assts = do + let + -- First chunk of args go in registers + (reg_arg_amodes, stk_args) = assignCallRegs args + + -- Any "extra" arguments are placed in frames on the + -- stack after the other arguments. + slow_stk_args = slowArgs extra_args + + reg_assts = assignToRegs reg_arg_amodes + -- + (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args) + + emitSimultaneously (reg_assts `plusStmts` + stk_assts `plusStmts` + assts) + + doFinalJump final_sp False (jumpToLbl lbl) + +-- ----------------------------------------------------------------------------- +-- The final clean-up before we do a jump at the end of a basic block. +-- This code is shared by tail-calls and returns. + +doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code +doFinalJump final_sp is_let_no_escape jump_code + = do { -- Adjust the high-water mark if necessary + adjustStackHW final_sp + + -- Push a return address if necessary (after the assignments + -- above, in case we clobber a live stack location) + -- + -- DONT push the return address when we're about to jump to a + -- let-no-escape: the final tail call in the let-no-escape + -- will do this. + ; eob <- getEndOfBlockInfo + ; whenC (not is_let_no_escape) (pushReturnAddress eob) + + -- Final adjustment of Sp/Hp + ; adjustSpAndHp final_sp + + -- and do the jump + ; jump_code } + +-- ----------------------------------------------------------------------------- +-- A general return (just a special case of doFinalJump, above) + +performReturn :: Code -- The code to execute to actually do the return + -> Code + +performReturn finish_code + = do { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo + ; doFinalJump args_sp False{-not a LNE-} finish_code } + +-- ----------------------------------------------------------------------------- +-- Primitive Returns +-- Just load the return value into the right register, and return. 
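+--
+-- Schematically, for a non-void rep the code emitted below is just
+-- (pseudo-code; the register is chosen by dataReturnConvPrim, so it
+-- need not literally be R1):
+--
+--      R1 = amode;
+--      <adjust Sp/Hp, push return address if still pending>
+--      jump to the return address on the top of the stack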
+
+performPrimReturn :: CgRep -> CmmExpr  -- The thing to return
+                  -> Code
+performPrimReturn rep amode
+  =  do { whenC (not (isVoidArg rep))
+               (stmtC (CmmAssign ret_reg amode))
+       ; performReturn emitDirectReturnInstr }
+  where
+    ret_reg = dataReturnConvPrim rep
+
+-- -----------------------------------------------------------------------------
+-- Algebraic constructor returns
+
+-- Constructor is built on the heap; Node is set.
+-- All that remains is to do the right sort of jump.
+
+emitKnownConReturnCode :: DataCon -> Code
+emitKnownConReturnCode con
+  = emitAlgReturnCode (dataConTyCon con)
+                      (CmmLit (mkIntCLit (dataConTagZ con)))
+                       -- emitAlgReturnCode requires zero-indexed tag
+
+emitAlgReturnCode :: TyCon -> CmmExpr -> Code
+-- emitAlgReturnCode is used both by emitKnownConReturnCode,
+-- and by PrimOps that return enumerated types (i.e.
+-- all the comparison operators).
+emitAlgReturnCode tycon tag
+ = do  { case ctrlReturnConvAlg tycon of
+           VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz
+                                       ; emitVectoredReturnInstr tag }
+           UnvectoredReturn _ -> emitDirectReturnInstr
+       }
+
+
+-- ---------------------------------------------------------------------------
+-- Unboxed tuple returns
+
+-- These are a bit like a normal tail call, except that:
+--
+--   - The tail-call target is an info table on the stack
+--
+--   - We separate stack arguments into pointers and non-pointers,
+--     to make it easier to leave things in a sane state for a heap check.
+--     This is OK because we can never partially-apply an unboxed tuple,
+--     unlike a function.  The same technique is used when calling
+--     let-no-escape functions, because they also can't be partially
+--     applied.
+
+returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
+returnUnboxedTuple amodes
+  = do { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
+       ; tickyUnboxedTupleReturn (length amodes)
+       ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
+       ; emitSimultaneously assts
+       ; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr }
+
+pushUnboxedTuple :: VirtualSpOffset            -- Sp at which to start pushing
+                -> [(CgRep, CmmExpr)]          -- amodes of the components
+                -> FCode (VirtualSpOffset,     -- final Sp
+                          CmmStmts)            -- assignments (regs+stack)
+
+pushUnboxedTuple sp []
+  = return (sp, noStmts)
+pushUnboxedTuple sp amodes
+  = do { let  (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
+
+              -- separate the rest of the args into pointers and non-pointers
+              (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
+              reg_arg_assts = assignToRegs reg_arg_amodes
+
+              -- push ptrs, then nonptrs, on the stack
+       ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
+       ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
+
+       ; returnFC (final_sp,
+                   reg_arg_assts `plusStmts`
+                   ptr_assts `plusStmts` nptr_assts) }
+
+
+-- -----------------------------------------------------------------------------
+-- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
+-- we want to do things in a slightly different order to normal:
+--
+--             - push return address
+--             - adjust stack pointer
+--             - r = call(args...)
+--             - assign regs for unboxed tuple (usually just R1 = r)
+--             - return to continuation
+--
+-- The return address (i.e. stack frame) must be on the stack before
+-- doing the call in case the call ends up in the garbage collector.
+--
+-- Sadly, the information about the continuation is lost after we push it
+-- (in order to avoid pushing it again), so we end up doing a needless
+-- indirect jump (ToDo).
+ +ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code +ccallReturnUnboxedTuple amodes before_jump + = do { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo + + -- Push a return address if necessary + ; pushReturnAddress eob + ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack) + (do { adjustSpAndHp args_sp + ; before_jump + ; returnUnboxedTuple amodes }) + } + +-- ----------------------------------------------------------------------------- +-- Calling an out-of-line primop + +tailCallPrimOp :: PrimOp -> [StgArg] -> Code +tailCallPrimOp op args + = do { -- We're going to perform a normal-looking tail call, + -- except that *all* the arguments will be in registers. + -- Hence the ASSERT( null leftovers ) + arg_amodes <- getArgAmodes args + ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes + jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op) + + ; ASSERT(null leftovers) -- no stack-resident args + emitSimultaneously (assignToRegs arg_regs) + + ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo + ; doFinalJump args_sp False{-not a LNE-} jump_to_primop } + +-- ----------------------------------------------------------------------------- +-- Return Addresses + +-- We always push the return address just before performing a tail call +-- or return. The reason we leave it until then is because the stack +-- slot that the return address is to go into might contain something +-- useful. +-- +-- If the end of block info is 'CaseAlts', then we're in the scrutinee of a +-- case expression and the return address is still to be pushed. +-- +-- There are cases where it doesn't look necessary to push the return +-- address: for example, just before doing a return to a known +-- continuation. However, the continuation will expect to find the +-- return address on the stack in case it needs to do a heap check. + +pushReturnAddress :: EndOfBlockInfo -> Code + +pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False)) + = do { sp_rel <- getSpRelOffset args_sp + ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) } + +-- For a polymorphic case, we have two return addresses to push: the case +-- return, and stg_seq_frame_info which turns a possible vectored return +-- into a direct one. +pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True)) + = do { sp_rel <- getSpRelOffset (args_sp-1) + ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) + ; sp_rel <- getSpRelOffset args_sp + ; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) } + +pushReturnAddress _ = nopC + +-- ----------------------------------------------------------------------------- +-- Misc. + +jumpToLbl :: CLabel -> Code +-- Passes no argument to the destination procedure +jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}]) + +assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts +assignToRegs reg_args + = mkStmts [ CmmAssign (CmmGlobal reg_id) expr + | (expr, reg_id) <- reg_args ] +\end{code} + + +%************************************************************************ +%* * +\subsection[CgStackery-adjust]{Adjusting the stack pointers} +%* * +%************************************************************************ + +This function adjusts the stack and heap pointers just before a tail +call or return. The stack pointer is adjusted to its final position +(i.e. to point to the last argument for a tail call, or the activation +record for a return). 
The heap pointer may be moved backwards, in +cases where we overallocated at the beginning of the basic block (see +CgCase.lhs for discussion). + +These functions {\em do not} deal with high-water-mark adjustment. +That's done by functions which allocate stack space. + +\begin{code} +adjustSpAndHp :: VirtualSpOffset -- New offset for Arg stack ptr + -> Code +adjustSpAndHp newRealSp + = do { -- Adjust stack, if necessary. + -- NB: the conditional on the monad-carried realSp + -- is out of line (via codeOnly), to avoid a black hole + ; new_sp <- getSpRelOffset newRealSp + ; checkedAbsC (CmmAssign spReg new_sp) -- Will generate no code in the case + ; setRealSp newRealSp -- where realSp==newRealSp + + -- Adjust heap. The virtual heap pointer may be less than the real Hp + -- because the latter was advanced to deal with the worst-case branch + -- of the code, and we may be in a better-case branch. In that case, + -- move the real Hp *back* and retract some ticky allocation count. + ; hp_usg <- getHpUsage + ; let rHp = realHp hp_usg + vHp = virtHp hp_usg + ; new_hp <- getHpRelOffset vHp + ; checkedAbsC (CmmAssign hpReg new_hp) -- Generates nothing when vHp==rHp + ; tickyAllocHeap (vHp - rHp) -- ...ditto + ; setRealHp vHp + } +\end{code} diff --git a/compiler/codeGen/CgTicky.hs b/compiler/codeGen/CgTicky.hs new file mode 100644 index 0000000000..3e72981c50 --- /dev/null +++ b/compiler/codeGen/CgTicky.hs @@ -0,0 +1,370 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for ticky-ticky profiling +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgTicky ( + emitTickyCounter, + + tickyDynAlloc, + tickyAllocHeap, + tickyAllocPrim, + tickyAllocThunk, + tickyAllocPAP, + + tickyPushUpdateFrame, + tickyUpdateFrameOmitted, + + tickyEnterDynCon, + tickyEnterStaticCon, + tickyEnterViaNode, + + tickyEnterFun, + tickyEnterThunk, + + tickyUpdateBhCaf, + tickyBlackHole, + tickyUnboxedTupleReturn, tickyVectoredReturn, + tickyReturnOldCon, tickyReturnNewCon, + + tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, + tickyUnknownCall, tickySlowCallPat, + + staticTickyHdr, + ) where + +#include "HsVersions.h" +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import ClosureInfo ( ClosureInfo, closureSize, slopSize, closureSMRep, + closureUpdReqd, closureName, isStaticClosure ) +import CgUtils +import CgMonad +import SMRep ( ClosureType(..), smRepClosureType, CgRep ) + +import Cmm +import MachOp +import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr, cmmIndexExpr ) +import CLabel ( CLabel, mkRtsDataLabel, mkRednCountsLabel ) + +import Name ( isInternalName ) +import Id ( Id, idType ) +import StaticFlags ( opt_DoTickyProfiling ) +import BasicTypes ( Arity ) +import FastString ( FastString, mkFastString, LitString ) +import Constants -- Lots of field offsets +import Outputable + +-- Turgid imports for showTypeCategory +import PrelNames +import TcType ( Type, isDictTy, tcSplitTyConApp_maybe, + tcSplitFunTy_maybe ) +import TyCon ( isPrimTyCon, isTupleTyCon, isEnumerationTyCon, + maybeTyConSingleCon ) +import Maybe + +----------------------------------------------------------------------------- +-- +-- Ticky-ticky profiling +-- +----------------------------------------------------------------------------- + +staticTickyHdr :: [CmmLit] +-- The ticky header words in a static closure +-- Was SET_STATIC_TICKY_HDR +staticTickyHdr + 
| not opt_DoTickyProfiling = []
+  | otherwise                = [zeroCLit]
+
+emitTickyCounter :: ClosureInfo -> [Id] -> Int -> Code
+emitTickyCounter cl_info args on_stk
+  = ifTicky $
+    do { mod_name <- moduleName
+       ; fun_descr_lit <- mkStringCLit (fun_descr mod_name)
+       ; arg_descr_lit <- mkStringCLit arg_descr
+       ; emitDataLits ticky_ctr_label  -- Must match layout of StgEntCounter
+           [ CmmInt 0 I16,
+             CmmInt (fromIntegral (length args)) I16,  -- Arity
+             CmmInt (fromIntegral on_stk) I16,         -- Words passed on stack
+             CmmInt 0 I16,                             -- 2-byte gap
+             fun_descr_lit,
+             arg_descr_lit,
+             zeroCLit,                 -- Entry count
+             zeroCLit,                 -- Allocs
+             zeroCLit                  -- Link
+           ] }
+  where
+    name = closureName cl_info
+    ticky_ctr_label = mkRednCountsLabel name
+    arg_descr = map (showTypeCategory . idType) args
+    fun_descr mod_name = ppr_for_ticky_name mod_name name
+
+-- When printing the name of a thing in a ticky file, we want to
+-- give the module name even for *local* things.  We print
+-- just "x (M)" rather than "M.x" to distinguish them from the global kind.
+ppr_for_ticky_name mod_name name
+  | isInternalName name = showSDocDebug (ppr name <+> (parens (ppr mod_name)))
+  | otherwise           = showSDocDebug (ppr name)
+
+-- -----------------------------------------------------------------------------
+-- Ticky stack frames
+
+tickyPushUpdateFrame    = ifTicky $ bumpTickyCounter SLIT("UPDF_PUSHED_ctr")
+tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter SLIT("UPDF_OMITTED_ctr")
+
+-- -----------------------------------------------------------------------------
+-- Ticky entries
+
+tickyEnterDynCon      = ifTicky $ bumpTickyCounter SLIT("ENT_DYN_CON_ctr")
+tickyEnterDynThunk    = ifTicky $ bumpTickyCounter SLIT("ENT_DYN_THK_ctr")
+tickyEnterStaticCon   = ifTicky $ bumpTickyCounter SLIT("ENT_STATIC_CON_ctr")
+tickyEnterStaticThunk = ifTicky $ bumpTickyCounter SLIT("ENT_STATIC_THK_ctr")
+tickyEnterViaNode     = ifTicky $ bumpTickyCounter SLIT("ENT_VIA_NODE_ctr")
+
+tickyEnterThunk :: ClosureInfo -> Code
+tickyEnterThunk cl_info
+  | isStaticClosure cl_info = tickyEnterStaticThunk
+  | otherwise               = tickyEnterDynThunk
+
+tickyBlackHole :: Bool{-updatable-} -> Code
+tickyBlackHole updatable
+  = ifTicky (bumpTickyCounter ctr)
+  where
+    ctr | updatable = SLIT("UPD_BH_SINGLE_ENTRY_ctr")
+        | otherwise = SLIT("UPD_BH_UPDATABLE_ctr")
+
+tickyUpdateBhCaf cl_info
+  = ifTicky (bumpTickyCounter ctr)
+  where
+    ctr | closureUpdReqd cl_info = SLIT("UPD_CAF_BH_SINGLE_ENTRY_ctr")
+        | otherwise              = SLIT("UPD_CAF_BH_UPDATABLE_ctr")
+
+tickyEnterFun :: ClosureInfo -> Code
+tickyEnterFun cl_info
+  = ifTicky $
+    do  { bumpTickyCounter ctr
+        ; fun_ctr_lbl <- getTickyCtrLabel
+        ; registerTickyCtr fun_ctr_lbl
+        ; bumpTickyCounter' fun_ctr_lbl }
+  where
+    ctr | isStaticClosure cl_info = SLIT("TICK_ENT_STATIC_FUN_DIRECT")
+        | otherwise               = SLIT("TICK_ENT_DYN_FUN_DIRECT")
+
+registerTickyCtr :: CLabel -> Code
+-- Register a ticky counter
+--   if ( ! f_ct.registeredp ) {
+--         f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */
+--         ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */
+--         f_ct.registeredp = 1 }
+registerTickyCtr ctr_lbl
+  = emitIf test (stmtsC register_stmts)
+  where
+    test = CmmMachOp (MO_Not I16)
+           [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl
+                               oFFSET_StgEntCounter_registeredp)) I16]
+    register_stmts
+      = [ CmmStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_link))
+                   (CmmLoad ticky_entry_ctrs wordRep)
+        , CmmStore ticky_entry_ctrs (mkLblExpr ctr_lbl)
+        , CmmStore (CmmLit (cmmLabelOffB ctr_lbl
+                               oFFSET_StgEntCounter_registeredp))
+                   (CmmLit (mkIntCLit 1)) ]
+    ticky_entry_ctrs = mkLblExpr (mkRtsDataLabel SLIT("ticky_entry_ctrs"))
+
+tickyReturnOldCon, tickyReturnNewCon :: Arity -> Code
+tickyReturnOldCon arity
+  = ifTicky $ do { bumpTickyCounter SLIT("RET_OLD_ctr")
+                 ; bumpHistogram SLIT("RET_OLD_hst") arity }
+tickyReturnNewCon arity
+  | not opt_DoTickyProfiling = nopC
+  | otherwise
+  = ifTicky $ do { bumpTickyCounter SLIT("RET_NEW_ctr")
+                 ; bumpHistogram SLIT("RET_NEW_hst") arity }
+
+tickyUnboxedTupleReturn :: Int -> Code
+tickyUnboxedTupleReturn arity
+  = ifTicky $ do { bumpTickyCounter SLIT("RET_UNBOXED_TUP_ctr")
+                 ; bumpHistogram SLIT("RET_UNBOXED_TUP_hst") arity }
+
+tickyVectoredReturn :: Int -> Code
+tickyVectoredReturn family_size
+  = ifTicky $ do { bumpTickyCounter SLIT("VEC_RETURN_ctr")
+                 ; bumpHistogram SLIT("RET_VEC_RETURN_hst") family_size }
+
+-- -----------------------------------------------------------------------------
+-- Ticky calls
+
+-- Ticks at a *call site*:
+tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_TOO_FEW_ARGS_ctr")
+tickyKnownCallExact = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_ctr")
+tickyKnownCallExtraArgs = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_EXTRA_ctr")
+tickyUnknownCall = ifTicky $ bumpTickyCounter SLIT("UNKNOWN_CALL_ctr")
+
+-- Tick for the call pattern at slow call site (i.e. in addition to
+-- tickyUnknownCall, tickyKnownCallExtraArgs, etc.)
+tickySlowCallPat :: [CgRep] -> Code
+tickySlowCallPat args = return ()
+{- LATER: (introduces recursive module dependency now).
+  case callPattern args of
+    (str, True)  -> bumpTickyCounter' (mkRtsSlowTickyCtrLabel str)
+    (str, False) -> bumpTickyCounter  SLIT("TICK_SLOW_CALL_OTHER")
+
+callPattern :: [CgRep] -> (String,Bool)
+callPattern reps
+  | match == length reps = (chars, True)
+  | otherwise            = (chars, False)
+  where (_,match) = findMatch reps
+        chars     = map argChar reps
+
+argChar VoidArg   = 'v'
+argChar PtrArg    = 'p'
+argChar NonPtrArg = 'n'
+argChar LongArg   = 'l'
+argChar FloatArg  = 'f'
+argChar DoubleArg = 'd'
+-}
+
+-- -----------------------------------------------------------------------------
+-- Ticky allocation
+
+tickyDynAlloc :: ClosureInfo -> Code
+-- Called when doing a dynamic heap allocation
+tickyDynAlloc cl_info
+  = ifTicky $
+    case smRepClosureType (closureSMRep cl_info) of
+       Constr          -> tick_alloc_con
+       ConstrNoCaf     -> tick_alloc_con
+       Fun             -> tick_alloc_fun
+       Thunk           -> tick_alloc_thk
+       ThunkSelector   -> tick_alloc_thk
+  where
+       -- will be needed when we fill in stubs
+    cl_size   = closureSize cl_info
+    slop_size = slopSize cl_info
+
+    tick_alloc_thk
+       | closureUpdReqd cl_info = tick_alloc_up_thk
+       | otherwise              = tick_alloc_se_thk
+
+    tick_alloc_con = panic "ToDo: tick_alloc"
+    tick_alloc_fun = panic "ToDo: tick_alloc"
+    tick_alloc_up_thk = panic "ToDo: tick_alloc"
+    tick_alloc_se_thk = panic "ToDo: tick_alloc"
+
+tickyAllocPrim :: CmmExpr -> CmmExpr -> CmmExpr -> Code
+tickyAllocPrim hdr goods slop = ifTicky $ panic "ToDo: tickyAllocPrim"
+
+tickyAllocThunk :: CmmExpr -> CmmExpr -> Code
+tickyAllocThunk goods slop = ifTicky $ panic "ToDo: tickyAllocThunk"
+
+tickyAllocPAP :: CmmExpr -> CmmExpr -> Code
+tickyAllocPAP goods slop = ifTicky $ panic "ToDo: tickyAllocPAP"
+
+tickyAllocHeap :: VirtualHpOffset -> Code
+-- Called when doing a heap check [TICK_ALLOC_HEAP]
+tickyAllocHeap hp
+  = ifTicky $
+    do { ticky_ctr <- getTickyCtrLabel
+       ; stmtsC $
+         if hp == 0 then []    -- Inside the stmtC to avoid control
+         else [                -- dependency on the argument
+               -- Bump the allocation count in the StgEntCounter
+           addToMem REP_StgEntCounter_allocs
+                       (CmmLit (cmmLabelOffB ticky_ctr
+                               oFFSET_StgEntCounter_allocs)) hp,
+               -- Bump ALLOC_HEAP_ctr
+           addToMemLbl cLongRep (mkRtsDataLabel SLIT("ALLOC_HEAP_ctr")) 1,
+               -- Bump ALLOC_HEAP_tot
+           addToMemLbl cLongRep (mkRtsDataLabel SLIT("ALLOC_HEAP_tot")) hp] }
+
+-- -----------------------------------------------------------------------------
+-- Ticky utils
+
+ifTicky :: Code -> Code
+ifTicky code
+  | opt_DoTickyProfiling = code
+  | otherwise            = nopC
+
+addToMemLbl :: MachRep -> CLabel -> Int -> CmmStmt
+addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n
+
+-- All the ticky-ticky counters are declared "unsigned long" in C
+bumpTickyCounter :: LitString -> Code
+bumpTickyCounter lbl = bumpTickyCounter' (mkRtsDataLabel lbl)
+
+bumpTickyCounter' :: CLabel -> Code
+bumpTickyCounter' lbl = stmtC (addToMemLbl cLongRep lbl 1)
+
+addToMemLong = addToMem cLongRep
+
+bumpHistogram :: LitString -> Int -> Code
+bumpHistogram lbl n
+  = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLongRep))
+
+bumpHistogramE :: LitString -> CmmExpr -> Code
+bumpHistogramE lbl n
+  = do  t <- newTemp cLongRep
+        stmtC (CmmAssign t n)
+        emitIf (CmmMachOp (MO_U_Le cLongRep) [CmmReg t, eight]) $
+                stmtC (CmmAssign t eight)
+        stmtC (addToMemLong (cmmIndexExpr cLongRep
+                               (CmmLit (CmmLabel (mkRtsDataLabel lbl)))
+                               (CmmReg t))
+                           1)
+  where
+   eight = CmmLit (CmmInt 8 cLongRep)
+
+------------------------------------------------------------------
+-- Showing the "type category" for ticky-ticky profiling
+
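+-- For instance (illustrative, following the legend below): @Int@ maps
+-- to 'I', @Int#@ to 'i', a function type to '>', a dictionary type to
+-- '+', and an ordinary boxed single-constructor type to 'S'.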
+showTypeCategory :: Type -> Char
+  {-   {C,I,F,D}   char, int, float, double
+       T           tuple
+       S           other single-constructor type
+       {c,i,f,d}   unboxed ditto
+       t           *unpacked* tuple
+       s           *unpacked* single-cons...
+
+       v           void#
+       a           primitive array
+
+       E           enumeration type
+       +           dictionary, unless it's a ...
+       L           List
+       >           function
+       M           other (multi-constructor) data-con type
+       .           other type
+       -           reserved for others to mark as "uninteresting"
+    -}
+showTypeCategory ty
+  = if isDictTy ty
+    then '+'
+    else
+      case tcSplitTyConApp_maybe ty of
+       Nothing -> if isJust (tcSplitFunTy_maybe ty)
+                  then '>'
+                  else '.'
+
+       Just (tycon, _) ->
+          let utc = getUnique tycon in
+         if      utc == charDataConKey then 'C'
+         else if utc == intDataConKey then 'I'
+         else if utc == floatDataConKey then 'F'
+         else if utc == doubleDataConKey then 'D'
+         else if utc == smallIntegerDataConKey ||
+                 utc == largeIntegerDataConKey then 'J'
+         else if utc == charPrimTyConKey then 'c'
+         else if (utc == intPrimTyConKey || utc == wordPrimTyConKey
+               || utc == addrPrimTyConKey) then 'i'
+         else if utc == floatPrimTyConKey then 'f'
+         else if utc == doublePrimTyConKey then 'd'
+         else if isPrimTyCon tycon {- array, we hope -} then 'A'      -- Bogus
+         else if isEnumerationTyCon tycon then 'E'
+         else if isTupleTyCon tycon then 'T'
+         else if isJust (maybeTyConSingleCon tycon) then 'S'
+         else if utc == listTyConKey then 'L'
+         else 'M' -- oh, well...
diff --git a/compiler/codeGen/CgUsages.hi-boot-5 b/compiler/codeGen/CgUsages.hi-boot-5 new file mode 100644 index 0000000000..abb98cec1a --- /dev/null +++ b/compiler/codeGen/CgUsages.hi-boot-5 @@ -0,0 +1,3 @@
+__interface CgUsages 1 0 where
+__export CgUsages getSpRelOffset;
+1 getSpRelOffset :: AbsCSyn.VirtualSpOffset -> CgMonad.FCode AbsCSyn.RegRelative ;
diff --git a/compiler/codeGen/CgUsages.hi-boot-6 b/compiler/codeGen/CgUsages.hi-boot-6 new file mode 100644 index 0000000000..9640603cfb --- /dev/null +++ b/compiler/codeGen/CgUsages.hi-boot-6 @@ -0,0 +1,3 @@
+module CgUsages where
+
+getSpRelOffset :: AbsCSyn.VirtualSpOffset -> CgMonad.FCode AbsCSyn.RegRelative
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs new file mode 100644 index 0000000000..2f69927db0 --- /dev/null +++ b/compiler/codeGen/CgUtils.hs @@ -0,0 +1,688 @@
+-----------------------------------------------------------------------------
+--
+-- Code generator utilities; mostly monadic
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+module CgUtils (
+       addIdReps,
+       cgLit,
+       emitDataLits, emitRODataLits, emitIf, emitIfThenElse,
+       emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult,
+       assignTemp, newTemp,
+       emitSimultaneously,
+       emitSwitch, emitLitSwitch,
+       tagToClosure,
+
+       cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
+       cmmOffsetExprW, cmmOffsetExprB,
+       cmmRegOffW, cmmRegOffB,
+       cmmLabelOffW, cmmLabelOffB,
+       cmmOffsetW, cmmOffsetB,
+       cmmOffsetLitW, cmmOffsetLitB,
+       cmmLoadIndexW,
+
+       addToMem, addToMemE,
+       mkWordCLit,
+       mkStringCLit,
+       packHalfWordsCLit,
+       blankWord
+  ) where
+
+#include "HsVersions.h"
+
+import CgMonad
+import TyCon           ( TyCon, tyConName )
+import Id              ( Id )
+import Constants       ( wORD_SIZE )
+import SMRep           ( CgRep, StgWord, hALF_WORD_SIZE_IN_BITS, ByteOff,
+                         WordOff, idCgRep )
+import PprCmm          ( {- instances -} )
+import Cmm
+import CLabel
+import CmmUtils
+import MachOp          ( MachRep(..), wordRep, MachOp(..), MachHint(..),
+                         mo_wordOr, mo_wordAnd, mo_wordNe, mo_wordEq,
+                         mo_wordULt, mo_wordUGt, mo_wordUGe, machRepByteWidth )
+import ForeignCall     (
CCallConv(..) ) +import Literal ( Literal(..) ) +import CLabel ( CLabel, mkStringLitLabel ) +import Digraph ( SCC(..), stronglyConnComp ) +import ListSetOps ( assocDefault ) +import Util ( filterOut, sortLe ) +import DynFlags ( DynFlags(..), HscTarget(..) ) +import Packages ( HomeModules ) +import FastString ( LitString, FastString, bytesFS ) +import Outputable + +import Char ( ord ) +import DATA_BITS +import DATA_WORD ( Word8 ) +import Maybe ( isNothing ) + +------------------------------------------------------------------------- +-- +-- Random small functions +-- +------------------------------------------------------------------------- + +addIdReps :: [Id] -> [(CgRep, Id)] +addIdReps ids = [(idCgRep id, id) | id <- ids] + +------------------------------------------------------------------------- +-- +-- Literals +-- +------------------------------------------------------------------------- + +cgLit :: Literal -> FCode CmmLit +cgLit (MachStr s) = mkByteStringCLit (bytesFS s) + -- not unpackFS; we want the UTF-8 byte stream. +cgLit other_lit = return (mkSimpleLit other_lit) + +mkSimpleLit :: Literal -> CmmLit +mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordRep +mkSimpleLit MachNullAddr = zeroCLit +mkSimpleLit (MachInt i) = CmmInt i wordRep +mkSimpleLit (MachInt64 i) = CmmInt i I64 +mkSimpleLit (MachWord i) = CmmInt i wordRep +mkSimpleLit (MachWord64 i) = CmmInt i I64 +mkSimpleLit (MachFloat r) = CmmFloat r F32 +mkSimpleLit (MachDouble r) = CmmFloat r F64 +mkSimpleLit (MachLabel fs ms) = CmmLabel (mkForeignLabel fs ms is_dyn) + where + is_dyn = False -- ToDo: fix me + +mkLtOp :: Literal -> MachOp +-- On signed literals we must do a signed comparison +mkLtOp (MachInt _) = MO_S_Lt wordRep +mkLtOp (MachFloat _) = MO_S_Lt F32 +mkLtOp (MachDouble _) = MO_S_Lt F64 +mkLtOp lit = MO_U_Lt (cmmLitRep (mkSimpleLit lit)) + + +--------------------------------------------------- +-- +-- Cmm data type functions +-- +--------------------------------------------------- + +----------------------- +-- The "B" variants take byte offsets +cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr +cmmRegOffB = cmmRegOff + +cmmOffsetB :: CmmExpr -> ByteOff -> CmmExpr +cmmOffsetB = cmmOffset + +cmmOffsetExprB :: CmmExpr -> CmmExpr -> CmmExpr +cmmOffsetExprB = cmmOffsetExpr + +cmmLabelOffB :: CLabel -> ByteOff -> CmmLit +cmmLabelOffB = cmmLabelOff + +cmmOffsetLitB :: CmmLit -> ByteOff -> CmmLit +cmmOffsetLitB = cmmOffsetLit + +----------------------- +-- The "W" variants take word offsets +cmmOffsetExprW :: CmmExpr -> CmmExpr -> CmmExpr +-- The second arg is a *word* offset; need to change it to bytes +cmmOffsetExprW e (CmmLit (CmmInt n _)) = cmmOffsetW e (fromInteger n) +cmmOffsetExprW e wd_off = cmmIndexExpr wordRep e wd_off + +cmmOffsetW :: CmmExpr -> WordOff -> CmmExpr +cmmOffsetW e n = cmmOffsetB e (wORD_SIZE * n) + +cmmRegOffW :: CmmReg -> WordOff -> CmmExpr +cmmRegOffW reg wd_off = cmmRegOffB reg (wd_off * wORD_SIZE) + +cmmOffsetLitW :: CmmLit -> WordOff -> CmmLit +cmmOffsetLitW lit wd_off = cmmOffsetLitB lit (wORD_SIZE * wd_off) + +cmmLabelOffW :: CLabel -> WordOff -> CmmLit +cmmLabelOffW lbl wd_off = cmmLabelOffB lbl (wORD_SIZE * wd_off) + +cmmLoadIndexW :: CmmExpr -> Int -> CmmExpr +cmmLoadIndexW base off + = CmmLoad (cmmOffsetW base off) wordRep + +----------------------- +cmmNeWord, cmmEqWord, cmmOrWord, cmmAndWord :: CmmExpr -> CmmExpr -> CmmExpr +cmmOrWord e1 e2 = CmmMachOp mo_wordOr [e1, e2] +cmmAndWord e1 e2 = CmmMachOp mo_wordAnd [e1, e2] +cmmNeWord e1 e2 = CmmMachOp mo_wordNe [e1, e2] +cmmEqWord 
e1 e2 = CmmMachOp mo_wordEq [e1, e2] +cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2] +cmmUGeWord e1 e2 = CmmMachOp mo_wordUGe [e1, e2] +cmmUGtWord e1 e2 = CmmMachOp mo_wordUGt [e1, e2] + +cmmNegate :: CmmExpr -> CmmExpr +cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) +cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprRep e)) [e] + +blankWord :: CmmStatic +blankWord = CmmUninitialised wORD_SIZE + +----------------------- +-- Making literals + +mkWordCLit :: StgWord -> CmmLit +mkWordCLit wd = CmmInt (fromIntegral wd) wordRep + +packHalfWordsCLit :: (Integral a, Integral b) => a -> b -> CmmLit +-- Make a single word literal in which the lower_half_word is +-- at the lower address, and the upper_half_word is at the +-- higher address +-- ToDo: consider using half-word lits instead +-- but be careful: that's vulnerable when reversed +packHalfWordsCLit lower_half_word upper_half_word +#ifdef WORDS_BIGENDIAN + = mkWordCLit ((fromIntegral lower_half_word `shiftL` hALF_WORD_SIZE_IN_BITS) + .|. fromIntegral upper_half_word) +#else + = mkWordCLit ((fromIntegral lower_half_word) + .|. (fromIntegral upper_half_word `shiftL` hALF_WORD_SIZE_IN_BITS)) +#endif + +-------------------------------------------------------------------------- +-- +-- Incrementing a memory location +-- +-------------------------------------------------------------------------- + +addToMem :: MachRep -- rep of the counter + -> CmmExpr -- Address + -> Int -- What to add (a word) + -> CmmStmt +addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) rep)) + +addToMemE :: MachRep -- rep of the counter + -> CmmExpr -- Address + -> CmmExpr -- What to add (a word-typed expression) + -> CmmStmt +addToMemE rep ptr n + = CmmStore ptr (CmmMachOp (MO_Add rep) [CmmLoad ptr rep, n]) + +------------------------------------------------------------------------- +-- +-- Converting a closure tag to a closure for enumeration types +-- (this is the implementation of tagToEnum#). +-- +------------------------------------------------------------------------- + +tagToClosure :: HomeModules -> TyCon -> CmmExpr -> CmmExpr +tagToClosure hmods tycon tag + = CmmLoad (cmmOffsetExprW closure_tbl tag) wordRep + where closure_tbl = CmmLit (CmmLabel lbl) + lbl = mkClosureTableLabel hmods (tyConName tycon) + +------------------------------------------------------------------------- +-- +-- Conditionals and rts calls +-- +------------------------------------------------------------------------- + +emitIf :: CmmExpr -- Boolean + -> Code -- Then part + -> Code +-- Emit (if e then x) +-- ToDo: reverse the condition to avoid the extra branch instruction if possible +-- (some conditionals aren't reversible. eg. floating point comparisons cannot +-- be inverted because there exist some values for which both comparisons +-- return False, such as NaN.) 
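+--
+-- The emitted control flow is, schematically:
+--
+--           if (cond) goto t;
+--           goto join;
+--     t:    <then_part>
+--     join: ...
+--
+-- (Reversing the condition, as the ToDo above suggests, would save the
+-- unconditional branch.)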
+emitIf cond then_part
+  = do { then_id <- newLabelC
+       ; join_id <- newLabelC
+       ; stmtC (CmmCondBranch cond then_id)
+       ; stmtC (CmmBranch join_id)
+       ; labelC then_id
+       ; then_part
+       ; labelC join_id
+       }
+
+emitIfThenElse :: CmmExpr      -- Boolean
+               -> Code         -- Then part
+               -> Code         -- Else part
+               -> Code
+-- Emit (if e then x else y)
+emitIfThenElse cond then_part else_part
+  = do { then_id <- newLabelC
+       ; else_id <- newLabelC
+       ; join_id <- newLabelC
+       ; stmtC (CmmCondBranch cond then_id)
+       ; else_part
+       ; stmtC (CmmBranch join_id)
+       ; labelC then_id
+       ; then_part
+       ; labelC join_id
+       }
+
+emitRtsCall :: LitString -> [(CmmExpr,MachHint)] -> Code
+emitRtsCall fun args = emitRtsCall' [] fun args Nothing
+   -- The 'Nothing' says "save all global registers"
+
+emitRtsCallWithVols :: LitString -> [(CmmExpr,MachHint)] -> [GlobalReg] -> Code
+emitRtsCallWithVols fun args vols
+   = emitRtsCall' [] fun args (Just vols)
+
+emitRtsCallWithResult :: CmmReg -> MachHint -> LitString
+       -> [(CmmExpr,MachHint)] -> Code
+emitRtsCallWithResult res hint fun args
+   = emitRtsCall' [(res,hint)] fun args Nothing
+
+-- Make a call to an RTS C procedure
+emitRtsCall'
+   :: [(CmmReg,MachHint)]
+   -> LitString
+   -> [(CmmExpr,MachHint)]
+   -> Maybe [GlobalReg]
+   -> Code
+emitRtsCall' res fun args vols = stmtC (CmmCall target res args vols)
+  where
+    target   = CmmForeignCall fun_expr CCallConv
+    fun_expr = mkLblExpr (mkRtsCodeLabel fun)
+
+
+-------------------------------------------------------------------------
+--
+--     Strings generate a top-level data block
+--
+-------------------------------------------------------------------------
+
+emitDataLits :: CLabel -> [CmmLit] -> Code
+-- Emit a data-segment data block
+emitDataLits lbl lits
+  = emitData Data (CmmDataLabel lbl : map CmmStaticLit lits)
+
+emitRODataLits :: CLabel -> [CmmLit] -> Code
+-- Emit a read-only data block
+emitRODataLits lbl lits
+  = emitData section (CmmDataLabel lbl : map CmmStaticLit lits)
+  where section | any needsRelocation lits = RelocatableReadOnlyData
+                | otherwise                = ReadOnlyData
+        needsRelocation (CmmLabel _)      = True
+        needsRelocation (CmmLabelOff _ _) = True
+        needsRelocation _                 = False
+
+mkStringCLit :: String -> FCode CmmLit
+-- Make a global definition for the string,
+-- and return its label
+mkStringCLit str = mkByteStringCLit (map (fromIntegral.ord) str)
+
+mkByteStringCLit :: [Word8] -> FCode CmmLit
+mkByteStringCLit bytes
+  = do  { uniq <- newUnique
+        ; let lbl = mkStringLitLabel uniq
+        ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString bytes]
+        ; return (CmmLabel lbl) }
+
+-------------------------------------------------------------------------
+--
+--     Assigning expressions to temporaries
+--
+-------------------------------------------------------------------------
+
+assignTemp :: CmmExpr -> FCode CmmExpr
+-- For a non-trivial expression, e, create a local
+-- variable and assign the expression to it
+assignTemp e
+  | isTrivialCmmExpr e = return e
+  | otherwise          = do { reg <- newTemp (cmmExprRep e)
+                            ; stmtC (CmmAssign reg e)
+                            ; return (CmmReg reg) }
+
+
+newTemp :: MachRep -> FCode CmmReg
+newTemp rep = do { uniq <- newUnique; return (CmmLocal (LocalReg uniq rep)) }
+
+
+-------------------------------------------------------------------------
+--
+--     Building case analysis
+--
+-------------------------------------------------------------------------
+
+emitSwitch
+       :: CmmExpr                -- Tag to switch on
+       -> [(ConTagZ, CgStmts)]   -- Tagged branches
+       -> Maybe CgStmts          -- Default branch (if any)
+       -> ConTagZ -> ConTagZ     -- Min and Max possible values;
+                                 -- behaviour outside this range is undefined
+       -> Code
+
+-- ONLY A DEFAULT BRANCH: no case analysis to do
+emitSwitch tag_expr [] (Just stmts) _ _
+  = emitCgStmts stmts
+
+-- Right, off we go
+emitSwitch tag_expr branches mb_deflt lo_tag hi_tag
+  =    -- Just sort the branches before calling mk_switch
+    do { mb_deflt_id <-
+               case mb_deflt of
+                 Nothing    -> return Nothing
+                 Just stmts -> do id <- forkCgStmts stmts; return (Just id)
+
+       ; dflags <- getDynFlags
+       ; let via_C | HscC <- hscTarget dflags = True
+                   | otherwise                = False
+
+       ; stmts <- mk_switch tag_expr (sortLe le branches)
+                       mb_deflt_id lo_tag hi_tag via_C
+       ; emitCgStmts stmts
+       }
+  where
+    (t1,_) `le` (t2,_) = t1 <= t2
+
+
+mk_switch :: CmmExpr -> [(ConTagZ, CgStmts)]
+         -> Maybe BlockId -> ConTagZ -> ConTagZ -> Bool
+         -> FCode CgStmts
+
+-- SINGLETON TAG RANGE: no case analysis to do
+mk_switch tag_expr [(tag,stmts)] _ lo_tag hi_tag via_C
+  | lo_tag == hi_tag
+  = ASSERT( tag == lo_tag )
+    return stmts
+
+-- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
+mk_switch tag_expr [(tag,stmts)] Nothing lo_tag hi_tag via_C
+  = return stmts
+       -- The simplifier might have eliminated a case
+       --       so we may have e.g. case xs of
+       --                               [] -> e
+       -- In that situation we can be sure the (:) case
+       -- can't happen, so no need to test
+
+-- SINGLETON BRANCH: one equality check to do
+mk_switch tag_expr [(tag,stmts)] (Just deflt) lo_tag hi_tag via_C
+  = return (CmmCondBranch cond deflt `consCgStmt` stmts)
+  where
+    cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag))
+       -- We have lo_tag < hi_tag, but there's only one branch,
+       -- so there must be a default
+
+-- ToDo: we might want to check for the two branch case, where one of
+-- the branches is the tag 0, because comparing '== 0' is likely to be
+-- more efficient than other kinds of comparison.
+
+-- DENSE TAG RANGE: use a switch statement.
+--
+-- We also use a switch unconditionally when compiling via C, because
+-- this will get emitted as a C switch statement and the C compiler
+-- should do a good job of optimising it.  Also, older GCC versions
+-- (2.95 in particular) have problems compiling the complicated
+-- if-trees generated by this code, so compiling to a switch every
+-- time works around that problem.
+--
+mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
+  | use_switch         -- Use a switch
+  = do { branch_ids <- mapM forkCgStmts (map snd branches)
+       ; let
+               tagged_blk_ids = zip (map fst branches) (map Just branch_ids)
+
+               find_branch :: ConTagZ -> Maybe BlockId
+               find_branch i = assocDefault mb_deflt tagged_blk_ids i
+
+               -- NB. we have eliminated impossible branches at
+               -- either end of the range (see below), so the first
+               -- tag of a real branch is real_lo_tag (not lo_tag).
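+               -- For example (made-up tags): branches for tags 1 and 3
+               -- with no default give real_lo_tag = 1, real_hi_tag = 3
+               -- and arms = [Just b1, Nothing, Just b3]; the missing arm
+               -- can only be reached if the scrutinee is outside the
+               -- declared range, which emitSwitch's contract leaves
+               -- undefined.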
+ arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]] + + switch_stmt = CmmSwitch (cmmOffset tag_expr (- real_lo_tag)) arms + + ; ASSERT(not (all isNothing arms)) + return (oneCgStmt switch_stmt) + } + + -- if we can knock off a bunch of default cases with one if, then do so + | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr + ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit lowest_branch)) + branch = CmmCondBranch cond deflt + ; stmts <- mk_switch tag_expr' branches mb_deflt + lowest_branch hi_tag via_C + ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts)) + } + + | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr + ; let cond = cmmUGtWord tag_expr' (CmmLit (mkIntCLit highest_branch)) + branch = CmmCondBranch cond deflt + ; stmts <- mk_switch tag_expr' branches mb_deflt + lo_tag highest_branch via_C + ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts)) + } + + | otherwise -- Use an if-tree + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr + -- To avoid duplication + ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt + lo_tag (mid_tag-1) via_C + ; hi_stmts <- mk_switch tag_expr' hi_branches mb_deflt + mid_tag hi_tag via_C + ; hi_id <- forkCgStmts hi_stmts + ; let cond = cmmUGeWord tag_expr' (CmmLit (mkIntCLit mid_tag)) + branch_stmt = CmmCondBranch cond hi_id + ; return (assign_tag `consCgStmt` (branch_stmt `consCgStmt` lo_stmts)) + } + -- we test (e >= mid_tag) rather than (e < mid_tag), because + -- the former works better when e is a comparison, and there + -- are two tags 0 & 1 (mid_tag == 1). In this case, the code + -- generator can reduce the condition to e itself without + -- having to reverse the sense of the comparison: comparisons + -- can't always be easily reversed (eg. floating + -- pt. comparisons). + where + use_switch = {- pprTrace "mk_switch" ( + ppr tag_expr <+> text "n_tags:" <+> int n_tags <+> + text "n_branches:" <+> int n_branches <+> + text "lo_tag: " <+> int lo_tag <+> + text "hi_tag: " <+> int hi_tag <+> + text "real_lo_tag: " <+> int real_lo_tag <+> + text "real_hi_tag: " <+> int real_hi_tag) $ -} + ASSERT( n_branches > 1 && n_tags > 1 ) + n_tags > 2 && (small || dense || via_C) + -- a 2-branch switch always turns into an if. + small = n_tags <= 4 + dense = n_branches > (n_tags `div` 2) + exhaustive = n_tags == n_branches + n_branches = length branches + + -- ignore default slots at each end of the range if there's + -- no default branch defined. + lowest_branch = fst (head branches) + highest_branch = fst (last branches) + + real_lo_tag + | isNothing mb_deflt = lowest_branch + | otherwise = lo_tag + + real_hi_tag + | isNothing mb_deflt = highest_branch + | otherwise = hi_tag + + n_tags = real_hi_tag - real_lo_tag + 1 + + -- INVARIANT: Provided hi_tag > lo_tag (which is true) + -- lo_tag <= mid_tag < hi_tag + -- lo_branches have tags < mid_tag + -- hi_branches have tags >= mid_tag + + (mid_tag,_) = branches !! (n_branches `div` 2) + -- 2 branches => n_branches `div` 2 = 1 + -- => branches !! 
1 gives the *second* tag
+ -- There are always at least 2 branches here
+
+ (lo_branches, hi_branches) = span is_lo branches
+ is_lo (t,_) = t < mid_tag
+
+
+assignTemp' e
+ | isTrivialCmmExpr e = return (CmmNop, e)
+ | otherwise = do { reg <- newTemp (cmmExprRep e)
+ ; return (CmmAssign reg e, CmmReg reg) }
+
+
+emitLitSwitch :: CmmExpr -- Tag to switch on
+ -> [(Literal, CgStmts)] -- Tagged branches
+ -> CgStmts -- Default branch (always)
+ -> Code -- Emit the code
+-- Used for general literals, whose size might not be a word,
+-- where there is always a default case, and where we don't know
+-- the range of values for certain. For simplicity we always generate a tree.
+--
+-- ToDo: for integers we could do better here, perhaps by generalising
+-- mk_switch and using that. --SDM 15/09/2004
+emitLitSwitch scrut [] deflt
+ = emitCgStmts deflt
+emitLitSwitch scrut branches deflt_blk
+ = do { scrut' <- assignTemp scrut
+ ; deflt_blk_id <- forkCgStmts deflt_blk
+ ; blk <- mk_lit_switch scrut' deflt_blk_id (sortLe le branches)
+ ; emitCgStmts blk }
+ where
+ le (t1,_) (t2,_) = t1 <= t2
+
+mk_lit_switch :: CmmExpr -> BlockId
+ -> [(Literal,CgStmts)]
+ -> FCode CgStmts
+mk_lit_switch scrut deflt_blk_id [(lit,blk)]
+ = return (consCgStmt if_stmt blk)
+ where
+ cmm_lit = mkSimpleLit lit
+ rep = cmmLitRep cmm_lit
+ cond = CmmMachOp (MO_Ne rep) [scrut, CmmLit cmm_lit]
+ if_stmt = CmmCondBranch cond deflt_blk_id
+
+mk_lit_switch scrut deflt_blk_id branches
+ = do { hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches
+ ; lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches
+ ; lo_blk_id <- forkCgStmts lo_blk
+ ; let if_stmt = CmmCondBranch cond lo_blk_id
+ ; return (if_stmt `consCgStmt` hi_blk) }
+ where
+ n_branches = length branches
+ (mid_lit,_) = branches !! (n_branches `div` 2)
+ -- See notes above re mid_tag
+
+ (lo_branches, hi_branches) = span is_lo branches
+ is_lo (t,_) = t < mid_lit
+
+ cond = CmmMachOp (mkLtOp mid_lit)
+ [scrut, CmmLit (mkSimpleLit mid_lit)]
+
+-------------------------------------------------------------------------
+--
+-- Simultaneous assignment
+--
+-------------------------------------------------------------------------
+
+
+emitSimultaneously :: CmmStmts -> Code
+-- Emit code to perform the assignments in the
+-- input simultaneously, using temporary variables when necessary.
+--
+-- The Stmts must be:
+-- CmmNop, CmmComment, CmmAssign, CmmStore
+-- and nothing else
+
+
+-- We use the strongly-connected component algorithm, in which
+-- * the vertices are the statements
+-- * an edge goes from s1 to s2 iff
+-- s1 assigns to something s2 uses
+-- that is, if s1 should *follow* s2 in the final order
+
+type CVertex = (Int, CmmStmt) -- Give each vertex a unique number,
+ -- for fast comparison
+
+emitSimultaneously stmts
+ = codeOnly $
+ case filterOut isNopStmt (stmtList stmts) of
+ -- Remove no-ops
+ [] -> nopC
+ [stmt] -> stmtC stmt -- It's often just one stmt
+ stmt_list -> doSimultaneously1 (zip [(1::Int)..] stmt_list)
+
+doSimultaneously1 :: [CVertex] -> Code
+doSimultaneously1 vertices
+ = let
+ edges = [ (vertex, key1, edges_from stmt1)
+ | vertex@(key1, stmt1) <- vertices
+ ]
+ edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices,
+ stmt1 `mustFollow` stmt2
+ ]
+ components = stronglyConnComp edges
+
+ -- do_component deals with one strongly-connected component
+ -- Not cyclic, or singleton? Just do it
+ do_component (AcyclicSCC (n,stmt)) = stmtC stmt
+ do_component (CyclicSCC [(n,stmt)]) = stmtC stmt
+
+ -- Cyclic? Then go via temporaries.
Pick one to + -- break the loop and try again with the rest. + do_component (CyclicSCC ((n,first_stmt) : rest)) + = do { from_temp <- go_via_temp first_stmt + ; doSimultaneously1 rest + ; stmtC from_temp } + + go_via_temp (CmmAssign dest src) + = do { tmp <- newTemp (cmmRegRep dest) + ; stmtC (CmmAssign tmp src) + ; return (CmmAssign dest (CmmReg tmp)) } + go_via_temp (CmmStore dest src) + = do { tmp <- newTemp (cmmExprRep src) + ; stmtC (CmmAssign tmp src) + ; return (CmmStore dest (CmmReg tmp)) } + in + mapCs do_component components + +mustFollow :: CmmStmt -> CmmStmt -> Bool +CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt +CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprRep e)) stmt +CmmNop `mustFollow` stmt = False +CmmComment _ `mustFollow` stmt = False + + +anySrc :: (CmmExpr -> Bool) -> CmmStmt -> Bool +-- True if the fn is true of any input of the stmt +anySrc p (CmmAssign _ e) = p e +anySrc p (CmmStore e1 e2) = p e1 || p e2 -- Might be used in either side +anySrc p (CmmComment _) = False +anySrc p CmmNop = False +anySrc p other = True -- Conservative + +regUsedIn :: CmmReg -> CmmExpr -> Bool +reg `regUsedIn` CmmLit _ = False +reg `regUsedIn` CmmLoad e _ = reg `regUsedIn` e +reg `regUsedIn` CmmReg reg' = reg == reg' +reg `regUsedIn` CmmRegOff reg' _ = reg == reg' +reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es + +locUsedIn :: CmmExpr -> MachRep -> CmmExpr -> Bool +-- (locUsedIn a r e) checks whether writing to r[a] could affect the value of +-- 'e'. Returns True if it's not sure. +locUsedIn loc rep (CmmLit _) = False +locUsedIn loc rep (CmmLoad e ld_rep) = possiblySameLoc loc rep e ld_rep +locUsedIn loc rep (CmmReg reg') = False +locUsedIn loc rep (CmmRegOff reg' _) = False +locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es + +possiblySameLoc :: CmmExpr -> MachRep -> CmmExpr -> MachRep -> Bool +-- Assumes that distinct registers (eg Hp, Sp) do not +-- point to the same location, nor any offset thereof. 
+possiblySameLoc (CmmReg r1) rep1 (CmmReg r2) rep2 = r1==r2
+possiblySameLoc (CmmReg r1) rep1 (CmmRegOff r2 0) rep2 = r1==r2
+possiblySameLoc (CmmRegOff r1 0) rep1 (CmmReg r2) rep2 = r1==r2
+possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2
+ = r1==r2 && end1 > start2 && end2 > start1
+ where
+ end1 = start1 + machRepByteWidth rep1
+ end2 = start2 + machRepByteWidth rep2
+
+possiblySameLoc l1 rep1 (CmmLit _) rep2 = False
+possiblySameLoc l1 rep1 l2 rep2 = True -- Conservative
diff --git a/compiler/codeGen/ClosureInfo.hi-boot-5 b/compiler/codeGen/ClosureInfo.hi-boot-5
new file mode 100644
index 0000000000..2291f93cc6
--- /dev/null
+++ b/compiler/codeGen/ClosureInfo.hi-boot-5
@@ -0,0 +1,4 @@
+__interface ClosureInfo 1 0 where
+__export ClosureInfo ClosureInfo LambdaFormInfo;
+1 data LambdaFormInfo;
+1 data ClosureInfo;
diff --git a/compiler/codeGen/ClosureInfo.hi-boot-6 b/compiler/codeGen/ClosureInfo.hi-boot-6
new file mode 100644
index 0000000000..d313ccde80
--- /dev/null
+++ b/compiler/codeGen/ClosureInfo.hi-boot-6
@@ -0,0 +1,4 @@
+module ClosureInfo where
+
+data LambdaFormInfo
+data ClosureInfo
diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs
new file mode 100644
index 0000000000..84d9dd95ef
--- /dev/null
+++ b/compiler/codeGen/ClosureInfo.lhs
@@ -0,0 +1,951 @@
+%
+% (c) The University of Glasgow 1992-2004
+%
+
+ Data structures which describe closures, and
+ operations over those data structures
+
+ Nothing monadic in here
+
+Much of the rationale for these things is in the ``details'' part of
+the STG paper.
+
+\begin{code}
+module ClosureInfo (
+ ClosureInfo, LambdaFormInfo, SMRep, -- all abstract
+ StandardFormInfo,
+
+ ArgDescr(..), Liveness(..),
+ C_SRT(..), needsSRT,
+
+ mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo,
+ mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape,
+
+ mkClosureInfo, mkConInfo,
+
+ closureSize, closureNonHdrSize,
+ closureGoodStuffSize, closurePtrsSize,
+ slopSize,
+
+ closureName, infoTableLabelFromCI,
+ closureLabelFromCI, closureSRT,
+ closureLFInfo, isLFThunk, closureSMRep, closureUpdReqd,
+ closureNeedsUpdSpace, closureIsThunk,
+ closureSingleEntry, closureReEntrant, isConstrClosure_maybe,
+ closureFunInfo, isStandardFormThunk, isKnownFun,
+
+ enterIdLabel, enterLocalIdLabel, enterReturnPtLabel,
+
+ nodeMustPointToIt,
+ CallMethod(..), getCallMethod,
+
+ blackHoleOnEntry,
+
+ staticClosureRequired,
+ getClosureType,
+
+ isToplevClosure,
+ closureValDescr, closureTypeDescr, -- profiling
+
+ isStaticClosure,
+ cafBlackHoleClosureInfo, seCafBlackHoleClosureInfo,
+
+ staticClosureNeedsLink,
+ ) where
+
+#include "../includes/MachDeps.h"
+#include "HsVersions.h"
+
+import StgSyn
+import SMRep -- all of it
+
+import CLabel
+
+import Constants ( mIN_PAYLOAD_SIZE )
+import Packages ( isDllName, HomeModules )
+import StaticFlags ( opt_SccProfilingOn, opt_OmitBlackHoling,
+ opt_Parallel, opt_DoTickyProfiling )
+import Id ( Id, idType, idArity, idName )
+import DataCon ( DataCon, dataConTyCon, isNullaryRepDataCon, dataConName )
+import Name ( Name, nameUnique, getOccName, getOccString )
+import OccName ( occNameString )
+import Type ( isUnLiftedType, Type, repType, splitTyConApp_maybe )
+import TcType ( tcSplitSigmaTy )
+import TyCon ( isFunTyCon, isAbstractTyCon )
+import BasicTypes ( TopLevelFlag(..), isNotTopLevel, isTopLevel, ipNameName )
+import FastString
+import Outputable
+import Constants
+
+import TypeRep -- TEMP
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-datatypes]{Data types for closure information}
+%* *
+%************************************************************************
+
+Information about a closure, from the code generator's point of view.
+
+A ClosureInfo describes the info pointer of a closure. It has
+enough information
+ a) to construct the info table itself
+ b) to allocate a closure containing that info pointer (i.e.
+ it knows the info table label)
+
+We make a ClosureInfo for
+ - each let binding (both top level and not)
+ - each data constructor (for its shared static and
+ dynamic info tables)
+
+\begin{code}
+data ClosureInfo
+ = ClosureInfo {
+ closureName :: !Name, -- The thing bound to this closure
+ closureLFInfo :: !LambdaFormInfo, -- NOTE: not an LFCon (see below)
+ closureSMRep :: !SMRep, -- representation used by storage mgr
+ closureSRT :: !C_SRT, -- What SRT applies to this closure
+ closureType :: !Type, -- Type of closure (ToDo: remove)
+ closureDescr :: !String -- closure description (for profiling)
+ }
+
+ -- Constructor closures don't have a unique info table label (they use
+ -- the constructor's info table), and they don't have an SRT.
+ | ConInfo {
+ closureCon :: !DataCon,
+ closureSMRep :: !SMRep,
+ closureDllCon :: !Bool -- is in a separate DLL
+ }
+
+-- C_SRT is what StgSyn.SRT gets translated to...
+-- we add a label for the table, and expect only the 'offset/length' form
+
+data C_SRT = NoC_SRT
+ | C_SRT !CLabel !WordOff !StgHalfWord {-bitmap or escape-}
+
+needsSRT :: C_SRT -> Bool
+needsSRT NoC_SRT = False
+needsSRT (C_SRT _ _ _) = True
+\end{code}
+
+%************************************************************************
+%* *
+\subsubsection[LambdaFormInfo-datatype]{@LambdaFormInfo@: source-derivable info}
+%* *
+%************************************************************************
+
+Information about an identifier, from the code generator's point of
+view. Every identifier is bound to a LambdaFormInfo in the
+environment, which gives the code generator enough info to be able to
+tail call or return that identifier.
+
+Note that a closure is usually bound to an identifier, so a
+ClosureInfo contains a LambdaFormInfo.
+
+\begin{code}
+data LambdaFormInfo
+ = LFReEntrant -- Reentrant closure (a function)
+ TopLevelFlag -- True if top level
+ !Int -- Arity. Invariant: always > 0
+ !Bool -- True <=> no fvs
+ ArgDescr -- Argument descriptor (should really be in ClosureInfo)
+
+ | LFCon -- A saturated constructor application
+ DataCon -- The constructor
+
+ | LFThunk -- Thunk (zero arity)
+ TopLevelFlag
+ !Bool -- True <=> no free vars
+ !Bool -- True <=> updatable (i.e., *not* single-entry)
+ StandardFormInfo
+ !Bool -- True <=> *might* be a function type
+
+ | LFUnknown -- Used for function arguments and imported things.
+ -- We know nothing about this closure. Treat like
+ -- updatable "LFThunk"...
+ -- Imported things which we do know something about use
+ -- one of the other LF constructors (eg LFReEntrant for
+ -- known functions)
+ !Bool -- True <=> *might* be a function type
+
+ | LFLetNoEscape -- See LetNoEscape module for precise description of
+ -- these "lets".
+ !Int -- arity;
+
+ | LFBlackHole -- Used for the closures allocated to hold the result
+ -- of a CAF. We want the target of the update frame to
+ -- be in the heap, so we make a black hole to hold it.
+ CLabel -- Flavour (info label, eg CAF_BLACKHOLE_info).
+
+
+-------------------------
+-- An ArgDescr describes the argument pattern of a function
+
+data ArgDescr
+ = ArgSpec -- Fits one of the standard patterns
+ !Int -- RTS type identifier ARG_P, ARG_N, ...
+
+ | ArgGen -- General case
+ Liveness -- Details about the arguments
+
+
+-------------------------
+-- We represent liveness bitmaps as a Bitmap (whose internal
+-- representation really is a bitmap). These are pinned onto case return
+-- vectors to indicate the state of the stack for the garbage collector.
+--
+-- In the compiled program, liveness bitmaps that fit inside a single
+-- word (StgWord) are stored as a single word, while larger bitmaps are
+-- stored as a pointer to an array of words.
+
+data Liveness
+ = SmallLiveness -- Liveness info that fits in one word
+ StgWord -- Here's the bitmap
+
+ | BigLiveness -- Liveness info with a multi-word bitmap
+ CLabel -- Label for the bitmap
+
+
+-------------------------
+-- StandardFormInfo tells whether this thunk has one of
+-- a small number of standard forms
+
+data StandardFormInfo
+ = NonStandardThunk
+ -- Not one of the standard forms
+
+ | SelectorThunk
+ -- A SelectorThunk is of form
+ -- case x of
+ -- con a1,..,an -> ak
+ -- and the constructor is from a single-constr type.
+ WordOff -- 0-origin offset of ak within the "goods" of
+ -- constructor (Recall that the a1,...,an may be laid
+ -- out in the heap in a non-obvious order.)
+
+ | ApThunk
+ -- An ApThunk is of form
+ -- x1 ... xn
+ -- The code for the thunk just pushes x2..xn on the stack and enters x1.
+ -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled
+ -- in the RTS to save space.
+ Int -- Arity, n
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-construction]{Functions which build LFInfos}
+%* *
+%************************************************************************
+
+\begin{code}
+mkLFReEntrant :: TopLevelFlag -- True if top level
+ -> [Id] -- Free vars
+ -> [Id] -- Args
+ -> ArgDescr -- Argument descriptor
+ -> LambdaFormInfo
+
+mkLFReEntrant top fvs args arg_descr
+ = LFReEntrant top (length args) (null fvs) arg_descr
+
+mkLFThunk thunk_ty top fvs upd_flag
+ = ASSERT( not (isUpdatable upd_flag) || not (isUnLiftedType thunk_ty) )
+ LFThunk top (null fvs)
+ (isUpdatable upd_flag)
+ NonStandardThunk
+ (might_be_a_function thunk_ty)
+
+might_be_a_function :: Type -> Bool
+might_be_a_function ty
+ | Just (tc,_) <- splitTyConApp_maybe (repType ty),
+ not (isFunTyCon tc) && not (isAbstractTyCon tc) = False
+ -- don't forget to check for abstract types, which might
+ -- be functions too.
+ | otherwise = True
\end{code}
+
+@mkConLFInfo@ is similar, for constructors.
+
+\begin{code}
+mkConLFInfo :: DataCon -> LambdaFormInfo
+mkConLFInfo con = LFCon con
+
+mkSelectorLFInfo id offset updatable
+ = LFThunk NotTopLevel False updatable (SelectorThunk offset)
+ (might_be_a_function (idType id))
+
+mkApLFInfo id upd_flag arity
+ = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity)
+ (might_be_a_function (idType id))
+\end{code}
+
+Miscellaneous LF-infos.
+
+\begin{code}
+mkLFArgument id = LFUnknown (might_be_a_function (idType id))
+
+mkLFLetNoEscape = LFLetNoEscape
+
+mkLFImported :: Id -> LambdaFormInfo
+mkLFImported id
+ = case idArity id of
+ n | n > 0 -> LFReEntrant TopLevel n True (panic "arg_descr") -- n > 0
+ other -> mkLFArgument id -- Not sure of exact arity
+\end{code}
+
+\begin{code}
+isLFThunk :: LambdaFormInfo -> Bool
+isLFThunk (LFThunk _ _ _ _ _) = True
+isLFThunk (LFBlackHole _) = True
+ -- return True for a blackhole: this function is used to determine
+ -- whether to use the thunk header in SMP mode, and a blackhole
+ -- must have one.
+isLFThunk _ = False
+\end{code}
+
+%************************************************************************
+%* *
+ Building ClosureInfos
+%* *
+%************************************************************************
+
+\begin{code}
+mkClosureInfo :: Bool -- Is static
+ -> Id
+ -> LambdaFormInfo
+ -> Int -> Int -- Total and pointer words
+ -> C_SRT
+ -> String -- String descriptor
+ -> ClosureInfo
+mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info descr
+ = ClosureInfo { closureName = name,
+ closureLFInfo = lf_info,
+ closureSMRep = sm_rep,
+ closureSRT = srt_info,
+ closureType = idType id,
+ closureDescr = descr }
+ where
+ name = idName id
+ sm_rep = chooseSMRep is_static lf_info tot_wds ptr_wds
+
+mkConInfo :: HomeModules
+ -> Bool -- Is static
+ -> DataCon
+ -> Int -> Int -- Total and pointer words
+ -> ClosureInfo
+mkConInfo hmods is_static data_con tot_wds ptr_wds
+ = ConInfo { closureSMRep = sm_rep,
+ closureCon = data_con,
+ closureDllCon = isDllName hmods (dataConName data_con) }
+ where
+ sm_rep = chooseSMRep is_static (mkConLFInfo data_con) tot_wds ptr_wds
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-sizes]{Functions about closure {\em sizes}}
+%* *
+%************************************************************************
+
+\begin{code}
+closureSize :: ClosureInfo -> WordOff
+closureSize cl_info = hdr_size + closureNonHdrSize cl_info
+ where hdr_size | closureIsThunk cl_info = thunkHdrSize
+ | otherwise = fixedHdrSize
+ -- All thunks use thunkHdrSize, even if they are non-updatable.
+ -- this is because we don't have separate closure types for
+ -- updatable vs. non-updatable thunks, so the GC can't tell the
+ -- difference. If we ever have significant numbers of non-
+ -- updatable thunks, it might be worth fixing this.
+
+closureNonHdrSize :: ClosureInfo -> WordOff
+closureNonHdrSize cl_info
+ = tot_wds + computeSlopSize tot_wds cl_info
+ where
+ tot_wds = closureGoodStuffSize cl_info
+
+closureGoodStuffSize :: ClosureInfo -> WordOff
+closureGoodStuffSize cl_info
+ = let (ptrs, nonptrs) = sizes_from_SMRep (closureSMRep cl_info)
+ in ptrs + nonptrs
+
+closurePtrsSize :: ClosureInfo -> WordOff
+closurePtrsSize cl_info
+ = let (ptrs, _) = sizes_from_SMRep (closureSMRep cl_info)
+ in ptrs
+
+-- not exported:
+sizes_from_SMRep :: SMRep -> (WordOff,WordOff)
+sizes_from_SMRep (GenericRep _ ptrs nonptrs _) = (ptrs, nonptrs)
+sizes_from_SMRep BlackHoleRep = (0, 0)
+\end{code}
+
+Computing slop size. WARNING: this looks dodgy --- it has deep
+knowledge of what the storage manager does with the various
+representations...
+
+Slop Requirements: every thunk gets an extra padding word in the
+header, which takes the updated value.
+
+\begin{code}
+slopSize cl_info = computeSlopSize payload_size cl_info
+ where payload_size = closureGoodStuffSize cl_info
+
+computeSlopSize :: WordOff -> ClosureInfo -> WordOff
+computeSlopSize payload_size cl_info
+ = max 0 (minPayloadSize smrep updatable - payload_size)
+ where
+ smrep = closureSMRep cl_info
+ updatable = closureNeedsUpdSpace cl_info
+
+-- we leave space for an update if either (a) the closure is updatable
+-- or (b) it is a static thunk. This is because a static thunk needs
+-- a static link field in a predictable place (after the slop), regardless
+-- of whether it is updatable or not.
+closureNeedsUpdSpace (ClosureInfo { closureLFInfo =
+ LFThunk TopLevel _ _ _ _ }) = True
+closureNeedsUpdSpace cl_info = closureUpdReqd cl_info
+
+minPayloadSize :: SMRep -> Bool -> WordOff
+minPayloadSize smrep updatable
+ = case smrep of
+ BlackHoleRep -> min_upd_size
+ GenericRep _ _ _ _ | updatable -> min_upd_size
+ GenericRep True _ _ _ -> 0 -- static
+ GenericRep False _ _ _ -> mIN_PAYLOAD_SIZE
+ -- ^^^^^___ dynamic
+ where
+ min_upd_size =
+ ASSERT(mIN_PAYLOAD_SIZE <= sIZEOF_StgSMPThunkHeader)
+ 0 -- check that we already have enough
+ -- room for mIN_SIZE_NonUpdHeapObject,
+ -- due to the extra header word in SMP
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[SMreps]{Choosing SM reps}
+%* *
+%************************************************************************
+
+\begin{code}
+chooseSMRep
+ :: Bool -- True <=> static closure
+ -> LambdaFormInfo
+ -> WordOff -> WordOff -- Tot wds, ptr wds
+ -> SMRep
+
+chooseSMRep is_static lf_info tot_wds ptr_wds
+ = let
+ nonptr_wds = tot_wds - ptr_wds
+ closure_type = getClosureType is_static ptr_wds lf_info
+ in
+ GenericRep is_static ptr_wds nonptr_wds closure_type
+
+-- We *do* get non-updatable top-level thunks sometimes. eg. f = g
+-- gets compiled to a jump to g (if g has non-zero arity), instead of
+-- messing around with update frames and PAPs. We set the closure type
+-- to FUN_STATIC in this case.
+
+getClosureType :: Bool -> WordOff -> LambdaFormInfo -> ClosureType
+getClosureType is_static ptr_wds lf_info
+ = case lf_info of
+ LFCon con | is_static && ptr_wds == 0 -> ConstrNoCaf
+ | otherwise -> Constr
+ LFReEntrant _ _ _ _ -> Fun
+ LFThunk _ _ _ (SelectorThunk _) _ -> ThunkSelector
+ LFThunk _ _ _ _ _ -> Thunk
+ _ -> panic "getClosureType"
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-4-questions]{Four major questions about @ClosureInfo@}
+%* *
+%************************************************************************
+
+Be sure to see the stg-details notes about these...
+
+\begin{code}
+nodeMustPointToIt :: LambdaFormInfo -> Bool
+nodeMustPointToIt (LFReEntrant top _ no_fvs _)
+ = not no_fvs || -- Certainly if it has fvs we need to point to it
+ isNotTopLevel top
+ -- If it is not top level we will point to it
+ -- We can have a \r closure with no_fvs which
+ -- is not top level, as the special case in cgRhsClosure
+ -- has been disabled in favour of let floating
+
+ -- For lex_profiling we also access the cost centre for a
+ -- non-inherited function i.e. not top level
+ -- the not top case above ensures this is ok.
+
+nodeMustPointToIt (LFCon _) = True
+
+ -- Strictly speaking, the above two don't need Node to point
+ -- to it if the arity = 0. But this is a *really* unlikely
+ -- situation. If we know it's nil (say) and we are entering
+ -- it.
Eg: let x = [] in x then we will certainly have inlined + -- x, since nil is a simple atom. So we gain little by not + -- having Node point to known zero-arity things. On the other + -- hand, we do lose something; Patrick's code for figuring out + -- when something has been updated but not entered relies on + -- having Node point to the result of an update. SLPJ + -- 27/11/92. + +nodeMustPointToIt (LFThunk _ no_fvs updatable NonStandardThunk _) + = updatable || not no_fvs || opt_SccProfilingOn + -- For the non-updatable (single-entry case): + -- + -- True if has fvs (in which case we need access to them, and we + -- should black-hole it) + -- or profiling (in which case we need to recover the cost centre + -- from inside it) + +nodeMustPointToIt (LFThunk _ no_fvs updatable some_standard_form_thunk _) + = True -- Node must point to any standard-form thunk + +nodeMustPointToIt (LFUnknown _) = True +nodeMustPointToIt (LFBlackHole _) = True -- BH entry may require Node to point +nodeMustPointToIt (LFLetNoEscape _) = False +\end{code} + +The entry conventions depend on the type of closure being entered, +whether or not it has free variables, and whether we're running +sequentially or in parallel. + +\begin{tabular}{lllll} +Closure Characteristics & Parallel & Node Req'd & Argument Passing & Enter Via \\ +Unknown & no & yes & stack & node \\ +Known fun ($\ge$ 1 arg), no fvs & no & no & registers & fast entry (enough args) \\ +\ & \ & \ & \ & slow entry (otherwise) \\ +Known fun ($\ge$ 1 arg), fvs & no & yes & registers & fast entry (enough args) \\ +0 arg, no fvs @\r,\s@ & no & no & n/a & direct entry \\ +0 arg, no fvs @\u@ & no & yes & n/a & node \\ +0 arg, fvs @\r,\s@ & no & yes & n/a & direct entry \\ +0 arg, fvs @\u@ & no & yes & n/a & node \\ + +Unknown & yes & yes & stack & node \\ +Known fun ($\ge$ 1 arg), no fvs & yes & no & registers & fast entry (enough args) \\ +\ & \ & \ & \ & slow entry (otherwise) \\ +Known fun ($\ge$ 1 arg), fvs & yes & yes & registers & node \\ +0 arg, no fvs @\r,\s@ & yes & no & n/a & direct entry \\ +0 arg, no fvs @\u@ & yes & yes & n/a & node \\ +0 arg, fvs @\r,\s@ & yes & yes & n/a & node \\ +0 arg, fvs @\u@ & yes & yes & n/a & node\\ +\end{tabular} + +When black-holing, single-entry closures could also be entered via node +(rather than directly) to catch double-entry. + +\begin{code} +data CallMethod + = EnterIt -- no args, not a function + + | JumpToIt CLabel -- no args, not a function, but we + -- know what its entry code is + + | ReturnIt -- it's a function, but we have + -- zero args to apply to it, so just + -- return it. + + | ReturnCon DataCon -- It's a data constructor, just return it + + | SlowCall -- Unknown fun, or known fun with + -- too few args. + + | DirectEntry -- Jump directly, with args in regs + CLabel -- The code label + Int -- Its arity + +getCallMethod :: HomeModules + -> Name -- Function being applied + -> LambdaFormInfo -- Its info + -> Int -- Number of available arguments + -> CallMethod + +getCallMethod hmods name lf_info n_args + | nodeMustPointToIt lf_info && opt_Parallel + = -- If we're parallel, then we must always enter via node. + -- The reason is that the closure may have been + -- fetched since we allocated it. 
+ EnterIt + +getCallMethod hmods name (LFReEntrant _ arity _ _) n_args + | n_args == 0 = ASSERT( arity /= 0 ) + ReturnIt -- No args at all + | n_args < arity = SlowCall -- Not enough args + | otherwise = DirectEntry (enterIdLabel hmods name) arity + +getCallMethod hmods name (LFCon con) n_args + = ASSERT( n_args == 0 ) + ReturnCon con + +getCallMethod hmods name (LFThunk _ _ updatable std_form_info is_fun) n_args + | is_fun -- Must always "call" a function-typed + = SlowCall -- thing, cannot just enter it [in eval/apply, the entry code + -- is the fast-entry code] + + | updatable || opt_DoTickyProfiling -- to catch double entry + {- OLD: || opt_SMP + I decided to remove this, because in SMP mode it doesn't matter + if we enter the same thunk multiple times, so the optimisation + of jumping directly to the entry code is still valid. --SDM + -} + = ASSERT( n_args == 0 ) EnterIt + + | otherwise -- Jump direct to code for single-entry thunks + = ASSERT( n_args == 0 ) + JumpToIt (thunkEntryLabel hmods name std_form_info updatable) + +getCallMethod hmods name (LFUnknown True) n_args + = SlowCall -- might be a function + +getCallMethod hmods name (LFUnknown False) n_args + = ASSERT2 ( n_args == 0, ppr name <+> ppr n_args ) + EnterIt -- Not a function + +getCallMethod hmods name (LFBlackHole _) n_args + = SlowCall -- Presumably the black hole has by now + -- been updated, but we don't know with + -- what, so we slow call it + +getCallMethod hmods name (LFLetNoEscape 0) n_args + = JumpToIt (enterReturnPtLabel (nameUnique name)) + +getCallMethod hmods name (LFLetNoEscape arity) n_args + | n_args == arity = DirectEntry (enterReturnPtLabel (nameUnique name)) arity + | otherwise = pprPanic "let-no-escape: " (ppr name <+> ppr arity) + +blackHoleOnEntry :: ClosureInfo -> Bool +-- Static closures are never themselves black-holed. +-- Updatable ones will be overwritten with a CAFList cell, which points to a +-- black hole; +-- Single-entry ones have no fvs to plug, and we trust they don't form part +-- of a loop. + +blackHoleOnEntry ConInfo{} = False +blackHoleOnEntry (ClosureInfo { closureLFInfo = lf_info, closureSMRep = rep }) + | isStaticRep rep + = False -- Never black-hole a static closure + + | otherwise + = case lf_info of + LFReEntrant _ _ _ _ -> False + LFLetNoEscape _ -> False + LFThunk _ no_fvs updatable _ _ + -> if updatable + then not opt_OmitBlackHoling + else opt_DoTickyProfiling || not no_fvs + -- the former to catch double entry, + -- and the latter to plug space-leaks. KSW/SDM 1999-04. + + other -> panic "blackHoleOnEntry" -- Should never happen + +isStandardFormThunk :: LambdaFormInfo -> Bool +isStandardFormThunk (LFThunk _ _ _ (SelectorThunk _) _) = True +isStandardFormThunk (LFThunk _ _ _ (ApThunk _) _) = True +isStandardFormThunk other_lf_info = False + +isKnownFun :: LambdaFormInfo -> Bool +isKnownFun (LFReEntrant _ _ _ _) = True +isKnownFun (LFLetNoEscape _) = True +isKnownFun _ = False +\end{code} + +----------------------------------------------------------------------------- +SRT-related stuff + +\begin{code} +staticClosureNeedsLink :: ClosureInfo -> Bool +-- A static closure needs a link field to aid the GC when traversing +-- the static closure graph. But it only needs such a field if either +-- a) it has an SRT +-- b) it's a constructor with one or more pointer fields +-- In case (b), the constructor's fields themselves play the role +-- of the SRT. 
+staticClosureNeedsLink (ClosureInfo { closureSRT = srt }) + = needsSRT srt +staticClosureNeedsLink (ConInfo { closureSMRep = sm_rep, closureCon = con }) + = not (isNullaryRepDataCon con) && not_nocaf_constr + where + not_nocaf_constr = + case sm_rep of + GenericRep _ _ _ ConstrNoCaf -> False + _other -> True +\end{code} + +Avoiding generating entries and info tables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +At present, for every function we generate all of the following, +just in case. But they aren't always all needed, as noted below: + +[NB1: all of this applies only to *functions*. Thunks always +have closure, info table, and entry code.] + +[NB2: All are needed if the function is *exported*, just to play safe.] + + +* Fast-entry code ALWAYS NEEDED + +* Slow-entry code + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) we're in the parallel world and the function has free vars + [Reason: in parallel world, we always enter functions + with free vars via the closure.] + +* The function closure + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) if the function has free vars (ie not top level) + + Why case (a) here? Because if the arg-satis check fails, + UpdatePAP stuffs a pointer to the function closure in the PAP. + [Could be changed; UpdatePAP could stuff in a code ptr instead, + but doesn't seem worth it.] + + [NB: these conditions imply that we might need the closure + without the slow-entry code. Here's how. + + f x y = let g w = ...x..y..w... + in + ...(g t)... + + Here we need a closure for g which contains x and y, + but since the calls are all saturated we just jump to the + fast entry point for g, with R1 pointing to the closure for g.] + + +* Standard info table + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) the function has free vars (ie not top level) + + NB. In the sequential world, (c) is only required so that the function closure has + an info table to point to, to keep the storage manager happy. + If (c) alone is true we could fake up an info table by choosing + one of a standard family of info tables, whose entry code just + bombs out. + + [NB In the parallel world (c) is needed regardless because + we enter functions with free vars via the closure.] + + If (c) is retained, then we'll sometimes generate an info table + (for storage mgr purposes) without slow-entry code. Then we need + to use an error label in the info table to substitute for the absent + slow entry code. 
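+
+These conditions are perhaps easiest to see written down as predicates.
+The sketch below is illustrative only (the record and function names
+are made up, and not part of this module); it simply transcribes
+conditions (a)-(c) above for the slow-entry code and for the function
+closure:
+
+\begin{code}
+-- Illustrative only: when are slow-entry code and a function closure
+-- required? See the discussion above.
+data FunUsage = FunUsage
+ { fuUnsatCalls :: Bool -- (a) any un-saturated calls to the function?
+ , fuPassedAsArg :: Bool -- (b) function passed as an argument?
+ , fuFreeVars :: Bool -- (c) has free vars (ie not top level)?
+ }
+
+slowEntryNeeded :: Bool -> FunUsage -> Bool -- first arg: parallel world?
+slowEntryNeeded parallel u
+ = fuUnsatCalls u || fuPassedAsArg u || (parallel && fuFreeVars u)
+
+closureNeeded :: FunUsage -> Bool
+closureNeeded u
+ = fuUnsatCalls u || fuPassedAsArg u || fuFreeVars u
+\end{code}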
+ +\begin{code} +staticClosureRequired + :: Name + -> StgBinderInfo + -> LambdaFormInfo + -> Bool +staticClosureRequired binder bndr_info + (LFReEntrant top_level _ _ _) -- It's a function + = ASSERT( isTopLevel top_level ) + -- Assumption: it's a top-level, no-free-var binding + not (satCallsOnly bndr_info) + +staticClosureRequired binder other_binder_info other_lf_info = True +\end{code} + +%************************************************************************ +%* * +\subsection[ClosureInfo-misc-funs]{Misc functions about @ClosureInfo@, etc.} +%* * +%************************************************************************ + +\begin{code} + +isStaticClosure :: ClosureInfo -> Bool +isStaticClosure cl_info = isStaticRep (closureSMRep cl_info) + +closureUpdReqd :: ClosureInfo -> Bool +closureUpdReqd ClosureInfo{ closureLFInfo = lf_info } = lfUpdatable lf_info +closureUpdReqd ConInfo{} = False + +lfUpdatable :: LambdaFormInfo -> Bool +lfUpdatable (LFThunk _ _ upd _ _) = upd +lfUpdatable (LFBlackHole _) = True + -- Black-hole closures are allocated to receive the results of an + -- alg case with a named default... so they need to be updated. +lfUpdatable _ = False + +closureIsThunk :: ClosureInfo -> Bool +closureIsThunk ClosureInfo{ closureLFInfo = lf_info } = isLFThunk lf_info +closureIsThunk ConInfo{} = False + +closureSingleEntry :: ClosureInfo -> Bool +closureSingleEntry (ClosureInfo { closureLFInfo = LFThunk _ _ upd _ _}) = not upd +closureSingleEntry other_closure = False + +closureReEntrant :: ClosureInfo -> Bool +closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant _ _ _ _ }) = True +closureReEntrant other_closure = False + +isConstrClosure_maybe :: ClosureInfo -> Maybe DataCon +isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con +isConstrClosure_maybe _ = Nothing + +closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) +closureFunInfo (ClosureInfo { closureLFInfo = LFReEntrant _ arity _ arg_desc}) + = Just (arity, arg_desc) +closureFunInfo _ + = Nothing +\end{code} + +\begin{code} +isToplevClosure :: ClosureInfo -> Bool +isToplevClosure (ClosureInfo { closureLFInfo = lf_info }) + = case lf_info of + LFReEntrant TopLevel _ _ _ -> True + LFThunk TopLevel _ _ _ _ -> True + other -> False +isToplevClosure _ = False +\end{code} + +Label generation. + +\begin{code} +infoTableLabelFromCI :: ClosureInfo -> CLabel +infoTableLabelFromCI (ClosureInfo { closureName = name, + closureLFInfo = lf_info, + closureSMRep = rep }) + = case lf_info of + LFBlackHole info -> info + + LFThunk _ _ upd_flag (SelectorThunk offset) _ -> + mkSelectorInfoLabel upd_flag offset + + LFThunk _ _ upd_flag (ApThunk arity) _ -> + mkApInfoTableLabel upd_flag arity + + LFThunk{} -> mkLocalInfoTableLabel name + + LFReEntrant _ _ _ _ -> mkLocalInfoTableLabel name + + other -> panic "infoTableLabelFromCI" + +infoTableLabelFromCI (ConInfo { closureCon = con, + closureSMRep = rep, + closureDllCon = dll }) + | isStaticRep rep = mkStaticInfoTableLabel name dll + | otherwise = mkConInfoTableLabel name dll + where + name = dataConName con + +-- ClosureInfo for a closure (as opposed to a constructor) is always local +closureLabelFromCI (ClosureInfo { closureName = nm }) = mkLocalClosureLabel nm +closureLabelFromCI _ = panic "closureLabelFromCI" + +-- thunkEntryLabel is a local help function, not exported. It's used from both +-- entryLabelFromCI and getCallMethod. 
+
+thunkEntryLabel hmods thunk_id (ApThunk arity) is_updatable
+ = enterApLabel is_updatable arity
+thunkEntryLabel hmods thunk_id (SelectorThunk offset) upd_flag
+ = enterSelectorLabel upd_flag offset
+thunkEntryLabel hmods thunk_id _ is_updatable
+ = enterIdLabel hmods thunk_id
+
+enterApLabel is_updatable arity
+ | tablesNextToCode = mkApInfoTableLabel is_updatable arity
+ | otherwise = mkApEntryLabel is_updatable arity
+
+enterSelectorLabel upd_flag offset
+ | tablesNextToCode = mkSelectorInfoLabel upd_flag offset
+ | otherwise = mkSelectorEntryLabel upd_flag offset
+
+enterIdLabel hmods id
+ | tablesNextToCode = mkInfoTableLabel hmods id
+ | otherwise = mkEntryLabel hmods id
+
+enterLocalIdLabel id
+ | tablesNextToCode = mkLocalInfoTableLabel id
+ | otherwise = mkLocalEntryLabel id
+
+enterReturnPtLabel name
+ | tablesNextToCode = mkReturnInfoLabel name
+ | otherwise = mkReturnPtLabel name
+\end{code}
+
+
+We need a black-hole closure info to pass to @allocDynClosure@ when we
+want to allocate the black hole on entry to a CAF. These are the only
+ways to build an LFBlackHole, maintaining the invariant that it really
+is a black hole and not something else.
+
+\begin{code}
+cafBlackHoleClosureInfo (ClosureInfo { closureName = nm,
+ closureType = ty })
+ = ClosureInfo { closureName = nm,
+ closureLFInfo = LFBlackHole mkCAFBlackHoleInfoTableLabel,
+ closureSMRep = BlackHoleRep,
+ closureSRT = NoC_SRT,
+ closureType = ty,
+ closureDescr = "" }
+cafBlackHoleClosureInfo _ = panic "cafBlackHoleClosureInfo"
+
+seCafBlackHoleClosureInfo (ClosureInfo { closureName = nm,
+ closureType = ty })
+ = ClosureInfo { closureName = nm,
+ closureLFInfo = LFBlackHole mkSECAFBlackHoleInfoTableLabel,
+ closureSMRep = BlackHoleRep,
+ closureSRT = NoC_SRT,
+ closureType = ty,
+ closureDescr = "" }
+seCafBlackHoleClosureInfo _ = panic "seCafBlackHoleClosureInfo"
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-Profiling-funs]{Misc functions for profiling info.}
+%* *
+%************************************************************************
+
+Profiling requires two pieces of information to be determined for
+each closure's info table --- description and type.
+
+The description is stored directly in the @CClosureInfoTable@ when the
+info table is built.
+
+The type is determined from the type information stored with the @Id@
+in the closure info using @closureTypeDescr@.
+ +\begin{code} +closureValDescr, closureTypeDescr :: ClosureInfo -> String +closureValDescr (ClosureInfo {closureDescr = descr}) + = descr +closureValDescr (ConInfo {closureCon = con}) + = occNameString (getOccName con) + +closureTypeDescr (ClosureInfo { closureType = ty }) + = getTyDescription ty +closureTypeDescr (ConInfo { closureCon = data_con }) + = occNameString (getOccName (dataConTyCon data_con)) + +getTyDescription :: Type -> String +getTyDescription ty + = case (tcSplitSigmaTy ty) of { (_, _, tau_ty) -> + case tau_ty of + TyVarTy _ -> "*" + AppTy fun _ -> getTyDescription fun + FunTy _ res -> '-' : '>' : fun_result res + TyConApp tycon _ -> getOccString tycon + NoteTy (FTVNote _) ty -> getTyDescription ty + PredTy sty -> getPredTyDescription sty + ForAllTy _ ty -> getTyDescription ty + } + where + fun_result (FunTy _ res) = '>' : fun_result res + fun_result other = getTyDescription other + +getPredTyDescription (ClassP cl tys) = getOccString cl +getPredTyDescription (IParam ip ty) = getOccString (ipNameName ip) +\end{code} + + diff --git a/compiler/codeGen/ClosureInfo.lhs-boot b/compiler/codeGen/ClosureInfo.lhs-boot new file mode 100644 index 0000000000..b069905d3e --- /dev/null +++ b/compiler/codeGen/ClosureInfo.lhs-boot @@ -0,0 +1,6 @@ +\begin{code} +module ClosureInfo where + +data LambdaFormInfo +data ClosureInfo +\end{code}
\ No newline at end of file diff --git a/compiler/codeGen/CodeGen.lhs b/compiler/codeGen/CodeGen.lhs new file mode 100644 index 0000000000..e8d83a5a43 --- /dev/null +++ b/compiler/codeGen/CodeGen.lhs @@ -0,0 +1,343 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[CodeGen]{@CodeGen@: main module of the code generator} + +This module says how things get going at the top level. + +@codeGen@ is the interface to the outside world. The \tr{cgTop*} +functions drive the mangling of top-level bindings. + +%************************************************************************ +%* * +\subsection[codeGen-outside-interface]{The code generator's offering to the world} +%* * +%************************************************************************ + +\begin{code} +module CodeGen ( codeGen ) where + +#include "HsVersions.h" + +-- Kludge (??) so that CgExpr is reached via at least one non-SOURCE +-- import. Before, that wasn't the case, and CM therefore didn't +-- bother to compile it. +import CgExpr ( {-NOTHING!-} ) -- DO NOT DELETE THIS IMPORT +import CgProf +import CgMonad +import CgBindery ( CgIdInfo, addBindC, addBindsC, getCgIdInfo, + cgIdInfoId ) +import CgClosure ( cgTopRhsClosure ) +import CgCon ( cgTopRhsCon, cgTyCon ) +import CgUtils ( cmmRegOffW, emitRODataLits, cmmNeWord ) + +import CLabel +import Cmm +import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr ) +import PprCmm ( pprCmms ) +import MachOp ( wordRep, MachHint(..) ) + +import StgSyn +import PrelNames ( gHC_PRIM, rOOT_MAIN, mAIN, pREL_TOP_HANDLER ) +import Packages ( HomeModules ) +import DynFlags ( DynFlags(..), DynFlag(..), dopt ) +import StaticFlags ( opt_SccProfilingOn ) + +import HscTypes ( ForeignStubs(..), TypeEnv, typeEnvTyCons ) +import CostCentre ( CollectedCCs ) +import Id ( Id, idName, setIdName ) +import Name ( nameSrcLoc, nameOccName, nameUnique, isInternalName, mkExternalName ) +import OccName ( mkLocalOcc ) +import TyCon ( TyCon ) +import Module ( Module, mkModule ) +import ErrUtils ( dumpIfSet_dyn, showPass ) +import Panic ( assertPanic ) + +#ifdef DEBUG +import Outputable +#endif +\end{code} + +\begin{code} +codeGen :: DynFlags + -> HomeModules + -> Module + -> [TyCon] + -> ForeignStubs + -> [Module] -- directly-imported modules + -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering. + -> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs + -> IO [Cmm] -- Output + +codeGen dflags hmods this_mod data_tycons foreign_stubs imported_mods + cost_centre_info stg_binds + = do + { showPass dflags "CodeGen" + ; let way = buildTag dflags + main_mod = mainModIs dflags + +-- Why? +-- ; mapM_ (\x -> seq x (return ())) data_tycons + + ; code_stuff <- initC dflags hmods this_mod $ do + { cmm_binds <- mapM (getCmm . 
cgTopBinding dflags hmods) stg_binds
+ ; cmm_tycons <- mapM cgTyCon data_tycons
+ ; cmm_init <- getCmm (mkModuleInit dflags hmods way cost_centre_info
+ this_mod main_mod
+ foreign_stubs imported_mods)
+ ; return (cmm_binds ++ concat cmm_tycons ++ [cmm_init])
+ }
+ -- Put datatype_stuff after code_stuff, because the
+ -- datatype closure table (for enumeration types) refers to
+ -- (say) PrelBase_True_closure, which is defined in
+ -- code_stuff
+
+ ; dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" (pprCmms code_stuff)
+
+ ; return code_stuff }
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[codegen-init]{Module initialisation code}
+%* *
+%************************************************************************
+
+/* -----------------------------------------------------------------------------
+ Module initialisation
+
+ The module initialisation code looks like this, roughly:
+
+ FN(__stginit_Foo) {
+ JMP_(__stginit_Foo_1_p)
+ }
+
+ FN(__stginit_Foo_1_p) {
+ ...
+ }
+
+ We have one version of the init code with a module version and the
+ 'way' attached to it. The version number helps to catch cases
+ where modules are not compiled in dependency order before being
+ linked: if a module has been compiled since any modules which depend on
+ it, then the latter modules will refer to a different version in their
+ init blocks and a link error will ensue.
+
+ The 'way' suffix helps to catch cases where modules compiled in different
+ ways are linked together (eg. profiled and non-profiled).
+
+ We provide a plain, unadorned, version of the module init code
+ which just jumps to the version with the label and way attached. The
+ reason for this is that when using foreign exports, the caller of
+ startupHaskell() must supply the name of the init function for the "top"
+ module in the program, and we don't want to require that this name
+ has the version and way info appended to it.
+ -------------------------------------------------------------------------- */
+
+We initialise the module tree by keeping a work-stack,
+ * pointed to by Sp
+ * that grows downward
+ * Sp points to the last occupied slot
+
+
+\begin{code}
+mkModuleInit
+ :: DynFlags
+ -> HomeModules
+ -> String -- the "way"
+ -> CollectedCCs -- cost centre info
+ -> Module
+ -> Module -- name of the Main module
+ -> ForeignStubs
+ -> [Module]
+ -> Code
+mkModuleInit dflags hmods way cost_centre_info this_mod main_mod foreign_stubs imported_mods
+ = do {
+ if opt_SccProfilingOn
+ then do { -- Allocate the static boolean that records if this
+ -- module has been registered already
+ emitData Data [CmmDataLabel moduleRegdLabel,
+ CmmStaticLit zeroCLit]
+
+ ; emitSimpleProc real_init_lbl $ do
+ { ret_blk <- forkLabelledCode ret_code
+
+ ; init_blk <- forkLabelledCode $ do
+ { mod_init_code; stmtC (CmmBranch ret_blk) }
+
+ ; stmtC (CmmCondBranch (cmmNeWord (CmmLit zeroCLit) mod_reg_val)
+ ret_blk)
+ ; stmtC (CmmBranch init_blk)
+ }
+ }
+ else emitSimpleProc real_init_lbl ret_code
+
+ -- Make the "plain" procedure jump to the "real" init procedure
+ ; emitSimpleProc plain_init_lbl jump_to_init
+
+ -- When compiling the module in which the 'main' function lives,
+ -- (that is, this_mod == main_mod)
+ -- we inject an extra stg_init procedure for stg_init_ZCMain, for the
+ -- RTS to invoke. We must consult the -main-is flag in case the
+ -- user specified a different function to Main.main
+ ; whenC (this_mod == main_mod)
+ (emitSimpleProc plain_main_init_lbl jump_to_init)
+ }
+ where
+ plain_init_lbl = mkPlainModuleInitLabel hmods this_mod
+ real_init_lbl = mkModuleInitLabel hmods this_mod way
+ plain_main_init_lbl = mkPlainModuleInitLabel hmods rOOT_MAIN
+
+ jump_to_init = stmtC (CmmJump (mkLblExpr real_init_lbl) [])
+
+ mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) wordRep
+
+ -- Main refers to GHC.TopHandler.runIO, so make sure we call the
+ -- init function for GHC.TopHandler.
+ extra_imported_mods
+ | this_mod == main_mod = [pREL_TOP_HANDLER]
+ | otherwise = []
+
+ mod_init_code = do
+ { -- Set mod_reg to 1 to record that we've been here
+ stmtC (CmmStore (mkLblExpr moduleRegdLabel) (CmmLit (mkIntCLit 1)))
+
+ -- Now do local stuff
+ ; initCostCentres cost_centre_info
+ ; mapCs (registerModuleImport hmods way)
+ (imported_mods++extra_imported_mods)
+ }
+
+ -- The return-code pops the work stack by
+ -- incrementing Sp, and then jumps to the popped item
+ ret_code = stmtsC [ CmmAssign spReg (cmmRegOffW spReg 1)
+ , CmmJump (CmmLoad (cmmRegOffW spReg (-1)) wordRep) [] ]
+
+-----------------------
+registerModuleImport :: HomeModules -> String -> Module -> Code
+registerModuleImport hmods way mod
+ | mod == gHC_PRIM
+ = nopC
+ | otherwise -- Push the init procedure onto the work stack
+ = stmtsC [ CmmAssign spReg (cmmRegOffW spReg (-1))
+ , CmmStore (CmmReg spReg) (mkLblExpr (mkModuleInitLabel hmods mod way)) ]
+\end{code}
+
+
+
+Cost-centre profiling: Besides the usual stuff, we must produce
+declarations for the cost-centres defined in this module;
+
+(The local cost-centres involved in this are passed into the
+code-generator.)
+
+\begin{code}
+initCostCentres :: CollectedCCs -> Code
+-- Emit the declarations, and return code to register them
+initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs)
+ | not opt_SccProfilingOn = nopC
+ | otherwise
+ = do { mapM_ emitCostCentreDecl local_CCs
+ ; mapM_ emitCostCentreStackDecl singleton_CCSs
+ ; mapM_ emitRegisterCC local_CCs
+ ; mapM_ emitRegisterCCS singleton_CCSs
+ }
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[codegen-top-bindings]{Converting top-level STG bindings}
+%* *
+%************************************************************************
+
+@cgTopBinding@ is only used for top-level bindings, since they need
+to be allocated statically (not in the heap) and need to be labelled.
+No unboxed bindings can happen at top level.
+
+In the code below, the static bindings are accumulated in the
+@MkCgState@, and transferred into the ``statics'' slot by @forkStatics@.
+This is so that we can write the top level processing in a compositional
+style, with the increasing static environment being plumbed as a state
+variable.
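+
+The recursive case below relies on knot-tying: @fixC@ feeds the final
+environment to the compilation of each right-hand side before that
+environment has been fully built. As a minimal standalone sketch of
+the same pattern (illustrative only: it uses @mfix@ from base, and the
+names @Env@ and @compileGroup@ are made up, not part of GHC):
+
+	import Control.Monad.Fix ( mfix )
+	import qualified Data.Map as M
+
+	type Env = M.Map String Int
+
+	-- Each right-hand side may consult the *final* environment,
+	-- including its own binding group; laziness does the rest.
+	compileGroup :: [(String, Env -> Int)] -> IO Env
+	compileGroup pairs = mfix $ \ final_env ->
+	    return (M.fromList [ (n, rhs final_env) | (n, rhs) <- pairs ])
+
+For instance, @compileGroup [("x", const 1), ("y", \e -> e M.! "x" + 1)]@
+yields an environment mapping @x@ to 1 and @y@ to 2.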
+ +\begin{code} +cgTopBinding :: DynFlags -> HomeModules -> (StgBinding,[(Id,[Id])]) -> Code +cgTopBinding dflags hmods (StgNonRec id rhs, srts) + = do { id' <- maybeExternaliseId dflags id + ; mapM_ (mkSRT hmods [id']) srts + ; (id,info) <- cgTopRhs id' rhs + ; addBindC id info -- Add the *un-externalised* Id to the envt, + -- so we find it when we look up occurrences + } + +cgTopBinding dflags hmods (StgRec pairs, srts) + = do { let (bndrs, rhss) = unzip pairs + ; bndrs' <- mapFCs (maybeExternaliseId dflags) bndrs + ; let pairs' = zip bndrs' rhss + ; mapM_ (mkSRT hmods bndrs') srts + ; _new_binds <- fixC (\ new_binds -> do + { addBindsC new_binds + ; mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' }) + ; nopC } + +mkSRT :: HomeModules -> [Id] -> (Id,[Id]) -> Code +mkSRT hmods these (id,[]) = nopC +mkSRT hmods these (id,ids) + = do { ids <- mapFCs remap ids + ; id <- remap id + ; emitRODataLits (mkSRTLabel (idName id)) + (map (CmmLabel . mkClosureLabel hmods . idName) ids) + } + where + -- Sigh, better map all the ids against the environment in + -- case they've been externalised (see maybeExternaliseId below). + remap id = case filter (==id) these of + (id':_) -> returnFC id' + [] -> do { info <- getCgIdInfo id; return (cgIdInfoId info) } + +-- Urgh! I tried moving the forkStatics call from the rhss of cgTopRhs +-- to enclose the listFCs in cgTopBinding, but that tickled the +-- statics "error" call in initC. I DON'T UNDERSTAND WHY! + +cgTopRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) + -- The Id is passed along for setting up a binding... + -- It's already been externalised if necessary + +cgTopRhs bndr (StgRhsCon cc con args) + = forkStatics (cgTopRhsCon bndr con args) + +cgTopRhs bndr (StgRhsClosure cc bi fvs upd_flag srt args body) + = ASSERT(null fvs) -- There should be no free variables + setSRTLabel (mkSRTLabel (idName bndr)) $ + forkStatics (cgTopRhsClosure bndr cc bi srt upd_flag args body) +\end{code} + + +%************************************************************************ +%* * +\subsection{Stuff to support splitting} +%* * +%************************************************************************ + +If we're splitting the object, we need to externalise all the top-level names +(and then make sure we only use the externalised one in any C label we use +which refers to this name). + +\begin{code} +maybeExternaliseId :: DynFlags -> Id -> FCode Id +maybeExternaliseId dflags id + | dopt Opt_SplitObjs dflags, -- Externalise the name for -split-objs + isInternalName name = do { mod <- moduleName + ; returnFC (setIdName id (externalise mod)) } + | otherwise = returnFC id + where + externalise mod = mkExternalName uniq mod new_occ Nothing loc + name = idName id + uniq = nameUnique name + new_occ = mkLocalOcc uniq (nameOccName name) + loc = nameSrcLoc name + -- We want to conjure up a name that can't clash with any + -- existing name. So we generate + -- Mod_$L243foo + -- where 243 is the unique. +\end{code} diff --git a/compiler/codeGen/SMRep.lhs b/compiler/codeGen/SMRep.lhs new file mode 100644 index 0000000000..c807703b13 --- /dev/null +++ b/compiler/codeGen/SMRep.lhs @@ -0,0 +1,361 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[SMRep]{Storage manager representations of closure} + +This is here, rather than in ClosureInfo, just to keep nhc happy. +Other modules should access this info through ClosureInfo. 
+
+\begin{code}
+module SMRep (
+ -- Words and bytes
+ StgWord, StgHalfWord,
+ hALF_WORD_SIZE, hALF_WORD_SIZE_IN_BITS,
+ WordOff, ByteOff,
+
+ -- Argument/return representations
+ CgRep(..), nonVoidArg,
+ argMachRep, primRepToCgRep, primRepHint,
+ isFollowableArg, isVoidArg,
+ isFloatingArg, isNonPtrArg, is64BitArg,
+ separateByPtrFollowness,
+ cgRepSizeW, cgRepSizeB,
+ retAddrSizeW,
+
+ typeCgRep, idCgRep, tyConCgRep, typeHint,
+
+ -- Closure representation
+ SMRep(..), ClosureType(..),
+ isStaticRep,
+ fixedHdrSize, arrWordsHdrSize, arrPtrsHdrSize,
+ profHdrSize, thunkHdrSize,
+ tablesNextToCode,
+ smRepClosureType, smRepClosureTypeInt,
+
+ rET_SMALL, rET_VEC_SMALL, rET_BIG, rET_VEC_BIG
+ ) where
+
+#include "HsVersions.h"
+#include "../includes/MachDeps.h"
+
+import Id ( Id, idType )
+import Type ( Type, typePrimRep, PrimRep(..) )
+import TyCon ( TyCon, tyConPrimRep )
+import MachOp -- ( MachRep(..), MachHint(..), wordRep )
+import StaticFlags ( opt_SccProfilingOn, opt_GranMacros,
+ opt_Unregisterised )
+import Constants
+import Outputable
+
+import DATA_WORD
+\end{code}
+
+
+%************************************************************************
+%* *
+ Words and bytes
+%* *
+%************************************************************************
+
+\begin{code}
+type WordOff = Int -- Word offset, or word count
+type ByteOff = Int -- Byte offset, or byte count
+\end{code}
+
+StgWord is a Haskell type representing an StgWord on the target platform.
+
+\begin{code}
+#if SIZEOF_HSWORD == 4
+type StgWord = Word32
+type StgHalfWord = Word16
+hALF_WORD_SIZE = 2 :: ByteOff
+hALF_WORD_SIZE_IN_BITS = 16 :: Int
+#elif SIZEOF_HSWORD == 8
+type StgWord = Word64
+type StgHalfWord = Word32
+hALF_WORD_SIZE = 4 :: ByteOff
+hALF_WORD_SIZE_IN_BITS = 32 :: Int
+#else
+#error unknown SIZEOF_HSWORD
+#endif
+\end{code}
+
+
+%************************************************************************
+%* *
+ CgRep
+%* *
+%************************************************************************
+
+A CgRep is an abstraction of a Type which tells the code generator
+all it needs to know about the calling convention for arguments (and
+results) of that type. In particular, the ArgReps of a function's
+arguments are used to decide which of the RTS's generic apply
+functions to call when applying an unknown function.
+
+It contains more information than the back-end data type MachRep,
+so one can easily convert from CgRep -> MachRep. (Except that
+there's no MachRep for a VoidRep.)
+
+It distinguishes
+ pointers from non-pointers (we sort the pointers together
+ when building closures)
+
+ void from other types: a void argument is different from no argument
+
+All 64-bit types map to the same CgRep, because they're passed in the
+same register, but a PtrArg is still different from a NonPtrArg
+because the function's entry convention has to take into account the
+pointer-hood of arguments for the purposes of describing the stack on
+entry to the garbage collector.
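+
+As an illustration of that first point, the sketch below (a hypothetical
+helper, not exported from this module and not GHC's actual selection
+code) builds the kind of suffix the RTS uses to name its generic apply
+functions, e.g. something like @stg_ap_pp@ for two pointer arguments.
+It uses the @CgRep@ type defined just below:
+
+\begin{code}
+-- Illustrative only: one letter per non-void argument representation.
+applyFunSuffix :: [CgRep] -> String
+applyFunSuffix reps = concatMap tag (filter nonVoidArg reps)
+ where
+ tag PtrArg = "p"
+ tag NonPtrArg = "n"
+ tag LongArg = "l"
+ tag FloatArg = "f"
+ tag DoubleArg = "d"
+ tag VoidArg = "" -- filtered out above
+\end{code}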
+
+\begin{code}
+data CgRep
+ = VoidArg -- Void
+ | PtrArg -- Word-sized Ptr
+ | NonPtrArg -- Word-sized non-pointer
+ | LongArg -- 64-bit non-pointer
+ | FloatArg -- 32-bit float
+ | DoubleArg -- 64-bit float
+ deriving Eq
+
+instance Outputable CgRep where
+ ppr VoidArg = ptext SLIT("V_")
+ ppr PtrArg = ptext SLIT("P_")
+ ppr NonPtrArg = ptext SLIT("I_")
+ ppr LongArg = ptext SLIT("L_")
+ ppr FloatArg = ptext SLIT("F_")
+ ppr DoubleArg = ptext SLIT("D_")
+
+argMachRep :: CgRep -> MachRep
+argMachRep PtrArg = wordRep
+argMachRep NonPtrArg = wordRep
+argMachRep LongArg = I64
+argMachRep FloatArg = F32
+argMachRep DoubleArg = F64
+argMachRep VoidArg = panic "argMachRep:VoidRep"
+
+primRepToCgRep :: PrimRep -> CgRep
+primRepToCgRep VoidRep = VoidArg
+primRepToCgRep PtrRep = PtrArg
+primRepToCgRep IntRep = NonPtrArg
+primRepToCgRep WordRep = NonPtrArg
+primRepToCgRep Int64Rep = LongArg
+primRepToCgRep Word64Rep = LongArg
+primRepToCgRep AddrRep = NonPtrArg
+primRepToCgRep FloatRep = FloatArg
+primRepToCgRep DoubleRep = DoubleArg
+
+primRepHint :: PrimRep -> MachHint
+primRepHint VoidRep = panic "primRepHint:VoidRep"
+primRepHint PtrRep = PtrHint
+primRepHint IntRep = SignedHint
+primRepHint WordRep = NoHint
+primRepHint Int64Rep = SignedHint
+primRepHint Word64Rep = NoHint
+primRepHint AddrRep = PtrHint -- NB! PtrHint, but NonPtrArg
+primRepHint FloatRep = FloatHint
+primRepHint DoubleRep = FloatHint
+
+idCgRep :: Id -> CgRep
+idCgRep = typeCgRep . idType
+
+tyConCgRep :: TyCon -> CgRep
+tyConCgRep = primRepToCgRep . tyConPrimRep
+
+typeCgRep :: Type -> CgRep
+typeCgRep = primRepToCgRep . typePrimRep
+
+typeHint :: Type -> MachHint
+typeHint = primRepHint . typePrimRep
+\end{code}
+
+Whether or not the thing is a pointer that the garbage-collector
+should follow. Or, to put it another (less confusing) way, whether
+the object in question is a heap object.
+
+Depending on the outcome, this predicate determines what stack
+the pointer/object possibly will have to be saved onto, and the
+computation of GC liveness info.
+
+\begin{code}
+isFollowableArg :: CgRep -> Bool -- True <=> points to a heap object
+isFollowableArg PtrArg = True
+isFollowableArg other = False
+
+isVoidArg :: CgRep -> Bool
+isVoidArg VoidArg = True
+isVoidArg other = False
+
+nonVoidArg :: CgRep -> Bool
+nonVoidArg VoidArg = False
+nonVoidArg other = True
+
+-- isFloatingArg is used to distinguish @Double@ and @Float@ which
+-- cause inadvertent numeric conversions if you aren't jolly careful.
+-- See codeGen/CgCon:cgTopRhsCon.
+
+isFloatingArg :: CgRep -> Bool
+isFloatingArg DoubleArg = True
+isFloatingArg FloatArg = True
+isFloatingArg _ = False
+
+isNonPtrArg :: CgRep -> Bool
+-- Identify anything which is one word large and not a pointer.
+isNonPtrArg NonPtrArg = True
+isNonPtrArg other = False
+
+is64BitArg :: CgRep -> Bool
+is64BitArg LongArg = True
+is64BitArg _ = False
+\end{code}
+
+\begin{code}
+separateByPtrFollowness :: [(CgRep,a)] -> ([(CgRep,a)], [(CgRep,a)])
+-- Returns (ptrs, non-ptrs)
+separateByPtrFollowness things
+ = sep_things things [] []
+ -- accumulating params for follow-able and don't-follow things...
+  where
+    sep_things []              bs us = (reverse bs, reverse us)
+    sep_things ((PtrArg,a):ts) bs us = sep_things ts ((PtrArg,a):bs) us
+    sep_things (t:ts)          bs us = sep_things ts bs (t:us)
+\end{code}
+
+\begin{code}
+cgRepSizeB :: CgRep -> ByteOff
+cgRepSizeB DoubleArg = dOUBLE_SIZE
+cgRepSizeB LongArg   = wORD64_SIZE
+cgRepSizeB VoidArg   = 0
+cgRepSizeB _         = wORD_SIZE
+
+cgRepSizeW :: CgRep -> WordOff	-- NB: WordOff, not ByteOff; this is a word count
+cgRepSizeW DoubleArg = dOUBLE_SIZE `quot` wORD_SIZE
+cgRepSizeW LongArg   = wORD64_SIZE `quot` wORD_SIZE
+cgRepSizeW VoidArg   = 0
+cgRepSizeW _         = 1
+
+retAddrSizeW :: WordOff
+retAddrSizeW = 1	-- One word
+\end{code}
+
+%************************************************************************
+%*                                                                      *
+\subsubsection[SMRep-datatype]{@SMRep@---storage manager representation}
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+data SMRep
+     -- Static closures have an extra static link field at the end.
+  = GenericRep		-- GC routines consult sizes in info tbl
+	Bool		-- True <=> This is a static closure.  Affects how
+			-- we garbage-collect it
+	!Int		-- # ptr words
+	!Int		-- # non-ptr words
+	ClosureType	-- closure type
+
+  | BlackHoleRep
+
+data ClosureType	-- Corresponds 1-1 with the varieties of closures
+			-- implemented by the RTS.  Compare with
+			-- ghc/includes/ClosureTypes.h.
+    = Constr
+    | ConstrNoCaf
+    | Fun
+    | Thunk
+    | ThunkSelector
+\end{code}
+
+Size of a closure header.
+
+\begin{code}
+fixedHdrSize :: WordOff
+fixedHdrSize = sTD_HDR_SIZE + profHdrSize + granHdrSize
+
+profHdrSize :: WordOff
+profHdrSize | opt_SccProfilingOn = pROF_HDR_SIZE
+	    | otherwise          = 0
+
+granHdrSize :: WordOff
+granHdrSize | opt_GranMacros = gRAN_HDR_SIZE
+	    | otherwise      = 0
+
+arrWordsHdrSize :: ByteOff
+arrWordsHdrSize = fixedHdrSize*wORD_SIZE + sIZEOF_StgArrWords_NoHdr
+
+arrPtrsHdrSize :: ByteOff
+arrPtrsHdrSize = fixedHdrSize*wORD_SIZE + sIZEOF_StgMutArrPtrs_NoHdr
+
+-- Thunks have an extra header word on SMP, so the update doesn't
+-- splat the payload.
+thunkHdrSize :: WordOff
+thunkHdrSize = fixedHdrSize + smp_hdr
+  where smp_hdr = sIZEOF_StgSMPThunkHeader `quot` wORD_SIZE
+\end{code}
+
+\begin{code}
+-- The IA64 and PowerPC64 manglers don't place tables next to code
+tablesNextToCode :: Bool
+#if defined(ia64_TARGET_ARCH) || defined(powerpc64_TARGET_ARCH)
+tablesNextToCode = False
+#else
+tablesNextToCode = not opt_Unregisterised
+#endif
+\end{code}
+
+\begin{code}
+isStaticRep :: SMRep -> Bool
+isStaticRep (GenericRep is_static _ _ _) = is_static
+isStaticRep BlackHoleRep                 = False
+\end{code}
+
+\begin{code}
+#include "../includes/ClosureTypes.h"
+-- Defines CONSTR, CONSTR_1_0 etc
+
+smRepClosureType :: SMRep -> ClosureType
+smRepClosureType (GenericRep _ _ _ ty) = ty
+smRepClosureType BlackHoleRep          = panic "smRepClosureType: black hole"
+
+smRepClosureTypeInt :: SMRep -> Int
+smRepClosureTypeInt (GenericRep False 1 0 Constr) = CONSTR_1_0
+smRepClosureTypeInt (GenericRep False 0 1 Constr) = CONSTR_0_1
+smRepClosureTypeInt (GenericRep False 2 0 Constr) = CONSTR_2_0
+smRepClosureTypeInt (GenericRep False 1 1 Constr) = CONSTR_1_1
+smRepClosureTypeInt (GenericRep False 0 2 Constr) = CONSTR_0_2
+smRepClosureTypeInt (GenericRep False _ _ Constr) = CONSTR
+
+smRepClosureTypeInt (GenericRep False 1 0 Fun) = FUN_1_0
+smRepClosureTypeInt (GenericRep False 0 1 Fun) = FUN_0_1
+smRepClosureTypeInt (GenericRep False 2 0 Fun) = FUN_2_0
+smRepClosureTypeInt (GenericRep False 1 1 Fun) = FUN_1_1
+smRepClosureTypeInt (GenericRep False 0 2 Fun) = FUN_0_2
+smRepClosureTypeInt (GenericRep False _ _ Fun) = FUN
+
+smRepClosureTypeInt (GenericRep False 1 0 Thunk) = THUNK_1_0
+smRepClosureTypeInt (GenericRep False 0 1 Thunk) = THUNK_0_1
+smRepClosureTypeInt (GenericRep False 2 0 Thunk) = THUNK_2_0
+smRepClosureTypeInt (GenericRep False 1 1 Thunk) = THUNK_1_1
+smRepClosureTypeInt (GenericRep False 0 2 Thunk) = THUNK_0_2
+smRepClosureTypeInt (GenericRep False _ _ Thunk) = THUNK
+
+smRepClosureTypeInt (GenericRep False _ _ ThunkSelector) = THUNK_SELECTOR
+
+smRepClosureTypeInt (GenericRep True _ _ Constr)      = CONSTR_STATIC
+smRepClosureTypeInt (GenericRep True _ _ ConstrNoCaf) = CONSTR_NOCAF_STATIC
+smRepClosureTypeInt (GenericRep True _ _ Fun)         = FUN_STATIC
+smRepClosureTypeInt (GenericRep True _ _ Thunk)       = THUNK_STATIC
+
+smRepClosureTypeInt BlackHoleRep = BLACKHOLE
+
+smRepClosureTypeInt _ = panic "smRepClosureTypeInt"
+
+-- We export these ones
+rET_SMALL     = (RET_SMALL     :: Int)
+rET_VEC_SMALL = (RET_VEC_SMALL :: Int)
+rET_BIG       = (RET_BIG       :: Int)
+rET_VEC_BIG   = (RET_VEC_BIG   :: Int)
+\end{code}
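+
+A usage sketch (illustrative only; conRep is a hypothetical binding,
+not part of this module): a dynamic constructor closure with one
+pointer field and no non-pointer fields is described by
+
+	conRep :: SMRep
+	conRep = GenericRep False 1 0 Constr	-- dynamic: 1 ptr word, 0 non-ptr words
+
+so smRepClosureTypeInt conRep yields CONSTR_1_0 from ClosureTypes.h,
+and the closure occupies fixedHdrSize + 1 words in the heap.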