author | Simon Marlow <simonmar@microsoft.com> | 2006-04-07 02:05:11 +0000
committer | Simon Marlow <simonmar@microsoft.com> | 2006-04-07 02:05:11 +0000
commit | 0065d5ab628975892cea1ec7303f968c3338cbe1 (patch)
tree | 8e2afe0ab48ee33cf95009809d67c9649573ef92 /compiler/codeGen
parent | 28a464a75e14cece5db40f2765a29348273ff2d2 (diff)
download | haskell-0065d5ab628975892cea1ec7303f968c3338cbe1.tar.gz
Reorganisation of the source tree
Most of the other users of the fptools build system have migrated to
Cabal, and with the move to darcs we can now flatten the source tree
without losing history, so here goes.
The main change is that the ghc/ subdir is gone, and most of what it
contained is now at the top level. The build system now makes no
pretense at being multi-project; it is just the GHC build system.
No doubt this will break many things, and there will be a period of
instability while we fix the dependencies. A straightforward build
should work, but I haven't yet fixed binary/source distributions.
Changes to the Building Guide will follow, too.
Diffstat (limited to 'compiler/codeGen')
33 files changed, 10447 insertions, 0 deletions
diff --git a/compiler/codeGen/Bitmap.hs b/compiler/codeGen/Bitmap.hs
new file mode 100644
index 0000000000..c0b490978c
--- /dev/null
+++ b/compiler/codeGen/Bitmap.hs
@@ -0,0 +1,79 @@
+--
+-- (c) The University of Glasgow 2003
+--
+
+-- Functions for constructing bitmaps, which are used in various
+-- places in generated code (stack frame liveness masks, function
+-- argument liveness masks, SRT bitmaps).
+
+module Bitmap (
+        Bitmap, mkBitmap,
+        intsToBitmap, intsToReverseBitmap,
+        mAX_SMALL_BITMAP_SIZE
+  ) where
+
+#include "HsVersions.h"
+#include "../includes/MachDeps.h"
+
+import SMRep
+import Constants
+import DATA_BITS
+
+{-|
+A bitmap represented by a sequence of 'StgWord's on the /target/
+architecture.  These are used for bitmaps in info tables and other
+generated code which need to be emitted as sequences of StgWords.
+-}
+type Bitmap = [StgWord]
+
+-- | Make a bitmap from a sequence of bits
+mkBitmap :: [Bool] -> Bitmap
+mkBitmap [] = []
+mkBitmap stuff = chunkToBitmap chunk : mkBitmap rest
+  where (chunk, rest) = splitAt wORD_SIZE_IN_BITS stuff
+
+chunkToBitmap :: [Bool] -> StgWord
+chunkToBitmap chunk =
+  foldr (.|.) 0 [ 1 `shiftL` n | (True,n) <- zip chunk [0..] ]
+
+-- | Make a bitmap where the slots specified are the /ones/ in the bitmap.
+-- eg. @[0,1,3], size 4 ==> 0xb@.
+--
+-- The list of @Int@s /must/ be already sorted.
+intsToBitmap :: Int -> [Int] -> Bitmap
+intsToBitmap size slots{- must be sorted -}
+  | size <= 0 = []
+  | otherwise =
+    (foldr (.|.) 0 (map (1 `shiftL`) these)) :
+    intsToBitmap (size - wORD_SIZE_IN_BITS)
+                 (map (\x -> x - wORD_SIZE_IN_BITS) rest)
+  where (these,rest) = span (<wORD_SIZE_IN_BITS) slots
+
+-- | Make a bitmap where the slots specified are the /zeros/ in the bitmap.
+-- eg. @[0,1,3], size 4 ==> 0x4@ (we leave any bits outside the size as zero,
+-- just to make the bitmap easier to read).
+--
+-- The list of @Int@s /must/ be already sorted.
+intsToReverseBitmap :: Int -> [Int] -> Bitmap
+intsToReverseBitmap size slots{- must be sorted -}
+  | size <= 0 = []
+  | otherwise =
+    (foldr xor init (map (1 `shiftL`) these)) :
+    intsToReverseBitmap (size - wORD_SIZE_IN_BITS)
+                        (map (\x -> x - wORD_SIZE_IN_BITS) rest)
+  where (these,rest) = span (<wORD_SIZE_IN_BITS) slots
+        init
+          | size >= wORD_SIZE_IN_BITS = complement 0
+          | otherwise                 = (1 `shiftL` size) - 1
+
+{- |
+Magic number, must agree with @BITMAP_BITS_SHIFT@ in InfoTables.h.
+Some kinds of bitmap pack a size\/bitmap into a single word if
+possible, or fall back to an external pointer when the bitmap is too
+large.  This value represents the largest size of bitmap that can be
+packed into a single word.
+-} +mAX_SMALL_BITMAP_SIZE :: Int +mAX_SMALL_BITMAP_SIZE | wORD_SIZE == 4 = 27 + | otherwise = 58 + diff --git a/compiler/codeGen/CgBindery.hi-boot-5 b/compiler/codeGen/CgBindery.hi-boot-5 new file mode 100644 index 0000000000..f375fcc6e1 --- /dev/null +++ b/compiler/codeGen/CgBindery.hi-boot-5 @@ -0,0 +1,7 @@ +__interface CgBindery 1 0 where +__export CgBindery CgBindings CgIdInfo VolatileLoc StableLoc nukeVolatileBinds; +1 type CgBindings = VarEnv.IdEnv CgIdInfo; +1 data CgIdInfo; +1 data VolatileLoc; +1 data StableLoc; +1 nukeVolatileBinds :: CgBindings -> CgBindings ; diff --git a/compiler/codeGen/CgBindery.hi-boot-6 b/compiler/codeGen/CgBindery.hi-boot-6 new file mode 100644 index 0000000000..7d1f300a86 --- /dev/null +++ b/compiler/codeGen/CgBindery.hi-boot-6 @@ -0,0 +1,8 @@ +module CgBindery where + +type CgBindings = VarEnv.IdEnv CgIdInfo +data CgIdInfo +data VolatileLoc +data StableLoc + +nukeVolatileBinds :: CgBindings -> CgBindings diff --git a/compiler/codeGen/CgBindery.lhs b/compiler/codeGen/CgBindery.lhs new file mode 100644 index 0000000000..f78edda655 --- /dev/null +++ b/compiler/codeGen/CgBindery.lhs @@ -0,0 +1,494 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[CgBindery]{Utility functions related to doing @CgBindings@} + +\begin{code} +module CgBindery ( + CgBindings, CgIdInfo, + StableLoc, VolatileLoc, + + cgIdInfoId, cgIdInfoArgRep, cgIdInfoLF, + + stableIdInfo, heapIdInfo, + letNoEscapeIdInfo, idInfoToAmode, + + addBindC, addBindsC, + + nukeVolatileBinds, + nukeDeadBindings, + getLiveStackSlots, + + bindArgsToStack, rebindToStack, + bindNewToNode, bindNewToReg, bindArgsToRegs, + bindNewToTemp, + getArgAmode, getArgAmodes, + getCgIdInfo, + getCAddrModeIfVolatile, getVolatileRegs, + maybeLetNoEscape, + ) where + +#include "HsVersions.h" + +import CgMonad +import CgHeapery ( getHpRelOffset ) +import CgStackery ( freeStackSlots, getSpRelOffset ) +import CgUtils ( cgLit, cmmOffsetW ) +import CLabel ( mkClosureLabel, pprCLabel ) +import ClosureInfo ( mkLFImported, mkLFArgument, LambdaFormInfo ) + +import Cmm +import PprCmm ( {- instance Outputable -} ) +import SMRep ( CgRep(..), WordOff, isFollowableArg, + isVoidArg, cgRepSizeW, argMachRep, + idCgRep, typeCgRep ) +import Id ( Id, idName ) +import VarEnv +import VarSet ( varSetElems ) +import Literal ( literalType ) +import Maybes ( catMaybes ) +import Name ( isExternalName ) +import StgSyn ( StgArg, StgLiveVars, GenStgArg(..), isStgTypeArg ) +import Unique ( Uniquable(..) ) +import UniqSet ( elementOfUniqSet ) +import Outputable +\end{code} + + +%************************************************************************ +%* * +\subsection[Bindery-datatypes]{Data types} +%* * +%************************************************************************ + +@(CgBinding a b)@ is a type of finite maps from a to b. + +The assumption used to be that @lookupCgBind@ must get exactly one +match. This is {\em completely wrong} in the case of compiling +letrecs (where knot-tying is used). An initial binding is fed in (and +never evaluated); eventually, a correct binding is put into the +environment. So there can be two bindings for a given name. 
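To see the knot-tying concretely, here is a minimal, self-contained sketch (editorial; not code from this patch) of a map whose entries are computed against the finished map itself. Laziness ties the knot, so the never-forced initial bindings are superseded before anything inspects them.

\begin{code}
-- Editorial sketch, not from the patch: knot-tying in miniature.
-- Data.Map.size forces only the map's spine, never its values, so each
-- value thunk may safely mention the final map env'.
import qualified Data.Map as Map

tieKnot :: [String] -> Map.Map String Int
tieKnot names = env'   -- tieKnot ["f","g"] == fromList [("f",2),("g",2)]
  where
    env' = Map.fromList [ (n, Map.size env') | n <- names ]
\end{code}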
+ +\begin{code} +type CgBindings = IdEnv CgIdInfo + +data CgIdInfo + = CgIdInfo + { cg_id :: Id -- Id that this is the info for + -- Can differ from the Id at occurrence sites by + -- virtue of being externalised, for splittable C + , cg_rep :: CgRep + , cg_vol :: VolatileLoc + , cg_stb :: StableLoc + , cg_lf :: LambdaFormInfo } + +mkCgIdInfo id vol stb lf + = CgIdInfo { cg_id = id, cg_vol = vol, cg_stb = stb, + cg_lf = lf, cg_rep = idCgRep id } + +voidIdInfo id = CgIdInfo { cg_id = id, cg_vol = NoVolatileLoc + , cg_stb = VoidLoc, cg_lf = mkLFArgument id + , cg_rep = VoidArg } + -- Used just for VoidRep things + +data VolatileLoc -- These locations die across a call + = NoVolatileLoc + | RegLoc CmmReg -- In one of the registers (global or local) + | VirHpLoc VirtualHpOffset -- Hp+offset (address of closure) + | VirNodeLoc VirtualHpOffset -- Cts of offset indirect from Node + -- ie *(Node+offset) +\end{code} + +@StableLoc@ encodes where an Id can be found, used by +the @CgBindings@ environment in @CgBindery@. + +\begin{code} +data StableLoc + = NoStableLoc + + | VirStkLoc VirtualSpOffset -- The thing is held in this + -- stack slot + + | VirStkLNE VirtualSpOffset -- A let-no-escape thing; the + -- value is this stack pointer + -- (as opposed to the contents of the slot) + + | StableLoc CmmExpr + | VoidLoc -- Used only for VoidRep variables. They never need to + -- be saved, so it makes sense to treat treat them as + -- having a stable location +\end{code} + +\begin{code} +instance Outputable CgIdInfo where + ppr (CgIdInfo id rep vol stb lf) + = ppr id <+> ptext SLIT("-->") <+> vcat [ppr vol, ppr stb] + +instance Outputable VolatileLoc where + ppr NoVolatileLoc = empty + ppr (RegLoc r) = ptext SLIT("reg") <+> ppr r + ppr (VirHpLoc v) = ptext SLIT("vh") <+> ppr v + ppr (VirNodeLoc v) = ptext SLIT("vn") <+> ppr v + +instance Outputable StableLoc where + ppr NoStableLoc = empty + ppr VoidLoc = ptext SLIT("void") + ppr (VirStkLoc v) = ptext SLIT("vs") <+> ppr v + ppr (VirStkLNE v) = ptext SLIT("lne") <+> ppr v + ppr (StableLoc a) = ptext SLIT("amode") <+> ppr a +\end{code} + +%************************************************************************ +%* * +\subsection[Bindery-idInfo]{Manipulating IdInfo} +%* * +%************************************************************************ + +\begin{code} +stableIdInfo id amode lf_info = mkCgIdInfo id NoVolatileLoc (StableLoc amode) lf_info +heapIdInfo id offset lf_info = mkCgIdInfo id (VirHpLoc offset) NoStableLoc lf_info +letNoEscapeIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLNE sp) lf_info +stackIdInfo id sp lf_info = mkCgIdInfo id NoVolatileLoc (VirStkLoc sp) lf_info +nodeIdInfo id offset lf_info = mkCgIdInfo id (VirNodeLoc offset) NoStableLoc lf_info +regIdInfo id reg lf_info = mkCgIdInfo id (RegLoc reg) NoStableLoc lf_info + +idInfoToAmode :: CgIdInfo -> FCode CmmExpr +idInfoToAmode info + = case cg_vol info of { + RegLoc reg -> returnFC (CmmReg reg) ; + VirNodeLoc nd_off -> returnFC (CmmLoad (cmmOffsetW (CmmReg nodeReg) nd_off) mach_rep) ; + VirHpLoc hp_off -> getHpRelOffset hp_off ; + NoVolatileLoc -> + + case cg_stb info of + StableLoc amode -> returnFC amode + VirStkLoc sp_off -> do { sp_rel <- getSpRelOffset sp_off + ; return (CmmLoad sp_rel mach_rep) } + + VirStkLNE sp_off -> getSpRelOffset sp_off + + VoidLoc -> return $ pprPanic "idInfoToAmode: void" (ppr (cg_id info)) + -- We return a 'bottom' amode, rather than panicing now + -- In this way getArgAmode returns a pair of (VoidArg, bottom) + -- and that's exactly what we 
want + + NoStableLoc -> pprPanic "idInfoToAmode: no loc" (ppr (cg_id info)) + } + where + mach_rep = argMachRep (cg_rep info) + +cgIdInfoId :: CgIdInfo -> Id +cgIdInfoId = cg_id + +cgIdInfoLF :: CgIdInfo -> LambdaFormInfo +cgIdInfoLF = cg_lf + +cgIdInfoArgRep :: CgIdInfo -> CgRep +cgIdInfoArgRep = cg_rep + +maybeLetNoEscape (CgIdInfo { cg_stb = VirStkLNE sp_off }) = Just sp_off +maybeLetNoEscape other = Nothing +\end{code} + +%************************************************************************ +%* * +\subsection[CgMonad-bindery]{Monad things for fiddling with @CgBindings@} +%* * +%************************************************************************ + +.There are three basic routines, for adding (@addBindC@), modifying +(@modifyBindC@) and looking up (@getCgIdInfo@) bindings. + +A @Id@ is bound to a @(VolatileLoc, StableLoc)@ triple. +The name should not already be bound. (nice ASSERT, eh?) + +\begin{code} +addBindC :: Id -> CgIdInfo -> Code +addBindC name stuff_to_bind = do + binds <- getBinds + setBinds $ extendVarEnv binds name stuff_to_bind + +addBindsC :: [(Id, CgIdInfo)] -> Code +addBindsC new_bindings = do + binds <- getBinds + let new_binds = foldl (\ binds (name,info) -> extendVarEnv binds name info) + binds + new_bindings + setBinds new_binds + +modifyBindC :: Id -> (CgIdInfo -> CgIdInfo) -> Code +modifyBindC name mangle_fn = do + binds <- getBinds + setBinds $ modifyVarEnv mangle_fn binds name + +getCgIdInfo :: Id -> FCode CgIdInfo +getCgIdInfo id + = do { -- Try local bindings first + ; local_binds <- getBinds + ; case lookupVarEnv local_binds id of { + Just info -> return info ; + Nothing -> do + + { -- Try top-level bindings + static_binds <- getStaticBinds + ; case lookupVarEnv static_binds id of { + Just info -> return info ; + Nothing -> + + -- Should be imported; make up a CgIdInfo for it + let + name = idName id + in + if isExternalName name then do + hmods <- getHomeModules + let ext_lbl = CmmLit (CmmLabel (mkClosureLabel hmods name)) + return (stableIdInfo id ext_lbl (mkLFImported id)) + else + if isVoidArg (idCgRep id) then + -- Void things are never in the environment + return (voidIdInfo id) + else + -- Bug + cgLookupPanic id + }}}} + + +cgLookupPanic :: Id -> FCode a +cgLookupPanic id + = do static_binds <- getStaticBinds + local_binds <- getBinds + srt <- getSRTLabel + pprPanic "cgPanic" + (vcat [ppr id, + ptext SLIT("static binds for:"), + vcat [ ppr (cg_id info) | info <- varEnvElts static_binds ], + ptext SLIT("local binds for:"), + vcat [ ppr (cg_id info) | info <- varEnvElts local_binds ], + ptext SLIT("SRT label") <+> pprCLabel srt + ]) +\end{code} + +%************************************************************************ +%* * +\subsection[Bindery-nuke-volatile]{Nuking volatile bindings} +%* * +%************************************************************************ + +We sometimes want to nuke all the volatile bindings; we must be sure +we don't leave any (NoVolatile, NoStable) binds around... 
+ +\begin{code} +nukeVolatileBinds :: CgBindings -> CgBindings +nukeVolatileBinds binds + = mkVarEnv (foldr keep_if_stable [] (varEnvElts binds)) + where + keep_if_stable (CgIdInfo { cg_stb = NoStableLoc }) acc = acc + keep_if_stable info acc + = (cg_id info, info { cg_vol = NoVolatileLoc }) : acc +\end{code} + + +%************************************************************************ +%* * +\subsection[lookup-interface]{Interface functions to looking up bindings} +%* * +%************************************************************************ + +\begin{code} +getCAddrModeIfVolatile :: Id -> FCode (Maybe CmmExpr) +getCAddrModeIfVolatile id + = do { info <- getCgIdInfo id + ; case cg_stb info of + NoStableLoc -> do -- Aha! So it is volatile! + amode <- idInfoToAmode info + return $ Just amode + a_stable_loc -> return Nothing } +\end{code} + +@getVolatileRegs@ gets a set of live variables, and returns a list of +all registers on which these variables depend. These are the regs +which must be saved and restored across any C calls. If a variable is +both in a volatile location (depending on a register) {\em and} a +stable one (notably, on the stack), we modify the current bindings to +forget the volatile one. + +\begin{code} +getVolatileRegs :: StgLiveVars -> FCode [GlobalReg] + +getVolatileRegs vars = do + do { stuff <- mapFCs snaffle_it (varSetElems vars) + ; returnFC $ catMaybes stuff } + where + snaffle_it var = do + { info <- getCgIdInfo var + ; let + -- commoned-up code... + consider_reg reg + = -- We assume that all regs can die across C calls + -- We leave it to the save-macros to decide which + -- regs *really* need to be saved. + case cg_stb info of + NoStableLoc -> returnFC (Just reg) -- got one! + is_a_stable_loc -> do + { -- has both volatile & stable locations; + -- force it to rely on the stable location + modifyBindC var nuke_vol_bind + ; return Nothing } + + ; case cg_vol info of + RegLoc (CmmGlobal reg) -> consider_reg reg + VirNodeLoc _ -> consider_reg node + other_loc -> returnFC Nothing -- Local registers + } + + nuke_vol_bind info = info { cg_vol = NoVolatileLoc } +\end{code} + +\begin{code} +getArgAmode :: StgArg -> FCode (CgRep, CmmExpr) +getArgAmode (StgVarArg var) + = do { info <- getCgIdInfo var + ; amode <- idInfoToAmode info + ; return (cgIdInfoArgRep info, amode ) } + +getArgAmode (StgLitArg lit) + = do { cmm_lit <- cgLit lit + ; return (typeCgRep (literalType lit), CmmLit cmm_lit) } + +getArgAmode (StgTypeArg _) = panic "getArgAmode: type arg" + +getArgAmodes :: [StgArg] -> FCode [(CgRep, CmmExpr)] +getArgAmodes [] = returnFC [] +getArgAmodes (atom:atoms) + | isStgTypeArg atom = getArgAmodes atoms + | otherwise = do { amode <- getArgAmode atom + ; amodes <- getArgAmodes atoms + ; return ( amode : amodes ) } +\end{code} + +%************************************************************************ +%* * +\subsection[binding-and-rebinding-interface]{Interface functions for binding and re-binding names} +%* * +%************************************************************************ + +\begin{code} +bindArgsToStack :: [(Id, VirtualSpOffset)] -> Code +bindArgsToStack args + = mapCs bind args + where + bind(id, offset) = addBindC id (stackIdInfo id offset (mkLFArgument id)) + +bindArgsToRegs :: [(Id, GlobalReg)] -> Code +bindArgsToRegs args + = mapCs bind args + where + bind (arg, reg) = bindNewToReg arg (CmmGlobal reg) (mkLFArgument arg) + +bindNewToNode :: Id -> VirtualHpOffset -> LambdaFormInfo -> Code +bindNewToNode id offset lf_info + = addBindC id (nodeIdInfo id 
offset lf_info) + +-- Create a new temporary whose unique is that in the id, +-- bind the id to it, and return the addressing mode for the +-- temporary. +bindNewToTemp :: Id -> FCode CmmReg +bindNewToTemp id + = do addBindC id (regIdInfo id temp_reg lf_info) + return temp_reg + where + uniq = getUnique id + temp_reg = CmmLocal (LocalReg uniq (argMachRep (idCgRep id))) + lf_info = mkLFArgument id -- Always used of things we + -- know nothing about + +bindNewToReg :: Id -> CmmReg -> LambdaFormInfo -> Code +bindNewToReg name reg lf_info + = addBindC name info + where + info = mkCgIdInfo name (RegLoc reg) NoStableLoc lf_info +\end{code} + +\begin{code} +rebindToStack :: Id -> VirtualSpOffset -> Code +rebindToStack name offset + = modifyBindC name replace_stable_fn + where + replace_stable_fn info = info { cg_stb = VirStkLoc offset } +\end{code} + +%************************************************************************ +%* * +\subsection[CgMonad-deadslots]{Finding dead stack slots} +%* * +%************************************************************************ + +nukeDeadBindings does the following: + + - Removes all bindings from the environment other than those + for variables in the argument to nukeDeadBindings. + - Collects any stack slots so freed, and returns them to the stack free + list. + - Moves the virtual stack pointer to point to the topmost used + stack locations. + +You can have multi-word slots on the stack (where a Double# used to +be, for instance); if dead, such a slot will be reported as *several* +offsets (one per word). + +Probably *naughty* to look inside monad... + +\begin{code} +nukeDeadBindings :: StgLiveVars -- All the *live* variables + -> Code +nukeDeadBindings live_vars = do + binds <- getBinds + let (dead_stk_slots, bs') = + dead_slots live_vars + [] [] + [ (cg_id b, b) | b <- varEnvElts binds ] + setBinds $ mkVarEnv bs' + freeStackSlots dead_stk_slots +\end{code} + +Several boring auxiliary functions to do the dirty work. + +\begin{code} +dead_slots :: StgLiveVars + -> [(Id,CgIdInfo)] + -> [VirtualSpOffset] + -> [(Id,CgIdInfo)] + -> ([VirtualSpOffset], [(Id,CgIdInfo)]) + +-- dead_slots carries accumulating parameters for +-- filtered bindings, dead slots +dead_slots live_vars fbs ds [] + = (ds, reverse fbs) -- Finished; rm the dups, if any + +dead_slots live_vars fbs ds ((v,i):bs) + | v `elementOfUniqSet` live_vars + = dead_slots live_vars ((v,i):fbs) ds bs + -- Live, so don't record it in dead slots + -- Instead keep it in the filtered bindings + + | otherwise + = case cg_stb i of + VirStkLoc offset + | size > 0 + -> dead_slots live_vars fbs ([offset-size+1 .. offset] ++ ds) bs + + _ -> dead_slots live_vars fbs ds bs + where + size :: WordOff + size = cgRepSizeW (cg_rep i) +\end{code} + +\begin{code} +getLiveStackSlots :: FCode [VirtualSpOffset] +-- Return the offsets of slots in stack containig live pointers +getLiveStackSlots + = do { binds <- getBinds + ; return [off | CgIdInfo { cg_stb = VirStkLoc off, + cg_rep = rep } <- varEnvElts binds, + isFollowableArg rep] } +\end{code} diff --git a/compiler/codeGen/CgBindery.lhs-boot b/compiler/codeGen/CgBindery.lhs-boot new file mode 100644 index 0000000000..e504a6a9ba --- /dev/null +++ b/compiler/codeGen/CgBindery.lhs-boot @@ -0,0 +1,11 @@ +\begin{code} +module CgBindery where +import VarEnv( IdEnv ) + +data CgIdInfo +data VolatileLoc +data StableLoc +type CgBindings = IdEnv CgIdInfo + +nukeVolatileBinds :: CgBindings -> CgBindings +\end{code}
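The CgBindery boot interfaces above (hi-boot-5, hi-boot-6, lhs-boot) all serve one purpose: breaking an import cycle by standing in for the real module. A minimal sketch of the same pattern in today's .hs-boot syntax (hypothetical modules A and B, not from this patch):

-- A.hs-boot: a trimmed "promise" of A's interface, compiled before B
module A where
data T

-- B.hs: imports the boot interface rather than the real A
module B where
import {-# SOURCE #-} A
f :: T -> T
f x = x

-- A.hs: the real module may now import B without creating a cycle
module A where
import B
data T = MkT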
\ No newline at end of file diff --git a/compiler/codeGen/CgCallConv.hs b/compiler/codeGen/CgCallConv.hs new file mode 100644 index 0000000000..f463255807 --- /dev/null +++ b/compiler/codeGen/CgCallConv.hs @@ -0,0 +1,512 @@ +----------------------------------------------------------------------------- +-- +-- CgCallConv +-- +-- The datatypes and functions here encapsulate the +-- calling and return conventions used by the code generator. +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + + +module CgCallConv ( + -- Argument descriptors + mkArgDescr, argDescrType, + + -- Liveness + isBigLiveness, buildContLiveness, mkRegLiveness, + smallLiveness, mkLivenessCLit, + + -- Register assignment + assignCallRegs, assignReturnRegs, assignPrimOpCallRegs, + + -- Calls + constructSlowCall, slowArgs, slowCallPattern, + + -- Returns + CtrlReturnConvention(..), + ctrlReturnConvAlg, + dataReturnConvPrim, + getSequelAmode + ) where + +#include "HsVersions.h" + +import CgUtils ( emitRODataLits, mkWordCLit ) +import CgMonad + +import Constants ( mAX_FAMILY_SIZE_FOR_VEC_RETURNS, + mAX_Vanilla_REG, mAX_Float_REG, + mAX_Double_REG, mAX_Long_REG, + mAX_Real_Vanilla_REG, mAX_Real_Float_REG, + mAX_Real_Double_REG, mAX_Real_Long_REG, + bITMAP_BITS_SHIFT + ) + +import ClosureInfo ( ArgDescr(..), Liveness(..) ) +import CgStackery ( getSpRelOffset ) +import SMRep +import MachOp ( wordRep ) +import Cmm ( CmmExpr(..), GlobalReg(..), CmmLit(..), CmmReg(..), node ) +import CmmUtils ( mkLblExpr ) +import CLabel +import Maybes ( mapCatMaybes ) +import Id ( Id ) +import Name ( Name ) +import TyCon ( TyCon, tyConFamilySize ) +import Bitmap ( Bitmap, mAX_SMALL_BITMAP_SIZE, + mkBitmap, intsToReverseBitmap ) +import Util ( isn'tIn, sortLe ) +import StaticFlags ( opt_Unregisterised ) +import FastString ( LitString ) +import Outputable +import DATA_BITS + + +------------------------------------------------------------------------- +-- +-- Making argument descriptors +-- +-- An argument descriptor describes the layout of args on the stack, +-- both for * GC (stack-layout) purposes, and +-- * saving/restoring registers when a heap-check fails +-- +-- Void arguments aren't important, therefore (contrast constructSlowCall) +-- +------------------------------------------------------------------------- + +-- bring in ARG_P, ARG_N, etc. 
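-- Editorial worked example for the functions below (not in the patch):
-- for argument reps [NonPtrArg, DoubleArg, PtrArg] on a 32-bit target,
--   argBits ==> [True, True, True, False]   -- the Double# takes two words
-- and no stdPattern covers an N/D/P shape, so mkArgDescr falls back to
-- ArgGen with a 4-bit liveness bitmap whose single zero marks the pointer.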
+#include "../includes/StgFun.h" + +------------------------- +argDescrType :: ArgDescr -> Int +-- The "argument type" RTS field type +argDescrType (ArgSpec n) = n +argDescrType (ArgGen liveness) + | isBigLiveness liveness = ARG_GEN_BIG + | otherwise = ARG_GEN + + +mkArgDescr :: Name -> [Id] -> FCode ArgDescr +mkArgDescr nm args + = case stdPattern arg_reps of + Just spec_id -> return (ArgSpec spec_id) + Nothing -> do { liveness <- mkLiveness nm size bitmap + ; return (ArgGen liveness) } + where + arg_reps = filter nonVoidArg (map idCgRep args) + -- Getting rid of voids eases matching of standard patterns + + bitmap = mkBitmap arg_bits + arg_bits = argBits arg_reps + size = length arg_bits + +argBits :: [CgRep] -> [Bool] -- True for non-ptr, False for ptr +argBits [] = [] +argBits (PtrArg : args) = False : argBits args +argBits (arg : args) = take (cgRepSizeW arg) (repeat True) ++ argBits args + +stdPattern :: [CgRep] -> Maybe Int +stdPattern [] = Just ARG_NONE -- just void args, probably + +stdPattern [PtrArg] = Just ARG_P +stdPattern [FloatArg] = Just ARG_F +stdPattern [DoubleArg] = Just ARG_D +stdPattern [LongArg] = Just ARG_L +stdPattern [NonPtrArg] = Just ARG_N + +stdPattern [NonPtrArg,NonPtrArg] = Just ARG_NN +stdPattern [NonPtrArg,PtrArg] = Just ARG_NP +stdPattern [PtrArg,NonPtrArg] = Just ARG_PN +stdPattern [PtrArg,PtrArg] = Just ARG_PP + +stdPattern [NonPtrArg,NonPtrArg,NonPtrArg] = Just ARG_NNN +stdPattern [NonPtrArg,NonPtrArg,PtrArg] = Just ARG_NNP +stdPattern [NonPtrArg,PtrArg,NonPtrArg] = Just ARG_NPN +stdPattern [NonPtrArg,PtrArg,PtrArg] = Just ARG_NPP +stdPattern [PtrArg,NonPtrArg,NonPtrArg] = Just ARG_PNN +stdPattern [PtrArg,NonPtrArg,PtrArg] = Just ARG_PNP +stdPattern [PtrArg,PtrArg,NonPtrArg] = Just ARG_PPN +stdPattern [PtrArg,PtrArg,PtrArg] = Just ARG_PPP + +stdPattern [PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPP +stdPattern [PtrArg,PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPPP +stdPattern [PtrArg,PtrArg,PtrArg,PtrArg,PtrArg,PtrArg] = Just ARG_PPPPPP +stdPattern other = Nothing + + +------------------------------------------------------------------------- +-- +-- Liveness info +-- +------------------------------------------------------------------------- + +mkLiveness :: Name -> Int -> Bitmap -> FCode Liveness +mkLiveness name size bits + | size > mAX_SMALL_BITMAP_SIZE -- Bitmap does not fit in one word + = do { let lbl = mkBitmapLabel name + ; emitRODataLits lbl ( mkWordCLit (fromIntegral size) + : map mkWordCLit bits) + ; return (BigLiveness lbl) } + + | otherwise -- Bitmap fits in one word + = let + small_bits = case bits of + [] -> 0 + [b] -> fromIntegral b + _ -> panic "livenessToAddrMode" + in + return (smallLiveness size small_bits) + +smallLiveness :: Int -> StgWord -> Liveness +smallLiveness size small_bits = SmallLiveness bits + where bits = fromIntegral size .|. (small_bits `shiftL` bITMAP_BITS_SHIFT) + +------------------- +isBigLiveness :: Liveness -> Bool +isBigLiveness (BigLiveness _) = True +isBigLiveness (SmallLiveness _) = False + +------------------- +mkLivenessCLit :: Liveness -> CmmLit +mkLivenessCLit (BigLiveness lbl) = CmmLabel lbl +mkLivenessCLit (SmallLiveness bits) = mkWordCLit bits + + +------------------------------------------------------------------------- +-- +-- Bitmap describing register liveness +-- across GC when doing a "generic" heap check +-- (a RET_DYN stack frame). +-- +-- NB. Must agree with these macros (currently in StgMacros.h): +-- GET_NON_PTRS(), GET_PTRS(), GET_LIVENESS(). 
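The one-word packing performed by smallLiveness above is easiest to see with concrete numbers. A standalone restatement (editorial; it assumes a 32-bit target, where bITMAP_BITS_SHIFT is 5 and hence mAX_SMALL_BITMAP_SIZE is 32 - 5 = 27):

import Data.Bits (shiftL, (.|.))

-- Editorial sketch of the packing, not the real smallLiveness:
-- the low 5 bits carry the bitmap's size, the rest the bitmap itself.
packSmall :: Int -> Int -> Int
packSmall size bits = size .|. (bits `shiftL` 5)

-- packSmall 4 0xb == 0x164   (size 4; bitmap bits 0, 1 and 3 set)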
+------------------------------------------------------------------------- + +mkRegLiveness :: [(Id, GlobalReg)] -> Int -> Int -> StgWord +mkRegLiveness regs ptrs nptrs + = (fromIntegral nptrs `shiftL` 16) .|. + (fromIntegral ptrs `shiftL` 24) .|. + all_non_ptrs `xor` reg_bits regs + where + all_non_ptrs = 0xff + + reg_bits [] = 0 + reg_bits ((id, VanillaReg i) : regs) | isFollowableArg (idCgRep id) + = (1 `shiftL` (i - 1)) .|. reg_bits regs + reg_bits (_ : regs) + = reg_bits regs + +------------------------------------------------------------------------- +-- +-- Pushing the arguments for a slow call +-- +------------------------------------------------------------------------- + +-- For a slow call, we must take a bunch of arguments and intersperse +-- some stg_ap_<pattern>_ret_info return addresses. +constructSlowCall + :: [(CgRep,CmmExpr)] + -> (CLabel, -- RTS entry point for call + [(CgRep,CmmExpr)], -- args to pass to the entry point + [(CgRep,CmmExpr)]) -- stuff to save on the stack + + -- don't forget the zero case +constructSlowCall [] + = (mkRtsApFastLabel SLIT("stg_ap_0"), [], []) + +constructSlowCall amodes + = (stg_ap_pat, these, rest) + where + stg_ap_pat = mkRtsApFastLabel arg_pat + (arg_pat, these, rest) = matchSlowPattern amodes + +enterRtsRetLabel arg_pat + | tablesNextToCode = mkRtsRetInfoLabel arg_pat + | otherwise = mkRtsRetLabel arg_pat + +-- | 'slowArgs' takes a list of function arguments and prepares them for +-- pushing on the stack for "extra" arguments to a function which requires +-- fewer arguments than we currently have. +slowArgs :: [(CgRep,CmmExpr)] -> [(CgRep,CmmExpr)] +slowArgs [] = [] +slowArgs amodes = (NonPtrArg, mkLblExpr stg_ap_pat) : args ++ slowArgs rest + where (arg_pat, args, rest) = matchSlowPattern amodes + stg_ap_pat = mkRtsRetInfoLabel arg_pat + +matchSlowPattern :: [(CgRep,CmmExpr)] + -> (LitString, [(CgRep,CmmExpr)], [(CgRep,CmmExpr)]) +matchSlowPattern amodes = (arg_pat, these, rest) + where (arg_pat, n) = slowCallPattern (map fst amodes) + (these, rest) = splitAt n amodes + +-- These cases were found to cover about 99% of all slow calls: +slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_pppppp"), 6) +slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_ppppp"), 5) +slowCallPattern (PtrArg: PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_pppp"), 4) +slowCallPattern (PtrArg: PtrArg: PtrArg: VoidArg: _) = (SLIT("stg_ap_pppv"), 4) +slowCallPattern (PtrArg: PtrArg: PtrArg: _) = (SLIT("stg_ap_ppp"), 3) +slowCallPattern (PtrArg: PtrArg: VoidArg: _) = (SLIT("stg_ap_ppv"), 3) +slowCallPattern (PtrArg: PtrArg: _) = (SLIT("stg_ap_pp"), 2) +slowCallPattern (PtrArg: VoidArg: _) = (SLIT("stg_ap_pv"), 2) +slowCallPattern (PtrArg: _) = (SLIT("stg_ap_p"), 1) +slowCallPattern (VoidArg: _) = (SLIT("stg_ap_v"), 1) +slowCallPattern (NonPtrArg: _) = (SLIT("stg_ap_n"), 1) +slowCallPattern (FloatArg: _) = (SLIT("stg_ap_f"), 1) +slowCallPattern (DoubleArg: _) = (SLIT("stg_ap_d"), 1) +slowCallPattern (LongArg: _) = (SLIT("stg_ap_l"), 1) +slowCallPattern _ = panic "CgStackery.slowCallPattern" + +------------------------------------------------------------------------- +-- +-- Return conventions +-- +------------------------------------------------------------------------- + +-- A @CtrlReturnConvention@ says how {\em control} is returned. 
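To make the slow-call chunking above concrete, consider pushing arguments whose representations are [P, P, V, P, N]. matchSlowPattern bites off the longest prefix slowCallPattern recognises at each step (an editorial trace, not compiler output):

  P : P : V : _  ==>  ("stg_ap_ppv", 3)
  P : _          ==>  ("stg_ap_p",   1)
  N : _          ==>  ("stg_ap_n",   1)

so constructSlowCall enters through the stg_ap_ppv fast entry with the first three arguments, and the remaining two are pushed via slowArgs, interleaved with the stg_ap_p and stg_ap_n return addresses. The CtrlReturnConvention type defined next addresses the separate question of how control comes back.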
+ +data CtrlReturnConvention + = VectoredReturn Int -- size of the vector table (family size) + | UnvectoredReturn Int -- family size + +ctrlReturnConvAlg :: TyCon -> CtrlReturnConvention +ctrlReturnConvAlg tycon + = case (tyConFamilySize tycon) of + size -> -- we're supposed to know... + if (size > (1::Int) && size <= mAX_FAMILY_SIZE_FOR_VEC_RETURNS) then + VectoredReturn size + else + UnvectoredReturn size + -- NB: unvectored returns Include size 0 (no constructors), so that + -- the following perverse code compiles (it crashed GHC in 5.02) + -- data T1 + -- data T2 = T2 !T1 Int + -- The only value of type T1 is bottom, which never returns anyway. + +dataReturnConvPrim :: CgRep -> CmmReg +dataReturnConvPrim PtrArg = CmmGlobal (VanillaReg 1) +dataReturnConvPrim NonPtrArg = CmmGlobal (VanillaReg 1) +dataReturnConvPrim LongArg = CmmGlobal (LongReg 1) +dataReturnConvPrim FloatArg = CmmGlobal (FloatReg 1) +dataReturnConvPrim DoubleArg = CmmGlobal (DoubleReg 1) +dataReturnConvPrim VoidArg = panic "dataReturnConvPrim: void" + + +-- getSequelAmode returns an amode which refers to an info table. The info +-- table will always be of the RET(_VEC)?_(BIG|SMALL) kind. We're careful +-- not to handle real code pointers, just in case we're compiling for +-- an unregisterised/untailcallish architecture, where info pointers and +-- code pointers aren't the same. +-- DIRE WARNING. +-- The OnStack case of sequelToAmode delivers an Amode which is only +-- valid just before the final control transfer, because it assumes +-- that Sp is pointing to the top word of the return address. This +-- seems unclean but there you go. + +getSequelAmode :: FCode CmmExpr +getSequelAmode + = do { EndOfBlockInfo virt_sp sequel <- getEndOfBlockInfo + ; case sequel of + OnStack -> do { sp_rel <- getSpRelOffset virt_sp + ; returnFC (CmmLoad sp_rel wordRep) } + + UpdateCode -> returnFC (CmmLit (CmmLabel mkUpdInfoLabel)) + CaseAlts lbl _ _ True -> returnFC (CmmLit (CmmLabel mkSeqInfoLabel)) + CaseAlts lbl _ _ False -> returnFC (CmmLit (CmmLabel lbl)) + } + +------------------------------------------------------------------------- +-- +-- Build a liveness mask for the current stack +-- +------------------------------------------------------------------------- + +-- There are four kinds of things on the stack: +-- +-- - pointer variables (bound in the environment) +-- - non-pointer variables (boudn in the environment) +-- - free slots (recorded in the stack free list) +-- - non-pointer data slots (recorded in the stack free list) +-- +-- We build up a bitmap of non-pointer slots by searching the environment +-- for all the pointer variables, and subtracting these from a bitmap +-- with initially all bits set (up to the size of the stack frame). + +buildContLiveness :: Name -- Basis for label (only) + -> [VirtualSpOffset] -- Live stack slots + -> FCode Liveness +buildContLiveness name live_slots + = do { stk_usg <- getStkUsage + ; let StackUsage { realSp = real_sp, + frameSp = frame_sp } = stk_usg + + start_sp :: VirtualSpOffset + start_sp = real_sp - retAddrSizeW + -- In a continuation, we want a liveness mask that + -- starts from just after the return address, which is + -- on the stack at real_sp. + + frame_size :: WordOff + frame_size = start_sp - frame_sp + -- real_sp points to the frame-header for the current + -- stack frame, and the end of this frame is frame_sp. + -- The size is therefore real_sp - frame_sp - retAddrSizeW + -- (subtract one for the frame-header = return address). 
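-- Editorial worked example with hypothetical numbers (not in the patch):
-- with real_sp = 10, frame_sp = 6 and retAddrSizeW = 1, we get
-- start_sp = 9 and frame_size = 3; a live slot at virtual offset 8
-- becomes relative slot 9 - 8 = 1 below, and intsToReverseBitmap 3 [1]
-- yields a mask with bits 0 and 2 set (non-pointers) and bit 1 clear
-- (the live pointer).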
+ + rel_slots :: [WordOff] + rel_slots = sortLe (<=) + [ start_sp - ofs -- Get slots relative to top of frame + | ofs <- live_slots ] + + bitmap = intsToReverseBitmap frame_size rel_slots + + ; WARN( not (all (>=0) rel_slots), + ppr name $$ ppr live_slots $$ ppr frame_size $$ ppr start_sp $$ ppr rel_slots ) + mkLiveness name frame_size bitmap } + + +------------------------------------------------------------------------- +-- +-- Register assignment +-- +------------------------------------------------------------------------- + +-- How to assign registers for +-- +-- 1) Calling a fast entry point. +-- 2) Returning an unboxed tuple. +-- 3) Invoking an out-of-line PrimOp. +-- +-- Registers are assigned in order. +-- +-- If we run out, we don't attempt to assign any further registers (even +-- though we might have run out of only one kind of register); we just +-- return immediately with the left-overs specified. +-- +-- The alternative version @assignAllRegs@ uses the complete set of +-- registers, including those that aren't mapped to real machine +-- registers. This is used for calling special RTS functions and PrimOps +-- which expect their arguments to always be in the same registers. + +assignCallRegs, assignPrimOpCallRegs, assignReturnRegs + :: [(CgRep,a)] -- Arg or result values to assign + -> ([(a, GlobalReg)], -- Register assignment in same order + -- for *initial segment of* input list + -- (but reversed; doesn't matter) + -- VoidRep args do not appear here + [(CgRep,a)]) -- Leftover arg or result values + +assignCallRegs args + = assign_regs args (mkRegTbl [node]) + -- The entry convention for a function closure + -- never uses Node for argument passing; instead + -- Node points to the function closure itself + +assignPrimOpCallRegs args + = assign_regs args (mkRegTbl_allRegs []) + -- For primops, *all* arguments must be passed in registers + +assignReturnRegs args + = assign_regs args (mkRegTbl []) + -- For returning unboxed tuples etc, + -- we use all regs + +assign_regs :: [(CgRep,a)] -- Arg or result values to assign + -> AvailRegs -- Regs still avail: Vanilla, Float, Double, Longs + -> ([(a, GlobalReg)], [(CgRep, a)]) +assign_regs args supply + = go args [] supply + where + go [] acc supply = (acc, []) -- Return the results reversed (doesn't matter) + go ((VoidArg,_) : args) acc supply -- Skip void arguments; they aren't passed, and + = go args acc supply -- there's nothign to bind them to + go ((rep,arg) : args) acc supply + = case assign_reg rep supply of + Just (reg, supply') -> go args ((arg,reg):acc) supply' + Nothing -> (acc, (rep,arg):args) -- No more regs + +assign_reg :: CgRep -> AvailRegs -> Maybe (GlobalReg, AvailRegs) +assign_reg FloatArg (vs, f:fs, ds, ls) = Just (FloatReg f, (vs, fs, ds, ls)) +assign_reg DoubleArg (vs, fs, d:ds, ls) = Just (DoubleReg d, (vs, fs, ds, ls)) +assign_reg LongArg (vs, fs, ds, l:ls) = Just (LongReg l, (vs, fs, ds, ls)) +assign_reg PtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls)) +assign_reg NonPtrArg (v:vs, fs, ds, ls) = Just (VanillaReg v, (vs, fs, ds, ls)) + -- PtrArg and NonPtrArg both go in a vanilla register +assign_reg other not_enough_regs = Nothing + + +------------------------------------------------------------------------- +-- +-- Register supplies +-- +------------------------------------------------------------------------- + +-- Vanilla registers can contain pointers, Ints, Chars. +-- Floats and doubles have separate register supplies. 
+-- +-- We take these register supplies from the *real* registers, i.e. those +-- that are guaranteed to map to machine registers. + +useVanillaRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Vanilla_REG +useFloatRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Float_REG +useDoubleRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Double_REG +useLongRegs | opt_Unregisterised = 0 + | otherwise = mAX_Real_Long_REG + +vanillaRegNos, floatRegNos, doubleRegNos, longRegNos :: [Int] +vanillaRegNos = regList useVanillaRegs +floatRegNos = regList useFloatRegs +doubleRegNos = regList useDoubleRegs +longRegNos = regList useLongRegs + +allVanillaRegNos, allFloatRegNos, allDoubleRegNos, allLongRegNos :: [Int] +allVanillaRegNos = regList mAX_Vanilla_REG +allFloatRegNos = regList mAX_Float_REG +allDoubleRegNos = regList mAX_Double_REG +allLongRegNos = regList mAX_Long_REG + +regList 0 = [] +regList n = [1 .. n] + +type AvailRegs = ( [Int] -- available vanilla regs. + , [Int] -- floats + , [Int] -- doubles + , [Int] -- longs (int64 and word64) + ) + +mkRegTbl :: [GlobalReg] -> AvailRegs +mkRegTbl regs_in_use + = mkRegTbl' regs_in_use vanillaRegNos floatRegNos doubleRegNos longRegNos + +mkRegTbl_allRegs :: [GlobalReg] -> AvailRegs +mkRegTbl_allRegs regs_in_use + = mkRegTbl' regs_in_use allVanillaRegNos allFloatRegNos allDoubleRegNos allLongRegNos + +mkRegTbl' regs_in_use vanillas floats doubles longs + = (ok_vanilla, ok_float, ok_double, ok_long) + where + ok_vanilla = mapCatMaybes (select VanillaReg) vanillas + ok_float = mapCatMaybes (select FloatReg) floats + ok_double = mapCatMaybes (select DoubleReg) doubles + ok_long = mapCatMaybes (select LongReg) longs + -- rep isn't looked at, hence we can use any old rep. + + select :: (Int -> GlobalReg) -> Int{-cand-} -> Maybe Int + -- one we've unboxed the Int, we make a GlobalReg + -- and see if it is already in use; if not, return its number. + + select mk_reg_fun cand + = let + reg = mk_reg_fun cand + in + if reg `not_elem` regs_in_use + then Just cand + else Nothing + where + not_elem = isn'tIn "mkRegTbl" + + diff --git a/compiler/codeGen/CgCase.lhs b/compiler/codeGen/CgCase.lhs new file mode 100644 index 0000000000..e7c08940c5 --- /dev/null +++ b/compiler/codeGen/CgCase.lhs @@ -0,0 +1,634 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgCase.lhs,v 1.75 2005/06/21 10:44:41 simonmar Exp $ +% +%******************************************************** +%* * +\section[CgCase]{Converting @StgCase@ expressions} +%* * +%******************************************************** + +\begin{code} +module CgCase ( cgCase, saveVolatileVarsAndRegs, + restoreCurrentCostCentre + ) where + +#include "HsVersions.h" + +import {-# SOURCE #-} CgExpr ( cgExpr ) + +import CgMonad +import StgSyn +import CgBindery ( getArgAmodes, + bindNewToReg, bindNewToTemp, + getCgIdInfo, getArgAmode, + rebindToStack, getCAddrModeIfVolatile, + nukeDeadBindings, idInfoToAmode + ) +import CgCon ( bindConArgs, bindUnboxedTupleComponents ) +import CgHeapery ( altHeapCheck, unbxTupleHeapCheck ) +import CgCallConv ( dataReturnConvPrim, ctrlReturnConvAlg, + CtrlReturnConvention(..) 
+ ) +import CgStackery ( allocPrimStack, allocStackTop, getSpRelOffset, + deAllocStackTop, freeStackSlots + ) +import CgTailCall ( performTailCall ) +import CgPrimOp ( cgPrimOp ) +import CgForeignCall ( cgForeignCall ) +import CgUtils ( newTemp, cgLit, emitLitSwitch, emitSwitch, + tagToClosure ) +import CgProf ( curCCS, curCCSAddr ) +import CgInfoTbls ( emitDirectReturnTarget, emitAlgReturnTarget, + dataConTagZ ) +import SMRep ( CgRep(..), retAddrSizeW, nonVoidArg, isVoidArg, + idCgRep, tyConCgRep, typeHint ) +import CmmUtils ( CmmStmts, noStmts, oneStmt, plusStmts ) +import Cmm +import MachOp ( wordRep ) +import ClosureInfo ( mkLFArgument ) +import StaticFlags ( opt_SccProfilingOn ) +import Id ( Id, idName, isDeadBinder, idType ) +import ForeignCall ( ForeignCall(..), CCallSpec(..), playSafe ) +import VarSet ( varSetElems ) +import CoreSyn ( AltCon(..) ) +import PrimOp ( PrimOp(..), primOpOutOfLine ) +import TyCon ( isEnumerationTyCon, tyConFamilySize ) +import Util ( isSingleton ) +import Outputable +\end{code} + +\begin{code} +data GCFlag + = GCMayHappen -- The scrutinee may involve GC, so everything must be + -- tidy before the code for the scrutinee. + + | NoGC -- The scrutinee is a primitive value, or a call to a + -- primitive op which does no GC. Hence the case can + -- be done inline, without tidying up first. +\end{code} + +It is quite interesting to decide whether to put a heap-check +at the start of each alternative. Of course we certainly have +to do so if the case forces an evaluation, or if there is a primitive +op which can trigger GC. + +A more interesting situation is this: + + \begin{verbatim} + !A!; + ...A... + case x# of + 0# -> !B!; ...B... + default -> !C!; ...C... + \end{verbatim} + +where \tr{!x!} indicates a possible heap-check point. The heap checks +in the alternatives {\em can} be omitted, in which case the topmost +heapcheck will take their worst case into account. + +In favour of omitting \tr{!B!}, \tr{!C!}: + + - {\em May} save a heap overflow test, + if ...A... allocates anything. The other advantage + of this is that we can use relative addressing + from a single Hp to get at all the closures so allocated. + + - No need to save volatile vars etc across the case + +Against: + + - May do more allocation than reqd. This sometimes bites us + badly. For example, nfib (ha!) allocates about 30\% more space if the + worst-casing is done, because many many calls to nfib are leaf calls + which don't need to allocate anything. + + This never hurts us if there is only one alternative. + +\begin{code} +cgCase :: StgExpr + -> StgLiveVars + -> StgLiveVars + -> Id + -> SRT + -> AltType + -> [StgAlt] + -> Code +\end{code} + +Special case #1: case of literal. + +\begin{code} +cgCase (StgLit lit) live_in_whole_case live_in_alts bndr srt + alt_type@(PrimAlt tycon) alts + = do { tmp_reg <- bindNewToTemp bndr + ; cm_lit <- cgLit lit + ; stmtC (CmmAssign tmp_reg (CmmLit cm_lit)) + ; cgPrimAlts NoGC alt_type tmp_reg alts } +\end{code} + +Special case #2: scrutinising a primitive-typed variable. No +evaluation required. We don't save volatile variables, nor do we do a +heap-check in the alternatives. Instead, the heap usage of the +alternatives is worst-cased and passed upstream. This can result in +allocating more heap than strictly necessary, but it will sometimes +eliminate a heap check altogether. + +\begin{code} +cgCase (StgApp v []) live_in_whole_case live_in_alts bndr srt + alt_type@(PrimAlt tycon) alts + = do { -- Careful! 
we can't just bind the default binder to the same thing + -- as the scrutinee, since it might be a stack location, and having + -- two bindings pointing at the same stack locn doesn't work (it + -- confuses nukeDeadBindings). Hence, use a new temp. + v_info <- getCgIdInfo v + ; amode <- idInfoToAmode v_info + ; tmp_reg <- bindNewToTemp bndr + ; stmtC (CmmAssign tmp_reg amode) + ; cgPrimAlts NoGC alt_type tmp_reg alts } +\end{code} + +Special case #3: inline PrimOps and foreign calls. + +\begin{code} +cgCase (StgOpApp op@(StgPrimOp primop) args _) + live_in_whole_case live_in_alts bndr srt alt_type alts + | not (primOpOutOfLine primop) + = cgInlinePrimOp primop args bndr alt_type live_in_alts alts +\end{code} + +TODO: Case-of-case of primop can probably be done inline too (but +maybe better to translate it out beforehand). See +ghc/lib/misc/PackedString.lhs for examples where this crops up (with +4.02). + +Special case #4: inline foreign calls: an unsafe foreign call can be done +right here, just like an inline primop. + +\begin{code} +cgCase (StgOpApp op@(StgFCallOp fcall _) args _) + live_in_whole_case live_in_alts bndr srt alt_type alts + | unsafe_foreign_call + = ASSERT( isSingleton alts ) + do -- *must* be an unboxed tuple alt. + -- exactly like the cgInlinePrimOp case for unboxed tuple alts.. + { res_tmps <- mapFCs bindNewToTemp non_void_res_ids + ; let res_hints = map (typeHint.idType) non_void_res_ids + ; cgForeignCall (zip res_tmps res_hints) fcall args live_in_alts + ; cgExpr rhs } + where + (_, res_ids, _, rhs) = head alts + non_void_res_ids = filter (nonVoidArg . idCgRep) res_ids + + unsafe_foreign_call + = case fcall of + CCall (CCallSpec _ _ s) -> not (playSafe s) + _other -> False +\end{code} + +Special case: scrutinising a non-primitive variable. +This can be done a little better than the general case, because +we can reuse/trim the stack slot holding the variable (if it is in one). + +\begin{code} +cgCase (StgApp fun args) + live_in_whole_case live_in_alts bndr srt alt_type alts + = do { fun_info <- getCgIdInfo fun + ; arg_amodes <- getArgAmodes args + + -- Nuking dead bindings *before* calculating the saves is the + -- value-add here. We might end up freeing up some slots currently + -- occupied by variables only required for the call. + -- NOTE: we need to look up the variables used in the call before + -- doing this, because some of them may not be in the environment + -- afterward. + ; nukeDeadBindings live_in_alts + ; (save_assts, alts_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_alts + + ; scrut_eob_info + <- forkEval alts_eob_info + (allocStackTop retAddrSizeW >> nopC) + (do { deAllocStackTop retAddrSizeW + ; cgEvalAlts maybe_cc_slot bndr srt alt_type alts }) + + ; setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info) + (performTailCall fun_info arg_amodes save_assts) } +\end{code} + +Note about return addresses: we *always* push a return address, even +if because of an optimisation we end up jumping direct to the return +code (not through the address itself). The alternatives always assume +that the return address is on the stack. The return address is +required in case the alternative performs a heap check, since it +encodes the liveness of the slots in the activation record. + +On entry to the case alternative, we can re-use the slot containing +the return address immediately after the heap check. That's what the +deAllocStackTop call is doing above. + +Finally, here is the general case. 
+ +\begin{code} +cgCase expr live_in_whole_case live_in_alts bndr srt alt_type alts + = do { -- Figure out what volatile variables to save + nukeDeadBindings live_in_whole_case + + ; (save_assts, alts_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_alts + + -- Save those variables right now! + ; emitStmts save_assts + + -- generate code for the alts + ; scrut_eob_info + <- forkEval alts_eob_info + (do { nukeDeadBindings live_in_alts + ; allocStackTop retAddrSizeW -- space for retn address + ; nopC }) + (do { deAllocStackTop retAddrSizeW + ; cgEvalAlts maybe_cc_slot bndr srt alt_type alts }) + + ; setEndOfBlockInfo (maybeReserveSeqFrame alt_type scrut_eob_info) + (cgExpr expr) + } +\end{code} + +There's a lot of machinery going on behind the scenes to manage the +stack pointer here. forkEval takes the virtual Sp and free list from +the first argument, and turns that into the *real* Sp for the second +argument. It also uses this virtual Sp as the args-Sp in the EOB info +returned, so that the scrutinee will trim the real Sp back to the +right place before doing whatever it does. + --SDM (who just spent an hour figuring this out, and didn't want to + forget it). + +Why don't we push the return address just before evaluating the +scrutinee? Because the slot reserved for the return address might +contain something useful, so we wait until performing a tail call or +return before pushing the return address (see +CgTailCall.pushReturnAddress). + +This also means that the environment doesn't need to know about the +free stack slot for the return address (for generating bitmaps), +because we don't reserve it until just before the eval. + +TODO!! Problem: however, we have to save the current cost centre +stack somewhere, because at the eval point the current CCS might be +different. So we pick a free stack slot and save CCCS in it. One +consequence of this is that activation records on the stack don't +follow the layout of closures when we're profiling. The CCS could be +anywhere within the record). + +\begin{code} +maybeReserveSeqFrame PolyAlt (EndOfBlockInfo args_sp (CaseAlts amode stuff bndr _)) + = EndOfBlockInfo (args_sp + retAddrSizeW) (CaseAlts amode stuff bndr True) +maybeReserveSeqFrame other scrut_eob_info = scrut_eob_info +\end{code} + + +%************************************************************************ +%* * + Inline primops +%* * +%************************************************************************ + +\begin{code} +cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts + | isVoidArg (idCgRep bndr) + = ASSERT( con == DEFAULT && isSingleton alts && null bs ) + do { -- VOID RESULT; just sequencing, + -- so get in there and do it + cgPrimOp [] primop args live_in_alts + ; cgExpr rhs } + where + (con,bs,_,rhs) = head alts + +cgInlinePrimOp primop args bndr (PrimAlt tycon) live_in_alts alts + = do { -- PRIMITIVE ALTS, with non-void result + tmp_reg <- bindNewToTemp bndr + ; cgPrimOp [tmp_reg] primop args live_in_alts + ; cgPrimAlts NoGC (PrimAlt tycon) tmp_reg alts } + +cgInlinePrimOp primop args bndr (UbxTupAlt tycon) live_in_alts alts + = ASSERT( isSingleton alts ) + do { -- UNBOXED TUPLE ALTS + -- No heap check, no yield, just get in there and do it. + -- NB: the case binder isn't bound to anything; + -- it has a unboxed tuple type + + res_tmps <- mapFCs bindNewToTemp non_void_res_ids + ; cgPrimOp res_tmps primop args live_in_alts + ; cgExpr rhs } + where + (_, res_ids, _, rhs) = head alts + non_void_res_ids = filter (nonVoidArg . 
idCgRep) res_ids + +cgInlinePrimOp primop args bndr (AlgAlt tycon) live_in_alts alts + = do { -- ENUMERATION TYPE RETURN + -- Typical: case a ># b of { True -> ..; False -> .. } + -- The primop itself returns an index into the table of + -- closures for the enumeration type. + tag_amode <- ASSERT( isEnumerationTyCon tycon ) + do_enum_primop primop + + -- Bind the default binder if necessary + -- (avoiding it avoids the assignment) + -- The deadness info is set by StgVarInfo + ; hmods <- getHomeModules + ; whenC (not (isDeadBinder bndr)) + (do { tmp_reg <- bindNewToTemp bndr + ; stmtC (CmmAssign tmp_reg (tagToClosure hmods tycon tag_amode)) }) + + -- Compile the alts + ; (branches, mb_deflt) <- cgAlgAlts NoGC Nothing{-cc_slot-} + (AlgAlt tycon) alts + + -- Do the switch + ; emitSwitch tag_amode branches mb_deflt 0 (tyConFamilySize tycon - 1) + } + where + + do_enum_primop :: PrimOp -> FCode CmmExpr -- Returns amode for result + do_enum_primop TagToEnumOp -- No code! + | [arg] <- args = do + (_,e) <- getArgAmode arg + return e + do_enum_primop primop + = do tmp <- newTemp wordRep + cgPrimOp [tmp] primop args live_in_alts + returnFC (CmmReg tmp) + +cgInlinePrimOp primop arg_amodes bndr PolyAlt live_in_alts alts + = pprPanic "cgCase: case of primop has polymorphic type" (ppr bndr) +\end{code} + +%************************************************************************ +%* * +\subsection[CgCase-alts]{Alternatives} +%* * +%************************************************************************ + +@cgEvalAlts@ returns an addressing mode for a continuation for the +alternatives of a @case@, used in a context when there +is some evaluation to be done. + +\begin{code} +cgEvalAlts :: Maybe VirtualSpOffset -- Offset of cost-centre to be restored, if any + -> Id + -> SRT -- SRT for the continuation + -> AltType + -> [StgAlt] + -> FCode Sequel -- Any addr modes inside are guaranteed + -- to be a label so that we can duplicate it + -- without risk of duplicating code + +cgEvalAlts cc_slot bndr srt alt_type@(PrimAlt tycon) alts + = do { let rep = tyConCgRep tycon + reg = dataReturnConvPrim rep -- Bottom for voidRep + + ; abs_c <- forkProc $ do + { -- Bind the case binder, except if it's void + -- (reg is bottom in that case) + whenC (nonVoidArg rep) $ + bindNewToReg bndr reg (mkLFArgument bndr) + ; restoreCurrentCostCentre cc_slot True + ; cgPrimAlts GCMayHappen alt_type reg alts } + + ; lbl <- emitDirectReturnTarget (idName bndr) abs_c srt + ; returnFC (CaseAlts lbl Nothing bndr False) } + +cgEvalAlts cc_slot bndr srt (UbxTupAlt _) [(con,args,_,rhs)] + = -- Unboxed tuple case + -- By now, the simplifier should have have turned it + -- into case e of (# a,b #) -> e + -- There shouldn't be a + -- case e of DEFAULT -> e + ASSERT2( case con of { DataAlt _ -> True; other -> False }, + text "cgEvalAlts: dodgy case of unboxed tuple type" ) + do { -- forkAbsC for the RHS, so that the envt is + -- not changed for the emitDirectReturn call + abs_c <- forkProc $ do + { (live_regs, ptrs, nptrs, _) <- bindUnboxedTupleComponents args + -- Restore the CC *after* binding the tuple components, + -- so that we get the stack offset of the saved CC right. 
+ ; restoreCurrentCostCentre cc_slot True + -- Generate a heap check if necessary + -- and finally the code for the alternative + ; unbxTupleHeapCheck live_regs ptrs nptrs noStmts + (cgExpr rhs) } + ; lbl <- emitDirectReturnTarget (idName bndr) abs_c srt + ; returnFC (CaseAlts lbl Nothing bndr False) } + +cgEvalAlts cc_slot bndr srt alt_type alts + = -- Algebraic and polymorphic case + do { -- Bind the default binder + bindNewToReg bndr nodeReg (mkLFArgument bndr) + + -- Generate sequel info for use downstream + -- At the moment, we only do it if the type is vector-returnable. + -- Reason: if not, then it costs extra to label the + -- alternatives, because we'd get return code like: + -- + -- switch TagReg { 0 : JMP(alt_1); 1 : JMP(alt_2) ..etc } + -- + -- which is worse than having the alt code in the switch statement + + ; (alts, mb_deflt) <- cgAlgAlts GCMayHappen cc_slot alt_type alts + + ; (lbl, branches) <- emitAlgReturnTarget (idName bndr) + alts mb_deflt srt ret_conv + + ; returnFC (CaseAlts lbl branches bndr False) } + where + ret_conv = case alt_type of + AlgAlt tc -> ctrlReturnConvAlg tc + PolyAlt -> UnvectoredReturn 0 +\end{code} + + +HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): If +we do an inlining of the case no separate functions for returning are +created, so we don't have to generate a GRAN_YIELD in that case. This info +must be propagated to cgAlgAltRhs (where the GRAN_YIELD macro might be +emitted). Hence, the new Bool arg to cgAlgAltRhs. + +%************************************************************************ +%* * +\subsection[CgCase-alg-alts]{Algebraic alternatives} +%* * +%************************************************************************ + +In @cgAlgAlts@, none of the binders in the alternatives are +assumed to be yet bound. + +HWL comment on {\em GrAnSim\/} (adding GRAN_YIELDs for context switch): The +last arg of cgAlgAlts indicates if we want a context switch at the +beginning of each alternative. Normally we want that. The only exception +are inlined alternatives. + +\begin{code} +cgAlgAlts :: GCFlag + -> Maybe VirtualSpOffset + -> AltType -- ** AlgAlt or PolyAlt only ** + -> [StgAlt] -- The alternatives + -> FCode ( [(ConTagZ, CgStmts)], -- The branches + Maybe CgStmts ) -- The default case + +cgAlgAlts gc_flag cc_slot alt_type alts + = do alts <- forkAlts [ cgAlgAlt gc_flag cc_slot alt_type alt | alt <- alts] + let + mb_deflt = case alts of -- DEFAULT is always first, if present + ((DEFAULT,blks) : _) -> Just blks + other -> Nothing + + branches = [(dataConTagZ con, blks) + | (DataAlt con, blks) <- alts] + -- in + return (branches, mb_deflt) + + +cgAlgAlt :: GCFlag + -> Maybe VirtualSpOffset -- Turgid state + -> AltType -- ** AlgAlt or PolyAlt only ** + -> StgAlt + -> FCode (AltCon, CgStmts) + +cgAlgAlt gc_flag cc_slot alt_type (con, args, use_mask, rhs) + = do { abs_c <- getCgStmts $ do + { bind_con_args con args + ; restoreCurrentCostCentre cc_slot True + ; maybeAltHeapCheck gc_flag alt_type (cgExpr rhs) } + ; return (con, abs_c) } + where + bind_con_args DEFAULT args = nopC + bind_con_args (DataAlt dc) args = bindConArgs dc args +\end{code} + + +%************************************************************************ +%* * +\subsection[CgCase-prim-alts]{Primitive alternatives} +%* * +%************************************************************************ + +@cgPrimAlts@ generates suitable a @CSwitch@ +for dealing with the alternatives of a primitive @case@, given an +addressing mode for the thing to scrutinise. 
It also keeps track of
+the maximum stack depth encountered down any branch.
+
+As usual, no binders in the alternatives are yet bound.
+
+\begin{code}
+cgPrimAlts :: GCFlag
+	   -> AltType	-- Always PrimAlt, but passed to maybeAltHeapCheck
+	   -> CmmReg	-- Scrutinee
+	   -> [StgAlt]	-- Alternatives
+	   -> Code
+-- NB: cgPrimAlts emits code that does the case analysis.
+-- It's often used in inline situations, rather than to generate
+-- a labelled return point.  That's why its interface is a little
+-- different to cgAlgAlts
+--
+-- INVARIANT: the default binder is already bound
+cgPrimAlts gc_flag alt_type scrutinee alts
+  = do	{ tagged_absCs <- forkAlts (map (cgPrimAlt gc_flag alt_type) alts)
+	; let ((DEFAULT, deflt_absC) : others) = tagged_absCs	-- There is always a default
+	      alt_absCs = [(lit,rhs) | (LitAlt lit, rhs) <- others]
+	; emitLitSwitch (CmmReg scrutinee) alt_absCs deflt_absC }
+
+cgPrimAlt :: GCFlag
+	  -> AltType
+	  -> StgAlt				-- The alternative
+	  -> FCode (AltCon, CgStmts)	-- Its compiled form
+
+cgPrimAlt gc_flag alt_type (con, [], [], rhs)
+  = ASSERT( case con of { DEFAULT -> True; LitAlt _ -> True; other -> False } )
+    do	{ abs_c <- getCgStmts (maybeAltHeapCheck gc_flag alt_type (cgExpr rhs))
+	; returnFC (con, abs_c) }
+\end{code}
+
+
+%************************************************************************
+%*									*
+\subsection[CgCase-tidy]{Code for tidying up prior to an eval}
+%*									*
+%************************************************************************
+
+\begin{code}
+maybeAltHeapCheck
+	:: GCFlag
+	-> AltType	-- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt
+	-> Code		-- Continuation
+	-> Code
+maybeAltHeapCheck NoGC	      _	       code = code
+maybeAltHeapCheck GCMayHappen alt_type code = altHeapCheck alt_type code
+
+saveVolatileVarsAndRegs
+    :: StgLiveVars			-- Vars which should be made safe
+    -> FCode (CmmStmts,			-- Assignments to do the saves
+	      EndOfBlockInfo,		-- sequel for the alts
+	      Maybe VirtualSpOffset)	-- Slot for current cost centre
+
+saveVolatileVarsAndRegs vars
+  = do	{ var_saves <- saveVolatileVars vars
+	; (maybe_cc_slot, cc_save) <- saveCurrentCostCentre
+	; eob_info <- getEndOfBlockInfo
+	; returnFC (var_saves `plusStmts` cc_save,
+		    eob_info,
+		    maybe_cc_slot) }
+
+
+saveVolatileVars :: StgLiveVars		-- Vars which should be made safe
+		 -> FCode CmmStmts	-- Assignments to do the saves
+
+saveVolatileVars vars
+  = do	{ stmts_s <- mapFCs save_it (varSetElems vars)
+	; return (foldr plusStmts noStmts stmts_s) }
+  where
+    save_it var
+      = do { v <- getCAddrModeIfVolatile var
+	   ; case v of
+		Nothing	       -> return noStmts	  -- Non-volatile
+		Just vol_amode -> save_var var vol_amode  -- Aha! It's volatile
+	}
+
+    save_var var vol_amode
+      = do { slot <- allocPrimStack (idCgRep var)
+	   ; rebindToStack var slot
+	   ; sp_rel <- getSpRelOffset slot
+	   ; returnFC (oneStmt (CmmStore sp_rel vol_amode)) }
+\end{code}
+
+---------------------------------------------------------------------------
+
+When we save the current cost centre (which is done for lexical
+scoping), we allocate a free stack location, and return (a)~the
+virtual offset of the location, to pass on to the alternatives, and
+(b)~the assignment to do the save (just as for @saveVolatileVars@).
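+
+A purely illustrative source-level example (not from the compiler) of
+why this matters: with cost-centre profiling on, in
+
+	f x = {-# SCC "outer" #-} case ({-# SCC "inner" #-} g x) of
+		0 -> h x
+		n -> n
+
+the current cost-centre stack belongs to "inner" while @g x@ is being
+evaluated, so "outer" must be saved in a stack slot before the eval and
+restored at the start of each alternative; that slot is the
+@VirtualSpOffset@ returned by @saveCurrentCostCentre@ below.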
+ +\begin{code} +saveCurrentCostCentre :: + FCode (Maybe VirtualSpOffset, -- Where we decide to store it + CmmStmts) -- Assignment to save it + +saveCurrentCostCentre + | not opt_SccProfilingOn + = returnFC (Nothing, noStmts) + | otherwise + = do { slot <- allocPrimStack PtrArg + ; sp_rel <- getSpRelOffset slot + ; returnFC (Just slot, + oneStmt (CmmStore sp_rel curCCS)) } + +-- Sometimes we don't free the slot containing the cost centre after restoring it +-- (see CgLetNoEscape.cgLetNoEscapeBody). +restoreCurrentCostCentre :: Maybe VirtualSpOffset -> Bool -> Code +restoreCurrentCostCentre Nothing _freeit = nopC +restoreCurrentCostCentre (Just slot) freeit + = do { sp_rel <- getSpRelOffset slot + ; whenC freeit (freeStackSlots [slot]) + ; stmtC (CmmStore curCCSAddr (CmmLoad sp_rel wordRep)) } +\end{code} + diff --git a/compiler/codeGen/CgClosure.lhs b/compiler/codeGen/CgClosure.lhs new file mode 100644 index 0000000000..1a2cbc5202 --- /dev/null +++ b/compiler/codeGen/CgClosure.lhs @@ -0,0 +1,599 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgClosure.lhs,v 1.72 2005/05/18 12:06:51 simonmar Exp $ +% +\section[CgClosure]{Code generation for closures} + +This module provides the support code for @StgToAbstractC@ to deal +with {\em closures} on the RHSs of let(rec)s. See also +@CgCon@, which deals with constructors. + +\begin{code} +module CgClosure ( cgTopRhsClosure, + cgStdRhsClosure, + cgRhsClosure, + emitBlackHoleCode, + ) where + +#include "HsVersions.h" + +import {-# SOURCE #-} CgExpr ( cgExpr ) + +import CgMonad +import CgBindery +import CgHeapery +import CgStackery ( mkVirtStkOffsets, pushUpdateFrame, getVirtSp, + setRealAndVirtualSp ) +import CgProf ( chooseDynCostCentres, ldvEnter, enterCostCentre, + costCentreFrom ) +import CgTicky +import CgParallel ( granYield, granFetchAndReschedule ) +import CgInfoTbls ( emitClosureCodeAndInfoTable, getSRTInfo ) +import CgCallConv ( assignCallRegs, mkArgDescr ) +import CgUtils ( emitDataLits, addIdReps, cmmRegOffW, + emitRtsCallWithVols ) +import ClosureInfo -- lots and lots of stuff +import SMRep ( CgRep, cgRepSizeW, argMachRep, fixedHdrSize, WordOff, + idCgRep ) +import MachOp ( MachHint(..) ) +import Cmm +import CmmUtils ( CmmStmts, mkStmts, oneStmt, plusStmts, noStmts, + mkLblExpr ) +import CLabel +import StgSyn +import StaticFlags ( opt_DoTickyProfiling ) +import CostCentre +import Id ( Id, idName, idType ) +import Name ( Name, isExternalName ) +import Module ( Module, pprModule ) +import ListSetOps ( minusList ) +import Util ( isIn, mapAccumL, zipWithEqual ) +import BasicTypes ( TopLevelFlag(..) ) +import Constants ( oFFSET_StgInd_indirectee, wORD_SIZE ) +import Outputable +import FastString +\end{code} + +%******************************************************** +%* * +\subsection[closures-no-free-vars]{Top-level closures} +%* * +%******************************************************** + +For closures bound at top level, allocate in static space. +They should have no free variables. 
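+
+A purely illustrative example (not from the compiler) of the two kinds
+of top-level binding handled here:
+
+	answer :: Int
+	answer = 6 * 7		-- no args: a static, updatable thunk (a CAF)
+
+	double :: Int -> Int
+	double x = x + x	-- has args: a static, re-entrant function closure
+
+In both cases the closure itself is emitted as static data (its payload
+is empty, there being no free variables), and only the entry code has
+to be generated.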
+ +\begin{code} +cgTopRhsClosure :: Id + -> CostCentreStack -- Optional cost centre annotation + -> StgBinderInfo + -> SRT + -> UpdateFlag + -> [Id] -- Args + -> StgExpr + -> FCode (Id, CgIdInfo) + +cgTopRhsClosure id ccs binder_info srt upd_flag args body = do + { -- LAY OUT THE OBJECT + let name = idName id + ; lf_info <- mkClosureLFInfo id TopLevel [] upd_flag args + ; srt_info <- getSRTInfo name srt + ; mod_name <- moduleName + ; let descr = closureDescription mod_name name + closure_info = mkClosureInfo True id lf_info 0 0 srt_info descr + closure_label = mkLocalClosureLabel name + cg_id_info = stableIdInfo id (mkLblExpr closure_label) lf_info + closure_rep = mkStaticClosureFields closure_info ccs True [] + + -- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY) + ; emitDataLits closure_label closure_rep + ; forkClosureBody (closureCodeBody binder_info closure_info + ccs args body) + + ; returnFC (id, cg_id_info) } +\end{code} + +%******************************************************** +%* * +\subsection[non-top-level-closures]{Non top-level closures} +%* * +%******************************************************** + +For closures with free vars, allocate in heap. + +\begin{code} +cgStdRhsClosure + :: Id + -> CostCentreStack -- Optional cost centre annotation + -> StgBinderInfo + -> [Id] -- Free vars + -> [Id] -- Args + -> StgExpr + -> LambdaFormInfo + -> [StgArg] -- payload + -> FCode (Id, CgIdInfo) + +cgStdRhsClosure bndr cc bndr_info fvs args body lf_info payload + = do -- AHA! A STANDARD-FORM THUNK + { -- LAY OUT THE OBJECT + amodes <- getArgAmodes payload + ; mod_name <- moduleName + ; let (tot_wds, ptr_wds, amodes_w_offsets) + = mkVirtHeapOffsets (isLFThunk lf_info) amodes + + descr = closureDescription mod_name (idName bndr) + closure_info = mkClosureInfo False -- Not static + bndr lf_info tot_wds ptr_wds + NoC_SRT -- No SRT for a std-form closure + descr + + ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body + + -- BUILD THE OBJECT + ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets + + -- RETURN + ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) } +\end{code} + +Here's the general case. + +\begin{code} +cgRhsClosure :: Id + -> CostCentreStack -- Optional cost centre annotation + -> StgBinderInfo + -> SRT + -> [Id] -- Free vars + -> UpdateFlag + -> [Id] -- Args + -> StgExpr + -> FCode (Id, CgIdInfo) + +cgRhsClosure bndr cc bndr_info srt fvs upd_flag args body = do + { -- LAY OUT THE OBJECT + -- If the binder is itself a free variable, then don't store + -- it in the closure. Instead, just bind it to Node on entry. + -- NB we can be sure that Node will point to it, because we + -- havn't told mkClosureLFInfo about this; so if the binder + -- _was_ a free var of its RHS, mkClosureLFInfo thinks it *is* + -- stored in the closure itself, so it will make sure that + -- Node points to it... 
+ let + name = idName bndr + is_elem = isIn "cgRhsClosure" + bndr_is_a_fv = bndr `is_elem` fvs + reduced_fvs | bndr_is_a_fv = fvs `minusList` [bndr] + | otherwise = fvs + + ; lf_info <- mkClosureLFInfo bndr NotTopLevel fvs upd_flag args + ; fv_infos <- mapFCs getCgIdInfo reduced_fvs + ; srt_info <- getSRTInfo name srt + ; mod_name <- moduleName + ; let bind_details :: [(CgIdInfo, VirtualHpOffset)] + (tot_wds, ptr_wds, bind_details) + = mkVirtHeapOffsets (isLFThunk lf_info) (map add_rep fv_infos) + + add_rep info = (cgIdInfoArgRep info, info) + + descr = closureDescription mod_name name + closure_info = mkClosureInfo False -- Not static + bndr lf_info tot_wds ptr_wds + srt_info descr + + -- BUILD ITS INFO TABLE AND CODE + ; forkClosureBody (do + { -- Bind the fvs + let bind_fv (info, offset) + = bindNewToNode (cgIdInfoId info) offset (cgIdInfoLF info) + ; mapCs bind_fv bind_details + + -- Bind the binder itself, if it is a free var + ; whenC bndr_is_a_fv (bindNewToReg bndr nodeReg lf_info) + + -- Compile the body + ; closureCodeBody bndr_info closure_info cc args body }) + + -- BUILD THE OBJECT + ; let + to_amode (info, offset) = do { amode <- idInfoToAmode info + ; return (amode, offset) } + ; (use_cc, blame_cc) <- chooseDynCostCentres cc args body + ; amodes_w_offsets <- mapFCs to_amode bind_details + ; heap_offset <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets + + -- RETURN + ; returnFC (bndr, heapIdInfo bndr heap_offset lf_info) } + + +mkClosureLFInfo :: Id -- The binder + -> TopLevelFlag -- True of top level + -> [Id] -- Free vars + -> UpdateFlag -- Update flag + -> [Id] -- Args + -> FCode LambdaFormInfo +mkClosureLFInfo bndr top fvs upd_flag args + | null args = return (mkLFThunk (idType bndr) top fvs upd_flag) + | otherwise = do { arg_descr <- mkArgDescr (idName bndr) args + ; return (mkLFReEntrant top fvs args arg_descr) } +\end{code} + + +%************************************************************************ +%* * +\subsection[code-for-closures]{The code for closures} +%* * +%************************************************************************ + +\begin{code} +closureCodeBody :: StgBinderInfo + -> ClosureInfo -- Lots of information about this closure + -> CostCentreStack -- Optional cost centre attached to closure + -> [Id] + -> StgExpr + -> Code +\end{code} + +There are two main cases for the code for closures. If there are {\em +no arguments}, then the closure is a thunk, and not in normal form. +So it should set up an update frame (if it is shared). +NB: Thunks cannot have a primitive type! + +\begin{code} +closureCodeBody binder_info cl_info cc [{- No args i.e. thunk -}] body = do + { body_absC <- getCgStmts $ do + { tickyEnterThunk cl_info + ; ldvEnter (CmmReg nodeReg) -- NB: Node always points when profiling + ; thunkWrapper cl_info $ do + -- We only enter cc after setting up update so + -- that cc of enclosing scope will be recorded + -- in update frame CAF/DICT functions will be + -- subsumed by this enclosing cc + { enterCostCentre cl_info cc body + ; cgExpr body } + } + + ; emitClosureCodeAndInfoTable cl_info [] body_absC } +\end{code} + +If there is /at least one argument/, then this closure is in +normal form, so there is no need to set up an update frame. + +The Macros for GrAnSim are produced at the beginning of the +argSatisfactionCheck (by calling fetchAndReschedule). There info if +Node points to closure is available. 
-- HWL + +\begin{code} +closureCodeBody binder_info cl_info cc args body + = ASSERT( length args > 0 ) + do { -- Get the current virtual Sp (it might not be zero, + -- eg. if we're compiling a let-no-escape). + vSp <- getVirtSp + ; let (reg_args, other_args) = assignCallRegs (addIdReps args) + (sp_top, stk_args) = mkVirtStkOffsets vSp other_args + + -- Allocate the global ticky counter + ; let ticky_ctr_lbl = mkRednCountsLabel (closureName cl_info) + ; emitTickyCounter cl_info args sp_top + + -- ...and establish the ticky-counter + -- label for this block + ; setTickyCtrLabel ticky_ctr_lbl $ do + + -- Emit the slow-entry code + { reg_save_code <- mkSlowEntryCode cl_info reg_args + + -- Emit the main entry code + ; blks <- forkProc $ + mkFunEntryCode cl_info cc reg_args stk_args + sp_top reg_save_code body + ; emitClosureCodeAndInfoTable cl_info [] blks + }} + + + +mkFunEntryCode :: ClosureInfo + -> CostCentreStack + -> [(Id,GlobalReg)] -- Args in regs + -> [(Id,VirtualSpOffset)] -- Args on stack + -> VirtualSpOffset -- Last allocated word on stack + -> CmmStmts -- Register-save code in case of GC + -> StgExpr + -> Code +-- The main entry code for the closure +mkFunEntryCode cl_info cc reg_args stk_args sp_top reg_save_code body = do + { -- Bind args to regs/stack as appropriate, + -- and record expected position of sps + ; bindArgsToRegs reg_args + ; bindArgsToStack stk_args + ; setRealAndVirtualSp sp_top + + -- Enter the cost-centre, if required + -- ToDo: It's not clear why this is outside the funWrapper, + -- but the tickyEnterFun is inside. Perhaps we can put + -- them together? + ; enterCostCentre cl_info cc body + + -- Do the business + ; funWrapper cl_info reg_args reg_save_code $ do + { tickyEnterFun cl_info + ; cgExpr body } + } +\end{code} + +The "slow entry" code for a function. This entry point takes its +arguments on the stack. It loads the arguments into registers +according to the calling convention, and jumps to the function's +normal entry point. The function's closure is assumed to be in +R1/node. + +The slow entry point is used in two places: + + (a) unknown calls: eg. 
stg_PAP_entry
+  (b) returning from a heap-check failure
+
+\begin{code}
+mkSlowEntryCode :: ClosureInfo -> [(Id,GlobalReg)] -> FCode CmmStmts
+-- If this function doesn't have a specialised ArgDescr, we need
+-- to generate the function's arg bitmap, slow-entry code, and
+-- register-save code for the heap-check failure
+-- Here, we emit the slow-entry code, and
+-- return the register-save assignments
+mkSlowEntryCode cl_info reg_args
+  | Just (_, ArgGen _) <- closureFunInfo cl_info
+  = do	{ emitSimpleProc slow_lbl (emitStmts load_stmts)
+	; return save_stmts }
+  | otherwise = return noStmts
+  where
+     name = closureName cl_info
+     slow_lbl = mkSlowEntryLabel name
+
+     load_stmts = mkStmts load_assts `plusStmts` mkStmts [stk_adj_pop, jump_to_entry]
+     save_stmts = oneStmt stk_adj_push `plusStmts` mkStmts save_assts
+
+     reps_w_regs :: [(CgRep,GlobalReg)]
+     reps_w_regs = [(idCgRep id, reg) | (id,reg) <- reverse reg_args]
+     (final_stk_offset, stk_offsets)
+	  = mapAccumL (\off (rep,_) -> (off + cgRepSizeW rep, off))
+		      0 reps_w_regs
+
+     load_assts = zipWithEqual "mk_load" mk_load reps_w_regs stk_offsets
+     mk_load (rep,reg) offset = CmmAssign (CmmGlobal reg)
+					  (CmmLoad (cmmRegOffW spReg offset)
+						   (argMachRep rep))
+
+     save_assts = zipWithEqual "mk_save" mk_save reps_w_regs stk_offsets
+     mk_save (rep,reg) offset = ASSERT( argMachRep rep == globalRegRep reg )
+				CmmStore (cmmRegOffW spReg offset)
+					 (CmmReg (CmmGlobal reg))
+
+     stk_adj_pop   = CmmAssign spReg (cmmRegOffW spReg final_stk_offset)
+     stk_adj_push  = CmmAssign spReg (cmmRegOffW spReg (- final_stk_offset))
+     jump_to_entry = CmmJump (mkLblExpr (enterLocalIdLabel name)) []
+\end{code}
+
+
+%************************************************************************
+%*									*
+\subsubsection[closure-code-wrappers]{Wrappers around closure code}
+%*									*
+%************************************************************************
+
+\begin{code}
+thunkWrapper:: ClosureInfo -> Code -> Code
+thunkWrapper closure_info thunk_code = do
+  { let node_points = nodeMustPointToIt (closureLFInfo closure_info)
+
+    -- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node
+    -- (we prefer fetchAndReschedule-style context switches to yield ones)
+  ; if node_points
+    then granFetchAndReschedule [] node_points
+    else granYield		[] node_points
+
+        -- Stack and/or heap checks
+  ; thunkEntryChecks closure_info $ do
+	{	-- Overwrite with black hole if necessary
+	  whenC (blackHoleOnEntry closure_info && node_points)
+		(blackHoleIt closure_info)
+	; setupUpdate closure_info thunk_code }
+		-- setupUpdate *encloses* the thunk_code
+  }
+
+funWrapper :: ClosureInfo	-- Closure whose code body this is
+	   -> [(Id,GlobalReg)]	-- List of argument registers (if any)
+	   -> CmmStmts		-- reg saves for the heap check failure
+	   -> Code		-- Body of function being compiled
+	   -> Code
+funWrapper closure_info arg_regs reg_save_code fun_body = do
+  { let node_points = nodeMustPointToIt (closureLFInfo closure_info)
+
+	-- Enter for Ldv profiling
+  ; whenC node_points (ldvEnter (CmmReg nodeReg))
+
+	-- GranSim yield point
+  ; granYield arg_regs node_points
+
+        -- Heap and/or stack checks wrap the function body
+  ; funEntryChecks closure_info reg_save_code
+		   fun_body
+  }
+\end{code}
+
+
+%************************************************************************
+%*									*
+\subsubsection[update-and-BHs]{Update and black-hole wrappers}
+%*									*
+%************************************************************************
+
+
+\begin{code}
+blackHoleIt :: ClosureInfo -> Code
+-- Only called for closures
with no args +-- Node points to the closure +blackHoleIt closure_info = emitBlackHoleCode (closureSingleEntry closure_info) + +emitBlackHoleCode :: Bool -> Code +emitBlackHoleCode is_single_entry + | eager_blackholing = do + tickyBlackHole (not is_single_entry) + stmtC (CmmStore (CmmReg nodeReg) (CmmLit (CmmLabel bh_lbl))) + | otherwise = + nopC + where + bh_lbl | is_single_entry = mkRtsDataLabel SLIT("stg_SE_BLACKHOLE_info") + | otherwise = mkRtsDataLabel SLIT("stg_BLACKHOLE_info") + + -- If we wanted to do eager blackholing with slop filling, + -- we'd need to do it at the *end* of a basic block, otherwise + -- we overwrite the free variables in the thunk that we still + -- need. We have a patch for this from Andy Cheadle, but not + -- incorporated yet. --SDM [6/2004] + -- + -- Profiling needs slop filling (to support LDV profiling), so + -- currently eager blackholing doesn't work with profiling. + -- + -- TICKY_TICKY needs EAGER_BLACKHOLING to verify no double-entries of + -- single-entry thunks. + eager_blackholing + | opt_DoTickyProfiling = True + | otherwise = False + +\end{code} + +\begin{code} +setupUpdate :: ClosureInfo -> Code -> Code -- Only called for closures with no args + -- Nota Bene: this function does not change Node (even if it's a CAF), + -- so that the cost centre in the original closure can still be + -- extracted by a subsequent enterCostCentre +setupUpdate closure_info code + | closureReEntrant closure_info + = code + + | not (isStaticClosure closure_info) + = if closureUpdReqd closure_info + then do { tickyPushUpdateFrame; pushUpdateFrame (CmmReg nodeReg) code } + else do { tickyUpdateFrameOmitted; code } + + | otherwise -- A static closure + = do { tickyUpdateBhCaf closure_info + + ; if closureUpdReqd closure_info + then do -- Blackhole the (updatable) CAF: + { upd_closure <- link_caf closure_info True + ; pushUpdateFrame upd_closure code } + else do + { -- No update reqd, you'd think we don't need to + -- black-hole it. But when ticky-ticky is on, we + -- black-hole it regardless, to catch errors in which + -- an allegedly single-entry closure is entered twice + -- + -- We discard the pointer returned by link_caf, because + -- we don't push an update frame + whenC opt_DoTickyProfiling -- Blackhole even a SE CAF + (link_caf closure_info False >> nopC) + ; tickyUpdateFrameOmitted + ; code } + } + + +----------------------------------------------------------------------------- +-- Entering a CAF +-- +-- When a CAF is first entered, it creates a black hole in the heap, +-- and updates itself with an indirection to this new black hole. +-- +-- We update the CAF with an indirection to a newly-allocated black +-- hole in the heap. We also set the blocking queue on the newly +-- allocated black hole to be empty. +-- +-- Why do we make a black hole in the heap when we enter a CAF? +-- +-- - for a generational garbage collector, which needs a fast +-- test for whether an updatee is in an old generation or not +-- +-- - for the parallel system, which can implement updates more +-- easily if the updatee is always in the heap. (allegedly). +-- +-- When debugging, we maintain a separate CAF list so we can tell when +-- a CAF has been garbage collected. + +-- newCAF must be called before the itbl ptr is overwritten, since +-- newCAF records the old itbl ptr in order to do CAF reverting +-- (which Hugs needs to do in order that combined mode works right.) 
+-- + +-- ToDo [Feb 04] This entire link_caf nonsense could all be moved +-- into the "newCAF" RTS procedure, which we call anyway, including +-- the allocation of the black-hole indirection closure. +-- That way, code size would fall, the CAF-handling code would +-- be closer together, and the compiler wouldn't need to know +-- about off_indirectee etc. + +link_caf :: ClosureInfo + -> Bool -- True <=> updatable, False <=> single-entry + -> FCode CmmExpr -- Returns amode for closure to be updated +-- To update a CAF we must allocate a black hole, link the CAF onto the +-- CAF list, then update the CAF to point to the fresh black hole. +-- This function returns the address of the black hole, so it can be +-- updated with the new value when available. The reason for all of this +-- is that we only want to update dynamic heap objects, not static ones, +-- so that generational GC is easier. +link_caf cl_info is_upd = do + { -- Alloc black hole specifying CC_HDR(Node) as the cost centre + ; let use_cc = costCentreFrom (CmmReg nodeReg) + blame_cc = use_cc + ; hp_offset <- allocDynClosure bh_cl_info use_cc blame_cc [] + ; hp_rel <- getHpRelOffset hp_offset + + -- Call the RTS function newCAF to add the CAF to the CafList + -- so that the garbage collector can find them + -- This must be done *before* the info table pointer is overwritten, + -- because the old info table ptr is needed for reversion + ; emitRtsCallWithVols SLIT("newCAF") [(CmmReg nodeReg,PtrHint)] [node] + -- node is live, so save it. + + -- Overwrite the closure with a (static) indirection + -- to the newly-allocated black hole + ; stmtsC [ CmmStore (cmmRegOffW nodeReg off_indirectee) hp_rel + , CmmStore (CmmReg nodeReg) ind_static_info ] + + ; returnFC hp_rel } + where + bh_cl_info :: ClosureInfo + bh_cl_info | is_upd = cafBlackHoleClosureInfo cl_info + | otherwise = seCafBlackHoleClosureInfo cl_info + + ind_static_info :: CmmExpr + ind_static_info = mkLblExpr mkIndStaticInfoLabel + + off_indirectee :: WordOff + off_indirectee = fixedHdrSize + oFFSET_StgInd_indirectee*wORD_SIZE +\end{code} + + +%************************************************************************ +%* * +\subsection[CgClosure-Description]{Profiling Closure Description.} +%* * +%************************************************************************ + +For "global" data constructors the description is simply occurrence +name of the data constructor itself. Otherwise it is determined by +@closureDescription@ from the let binding information. + +\begin{code} +closureDescription :: Module -- Module + -> Name -- Id of closure binding + -> String + -- Not called for StgRhsCon which have global info tables built in + -- CgConTbls.lhs with a description generated from the data constructor +closureDescription mod_name name + = showSDocDump (char '<' <> + (if isExternalName name + then ppr name -- ppr will include the module name prefix + else pprModule mod_name <> char '.' <> ppr name) <> + char '>') + -- showSDocDump, because we want to see the unique on the Name. +\end{code} + diff --git a/compiler/codeGen/CgCon.lhs b/compiler/codeGen/CgCon.lhs new file mode 100644 index 0000000000..bfb55bf46e --- /dev/null +++ b/compiler/codeGen/CgCon.lhs @@ -0,0 +1,457 @@ +% +% (c) The GRASP Project, Glasgow University, 1992-1998 +% +\section[CgCon]{Code generation for constructors} + +This module provides the support code for @StgToAbstractC@ to deal +with {\em constructors} on the RHSs of let(rec)s. See also +@CgClosure@, which deals with closures. 
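+
+A purely illustrative example (not from the compiler) of the forms this
+module deals with:
+
+	let x  = Just n		-- dynamic: heap-allocated via buildDynCon
+	    ys = []		-- nullary: shared static closure, no allocation
+	in ...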
+ +\begin{code} +module CgCon ( + cgTopRhsCon, buildDynCon, + bindConArgs, bindUnboxedTupleComponents, + cgReturnDataCon, + cgTyCon + ) where + +#include "HsVersions.h" + +import CgMonad +import StgSyn + +import CgBindery ( getArgAmodes, bindNewToNode, + bindArgsToRegs, idInfoToAmode, stableIdInfo, + heapIdInfo, CgIdInfo, bindArgsToStack + ) +import CgStackery ( mkVirtStkOffsets, freeStackSlots, + getRealSp, getVirtSp, setRealAndVirtualSp ) +import CgUtils ( addIdReps, cmmLabelOffW, emitRODataLits, emitDataLits ) +import CgCallConv ( assignReturnRegs ) +import Constants ( mAX_INTLIKE, mIN_INTLIKE, mAX_CHARLIKE, mIN_CHARLIKE ) +import CgHeapery ( allocDynClosure, layOutDynConstr, + layOutStaticConstr, mkStaticClosureFields ) +import CgTailCall ( performReturn, emitKnownConReturnCode, returnUnboxedTuple ) +import CgProf ( mkCCostCentreStack, ldvEnter, curCCS ) +import CgTicky +import CgInfoTbls ( emitClosureCodeAndInfoTable, dataConTagZ ) +import CLabel +import ClosureInfo ( mkConLFInfo, mkLFArgument ) +import CmmUtils ( mkLblExpr ) +import Cmm +import SMRep ( WordOff, CgRep, separateByPtrFollowness, + fixedHdrSize, typeCgRep ) +import CostCentre ( currentOrSubsumedCCS, dontCareCCS, CostCentreStack, + currentCCS ) +import Constants ( mIN_INTLIKE, mAX_INTLIKE, mIN_CHARLIKE, mAX_CHARLIKE ) +import TyCon ( TyCon, tyConDataCons, isEnumerationTyCon, tyConName ) +import DataCon ( DataCon, dataConRepArgTys, isNullaryRepDataCon, + isUnboxedTupleCon, dataConWorkId, + dataConName, dataConRepArity + ) +import Id ( Id, idName, isDeadBinder ) +import Type ( Type ) +import PrelInfo ( maybeCharLikeCon, maybeIntLikeCon ) +import Outputable +import Util ( lengthIs ) +import ListSetOps ( assocMaybe ) +\end{code} + + +%************************************************************************ +%* * +\subsection[toplevel-constructors]{Top-level constructors} +%* * +%************************************************************************ + +\begin{code} +cgTopRhsCon :: Id -- Name of thing bound to this RHS + -> DataCon -- Id + -> [StgArg] -- Args + -> FCode (Id, CgIdInfo) +cgTopRhsCon id con args + = do { + ; hmods <- getHomeModules +#if mingw32_TARGET_OS + -- Windows DLLs have a problem with static cross-DLL refs. + ; ASSERT( not (isDllConApp hmods con args) ) return () +#endif + ; ASSERT( args `lengthIs` dataConRepArity con ) return () + + -- LAY IT OUT + ; amodes <- getArgAmodes args + + ; let + name = idName id + lf_info = mkConLFInfo con + closure_label = mkClosureLabel hmods name + caffy = any stgArgHasCafRefs args + (closure_info, amodes_w_offsets) = layOutStaticConstr hmods con amodes + closure_rep = mkStaticClosureFields + closure_info + dontCareCCS -- Because it's static data + caffy -- Has CAF refs + payload + + payload = map get_lit amodes_w_offsets + get_lit (CmmLit lit, _offset) = lit + get_lit other = pprPanic "CgCon.get_lit" (ppr other) + -- NB1: amodes_w_offsets is sorted into ptrs first, then non-ptrs + -- NB2: all the amodes should be Lits! 
+
+	-- BUILD THE OBJECT
+  ; emitDataLits closure_label closure_rep
+
+	-- RETURN
+  ; returnFC (id, stableIdInfo id (mkLblExpr closure_label) lf_info) }
+\end{code}
+
+%************************************************************************
+%*									*
+%* non-top-level constructors						*
+%*									*
+%************************************************************************
+\subsection[code-for-constructors]{The code for constructors}
+
+\begin{code}
+buildDynCon :: Id		-- Name of the thing to which this constr will
+				-- be bound
+	    -> CostCentreStack	-- Where to grab cost centre from;
+				-- current CCS if currentOrSubsumedCCS
+	    -> DataCon		-- The data constructor
+	    -> [(CgRep,CmmExpr)] -- Its args
+	    -> FCode CgIdInfo	-- Return details about how to find it
+
+-- We used to pass a boolean indicating whether all the
+-- args were of size zero, so we could use a static
+-- constructor; but I concluded that it just isn't worth it.
+-- Now that I/O uses unboxed tuples, there just aren't any constructors
+-- with all size-zero args.
+--
+-- The reason for having a separate argument, rather than looking at
+-- the addr modes of the args, is that we may be in a "knot", and
+-- premature looking at the args will cause the compiler to black-hole!
+\end{code}
+
+First we deal with the case of zero-arity constructors.  Now, they
+will probably be unfolded, so we don't expect to see this case much,
+if at all, but it does no harm, and sets the scene for characters.
+
+In the case of zero-arity constructors, or, more accurately, those
+which have exclusively size-zero (VoidRep) args, we generate no code
+at all.
+
+\begin{code}
+buildDynCon binder cc con []
+  = do hmods <- getHomeModules
+       returnFC (stableIdInfo binder
+		   (mkLblExpr (mkClosureLabel hmods (dataConName con)))
+		   (mkConLFInfo con))
+\end{code}
+
+The following three paragraphs about @Char@-like and @Int@-like
+closures are obsolete, but I don't understand the details well enough
+to properly word them, sorry. I've changed the treatment of @Char@s to
+be analogous to @Int@s: only a subset is preallocated, because @Char@
+now has 31 bits. Only literals are handled here. -- Qrczak
+
+Now for @Char@-like closures.  We generate an assignment of the
+address of the closure to a temporary.  It would be possible simply to
+generate no code, and record the addressing mode in the environment,
+but we'd have to be careful if the argument wasn't a constant --- so
+for simplicity we just always assign to a temporary.
+
+Last special case: @Int@-like closures.  We only special-case the
+situation in which the argument is a literal in the range
+@mIN_INTLIKE@..@mAX_INTLIKE@.  NB: for @Char@-like closures we can
+work with any old argument, but for @Int@-like ones the argument has
+to be a literal.  Reason: @Char@-like closures have an argument type
+which is guaranteed in range.
+
+Because of this, we can safely return an addressing mode.
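+
+To make the address arithmetic in the code below concrete, here is a
+small worked example with assumed values (the real constants come from
+the @Constants@ module): if @mIN_INTLIKE@ is -16 and @fixedHdrSize@ is
+1 (no profiling header), each INTLIKE closure occupies
+@fixedHdrSize + 1 = 2@ words, so the static closure for the literal 0
+lives at
+
+	offsetW = (0 - (-16)) * (1 + 1) = 32
+
+words past @stg_INTLIKE_closure@.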
+ +\begin{code} +buildDynCon binder cc con [arg_amode] + | maybeIntLikeCon con + , (_, CmmLit (CmmInt val _)) <- arg_amode + , let val_int = (fromIntegral val) :: Int + , val_int <= mAX_INTLIKE && val_int >= mIN_INTLIKE + = do { let intlike_lbl = mkRtsDataLabel SLIT("stg_INTLIKE_closure") + offsetW = (val_int - mIN_INTLIKE) * (fixedHdrSize + 1) + -- INTLIKE closures consist of a header and one word payload + intlike_amode = CmmLit (cmmLabelOffW intlike_lbl offsetW) + ; returnFC (stableIdInfo binder intlike_amode (mkConLFInfo con)) } + +buildDynCon binder cc con [arg_amode] + | maybeCharLikeCon con + , (_, CmmLit (CmmInt val _)) <- arg_amode + , let val_int = (fromIntegral val) :: Int + , val_int <= mAX_CHARLIKE && val_int >= mIN_CHARLIKE + = do { let charlike_lbl = mkRtsDataLabel SLIT("stg_CHARLIKE_closure") + offsetW = (val_int - mIN_CHARLIKE) * (fixedHdrSize + 1) + -- CHARLIKE closures consist of a header and one word payload + charlike_amode = CmmLit (cmmLabelOffW charlike_lbl offsetW) + ; returnFC (stableIdInfo binder charlike_amode (mkConLFInfo con)) } +\end{code} + +Now the general case. + +\begin{code} +buildDynCon binder ccs con args + = do { + ; hmods <- getHomeModules + ; let + (closure_info, amodes_w_offsets) = layOutDynConstr hmods con args + + ; hp_off <- allocDynClosure closure_info use_cc blame_cc amodes_w_offsets + ; returnFC (heapIdInfo binder hp_off lf_info) } + where + lf_info = mkConLFInfo con + + use_cc -- cost-centre to stick in the object + | currentOrSubsumedCCS ccs = curCCS + | otherwise = CmmLit (mkCCostCentreStack ccs) + + blame_cc = use_cc -- cost-centre on which to blame the alloc (same) +\end{code} + + +%************************************************************************ +%* * +%* constructor-related utility function: * +%* bindConArgs is called from cgAlt of a case * +%* * +%************************************************************************ +\subsection[constructor-utilities]{@bindConArgs@: constructor-related utility} + +@bindConArgs@ $con args$ augments the environment with bindings for the +binders $args$, assuming that we have just returned from a @case@ which +found a $con$. + +\begin{code} +bindConArgs :: DataCon -> [Id] -> Code +bindConArgs con args + = do hmods <- getHomeModules + let + bind_arg (arg, offset) = bindNewToNode arg offset (mkLFArgument arg) + (_, args_w_offsets) = layOutDynConstr hmods con (addIdReps args) + -- + ASSERT(not (isUnboxedTupleCon con)) return () + mapCs bind_arg args_w_offsets +\end{code} + +Unboxed tuples are handled slightly differently - the object is +returned in registers and on the stack instead of the heap. 
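+
+For instance (purely illustrative, not from the compiler):
+
+	sumProd :: Int# -> Int# -> (# Int#, Int# #)
+	sumProd x y = (# x +# y, x *# y #)
+
+returns its two components in registers where possible, spilling any
+leftovers to stack slots; no tuple object is ever built in the heap.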
+ +\begin{code} +bindUnboxedTupleComponents + :: [Id] -- Args + -> FCode ([(Id,GlobalReg)], -- Regs assigned + WordOff, -- Number of pointer stack slots + WordOff, -- Number of non-pointer stack slots + VirtualSpOffset) -- Offset of return address slot + -- (= realSP on entry) + +bindUnboxedTupleComponents args + = do { + vsp <- getVirtSp + ; rsp <- getRealSp + + -- Assign as many components as possible to registers + ; let (reg_args, stk_args) = assignReturnRegs (addIdReps args) + + -- Separate the rest of the args into pointers and non-pointers + (ptr_args, nptr_args) = separateByPtrFollowness stk_args + + -- Allocate the rest on the stack + -- The real SP points to the return address, above which any + -- leftover unboxed-tuple components will be allocated + (ptr_sp, ptr_offsets) = mkVirtStkOffsets rsp ptr_args + (nptr_sp, nptr_offsets) = mkVirtStkOffsets ptr_sp nptr_args + ptrs = ptr_sp - rsp + nptrs = nptr_sp - ptr_sp + + -- The stack pointer points to the last stack-allocated component + ; setRealAndVirtualSp nptr_sp + + -- We have just allocated slots starting at real SP + 1, and set the new + -- virtual SP to the topmost allocated slot. + -- If the virtual SP started *below* the real SP, we've just jumped over + -- some slots that won't be in the free-list, so put them there + -- This commonly happens because we've freed the return-address slot + -- (trimming back the virtual SP), but the real SP still points to that slot + ; freeStackSlots [vsp+1,vsp+2 .. rsp] + + ; bindArgsToRegs reg_args + ; bindArgsToStack ptr_offsets + ; bindArgsToStack nptr_offsets + + ; returnFC (reg_args, ptrs, nptrs, rsp) } +\end{code} + +%************************************************************************ +%* * + Actually generate code for a constructor return +%* * +%************************************************************************ + + +Note: it's the responsibility of the @cgReturnDataCon@ caller to be +sure the @amodes@ passed don't conflict with each other. +\begin{code} +cgReturnDataCon :: DataCon -> [(CgRep, CmmExpr)] -> Code + +cgReturnDataCon con amodes + = ASSERT( amodes `lengthIs` dataConRepArity con ) + do { EndOfBlockInfo _ sequel <- getEndOfBlockInfo + ; case sequel of + CaseAlts _ (Just (alts, deflt_lbl)) bndr _ + -> -- Ho! We know the constructor so we can + -- go straight to the right alternative + case assocMaybe alts (dataConTagZ con) of { + Just join_lbl -> build_it_then (jump_to join_lbl); + Nothing + -- Special case! We're returning a constructor to the default case + -- of an enclosing case. For example: + -- + -- case (case e of (a,b) -> C a b) of + -- D x -> ... + -- y -> ...<returning here!>... + -- + -- In this case, + -- if the default is a non-bind-default (ie does not use y), + -- then we should simply jump to the default join point; + + | isDeadBinder bndr -> performReturn (jump_to deflt_lbl) + | otherwise -> build_it_then (jump_to deflt_lbl) } + + other_sequel -- The usual case + | isUnboxedTupleCon con -> returnUnboxedTuple amodes + | otherwise -> build_it_then (emitKnownConReturnCode con) + } + where + jump_to lbl = stmtC (CmmJump (CmmLit lbl) []) + build_it_then return_code + = do { -- BUILD THE OBJECT IN THE HEAP + -- The first "con" says that the name bound to this + -- closure is "con", which is a bit of a fudge, but it only + -- affects profiling + + -- This Id is also used to get a unique for a + -- temporary variable, if the closure is a CHARLIKE. 
+ -- funnily enough, this makes the unique always come + -- out as '54' :-) + tickyReturnNewCon (length amodes) + ; idinfo <- buildDynCon (dataConWorkId con) currentCCS con amodes + ; amode <- idInfoToAmode idinfo + ; checkedAbsC (CmmAssign nodeReg amode) + ; performReturn return_code } +\end{code} + + +%************************************************************************ +%* * + Generating static stuff for algebraic data types +%* * +%************************************************************************ + + [These comments are rather out of date] + +\begin{tabular}{lll} +Info tbls & Macro & Kind of constructor \\ +\hline +info & @CONST_INFO_TABLE@& Zero arity (no info -- compiler uses static closure)\\ +info & @CHARLIKE_INFO_TABLE@& Charlike (no info -- compiler indexes fixed array)\\ +info & @INTLIKE_INFO_TABLE@& Intlike; the one macro generates both info tbls\\ +info & @SPEC_INFO_TABLE@& SPECish, and bigger than or equal to @MIN_UPD_SIZE@\\ +info & @GEN_INFO_TABLE@& GENish (hence bigger than or equal to @MIN_UPD_SIZE@)\\ +\end{tabular} + +Possible info tables for constructor con: + +\begin{description} +\item[@_con_info@:] +Used for dynamically let(rec)-bound occurrences of +the constructor, and for updates. For constructors +which are int-like, char-like or nullary, when GC occurs, +the closure tries to get rid of itself. + +\item[@_static_info@:] +Static occurrences of the constructor +macro: @STATIC_INFO_TABLE@. +\end{description} + +For zero-arity constructors, \tr{con}, we NO LONGER generate a static closure; +it's place is taken by the top level defn of the constructor. + +For charlike and intlike closures there is a fixed array of static +closures predeclared. + +\begin{code} +cgTyCon :: TyCon -> FCode [Cmm] -- each constructor gets a separate Cmm +cgTyCon tycon + = do { constrs <- mapM (getCmm . cgDataCon) (tyConDataCons tycon) + + -- Generate a table of static closures for an enumeration type + -- Put the table after the data constructor decls, because the + -- datatype closure table (for enumeration types) + -- to (say) PrelBase_$wTrue_closure, which is defined in code_stuff + ; extra <- + if isEnumerationTyCon tycon then do + tbl <- getCmm (emitRODataLits (mkLocalClosureTableLabel + (tyConName tycon)) + [ CmmLabel (mkLocalClosureLabel (dataConName con)) + | con <- tyConDataCons tycon]) + return [tbl] + else + return [] + + ; return (extra ++ constrs) + } +\end{code} + +Generate the entry code, info tables, and (for niladic constructor) the +static closure, for a constructor. + +\begin{code} +cgDataCon :: DataCon -> Code +cgDataCon data_con + = do { -- Don't need any dynamic closure code for zero-arity constructors + hmods <- getHomeModules + + ; let + -- To allow the debuggers, interpreters, etc to cope with + -- static data structures (ie those built at compile + -- time), we take care that info-table contains the + -- information we need. 
+ (static_cl_info, _) = + layOutStaticConstr hmods data_con arg_reps + + (dyn_cl_info, arg_things) = + layOutDynConstr hmods data_con arg_reps + + emit_info cl_info ticky_code + = do { code_blks <- getCgStmts the_code + ; emitClosureCodeAndInfoTable cl_info [] code_blks } + where + the_code = do { ticky_code + ; ldvEnter (CmmReg nodeReg) + ; body_code } + + arg_reps :: [(CgRep, Type)] + arg_reps = [(typeCgRep ty, ty) | ty <- dataConRepArgTys data_con] + + body_code = do { + -- NB: We don't set CC when entering data (WDP 94/06) + tickyReturnOldCon (length arg_things) + ; performReturn (emitKnownConReturnCode data_con) } + -- noStmts: Ptr to thing already in Node + + ; whenC (not (isNullaryRepDataCon data_con)) + (emit_info dyn_cl_info tickyEnterDynCon) + + -- Dynamic-Closure first, to reduce forward references + ; emit_info static_cl_info tickyEnterStaticCon } + + where +\end{code} diff --git a/compiler/codeGen/CgExpr.hi-boot-5 b/compiler/codeGen/CgExpr.hi-boot-5 new file mode 100644 index 0000000000..588e63f8f1 --- /dev/null +++ b/compiler/codeGen/CgExpr.hi-boot-5 @@ -0,0 +1,3 @@ +__interface CgExpr 1 0 where +__export CgExpr cgExpr; +1 cgExpr :: StgSyn.StgExpr -> CgMonad.Code ; diff --git a/compiler/codeGen/CgExpr.hi-boot-6 b/compiler/codeGen/CgExpr.hi-boot-6 new file mode 100644 index 0000000000..dc2d75cefe --- /dev/null +++ b/compiler/codeGen/CgExpr.hi-boot-6 @@ -0,0 +1,3 @@ +module CgExpr where + +cgExpr :: StgSyn.StgExpr -> CgMonad.Code diff --git a/compiler/codeGen/CgExpr.lhs b/compiler/codeGen/CgExpr.lhs new file mode 100644 index 0000000000..33d72f1608 --- /dev/null +++ b/compiler/codeGen/CgExpr.lhs @@ -0,0 +1,454 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgExpr.lhs,v 1.62 2005/06/21 10:44:41 simonmar Exp $ +% +%******************************************************** +%* * +\section[CgExpr]{Converting @StgExpr@s} +%* * +%******************************************************** + +\begin{code} +module CgExpr ( cgExpr ) where + +#include "HsVersions.h" + +import Constants ( mAX_SPEC_SELECTEE_SIZE, mAX_SPEC_AP_SIZE ) +import StgSyn +import CgMonad + +import SMRep ( fixedHdrSize, isFollowableArg, CgRep(..), argMachRep, + nonVoidArg, idCgRep, typeCgRep, typeHint, + primRepToCgRep ) +import CoreSyn ( AltCon(..) ) +import CgProf ( emitSetCCC ) +import CgHeapery ( layOutDynConstr ) +import CgBindery ( getArgAmodes, getArgAmode, CgIdInfo, + nukeDeadBindings, addBindC, addBindsC ) +import CgCase ( cgCase, saveVolatileVarsAndRegs ) +import CgClosure ( cgRhsClosure, cgStdRhsClosure ) +import CgCon ( buildDynCon, cgReturnDataCon ) +import CgLetNoEscape ( cgLetNoEscapeClosure ) +import CgCallConv ( dataReturnConvPrim ) +import CgTailCall +import CgInfoTbls ( emitDirectReturnInstr ) +import CgForeignCall ( emitForeignCall, shimForeignCallArg ) +import CgPrimOp ( cgPrimOp ) +import CgUtils ( addIdReps, newTemp, assignTemp, cgLit, tagToClosure ) +import ClosureInfo ( mkSelectorLFInfo, mkApLFInfo ) +import Cmm ( CmmExpr(..), CmmStmt(..), CmmReg, nodeReg ) +import MachOp ( wordRep, MachHint ) +import VarSet +import Literal ( literalType ) +import PrimOp ( primOpOutOfLine, getPrimOpResultInfo, + PrimOp(..), PrimOpResultInfo(..) ) +import Id ( Id ) +import TyCon ( isUnboxedTupleTyCon, isEnumerationTyCon ) +import Type ( Type, tyConAppArgs, tyConAppTyCon, repType, + PrimRep(VoidRep) ) +import Maybes ( maybeToBool ) +import ListSetOps ( assocMaybe ) +import BasicTypes ( RecFlag(..) 
) +import Util ( lengthIs ) +import Outputable +\end{code} + +This module provides the support code for @StgToAbstractC@ to deal +with STG {\em expressions}. See also @CgClosure@, which deals +with closures, and @CgCon@, which deals with constructors. + +\begin{code} +cgExpr :: StgExpr -- input + -> Code -- output +\end{code} + +%******************************************************** +%* * +%* Tail calls * +%* * +%******************************************************** + +``Applications'' mean {\em tail calls}, a service provided by module +@CgTailCall@. This includes literals, which show up as +@(STGApp (StgLitArg 42) [])@. + +\begin{code} +cgExpr (StgApp fun args) = cgTailCall fun args +\end{code} + +%******************************************************** +%* * +%* STG ConApps (for inline versions) * +%* * +%******************************************************** + +\begin{code} +cgExpr (StgConApp con args) + = do { amodes <- getArgAmodes args + ; cgReturnDataCon con amodes } +\end{code} + +Literals are similar to constructors; they return by putting +themselves in an appropriate register and returning to the address on +top of the stack. + +\begin{code} +cgExpr (StgLit lit) + = do { cmm_lit <- cgLit lit + ; performPrimReturn rep (CmmLit cmm_lit) } + where + rep = typeCgRep (literalType lit) +\end{code} + + +%******************************************************** +%* * +%* PrimOps and foreign calls. +%* * +%******************************************************** + +NOTE about "safe" foreign calls: a safe foreign call is never compiled +inline in a case expression. When we see + + case (ccall ...) of { ... } + +We generate a proper return address for the alternatives and push the +stack frame before doing the call, so that in the event that the call +re-enters the RTS the stack is in a sane state. + +\begin{code} +cgExpr (StgOpApp (StgFCallOp fcall _) stg_args res_ty) = do + {- + First, copy the args into temporaries. We're going to push + a return address right before doing the call, so the args + must be out of the way. + -} + reps_n_amodes <- getArgAmodes stg_args + let + -- Get the *non-void* args, and jiggle them with shimForeignCall + arg_exprs = [ shimForeignCallArg stg_arg expr + | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, + nonVoidArg rep] + + -- in + arg_tmps <- mapM assignTemp arg_exprs + let + arg_hints = zip arg_tmps (map (typeHint.stgArgType) stg_args) + -- in + {- + Now, allocate some result regs. + -} + (res_reps,res_regs,res_hints) <- newUnboxedTupleRegs res_ty + ccallReturnUnboxedTuple (zip res_reps (map CmmReg res_regs)) $ + emitForeignCall (zip res_regs res_hints) fcall + arg_hints emptyVarSet{-no live vars-} + +-- tagToEnum# is special: we need to pull the constructor out of the table, +-- and perform an appropriate return. + +cgExpr (StgOpApp (StgPrimOp TagToEnumOp) [arg] res_ty) + = ASSERT(isEnumerationTyCon tycon) + do { (_,amode) <- getArgAmode arg + ; amode' <- assignTemp amode -- We're going to use it twice, + -- so save in a temp if non-trivial + ; hmods <- getHomeModules + ; stmtC (CmmAssign nodeReg (tagToClosure hmods tycon amode')) + ; performReturn (emitAlgReturnCode tycon amode') } + where + -- If you're reading this code in the attempt to figure + -- out why the compiler panic'ed here, it is probably because + -- you used tagToEnum# in a non-monomorphic setting, e.g., + -- intToTg :: Enum a => Int -> a ; intToTg (I# x#) = tagToEnum# x# + -- That won't work. 
+ tycon = tyConAppTyCon res_ty + + +cgExpr x@(StgOpApp op@(StgPrimOp primop) args res_ty) + | primOpOutOfLine primop + = tailCallPrimOp primop args + + | ReturnsPrim VoidRep <- result_info + = do cgPrimOp [] primop args emptyVarSet + performReturn emitDirectReturnInstr + + | ReturnsPrim rep <- result_info + = do cgPrimOp [dataReturnConvPrim (primRepToCgRep rep)] + primop args emptyVarSet + performReturn emitDirectReturnInstr + + | ReturnsAlg tycon <- result_info, isUnboxedTupleTyCon tycon + = do (reps, regs, _hints) <- newUnboxedTupleRegs res_ty + cgPrimOp regs primop args emptyVarSet{-no live vars-} + returnUnboxedTuple (zip reps (map CmmReg regs)) + + | ReturnsAlg tycon <- result_info, isEnumerationTyCon tycon + -- c.f. cgExpr (...TagToEnumOp...) + = do tag_reg <- newTemp wordRep + hmods <- getHomeModules + cgPrimOp [tag_reg] primop args emptyVarSet + stmtC (CmmAssign nodeReg (tagToClosure hmods tycon (CmmReg tag_reg))) + performReturn (emitAlgReturnCode tycon (CmmReg tag_reg)) + where + result_info = getPrimOpResultInfo primop +\end{code} + +%******************************************************** +%* * +%* Case expressions * +%* * +%******************************************************** +Case-expression conversion is complicated enough to have its own +module, @CgCase@. +\begin{code} + +cgExpr (StgCase expr live_vars save_vars bndr srt alt_type alts) + = cgCase expr live_vars save_vars bndr srt alt_type alts +\end{code} + + +%******************************************************** +%* * +%* Let and letrec * +%* * +%******************************************************** +\subsection[let-and-letrec-codegen]{Converting @StgLet@ and @StgLetrec@} + +\begin{code} +cgExpr (StgLet (StgNonRec name rhs) expr) + = cgRhs name rhs `thenFC` \ (name, info) -> + addBindC name info `thenC` + cgExpr expr + +cgExpr (StgLet (StgRec pairs) expr) + = fixC (\ new_bindings -> addBindsC new_bindings `thenC` + listFCs [ cgRhs b e | (b,e) <- pairs ] + ) `thenFC` \ new_bindings -> + + addBindsC new_bindings `thenC` + cgExpr expr +\end{code} + +\begin{code} +cgExpr (StgLetNoEscape live_in_whole_let live_in_rhss bindings body) + = do { -- Figure out what volatile variables to save + ; nukeDeadBindings live_in_whole_let + ; (save_assts, rhs_eob_info, maybe_cc_slot) + <- saveVolatileVarsAndRegs live_in_rhss + + -- Save those variables right now! + ; emitStmts save_assts + + -- Produce code for the rhss + -- and add suitable bindings to the environment + ; cgLetNoEscapeBindings live_in_rhss rhs_eob_info + maybe_cc_slot bindings + + -- Do the body + ; setEndOfBlockInfo rhs_eob_info (cgExpr body) } +\end{code} + + +%******************************************************** +%* * +%* SCC Expressions * +%* * +%******************************************************** + +SCC expressions are treated specially. They set the current cost +centre. + +\begin{code} +cgExpr (StgSCC cc expr) = do emitSetCCC cc; cgExpr expr +\end{code} + +%******************************************************** +%* * +%* Non-top-level bindings * +%* * +%******************************************************** +\subsection[non-top-level-bindings]{Converting non-top-level bindings} + +We rely on the support code in @CgCon@ (to do constructors) and +in @CgClosure@ (to do closures). 
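+
+A purely illustrative example of the two right-hand-side forms
+distinguished below:
+
+	let xs = Just a			-- StgRhsCon: built by CgCon.buildDynCon
+	    f  = \y -> y + a		-- StgRhsClosure: built by CgClosure
+	in ...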
+ +\begin{code} +cgRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) + -- the Id is passed along so a binding can be set up + +cgRhs name (StgRhsCon maybe_cc con args) + = do { amodes <- getArgAmodes args + ; idinfo <- buildDynCon name maybe_cc con amodes + ; returnFC (name, idinfo) } + +cgRhs name (StgRhsClosure cc bi fvs upd_flag srt args body) + = do hmods <- getHomeModules + mkRhsClosure hmods name cc bi srt fvs upd_flag args body +\end{code} + +mkRhsClosure looks for two special forms of the right-hand side: + a) selector thunks. + b) AP thunks + +If neither happens, it just calls mkClosureLFInfo. You might think +that mkClosureLFInfo should do all this, but it seems wrong for the +latter to look at the structure of an expression + +Selectors +~~~~~~~~~ +We look at the body of the closure to see if it's a selector---turgid, +but nothing deep. We are looking for a closure of {\em exactly} the +form: + +... = [the_fv] \ u [] -> + case the_fv of + con a_1 ... a_n -> a_i + + +\begin{code} +mkRhsClosure hmods bndr cc bi srt + [the_fv] -- Just one free var + upd_flag -- Updatable thunk + [] -- A thunk + body@(StgCase (StgApp scrutinee [{-no args-}]) + _ _ _ _ -- ignore uniq, etc. + (AlgAlt tycon) + [(DataAlt con, params, use_mask, + (StgApp selectee [{-no args-}]))]) + | the_fv == scrutinee -- Scrutinee is the only free variable + && maybeToBool maybe_offset -- Selectee is a component of the tuple + && offset_into_int <= mAX_SPEC_SELECTEE_SIZE -- Offset is small enough + = -- NOT TRUE: ASSERT(is_single_constructor) + -- The simplifier may have statically determined that the single alternative + -- is the only possible case and eliminated the others, even if there are + -- other constructors in the datatype. It's still ok to make a selector + -- thunk in this case, because we *know* which constructor the scrutinee + -- will evaluate to. + cgStdRhsClosure bndr cc bi [the_fv] [] body lf_info [StgVarArg the_fv] + where + lf_info = mkSelectorLFInfo bndr offset_into_int + (isUpdatable upd_flag) + (_, params_w_offsets) = layOutDynConstr hmods con (addIdReps params) + -- Just want the layout + maybe_offset = assocMaybe params_w_offsets selectee + Just the_offset = maybe_offset + offset_into_int = the_offset - fixedHdrSize +\end{code} + +Ap thunks +~~~~~~~~~ + +A more generic AP thunk of the form + + x = [ x_1...x_n ] \.. [] -> x_1 ... x_n + +A set of these is compiled statically into the RTS, so we just use +those. We could extend the idea to thunks where some of the x_i are +global ids (and hence not free variables), but this would entail +generating a larger thunk. It might be an option for non-optimising +compilation, though. + +We only generate an Ap thunk if all the free variables are pointers, +for semi-obvious reasons. + +\begin{code} +mkRhsClosure hmods bndr cc bi srt + fvs + upd_flag + [] -- No args; a thunk + body@(StgApp fun_id args) + + | args `lengthIs` (arity-1) + && all isFollowableArg (map idCgRep fvs) + && isUpdatable upd_flag + && arity <= mAX_SPEC_AP_SIZE + + -- Ha! an Ap thunk + = cgStdRhsClosure bndr cc bi fvs [] body lf_info payload + + where + lf_info = mkApLFInfo bndr upd_flag arity + -- the payload has to be in the correct order, hence we can't + -- just use the fvs. 
+ payload = StgVarArg fun_id : args + arity = length fvs +\end{code} + +The default case +~~~~~~~~~~~~~~~~ +\begin{code} +mkRhsClosure hmods bndr cc bi srt fvs upd_flag args body + = cgRhsClosure bndr cc bi srt fvs upd_flag args body +\end{code} + + +%******************************************************** +%* * +%* Let-no-escape bindings +%* * +%******************************************************** +\begin{code} +cgLetNoEscapeBindings live_in_rhss rhs_eob_info maybe_cc_slot + (StgNonRec binder rhs) + = do { (binder,info) <- cgLetNoEscapeRhs live_in_rhss rhs_eob_info + maybe_cc_slot + NonRecursive binder rhs + ; addBindC binder info } + +cgLetNoEscapeBindings live_in_rhss rhs_eob_info maybe_cc_slot (StgRec pairs) + = do { new_bindings <- fixC (\ new_bindings -> do + { addBindsC new_bindings + ; listFCs [ cgLetNoEscapeRhs full_live_in_rhss + rhs_eob_info maybe_cc_slot Recursive b e + | (b,e) <- pairs ] }) + + ; addBindsC new_bindings } + where + -- We add the binders to the live-in-rhss set so that we don't + -- delete the bindings for the binder from the environment! + full_live_in_rhss = live_in_rhss `unionVarSet` (mkVarSet [b | (b,r) <- pairs]) + +cgLetNoEscapeRhs + :: StgLiveVars -- Live in rhss + -> EndOfBlockInfo + -> Maybe VirtualSpOffset + -> RecFlag + -> Id + -> StgRhs + -> FCode (Id, CgIdInfo) + +cgLetNoEscapeRhs full_live_in_rhss rhs_eob_info maybe_cc_slot rec binder + (StgRhsClosure cc bi _ upd_flag srt args body) + = -- We could check the update flag, but currently we don't switch it off + -- for let-no-escaped things, so we omit the check too! + -- case upd_flag of + -- Updatable -> panic "cgLetNoEscapeRhs" -- Nothing to update! + -- other -> cgLetNoEscapeClosure binder cc bi live_in_whole_let live_in_rhss args body + cgLetNoEscapeClosure binder cc bi srt full_live_in_rhss rhs_eob_info + maybe_cc_slot rec args body + +-- For a constructor RHS we want to generate a single chunk of code which +-- can be jumped to from many places, which will return the constructor. +-- It's easy; just behave as if it was an StgRhsClosure with a ConApp inside! +cgLetNoEscapeRhs full_live_in_rhss rhs_eob_info maybe_cc_slot rec binder + (StgRhsCon cc con args) + = cgLetNoEscapeClosure binder cc noBinderInfo{-safe-} NoSRT + full_live_in_rhss rhs_eob_info maybe_cc_slot rec + [] --No args; the binder is data structure, not a function + (StgConApp con args) +\end{code} + +Little helper for primitives that return unboxed tuples. + +\begin{code} +newUnboxedTupleRegs :: Type -> FCode ([CgRep], [CmmReg], [MachHint]) +newUnboxedTupleRegs res_ty = + let + ty_args = tyConAppArgs (repType res_ty) + (reps,hints) = unzip [ (rep, typeHint ty) | ty <- ty_args, + let rep = typeCgRep ty, + nonVoidArg rep ] + in do + regs <- mapM (newTemp . argMachRep) reps + return (reps,regs,hints) +\end{code} diff --git a/compiler/codeGen/CgExpr.lhs-boot b/compiler/codeGen/CgExpr.lhs-boot new file mode 100644 index 0000000000..29cdc3a605 --- /dev/null +++ b/compiler/codeGen/CgExpr.lhs-boot @@ -0,0 +1,7 @@ +\begin{code} +module CgExpr where +import StgSyn( StgExpr ) +import CgMonad( Code ) + +cgExpr :: StgExpr -> Code +\end{code} diff --git a/compiler/codeGen/CgForeignCall.hs b/compiler/codeGen/CgForeignCall.hs new file mode 100644 index 0000000000..10f41bdf8b --- /dev/null +++ b/compiler/codeGen/CgForeignCall.hs @@ -0,0 +1,256 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for foreign calls. 
+-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgForeignCall ( + cgForeignCall, + emitForeignCall, + emitForeignCall', + shimForeignCallArg, + emitSaveThreadState, -- will be needed by the Cmm parser + emitLoadThreadState, -- ditto + emitCloseNursery, + emitOpenNursery, + ) where + +#include "HsVersions.h" + +import StgSyn ( StgLiveVars, StgArg, stgArgType ) +import CgProf ( curCCS, curCCSAddr ) +import CgBindery ( getVolatileRegs, getArgAmodes ) +import CgMonad +import CgUtils ( cmmOffsetW, cmmOffsetB, cmmLoadIndexW, newTemp, + assignTemp ) +import Type ( tyConAppTyCon, repType ) +import TysPrim +import CLabel ( mkForeignLabel, mkRtsCodeLabel ) +import Cmm +import CmmUtils +import MachOp +import SMRep +import ForeignCall +import Constants +import StaticFlags ( opt_SccProfilingOn ) +import Outputable + +import Monad ( when ) + +-- ----------------------------------------------------------------------------- +-- Code generation for Foreign Calls + +cgForeignCall + :: [(CmmReg,MachHint)] -- where to put the results + -> ForeignCall -- the op + -> [StgArg] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code +cgForeignCall results fcall stg_args live + = do + reps_n_amodes <- getArgAmodes stg_args + let + -- Get the *non-void* args, and jiggle them with shimForeignCall + arg_exprs = [ shimForeignCallArg stg_arg expr + | (stg_arg, (rep,expr)) <- stg_args `zip` reps_n_amodes, + nonVoidArg rep] + + arg_hints = zip arg_exprs (map (typeHint.stgArgType) stg_args) + -- in + emitForeignCall results fcall arg_hints live + + +emitForeignCall + :: [(CmmReg,MachHint)] -- where to put the results + -> ForeignCall -- the op + -> [(CmmExpr,MachHint)] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code + +emitForeignCall results (CCall (CCallSpec target cconv safety)) args live + = do vols <- getVolatileRegs live + emitForeignCall' safety results + (CmmForeignCall cmm_target cconv) call_args (Just vols) + where + (call_args, cmm_target) + = case target of + StaticTarget lbl -> (args, CmmLit (CmmLabel + (mkForeignLabel lbl call_size False))) + DynamicTarget -> case args of (fn,_):rest -> (rest, fn) + + -- in the stdcall calling convention, the symbol needs @size appended + -- to it, where size is the total number of bytes of arguments. We + -- attach this info to the CLabel here, and the CLabel pretty printer + -- will generate the suffix when the label is printed. 
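+	-- For example (an illustrative case, assuming a 32-bit target
+	-- where wORD_SIZE is 4): a StaticTarget "f" called via stdcall
+	-- with two 4-byte arguments gets the symbol "f@8".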
+ call_size + | StdCallConv <- cconv = Just (sum (map (arg_size.cmmExprRep.fst) args)) + | otherwise = Nothing + + -- ToDo: this might not be correct for 64-bit API + arg_size rep = max (machRepByteWidth rep) wORD_SIZE + +emitForeignCall results (DNCall _) args live + = panic "emitForeignCall: DNCall" + + +-- alternative entry point, used by CmmParse +emitForeignCall' + :: Safety + -> [(CmmReg,MachHint)] -- where to put the results + -> CmmCallTarget -- the op + -> [(CmmExpr,MachHint)] -- arguments + -> Maybe [GlobalReg] -- live vars, in case we need to save them + -> Code +emitForeignCall' safety results target args vols + | not (playSafe safety) = do + temp_args <- load_args_into_temps args + stmtC (CmmCall target results temp_args vols) + + | otherwise = do + id <- newTemp wordRep + temp_args <- load_args_into_temps args + emitSaveThreadState + stmtC (CmmCall (CmmForeignCall suspendThread CCallConv) + [(id,PtrHint)] + [ (CmmReg (CmmGlobal BaseReg), PtrHint) ] + vols + ) + stmtC (CmmCall target results temp_args vols) + stmtC (CmmCall (CmmForeignCall resumeThread CCallConv) + [ (CmmGlobal BaseReg, PtrHint) ] + -- Assign the result to BaseReg: we + -- might now have a different + -- Capability! + [ (CmmReg id, PtrHint) ] + vols + ) + emitLoadThreadState + + +suspendThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("suspendThread"))) +resumeThread = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("resumeThread"))) + + +-- we might need to load arguments into temporaries before +-- making the call, because certain global registers might +-- overlap with registers that the C calling convention uses +-- for passing arguments. +-- +-- This is a HACK; really it should be done in the back end, but +-- it's easier to generate the temporaries here. +load_args_into_temps args = mapM maybe_assignTemp args + +maybe_assignTemp (e, hint) + | hasNoGlobalRegs e = return (e, hint) + | otherwise = do + -- don't use assignTemp, it uses its own notion of "trivial" + -- expressions, which are wrong here + reg <- newTemp (cmmExprRep e) + stmtC (CmmAssign reg e) + return (CmmReg reg, hint) + +-- ----------------------------------------------------------------------------- +-- Save/restore the thread state in the TSO + +-- This stuff can't be done in suspendThread/resumeThread, because it +-- refers to global registers which aren't available in the C world. 
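+-- For a safe call, the overall sequence generated by emitForeignCall'
+-- above is therefore roughly (a sketch, not verbatim Cmm output):
+--
+--	SaveThreadState();		// emitSaveThreadState
+--	id = suspendThread(BaseReg);
+--	<results> = call <target>(<args>);
+--	BaseReg = resumeThread(id);
+--	LoadThreadState();		// emitLoadThreadState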
+ +emitSaveThreadState = do + -- CurrentTSO->sp = Sp; + stmtC $ CmmStore (cmmOffset stgCurrentTSO tso_SP) stgSp + emitCloseNursery + -- and save the current cost centre stack in the TSO when profiling: + when opt_SccProfilingOn $ + stmtC (CmmStore (cmmOffset stgCurrentTSO tso_CCCS) curCCS) + + -- CurrentNursery->free = Hp+1; +emitCloseNursery = stmtC $ CmmStore nursery_bdescr_free (cmmOffsetW stgHp 1) + +emitLoadThreadState = do + tso <- newTemp wordRep + stmtsC [ + -- tso = CurrentTSO; + CmmAssign tso stgCurrentTSO, + -- Sp = tso->sp; + CmmAssign sp (CmmLoad (cmmOffset (CmmReg tso) tso_SP) + wordRep), + -- SpLim = tso->stack + RESERVED_STACK_WORDS; + CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg tso) tso_STACK) + rESERVED_STACK_WORDS) + ] + emitOpenNursery + -- and load the current cost centre stack from the TSO when profiling: + when opt_SccProfilingOn $ + stmtC (CmmStore curCCSAddr + (CmmLoad (cmmOffset (CmmReg tso) tso_CCCS) wordRep)) + +emitOpenNursery = stmtsC [ + -- Hp = CurrentNursery->free - 1; + CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free wordRep) (-1)), + + -- HpLim = CurrentNursery->start + + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; + CmmAssign hpLim + (cmmOffsetExpr + (CmmLoad nursery_bdescr_start wordRep) + (cmmOffset + (CmmMachOp mo_wordMul [ + CmmMachOp (MO_S_Conv I32 wordRep) + [CmmLoad nursery_bdescr_blocks I32], + CmmLit (mkIntCLit bLOCK_SIZE) + ]) + (-1) + ) + ) + ] + + +nursery_bdescr_free = cmmOffset stgCurrentNursery oFFSET_bdescr_free +nursery_bdescr_start = cmmOffset stgCurrentNursery oFFSET_bdescr_start +nursery_bdescr_blocks = cmmOffset stgCurrentNursery oFFSET_bdescr_blocks + +tso_SP = tsoFieldB oFFSET_StgTSO_sp +tso_STACK = tsoFieldB oFFSET_StgTSO_stack +tso_CCCS = tsoProfFieldB oFFSET_StgTSO_CCCS + +-- The TSO struct has a variable header, and an optional StgTSOProfInfo in +-- the middle. The fields we're interested in are after the StgTSOProfInfo. +tsoFieldB :: ByteOff -> ByteOff +tsoFieldB off + | opt_SccProfilingOn = off + sIZEOF_StgTSOProfInfo + fixedHdrSize * wORD_SIZE + | otherwise = off + fixedHdrSize * wORD_SIZE + +tsoProfFieldB :: ByteOff -> ByteOff +tsoProfFieldB off = off + fixedHdrSize * wORD_SIZE + +stgSp = CmmReg sp +stgHp = CmmReg hp +stgCurrentTSO = CmmReg currentTSO +stgCurrentNursery = CmmReg currentNursery + +sp = CmmGlobal Sp +spLim = CmmGlobal SpLim +hp = CmmGlobal Hp +hpLim = CmmGlobal HpLim +currentTSO = CmmGlobal CurrentTSO +currentNursery = CmmGlobal CurrentNursery + +-- ----------------------------------------------------------------------------- +-- For certain types passed to foreign calls, we adjust the actual +-- value passed to the call. For ByteArray#/Array# we pass the +-- address of the actual array, not the address of the heap object. 
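+-- For example, if p is the closure pointer of a ByteArray# argument,
+-- the C function receives p + arrWordsHdrSize, i.e. the address of the
+-- payload just past the header words.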
+ +shimForeignCallArg :: StgArg -> CmmExpr -> CmmExpr +shimForeignCallArg arg expr + | tycon == arrayPrimTyCon || tycon == mutableArrayPrimTyCon + = cmmOffsetB expr arrPtrsHdrSize + + | tycon == byteArrayPrimTyCon || tycon == mutableByteArrayPrimTyCon + = cmmOffsetB expr arrWordsHdrSize + + | otherwise = expr + where + -- should be a tycon app, since this is a foreign call + tycon = tyConAppTyCon (repType (stgArgType arg)) diff --git a/compiler/codeGen/CgHeapery.lhs b/compiler/codeGen/CgHeapery.lhs new file mode 100644 index 0000000000..184af904df --- /dev/null +++ b/compiler/codeGen/CgHeapery.lhs @@ -0,0 +1,588 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgHeapery.lhs,v 1.47 2005/06/21 10:44:41 simonmar Exp $ +% +\section[CgHeapery]{Heap management functions} + +\begin{code} +module CgHeapery ( + initHeapUsage, getVirtHp, setVirtHp, setRealHp, + getHpRelOffset, hpRel, + + funEntryChecks, thunkEntryChecks, + altHeapCheck, unbxTupleHeapCheck, + hpChkGen, hpChkNodePointsAssignSp0, + stkChkGen, stkChkNodePoints, + + layOutDynConstr, layOutStaticConstr, + mkVirtHeapOffsets, mkStaticClosureFields, mkStaticClosure, + + allocDynClosure, emitSetDynHdr + ) where + +#include "HsVersions.h" + +import StgSyn ( AltType(..) ) +import CLabel ( CLabel, mkRtsCodeLabel ) +import CgUtils ( mkWordCLit, cmmRegOffW, cmmOffsetW, + cmmOffsetExprB ) +import CgMonad +import CgProf ( staticProfHdr, profDynAlloc, dynProfHdr ) +import CgTicky ( staticTickyHdr, tickyDynAlloc, tickyAllocHeap ) +import CgParallel ( staticGranHdr, staticParHdr, doGranAllocate ) +import CgStackery ( getFinalStackHW, getRealSp ) +import CgCallConv ( mkRegLiveness ) +import ClosureInfo ( closureSize, staticClosureNeedsLink, + mkConInfo, closureNeedsUpdSpace, + infoTableLabelFromCI, closureLabelFromCI, + nodeMustPointToIt, closureLFInfo, + ClosureInfo ) +import SMRep ( CgRep(..), cgRepSizeW, separateByPtrFollowness, + WordOff, fixedHdrSize, thunkHdrSize, + isVoidArg, primRepToCgRep ) + +import Cmm ( CmmLit(..), CmmStmt(..), CmmExpr(..), GlobalReg(..), + CmmReg(..), hpReg, nodeReg, spReg ) +import MachOp ( mo_wordULt, mo_wordUGt, mo_wordSub ) +import CmmUtils ( mkIntCLit, CmmStmts, noStmts, oneStmt, plusStmts, + mkStmts ) +import Id ( Id ) +import DataCon ( DataCon ) +import TyCon ( tyConPrimRep ) +import CostCentre ( CostCentreStack ) +import Util ( mapAccumL, filterOut ) +import Constants ( wORD_SIZE ) +import Packages ( HomeModules ) +import Outputable + +\end{code} + + +%************************************************************************ +%* * +\subsection[CgUsages-heapery]{Monad things for fiddling with heap usage} +%* * +%************************************************************************ + +The heap always grows upwards, so hpRel is easy + +\begin{code} +hpRel :: VirtualHpOffset -- virtual offset of Hp + -> VirtualHpOffset -- virtual offset of The Thing + -> WordOff -- integer word offset +hpRel hp off = off - hp +\end{code} + +@initHeapUsage@ applies a function to the amount of heap that it uses. +It initialises the heap usage to zeros, and passes on an unchanged +heap usage. + +It is usually a prelude to performing a GC check, so everything must +be in a tidy and consistent state. 
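+
+For example (a condensed, hypothetical sketch; the real callers are
+@hpStkCheck@ and @altHeapCheck@ below), the typical pattern wraps the
+code for a block and uses the resulting high-water mark to emit one
+up-front check covering all of the block's allocation:
+
+\begin{code}
+-- Sketch only: not used elsewhere in this module.
+exampleHeapCheckedBlock :: Code -> Code
+exampleHeapCheckedBlock code
+  = initHeapUsage $ \ hpHw -> do
+	{ codeOnly $ tickyAllocHeap hpHw   -- the heap check would go here too
+	; setRealHp hpHw
+	; code }
+\end{code}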
+
+rje: Note the slightly subtle fixed point behaviour needed here
+
+\begin{code}
+initHeapUsage :: (VirtualHpOffset -> Code) -> Code
+initHeapUsage fcode
+  = do	{ orig_hp_usage <- getHpUsage
+	; setHpUsage initHpUsage
+	; fixC (\heap_usage2 -> do
+		{ fcode (heapHWM heap_usage2)
+		; getHpUsage })
+	; setHpUsage orig_hp_usage }
+
+setVirtHp :: VirtualHpOffset -> Code
+setVirtHp new_virtHp
+  = do	{ hp_usage <- getHpUsage
+	; setHpUsage (hp_usage {virtHp = new_virtHp}) }
+
+getVirtHp :: FCode VirtualHpOffset
+getVirtHp
+  = do	{ hp_usage <- getHpUsage
+	; return (virtHp hp_usage) }
+
+setRealHp :: VirtualHpOffset -> Code
+setRealHp new_realHp
+  = do	{ hp_usage <- getHpUsage
+	; setHpUsage (hp_usage {realHp = new_realHp}) }
+
+getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
+getHpRelOffset virtual_offset
+  = do	{ hp_usg <- getHpUsage
+	; return (cmmRegOffW hpReg (hpRel (realHp hp_usg) virtual_offset)) }
+\end{code}
+
+
+%************************************************************************
+%*									*
+		Layout of heap objects
+%*									*
+%************************************************************************
+
+\begin{code}
+layOutDynConstr, layOutStaticConstr
+	:: HomeModules
+	-> DataCon
+	-> [(CgRep,a)]
+	-> (ClosureInfo,
+	    [(a,VirtualHpOffset)])
+
+layOutDynConstr    = layOutConstr False
+layOutStaticConstr = layOutConstr True
+
+layOutConstr is_static hmods data_con args
+   = (mkConInfo hmods is_static data_con tot_wds ptr_wds,
+      things_w_offsets)
+  where
+    (tot_wds,		 --  #ptr_wds + #nonptr_wds
+     ptr_wds,		 --  #ptr_wds
+     things_w_offsets) = mkVirtHeapOffsets False{-not a thunk-} args
+\end{code}
+
+@mkVirtHeapOffsets@ always returns boxed things with smaller offsets
+than the unboxed things, and furthermore, the offsets in the result
+list are in increasing order.
+
+\begin{code}
+mkVirtHeapOffsets
+	  :: Bool		-- True <=> is a thunk
+	  -> [(CgRep,a)]	-- Things to make offsets for
+	  -> (WordOff,		-- _Total_ number of words allocated
+	      WordOff,		-- Number of words allocated for *pointers*
+	      [(a, VirtualHpOffset)])
+				-- Things with their offsets from start of
+				-- object in order of increasing offset
+
+-- First in list gets lowest offset, which is initial offset + 1.
+
+mkVirtHeapOffsets is_thunk things
+  = let non_void_things		      = filterOut (isVoidArg . fst) things
+	(ptrs, non_ptrs)	      = separateByPtrFollowness non_void_things
+	(wds_of_ptrs, ptrs_w_offsets) = mapAccumL computeOffset 0 ptrs
+	(tot_wds, non_ptrs_w_offsets) = mapAccumL computeOffset wds_of_ptrs non_ptrs
+    in
+    (tot_wds, wds_of_ptrs, ptrs_w_offsets ++ non_ptrs_w_offsets)
+  where
+    hdr_size	| is_thunk   = thunkHdrSize
+		| otherwise  = fixedHdrSize
+
+    computeOffset wds_so_far (rep, thing)
+      = (wds_so_far + cgRepSizeW rep, (thing, hdr_size + wds_so_far))
+\end{code}
+
+
+%************************************************************************
+%*									*
+		Lay out a static closure
+%*									*
+%************************************************************************
+
+Make a static closure, adding on any extra padding needed for CAFs,
+and adding a static link field if necessary.
+
+\begin{code}
+mkStaticClosureFields
+	:: ClosureInfo
+	-> CostCentreStack
+	-> Bool			-- Has CAF refs
+	-> [CmmLit]		-- Payload
+	-> [CmmLit]		-- The full closure
+mkStaticClosureFields cl_info ccs caf_refs payload
+  = mkStaticClosure info_lbl ccs payload padding_wds
+	static_link_field saved_info_field
+  where
+    info_lbl = infoTableLabelFromCI cl_info
+
+    -- CAFs must have consistent layout, regardless of whether they
+    -- are actually updatable or not. 
The layout of a CAF is: + -- + -- 3 saved_info + -- 2 static_link + -- 1 indirectee + -- 0 info ptr + -- + -- the static_link and saved_info fields must always be in the same + -- place. So we use closureNeedsUpdSpace rather than + -- closureUpdReqd here: + + is_caf = closureNeedsUpdSpace cl_info + + padding_wds + | not is_caf = [] + | otherwise = ASSERT(null payload) [mkIntCLit 0] + + static_link_field + | is_caf || staticClosureNeedsLink cl_info = [static_link_value] + | otherwise = [] + + saved_info_field + | is_caf = [mkIntCLit 0] + | otherwise = [] + + -- for a static constructor which has NoCafRefs, we set the + -- static link field to a non-zero value so the garbage + -- collector will ignore it. + static_link_value + | caf_refs = mkIntCLit 0 + | otherwise = mkIntCLit 1 + + +mkStaticClosure :: CLabel -> CostCentreStack -> [CmmLit] + -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit] +mkStaticClosure info_lbl ccs payload padding_wds static_link_field saved_info_field + = [CmmLabel info_lbl] + ++ variable_header_words + ++ payload + ++ padding_wds + ++ static_link_field + ++ saved_info_field + where + variable_header_words + = staticGranHdr + ++ staticParHdr + ++ staticProfHdr ccs + ++ staticTickyHdr +\end{code} + +%************************************************************************ +%* * +\subsection[CgHeapery-heap-overflow]{Heap overflow checking} +%* * +%************************************************************************ + +The new code for heapChecks. For GrAnSim the code for doing a heap check +and doing a context switch has been separated. Especially, the HEAP_CHK +macro only performs a heap check. THREAD_CONTEXT_SWITCH should be used for +doing a context switch. GRAN_FETCH_AND_RESCHEDULE must be put at the +beginning of every slow entry code in order to simulate the fetching of +closures. If fetching is necessary (i.e. current closure is not local) then +an automatic context switch is done. + +-------------------------------------------------------------- +A heap/stack check at a function or thunk entry point. + +\begin{code} +funEntryChecks :: ClosureInfo -> CmmStmts -> Code -> Code +funEntryChecks cl_info reg_save_code code + = hpStkCheck cl_info True reg_save_code code + +thunkEntryChecks :: ClosureInfo -> Code -> Code +thunkEntryChecks cl_info code + = hpStkCheck cl_info False noStmts code + +hpStkCheck :: ClosureInfo -- Function closure + -> Bool -- Is a function? (not a thunk) + -> CmmStmts -- Register saves + -> Code + -> Code + +hpStkCheck cl_info is_fun reg_save_code code + = getFinalStackHW $ \ spHw -> do + { sp <- getRealSp + ; let stk_words = spHw - sp + ; initHeapUsage $ \ hpHw -> do + { -- Emit heap checks, but be sure to do it lazily so + -- that the conditionals on hpHw don't cause a black hole + codeOnly $ do + { do_checks stk_words hpHw full_save_code rts_label + ; tickyAllocHeap hpHw } + ; setRealHp hpHw + ; code } + } + where + node_asst + | nodeMustPointToIt (closureLFInfo cl_info) + = noStmts + | otherwise + = oneStmt (CmmAssign nodeReg (CmmLit (CmmLabel closure_lbl))) + closure_lbl = closureLabelFromCI cl_info + + full_save_code = node_asst `plusStmts` reg_save_code + + rts_label | is_fun = CmmReg (CmmGlobal GCFun) + -- Function entry point + | otherwise = CmmReg (CmmGlobal GCEnter1) + -- Thunk or case return + -- In the thunk/case-return case, R1 points to a closure + -- which should be (re)-entered after GC +\end{code} + +Heap checks in a case alternative are nice and easy, provided this is +a bog-standard algebraic case. 
We have in our hand:
+
+   * one return address, on the stack,
+   * one return value, in Node.
+
+The canned code for this heap check failure just pushes Node on the
+stack, saying 'EnterGHC' to return. The scheduler will return by
+entering the top value on the stack, which in turn will return through
+the return address, getting us back to where we were. This is
+therefore only valid if the return value is *lifted* (just being
+boxed isn't good enough).
+
+For primitive returns, we have an unlifted value in some register
+(either R1 or FloatReg1 or DblReg1). This means using specialised
+heap-check code for these cases.
+
+\begin{code}
+altHeapCheck
+    :: AltType	-- PolyAlt, PrimAlt, AlgAlt, but *not* UbxTupAlt
+		--	(Unboxed tuples are dealt with by ubxTupleHeapCheck)
+    -> Code	-- Continuation
+    -> Code
+altHeapCheck alt_type code
+  = initHeapUsage $ \ hpHw -> do
+	{ codeOnly $ do
+	     { do_checks 0 {- no stack chk -} hpHw
+			 noStmts {- nothing to save -}
+			 (rts_label alt_type)
+	     ; tickyAllocHeap hpHw }
+	; setRealHp hpHw
+	; code }
+  where
+    rts_label PolyAlt = CmmLit (CmmLabel (mkRtsCodeLabel SLIT( "stg_gc_unpt_r1")))
+	  -- Do *not* enter R1 after a heap check in
+	  -- a polymorphic case.  It might be a function
+	  -- and the entry code for a function (currently)
+	  -- applies it
+	  --
+	  -- However R1 is guaranteed to be a pointer
+
+    rts_label (AlgAlt tc) = stg_gc_enter1
+	-- Enter R1 after the heap check; it's a pointer
+
+    rts_label (PrimAlt tc)
+      = CmmLit $ CmmLabel $
+	case primRepToCgRep (tyConPrimRep tc) of
+	  VoidArg    -> mkRtsCodeLabel SLIT( "stg_gc_noregs")
+	  FloatArg   -> mkRtsCodeLabel SLIT( "stg_gc_f1")
+	  DoubleArg  -> mkRtsCodeLabel SLIT( "stg_gc_d1")
+	  LongArg    -> mkRtsCodeLabel SLIT( "stg_gc_l1")
+			-- R1 is boxed but unlifted:
+	  PtrArg     -> mkRtsCodeLabel SLIT( "stg_gc_unpt_r1")
+			-- R1 is unboxed:
+	  NonPtrArg  -> mkRtsCodeLabel SLIT( "stg_gc_unbx_r1")
+
+    rts_label (UbxTupAlt _) = panic "altHeapCheck"
+\end{code}
+
+
+Unboxed tuple alternatives and let-no-escapes (the two most annoying
+constructs to generate code for!)  For unboxed tuple returns, there
+are an arbitrary number of possibly unboxed return values, some of
+which will be in registers, and the others will be on the stack.  We
+always organise the stack-resident fields into pointers &
+non-pointers, and pass the number of each to the heap check code.
+
+\begin{code}
+unbxTupleHeapCheck
+	:: [(Id, GlobalReg)]	-- Live registers
+	-> WordOff	-- no. of stack slots containing ptrs
+	-> WordOff	-- no. of stack slots containing nonptrs
+	-> CmmStmts	-- code to insert in the failure path
+	-> Code
+	-> Code
+
+unbxTupleHeapCheck regs ptrs nptrs fail_code code
+  -- We can't manage more than 255 pointers/non-pointers
+  -- in a generic heap check.
+  | ptrs > 255 || nptrs > 255 = panic "altHeapCheck"
+  | otherwise
+  = initHeapUsage $ \ hpHw -> do
+	{ codeOnly $ do { do_checks 0 {- no stack check -} hpHw
+				    full_fail_code rts_label
+			; tickyAllocHeap hpHw }
+	; setRealHp hpHw
+	; code }
+  where
+    full_fail_code  = fail_code `plusStmts` oneStmt assign_liveness
+    assign_liveness = CmmAssign (CmmGlobal (VanillaReg 9))	-- Ho ho ho!
+				(CmmLit (mkWordCLit liveness))
+    liveness	    = mkRegLiveness regs ptrs nptrs
+    rts_label	    = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_ut")))
+
+\end{code}
+
+
+%************************************************************************
+%*									*
+		Heap/Stack Checks. 
+%*									*
+%************************************************************************
+
+When failing a check, we save a return address on the stack and
+jump to a pre-compiled code fragment that saves the live registers
+and returns to the scheduler.
+
+The return address in most cases will be the beginning of the basic
+block in which the check resides, since we need to perform the check
+again on re-entry because someone else might have stolen the resource
+in the meantime.
+
+\begin{code}
+do_checks :: WordOff		-- Stack headroom
+	  -> WordOff		-- Heap  headroom
+	  -> CmmStmts		-- Assignments to perform on failure
+	  -> CmmExpr		-- Rts address to jump to on failure
+	  -> Code
+do_checks 0 0 _ _   = nopC
+do_checks stk hp reg_save_code rts_lbl
+  = do_checks' (CmmLit (mkIntCLit (stk*wORD_SIZE)))
+	       (CmmLit (mkIntCLit (hp*wORD_SIZE)))
+	 (stk /= 0) (hp /= 0) reg_save_code rts_lbl
+
+-- The offsets are now in *bytes*
+do_checks' stk_expr hp_expr stk_nonzero hp_nonzero reg_save_code rts_lbl
+  = do	{ doGranAllocate hp_expr
+
+	-- Emit a block for the heap-check-failure code
+	; blk_id <- forkLabelledCode $ do
+			{ whenC hp_nonzero $
+				stmtC (CmmAssign (CmmGlobal HpAlloc) hp_expr)
+			; emitStmts reg_save_code
+			; stmtC (CmmJump rts_lbl []) }
+
+	-- Check for stack overflow *FIRST*; otherwise
+	-- we might bump Hp and then fail the stack overflow check
+	; whenC stk_nonzero
+		(stmtC (CmmCondBranch stk_oflo blk_id))
+
+	; whenC hp_nonzero
+		(stmtsC [CmmAssign hpReg
+				(cmmOffsetExprB (CmmReg hpReg) hp_expr),
+			CmmCondBranch hp_oflo blk_id])
+		-- Bump heap pointer, and test for heap exhaustion
+		-- Note that we don't move the heap pointer unless the
+		-- stack check succeeds.  Otherwise we might end up
+		-- with slop at the end of the current block, which can
+		-- confuse the LDV profiler.
+    }
+  where
+	-- Stk overflow if (Sp - stk_bytes < SpLim)
+    stk_oflo = CmmMachOp mo_wordULt
+		  [CmmMachOp mo_wordSub [CmmReg spReg, stk_expr],
+		   CmmReg (CmmGlobal SpLim)]
+
+	-- Hp overflow if (Hp > HpLim)
+	-- (Hp has been incremented by now)
+	-- HpLim points to the LAST WORD of valid allocation space.
+    hp_oflo = CmmMachOp mo_wordUGt
+		  [CmmReg hpReg, CmmReg (CmmGlobal HpLim)]
+\end{code}
+
+%************************************************************************
+%*									*
+		Generic Heap/Stack Checks - used in the RTS
+%*									*
+%************************************************************************
+
+\begin{code}
+hpChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code
+hpChkGen bytes liveness reentry
+  = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assigns stg_gc_gen
+  where
+    assigns = mkStmts [
+		CmmAssign (CmmGlobal (VanillaReg 9))  liveness,
+		CmmAssign (CmmGlobal (VanillaReg 10)) reentry
+		]
+
+-- a heap check where R1 points to the closure to enter on return, and
+-- we want to assign to Sp[0] on failure (used in AutoApply.cmm:BUILD_PAP). 
+hpChkNodePointsAssignSp0 :: CmmExpr -> CmmExpr -> Code +hpChkNodePointsAssignSp0 bytes sp0 + = do_checks' (CmmLit (mkIntCLit 0)) bytes False True assign stg_gc_enter1 + where assign = oneStmt (CmmStore (CmmReg spReg) sp0) + +stkChkGen :: CmmExpr -> CmmExpr -> CmmExpr -> Code +stkChkGen bytes liveness reentry + = do_checks' bytes (CmmLit (mkIntCLit 0)) True False assigns stg_gc_gen + where + assigns = mkStmts [ + CmmAssign (CmmGlobal (VanillaReg 9)) liveness, + CmmAssign (CmmGlobal (VanillaReg 10)) reentry + ] + +stkChkNodePoints :: CmmExpr -> Code +stkChkNodePoints bytes + = do_checks' bytes (CmmLit (mkIntCLit 0)) True False noStmts stg_gc_enter1 + +stg_gc_gen = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("stg_gc_gen"))) +stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1) +\end{code} + +%************************************************************************ +%* * +\subsection[initClosure]{Initialise a dynamic closure} +%* * +%************************************************************************ + +@allocDynClosure@ puts the thing in the heap, and modifies the virtual Hp +to account for this. + +\begin{code} +allocDynClosure + :: ClosureInfo + -> CmmExpr -- Cost Centre to stick in the object + -> CmmExpr -- Cost Centre to blame for this alloc + -- (usually the same; sometimes "OVERHEAD") + + -> [(CmmExpr, VirtualHpOffset)] -- Offsets from start of the object + -- ie Info ptr has offset zero. + -> FCode VirtualHpOffset -- Returns virt offset of object + +allocDynClosure cl_info use_cc blame_cc amodes_with_offsets + = do { virt_hp <- getVirtHp + + -- FIND THE OFFSET OF THE INFO-PTR WORD + ; let info_offset = virt_hp + 1 + -- info_offset is the VirtualHpOffset of the first + -- word of the new object + -- Remember, virtHp points to last allocated word, + -- ie 1 *before* the info-ptr word of new object. + + info_ptr = CmmLit (CmmLabel (infoTableLabelFromCI cl_info)) + hdr_w_offsets = initDynHdr info_ptr use_cc `zip` [0..] + + -- SAY WHAT WE ARE ABOUT TO DO + ; profDynAlloc cl_info use_cc + -- ToDo: This is almost certainly wrong + -- We're ignoring blame_cc. But until we've + -- fixed the boxing hack in chooseDynCostCentres etc, + -- we're worried about making things worse by "fixing" + -- this part to use blame_cc! + + ; tickyDynAlloc cl_info + + -- ALLOCATE THE OBJECT + ; base <- getHpRelOffset info_offset + ; hpStore base (hdr_w_offsets ++ amodes_with_offsets) + + -- BUMP THE VIRTUAL HEAP POINTER + ; setVirtHp (virt_hp + closureSize cl_info) + + -- RETURN PTR TO START OF OBJECT + ; returnFC info_offset } + + +initDynHdr :: CmmExpr + -> CmmExpr -- Cost centre to put in object + -> [CmmExpr] +initDynHdr info_ptr cc + = [info_ptr] + -- ToDo: Gransim stuff + -- ToDo: Parallel stuff + ++ dynProfHdr cc + -- No ticky header + +hpStore :: CmmExpr -> [(CmmExpr, VirtualHpOffset)] -> Code +-- Store the item (expr,off) in base[off] +hpStore base es + = stmtsC [ CmmStore (cmmOffsetW base off) val + | (val, off) <- es ] + +emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> Code +emitSetDynHdr base info_ptr ccs + = hpStore base (zip (initDynHdr info_ptr ccs) [0..]) +\end{code} diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs new file mode 100644 index 0000000000..b769950d87 --- /dev/null +++ b/compiler/codeGen/CgInfoTbls.hs @@ -0,0 +1,591 @@ +----------------------------------------------------------------------------- +-- +-- Building info tables. 
+-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgInfoTbls ( + emitClosureCodeAndInfoTable, + emitInfoTableAndCode, + dataConTagZ, + getSRTInfo, + emitDirectReturnTarget, emitAlgReturnTarget, + emitDirectReturnInstr, emitVectoredReturnInstr, + mkRetInfoTable, + mkStdInfoTable, + stdInfoTableSizeB, + mkFunGenInfoExtraBits, + entryCode, closureInfoPtr, + getConstrTag, + infoTable, infoTableClosureType, + infoTablePtrs, infoTableNonPtrs, + funInfoTable, + retVec + ) where + + +#include "HsVersions.h" + +import ClosureInfo ( ClosureInfo, closureTypeDescr, closureName, + infoTableLabelFromCI, Liveness, + closureValDescr, closureSRT, closureSMRep, + closurePtrsSize, closureNonHdrSize, closureFunInfo, + C_SRT(..), needsSRT, isConstrClosure_maybe, + ArgDescr(..) ) +import SMRep ( StgHalfWord, hALF_WORD_SIZE_IN_BITS, hALF_WORD_SIZE, + WordOff, ByteOff, + smRepClosureTypeInt, tablesNextToCode, + rET_BIG, rET_SMALL, rET_VEC_BIG, rET_VEC_SMALL ) +import CgBindery ( getLiveStackSlots ) +import CgCallConv ( isBigLiveness, mkLivenessCLit, buildContLiveness, + argDescrType, getSequelAmode, + CtrlReturnConvention(..) ) +import CgUtils ( mkStringCLit, packHalfWordsCLit, mkWordCLit, + cmmOffsetB, cmmOffsetExprW, cmmLabelOffW, cmmOffsetW, + emitDataLits, emitRODataLits, emitSwitch, cmmNegate, + newTemp ) +import CgMonad + +import CmmUtils ( mkIntCLit, zeroCLit ) +import Cmm ( CmmStmt(..), CmmExpr(..), CmmLit(..), LocalReg, + CmmBasicBlock, nodeReg ) +import MachOp ( MachOp(..), wordRep, halfWordRep ) +import CLabel +import StgSyn ( SRT(..) ) +import Name ( Name ) +import DataCon ( DataCon, dataConTag, fIRST_TAG ) +import Unique ( Uniquable(..) ) +import DynFlags ( DynFlags(..), HscTarget(..) ) +import StaticFlags ( opt_SccProfilingOn ) +import ListSetOps ( assocDefault ) +import Maybes ( isJust ) +import Constants ( wORD_SIZE, sIZEOF_StgFunInfoExtraRev ) +import Outputable + + +------------------------------------------------------------------------- +-- +-- Generating the info table and code for a closure +-- +------------------------------------------------------------------------- + +-- Here we make a concrete info table, represented as a list of CmmAddr +-- (it can't be simply a list of Word, because the SRT field is +-- represented by a label+offset expression). 
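+--
+-- For instance (an illustrative, profiling-off example), the standard
+-- part of the info table for a constructor with 2 pointer words and 1
+-- non-pointer word is just [layout, type], where layout packs the
+-- ptrs/nptrs counts into half-words and type packs the closure type
+-- with the zero-indexed constructor tag (see mkStdInfoTable below).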
+ +-- With tablesNextToCode, the layout is +-- <reversed variable part> +-- <normal forward StgInfoTable, but without +-- an entry point at the front> +-- <code> +-- +-- Without tablesNextToCode, the layout of an info table is +-- <entry label> +-- <normal forward rest of StgInfoTable> +-- <forward variable part> +-- +-- See includes/InfoTables.h + +emitClosureCodeAndInfoTable :: ClosureInfo -> [LocalReg] -> CgStmts -> Code +emitClosureCodeAndInfoTable cl_info args body + = do { ty_descr_lit <- + if opt_SccProfilingOn + then mkStringCLit (closureTypeDescr cl_info) + else return (mkIntCLit 0) + ; cl_descr_lit <- + if opt_SccProfilingOn + then mkStringCLit cl_descr_string + else return (mkIntCLit 0) + ; let std_info = mkStdInfoTable ty_descr_lit cl_descr_lit + cl_type srt_len layout_lit + + ; blks <- cgStmtsToBlocks body + ; emitInfoTableAndCode info_lbl std_info extra_bits args blks } + where + info_lbl = infoTableLabelFromCI cl_info + + cl_descr_string = closureValDescr cl_info + cl_type = smRepClosureTypeInt (closureSMRep cl_info) + + srt = closureSRT cl_info + needs_srt = needsSRT srt + + mb_con = isConstrClosure_maybe cl_info + is_con = isJust mb_con + + (srt_label,srt_len) + = case mb_con of + Just con -> -- Constructors don't have an SRT + -- We keep the *zero-indexed* tag in the srt_len + -- field of the info table. + (mkIntCLit 0, fromIntegral (dataConTagZ con)) + + Nothing -> -- Not a constructor + srtLabelAndLength srt info_lbl + + ptrs = closurePtrsSize cl_info + nptrs = size - ptrs + size = closureNonHdrSize cl_info + layout_lit = packHalfWordsCLit ptrs nptrs + + extra_bits + | is_fun = fun_extra_bits + | is_con = [] + | needs_srt = [srt_label] + | otherwise = [] + + maybe_fun_stuff = closureFunInfo cl_info + is_fun = isJust maybe_fun_stuff + (Just (arity, arg_descr)) = maybe_fun_stuff + + fun_extra_bits + | ArgGen liveness <- arg_descr + = [ fun_amode, + srt_label, + makeRelativeRefTo info_lbl $ mkLivenessCLit liveness, + slow_entry ] + | needs_srt = [fun_amode, srt_label] + | otherwise = [fun_amode] + + slow_entry = makeRelativeRefTo info_lbl (CmmLabel slow_entry_label) + slow_entry_label = mkSlowEntryLabel (closureName cl_info) + + fun_amode = packHalfWordsCLit fun_type arity + fun_type = argDescrType arg_descr + +-- We keep the *zero-indexed* tag in the srt_len field of the info +-- table of a data constructor. +dataConTagZ :: DataCon -> ConTagZ +dataConTagZ con = dataConTag con - fIRST_TAG + +-- A low-level way to generate the variable part of a fun-style info table. +-- (must match fun_extra_bits above). Used by the C-- parser. 
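+-- The result is the four-field list
+--	[ fun_type|arity, srt_label, liveness, slow_entry ]
+-- mirroring the ArgGen case of fun_extra_bits above.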
+mkFunGenInfoExtraBits :: Int -> Int -> CmmLit -> CmmLit -> CmmLit -> [CmmLit] +mkFunGenInfoExtraBits fun_type arity srt_label liveness slow_entry + = [ packHalfWordsCLit fun_type arity, + srt_label, + liveness, + slow_entry ] + +------------------------------------------------------------------------- +-- +-- Generating the info table and code for a return point +-- +------------------------------------------------------------------------- + +-- Here's the layout of a return-point info table +-- +-- Tables next to code: +-- +-- <reversed vector table> +-- <srt slot> +-- <standard info table> +-- ret-addr --> <entry code (if any)> +-- +-- Not tables-next-to-code: +-- +-- ret-addr --> <ptr to entry code> +-- <standard info table> +-- <srt slot> +-- <forward vector table> +-- +-- * The vector table is only present for vectored returns +-- +-- * The SRT slot is only there if either +-- (a) there is SRT info to record, OR +-- (b) if the return is vectored +-- The latter (b) is necessary so that the vector is in a +-- predictable place + +vectorSlot :: CmmExpr -> CmmExpr -> CmmExpr +-- Get the vector slot from the info pointer +vectorSlot info_amode zero_indexed_tag + | tablesNextToCode + = cmmOffsetExprW (cmmOffsetW info_amode (- (stdInfoTableSizeW + 2))) + (cmmNegate zero_indexed_tag) + -- The "2" is one for the SRT slot, and one more + -- to get to the first word of the vector + + | otherwise + = cmmOffsetExprW (cmmOffsetW info_amode (stdInfoTableSizeW + 2)) + zero_indexed_tag + -- The "2" is one for the entry-code slot and one for the SRT slot + +retVec :: CmmExpr -> CmmExpr -> CmmExpr +-- Get a return vector from the info pointer +retVec info_amode zero_indexed_tag + = let slot = vectorSlot info_amode zero_indexed_tag + tableEntry = CmmLoad slot wordRep + in if tablesNextToCode + then CmmMachOp (MO_Add wordRep) [tableEntry, info_amode] + else tableEntry + +emitReturnTarget + :: Name + -> CgStmts -- The direct-return code (if any) + -- (empty for vectored returns) + -> [CmmLit] -- Vector of return points + -- (empty for non-vectored returns) + -> SRT + -> FCode CLabel +emitReturnTarget name stmts vector srt + = do { live_slots <- getLiveStackSlots + ; liveness <- buildContLiveness name live_slots + ; srt_info <- getSRTInfo name srt + + ; let + cl_type = case (null vector, isBigLiveness liveness) of + (True, True) -> rET_BIG + (True, False) -> rET_SMALL + (False, True) -> rET_VEC_BIG + (False, False) -> rET_VEC_SMALL + + (std_info, extra_bits) = + mkRetInfoTable info_lbl liveness srt_info cl_type vector + + ; blks <- cgStmtsToBlocks stmts + ; emitInfoTableAndCode info_lbl std_info extra_bits args blks + ; return info_lbl } + where + args = {- trace "emitReturnTarget: missing args" -} [] + uniq = getUnique name + info_lbl = mkReturnInfoLabel uniq + + +mkRetInfoTable + :: CLabel -- info label + -> Liveness -- liveness + -> C_SRT -- SRT Info + -> Int -- type (eg. 
rET_SMALL) + -> [CmmLit] -- vector + -> ([CmmLit],[CmmLit]) +mkRetInfoTable info_lbl liveness srt_info cl_type vector + = (std_info, extra_bits) + where + (srt_label, srt_len) = srtLabelAndLength srt_info info_lbl + + srt_slot | need_srt = [srt_label] + | otherwise = [] + + need_srt = needsSRT srt_info || not (null vector) + -- If there's a vector table then we must allocate + -- an SRT slot, so that the vector table is at a + -- known offset from the info pointer + + liveness_lit = makeRelativeRefTo info_lbl $ mkLivenessCLit liveness + std_info = mkStdInfoTable zeroCLit zeroCLit cl_type srt_len liveness_lit + extra_bits = srt_slot ++ map (makeRelativeRefTo info_lbl) vector + + +emitDirectReturnTarget + :: Name + -> CgStmts -- The direct-return code + -> SRT + -> FCode CLabel +emitDirectReturnTarget name code srt + = emitReturnTarget name code [] srt + +emitAlgReturnTarget + :: Name -- Just for its unique + -> [(ConTagZ, CgStmts)] -- Tagged branches + -> Maybe CgStmts -- Default branch (if any) + -> SRT -- Continuation's SRT + -> CtrlReturnConvention + -> FCode (CLabel, SemiTaggingStuff) + +emitAlgReturnTarget name branches mb_deflt srt ret_conv + = case ret_conv of + UnvectoredReturn fam_sz -> do + { blks <- getCgStmts $ + emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1) + -- NB: tag_expr is zero-based + ; lbl <- emitDirectReturnTarget name blks srt + ; return (lbl, Nothing) } + -- Nothing: the internal branches in the switch don't have + -- global labels, so we can't use them at the 'call site' + + VectoredReturn fam_sz -> do + { let tagged_lbls = zip (map fst branches) $ + map (CmmLabel . mkAltLabel uniq . fst) branches + deflt_lbl | isJust mb_deflt = CmmLabel $ mkDefaultLabel uniq + | otherwise = mkIntCLit 0 + ; let vector = [ assocDefault deflt_lbl tagged_lbls i + | i <- [0..fam_sz-1]] + ; lbl <- emitReturnTarget name noCgStmts vector srt + ; mapFCs emit_alt branches + ; emit_deflt mb_deflt + ; return (lbl, Just (tagged_lbls, deflt_lbl)) } + where + uniq = getUnique name + tag_expr = getConstrTag (CmmReg nodeReg) + + emit_alt :: (Int, CgStmts) -> FCode (Int, CmmLit) + -- Emit the code for the alternative as a top-level + -- code block returning a label for it + emit_alt (tag, stmts) = do { let lbl = mkAltLabel uniq tag + ; blks <- cgStmtsToBlocks stmts + ; emitProc [] lbl [] blks + ; return (tag, CmmLabel lbl) } + + emit_deflt (Just stmts) = do { let lbl = mkDefaultLabel uniq + ; blks <- cgStmtsToBlocks stmts + ; emitProc [] lbl [] blks + ; return (CmmLabel lbl) } + emit_deflt Nothing = return (mkIntCLit 0) + -- Nothing case: the simplifier might have eliminated a case + -- so we may have e.g. case xs of + -- [] -> e + -- In that situation the default should never be taken, + -- so we just use a NULL pointer + +-------------------------------- +emitDirectReturnInstr :: Code +emitDirectReturnInstr + = do { info_amode <- getSequelAmode + ; stmtC (CmmJump (entryCode info_amode) []) } + +emitVectoredReturnInstr :: CmmExpr -- _Zero-indexed_ constructor tag + -> Code +emitVectoredReturnInstr zero_indexed_tag + = do { info_amode <- getSequelAmode + -- HACK! assign info_amode to a temp, because retVec + -- uses it twice and the NCG doesn't have any CSE yet. + -- Only do this for the NCG, because gcc is too stupid + -- to optimise away the extra tmp (grrr). 
+ ; dflags <- getDynFlags + ; x <- if hscTarget dflags == HscAsm + then do z <- newTemp wordRep + stmtC (CmmAssign z info_amode) + return (CmmReg z) + else + return info_amode + ; let target = retVec x zero_indexed_tag + ; stmtC (CmmJump target []) } + + +------------------------------------------------------------------------- +-- +-- Generating a standard info table +-- +------------------------------------------------------------------------- + +-- The standard bits of an info table. This part of the info table +-- corresponds to the StgInfoTable type defined in InfoTables.h. +-- +-- Its shape varies with ticky/profiling/tables next to code etc +-- so we can't use constant offsets from Constants + +mkStdInfoTable + :: CmmLit -- closure type descr (profiling) + -> CmmLit -- closure descr (profiling) + -> Int -- closure type + -> StgHalfWord -- SRT length + -> CmmLit -- layout field + -> [CmmLit] + +mkStdInfoTable type_descr closure_descr cl_type srt_len layout_lit + = -- Parallel revertible-black hole field + prof_info + -- Ticky info (none at present) + -- Debug info (none at present) + ++ [layout_lit, type_lit] + + where + prof_info + | opt_SccProfilingOn = [type_descr, closure_descr] + | otherwise = [] + + type_lit = packHalfWordsCLit cl_type srt_len + +stdInfoTableSizeW :: WordOff +-- The size of a standard info table varies with profiling/ticky etc, +-- so we can't get it from Constants +-- It must vary in sync with mkStdInfoTable +stdInfoTableSizeW + = size_fixed + size_prof + where + size_fixed = 2 -- layout, type + size_prof | opt_SccProfilingOn = 2 + | otherwise = 0 + +stdInfoTableSizeB = stdInfoTableSizeW * wORD_SIZE :: ByteOff + +stdSrtBitmapOffset :: ByteOff +-- Byte offset of the SRT bitmap half-word which is +-- in the *higher-addressed* part of the type_lit +stdSrtBitmapOffset = stdInfoTableSizeB - hALF_WORD_SIZE + +stdClosureTypeOffset :: ByteOff +-- Byte offset of the closure type half-word +stdClosureTypeOffset = stdInfoTableSizeB - wORD_SIZE + +stdPtrsOffset, stdNonPtrsOffset :: ByteOff +stdPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE +stdNonPtrsOffset = stdInfoTableSizeB - 2*wORD_SIZE + hALF_WORD_SIZE + +------------------------------------------------------------------------- +-- +-- Accessing fields of an info table +-- +------------------------------------------------------------------------- + +closureInfoPtr :: CmmExpr -> CmmExpr +-- Takes a closure pointer and returns the info table pointer +closureInfoPtr e = CmmLoad e wordRep + +entryCode :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns its entry code +entryCode e | tablesNextToCode = e + | otherwise = CmmLoad e wordRep + +getConstrTag :: CmmExpr -> CmmExpr +-- Takes a closure pointer, and return the *zero-indexed* +-- constructor tag obtained from the info table +-- This lives in the SRT field of the info table +-- (constructors don't need SRTs). 
+getConstrTag closure_ptr + = CmmMachOp (MO_U_Conv halfWordRep wordRep) [infoTableConstrTag info_table] + where + info_table = infoTable (closureInfoPtr closure_ptr) + +infoTable :: CmmExpr -> CmmExpr +-- Takes an info pointer (the first word of a closure) +-- and returns a pointer to the first word of the standard-form +-- info table, excluding the entry-code word (if present) +infoTable info_ptr + | tablesNextToCode = cmmOffsetB info_ptr (- stdInfoTableSizeB) + | otherwise = cmmOffsetW info_ptr 1 -- Past the entry code pointer + +infoTableConstrTag :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the constr tag +-- field of the info table (same as the srt_bitmap field) +infoTableConstrTag = infoTableSrtBitmap + +infoTableSrtBitmap :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the srt_bitmap +-- field of the info table +infoTableSrtBitmap info_tbl + = CmmLoad (cmmOffsetB info_tbl stdSrtBitmapOffset) halfWordRep + +infoTableClosureType :: CmmExpr -> CmmExpr +-- Takes an info table pointer (from infoTable) and returns the closure type +-- field of the info table. +infoTableClosureType info_tbl + = CmmLoad (cmmOffsetB info_tbl stdClosureTypeOffset) halfWordRep + +infoTablePtrs :: CmmExpr -> CmmExpr +infoTablePtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdPtrsOffset) halfWordRep + +infoTableNonPtrs :: CmmExpr -> CmmExpr +infoTableNonPtrs info_tbl + = CmmLoad (cmmOffsetB info_tbl stdNonPtrsOffset) halfWordRep + +funInfoTable :: CmmExpr -> CmmExpr +-- Takes the info pointer of a function, +-- and returns a pointer to the first word of the StgFunInfoExtra struct +-- in the info table. +funInfoTable info_ptr + | tablesNextToCode + = cmmOffsetB info_ptr (- stdInfoTableSizeB - sIZEOF_StgFunInfoExtraRev) + | otherwise + = cmmOffsetW info_ptr (1 + stdInfoTableSizeW) + -- Past the entry code pointer + +------------------------------------------------------------------------- +-- +-- Emit the code for a closure (or return address) +-- and its associated info table +-- +------------------------------------------------------------------------- + +-- The complication here concerns whether or not we can +-- put the info table next to the code + +emitInfoTableAndCode + :: CLabel -- Label of info table + -> [CmmLit] -- ...its invariant part + -> [CmmLit] -- ...and its variant part + -> [LocalReg] -- ...args + -> [CmmBasicBlock] -- ...and body + -> Code + +emitInfoTableAndCode info_lbl std_info extra_bits args blocks + | tablesNextToCode -- Reverse the extra_bits; and emit the top-level proc + = emitProc (reverse extra_bits ++ std_info) + entry_lbl args blocks + -- NB: the info_lbl is discarded + + | null blocks -- No actual code; only the info table is significant + = -- Use a zero place-holder in place of the + -- entry-label in the info table + emitRODataLits info_lbl (zeroCLit : std_info ++ extra_bits) + + | otherwise -- Separately emit info table (with the function entry + = -- point as first entry) and the entry code + do { emitDataLits info_lbl (CmmLabel entry_lbl : std_info ++ extra_bits) + ; emitProc [] entry_lbl args blocks } + + where + entry_lbl = infoLblToEntryLbl info_lbl + +------------------------------------------------------------------------- +-- +-- Static reference tables +-- +------------------------------------------------------------------------- + +-- There is just one SRT for each top level binding; all the nested +-- bindings use sub-sections of this SRT. 
The label is passed down to +-- the nested bindings via the monad. + +getSRTInfo :: Name -> SRT -> FCode C_SRT +getSRTInfo id NoSRT = return NoC_SRT +getSRTInfo id (SRT off len bmp) + | len > hALF_WORD_SIZE_IN_BITS || bmp == [fromIntegral srt_escape] + = do { srt_lbl <- getSRTLabel + ; let srt_desc_lbl = mkSRTDescLabel id + ; emitRODataLits srt_desc_lbl + ( cmmLabelOffW srt_lbl off + : mkWordCLit (fromIntegral len) + : map mkWordCLit bmp) + ; return (C_SRT srt_desc_lbl 0 srt_escape) } + + | otherwise + = do { srt_lbl <- getSRTLabel + ; return (C_SRT srt_lbl off (fromIntegral (head bmp))) } + -- The fromIntegral converts to StgHalfWord + +srt_escape = (-1) :: StgHalfWord + +srtLabelAndLength :: C_SRT -> CLabel -> (CmmLit, StgHalfWord) +srtLabelAndLength NoC_SRT _ + = (zeroCLit, 0) +srtLabelAndLength (C_SRT lbl off bitmap) info_lbl + = (makeRelativeRefTo info_lbl $ cmmLabelOffW lbl off, bitmap) + +------------------------------------------------------------------------- +-- +-- Position independent code +-- +------------------------------------------------------------------------- +-- In order to support position independent code, we mustn't put absolute +-- references into read-only space. Info tables in the tablesNextToCode +-- case must be in .text, which is read-only, so we doctor the CmmLits +-- to use relative offsets instead. + +-- Note that this is done even when the -fPIC flag is not specified, +-- as we want to keep binary compatibility between PIC and non-PIC. + +makeRelativeRefTo :: CLabel -> CmmLit -> CmmLit + +makeRelativeRefTo info_lbl (CmmLabel lbl) + | tablesNextToCode + = CmmLabelDiffOff lbl info_lbl 0 +makeRelativeRefTo info_lbl (CmmLabelOff lbl off) + | tablesNextToCode + = CmmLabelDiffOff lbl info_lbl off +makeRelativeRefTo _ lit = lit diff --git a/compiler/codeGen/CgLetNoEscape.lhs b/compiler/codeGen/CgLetNoEscape.lhs new file mode 100644 index 0000000000..39860f4ee0 --- /dev/null +++ b/compiler/codeGen/CgLetNoEscape.lhs @@ -0,0 +1,212 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1993-1998 +% +% $Id: CgLetNoEscape.lhs,v 1.26 2004/09/30 10:35:47 simonpj Exp $ +% +%******************************************************** +%* * +\section[CgLetNoEscape]{Handling ``let-no-escapes''} +%* * +%******************************************************** + +\begin{code} +module CgLetNoEscape ( cgLetNoEscapeClosure ) where + +#include "HsVersions.h" + +import {-# SOURCE #-} CgExpr ( cgExpr ) + +import StgSyn +import CgMonad + +import CgBindery ( CgIdInfo, letNoEscapeIdInfo, nukeDeadBindings ) +import CgCase ( restoreCurrentCostCentre ) +import CgCon ( bindUnboxedTupleComponents ) +import CgHeapery ( unbxTupleHeapCheck ) +import CgInfoTbls ( emitDirectReturnTarget ) +import CgStackery ( allocStackTop, deAllocStackTop, getSpRelOffset ) +import Cmm ( CmmStmt(..) ) +import CmmUtils ( mkLblExpr, oneStmt ) +import CLabel ( mkReturnInfoLabel ) +import ClosureInfo ( mkLFLetNoEscape ) +import CostCentre ( CostCentreStack ) +import Id ( Id, idName ) +import Var ( idUnique ) +import SMRep ( retAddrSizeW ) +import BasicTypes ( RecFlag(..) ) +import Outputable +\end{code} + +%************************************************************************ +%* * +\subsection[what-is-non-escaping]{What {\em is} a ``non-escaping let''?} +%* * +%************************************************************************ + +[The {\em code} that detects these things is elsewhere.] + +Consider: +\begin{verbatim} + let x = fvs \ args -> e + in + if ... then x else + if ... then x else ... 
+\end{verbatim}
+@x@ is used twice (so we probably can't unfold it), but when it is
+entered, the stack is deeper than it was when the definition of @x@
+happened.  Specifically, if instead of allocating a closure for @x@,
+we saved all @x@'s fvs on the stack, and remembered the stack depth at
+that moment, then whenever we enter @x@ we can simply set the stack
+pointer(s) to these remembered (compile-time-fixed) values, and jump
+to the code for @x@.
+
+All of this is provided x is:
+\begin{enumerate}
+\item
+non-updatable;
+\item
+guaranteed to be entered before the stack retreats -- ie x is not
+buried in a heap-allocated closure, or passed as an argument to something;
+\item
+all the enters have exactly the right number of arguments,
+no more no less;
+\item
+all the enters are tail calls; that is, they return to the
+caller enclosing the definition of @x@.
+\end{enumerate}
+
+Under these circumstances we say that @x@ is {\em non-escaping}.
+
+An example of when (4) does {\em not} hold:
+\begin{verbatim}
+	let x = ...
+	in case x of ...alts...
+\end{verbatim}
+
+Here, @x@ is certainly entered only when the stack is deeper than when
+@x@ is defined, but here it must return to \tr{...alts...} So we can't
+just adjust the stack down to @x@'s recalled points, because that
+would lose @alts@' context.
+
+Things can get a little more complicated.  Consider:
+\begin{verbatim}
+	let y = ...
+	in let x = fvs \ args -> ...y...
+	in ...x...
+\end{verbatim}
+
+Now, if @x@ is used in a non-escaping way in \tr{...x...}, {\em and}
+@y@ is used in a non-escaping way in \tr{...y...}, {\em then} @y@ is
+non-escaping.
+
+@x@ can even be recursive!  Eg:
+\begin{verbatim}
+	letrec x = [y] \ [v] -> if v then x True else ...
+	in
+		...(x b)...
+\end{verbatim}
+
+
+%************************************************************************
+%*									*
+\subsection[codeGen-for-non-escaping]{Generating code for a ``non-escaping let''}
+%*									*
+%************************************************************************
+
+
+Generating code for this is fun.  It is all very very similar to what
+we do for a case expression.  The duality is between
+\begin{verbatim}
+	let-no-escape x = b
+	in e
+\end{verbatim}
+and
+\begin{verbatim}
+	case e of ... -> b
+\end{verbatim}
+
+That is, the RHS of @x@ (ie @b@) will execute {\em later}, just like
+the alternative of the case; it needs to be compiled in an environment
+in which all volatile bindings are forgotten, and the free vars are
+bound only to stable things like stack locations.  The @e@ part will
+execute {\em next}, just like the scrutinee of a case.
+
+First, we need to save all @x@'s free vars
+on the stack, if they aren't there already.
+
+\begin{code}
+cgLetNoEscapeClosure
+	:: Id			-- binder
+	-> CostCentreStack	-- NB: *** NOT USED *** ToDo (WDP 94/06)
+	-> StgBinderInfo	-- NB: ditto
+	-> SRT
+	-> StgLiveVars		-- variables live in RHS, including the binders
+				-- themselves in the case of a recursive group
+	-> EndOfBlockInfo	-- where are we going to?
+	-> Maybe VirtualSpOffset -- Slot for current cost centre
+	-> RecFlag		-- is the binding recursive?
+	-> [Id]			-- args (as in \ args -> body)
+	-> StgExpr		-- body (as in above)
+	-> FCode (Id, CgIdInfo)
+
+-- ToDo: deal with the cost-centre issues
+
+cgLetNoEscapeClosure
+	bndr cc binder_info srt full_live_in_rhss
+	rhs_eob_info cc_slot rec args body
+  = let
+	arity   = length args
+	lf_info = mkLFLetNoEscape arity
+    in
+	-- saveVolatileVarsAndRegs done earlier in cgExpr. 
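+	--
+	-- Schematically, the code we arrange for is:
+	--	x_ret:	<heap check>; <code for body>
+	-- with every tail call to x compiled as "adjust Sp; jump x_ret",
+	-- the free vars having been saved on the stack beforehand.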
+
+	  do	{ (vSp, _) <- forkEvalHelp rhs_eob_info
+
+				(do { allocStackTop retAddrSizeW
+				    ; nukeDeadBindings full_live_in_rhss })
+
+				(do { deAllocStackTop retAddrSizeW
+				    ; abs_c <- forkProc $ cgLetNoEscapeBody bndr cc
+						cc_slot args body
+
+					-- Ignore the label that comes back from
+					-- mkRetDirectTarget.  It must be conjured up elsewhere
+				    ; emitDirectReturnTarget (idName bndr) abs_c srt
+				    ; return () })
+
+	; returnFC (bndr, letNoEscapeIdInfo bndr vSp lf_info) }
+\end{code}
+
+\begin{code}
+cgLetNoEscapeBody :: Id		-- Name of the join point
+		  -> CostCentreStack
+		  -> Maybe VirtualSpOffset
+		  -> [Id]	-- Args
+		  -> StgExpr	-- Body
+		  -> Code
+
+cgLetNoEscapeBody bndr cc cc_slot all_args body = do
+  { (arg_regs, ptrs, nptrs, ret_slot) <- bindUnboxedTupleComponents all_args
+
+     -- restore the saved cost centre.  BUT: we must not free the stack slot
+     -- containing the cost centre, because it might be needed for a
+     -- recursive call to this let-no-escape.
+  ; restoreCurrentCostCentre cc_slot False{-don't free-}
+
+	-- Enter the closure's cc, if required
+  ;	-- enterCostCentreCode closure_info cc IsFunction
+
+	-- The "return address" slot doesn't have a return address in it;
+	-- but the heap-check needs it filled in if the heap-check fails.
+	-- So we pass code to fill it in to the heap-check macro
+  ; sp_rel <- getSpRelOffset ret_slot
+
+  ; let lbl		 = mkReturnInfoLabel (idUnique bndr)
+	frame_hdr_asst = oneStmt (CmmStore sp_rel (mkLblExpr lbl))
+
+	-- Do heap check [ToDo: omit for non-recursive case by recording
+	-- in envt and absorbing at call site]
+  ; unbxTupleHeapCheck arg_regs ptrs nptrs frame_hdr_asst
+		       (cgExpr body)
+  }
+\end{code}
diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs
new file mode 100644
index 0000000000..4f95c9b36a
--- /dev/null
+++ b/compiler/codeGen/CgMonad.lhs
@@ -0,0 +1,853 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+% $Id: CgMonad.lhs,v 1.45 2005/06/21 10:44:41 simonmar Exp $
+%
+\section[CgMonad]{The code generation monad}
+
+See the beginning of the top-level @CodeGen@ module, to see how this
+monadic stuff fits into the Big Picture.
+
+\begin{code}
+module CgMonad (
+	Code,	-- type
+	FCode,	-- type
+
+	initC, thenC, thenFC, listCs, listFCs, mapCs, mapFCs,
+	returnFC, fixC, checkedAbsC,
+	stmtC, stmtsC, labelC, emitStmts, nopC, whenC, newLabelC,
+	newUnique, newUniqSupply,
+
+	CgStmts, emitCgStmts, forkCgStmts, cgStmtsToBlocks,
+	getCgStmts', getCgStmts,
+	noCgStmts, oneCgStmt, consCgStmt,
+
+	getCmm,
+	emitData, emitProc, emitSimpleProc,
+
+	forkLabelledCode,
+	forkClosureBody, forkStatics, forkAlts, forkEval,
+	forkEvalHelp, forkProc, codeOnly,
+	SemiTaggingStuff, ConTagZ,
+
+	EndOfBlockInfo(..),
+	setEndOfBlockInfo, getEndOfBlockInfo,
+
+	setSRTLabel, getSRTLabel,
+	setTickyCtrLabel, getTickyCtrLabel,
+
+	StackUsage(..), HeapUsage(..),
+	VirtualSpOffset, VirtualHpOffset,
+	initStkUsage, initHpUsage,
+	getHpUsage,  setHpUsage,
+	heapHWM,
+
+	moduleName,
+
+	Sequel(..), -- ToDo: unabstract?
+
+	-- ideally we wouldn't export these, but some other modules access internal state
+	getState, setState, getInfoDown, getDynFlags, getHomeModules,
+
+	-- more localised access to monad state
+	getStkUsage, setStkUsage,
+	getBinds, setBinds, getStaticBinds,
+
+	-- out of general friendliness, we also export ...
+	CgInfoDownwards(..), CgState(..)
-- non-abstract + ) where + +#include "HsVersions.h" + +import {-# SOURCE #-} CgBindery ( CgBindings, nukeVolatileBinds ) + +import DynFlags ( DynFlags ) +import Packages ( HomeModules ) +import Cmm +import CmmUtils ( CmmStmts, isNopStmt ) +import CLabel +import SMRep ( WordOff ) +import Module ( Module ) +import Id ( Id ) +import VarEnv +import OrdList +import Unique ( Unique ) +import Util ( mapAccumL ) +import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply, uniqFromSupply ) +import FastString +import Outputable + +import Control.Monad ( liftM ) + +infixr 9 `thenC` -- Right-associative! +infixr 9 `thenFC` +\end{code} + +%************************************************************************ +%* * +\subsection[CgMonad-environment]{Stuff for manipulating environments} +%* * +%************************************************************************ + +This monadery has some information that it only passes {\em +downwards}, as well as some ``state'' which is modified as we go +along. + +\begin{code} +data CgInfoDownwards -- information only passed *downwards* by the monad + = MkCgInfoDown { + cgd_dflags :: DynFlags, + cgd_hmods :: HomeModules, -- Packages we depend on + cgd_mod :: Module, -- Module being compiled + cgd_statics :: CgBindings, -- [Id -> info] : static environment + cgd_srt :: CLabel, -- label of the current SRT + cgd_ticky :: CLabel, -- current destination for ticky counts + cgd_eob :: EndOfBlockInfo -- Info for stuff to do at end of basic block: + } + +initCgInfoDown :: DynFlags -> HomeModules -> Module -> CgInfoDownwards +initCgInfoDown dflags hmods mod + = MkCgInfoDown { cgd_dflags = dflags, + cgd_hmods = hmods, + cgd_mod = mod, + cgd_statics = emptyVarEnv, + cgd_srt = error "initC: srt", + cgd_ticky = mkTopTickyCtrLabel, + cgd_eob = initEobInfo } + +data CgState + = MkCgState { + cgs_stmts :: OrdList CgStmt, -- Current proc + cgs_tops :: OrdList CmmTop, + -- Other procedures and data blocks in this compilation unit + -- Both the latter two are ordered only so that we can + -- reduce forward references, when it's easy to do so + + cgs_binds :: CgBindings, -- [Id -> info] : *local* bindings environment + -- Bindings for top-level things are given in + -- the info-down part + + cgs_stk_usg :: StackUsage, + cgs_hp_usg :: HeapUsage, + + cgs_uniqs :: UniqSupply } + +initCgState :: UniqSupply -> CgState +initCgState uniqs + = MkCgState { cgs_stmts = nilOL, cgs_tops = nilOL, + cgs_binds = emptyVarEnv, + cgs_stk_usg = initStkUsage, + cgs_hp_usg = initHpUsage, + cgs_uniqs = uniqs } +\end{code} + +@EndOfBlockInfo@ tells what to do at the end of this block of code or, +if the expression is a @case@, what to do at the end of each +alternative. + +\begin{code} +data EndOfBlockInfo + = EndOfBlockInfo + VirtualSpOffset -- Args Sp: trim the stack to this point at a + -- return; push arguments starting just + -- above this point on a tail call. + + -- This is therefore the stk ptr as seen + -- by a case alternative. + Sequel + +initEobInfo = EndOfBlockInfo 0 OnStack +\end{code} + +Any addressing modes inside @Sequel@ must be ``robust,'' in the sense +that it must survive stack pointer adjustments at the end of the +block. 
+
+\begin{code}
+data Sequel
+  = OnStack            -- Continuation is on the stack
+  | UpdateCode         -- Continuation is update
+
+  | CaseAlts
+       CLabel     -- Jump to this; if the continuation is for a vectored
+                  -- case this might be the label of a return vector
+       SemiTaggingStuff
+       Id         -- The case binder, only used to see if it's dead
+       Bool       -- True <=> polymorphic, push a SEQ frame too
+
+type SemiTaggingStuff
+  = Maybe                      -- Maybe[1] we don't have any semi-tagging stuff...
+     ([(ConTagZ, CmmLit)],     -- Alternatives
+      CmmLit)                  -- Default (will be a can't happen RTS label if can't happen)
+
+type ConTagZ = Int     -- A *zero-indexed* constructor tag
+
+-- The case branch is executed only from a successful semitagging
+-- venture, when a case has looked at a variable, found that it's
+-- evaluated, and wants to load up the contents and go to the join
+-- point.
+\end{code}
+
+%************************************************************************
+%*                                                                      *
+               CgStmt type
+%*                                                                      *
+%************************************************************************
+
+The CgStmts type is what the code generator outputs: it is a tree of
+statements, including in-line labels.  The job of flattenCgStmts is to
+turn this into a list of basic blocks, each of which ends in a jump
+statement (either a local branch or a non-local jump).
+
+\begin{code}
+type CgStmts = OrdList CgStmt
+
+data CgStmt
+  = CgStmt  CmmStmt
+  | CgLabel BlockId
+  | CgFork  BlockId CgStmts
+
+flattenCgStmts :: BlockId -> CgStmts -> [CmmBasicBlock]
+flattenCgStmts id stmts =
+       case flatten (fromOL stmts) of
+         ([],blocks)    -> blocks
+         (block,blocks) -> BasicBlock id block : blocks
+ where
+  flatten [] = ([],[])
+
+  -- A label at the end of a function or fork: this label must not be reachable,
+  -- but it might be referred to from another BB that also isn't reachable.
+  -- Eliminating these has to be done with a dead-code analysis.  For now,
+  -- we just make it into a well-formed block by adding a recursive jump.
+  flatten [CgLabel id]
+    = ( [], [BasicBlock id [CmmBranch id]] )
+
+  -- A jump/branch: throw away all the code up to the next label, because
+  -- it is unreachable.  Be careful to keep forks that we find on the way.
+  flatten (CgStmt stmt : stmts)
+    | isJump stmt
+    = case dropWhile isOrdinaryStmt stmts of
+          []                     -> ( [stmt], [] )
+          [CgLabel id]           -> ( [stmt], [BasicBlock id [CmmBranch id]])
+          (CgLabel id : stmts)   -> ( [stmt], BasicBlock id block : blocks )
+              where (block,blocks) = flatten stmts
+          (CgFork fork_id stmts : ss) ->
+                  flatten (CgFork fork_id stmts : CgStmt stmt : ss)
+
+  flatten (s:ss) =
+       case s of
+         CgStmt stmt -> (stmt:block,blocks)
+         CgLabel id  -> ([CmmBranch id],BasicBlock id block:blocks)
+         CgFork fork_id stmts ->
+               (block, BasicBlock fork_id fork_block : fork_blocks ++ blocks)
+               where (fork_block, fork_blocks) = flatten (fromOL stmts)
+    where (block,blocks) = flatten ss
+
+isJump (CmmJump _ _) = True
+isJump (CmmBranch _) = True
+isJump _ = False
+
+isOrdinaryStmt (CgStmt _) = True
+isOrdinaryStmt _ = False
+\end{code}
+
+%************************************************************************
+%*                                                                      *
+               Stack and heap models
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+type VirtualHpOffset = WordOff -- Both are in
+type VirtualSpOffset = WordOff -- units of words
+
+data StackUsage
+  = StackUsage {
+       virtSp :: VirtualSpOffset,
+               -- Virtual offset of topmost allocated slot
+
+       frameSp :: VirtualSpOffset,
+               -- Virtual offset of the return address of the enclosing frame.
+               -- This RA describes the liveness/pointedness of
+               -- all the stack from frameSp downwards
+               -- INVARIANT: less than or equal to virtSp
+
+       freeStk :: [VirtualSpOffset],
+               -- List of free slots, in *increasing* order
+               -- INVARIANT: all <= virtSp
+               -- All slots <= virtSp are taken except these ones
+
+       realSp :: VirtualSpOffset,
+               -- Virtual offset of real stack pointer register
+
+       hwSp :: VirtualSpOffset
+  }    -- Highest value ever taken by virtSp
+
+-- INVARIANT: The environment contains no Stable references to
+--            stack slots below (lower offset) frameSp
+--            It can contain volatile references to this area though.
+
+data HeapUsage =
+  HeapUsage {
+       virtHp :: VirtualHpOffset,      -- Virtual offset of highest-allocated word
+       realHp :: VirtualHpOffset       -- realHp: Virtual offset of real heap ptr
+  }
+\end{code}
+
+The heap high water mark is the larger of virtHp and hwHp.  The latter
+only records the high water marks of forked-off branches, so to find the
+heap high water mark you have to take the max of virtHp and hwHp.  Remember,
+virtHp never retreats!
+
+Note Jan 04: ok, so why do we only look at the virtual Hp??
+
+\begin{code}
+heapHWM :: HeapUsage -> VirtualHpOffset
+heapHWM = virtHp
+\end{code}
+
+Initialisation.
+
+\begin{code}
+initStkUsage :: StackUsage
+initStkUsage = StackUsage {
+                       virtSp = 0,
+                       frameSp = 0,
+                       freeStk = [],
+                       realSp = 0,
+                       hwSp = 0
+               }
+
+initHpUsage :: HeapUsage
+initHpUsage = HeapUsage {
+               virtHp = 0,
+               realHp = 0
+             }
+\end{code}
+
+@stateIncUsage@$~e_1~e_2$ incorporates in $e_1$ the stack and heap high water
+marks found in $e_2$.
+
+\begin{code}
+stateIncUsage :: CgState -> CgState -> CgState
+stateIncUsage s1 s2@(MkCgState { cgs_stk_usg = stk_usg, cgs_hp_usg = hp_usg })
+     = s1 { cgs_hp_usg  = cgs_hp_usg  s1 `maxHpHw`  virtHp hp_usg,
+            cgs_stk_usg = cgs_stk_usg s1 `maxStkHw` hwSp   stk_usg }
+       `addCodeBlocksFrom` s2
+
+stateIncUsageEval :: CgState -> CgState -> CgState
+stateIncUsageEval s1 s2
+     = s1 { cgs_stk_usg = cgs_stk_usg s1 `maxStkHw` hwSp (cgs_stk_usg s2) }
+       `addCodeBlocksFrom` s2
+       -- We don't max the heap high-watermark because stateIncUsageEval is
+       -- used only in forkEval, which in turn is only used for blocks of code
+       -- which do their own heap-check.
+
+addCodeBlocksFrom :: CgState -> CgState -> CgState
+-- Add code blocks from the latter to the former
+-- (The cgs_stmts will often be empty, but not always; see codeOnly)
+s1 `addCodeBlocksFrom` s2
+  = s1 { cgs_stmts = cgs_stmts s1 `appOL` cgs_stmts s2,
+         cgs_tops  = cgs_tops  s1 `appOL` cgs_tops  s2 }
+
+maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage
+hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw }
+
+maxStkHw :: StackUsage -> VirtualSpOffset -> StackUsage
+stk_usg `maxStkHw` hw = stk_usg { hwSp = hwSp stk_usg `max` hw }
+\end{code}
+
+%************************************************************************
+%*                                                                      *
+               The FCode monad
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+newtype FCode a = FCode (CgInfoDownwards -> CgState -> (a, CgState))
+type Code       = FCode ()
+
+instance Monad FCode where
+       (>>=) = thenFC
+       return = returnFC
+
+{-# INLINE thenC #-}
+{-# INLINE thenFC #-}
+{-# INLINE returnFC #-}
+\end{code}
+The Abstract~C is not in the environment so as to improve strictness.
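
The reader-plus-state shape of @FCode@ is easy to lose in the combinator
noise.  Here is a minimal, self-contained sketch of the same pattern; the
names @M@, @Info@ and @St@ are invented for illustration and are not part
of this module.  The info argument flows downwards unchanged, while the
state is threaded from left to right, exactly as in @thenFC@ below.

\begin{code}
-- Illustrative only: a toy analogue of FCode (hypothetical names).
newtype M a = M (Info -> St -> (a, St))

data Info = Info        -- stands in for CgInfoDownwards
type St   = Int         -- stands in for CgState

returnM :: a -> M a
returnM x = M (\_info st -> (x, st))

thenM :: M a -> (a -> M b) -> M b
thenM (M m) k = M (\info st ->
    let (a, st') = m info st        -- run the first computation
        M m'     = k a              -- its result chooses the next one
    in  m' info st')                -- same info, updated state

runM :: Info -> St -> M a -> (a, St)
runM info st (M m) = m info st

-- Example: bump the state twice; runM Info 0 example yields (2, 2).
example :: M Int
example = tick `thenM` \_ -> tick
  where tick = M (\_info st -> (st + 1, st + 1))
\end{code}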
+ +\begin{code} +initC :: DynFlags -> HomeModules -> Module -> FCode a -> IO a + +initC dflags hmods mod (FCode code) + = do { uniqs <- mkSplitUniqSupply 'c' + ; case code (initCgInfoDown dflags hmods mod) (initCgState uniqs) of + (res, _) -> return res + } + +returnFC :: a -> FCode a +returnFC val = FCode (\info_down state -> (val, state)) +\end{code} + +\begin{code} +thenC :: Code -> FCode a -> FCode a +thenC (FCode m) (FCode k) = + FCode (\info_down state -> let (_,new_state) = m info_down state in + k info_down new_state) + +listCs :: [Code] -> Code +listCs [] = return () +listCs (fc:fcs) = do + fc + listCs fcs + +mapCs :: (a -> Code) -> [a] -> Code +mapCs = mapM_ +\end{code} + +\begin{code} +thenFC :: FCode a -> (a -> FCode c) -> FCode c +thenFC (FCode m) k = FCode ( + \info_down state -> + let + (m_result, new_state) = m info_down state + (FCode kcode) = k m_result + in + kcode info_down new_state + ) + +listFCs :: [FCode a] -> FCode [a] +listFCs = sequence + +mapFCs :: (a -> FCode b) -> [a] -> FCode [b] +mapFCs = mapM +\end{code} + +And the knot-tying combinator: +\begin{code} +fixC :: (a -> FCode a) -> FCode a +fixC fcode = FCode ( + \info_down state -> + let + FCode fc = fcode v + result@(v,_) = fc info_down state + -- ^--------^ + in + result + ) +\end{code} + +%************************************************************************ +%* * + Operators for getting and setting the state and "info_down". + +%* * +%************************************************************************ + +\begin{code} +getState :: FCode CgState +getState = FCode $ \info_down state -> (state,state) + +setState :: CgState -> FCode () +setState state = FCode $ \info_down _ -> ((),state) + +getStkUsage :: FCode StackUsage +getStkUsage = do + state <- getState + return $ cgs_stk_usg state + +setStkUsage :: StackUsage -> Code +setStkUsage new_stk_usg = do + state <- getState + setState $ state {cgs_stk_usg = new_stk_usg} + +getHpUsage :: FCode HeapUsage +getHpUsage = do + state <- getState + return $ cgs_hp_usg state + +setHpUsage :: HeapUsage -> Code +setHpUsage new_hp_usg = do + state <- getState + setState $ state {cgs_hp_usg = new_hp_usg} + +getBinds :: FCode CgBindings +getBinds = do + state <- getState + return $ cgs_binds state + +setBinds :: CgBindings -> FCode () +setBinds new_binds = do + state <- getState + setState $ state {cgs_binds = new_binds} + +getStaticBinds :: FCode CgBindings +getStaticBinds = do + info <- getInfoDown + return (cgd_statics info) + +withState :: FCode a -> CgState -> FCode (a,CgState) +withState (FCode fcode) newstate = FCode $ \info_down state -> + let (retval, state2) = fcode info_down newstate in ((retval,state2), state) + +newUniqSupply :: FCode UniqSupply +newUniqSupply = do + state <- getState + let (us1, us2) = splitUniqSupply (cgs_uniqs state) + setState $ state { cgs_uniqs = us1 } + return us2 + +newUnique :: FCode Unique +newUnique = do + us <- newUniqSupply + return (uniqFromSupply us) + +------------------ +getInfoDown :: FCode CgInfoDownwards +getInfoDown = FCode $ \info_down state -> (info_down,state) + +getDynFlags :: FCode DynFlags +getDynFlags = liftM cgd_dflags getInfoDown + +getHomeModules :: FCode HomeModules +getHomeModules = liftM cgd_hmods getInfoDown + +withInfoDown :: FCode a -> CgInfoDownwards -> FCode a +withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state + +doFCode :: FCode a -> CgInfoDownwards -> CgState -> (a,CgState) +doFCode (FCode fcode) info_down state = fcode info_down state +\end{code} + + 
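
One subtlety worth pausing on: @withState@ above runs a computation against
a state of the caller's choosing and hands back both its result and its
final state, while the enclosing state flows past untouched; this is the
trick that @getCmm@ and @getCgStmts'@ below build on.  Continuing the
illustrative toy monad from the earlier sketch (again, @withStateM@ and
friends are invented names, not part of this module):

\begin{code}
-- Illustrative only: the capture pattern of withState, on the toy M monad.
withStateM :: M a -> St -> M (a, St)
withStateM (M m) newst = M (\info st ->
    let (a, st') = m info newst     -- run against the supplied state
    in  ((a, st'), st))             -- the outer state passes through

getStateM :: M St
getStateM = M (\_info st -> (st, st))

setStateM :: St -> M ()
setStateM st' = M (\_info _ -> ((), st'))

-- Example: run 'example' from the previous sketch in a scratch state of
-- 100; its bumps are observed but the outer state (0) is untouched:
--   runM Info 0 (withStateM example 100)  ==>  ((102, 102), 0)
\end{code}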
+%************************************************************************
+%*                                                                      *
+               Forking
+%*                                                                      *
+%************************************************************************
+
+@forkClosureBody@ takes a code, $c$, and compiles it in a completely
+fresh environment, except that:
+       - compilation info and statics are passed in unchanged.
+The current environment is passed on completely unaltered, except that
+abstract C from the fork is incorporated.
+
+@forkProc@ takes a code and compiles it in the current environment,
+returning the basic blocks thus constructed.  The current environment
+is passed on completely unchanged.  It is pretty similar to
+@getBlocks@, except that the latter does affect the environment.
+
+@forkStatics@ $fc$ compiles $fc$ in an environment whose statics come
+from the current bindings, but which is otherwise freshly initialised.
+The Abstract~C returned is attached to the current state, but the
+bindings and usage information is otherwise unchanged.
+
+\begin{code}
+forkClosureBody :: Code -> Code
+forkClosureBody body_code
+  = do { info <- getInfoDown
+       ; us   <- newUniqSupply
+       ; state <- getState
+       ; let body_info_down = info { cgd_eob = initEobInfo }
+             ((),fork_state) = doFCode body_code body_info_down
+                                       (initCgState us)
+       ; ASSERT( isNilOL (cgs_stmts fork_state) )
+         setState $ state `addCodeBlocksFrom` fork_state }
+
+forkStatics :: FCode a -> FCode a
+forkStatics body_code
+  = do { info  <- getInfoDown
+       ; us    <- newUniqSupply
+       ; state <- getState
+       ; let rhs_info_down = info { cgd_statics = cgs_binds state,
+                                    cgd_eob     = initEobInfo }
+             (result, fork_state_out) = doFCode body_code rhs_info_down
+                                                (initCgState us)
+       ; ASSERT( isNilOL (cgs_stmts fork_state_out) )
+         setState (state `addCodeBlocksFrom` fork_state_out)
+       ; return result }
+
+forkProc :: Code -> FCode CgStmts
+forkProc body_code
+  = do { info_down <- getInfoDown
+       ; us    <- newUniqSupply
+       ; state <- getState
+       ; let fork_state_in = (initCgState us)
+                               { cgs_binds   = cgs_binds state,
+                                 cgs_stk_usg = cgs_stk_usg state,
+                                 cgs_hp_usg  = cgs_hp_usg state }
+                       -- ToDo: is the hp usage necessary?
+             (code_blks, fork_state_out) = doFCode (getCgStmts body_code)
+                                                   info_down fork_state_in
+       ; setState $ state `stateIncUsageEval` fork_state_out
+       ; return code_blks }
+
+codeOnly :: Code -> Code
+-- Emit any code from the inner thing into the outer thing
+-- Do not affect anything else in the outer state
+-- Used in almost-circular code to prevent false loop dependencies
+codeOnly body_code
+  = do { info_down <- getInfoDown
+       ; us   <- newUniqSupply
+       ; state <- getState
+       ; let fork_state_in = (initCgState us) { cgs_binds   = cgs_binds state,
+                                                cgs_stk_usg = cgs_stk_usg state,
+                                                cgs_hp_usg  = cgs_hp_usg state }
+             ((), fork_state_out) = doFCode body_code info_down fork_state_in
+       ; setState $ state `addCodeBlocksFrom` fork_state_out }
+\end{code}
+
+@forkAlts@ $bs~d$ takes fcodes $bs$ for the branches of a @case@, and
+an fcode for the default case $d$, and compiles each in the current
+environment.  The current environment is passed on unmodified, except
+that
+       - the worst stack high-water mark is incorporated
+       - the virtual Hp is moved on to the worst virtual Hp for the branches
+
+\begin{code}
+forkAlts :: [FCode a] -> FCode [a]
+
+forkAlts branch_fcodes
+  = do { info_down <- getInfoDown
+       ; us <- newUniqSupply
+       ; state <- getState
+       ; let compile us branch
+               = (us2, doFCode branch info_down branch_state)
+               where
+                 (us1,us2) = splitUniqSupply us
+                 branch_state = (initCgState us1) {
+                                       cgs_binds   = cgs_binds state,
+                                       cgs_stk_usg = cgs_stk_usg state,
+                                       cgs_hp_usg  = cgs_hp_usg state }
+
+             (_us, results) = mapAccumL compile us branch_fcodes
+             (branch_results, branch_out_states) = unzip results
+       ; setState $ foldl stateIncUsage state branch_out_states
+               -- NB foldl.  state is the *left* argument to stateIncUsage
+       ; return branch_results }
+\end{code}
+
+@forkEval@ takes two blocks of code.
+
+   -  The first meddles with the environment to set it up as expected by
+      the alternatives of a @case@ which does an eval (or gc-possible primop).
+   -  The second block is the code for the alternatives.
+      (plus info for semi-tagging purposes)
+
+@forkEval@ picks up the virtual stack pointer and returns a suitable
+@EndOfBlockInfo@ for the caller to use, together with whatever value
+is returned by the second block.
+
+It uses @nukeVolatileBinds@ to initialise the body's environment, and
+@stateIncUsageEval@ to incorporate usage; the latter ignores the heap
+usage.
+
+\begin{code}
+forkEval :: EndOfBlockInfo              -- For the body
+         -> Code                        -- Code to set environment
+         -> FCode Sequel                -- Semi-tagging info to store
+         -> FCode EndOfBlockInfo        -- The new end of block info
+
+forkEval body_eob_info env_code body_code
+  = do { (v, sequel) <- forkEvalHelp body_eob_info env_code body_code
+       ; returnFC (EndOfBlockInfo v sequel) }
+
+forkEvalHelp :: EndOfBlockInfo  -- For the body
+             -> Code            -- Code to set environment
+             -> FCode a         -- The code to do after the eval
+             -> FCode (VirtualSpOffset, -- Sp
+                       a)               -- Result of the FCode
+       -- A disturbingly complicated function
+forkEvalHelp body_eob_info env_code body_code
+  = do { info_down <- getInfoDown
+       ; us   <- newUniqSupply
+       ; state <- getState
+       ; let { info_down_for_body = info_down {cgd_eob = body_eob_info}
+             ; (_, env_state) = doFCode env_code info_down_for_body
+                                        (state {cgs_uniqs = us})
+             ; state_for_body = (initCgState (cgs_uniqs env_state))
+                                       { cgs_binds   = binds_for_body,
+                                         cgs_stk_usg = stk_usg_for_body }
+             ; binds_for_body   = nukeVolatileBinds (cgs_binds env_state)
+             ; stk_usg_from_env = cgs_stk_usg env_state
+             ; virtSp_from_env  = virtSp stk_usg_from_env
+             ; stk_usg_for_body = stk_usg_from_env {realSp = virtSp_from_env,
+                                                    hwSp   = virtSp_from_env}
+             ; (value_returned, state_at_end_return)
+                       = doFCode body_code info_down_for_body state_for_body
+             }
+       ; ASSERT( isNilOL (cgs_stmts state_at_end_return) )
+               -- The code coming back should consist only of nested declarations,
+               -- notably of the return vector!
+ setState $ state `stateIncUsageEval` state_at_end_return + ; return (virtSp_from_env, value_returned) } + + +-- ---------------------------------------------------------------------------- +-- Combinators for emitting code + +nopC :: Code +nopC = return () + +whenC :: Bool -> Code -> Code +whenC True code = code +whenC False code = nopC + +stmtC :: CmmStmt -> Code +stmtC stmt = emitCgStmt (CgStmt stmt) + +labelC :: BlockId -> Code +labelC id = emitCgStmt (CgLabel id) + +newLabelC :: FCode BlockId +newLabelC = do { id <- newUnique; return (BlockId id) } + +checkedAbsC :: CmmStmt -> Code +-- Emit code, eliminating no-ops +checkedAbsC stmt = emitStmts (if isNopStmt stmt then nilOL + else unitOL stmt) + +stmtsC :: [CmmStmt] -> Code +stmtsC stmts = emitStmts (toOL stmts) + +-- Emit code; no no-op checking +emitStmts :: CmmStmts -> Code +emitStmts stmts = emitCgStmts (fmap CgStmt stmts) + +-- forkLabelledCode is for emitting a chunk of code with a label, outside +-- of the current instruction stream. +forkLabelledCode :: Code -> FCode BlockId +forkLabelledCode code = getCgStmts code >>= forkCgStmts + +emitCgStmt :: CgStmt -> Code +emitCgStmt stmt + = do { state <- getState + ; setState $ state { cgs_stmts = cgs_stmts state `snocOL` stmt } + } + +emitData :: Section -> [CmmStatic] -> Code +emitData sect lits + = do { state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` data_block } } + where + data_block = CmmData sect lits + +emitProc :: [CmmLit] -> CLabel -> [LocalReg] -> [CmmBasicBlock] -> Code +emitProc lits lbl args blocks + = do { let proc_block = CmmProc (map CmmStaticLit lits) lbl args blocks + ; state <- getState + ; setState $ state { cgs_tops = cgs_tops state `snocOL` proc_block } } + +emitSimpleProc :: CLabel -> Code -> Code +-- Emit a procedure whose body is the specified code; no info table +emitSimpleProc lbl code + = do { stmts <- getCgStmts code + ; blks <- cgStmtsToBlocks stmts + ; emitProc [] lbl [] blks } + +getCmm :: Code -> FCode Cmm +-- Get all the CmmTops (there should be no stmts) +getCmm code + = do { state1 <- getState + ; ((), state2) <- withState code (state1 { cgs_tops = nilOL }) + ; setState $ state2 { cgs_tops = cgs_tops state1 } + ; return (Cmm (fromOL (cgs_tops state2))) } + +-- ---------------------------------------------------------------------------- +-- CgStmts + +-- These functions deal in terms of CgStmts, which is an abstract type +-- representing the code in the current proc. + + +-- emit CgStmts into the current instruction stream +emitCgStmts :: CgStmts -> Code +emitCgStmts stmts + = do { state <- getState + ; setState $ state { cgs_stmts = cgs_stmts state `appOL` stmts } } + +-- emit CgStmts outside the current instruction stream, and return a label +forkCgStmts :: CgStmts -> FCode BlockId +forkCgStmts stmts + = do { id <- newLabelC + ; emitCgStmt (CgFork id stmts) + ; return id + } + +-- turn CgStmts into [CmmBasicBlock], for making a new proc. 
+cgStmtsToBlocks :: CgStmts -> FCode [CmmBasicBlock]
+cgStmtsToBlocks stmts
+  = do { id <- newLabelC
+       ; return (flattenCgStmts id stmts)
+       }
+
+-- collect the code emitted by an FCode computation
+getCgStmts' :: FCode a -> FCode (a, CgStmts)
+getCgStmts' fcode
+  = do { state1 <- getState
+       ; (a, state2) <- withState fcode (state1 { cgs_stmts = nilOL })
+       ; setState $ state2 { cgs_stmts = cgs_stmts state1 }
+       ; return (a, cgs_stmts state2) }
+
+getCgStmts :: FCode a -> FCode CgStmts
+getCgStmts fcode = do { (_,stmts) <- getCgStmts' fcode; return stmts }
+
+-- Simple ways to construct CgStmts:
+noCgStmts :: CgStmts
+noCgStmts = nilOL
+
+oneCgStmt :: CmmStmt -> CgStmts
+oneCgStmt stmt = unitOL (CgStmt stmt)
+
+consCgStmt :: CmmStmt -> CgStmts -> CgStmts
+consCgStmt stmt stmts = CgStmt stmt `consOL` stmts
+
+-- ----------------------------------------------------------------------------
+-- Get the current module name
+
+moduleName :: FCode Module
+moduleName = do { info <- getInfoDown; return (cgd_mod info) }
+
+-- ----------------------------------------------------------------------------
+-- Get/set the end-of-block info
+
+setEndOfBlockInfo :: EndOfBlockInfo -> Code -> Code
+setEndOfBlockInfo eob_info code = do
+       info <- getInfoDown
+       withInfoDown code (info {cgd_eob = eob_info})
+
+getEndOfBlockInfo :: FCode EndOfBlockInfo
+getEndOfBlockInfo = do
+       info <- getInfoDown
+       return (cgd_eob info)
+
+-- ----------------------------------------------------------------------------
+-- Get/set the current SRT label
+
+-- There is just one SRT for each top level binding; all the nested
+-- bindings use sub-sections of this SRT.  The label is passed down to
+-- the nested bindings via the monad.
+
+getSRTLabel :: FCode CLabel    -- Used only by cgPanic
+getSRTLabel = do info <- getInfoDown
+                 return (cgd_srt info)
+
+setSRTLabel :: CLabel -> FCode a -> FCode a
+setSRTLabel srt_lbl code
+  = do info <- getInfoDown
+       withInfoDown code (info { cgd_srt = srt_lbl})
+
+-- ----------------------------------------------------------------------------
+-- Get/set the current ticky counter label
+
+getTickyCtrLabel :: FCode CLabel
+getTickyCtrLabel = do
+       info <- getInfoDown
+       return (cgd_ticky info)
+
+setTickyCtrLabel :: CLabel -> Code -> Code
+setTickyCtrLabel ticky code = do
+       info <- getInfoDown
+       withInfoDown code (info {cgd_ticky = ticky})
+\end{code}
diff --git a/compiler/codeGen/CgParallel.hs b/compiler/codeGen/CgParallel.hs
new file mode 100644
index 0000000000..b826a33cba
--- /dev/null
+++ b/compiler/codeGen/CgParallel.hs
@@ -0,0 +1,90 @@
+-- Code generation related to GpH
+--     (a) parallel
+--     (b) GranSim
+
+module CgParallel(
+       staticGranHdr,staticParHdr,
+       granFetchAndReschedule, granYield,
+       doGranAllocate
+  ) where
+
+import CgMonad
+import CgCallConv      ( mkRegLiveness )
+import Id              ( Id )
+import Cmm             ( CmmLit, GlobalReg(..), node, CmmExpr )
+import StaticFlags     ( opt_GranMacros )
+import Outputable
+
+staticParHdr :: [CmmLit]
+-- Parallel header words in a static closure
+staticParHdr = []
+
+--------------------------------------------------------
+--             GranSim stuff
+--------------------------------------------------------
+
+staticGranHdr :: [CmmLit]
+-- Gransim header words in a static closure
+staticGranHdr = []
+
+doGranAllocate :: CmmExpr -> Code
+-- macro DO_GRAN_ALLOCATE
+doGranAllocate hp
+  | not opt_GranMacros = nopC
+  | otherwise          = panic "doGranAllocate"
+
+
+
+-------------------------
+granFetchAndReschedule :: [(Id,GlobalReg)]  -- Live registers
+                       -> Bool              -- Node reqd?
+                       -> Code
+-- Emit code for simulating a fetch and then reschedule.
+granFetchAndReschedule regs node_reqd
+  | opt_GranMacros && (node `elem` map snd regs || node_reqd)
+  = do { fetch
+       ; reschedule liveness node_reqd }
+  | otherwise
+  = nopC
+  where
+    liveness = mkRegLiveness regs 0 0
+
+fetch = panic "granFetch"
+       -- Was: absC (CMacroStmt GRAN_FETCH [])
+       --HWL: generate GRAN_FETCH macro for GrAnSim
+       --     currently GRAN_FETCH and GRAN_FETCH_AND_RESCHEDULE are miai
+
+reschedule liveness node_reqd = panic "granReschedule"
+       -- Was: absC  (CMacroStmt GRAN_RESCHEDULE [
+       --                mkIntCLit (I# (word2Int# liveness_mask)),
+       --                mkIntCLit (if node_reqd then 1 else 0)])
+
+
+-------------------------
+-- The @GRAN_YIELD@ macro is taken from JSM's code for Concurrent Haskell.  It
+-- allows the program to context-switch at places where @node@ is not alive
+-- (it uses the @Continue@ rather than the @EnterNodeCode@ function in the
+-- RTS).  We emit this kind of macro at the beginning of the following kinds
+-- of basic blocks:
+-- \begin{itemize}
+--  \item Slow entry code where node is not alive (see @CgClosure.lhs@).  Normally
+--        we use @fetchAndReschedule@ at a slow entry code.
+--  \item Fast entry code (see @CgClosure.lhs@).
+--  \item Alternatives in case expressions (@CLabelledCode@ structures), provided
+--        that they are not inlined (see @CgCases.lhs@). These alternatives will
+--        be turned into separate functions.
+-- \end{itemize}
+
+granYield :: [(Id,GlobalReg)]  -- Live registers
+          -> Bool              -- Node reqd?
+          -> Code
+
+granYield regs node_reqd
+  | opt_GranMacros && node_reqd = yield liveness
+  | otherwise                   = nopC
+  where
+     liveness = mkRegLiveness regs 0 0
+
+yield liveness = panic "granYield"
+       -- Was : absC (CMacroStmt GRAN_YIELD
+       --               [mkIntCLit (I# (word2Int# liveness_mask))])
+
+
diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs
new file mode 100644
index 0000000000..bc7c9140ed
--- /dev/null
+++ b/compiler/codeGen/CgPrimOp.hs
@@ -0,0 +1,584 @@
+-----------------------------------------------------------------------------
+--
+-- Code generation for PrimOps.
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+module CgPrimOp (
+   cgPrimOp
+ ) where
+
+#include "HsVersions.h"
+
+import ForeignCall     ( CCallConv(CCallConv) )
+import StgSyn          ( StgLiveVars, StgArg )
+import CgForeignCall   ( emitForeignCall' )
+import CgBindery       ( getVolatileRegs, getArgAmodes )
+import CgMonad
+import CgInfoTbls      ( getConstrTag )
+import CgUtils         ( cmmOffsetW, cmmOffsetB, cmmLoadIndexW )
+import ForeignCall
+import Cmm
+import CLabel          ( mkMAP_FROZEN_infoLabel, mkMAP_DIRTY_infoLabel,
+                         mkDirty_MUT_VAR_Label, mkRtsCodeLabel )
+import CmmUtils
+import MachOp
+import SMRep
+import PrimOp          ( PrimOp(..)
) +import SMRep ( tablesNextToCode ) +import Constants ( wORD_SIZE, wORD_SIZE_IN_BITS ) +import StaticFlags ( opt_Parallel ) +import Outputable + +-- --------------------------------------------------------------------------- +-- Code generation for PrimOps + +cgPrimOp :: [CmmReg] -- where to put the results + -> PrimOp -- the op + -> [StgArg] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code + +cgPrimOp results op args live + = do arg_exprs <- getArgAmodes args + let non_void_args = [ e | (r,e) <- arg_exprs, nonVoidArg r ] + emitPrimOp results op non_void_args live + + +emitPrimOp :: [CmmReg] -- where to put the results + -> PrimOp -- the op + -> [CmmExpr] -- arguments + -> StgLiveVars -- live vars, in case we need to save them + -> Code + +-- First we handle various awkward cases specially. The remaining +-- easy cases are then handled by translateOp, defined below. + +emitPrimOp [res_r,res_c] IntAddCOp [aa,bb] live +{- + With some bit-twiddling, we can define int{Add,Sub}Czh portably in + C, and without needing any comparisons. This may not be the + fastest way to do it - if you have better code, please send it! --SDM + + Return : r = a + b, c = 0 if no overflow, 1 on overflow. + + We currently don't make use of the r value if c is != 0 (i.e. + overflow), we just convert to big integers and try again. This + could be improved by making r and c the correct values for + plugging into a new J#. + + { r = ((I_)(a)) + ((I_)(b)); \ + c = ((StgWord)(~(((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + Wading through the mass of bracketry, it seems to reduce to: + c = ( (~(a^b)) & (a^r) ) >>unsigned (BITS_IN(I_)-1) + +-} + = stmtsC [ + CmmAssign res_r (CmmMachOp mo_wordAdd [aa,bb]), + CmmAssign res_c $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordNot [CmmMachOp mo_wordXor [aa,bb]], + CmmMachOp mo_wordXor [aa, CmmReg res_r] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res_r,res_c] IntSubCOp [aa,bb] live +{- Similarly: + #define subIntCzh(r,c,a,b) \ + { r = ((I_)(a)) - ((I_)(b)); \ + c = ((StgWord)((((I_)(a))^((I_)(b))) & (((I_)(a))^r))) \ + >> (BITS_IN (I_) - 1); \ + } + + c = ((a^b) & (a^r)) >>unsigned (BITS_IN(I_)-1) +-} + = stmtsC [ + CmmAssign res_r (CmmMachOp mo_wordSub [aa,bb]), + CmmAssign res_c $ + CmmMachOp mo_wordUShr [ + CmmMachOp mo_wordAnd [ + CmmMachOp mo_wordXor [aa,bb], + CmmMachOp mo_wordXor [aa, CmmReg res_r] + ], + CmmLit (mkIntCLit (wORD_SIZE_IN_BITS - 1)) + ] + ] + + +emitPrimOp [res] ParOp [arg] live + = do + -- for now, just implement this in a C function + -- later, we might want to inline it. 
+ vols <- getVolatileRegs live + emitForeignCall' PlayRisky + [(res,NoHint)] + (CmmForeignCall newspark CCallConv) + [(CmmReg (CmmGlobal BaseReg), PtrHint), (arg,PtrHint)] + (Just vols) + where + newspark = CmmLit (CmmLabel (mkRtsCodeLabel SLIT("newSpark"))) + +emitPrimOp [res] ReadMutVarOp [mutv] live + = stmtC (CmmAssign res (cmmLoadIndexW mutv fixedHdrSize)) + +emitPrimOp [] WriteMutVarOp [mutv,var] live + = do + stmtC (CmmStore (cmmOffsetW mutv fixedHdrSize) var) + vols <- getVolatileRegs live + emitForeignCall' PlayRisky + [{-no results-}] + (CmmForeignCall (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) + CCallConv) + [(CmmReg (CmmGlobal BaseReg), PtrHint), (mutv,PtrHint)] + (Just vols) + +-- #define sizzeofByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofByteArrayOp [arg] live + = stmtC $ + CmmAssign res (CmmMachOp mo_wordMul [ + cmmLoadIndexW arg fixedHdrSize, + CmmLit (mkIntCLit wORD_SIZE) + ]) + +-- #define sizzeofMutableByteArrayzh(r,a) \ +-- r = (((StgArrWords *)(a))->words * sizeof(W_)) +emitPrimOp [res] SizeofMutableByteArrayOp [arg] live + = emitPrimOp [res] SizeofByteArrayOp [arg] live + + +-- #define touchzh(o) /* nothing */ +emitPrimOp [] TouchOp [arg] live + = nopC + +-- #define byteArrayContentszh(r,a) r = BYTE_ARR_CTS(a) +emitPrimOp [res] ByteArrayContents_Char [arg] live + = stmtC (CmmAssign res (cmmOffsetB arg arrWordsHdrSize)) + +-- #define stableNameToIntzh(r,s) (r = ((StgStableName *)s)->sn) +emitPrimOp [res] StableNameToIntOp [arg] live + = stmtC (CmmAssign res (cmmLoadIndexW arg fixedHdrSize)) + +-- #define eqStableNamezh(r,sn1,sn2) \ +-- (r = (((StgStableName *)sn1)->sn == ((StgStableName *)sn2)->sn)) +emitPrimOp [res] EqStableNameOp [arg1,arg2] live + = stmtC (CmmAssign res (CmmMachOp mo_wordEq [ + cmmLoadIndexW arg1 fixedHdrSize, + cmmLoadIndexW arg2 fixedHdrSize + ])) + + +emitPrimOp [res] ReallyUnsafePtrEqualityOp [arg1,arg2] live + = stmtC (CmmAssign res (CmmMachOp mo_wordEq [arg1,arg2])) + +-- #define addrToHValuezh(r,a) r=(P_)a +emitPrimOp [res] AddrToHValueOp [arg] live + = stmtC (CmmAssign res arg) + +-- #define dataToTagzh(r,a) r=(GET_TAG(((StgClosure *)a)->header.info)) +emitPrimOp [res] DataToTagOp [arg] live + = stmtC (CmmAssign res (getConstrTag arg)) + +{- Freezing arrays-of-ptrs requires changing an info table, for the + benefit of the generational collector. It needs to scavenge mutable + objects, even if they are in old space. When they become immutable, + they can be removed from this scavenge list. 
-} + +-- #define unsafeFreezzeArrayzh(r,a) +-- { +-- SET_INFO((StgClosure *)a,&stg_MUT_ARR_PTRS_FROZEN0_info); +-- r = a; +-- } +emitPrimOp [res] UnsafeFreezeArrayOp [arg] live + = stmtsC [ setInfo arg (CmmLit (CmmLabel mkMAP_FROZEN_infoLabel)), + CmmAssign res arg ] + +-- #define unsafeFreezzeByteArrayzh(r,a) r=(a) +emitPrimOp [res] UnsafeFreezeByteArrayOp [arg] live + = stmtC (CmmAssign res arg) + +-- Reading/writing pointer arrays + +emitPrimOp [r] ReadArrayOp [obj,ix] live = doReadPtrArrayOp r obj ix +emitPrimOp [r] IndexArrayOp [obj,ix] live = doReadPtrArrayOp r obj ix +emitPrimOp [] WriteArrayOp [obj,ix,v] live = doWritePtrArrayOp obj ix v + +-- IndexXXXoffAddr + +emitPrimOp res IndexOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args +emitPrimOp res IndexOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args +emitPrimOp res IndexOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res IndexOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res IndexOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res IndexOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res IndexOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res IndexOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res IndexOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args + +-- ReadXXXoffAddr, which are identical, for our purposes, to IndexXXXoffAddr. 
+ +emitPrimOp res ReadOffAddrOp_Char args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadOffAddrOp_WideChar args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadOffAddrOp_Int args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Word args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Addr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Float args live = doIndexOffAddrOp Nothing F32 res args +emitPrimOp res ReadOffAddrOp_Double args live = doIndexOffAddrOp Nothing F64 res args +emitPrimOp res ReadOffAddrOp_StablePtr args live = doIndexOffAddrOp Nothing wordRep res args +emitPrimOp res ReadOffAddrOp_Int8 args live = doIndexOffAddrOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res ReadOffAddrOp_Int16 args live = doIndexOffAddrOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res ReadOffAddrOp_Int32 args live = doIndexOffAddrOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res ReadOffAddrOp_Int64 args live = doIndexOffAddrOp Nothing I64 res args +emitPrimOp res ReadOffAddrOp_Word8 args live = doIndexOffAddrOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadOffAddrOp_Word16 args live = doIndexOffAddrOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res ReadOffAddrOp_Word32 args live = doIndexOffAddrOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadOffAddrOp_Word64 args live = doIndexOffAddrOp Nothing I64 res args + +-- IndexXXXArray + +emitPrimOp res IndexByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args +emitPrimOp res IndexByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args +emitPrimOp res IndexByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res IndexByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res IndexByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res IndexByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res IndexByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args +emitPrimOp res IndexByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res IndexByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res IndexByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res IndexByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args + +-- ReadXXXArray, identical to IndexXXXArray. 
+ +emitPrimOp res ReadByteArrayOp_Char args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadByteArrayOp_WideChar args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadByteArrayOp_Int args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Word args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Addr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Float args live = doIndexByteArrayOp Nothing F32 res args +emitPrimOp res ReadByteArrayOp_Double args live = doIndexByteArrayOp Nothing F64 res args +emitPrimOp res ReadByteArrayOp_StablePtr args live = doIndexByteArrayOp Nothing wordRep res args +emitPrimOp res ReadByteArrayOp_Int8 args live = doIndexByteArrayOp (Just mo_s_8ToWord) I8 res args +emitPrimOp res ReadByteArrayOp_Int16 args live = doIndexByteArrayOp (Just mo_s_16ToWord) I16 res args +emitPrimOp res ReadByteArrayOp_Int32 args live = doIndexByteArrayOp (Just mo_s_32ToWord) I32 res args +emitPrimOp res ReadByteArrayOp_Int64 args live = doIndexByteArrayOp Nothing I64 res args +emitPrimOp res ReadByteArrayOp_Word8 args live = doIndexByteArrayOp (Just mo_u_8ToWord) I8 res args +emitPrimOp res ReadByteArrayOp_Word16 args live = doIndexByteArrayOp (Just mo_u_16ToWord) I16 res args +emitPrimOp res ReadByteArrayOp_Word32 args live = doIndexByteArrayOp (Just mo_u_32ToWord) I32 res args +emitPrimOp res ReadByteArrayOp_Word64 args live = doIndexByteArrayOp Nothing I64 res args + +-- WriteXXXoffAddr + +emitPrimOp res WriteOffAddrOp_Char args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteOffAddrOp_WideChar args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteOffAddrOp_Int args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Word args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Addr args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Float args live = doWriteOffAddrOp Nothing F32 res args +emitPrimOp res WriteOffAddrOp_Double args live = doWriteOffAddrOp Nothing F64 res args +emitPrimOp res WriteOffAddrOp_StablePtr args live = doWriteOffAddrOp Nothing wordRep res args +emitPrimOp res WriteOffAddrOp_Int8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteOffAddrOp_Int16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteOffAddrOp_Int32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteOffAddrOp_Int64 args live = doWriteOffAddrOp Nothing I64 res args +emitPrimOp res WriteOffAddrOp_Word8 args live = doWriteOffAddrOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteOffAddrOp_Word16 args live = doWriteOffAddrOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteOffAddrOp_Word32 args live = doWriteOffAddrOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteOffAddrOp_Word64 args live = doWriteOffAddrOp Nothing I64 res args + +-- WriteXXXArray + +emitPrimOp res WriteByteArrayOp_Char args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteByteArrayOp_WideChar args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteByteArrayOp_Int args live = doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Word args live = doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Addr args live = 
doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Float args live = doWriteByteArrayOp Nothing F32 res args +emitPrimOp res WriteByteArrayOp_Double args live = doWriteByteArrayOp Nothing F64 res args +emitPrimOp res WriteByteArrayOp_StablePtr args live = doWriteByteArrayOp Nothing wordRep res args +emitPrimOp res WriteByteArrayOp_Int8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteByteArrayOp_Int16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteByteArrayOp_Int32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteByteArrayOp_Int64 args live = doWriteByteArrayOp Nothing I64 res args +emitPrimOp res WriteByteArrayOp_Word8 args live = doWriteByteArrayOp (Just mo_WordTo8) I8 res args +emitPrimOp res WriteByteArrayOp_Word16 args live = doWriteByteArrayOp (Just mo_WordTo16) I16 res args +emitPrimOp res WriteByteArrayOp_Word32 args live = doWriteByteArrayOp (Just mo_WordTo32) I32 res args +emitPrimOp res WriteByteArrayOp_Word64 args live = doWriteByteArrayOp Nothing I64 res args + + +-- The rest just translate straightforwardly +emitPrimOp [res] op [arg] live + | nopOp op + = stmtC (CmmAssign res arg) + + | Just (mop,rep) <- narrowOp op + = stmtC (CmmAssign res (CmmMachOp (mop rep wordRep) [ + CmmMachOp (mop wordRep rep) [arg]])) + +emitPrimOp [res] op args live + | Just prim <- callishOp op + = do vols <- getVolatileRegs live + emitForeignCall' PlayRisky + [(res,NoHint)] + (CmmPrim prim) + [(a,NoHint) | a<-args] -- ToDo: hints? + (Just vols) + + | Just mop <- translateOp op + = let stmt = CmmAssign res (CmmMachOp mop args) in + stmtC stmt + +emitPrimOp _ op _ _ + = pprPanic "emitPrimOp: can't translate PrimOp" (ppr op) + + +-- These PrimOps are NOPs in Cmm + +nopOp Int2WordOp = True +nopOp Word2IntOp = True +nopOp Int2AddrOp = True +nopOp Addr2IntOp = True +nopOp ChrOp = True -- Int# and Char# are rep'd the same +nopOp OrdOp = True +nopOp _ = False + +-- These PrimOps turn into double casts + +narrowOp Narrow8IntOp = Just (MO_S_Conv, I8) +narrowOp Narrow16IntOp = Just (MO_S_Conv, I16) +narrowOp Narrow32IntOp = Just (MO_S_Conv, I32) +narrowOp Narrow8WordOp = Just (MO_U_Conv, I8) +narrowOp Narrow16WordOp = Just (MO_U_Conv, I16) +narrowOp Narrow32WordOp = Just (MO_U_Conv, I32) +narrowOp _ = Nothing + +-- Native word signless ops + +translateOp IntAddOp = Just mo_wordAdd +translateOp IntSubOp = Just mo_wordSub +translateOp WordAddOp = Just mo_wordAdd +translateOp WordSubOp = Just mo_wordSub +translateOp AddrAddOp = Just mo_wordAdd +translateOp AddrSubOp = Just mo_wordSub + +translateOp IntEqOp = Just mo_wordEq +translateOp IntNeOp = Just mo_wordNe +translateOp WordEqOp = Just mo_wordEq +translateOp WordNeOp = Just mo_wordNe +translateOp AddrEqOp = Just mo_wordEq +translateOp AddrNeOp = Just mo_wordNe + +translateOp AndOp = Just mo_wordAnd +translateOp OrOp = Just mo_wordOr +translateOp XorOp = Just mo_wordXor +translateOp NotOp = Just mo_wordNot +translateOp SllOp = Just mo_wordShl +translateOp SrlOp = Just mo_wordUShr + +translateOp AddrRemOp = Just mo_wordURem + +-- Native word signed ops + +translateOp IntMulOp = Just mo_wordMul +translateOp IntMulMayOfloOp = Just (MO_S_MulMayOflo wordRep) +translateOp IntQuotOp = Just mo_wordSQuot +translateOp IntRemOp = Just mo_wordSRem +translateOp IntNegOp = Just mo_wordSNeg + + +translateOp IntGeOp = Just mo_wordSGe +translateOp IntLeOp = Just mo_wordSLe +translateOp IntGtOp = Just mo_wordSGt +translateOp IntLtOp = Just 
mo_wordSLt + +translateOp ISllOp = Just mo_wordShl +translateOp ISraOp = Just mo_wordSShr +translateOp ISrlOp = Just mo_wordUShr + +-- Native word unsigned ops + +translateOp WordGeOp = Just mo_wordUGe +translateOp WordLeOp = Just mo_wordULe +translateOp WordGtOp = Just mo_wordUGt +translateOp WordLtOp = Just mo_wordULt + +translateOp WordMulOp = Just mo_wordMul +translateOp WordQuotOp = Just mo_wordUQuot +translateOp WordRemOp = Just mo_wordURem + +translateOp AddrGeOp = Just mo_wordUGe +translateOp AddrLeOp = Just mo_wordULe +translateOp AddrGtOp = Just mo_wordUGt +translateOp AddrLtOp = Just mo_wordULt + +-- Char# ops + +translateOp CharEqOp = Just (MO_Eq wordRep) +translateOp CharNeOp = Just (MO_Ne wordRep) +translateOp CharGeOp = Just (MO_U_Ge wordRep) +translateOp CharLeOp = Just (MO_U_Le wordRep) +translateOp CharGtOp = Just (MO_U_Gt wordRep) +translateOp CharLtOp = Just (MO_U_Lt wordRep) + +-- Double ops + +translateOp DoubleEqOp = Just (MO_Eq F64) +translateOp DoubleNeOp = Just (MO_Ne F64) +translateOp DoubleGeOp = Just (MO_S_Ge F64) +translateOp DoubleLeOp = Just (MO_S_Le F64) +translateOp DoubleGtOp = Just (MO_S_Gt F64) +translateOp DoubleLtOp = Just (MO_S_Lt F64) + +translateOp DoubleAddOp = Just (MO_Add F64) +translateOp DoubleSubOp = Just (MO_Sub F64) +translateOp DoubleMulOp = Just (MO_Mul F64) +translateOp DoubleDivOp = Just (MO_S_Quot F64) +translateOp DoubleNegOp = Just (MO_S_Neg F64) + +-- Float ops + +translateOp FloatEqOp = Just (MO_Eq F32) +translateOp FloatNeOp = Just (MO_Ne F32) +translateOp FloatGeOp = Just (MO_S_Ge F32) +translateOp FloatLeOp = Just (MO_S_Le F32) +translateOp FloatGtOp = Just (MO_S_Gt F32) +translateOp FloatLtOp = Just (MO_S_Lt F32) + +translateOp FloatAddOp = Just (MO_Add F32) +translateOp FloatSubOp = Just (MO_Sub F32) +translateOp FloatMulOp = Just (MO_Mul F32) +translateOp FloatDivOp = Just (MO_S_Quot F32) +translateOp FloatNegOp = Just (MO_S_Neg F32) + +-- Conversions + +translateOp Int2DoubleOp = Just (MO_S_Conv wordRep F64) +translateOp Double2IntOp = Just (MO_S_Conv F64 wordRep) + +translateOp Int2FloatOp = Just (MO_S_Conv wordRep F32) +translateOp Float2IntOp = Just (MO_S_Conv F32 wordRep) + +translateOp Float2DoubleOp = Just (MO_S_Conv F32 F64) +translateOp Double2FloatOp = Just (MO_S_Conv F64 F32) + +-- Word comparisons masquerading as more exotic things. + +translateOp SameMutVarOp = Just mo_wordEq +translateOp SameMVarOp = Just mo_wordEq +translateOp SameMutableArrayOp = Just mo_wordEq +translateOp SameMutableByteArrayOp = Just mo_wordEq +translateOp SameTVarOp = Just mo_wordEq +translateOp EqStablePtrOp = Just mo_wordEq + +translateOp _ = Nothing + +-- These primops are implemented by CallishMachOps, because they sometimes +-- turn into foreign calls depending on the backend. 
+ +callishOp DoublePowerOp = Just MO_F64_Pwr +callishOp DoubleSinOp = Just MO_F64_Sin +callishOp DoubleCosOp = Just MO_F64_Cos +callishOp DoubleTanOp = Just MO_F64_Tan +callishOp DoubleSinhOp = Just MO_F64_Sinh +callishOp DoubleCoshOp = Just MO_F64_Cosh +callishOp DoubleTanhOp = Just MO_F64_Tanh +callishOp DoubleAsinOp = Just MO_F64_Asin +callishOp DoubleAcosOp = Just MO_F64_Acos +callishOp DoubleAtanOp = Just MO_F64_Atan +callishOp DoubleLogOp = Just MO_F64_Log +callishOp DoubleExpOp = Just MO_F64_Exp +callishOp DoubleSqrtOp = Just MO_F64_Sqrt + +callishOp FloatPowerOp = Just MO_F32_Pwr +callishOp FloatSinOp = Just MO_F32_Sin +callishOp FloatCosOp = Just MO_F32_Cos +callishOp FloatTanOp = Just MO_F32_Tan +callishOp FloatSinhOp = Just MO_F32_Sinh +callishOp FloatCoshOp = Just MO_F32_Cosh +callishOp FloatTanhOp = Just MO_F32_Tanh +callishOp FloatAsinOp = Just MO_F32_Asin +callishOp FloatAcosOp = Just MO_F32_Acos +callishOp FloatAtanOp = Just MO_F32_Atan +callishOp FloatLogOp = Just MO_F32_Log +callishOp FloatExpOp = Just MO_F32_Exp +callishOp FloatSqrtOp = Just MO_F32_Sqrt + +callishOp _ = Nothing + +------------------------------------------------------------------------------ +-- Helpers for translating various minor variants of array indexing. + +doIndexOffAddrOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead 0 maybe_post_read_cast rep res addr idx +doIndexOffAddrOp _ _ _ _ + = panic "CgPrimOp: doIndexOffAddrOp" + +doIndexByteArrayOp maybe_post_read_cast rep [res] [addr,idx] + = mkBasicIndexedRead arrWordsHdrSize maybe_post_read_cast rep res addr idx +doIndexByteArrayOp _ _ _ _ + = panic "CgPrimOp: doIndexByteArrayOp" + +doReadPtrArrayOp res addr idx + = mkBasicIndexedRead arrPtrsHdrSize Nothing wordRep res addr idx + + +doWriteOffAddrOp maybe_pre_write_cast rep [] [addr,idx,val] + = mkBasicIndexedWrite 0 maybe_pre_write_cast rep addr idx val +doWriteOffAddrOp _ _ _ _ + = panic "CgPrimOp: doWriteOffAddrOp" + +doWriteByteArrayOp maybe_pre_write_cast rep [] [addr,idx,val] + = mkBasicIndexedWrite arrWordsHdrSize maybe_pre_write_cast rep addr idx val +doWriteByteArrayOp _ _ _ _ + = panic "CgPrimOp: doWriteByteArrayOp" + +doWritePtrArrayOp addr idx val + = do stmtC (setInfo addr (CmmLit (CmmLabel mkMAP_DIRTY_infoLabel))) + mkBasicIndexedWrite arrPtrsHdrSize Nothing wordRep addr idx val + + +mkBasicIndexedRead off Nothing read_rep res base idx + = stmtC (CmmAssign res (cmmLoadIndexOffExpr off read_rep base idx)) +mkBasicIndexedRead off (Just cast) read_rep res base idx + = stmtC (CmmAssign res (CmmMachOp cast [ + cmmLoadIndexOffExpr off read_rep base idx])) + +mkBasicIndexedWrite off Nothing write_rep base idx val + = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) val) +mkBasicIndexedWrite off (Just cast) write_rep base idx val + = stmtC (CmmStore (cmmIndexOffExpr off write_rep base idx) (CmmMachOp cast [val])) + +-- ---------------------------------------------------------------------------- +-- Misc utils + +cmmIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmIndexOffExpr off rep base idx + = cmmIndexExpr rep (cmmOffsetB base off) idx + +cmmLoadIndexOffExpr :: ByteOff -> MachRep -> CmmExpr -> CmmExpr -> CmmExpr +cmmLoadIndexOffExpr off rep base idx + = CmmLoad (cmmIndexOffExpr off rep base idx) rep + +setInfo :: CmmExpr -> CmmExpr -> CmmStmt +setInfo closure_ptr info_ptr = CmmStore closure_ptr info_ptr + diff --git a/compiler/codeGen/CgProf.hs b/compiler/codeGen/CgProf.hs new file mode 100644 index 0000000000..1488e34956 --- /dev/null +++ 
b/compiler/codeGen/CgProf.hs @@ -0,0 +1,478 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for profiling +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgProf ( + mkCCostCentre, mkCCostCentreStack, + + -- Cost-centre Profiling + dynProfHdr, profDynAlloc, profAlloc, staticProfHdr, initUpdFrameProf, + enterCostCentre, enterCostCentrePAP, enterCostCentreThunk, + chooseDynCostCentres, + costCentreFrom, + curCCS, curCCSAddr, + emitCostCentreDecl, emitCostCentreStackDecl, + emitRegisterCC, emitRegisterCCS, + emitSetCCC, emitCCS, + + -- Lag/drag/void stuff + ldvEnter, ldvRecordCreate + ) where + +#include "HsVersions.h" +#include "MachDeps.h" + -- For WORD_SIZE_IN_BITS only. +#include "../includes/Constants.h" + -- For LDV_CREATE_MASK, LDV_STATE_USE + -- which are StgWords +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import ClosureInfo ( ClosureInfo, closureSize, + closureName, isToplevClosure, closureReEntrant, ) +import CgUtils +import CgMonad +import SMRep ( StgWord, profHdrSize ) + +import Cmm +import MachOp +import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr ) +import CLabel ( mkCCLabel, mkCCSLabel, mkRtsDataLabel ) + +import Module ( moduleString ) +import Id ( Id ) +import CostCentre +import StgSyn ( GenStgExpr(..), StgExpr ) +import StaticFlags ( opt_SccProfilingOn ) +import FastString ( FastString, mkFastString, LitString ) +import Constants -- Lots of field offsets +import Outputable + +import Maybe +import Char ( ord ) +import Monad ( when ) + +----------------------------------------------------------------------------- +-- +-- Cost-centre-stack Profiling +-- +----------------------------------------------------------------------------- + +-- Expression representing the current cost centre stack +curCCS :: CmmExpr +curCCS = CmmLoad curCCSAddr wordRep + +-- Address of current CCS variable, for storing into +curCCSAddr :: CmmExpr +curCCSAddr = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCCS"))) + +mkCCostCentre :: CostCentre -> CmmLit +mkCCostCentre cc = CmmLabel (mkCCLabel cc) + +mkCCostCentreStack :: CostCentreStack -> CmmLit +mkCCostCentreStack ccs = CmmLabel (mkCCSLabel ccs) + +costCentreFrom :: CmmExpr -- A closure pointer + -> CmmExpr -- The cost centre from that closure +costCentreFrom cl = CmmLoad (cmmOffsetB cl oFFSET_StgHeader_ccs) wordRep + +staticProfHdr :: CostCentreStack -> [CmmLit] +-- The profiling header words in a static closure +-- Was SET_STATIC_PROF_HDR +staticProfHdr ccs = ifProfilingL [mkCCostCentreStack ccs, + staticLdvInit] + +dynProfHdr :: CmmExpr -> [CmmExpr] +-- Profiling header words in a dynamic closure +dynProfHdr ccs = ifProfilingL [ccs, dynLdvInit] + +initUpdFrameProf :: CmmExpr -> Code +-- Initialise the profiling field of an update frame +initUpdFrameProf frame_amode + = ifProfiling $ -- frame->header.prof.ccs = CCCS + stmtC (CmmStore (cmmOffsetB frame_amode oFFSET_StgHeader_ccs) curCCS) + -- frame->header.prof.hp.rs = NULL (or frame-header.prof.hp.ldvw = 0) + -- is unnecessary because it is not used anyhow. + +-- ----------------------------------------------------------------------------- +-- Recording allocation in a cost centre + +-- | Record the allocation of a closure. The CmmExpr is the cost +-- centre stack to which to attribute the allocation. 
+profDynAlloc :: ClosureInfo -> CmmExpr -> Code
+profDynAlloc cl_info ccs
+  = ifProfiling $
+    profAlloc (CmmLit (mkIntCLit (closureSize cl_info))) ccs
+
+-- | Record the allocation of a closure (size is given by a CmmExpr)
+-- The size must be in words, because the allocation counter in a CCS counts
+-- in words.
+profAlloc :: CmmExpr -> CmmExpr -> Code
+profAlloc words ccs
+  = ifProfiling $
+    stmtC (addToMemE alloc_rep
+               (cmmOffsetB ccs oFFSET_CostCentreStack_mem_alloc)
+               (CmmMachOp (MO_U_Conv wordRep alloc_rep) $
+                 [CmmMachOp mo_wordSub [words,
+                                        CmmLit (mkIntCLit profHdrSize)]]))
+               -- subtract the "profiling overhead", which is the
+               -- profiling header in a closure.
+ where
+       alloc_rep = REP_CostCentreStack_mem_alloc
+
+-- ----------------------------------------------------------------------
+-- Setting the cost centre in a new closure
+
+chooseDynCostCentres :: CostCentreStack
+                     -> [Id]            -- Args
+                     -> StgExpr         -- Body
+                     -> FCode (CmmExpr, CmmExpr)
+-- Called when allocating a closure
+-- Tells which cost centre to put in the object, and which
+-- to blame the cost of allocation on
+chooseDynCostCentres ccs args body = do
+  -- Cost-centre we record in the object
+  use_ccs <- emitCCS ccs
+
+  -- Cost-centre on whom we blame the allocation
+  let blame_ccs
+       | null args && isBox body = CmmLit (mkCCostCentreStack overheadCCS)
+       | otherwise               = use_ccs
+
+  return (use_ccs, blame_ccs)
+
+
+-- Some CostCentreStacks are a sequence of pushes on top of CCCS.
+-- These pushes must be performed before we can refer to the stack in
+-- an expression.
+emitCCS :: CostCentreStack -> FCode CmmExpr
+emitCCS ccs = push_em (ccsExpr ccs') (reverse cc's)
+  where
+       (cc's, ccs') = decomposeCCS ccs
+
+       push_em ccs [] = return ccs
+       push_em ccs (cc:rest) = do
+         tmp <- newTemp wordRep
+         pushCostCentre tmp ccs cc
+         push_em (CmmReg tmp) rest
+
+ccsExpr :: CostCentreStack -> CmmExpr
+ccsExpr ccs
+  | isCurrentCCS ccs = curCCS
+  | otherwise        = CmmLit (mkCCostCentreStack ccs)
+
+
+isBox :: StgExpr -> Bool
+-- If it's an utterly trivial RHS, then it must be
+-- one introduced by boxHigherOrderArgs for profiling,
+-- so we charge it to "OVERHEAD".
+-- This looks like a GROSS HACK to me --SDM
+isBox (StgApp fun []) = True
+isBox other           = False
+
+
+-- -----------------------------------------------------------------------
+-- Setting the current cost centre on entry to a closure
+
+-- For lexically scoped profiling we have to load the cost centre from
+-- the closure entered, if the costs are not supposed to be inherited.
+-- This is done immediately on entering the fast entry point.
+
+-- Load current cost centre from closure, if not inherited.
+-- Node is guaranteed to point to it, if profiling and not inherited.
+
+enterCostCentre
+   :: ClosureInfo
+   -> CostCentreStack
+   -> StgExpr   -- The RHS of the closure
+   -> Code
+
+-- We used to have a special case for bindings of form
+--     f = g True
+-- where g has arity 2.  The RHS is a thunk, but we don't
+-- need to update it; and we want to subsume costs.
+-- We don't have these sorts of PAPs any more, so the special
+-- case has gone away.
+ +enterCostCentre closure_info ccs body + = ifProfiling $ + ASSERT2(not (noCCSAttached ccs), ppr (closureName closure_info) <+> ppr ccs) + enter_cost_centre closure_info ccs body + +enter_cost_centre closure_info ccs body + | isSubsumedCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(re_entrant) + enter_ccs_fsub + + | isDerivedFromCurrentCCS ccs + = do { + if re_entrant && not is_box + then + enter_ccs_fun node_ccs + else + stmtC (CmmStore curCCSAddr node_ccs) + + -- don't forget to bump the scc count. This closure might have been + -- of the form let x = _scc_ "x" e in ...x..., which the SCCfinal + -- pass has turned into simply let x = e in ...x... and attached + -- the _scc_ as PushCostCentre(x,CCCS) on the x closure. So that + -- we don't lose the scc counter, bump it in the entry code for x. + -- ToDo: for a multi-push we should really bump the counter for + -- each of the intervening CCSs, not just the top one. + ; when (not (isCurrentCCS ccs)) $ + stmtC (bumpSccCount curCCS) + } + + | isCafCCS ccs + = ASSERT(isToplevClosure closure_info) + ASSERT(not re_entrant) + do { -- This is just a special case of the isDerivedFromCurrentCCS + -- case above. We could delete this, but it's a micro + -- optimisation and saves a bit of code. + stmtC (CmmStore curCCSAddr enc_ccs) + ; stmtC (bumpSccCount node_ccs) + } + + | otherwise + = panic "enterCostCentre" + where + enc_ccs = CmmLit (mkCCostCentreStack ccs) + re_entrant = closureReEntrant closure_info + node_ccs = costCentreFrom (CmmReg nodeReg) + is_box = isBox body + +-- set the current CCS when entering a PAP +enterCostCentrePAP :: CmmExpr -> Code +enterCostCentrePAP closure = + ifProfiling $ do + enter_ccs_fun (costCentreFrom closure) + enteringPAP 1 + +enterCostCentreThunk :: CmmExpr -> Code +enterCostCentreThunk closure = + ifProfiling $ do + stmtC $ CmmStore curCCSAddr (costCentreFrom closure) + +enter_ccs_fun stack = emitRtsCall SLIT("EnterFunCCS") [(stack,PtrHint)] + -- ToDo: vols + +enter_ccs_fsub = enteringPAP 0 + +-- When entering a PAP, EnterFunCCS is called by both the PAP entry +-- code and the function entry code; we don't want the function's +-- entry code to also update CCCS in the event that it was called via +-- a PAP, so we set the flag entering_PAP to indicate that we are +-- entering via a PAP. 
+enteringPAP :: Integer -> Code +enteringPAP n + = stmtC (CmmStore (CmmLit (CmmLabel (mkRtsDataLabel SLIT("entering_PAP")))) + (CmmLit (CmmInt n cIntRep))) + +ifProfiling :: Code -> Code +ifProfiling code + | opt_SccProfilingOn = code + | otherwise = nopC + +ifProfilingL :: [a] -> [a] +ifProfilingL xs + | opt_SccProfilingOn = xs + | otherwise = [] + + +-- --------------------------------------------------------------------------- +-- Initialising Cost Centres & CCSs + +emitCostCentreDecl + :: CostCentre + -> Code +emitCostCentreDecl cc = do + { label <- mkStringCLit (costCentreUserName cc) + ; modl <- mkStringCLit (moduleString (cc_mod cc)) + ; let + lits = [ zero, -- StgInt ccID, + label, -- char *label, + modl, -- char *module, + zero, -- StgWord time_ticks + zero64, -- StgWord64 mem_alloc + subsumed, -- StgInt is_caf + zero -- struct _CostCentre *link + ] + ; emitDataLits (mkCCLabel cc) lits + } + where + subsumed | isCafCC cc = mkIntCLit (ord 'c') -- 'c' == is a CAF + | otherwise = mkIntCLit (ord 'B') -- 'B' == is boring + + +emitCostCentreStackDecl + :: CostCentreStack + -> Code +emitCostCentreStackDecl ccs + | Just cc <- maybeSingletonCCS ccs = do + { let + -- Note: to avoid making any assumptions about how the + -- C compiler (that compiles the RTS, in particular) does + -- layouts of structs containing long-longs, simply + -- pad out the struct with zero words until we hit the + -- size of the overall struct (which we get via DerivedConstants.h) + -- + lits = zero : mkCCostCentre cc : replicate (sizeof_ccs_words - 2) zero + ; emitDataLits (mkCCSLabel ccs) lits + } + | otherwise = pprPanic "emitCostCentreStackDecl" (ppr ccs) + +zero = mkIntCLit 0 +zero64 = CmmInt 0 I64 + +sizeof_ccs_words :: Int +sizeof_ccs_words + -- round up to the next word. 
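+ -- A worked example with illustrative figures: if
+ -- SIZEOF_CostCentreStack = 100 and wORD_SIZE = 4 then
+ -- (ws,ms) = (25,0) and the result is 25 words; a struct of
+ -- 102 bytes would give (25,2), rounding up to 26 words.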
+ | ms == 0 = ws + | otherwise = ws + 1 + where + (ws,ms) = SIZEOF_CostCentreStack `divMod` wORD_SIZE + +-- --------------------------------------------------------------------------- +-- Registering CCs and CCSs + +-- (cc)->link = CC_LIST; +-- CC_LIST = (cc); +-- (cc)->ccID = CC_ID++; + +emitRegisterCC :: CostCentre -> Code +emitRegisterCC cc = do + { tmp <- newTemp cIntRep + ; stmtsC [ + CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_link) + (CmmLoad cC_LIST wordRep), + CmmStore cC_LIST cc_lit, + CmmAssign tmp (CmmLoad cC_ID cIntRep), + CmmStore (cmmOffsetB cc_lit oFFSET_CostCentre_ccID) (CmmReg tmp), + CmmStore cC_ID (cmmRegOffB tmp 1) + ] + } + where + cc_lit = CmmLit (CmmLabel (mkCCLabel cc)) + +-- (ccs)->prevStack = CCS_LIST; +-- CCS_LIST = (ccs); +-- (ccs)->ccsID = CCS_ID++; + +emitRegisterCCS :: CostCentreStack -> Code +emitRegisterCCS ccs = do + { tmp <- newTemp cIntRep + ; stmtsC [ + CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_prevStack) + (CmmLoad cCS_LIST wordRep), + CmmStore cCS_LIST ccs_lit, + CmmAssign tmp (CmmLoad cCS_ID cIntRep), + CmmStore (cmmOffsetB ccs_lit oFFSET_CostCentreStack_ccsID) (CmmReg tmp), + CmmStore cCS_ID (cmmRegOffB tmp 1) + ] + } + where + ccs_lit = CmmLit (CmmLabel (mkCCSLabel ccs)) + + +cC_LIST = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CC_LIST"))) +cC_ID = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CC_ID"))) + +cCS_LIST = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCS_LIST"))) +cCS_ID = CmmLit (CmmLabel (mkRtsDataLabel SLIT("CCS_ID"))) + +-- --------------------------------------------------------------------------- +-- Set the current cost centre stack + +emitSetCCC :: CostCentre -> Code +emitSetCCC cc + | not opt_SccProfilingOn = nopC + | otherwise = do + tmp <- newTemp wordRep + ASSERT( sccAbleCostCentre cc ) + pushCostCentre tmp curCCS cc + stmtC (CmmStore curCCSAddr (CmmReg tmp)) + when (isSccCountCostCentre cc) $ + stmtC (bumpSccCount curCCS) + +pushCostCentre :: CmmReg -> CmmExpr -> CostCentre -> Code +pushCostCentre result ccs cc + = emitRtsCallWithResult result PtrHint + SLIT("PushCostCentre") [(ccs,PtrHint), + (CmmLit (mkCCostCentre cc), PtrHint)] + +bumpSccCount :: CmmExpr -> CmmStmt +bumpSccCount ccs + = addToMem REP_CostCentreStack_scc_count + (cmmOffsetB ccs oFFSET_CostCentreStack_scc_count) 1 + +----------------------------------------------------------------------------- +-- +-- Lag/drag/void stuff +-- +----------------------------------------------------------------------------- + +-- +-- Initial value for the LDV field in a static closure +-- +staticLdvInit :: CmmLit +staticLdvInit = zeroCLit + +-- +-- Initial value of the LDV field in a dynamic closure +-- +dynLdvInit :: CmmExpr +dynLdvInit = -- (era << LDV_SHIFT) | LDV_STATE_CREATE + CmmMachOp mo_wordOr [ + CmmMachOp mo_wordShl [loadEra, CmmLit (mkIntCLit lDV_SHIFT) ], + CmmLit (mkWordCLit lDV_STATE_CREATE) + ] + +-- +-- Initialise the LDV word of a new closure +-- +ldvRecordCreate :: CmmExpr -> Code +ldvRecordCreate closure = stmtC $ CmmStore (ldvWord closure) dynLdvInit + +-- +-- Called when a closure is entered, marks the closure as having been "used". +-- The closure is not an 'inherently used' one. +-- The closure is not IND or IND_OLDGEN because neither is considered for LDV +-- profiling. 
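+-- The LDV word itself packs the creation era into the high bits and the
+-- state bits into the low bits; schematically (cf. dynLdvInit above):
+--
+--     ldv_word = (era << LDV_SHIFT) | LDV_STATE_CREATE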
+-- +ldvEnter :: CmmExpr -> Code +-- Argument is a closure pointer +ldvEnter cl_ptr + = ifProfiling $ + -- if (era > 0) { + -- LDVW((c)) = (LDVW((c)) & LDV_CREATE_MASK) | + -- era | LDV_STATE_USE } + emitIf (CmmMachOp mo_wordUGt [loadEra, CmmLit zeroCLit]) + (stmtC (CmmStore ldv_wd new_ldv_wd)) + where + ldv_wd = ldvWord cl_ptr + new_ldv_wd = cmmOrWord (cmmAndWord (CmmLoad ldv_wd wordRep) + (CmmLit (mkWordCLit lDV_CREATE_MASK))) + (cmmOrWord loadEra (CmmLit (mkWordCLit lDV_STATE_USE))) + +loadEra :: CmmExpr +loadEra = CmmMachOp (MO_U_Conv cIntRep wordRep) + [CmmLoad (mkLblExpr (mkRtsDataLabel SLIT("era"))) cIntRep] + +ldvWord :: CmmExpr -> CmmExpr +-- Takes the address of a closure, and returns +-- the address of the LDV word in the closure +ldvWord closure_ptr = cmmOffsetB closure_ptr oFFSET_StgHeader_ldvw + +-- LDV constants, from ghc/includes/Constants.h +lDV_SHIFT = (LDV_SHIFT :: Int) +--lDV_STATE_MASK = (LDV_STATE_MASK :: StgWord) +lDV_CREATE_MASK = (LDV_CREATE_MASK :: StgWord) +--lDV_LAST_MASK = (LDV_LAST_MASK :: StgWord) +lDV_STATE_CREATE = (LDV_STATE_CREATE :: StgWord) +lDV_STATE_USE = (LDV_STATE_USE :: StgWord) + diff --git a/compiler/codeGen/CgStackery.lhs b/compiler/codeGen/CgStackery.lhs new file mode 100644 index 0000000000..7cb310d521 --- /dev/null +++ b/compiler/codeGen/CgStackery.lhs @@ -0,0 +1,339 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgStackery.lhs,v 1.27 2004/09/30 10:35:49 simonpj Exp $ +% +\section[CgStackery]{Stack management functions} + +Stack-twiddling operations, which are pretty low-down and grimy. +(This is the module that knows all about stack layouts, etc.) + +\begin{code} +module CgStackery ( + spRel, getVirtSp, getRealSp, setRealSp, + setRealAndVirtualSp, getSpRelOffset, + + allocPrimStack, allocStackTop, deAllocStackTop, + adjustStackHW, getFinalStackHW, + setStackFrame, getStackFrame, + mkVirtStkOffsets, mkStkAmodes, + freeStackSlots, + pushUpdateFrame, emitPushUpdateFrame, + ) where + +#include "HsVersions.h" + +import CgMonad +import CgUtils ( cmmOffsetB, cmmRegOffW ) +import CgProf ( initUpdFrameProf ) +import SMRep +import Cmm +import CmmUtils ( CmmStmts, mkLblExpr ) +import CLabel ( mkUpdInfoLabel ) +import Constants +import Util ( sortLe ) +import FastString ( LitString ) +import OrdList ( toOL ) +import Outputable +\end{code} + +%************************************************************************ +%* * +\subsection[CgUsages-stackery]{Monad things for fiddling with stack usage} +%* * +%************************************************************************ + +spRel is a little function that abstracts the stack direction. Note that most +of the code generator is dependent on the stack direction anyway, so +changing this on its own spells certain doom. ToDo: remove? + + THIS IS DIRECTION SENSITIVE! + +Stack grows down, positive virtual offsets correspond to negative +additions to the stack pointer. + +\begin{code} +spRel :: VirtualSpOffset -- virtual offset of Sp + -> VirtualSpOffset -- virtual offset of The Thing + -> WordOff -- integer offset +spRel sp off = sp - off +\end{code} + +@setRealAndVirtualSp@ sets into the environment the offsets of the +current position of the real and virtual stack pointers in the current +stack frame. The high-water mark is set too. It generates no code. +It is used to initialise things at the beginning of a closure body. 
+ +\begin{code} +setRealAndVirtualSp :: VirtualSpOffset -- New real Sp + -> Code + +setRealAndVirtualSp new_sp + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg {virtSp = new_sp, + realSp = new_sp, + hwSp = new_sp}) } + +getVirtSp :: FCode VirtualSpOffset +getVirtSp + = do { stk_usg <- getStkUsage + ; return (virtSp stk_usg) } + +getRealSp :: FCode VirtualSpOffset +getRealSp + = do { stk_usg <- getStkUsage + ; return (realSp stk_usg) } + +setRealSp :: VirtualSpOffset -> Code +setRealSp new_real_sp + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg {realSp = new_real_sp}) } + +getSpRelOffset :: VirtualSpOffset -> FCode CmmExpr +getSpRelOffset virtual_offset + = do { real_sp <- getRealSp + ; return (cmmRegOffW spReg (spRel real_sp virtual_offset)) } +\end{code} + + +%************************************************************************ +%* * +\subsection[CgStackery-layout]{Laying out a stack frame} +%* * +%************************************************************************ + +'mkVirtStkOffsets' is given a list of arguments. The first argument +gets the /largest/ virtual stack offset (remember, virtual offsets +increase towards the top of stack). + +\begin{code} +mkVirtStkOffsets + :: VirtualSpOffset -- Offset of the last allocated thing + -> [(CgRep,a)] -- things to make offsets for + -> (VirtualSpOffset, -- OUTPUTS: Topmost allocated word + [(a, VirtualSpOffset)]) -- things with offsets (voids filtered out) + +mkVirtStkOffsets init_Sp_offset things + = loop init_Sp_offset [] (reverse things) + where + loop offset offs [] = (offset,offs) + loop offset offs ((VoidArg,t):things) = loop offset offs things + -- ignore Void arguments + loop offset offs ((rep,t):things) + = loop thing_slot ((t,thing_slot):offs) things + where + thing_slot = offset + cgRepSizeW rep + -- offset of thing is offset+size, because we're + -- growing the stack *downwards* as the offsets increase. + +-- | 'mkStkAmodes' is a higher-level version of +-- 'mkVirtStkOffsets'. It starts from the tail-call locations. +-- It returns a single list of addressing modes for the stack +-- locations, and therefore is in the monad. It /doesn't/ adjust the +-- high water mark. + +mkStkAmodes + :: VirtualSpOffset -- Tail call positions + -> [(CgRep,CmmExpr)] -- things to make offsets for + -> FCode (VirtualSpOffset, -- OUTPUTS: Topmost allocated word + CmmStmts) -- Assignments to appropriate stk slots + +mkStkAmodes tail_Sp things + = do { rSp <- getRealSp + ; let (last_Sp_offset, offsets) = mkVirtStkOffsets tail_Sp things + abs_cs = [ CmmStore (cmmRegOffW spReg (spRel rSp offset)) amode + | (amode, offset) <- offsets + ] + ; returnFC (last_Sp_offset, toOL abs_cs) } +\end{code} + +%************************************************************************ +%* * +\subsection[CgStackery-monadery]{Inside-monad functions for stack manipulation} +%* * +%************************************************************************ + +Allocate a virtual offset for something. 
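+A worked example (with made-up numbers): given the free list @[3,4,6]@
+and a request for a two-word slot, @find_block@ below finds the
+contiguous block @[3,4]@ and returns its topmost word, offset 4,
+leaving @[6]@ on the free list.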
+ +\begin{code} +allocPrimStack :: CgRep -> FCode VirtualSpOffset +allocPrimStack rep + = do { stk_usg <- getStkUsage + ; let free_stk = freeStk stk_usg + ; case find_block free_stk of + Nothing -> do + { let push_virt_sp = virtSp stk_usg + size + ; setStkUsage (stk_usg { virtSp = push_virt_sp, + hwSp = hwSp stk_usg `max` push_virt_sp }) + -- Adjust high water mark + ; return push_virt_sp } + Just slot -> do + { setStkUsage (stk_usg { freeStk = delete_block free_stk slot }) + ; return slot } + } + where + size :: WordOff + size = cgRepSizeW rep + + -- Find_block looks for a contiguous chunk of free slots + -- returning the offset of its topmost word + find_block :: [VirtualSpOffset] -> Maybe VirtualSpOffset + find_block [] = Nothing + find_block (slot:slots) + | take size (slot:slots) == [slot..top_slot] + = Just top_slot + | otherwise + = find_block slots + where -- The stack grows downwards, with increasing virtual offsets. + -- Therefore, the address of a multi-word object is the *highest* + -- virtual offset it occupies (top_slot below). + top_slot = slot+size-1 + + delete_block free_stk slot = [ s | s <- free_stk, + (s<=slot-size) || (s>slot) ] + -- Retain slots which are not in the range + -- slot-size+1..slot +\end{code} + +Allocate a chunk ON TOP OF the stack. + +\begin{code} +allocStackTop :: WordOff -> FCode VirtualSpOffset +allocStackTop size + = do { stk_usg <- getStkUsage + ; let push_virt_sp = virtSp stk_usg + size + ; setStkUsage (stk_usg { virtSp = push_virt_sp, + hwSp = hwSp stk_usg `max` push_virt_sp }) + ; return push_virt_sp } +\end{code} + +Pop some words from the current top of stack. This is used for +de-allocating the return address in a case alternative. + +\begin{code} +deAllocStackTop :: WordOff -> FCode VirtualSpOffset +deAllocStackTop size + = do { stk_usg <- getStkUsage + ; let pop_virt_sp = virtSp stk_usg - size + ; setStkUsage (stk_usg { virtSp = pop_virt_sp }) + ; return pop_virt_sp } +\end{code} + +\begin{code} +adjustStackHW :: VirtualSpOffset -> Code +adjustStackHW offset + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg { hwSp = hwSp stk_usg `max` offset }) } +\end{code} + +A knot-tying beast. + +\begin{code} +getFinalStackHW :: (VirtualSpOffset -> Code) -> Code +getFinalStackHW fcode + = do { fixC (\hw_sp -> do + { fcode hw_sp + ; stk_usg <- getStkUsage + ; return (hwSp stk_usg) }) + ; return () } +\end{code} + +\begin{code} +setStackFrame :: VirtualSpOffset -> Code +setStackFrame offset + = do { stk_usg <- getStkUsage + ; setStkUsage (stk_usg { frameSp = offset }) } + +getStackFrame :: FCode VirtualSpOffset +getStackFrame + = do { stk_usg <- getStkUsage + ; return (frameSp stk_usg) } +\end{code} + + +%******************************************************** +%* * +%* Setting up update frames * +%* * +%******************************************************** + +@pushUpdateFrame@ $updatee$ pushes a general update frame which +points to $updatee$ as the thing to be updated. It is only used +when a thunk has just been entered, so the (real) stack pointers +are guaranteed to be nicely aligned with the top of stack. +@pushUpdateFrame@ adjusts the virtual and tail stack pointers +to reflect the frame pushed. 
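+The layout of the frame is simple: the update-frame info pointer goes in
+the lowest-addressed word, and the updatee sits
+@fixedHdrSize*wORD_SIZE + oFFSET_StgUpdateFrame_updatee@ bytes above it
+(see @off_updatee@ in the code below).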
+ +\begin{code} +pushUpdateFrame :: CmmExpr -> Code -> Code + +pushUpdateFrame updatee code + = do { +#ifdef DEBUG + EndOfBlockInfo _ sequel <- getEndOfBlockInfo ; + ASSERT(case sequel of { OnStack -> True; _ -> False}) +#endif + + allocStackTop (fixedHdrSize + + sIZEOF_StgUpdateFrame_NoHdr `quot` wORD_SIZE) + ; vsp <- getVirtSp + ; setStackFrame vsp + ; frame_addr <- getSpRelOffset vsp + -- The location of the lowest-address + -- word of the update frame itself + + ; setEndOfBlockInfo (EndOfBlockInfo vsp UpdateCode) $ + do { emitPushUpdateFrame frame_addr updatee + ; code } + } + +emitPushUpdateFrame :: CmmExpr -> CmmExpr -> Code +emitPushUpdateFrame frame_addr updatee = do + stmtsC [ -- Set the info word + CmmStore frame_addr (mkLblExpr mkUpdInfoLabel) + , -- And the updatee + CmmStore (cmmOffsetB frame_addr off_updatee) updatee ] + initUpdFrameProf frame_addr + +off_updatee :: ByteOff +off_updatee = fixedHdrSize*wORD_SIZE + oFFSET_StgUpdateFrame_updatee +\end{code} + + +%************************************************************************ +%* * +\subsection[CgStackery-free]{Free stack slots} +%* * +%************************************************************************ + +Explicitly free some stack space. + +\begin{code} +freeStackSlots :: [VirtualSpOffset] -> Code +freeStackSlots extra_free + = do { stk_usg <- getStkUsage + ; let all_free = addFreeSlots (freeStk stk_usg) (sortLe (<=) extra_free) + ; let (new_vsp, new_free) = trim (virtSp stk_usg) all_free + ; setStkUsage (stk_usg { virtSp = new_vsp, freeStk = new_free }) } + +addFreeSlots :: [VirtualSpOffset] -> [VirtualSpOffset] -> [VirtualSpOffset] +-- Merge the two, assuming both are in increasing order +addFreeSlots cs [] = cs +addFreeSlots [] ns = ns +addFreeSlots (c:cs) (n:ns) + | c < n = c : addFreeSlots cs (n:ns) + | otherwise = n : addFreeSlots (c:cs) ns + +trim :: VirtualSpOffset -> [VirtualSpOffset] -> (VirtualSpOffset, [VirtualSpOffset]) +-- Try to trim back the virtual stack pointer, where there is a +-- continuous bunch of free slots at the end of the free list +trim vsp [] = (vsp, []) +trim vsp (slot:slots) + = case trim vsp slots of + (vsp', []) + | vsp' < slot -> pprTrace "trim: strange" (ppr vsp <+> ppr (slot:slots)) + (vsp', []) + | vsp' == slot -> (vsp'-1, []) + | otherwise -> (vsp', [slot]) + (vsp', slots') -> (vsp', slot:slots') +\end{code} diff --git a/compiler/codeGen/CgTailCall.lhs b/compiler/codeGen/CgTailCall.lhs new file mode 100644 index 0000000000..dd7327b745 --- /dev/null +++ b/compiler/codeGen/CgTailCall.lhs @@ -0,0 +1,455 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +% $Id: CgTailCall.lhs,v 1.43 2005/06/21 10:44:41 simonmar Exp $ +% +%******************************************************** +%* * +\section[CgTailCall]{Tail calls: converting @StgApps@} +%* * +%******************************************************** + +\begin{code} +module CgTailCall ( + cgTailCall, performTailCall, + performReturn, performPrimReturn, + emitKnownConReturnCode, emitAlgReturnCode, + returnUnboxedTuple, ccallReturnUnboxedTuple, + pushUnboxedTuple, + tailCallPrimOp, + + pushReturnAddress + ) where + +#include "HsVersions.h" + +import CgMonad +import CgBindery ( getArgAmodes, getCgIdInfo, CgIdInfo, maybeLetNoEscape, + idInfoToAmode, cgIdInfoId, cgIdInfoLF, + cgIdInfoArgRep ) +import CgInfoTbls ( entryCode, emitDirectReturnInstr, dataConTagZ, + emitVectoredReturnInstr, closureInfoPtr ) +import CgCallConv +import CgStackery ( setRealSp, mkStkAmodes, adjustStackHW, + getSpRelOffset ) +import 
CgHeapery ( setRealHp, getHpRelOffset ) +import CgUtils ( emitSimultaneously ) +import CgTicky +import ClosureInfo +import SMRep ( CgRep, isVoidArg, separateByPtrFollowness ) +import Cmm +import CmmUtils +import CLabel ( CLabel, mkRtsPrimOpLabel, mkSeqInfoLabel ) +import Type ( isUnLiftedType ) +import Id ( Id, idName, idUnique, idType ) +import DataCon ( DataCon, dataConTyCon ) +import StgSyn ( StgArg ) +import TyCon ( TyCon ) +import PrimOp ( PrimOp ) +import Outputable + +import Monad ( when ) + +----------------------------------------------------------------------------- +-- Tail Calls + +cgTailCall :: Id -> [StgArg] -> Code + +-- Here's the code we generate for a tail call. (NB there may be no +-- arguments, in which case this boils down to just entering a variable.) +-- +-- * Put args in the top locations of the stack. +-- * Adjust the stack ptr +-- * Make R1 point to the function closure if necessary. +-- * Perform the call. +-- +-- Things to be careful about: +-- +-- * Don't overwrite stack locations before you have finished with +-- them (remember you need the function and the as-yet-unmoved +-- arguments). +-- * Preferably, generate no code to replace x by x on the stack (a +-- common situation in tail-recursion). +-- * Adjust the stack high water mark appropriately. +-- +-- Treat unboxed locals exactly like literals (above) except use the addr +-- mode for the local instead of (CLit lit) in the assignment. + +cgTailCall fun args + = do { fun_info <- getCgIdInfo fun + + ; if isUnLiftedType (idType fun) + then -- Primitive return + ASSERT( null args ) + do { fun_amode <- idInfoToAmode fun_info + ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode } + + else -- Normal case, fun is boxed + do { arg_amodes <- getArgAmodes args + ; performTailCall fun_info arg_amodes noStmts } + } + + +-- ----------------------------------------------------------------------------- +-- The guts of a tail-call + +performTailCall + :: CgIdInfo -- The function + -> [(CgRep,CmmExpr)] -- Args + -> CmmStmts -- Pending simultaneous assignments + -- *** GUARANTEED to contain only stack assignments. + -> Code + +performTailCall fun_info arg_amodes pending_assts + | Just join_sp <- maybeLetNoEscape fun_info + = -- A let-no-escape is slightly different, because we + -- arrange the stack arguments into pointers and non-pointers + -- to make the heap check easier. The tail-call sequence + -- is very similar to returning an unboxed tuple, so we + -- share some code. + do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes + ; emitSimultaneously (pending_assts `plusStmts` arg_assts) + ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info)) + ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) } + + | otherwise + = do { fun_amode <- idInfoToAmode fun_info + ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode) + opt_node_asst | nodeMustPointToIt lf_info = node_asst + | otherwise = noStmts + ; EndOfBlockInfo sp _ <- getEndOfBlockInfo + ; hmods <- getHomeModules + + ; case (getCallMethod hmods fun_name lf_info (length arg_amodes)) of + + -- Node must always point to things we enter + EnterIt -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; let target = entryCode (closureInfoPtr (CmmReg nodeReg)) + ; doFinalJump sp False (stmtC (CmmJump target [])) } + + -- A function, but we have zero arguments. It is already in WHNF, + -- so we can just return it. + -- As with any return, Node must point to it. 
+ ReturnIt -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; doFinalJump sp False emitDirectReturnInstr } + + -- A real constructor. Don't bother entering it, + -- just do the right sort of return instead. + -- As with any return, Node must point to it. + ReturnCon con -> do + { emitSimultaneously (node_asst `plusStmts` pending_assts) + ; doFinalJump sp False (emitKnownConReturnCode con) } + + JumpToIt lbl -> do + { emitSimultaneously (opt_node_asst `plusStmts` pending_assts) + ; doFinalJump sp False (jumpToLbl lbl) } + + -- A slow function call via the RTS apply routines + -- Node must definitely point to the thing + SlowCall -> do + { when (not (null arg_amodes)) $ do + { if (isKnownFun lf_info) + then tickyKnownCallTooFewArgs + else tickyUnknownCall + ; tickySlowCallPat (map fst arg_amodes) + } + + ; let (apply_lbl, args, extra_args) + = constructSlowCall arg_amodes + + ; directCall sp apply_lbl args extra_args + (node_asst `plusStmts` pending_assts) + } + + -- A direct function call (possibly with some left-over arguments) + DirectEntry lbl arity -> do + { if arity == length arg_amodes + then tickyKnownCallExact + else do tickyKnownCallExtraArgs + tickySlowCallPat (map fst (drop arity arg_amodes)) + + ; let + -- The args beyond the arity go straight on the stack + (arity_args, extra_args) = splitAt arity arg_amodes + + ; directCall sp lbl arity_args extra_args + (opt_node_asst `plusStmts` pending_assts) + } + } + where + fun_name = idName (cgIdInfoId fun_info) + lf_info = cgIdInfoLF fun_info + + + +directCall sp lbl args extra_args assts = do + let + -- First chunk of args go in registers + (reg_arg_amodes, stk_args) = assignCallRegs args + + -- Any "extra" arguments are placed in frames on the + -- stack after the other arguments. + slow_stk_args = slowArgs extra_args + + reg_assts = assignToRegs reg_arg_amodes + -- + (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args) + + emitSimultaneously (reg_assts `plusStmts` + stk_assts `plusStmts` + assts) + + doFinalJump final_sp False (jumpToLbl lbl) + +-- ----------------------------------------------------------------------------- +-- The final clean-up before we do a jump at the end of a basic block. +-- This code is shared by tail-calls and returns. + +doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code +doFinalJump final_sp is_let_no_escape jump_code + = do { -- Adjust the high-water mark if necessary + adjustStackHW final_sp + + -- Push a return address if necessary (after the assignments + -- above, in case we clobber a live stack location) + -- + -- DONT push the return address when we're about to jump to a + -- let-no-escape: the final tail call in the let-no-escape + -- will do this. + ; eob <- getEndOfBlockInfo + ; whenC (not is_let_no_escape) (pushReturnAddress eob) + + -- Final adjustment of Sp/Hp + ; adjustSpAndHp final_sp + + -- and do the jump + ; jump_code } + +-- ----------------------------------------------------------------------------- +-- A general return (just a special case of doFinalJump, above) + +performReturn :: Code -- The code to execute to actually do the return + -> Code + +performReturn finish_code + = do { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo + ; doFinalJump args_sp False{-not a LNE-} finish_code } + +-- ----------------------------------------------------------------------------- +-- Primitive Returns +-- Just load the return value into the right register, and return. 
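+-- For instance (an illustrative case), an Int#-valued return would
+-- assign the value to the register chosen by dataReturnConvPrim for
+-- its CgRep and then emit a direct return instruction.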
+
+performPrimReturn :: CgRep -> CmmExpr  -- The thing to return
+                  -> Code
+performPrimReturn rep amode
+  = do { whenC (not (isVoidArg rep))
+               (stmtC (CmmAssign ret_reg amode))
+       ; performReturn emitDirectReturnInstr }
+  where
+    ret_reg = dataReturnConvPrim rep
+
+-- -----------------------------------------------------------------------------
+-- Algebraic constructor returns
+
+-- Constructor is built on the heap; Node is set.
+-- All that remains is to do the right sort of jump.
+
+emitKnownConReturnCode :: DataCon -> Code
+emitKnownConReturnCode con
+  = emitAlgReturnCode (dataConTyCon con)
+                      (CmmLit (mkIntCLit (dataConTagZ con)))
+                        -- emitAlgReturnCode requires zero-indexed tag
+
+emitAlgReturnCode :: TyCon -> CmmExpr -> Code
+-- emitAlgReturnCode is used both by emitKnownConReturnCode,
+-- and by PrimOps that return enumerated types (i.e.
+-- all the comparison operators).
+emitAlgReturnCode tycon tag
+  = do { case ctrlReturnConvAlg tycon of
+           VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz
+                                       ; emitVectoredReturnInstr tag }
+           UnvectoredReturn _    -> emitDirectReturnInstr
+       }
+
+
+-- ---------------------------------------------------------------------------
+-- Unboxed tuple returns
+
+-- These are a bit like a normal tail call, except that:
+--
+--   - The tail-call target is an info table on the stack
+--
+--   - We separate stack arguments into pointers and non-pointers,
+--     to make it easier to leave things in a sane state for a heap check.
+--     This is OK because we can never partially-apply an unboxed tuple,
+--     unlike a function.  The same technique is used when calling
+--     let-no-escape functions, because they also can't be partially
+--     applied.
+
+returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
+returnUnboxedTuple amodes
+  = do { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
+       ; tickyUnboxedTupleReturn (length amodes)
+       ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
+       ; emitSimultaneously assts
+       ; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr }
+
+pushUnboxedTuple :: VirtualSpOffset             -- Sp at which to start pushing
+                 -> [(CgRep, CmmExpr)]          -- amodes of the components
+                 -> FCode (VirtualSpOffset,     -- final Sp
+                           CmmStmts)            -- assignments (regs+stack)
+
+pushUnboxedTuple sp []
+  = return (sp, noStmts)
+pushUnboxedTuple sp amodes
+  = do { let (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
+
+             -- separate the rest of the args into pointers and non-pointers
+             (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
+             reg_arg_assts = assignToRegs reg_arg_amodes
+
+             -- push ptrs, then nonptrs, on the stack
+       ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
+       ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
+
+       ; returnFC (final_sp,
+                   reg_arg_assts `plusStmts`
+                   ptr_assts `plusStmts` nptr_assts) }
+
+
+-- -----------------------------------------------------------------------------
+-- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
+-- we want to do things in a slightly different order to normal:
+--
+--              - push return address
+--              - adjust stack pointer
+--              - r = call(args...)
+--              - assign regs for unboxed tuple (usually just R1 = r)
+--              - return to continuation
+--
+-- The return address (i.e. stack frame) must be on the stack before
+-- doing the call in case the call ends up in the garbage collector.
+--
+-- Sadly, the information about the continuation is lost after we push it
+-- (in order to avoid pushing it again), so we end up doing a needless
+-- indirect jump (ToDo).
+ +ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code +ccallReturnUnboxedTuple amodes before_jump + = do { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo + + -- Push a return address if necessary + ; pushReturnAddress eob + ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack) + (do { adjustSpAndHp args_sp + ; before_jump + ; returnUnboxedTuple amodes }) + } + +-- ----------------------------------------------------------------------------- +-- Calling an out-of-line primop + +tailCallPrimOp :: PrimOp -> [StgArg] -> Code +tailCallPrimOp op args + = do { -- We're going to perform a normal-looking tail call, + -- except that *all* the arguments will be in registers. + -- Hence the ASSERT( null leftovers ) + arg_amodes <- getArgAmodes args + ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes + jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op) + + ; ASSERT(null leftovers) -- no stack-resident args + emitSimultaneously (assignToRegs arg_regs) + + ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo + ; doFinalJump args_sp False{-not a LNE-} jump_to_primop } + +-- ----------------------------------------------------------------------------- +-- Return Addresses + +-- We always push the return address just before performing a tail call +-- or return. The reason we leave it until then is because the stack +-- slot that the return address is to go into might contain something +-- useful. +-- +-- If the end of block info is 'CaseAlts', then we're in the scrutinee of a +-- case expression and the return address is still to be pushed. +-- +-- There are cases where it doesn't look necessary to push the return +-- address: for example, just before doing a return to a known +-- continuation. However, the continuation will expect to find the +-- return address on the stack in case it needs to do a heap check. + +pushReturnAddress :: EndOfBlockInfo -> Code + +pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False)) + = do { sp_rel <- getSpRelOffset args_sp + ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) } + +-- For a polymorphic case, we have two return addresses to push: the case +-- return, and stg_seq_frame_info which turns a possible vectored return +-- into a direct one. +pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True)) + = do { sp_rel <- getSpRelOffset (args_sp-1) + ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) + ; sp_rel <- getSpRelOffset args_sp + ; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) } + +pushReturnAddress _ = nopC + +-- ----------------------------------------------------------------------------- +-- Misc. + +jumpToLbl :: CLabel -> Code +-- Passes no argument to the destination procedure +jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}]) + +assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts +assignToRegs reg_args + = mkStmts [ CmmAssign (CmmGlobal reg_id) expr + | (expr, reg_id) <- reg_args ] +\end{code} + + +%************************************************************************ +%* * +\subsection[CgStackery-adjust]{Adjusting the stack pointers} +%* * +%************************************************************************ + +This function adjusts the stack and heap pointers just before a tail +call or return. The stack pointer is adjusted to its final position +(i.e. to point to the last argument for a tail call, or the activation +record for a return). 
The heap pointer may be moved backwards, in +cases where we overallocated at the beginning of the basic block (see +CgCase.lhs for discussion). + +These functions {\em do not} deal with high-water-mark adjustment. +That's done by functions which allocate stack space. + +\begin{code} +adjustSpAndHp :: VirtualSpOffset -- New offset for Arg stack ptr + -> Code +adjustSpAndHp newRealSp + = do { -- Adjust stack, if necessary. + -- NB: the conditional on the monad-carried realSp + -- is out of line (via codeOnly), to avoid a black hole + ; new_sp <- getSpRelOffset newRealSp + ; checkedAbsC (CmmAssign spReg new_sp) -- Will generate no code in the case + ; setRealSp newRealSp -- where realSp==newRealSp + + -- Adjust heap. The virtual heap pointer may be less than the real Hp + -- because the latter was advanced to deal with the worst-case branch + -- of the code, and we may be in a better-case branch. In that case, + -- move the real Hp *back* and retract some ticky allocation count. + ; hp_usg <- getHpUsage + ; let rHp = realHp hp_usg + vHp = virtHp hp_usg + ; new_hp <- getHpRelOffset vHp + ; checkedAbsC (CmmAssign hpReg new_hp) -- Generates nothing when vHp==rHp + ; tickyAllocHeap (vHp - rHp) -- ...ditto + ; setRealHp vHp + } +\end{code} diff --git a/compiler/codeGen/CgTicky.hs b/compiler/codeGen/CgTicky.hs new file mode 100644 index 0000000000..3e72981c50 --- /dev/null +++ b/compiler/codeGen/CgTicky.hs @@ -0,0 +1,370 @@ +----------------------------------------------------------------------------- +-- +-- Code generation for ticky-ticky profiling +-- +-- (c) The University of Glasgow 2004 +-- +----------------------------------------------------------------------------- + +module CgTicky ( + emitTickyCounter, + + tickyDynAlloc, + tickyAllocHeap, + tickyAllocPrim, + tickyAllocThunk, + tickyAllocPAP, + + tickyPushUpdateFrame, + tickyUpdateFrameOmitted, + + tickyEnterDynCon, + tickyEnterStaticCon, + tickyEnterViaNode, + + tickyEnterFun, + tickyEnterThunk, + + tickyUpdateBhCaf, + tickyBlackHole, + tickyUnboxedTupleReturn, tickyVectoredReturn, + tickyReturnOldCon, tickyReturnNewCon, + + tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, + tickyUnknownCall, tickySlowCallPat, + + staticTickyHdr, + ) where + +#include "HsVersions.h" +#include "../includes/DerivedConstants.h" + -- For REP_xxx constants, which are MachReps + +import ClosureInfo ( ClosureInfo, closureSize, slopSize, closureSMRep, + closureUpdReqd, closureName, isStaticClosure ) +import CgUtils +import CgMonad +import SMRep ( ClosureType(..), smRepClosureType, CgRep ) + +import Cmm +import MachOp +import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr, cmmIndexExpr ) +import CLabel ( CLabel, mkRtsDataLabel, mkRednCountsLabel ) + +import Name ( isInternalName ) +import Id ( Id, idType ) +import StaticFlags ( opt_DoTickyProfiling ) +import BasicTypes ( Arity ) +import FastString ( FastString, mkFastString, LitString ) +import Constants -- Lots of field offsets +import Outputable + +-- Turgid imports for showTypeCategory +import PrelNames +import TcType ( Type, isDictTy, tcSplitTyConApp_maybe, + tcSplitFunTy_maybe ) +import TyCon ( isPrimTyCon, isTupleTyCon, isEnumerationTyCon, + maybeTyConSingleCon ) +import Maybe + +----------------------------------------------------------------------------- +-- +-- Ticky-ticky profiling +-- +----------------------------------------------------------------------------- + +staticTickyHdr :: [CmmLit] +-- The ticky header words in a static closure +-- Was SET_STATIC_TICKY_HDR +staticTickyHdr + 
| not opt_DoTickyProfiling = []
+  | otherwise                = [zeroCLit]
+
+emitTickyCounter :: ClosureInfo -> [Id] -> Int -> Code
+emitTickyCounter cl_info args on_stk
+  = ifTicky $
+    do  { mod_name <- moduleName
+        ; fun_descr_lit <- mkStringCLit (fun_descr mod_name)
+        ; arg_descr_lit <- mkStringCLit arg_descr
+        ; emitDataLits ticky_ctr_label  -- Must match layout of StgEntCounter
+            [ CmmInt 0 I16,
+              CmmInt (fromIntegral (length args)) I16,  -- Arity
+              CmmInt (fromIntegral on_stk) I16,         -- Words passed on stack
+              CmmInt 0 I16,                             -- 2-byte gap
+              fun_descr_lit,
+              arg_descr_lit,
+              zeroCLit,         -- Entry count
+              zeroCLit,         -- Allocs
+              zeroCLit          -- Link
+            ] }
+  where
+    name = closureName cl_info
+    ticky_ctr_label = mkRednCountsLabel name
+    arg_descr = map (showTypeCategory . idType) args
+    fun_descr mod_name = ppr_for_ticky_name mod_name name
+
+-- When printing the name of a thing in a ticky file, we want to
+-- give the module name even for *local* things.   We print
+-- just "x (M)" rather than "M.x" to distinguish them from the global kind.
+ppr_for_ticky_name mod_name name
+  | isInternalName name = showSDocDebug (ppr name <+> (parens (ppr mod_name)))
+  | otherwise           = showSDocDebug (ppr name)
+
+-- -----------------------------------------------------------------------------
+-- Ticky stack frames
+
+tickyPushUpdateFrame    = ifTicky $ bumpTickyCounter SLIT("UPDF_PUSHED_ctr")
+tickyUpdateFrameOmitted = ifTicky $ bumpTickyCounter SLIT("UPDF_OMITTED_ctr")
+
+-- -----------------------------------------------------------------------------
+-- Ticky entries
+
+tickyEnterDynCon      = ifTicky $ bumpTickyCounter SLIT("ENT_DYN_CON_ctr")
+tickyEnterDynThunk    = ifTicky $ bumpTickyCounter SLIT("ENT_DYN_THK_ctr")
+tickyEnterStaticCon   = ifTicky $ bumpTickyCounter SLIT("ENT_STATIC_CON_ctr")
+tickyEnterStaticThunk = ifTicky $ bumpTickyCounter SLIT("ENT_STATIC_THK_ctr")
+tickyEnterViaNode     = ifTicky $ bumpTickyCounter SLIT("ENT_VIA_NODE_ctr")
+
+tickyEnterThunk :: ClosureInfo -> Code
+tickyEnterThunk cl_info
+  | isStaticClosure cl_info = tickyEnterStaticThunk
+  | otherwise               = tickyEnterDynThunk
+
+tickyBlackHole :: Bool{-updatable-} -> Code
+tickyBlackHole updatable
+  = ifTicky (bumpTickyCounter ctr)
+  where
+    ctr | updatable = SLIT("UPD_BH_SINGLE_ENTRY_ctr")
+        | otherwise = SLIT("UPD_BH_UPDATABLE_ctr")
+
+tickyUpdateBhCaf cl_info
+  = ifTicky (bumpTickyCounter ctr)
+  where
+    ctr | closureUpdReqd cl_info = SLIT("UPD_CAF_BH_SINGLE_ENTRY_ctr")
+        | otherwise              = SLIT("UPD_CAF_BH_UPDATABLE_ctr")
+
+tickyEnterFun :: ClosureInfo -> Code
+tickyEnterFun cl_info
+  = ifTicky $
+    do  { bumpTickyCounter ctr
+        ; fun_ctr_lbl <- getTickyCtrLabel
+        ; registerTickyCtr fun_ctr_lbl
+        ; bumpTickyCounter' fun_ctr_lbl }
+  where
+    ctr | isStaticClosure cl_info = SLIT("TICK_ENT_STATIC_FUN_DIRECT")
+        | otherwise               = SLIT("TICK_ENT_DYN_FUN_DIRECT")
+
+registerTickyCtr :: CLabel -> Code
+-- Register a ticky counter
+--   if ( ! 
f_ct.registeredp ) { +-- f_ct.link = ticky_entry_ctrs; /* hook this one onto the front of the list */ +-- ticky_entry_ctrs = & (f_ct); /* mark it as "registered" */ +-- f_ct.registeredp = 1 } +registerTickyCtr ctr_lbl + = emitIf test (stmtsC register_stmts) + where + test = CmmMachOp (MO_Not I16) + [CmmLoad (CmmLit (cmmLabelOffB ctr_lbl + oFFSET_StgEntCounter_registeredp)) I16] + register_stmts + = [ CmmStore (CmmLit (cmmLabelOffB ctr_lbl oFFSET_StgEntCounter_link)) + (CmmLoad ticky_entry_ctrs wordRep) + , CmmStore ticky_entry_ctrs (mkLblExpr ctr_lbl) + , CmmStore (CmmLit (cmmLabelOffB ctr_lbl + oFFSET_StgEntCounter_registeredp)) + (CmmLit (mkIntCLit 1)) ] + ticky_entry_ctrs = mkLblExpr (mkRtsDataLabel SLIT("ticky_entry_ctrs")) + +tickyReturnOldCon, tickyReturnNewCon :: Arity -> Code +tickyReturnOldCon arity + = ifTicky $ do { bumpTickyCounter SLIT("RET_OLD_ctr") + ; bumpHistogram SLIT("RET_OLD_hst") arity } +tickyReturnNewCon arity + | not opt_DoTickyProfiling = nopC + | otherwise + = ifTicky $ do { bumpTickyCounter SLIT("RET_NEW_ctr") + ; bumpHistogram SLIT("RET_NEW_hst") arity } + +tickyUnboxedTupleReturn :: Int -> Code +tickyUnboxedTupleReturn arity + = ifTicky $ do { bumpTickyCounter SLIT("RET_UNBOXED_TUP_ctr") + ; bumpHistogram SLIT("RET_UNBOXED_TUP_hst") arity } + +tickyVectoredReturn :: Int -> Code +tickyVectoredReturn family_size + = ifTicky $ do { bumpTickyCounter SLIT("VEC_RETURN_ctr") + ; bumpHistogram SLIT("RET_VEC_RETURN_hst") family_size } + +-- ----------------------------------------------------------------------------- +-- Ticky calls + +-- Ticks at a *call site*: +tickyKnownCallTooFewArgs = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_TOO_FEW_ARGS_ctr") +tickyKnownCallExact = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_ctr") +tickyKnownCallExtraArgs = ifTicky $ bumpTickyCounter SLIT("KNOWN_CALL_EXTRA_ctr") +tickyUnknownCall = ifTicky $ bumpTickyCounter SLIT("UNKNOWN_CALL_ctr") + +-- Tick for the call pattern at slow call site (i.e. in addition to +-- tickyUnknownCall, tickyKnownCallExtraArgs, etc.) +tickySlowCallPat :: [CgRep] -> Code +tickySlowCallPat args = return () +{- LATER: (introduces recursive module dependency now). 
+   case callPattern args of
+     (str, True)  -> bumpTickyCounter' (mkRtsSlowTickyCtrLabel str)
+     (str, False) -> bumpTickyCounter  SLIT("TICK_SLOW_CALL_OTHER")
+
+callPattern :: [CgRep] -> (String,Bool)
+callPattern reps
+  | match == length reps = (chars, True)
+  | otherwise            = (chars, False)
+  where (_,match) = findMatch reps
+        chars     = map argChar reps
+
+argChar VoidArg   = 'v'
+argChar PtrArg    = 'p'
+argChar NonPtrArg = 'n'
+argChar LongArg   = 'l'
+argChar FloatArg  = 'f'
+argChar DoubleArg = 'd'
+-}
+
+-- -----------------------------------------------------------------------------
+-- Ticky allocation
+
+tickyDynAlloc :: ClosureInfo -> Code
+-- Called when doing a dynamic heap allocation
+tickyDynAlloc cl_info
+  = ifTicky $
+    case smRepClosureType (closureSMRep cl_info) of
+      Constr        -> tick_alloc_con
+      ConstrNoCaf   -> tick_alloc_con
+      Fun           -> tick_alloc_fun
+      Thunk         -> tick_alloc_thk
+      ThunkSelector -> tick_alloc_thk
+  where
+        -- will be needed when we fill in stubs
+    cl_size   = closureSize cl_info
+    slop_size = slopSize cl_info
+
+    tick_alloc_thk
+        | closureUpdReqd cl_info = tick_alloc_up_thk
+        | otherwise              = tick_alloc_se_thk
+
+    tick_alloc_con    = panic "ToDo: tick_alloc"
+    tick_alloc_fun    = panic "ToDo: tick_alloc"
+    tick_alloc_up_thk = panic "ToDo: tick_alloc"
+    tick_alloc_se_thk = panic "ToDo: tick_alloc"
+
+tickyAllocPrim :: CmmExpr -> CmmExpr -> CmmExpr -> Code
+tickyAllocPrim hdr goods slop = ifTicky $ panic "ToDo: tickyAllocPrim"
+
+tickyAllocThunk :: CmmExpr -> CmmExpr -> Code
+tickyAllocThunk goods slop = ifTicky $ panic "ToDo: tickyAllocThunk"
+
+tickyAllocPAP :: CmmExpr -> CmmExpr -> Code
+tickyAllocPAP goods slop = ifTicky $ panic "ToDo: tickyAllocPAP"
+
+tickyAllocHeap :: VirtualHpOffset -> Code
+-- Called when doing a heap check [TICK_ALLOC_HEAP]
+tickyAllocHeap hp
+  = ifTicky $
+    do  { ticky_ctr <- getTickyCtrLabel
+        ; stmtsC $
+          if hp == 0 then []            -- Inside the stmtC to avoid control
+          else [                        -- dependency on the argument
+                -- Bump the allocation count in the StgEntCounter
+                addToMem REP_StgEntCounter_allocs
+                         (CmmLit (cmmLabelOffB ticky_ctr
+                                    oFFSET_StgEntCounter_allocs)) hp,
+                -- Bump ALLOC_HEAP_ctr
+                addToMemLbl cLongRep (mkRtsDataLabel SLIT("ALLOC_HEAP_ctr")) 1,
+                -- Bump ALLOC_HEAP_tot
+                addToMemLbl cLongRep (mkRtsDataLabel SLIT("ALLOC_HEAP_tot")) hp] }
+
+-- -----------------------------------------------------------------------------
+-- Ticky utils
+
+ifTicky :: Code -> Code
+ifTicky code
+  | opt_DoTickyProfiling = code
+  | otherwise            = nopC
+
+addToMemLbl :: MachRep -> CLabel -> Int -> CmmStmt
+addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n
+
+-- All the ticky-ticky counters are declared "unsigned long" in C
+bumpTickyCounter :: LitString -> Code
+bumpTickyCounter lbl = bumpTickyCounter' (mkRtsDataLabel lbl)
+
+bumpTickyCounter' :: CLabel -> Code
+bumpTickyCounter' lbl = stmtC (addToMemLbl cLongRep lbl 1)
+
+addToMemLong = addToMem cLongRep
+
+bumpHistogram :: LitString -> Int -> Code
+bumpHistogram lbl n
+  = bumpHistogramE lbl (CmmLit (CmmInt (fromIntegral n) cLongRep))
+
+bumpHistogramE :: LitString -> CmmExpr -> Code
+bumpHistogramE lbl n
+  = do  t <- newTemp cLongRep
+        stmtC (CmmAssign t n)
+        emitIf (CmmMachOp (MO_U_Le cLongRep) [CmmReg t, eight]) $
+                stmtC (CmmAssign t eight)
+        stmtC (addToMemLong (cmmIndexExpr cLongRep
+                                (CmmLit (CmmLabel (mkRtsDataLabel lbl)))
+                                (CmmReg t))
+                            1)
+  where
+    eight = CmmLit (CmmInt 8 cLongRep)
+
+------------------------------------------------------------------
+-- Showing the "type category" for ticky-ticky profiling
+
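+-- A few illustrative examples of the encoding implemented below: an
+-- enumeration type maps to 'E', a boxed tuple to 'T', a function type
+-- to '>', a list to 'L', and a dictionary to '+'.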
+showTypeCategory :: Type -> Char
+  {-    {C,I,F,D}   char, int, float, double
+        T           tuple
+        S           other single-constructor type
+        {c,i,f,d}   unboxed ditto
+        t           *unpacked* tuple
+        s           *unpacked* single-cons...
+
+        v           void#
+        a           primitive array
+
+        E           enumeration type
+        +           dictionary, unless it's a ...
+        L           List
+        >           function
+        M           other (multi-constructor) data-con type
+        .           other type
+        -           reserved for others to mark as "uninteresting"
+    -}
+showTypeCategory ty
+  = if isDictTy ty
+    then '+'
+    else
+      case tcSplitTyConApp_maybe ty of
+        Nothing -> if isJust (tcSplitFunTy_maybe ty)
+                   then '>'
+                   else '.'
+
+        Just (tycon, _) ->
+          let utc = getUnique tycon in
+          if      utc == charDataConKey   then 'C'
+          else if utc == intDataConKey    then 'I'
+          else if utc == floatDataConKey  then 'F'
+          else if utc == doubleDataConKey then 'D'
+          else if utc == smallIntegerDataConKey ||
+                  utc == largeIntegerDataConKey then 'J'
+          else if utc == charPrimTyConKey then 'c'
+          else if (utc == intPrimTyConKey || utc == wordPrimTyConKey
+                || utc == addrPrimTyConKey) then 'i'
+          else if utc == floatPrimTyConKey  then 'f'
+          else if utc == doublePrimTyConKey then 'd'
+          else if isPrimTyCon tycon {- array, we hope -} then 'A'     -- Bogus
+          else if isEnumerationTyCon tycon then 'E'
+          else if isTupleTyCon tycon       then 'T'
+          else if isJust (maybeTyConSingleCon tycon) then 'S'
+          else if utc == listTyConKey      then 'L'
+          else 'M'  -- oh, well...
diff --git a/compiler/codeGen/CgUsages.hi-boot-5 b/compiler/codeGen/CgUsages.hi-boot-5
new file mode 100644
index 0000000000..abb98cec1a
--- /dev/null
+++ b/compiler/codeGen/CgUsages.hi-boot-5
@@ -0,0 +1,3 @@
+__interface CgUsages 1 0 where
+__export CgUsages getSpRelOffset;
+1 getSpRelOffset :: AbsCSyn.VirtualSpOffset -> CgMonad.FCode AbsCSyn.RegRelative ;
diff --git a/compiler/codeGen/CgUsages.hi-boot-6 b/compiler/codeGen/CgUsages.hi-boot-6
new file mode 100644
index 0000000000..9640603cfb
--- /dev/null
+++ b/compiler/codeGen/CgUsages.hi-boot-6
@@ -0,0 +1,3 @@
+module CgUsages where
+
+getSpRelOffset :: AbsCSyn.VirtualSpOffset -> CgMonad.FCode AbsCSyn.RegRelative
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
new file mode 100644
index 0000000000..2f69927db0
--- /dev/null
+++ b/compiler/codeGen/CgUtils.hs
@@ -0,0 +1,688 @@
+-----------------------------------------------------------------------------
+--
+-- Code generator utilities; mostly monadic
+--
+-- (c) The University of Glasgow 2004
+--
+-----------------------------------------------------------------------------
+
+module CgUtils (
+        addIdReps,
+        cgLit,
+        emitDataLits, emitRODataLits, emitIf, emitIfThenElse,
+        emitRtsCall, emitRtsCallWithVols, emitRtsCallWithResult,
+        assignTemp, newTemp,
+        emitSimultaneously,
+        emitSwitch, emitLitSwitch,
+        tagToClosure,
+
+        cmmAndWord, cmmOrWord, cmmNegate, cmmEqWord, cmmNeWord,
+        cmmOffsetExprW, cmmOffsetExprB,
+        cmmRegOffW, cmmRegOffB,
+        cmmLabelOffW, cmmLabelOffB,
+        cmmOffsetW, cmmOffsetB,
+        cmmOffsetLitW, cmmOffsetLitB,
+        cmmLoadIndexW,
+
+        addToMem, addToMemE,
+        mkWordCLit,
+        mkStringCLit,
+        packHalfWordsCLit,
+        blankWord
+  ) where
+
+#include "HsVersions.h"
+
+import CgMonad
+import TyCon            ( TyCon, tyConName )
+import Id               ( Id )
+import Constants        ( wORD_SIZE )
+import SMRep            ( CgRep, StgWord, hALF_WORD_SIZE_IN_BITS, ByteOff,
+                          WordOff, idCgRep )
+import PprCmm           ( {- instances -} )
+import Cmm
+import CLabel
+import CmmUtils
+import MachOp           ( MachRep(..), wordRep, MachOp(..), MachHint(..),
+                          mo_wordOr, mo_wordAnd, mo_wordNe, mo_wordEq,
+                          mo_wordULt, mo_wordUGt, mo_wordUGe, machRepByteWidth )
+import ForeignCall      ( 
CCallConv(..) ) +import Literal ( Literal(..) ) +import CLabel ( CLabel, mkStringLitLabel ) +import Digraph ( SCC(..), stronglyConnComp ) +import ListSetOps ( assocDefault ) +import Util ( filterOut, sortLe ) +import DynFlags ( DynFlags(..), HscTarget(..) ) +import Packages ( HomeModules ) +import FastString ( LitString, FastString, bytesFS ) +import Outputable + +import Char ( ord ) +import DATA_BITS +import DATA_WORD ( Word8 ) +import Maybe ( isNothing ) + +------------------------------------------------------------------------- +-- +-- Random small functions +-- +------------------------------------------------------------------------- + +addIdReps :: [Id] -> [(CgRep, Id)] +addIdReps ids = [(idCgRep id, id) | id <- ids] + +------------------------------------------------------------------------- +-- +-- Literals +-- +------------------------------------------------------------------------- + +cgLit :: Literal -> FCode CmmLit +cgLit (MachStr s) = mkByteStringCLit (bytesFS s) + -- not unpackFS; we want the UTF-8 byte stream. +cgLit other_lit = return (mkSimpleLit other_lit) + +mkSimpleLit :: Literal -> CmmLit +mkSimpleLit (MachChar c) = CmmInt (fromIntegral (ord c)) wordRep +mkSimpleLit MachNullAddr = zeroCLit +mkSimpleLit (MachInt i) = CmmInt i wordRep +mkSimpleLit (MachInt64 i) = CmmInt i I64 +mkSimpleLit (MachWord i) = CmmInt i wordRep +mkSimpleLit (MachWord64 i) = CmmInt i I64 +mkSimpleLit (MachFloat r) = CmmFloat r F32 +mkSimpleLit (MachDouble r) = CmmFloat r F64 +mkSimpleLit (MachLabel fs ms) = CmmLabel (mkForeignLabel fs ms is_dyn) + where + is_dyn = False -- ToDo: fix me + +mkLtOp :: Literal -> MachOp +-- On signed literals we must do a signed comparison +mkLtOp (MachInt _) = MO_S_Lt wordRep +mkLtOp (MachFloat _) = MO_S_Lt F32 +mkLtOp (MachDouble _) = MO_S_Lt F64 +mkLtOp lit = MO_U_Lt (cmmLitRep (mkSimpleLit lit)) + + +--------------------------------------------------- +-- +-- Cmm data type functions +-- +--------------------------------------------------- + +----------------------- +-- The "B" variants take byte offsets +cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr +cmmRegOffB = cmmRegOff + +cmmOffsetB :: CmmExpr -> ByteOff -> CmmExpr +cmmOffsetB = cmmOffset + +cmmOffsetExprB :: CmmExpr -> CmmExpr -> CmmExpr +cmmOffsetExprB = cmmOffsetExpr + +cmmLabelOffB :: CLabel -> ByteOff -> CmmLit +cmmLabelOffB = cmmLabelOff + +cmmOffsetLitB :: CmmLit -> ByteOff -> CmmLit +cmmOffsetLitB = cmmOffsetLit + +----------------------- +-- The "W" variants take word offsets +cmmOffsetExprW :: CmmExpr -> CmmExpr -> CmmExpr +-- The second arg is a *word* offset; need to change it to bytes +cmmOffsetExprW e (CmmLit (CmmInt n _)) = cmmOffsetW e (fromInteger n) +cmmOffsetExprW e wd_off = cmmIndexExpr wordRep e wd_off + +cmmOffsetW :: CmmExpr -> WordOff -> CmmExpr +cmmOffsetW e n = cmmOffsetB e (wORD_SIZE * n) + +cmmRegOffW :: CmmReg -> WordOff -> CmmExpr +cmmRegOffW reg wd_off = cmmRegOffB reg (wd_off * wORD_SIZE) + +cmmOffsetLitW :: CmmLit -> WordOff -> CmmLit +cmmOffsetLitW lit wd_off = cmmOffsetLitB lit (wORD_SIZE * wd_off) + +cmmLabelOffW :: CLabel -> WordOff -> CmmLit +cmmLabelOffW lbl wd_off = cmmLabelOffB lbl (wORD_SIZE * wd_off) + +cmmLoadIndexW :: CmmExpr -> Int -> CmmExpr +cmmLoadIndexW base off + = CmmLoad (cmmOffsetW base off) wordRep + +----------------------- +cmmNeWord, cmmEqWord, cmmOrWord, cmmAndWord :: CmmExpr -> CmmExpr -> CmmExpr +cmmOrWord e1 e2 = CmmMachOp mo_wordOr [e1, e2] +cmmAndWord e1 e2 = CmmMachOp mo_wordAnd [e1, e2] +cmmNeWord e1 e2 = CmmMachOp mo_wordNe [e1, e2] +cmmEqWord 
e1 e2 = CmmMachOp mo_wordEq [e1, e2] +cmmULtWord e1 e2 = CmmMachOp mo_wordULt [e1, e2] +cmmUGeWord e1 e2 = CmmMachOp mo_wordUGe [e1, e2] +cmmUGtWord e1 e2 = CmmMachOp mo_wordUGt [e1, e2] + +cmmNegate :: CmmExpr -> CmmExpr +cmmNegate (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) +cmmNegate e = CmmMachOp (MO_S_Neg (cmmExprRep e)) [e] + +blankWord :: CmmStatic +blankWord = CmmUninitialised wORD_SIZE + +----------------------- +-- Making literals + +mkWordCLit :: StgWord -> CmmLit +mkWordCLit wd = CmmInt (fromIntegral wd) wordRep + +packHalfWordsCLit :: (Integral a, Integral b) => a -> b -> CmmLit +-- Make a single word literal in which the lower_half_word is +-- at the lower address, and the upper_half_word is at the +-- higher address +-- ToDo: consider using half-word lits instead +-- but be careful: that's vulnerable when reversed +packHalfWordsCLit lower_half_word upper_half_word +#ifdef WORDS_BIGENDIAN + = mkWordCLit ((fromIntegral lower_half_word `shiftL` hALF_WORD_SIZE_IN_BITS) + .|. fromIntegral upper_half_word) +#else + = mkWordCLit ((fromIntegral lower_half_word) + .|. (fromIntegral upper_half_word `shiftL` hALF_WORD_SIZE_IN_BITS)) +#endif + +-------------------------------------------------------------------------- +-- +-- Incrementing a memory location +-- +-------------------------------------------------------------------------- + +addToMem :: MachRep -- rep of the counter + -> CmmExpr -- Address + -> Int -- What to add (a word) + -> CmmStmt +addToMem rep ptr n = addToMemE rep ptr (CmmLit (CmmInt (toInteger n) rep)) + +addToMemE :: MachRep -- rep of the counter + -> CmmExpr -- Address + -> CmmExpr -- What to add (a word-typed expression) + -> CmmStmt +addToMemE rep ptr n + = CmmStore ptr (CmmMachOp (MO_Add rep) [CmmLoad ptr rep, n]) + +------------------------------------------------------------------------- +-- +-- Converting a closure tag to a closure for enumeration types +-- (this is the implementation of tagToEnum#). +-- +------------------------------------------------------------------------- + +tagToClosure :: HomeModules -> TyCon -> CmmExpr -> CmmExpr +tagToClosure hmods tycon tag + = CmmLoad (cmmOffsetExprW closure_tbl tag) wordRep + where closure_tbl = CmmLit (CmmLabel lbl) + lbl = mkClosureTableLabel hmods (tyConName tycon) + +------------------------------------------------------------------------- +-- +-- Conditionals and rts calls +-- +------------------------------------------------------------------------- + +emitIf :: CmmExpr -- Boolean + -> Code -- Then part + -> Code +-- Emit (if e then x) +-- ToDo: reverse the condition to avoid the extra branch instruction if possible +-- (some conditionals aren't reversible. eg. floating point comparisons cannot +-- be inverted because there exist some values for which both comparisons +-- return False, such as NaN.) 
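+--
+-- The code generated below has this shape (illustrative):
+--
+--         if (e) goto then_id;
+--         goto join_id;
+--     then_id:
+--         <then_part>
+--     join_id: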
+emitIf cond then_part
+  = do { then_id <- newLabelC
+       ; join_id <- newLabelC
+       ; stmtC (CmmCondBranch cond then_id)
+       ; stmtC (CmmBranch join_id)
+       ; labelC then_id
+       ; then_part
+       ; labelC join_id
+       }
+
+emitIfThenElse :: CmmExpr       -- Boolean
+               -> Code          -- Then part
+               -> Code          -- Else part
+               -> Code
+-- Emit (if e then x else y)
+emitIfThenElse cond then_part else_part
+  = do { then_id <- newLabelC
+       ; else_id <- newLabelC
+       ; join_id <- newLabelC
+       ; stmtC (CmmCondBranch cond then_id)
+       ; else_part
+       ; stmtC (CmmBranch join_id)
+       ; labelC then_id
+       ; then_part
+       ; labelC join_id
+       }
+
+emitRtsCall :: LitString -> [(CmmExpr,MachHint)] -> Code
+emitRtsCall fun args = emitRtsCall' [] fun args Nothing
+   -- The 'Nothing' says "save all global registers"
+
+emitRtsCallWithVols :: LitString -> [(CmmExpr,MachHint)] -> [GlobalReg] -> Code
+emitRtsCallWithVols fun args vols
+   = emitRtsCall' [] fun args (Just vols)
+
+emitRtsCallWithResult :: CmmReg -> MachHint -> LitString
+        -> [(CmmExpr,MachHint)] -> Code
+emitRtsCallWithResult res hint fun args
+   = emitRtsCall' [(res,hint)] fun args Nothing
+
+-- Make a call to an RTS C procedure
+emitRtsCall'
+   :: [(CmmReg,MachHint)]
+   -> LitString
+   -> [(CmmExpr,MachHint)]
+   -> Maybe [GlobalReg]
+   -> Code
+emitRtsCall' res fun args vols = stmtC (CmmCall target res args vols)
+  where
+    target   = CmmForeignCall fun_expr CCallConv
+    fun_expr = mkLblExpr (mkRtsCodeLabel fun)
+
+
+-------------------------------------------------------------------------
+--
+--      Strings generate a top-level data block
+--
+-------------------------------------------------------------------------
+
+emitDataLits :: CLabel -> [CmmLit] -> Code
+-- Emit a data-segment data block
+emitDataLits lbl lits
+  = emitData Data (CmmDataLabel lbl : map CmmStaticLit lits)
+
+emitRODataLits :: CLabel -> [CmmLit] -> Code
+-- Emit a read-only data block
+emitRODataLits lbl lits
+  = emitData section (CmmDataLabel lbl : map CmmStaticLit lits)
+  where section | any needsRelocation lits = RelocatableReadOnlyData
+                | otherwise                = ReadOnlyData
+        needsRelocation (CmmLabel _)      = True
+        needsRelocation (CmmLabelOff _ _) = True
+        needsRelocation _                 = False
+
+mkStringCLit :: String -> FCode CmmLit
+-- Make a global definition for the string,
+-- and return its label
+mkStringCLit str = mkByteStringCLit (map (fromIntegral.ord) str)
+
+mkByteStringCLit :: [Word8] -> FCode CmmLit
+mkByteStringCLit bytes
+  = do  { uniq <- newUnique
+        ; let lbl = mkStringLitLabel uniq
+        ; emitData ReadOnlyData [CmmDataLabel lbl, CmmString bytes]
+        ; return (CmmLabel lbl) }
+
+-------------------------------------------------------------------------
+--
+--      Assigning expressions to temporaries
+--
+-------------------------------------------------------------------------
+
+assignTemp :: CmmExpr -> FCode CmmExpr
+-- For a non-trivial expression, e, create a local
+-- variable and assign the expression to it
+assignTemp e
+  | isTrivialCmmExpr e = return e
+  | otherwise          = do { reg <- newTemp (cmmExprRep e)
+                            ; stmtC (CmmAssign reg e)
+                            ; return (CmmReg reg) }
+
+
+newTemp :: MachRep -> FCode CmmReg
+newTemp rep = do { uniq <- newUnique; return (CmmLocal (LocalReg uniq rep)) }
+
+
+-------------------------------------------------------------------------
+--
+--      Building case analysis
+--
+-------------------------------------------------------------------------
+
+emitSwitch
+        :: CmmExpr                -- Tag to switch on
+        -> [(ConTagZ, CgStmts)]   -- Tagged branches
+        -> Maybe CgStmts          -- Default branch (if any)
+        -> ConTagZ -> ConTagZ     -- Min and Max possible 
values; behaviour
+ -- outside this range is undefined
+ -> Code
+
+-- ONLY A DEFAULT BRANCH: no case analysis to do
+emitSwitch tag_expr [] (Just stmts) _ _
+ = emitCgStmts stmts
+
+-- Right, off we go
+emitSwitch tag_expr branches mb_deflt lo_tag hi_tag
+ = -- Just sort the branches before calling mk_switch
+ do { mb_deflt_id <-
+ case mb_deflt of
+ Nothing -> return Nothing
+ Just stmts -> do id <- forkCgStmts stmts; return (Just id)
+
+ ; dflags <- getDynFlags
+ ; let via_C | HscC <- hscTarget dflags = True
+ | otherwise = False
+
+ ; stmts <- mk_switch tag_expr (sortLe le branches)
+ mb_deflt_id lo_tag hi_tag via_C
+ ; emitCgStmts stmts
+ }
+ where
+ (t1,_) `le` (t2,_) = t1 <= t2
+
+
+mk_switch :: CmmExpr -> [(ConTagZ, CgStmts)]
+ -> Maybe BlockId -> ConTagZ -> ConTagZ -> Bool
+ -> FCode CgStmts
+
+-- SINGLETON TAG RANGE: no case analysis to do
+mk_switch tag_expr [(tag,stmts)] _ lo_tag hi_tag via_C
+ | lo_tag == hi_tag
+ = ASSERT( tag == lo_tag )
+ return stmts
+
+-- SINGLETON BRANCH, NO DEFAULT: no case analysis to do
+mk_switch tag_expr [(tag,stmts)] Nothing lo_tag hi_tag via_C
+ = return stmts
+ -- The simplifier might have eliminated a case
+ -- so we may have e.g. case xs of
+ -- [] -> e
+ -- In that situation we can be sure the (:) case
+ -- can't happen, so no need to test
+
+-- SINGLETON BRANCH: one equality check to do
+mk_switch tag_expr [(tag,stmts)] (Just deflt) lo_tag hi_tag via_C
+ = return (CmmCondBranch cond deflt `consCgStmt` stmts)
+ where
+ cond = cmmNeWord tag_expr (CmmLit (mkIntCLit tag))
+ -- We have lo_tag < hi_tag, but there's only one branch,
+ -- so there must be a default
+
+-- ToDo: we might want to check for the two-branch case, where one of
+-- the branches is the tag 0, because comparing '== 0' is likely to be
+-- more efficient than other kinds of comparison.
+
+-- DENSE TAG RANGE: use a switch statement.
+--
+-- We also use a switch unconditionally when compiling via C, because
+-- this will get emitted as a C switch statement and the C compiler
+-- should do a good job of optimising it. Also, older GCC versions
+-- (2.95 in particular) have problems compiling the complicated
+-- if-trees generated by this code, so compiling to a switch every
+-- time works around that problem.
+--
+mk_switch tag_expr branches mb_deflt lo_tag hi_tag via_C
+ | use_switch -- Use a switch
+ = do { branch_ids <- mapM forkCgStmts (map snd branches)
+ ; let
+ tagged_blk_ids = zip (map fst branches) (map Just branch_ids)
+
+ find_branch :: ConTagZ -> Maybe BlockId
+ find_branch i = assocDefault mb_deflt tagged_blk_ids i
+
+ -- NB. we have eliminated impossible branches at
+ -- either end of the range (see below), so the first
+ -- tag of a real branch is real_lo_tag (not lo_tag).
+ arms = [ find_branch i | i <- [real_lo_tag..real_hi_tag]] + + switch_stmt = CmmSwitch (cmmOffset tag_expr (- real_lo_tag)) arms + + ; ASSERT(not (all isNothing arms)) + return (oneCgStmt switch_stmt) + } + + -- if we can knock off a bunch of default cases with one if, then do so + | Just deflt <- mb_deflt, (lowest_branch - lo_tag) >= n_branches + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr + ; let cond = cmmULtWord tag_expr' (CmmLit (mkIntCLit lowest_branch)) + branch = CmmCondBranch cond deflt + ; stmts <- mk_switch tag_expr' branches mb_deflt + lowest_branch hi_tag via_C + ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts)) + } + + | Just deflt <- mb_deflt, (hi_tag - highest_branch) >= n_branches + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr + ; let cond = cmmUGtWord tag_expr' (CmmLit (mkIntCLit highest_branch)) + branch = CmmCondBranch cond deflt + ; stmts <- mk_switch tag_expr' branches mb_deflt + lo_tag highest_branch via_C + ; return (assign_tag `consCgStmt` (branch `consCgStmt` stmts)) + } + + | otherwise -- Use an if-tree + = do { (assign_tag, tag_expr') <- assignTemp' tag_expr + -- To avoid duplication + ; lo_stmts <- mk_switch tag_expr' lo_branches mb_deflt + lo_tag (mid_tag-1) via_C + ; hi_stmts <- mk_switch tag_expr' hi_branches mb_deflt + mid_tag hi_tag via_C + ; hi_id <- forkCgStmts hi_stmts + ; let cond = cmmUGeWord tag_expr' (CmmLit (mkIntCLit mid_tag)) + branch_stmt = CmmCondBranch cond hi_id + ; return (assign_tag `consCgStmt` (branch_stmt `consCgStmt` lo_stmts)) + } + -- we test (e >= mid_tag) rather than (e < mid_tag), because + -- the former works better when e is a comparison, and there + -- are two tags 0 & 1 (mid_tag == 1). In this case, the code + -- generator can reduce the condition to e itself without + -- having to reverse the sense of the comparison: comparisons + -- can't always be easily reversed (eg. floating + -- pt. comparisons). + where + use_switch = {- pprTrace "mk_switch" ( + ppr tag_expr <+> text "n_tags:" <+> int n_tags <+> + text "n_branches:" <+> int n_branches <+> + text "lo_tag: " <+> int lo_tag <+> + text "hi_tag: " <+> int hi_tag <+> + text "real_lo_tag: " <+> int real_lo_tag <+> + text "real_hi_tag: " <+> int real_hi_tag) $ -} + ASSERT( n_branches > 1 && n_tags > 1 ) + n_tags > 2 && (small || dense || via_C) + -- a 2-branch switch always turns into an if. + small = n_tags <= 4 + dense = n_branches > (n_tags `div` 2) + exhaustive = n_tags == n_branches + n_branches = length branches + + -- ignore default slots at each end of the range if there's + -- no default branch defined. + lowest_branch = fst (head branches) + highest_branch = fst (last branches) + + real_lo_tag + | isNothing mb_deflt = lowest_branch + | otherwise = lo_tag + + real_hi_tag + | isNothing mb_deflt = highest_branch + | otherwise = hi_tag + + n_tags = real_hi_tag - real_lo_tag + 1 + + -- INVARIANT: Provided hi_tag > lo_tag (which is true) + -- lo_tag <= mid_tag < hi_tag + -- lo_branches have tags < mid_tag + -- hi_branches have tags >= mid_tag + + (mid_tag,_) = branches !! (n_branches `div` 2) + -- 2 branches => n_branches `div` 2 = 1 + -- => branches !! 
1 gives the *second* tag
+ -- There are always at least 2 branches here
+
+ (lo_branches, hi_branches) = span is_lo branches
+ is_lo (t,_) = t < mid_tag
+
+
+assignTemp' e
+ | isTrivialCmmExpr e = return (CmmNop, e)
+ | otherwise = do { reg <- newTemp (cmmExprRep e)
+ ; return (CmmAssign reg e, CmmReg reg) }
+
+
+emitLitSwitch :: CmmExpr -- Tag to switch on
+ -> [(Literal, CgStmts)] -- Tagged branches
+ -> CgStmts -- Default branch (always)
+ -> Code -- Emit the code
+-- Used for general literals, whose size might not be a word,
+-- where there is always a default case, and where we don't know
+-- the range of values for certain. For simplicity we always generate a tree.
+--
+-- ToDo: for integers we could do better here, perhaps by generalising
+-- mk_switch and using that. --SDM 15/09/2004
+emitLitSwitch scrut [] deflt
+ = emitCgStmts deflt
+emitLitSwitch scrut branches deflt_blk
+ = do { scrut' <- assignTemp scrut
+ ; deflt_blk_id <- forkCgStmts deflt_blk
+ ; blk <- mk_lit_switch scrut' deflt_blk_id (sortLe le branches)
+ ; emitCgStmts blk }
+ where
+ le (t1,_) (t2,_) = t1 <= t2
+
+mk_lit_switch :: CmmExpr -> BlockId
+ -> [(Literal,CgStmts)]
+ -> FCode CgStmts
+mk_lit_switch scrut deflt_blk_id [(lit,blk)]
+ = return (consCgStmt if_stmt blk)
+ where
+ cmm_lit = mkSimpleLit lit
+ rep = cmmLitRep cmm_lit
+ cond = CmmMachOp (MO_Ne rep) [scrut, CmmLit cmm_lit]
+ if_stmt = CmmCondBranch cond deflt_blk_id
+
+mk_lit_switch scrut deflt_blk_id branches
+ = do { hi_blk <- mk_lit_switch scrut deflt_blk_id hi_branches
+ ; lo_blk <- mk_lit_switch scrut deflt_blk_id lo_branches
+ ; lo_blk_id <- forkCgStmts lo_blk
+ ; let if_stmt = CmmCondBranch cond lo_blk_id
+ ; return (if_stmt `consCgStmt` hi_blk) }
+ where
+ n_branches = length branches
+ (mid_lit,_) = branches !! (n_branches `div` 2)
+ -- See notes above re mid_tag
+
+ (lo_branches, hi_branches) = span is_lo branches
+ is_lo (t,_) = t < mid_lit
+
+ cond = CmmMachOp (mkLtOp mid_lit)
+ [scrut, CmmLit (mkSimpleLit mid_lit)]
+
+-------------------------------------------------------------------------
+--
+-- Simultaneous assignment
+--
+-------------------------------------------------------------------------
+
+
+emitSimultaneously :: CmmStmts -> Code
+-- Emit code to perform the assignments in the
+-- input simultaneously, using temporary variables when necessary.
+--
+-- The Stmts must be:
+-- CmmNop, CmmComment, CmmAssign, CmmStore
+-- and nothing else
+
+
+-- We use the strongly-connected component algorithm, in which
+-- * the vertices are the statements
+-- * an edge goes from s1 to s2 iff
+-- s1 assigns to something s2 uses
+-- that is, if s1 should *follow* s2 in the final order
+
+type CVertex = (Int, CmmStmt) -- Give each vertex a unique number,
+ -- for fast comparison
+
+emitSimultaneously stmts
+ = codeOnly $
+ case filterOut isNopStmt (stmtList stmts) of
+ -- Remove no-ops
+ [] -> nopC
+ [stmt] -> stmtC stmt -- It's often just one stmt
+ stmt_list -> doSimultaneously1 (zip [(1::Int)..] stmt_list)
+
+doSimultaneously1 :: [CVertex] -> Code
+doSimultaneously1 vertices
+ = let
+ edges = [ (vertex, key1, edges_from stmt1)
+ | vertex@(key1, stmt1) <- vertices
+ ]
+ edges_from stmt1 = [ key2 | (key2, stmt2) <- vertices,
+ stmt1 `mustFollow` stmt2
+ ]
+ components = stronglyConnComp edges
+
+ -- do_component deals with one strongly-connected component
+ -- Not cyclic, or singleton? Just do it
+ do_component (AcyclicSCC (n,stmt)) = stmtC stmt
+ do_component (CyclicSCC [(n,stmt)]) = stmtC stmt
+
+ -- Cyclic? Then go via temporaries.
Pick one to + -- break the loop and try again with the rest. + do_component (CyclicSCC ((n,first_stmt) : rest)) + = do { from_temp <- go_via_temp first_stmt + ; doSimultaneously1 rest + ; stmtC from_temp } + + go_via_temp (CmmAssign dest src) + = do { tmp <- newTemp (cmmRegRep dest) + ; stmtC (CmmAssign tmp src) + ; return (CmmAssign dest (CmmReg tmp)) } + go_via_temp (CmmStore dest src) + = do { tmp <- newTemp (cmmExprRep src) + ; stmtC (CmmAssign tmp src) + ; return (CmmStore dest (CmmReg tmp)) } + in + mapCs do_component components + +mustFollow :: CmmStmt -> CmmStmt -> Bool +CmmAssign reg _ `mustFollow` stmt = anySrc (reg `regUsedIn`) stmt +CmmStore loc e `mustFollow` stmt = anySrc (locUsedIn loc (cmmExprRep e)) stmt +CmmNop `mustFollow` stmt = False +CmmComment _ `mustFollow` stmt = False + + +anySrc :: (CmmExpr -> Bool) -> CmmStmt -> Bool +-- True if the fn is true of any input of the stmt +anySrc p (CmmAssign _ e) = p e +anySrc p (CmmStore e1 e2) = p e1 || p e2 -- Might be used in either side +anySrc p (CmmComment _) = False +anySrc p CmmNop = False +anySrc p other = True -- Conservative + +regUsedIn :: CmmReg -> CmmExpr -> Bool +reg `regUsedIn` CmmLit _ = False +reg `regUsedIn` CmmLoad e _ = reg `regUsedIn` e +reg `regUsedIn` CmmReg reg' = reg == reg' +reg `regUsedIn` CmmRegOff reg' _ = reg == reg' +reg `regUsedIn` CmmMachOp _ es = any (reg `regUsedIn`) es + +locUsedIn :: CmmExpr -> MachRep -> CmmExpr -> Bool +-- (locUsedIn a r e) checks whether writing to r[a] could affect the value of +-- 'e'. Returns True if it's not sure. +locUsedIn loc rep (CmmLit _) = False +locUsedIn loc rep (CmmLoad e ld_rep) = possiblySameLoc loc rep e ld_rep +locUsedIn loc rep (CmmReg reg') = False +locUsedIn loc rep (CmmRegOff reg' _) = False +locUsedIn loc rep (CmmMachOp _ es) = any (locUsedIn loc rep) es + +possiblySameLoc :: CmmExpr -> MachRep -> CmmExpr -> MachRep -> Bool +-- Assumes that distinct registers (eg Hp, Sp) do not +-- point to the same location, nor any offset thereof. 
+possiblySameLoc (CmmReg r1) rep1 (CmmReg r2) rep2 = r1==r2
+possiblySameLoc (CmmReg r1) rep1 (CmmRegOff r2 0) rep2 = r1==r2
+possiblySameLoc (CmmRegOff r1 0) rep1 (CmmReg r2) rep2 = r1==r2
+possiblySameLoc (CmmRegOff r1 start1) rep1 (CmmRegOff r2 start2) rep2
+ = r1==r2 && end1 > start2 && end2 > start1
+ where
+ end1 = start1 + machRepByteWidth rep1
+ end2 = start2 + machRepByteWidth rep2
+
+possiblySameLoc l1 rep1 (CmmLit _) rep2 = False
+possiblySameLoc l1 rep1 l2 rep2 = True -- Conservative
diff --git a/compiler/codeGen/ClosureInfo.hi-boot-5 b/compiler/codeGen/ClosureInfo.hi-boot-5
new file mode 100644
index 0000000000..2291f93cc6
--- /dev/null
+++ b/compiler/codeGen/ClosureInfo.hi-boot-5
@@ -0,0 +1,4 @@
+__interface ClosureInfo 1 0 where
+__export ClosureInfo ClosureInfo LambdaFormInfo;
+1 data LambdaFormInfo;
+1 data ClosureInfo;
diff --git a/compiler/codeGen/ClosureInfo.hi-boot-6 b/compiler/codeGen/ClosureInfo.hi-boot-6
new file mode 100644
index 0000000000..d313ccde80
--- /dev/null
+++ b/compiler/codeGen/ClosureInfo.hi-boot-6
@@ -0,0 +1,4 @@
+module ClosureInfo where
+
+data LambdaFormInfo
+data ClosureInfo
diff --git a/compiler/codeGen/ClosureInfo.lhs b/compiler/codeGen/ClosureInfo.lhs
new file mode 100644
index 0000000000..84d9dd95ef
--- /dev/null
+++ b/compiler/codeGen/ClosureInfo.lhs
@@ -0,0 +1,951 @@
+%
+% (c) The University of Glasgow 1992-2004
+%
+
+ Data structures which describe closures, and
+ operations over those data structures
+
+ Nothing monadic in here
+
+Much of the rationale for these things is in the ``details'' part of
+the STG paper.
+
+\begin{code}
+module ClosureInfo (
+ ClosureInfo, LambdaFormInfo, SMRep, -- all abstract
+ StandardFormInfo,
+
+ ArgDescr(..), Liveness(..),
+ C_SRT(..), needsSRT,
+
+ mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo,
+ mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape,
+
+ mkClosureInfo, mkConInfo,
+
+ closureSize, closureNonHdrSize,
+ closureGoodStuffSize, closurePtrsSize,
+ slopSize,
+
+ closureName, infoTableLabelFromCI,
+ closureLabelFromCI, closureSRT,
+ closureLFInfo, isLFThunk, closureSMRep, closureUpdReqd,
+ closureNeedsUpdSpace, closureIsThunk,
+ closureSingleEntry, closureReEntrant, isConstrClosure_maybe,
+ closureFunInfo, isStandardFormThunk, isKnownFun,
+
+ enterIdLabel, enterLocalIdLabel, enterReturnPtLabel,
+
+ nodeMustPointToIt,
+ CallMethod(..), getCallMethod,
+
+ blackHoleOnEntry,
+
+ staticClosureRequired,
+ getClosureType,
+
+ isToplevClosure,
+ closureValDescr, closureTypeDescr, -- profiling
+
+ isStaticClosure,
+ cafBlackHoleClosureInfo, seCafBlackHoleClosureInfo,
+
+ staticClosureNeedsLink,
+ ) where
+
+#include "../includes/MachDeps.h"
+#include "HsVersions.h"
+
+import StgSyn
+import SMRep -- all of it
+
+import CLabel
+
+import Constants ( mIN_PAYLOAD_SIZE )
+import Packages ( isDllName, HomeModules )
+import StaticFlags ( opt_SccProfilingOn, opt_OmitBlackHoling,
+ opt_Parallel, opt_DoTickyProfiling )
+import Id ( Id, idType, idArity, idName )
+import DataCon ( DataCon, dataConTyCon, isNullaryRepDataCon, dataConName )
+import Name ( Name, nameUnique, getOccName, getOccString )
+import OccName ( occNameString )
+import Type ( isUnLiftedType, Type, repType, splitTyConApp_maybe )
+import TcType ( tcSplitSigmaTy )
+import TyCon ( isFunTyCon, isAbstractTyCon )
+import BasicTypes ( TopLevelFlag(..), isNotTopLevel, isTopLevel, ipNameName )
+import FastString
+import Outputable
+import Constants
+
+import TypeRep -- TEMP
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-datatypes]{Data types for closure information}
+%* *
+%************************************************************************
+
+Information about a closure, from the code generator's point of view.
+
+A ClosureInfo describes the info pointer of a closure. It has
+enough information
+ a) to construct the info table itself
+ b) to allocate a closure containing that info pointer (i.e.
+ it knows the info table label)
+
+We make a ClosureInfo for
+ - each let binding (both top level and not)
+ - each data constructor (for its shared static and
+ dynamic info tables)
+
+\begin{code}
+data ClosureInfo
+ = ClosureInfo {
+ closureName :: !Name, -- The thing bound to this closure
+ closureLFInfo :: !LambdaFormInfo, -- NOTE: not an LFCon (see below)
+ closureSMRep :: !SMRep, -- representation used by storage mgr
+ closureSRT :: !C_SRT, -- What SRT applies to this closure
+ closureType :: !Type, -- Type of closure (ToDo: remove)
+ closureDescr :: !String -- closure description (for profiling)
+ }
+
+ -- Constructor closures don't have a unique info table label (they use
+ -- the constructor's info table), and they don't have an SRT.
+ | ConInfo {
+ closureCon :: !DataCon,
+ closureSMRep :: !SMRep,
+ closureDllCon :: !Bool -- is in a separate DLL
+ }
+
+-- C_SRT is what StgSyn.SRT gets translated to...
+-- we add a label for the table, and expect only the 'offset/length' form
+
+data C_SRT = NoC_SRT
+ | C_SRT !CLabel !WordOff !StgHalfWord {-bitmap or escape-}
+
+needsSRT :: C_SRT -> Bool
+needsSRT NoC_SRT = False
+needsSRT (C_SRT _ _ _) = True
+\end{code}
+
+%************************************************************************
+%* *
+\subsubsection[LambdaFormInfo-datatype]{@LambdaFormInfo@: source-derivable info}
+%* *
+%************************************************************************
+
+Information about an identifier, from the code generator's point of
+view. Every identifier is bound to a LambdaFormInfo in the
+environment, which gives the code generator enough info to be able to
+tail call or return that identifier.
+
+Note that a closure is usually bound to an identifier, so a
+ClosureInfo contains a LambdaFormInfo.
+
+\begin{code}
+data LambdaFormInfo
+ = LFReEntrant -- Reentrant closure (a function)
+ TopLevelFlag -- True if top level
+ !Int -- Arity. Invariant: always > 0
+ !Bool -- True <=> no fvs
+ ArgDescr -- Argument descriptor (should really be in ClosureInfo)
+
+ | LFCon -- A saturated constructor application
+ DataCon -- The constructor
+
+ | LFThunk -- Thunk (zero arity)
+ TopLevelFlag
+ !Bool -- True <=> no free vars
+ !Bool -- True <=> updatable (i.e., *not* single-entry)
+ StandardFormInfo
+ !Bool -- True <=> *might* be a function type
+
+ | LFUnknown -- Used for function arguments and imported things.
+ -- We know nothing about this closure. Treat like
+ -- updatable "LFThunk"...
+ -- Imported things which we do know something about use
+ -- one of the other LF constructors (eg LFReEntrant for
+ -- known functions)
+ !Bool -- True <=> *might* be a function type
+
+ | LFLetNoEscape -- See LetNoEscape module for precise description of
+ -- these "lets".
+ !Int -- arity;
+
+ | LFBlackHole -- Used for the closures allocated to hold the result
+ -- of a CAF. We want the target of the update frame to
+ -- be in the heap, so we make a black hole to hold it.
+ CLabel -- Flavour (info label, eg CAF_BLACKHOLE_info).
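+
+-- As a rough illustration (hypothetical bindings, not from this patch):
+--
+--   f = \x -> x + 1   top level, no fvs    ~~> LFReEntrant TopLevel 1 True <an ArgDescr>
+--   t = length xs     an updatable thunk   ~~> LFThunk NotTopLevel False True NonStandardThunk False
+--   c = Just y        a saturated con app  ~~> LFCon <the DataCon for Just>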
+
+
+-------------------------
+-- An ArgDescr describes the argument pattern of a function
+
+data ArgDescr
+ = ArgSpec -- Fits one of the standard patterns
+ !Int -- RTS type identifier ARG_P, ARG_N, ...
+
+ | ArgGen -- General case
+ Liveness -- Details about the arguments
+
+
+-------------------------
+-- We represent liveness bitmaps as a Bitmap (whose internal
+-- representation really is a bitmap). These are pinned onto case return
+-- vectors to indicate the state of the stack for the garbage collector.
+--
+-- In the compiled program, liveness bitmaps that fit inside a single
+-- word (StgWord) are stored as a single word, while larger bitmaps are
+-- stored as a pointer to an array of words.
+
+data Liveness
+ = SmallLiveness -- Liveness info that fits in one word
+ StgWord -- Here's the bitmap
+
+ | BigLiveness -- Liveness info with a multi-word bitmap
+ CLabel -- Label for the bitmap
+
+
+-------------------------
+-- StandardFormInfo tells whether this thunk has one of
+-- a small number of standard forms
+
+data StandardFormInfo
+ = NonStandardThunk
+ -- Not one of the standard forms
+
+ | SelectorThunk
+ -- A SelectorThunk is of form
+ -- case x of
+ -- con a1,..,an -> ak
+ -- and the constructor is from a single-constr type.
+ WordOff -- 0-origin offset of ak within the "goods" of
+ -- constructor (Recall that the a1,...,an may be laid
+ -- out in the heap in a non-obvious order.)
+
+ | ApThunk
+ -- An ApThunk is of form
+ -- x1 ... xn
+ -- The code for the thunk just pushes x2..xn on the stack and enters x1.
+ -- There are a few of these (for 1 <= n <= MAX_SPEC_AP_SIZE) pre-compiled
+ -- in the RTS to save space.
+ Int -- Arity, n
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-construction]{Functions which build LFInfos}
+%* *
+%************************************************************************
+
+\begin{code}
+mkLFReEntrant :: TopLevelFlag -- True if top level
+ -> [Id] -- Free vars
+ -> [Id] -- Args
+ -> ArgDescr -- Argument descriptor
+ -> LambdaFormInfo
+
+mkLFReEntrant top fvs args arg_descr
+ = LFReEntrant top (length args) (null fvs) arg_descr
+
+mkLFThunk thunk_ty top fvs upd_flag
+ = ASSERT( not (isUpdatable upd_flag) || not (isUnLiftedType thunk_ty) )
+ LFThunk top (null fvs)
+ (isUpdatable upd_flag)
+ NonStandardThunk
+ (might_be_a_function thunk_ty)
+
+might_be_a_function :: Type -> Bool
+might_be_a_function ty
+ | Just (tc,_) <- splitTyConApp_maybe (repType ty),
+ not (isFunTyCon tc) && not (isAbstractTyCon tc) = False
+ -- don't forget to check for abstract types, which might
+ -- be functions too.
+ | otherwise = True
\end{code}
+
+@mkConLFInfo@ is similar, for constructors.
+
+\begin{code}
+mkConLFInfo :: DataCon -> LambdaFormInfo
+mkConLFInfo con = LFCon con
+
+mkSelectorLFInfo id offset updatable
+ = LFThunk NotTopLevel False updatable (SelectorThunk offset)
+ (might_be_a_function (idType id))
+
+mkApLFInfo id upd_flag arity
+ = LFThunk NotTopLevel (arity == 0) (isUpdatable upd_flag) (ApThunk arity)
+ (might_be_a_function (idType id))
+\end{code}
+
+Miscellaneous LF-infos.
+
+\begin{code}
+mkLFArgument id = LFUnknown (might_be_a_function (idType id))
+
+mkLFLetNoEscape = LFLetNoEscape
+
+mkLFImported :: Id -> LambdaFormInfo
+mkLFImported id
+ = case idArity id of
+ n | n > 0 -> LFReEntrant TopLevel n True (panic "arg_descr") -- n > 0
+ other -> mkLFArgument id -- Not sure of exact arity
+\end{code}
+
+\begin{code}
+isLFThunk :: LambdaFormInfo -> Bool
+isLFThunk (LFThunk _ _ _ _ _) = True
+isLFThunk (LFBlackHole _) = True
+ -- return True for a blackhole: this function is used to determine
+ -- whether to use the thunk header in SMP mode, and a blackhole
+ -- must have one.
+isLFThunk _ = False
+\end{code}
+
+%************************************************************************
+%* *
+ Building ClosureInfos
+%* *
+%************************************************************************
+
+\begin{code}
+mkClosureInfo :: Bool -- Is static
+ -> Id
+ -> LambdaFormInfo
+ -> Int -> Int -- Total and pointer words
+ -> C_SRT
+ -> String -- String descriptor
+ -> ClosureInfo
+mkClosureInfo is_static id lf_info tot_wds ptr_wds srt_info descr
+ = ClosureInfo { closureName = name,
+ closureLFInfo = lf_info,
+ closureSMRep = sm_rep,
+ closureSRT = srt_info,
+ closureType = idType id,
+ closureDescr = descr }
+ where
+ name = idName id
+ sm_rep = chooseSMRep is_static lf_info tot_wds ptr_wds
+
+mkConInfo :: HomeModules
+ -> Bool -- Is static
+ -> DataCon
+ -> Int -> Int -- Total and pointer words
+ -> ClosureInfo
+mkConInfo hmods is_static data_con tot_wds ptr_wds
+ = ConInfo { closureSMRep = sm_rep,
+ closureCon = data_con,
+ closureDllCon = isDllName hmods (dataConName data_con) }
+ where
+ sm_rep = chooseSMRep is_static (mkConLFInfo data_con) tot_wds ptr_wds
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-sizes]{Functions about closure {\em sizes}}
+%* *
+%************************************************************************
+
+\begin{code}
+closureSize :: ClosureInfo -> WordOff
+closureSize cl_info = hdr_size + closureNonHdrSize cl_info
+ where hdr_size | closureIsThunk cl_info = thunkHdrSize
+ | otherwise = fixedHdrSize
+ -- All thunks use thunkHdrSize, even if they are non-updatable.
+ -- this is because we don't have separate closure types for
+ -- updatable vs. non-updatable thunks, so the GC can't tell the
+ -- difference. If we ever have significant numbers of non-
+ -- updatable thunks, it might be worth fixing this.
+
+closureNonHdrSize :: ClosureInfo -> WordOff
+closureNonHdrSize cl_info
+ = tot_wds + computeSlopSize tot_wds cl_info
+ where
+ tot_wds = closureGoodStuffSize cl_info
+
+closureGoodStuffSize :: ClosureInfo -> WordOff
+closureGoodStuffSize cl_info
+ = let (ptrs, nonptrs) = sizes_from_SMRep (closureSMRep cl_info)
+ in ptrs + nonptrs
+
+closurePtrsSize :: ClosureInfo -> WordOff
+closurePtrsSize cl_info
+ = let (ptrs, _) = sizes_from_SMRep (closureSMRep cl_info)
+ in ptrs
+
+-- not exported:
+sizes_from_SMRep :: SMRep -> (WordOff,WordOff)
+sizes_from_SMRep (GenericRep _ ptrs nonptrs _) = (ptrs, nonptrs)
+sizes_from_SMRep BlackHoleRep = (0, 0)
+\end{code}
+
+Computing slop size. WARNING: this looks dodgy --- it has deep
+knowledge of what the storage manager does with the various
+representations...
+
+Slop Requirements: every thunk gets an extra padding word in the
+header, which takes the updated value.
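+
+As a worked illustration (editorial, assuming mIN_PAYLOAD_SIZE is one
+word): a dynamic, non-updatable closure with an empty payload gets
+max 0 (1 - 0) = 1 word of slop from computeSlopSize below, while the
+same closure with a two-word payload gets max 0 (1 - 2) = 0.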
+
+\begin{code}
+slopSize cl_info = computeSlopSize payload_size cl_info
+ where payload_size = closureGoodStuffSize cl_info
+
+computeSlopSize :: WordOff -> ClosureInfo -> WordOff
+computeSlopSize payload_size cl_info
+ = max 0 (minPayloadSize smrep updatable - payload_size)
+ where
+ smrep = closureSMRep cl_info
+ updatable = closureNeedsUpdSpace cl_info
+
+-- we leave space for an update if either (a) the closure is updatable
+-- or (b) it is a static thunk. This is because a static thunk needs
+-- a static link field in a predictable place (after the slop), regardless
+-- of whether it is updatable or not.
+closureNeedsUpdSpace (ClosureInfo { closureLFInfo =
+ LFThunk TopLevel _ _ _ _ }) = True
+closureNeedsUpdSpace cl_info = closureUpdReqd cl_info
+
+minPayloadSize :: SMRep -> Bool -> WordOff
+minPayloadSize smrep updatable
+ = case smrep of
+ BlackHoleRep -> min_upd_size
+ GenericRep _ _ _ _ | updatable -> min_upd_size
+ GenericRep True _ _ _ -> 0 -- static
+ GenericRep False _ _ _ -> mIN_PAYLOAD_SIZE
+ -- ^^^^^___ dynamic
+ where
+ min_upd_size =
+ ASSERT(mIN_PAYLOAD_SIZE <= sIZEOF_StgSMPThunkHeader)
+ 0 -- check that we already have enough
+ -- room for mIN_SIZE_NonUpdHeapObject,
+ -- due to the extra header word in SMP
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[SMreps]{Choosing SM reps}
+%* *
+%************************************************************************
+
+\begin{code}
+chooseSMRep
+ :: Bool -- True <=> static closure
+ -> LambdaFormInfo
+ -> WordOff -> WordOff -- Tot wds, ptr wds
+ -> SMRep
+
+chooseSMRep is_static lf_info tot_wds ptr_wds
+ = let
+ nonptr_wds = tot_wds - ptr_wds
+ closure_type = getClosureType is_static ptr_wds lf_info
+ in
+ GenericRep is_static ptr_wds nonptr_wds closure_type
+
+-- We *do* get non-updatable top-level thunks sometimes. eg. f = g
+-- gets compiled to a jump to g (if g has non-zero arity), instead of
+-- messing around with update frames and PAPs. We set the closure type
+-- to FUN_STATIC in this case.
+
+getClosureType :: Bool -> WordOff -> LambdaFormInfo -> ClosureType
+getClosureType is_static ptr_wds lf_info
+ = case lf_info of
+ LFCon con | is_static && ptr_wds == 0 -> ConstrNoCaf
+ | otherwise -> Constr
+ LFReEntrant _ _ _ _ -> Fun
+ LFThunk _ _ _ (SelectorThunk _) _ -> ThunkSelector
+ LFThunk _ _ _ _ _ -> Thunk
+ _ -> panic "getClosureType"
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[ClosureInfo-4-questions]{Four major questions about @ClosureInfo@}
+%* *
+%************************************************************************
+
+Be sure to see the stg-details notes about these...
+
+\begin{code}
+nodeMustPointToIt :: LambdaFormInfo -> Bool
+nodeMustPointToIt (LFReEntrant top _ no_fvs _)
+ = not no_fvs || -- Certainly if it has fvs we need to point to it
+ isNotTopLevel top
+ -- If it is not top level we will point to it
+ -- We can have a \r closure with no_fvs which
+ -- is not top level as special case cgRhsClosure
+ -- has been disabled in favour of let floating
+
+ -- For lex_profiling we also access the cost centre for a
+ -- non-inherited function i.e. not top level
+ -- the not top case above ensures this is ok.
+
+nodeMustPointToIt (LFCon _) = True
+
+ -- Strictly speaking, the above two don't need Node to point
+ -- to it if the arity = 0. But this is a *really* unlikely
+ -- situation. If we know it's nil (say) and we are entering
+ -- it.
Eg: let x = [] in x then we will certainly have inlined + -- x, since nil is a simple atom. So we gain little by not + -- having Node point to known zero-arity things. On the other + -- hand, we do lose something; Patrick's code for figuring out + -- when something has been updated but not entered relies on + -- having Node point to the result of an update. SLPJ + -- 27/11/92. + +nodeMustPointToIt (LFThunk _ no_fvs updatable NonStandardThunk _) + = updatable || not no_fvs || opt_SccProfilingOn + -- For the non-updatable (single-entry case): + -- + -- True if has fvs (in which case we need access to them, and we + -- should black-hole it) + -- or profiling (in which case we need to recover the cost centre + -- from inside it) + +nodeMustPointToIt (LFThunk _ no_fvs updatable some_standard_form_thunk _) + = True -- Node must point to any standard-form thunk + +nodeMustPointToIt (LFUnknown _) = True +nodeMustPointToIt (LFBlackHole _) = True -- BH entry may require Node to point +nodeMustPointToIt (LFLetNoEscape _) = False +\end{code} + +The entry conventions depend on the type of closure being entered, +whether or not it has free variables, and whether we're running +sequentially or in parallel. + +\begin{tabular}{lllll} +Closure Characteristics & Parallel & Node Req'd & Argument Passing & Enter Via \\ +Unknown & no & yes & stack & node \\ +Known fun ($\ge$ 1 arg), no fvs & no & no & registers & fast entry (enough args) \\ +\ & \ & \ & \ & slow entry (otherwise) \\ +Known fun ($\ge$ 1 arg), fvs & no & yes & registers & fast entry (enough args) \\ +0 arg, no fvs @\r,\s@ & no & no & n/a & direct entry \\ +0 arg, no fvs @\u@ & no & yes & n/a & node \\ +0 arg, fvs @\r,\s@ & no & yes & n/a & direct entry \\ +0 arg, fvs @\u@ & no & yes & n/a & node \\ + +Unknown & yes & yes & stack & node \\ +Known fun ($\ge$ 1 arg), no fvs & yes & no & registers & fast entry (enough args) \\ +\ & \ & \ & \ & slow entry (otherwise) \\ +Known fun ($\ge$ 1 arg), fvs & yes & yes & registers & node \\ +0 arg, no fvs @\r,\s@ & yes & no & n/a & direct entry \\ +0 arg, no fvs @\u@ & yes & yes & n/a & node \\ +0 arg, fvs @\r,\s@ & yes & yes & n/a & node \\ +0 arg, fvs @\u@ & yes & yes & n/a & node\\ +\end{tabular} + +When black-holing, single-entry closures could also be entered via node +(rather than directly) to catch double-entry. + +\begin{code} +data CallMethod + = EnterIt -- no args, not a function + + | JumpToIt CLabel -- no args, not a function, but we + -- know what its entry code is + + | ReturnIt -- it's a function, but we have + -- zero args to apply to it, so just + -- return it. + + | ReturnCon DataCon -- It's a data constructor, just return it + + | SlowCall -- Unknown fun, or known fun with + -- too few args. + + | DirectEntry -- Jump directly, with args in regs + CLabel -- The code label + Int -- Its arity + +getCallMethod :: HomeModules + -> Name -- Function being applied + -> LambdaFormInfo -- Its info + -> Int -- Number of available arguments + -> CallMethod + +getCallMethod hmods name lf_info n_args + | nodeMustPointToIt lf_info && opt_Parallel + = -- If we're parallel, then we must always enter via node. + -- The reason is that the closure may have been + -- fetched since we allocated it. 
+ EnterIt + +getCallMethod hmods name (LFReEntrant _ arity _ _) n_args + | n_args == 0 = ASSERT( arity /= 0 ) + ReturnIt -- No args at all + | n_args < arity = SlowCall -- Not enough args + | otherwise = DirectEntry (enterIdLabel hmods name) arity + +getCallMethod hmods name (LFCon con) n_args + = ASSERT( n_args == 0 ) + ReturnCon con + +getCallMethod hmods name (LFThunk _ _ updatable std_form_info is_fun) n_args + | is_fun -- Must always "call" a function-typed + = SlowCall -- thing, cannot just enter it [in eval/apply, the entry code + -- is the fast-entry code] + + | updatable || opt_DoTickyProfiling -- to catch double entry + {- OLD: || opt_SMP + I decided to remove this, because in SMP mode it doesn't matter + if we enter the same thunk multiple times, so the optimisation + of jumping directly to the entry code is still valid. --SDM + -} + = ASSERT( n_args == 0 ) EnterIt + + | otherwise -- Jump direct to code for single-entry thunks + = ASSERT( n_args == 0 ) + JumpToIt (thunkEntryLabel hmods name std_form_info updatable) + +getCallMethod hmods name (LFUnknown True) n_args + = SlowCall -- might be a function + +getCallMethod hmods name (LFUnknown False) n_args + = ASSERT2 ( n_args == 0, ppr name <+> ppr n_args ) + EnterIt -- Not a function + +getCallMethod hmods name (LFBlackHole _) n_args + = SlowCall -- Presumably the black hole has by now + -- been updated, but we don't know with + -- what, so we slow call it + +getCallMethod hmods name (LFLetNoEscape 0) n_args + = JumpToIt (enterReturnPtLabel (nameUnique name)) + +getCallMethod hmods name (LFLetNoEscape arity) n_args + | n_args == arity = DirectEntry (enterReturnPtLabel (nameUnique name)) arity + | otherwise = pprPanic "let-no-escape: " (ppr name <+> ppr arity) + +blackHoleOnEntry :: ClosureInfo -> Bool +-- Static closures are never themselves black-holed. +-- Updatable ones will be overwritten with a CAFList cell, which points to a +-- black hole; +-- Single-entry ones have no fvs to plug, and we trust they don't form part +-- of a loop. + +blackHoleOnEntry ConInfo{} = False +blackHoleOnEntry (ClosureInfo { closureLFInfo = lf_info, closureSMRep = rep }) + | isStaticRep rep + = False -- Never black-hole a static closure + + | otherwise + = case lf_info of + LFReEntrant _ _ _ _ -> False + LFLetNoEscape _ -> False + LFThunk _ no_fvs updatable _ _ + -> if updatable + then not opt_OmitBlackHoling + else opt_DoTickyProfiling || not no_fvs + -- the former to catch double entry, + -- and the latter to plug space-leaks. KSW/SDM 1999-04. + + other -> panic "blackHoleOnEntry" -- Should never happen + +isStandardFormThunk :: LambdaFormInfo -> Bool +isStandardFormThunk (LFThunk _ _ _ (SelectorThunk _) _) = True +isStandardFormThunk (LFThunk _ _ _ (ApThunk _) _) = True +isStandardFormThunk other_lf_info = False + +isKnownFun :: LambdaFormInfo -> Bool +isKnownFun (LFReEntrant _ _ _ _) = True +isKnownFun (LFLetNoEscape _) = True +isKnownFun _ = False +\end{code} + +----------------------------------------------------------------------------- +SRT-related stuff + +\begin{code} +staticClosureNeedsLink :: ClosureInfo -> Bool +-- A static closure needs a link field to aid the GC when traversing +-- the static closure graph. But it only needs such a field if either +-- a) it has an SRT +-- b) it's a constructor with one or more pointer fields +-- In case (b), the constructor's fields themselves play the role +-- of the SRT. 
+staticClosureNeedsLink (ClosureInfo { closureSRT = srt }) + = needsSRT srt +staticClosureNeedsLink (ConInfo { closureSMRep = sm_rep, closureCon = con }) + = not (isNullaryRepDataCon con) && not_nocaf_constr + where + not_nocaf_constr = + case sm_rep of + GenericRep _ _ _ ConstrNoCaf -> False + _other -> True +\end{code} + +Avoiding generating entries and info tables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +At present, for every function we generate all of the following, +just in case. But they aren't always all needed, as noted below: + +[NB1: all of this applies only to *functions*. Thunks always +have closure, info table, and entry code.] + +[NB2: All are needed if the function is *exported*, just to play safe.] + + +* Fast-entry code ALWAYS NEEDED + +* Slow-entry code + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) we're in the parallel world and the function has free vars + [Reason: in parallel world, we always enter functions + with free vars via the closure.] + +* The function closure + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) if the function has free vars (ie not top level) + + Why case (a) here? Because if the arg-satis check fails, + UpdatePAP stuffs a pointer to the function closure in the PAP. + [Could be changed; UpdatePAP could stuff in a code ptr instead, + but doesn't seem worth it.] + + [NB: these conditions imply that we might need the closure + without the slow-entry code. Here's how. + + f x y = let g w = ...x..y..w... + in + ...(g t)... + + Here we need a closure for g which contains x and y, + but since the calls are all saturated we just jump to the + fast entry point for g, with R1 pointing to the closure for g.] + + +* Standard info table + Needed iff (a) we have any un-saturated calls to the function + OR (b) the function is passed as an arg + OR (c) the function has free vars (ie not top level) + + NB. In the sequential world, (c) is only required so that the function closure has + an info table to point to, to keep the storage manager happy. + If (c) alone is true we could fake up an info table by choosing + one of a standard family of info tables, whose entry code just + bombs out. + + [NB In the parallel world (c) is needed regardless because + we enter functions with free vars via the closure.] + + If (c) is retained, then we'll sometimes generate an info table + (for storage mgr purposes) without slow-entry code. Then we need + to use an error label in the info table to substitute for the absent + slow entry code. 
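+
+For instance (a hypothetical binding, not from this patch): for a top-level
+@f x = x + 1@ whose uses are all saturated calls such as @f 3@,
+@satCallsOnly@ holds of its @StgBinderInfo@ and @staticClosureRequired@
+below returns False; if @f@ is also passed as an argument, as in
+@map f xs@, a static closure must be generated after all.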
+ +\begin{code} +staticClosureRequired + :: Name + -> StgBinderInfo + -> LambdaFormInfo + -> Bool +staticClosureRequired binder bndr_info + (LFReEntrant top_level _ _ _) -- It's a function + = ASSERT( isTopLevel top_level ) + -- Assumption: it's a top-level, no-free-var binding + not (satCallsOnly bndr_info) + +staticClosureRequired binder other_binder_info other_lf_info = True +\end{code} + +%************************************************************************ +%* * +\subsection[ClosureInfo-misc-funs]{Misc functions about @ClosureInfo@, etc.} +%* * +%************************************************************************ + +\begin{code} + +isStaticClosure :: ClosureInfo -> Bool +isStaticClosure cl_info = isStaticRep (closureSMRep cl_info) + +closureUpdReqd :: ClosureInfo -> Bool +closureUpdReqd ClosureInfo{ closureLFInfo = lf_info } = lfUpdatable lf_info +closureUpdReqd ConInfo{} = False + +lfUpdatable :: LambdaFormInfo -> Bool +lfUpdatable (LFThunk _ _ upd _ _) = upd +lfUpdatable (LFBlackHole _) = True + -- Black-hole closures are allocated to receive the results of an + -- alg case with a named default... so they need to be updated. +lfUpdatable _ = False + +closureIsThunk :: ClosureInfo -> Bool +closureIsThunk ClosureInfo{ closureLFInfo = lf_info } = isLFThunk lf_info +closureIsThunk ConInfo{} = False + +closureSingleEntry :: ClosureInfo -> Bool +closureSingleEntry (ClosureInfo { closureLFInfo = LFThunk _ _ upd _ _}) = not upd +closureSingleEntry other_closure = False + +closureReEntrant :: ClosureInfo -> Bool +closureReEntrant (ClosureInfo { closureLFInfo = LFReEntrant _ _ _ _ }) = True +closureReEntrant other_closure = False + +isConstrClosure_maybe :: ClosureInfo -> Maybe DataCon +isConstrClosure_maybe (ConInfo { closureCon = data_con }) = Just data_con +isConstrClosure_maybe _ = Nothing + +closureFunInfo :: ClosureInfo -> Maybe (Int, ArgDescr) +closureFunInfo (ClosureInfo { closureLFInfo = LFReEntrant _ arity _ arg_desc}) + = Just (arity, arg_desc) +closureFunInfo _ + = Nothing +\end{code} + +\begin{code} +isToplevClosure :: ClosureInfo -> Bool +isToplevClosure (ClosureInfo { closureLFInfo = lf_info }) + = case lf_info of + LFReEntrant TopLevel _ _ _ -> True + LFThunk TopLevel _ _ _ _ -> True + other -> False +isToplevClosure _ = False +\end{code} + +Label generation. + +\begin{code} +infoTableLabelFromCI :: ClosureInfo -> CLabel +infoTableLabelFromCI (ClosureInfo { closureName = name, + closureLFInfo = lf_info, + closureSMRep = rep }) + = case lf_info of + LFBlackHole info -> info + + LFThunk _ _ upd_flag (SelectorThunk offset) _ -> + mkSelectorInfoLabel upd_flag offset + + LFThunk _ _ upd_flag (ApThunk arity) _ -> + mkApInfoTableLabel upd_flag arity + + LFThunk{} -> mkLocalInfoTableLabel name + + LFReEntrant _ _ _ _ -> mkLocalInfoTableLabel name + + other -> panic "infoTableLabelFromCI" + +infoTableLabelFromCI (ConInfo { closureCon = con, + closureSMRep = rep, + closureDllCon = dll }) + | isStaticRep rep = mkStaticInfoTableLabel name dll + | otherwise = mkConInfoTableLabel name dll + where + name = dataConName con + +-- ClosureInfo for a closure (as opposed to a constructor) is always local +closureLabelFromCI (ClosureInfo { closureName = nm }) = mkLocalClosureLabel nm +closureLabelFromCI _ = panic "closureLabelFromCI" + +-- thunkEntryLabel is a local help function, not exported. It's used from both +-- entryLabelFromCI and getCallMethod. 
+ +thunkEntryLabel hmods thunk_id (ApThunk arity) is_updatable + = enterApLabel is_updatable arity +thunkEntryLabel hmods thunk_id (SelectorThunk offset) upd_flag + = enterSelectorLabel upd_flag offset +thunkEntryLabel hmods thunk_id _ is_updatable + = enterIdLabel hmods thunk_id + +enterApLabel is_updatable arity + | tablesNextToCode = mkApInfoTableLabel is_updatable arity + | otherwise = mkApEntryLabel is_updatable arity + +enterSelectorLabel upd_flag offset + | tablesNextToCode = mkSelectorInfoLabel upd_flag offset + | otherwise = mkSelectorEntryLabel upd_flag offset + +enterIdLabel hmods id + | tablesNextToCode = mkInfoTableLabel hmods id + | otherwise = mkEntryLabel hmods id + +enterLocalIdLabel id + | tablesNextToCode = mkLocalInfoTableLabel id + | otherwise = mkLocalEntryLabel id + +enterReturnPtLabel name + | tablesNextToCode = mkReturnInfoLabel name + | otherwise = mkReturnPtLabel name +\end{code} + + +We need a black-hole closure info to pass to @allocDynClosure@ when we +want to allocate the black hole on entry to a CAF. These are the only +ways to build an LFBlackHole, maintaining the invariant that it really +is a black hole and not something else. + +\begin{code} +cafBlackHoleClosureInfo (ClosureInfo { closureName = nm, + closureType = ty }) + = ClosureInfo { closureName = nm, + closureLFInfo = LFBlackHole mkCAFBlackHoleInfoTableLabel, + closureSMRep = BlackHoleRep, + closureSRT = NoC_SRT, + closureType = ty, + closureDescr = "" } +cafBlackHoleClosureInfo _ = panic "cafBlackHoleClosureInfo" + +seCafBlackHoleClosureInfo (ClosureInfo { closureName = nm, + closureType = ty }) + = ClosureInfo { closureName = nm, + closureLFInfo = LFBlackHole mkSECAFBlackHoleInfoTableLabel, + closureSMRep = BlackHoleRep, + closureSRT = NoC_SRT, + closureType = ty, + closureDescr = "" } +seCafBlackHoleClosureInfo _ = panic "seCafBlackHoleClosureInfo" +\end{code} + +%************************************************************************ +%* * +\subsection[ClosureInfo-Profiling-funs]{Misc functions about for profiling info.} +%* * +%************************************************************************ + +Profiling requires two pieces of information to be determined for +each closure's info table --- description and type. + +The description is stored directly in the @CClosureInfoTable@ when the +info table is built. + +The type is determined from the type information stored with the @Id@ +in the closure info using @closureTypeDescr@. 
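+
+For example (editorial readings of @getTyDescription@ below): the type
+@Int -> Bool@ is described as @"->Bool"@, @Int -> Int -> Bool@ as
+@"->>Bool"@, @[Int]@ as @"[]"@ (the list type constructor's occurrence
+name), and a bare type variable as @"*"@.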
+ +\begin{code} +closureValDescr, closureTypeDescr :: ClosureInfo -> String +closureValDescr (ClosureInfo {closureDescr = descr}) + = descr +closureValDescr (ConInfo {closureCon = con}) + = occNameString (getOccName con) + +closureTypeDescr (ClosureInfo { closureType = ty }) + = getTyDescription ty +closureTypeDescr (ConInfo { closureCon = data_con }) + = occNameString (getOccName (dataConTyCon data_con)) + +getTyDescription :: Type -> String +getTyDescription ty + = case (tcSplitSigmaTy ty) of { (_, _, tau_ty) -> + case tau_ty of + TyVarTy _ -> "*" + AppTy fun _ -> getTyDescription fun + FunTy _ res -> '-' : '>' : fun_result res + TyConApp tycon _ -> getOccString tycon + NoteTy (FTVNote _) ty -> getTyDescription ty + PredTy sty -> getPredTyDescription sty + ForAllTy _ ty -> getTyDescription ty + } + where + fun_result (FunTy _ res) = '>' : fun_result res + fun_result other = getTyDescription other + +getPredTyDescription (ClassP cl tys) = getOccString cl +getPredTyDescription (IParam ip ty) = getOccString (ipNameName ip) +\end{code} + + diff --git a/compiler/codeGen/ClosureInfo.lhs-boot b/compiler/codeGen/ClosureInfo.lhs-boot new file mode 100644 index 0000000000..b069905d3e --- /dev/null +++ b/compiler/codeGen/ClosureInfo.lhs-boot @@ -0,0 +1,6 @@ +\begin{code} +module ClosureInfo where + +data LambdaFormInfo +data ClosureInfo +\end{code}
\ No newline at end of file diff --git a/compiler/codeGen/CodeGen.lhs b/compiler/codeGen/CodeGen.lhs new file mode 100644 index 0000000000..e8d83a5a43 --- /dev/null +++ b/compiler/codeGen/CodeGen.lhs @@ -0,0 +1,343 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[CodeGen]{@CodeGen@: main module of the code generator} + +This module says how things get going at the top level. + +@codeGen@ is the interface to the outside world. The \tr{cgTop*} +functions drive the mangling of top-level bindings. + +%************************************************************************ +%* * +\subsection[codeGen-outside-interface]{The code generator's offering to the world} +%* * +%************************************************************************ + +\begin{code} +module CodeGen ( codeGen ) where + +#include "HsVersions.h" + +-- Kludge (??) so that CgExpr is reached via at least one non-SOURCE +-- import. Before, that wasn't the case, and CM therefore didn't +-- bother to compile it. +import CgExpr ( {-NOTHING!-} ) -- DO NOT DELETE THIS IMPORT +import CgProf +import CgMonad +import CgBindery ( CgIdInfo, addBindC, addBindsC, getCgIdInfo, + cgIdInfoId ) +import CgClosure ( cgTopRhsClosure ) +import CgCon ( cgTopRhsCon, cgTyCon ) +import CgUtils ( cmmRegOffW, emitRODataLits, cmmNeWord ) + +import CLabel +import Cmm +import CmmUtils ( zeroCLit, mkIntCLit, mkLblExpr ) +import PprCmm ( pprCmms ) +import MachOp ( wordRep, MachHint(..) ) + +import StgSyn +import PrelNames ( gHC_PRIM, rOOT_MAIN, mAIN, pREL_TOP_HANDLER ) +import Packages ( HomeModules ) +import DynFlags ( DynFlags(..), DynFlag(..), dopt ) +import StaticFlags ( opt_SccProfilingOn ) + +import HscTypes ( ForeignStubs(..), TypeEnv, typeEnvTyCons ) +import CostCentre ( CollectedCCs ) +import Id ( Id, idName, setIdName ) +import Name ( nameSrcLoc, nameOccName, nameUnique, isInternalName, mkExternalName ) +import OccName ( mkLocalOcc ) +import TyCon ( TyCon ) +import Module ( Module, mkModule ) +import ErrUtils ( dumpIfSet_dyn, showPass ) +import Panic ( assertPanic ) + +#ifdef DEBUG +import Outputable +#endif +\end{code} + +\begin{code} +codeGen :: DynFlags + -> HomeModules + -> Module + -> [TyCon] + -> ForeignStubs + -> [Module] -- directly-imported modules + -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering. + -> [(StgBinding,[(Id,[Id])])] -- Bindings to convert, with SRTs + -> IO [Cmm] -- Output + +codeGen dflags hmods this_mod data_tycons foreign_stubs imported_mods + cost_centre_info stg_binds + = do + { showPass dflags "CodeGen" + ; let way = buildTag dflags + main_mod = mainModIs dflags + +-- Why? +-- ; mapM_ (\x -> seq x (return ())) data_tycons + + ; code_stuff <- initC dflags hmods this_mod $ do + { cmm_binds <- mapM (getCmm . 
cgTopBinding dflags hmods) stg_binds
+ ; cmm_tycons <- mapM cgTyCon data_tycons
+ ; cmm_init <- getCmm (mkModuleInit dflags hmods way cost_centre_info
+ this_mod main_mod
+ foreign_stubs imported_mods)
+ ; return (cmm_binds ++ concat cmm_tycons ++ [cmm_init])
+ }
+ -- Put datatype_stuff after code_stuff, because the
+ -- datatype closure table (for enumeration types) refers to
+ -- (say) PrelBase_True_closure, which is defined in
+ -- code_stuff
+
+ ; dumpIfSet_dyn dflags Opt_D_dump_cmm "Cmm" (pprCmms code_stuff)
+
+ ; return code_stuff }
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[codegen-init]{Module initialisation code}
+%* *
+%************************************************************************
+
+/* -----------------------------------------------------------------------------
+ Module initialisation
+
+ The module initialisation code looks like this, roughly:
+
+ FN(__stginit_Foo) {
+ JMP_(__stginit_Foo_1_p)
+ }
+
+ FN(__stginit_Foo_1_p) {
+ ...
+ }
+
+ We have one version of the init code with a module version and the
+ 'way' attached to it. The version number helps to catch cases
+ where modules are not compiled in dependency order before being
+ linked: if a module has been compiled since any modules which depend on
+ it, then the latter modules will refer to a different version in their
+ init blocks and a link error will ensue.
+
+ The 'way' suffix helps to catch cases where modules compiled in different
+ ways are linked together (eg. profiled and non-profiled).
+
+ We provide a plain, unadorned, version of the module init code
+ which just jumps to the version with the label and way attached. The
+ reason for this is that when using foreign exports, the caller of
+ startupHaskell() must supply the name of the init function for the "top"
+ module in the program, and we don't want to require that this name
+ has the version and way info appended to it.
+ -------------------------------------------------------------------------- */
+
+We initialise the module tree by keeping a work-stack,
+ * pointed to by Sp
+ * that grows downward
+ * Sp points to the last occupied slot
+
+
+\begin{code}
+mkModuleInit
+ :: DynFlags
+ -> HomeModules
+ -> String -- the "way"
+ -> CollectedCCs -- cost centre info
+ -> Module
+ -> Module -- name of the Main module
+ -> ForeignStubs
+ -> [Module]
+ -> Code
+mkModuleInit dflags hmods way cost_centre_info this_mod main_mod foreign_stubs imported_mods
+ = do {
+ if opt_SccProfilingOn
+ then do { -- Allocate the static boolean that records if this
+ -- module has been registered already
+ emitData Data [CmmDataLabel moduleRegdLabel,
+ CmmStaticLit zeroCLit]
+
+ ; emitSimpleProc real_init_lbl $ do
+ { ret_blk <- forkLabelledCode ret_code
+
+ ; init_blk <- forkLabelledCode $ do
+ { mod_init_code; stmtC (CmmBranch ret_blk) }
+
+ ; stmtC (CmmCondBranch (cmmNeWord (CmmLit zeroCLit) mod_reg_val)
+ ret_blk)
+ ; stmtC (CmmBranch init_blk)
+ }
+ }
+ else emitSimpleProc real_init_lbl ret_code
+
+ -- Make the "plain" procedure jump to the "real" init procedure
+ ; emitSimpleProc plain_init_lbl jump_to_init
+
+ -- When compiling the module in which the 'main' function lives,
+ -- (that is, this_mod == main_mod)
+ -- we inject an extra stg_init procedure for stg_init_ZCMain, for the
+ -- RTS to invoke.
We must consult the -main-is flag in case the
+ -- user specified a different function to Main.main
+ ; whenC (this_mod == main_mod)
+ (emitSimpleProc plain_main_init_lbl jump_to_init)
+ }
+ where
+ plain_init_lbl = mkPlainModuleInitLabel hmods this_mod
+ real_init_lbl = mkModuleInitLabel hmods this_mod way
+ plain_main_init_lbl = mkPlainModuleInitLabel hmods rOOT_MAIN
+
+ jump_to_init = stmtC (CmmJump (mkLblExpr real_init_lbl) [])
+
+ mod_reg_val = CmmLoad (mkLblExpr moduleRegdLabel) wordRep
+
+ -- Main refers to GHC.TopHandler.runIO, so make sure we call the
+ -- init function for GHC.TopHandler.
+ extra_imported_mods
+ | this_mod == main_mod = [pREL_TOP_HANDLER]
+ | otherwise = []
+
+ mod_init_code = do
+ { -- Set mod_reg to 1 to record that we've been here
+ stmtC (CmmStore (mkLblExpr moduleRegdLabel) (CmmLit (mkIntCLit 1)))
+
+ -- Now do local stuff
+ ; initCostCentres cost_centre_info
+ ; mapCs (registerModuleImport hmods way)
+ (imported_mods++extra_imported_mods)
+ }
+
+ -- The return-code pops the work stack by
+ -- incrementing Sp, and then jumps to the popped item
+ ret_code = stmtsC [ CmmAssign spReg (cmmRegOffW spReg 1)
+ , CmmJump (CmmLoad (cmmRegOffW spReg (-1)) wordRep) [] ]
+
+-----------------------
+registerModuleImport :: HomeModules -> String -> Module -> Code
+registerModuleImport hmods way mod
+ | mod == gHC_PRIM
+ = nopC
+ | otherwise -- Push the init procedure onto the work stack
+ = stmtsC [ CmmAssign spReg (cmmRegOffW spReg (-1))
+ , CmmStore (CmmReg spReg) (mkLblExpr (mkModuleInitLabel hmods mod way)) ]
+\end{code}
+
+
+
+Cost-centre profiling: Besides the usual stuff, we must produce
+declarations for the cost-centres defined in this module;
+
+(The local cost-centres involved in this are passed into the
+code-generator.)
+
+\begin{code}
+initCostCentres :: CollectedCCs -> Code
+-- Emit the declarations, and return code to register them
+initCostCentres (local_CCs, ___extern_CCs, singleton_CCSs)
+ | not opt_SccProfilingOn = nopC
+ | otherwise
+ = do { mapM_ emitCostCentreDecl local_CCs
+ ; mapM_ emitCostCentreStackDecl singleton_CCSs
+ ; mapM_ emitRegisterCC local_CCs
+ ; mapM_ emitRegisterCCS singleton_CCSs
+ }
+\end{code}
+
+%************************************************************************
+%* *
+\subsection[codegen-top-bindings]{Converting top-level STG bindings}
+%* *
+%************************************************************************
+
+@cgTopBinding@ is only used for top-level bindings, since they need
+to be allocated statically (not in the heap) and need to be labelled.
+No unboxed bindings can happen at top level.
+
+In the code below, the static bindings are accumulated in the
+@MkCgState@, and transferred into the ``statics'' slot by @forkStatics@.
+This is so that we can write the top level processing in a compositional
+style, with the increasing static environment being plumbed as a state
+variable.
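+
+The recursive case below ties a knot with @fixC@: the bindings produced by
+compiling the right-hand sides are added, via @addBindsC@, to the very
+environment in which those right-hand sides are compiled. The same idiom
+with the standard @MonadFix@ class (an editorial sketch, not part of this
+patch) is
+
+ mfix (\xs -> return (1 : take 2 xs)) -- yields [1,1,1] in IO
+
+which relies on laziness in just the same way.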
+ +\begin{code} +cgTopBinding :: DynFlags -> HomeModules -> (StgBinding,[(Id,[Id])]) -> Code +cgTopBinding dflags hmods (StgNonRec id rhs, srts) + = do { id' <- maybeExternaliseId dflags id + ; mapM_ (mkSRT hmods [id']) srts + ; (id,info) <- cgTopRhs id' rhs + ; addBindC id info -- Add the *un-externalised* Id to the envt, + -- so we find it when we look up occurrences + } + +cgTopBinding dflags hmods (StgRec pairs, srts) + = do { let (bndrs, rhss) = unzip pairs + ; bndrs' <- mapFCs (maybeExternaliseId dflags) bndrs + ; let pairs' = zip bndrs' rhss + ; mapM_ (mkSRT hmods bndrs') srts + ; _new_binds <- fixC (\ new_binds -> do + { addBindsC new_binds + ; mapFCs ( \ (b,e) -> cgTopRhs b e ) pairs' }) + ; nopC } + +mkSRT :: HomeModules -> [Id] -> (Id,[Id]) -> Code +mkSRT hmods these (id,[]) = nopC +mkSRT hmods these (id,ids) + = do { ids <- mapFCs remap ids + ; id <- remap id + ; emitRODataLits (mkSRTLabel (idName id)) + (map (CmmLabel . mkClosureLabel hmods . idName) ids) + } + where + -- Sigh, better map all the ids against the environment in + -- case they've been externalised (see maybeExternaliseId below). + remap id = case filter (==id) these of + (id':_) -> returnFC id' + [] -> do { info <- getCgIdInfo id; return (cgIdInfoId info) } + +-- Urgh! I tried moving the forkStatics call from the rhss of cgTopRhs +-- to enclose the listFCs in cgTopBinding, but that tickled the +-- statics "error" call in initC. I DON'T UNDERSTAND WHY! + +cgTopRhs :: Id -> StgRhs -> FCode (Id, CgIdInfo) + -- The Id is passed along for setting up a binding... + -- It's already been externalised if necessary + +cgTopRhs bndr (StgRhsCon cc con args) + = forkStatics (cgTopRhsCon bndr con args) + +cgTopRhs bndr (StgRhsClosure cc bi fvs upd_flag srt args body) + = ASSERT(null fvs) -- There should be no free variables + setSRTLabel (mkSRTLabel (idName bndr)) $ + forkStatics (cgTopRhsClosure bndr cc bi srt upd_flag args body) +\end{code} + + +%************************************************************************ +%* * +\subsection{Stuff to support splitting} +%* * +%************************************************************************ + +If we're splitting the object, we need to externalise all the top-level names +(and then make sure we only use the externalised one in any C label we use +which refers to this name). + +\begin{code} +maybeExternaliseId :: DynFlags -> Id -> FCode Id +maybeExternaliseId dflags id + | dopt Opt_SplitObjs dflags, -- Externalise the name for -split-objs + isInternalName name = do { mod <- moduleName + ; returnFC (setIdName id (externalise mod)) } + | otherwise = returnFC id + where + externalise mod = mkExternalName uniq mod new_occ Nothing loc + name = idName id + uniq = nameUnique name + new_occ = mkLocalOcc uniq (nameOccName name) + loc = nameSrcLoc name + -- We want to conjure up a name that can't clash with any + -- existing name. So we generate + -- Mod_$L243foo + -- where 243 is the unique. +\end{code} diff --git a/compiler/codeGen/SMRep.lhs b/compiler/codeGen/SMRep.lhs new file mode 100644 index 0000000000..c807703b13 --- /dev/null +++ b/compiler/codeGen/SMRep.lhs @@ -0,0 +1,361 @@ +% +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +% +\section[SMRep]{Storage manager representations of closure} + +This is here, rather than in ClosureInfo, just to keep nhc happy. +Other modules should access this info through ClosureInfo. 
+
+\begin{code}
+module SMRep (
+	-- Words and bytes
+	StgWord, StgHalfWord,
+	hALF_WORD_SIZE, hALF_WORD_SIZE_IN_BITS,
+	WordOff, ByteOff,
+
+	-- Argument/return representations
+	CgRep(..), nonVoidArg,
+	argMachRep, primRepToCgRep, primRepHint,
+	isFollowableArg, isVoidArg,
+	isFloatingArg, isNonPtrArg, is64BitArg,
+	separateByPtrFollowness,
+	cgRepSizeW, cgRepSizeB,
+	retAddrSizeW,
+
+	typeCgRep, idCgRep, tyConCgRep, typeHint,
+
+	-- Closure representation
+	SMRep(..), ClosureType(..),
+	isStaticRep,
+	fixedHdrSize, arrWordsHdrSize, arrPtrsHdrSize,
+	profHdrSize, thunkHdrSize,
+	tablesNextToCode,
+	smRepClosureType, smRepClosureTypeInt,
+
+	rET_SMALL, rET_VEC_SMALL, rET_BIG, rET_VEC_BIG
+    ) where
+
+#include "HsVersions.h"
+#include "../includes/MachDeps.h"
+
+import Id		( Id, idType )
+import Type		( Type, typePrimRep, PrimRep(..) )
+import TyCon		( TyCon, tyConPrimRep )
+import MachOp		-- ( MachRep(..), MachHint(..), wordRep )
+import StaticFlags	( opt_SccProfilingOn, opt_GranMacros,
+			  opt_Unregisterised )
+import Constants
+import Outputable
+
+import DATA_WORD
+\end{code}
+
+
+%************************************************************************
+%*                                                                      *
+			Words and bytes
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+type WordOff = Int	-- Word offset, or word count
+type ByteOff = Int	-- Byte offset, or byte count
+\end{code}
+
+StgWord is a type representing a word on the target platform.
+
+\begin{code}
+#if SIZEOF_HSWORD == 4
+type StgWord     = Word32
+type StgHalfWord = Word16
+hALF_WORD_SIZE = 2 :: ByteOff
+hALF_WORD_SIZE_IN_BITS = 16 :: Int
+#elif SIZEOF_HSWORD == 8
+type StgWord     = Word64
+type StgHalfWord = Word32
+hALF_WORD_SIZE = 4 :: ByteOff
+hALF_WORD_SIZE_IN_BITS = 32 :: Int
+#else
+#error unknown SIZEOF_HSWORD
+#endif
+\end{code}
+
+
+%************************************************************************
+%*                                                                      *
+			CgRep
+%*                                                                      *
+%************************************************************************
+
+A CgRep is an abstraction of a Type which tells the code generator
+all it needs to know about the calling convention for arguments (and
+results) of that type.  In particular, the CgReps of a function's
+arguments are used to decide which of the RTS's generic apply
+functions to call when applying an unknown function.
+
+It contains more information than the back-end data type MachRep,
+so one can easily convert from CgRep -> MachRep.  (Except that
+there's no MachRep for a VoidRep.)
+
+It distinguishes
+	pointers from non-pointers (we sort the pointers together
+	when building closures)
+
+	void from other types: a void argument is different from no argument
+
+All 64-bit types map to the same CgRep, because they're passed in the
+same register, but a PtrArg is still different from a NonPtrArg
+because the function's entry convention has to take into account the
+pointer-hood of arguments for the purposes of describing the stack on
+entry to the garbage collector.
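+
+To make the apply-function selection concrete, here is a small sketch
+(the suffix letters follow the RTS convention of p/n/l/f/d/v; the exact
+stg_ap entry-point names are an assumption, and this helper is not part
+of the module):
+
+\begin{code}
+-- Sketch: pick a generic apply entry point from the CgReps of the
+-- arguments (the CgRep type itself is defined just below).
+cgRepApplyChar :: CgRep -> Char
+cgRepApplyChar PtrArg    = 'p'
+cgRepApplyChar NonPtrArg = 'n'
+cgRepApplyChar LongArg   = 'l'
+cgRepApplyChar FloatArg  = 'f'
+cgRepApplyChar DoubleArg = 'd'
+cgRepApplyChar VoidArg   = 'v'
+
+-- e.g. applying an unknown function to a pointer and a double would
+-- use something like stg_ap_pd:
+genericApplyName :: [CgRep] -> String
+genericApplyName reps = "stg_ap_" ++ map cgRepApplyChar reps
+\end{code}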
+
+\begin{code}
+data CgRep
+  = VoidArg	-- Void
+  | PtrArg	-- Word-sized pointer
+  | NonPtrArg	-- Word-sized non-pointer
+  | LongArg	-- 64-bit non-pointer
+  | FloatArg	-- 32-bit float
+  | DoubleArg	-- 64-bit float
+  deriving Eq
+
+instance Outputable CgRep where
+    ppr VoidArg   = ptext SLIT("V_")
+    ppr PtrArg    = ptext SLIT("P_")
+    ppr NonPtrArg = ptext SLIT("I_")
+    ppr LongArg   = ptext SLIT("L_")
+    ppr FloatArg  = ptext SLIT("F_")
+    ppr DoubleArg = ptext SLIT("D_")
+
+argMachRep :: CgRep -> MachRep
+argMachRep PtrArg    = wordRep
+argMachRep NonPtrArg = wordRep
+argMachRep LongArg   = I64
+argMachRep FloatArg  = F32
+argMachRep DoubleArg = F64
+argMachRep VoidArg   = panic "argMachRep:VoidRep"
+
+primRepToCgRep :: PrimRep -> CgRep
+primRepToCgRep VoidRep   = VoidArg
+primRepToCgRep PtrRep    = PtrArg
+primRepToCgRep IntRep    = NonPtrArg
+primRepToCgRep WordRep   = NonPtrArg
+primRepToCgRep Int64Rep  = LongArg
+primRepToCgRep Word64Rep = LongArg
+primRepToCgRep AddrRep   = NonPtrArg
+primRepToCgRep FloatRep  = FloatArg
+primRepToCgRep DoubleRep = DoubleArg
+
+primRepHint :: PrimRep -> MachHint
+primRepHint VoidRep   = panic "primRepHint:VoidRep"
+primRepHint PtrRep    = PtrHint
+primRepHint IntRep    = SignedHint
+primRepHint WordRep   = NoHint
+primRepHint Int64Rep  = SignedHint
+primRepHint Word64Rep = NoHint
+primRepHint AddrRep   = PtrHint -- NB! PtrHint, but NonPtrArg
+primRepHint FloatRep  = FloatHint
+primRepHint DoubleRep = FloatHint
+
+idCgRep :: Id -> CgRep
+idCgRep = typeCgRep . idType
+
+tyConCgRep :: TyCon -> CgRep
+tyConCgRep = primRepToCgRep . tyConPrimRep
+
+typeCgRep :: Type -> CgRep
+typeCgRep = primRepToCgRep . typePrimRep
+
+typeHint :: Type -> MachHint
+typeHint = primRepHint . typePrimRep
+\end{code}
+
+Whether or not the thing is a pointer that the garbage-collector
+should follow.  Or, to put it another (less confusing) way, whether
+the object in question is a heap object.
+
+The outcome determines which stack the pointer/object may have to be
+saved onto, and how the GC liveness info is computed.
+
+\begin{code}
+isFollowableArg :: CgRep -> Bool  -- True <=> points to a heap object
+isFollowableArg PtrArg = True
+isFollowableArg other  = False
+
+isVoidArg :: CgRep -> Bool
+isVoidArg VoidArg = True
+isVoidArg other   = False
+
+nonVoidArg :: CgRep -> Bool
+nonVoidArg VoidArg = False
+nonVoidArg other   = True
+
+-- isFloatingArg is used to distinguish @Double@ and @Float@ which
+-- cause inadvertent numeric conversions if you aren't jolly careful.
+-- See codeGen/CgCon:cgTopRhsCon.
+
+isFloatingArg :: CgRep -> Bool
+isFloatingArg DoubleArg = True
+isFloatingArg FloatArg  = True
+isFloatingArg _         = False
+
+isNonPtrArg :: CgRep -> Bool
+-- Identify anything which is one word large and not a pointer.
+isNonPtrArg NonPtrArg = True
+isNonPtrArg other     = False
+
+is64BitArg :: CgRep -> Bool
+is64BitArg LongArg = True
+is64BitArg _       = False
+\end{code}
+
+\begin{code}
+separateByPtrFollowness :: [(CgRep,a)] -> ([(CgRep,a)], [(CgRep,a)])
+-- Returns (ptrs, non-ptrs)
+separateByPtrFollowness things
+  = sep_things things [] []
+    -- accumulating params for follow-able and don't-follow things...
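+    --
+    -- A worked example (order within each class is preserved):
+    --   separateByPtrFollowness [(PtrArg,a), (NonPtrArg,b), (PtrArg,c)]
+    --     == ([(PtrArg,a), (PtrArg,c)], [(NonPtrArg,b)])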
+  where
+    sep_things []		bs us = (reverse bs, reverse us)
+    sep_things ((PtrArg,a):ts)	bs us = sep_things ts ((PtrArg,a):bs) us
+    sep_things (t:ts)		bs us = sep_things ts bs (t:us)
+\end{code}
+
+\begin{code}
+cgRepSizeB :: CgRep -> ByteOff
+cgRepSizeB DoubleArg = dOUBLE_SIZE
+cgRepSizeB LongArg   = wORD64_SIZE
+cgRepSizeB VoidArg   = 0
+cgRepSizeB _         = wORD_SIZE
+
+cgRepSizeW :: CgRep -> WordOff
+cgRepSizeW DoubleArg = dOUBLE_SIZE `quot` wORD_SIZE
+cgRepSizeW LongArg   = wORD64_SIZE `quot` wORD_SIZE
+cgRepSizeW VoidArg   = 0
+cgRepSizeW _         = 1
+
+retAddrSizeW :: WordOff
+retAddrSizeW = 1	-- One word
+\end{code}
+
+%************************************************************************
+%*                                                                      *
+\subsubsection[SMRep-datatype]{@SMRep@---storage manager representation}
+%*                                                                      *
+%************************************************************************
+
+\begin{code}
+data SMRep
+     -- static closures have an extra static link field at the end.
+  = GenericRep		-- GC routines consult sizes in info tbl
+	Bool		-- True <=> This is a static closure.  Affects how
+			--	    we garbage-collect it
+	!Int		-- # ptr words
+	!Int		-- # non-ptr words
+	ClosureType	-- closure type
+
+  | BlackHoleRep
+
+data ClosureType	-- Corresponds 1-1 with the varieties of closures
+			-- implemented by the RTS.  Compare with
+			-- ghc/includes/ClosureTypes.h
+  = Constr
+  | ConstrNoCaf
+  | Fun
+  | Thunk
+  | ThunkSelector
+\end{code}
+
+Size of a closure header.
+
+\begin{code}
+fixedHdrSize :: WordOff
+fixedHdrSize = sTD_HDR_SIZE + profHdrSize + granHdrSize
+
+profHdrSize :: WordOff
+profHdrSize | opt_SccProfilingOn = pROF_HDR_SIZE
+	    | otherwise		 = 0
+
+granHdrSize :: WordOff
+granHdrSize | opt_GranMacros	 = gRAN_HDR_SIZE
+	    | otherwise		 = 0
+
+arrWordsHdrSize :: ByteOff
+arrWordsHdrSize = fixedHdrSize*wORD_SIZE + sIZEOF_StgArrWords_NoHdr
+
+arrPtrsHdrSize :: ByteOff
+arrPtrsHdrSize = fixedHdrSize*wORD_SIZE + sIZEOF_StgMutArrPtrs_NoHdr
+
+-- Thunks have an extra header word on SMP, so the update doesn't
+-- splat the payload.
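+--
+-- As a worked example (assuming the usual values sTD_HDR_SIZE = 1 and
+-- pROF_HDR_SIZE = 2, and sIZEOF_StgSMPThunkHeader = one word): a vanilla
+-- build has fixedHdrSize = 1 and thunkHdrSize = 2, while a profiling
+-- build has fixedHdrSize = 3 and thunkHdrSize = 4.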
+thunkHdrSize :: WordOff
+thunkHdrSize = fixedHdrSize + smp_hdr
+	where smp_hdr = sIZEOF_StgSMPThunkHeader `quot` wORD_SIZE
+\end{code}
+
+\begin{code}
+-- The IA64 and powerpc64 manglers don't place tables next to code
+tablesNextToCode :: Bool
+#if defined(ia64_TARGET_ARCH) || defined(powerpc64_TARGET_ARCH)
+tablesNextToCode = False
+#else
+tablesNextToCode = not opt_Unregisterised
+#endif
+\end{code}
+
+\begin{code}
+isStaticRep :: SMRep -> Bool
+isStaticRep (GenericRep is_static _ _ _) = is_static
+isStaticRep BlackHoleRep		 = False
+\end{code}
+
+\begin{code}
+#include "../includes/ClosureTypes.h"
+-- Defines CONSTR, CONSTR_1_0 etc
+
+smRepClosureType :: SMRep -> ClosureType
+smRepClosureType (GenericRep _ _ _ ty) = ty
+smRepClosureType BlackHoleRep	       = panic "smRepClosureType: black hole"
+
+smRepClosureTypeInt :: SMRep -> Int
+smRepClosureTypeInt (GenericRep False 1 0 Constr) = CONSTR_1_0
+smRepClosureTypeInt (GenericRep False 0 1 Constr) = CONSTR_0_1
+smRepClosureTypeInt (GenericRep False 2 0 Constr) = CONSTR_2_0
+smRepClosureTypeInt (GenericRep False 1 1 Constr) = CONSTR_1_1
+smRepClosureTypeInt (GenericRep False 0 2 Constr) = CONSTR_0_2
+smRepClosureTypeInt (GenericRep False _ _ Constr) = CONSTR
+
+smRepClosureTypeInt (GenericRep False 1 0 Fun) = FUN_1_0
+smRepClosureTypeInt (GenericRep False 0 1 Fun) = FUN_0_1
+smRepClosureTypeInt (GenericRep False 2 0 Fun) = FUN_2_0
+smRepClosureTypeInt (GenericRep False 1 1 Fun) = FUN_1_1
+smRepClosureTypeInt (GenericRep False 0 2 Fun) = FUN_0_2
+smRepClosureTypeInt (GenericRep False _ _ Fun) = FUN
+
+smRepClosureTypeInt (GenericRep False 1 0 Thunk) = THUNK_1_0
+smRepClosureTypeInt (GenericRep False 0 1 Thunk) = THUNK_0_1
+smRepClosureTypeInt (GenericRep False 2 0 Thunk) = THUNK_2_0
+smRepClosureTypeInt (GenericRep False 1 1 Thunk) = THUNK_1_1
+smRepClosureTypeInt (GenericRep False 0 2 Thunk) = THUNK_0_2
+smRepClosureTypeInt (GenericRep False _ _ Thunk) = THUNK
+
+smRepClosureTypeInt (GenericRep False _ _ ThunkSelector) = THUNK_SELECTOR
+
+smRepClosureTypeInt (GenericRep True _ _ Constr)      = CONSTR_STATIC
+smRepClosureTypeInt (GenericRep True _ _ ConstrNoCaf) = CONSTR_NOCAF_STATIC
+smRepClosureTypeInt (GenericRep True _ _ Fun)	      = FUN_STATIC
+smRepClosureTypeInt (GenericRep True _ _ Thunk)	      = THUNK_STATIC
+
+smRepClosureTypeInt BlackHoleRep = BLACKHOLE
+
+smRepClosureTypeInt rep = panic "smRepClosureTypeInt"
+
+-- We export these ones
+rET_SMALL     = (RET_SMALL     :: Int)
+rET_VEC_SMALL = (RET_VEC_SMALL :: Int)
+rET_BIG       = (RET_BIG       :: Int)
+rET_VEC_BIG   = (RET_VEC_BIG   :: Int)
+\end{code}
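+
+Reading the dispatch above, a couple of concrete cases (an illustrative
+note; the helper below is hypothetical and not part of the original file):
+
+\begin{code}
+-- A dynamic constructor with 1 pointer and 1 non-pointer gets the
+-- specialised closure type:
+--	smRepClosureTypeInt (GenericRep False 1 1 Constr) == CONSTR_1_1
+-- whereas any static constructor collapses to the generic static type:
+--	smRepClosureTypeInt (GenericRep True 1 1 Constr)  == CONSTR_STATIC
+--
+-- Hypothetical predicate capturing why only small dynamic layouts are
+-- special-cased: the RTS has specialised entry code only for closures
+-- with ptrs+nptrs of 1 or 2.
+hasSpecialisedVariant :: Int -> Int -> Bool
+hasSpecialisedVariant ptrs nptrs = total >= 1 && total <= 2
+  where total = ptrs + nptrs
+\end{code}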